cipher.py

   1 import string
   2 import collections
   3 import norms
   4 import logging
   5 import math
   6 from itertools import zip_longest, repeat
   7 from segment import segment
   8 from multiprocessing import Pool
   9
  10 # To time a run:
  11 #
  12 # import timeit
  13 # c5a = open('2012/5a.ciphertext', 'r').read()
  14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
  15 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1
  16
  17 logger = logging.getLogger(__name__)
  18 logger.addHandler(logging.FileHandler('cipher.log'))
  19 logger.setLevel(logging.WARNING)
  20 #logger.setLevel(logging.INFO)
  21 #logger.setLevel(logging.DEBUG)
  22
  23 english_counts = collections.defaultdict(int)
  24 with open('count_1l.txt', 'r') as f:
  25     for line in f:
  26         (letter, count) = line.split("\t")
  27         english_counts[letter] = int(count)
  28 normalised_english_counts = norms.normalise(english_counts)
  29
  30 english_bigram_counts = collections.defaultdict(int)
  31 with open('count_2l.txt', 'r') as f:
  32     for line in f:
  33         (bigram, count) = line.split("\t")
  34         english_bigram_counts[bigram] = int(count)
  35 normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
  36
  37 english_trigram_counts = collections.defaultdict(int)
  38 with open('count_3l.txt', 'r') as f:
  39     for line in f:
  40         (trigram, count) = line.split("\t")
  41         english_trigram_counts[trigram] = int(count)
  42 normalised_english_trigram_counts = norms.normalise(english_trigram_counts)
  43
  44
  45 with open('words.txt', 'r') as f:
  46     keywords = [line.rstrip() for line in f]
  47
  48 modular_division_table = [[0]*26 for x in range(26)]
  49 for a in range(26):
  50     for b in range(26):
  51         c = (a * b) % 26
  52         modular_division_table[b][c] = a
  53
  54 def letters(text):
  55     """Remove all non-alphabetic characters from a text
  56     >>> letters('The Quick')
  57     'TheQuick'
  58     >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
  59     'TheQuickBROWNfoxjumpedoverthelazyDOG'
  60     """
  61     return ''.join([c for c in text if c in string.ascii_letters])
  62
  63 def sanitise(text):
  64     """Remove all non-alphabetic characters and convert the text to lowercase
  65
  66     >>> sanitise('The Quick')
  67     'thequick'
  68     >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
  69     'thequickbrownfoxjumpedoverthelazydog'
  70     """
  71     # sanitised = [c.lower() for c in text if c in string.ascii_letters]
  72     # return ''.join(sanitised)
  73     return letters(text).lower()
  74
  75 def ngrams(text, n):
  76     """Returns all n-grams of a text
  77
  78     >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
  79     ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
  80      'nf', 'fo', 'ox']
  81     >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
  82     ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
  83      'rown', 'ownf', 'wnfo', 'nfox']
  84     """
  85     return [text[i:i+n] for i in range(len(text)-n+1)]
  86
  87 def every_nth(text, n, fillvalue=''):
  88     """Returns n strings, each of which consists of every nth character,
  89     starting with the 0th, 1st, 2nd, ... (n-1)th character
  90
  91     >>> every_nth(string.ascii_lowercase, 5)
  92     ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
  93     >>> every_nth(string.ascii_lowercase, 1)
  94     ['abcdefghijklmnopqrstuvwxyz']
  95     >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
  96     ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
  97      'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
  98     >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
  99     ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
 100     """
 101     split_text = [text[i:i+n] for i in range(0, len(text), n)]
 102     return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
 103
 104 def combine_every_nth(split_text):
 105     """Reforms a text split into every_nth strings
 106
 107     >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
 108     'abcdefghijklmnopqrstuvwxyz'
 109     >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
 110     'abcdefghijklmnopqrstuvwxyz'
 111     >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
 112     'abcdefghijklmnopqrstuvwxyz'
 113     """
 114     return ''.join([''.join(l)
 115                     for l in zip_longest(*split_text, fillvalue='')])
 116
 117 def transpose(items, transposition):
 118     """Moves items around according to the given transposition
 119
 120     >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
 121     ['a', 'b', 'c', 'd']
 122     >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
 123     ['d', 'b', 'c', 'a']
 124     >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
 125     [13, 12, 14, 11, 15, 10]
 126     """
 127     transposed = list(repeat('', len(transposition)))
 128     for p, t in enumerate(transposition):
 129        transposed[p] = items[t]
 130     return transposed
 131
 132 def untranspose(items, transposition):
 133     """Undoes a transpose
 134
 135     >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
 136     ['a', 'b', 'c', 'd']
 137     >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
 138     ['a', 'b', 'c', 'd']
 139     >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
 140     [10, 11, 12, 13, 14, 15]
 141     """
 142     transposed  = list(repeat('', len(transposition)))
 143     for p, t in enumerate(transposition):
 144        transposed[t] = items[p]
 145     return transposed
 146
 147
 148 def frequencies(text):
 149     """Count the number of occurrences of each character in text
 150
 151     >>> sorted(frequencies('abcdefabc').items())
 152     [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
 153     >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
 154          'dog').items()) # doctest: +NORMALIZE_WHITESPACE
 155     [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
 156      ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
 157      ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
 158      ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
 159     >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
 160          '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
 161     [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
 162      ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
 163      ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
 164      ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
 165      ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
 166     >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
 167          'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
 168     [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
 169      ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
 170      ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
 171      ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
 172     >>> frequencies('abcdefabcdef')['x']
 173     0
 174     """
 175     #counts = collections.defaultdict(int)
 176     #for c in text:
 177     #    counts[c] += 1
 178     #return counts
 179     return collections.Counter(c for c in text)
 180 letter_frequencies = frequencies
 181
 182 def deduplicate(text):
 183     return list(collections.OrderedDict.fromkeys(text))
 184
 185
 186
 187 def caesar_encipher_letter(letter, shift):
 188     """Encipher a letter, given a shift amount
 189
 190     >>> caesar_encipher_letter('a', 1)
 191     'b'
 192     >>> caesar_encipher_letter('a', 2)
 193     'c'
 194     >>> caesar_encipher_letter('b', 2)
 195     'd'
 196     >>> caesar_encipher_letter('x', 2)
 197     'z'
 198     >>> caesar_encipher_letter('y', 2)
 199     'a'
 200     >>> caesar_encipher_letter('z', 2)
 201     'b'
 202     >>> caesar_encipher_letter('z', -1)
 203     'y'
 204     >>> caesar_encipher_letter('a', -1)
 205     'z'
 206     """
 207     if letter in string.ascii_letters:
 208         if letter in string.ascii_uppercase:
 209             alphabet_start = ord('A')
 210         else:
 211             alphabet_start = ord('a')
 212         return chr(((ord(letter) - alphabet_start + shift) % 26) +
 213                    alphabet_start)
 214     else:
 215         return letter
 216
 217 def caesar_decipher_letter(letter, shift):
 218     """Decipher a letter, given a shift amount
 219
 220     >>> caesar_decipher_letter('b', 1)
 221     'a'
 222     >>> caesar_decipher_letter('b', 2)
 223     'z'
 224     """
 225     return caesar_encipher_letter(letter, -shift)
 226
 227 def caesar_encipher(message, shift):
 228     """Encipher a message with the Caesar cipher of given shift
 229
 230     >>> caesar_encipher('abc', 1)
 231     'bcd'
 232     >>> caesar_encipher('abc', 2)
 233     'cde'
 234     >>> caesar_encipher('abcxyz', 2)
 235     'cdezab'
 236     >>> caesar_encipher('ab cx yz', 2)
 237     'cd ez ab'
 238     """
 239     enciphered = [caesar_encipher_letter(l, shift) for l in message]
 240     return ''.join(enciphered)
 241
 242 def caesar_decipher(message, shift):
 243     """Encipher a message with the Caesar cipher of given shift
 244
 245     >>> caesar_decipher('bcd', 1)
 246     'abc'
 247     >>> caesar_decipher('cde', 2)
 248     'abc'
 249     >>> caesar_decipher('cd ez ab', 2)
 250     'ab cx yz'
 251     """
 252     return caesar_encipher(message, -shift)
 253
 254 def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
 255     """Encipher a letter, given a multiplier and adder
 256
 257     >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
 258             for l in string.ascii_uppercase])
 259     'HKNQTWZCFILORUXADGJMPSVYBE'
 260     >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
 261             for l in string.ascii_uppercase])
 262     'FILORUXADGJMPSVYBEHKNQTWZC'
 263     """
 264     if letter in string.ascii_letters:
 265         if letter in string.ascii_uppercase:
 266             alphabet_start = ord('A')
 267         else:
 268             alphabet_start = ord('a')
 269         letter_number = ord(letter) - alphabet_start
 270         if one_based: letter_number += 1
 271         cipher_number = (letter_number * multiplier + adder) % 26
 272         if one_based: cipher_number -= 1
 273         return chr(cipher_number % 26 + alphabet_start)
 274     else:
 275         return letter
 276
 277 def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
 278     """Encipher a letter, given a multiplier and adder
 279
 280     >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
 281             for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
 282     'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
 283     >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
 284             for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
 285     'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
 286     """
 287     if letter in string.ascii_letters:
 288         if letter in string.ascii_uppercase:
 289             alphabet_start = ord('A')
 290         else:
 291             alphabet_start = ord('a')
 292         cipher_number = ord(letter) - alphabet_start
 293         if one_based: cipher_number += 1
 294         plaintext_number = ( modular_division_table[multiplier]
 295                                                    [(cipher_number - adder) % 26] )
 296         if one_based: plaintext_number -= 1
 297         return chr(plaintext_number % 26 + alphabet_start)
 298     else:
 299         return letter
 300
 301 def affine_encipher(message, multiplier=1, adder=0, one_based=True):
 302     """Encipher a message
 303
 304     >>> affine_encipher('hours passed during which jerico tried every ' \
 305            'trick he could think of', 15, 22, True)
 306     'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
 307     """
 308     enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
 309                   for l in message]
 310     return ''.join(enciphered)
 311
 312 def affine_decipher(message, multiplier=1, adder=0, one_based=True):
 313     """Decipher a message
 314
 315     >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
 316            'jfaoe ls omytd jlaxe mh', 15, 22, True)
 317     'hours passed during which jerico tried every trick he could think of'
 318     """
 319     enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
 320                   for l in message]
 321     return ''.join(enciphered)
 322
 323
 324 def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
 325     """Find the cipher alphabet given a keyword.
 326     wrap_alphabet controls how the rest of the alphabet is added
 327     after the keyword.
 328     0 : from 'a'
 329     1 : from the last letter in the sanitised keyword
 330     2 : from the largest letter in the sanitised keyword
 331
 332     >>> keyword_cipher_alphabet_of('bayes')
 333     'bayescdfghijklmnopqrtuvwxz'
 334     >>> keyword_cipher_alphabet_of('bayes', 0)
 335     'bayescdfghijklmnopqrtuvwxz'
 336     >>> keyword_cipher_alphabet_of('bayes', 1)
 337     'bayestuvwxzcdfghijklmnopqr'
 338     >>> keyword_cipher_alphabet_of('bayes', 2)
 339     'bayeszcdfghijklmnopqrtuvwx'
 340     """
 341     if wrap_alphabet == 0:
 342         cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
 343                                               string.ascii_lowercase))
 344     else:
 345         if wrap_alphabet == 1:
 346             last_keyword_letter = deduplicate(sanitise(keyword))[-1]
 347         else:
 348             last_keyword_letter = sorted(sanitise(keyword))[-1]
 349         last_keyword_position = string.ascii_lowercase.find(
 350             last_keyword_letter) + 1
 351         cipher_alphabet = ''.join(
 352             deduplicate(sanitise(keyword) +
 353                         string.ascii_lowercase[last_keyword_position:] +
 354                         string.ascii_lowercase))
 355     return cipher_alphabet
 356
 357
 358 def keyword_encipher(message, keyword, wrap_alphabet=0):
 359     """Enciphers a message with a keyword substitution cipher.
 360     wrap_alphabet controls how the rest of the alphabet is added
 361     after the keyword.
 362     0 : from 'a'
 363     1 : from the last letter in the sanitised keyword
 364     2 : from the largest letter in the sanitised keyword
 365
 366     >>> keyword_encipher('test message', 'bayes')
 367     'rsqr ksqqbds'
 368     >>> keyword_encipher('test message', 'bayes', 0)
 369     'rsqr ksqqbds'
 370     >>> keyword_encipher('test message', 'bayes', 1)
 371     'lskl dskkbus'
 372     >>> keyword_encipher('test message', 'bayes', 2)
 373     'qspq jsppbcs'
 374     """
 375     cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
 376     cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
 377     return message.lower().translate(cipher_translation)
 378
 379 def keyword_decipher(message, keyword, wrap_alphabet=0):
 380     """Deciphers a message with a keyword substitution cipher.
 381     wrap_alphabet controls how the rest of the alphabet is added
 382     after the keyword.
 383     0 : from 'a'
 384     1 : from the last letter in the sanitised keyword
 385     2 : from the largest letter in the sanitised keyword
 386
 387     >>> keyword_decipher('rsqr ksqqbds', 'bayes')
 388     'test message'
 389     >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
 390     'test message'
 391     >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
 392     'test message'
 393     >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
 394     'test message'
 395     """
 396     cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
 397     cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
 398     return message.lower().translate(cipher_translation)
 399
 400 def scytale_encipher(message, rows):
 401     """Enciphers using the scytale transposition cipher.
 402     Message is padded with spaces to allow all rows to be the same length.
 403
 404     >>> scytale_encipher('thequickbrownfox', 3)
 405     'tcnhkfeboqrxuo iw '
 406     >>> scytale_encipher('thequickbrownfox', 4)
 407     'tubnhirfecooqkwx'
 408     >>> scytale_encipher('thequickbrownfox', 5)
 409     'tubn hirf ecoo qkwx '
 410     >>> scytale_encipher('thequickbrownfox', 6)
 411     'tqcrnxhukof eibwo '
 412     >>> scytale_encipher('thequickbrownfox', 7)
 413     'tqcrnx hukof  eibwo  '
 414     """
 415     if len(message) % rows != 0:
 416         message += ' '*(rows - len(message) % rows)
 417     row_length = round(len(message) / rows)
 418     slices = [message[i:i+row_length]
 419               for i in range(0, len(message), row_length)]
 420     return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])
 421
 422 def scytale_decipher(message, rows):
 423     """Deciphers using the scytale transposition cipher.
 424     Assumes the message is padded so that all rows are the same length.
 425
 426     >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
 427     'thequickbrownfox  '
 428     >>> scytale_decipher('tubnhirfecooqkwx', 4)
 429     'thequickbrownfox'
 430     >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
 431     'thequickbrownfox    '
 432     >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
 433     'thequickbrownfox  '
 434     >>> scytale_decipher('tqcrnx hukof  eibwo  ', 7)
 435     'thequickbrownfox     '
 436     """
 437     cols = round(len(message) / rows)
 438     columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
 439     return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
 440
 441
 442 def transpositions_of(keyword):
 443     """Finds the transpostions given by a keyword. For instance, the keyword
 444     'clever' rearranges to 'celrv', so the first column (0) stays first, the
 445     second column (1) moves to third, the third column (2) moves to second,
 446     and so on.
 447
 448     >>> transpositions_of('clever')
 449     [0, 2, 1, 4, 3]
 450     """
 451     key = deduplicate(keyword)
 452     transpositions = [key.index(l) for l in sorted(key)]
 453     return transpositions
 454
 455 def column_transposition_encipher(message, keyword, fillvalue=' '):
 456     """Enciphers using the column transposition cipher.
 457     Message is padded to allow all rows to be the same length.
 458
 459     >>> column_transposition_encipher('hellothere', 'clever')
 460     'hleolteher'
 461     >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
 462     'hleolthre!e!'
 463     """
 464     return column_transposition_worker(message, keyword, encipher=True,
 465                                        fillvalue=fillvalue)
 466
 467 def column_transposition_decipher(message, keyword, fillvalue=' '):
 468     """Deciphers using the column transposition cipher.
 469     Message is padded to allow all rows to be the same length.
 470
 471     >>> column_transposition_decipher('hleolteher', 'clever')
 472     'hellothere'
 473     >>> column_transposition_decipher('hleolthre!e!', 'cleverly', fillvalue='?')
 474     'hellothere!!'
 475     """
 476     return column_transposition_worker(message, keyword, encipher=False,
 477                                        fillvalue=fillvalue)
 478
 479 def column_transposition_worker(message, keyword,
 480                                 encipher=True, fillvalue=' '):
 481     """Does the actual work of the column transposition cipher.
 482     Message is padded with spaces to allow all rows to be the same length.
 483
 484     >>> column_transposition_worker('hellothere', 'clever')
 485     'hleolteher'
 486     >>> column_transposition_worker('hellothere', 'clever', encipher=True)
 487     'hleolteher'
 488     >>> column_transposition_worker('hleolteher', 'clever', encipher=False)
 489     'hellothere'
 490     """
 491     transpositions = transpositions_of(keyword)
 492     columns = every_nth(message, len(transpositions), fillvalue=fillvalue)
 493     if encipher:
 494         transposed_columns = transpose(columns, transpositions)
 495     else:
 496         transposed_columns = untranspose(columns, transpositions)
 497     return combine_every_nth(transposed_columns)
 498
 499 def vigenere_encipher(message, keyword):
 500
 501
 502
 503
 504 def caesar_break(message,
 505                  metric=norms.euclidean_distance,
 506                  target_counts=normalised_english_counts,
 507                  message_frequency_scaling=norms.normalise):
 508     """Breaks a Caesar cipher using frequency analysis
 509
 510     >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
 511           'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
 512     (4, 0.31863952890183...)
 513     >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
 514           'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
 515     (19, 0.42152901235832...)
 516     >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
 517           'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
 518     (13, 0.316029208075451...)
 519     """
 520     sanitised_message = sanitise(message)
 521     best_shift = 0
 522     best_fit = float("inf")
 523     for shift in range(26):
 524         plaintext = caesar_decipher(sanitised_message, shift)
 525         counts = message_frequency_scaling(letter_frequencies(plaintext))
 526         fit = metric(target_counts, counts)
 527         logger.debug('Caesar break attempt using key {0} gives fit of {1} '
 528                       'and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
 529         if fit < best_fit:
 530             best_fit = fit
 531             best_shift = shift
 532     logger.info('Caesar break best fit: key {0} gives fit of {1} and '
 533                 'decrypt starting: {2}'.format(best_shift, best_fit,
 534                     caesar_decipher(sanitised_message, best_shift)[:50]))
 535     return best_shift, best_fit
 536
 537 def affine_break(message,
 538                  metric=norms.euclidean_distance,
 539                  target_counts=normalised_english_counts,
 540                  message_frequency_scaling=norms.normalise):
 541     """Breaks an affine cipher using frequency analysis
 542
 543     >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
 544           'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
 545           'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
 546           'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
 547           'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
 548           'clm ckuxj.') # doctest: +ELLIPSIS
 549     ((15, 22, True), 0.23570361818655...)
 550     """
 551     sanitised_message = sanitise(message)
 552     best_multiplier = 0
 553     best_adder = 0
 554     best_one_based = True
 555     best_fit = float("inf")
 556     for one_based in [True, False]:
 557         for multiplier in range(1, 26, 2):
 558             for adder in range(26):
 559                 plaintext = affine_decipher(sanitised_message,
 560                                             multiplier, adder, one_based)
 561                 counts = message_frequency_scaling(letter_frequencies(plaintext))
 562                 fit = metric(target_counts, counts)
 563                 logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
 564                              'gives fit of {3} and decrypt starting: {4}'.
 565                              format(multiplier, adder, one_based, fit,
 566                                     plaintext[:50]))
 567                 if fit < best_fit:
 568                     best_fit = fit
 569                     best_multiplier = multiplier
 570                     best_adder = adder
 571                     best_one_based = one_based
 572     logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
 573                 'and decrypt starting: {4}'.format(
 574                     best_multiplier, best_adder, best_one_based, best_fit,
 575                     affine_decipher(sanitised_message, best_multiplier,
 576                         best_adder, best_one_based)[:50]))
 577     return (best_multiplier, best_adder, best_one_based), best_fit
 578
 579 def keyword_break(message,
 580                   wordlist=keywords,
 581                   metric=norms.euclidean_distance,
 582                   target_counts=normalised_english_counts,
 583                   message_frequency_scaling=norms.normalise):
 584     """Breaks a keyword substitution cipher using a dictionary and
 585     frequency analysis
 586
 587     >>> keyword_break(keyword_encipher('this is a test message for the ' \
 588           'keyword decipherment', 'elephant', 1), \
 589           wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
 590     (('elephant', 1), 0.41643991598441...)
 591     """
 592     best_keyword = ''
 593     best_wrap_alphabet = True
 594     best_fit = float("inf")
 595     for wrap_alphabet in range(3):
 596         for keyword in wordlist:
 597             plaintext = keyword_decipher(message, keyword, wrap_alphabet)
 598             counts = message_frequency_scaling(letter_frequencies(plaintext))
 599             fit = metric(target_counts, counts)
 600             logger.debug('Keyword break attempt using key {0} (wrap={1}) '
 601                          'gives fit of {2} and decrypt starting: {3}'.format(
 602                              keyword, wrap_alphabet, fit,
 603                              sanitise(plaintext)[:50]))
 604             if fit < best_fit:
 605                 best_fit = fit
 606                 best_keyword = keyword
 607                 best_wrap_alphabet = wrap_alphabet
 608     logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
 609                 '{2} and decrypt starting: {3}'.format(best_keyword,
 610                     best_wrap_alphabet, best_fit, sanitise(
 611                         keyword_decipher(message, best_keyword,
 612                                          best_wrap_alphabet))[:50]))
 613     return (best_keyword, best_wrap_alphabet), best_fit
 614
 615 def keyword_break_mp(message,
 616                      wordlist=keywords,
 617                      metric=norms.euclidean_distance,
 618                      target_counts=normalised_english_counts,
 619                      message_frequency_scaling=norms.normalise,
 620                      chunksize=500):
 621     """Breaks a keyword substitution cipher using a dictionary and
 622     frequency analysis
 623
 624     >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
 625           'keyword decipherment', 'elephant', 1), \
 626           wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
 627     (('elephant', 1), 0.41643991598441...)
 628     """
 629     with Pool() as pool:
 630         helper_args = [(message, word, wrap, metric, target_counts,
 631                         message_frequency_scaling)
 632                        for word in wordlist for wrap in range(3)]
 633         # Gotcha: the helper function here needs to be defined at the top level
 634         #   (limitation of Pool.starmap)
 635         breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
 636         return min(breaks, key=lambda k: k[1])
 637
 638 def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
 639                       message_frequency_scaling):
 640     plaintext = keyword_decipher(message, keyword, wrap_alphabet)
 641     counts = message_frequency_scaling(letter_frequencies(plaintext))
 642     fit = metric(target_counts, counts)
 643     logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
 644                  '{2} and decrypt starting: {3}'.format(keyword,
 645                      wrap_alphabet, fit, sanitise(plaintext)[:50]))
 646     return (keyword, wrap_alphabet), fit
 647
 648 def scytale_break(message,
 649                   metric=norms.euclidean_distance,
 650                   target_counts=normalised_english_bigram_counts,
 651                   message_frequency_scaling=norms.normalise):
 652     """Breaks a Scytale cipher
 653
 654     >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
 655            'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
 656            'aek') # doctest: +ELLIPSIS
 657     (6, 0.83453041115025...)
 658     """
 659     best_key = 0
 660     best_fit = float("inf")
 661     ngram_length = len(next(iter(target_counts.keys())))
 662     for key in range(1, 20):
 663         if len(message) % key == 0:
 664             plaintext = scytale_decipher(message, key)
 665             counts = message_frequency_scaling(frequencies(
 666                          ngrams(sanitise(plaintext), ngram_length)))
 667             fit = metric(target_counts, counts)
 668             logger.debug('Scytale break attempt using key {0} gives fit of '
 669                          '{1} and decrypt starting: {2}'.format(key,
 670                              fit, sanitise(plaintext)[:50]))
 671             if fit < best_fit:
 672                 best_fit = fit
 673                 best_key = key
 674     logger.info('Scytale break best fit with key {0} gives fit of {1} and '
 675                 'decrypt starting: {2}'.format(best_key, best_fit,
 676                     sanitise(scytale_decipher(message, best_key))[:50]))
 677     return best_key, best_fit
 678
 679 def column_transposition_break(message,
 680                   wordlist=keywords,
 681                   metric=norms.euclidean_distance,
 682                   target_counts=normalised_english_bigram_counts,
 683                   message_frequency_scaling=norms.normalise):
 684     """Breaks a column transposition cipher using a dictionary and
 685     n-gram frequency analysis
 686
 687     >>> column_transposition_break(column_transposition_encipher(sanitise( \
 688         "Turing's homosexuality resulted in a criminal prosecution in 1952, \
 689         when homosexual acts were still illegal in the United Kingdom. "), \
 690         'encipher'), \
 691         wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS
 692     ('encipher', 0.898128626285...)
 693     >>> column_transposition_break(column_transposition_encipher(sanitise( \
 694         "Turing's homosexuality resulted in a criminal prosecution in 1952, " \
 695         "when homosexual acts were still illegal in the United Kingdom."), \
 696         'encipher'), \
 697         wordlist=['encipher', 'keyword', 'fourteen'], \
 698         target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
 699     ('encipher', 1.1958792913127...)
 700     """
 701     best_keyword = ''
 702     best_fit = float("inf")
 703     ngram_length = len(next(iter(target_counts.keys())))
 704     for keyword in wordlist:
 705         if len(message) % len(deduplicate(keyword)) == 0:
 706             plaintext = column_transposition_decipher(message, keyword)
 707             counts = message_frequency_scaling(frequencies(
 708                          ngrams(sanitise(plaintext), ngram_length)))
 709             fit = metric(target_counts, counts)
 710             logger.debug('Column transposition break attempt using key {0} '
 711                          'gives fit of {1} and decrypt starting: {2}'.format(
 712                              keyword, fit,
 713                              sanitise(plaintext)[:50]))
 714             if fit < best_fit:
 715                 best_fit = fit
 716                 best_keyword = keyword
 717     logger.info('Column transposition break best fit with key {0} gives fit '
 718                 'of {1} and decrypt starting: {2}'.format(best_keyword,
 719                     best_fit, sanitise(
 720                         column_transposition_decipher(message,
 721                             best_keyword))[:50]))
 722     return best_keyword, best_fit
 723
 724
 725 def column_transposition_break_mp(message,
 726                      wordlist=keywords,
 727                      metric=norms.euclidean_distance,
 728                      target_counts=normalised_english_bigram_counts,
 729                      message_frequency_scaling=norms.normalise,
 730                      chunksize=500):
 731     """Breaks a column transposition cipher using a dictionary and
 732     n-gram frequency analysis
 733
 734     >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
 735         "Turing's homosexuality resulted in a criminal prosecution in 1952, \
 736         when homosexual acts were still illegal in the United Kingdom. "), \
 737         'encipher'), \
 738         wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS
 739     ('encipher', 0.898128626285...)
 740     >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
 741         "Turing's homosexuality resulted in a criminal prosecution in 1952, " \
 742         "when homosexual acts were still illegal in the United Kingdom."), \
 743         'encipher'), \
 744         wordlist=['encipher', 'keyword', 'fourteen'], \
 745         target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
 746     ('encipher', 1.1958792913127...)
 747     """
 748     ngram_length = len(next(iter(target_counts.keys())))
 749     with Pool() as pool:
 750         helper_args = [(message, word, metric, target_counts, ngram_length,
 751                         message_frequency_scaling)
 752                        for word in wordlist]
 753         # Gotcha: the helper function here needs to be defined at the top level
 754         #   (limitation of Pool.starmap)
 755         breaks = pool.starmap(column_transposition_break_worker, helper_args, chunksize)
 756         return min(breaks, key=lambda k: k[1])
 757
 758 def column_transposition_break_worker(message, keyword, metric, target_counts,
 759                       ngram_length, message_frequency_scaling):
 760     plaintext = column_transposition_decipher(message, keyword)
 761     counts = message_frequency_scaling(frequencies(
 762                          ngrams(sanitise(plaintext), ngram_length)))
 763     fit = metric(target_counts, counts)
 764     logger.debug('Column transposition break attempt using key {0} '
 765                          'gives fit of {1} and decrypt starting: {2}'.format(
 766                              keyword, fit,
 767                              sanitise(plaintext)[:50]))
 768     return keyword, fit
 769
 770
 771
 772 if __name__ == "__main__":
 773     import doctest
 774     doctest.testmod()