cipher.py

   1 import string
   2 import collections
   3 import norms
   4 import logging
   5 import math
   6 from itertools import zip_longest
   7 from segment import segment
   8 from multiprocessing import Pool
   9
  10 # To time a run:
  11 #
  12 # import timeit
  13 # c5a = open('2012/5a.ciphertext', 'r').read()
  14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
  15 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1
  16
  17 logger = logging.getLogger(__name__)
  18 logger.addHandler(logging.FileHandler('cipher.log'))
  19 logger.setLevel(logging.WARNING)
  20 #logger.setLevel(logging.INFO)
  21 #logger.setLevel(logging.DEBUG)
  22
  23 english_counts = collections.defaultdict(int)
  24 with open('count_1l.txt', 'r') as f:
  25     for line in f:
  26         (letter, count) = line.split("\t")
  27         english_counts[letter] = int(count)
  28 normalised_english_counts = norms.normalise(english_counts)
  29
  30 english_bigram_counts = collections.defaultdict(int)
  31 with open('count_2l.txt', 'r') as f:
  32     for line in f:
  33         (bigram, count) = line.split("\t")
  34         english_bigram_counts[bigram] = int(count)
  35 normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
  36
  37 with open('words.txt', 'r') as f:
  38     keywords = [line.rstrip() for line in f]
  39
  40 modular_division_table = [[0]*26 for x in range(26)]
  41 for a in range(26):
  42     for b in range(26):
  43         c = (a * b) % 26
  44         modular_division_table[b][c] = a
  45
  46
  47 def sanitise(text):
  48     """Remove all non-alphabetic characters and convert the text to lowercase
  49
  50     >>> sanitise('The Quick')
  51     'thequick'
  52     >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
  53     'thequickbrownfoxjumpedoverthelazydog'
  54     """
  55     sanitised = [c.lower() for c in text if c in string.ascii_letters]
  56     return ''.join(sanitised)
  57
  58 def ngrams(text, n):
  59     """Returns all n-grams of a text
  60
  61     >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
  62     ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
  63      'nf', 'fo', 'ox']
  64     >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
  65     ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
  66      'rown', 'ownf', 'wnfo', 'nfox']
  67     """
  68     return [text[i:i+n] for i in range(len(text)-n+1)]
  69
  70 def every_nth(text, n):
  71     """Returns n strings, each of which consists of every nth character,
  72     starting with the 0th, 1st, 2nd, ... (n-1)th character
  73
  74     >>> every_nth(string.ascii_lowercase, 5)
  75     ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
  76     >>> every_nth(string.ascii_lowercase, 1)
  77     ['abcdefghijklmnopqrstuvwxyz']
  78     >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
  79     ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
  80      'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
  81     """
  82     split_text = [text[i:i+n] for i in range(0, len(text), n)]
  83     return [''.join(l) for l in zip_longest(*split_text, fillvalue='')]
  84
  85 def combine_every_nth(split_text):
  86     """Reforms a text split into every_nth strings
  87
  88     >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
  89     'abcdefghijklmnopqrstuvwxyz'
  90     >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
  91     'abcdefghijklmnopqrstuvwxyz'
  92     >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
  93     'abcdefghijklmnopqrstuvwxyz'
  94     """
  95     return ''.join([''.join(l)
  96                     for l in zip_longest(*split_text, fillvalue='')])
  97
  98
  99 def frequencies(text):
 100     """Count the number of occurrences of each character in text
 101
 102     >>> sorted(frequencies('abcdefabc').items())
 103     [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
 104     >>> sorted(frequencies('the quick brown fox jumped over the lazy dog').items()) # doctest: +NORMALIZE_WHITESPACE
 105     [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
 106      ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
 107      ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
 108      ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
 109     >>> sorted(frequencies('The Quick BROWN fox jumped! over... the (9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
 110     [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
 111      ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
 112      ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
 113      ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
 114      ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
 115     >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
 116     [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
 117      ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
 118      ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
 119      ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
 120     """
 121     counts = collections.defaultdict(int)
 122     for c in text:
 123         counts[c] += 1
 124     return counts
 125 letter_frequencies = frequencies
 126
 127 def deduplicate(text):
 128     return list(collections.OrderedDict.fromkeys(text))
 129
 130
 131
 132 def caesar_encipher_letter(letter, shift):
 133     """Encipher a letter, given a shift amount
 134
 135     >>> caesar_encipher_letter('a', 1)
 136     'b'
 137     >>> caesar_encipher_letter('a', 2)
 138     'c'
 139     >>> caesar_encipher_letter('b', 2)
 140     'd'
 141     >>> caesar_encipher_letter('x', 2)
 142     'z'
 143     >>> caesar_encipher_letter('y', 2)
 144     'a'
 145     >>> caesar_encipher_letter('z', 2)
 146     'b'
 147     >>> caesar_encipher_letter('z', -1)
 148     'y'
 149     >>> caesar_encipher_letter('a', -1)
 150     'z'
 151     """
 152     if letter in string.ascii_letters:
 153         if letter in string.ascii_uppercase:
 154             alphabet_start = ord('A')
 155         else:
 156             alphabet_start = ord('a')
 157         return chr(((ord(letter) - alphabet_start + shift) % 26) +
 158                    alphabet_start)
 159     else:
 160         return letter
 161
 162 def caesar_decipher_letter(letter, shift):
 163     """Decipher a letter, given a shift amount
 164
 165     >>> caesar_decipher_letter('b', 1)
 166     'a'
 167     >>> caesar_decipher_letter('b', 2)
 168     'z'
 169     """
 170     return caesar_encipher_letter(letter, -shift)
 171
 172 def caesar_encipher(message, shift):
 173     """Encipher a message with the Caesar cipher of given shift
 174
 175     >>> caesar_encipher('abc', 1)
 176     'bcd'
 177     >>> caesar_encipher('abc', 2)
 178     'cde'
 179     >>> caesar_encipher('abcxyz', 2)
 180     'cdezab'
 181     >>> caesar_encipher('ab cx yz', 2)
 182     'cd ez ab'
 183     """
 184     enciphered = [caesar_encipher_letter(l, shift) for l in message]
 185     return ''.join(enciphered)
 186
 187 def caesar_decipher(message, shift):
 188     """Encipher a message with the Caesar cipher of given shift
 189
 190     >>> caesar_decipher('bcd', 1)
 191     'abc'
 192     >>> caesar_decipher('cde', 2)
 193     'abc'
 194     >>> caesar_decipher('cd ez ab', 2)
 195     'ab cx yz'
 196     """
 197     return caesar_encipher(message, -shift)
 198
 199 def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
 200     """Encipher a letter, given a multiplier and adder
 201
 202     >>> ''.join([affine_encipher_letter(l, 3, 5, True) for l in string.ascii_uppercase])
 203     'HKNQTWZCFILORUXADGJMPSVYBE'
 204     >>> ''.join([affine_encipher_letter(l, 3, 5, False) for l in string.ascii_uppercase])
 205     'FILORUXADGJMPSVYBEHKNQTWZC'
 206     """
 207     if letter in string.ascii_letters:
 208         if letter in string.ascii_uppercase:
 209             alphabet_start = ord('A')
 210         else:
 211             alphabet_start = ord('a')
 212         letter_number = ord(letter) - alphabet_start
 213         if one_based: letter_number += 1
 214         cipher_number = (letter_number * multiplier + adder) % 26
 215         if one_based: cipher_number -= 1
 216         return chr(cipher_number % 26 + alphabet_start)
 217     else:
 218         return letter
 219
 220 def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
 221     """Encipher a letter, given a multiplier and adder
 222
 223     >>> ''.join([affine_decipher_letter(l, 3, 5, True) for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
 224     'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
 225     >>> ''.join([affine_decipher_letter(l, 3, 5, False) for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
 226     'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
 227     """
 228     if letter in string.ascii_letters:
 229         if letter in string.ascii_uppercase:
 230             alphabet_start = ord('A')
 231         else:
 232             alphabet_start = ord('a')
 233         cipher_number = ord(letter) - alphabet_start
 234         if one_based: cipher_number += 1
 235         plaintext_number = ( modular_division_table[multiplier]
 236                                                    [(cipher_number - adder) % 26] )
 237         if one_based: plaintext_number -= 1
 238         return chr(plaintext_number % 26 + alphabet_start)
 239     else:
 240         return letter
 241
 242 def affine_encipher(message, multiplier=1, adder=0, one_based=True):
 243     """Encipher a message
 244
 245     >>> affine_encipher('hours passed during which jerico tried every trick he could think of', 15, 22, True)
 246     'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
 247     """
 248     enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
 249                   for l in message]
 250     return ''.join(enciphered)
 251
 252 def affine_decipher(message, multiplier=1, adder=0, one_based=True):
 253     """Decipher a message
 254
 255     >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh', 15, 22, True)
 256     'hours passed during which jerico tried every trick he could think of'
 257     """
 258     enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
 259                   for l in message]
 260     return ''.join(enciphered)
 261
 262
 263 def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
 264     """Find the cipher alphabet given a keyword.
 265     wrap_alphabet controls how the rest of the alphabet is added
 266     after the keyword.
 267     0 : from 'a'
 268     1 : from the last letter in the sanitised keyword
 269     2 : from the largest letter in the sanitised keyword
 270
 271     >>> keyword_cipher_alphabet_of('bayes')
 272     'bayescdfghijklmnopqrtuvwxz'
 273     >>> keyword_cipher_alphabet_of('bayes', 0)
 274     'bayescdfghijklmnopqrtuvwxz'
 275     >>> keyword_cipher_alphabet_of('bayes', 1)
 276     'bayestuvwxzcdfghijklmnopqr'
 277     >>> keyword_cipher_alphabet_of('bayes', 2)
 278     'bayeszcdfghijklmnopqrtuvwx'
 279     """
 280     if wrap_alphabet == 0:
 281         cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
 282                                               string.ascii_lowercase))
 283     else:
 284         if wrap_alphabet == 1:
 285             last_keyword_letter = deduplicate(sanitise(keyword))[-1]
 286         else:
 287             last_keyword_letter = sorted(sanitise(keyword))[-1]
 288         last_keyword_position = string.ascii_lowercase.find(
 289             last_keyword_letter) + 1
 290         cipher_alphabet = ''.join(
 291             deduplicate(sanitise(keyword) +
 292                         string.ascii_lowercase[last_keyword_position:] +
 293                         string.ascii_lowercase))
 294     return cipher_alphabet
 295
 296
 297 def keyword_encipher(message, keyword, wrap_alphabet=0):
 298     """Enciphers a message with a keyword substitution cipher.
 299     wrap_alphabet controls how the rest of the alphabet is added
 300     after the keyword.
 301     0 : from 'a'
 302     1 : from the last letter in the sanitised keyword
 303     2 : from the largest letter in the sanitised keyword
 304
 305     >>> keyword_encipher('test message', 'bayes')
 306     'rsqr ksqqbds'
 307     >>> keyword_encipher('test message', 'bayes', 0)
 308     'rsqr ksqqbds'
 309     >>> keyword_encipher('test message', 'bayes', 1)
 310     'lskl dskkbus'
 311     >>> keyword_encipher('test message', 'bayes', 2)
 312     'qspq jsppbcs'
 313     """
 314     cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
 315     cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
 316     return message.lower().translate(cipher_translation)
 317
 318 def keyword_decipher(message, keyword, wrap_alphabet=0):
 319     """Deciphers a message with a keyword substitution cipher.
 320     wrap_alphabet controls how the rest of the alphabet is added
 321     after the keyword.
 322     0 : from 'a'
 323     1 : from the last letter in the sanitised keyword
 324     2 : from the largest letter in the sanitised keyword
 325
 326     >>> keyword_decipher('rsqr ksqqbds', 'bayes')
 327     'test message'
 328     >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
 329     'test message'
 330     >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
 331     'test message'
 332     >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
 333     'test message'
 334     """
 335     cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
 336     cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
 337     return message.lower().translate(cipher_translation)
 338
 339 def scytale_encipher(message, rows):
 340     """Enciphers using the scytale transposition cipher.
 341     Message is padded with spaces to allow all rows to be the same length.
 342
 343     >>> scytale_encipher('thequickbrownfox', 3)
 344     'tcnhkfeboqrxuo iw '
 345     >>> scytale_encipher('thequickbrownfox', 4)
 346     'tubnhirfecooqkwx'
 347     >>> scytale_encipher('thequickbrownfox', 5)
 348     'tubn hirf ecoo qkwx '
 349     >>> scytale_encipher('thequickbrownfox', 6)
 350     'tqcrnxhukof eibwo '
 351     >>> scytale_encipher('thequickbrownfox', 7)
 352     'tqcrnx hukof  eibwo  '
 353     """
 354     if len(message) % rows != 0:
 355         message += ' '*(rows - len(message) % rows)
 356     row_length = round(len(message) / rows)
 357     slices = [message[i:i+row_length]
 358               for i in range(0, len(message), row_length)]
 359     return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])
 360
 361 def scytale_decipher(message, rows):
 362     """Deciphers using the scytale transposition cipher.
 363     Assumes the message is padded so that all rows are the same length.
 364
 365     >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
 366     'thequickbrownfox  '
 367     >>> scytale_decipher('tubnhirfecooqkwx', 4)
 368     'thequickbrownfox'
 369     >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
 370     'thequickbrownfox    '
 371     >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
 372     'thequickbrownfox  '
 373     >>> scytale_decipher('tqcrnx hukof  eibwo  ', 7)
 374     'thequickbrownfox     '
 375     """
 376     cols = round(len(message) / rows)
 377     columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
 378     return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
 379
 380
 381 def caesar_break(message,
 382                  metric=norms.euclidean_distance,
 383                  target_counts=normalised_english_counts,
 384                  message_frequency_scaling=norms.normalise):
 385     """Breaks a Caesar cipher using frequency analysis
 386
 387     >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
 388     (4, 0.31863952890183...)
 389     >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
 390     (19, 0.42152901235832...)
 391     >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
 392     (13, 0.316029208075451...)
 393     """
 394     sanitised_message = sanitise(message)
 395     best_shift = 0
 396     best_fit = float("inf")
 397     for shift in range(26):
 398         plaintext = caesar_decipher(sanitised_message, shift)
 399         counts = message_frequency_scaling(letter_frequencies(plaintext))
 400         fit = metric(target_counts, counts)
 401         logger.debug('Caesar break attempt using key {0} gives fit of {1} '
 402                       'and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
 403         if fit < best_fit:
 404             best_fit = fit
 405             best_shift = shift
 406     logger.info('Caesar break best fit: key {0} gives fit of {1} and '
 407                 'decrypt starting: {2}'.format(best_shift, best_fit,
 408                     caesar_decipher(sanitised_message, best_shift)[:50]))
 409     return best_shift, best_fit
 410
 411 def affine_break(message,
 412                  metric=norms.euclidean_distance,
 413                  target_counts=normalised_english_counts,
 414                  message_frequency_scaling=norms.normalise):
 415     """Breaks an affine cipher using frequency analysis
 416
 417     >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd clm ckuxj.') # doctest: +ELLIPSIS
 418     ((15, 22, True), 0.23570361818655...)
 419     """
 420     sanitised_message = sanitise(message)
 421     best_multiplier = 0
 422     best_adder = 0
 423     best_one_based = True
 424     best_fit = float("inf")
 425     for one_based in [True, False]:
 426         for multiplier in range(1, 26, 2):
 427             for adder in range(26):
 428                 plaintext = affine_decipher(sanitised_message,
 429                                             multiplier, adder, one_based)
 430                 counts = message_frequency_scaling(letter_frequencies(plaintext))
 431                 fit = metric(target_counts, counts)
 432                 logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
 433                              'gives fit of {3} and decrypt starting: {4}'.
 434                              format(multiplier, adder, one_based, fit,
 435                                     plaintext[:50]))
 436                 if fit < best_fit:
 437                     best_fit = fit
 438                     best_multiplier = multiplier
 439                     best_adder = adder
 440                     best_one_based = one_based
 441     logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
 442                 'and decrypt starting: {4}'.format(
 443                     best_multiplier, best_adder, best_one_based, best_fit,
 444                     affine_decipher(sanitised_message, best_multiplier,
 445                         best_adder, best_one_based)[:50]))
 446     return (best_multiplier, best_adder, best_one_based), best_fit
 447
 448 def keyword_break(message,
 449                   wordlist=keywords,
 450                   metric=norms.euclidean_distance,
 451                   target_counts=normalised_english_counts,
 452                   message_frequency_scaling=norms.normalise):
 453     """Breaks a keyword substitution cipher using a dictionary and
 454     frequency analysis
 455
 456     >>> keyword_break(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', 1), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
 457     (('elephant', 1), 0.41643991598441...)
 458     """
 459     best_keyword = ''
 460     best_wrap_alphabet = True
 461     best_fit = float("inf")
 462     for wrap_alphabet in range(3):
 463         for keyword in wordlist:
 464             plaintext = keyword_decipher(message, keyword, wrap_alphabet)
 465             counts = message_frequency_scaling(letter_frequencies(plaintext))
 466             fit = metric(target_counts, counts)
 467             logger.debug('Keyword break attempt using key {0} (wrap={1}) '
 468                          'gives fit of {2} and decrypt starting: {3}'.format(
 469                              keyword, wrap_alphabet, fit,
 470                              sanitise(plaintext)[:50]))
 471             if fit < best_fit:
 472                 best_fit = fit
 473                 best_keyword = keyword
 474                 best_wrap_alphabet = wrap_alphabet
 475     logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
 476                 '{2} and decrypt starting: {3}'.format(best_keyword,
 477                     best_wrap_alphabet, best_fit, sanitise(
 478                         keyword_decipher(message, best_keyword,
 479                                          best_wrap_alphabet))[:50]))
 480     return (best_keyword, best_wrap_alphabet), best_fit
 481
 482 def keyword_break_mp(message,
 483                      wordlist=keywords,
 484                      metric=norms.euclidean_distance,
 485                      target_counts=normalised_english_counts,
 486                      message_frequency_scaling=norms.normalise,
 487                      chunksize=500):
 488     """Breaks a keyword substitution cipher using a dictionary and
 489     frequency analysis
 490
 491     >>> keyword_break_mp(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', 1), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
 492     (('elephant', 1), 0.41643991598441...)
 493     """
 494     with Pool() as pool:
 495         helper_args = [(message, word, wrap, metric, target_counts,
 496                         message_frequency_scaling)
 497                        for word in wordlist for wrap in range(3)]
 498         breaks = pool.starmap(keyword_break_one, helper_args, chunksize) # Gotcha: the helper function here needs to be defined at the top level (limitation of Pool.starmap)
 499         return min(breaks, key=lambda k: k[1])
 500
 501 def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
 502                       message_frequency_scaling):
 503     plaintext = keyword_decipher(message, keyword, wrap_alphabet)
 504     counts = message_frequency_scaling(letter_frequencies(plaintext))
 505     fit = metric(target_counts, counts)
 506     logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
 507                  '{2} and decrypt starting: {3}'.format(keyword,
 508                      wrap_alphabet, fit, sanitise(plaintext)[:50]))
 509     return (keyword, wrap_alphabet), fit
 510
 511 def scytale_break(message,
 512                   metric=norms.euclidean_distance,
 513                   target_counts=normalised_english_bigram_counts,
 514                   message_frequency_scaling=norms.normalise):
 515     """Breaks a Scytale cipher
 516
 517     >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoeredcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriiotoaek') # doctest: +ELLIPSIS
 518     (6, 0.83453041115025...)
 519     """
 520     best_key = 0
 521     best_fit = float("inf")
 522     for key in range(1, 20):
 523         if len(message) % key == 0:
 524             plaintext = scytale_decipher(message, key)
 525             counts = message_frequency_scaling(frequencies(
 526                          ngrams(sanitise(plaintext), 2)))
 527             fit = metric(target_counts, counts)
 528             logger.debug('Scytale break attempt using key {0} gives fit of '
 529                          '{1} and decrypt starting: {2}'.format(key,
 530                              fit, sanitise(plaintext)[:50]))
 531             if fit < best_fit:
 532                 best_fit = fit
 533                 best_key = key
 534     logger.info('Scytale break best fit with key {0} gives fit of {1} and '
 535                 'decrypt starting: {2}'.format(best_key, best_fit,
 536                     sanitise(scytale_decipher(message, best_key))[:50]))
 537     return best_key, best_fit
 538
 539
 540 if __name__ == "__main__":
 541     import doctest
 542     doctest.testmod()