More whitespace and line length tidying
[cipher-tools.git] / cipher.py
1 import string
2 import collections
3 import norms
4 import logging
5 import math
6 from itertools import zip_longest
7 from segment import segment
8 from multiprocessing import Pool
9
10 # To time a run:
11 #
12 # import timeit
13 # c5a = open('2012/5a.ciphertext', 'r').read()
14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
16
# Module-level logger; appends to cipher.log in the current working directory.
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
# Uncomment one of these for more verbose break-attempt logging:
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)
22
# English single-letter counts, loaded at import time from a tab-separated
# letter<TAB>count file (count_1l.txt must exist in the working directory).
english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
    for line in f:
        (letter, count) = line.split("\t")
        english_counts[letter] = int(count)
# Scaled form used as the target distribution for frequency analysis.
normalised_english_counts = norms.normalise(english_counts)
29
# English bigram counts, loaded at import time from a tab-separated
# bigram<TAB>count file (count_2l.txt must exist in the working directory).
english_bigram_counts = collections.defaultdict(int)
with open('count_2l.txt', 'r') as f:
    for line in f:
        (bigram, count) = line.split("\t")
        english_bigram_counts[bigram] = int(count)
# Scaled form used as the target distribution for scytale_break.
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
36
# Candidate keywords for dictionary attacks (keyword_break*), one per line.
with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]
39
# Lookup table for division mod 26: modular_division_table[m][c] holds an x
# with (x * m) % 26 == c.  For m coprime to 26 this inverts multiplication
# exactly; for other m the last x written (highest) is kept.
modular_division_table = [[0] * 26 for _ in range(26)]
for dividend in range(26):
    for divisor in range(26):
        modular_division_table[divisor][(dividend * divisor) % 26] = dividend
45
46
def sanitise(text):
    """Remove all non-alphabetic characters and convert the text to lowercase

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    return ''.join(c.lower() for c in text if c in string.ascii_letters)
57
def ngrams(text, n):
    """Returns all n-grams of a text

    >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
     'nf', 'fo', 'ox']
    >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
    ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
     'rown', 'ownf', 'wnfo', 'nfox']
    """
    # One n-gram starts at each position that leaves room for n characters.
    last_start = len(text) - n
    return [text[start:start + n] for start in range(last_start + 1)]
69
def every_nth(text, n):
    """Returns n strings, each of which consists of every nth character,
    starting with the 0th, 1st, 2nd, ... (n-1)th character

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    """
    # Chop into rows of length n, then read the columns.
    rows = (text[start:start + n] for start in range(0, len(text), n))
    return [''.join(column) for column in zip_longest(*rows, fillvalue='')]
84
def combine_every_nth(split_text):
    """Reforms a text split into every_nth strings

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    # Reading the columns of the split undoes the column-wise split.
    interleaved = zip_longest(*split_text, fillvalue='')
    return ''.join(''.join(chars) for chars in interleaved)
97
98
def frequencies(text):
    """Count the number of occurrences of each character in text

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
         'dog').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
     ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
     ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
     ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
         '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
     ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
     ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
     ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
     ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
         'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    """
    # collections.Counter is the stdlib idiom for tallying; like the
    # defaultdict(int) it replaces, it yields 0 for absent characters.
    return collections.Counter(text)

# Alias used by the *_break functions for readability.
letter_frequencies = frequencies
129
def deduplicate(text):
    """Return the distinct elements of text as a list, keeping first-seen order."""
    seen = set()
    unique = []
    for item in text:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique
132
133
134
def caesar_encipher_letter(letter, shift):
    """Encipher a letter, given a shift amount

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('a', 2)
    'c'
    >>> caesar_encipher_letter('b', 2)
    'd'
    >>> caesar_encipher_letter('x', 2)
    'z'
    >>> caesar_encipher_letter('y', 2)
    'a'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('z', -1)
    'y'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    # Non-letters pass through unchanged.
    if letter not in string.ascii_letters:
        return letter
    # Shift within the alphabet of the letter's own case.
    base = ord('A') if letter in string.ascii_uppercase else ord('a')
    return chr(base + (ord(letter) - base + shift) % 26)
164
def caesar_decipher_letter(letter, shift):
    """Decipher a letter, given a shift amount

    >>> caesar_decipher_letter('b', 1)
    'a'
    >>> caesar_decipher_letter('b', 2)
    'z'
    """
    # Deciphering is enciphering with the opposite shift.
    return caesar_encipher_letter(letter, -shift)
174
def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('abc', 2)
    'cde'
    >>> caesar_encipher('abcxyz', 2)
    'cdezab'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    """
    return ''.join(caesar_encipher_letter(character, shift)
                   for character in message)
189
def caesar_decipher(message, shift):
    """Decipher a message enciphered with the Caesar cipher of given shift

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cde', 2)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    """
    # Deciphering is enciphering with the opposite shift.
    return caesar_encipher(message, -shift)
201
def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Encipher a letter, given a multiplier and adder.
    With one_based=True the letters are numbered a=1..z=26 before the
    transformation; otherwise a=0..z=25.

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
            for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
            for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBE'
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    # Non-letters pass through unchanged.
    if letter not in string.ascii_letters:
        return letter
    base = ord('A') if letter in string.ascii_uppercase else ord('a')
    offset = 1 if one_based else 0
    plain_number = ord(letter) - base + offset
    cipher_number = (plain_number * multiplier + adder) % 26 - offset
    return chr(cipher_number % 26 + base)
224
def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter, given a multiplier and adder.
    With one_based=True the letters are numbered a=1..z=26; otherwise
    a=0..z=25. Non-letters are returned unchanged.

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
            for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
            for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        cipher_number = ord(letter) - alphabet_start
        if one_based:
            cipher_number += 1
        # Undo the multiplication with the precomputed division table:
        # modular_division_table[m][c] is the x with (x * m) % 26 == c.
        plaintext_number = modular_division_table[multiplier][
            (cipher_number - adder) % 26]
        if one_based:
            plaintext_number -= 1
        return chr(plaintext_number % 26 + alphabet_start)
    else:
        return letter
248
def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a message

    >>> affine_encipher('hours passed during which jerico tried every ' \
           'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    return ''.join(affine_encipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
259
def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a message

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
           'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    return ''.join(affine_decipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
270
271
def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Find the cipher alphabet given a keyword.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 0)
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    key_letters = sanitise(keyword)
    if wrap_alphabet == 0:
        return ''.join(deduplicate(key_letters + string.ascii_lowercase))
    # Pick the letter after which the remaining alphabet resumes.
    if wrap_alphabet == 1:
        resume_after = deduplicate(key_letters)[-1]
    else:
        resume_after = sorted(key_letters)[-1]
    resume_position = string.ascii_lowercase.find(resume_after) + 1
    return ''.join(deduplicate(key_letters +
                               string.ascii_lowercase[resume_position:] +
                               string.ascii_lowercase))
304
305
def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Enciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 0)
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 1)
    'lskl dskkbus'
    >>> keyword_encipher('test message', 'bayes', 2)
    'qspq jsppbcs'
    """
    # Map plain alphabet -> cipher alphabet, then translate in one pass.
    translation = ''.maketrans(
        string.ascii_lowercase,
        keyword_cipher_alphabet_of(keyword, wrap_alphabet))
    return message.lower().translate(translation)
326
def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Deciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
    'test message'
    >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
    'test message'
    >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
    'test message'
    """
    # Map cipher alphabet -> plain alphabet (inverse of keyword_encipher).
    translation = ''.maketrans(
        keyword_cipher_alphabet_of(keyword, wrap_alphabet),
        string.ascii_lowercase)
    return message.lower().translate(translation)
347
348 def scytale_encipher(message, rows):
349 """Enciphers using the scytale transposition cipher.
350 Message is padded with spaces to allow all rows to be the same length.
351
352 >>> scytale_encipher('thequickbrownfox', 3)
353 'tcnhkfeboqrxuo iw '
354 >>> scytale_encipher('thequickbrownfox', 4)
355 'tubnhirfecooqkwx'
356 >>> scytale_encipher('thequickbrownfox', 5)
357 'tubn hirf ecoo qkwx '
358 >>> scytale_encipher('thequickbrownfox', 6)
359 'tqcrnxhukof eibwo '
360 >>> scytale_encipher('thequickbrownfox', 7)
361 'tqcrnx hukof eibwo '
362 """
363 if len(message) % rows != 0:
364 message += ' '*(rows - len(message) % rows)
365 row_length = round(len(message) / rows)
366 slices = [message[i:i+row_length]
367 for i in range(0, len(message), row_length)]
368 return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])
369
370 def scytale_decipher(message, rows):
371 """Deciphers using the scytale transposition cipher.
372 Assumes the message is padded so that all rows are the same length.
373
374 >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
375 'thequickbrownfox '
376 >>> scytale_decipher('tubnhirfecooqkwx', 4)
377 'thequickbrownfox'
378 >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
379 'thequickbrownfox '
380 >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
381 'thequickbrownfox '
382 >>> scytale_decipher('tqcrnx hukof eibwo ', 7)
383 'thequickbrownfox '
384 """
385 cols = round(len(message) / rows)
386 columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
387 return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
388
389
def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
          'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
          'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.42152901235832...)
    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
          'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
    (13, 0.316029208075451...)
    """
    sanitised_message = sanitise(message)
    best_shift, best_fit = 0, float("inf")
    # Try every possible shift and keep the one whose letter frequencies
    # are closest (under metric) to the target distribution.
    for candidate_shift in range(26):
        candidate_plaintext = caesar_decipher(sanitised_message,
                                              candidate_shift)
        candidate_fit = metric(
            target_counts,
            message_frequency_scaling(
                letter_frequencies(candidate_plaintext)))
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(
                         candidate_shift, candidate_fit,
                         candidate_plaintext[:50]))
        if candidate_fit < best_fit:
            best_shift, best_fit = candidate_shift, candidate_fit
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(
                    best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit
422
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
          'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
          'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
          'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
          'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
          'clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    best_parameters = (0, 0, True)
    best_fit = float("inf")
    # Exhaustive search: multipliers must be odd (and coprime to 26) to be
    # invertible; both letter-numbering conventions are tried.
    for one_based in [True, False]:
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                fit = metric(target_counts,
                             message_frequency_scaling(
                                 letter_frequencies(plaintext)))
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                if fit < best_fit:
                    best_fit = fit
                    best_parameters = (multiplier, adder, one_based)
    best_multiplier, best_adder, best_one_based = best_parameters
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return best_parameters, best_fit
464
def keyword_break(message,
                  wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    best_keyword = ''
    best_wrap_alphabet = True
    best_fit = float("inf")
    # Scan wrap modes in the outer loop and words in the inner loop (same
    # order as before, so ties resolve to the same key).
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            candidate = keyword_decipher(message, keyword, wrap_alphabet)
            fit = metric(target_counts,
                         message_frequency_scaling(
                             letter_frequencies(candidate)))
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(candidate)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit
500
def keyword_break_mp(message,
                     wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis, fanning the candidates out over a process pool.

    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    # One task per (word, wrap) pair, in the same order as the serial
    # version so tie-breaking via min() is unchanged.
    tasks = [(message, word, wrap, metric, target_counts,
              message_frequency_scaling)
             for word in wordlist for wrap in range(3)]
    with Pool() as pool:
        # Gotcha: the helper function here needs to be defined at the top
        # level (limitation of Pool.starmap)
        scored = pool.starmap(keyword_break_one, tasks, chunksize)
    return min(scored, key=lambda result: result[1])
521
def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    """Score a single keyword/wrap candidate (worker for keyword_break_mp;
    kept at module level so Pool.starmap can pickle it)."""
    plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    fit = metric(target_counts,
                 message_frequency_scaling(letter_frequencies(plaintext)))
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
    return (keyword, wrap_alphabet), fit
531
def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a Scytale cipher

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
          'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
          'aek') # doctest: +ELLIPSIS
    (6, 0.83453041115025...)
    """
    best_key, best_fit = 0, float("inf")
    # Only keys that divide the message length produce a valid grid;
    # bigram statistics are used to score each candidate decryption.
    for key in range(1, 20):
        if len(message) % key != 0:
            continue
        plaintext = scytale_decipher(message, key)
        fit = metric(target_counts,
                     message_frequency_scaling(frequencies(
                         ngrams(sanitise(plaintext), 2))))
        logger.debug('Scytale break attempt using key {0} gives fit of '
                     '{1} and decrypt starting: {2}'.format(key,
                         fit, sanitise(plaintext)[:50]))
        if fit < best_fit:
            best_key, best_fit = key, fit
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit
561
562
if __name__ == "__main__":
    # Run the embedded doctests when this file is executed directly.
    import doctest
    doctest.testmod()