Changed frequencies procedure to use a Counter object
[cipher-tools.git] / cipher.py
1 import string
2 import collections
3 import norms
4 import logging
5 import math
6 from itertools import zip_longest
7 from segment import segment
8 from multiprocessing import Pool
9
10 # To time a run:
11 #
12 # import timeit
13 # c5a = open('2012/5a.ciphertext', 'r').read()
14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
16
# Module logger, writing to cipher.log next to the working directory.
# Lower the level to INFO or DEBUG to trace the *_break search functions.
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)
22
# English single-letter frequency counts, loaded from a tab-separated
# "letter<TAB>count" file.  Kept as a defaultdict(int) so that letters
# absent from the file count as zero when looked up.
english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
    for line in f:
        (letter, count) = line.split("\t")
        english_counts[letter] = int(count)
# Normalised version used as the default target distribution by the
# frequency-analysis breakers below.
normalised_english_counts = norms.normalise(english_counts)
29
# English bigram frequency counts, same tab-separated format as the
# single-letter file: "bigram<TAB>count".
english_bigram_counts = collections.defaultdict(int)
with open('count_2l.txt', 'r') as f:
    for line in f:
        (bigram, count) = line.split("\t")
        english_bigram_counts[bigram] = int(count)
# Normalised bigram distribution, used as the default target by scytale_break.
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
36
# Candidate keywords for the dictionary attacks (one word per line);
# default wordlist for keyword_break / keyword_break_mp.
with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]
39
# Lookup table for undoing multiplication mod 26:
# modular_division_table[m][p] holds a value q with (q * m) % 26 == p.
# When m shares a factor with 26 several q values collide and the largest
# one wins; products that cannot occur are left at 0.
modular_division_table = [[0] * 26 for _ in range(26)]
for multiplicand in range(26):
    for quotient in range(26):
        product = (quotient * multiplicand) % 26
        modular_division_table[multiplicand][product] = quotient
45
46
def sanitise(text):
    """Lowercase text and drop every character that is not an ASCII letter.

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    return ''.join(character.lower()
                   for character in text
                   if character in string.ascii_letters)
57
def ngrams(text, n):
    """Return every contiguous length-n slice of text, in order.
    A text shorter than n yields an empty list.

    >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
     'nf', 'fo', 'ox']
    >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
    ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
     'rown', 'ownf', 'wnfo', 'nfox']
    """
    starts = range(len(text) - n + 1)
    return [text[start:start + n] for start in starts]
69
def every_nth(text, n):
    """Return n strings, the ith of which consists of every nth character
    of text starting from the ith.

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    """
    # Chop into rows of length n, then read the table column by column.
    rows = [text[start:start + n] for start in range(0, len(text), n)]
    columns = zip_longest(*rows, fillvalue='')
    return [''.join(column) for column in columns]
84
def combine_every_nth(split_text):
    """Invert every_nth: interleave the given strings back into one text.

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    interleaved = zip_longest(*split_text, fillvalue='')
    return ''.join(letter for group in interleaved for letter in group)
97
98
def frequencies(text):
    """Count the number of occurrences of each character in text.

    Returns a collections.Counter, so looking up a character that does
    not occur gives 0 rather than raising KeyError.

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
        'dog').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
     ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
     ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
     ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> frequencies('abcdefabcdef')['x']
    0
    """
    # Counter consumes the string directly; the old pass-through
    # generator expression (and the commented-out defaultdict loop it
    # replaced) added nothing.
    return collections.Counter(text)

# Alias for call sites that count letters specifically.
letter_frequencies = frequencies
132
def deduplicate(text):
    """Return the distinct elements of text as a list, keeping only the
    first occurrence of each element and preserving that order."""
    seen = set()
    distinct = []
    for element in text:
        if element not in seen:
            seen.add(element)
            distinct.append(element)
    return distinct
135
136
137
def caesar_encipher_letter(letter, shift):
    """Encipher a single letter with a Caesar shift, preserving case.
    Anything that is not an ASCII letter is returned unchanged.

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('x', 2)
    'z'
    >>> caesar_encipher_letter('y', 2)
    'a'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('z', -1)
    'y'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    if letter not in string.ascii_letters:
        return letter
    alphabet_start = ord('A') if letter in string.ascii_uppercase else ord('a')
    return chr(alphabet_start + (ord(letter) - alphabet_start + shift) % 26)
167
def caesar_decipher_letter(letter, shift):
    """Decipher a letter, given a shift amount

    Implemented as the inverse of caesar_encipher_letter by shifting
    back by the same amount; non-letters pass through unchanged.

    >>> caesar_decipher_letter('b', 1)
    'a'
    >>> caesar_decipher_letter('b', 2)
    'z'
    """
    return caesar_encipher_letter(letter, -shift)
177
def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift.
    Non-letters (spaces, punctuation) are left in place.

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('abc', 2)
    'cde'
    >>> caesar_encipher('abcxyz', 2)
    'cdezab'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    """
    return ''.join(caesar_encipher_letter(character, shift)
                   for character in message)
192
def caesar_decipher(message, shift):
    """Decipher a message enciphered with the Caesar cipher of given shift.

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cde', 2)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    """
    return caesar_encipher(message, -shift)
204
def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Encipher a single letter with the affine map x -> multiplier*x + adder
    (mod 26).  one_based selects whether 'a' is numbered 1 (True) or 0
    (False).  Non-letters pass through unchanged; case is preserved.

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
             for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
             for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    if letter not in string.ascii_letters:
        return letter
    alphabet_start = ord('A') if letter in string.ascii_uppercase else ord('a')
    offset = 1 if one_based else 0
    plain_number = ord(letter) - alphabet_start + offset
    cipher_number = (plain_number * multiplier + adder) % 26 - offset
    return chr(cipher_number % 26 + alphabet_start)
227
def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a single affine-enciphered letter, undoing
    x -> multiplier*x + adder (mod 26) via the precomputed
    modular_division_table.  Non-letters pass through unchanged.

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
             for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
             for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    if letter not in string.ascii_letters:
        return letter
    alphabet_start = ord('A') if letter in string.ascii_uppercase else ord('a')
    offset = 1 if one_based else 0
    cipher_number = ord(letter) - alphabet_start + offset
    # "Divide" by the multiplier using the table of modular inverses.
    plain_number = modular_division_table[multiplier][(cipher_number - adder) % 26]
    return chr((plain_number - offset) % 26 + alphabet_start)
251
def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a whole message with the affine cipher, letter by letter.

    >>> affine_encipher('hours passed during which jerico tried every ' \
            'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    return ''.join(affine_encipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
262
def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a whole affine-enciphered message, letter by letter.

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
            'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    return ''.join(affine_decipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
273
274
def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Find the cipher alphabet given a keyword.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword:
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 0)
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    clean_keyword = sanitise(keyword)
    if wrap_alphabet == 0:
        return ''.join(deduplicate(clean_keyword + string.ascii_lowercase))
    if wrap_alphabet == 1:
        wrap_from = deduplicate(clean_keyword)[-1]
    else:
        wrap_from = sorted(clean_keyword)[-1]
    # Start the filler alphabet just after the chosen letter, then wrap
    # around; deduplicate removes the keyword letters and repeats.
    start = string.ascii_lowercase.find(wrap_from) + 1
    return ''.join(deduplicate(clean_keyword +
                               string.ascii_lowercase[start:] +
                               string.ascii_lowercase))
307
308
def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Enciphers a message with a keyword substitution cipher.
    The message is lowercased before enciphering.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword:
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 0)
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 1)
    'lskl dskkbus'
    >>> keyword_encipher('test message', 'bayes', 2)
    'qspq jsppbcs'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    to_cipher = str.maketrans(string.ascii_lowercase, cipher_alphabet)
    return message.lower().translate(to_cipher)
329
def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Deciphers a message with a keyword substitution cipher.
    The message is lowercased before deciphering.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword:
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
    'test message'
    >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
    'test message'
    >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
    'test message'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    # Same table as keyword_encipher, mapped in the opposite direction.
    to_plain = str.maketrans(cipher_alphabet, string.ascii_lowercase)
    return message.lower().translate(to_plain)
350
351 def scytale_encipher(message, rows):
352 """Enciphers using the scytale transposition cipher.
353 Message is padded with spaces to allow all rows to be the same length.
354
355 >>> scytale_encipher('thequickbrownfox', 3)
356 'tcnhkfeboqrxuo iw '
357 >>> scytale_encipher('thequickbrownfox', 4)
358 'tubnhirfecooqkwx'
359 >>> scytale_encipher('thequickbrownfox', 5)
360 'tubn hirf ecoo qkwx '
361 >>> scytale_encipher('thequickbrownfox', 6)
362 'tqcrnxhukof eibwo '
363 >>> scytale_encipher('thequickbrownfox', 7)
364 'tqcrnx hukof eibwo '
365 """
366 if len(message) % rows != 0:
367 message += ' '*(rows - len(message) % rows)
368 row_length = round(len(message) / rows)
369 slices = [message[i:i+row_length]
370 for i in range(0, len(message), row_length)]
371 return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])
372
373 def scytale_decipher(message, rows):
374 """Deciphers using the scytale transposition cipher.
375 Assumes the message is padded so that all rows are the same length.
376
377 >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
378 'thequickbrownfox '
379 >>> scytale_decipher('tubnhirfecooqkwx', 4)
380 'thequickbrownfox'
381 >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
382 'thequickbrownfox '
383 >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
384 'thequickbrownfox '
385 >>> scytale_decipher('tqcrnx hukof eibwo ', 7)
386 'thequickbrownfox '
387 """
388 cols = round(len(message) / rows)
389 columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
390 return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
391
392
def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis: tries all 26
    shifts and keeps the one whose letter-frequency profile is closest
    (under metric) to target_counts.  Returns (best_shift, best_fit).

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
            'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
            'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.42152901235832...)
    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
            'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
    (13, 0.316029208075451...)
    """
    sanitised_message = sanitise(message)
    best_shift, best_fit = 0, float('inf')
    for trial_shift in range(26):
        trial_plaintext = caesar_decipher(sanitised_message, trial_shift)
        trial_counts = message_frequency_scaling(
            letter_frequencies(trial_plaintext))
        trial_fit = metric(target_counts, trial_counts)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(
                         trial_shift, trial_fit, trial_plaintext[:50]))
        if trial_fit < best_fit:
            best_shift, best_fit = trial_shift, trial_fit
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit
425
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis

    Exhaustively tries every decipherment key: both one-based
    conventions, every odd multiplier 1..25, and every adder 0..25,
    scoring each candidate plaintext by the metric-distance between its
    scaled letter frequencies and target_counts.
    Returns ((best_multiplier, best_adder, best_one_based), best_fit).

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
        'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
        'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
        'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
        'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
        'clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    for one_based in [True, False]:
        # Odd multipliers only; even ones share a factor with 26 and so
        # cannot be valid affine keys.  NOTE(review): 13 is also not
        # coprime with 26, so it is a wasted (but harmless) candidate.
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                counts = message_frequency_scaling(letter_frequencies(plaintext))
                fit = metric(target_counts, counts)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                # Smaller distance to the target distribution is better.
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit
467
def keyword_break(message,
                  wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    Tries every keyword in wordlist with each of the three wrap_alphabet
    conventions, scoring candidate plaintexts by the metric-distance of
    their scaled letter frequencies from target_counts.
    Returns ((best_keyword, best_wrap_alphabet), best_fit).
    Single-process version; see keyword_break_mp for the parallel one.

    >>> keyword_break(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    best_keyword = ''
    # NOTE(review): initialised to True rather than an int, but the first
    # loop iteration always overwrites it (any fit < inf).
    best_wrap_alphabet = True
    best_fit = float("inf")
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            counts = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_counts, counts)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(plaintext)[:50]))
            # Smaller distance to the target distribution is better.
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit
503
def keyword_break_mp(message,
                     wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis, fanning the candidate keys out over a process
    pool.  Returns ((best_keyword, best_wrap_alphabet), best_fit).

    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    with Pool() as pool:
        # Gotcha: the helper function here needs to be defined at the top level
        # (limitation of Pool.starmap)
        candidate_keys = [(message, word, wrap, metric, target_counts,
                           message_frequency_scaling)
                          for word in wordlist
                          for wrap in range(3)]
        scored = pool.starmap(keyword_break_one, candidate_keys, chunksize)
        return min(scored, key=lambda candidate: candidate[1])
526
def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    """Score one keyword/wrap_alphabet candidate for keyword_break_mp.
    Returns ((keyword, wrap_alphabet), fit); smaller fit is better.
    Must stay at module top level so Pool.starmap can pickle it."""
    plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    scaled_counts = message_frequency_scaling(letter_frequencies(plaintext))
    fit = metric(target_counts, scaled_counts)
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
    return (keyword, wrap_alphabet), fit
536
def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise,
                  max_key_length=20):
    """Breaks a Scytale cipher by trying every row count ("key") from 1
    up to (but not including) max_key_length, scoring each candidate
    decryption by the metric-distance between its scaled bigram
    frequencies and target_counts.  Only keys that divide the message
    length exactly are tried, since scytale_decipher assumes a fully
    padded message; key 1 always qualifies, so a best key is always found.
    Returns (best_key, best_fit).

    max_key_length generalises the previously hard-coded bound of 20 and
    defaults to it, so existing callers see identical behaviour.

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
        'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
        'aek') # doctest: +ELLIPSIS
    (6, 0.83453041115025...)
    """
    best_key = 0
    best_fit = float("inf")
    for key in range(1, max_key_length):
        if len(message) % key == 0:
            plaintext = scytale_decipher(message, key)
            counts = message_frequency_scaling(frequencies(
                ngrams(sanitise(plaintext), 2)))
            fit = metric(target_counts, counts)
            logger.debug('Scytale break attempt using key {0} gives fit of '
                         '{1} and decrypt starting: {2}'.format(key,
                             fit, sanitise(plaintext)[:50]))
            # Smaller distance to the target bigram distribution is better.
            if fit < best_fit:
                best_fit = fit
                best_key = key
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit
566
567
if __name__ == "__main__":
    # When run as a script, execute all the doctests in this module.
    import doctest
    doctest.testmod()