[cipher-tools.git] / cipher.py
import string
import collections
import norms
import logging
import math
from itertools import zip_longest, repeat
from segment import segment
from multiprocessing import Pool

# To time a run:
#
# import timeit
# c5a = open('2012/5a.ciphertext', 'r').read()
# timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)

logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)

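# Reference statistics and the keyword dictionary are loaded at import time:
# count_1l.txt and count_2l.txt hold tab-separated letter and bigram counts,
# and words.txt holds one candidate keyword per line.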
english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
    for line in f:
        (letter, count) = line.split("\t")
        english_counts[letter] = int(count)
normalised_english_counts = norms.normalise(english_counts)

english_bigram_counts = collections.defaultdict(int)
with open('count_2l.txt', 'r') as f:
    for line in f:
        (bigram, count) = line.split("\t")
        english_bigram_counts[bigram] = int(count)
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)

with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]

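# modular_division_table[b][c] holds the a for which (a * b) % 26 == c,
# i.e. "c divided by b, modulo 26". The entries are only unambiguous for
# multipliers coprime with 26 (odd numbers other than 13); the table is
# used by affine_decipher_letter below.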
modular_division_table = [[0]*26 for x in range(26)]
for a in range(26):
    for b in range(26):
        c = (a * b) % 26
        modular_division_table[b][c] = a


def sanitise(text):
    """Remove all non-alphabetic characters and convert the text to lowercase

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    sanitised = [c.lower() for c in text if c in string.ascii_letters]
    return ''.join(sanitised)

def ngrams(text, n):
    """Returns all n-grams of a text

    >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
     'nf', 'fo', 'ox']
    >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
    ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
     'rown', 'ownf', 'wnfo', 'nfox']
    """
    return [text[i:i+n] for i in range(len(text)-n+1)]

def every_nth(text, n, fillvalue=''):
    """Returns n strings, each of which consists of every nth character,
    starting with the 0th, 1st, 2nd, ... (n-1)th character

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
    """
    split_text = [text[i:i+n] for i in range(0, len(text), n)]
    return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]

def combine_every_nth(split_text):
    """Reforms a text split into every_nth strings

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    return ''.join([''.join(l)
                    for l in zip_longest(*split_text, fillvalue='')])

def transpose(items, transposition):
    """Moves items around according to the given transposition

    >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
    ['d', 'b', 'c', 'a']
    >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
    [13, 12, 14, 11, 15, 10]
    """
    transposed = list(repeat('', len(transposition)))
    for p, t in enumerate(transposition):
        transposed[p] = items[t]
    return transposed

def untranspose(items, transposition):
    """Undoes a transpose

    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
    ['a', 'b', 'c', 'd']
    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
    [10, 11, 12, 13, 14, 15]
    """
    transposed = list(repeat('', len(transposition)))
    for p, t in enumerate(transposition):
        transposed[t] = items[p]
    return transposed


def frequencies(text):
    """Count the number of occurrences of each character in text

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
            'dog').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
     ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
     ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
     ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
            '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
     ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
     ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
     ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
     ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
            'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    """
    counts = collections.defaultdict(int)
    for c in text:
        counts[c] += 1
    return counts
letter_frequencies = frequencies

def deduplicate(text):
    """Return the distinct elements of text, in order of first appearance

    >>> deduplicate('bayesbay')
    ['b', 'a', 'y', 'e', 's']
    """
    return list(collections.OrderedDict.fromkeys(text))


def caesar_encipher_letter(letter, shift):
    """Encipher a letter, given a shift amount

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('a', 2)
    'c'
    >>> caesar_encipher_letter('b', 2)
    'd'
    >>> caesar_encipher_letter('x', 2)
    'z'
    >>> caesar_encipher_letter('y', 2)
    'a'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('z', -1)
    'y'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        return chr(((ord(letter) - alphabet_start + shift) % 26) +
                   alphabet_start)
    else:
        return letter

def caesar_decipher_letter(letter, shift):
    """Decipher a letter, given a shift amount

    >>> caesar_decipher_letter('b', 1)
    'a'
    >>> caesar_decipher_letter('b', 2)
    'z'
    """
    return caesar_encipher_letter(letter, -shift)

def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('abc', 2)
    'cde'
    >>> caesar_encipher('abcxyz', 2)
    'cdezab'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    """
    enciphered = [caesar_encipher_letter(l, shift) for l in message]
    return ''.join(enciphered)

def caesar_decipher(message, shift):
    """Decipher a message enciphered with the Caesar cipher of given shift

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cde', 2)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    """
    return caesar_encipher(message, -shift)

def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Encipher a letter, given a multiplier and adder

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
            for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
            for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        letter_number = ord(letter) - alphabet_start
        if one_based: letter_number += 1
        cipher_number = (letter_number * multiplier + adder) % 26
        if one_based: cipher_number -= 1
        return chr(cipher_number % 26 + alphabet_start)
    else:
        return letter

def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter, given a multiplier and adder

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
            for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
            for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        cipher_number = ord(letter) - alphabet_start
        if one_based: cipher_number += 1
        plaintext_number = ( modular_division_table[multiplier]
                                 [(cipher_number - adder) % 26] )
        if one_based: plaintext_number -= 1
        return chr(plaintext_number % 26 + alphabet_start)
    else:
        return letter

def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a message with the affine cipher

    >>> affine_encipher('hours passed during which jerico tried every ' \
            'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(enciphered)

def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a message enciphered with the affine cipher

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
            'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(enciphered)


def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Find the cipher alphabet given a keyword.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 0)
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    if wrap_alphabet == 0:
        cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
                                              string.ascii_lowercase))
    else:
        if wrap_alphabet == 1:
            last_keyword_letter = deduplicate(sanitise(keyword))[-1]
        else:
            last_keyword_letter = sorted(sanitise(keyword))[-1]
        last_keyword_position = string.ascii_lowercase.find(
            last_keyword_letter) + 1
        cipher_alphabet = ''.join(
            deduplicate(sanitise(keyword) +
                        string.ascii_lowercase[last_keyword_position:] +
                        string.ascii_lowercase))
    return cipher_alphabet


def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Enciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 0)
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 1)
    'lskl dskkbus'
    >>> keyword_encipher('test message', 'bayes', 2)
    'qspq jsppbcs'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
    return message.lower().translate(cipher_translation)

def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Deciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
    'test message'
    >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
    'test message'
    >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
    'test message'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
    return message.lower().translate(cipher_translation)

def scytale_encipher(message, rows):
    """Enciphers using the scytale transposition cipher.
    Message is padded with spaces to allow all rows to be the same length.

    >>> scytale_encipher('thequickbrownfox', 3)
    'tcnhkfeboqrxuo iw '
    >>> scytale_encipher('thequickbrownfox', 4)
    'tubnhirfecooqkwx'
    >>> scytale_encipher('thequickbrownfox', 5)
    'tubn hirf ecoo qkwx '
    >>> scytale_encipher('thequickbrownfox', 6)
    'tqcrnxhukof eibwo '
    >>> scytale_encipher('thequickbrownfox', 7)
    'tqcrnx hukof  eibwo  '
    """
    if len(message) % rows != 0:
        message += ' '*(rows - len(message) % rows)
    row_length = round(len(message) / rows)
    slices = [message[i:i+row_length]
              for i in range(0, len(message), row_length)]
    return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])

def scytale_decipher(message, rows):
    """Deciphers using the scytale transposition cipher.
    Assumes the message is padded so that all rows are the same length.

    >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
    'thequickbrownfox  '
    >>> scytale_decipher('tubnhirfecooqkwx', 4)
    'thequickbrownfox'
    >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
    'thequickbrownfox    '
    >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
    'thequickbrownfox  '
    >>> scytale_decipher('tqcrnx hukof  eibwo  ', 7)
    'thequickbrownfox     '
    """
    cols = round(len(message) / rows)
    columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
    return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])


def transpositions_of(keyword):
    """Finds the transposition given by a keyword: for each letter of the
    deduplicated keyword in alphabetical order, the position of that letter
    in the keyword.

    >>> transpositions_of('clever')
    [0, 2, 1, 4, 3]
    """
    transpositions = []
    key = deduplicate(keyword)
    for l in sorted(key):
        transpositions += [key.index(l)]
    return transpositions

def column_transposition_encipher(message, keyword):
    """Enciphers with a column transposition cipher, reordering the columns
    of the message (written in rows as wide as the keyword) into the
    alphabetical order of the keyword's letters."""
    transpositions = transpositions_of(keyword)
    columns = every_nth(message, len(transpositions), fillvalue=' ')
    transposed_columns = transpose(columns, transpositions)
    return combine_every_nth(transposed_columns)
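
# A matching decipher is not defined in this file; a minimal sketch, assuming
# the encipher form above, would undo the column permutation with untranspose:
#
# def column_transposition_decipher(message, keyword):
#     transpositions = transpositions_of(keyword)
#     columns = every_nth(message, len(transpositions), fillvalue=' ')
#     untransposed_columns = untranspose(columns, transpositions)
#     return combine_every_nth(untransposed_columns)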
def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
            'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
            'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.42152901235832...)
    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
            'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
    (13, 0.316029208075451...)
    """
    sanitised_message = sanitise(message)
    best_shift = 0
    best_fit = float("inf")
    for shift in range(26):
        plaintext = caesar_decipher(sanitised_message, shift)
        counts = message_frequency_scaling(letter_frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
        if fit < best_fit:
            best_fit = fit
            best_shift = shift
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit

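# Example use of caesar_break (a sketch; `ciphertext` stands for any string
# holding the enciphered message):
#
# shift, fit = caesar_break(ciphertext)
# print(caesar_decipher(sanitise(ciphertext), shift))
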
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
            'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
            'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
            'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
            'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
            'clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    for one_based in [True, False]:
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                counts = message_frequency_scaling(letter_frequencies(plaintext))
                fit = metric(target_counts, counts)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit

def keyword_break(message,
                  wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break(keyword_encipher('this is a test message for the ' \
            'keyword decipherment', 'elephant', 1), \
            wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    best_keyword = ''
    best_wrap_alphabet = 0
    best_fit = float("inf")
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            counts = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_counts, counts)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit

def keyword_break_mp(message,
                     wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
            'keyword decipherment', 'elephant', 1), \
            wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    with Pool() as pool:
        helper_args = [(message, word, wrap, metric, target_counts,
                        message_frequency_scaling)
                       for word in wordlist for wrap in range(3)]
        # Gotcha: the helper function here needs to be defined at the top level
        # (limitation of Pool.starmap)
        breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
        return min(breaks, key=lambda k: k[1])

def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    counts = message_frequency_scaling(letter_frequencies(plaintext))
    fit = metric(target_counts, counts)
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
    return (keyword, wrap_alphabet), fit

def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a Scytale cipher

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
            'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
            'aek') # doctest: +ELLIPSIS
    (6, 0.83453041115025...)
    """
    best_key = 0
    best_fit = float("inf")
    for key in range(1, 20):
        if len(message) % key == 0:
            plaintext = scytale_decipher(message, key)
            counts = message_frequency_scaling(frequencies(
                ngrams(sanitise(plaintext), 2)))
            fit = metric(target_counts, counts)
            logger.debug('Scytale break attempt using key {0} gives fit of '
                         '{1} and decrypt starting: {2}'.format(key,
                             fit, sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_key = key
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit


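# Running this file directly executes the doctests above; doctest.testmod()
# is quiet unless a test fails (pass -v on the command line for a full report).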
if __name__ == "__main__":
    import doctest
    doctest.testmod()