Column transposition working, needs more tests
[cipher-tools.git] / cipher.py
1 import string
2 import collections
3 import norms
4 import logging
5 import math
6 from itertools import zip_longest, repeat
7 from segment import segment
8 from multiprocessing import Pool
9
10 # To time a run:
11 #
12 # import timeit
13 # c5a = open('2012/5a.ciphertext', 'r').read()
14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
16
# Module-level logging: everything goes to cipher.log; default threshold is
# WARNING (uncomment a line below for chattier output).
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)

# English single-letter frequency counts, loaded at import time from a
# tab-separated "letter<TAB>count" file in the working directory.
english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
    for line in f:
        (letter, count) = line.split("\t")
        english_counts[letter] = int(count)
normalised_english_counts = norms.normalise(english_counts)

# English bigram frequency counts, same file format.
english_bigram_counts = collections.defaultdict(int)
with open('count_2l.txt', 'r') as f:
    for line in f:
        (bigram, count) = line.split("\t")
        english_bigram_counts[bigram] = int(count)
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)

# Candidate keywords for the dictionary-based breakers, one word per line.
with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]

# Modular division lookup: modular_division_table[b][c] == a such that
# (a * b) % 26 == c.  Only values of b coprime with 26 have a true inverse;
# for other rows the last assignment wins, so those entries are not
# well-defined quotients (callers only use odd multipliers in practice).
modular_division_table = [[0]*26 for x in range(26)]
for a in range(26):
    for b in range(26):
        c = (a * b) % 26
        modular_division_table[b][c] = a
45
46
def sanitise(text):
    """Strip everything except letters from text and lowercase the result.

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    return ''.join(c.lower() for c in text if c in string.ascii_letters)
57
def ngrams(text, n):
    """Return every contiguous n-character slice of text, in order.

    >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
     'nf', 'fo', 'ox']
    >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
    ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
     'rown', 'ownf', 'wnfo', 'nfox']
    """
    # There are len(text) - n + 1 possible start positions.
    last_start = len(text) - n + 1
    return [text[start:start + n] for start in range(last_start)]
69
def every_nth(text, n, fillvalue=''):
    """Split text into n strings, the i-th holding characters i, i+n, i+2n, ...
    Short trailing columns are padded with fillvalue.

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
    """
    # Chop into consecutive chunks of n, then read columns across the chunks.
    chunks = (text[start:start + n] for start in range(0, len(text), n))
    return [''.join(column)
            for column in zip_longest(*chunks, fillvalue=fillvalue)]
86
def combine_every_nth(split_text):
    """Reassemble a text that was split apart by every_nth.

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    # Reading the columns of the split strings restores the original order.
    interleaved = zip_longest(*split_text, fillvalue='')
    return ''.join(''.join(chars) for chars in interleaved)
99
def transpose(items, transposition):
    """Moves items around according to the given transposition:
    position p of the result takes the item at index transposition[p].

    >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
    ['d', 'b', 'c', 'a']
    >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
    [13, 12, 14, 11, 15, 10]
    """
    # A gather is just a comprehension; no need to preallocate and assign.
    return [items[t] for t in transposition]
114
def untranspose(items, transposition):
    """Invert a transpose: item p of the input goes back to position
    transposition[p] of the result.

    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
    ['a', 'b', 'c', 'd']
    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
    [10, 11, 12, 13, 14, 15]
    """
    # Scatter back into place; assumes transposition is a permutation.
    result = [''] * len(transposition)
    for position, target in enumerate(transposition):
        result[target] = items[position]
    return result
129
130
def frequencies(text):
    """Count the number of occurrences of each character in text

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
         'dog').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
     ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
     ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
     ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
         'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    """
    # collections.Counter is the stdlib tally.  Like the defaultdict(int)
    # it replaces, a missing key reads as 0 -- but Counter does not insert
    # the key on lookup, so merely reading a count can no longer grow the
    # mapping (e.g. while another piece of code is iterating over it).
    return collections.Counter(text)
letter_frequencies = frequencies
161
def deduplicate(text):
    """Return the distinct elements of text as a list, keeping only the
    first occurrence of each and preserving that order."""
    seen = set()
    unique = []
    for item in text:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique
164
165
166
def caesar_encipher_letter(letter, shift):
    """Shift a single letter through the alphabet, wrapping at the ends.
    Case is preserved; non-letters pass through unchanged.

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('a', 2)
    'c'
    >>> caesar_encipher_letter('b', 2)
    'd'
    >>> caesar_encipher_letter('x', 2)
    'z'
    >>> caesar_encipher_letter('y', 2)
    'a'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('z', -1)
    'y'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    if letter not in string.ascii_letters:
        return letter
    # Pick the right alphabet origin so case survives the shift.
    base = ord('A') if letter in string.ascii_uppercase else ord('a')
    return chr((ord(letter) - base + shift) % 26 + base)
196
def caesar_decipher_letter(letter, shift):
    """Undo a Caesar shift on a single letter.

    >>> caesar_decipher_letter('b', 1)
    'a'
    >>> caesar_decipher_letter('b', 2)
    'z'
    """
    # Deciphering is just enciphering with the opposite shift.
    return caesar_encipher_letter(letter, -shift)
206
def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift.

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('abc', 2)
    'cde'
    >>> caesar_encipher('abcxyz', 2)
    'cdezab'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    """
    return ''.join(caesar_encipher_letter(character, shift)
                   for character in message)
221
def caesar_decipher(message, shift):
    """Decipher a message with the Caesar cipher of given shift
    (docstring previously said "Encipher" by copy-paste mistake).

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cde', 2)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    """
    # Deciphering is enciphering with the negated shift.
    return caesar_encipher(message, -shift)
233
def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Apply the affine map (multiplier * n + adder) mod 26 to one letter.
    one_based selects whether 'a' is numbered 1 (True) or 0 (False).
    Case is preserved; non-letters pass through unchanged.

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
            for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
            for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    if letter not in string.ascii_letters:
        return letter
    base = ord('A') if letter in string.ascii_uppercase else ord('a')
    # With one-based numbering, shift in before the map and out after it.
    offset = 1 if one_based else 0
    number = ord(letter) - base + offset
    enciphered = (number * multiplier + adder) % 26 - offset
    return chr(enciphered % 26 + base)
256
def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter, given a multiplier and adder
    (docstring previously said "Encipher" by copy-paste mistake).
    Case is preserved; non-letters pass through unchanged.

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
            for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
            for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        cipher_number = ord(letter) - alphabet_start
        if one_based: cipher_number += 1
        # Undo the affine map: subtract the adder, then "divide" by the
        # multiplier via the precomputed modular division lookup table.
        plaintext_number = ( modular_division_table[multiplier]
                                                   [(cipher_number - adder) % 26] )
        if one_based: plaintext_number -= 1
        return chr(plaintext_number % 26 + alphabet_start)
    else:
        return letter
280
def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a whole message with the affine cipher, letter by letter.

    >>> affine_encipher('hours passed during which jerico tried every ' \
            'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    return ''.join(affine_encipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
291
def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a whole affine-enciphered message, letter by letter.

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
            'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    return ''.join(affine_decipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
302
303
def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Build the substitution alphabet for a keyword cipher.
    wrap_alphabet selects where the remaining letters start:
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 0)
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    clean_keyword = sanitise(keyword)
    if wrap_alphabet == 0:
        return ''.join(deduplicate(clean_keyword + string.ascii_lowercase))
    # Find the letter after which the rest of the alphabet should resume.
    if wrap_alphabet == 1:
        wrap_from = deduplicate(clean_keyword)[-1]
    else:
        wrap_from = sorted(clean_keyword)[-1]
    resume_at = string.ascii_lowercase.find(wrap_from) + 1
    # Appending the full alphabet at the end fills in any letters that come
    # before the resume point; deduplicate keeps only first occurrences.
    return ''.join(deduplicate(clean_keyword +
                               string.ascii_lowercase[resume_at:] +
                               string.ascii_lowercase))
336
337
def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Encipher a message with a keyword substitution cipher.
    wrap_alphabet selects where the non-keyword letters start:
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 0)
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 1)
    'lskl dskkbus'
    >>> keyword_encipher('test message', 'bayes', 2)
    'qspq jsppbcs'
    """
    # Map plain alphabet -> cipher alphabet and translate in one pass.
    translation = ''.maketrans(
        string.ascii_lowercase,
        keyword_cipher_alphabet_of(keyword, wrap_alphabet))
    return message.lower().translate(translation)
358
def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Decipher a message enciphered with a keyword substitution cipher.
    wrap_alphabet selects where the non-keyword letters start:
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
    'test message'
    >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
    'test message'
    >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
    'test message'
    """
    # Reverse direction of keyword_encipher: cipher alphabet -> plain.
    translation = ''.maketrans(
        keyword_cipher_alphabet_of(keyword, wrap_alphabet),
        string.ascii_lowercase)
    return message.lower().translate(translation)
379
380 def scytale_encipher(message, rows):
381 """Enciphers using the scytale transposition cipher.
382 Message is padded with spaces to allow all rows to be the same length.
383
384 >>> scytale_encipher('thequickbrownfox', 3)
385 'tcnhkfeboqrxuo iw '
386 >>> scytale_encipher('thequickbrownfox', 4)
387 'tubnhirfecooqkwx'
388 >>> scytale_encipher('thequickbrownfox', 5)
389 'tubn hirf ecoo qkwx '
390 >>> scytale_encipher('thequickbrownfox', 6)
391 'tqcrnxhukof eibwo '
392 >>> scytale_encipher('thequickbrownfox', 7)
393 'tqcrnx hukof eibwo '
394 """
395 if len(message) % rows != 0:
396 message += ' '*(rows - len(message) % rows)
397 row_length = round(len(message) / rows)
398 slices = [message[i:i+row_length]
399 for i in range(0, len(message), row_length)]
400 return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])
401
402 def scytale_decipher(message, rows):
403 """Deciphers using the scytale transposition cipher.
404 Assumes the message is padded so that all rows are the same length.
405
406 >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
407 'thequickbrownfox '
408 >>> scytale_decipher('tubnhirfecooqkwx', 4)
409 'thequickbrownfox'
410 >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
411 'thequickbrownfox '
412 >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
413 'thequickbrownfox '
414 >>> scytale_decipher('tqcrnx hukof eibwo ', 7)
415 'thequickbrownfox '
416 """
417 cols = round(len(message) / rows)
418 columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
419 return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
420
421
def transpositions_of(keyword):
    """Derive a column transposition from a keyword: for each letter of the
    deduplicated keyword in alphabetical order, record that letter's index
    in the keyword.

    >>> transpositions_of('clever')
    [0, 2, 1, 4, 3]
    """
    key = deduplicate(keyword)
    return [key.index(letter) for letter in sorted(key)]
432
def column_transposition_encipher(message, keyword):
    """Encipher with a column transposition cipher keyed by a keyword.

    >>> column_transposition_encipher('hellothere', 'clever')
    'hleolteher'
    """
    order = transpositions_of(keyword)
    # Split into columns, shuffle them per the keyword, then re-interleave.
    columns = every_nth(message, len(order), fillvalue=' ')
    return combine_every_nth(transpose(columns, order))
442
def column_transposition_decipher(message, keyword):
    """Decipher a column transposition cipher keyed by a keyword.

    >>> column_transposition_decipher('hleolteher', 'clever')
    'hellothere'
    """
    order = transpositions_of(keyword)
    # Split into columns, undo the keyword shuffle, then re-interleave.
    columns = every_nth(message, len(order), fillvalue=' ')
    return combine_every_nth(untranspose(columns, order))
452
453
454
def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis: try all 26 shifts
    and keep the one whose letter frequencies best match English.

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
          'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
          'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.42152901235832...)
    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
          'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
    (13, 0.316029208075451...)
    """
    sanitised_message = sanitise(message)
    best_shift, best_fit = 0, float("inf")
    for shift in range(26):
        plaintext = caesar_decipher(sanitised_message, shift)
        # Lower metric value = closer match to English letter frequencies.
        counts = message_frequency_scaling(letter_frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
        if fit < best_fit:
            best_shift, best_fit = shift, fit
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit
487
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
          'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
          'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
          'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
          'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
          'clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    # Best candidate so far; these initial values are always overwritten on
    # the first loop iteration (0 is not itself a valid multiplier).
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    # Exhaustive search over both letter-numbering conventions, all odd
    # multipliers and all 26 adders, keeping the lowest-metric decrypt.
    # NOTE(review): range(1, 26, 2) includes 13, which is not coprime with
    # 26 and has no modular inverse -- confirm trying it is harmless.
    for one_based in [True, False]:
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                # Lower metric value = closer to English letter frequencies.
                counts = message_frequency_scaling(letter_frequencies(plaintext))
                fit = metric(target_counts, counts)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit
529
def keyword_break(message,
                  wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    best_keyword = ''
    # Fix: this sentinel was previously initialised to True even though it
    # holds a wrap-alphabet mode (an int in 0..2); start from 0 instead.
    best_wrap_alphabet = 0
    best_fit = float("inf")
    # Try every (wrap mode, keyword) pair, keeping the lowest-metric decrypt.
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            counts = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_counts, counts)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit
565
def keyword_break_mp(message,
                     wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    # Parallel version of keyword_break: fan the (keyword, wrap) candidates
    # out across a process pool and keep the result with the lowest fit.
    with Pool() as pool:
        helper_args = [(message, word, wrap, metric, target_counts,
                        message_frequency_scaling)
                       for word in wordlist for wrap in range(3)]
        # Gotcha: the helper function here needs to be defined at the top level
        #   (limitation of Pool.starmap)
        breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
        return min(breaks, key=lambda k: k[1])
588
def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    """Score a single (keyword, wrap_alphabet) candidate for keyword_break_mp.
    Lives at module top level so multiprocessing can pickle it."""
    plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    fit = metric(target_counts,
                 message_frequency_scaling(letter_frequencies(plaintext)))
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
    return (keyword, wrap_alphabet), fit
598
def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a Scytale cipher by trying row counts 1-19 and scoring each
    decrypt's bigram frequencies against English.

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
          'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
          'aek') # doctest: +ELLIPSIS
    (6, 0.83453041115025...)
    """
    best_key, best_fit = 0, float("inf")
    for key in range(1, 20):
        # Only row counts that divide the message length are candidates.
        if len(message) % key != 0:
            continue
        plaintext = scytale_decipher(message, key)
        counts = message_frequency_scaling(frequencies(
            ngrams(sanitise(plaintext), 2)))
        fit = metric(target_counts, counts)
        logger.debug('Scytale break attempt using key {0} gives fit of '
                     '{1} and decrypt starting: {2}'.format(key,
                         fit, sanitise(plaintext)[:50]))
        if fit < best_fit:
            best_key, best_fit = key, fit
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit
628
629
# Run the embedded doctests when this module is executed as a script.
if __name__ == "__main__":
    import doctest
    doctest.testmod()