6 from itertools
import zip_longest
, repeat
7 from segment
import segment
8 from multiprocessing
import Pool
13 # c5a = open('2012/5a.ciphertext', 'r').read()
14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
15 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1
# Module-wide logger: appends to cipher.log; default threshold is WARNING.
# Uncomment one of the lower levels below when debugging a break attempt.
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)
# Single-letter counts of English, loaded from count_1l.txt
# (tab-separated "letter<TAB>count" lines), plus a normalised copy.
english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
    for line in f:
        (letter, count) = line.split("\t")
        english_counts[letter] = int(count)
normalised_english_counts = norms.normalise(english_counts)
# Bigram (letter-pair) counts of English, loaded from count_2l.txt
# (tab-separated "bigram<TAB>count" lines), plus a normalised copy.
english_bigram_counts = collections.defaultdict(int)
with open('count_2l.txt', 'r') as f:
    for line in f:
        (bigram, count) = line.split("\t")
        english_bigram_counts[bigram] = int(count)
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
# Candidate keywords for the dictionary attacks below, one word per line.
with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]
# Division table modulo 26 for the affine cipher:
# modular_division_table[b][c] == a  whenever  (a * b) % 26 == c.
# (When gcd(b, 26) > 1 several a's share a (b, c) cell; the last one wins,
# but affine multipliers are always coprime with 26 so those cells are unused.)
modular_division_table = [[0]*26 for _ in range(26)]
for a in range(26):
    for b in range(26):
        c = (a * b) % 26
        modular_division_table[b][c] = a
def sanitise(text):
    """Remove all non-alphabetic characters and convert the text to lowercase

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    # Keep only ASCII letters, lowercased; everything else is dropped.
    return ''.join(c.lower() for c in text if c in string.ascii_letters)
def ngrams(text, n):
    """Returns all n-grams of a text

    >>> ngrams('abcde', 2)
    ['ab', 'bc', 'cd', 'de']
    >>> ngrams('abc', 3)
    ['abc']
    """
    # One slice per starting position; the last n-gram starts at len(text)-n.
    return [text[start:start + n] for start in range(len(text) - n + 1)]
def every_nth(text, n, fillvalue=''):
    """Returns n strings, each of which consists of every nth character,
    starting with the 0th, 1st, 2nd, ... (n-1)th character

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
    """
    # Cut the text into consecutive chunks of n, then read them column-wise.
    chunks = [text[start:start + n] for start in range(0, len(text), n)]
    return [''.join(column)
            for column in zip_longest(*chunks, fillvalue=fillvalue)]
def combine_every_nth(split_text):
    """Reforms a text split into every_nth strings

    >>> combine_every_nth(['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty'])
    'abcdefghijklmnopqrstuvwxyz'
    """
    # Reading the split strings column-wise restores the original order;
    # the empty fillvalue absorbs ragged (unequal-length) strings.
    return ''.join(''.join(column)
                   for column in zip_longest(*split_text, fillvalue=''))
def transpose(items, transposition):
    """Moves items around according to the given transposition

    >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
    ['d', 'b', 'c', 'a']
    >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
    [13, 12, 14, 11, 15, 10]
    """
    # Output position p receives the item at source index transposition[p].
    return [items[source] for source in transposition]
def untranspose(items, transposition):
    """Undoes a transpose

    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
    ['a', 'b', 'c', 'd']
    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
    [10, 11, 12, 13, 14, 15]
    """
    # Inverse of transpose: the item at position p goes back to
    # index transposition[p].
    restored = [''] * len(transposition)
    for position, target in enumerate(transposition):
        restored[target] = items[position]
    return restored
def frequencies(text):
    """Count the number of occurrences of each character in text

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    """
    # collections.Counter counts at C speed and behaves like the
    # defaultdict(int) accumulator it replaces: missing keys read as 0
    # and .items() yields the same (char, count) pairs.
    return collections.Counter(text)

# Alias so call sites that count letters specifically read naturally.
letter_frequencies = frequencies
def deduplicate(text):
    """Return the distinct characters of text as a list,
    keeping first-appearance order.
    """
    # An ordered mapping keyed by character keeps one slot per distinct
    # character, in the order each was first seen.
    seen = collections.OrderedDict()
    for character in text:
        seen[character] = None
    return list(seen)
def caesar_encipher_letter(letter, shift):
    """Encipher a letter, given a shift amount

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    # Non-letters pass through unchanged.
    if letter not in string.ascii_letters:
        return letter
    # Case is preserved by shifting relative to the matching alphabet start.
    if letter in string.ascii_uppercase:
        alphabet_start = ord('A')
    else:
        alphabet_start = ord('a')
    return chr((ord(letter) - alphabet_start + shift) % 26 + alphabet_start)
def caesar_decipher_letter(letter, shift):
    """Decipher a letter, given a shift amount

    >>> caesar_decipher_letter('b', 1)
    'a'
    """
    # Deciphering is just enciphering with the opposite shift.
    return caesar_encipher_letter(letter, -shift)
def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift

    >>> caesar_encipher('abc', 1)
    'bcd'
    """
    # Shift every character independently; non-letters are left alone
    # by caesar_encipher_letter.
    return ''.join(caesar_encipher_letter(character, shift)
                   for character in message)
def caesar_decipher(message, shift):
    """Decipher a message with the Caesar cipher of given shift

    >>> caesar_decipher('bcd', 1)
    'abc'
    """
    # Undo the shift by enciphering with its negation.
    return caesar_encipher(message, -shift)
def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Encipher a letter, given a multiplier and adder

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
                 for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
                 for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    # Non-letters pass through unchanged.
    if letter not in string.ascii_letters:
        return letter
    alphabet_start = ord('A') if letter in string.ascii_uppercase else ord('a')
    letter_number = ord(letter) - alphabet_start
    if one_based:
        # One-based scheme numbers the alphabet a=1 .. z=26.
        letter_number += 1
    cipher_number = (letter_number * multiplier + adder) % 26
    if one_based:
        cipher_number -= 1
    return chr(cipher_number % 26 + alphabet_start)
def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter, given a multiplier and adder

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
                 for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
                 for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    # Non-letters pass through unchanged.
    if letter not in string.ascii_letters:
        return letter
    alphabet_start = ord('A') if letter in string.ascii_uppercase else ord('a')
    cipher_number = ord(letter) - alphabet_start
    if one_based:
        cipher_number += 1
    # Undo the multiply with the precomputed mod-26 division table.
    plaintext_number = modular_division_table[multiplier][
        (cipher_number - adder) % 26]
    if one_based:
        plaintext_number -= 1
    return chr(plaintext_number % 26 + alphabet_start)
def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a message with the affine cipher

    >>> affine_encipher('hours passed during which jerico tried every ' \
        'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    return ''.join(affine_encipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a message enciphered with the affine cipher

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
        'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    return ''.join(affine_decipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Find the cipher alphabet given a keyword.
    wrap_alphabet controls how the rest of the alphabet is added
      0 : from 'a'
      1 : from the last letter in the sanitised keyword
      2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    if wrap_alphabet == 0:
        # Keyword first, then the plain alphabet; duplicates collapse away.
        return ''.join(deduplicate(sanitise(keyword) +
                                   string.ascii_lowercase))
    if wrap_alphabet == 1:
        last_keyword_letter = deduplicate(sanitise(keyword))[-1]
    else:
        last_keyword_letter = sorted(sanitise(keyword))[-1]
    # Start the filler alphabet just after the chosen keyword letter,
    # then wrap round to the beginning; deduplicate removes the repeats.
    last_keyword_position = string.ascii_lowercase.find(
        last_keyword_letter) + 1
    return ''.join(
        deduplicate(sanitise(keyword) +
                    string.ascii_lowercase[last_keyword_position:] +
                    string.ascii_lowercase))
def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Enciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
      0 : from 'a'
      1 : from the last letter in the sanitised keyword
      2 : from the largest letter in the sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    """
    # Map plain lowercase letters onto the keyword-derived alphabet.
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
    return message.lower().translate(cipher_translation)
def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Deciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
      0 : from 'a'
      1 : from the last letter in the sanitised keyword
      2 : from the largest letter in the sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    """
    # Inverse mapping: keyword-derived alphabet back to plain lowercase.
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
    return message.lower().translate(cipher_translation)
def scytale_encipher(message, rows):
    """Enciphers using the scytale transposition cipher.
    Message is padded with spaces to allow all rows to be the same length.

    >>> scytale_encipher('thequickbrownfox', 4)
    'tubnhirfecooqkwx'
    >>> scytale_encipher('thequickbrownfox', 5)
    'tubn hirf ecoo qkwx '
    """
    # Pad with spaces so the message divides evenly into `rows` slices.
    if len(message) % rows != 0:
        message += ' '*(rows - len(message) % rows)
    row_length = round(len(message) / rows)
    slices = [message[start:start + row_length]
              for start in range(0, len(message), row_length)]
    # Reading the slices column-wise produces the ciphertext.
    return ''.join(''.join(column)
                   for column in zip_longest(*slices, fillvalue=''))
def scytale_decipher(message, rows):
    """Deciphers using the scytale transposition cipher.
    Assumes the message is padded so that all rows are the same length.

    >>> scytale_decipher('tubnhirfecooqkwx', 4)
    'thequickbrownfox'
    """
    # Split into `cols` pieces of `rows` characters, then read column-wise
    # to undo the enciphering transposition.
    cols = round(len(message) / rows)
    columns = [message[start:start + rows]
               for start in range(0, cols * rows, rows)]
    return ''.join(''.join(row)
                   for row in zip_longest(*columns, fillvalue=''))
def transpositions_of(keyword):
    """Return the column transposition defined by a keyword: for each of the
    keyword's distinct letters in alphabetical order, the index where that
    letter first appears in the deduplicated keyword.

    >>> transpositions_of('clever')
    [0, 2, 1, 4, 3]
    """
    key = deduplicate(keyword)
    return [key.index(letter) for letter in sorted(key)]
def column_transposition_encipher(message, keyword):
    """Enciphers using the column transposition cipher.

    >>> column_transposition_encipher('hellothere', 'clever')
    'hleolteher'
    """
    return column_transposition_worker(message, keyword, True)
def column_transposition_decipher(message, keyword):
    """Deciphers using the column transposition cipher.

    >>> column_transposition_decipher('hleolteher', 'clever')
    'hellothere'
    """
    return column_transposition_worker(message, keyword, False)
def column_transposition_worker(message, keyword, encipher=True):
    """Does the actual work of the column transposition cipher:
    enciphers when `encipher` is True, deciphers otherwise.

    >>> column_transposition_worker('hellothere', 'clever')
    'hleolteher'
    >>> column_transposition_worker('hleolteher', 'clever', False)
    'hellothere'
    """
    transpositions = transpositions_of(keyword)
    # Split the message into one column per keyword letter (space-padded),
    # permute the columns, then interleave them back together.
    columns = every_nth(message, len(transpositions), fillvalue=' ')
    if encipher:
        transposed_columns = transpose(columns, transpositions)
    else:
        transposed_columns = untranspose(columns, transpositions)
    return combine_every_nth(transposed_columns)
def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
        'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
        'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.42152901235832...)
    """
    sanitised_message = sanitise(message)
    best_shift = 0
    best_fit = float("inf")
    # Try all 26 shifts and keep the one whose letter frequencies are
    # closest (under `metric`) to the English target distribution.
    for shift in range(26):
        plaintext = caesar_decipher(sanitised_message, shift)
        counts = message_frequency_scaling(letter_frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(shift, fit,
                                                        plaintext[:50]))
        if fit < best_fit:
            best_fit = fit
            best_shift = shift
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
        'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
        'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
        'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
        'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
        'clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    # Exhaust the key space: odd multipliers (coprime with 26), all adders,
    # both numbering schemes; keep the key whose decrypt looks most English.
    for one_based in [True, False]:
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                counts = message_frequency_scaling(
                    letter_frequencies(plaintext))
                fit = metric(target_counts, counts)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit
def keyword_break(message,
                  wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis.

    >>> keyword_break(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    best_keyword = ''
    # Fix: wrap_alphabet holds ints from range(3); it was initialised to
    # the boolean True, which only mattered (confusingly) for an empty
    # wordlist. Initialise both trackers to well-typed defaults.
    best_wrap_alphabet = 0
    best_fit = float("inf")
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            counts = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_counts, counts)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit
def keyword_break_mp(message,
                     wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis, spreading the candidates over a process pool.

    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    with Pool() as pool:
        # One task per (word, wrap_alphabet) pair.
        helper_args = [(message, word, wrap, metric, target_counts,
                        message_frequency_scaling)
                       for word in wordlist for wrap in range(3)]
        # Gotcha: the helper function here needs to be defined at the top level
        # (limitation of Pool.starmap)
        breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
        # Lowest fit wins.
        return min(breaks, key=lambda k: k[1])
def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    """Score one (keyword, wrap_alphabet) candidate for keyword_break_mp.
    Kept at module top level so Pool.starmap can pickle it.
    """
    plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    counts = message_frequency_scaling(letter_frequencies(plaintext))
    fit = metric(target_counts, counts)
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
    return (keyword, wrap_alphabet), fit
def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a Scytale cipher

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
        'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
        'aek') # doctest: +ELLIPSIS
    (6, 0.83453041115025...)
    """
    best_key = 0
    best_fit = float("inf")
    # Only row counts that divide the message length are valid scytale keys;
    # bigram statistics pick out the decrypt that reads most like English.
    for key in range(1, 20):
        if len(message) % key == 0:
            plaintext = scytale_decipher(message, key)
            counts = message_frequency_scaling(frequencies(
                ngrams(sanitise(plaintext), 2)))
            fit = metric(target_counts, counts)
            logger.debug('Scytale break attempt using key {0} gives fit of '
                         '{1} and decrypt starting: {2}'.format(key,
                             fit, sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_key = key
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit
639 if __name__
== "__main__":