import string
import collections
import norms
import logging
import math
from itertools import zip_longest, repeat
from segment import segment
from multiprocessing import Pool

# To time a run:
#
# import timeit
# c5a = open('2012/5a.ciphertext', 'r').read()
# timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)

logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)

english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
    for line in f:
        (letter, count) = line.split("\t")
        english_counts[letter] = int(count)
normalised_english_counts = norms.normalise(english_counts)

english_bigram_counts = collections.defaultdict(int)
with open('count_2l.txt', 'r') as f:
    for line in f:
        (bigram, count) = line.split("\t")
        english_bigram_counts[bigram] = int(count)
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)

with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]

modular_division_table = [[0]*26 for x in range(26)]
for a in range(26):
    for b in range(26):
        c = (a * b) % 26
        modular_division_table[b][c] = a

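# A worked example of how the table is used: for any affine multiplier b that
# is invertible mod 26, modular_division_table[b][c] recovers the a with
# (a * b) % 26 == c. For instance, (21 * 5) % 26 == 1, so
# modular_division_table[5][1] == 21.
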
def letters(text):
    """Remove all non-alphabetic characters from a text
    >>> letters('The Quick')
    'TheQuick'
    >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'TheQuickBROWNfoxjumpedoverthelazyDOG'
    """
    return ''.join([c for c in text if c in string.ascii_letters])

def sanitise(text):
    """Remove all non-alphabetic characters and convert the text to lowercase

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    # sanitised = [c.lower() for c in text if c in string.ascii_letters]
    # return ''.join(sanitised)
    return letters(text).lower()

def ngrams(text, n):
    """Returns all n-grams of a text

    >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
     'nf', 'fo', 'ox']
    >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
    ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
     'rown', 'ownf', 'wnfo', 'nfox']
    """
    return [text[i:i+n] for i in range(len(text)-n+1)]

def every_nth(text, n, fillvalue=''):
    """Returns n strings, each of which consists of every nth character,
    starting with the 0th, 1st, 2nd, ... (n-1)th character

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
    """
    split_text = [text[i:i+n] for i in range(0, len(text), n)]
    return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]

def combine_every_nth(split_text):
    """Reforms a text split into every_nth strings

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    return ''.join([''.join(l)
                    for l in zip_longest(*split_text, fillvalue='')])

def transpose(items, transposition):
    """Moves items around according to the given transposition

    >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
    ['d', 'b', 'c', 'a']
    >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
    [13, 12, 14, 11, 15, 10]
    """
    transposed = list(repeat('', len(transposition)))
    for p, t in enumerate(transposition):
        transposed[p] = items[t]
    return transposed

def untranspose(items, transposition):
    """Undoes a transpose

    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
    ['a', 'b', 'c', 'd']
    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
    [10, 11, 12, 13, 14, 15]
    """
    transposed = list(repeat('', len(transposition)))
    for p, t in enumerate(transposition):
        transposed[t] = items[p]
    return transposed

def frequencies(text):
    """Count the number of occurrences of each character in text

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
           'dog').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
     ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
     ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
     ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
           '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
     ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
     ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
     ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
     ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
           'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> frequencies('abcdefabcdef')['x']
    0
    """
    #counts = collections.defaultdict(int)
    #for c in text:
    #    counts[c] += 1
    #return counts
    return collections.Counter(c for c in text)
letter_frequencies = frequencies

def deduplicate(text):
    """Remove duplicate characters, keeping only the first occurrence of each"""
    return list(collections.OrderedDict.fromkeys(text))


def caesar_encipher_letter(letter, shift):
    """Encipher a letter, given a shift amount

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('a', 2)
    'c'
    >>> caesar_encipher_letter('b', 2)
    'd'
    >>> caesar_encipher_letter('x', 2)
    'z'
    >>> caesar_encipher_letter('y', 2)
    'a'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('z', -1)
    'y'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        return chr(((ord(letter) - alphabet_start + shift) % 26) +
                   alphabet_start)
    else:
        return letter

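# Tracing the arithmetic above for the wrap-around case caesar_encipher_letter('y', 2):
# ord('y') - ord('a') == 24, (24 + 2) % 26 == 0, and chr(0 + ord('a')) == 'a'.
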
def caesar_decipher_letter(letter, shift):
    """Decipher a letter, given a shift amount

    >>> caesar_decipher_letter('b', 1)
    'a'
    >>> caesar_decipher_letter('b', 2)
    'z'
    """
    return caesar_encipher_letter(letter, -shift)

def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('abc', 2)
    'cde'
    >>> caesar_encipher('abcxyz', 2)
    'cdezab'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    """
    enciphered = [caesar_encipher_letter(l, shift) for l in message]
    return ''.join(enciphered)

def caesar_decipher(message, shift):
    """Decipher a message enciphered with the Caesar cipher of given shift

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cde', 2)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    """
    return caesar_encipher(message, -shift)

def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Encipher a letter, given a multiplier and adder

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
            for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
            for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        letter_number = ord(letter) - alphabet_start
        if one_based: letter_number += 1
        cipher_number = (letter_number * multiplier + adder) % 26
        if one_based: cipher_number -= 1
        return chr(cipher_number % 26 + alphabet_start)
    else:
        return letter

def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter, given a multiplier and adder

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
            for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
            for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        cipher_number = ord(letter) - alphabet_start
        if one_based: cipher_number += 1
        plaintext_number = (modular_division_table[multiplier]
                                                  [(cipher_number - adder) % 26])
        if one_based: plaintext_number -= 1
        return chr(plaintext_number % 26 + alphabet_start)
    else:
        return letter

def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a message

    >>> affine_encipher('hours passed during which jerico tried every ' \
            'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(enciphered)

def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a message

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
            'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(enciphered)

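# The affine cipher maps a letter number p to (multiplier * p + adder) mod 26,
# counting letters from 1 rather than 0 when one_based is True; deciphering
# undoes this with the modular_division_table lookup above. A round-trip
# sketch with an arbitrarily chosen key of 5x+8 (any multiplier coprime to 26
# would do):
#
# ciphertext = affine_encipher('affine test', 5, 8, True)
# assert affine_decipher(ciphertext, 5, 8, True) == 'affine test'
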
def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Find the cipher alphabet given a keyword.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 0)
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    if wrap_alphabet == 0:
        cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
                                              string.ascii_lowercase))
    else:
        if wrap_alphabet == 1:
            last_keyword_letter = deduplicate(sanitise(keyword))[-1]
        else:
            last_keyword_letter = sorted(sanitise(keyword))[-1]
        last_keyword_position = string.ascii_lowercase.find(
            last_keyword_letter) + 1
        cipher_alphabet = ''.join(
            deduplicate(sanitise(keyword) +
                        string.ascii_lowercase[last_keyword_position:] +
                        string.ascii_lowercase))
    return cipher_alphabet


def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Enciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 0)
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 1)
    'lskl dskkbus'
    >>> keyword_encipher('test message', 'bayes', 2)
    'qspq jsppbcs'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
    return message.lower().translate(cipher_translation)

def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Deciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
    'test message'
    >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
    'test message'
    >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
    'test message'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
    return message.lower().translate(cipher_translation)

def scytale_encipher(message, rows):
    """Enciphers using the scytale transposition cipher.
    Message is padded with spaces to allow all rows to be the same length.

    >>> scytale_encipher('thequickbrownfox', 3)
    'tcnhkfeboqrxuo iw '
    >>> scytale_encipher('thequickbrownfox', 4)
    'tubnhirfecooqkwx'
    >>> scytale_encipher('thequickbrownfox', 5)
    'tubn hirf ecoo qkwx '
    >>> scytale_encipher('thequickbrownfox', 6)
    'tqcrnxhukof eibwo '
    >>> scytale_encipher('thequickbrownfox', 7)
    'tqcrnx hukof  eibwo  '
    """
    if len(message) % rows != 0:
        message += ' '*(rows - len(message) % rows)
    row_length = round(len(message) / rows)
    slices = [message[i:i+row_length]
              for i in range(0, len(message), row_length)]
    return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])

def scytale_decipher(message, rows):
    """Deciphers using the scytale transposition cipher.
    Assumes the message is padded so that all rows are the same length.

    >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
    'thequickbrownfox  '
    >>> scytale_decipher('tubnhirfecooqkwx', 4)
    'thequickbrownfox'
    >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
    'thequickbrownfox    '
    >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
    'thequickbrownfox  '
    >>> scytale_decipher('tqcrnx hukof  eibwo  ', 7)
    'thequickbrownfox     '
    """
    cols = round(len(message) / rows)
    columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
    return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])


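# Illustration of the scytale layout for scytale_encipher('thequickbrownfox', 4):
# the message is cut into the rows 'theq', 'uick', 'brow', 'nfox' and read off
# column by column, giving 'tubn' + 'hirf' + 'ecoo' + 'qkwx', i.e.
# 'tubnhirfecooqkwx'; scytale_decipher reverses the read-off.
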
def transpositions_of(keyword):
    """
    >>> transpositions_of('clever')
    [0, 2, 1, 4, 3]
    """
    key = deduplicate(keyword)
    transpositions = [key.index(l) for l in sorted(key)]
    return transpositions

def column_transposition_encipher(message, keyword):
    """
    >>> column_transposition_encipher('hellothere', 'clever')
    'hleolteher'
    """
    return column_transposition_worker(message, keyword, encipher=True)

def column_transposition_decipher(message, keyword):
    """
    >>> column_transposition_decipher('hleolteher', 'clever')
    'hellothere'
    """
    return column_transposition_worker(message, keyword, encipher=False)

def column_transposition_worker(message, keyword, encipher=True):
    """
    >>> column_transposition_worker('hellothere', 'clever')
    'hleolteher'
    >>> column_transposition_worker('hellothere', 'clever', encipher=True)
    'hleolteher'
    >>> column_transposition_worker('hleolteher', 'clever', encipher=False)
    'hellothere'
    """
    transpositions = transpositions_of(keyword)
    columns = every_nth(message, len(transpositions), fillvalue=' ')
    if encipher:
        transposed_columns = transpose(columns, transpositions)
    else:
        transposed_columns = untranspose(columns, transpositions)
    return combine_every_nth(transposed_columns)


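# Step-by-step view of column_transposition_worker('hellothere', 'clever'):
# transpositions_of('clever') == [0, 2, 1, 4, 3],
# every_nth('hellothere', 5, fillvalue=' ') == ['ht', 'eh', 'le', 'lr', 'oe'],
# transposing those columns gives ['ht', 'le', 'eh', 'oe', 'lr'],
# and combine_every_nth() reassembles them into 'hleolteher'.
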
def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
            'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
            'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.42152901235832...)
    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
            'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
    (13, 0.316029208075451...)
    """
    sanitised_message = sanitise(message)
    best_shift = 0
    best_fit = float("inf")
    for shift in range(26):
        plaintext = caesar_decipher(sanitised_message, shift)
        counts = message_frequency_scaling(letter_frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
        if fit < best_fit:
            best_fit = fit
            best_shift = shift
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit

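# Typical use of caesar_break (the ciphertext path below is only illustrative,
# following the pattern of the timing example at the top of this file):
#
# ciphertext = open('2012/1a.ciphertext', 'r').read()
# key, fit = caesar_break(ciphertext)
# plaintext = caesar_decipher(sanitise(ciphertext), key)
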
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
            'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
            'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
            'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
            'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
            'clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    for one_based in [True, False]:
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                counts = message_frequency_scaling(letter_frequencies(plaintext))
                fit = metric(target_counts, counts)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit

def keyword_break(message,
                  wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break(keyword_encipher('this is a test message for the ' \
            'keyword decipherment', 'elephant', 1), \
            wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    best_keyword = ''
    best_wrap_alphabet = 0
    best_fit = float("inf")
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            counts = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_counts, counts)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit

def keyword_break_mp(message,
                     wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
            'keyword decipherment', 'elephant', 1), \
            wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    with Pool() as pool:
        helper_args = [(message, word, wrap, metric, target_counts,
                        message_frequency_scaling)
                       for word in wordlist for wrap in range(3)]
        # Gotcha: the helper function here needs to be defined at the top level
        # (limitation of Pool.starmap)
        breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
        return min(breaks, key=lambda k: k[1])

def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    """Scores one keyword/wrap_alphabet candidate for keyword_break_mp"""
    plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    counts = message_frequency_scaling(letter_frequencies(plaintext))
    fit = metric(target_counts, counts)
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
    return (keyword, wrap_alphabet), fit

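# keyword_break_mp spreads the same dictionary attack as keyword_break over a
# Pool of worker processes, with keyword_break_one as the per-candidate helper
# it farms out.  A sketch of direct use (ciphertext is assumed to hold an
# enciphered message; the chunksize value is just an example):
#
# (keyword, wrap), fit = keyword_break_mp(ciphertext, chunksize=500)
# plaintext = keyword_decipher(ciphertext, keyword, wrap)
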
def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a Scytale cipher

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
            'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
            'aek') # doctest: +ELLIPSIS
    (6, 0.83453041115025...)
    """
    best_key = 0
    best_fit = float("inf")
    for key in range(1, 20):
        if len(message) % key == 0:
            plaintext = scytale_decipher(message, key)
            counts = message_frequency_scaling(frequencies(
                ngrams(sanitise(plaintext), 2)))
            fit = metric(target_counts, counts)
            logger.debug('Scytale break attempt using key {0} gives fit of '
                         '{1} and decrypt starting: {2}'.format(key,
                             fit, sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_key = key
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit


if __name__ == "__main__":
    import doctest
    doctest.testmod()