Tidying
[cipher-tools.git] / cipher.py
1 import string
2 import collections
3 import norms
4 import logging
5 import math
6 from itertools import zip_longest, repeat
7 from segment import segment
8 from multiprocessing import Pool
9
10 # To time a run:
11 #
12 # import timeit
13 # c5a = open('2012/5a.ciphertext', 'r').read()
14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
16
# Module-level logger: break attempts are recorded in cipher.log.
# NOTE(review): the FileHandler is attached at import time, so merely
# importing this module creates/opens cipher.log in the current directory.
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
# Uncomment one of these for per-attempt detail from the *_break functions.
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)
22
def _load_ngram_counts(filename):
    """Read a tab-separated "<ngram>\\t<count>" file into a defaultdict.

    Each line holds one n-gram and its occurrence count separated by a tab;
    n-grams absent from the file read as 0.
    """
    counts = collections.defaultdict(int)
    with open(filename, 'r') as f:
        for line in f:
            (ngram, count) = line.split("\t")
            counts[ngram] = int(count)
    return counts

# English letter, bigram and trigram frequencies (raw and normalised),
# used as targets by the frequency-analysis breaking functions below.
# (Previously three copy-pasted loading loops; now one shared helper.)
english_counts = _load_ngram_counts('count_1l.txt')
normalised_english_counts = norms.normalise(english_counts)

english_bigram_counts = _load_ngram_counts('count_2l.txt')
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)

english_trigram_counts = _load_ngram_counts('count_3l.txt')
normalised_english_trigram_counts = norms.normalise(english_trigram_counts)
43
44
# Candidate keys for the dictionary-driven breaking functions
# (keyword_break, column_transposition_break, and their _mp variants).
with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]
47
# Division table modulo 26: modular_division_table[b][c] == a whenever
# (a * b) % 26 == c, i.e. it answers "what do I multiply b by to get c,
# mod 26?".  Used by affine_decipher_letter to undo the multiplier.
modular_division_table = [[0]*26 for x in range(26)]
for a in range(26):
    for b in range(26):
        c = (a * b) % 26
        modular_division_table[b][c] = a
53
54
def sanitise(text):
    """Strip everything except ASCII letters from text and lowercase it.

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    return ''.join(c.lower() for c in text if c in string.ascii_letters)
65
def ngrams(text, n):
    """Return every contiguous length-n slice of text, in order.

    >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
     'nf', 'fo', 'ox']
    """
    window_starts = range(len(text) - n + 1)
    return [text[start:start + n] for start in window_starts]
77
def every_nth(text, n, fillvalue=''):
    """Return n strings, the i-th made of characters i, i+n, i+2n, ... of
    text.  Strings short of the maximum length are padded with fillvalue.

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
    """
    rows = (text[start:start + n] for start in range(0, len(text), n))
    return [''.join(column)
            for column in zip_longest(*rows, fillvalue=fillvalue)]
94
def combine_every_nth(split_text):
    """Invert every_nth: interleave the given strings back into one string.

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    """
    interleaved = zip_longest(*split_text, fillvalue='')
    return ''.join(''.join(chunk) for chunk in interleaved)
107
def transpose(items, transposition):
    """Rearrange items so that position p of the result holds
    items[transposition[p]].

    >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
    ['d', 'b', 'c', 'a']
    >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
    [13, 12, 14, 11, 15, 10]
    """
    return [items[source] for source in transposition]
122
def untranspose(items, transposition):
    """Undo transpose: position transposition[p] of the result holds items[p].

    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
    ['a', 'b', 'c', 'd']
    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
    [10, 11, 12, 13, 14, 15]
    """
    restored = [''] * len(transposition)
    for position, destination in enumerate(transposition):
        restored[destination] = items[position]
    return restored
137
138
def frequencies(text):
    """Tally how many times each character occurs in text.

    Returns a defaultdict(int), so characters absent from text read as 0.

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
            'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    """
    tallies = collections.defaultdict(int)
    for symbol in text:
        tallies[symbol] += 1
    return tallies
letter_frequencies = frequencies
169
def deduplicate(text):
    """Return the distinct elements of text as a list, keeping only the
    first occurrence of each and preserving that order.

    >>> deduplicate('happy')
    ['h', 'a', 'p', 'y']
    """
    seen = set()
    unique = []
    for element in text:
        if element not in seen:
            seen.add(element)
            unique.append(element)
    return unique
172
173
174
def caesar_encipher_letter(letter, shift):
    """Shift a single letter forward through the alphabet, wrapping around;
    any non-letter is returned unchanged.

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('x', 2)
    'z'
    >>> caesar_encipher_letter('y', 2)
    'a'
    >>> caesar_encipher_letter('z', -1)
    'y'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    if letter not in string.ascii_letters:
        return letter
    base = ord('A') if letter in string.ascii_uppercase else ord('a')
    return chr(base + (ord(letter) - base + shift) % 26)
204
def caesar_decipher_letter(letter, shift):
    """Shift a single letter backward through the alphabet.

    >>> caesar_decipher_letter('b', 1)
    'a'
    >>> caesar_decipher_letter('b', 2)
    'z'
    """
    # Deciphering is just enciphering with the opposite shift.
    reverse_shift = -shift
    return caesar_encipher_letter(letter, reverse_shift)
214
def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('abcxyz', 2)
    'cdezab'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    """
    return ''.join(caesar_encipher_letter(character, shift)
                   for character in message)
229
def caesar_decipher(message, shift):
    """Decipher a message enciphered with the Caesar cipher of given shift.

    (Docstring previously said "Encipher" — copy-paste error.)

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cde', 2)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    """
    return caesar_encipher(message, -shift)
241
def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Encipher a single letter with the affine map x -> multiplier*x + adder
    (mod 26).  one_based selects whether 'a' counts as 1 or 0.  Non-letters
    are returned unchanged.

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
            for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
            for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    if letter not in string.ascii_letters:
        return letter
    base = ord('A') if letter in string.ascii_uppercase else ord('a')
    offset = 1 if one_based else 0
    plain_number = ord(letter) - base + offset
    cipher_number = (plain_number * multiplier + adder) % 26 - offset
    return chr(cipher_number % 26 + base)
264
def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter enciphered with the affine cipher.

    (Docstring previously said "Encipher" — copy-paste error.)  The
    multiplicative step is undone with the precomputed
    modular_division_table (division modulo 26); non-letters are returned
    unchanged.

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
            for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
            for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        cipher_number = ord(letter) - alphabet_start
        if one_based: cipher_number += 1
        # Subtract the adder, then "divide" by the multiplier mod 26.
        plaintext_number = ( modular_division_table[multiplier]
                                                   [(cipher_number - adder) % 26] )
        if one_based: plaintext_number -= 1
        return chr(plaintext_number % 26 + alphabet_start)
    else:
        return letter
288
def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a message with the affine cipher, letter by letter.

    >>> affine_encipher('hours passed during which jerico tried every ' \
           'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    return ''.join(affine_encipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
299
def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a message enciphered with the affine cipher.

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
           'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    return ''.join(affine_decipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
310
311
def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Build the cipher alphabet for a keyword substitution cipher.
    wrap_alphabet controls where the remaining letters start:
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 0)
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    clean_keyword = sanitise(keyword)
    if wrap_alphabet == 0:
        filler = string.ascii_lowercase
    else:
        if wrap_alphabet == 1:
            anchor_letter = deduplicate(clean_keyword)[-1]
        else:
            anchor_letter = max(clean_keyword)
        resume_at = string.ascii_lowercase.find(anchor_letter) + 1
        filler = (string.ascii_lowercase[resume_at:] +
                  string.ascii_lowercase)
    return ''.join(deduplicate(clean_keyword + filler))
344
345
def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Encipher a message with a keyword substitution cipher.
    wrap_alphabet controls where the non-keyword letters start
    (0: from 'a'; 1: from the last keyword letter; 2: from the largest
    keyword letter).

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 1)
    'lskl dskkbus'
    >>> keyword_encipher('test message', 'bayes', 2)
    'qspq jsppbcs'
    """
    enciphering_map = str.maketrans(
        string.ascii_lowercase,
        keyword_cipher_alphabet_of(keyword, wrap_alphabet))
    return message.lower().translate(enciphering_map)
366
def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Decipher a message enciphered with a keyword substitution cipher.
    wrap_alphabet controls where the non-keyword letters start
    (0: from 'a'; 1: from the last keyword letter; 2: from the largest
    keyword letter).

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
    'test message'
    >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
    'test message'
    """
    deciphering_map = str.maketrans(
        keyword_cipher_alphabet_of(keyword, wrap_alphabet),
        string.ascii_lowercase)
    return message.lower().translate(deciphering_map)
387
def scytale_encipher(message, rows):
    """Encipher using the scytale transposition cipher: pad the message
    with spaces to a multiple of rows, then read it off column by column.

    >>> scytale_encipher('thequickbrownfox', 3)
    'tcnhkfeboqrxuo iw '
    >>> scytale_encipher('thequickbrownfox', 4)
    'tubnhirfecooqkwx'
    >>> scytale_encipher('thequickbrownfox', 5)
    'tubn hirf ecoo qkwx '
    """
    # -len % rows is the amount of padding needed (0 when already exact).
    padded = message + ' ' * (-len(message) % rows)
    row_length = len(padded) // rows
    # Column j of the rows x row_length grid is every row_length-th
    # character starting at j.
    return ''.join(padded[column::row_length] for column in range(row_length))
409
def scytale_decipher(message, rows):
    """Decipher the scytale transposition cipher.  Assumes the message was
    padded so that all rows are the same length.

    >>> scytale_decipher('tubnhirfecooqkwx', 4)
    'thequickbrownfox'
    """
    usable_length = round(len(message) / rows) * rows
    trimmed = message[:usable_length]
    # Row i of the original grid is every rows-th character starting at i.
    return ''.join(trimmed[row::rows] for row in range(rows))
428
429
def transpositions_of(keyword):
    """Return the column ordering implied by a keyword: for each letter of
    the deduplicated keyword taken in sorted order, the index that letter
    occupies in the keyword.  E.g. 'clever' deduplicates to 'clevr', which
    sorts to 'celrv'.

    >>> transpositions_of('clever')
    [0, 2, 1, 4, 3]
    """
    key_letters = deduplicate(keyword)
    return [key_letters.index(letter) for letter in sorted(key_letters)]
442
def column_transposition_encipher(message, keyword, fillvalue=' '):
    """Encipher with the column transposition cipher; the message is
    padded with fillvalue so all rows are the same length.

    >>> column_transposition_encipher('hellothere', 'clever')
    'hleolteher'
    >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
    'hleolthre!e!'
    """
    return column_transposition_worker(message, keyword, fillvalue=fillvalue,
                                       encipher=True)
454
def column_transposition_decipher(message, keyword, fillvalue=' '):
    """Decipher the column transposition cipher; the message is assumed to
    be padded so all rows are the same length.

    >>> column_transposition_decipher('hleolteher', 'clever')
    'hellothere'
    >>> column_transposition_decipher('hleolthre!e!', 'cleverly', fillvalue='?')
    'hellothere!!'
    """
    return column_transposition_worker(message, keyword, fillvalue=fillvalue,
                                       encipher=False)
466
def column_transposition_worker(message, keyword,
                                encipher=True, fillvalue=' '):
    """Shared implementation of the column transposition cipher: split the
    message into columns, shuffle (or unshuffle) them according to the
    keyword's ordering, and interleave them back into a string.

    >>> column_transposition_worker('hellothere', 'clever')
    'hleolteher'
    >>> column_transposition_worker('hleolteher', 'clever', encipher=False)
    'hellothere'
    """
    column_order = transpositions_of(keyword)
    columns = every_nth(message, len(column_order), fillvalue=fillvalue)
    shuffle = transpose if encipher else untranspose
    return combine_every_nth(shuffle(columns, column_order))
486
487
488
def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Break a Caesar cipher by frequency analysis: try all 26 shifts and
    return the (shift, fit) whose letter frequencies are closest to the
    target counts (lower fit is better).

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
        'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
        'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.42152901235832...)
    """
    sanitised_message = sanitise(message)
    trials = []
    for shift in range(26):
        plaintext = caesar_decipher(sanitised_message, shift)
        counts = message_frequency_scaling(letter_frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(shift, fit,
                         plaintext[:50]))
        trials.append((fit, shift))
    # min keeps the first of equal fits, matching a strict < comparison.
    best_fit, best_shift = min(trials, key=lambda trial: trial[0])
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit
521
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis

    Exhaustively tries every (multiplier, adder, one_based) key, scoring
    each decrypt's letter frequencies against target_counts with metric
    (lower is better).  Returns ((multiplier, adder, one_based), fit).

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
        'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
        'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
        'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
        'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
        'clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    # Best key found so far; fit starts at infinity so any real fit wins.
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    # Search both indexing conventions, the odd multipliers (the only
    # candidates that can be coprime with 26) and all 26 adders.
    for one_based in [True, False]:
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                counts = message_frequency_scaling(letter_frequencies(plaintext))
                fit = metric(target_counts, counts)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                # Strict < keeps the earliest key among equal fits.
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit
563
def keyword_break(message,
                  wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    Tries every keyword in wordlist with all three wrap_alphabet modes and
    returns ((keyword, wrap_alphabet), fit) for the best-scoring decrypt.

    >>> keyword_break(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    best_keyword = ''
    # Fix: wrap_alphabet is an int in {0, 1, 2}; it was initialised to the
    # boolean True, which leaked out unchanged for an empty wordlist.
    best_wrap_alphabet = 0
    best_fit = float("inf")
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            counts = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_counts, counts)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit
599
def keyword_break_mp(message,
                     wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis, spreading the candidate keys over a process pool.

    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    jobs = [(message, word, wrap, metric, target_counts,
             message_frequency_scaling)
            for word in wordlist
            for wrap in range(3)]
    with Pool() as pool:
        # Gotcha: the helper function here needs to be defined at the top
        # level (limitation of Pool.starmap)
        candidate_fits = pool.starmap(keyword_break_one, jobs, chunksize)
    return min(candidate_fits, key=lambda candidate: candidate[1])
622
def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    """Score one (keyword, wrap_alphabet) candidate for keyword_break_mp,
    returning ((keyword, wrap_alphabet), fit)."""
    candidate_plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    scaled_counts = message_frequency_scaling(
        letter_frequencies(candidate_plaintext))
    fit = metric(target_counts, scaled_counts)
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(candidate_plaintext)[:50]))
    return (keyword, wrap_alphabet), fit
632
def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise):
    """Break a scytale cipher by trying every row count from 1 to 19 that
    divides the message length, scoring each decrypt's bigram frequencies.

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
        'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
        'aek') # doctest: +ELLIPSIS
    (6, 0.83453041115025...)
    """
    trials = []
    for key in range(1, 20):
        if len(message) % key != 0:
            continue
        plaintext = scytale_decipher(message, key)
        counts = message_frequency_scaling(frequencies(
            ngrams(sanitise(plaintext), 2)))
        fit = metric(target_counts, counts)
        logger.debug('Scytale break attempt using key {0} gives fit of '
                     '{1} and decrypt starting: {2}'.format(key,
                         fit, sanitise(plaintext)[:50]))
        trials.append((fit, key))
    # key=1 always divides the length, so trials is never empty; min keeps
    # the first of equal fits, matching a strict < comparison.
    best_fit, best_key = min(trials, key=lambda trial: trial[0])
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit
662
def column_transposition_break(message,
                               wordlist=keywords,
                               metric=norms.euclidean_distance,
                               target_counts=normalised_english_bigram_counts,
                               message_frequency_scaling=norms.normalise):
    """Breaks a column transposition cipher using a dictionary and
    n-gram frequency analysis

    >>> column_transposition_break(column_transposition_encipher(sanitise( \
            "Turing's homosexuality resulted in a criminal prosecution in 1952, \
            when homosexual acts were still illegal in the United Kingdom. "), \
            'encipher'), \
            wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS
    ('encipher', 0.898128626285...)
    >>> column_transposition_break(column_transposition_encipher(sanitise( \
            "Turing's homosexuality resulted in a criminal prosecution in 1952, " \
            "when homosexual acts were still illegal in the United Kingdom."), \
            'encipher'), \
            wordlist=['encipher', 'keyword', 'fourteen'], \
            target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
    ('encipher', 1.1958792913127...)
    """
    best_keyword = ''
    best_fit = float("inf")
    # Infer the n-gram size to score with from the target counts' keys.
    ngram_length = len(next(iter(target_counts.keys())))
    for keyword in wordlist:
        # Only keywords whose deduplicated length divides the message
        # length can have produced it without ragged columns.
        if len(message) % len(deduplicate(keyword)) == 0:
            plaintext = column_transposition_decipher(message, keyword)
            counts = message_frequency_scaling(frequencies(
                ngrams(sanitise(plaintext), ngram_length)))
            fit = metric(target_counts, counts)
            logger.debug('Column transposition break attempt using key {0} '
                         'gives fit of {1} and decrypt starting: {2}'.format(
                             keyword, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
    # NOTE(review): if no keyword's length divides the message length,
    # best_keyword stays '' and the decipher call in the log arguments below
    # raises (every_nth is handed n=0) -- confirm this case cannot occur.
    logger.info('Column transposition break best fit with key {0} gives fit '
                'of {1} and decrypt starting: {2}'.format(best_keyword,
                    best_fit, sanitise(
                        column_transposition_decipher(message,
                                                      best_keyword))[:50]))
    return best_keyword, best_fit
707
708
def column_transposition_break_mp(message,
                                  wordlist=keywords,
                                  metric=norms.euclidean_distance,
                                  target_counts=normalised_english_bigram_counts,
                                  message_frequency_scaling=norms.normalise,
                                  chunksize=500):
    """Breaks a column transposition cipher using a dictionary and
    n-gram frequency analysis, spreading the candidate keys over a
    process pool.

    >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
            "Turing's homosexuality resulted in a criminal prosecution in 1952, \
            when homosexual acts were still illegal in the United Kingdom. "), \
            'encipher'), \
            wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS
    ('encipher', 0.898128626285...)
    """
    # Infer the n-gram size to score with from the target counts' keys.
    ngram_length = len(next(iter(target_counts.keys())))
    jobs = [(message, word, metric, target_counts, ngram_length,
             message_frequency_scaling)
            for word in wordlist]
    with Pool() as pool:
        # Gotcha: the helper function here needs to be defined at the top
        # level (limitation of Pool.starmap)
        candidate_fits = pool.starmap(column_transposition_break_worker,
                                      jobs, chunksize)
    return min(candidate_fits, key=lambda candidate: candidate[1])
741
def column_transposition_break_worker(message, keyword, metric, target_counts,
                                      ngram_length, message_frequency_scaling):
    """Score one keyword candidate for column_transposition_break_mp,
    returning (keyword, fit)."""
    candidate_plaintext = column_transposition_decipher(message, keyword)
    scaled_counts = message_frequency_scaling(frequencies(
        ngrams(sanitise(candidate_plaintext), ngram_length)))
    fit = metric(target_counts, scaled_counts)
    logger.debug('Column transposition break attempt using key {0} '
                 'gives fit of {1} and decrypt starting: {2}'.format(
                     keyword, fit,
                     sanitise(candidate_plaintext)[:50]))
    return keyword, fit
753
754
755
if __name__ == "__main__":
    # Run the embedded doctests when this module is executed as a script.
    import doctest
    doctest.testmod()