cipher.py

   1 import string
   2 import collections
   3 import norms
   4 import logging
   5 import math
   6 from itertools import zip_longest, repeat
   7 from segment import segment
   8 from multiprocessing import Pool
   9
  10 # To time a run:
  11 #
  12 # import timeit
  13 # c5a = open('2012/5a.ciphertext', 'r').read()
  14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
  16
  17 logger = logging.getLogger(__name__)
  18 logger.addHandler(logging.FileHandler('cipher.log'))
  19 logger.setLevel(logging.WARNING)
  20 #logger.setLevel(logging.INFO)
  21 #logger.setLevel(logging.DEBUG)
  22
  23 english_counts = collections.defaultdict(int)
  24 with open('count_1l.txt', 'r') as f:
  25     for line in f:
  26         (letter, count) = line.split("\t")
  27         english_counts[letter] = int(count)
  28 normalised_english_counts = norms.normalise(english_counts)
  29
  30 english_bigram_counts = collections.defaultdict(int)
  31 with open('count_2l.txt', 'r') as f:
  32     for line in f:
  33         (bigram, count) = line.split("\t")
  34         english_bigram_counts[bigram] = int(count)
  35 normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
  36
  37 english_trigram_counts = collections.defaultdict(int)
  38 with open('count_3l.txt', 'r') as f:
  39     for line in f:
  40         (trigram, count) = line.split("\t")
  41         english_trigram_counts[trigram] = int(count)
  42 normalised_english_trigram_counts = norms.normalise(english_trigram_counts)
  43
  44
  45 with open('words.txt', 'r') as f:
  46     keywords = [line.rstrip() for line in f]
  47
  48 modular_division_table = [[0]*26 for x in range(26)]
  49 for a in range(26):
  50     for b in range(26):
  51         c = (a * b) % 26
  52         modular_division_table[b][c] = a
  53
  54 def letters(text):
  55     """Remove all non-alphabetic characters from a text
  56     >>> letters('The Quick')
  57     'TheQuick'
  58     >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
  59     'TheQuickBROWNfoxjumpedoverthelazyDOG'
  60     """
  61     return ''.join([c for c in text if c in string.ascii_letters])
  62
  63 def sanitise(text):
  64     """Remove all non-alphabetic characters and convert the text to lowercase
  65
  66     >>> sanitise('The Quick')
  67     'thequick'
  68     >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
  69     'thequickbrownfoxjumpedoverthelazydog'
  70     """
  71     # sanitised = [c.lower() for c in text if c in string.ascii_letters]
  72     # return ''.join(sanitised)
  73     return letters(text).lower()
  74
  75 def ngrams(text, n):
  76     """Returns all n-grams of a text
  77
  78     >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
  79     ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
  80      'nf', 'fo', 'ox']
  81     >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
  82     ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
  83      'rown', 'ownf', 'wnfo', 'nfox']
  84     """
  85     return [text[i:i+n] for i in range(len(text)-n+1)]
  86
  87 def every_nth(text, n, fillvalue=''):
  88     """Returns n strings, each of which consists of every nth character,
  89     starting with the 0th, 1st, 2nd, ... (n-1)th character
  90
  91     >>> every_nth(string.ascii_lowercase, 5)
  92     ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
  93     >>> every_nth(string.ascii_lowercase, 1)
  94     ['abcdefghijklmnopqrstuvwxyz']
  95     >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
  96     ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
  97      'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
  98     >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
  99     ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
 100     """
 101     split_text = [text[i:i+n] for i in range(0, len(text), n)]
 102     return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
 103
 104 def combine_every_nth(split_text):
 105     """Reforms a text split into every_nth strings
 106
 107     >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
 108     'abcdefghijklmnopqrstuvwxyz'
 109     >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
 110     'abcdefghijklmnopqrstuvwxyz'
 111     >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
 112     'abcdefghijklmnopqrstuvwxyz'
 113     """
 114     return ''.join([''.join(l)
 115                     for l in zip_longest(*split_text, fillvalue='')])
 116
 117 def transpose(items, transposition):
 118     """Moves items around according to the given transposition
 119
 120     >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
 121     ['a', 'b', 'c', 'd']
 122     >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
 123     ['d', 'b', 'c', 'a']
 124     >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
 125     [13, 12, 14, 11, 15, 10]
 126     """
 127     transposed = list(repeat('', len(transposition)))
 128     for p, t in enumerate(transposition):
 129        transposed[p] = items[t]
 130     return transposed
 131
 132 def untranspose(items, transposition):
 133     """Undoes a transpose
 134
 135     >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
 136     ['a', 'b', 'c', 'd']
 137     >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
 138     ['a', 'b', 'c', 'd']
 139     >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
 140     [10, 11, 12, 13, 14, 15]
 141     """
 142     transposed  = list(repeat('', len(transposition)))
 143     for p, t in enumerate(transposition):
 144        transposed[t] = items[p]
 145     return transposed
 146
 147
 148 def frequencies(text):
 149     """Count the number of occurrences of each character in text
 150
 151     >>> sorted(frequencies('abcdefabc').items())
 152     [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
 153     >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
 154          'dog').items()) # doctest: +NORMALIZE_WHITESPACE
 155     [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
 156      ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
 157      ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
 158      ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
 159     >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
 160          '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
 161     [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
 162      ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
 163      ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
 164      ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
 165      ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
 166     >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
 167          'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
 168     [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
 169      ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
 170      ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
 171      ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
 172     >>> frequencies('abcdefabcdef')['x']
 173     0
 174     """
 175     #counts = collections.defaultdict(int)
 176     #for c in text:
 177     #    counts[c] += 1
 178     #return counts
 179     return collections.Counter(c for c in text)
 180 letter_frequencies = frequencies
 181
 182 def deduplicate(text):
 183     return list(collections.OrderedDict.fromkeys(text))
 184
 185
 186
 187 def caesar_encipher_letter(letter, shift):
 188     """Encipher a letter, given a shift amount
 189
 190     >>> caesar_encipher_letter('a', 1)
 191     'b'
 192     >>> caesar_encipher_letter('a', 2)
 193     'c'
 194     >>> caesar_encipher_letter('b', 2)
 195     'd'
 196     >>> caesar_encipher_letter('x', 2)
 197     'z'
 198     >>> caesar_encipher_letter('y', 2)
 199     'a'
 200     >>> caesar_encipher_letter('z', 2)
 201     'b'
 202     >>> caesar_encipher_letter('z', -1)
 203     'y'
 204     >>> caesar_encipher_letter('a', -1)
 205     'z'
 206     """
 207     if letter in string.ascii_letters:
 208         if letter in string.ascii_uppercase:
 209             alphabet_start = ord('A')
 210         else:
 211             alphabet_start = ord('a')
 212         return chr(((ord(letter) - alphabet_start + shift) % 26) +
 213                    alphabet_start)
 214     else:
 215         return letter
 216
 217 def caesar_decipher_letter(letter, shift):
 218     """Decipher a letter, given a shift amount
 219
 220     >>> caesar_decipher_letter('b', 1)
 221     'a'
 222     >>> caesar_decipher_letter('b', 2)
 223     'z'
 224     """
 225     return caesar_encipher_letter(letter, -shift)
 226
 227 def caesar_encipher(message, shift):
 228     """Encipher a message with the Caesar cipher of given shift
 229
 230     >>> caesar_encipher('abc', 1)
 231     'bcd'
 232     >>> caesar_encipher('abc', 2)
 233     'cde'
 234     >>> caesar_encipher('abcxyz', 2)
 235     'cdezab'
 236     >>> caesar_encipher('ab cx yz', 2)
 237     'cd ez ab'
 238     """
 239     enciphered = [caesar_encipher_letter(l, shift) for l in message]
 240     return ''.join(enciphered)
 241
 242 def caesar_decipher(message, shift):
 243     """Encipher a message with the Caesar cipher of given shift
 244
 245     >>> caesar_decipher('bcd', 1)
 246     'abc'
 247     >>> caesar_decipher('cde', 2)
 248     'abc'
 249     >>> caesar_decipher('cd ez ab', 2)
 250     'ab cx yz'
 251     """
 252     return caesar_encipher(message, -shift)
 253
 254 def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
 255     """Encipher a letter, given a multiplier and adder
 256
 257     >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
 258             for l in string.ascii_uppercase])
 259     'HKNQTWZCFILORUXADGJMPSVYBE'
 260     >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
 261             for l in string.ascii_uppercase])
 262     'FILORUXADGJMPSVYBEHKNQTWZC'
 263     """
 264     if letter in string.ascii_letters:
 265         if letter in string.ascii_uppercase:
 266             alphabet_start = ord('A')
 267         else:
 268             alphabet_start = ord('a')
 269         letter_number = ord(letter) - alphabet_start
 270         if one_based: letter_number += 1
 271         cipher_number = (letter_number * multiplier + adder) % 26
 272         if one_based: cipher_number -= 1
 273         return chr(cipher_number % 26 + alphabet_start)
 274     else:
 275         return letter
 276
 277 def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
 278     """Encipher a letter, given a multiplier and adder
 279
 280     >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
 281             for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
 282     'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
 283     >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
 284             for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
 285     'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
 286     """
 287     if letter in string.ascii_letters:
 288         if letter in string.ascii_uppercase:
 289             alphabet_start = ord('A')
 290         else:
 291             alphabet_start = ord('a')
 292         cipher_number = ord(letter) - alphabet_start
 293         if one_based: cipher_number += 1
 294         plaintext_number = ( modular_division_table[multiplier]
 295                                                    [(cipher_number - adder) % 26] )
 296         if one_based: plaintext_number -= 1
 297         return chr(plaintext_number % 26 + alphabet_start)
 298     else:
 299         return letter
 300
 301 def affine_encipher(message, multiplier=1, adder=0, one_based=True):
 302     """Encipher a message
 303
 304     >>> affine_encipher('hours passed during which jerico tried every ' \
 305            'trick he could think of', 15, 22, True)
 306     'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
 307     """
 308     enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
 309                   for l in message]
 310     return ''.join(enciphered)
 311
 312 def affine_decipher(message, multiplier=1, adder=0, one_based=True):
 313     """Decipher a message
 314
 315     >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
 316            'jfaoe ls omytd jlaxe mh', 15, 22, True)
 317     'hours passed during which jerico tried every trick he could think of'
 318     """
 319     enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
 320                   for l in message]
 321     return ''.join(enciphered)
 322
 323
 324 def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
 325     """Find the cipher alphabet given a keyword.
 326     wrap_alphabet controls how the rest of the alphabet is added
 327     after the keyword.
 328     0 : from 'a'
 329     1 : from the last letter in the sanitised keyword
 330     2 : from the largest letter in the sanitised keyword
 331
 332     >>> keyword_cipher_alphabet_of('bayes')
 333     'bayescdfghijklmnopqrtuvwxz'
 334     >>> keyword_cipher_alphabet_of('bayes', 0)
 335     'bayescdfghijklmnopqrtuvwxz'
 336     >>> keyword_cipher_alphabet_of('bayes', 1)
 337     'bayestuvwxzcdfghijklmnopqr'
 338     >>> keyword_cipher_alphabet_of('bayes', 2)
 339     'bayeszcdfghijklmnopqrtuvwx'
 340     """
 341     if wrap_alphabet == 0:
 342         cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
 343                                               string.ascii_lowercase))
 344     else:
 345         if wrap_alphabet == 1:
 346             last_keyword_letter = deduplicate(sanitise(keyword))[-1]
 347         else:
 348             last_keyword_letter = sorted(sanitise(keyword))[-1]
 349         last_keyword_position = string.ascii_lowercase.find(
 350             last_keyword_letter) + 1
 351         cipher_alphabet = ''.join(
 352             deduplicate(sanitise(keyword) +
 353                         string.ascii_lowercase[last_keyword_position:] +
 354                         string.ascii_lowercase))
 355     return cipher_alphabet
 356
 357
 358 def keyword_encipher(message, keyword, wrap_alphabet=0):
 359     """Enciphers a message with a keyword substitution cipher.
 360     wrap_alphabet controls how the rest of the alphabet is added
 361     after the keyword.
 362     0 : from 'a'
 363     1 : from the last letter in the sanitised keyword
 364     2 : from the largest letter in the sanitised keyword
 365
 366     >>> keyword_encipher('test message', 'bayes')
 367     'rsqr ksqqbds'
 368     >>> keyword_encipher('test message', 'bayes', 0)
 369     'rsqr ksqqbds'
 370     >>> keyword_encipher('test message', 'bayes', 1)
 371     'lskl dskkbus'
 372     >>> keyword_encipher('test message', 'bayes', 2)
 373     'qspq jsppbcs'
 374     """
 375     cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
 376     cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
 377     return message.lower().translate(cipher_translation)
 378
 379 def keyword_decipher(message, keyword, wrap_alphabet=0):
 380     """Deciphers a message with a keyword substitution cipher.
 381     wrap_alphabet controls how the rest of the alphabet is added
 382     after the keyword.
 383     0 : from 'a'
 384     1 : from the last letter in the sanitised keyword
 385     2 : from the largest letter in the sanitised keyword
 386
 387     >>> keyword_decipher('rsqr ksqqbds', 'bayes')
 388     'test message'
 389     >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
 390     'test message'
 391     >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
 392     'test message'
 393     >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
 394     'test message'
 395     """
 396     cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
 397     cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
 398     return message.lower().translate(cipher_translation)
 399
 400 def scytale_encipher(message, rows):
 401     """Enciphers using the scytale transposition cipher.
 402     Message is padded with spaces to allow all rows to be the same length.
 403
 404     >>> scytale_encipher('thequickbrownfox', 3)
 405     'tcnhkfeboqrxuo iw '
 406     >>> scytale_encipher('thequickbrownfox', 4)
 407     'tubnhirfecooqkwx'
 408     >>> scytale_encipher('thequickbrownfox', 5)
 409     'tubn hirf ecoo qkwx '
 410     >>> scytale_encipher('thequickbrownfox', 6)
 411     'tqcrnxhukof eibwo '
 412     >>> scytale_encipher('thequickbrownfox', 7)
 413     'tqcrnx hukof  eibwo  '
 414     """
 415     if len(message) % rows != 0:
 416         message += ' '*(rows - len(message) % rows)
 417     row_length = round(len(message) / rows)
 418     slices = [message[i:i+row_length]
 419               for i in range(0, len(message), row_length)]
 420     return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])
 421
 422 def scytale_decipher(message, rows):
 423     """Deciphers using the scytale transposition cipher.
 424     Assumes the message is padded so that all rows are the same length.
 425
 426     >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
 427     'thequickbrownfox  '
 428     >>> scytale_decipher('tubnhirfecooqkwx', 4)
 429     'thequickbrownfox'
 430     >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
 431     'thequickbrownfox    '
 432     >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
 433     'thequickbrownfox  '
 434     >>> scytale_decipher('tqcrnx hukof  eibwo  ', 7)
 435     'thequickbrownfox     '
 436     """
 437     cols = round(len(message) / rows)
 438     columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
 439     return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
 440
 441
 442 def transpositions_of(keyword):
 443     """Finds the transpostions given by a keyword. For instance, the keyword
 444     'clever' rearranges to 'celrv', so the first column (0) stays first, the
 445     second column (1) moves to third, the third column (2) moves to second,
 446     and so on.
 447
 448     >>> transpositions_of('clever')
 449     [0, 2, 1, 4, 3]
 450     """
 451     key = deduplicate(keyword)
 452     transpositions = [key.index(l) for l in sorted(key)]
 453     return transpositions
 454
 455 def column_transposition_encipher(message, keyword, fillvalue=' '):
 456     """Enciphers using the column transposition cipher.
 457     Message is padded to allow all rows to be the same length.
 458
 459     >>> column_transposition_encipher('hellothere', 'clever')
 460     'hleolteher'
 461     >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
 462     'hleolthre!e!'
 463     """
 464     return column_transposition_worker(message, keyword, encipher=True,
 465                                        fillvalue=fillvalue)
 466
 467 def column_transposition_decipher(message, keyword, fillvalue=' '):
 468     """Deciphers using the column transposition cipher.
 469     Message is padded to allow all rows to be the same length.
 470
 471     >>> column_transposition_decipher('hleolteher', 'clever')
 472     'hellothere'
 473     >>> column_transposition_decipher('hleolthre!e!', 'cleverly', fillvalue='?')
 474     'hellothere!!'
 475     """
 476     return column_transposition_worker(message, keyword, encipher=False,
 477                                        fillvalue=fillvalue)
 478
 479 def column_transposition_worker(message, keyword,
 480                                 encipher=True, fillvalue=' '):
 481     """Does the actual work of the column transposition cipher.
 482     Message is padded with spaces to allow all rows to be the same length.
 483
 484     >>> column_transposition_worker('hellothere', 'clever')
 485     'hleolteher'
 486     >>> column_transposition_worker('hellothere', 'clever', encipher=True)
 487     'hleolteher'
 488     >>> column_transposition_worker('hleolteher', 'clever', encipher=False)
 489     'hellothere'
 490     """
 491     transpositions = transpositions_of(keyword)
 492     columns = every_nth(message, len(transpositions), fillvalue=fillvalue)
 493     if encipher:
 494         transposed_columns = transpose(columns, transpositions)
 495     else:
 496         transposed_columns = untranspose(columns, transpositions)
 497     return combine_every_nth(transposed_columns)
 498
 499
 500
 501 def caesar_break(message,
 502                  metric=norms.euclidean_distance,
 503                  target_counts=normalised_english_counts,
 504                  message_frequency_scaling=norms.normalise):
 505     """Breaks a Caesar cipher using frequency analysis
 506
 507     >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
 508           'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
 509     (4, 0.31863952890183...)
 510     >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
 511           'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
 512     (19, 0.42152901235832...)
 513     >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
 514           'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
 515     (13, 0.316029208075451...)
 516     """
 517     sanitised_message = sanitise(message)
 518     best_shift = 0
 519     best_fit = float("inf")
 520     for shift in range(26):
 521         plaintext = caesar_decipher(sanitised_message, shift)
 522         counts = message_frequency_scaling(letter_frequencies(plaintext))
 523         fit = metric(target_counts, counts)
 524         logger.debug('Caesar break attempt using key {0} gives fit of {1} '
 525                       'and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
 526         if fit < best_fit:
 527             best_fit = fit
 528             best_shift = shift
 529     logger.info('Caesar break best fit: key {0} gives fit of {1} and '
 530                 'decrypt starting: {2}'.format(best_shift, best_fit,
 531                     caesar_decipher(sanitised_message, best_shift)[:50]))
 532     return best_shift, best_fit
 533
 534 def affine_break(message,
 535                  metric=norms.euclidean_distance,
 536                  target_counts=normalised_english_counts,
 537                  message_frequency_scaling=norms.normalise):
 538     """Breaks an affine cipher using frequency analysis
 539
 540     >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
 541           'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
 542           'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
 543           'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
 544           'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
 545           'clm ckuxj.') # doctest: +ELLIPSIS
 546     ((15, 22, True), 0.23570361818655...)
 547     """
 548     sanitised_message = sanitise(message)
 549     best_multiplier = 0
 550     best_adder = 0
 551     best_one_based = True
 552     best_fit = float("inf")
 553     for one_based in [True, False]:
 554         for multiplier in range(1, 26, 2):
 555             for adder in range(26):
 556                 plaintext = affine_decipher(sanitised_message,
 557                                             multiplier, adder, one_based)
 558                 counts = message_frequency_scaling(letter_frequencies(plaintext))
 559                 fit = metric(target_counts, counts)
 560                 logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
 561                              'gives fit of {3} and decrypt starting: {4}'.
 562                              format(multiplier, adder, one_based, fit,
 563                                     plaintext[:50]))
 564                 if fit < best_fit:
 565                     best_fit = fit
 566                     best_multiplier = multiplier
 567                     best_adder = adder
 568                     best_one_based = one_based
 569     logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
 570                 'and decrypt starting: {4}'.format(
 571                     best_multiplier, best_adder, best_one_based, best_fit,
 572                     affine_decipher(sanitised_message, best_multiplier,
 573                         best_adder, best_one_based)[:50]))
 574     return (best_multiplier, best_adder, best_one_based), best_fit
 575
 576 def keyword_break(message,
 577                   wordlist=keywords,
 578                   metric=norms.euclidean_distance,
 579                   target_counts=normalised_english_counts,
 580                   message_frequency_scaling=norms.normalise):
 581     """Breaks a keyword substitution cipher using a dictionary and
 582     frequency analysis
 583
 584     >>> keyword_break(keyword_encipher('this is a test message for the ' \
 585           'keyword decipherment', 'elephant', 1), \
 586           wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
 587     (('elephant', 1), 0.41643991598441...)
 588     """
 589     best_keyword = ''
 590     best_wrap_alphabet = True
 591     best_fit = float("inf")
 592     for wrap_alphabet in range(3):
 593         for keyword in wordlist:
 594             plaintext = keyword_decipher(message, keyword, wrap_alphabet)
 595             counts = message_frequency_scaling(letter_frequencies(plaintext))
 596             fit = metric(target_counts, counts)
 597             logger.debug('Keyword break attempt using key {0} (wrap={1}) '
 598                          'gives fit of {2} and decrypt starting: {3}'.format(
 599                              keyword, wrap_alphabet, fit,
 600                              sanitise(plaintext)[:50]))
 601             if fit < best_fit:
 602                 best_fit = fit
 603                 best_keyword = keyword
 604                 best_wrap_alphabet = wrap_alphabet
 605     logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
 606                 '{2} and decrypt starting: {3}'.format(best_keyword,
 607                     best_wrap_alphabet, best_fit, sanitise(
 608                         keyword_decipher(message, best_keyword,
 609                                          best_wrap_alphabet))[:50]))
 610     return (best_keyword, best_wrap_alphabet), best_fit
 611
 612 def keyword_break_mp(message,
 613                      wordlist=keywords,
 614                      metric=norms.euclidean_distance,
 615                      target_counts=normalised_english_counts,
 616                      message_frequency_scaling=norms.normalise,
 617                      chunksize=500):
 618     """Breaks a keyword substitution cipher using a dictionary and
 619     frequency analysis
 620
 621     >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
 622           'keyword decipherment', 'elephant', 1), \
 623           wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
 624     (('elephant', 1), 0.41643991598441...)
 625     """
 626     with Pool() as pool:
 627         helper_args = [(message, word, wrap, metric, target_counts,
 628                         message_frequency_scaling)
 629                        for word in wordlist for wrap in range(3)]
 630         # Gotcha: the helper function here needs to be defined at the top level
 631         #   (limitation of Pool.starmap)
 632         breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
 633         return min(breaks, key=lambda k: k[1])
 634
 635 def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
 636                       message_frequency_scaling):
 637     plaintext = keyword_decipher(message, keyword, wrap_alphabet)
 638     counts = message_frequency_scaling(letter_frequencies(plaintext))
 639     fit = metric(target_counts, counts)
 640     logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
 641                  '{2} and decrypt starting: {3}'.format(keyword,
 642                      wrap_alphabet, fit, sanitise(plaintext)[:50]))
 643     return (keyword, wrap_alphabet), fit
 644
 645 def scytale_break(message,
 646                   metric=norms.euclidean_distance,
 647                   target_counts=normalised_english_bigram_counts,
 648                   message_frequency_scaling=norms.normalise):
 649     """Breaks a Scytale cipher
 650
 651     >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
 652            'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
 653            'aek') # doctest: +ELLIPSIS
 654     (6, 0.83453041115025...)
 655     """
 656     best_key = 0
 657     best_fit = float("inf")
 658     for key in range(1, 20):
 659         if len(message) % key == 0:
 660             plaintext = scytale_decipher(message, key)
 661             counts = message_frequency_scaling(frequencies(
 662                          ngrams(sanitise(plaintext), 2)))
 663             fit = metric(target_counts, counts)
 664             logger.debug('Scytale break attempt using key {0} gives fit of '
 665                          '{1} and decrypt starting: {2}'.format(key,
 666                              fit, sanitise(plaintext)[:50]))
 667             if fit < best_fit:
 668                 best_fit = fit
 669                 best_key = key
 670     logger.info('Scytale break best fit with key {0} gives fit of {1} and '
 671                 'decrypt starting: {2}'.format(best_key, best_fit,
 672                     sanitise(scytale_decipher(message, best_key))[:50]))
 673     return best_key, best_fit
 674
 675 def column_transposition_break(message,
 676                   wordlist=keywords,
 677                   metric=norms.euclidean_distance,
 678                   target_counts=normalised_english_bigram_counts,
 679                   message_frequency_scaling=norms.normalise):
 680     """Breaks a column transposition cipher using a dictionary and
 681     n-gram frequency analysis
 682
 683     >>> column_transposition_break(column_transposition_encipher(sanitise( \
 684         "Turing's homosexuality resulted in a criminal prosecution in 1952, \
 685         when homosexual acts were still illegal in the United Kingdom. "), \
 686         'encipher'), \
 687         wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS
 688     ('encipher', 0.898128626285...)
 689     >>> column_transposition_break(column_transposition_encipher(sanitise( \
 690         "Turing's homosexuality resulted in a criminal prosecution in 1952, " \
 691         "when homosexual acts were still illegal in the United Kingdom."), \
 692         'encipher'), \
 693         wordlist=['encipher', 'keyword', 'fourteen'], \
 694         target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
 695     ('encipher', 1.1958792913127...)
 696     """
 697     best_keyword = ''
 698     best_fit = float("inf")
 699     ngram_length = len(next(iter(target_counts.keys())))
 700     for keyword in wordlist:
 701         if len(message) % len(deduplicate(keyword)) == 0:
 702             plaintext = column_transposition_decipher(message, keyword)
 703             counts = message_frequency_scaling(frequencies(
 704                          ngrams(sanitise(plaintext), ngram_length)))
 705             fit = metric(target_counts, counts)
 706             logger.debug('Column transposition break attempt using key {0} '
 707                          'gives fit of {1} and decrypt starting: {2}'.format(
 708                              keyword, fit,
 709                              sanitise(plaintext)[:50]))
 710             if fit < best_fit:
 711                 best_fit = fit
 712                 best_keyword = keyword
 713     logger.info('Column transposition break best fit with key {0} gives fit '
 714                 'of {1} and decrypt starting: {2}'.format(best_keyword,
 715                     best_fit, sanitise(
 716                         column_transposition_decipher(message,
 717                             best_keyword))[:50]))
 718     return best_keyword, best_fit
 719
 720
 721 def column_transposition_break_mp(message,
 722                      wordlist=keywords,
 723                      metric=norms.euclidean_distance,
 724                      target_counts=normalised_english_bigram_counts,
 725                      message_frequency_scaling=norms.normalise,
 726                      chunksize=500):
 727     """Breaks a column transposition cipher using a dictionary and
 728     n-gram frequency analysis
 729
 730     >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
 731         "Turing's homosexuality resulted in a criminal prosecution in 1952, \
 732         when homosexual acts were still illegal in the United Kingdom. "), \
 733         'encipher'), \
 734         wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS
 735     ('encipher', 0.898128626285...)
 736     >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
 737         "Turing's homosexuality resulted in a criminal prosecution in 1952, " \
 738         "when homosexual acts were still illegal in the United Kingdom."), \
 739         'encipher'), \
 740         wordlist=['encipher', 'keyword', 'fourteen'], \
 741         target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
 742     ('encipher', 1.1958792913127...)
 743     """
 744     ngram_length = len(next(iter(target_counts.keys())))
 745     with Pool() as pool:
 746         helper_args = [(message, word, metric, target_counts, ngram_length,
 747                         message_frequency_scaling)
 748                        for word in wordlist]
 749         # Gotcha: the helper function here needs to be defined at the top level
 750         #   (limitation of Pool.starmap)
 751         breaks = pool.starmap(column_transposition_break_worker, helper_args, chunksize)
 752         return min(breaks, key=lambda k: k[1])
 753
 754 def column_transposition_break_worker(message, keyword, metric, target_counts,
 755                       ngram_length, message_frequency_scaling):
 756     plaintext = column_transposition_decipher(message, keyword)
 757     counts = message_frequency_scaling(frequencies(
 758                          ngrams(sanitise(plaintext), ngram_length)))
 759     fit = metric(target_counts, counts)
 760     logger.debug('Column transposition break attempt using key {0} '
 761                          'gives fit of {1} and decrypt starting: {2}'.format(
 762                              keyword, fit,
 763                              sanitise(plaintext)[:50]))
 764     return keyword, fit
 765
 766
 767
 768 if __name__ == "__main__":
 769     import doctest
 770     doctest.testmod()