cipher.py

   1 import string
   2 import collections
   3 import norms
   4 import logging
   5 import math
   6 from itertools import zip_longest, repeat
   7 from segment import segment
   8 from multiprocessing import Pool
   9
  10 # To time a run:
  11 #
  12 # import timeit
  13 # c5a = open('2012/5a.ciphertext', 'r').read()
  14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
  16
# Module-level logger. A FileHandler writing to 'cipher.log' (relative to the
# current working directory) is attached as soon as this module is imported.
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
# Only warnings and above are recorded by default. Swap in one of the
# commented-out levels below to also record the break functions' INFO
# summaries or their per-attempt DEBUG traces.
logger.setLevel(logging.WARNING)
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)
  22
  23 english_counts = collections.defaultdict(int)
  24 with open('count_1l.txt', 'r') as f:
  25     for line in f:
  26         (letter, count) = line.split("\t")
  27         english_counts[letter] = int(count)
  28 normalised_english_counts = norms.normalise(english_counts)
  29
  30 english_bigram_counts = collections.defaultdict(int)
  31 with open('count_2l.txt', 'r') as f:
  32     for line in f:
  33         (bigram, count) = line.split("\t")
  34         english_bigram_counts[bigram] = int(count)
  35 normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
  36
  37 english_trigram_counts = collections.defaultdict(int)
  38 with open('count_3l.txt', 'r') as f:
  39     for line in f:
  40         (trigram, count) = line.split("\t")
  41         english_trigram_counts[trigram] = int(count)
  42 normalised_english_trigram_counts = norms.normalise(english_trigram_counts)
  43
  44
  45 with open('words.txt', 'r') as f:
  46     keywords = [line.rstrip() for line in f]
  47
  48 modular_division_table = [[0]*26 for x in range(26)]
  49 for a in range(26):
  50     for b in range(26):
  51         c = (a * b) % 26
  52         modular_division_table[b][c] = a
  53
  54 def letters(text):
  55     """Remove all non-alphabetic characters from a text
  56     >>> letters('The Quick')
  57     'TheQuick'
  58     >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
  59     'TheQuickBROWNfoxjumpedoverthelazyDOG'
  60     """
  61     return ''.join([c for c in text if c in string.ascii_letters])
  62
  63 def sanitise(text):
  64     """Remove all non-alphabetic characters and convert the text to lowercase
  65
  66     >>> sanitise('The Quick')
  67     'thequick'
  68     >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
  69     'thequickbrownfoxjumpedoverthelazydog'
  70     """
  71     # sanitised = [c.lower() for c in text if c in string.ascii_letters]
  72     # return ''.join(sanitised)
  73     return letters(text).lower()
  74
  75 def ngrams(text, n):
  76     """Returns all n-grams of a text
  77
  78     >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
  79     ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
  80      'nf', 'fo', 'ox']
  81     >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
  82     ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
  83      'rown', 'ownf', 'wnfo', 'nfox']
  84     """
  85     return [text[i:i+n] for i in range(len(text)-n+1)]
  86
  87 def every_nth(text, n, fillvalue=''):
  88     """Returns n strings, each of which consists of every nth character,
  89     starting with the 0th, 1st, 2nd, ... (n-1)th character
  90
  91     >>> every_nth(string.ascii_lowercase, 5)
  92     ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
  93     >>> every_nth(string.ascii_lowercase, 1)
  94     ['abcdefghijklmnopqrstuvwxyz']
  95     >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
  96     ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
  97      'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
  98     >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
  99     ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
 100     """
 101     split_text = [text[i:i+n] for i in range(0, len(text), n)]
 102     return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
 103
 104 def combine_every_nth(split_text):
 105     """Reforms a text split into every_nth strings
 106
 107     >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
 108     'abcdefghijklmnopqrstuvwxyz'
 109     >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
 110     'abcdefghijklmnopqrstuvwxyz'
 111     >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
 112     'abcdefghijklmnopqrstuvwxyz'
 113     """
 114     return ''.join([''.join(l)
 115                     for l in zip_longest(*split_text, fillvalue='')])
 116
 117 def transpose(items, transposition):
 118     """Moves items around according to the given transposition
 119
 120     >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
 121     ['a', 'b', 'c', 'd']
 122     >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
 123     ['d', 'b', 'c', 'a']
 124     >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
 125     [13, 12, 14, 11, 15, 10]
 126     """
 127     transposed = list(repeat('', len(transposition)))
 128     for p, t in enumerate(transposition):
 129        transposed[p] = items[t]
 130     return transposed
 131
 132 def untranspose(items, transposition):
 133     """Undoes a transpose
 134
 135     >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
 136     ['a', 'b', 'c', 'd']
 137     >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
 138     ['a', 'b', 'c', 'd']
 139     >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
 140     [10, 11, 12, 13, 14, 15]
 141     """
 142     transposed  = list(repeat('', len(transposition)))
 143     for p, t in enumerate(transposition):
 144        transposed[t] = items[p]
 145     return transposed
 146
 147
 148 def frequencies(text):
 149     """Count the number of occurrences of each character in text
 150
 151     >>> sorted(frequencies('abcdefabc').items())
 152     [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
 153     >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
 154          'dog').items()) # doctest: +NORMALIZE_WHITESPACE
 155     [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
 156      ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
 157      ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
 158      ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
 159     >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
 160          '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
 161     [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
 162      ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
 163      ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
 164      ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
 165      ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
 166     >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
 167          'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
 168     [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
 169      ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
 170      ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
 171      ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
 172     >>> frequencies('abcdefabcdef')['x']
 173     0
 174     """
 175     #counts = collections.defaultdict(int)
 176     #for c in text:
 177     #    counts[c] += 1
 178     #return counts
 179     return collections.Counter(c for c in text)
 180 letter_frequencies = frequencies
 181
 182 def deduplicate(text):
 183     return list(collections.OrderedDict.fromkeys(text))
 184
 185
 186
 187 def caesar_encipher_letter(letter, shift):
 188     """Encipher a letter, given a shift amount
 189
 190     >>> caesar_encipher_letter('a', 1)
 191     'b'
 192     >>> caesar_encipher_letter('a', 2)
 193     'c'
 194     >>> caesar_encipher_letter('b', 2)
 195     'd'
 196     >>> caesar_encipher_letter('x', 2)
 197     'z'
 198     >>> caesar_encipher_letter('y', 2)
 199     'a'
 200     >>> caesar_encipher_letter('z', 2)
 201     'b'
 202     >>> caesar_encipher_letter('z', -1)
 203     'y'
 204     >>> caesar_encipher_letter('a', -1)
 205     'z'
 206     """
 207     if letter in string.ascii_letters:
 208         if letter in string.ascii_uppercase:
 209             alphabet_start = ord('A')
 210         else:
 211             alphabet_start = ord('a')
 212         return chr(((ord(letter) - alphabet_start + shift) % 26) +
 213                    alphabet_start)
 214     else:
 215         return letter
 216
 217 def caesar_decipher_letter(letter, shift):
 218     """Decipher a letter, given a shift amount
 219
 220     >>> caesar_decipher_letter('b', 1)
 221     'a'
 222     >>> caesar_decipher_letter('b', 2)
 223     'z'
 224     """
 225     return caesar_encipher_letter(letter, -shift)
 226
 227 def caesar_encipher(message, shift):
 228     """Encipher a message with the Caesar cipher of given shift
 229
 230     >>> caesar_encipher('abc', 1)
 231     'bcd'
 232     >>> caesar_encipher('abc', 2)
 233     'cde'
 234     >>> caesar_encipher('abcxyz', 2)
 235     'cdezab'
 236     >>> caesar_encipher('ab cx yz', 2)
 237     'cd ez ab'
 238     """
 239     enciphered = [caesar_encipher_letter(l, shift) for l in message]
 240     return ''.join(enciphered)
 241
 242 def caesar_decipher(message, shift):
 243     """Encipher a message with the Caesar cipher of given shift
 244
 245     >>> caesar_decipher('bcd', 1)
 246     'abc'
 247     >>> caesar_decipher('cde', 2)
 248     'abc'
 249     >>> caesar_decipher('cd ez ab', 2)
 250     'ab cx yz'
 251     """
 252     return caesar_encipher(message, -shift)
 253
 254 def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
 255     """Encipher a letter, given a multiplier and adder
 256
 257     >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
 258             for l in string.ascii_uppercase])
 259     'HKNQTWZCFILORUXADGJMPSVYBE'
 260     >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
 261             for l in string.ascii_uppercase])
 262     'FILORUXADGJMPSVYBEHKNQTWZC'
 263     """
 264     if letter in string.ascii_letters:
 265         if letter in string.ascii_uppercase:
 266             alphabet_start = ord('A')
 267         else:
 268             alphabet_start = ord('a')
 269         letter_number = ord(letter) - alphabet_start
 270         if one_based: letter_number += 1
 271         cipher_number = (letter_number * multiplier + adder) % 26
 272         if one_based: cipher_number -= 1
 273         return chr(cipher_number % 26 + alphabet_start)
 274     else:
 275         return letter
 276
 277 def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
 278     """Encipher a letter, given a multiplier and adder
 279
 280     >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
 281             for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
 282     'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
 283     >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
 284             for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
 285     'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
 286     """
 287     if letter in string.ascii_letters:
 288         if letter in string.ascii_uppercase:
 289             alphabet_start = ord('A')
 290         else:
 291             alphabet_start = ord('a')
 292         cipher_number = ord(letter) - alphabet_start
 293         if one_based: cipher_number += 1
 294         plaintext_number = ( modular_division_table[multiplier]
 295                                                    [(cipher_number - adder) % 26] )
 296         if one_based: plaintext_number -= 1
 297         return chr(plaintext_number % 26 + alphabet_start)
 298     else:
 299         return letter
 300
 301 def affine_encipher(message, multiplier=1, adder=0, one_based=True):
 302     """Encipher a message
 303
 304     >>> affine_encipher('hours passed during which jerico tried every ' \
 305            'trick he could think of', 15, 22, True)
 306     'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
 307     """
 308     enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
 309                   for l in message]
 310     return ''.join(enciphered)
 311
 312 def affine_decipher(message, multiplier=1, adder=0, one_based=True):
 313     """Decipher a message
 314
 315     >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
 316            'jfaoe ls omytd jlaxe mh', 15, 22, True)
 317     'hours passed during which jerico tried every trick he could think of'
 318     """
 319     enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
 320                   for l in message]
 321     return ''.join(enciphered)
 322
 323
 324 def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
 325     """Find the cipher alphabet given a keyword.
 326     wrap_alphabet controls how the rest of the alphabet is added
 327     after the keyword.
 328     0 : from 'a'
 329     1 : from the last letter in the sanitised keyword
 330     2 : from the largest letter in the sanitised keyword
 331
 332     >>> keyword_cipher_alphabet_of('bayes')
 333     'bayescdfghijklmnopqrtuvwxz'
 334     >>> keyword_cipher_alphabet_of('bayes', 0)
 335     'bayescdfghijklmnopqrtuvwxz'
 336     >>> keyword_cipher_alphabet_of('bayes', 1)
 337     'bayestuvwxzcdfghijklmnopqr'
 338     >>> keyword_cipher_alphabet_of('bayes', 2)
 339     'bayeszcdfghijklmnopqrtuvwx'
 340     """
 341     if wrap_alphabet == 0:
 342         cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
 343                                               string.ascii_lowercase))
 344     else:
 345         if wrap_alphabet == 1:
 346             last_keyword_letter = deduplicate(sanitise(keyword))[-1]
 347         else:
 348             last_keyword_letter = sorted(sanitise(keyword))[-1]
 349         last_keyword_position = string.ascii_lowercase.find(
 350             last_keyword_letter) + 1
 351         cipher_alphabet = ''.join(
 352             deduplicate(sanitise(keyword) +
 353                         string.ascii_lowercase[last_keyword_position:] +
 354                         string.ascii_lowercase))
 355     return cipher_alphabet
 356
 357
 358 def keyword_encipher(message, keyword, wrap_alphabet=0):
 359     """Enciphers a message with a keyword substitution cipher.
 360     wrap_alphabet controls how the rest of the alphabet is added
 361     after the keyword.
 362     0 : from 'a'
 363     1 : from the last letter in the sanitised keyword
 364     2 : from the largest letter in the sanitised keyword
 365
 366     >>> keyword_encipher('test message', 'bayes')
 367     'rsqr ksqqbds'
 368     >>> keyword_encipher('test message', 'bayes', 0)
 369     'rsqr ksqqbds'
 370     >>> keyword_encipher('test message', 'bayes', 1)
 371     'lskl dskkbus'
 372     >>> keyword_encipher('test message', 'bayes', 2)
 373     'qspq jsppbcs'
 374     """
 375     cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
 376     cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
 377     return message.lower().translate(cipher_translation)
 378
 379 def keyword_decipher(message, keyword, wrap_alphabet=0):
 380     """Deciphers a message with a keyword substitution cipher.
 381     wrap_alphabet controls how the rest of the alphabet is added
 382     after the keyword.
 383     0 : from 'a'
 384     1 : from the last letter in the sanitised keyword
 385     2 : from the largest letter in the sanitised keyword
 386
 387     >>> keyword_decipher('rsqr ksqqbds', 'bayes')
 388     'test message'
 389     >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
 390     'test message'
 391     >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
 392     'test message'
 393     >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
 394     'test message'
 395     """
 396     cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
 397     cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
 398     return message.lower().translate(cipher_translation)
 399
 400 def scytale_encipher(message, rows):
 401     """Enciphers using the scytale transposition cipher.
 402     Message is padded with spaces to allow all rows to be the same length.
 403
 404     >>> scytale_encipher('thequickbrownfox', 3)
 405     'tcnhkfeboqrxuo iw '
 406     >>> scytale_encipher('thequickbrownfox', 4)
 407     'tubnhirfecooqkwx'
 408     >>> scytale_encipher('thequickbrownfox', 5)
 409     'tubn hirf ecoo qkwx '
 410     >>> scytale_encipher('thequickbrownfox', 6)
 411     'tqcrnxhukof eibwo '
 412     >>> scytale_encipher('thequickbrownfox', 7)
 413     'tqcrnx hukof  eibwo  '
 414     """
 415     if len(message) % rows != 0:
 416         message += ' '*(rows - len(message) % rows)
 417     row_length = round(len(message) / rows)
 418     slices = [message[i:i+row_length]
 419               for i in range(0, len(message), row_length)]
 420     return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])
 421
 422 def scytale_decipher(message, rows):
 423     """Deciphers using the scytale transposition cipher.
 424     Assumes the message is padded so that all rows are the same length.
 425
 426     >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
 427     'thequickbrownfox  '
 428     >>> scytale_decipher('tubnhirfecooqkwx', 4)
 429     'thequickbrownfox'
 430     >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
 431     'thequickbrownfox    '
 432     >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
 433     'thequickbrownfox  '
 434     >>> scytale_decipher('tqcrnx hukof  eibwo  ', 7)
 435     'thequickbrownfox     '
 436     """
 437     cols = round(len(message) / rows)
 438     columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
 439     return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
 440
 441
 442 def transpositions_of(keyword):
 443     """Finds the transpostions given by a keyword. For instance, the keyword
 444     'clever' rearranges to 'celrv', so the first column (0) stays first, the
 445     second column (1) moves to third, the third column (2) moves to second,
 446     and so on.
 447
 448     >>> transpositions_of('clever')
 449     [0, 2, 1, 4, 3]
 450     """
 451     key = deduplicate(keyword)
 452     transpositions = [key.index(l) for l in sorted(key)]
 453     return transpositions
 454
 455 def column_transposition_encipher(message, keyword, fillvalue=' '):
 456     """Enciphers using the column transposition cipher.
 457     Message is padded to allow all rows to be the same length.
 458
 459     >>> column_transposition_encipher('hellothere', 'clever')
 460     'hleolteher'
 461     >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
 462     'hleolthre!e!'
 463     """
 464     return column_transposition_worker(message, keyword, encipher=True,
 465                                        fillvalue=fillvalue)
 466
 467 def column_transposition_decipher(message, keyword, fillvalue=' '):
 468     """Deciphers using the column transposition cipher.
 469     Message is padded to allow all rows to be the same length.
 470
 471     >>> column_transposition_decipher('hleolteher', 'clever')
 472     'hellothere'
 473     >>> column_transposition_decipher('hleolthre!e!', 'cleverly', fillvalue='?')
 474     'hellothere!!'
 475     """
 476     return column_transposition_worker(message, keyword, encipher=False,
 477                                        fillvalue=fillvalue)
 478
 479 def column_transposition_worker(message, keyword,
 480                                 encipher=True, fillvalue=' '):
 481     """Does the actual work of the column transposition cipher.
 482     Message is padded with spaces to allow all rows to be the same length.
 483
 484     >>> column_transposition_worker('hellothere', 'clever')
 485     'hleolteher'
 486     >>> column_transposition_worker('hellothere', 'clever', encipher=True)
 487     'hleolteher'
 488     >>> column_transposition_worker('hleolteher', 'clever', encipher=False)
 489     'hellothere'
 490     """
 491     transpositions = transpositions_of(keyword)
 492     columns = every_nth(message, len(transpositions), fillvalue=fillvalue)
 493     if encipher:
 494         transposed_columns = transpose(columns, transpositions)
 495     else:
 496         transposed_columns = untranspose(columns, transpositions)
 497     return combine_every_nth(transposed_columns)
 498
 499
 500
 501 def caesar_break(message,
 502                  metric=norms.euclidean_distance,
 503                  target_counts=normalised_english_counts,
 504                  message_frequency_scaling=norms.normalise):
 505     """Breaks a Caesar cipher using frequency analysis
 506
 507     >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
 508           'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
 509     (4, 0.31863952890183...)
 510     >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
 511           'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
 512     (19, 0.42152901235832...)
 513     >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
 514           'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
 515     (13, 0.316029208075451...)
 516     """
 517     sanitised_message = sanitise(message)
 518     best_shift = 0
 519     best_fit = float("inf")
 520     for shift in range(26):
 521         plaintext = caesar_decipher(sanitised_message, shift)
 522         counts = message_frequency_scaling(letter_frequencies(plaintext))
 523         fit = metric(target_counts, counts)
 524         logger.debug('Caesar break attempt using key {0} gives fit of {1} '
 525                       'and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
 526         if fit < best_fit:
 527             best_fit = fit
 528             best_shift = shift
 529     logger.info('Caesar break best fit: key {0} gives fit of {1} and '
 530                 'decrypt starting: {2}'.format(best_shift, best_fit,
 531                     caesar_decipher(sanitised_message, best_shift)[:50]))
 532     return best_shift, best_fit
 533
 534 def affine_break(message,
 535                  metric=norms.euclidean_distance,
 536                  target_counts=normalised_english_counts,
 537                  message_frequency_scaling=norms.normalise):
 538     """Breaks an affine cipher using frequency analysis
 539
 540     >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
 541           'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
 542           'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
 543           'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
 544           'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
 545           'clm ckuxj.') # doctest: +ELLIPSIS
 546     ((15, 22, True), 0.23570361818655...)
 547     """
 548     sanitised_message = sanitise(message)
 549     best_multiplier = 0
 550     best_adder = 0
 551     best_one_based = True
 552     best_fit = float("inf")
 553     for one_based in [True, False]:
 554         for multiplier in range(1, 26, 2):
 555             for adder in range(26):
 556                 plaintext = affine_decipher(sanitised_message,
 557                                             multiplier, adder, one_based)
 558                 counts = message_frequency_scaling(letter_frequencies(plaintext))
 559                 fit = metric(target_counts, counts)
 560                 logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
 561                              'gives fit of {3} and decrypt starting: {4}'.
 562                              format(multiplier, adder, one_based, fit,
 563                                     plaintext[:50]))
 564                 if fit < best_fit:
 565                     best_fit = fit
 566                     best_multiplier = multiplier
 567                     best_adder = adder
 568                     best_one_based = one_based
 569     logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
 570                 'and decrypt starting: {4}'.format(
 571                     best_multiplier, best_adder, best_one_based, best_fit,
 572                     affine_decipher(sanitised_message, best_multiplier,
 573                         best_adder, best_one_based)[:50]))
 574     return (best_multiplier, best_adder, best_one_based), best_fit
 575
 576 def keyword_break(message,
 577                   wordlist=keywords,
 578                   metric=norms.euclidean_distance,
 579                   target_counts=normalised_english_counts,
 580                   message_frequency_scaling=norms.normalise):
 581     """Breaks a keyword substitution cipher using a dictionary and
 582     frequency analysis
 583
 584     >>> keyword_break(keyword_encipher('this is a test message for the ' \
 585           'keyword decipherment', 'elephant', 1), \
 586           wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
 587     (('elephant', 1), 0.41643991598441...)
 588     """
 589     best_keyword = ''
 590     best_wrap_alphabet = True
 591     best_fit = float("inf")
 592     for wrap_alphabet in range(3):
 593         for keyword in wordlist:
 594             plaintext = keyword_decipher(message, keyword, wrap_alphabet)
 595             counts = message_frequency_scaling(letter_frequencies(plaintext))
 596             fit = metric(target_counts, counts)
 597             logger.debug('Keyword break attempt using key {0} (wrap={1}) '
 598                          'gives fit of {2} and decrypt starting: {3}'.format(
 599                              keyword, wrap_alphabet, fit,
 600                              sanitise(plaintext)[:50]))
 601             if fit < best_fit:
 602                 best_fit = fit
 603                 best_keyword = keyword
 604                 best_wrap_alphabet = wrap_alphabet
 605     logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
 606                 '{2} and decrypt starting: {3}'.format(best_keyword,
 607                     best_wrap_alphabet, best_fit, sanitise(
 608                         keyword_decipher(message, best_keyword,
 609                                          best_wrap_alphabet))[:50]))
 610     return (best_keyword, best_wrap_alphabet), best_fit
 611
 612 def keyword_break_mp(message,
 613                      wordlist=keywords,
 614                      metric=norms.euclidean_distance,
 615                      target_counts=normalised_english_counts,
 616                      message_frequency_scaling=norms.normalise,
 617                      chunksize=500):
 618     """Breaks a keyword substitution cipher using a dictionary and
 619     frequency analysis
 620
 621     >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
 622           'keyword decipherment', 'elephant', 1), \
 623           wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
 624     (('elephant', 1), 0.41643991598441...)
 625     """
 626     with Pool() as pool:
 627         helper_args = [(message, word, wrap, metric, target_counts,
 628                         message_frequency_scaling)
 629                        for word in wordlist for wrap in range(3)]
 630         # Gotcha: the helper function here needs to be defined at the top level
 631         #   (limitation of Pool.starmap)
 632         breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
 633         return min(breaks, key=lambda k: k[1])
 634
 635 def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
 636                       message_frequency_scaling):
 637     plaintext = keyword_decipher(message, keyword, wrap_alphabet)
 638     counts = message_frequency_scaling(letter_frequencies(plaintext))
 639     fit = metric(target_counts, counts)
 640     logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
 641                  '{2} and decrypt starting: {3}'.format(keyword,
 642                      wrap_alphabet, fit, sanitise(plaintext)[:50]))
 643     return (keyword, wrap_alphabet), fit
 644
 645 def scytale_break(message,
 646                   metric=norms.euclidean_distance,
 647                   target_counts=normalised_english_bigram_counts,
 648                   message_frequency_scaling=norms.normalise):
 649     """Breaks a Scytale cipher
 650
 651     >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
 652            'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
 653            'aek') # doctest: +ELLIPSIS
 654     (6, 0.83453041115025...)
 655     """
 656     best_key = 0
 657     best_fit = float("inf")
 658     ngram_length = len(next(iter(target_counts.keys())))
 659     for key in range(1, 20):
 660         if len(message) % key == 0:
 661             plaintext = scytale_decipher(message, key)
 662             counts = message_frequency_scaling(frequencies(
 663                          ngrams(sanitise(plaintext), ngram_length)))
 664             fit = metric(target_counts, counts)
 665             logger.debug('Scytale break attempt using key {0} gives fit of '
 666                          '{1} and decrypt starting: {2}'.format(key,
 667                              fit, sanitise(plaintext)[:50]))
 668             if fit < best_fit:
 669                 best_fit = fit
 670                 best_key = key
 671     logger.info('Scytale break best fit with key {0} gives fit of {1} and '
 672                 'decrypt starting: {2}'.format(best_key, best_fit,
 673                     sanitise(scytale_decipher(message, best_key))[:50]))
 674     return best_key, best_fit
 675
 676 def column_transposition_break(message,
 677                   wordlist=keywords,
 678                   metric=norms.euclidean_distance,
 679                   target_counts=normalised_english_bigram_counts,
 680                   message_frequency_scaling=norms.normalise):
 681     """Breaks a column transposition cipher using a dictionary and
 682     n-gram frequency analysis
 683
 684     >>> column_transposition_break(column_transposition_encipher(sanitise( \
 685         "Turing's homosexuality resulted in a criminal prosecution in 1952, \
 686         when homosexual acts were still illegal in the United Kingdom. "), \
 687         'encipher'), \
 688         wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS
 689     ('encipher', 0.898128626285...)
 690     >>> column_transposition_break(column_transposition_encipher(sanitise( \
 691         "Turing's homosexuality resulted in a criminal prosecution in 1952, " \
 692         "when homosexual acts were still illegal in the United Kingdom."), \
 693         'encipher'), \
 694         wordlist=['encipher', 'keyword', 'fourteen'], \
 695         target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
 696     ('encipher', 1.1958792913127...)
 697     """
 698     best_keyword = ''
 699     best_fit = float("inf")
 700     ngram_length = len(next(iter(target_counts.keys())))
 701     for keyword in wordlist:
 702         if len(message) % len(deduplicate(keyword)) == 0:
 703             plaintext = column_transposition_decipher(message, keyword)
 704             counts = message_frequency_scaling(frequencies(
 705                          ngrams(sanitise(plaintext), ngram_length)))
 706             fit = metric(target_counts, counts)
 707             logger.debug('Column transposition break attempt using key {0} '
 708                          'gives fit of {1} and decrypt starting: {2}'.format(
 709                              keyword, fit,
 710                              sanitise(plaintext)[:50]))
 711             if fit < best_fit:
 712                 best_fit = fit
 713                 best_keyword = keyword
 714     logger.info('Column transposition break best fit with key {0} gives fit '
 715                 'of {1} and decrypt starting: {2}'.format(best_keyword,
 716                     best_fit, sanitise(
 717                         column_transposition_decipher(message,
 718                             best_keyword))[:50]))
 719     return best_keyword, best_fit
 720
 721
 722 def column_transposition_break_mp(message,
 723                      wordlist=keywords,
 724                      metric=norms.euclidean_distance,
 725                      target_counts=normalised_english_bigram_counts,
 726                      message_frequency_scaling=norms.normalise,
 727                      chunksize=500):
 728     """Breaks a column transposition cipher using a dictionary and
 729     n-gram frequency analysis
 730
 731     >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
 732         "Turing's homosexuality resulted in a criminal prosecution in 1952, \
 733         when homosexual acts were still illegal in the United Kingdom. "), \
 734         'encipher'), \
 735         wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS
 736     ('encipher', 0.898128626285...)
 737     >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
 738         "Turing's homosexuality resulted in a criminal prosecution in 1952, " \
 739         "when homosexual acts were still illegal in the United Kingdom."), \
 740         'encipher'), \
 741         wordlist=['encipher', 'keyword', 'fourteen'], \
 742         target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
 743     ('encipher', 1.1958792913127...)
 744     """
 745     ngram_length = len(next(iter(target_counts.keys())))
 746     with Pool() as pool:
 747         helper_args = [(message, word, metric, target_counts, ngram_length,
 748                         message_frequency_scaling)
 749                        for word in wordlist]
 750         # Gotcha: the helper function here needs to be defined at the top level
 751         #   (limitation of Pool.starmap)
 752         breaks = pool.starmap(column_transposition_break_worker, helper_args, chunksize)
 753         return min(breaks, key=lambda k: k[1])
 754
 755 def column_transposition_break_worker(message, keyword, metric, target_counts,
 756                       ngram_length, message_frequency_scaling):
 757     plaintext = column_transposition_decipher(message, keyword)
 758     counts = message_frequency_scaling(frequencies(
 759                          ngrams(sanitise(plaintext), ngram_length)))
 760     fit = metric(target_counts, counts)
 761     logger.debug('Column transposition break attempt using key {0} '
 762                          'gives fit of {1} and decrypt starting: {2}'.format(
 763                              keyword, fit,
 764                              sanitise(plaintext)[:50]))
 765     return keyword, fit
 766
 767
 768
# When run as a script, execute the doctests embedded in this module's
# docstrings.
if __name__ == "__main__":
    import doctest
    doctest.testmod()