import string
import collections
import norms
import logging
import math
from itertools import zip_longest
from segment import segment
from multiprocessing import Pool

# To time a run:
#
# import timeit
# c5a = open('2012/5a.ciphertext', 'r').read()
# timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)

logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)

english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
    for line in f:
        (letter, count) = line.split("\t")
        english_counts[letter] = int(count)
normalised_english_counts = norms.normalise(english_counts)

english_bigram_counts = collections.defaultdict(int)
with open('count_2l.txt', 'r') as f:
    for line in f:
        (bigram, count) = line.split("\t")
        english_bigram_counts[bigram] = int(count)
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)

with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]

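# Lookup table for modular division, used by affine_decipher_letter below:
# modular_division_table[b][c] holds a value a with (a * b) % 26 == c, so
# undoing the affine multiplier becomes a table lookup. (For multipliers that
# share a factor with 26, such as 13, there is no unique inverse and the
# entry is simply the last value written.)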
modular_division_table = [[0]*26 for _ in range(26)]
for a in range(26):
    for b in range(26):
        c = (a * b) % 26
        modular_division_table[b][c] = a

def letters(text):
    """Remove all non-alphabetic characters from a text
    >>> letters('The Quick')
    'TheQuick'
    >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'TheQuickBROWNfoxjumpedoverthelazyDOG'
    """
    return ''.join([c for c in text if c in string.ascii_letters])

def sanitise(text):
    """Remove all non-alphabetic characters and convert the text to lowercase

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    # sanitised = [c.lower() for c in text if c in string.ascii_letters]
    # return ''.join(sanitised)
    return letters(text).lower()

def ngrams(text, n):
    """Returns all n-grams of a text

    >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
     'nf', 'fo', 'ox']
    >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
    ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
     'rown', 'ownf', 'wnfo', 'nfox']
    """
    return [text[i:i+n] for i in range(len(text)-n+1)]

def every_nth(text, n):
    """Returns n strings, each of which consists of every nth character,
    starting with the 0th, 1st, 2nd, ... (n-1)th character

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    """
    split_text = [text[i:i+n] for i in range(0, len(text), n)]
    return [''.join(l) for l in zip_longest(*split_text, fillvalue='')]

def combine_every_nth(split_text):
    """Reforms a text split into every_nth strings

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    return ''.join([''.join(l)
                    for l in zip_longest(*split_text, fillvalue='')])


def frequencies(text):
    """Count the number of occurrences of each character in text

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
            'dog').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
     ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
     ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
     ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
            '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
     ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
     ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
     ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
     ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
            'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> frequencies('abcdefabcdef')['x']
    0
    """
    #counts = collections.defaultdict(int)
    #for c in text:
    #    counts[c] += 1
    #return counts
    return collections.Counter(c for c in text)
letter_frequencies = frequencies

def deduplicate(text):
    """Return the distinct elements of text, in order of first appearance

    >>> deduplicate('bayessss')
    ['b', 'a', 'y', 'e', 's']
    """
    return list(collections.OrderedDict.fromkeys(text))


def caesar_encipher_letter(letter, shift):
    """Encipher a letter, given a shift amount

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('a', 2)
    'c'
    >>> caesar_encipher_letter('b', 2)
    'd'
    >>> caesar_encipher_letter('x', 2)
    'z'
    >>> caesar_encipher_letter('y', 2)
    'a'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('z', -1)
    'y'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        return chr(((ord(letter) - alphabet_start + shift) % 26) +
                   alphabet_start)
    else:
        return letter

def caesar_decipher_letter(letter, shift):
    """Decipher a letter, given a shift amount

    >>> caesar_decipher_letter('b', 1)
    'a'
    >>> caesar_decipher_letter('b', 2)
    'z'
    """
    return caesar_encipher_letter(letter, -shift)

def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('abc', 2)
    'cde'
    >>> caesar_encipher('abcxyz', 2)
    'cdezab'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    """
    enciphered = [caesar_encipher_letter(l, shift) for l in message]
    return ''.join(enciphered)

def caesar_decipher(message, shift):
    """Decipher a message with the Caesar cipher of given shift

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cde', 2)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    """
    return caesar_encipher(message, -shift)

def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Encipher a letter, given a multiplier and adder

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
            for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
            for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        letter_number = ord(letter) - alphabet_start
        if one_based: letter_number += 1
        cipher_number = (letter_number * multiplier + adder) % 26
        if one_based: cipher_number -= 1
        return chr(cipher_number % 26 + alphabet_start)
    else:
        return letter

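# Worked example of the one-based arithmetic above: with multiplier=3,
# adder=5 and one_based=True, 'B' has letter_number 1, bumped to 2;
# (2 * 3 + 5) % 26 = 11; stepping back to zero-based gives 10, i.e. 'K',
# matching the 'HKNQ...' doctest.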
def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter, given a multiplier and adder

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
            for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
            for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        cipher_number = ord(letter) - alphabet_start
        if one_based: cipher_number += 1
        plaintext_number = (
            modular_division_table[multiplier][(cipher_number - adder) % 26])
        if one_based: plaintext_number -= 1
        return chr(plaintext_number % 26 + alphabet_start)
    else:
        return letter

def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a message

    >>> affine_encipher('hours passed during which jerico tried every ' \
        'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(enciphered)

def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a message

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
        'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(enciphered)


def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Find the cipher alphabet given a keyword.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 0)
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    if wrap_alphabet == 0:
        cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
                                              string.ascii_lowercase))
    else:
        if wrap_alphabet == 1:
            last_keyword_letter = deduplicate(sanitise(keyword))[-1]
        else:
            last_keyword_letter = sorted(sanitise(keyword))[-1]
        last_keyword_position = string.ascii_lowercase.find(
            last_keyword_letter) + 1
        cipher_alphabet = ''.join(
            deduplicate(sanitise(keyword) +
                        string.ascii_lowercase[last_keyword_position:] +
                        string.ascii_lowercase))
    return cipher_alphabet


def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Enciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 0)
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 1)
    'lskl dskkbus'
    >>> keyword_encipher('test message', 'bayes', 2)
    'qspq jsppbcs'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
    return message.lower().translate(cipher_translation)

def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Deciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
    'test message'
    >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
    'test message'
    >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
    'test message'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
    return message.lower().translate(cipher_translation)

def scytale_encipher(message, rows):
    """Enciphers using the scytale transposition cipher.
    Message is padded with spaces to allow all rows to be the same length.

    >>> scytale_encipher('thequickbrownfox', 3)
    'tcnhkfeboqrxuo iw '
    >>> scytale_encipher('thequickbrownfox', 4)
    'tubnhirfecooqkwx'
    >>> scytale_encipher('thequickbrownfox', 5)
    'tubn hirf ecoo qkwx '
    >>> scytale_encipher('thequickbrownfox', 6)
    'tqcrnxhukof eibwo '
    >>> scytale_encipher('thequickbrownfox', 7)
    'tqcrnx hukof  eibwo  '
    """
    if len(message) % rows != 0:
        message += ' ' * (rows - len(message) % rows)
    row_length = len(message) // rows
    slices = [message[i:i+row_length]
              for i in range(0, len(message), row_length)]
    return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])

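# Worked example for scytale_encipher with rows=3: 'thequickbrownfox' is
# padded to 18 characters and split into the rows 'thequi', 'ckbrow' and
# 'nfox  '; reading those rows column by column gives 'tcnhkfeboqrxuo iw ',
# as in the doctest above. scytale_decipher below reverses the process by
# splitting the ciphertext into columns of length `rows` and reading across.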
def scytale_decipher(message, rows):
    """Deciphers using the scytale transposition cipher.
    Assumes the message is padded so that all rows are the same length.

    >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
    'thequickbrownfox  '
    >>> scytale_decipher('tubnhirfecooqkwx', 4)
    'thequickbrownfox'
    >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
    'thequickbrownfox    '
    >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
    'thequickbrownfox  '
    >>> scytale_decipher('tqcrnx hukof  eibwo  ', 7)
    'thequickbrownfox     '
    """
    cols = len(message) // rows
    columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
    return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])


def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
        'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
        'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.42152901235832...)
    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
        'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
    (13, 0.316029208075451...)
    """
    sanitised_message = sanitise(message)
    best_shift = 0
    best_fit = float("inf")
    for shift in range(26):
        plaintext = caesar_decipher(sanitised_message, shift)
        counts = message_frequency_scaling(letter_frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(shift, fit,
                         plaintext[:50]))
        if fit < best_fit:
            best_fit = fit
            best_shift = shift
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit

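# A minimal usage sketch for the break functions (the plaintext below is
# illustrative, not one of the test messages):
#
#   ciphertext = caesar_encipher(sanitise('the quick brown fox jumps over the lazy dog'), 7)
#   shift, fit = caesar_break(ciphertext)
#   recovered = caesar_decipher(ciphertext, shift)
#
# affine_break, keyword_break and scytale_break below follow the same shape:
# exhaust the key space, score each trial decrypt against normalised English
# letter (or, for the scytale, bigram) counts with the given metric, and
# return the lowest-scoring key together with its fit.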
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
        'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
        'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
        'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
        'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
        'clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    for one_based in [True, False]:
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                counts = message_frequency_scaling(letter_frequencies(plaintext))
                fit = metric(target_counts, counts)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit

def keyword_break(message,
                  wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break(keyword_encipher('this is a test message for the ' \
        'keyword decipherment', 'elephant', 1), \
        wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    best_keyword = ''
    best_wrap_alphabet = 0
    best_fit = float("inf")
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            counts = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_counts, counts)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit

def keyword_break_mp(message,
                     wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
        'keyword decipherment', 'elephant', 1), \
        wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    with Pool() as pool:
        helper_args = [(message, word, wrap, metric, target_counts,
                        message_frequency_scaling)
                       for word in wordlist for wrap in range(3)]
        # Gotcha: the helper function here needs to be defined at the top level
        # (limitation of Pool.starmap)
        breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
        return min(breaks, key=lambda k: k[1])

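# keyword_break_one is kept at module level (rather than nested inside
# keyword_break_mp) because multiprocessing has to pickle the callable it
# sends to the worker processes, and nested functions cannot be pickled.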
def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    """Score a single keyword/wrap_alphabet pair; helper for keyword_break_mp."""
    plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    counts = message_frequency_scaling(letter_frequencies(plaintext))
    fit = metric(target_counts, counts)
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
    return (keyword, wrap_alphabet), fit

def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a Scytale cipher

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
        'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
        'aek') # doctest: +ELLIPSIS
    (6, 0.83453041115025...)
    """
    best_key = 0
    best_fit = float("inf")
    for key in range(1, 20):
        if len(message) % key == 0:
            plaintext = scytale_decipher(message, key)
            counts = message_frequency_scaling(frequencies(
                ngrams(sanitise(plaintext), 2)))
            fit = metric(target_counts, counts)
            logger.debug('Scytale break attempt using key {0} gives fit of '
                         '{1} and decrypt starting: {2}'.format(key,
                             fit, sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_key = key
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit


if __name__ == "__main__":
    import doctest
    doctest.testmod()
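# The doctests above can be run with "python3 cipher.py" (testmod is silent on
# success) or "python3 -m doctest cipher.py -v" for verbose output; both need
# norms.py, segment.py and the count_*.txt and words.txt data files alongside
# this module.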