import string
import collections
import norms
import logging
import math
from itertools import zip_longest
from segment import segment
from multiprocessing import Pool

# To time a run:
#
# import timeit
# c5a = open('2012/5a.ciphertext', 'r').read()
# timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)

logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)

english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
    for line in f:
        (letter, count) = line.split("\t")
        english_counts[letter] = int(count)
normalised_english_counts = norms.normalise(english_counts)

english_bigram_counts = collections.defaultdict(int)
with open('count_2l.txt', 'r') as f:
    for line in f:
        (bigram, count) = line.split("\t")
        english_bigram_counts[bigram] = int(count)
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)

with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]

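# Lookup table for modular division, used by affine_decipher_letter below:
# modular_division_table[b][c] holds a value a with (a * b) % 26 == c, so
# undoing the affine multiplier becomes a table lookup. (For multipliers that
# share a factor with 26, such as 13, there is no unique inverse and the
# entry is simply the last value written.)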
modular_division_table = [[0]*26 for _ in range(26)]
for a in range(26):
    for b in range(26):
        c = (a * b) % 26
        modular_division_table[b][c] = a

def letters(text):
    """Remove all non-alphabetic characters from a text
    >>> letters('The Quick')
    'TheQuick'
    >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'TheQuickBROWNfoxjumpedoverthelazyDOG'
    """
    return ''.join([c for c in text if c in string.ascii_letters])

def sanitise(text):
    """Remove all non-alphabetic characters and convert the text to lowercase

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    # sanitised = [c.lower() for c in text if c in string.ascii_letters]
    # return ''.join(sanitised)
    return letters(text).lower()

def ngrams(text, n):
    """Returns all n-grams of a text

    >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
     'nf', 'fo', 'ox']
    >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
    ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
     'rown', 'ownf', 'wnfo', 'nfox']
    """
    return [text[i:i+n] for i in range(len(text)-n+1)]

def every_nth(text, n):
    """Returns n strings, each of which consists of every nth character,
    starting with the 0th, 1st, 2nd, ... (n-1)th character

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    """
    split_text = [text[i:i+n] for i in range(0, len(text), n)]
    return [''.join(l) for l in zip_longest(*split_text, fillvalue='')]

def combine_every_nth(split_text):
    """Reforms a text split into every_nth strings

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    return ''.join([''.join(l)
                    for l in zip_longest(*split_text, fillvalue='')])


def frequencies(text):
    """Count the number of occurrences of each character in text

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
            'dog').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
     ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
     ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
     ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
            '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
     ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
     ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
     ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
     ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
            'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> frequencies('abcdefabcdef')['x']
    0
    """
    #counts = collections.defaultdict(int)
    #for c in text:
    #    counts[c] += 1
    #return counts
    return collections.Counter(c for c in text)
letter_frequencies = frequencies

def deduplicate(text):
    """Return the distinct elements of text, in order of first appearance

    >>> deduplicate('bayessss')
    ['b', 'a', 'y', 'e', 's']
    """
    return list(collections.OrderedDict.fromkeys(text))


def caesar_encipher_letter(letter, shift):
    """Encipher a letter, given a shift amount

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('a', 2)
    'c'
    >>> caesar_encipher_letter('b', 2)
    'd'
    >>> caesar_encipher_letter('x', 2)
    'z'
    >>> caesar_encipher_letter('y', 2)
    'a'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('z', -1)
    'y'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        return chr(((ord(letter) - alphabet_start + shift) % 26) +
                   alphabet_start)
    else:
        return letter

def caesar_decipher_letter(letter, shift):
    """Decipher a letter, given a shift amount

    >>> caesar_decipher_letter('b', 1)
    'a'
    >>> caesar_decipher_letter('b', 2)
    'z'
    """
    return caesar_encipher_letter(letter, -shift)

def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('abc', 2)
    'cde'
    >>> caesar_encipher('abcxyz', 2)
    'cdezab'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    """
    enciphered = [caesar_encipher_letter(l, shift) for l in message]
    return ''.join(enciphered)

def caesar_decipher(message, shift):
    """Decipher a message with the Caesar cipher of given shift

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cde', 2)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    """
    return caesar_encipher(message, -shift)

def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Encipher a letter, given a multiplier and adder

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
            for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
            for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        letter_number = ord(letter) - alphabet_start
        if one_based: letter_number += 1
        cipher_number = (letter_number * multiplier + adder) % 26
        if one_based: cipher_number -= 1
        return chr(cipher_number % 26 + alphabet_start)
    else:
        return letter

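# Worked example of the one-based arithmetic above: with multiplier=3,
# adder=5 and one_based=True, 'B' has letter_number 1, bumped to 2;
# (2 * 3 + 5) % 26 = 11; stepping back to zero-based gives 10, i.e. 'K',
# matching the 'HKNQ...' doctest.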
def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter, given a multiplier and adder

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
            for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
            for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        cipher_number = ord(letter) - alphabet_start
        if one_based: cipher_number += 1
        plaintext_number = (
            modular_division_table[multiplier][(cipher_number - adder) % 26])
        if one_based: plaintext_number -= 1
        return chr(plaintext_number % 26 + alphabet_start)
    else:
        return letter

def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a message

    >>> affine_encipher('hours passed during which jerico tried every ' \
        'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(enciphered)

def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a message

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
        'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(enciphered)


def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Find the cipher alphabet given a keyword.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 0)
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    if wrap_alphabet == 0:
        cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
                                              string.ascii_lowercase))
    else:
        if wrap_alphabet == 1:
            last_keyword_letter = deduplicate(sanitise(keyword))[-1]
        else:
            last_keyword_letter = sorted(sanitise(keyword))[-1]
        last_keyword_position = string.ascii_lowercase.find(
            last_keyword_letter) + 1
        cipher_alphabet = ''.join(
            deduplicate(sanitise(keyword) +
                        string.ascii_lowercase[last_keyword_position:] +
                        string.ascii_lowercase))
    return cipher_alphabet


def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Enciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 0)
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 1)
    'lskl dskkbus'
    >>> keyword_encipher('test message', 'bayes', 2)
    'qspq jsppbcs'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
    return message.lower().translate(cipher_translation)

def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Deciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
    'test message'
    >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
    'test message'
    >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
    'test message'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
    return message.lower().translate(cipher_translation)

def scytale_encipher(message, rows):
    """Enciphers using the scytale transposition cipher.
    Message is padded with spaces to allow all rows to be the same length.

    >>> scytale_encipher('thequickbrownfox', 3)
    'tcnhkfeboqrxuo iw '
    >>> scytale_encipher('thequickbrownfox', 4)
    'tubnhirfecooqkwx'
    >>> scytale_encipher('thequickbrownfox', 5)
    'tubn hirf ecoo qkwx '
    >>> scytale_encipher('thequickbrownfox', 6)
    'tqcrnxhukof eibwo '
    >>> scytale_encipher('thequickbrownfox', 7)
    'tqcrnx hukof  eibwo  '
    """
    if len(message) % rows != 0:
        message += ' ' * (rows - len(message) % rows)
    row_length = len(message) // rows
    slices = [message[i:i+row_length]
              for i in range(0, len(message), row_length)]
    return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])

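# Worked example for scytale_encipher with rows=3: 'thequickbrownfox' is
# padded to 18 characters and split into the rows 'thequi', 'ckbrow' and
# 'nfox  '; reading those rows column by column gives 'tcnhkfeboqrxuo iw ',
# as in the doctest above. scytale_decipher below reverses the process by
# splitting the ciphertext into columns of length `rows` and reading across.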
def scytale_decipher(message, rows):
    """Deciphers using the scytale transposition cipher.
    Assumes the message is padded so that all rows are the same length.

    >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
    'thequickbrownfox  '
    >>> scytale_decipher('tubnhirfecooqkwx', 4)
    'thequickbrownfox'
    >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
    'thequickbrownfox    '
    >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
    'thequickbrownfox  '
    >>> scytale_decipher('tqcrnx hukof  eibwo  ', 7)
    'thequickbrownfox     '
    """
    cols = len(message) // rows
    columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
    return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])


def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
        'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
        'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.42152901235832...)
    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
        'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
    (13, 0.316029208075451...)
    """
    sanitised_message = sanitise(message)
    best_shift = 0
    best_fit = float("inf")
    for shift in range(26):
        plaintext = caesar_decipher(sanitised_message, shift)
        counts = message_frequency_scaling(letter_frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(shift, fit,
                         plaintext[:50]))
        if fit < best_fit:
            best_fit = fit
            best_shift = shift
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit

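# A minimal usage sketch for the break functions (the plaintext below is
# illustrative, not one of the test messages):
#
#   ciphertext = caesar_encipher(sanitise('the quick brown fox jumps over the lazy dog'), 7)
#   shift, fit = caesar_break(ciphertext)
#   recovered = caesar_decipher(ciphertext, shift)
#
# affine_break, keyword_break and scytale_break below follow the same shape:
# exhaust the key space, score each trial decrypt against normalised English
# letter (or, for the scytale, bigram) counts with the given metric, and
# return the lowest-scoring key together with its fit.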
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
        'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
        'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
        'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
        'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
        'clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    for one_based in [True, False]:
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                counts = message_frequency_scaling(letter_frequencies(plaintext))
                fit = metric(target_counts, counts)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit

def keyword_break(message,
                  wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break(keyword_encipher('this is a test message for the ' \
        'keyword decipherment', 'elephant', 1), \
        wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    best_keyword = ''
    best_wrap_alphabet = 0
    best_fit = float("inf")
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            counts = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_counts, counts)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit

def keyword_break_mp(message,
                     wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
        'keyword decipherment', 'elephant', 1), \
        wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    with Pool() as pool:
        helper_args = [(message, word, wrap, metric, target_counts,
                        message_frequency_scaling)
                       for word in wordlist for wrap in range(3)]
        # Gotcha: the helper function here needs to be defined at the top level
        # (limitation of Pool.starmap)
        breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
        return min(breaks, key=lambda k: k[1])

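# keyword_break_one is kept at module level (rather than nested inside
# keyword_break_mp) because multiprocessing has to pickle the callable it
# sends to the worker processes, and nested functions cannot be pickled.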
def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    """Score a single keyword/wrap_alphabet pair; helper for keyword_break_mp."""
    plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    counts = message_frequency_scaling(letter_frequencies(plaintext))
    fit = metric(target_counts, counts)
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
    return (keyword, wrap_alphabet), fit

def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a Scytale cipher

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
        'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
        'aek') # doctest: +ELLIPSIS
    (6, 0.83453041115025...)
    """
    best_key = 0
    best_fit = float("inf")
    for key in range(1, 20):
        if len(message) % key == 0:
            plaintext = scytale_decipher(message, key)
            counts = message_frequency_scaling(frequencies(
                ngrams(sanitise(plaintext), 2)))
            fit = metric(target_counts, counts)
            logger.debug('Scytale break attempt using key {0} gives fit of '
                         '{1} and decrypt starting: {2}'.format(key,
                             fit, sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_key = key
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit


if __name__ == "__main__":
    import doctest
    doctest.testmod()
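# The doctests above can be run with "python3 cipher.py" (testmod is silent on
# success) or "python3 -m doctest cipher.py -v" for verbose output; both need
# norms.py, segment.py and the count_*.txt and words.txt data files alongside
# this module.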