import string
import collections
import norms
import logging
import math
from itertools import zip_longest
from segment import segment
from multiprocessing import Pool

# To time a run:
#
# import timeit
# c5a = open('2012/5a.ciphertext', 'r').read()
# timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)

logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)

english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
    for line in f:
        (letter, count) = line.split("\t")
        english_counts[letter] = int(count)
normalised_english_counts = norms.normalise(english_counts)

english_bigram_counts = collections.defaultdict(int)
with open('count_2l.txt', 'r') as f:
    for line in f:
        (bigram, count) = line.split("\t")
        english_bigram_counts[bigram] = int(count)
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)

with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]

modular_division_table = [[0]*26 for x in range(26)]
for a in range(26):
    for b in range(26):
        c = (a * b) % 26
        modular_division_table[b][c] = a

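# Illustration (comment only): the table amounts to division mod 26, so
# modular_division_table[3][1] == 9 because (9 * 3) % 26 == 1. Only
# multipliers coprime to 26 have a well-defined inverse here.
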
def sanitise(text):
    """Remove all non-alphabetic characters and convert the text to lowercase

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    sanitised = [c.lower() for c in text if c in string.ascii_letters]
    return ''.join(sanitised)

def ngrams(text, n):
    """Returns all n-grams of a text

    >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
     'nf', 'fo', 'ox']
    >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
    ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
     'rown', 'ownf', 'wnfo', 'nfox']
    """
    return [text[i:i+n] for i in range(len(text)-n+1)]

def every_nth(text, n):
    """Returns n strings, each of which consists of every nth character,
    starting with the 0th, 1st, 2nd, ... (n-1)th character

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    """
    split_text = [text[i:i+n] for i in range(0, len(text), n)]
    return [''.join(l) for l in zip_longest(*split_text, fillvalue='')]

def combine_every_nth(split_text):
    """Reforms a text split into every_nth strings

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    return ''.join([''.join(l)
                    for l in zip_longest(*split_text, fillvalue='')])


def frequencies(text):
    """Count the number of occurrences of each character in text

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies('the quick brown fox jumped over the lazy dog').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
     ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
     ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
     ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies('The Quick BROWN fox jumped! over... the (9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
     ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
     ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
     ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
     ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    """
    counts = collections.defaultdict(int)
    for c in text:
        counts[c] += 1
    return counts
letter_frequencies = frequencies

def deduplicate(text):
    """Return the distinct characters of text, in order of first appearance"""
    return list(collections.OrderedDict.fromkeys(text))


def caesar_encipher_letter(letter, shift):
    """Encipher a letter, given a shift amount

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('a', 2)
    'c'
    >>> caesar_encipher_letter('b', 2)
    'd'
    >>> caesar_encipher_letter('x', 2)
    'z'
    >>> caesar_encipher_letter('y', 2)
    'a'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('z', -1)
    'y'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        return chr(((ord(letter) - alphabet_start + shift) % 26) +
                   alphabet_start)
    else:
        return letter

def caesar_decipher_letter(letter, shift):
    """Decipher a letter, given a shift amount

    >>> caesar_decipher_letter('b', 1)
    'a'
    >>> caesar_decipher_letter('b', 2)
    'z'
    """
    return caesar_encipher_letter(letter, -shift)

def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('abc', 2)
    'cde'
    >>> caesar_encipher('abcxyz', 2)
    'cdezab'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    """
    enciphered = [caesar_encipher_letter(l, shift) for l in message]
    return ''.join(enciphered)

def caesar_decipher(message, shift):
    """Decipher a message enciphered with the Caesar cipher of given shift

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cde', 2)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    """
    return caesar_encipher(message, -shift)

def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Encipher a letter, given a multiplier and adder

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        letter_number = ord(letter) - alphabet_start
        if one_based: letter_number += 1
        cipher_number = (letter_number * multiplier + adder) % 26
        if one_based: cipher_number -= 1
        return chr(cipher_number % 26 + alphabet_start)
    else:
        return letter

def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter, given a multiplier and adder

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        cipher_number = ord(letter) - alphabet_start
        if one_based: cipher_number += 1
        plaintext_number = modular_division_table[multiplier][
            (cipher_number - adder) % 26]
        if one_based: plaintext_number -= 1
        return chr(plaintext_number % 26 + alphabet_start)
    else:
        return letter

def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a message

    >>> affine_encipher('hours passed during which jerico tried every trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(enciphered)

def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a message

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    deciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(deciphered)

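# Round-trip sketch (comment only): for any multiplier coprime to 26,
# deciphering undoes enciphering, e.g.
#   affine_decipher(affine_encipher('some text', 5, 8), 5, 8) == 'some text'
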
def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Find the cipher alphabet given a keyword.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 0)
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    if wrap_alphabet == 0:
        cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
                                              string.ascii_lowercase))
    else:
        if wrap_alphabet == 1:
            last_keyword_letter = deduplicate(sanitise(keyword))[-1]
        else:
            last_keyword_letter = sorted(sanitise(keyword))[-1]
        last_keyword_position = string.ascii_lowercase.find(
            last_keyword_letter) + 1
        cipher_alphabet = ''.join(
            deduplicate(sanitise(keyword) +
                        string.ascii_lowercase[last_keyword_position:] +
                        string.ascii_lowercase))
    return cipher_alphabet


def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Enciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 0)
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 1)
    'lskl dskkbus'
    >>> keyword_encipher('test message', 'bayes', 2)
    'qspq jsppbcs'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
    return message.lower().translate(cipher_translation)

def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Deciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
    'test message'
    >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
    'test message'
    >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
    'test message'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
    return message.lower().translate(cipher_translation)

def scytale_encipher(message, rows):
    """Enciphers using the scytale transposition cipher.
    Message is padded with spaces to allow all rows to be the same length.

    >>> scytale_encipher('thequickbrownfox', 3)
    'tcnhkfeboqrxuo iw '
    >>> scytale_encipher('thequickbrownfox', 4)
    'tubnhirfecooqkwx'
    >>> scytale_encipher('thequickbrownfox', 5)
    'tubn hirf ecoo qkwx '
    >>> scytale_encipher('thequickbrownfox', 6)
    'tqcrnxhukof eibwo '
    >>> scytale_encipher('thequickbrownfox', 7)
    'tqcrnx hukof  eibwo  '
    """
    if len(message) % rows != 0:
        message += ' '*(rows - len(message) % rows)
    row_length = round(len(message) / rows)
    slices = [message[i:i+row_length]
              for i in range(0, len(message), row_length)]
    return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])

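# Worked example (comment only): scytale_encipher('thequickbrownfox', 3) pads
# the message to 18 characters, splits it into the rows 'thequi', 'ckbrow'
# and 'nfox  ', then reads down the columns to give 'tcnhkfeboqrxuo iw '.
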
def scytale_decipher(message, rows):
    """Deciphers using the scytale transposition cipher.
    Assumes the message is padded so that all rows are the same length.

    >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
    'thequickbrownfox  '
    >>> scytale_decipher('tubnhirfecooqkwx', 4)
    'thequickbrownfox'
    >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
    'thequickbrownfox    '
    >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
    'thequickbrownfox  '
    >>> scytale_decipher('tqcrnx hukof  eibwo  ', 7)
    'thequickbrownfox     '
    """
    cols = round(len(message) / rows)
    columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
    return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])


def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.42152901235832...)
    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
    (13, 0.316029208075451...)
    """
    sanitised_message = sanitise(message)
    best_shift = 0
    best_fit = float("inf")
    for shift in range(26):
        plaintext = caesar_decipher(sanitised_message, shift)
        counts = message_frequency_scaling(letter_frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(shift, fit,
                                                        plaintext[:50]))
        if fit < best_fit:
            best_fit = fit
            best_shift = shift
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit

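# Usage sketch (hypothetical file name, mirroring the timing note above):
#   c1a = open('2012/1a.ciphertext', 'r').read()
#   key, fit = caesar_break(c1a)
#   print(caesar_decipher(sanitise(c1a), key))
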
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    for one_based in [True, False]:
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                counts = message_frequency_scaling(letter_frequencies(plaintext))
                fit = metric(target_counts, counts)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.format(
                                 multiplier, adder, one_based, fit,
                                 plaintext[:50]))
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit

def keyword_break(message,
                  wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', 1), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    best_keyword = ''
    best_wrap_alphabet = 0
    best_fit = float("inf")
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            counts = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_counts, counts)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit,
                    sanitise(keyword_decipher(message, best_keyword,
                                              best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit

def keyword_break_mp(message,
                     wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis, spreading the work across multiple processes

    >>> keyword_break_mp(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', 1), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    with Pool() as pool:
        helper_args = [(message, word, wrap, metric, target_counts,
                        message_frequency_scaling)
                       for word in wordlist for wrap in range(3)]
        # Gotcha: the helper function here needs to be defined at the top
        # level (limitation of Pool.starmap)
        breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
        return min(breaks, key=lambda k: k[1])

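# Usage sketch (hypothetical ciphertext variable): because Pool may start
# fresh interpreters, call the multiprocessing breaker from guarded code so
# keyword_break_one can be re-imported by the workers.
#   if __name__ == '__main__':
#       print(keyword_break_mp(ciphertext, chunksize=500))
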
def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    """Scores a single keyword and wrap_alphabet candidate for keyword_break_mp"""
    plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    counts = message_frequency_scaling(letter_frequencies(plaintext))
    fit = metric(target_counts, counts)
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
    return (keyword, wrap_alphabet), fit

def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a Scytale cipher

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoeredcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriiotoaek') # doctest: +ELLIPSIS
    (6, 0.83453041115025...)
    """
    best_key = 0
    best_fit = float("inf")
    for key in range(1, 20):
        if len(message) % key == 0:
            plaintext = scytale_decipher(message, key)
            counts = message_frequency_scaling(frequencies(
                ngrams(sanitise(plaintext), 2)))
            fit = metric(target_counts, counts)
            logger.debug('Scytale break attempt using key {0} gives fit of '
                         '{1} and decrypt starting: {2}'.format(key,
                             fit, sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_key = key
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit


if __name__ == "__main__":
    import doctest
    doctest.testmod()