More whitespace and line length tidying
[cipher-tools.git] / cipher.py
1 import string
2 import collections
3 import norms
4 import logging
5 import math
6 from itertools import zip_longest
7 from segment import segment
8 from multiprocessing import Pool
9
10 # To time a run:
11 #
12 # import timeit
13 # c5a = open('2012/5a.ciphertext', 'r').read()
14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
16
# Module-level logger; appends to cipher.log in the current working directory.
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
# Uncomment one of these for more verbose break-attempt logging:
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)
22
# English single-letter counts, loaded at import time from a tab-separated
# letter<TAB>count file (count_1l.txt must exist in the working directory).
english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
    for line in f:
        (letter, count) = line.split("\t")
        english_counts[letter] = int(count)
# Scaled form used as the target distribution for frequency analysis.
normalised_english_counts = norms.normalise(english_counts)
29
# English bigram counts, loaded at import time from a tab-separated
# bigram<TAB>count file (count_2l.txt must exist in the working directory).
english_bigram_counts = collections.defaultdict(int)
with open('count_2l.txt', 'r') as f:
    for line in f:
        (bigram, count) = line.split("\t")
        english_bigram_counts[bigram] = int(count)
# Scaled form used as the target distribution for scytale_break.
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
36
# Candidate keywords for dictionary attacks (keyword_break*), one per line.
with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]
39
# Lookup table for division mod 26: modular_division_table[m][c] holds an x
# with (x * m) % 26 == c.  For m coprime to 26 this inverts multiplication
# exactly; for other m the last x written (highest) is kept.
modular_division_table = [[0] * 26 for _ in range(26)]
for dividend in range(26):
    for divisor in range(26):
        modular_division_table[divisor][(dividend * divisor) % 26] = dividend
45
46
def sanitise(text):
    """Remove all non-alphabetic characters and convert the text to lowercase

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    return ''.join(c.lower() for c in text if c in string.ascii_letters)
57
def ngrams(text, n):
    """Returns all n-grams of a text

    >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
     'nf', 'fo', 'ox']
    >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
    ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
     'rown', 'ownf', 'wnfo', 'nfox']
    """
    # One n-gram starts at each position that leaves room for n characters.
    last_start = len(text) - n
    return [text[start:start + n] for start in range(last_start + 1)]
69
def every_nth(text, n):
    """Returns n strings, each of which consists of every nth character,
    starting with the 0th, 1st, 2nd, ... (n-1)th character

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    """
    # Chop into rows of length n, then read the columns.
    rows = (text[start:start + n] for start in range(0, len(text), n))
    return [''.join(column) for column in zip_longest(*rows, fillvalue='')]
84
def combine_every_nth(split_text):
    """Reforms a text split into every_nth strings

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    # Reading the columns of the split undoes the column-wise split.
    interleaved = zip_longest(*split_text, fillvalue='')
    return ''.join(''.join(chars) for chars in interleaved)
97
98
def frequencies(text):
    """Count the number of occurrences of each character in text

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
         'dog').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
     ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
     ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
     ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
         '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
     ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
     ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
     ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
     ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
         'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    """
    # collections.Counter is the stdlib idiom for tallying; like the
    # defaultdict(int) it replaces, it yields 0 for absent characters.
    return collections.Counter(text)

# Alias used by the *_break functions for readability.
letter_frequencies = frequencies
129
def deduplicate(text):
    """Return the distinct elements of text as a list, keeping first-seen order."""
    seen = set()
    unique = []
    for item in text:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique
132
133
134
def caesar_encipher_letter(letter, shift):
    """Encipher a letter, given a shift amount

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('a', 2)
    'c'
    >>> caesar_encipher_letter('b', 2)
    'd'
    >>> caesar_encipher_letter('x', 2)
    'z'
    >>> caesar_encipher_letter('y', 2)
    'a'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('z', -1)
    'y'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    # Non-letters pass through unchanged.
    if letter not in string.ascii_letters:
        return letter
    # Shift within the alphabet of the letter's own case.
    base = ord('A') if letter in string.ascii_uppercase else ord('a')
    return chr(base + (ord(letter) - base + shift) % 26)
164
def caesar_decipher_letter(letter, shift):
    """Decipher a letter, given a shift amount

    >>> caesar_decipher_letter('b', 1)
    'a'
    >>> caesar_decipher_letter('b', 2)
    'z'
    """
    # Deciphering is enciphering with the opposite shift.
    return caesar_encipher_letter(letter, -shift)
174
def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('abc', 2)
    'cde'
    >>> caesar_encipher('abcxyz', 2)
    'cdezab'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    """
    return ''.join(caesar_encipher_letter(character, shift)
                   for character in message)
189
def caesar_decipher(message, shift):
    """Decipher a message enciphered with the Caesar cipher of given shift

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cde', 2)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    """
    # Deciphering is enciphering with the opposite shift.
    return caesar_encipher(message, -shift)
201
def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Encipher a letter, given a multiplier and adder.
    With one_based=True the letters are numbered a=1..z=26 before the
    transformation; otherwise a=0..z=25.

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
            for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
            for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBE'
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    # Non-letters pass through unchanged.
    if letter not in string.ascii_letters:
        return letter
    base = ord('A') if letter in string.ascii_uppercase else ord('a')
    offset = 1 if one_based else 0
    plain_number = ord(letter) - base + offset
    cipher_number = (plain_number * multiplier + adder) % 26 - offset
    return chr(cipher_number % 26 + base)
224
def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter, given a multiplier and adder.
    With one_based=True the letters are numbered a=1..z=26; otherwise
    a=0..z=25. Non-letters are returned unchanged.

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
            for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
            for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        cipher_number = ord(letter) - alphabet_start
        if one_based:
            cipher_number += 1
        # Undo the multiplication with the precomputed division table:
        # modular_division_table[m][c] is the x with (x * m) % 26 == c.
        plaintext_number = modular_division_table[multiplier][
            (cipher_number - adder) % 26]
        if one_based:
            plaintext_number -= 1
        return chr(plaintext_number % 26 + alphabet_start)
    else:
        return letter
248
def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a message

    >>> affine_encipher('hours passed during which jerico tried every ' \
           'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    return ''.join(affine_encipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
259
def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a message

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
           'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    return ''.join(affine_decipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
270
271
def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Find the cipher alphabet given a keyword.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 0)
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    key_letters = sanitise(keyword)
    if wrap_alphabet == 0:
        return ''.join(deduplicate(key_letters + string.ascii_lowercase))
    # Pick the letter after which the remaining alphabet resumes.
    if wrap_alphabet == 1:
        resume_after = deduplicate(key_letters)[-1]
    else:
        resume_after = sorted(key_letters)[-1]
    resume_position = string.ascii_lowercase.find(resume_after) + 1
    return ''.join(deduplicate(key_letters +
                               string.ascii_lowercase[resume_position:] +
                               string.ascii_lowercase))
304
305
def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Enciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 0)
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 1)
    'lskl dskkbus'
    >>> keyword_encipher('test message', 'bayes', 2)
    'qspq jsppbcs'
    """
    # Map plain alphabet -> cipher alphabet, then translate in one pass.
    translation = ''.maketrans(
        string.ascii_lowercase,
        keyword_cipher_alphabet_of(keyword, wrap_alphabet))
    return message.lower().translate(translation)
326
def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Deciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
    'test message'
    >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
    'test message'
    >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
    'test message'
    """
    # Map cipher alphabet -> plain alphabet (inverse of keyword_encipher).
    translation = ''.maketrans(
        keyword_cipher_alphabet_of(keyword, wrap_alphabet),
        string.ascii_lowercase)
    return message.lower().translate(translation)
347
348 def scytale_encipher(message, rows):
349 """Enciphers using the scytale transposition cipher.
350 Message is padded with spaces to allow all rows to be the same length.
351
352 >>> scytale_encipher('thequickbrownfox', 3)
353 'tcnhkfeboqrxuo iw '
354 >>> scytale_encipher('thequickbrownfox', 4)
355 'tubnhirfecooqkwx'
356 >>> scytale_encipher('thequickbrownfox', 5)
357 'tubn hirf ecoo qkwx '
358 >>> scytale_encipher('thequickbrownfox', 6)
359 'tqcrnxhukof eibwo '
360 >>> scytale_encipher('thequickbrownfox', 7)
361 'tqcrnx hukof eibwo '
362 """
363 if len(message) % rows != 0:
364 message += ' '*(rows - len(message) % rows)
365 row_length = round(len(message) / rows)
366 slices = [message[i:i+row_length]
367 for i in range(0, len(message), row_length)]
368 return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])
369
370 def scytale_decipher(message, rows):
371 """Deciphers using the scytale transposition cipher.
372 Assumes the message is padded so that all rows are the same length.
373
374 >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
375 'thequickbrownfox '
376 >>> scytale_decipher('tubnhirfecooqkwx', 4)
377 'thequickbrownfox'
378 >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
379 'thequickbrownfox '
380 >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
381 'thequickbrownfox '
382 >>> scytale_decipher('tqcrnx hukof eibwo ', 7)
383 'thequickbrownfox '
384 """
385 cols = round(len(message) / rows)
386 columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
387 return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
388
389
def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
          'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
          'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.42152901235832...)
    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
          'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
    (13, 0.316029208075451...)
    """
    sanitised_message = sanitise(message)
    best_shift, best_fit = 0, float("inf")
    # Try every possible shift and keep the one whose letter frequencies
    # are closest (under metric) to the target distribution.
    for candidate_shift in range(26):
        candidate_plaintext = caesar_decipher(sanitised_message,
                                              candidate_shift)
        candidate_fit = metric(
            target_counts,
            message_frequency_scaling(
                letter_frequencies(candidate_plaintext)))
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(
                         candidate_shift, candidate_fit,
                         candidate_plaintext[:50]))
        if candidate_fit < best_fit:
            best_shift, best_fit = candidate_shift, candidate_fit
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(
                    best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit
422
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
          'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
          'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
          'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
          'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
          'clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    best_parameters = (0, 0, True)
    best_fit = float("inf")
    # Exhaustive search: multipliers must be odd (and coprime to 26) to be
    # invertible; both letter-numbering conventions are tried.
    for one_based in [True, False]:
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                fit = metric(target_counts,
                             message_frequency_scaling(
                                 letter_frequencies(plaintext)))
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                if fit < best_fit:
                    best_fit = fit
                    best_parameters = (multiplier, adder, one_based)
    best_multiplier, best_adder, best_one_based = best_parameters
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return best_parameters, best_fit
464
def keyword_break(message,
                  wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    best_keyword = ''
    best_wrap_alphabet = True
    best_fit = float("inf")
    # Scan wrap modes in the outer loop and words in the inner loop (same
    # order as before, so ties resolve to the same key).
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            candidate = keyword_decipher(message, keyword, wrap_alphabet)
            fit = metric(target_counts,
                         message_frequency_scaling(
                             letter_frequencies(candidate)))
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(candidate)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit
500
def keyword_break_mp(message,
                     wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis, fanning the candidates out over a process pool.

    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    # One task per (word, wrap) pair, in the same order as the serial
    # version so tie-breaking via min() is unchanged.
    tasks = [(message, word, wrap, metric, target_counts,
              message_frequency_scaling)
             for word in wordlist for wrap in range(3)]
    with Pool() as pool:
        # Gotcha: the helper function here needs to be defined at the top
        # level (limitation of Pool.starmap)
        scored = pool.starmap(keyword_break_one, tasks, chunksize)
    return min(scored, key=lambda result: result[1])
521
def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    """Score a single keyword/wrap candidate (worker for keyword_break_mp;
    kept at module level so Pool.starmap can pickle it)."""
    plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    fit = metric(target_counts,
                 message_frequency_scaling(letter_frequencies(plaintext)))
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
    return (keyword, wrap_alphabet), fit
531
def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a Scytale cipher

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
          'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
          'aek') # doctest: +ELLIPSIS
    (6, 0.83453041115025...)
    """
    best_key, best_fit = 0, float("inf")
    # Only keys that divide the message length produce a valid grid;
    # bigram statistics are used to score each candidate decryption.
    for key in range(1, 20):
        if len(message) % key != 0:
            continue
        plaintext = scytale_decipher(message, key)
        fit = metric(target_counts,
                     message_frequency_scaling(frequencies(
                         ngrams(sanitise(plaintext), 2))))
        logger.debug('Scytale break attempt using key {0} gives fit of '
                     '{1} and decrypt starting: {2}'.format(key,
                         fit, sanitise(plaintext)[:50]))
        if fit < best_fit:
            best_key, best_fit = key, fit
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit
561
562
if __name__ == "__main__":
    # Run the embedded doctests when this file is executed directly.
    import doctest
    doctest.testmod()