Changed frequencies procedure to use a Counter object
[cipher-tools.git] / cipher.py
1 import string
2 import collections
3 import norms
4 import logging
5 import math
6 from itertools import zip_longest
7 from segment import segment
8 from multiprocessing import Pool
9
10 # To time a run:
11 #
12 # import timeit
13 # c5a = open('2012/5a.ciphertext', 'r').read()
14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
16
# Module logger, writing to cipher.log next to the working directory.
# Lower the level to INFO or DEBUG to trace the *_break search functions.
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)
22
# English single-letter frequency counts, loaded from a tab-separated
# "letter<TAB>count" file.  Kept as a defaultdict(int) so that letters
# absent from the file count as zero when looked up.
english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
    for line in f:
        (letter, count) = line.split("\t")
        english_counts[letter] = int(count)
# Normalised version used as the default target distribution by the
# frequency-analysis breakers below.
normalised_english_counts = norms.normalise(english_counts)
29
# English bigram frequency counts, same tab-separated format as the
# single-letter file: "bigram<TAB>count".
english_bigram_counts = collections.defaultdict(int)
with open('count_2l.txt', 'r') as f:
    for line in f:
        (bigram, count) = line.split("\t")
        english_bigram_counts[bigram] = int(count)
# Normalised bigram distribution, used as the default target by scytale_break.
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
36
# Candidate keywords for the dictionary attacks (one word per line);
# default wordlist for keyword_break / keyword_break_mp.
with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]
39
# Lookup table for undoing multiplication mod 26:
# modular_division_table[m][p] holds a value q with (q * m) % 26 == p.
# When m shares a factor with 26 several q values collide and the largest
# one wins; products that cannot occur are left at 0.
modular_division_table = [[0] * 26 for _ in range(26)]
for multiplicand in range(26):
    for quotient in range(26):
        product = (quotient * multiplicand) % 26
        modular_division_table[multiplicand][product] = quotient
45
46
def sanitise(text):
    """Lowercase text and drop every character that is not an ASCII letter.

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    return ''.join(character.lower()
                   for character in text
                   if character in string.ascii_letters)
57
def ngrams(text, n):
    """Return every contiguous length-n slice of text, in order.
    A text shorter than n yields an empty list.

    >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
     'nf', 'fo', 'ox']
    >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
    ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
     'rown', 'ownf', 'wnfo', 'nfox']
    """
    starts = range(len(text) - n + 1)
    return [text[start:start + n] for start in starts]
69
def every_nth(text, n):
    """Return n strings, the ith of which consists of every nth character
    of text starting from the ith.

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    """
    # Chop into rows of length n, then read the table column by column.
    rows = [text[start:start + n] for start in range(0, len(text), n)]
    columns = zip_longest(*rows, fillvalue='')
    return [''.join(column) for column in columns]
84
def combine_every_nth(split_text):
    """Invert every_nth: interleave the given strings back into one text.

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    interleaved = zip_longest(*split_text, fillvalue='')
    return ''.join(letter for group in interleaved for letter in group)
97
98
def frequencies(text):
    """Count the number of occurrences of each character in text.

    Returns a collections.Counter, so looking up a character that does
    not occur gives 0 rather than raising KeyError.

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
        'dog').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
     ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
     ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
     ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> frequencies('abcdefabcdef')['x']
    0
    """
    # Counter consumes the string directly; the old pass-through
    # generator expression (and the commented-out defaultdict loop it
    # replaced) added nothing.
    return collections.Counter(text)

# Alias for call sites that count letters specifically.
letter_frequencies = frequencies
132
def deduplicate(text):
    """Return the distinct elements of text as a list, keeping only the
    first occurrence of each element and preserving that order."""
    seen = set()
    distinct = []
    for element in text:
        if element not in seen:
            seen.add(element)
            distinct.append(element)
    return distinct
135
136
137
def caesar_encipher_letter(letter, shift):
    """Encipher a single letter with a Caesar shift, preserving case.
    Anything that is not an ASCII letter is returned unchanged.

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('x', 2)
    'z'
    >>> caesar_encipher_letter('y', 2)
    'a'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('z', -1)
    'y'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    if letter not in string.ascii_letters:
        return letter
    alphabet_start = ord('A') if letter in string.ascii_uppercase else ord('a')
    return chr(alphabet_start + (ord(letter) - alphabet_start + shift) % 26)
167
def caesar_decipher_letter(letter, shift):
    """Decipher a letter, given a shift amount

    Implemented as the inverse of caesar_encipher_letter by shifting
    back by the same amount; non-letters pass through unchanged.

    >>> caesar_decipher_letter('b', 1)
    'a'
    >>> caesar_decipher_letter('b', 2)
    'z'
    """
    return caesar_encipher_letter(letter, -shift)
177
def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift.
    Non-letters (spaces, punctuation) are left in place.

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('abc', 2)
    'cde'
    >>> caesar_encipher('abcxyz', 2)
    'cdezab'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    """
    return ''.join(caesar_encipher_letter(character, shift)
                   for character in message)
192
def caesar_decipher(message, shift):
    """Decipher a message enciphered with the Caesar cipher of given shift.

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cde', 2)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    """
    return caesar_encipher(message, -shift)
204
def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Encipher a single letter with the affine map x -> multiplier*x + adder
    (mod 26).  one_based selects whether 'a' is numbered 1 (True) or 0
    (False).  Non-letters pass through unchanged; case is preserved.

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
             for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
             for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    if letter not in string.ascii_letters:
        return letter
    alphabet_start = ord('A') if letter in string.ascii_uppercase else ord('a')
    offset = 1 if one_based else 0
    plain_number = ord(letter) - alphabet_start + offset
    cipher_number = (plain_number * multiplier + adder) % 26 - offset
    return chr(cipher_number % 26 + alphabet_start)
227
def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a single affine-enciphered letter, undoing
    x -> multiplier*x + adder (mod 26) via the precomputed
    modular_division_table.  Non-letters pass through unchanged.

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
             for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
             for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    if letter not in string.ascii_letters:
        return letter
    alphabet_start = ord('A') if letter in string.ascii_uppercase else ord('a')
    offset = 1 if one_based else 0
    cipher_number = ord(letter) - alphabet_start + offset
    # "Divide" by the multiplier using the table of modular inverses.
    plain_number = modular_division_table[multiplier][(cipher_number - adder) % 26]
    return chr((plain_number - offset) % 26 + alphabet_start)
251
def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a whole message with the affine cipher, letter by letter.

    >>> affine_encipher('hours passed during which jerico tried every ' \
            'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    return ''.join(affine_encipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
262
def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a whole affine-enciphered message, letter by letter.

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
            'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    return ''.join(affine_decipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
273
274
def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Find the cipher alphabet given a keyword.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword:
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 0)
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    clean_keyword = sanitise(keyword)
    if wrap_alphabet == 0:
        return ''.join(deduplicate(clean_keyword + string.ascii_lowercase))
    if wrap_alphabet == 1:
        wrap_from = deduplicate(clean_keyword)[-1]
    else:
        wrap_from = sorted(clean_keyword)[-1]
    # Start the filler alphabet just after the chosen letter, then wrap
    # around; deduplicate removes the keyword letters and repeats.
    start = string.ascii_lowercase.find(wrap_from) + 1
    return ''.join(deduplicate(clean_keyword +
                               string.ascii_lowercase[start:] +
                               string.ascii_lowercase))
307
308
def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Enciphers a message with a keyword substitution cipher.
    The message is lowercased before enciphering.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword:
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 0)
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 1)
    'lskl dskkbus'
    >>> keyword_encipher('test message', 'bayes', 2)
    'qspq jsppbcs'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    to_cipher = str.maketrans(string.ascii_lowercase, cipher_alphabet)
    return message.lower().translate(to_cipher)
329
def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Deciphers a message with a keyword substitution cipher.
    The message is lowercased before deciphering.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword:
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
    'test message'
    >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
    'test message'
    >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
    'test message'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    # Same table as keyword_encipher, mapped in the opposite direction.
    to_plain = str.maketrans(cipher_alphabet, string.ascii_lowercase)
    return message.lower().translate(to_plain)
350
351 def scytale_encipher(message, rows):
352 """Enciphers using the scytale transposition cipher.
353 Message is padded with spaces to allow all rows to be the same length.
354
355 >>> scytale_encipher('thequickbrownfox', 3)
356 'tcnhkfeboqrxuo iw '
357 >>> scytale_encipher('thequickbrownfox', 4)
358 'tubnhirfecooqkwx'
359 >>> scytale_encipher('thequickbrownfox', 5)
360 'tubn hirf ecoo qkwx '
361 >>> scytale_encipher('thequickbrownfox', 6)
362 'tqcrnxhukof eibwo '
363 >>> scytale_encipher('thequickbrownfox', 7)
364 'tqcrnx hukof eibwo '
365 """
366 if len(message) % rows != 0:
367 message += ' '*(rows - len(message) % rows)
368 row_length = round(len(message) / rows)
369 slices = [message[i:i+row_length]
370 for i in range(0, len(message), row_length)]
371 return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])
372
373 def scytale_decipher(message, rows):
374 """Deciphers using the scytale transposition cipher.
375 Assumes the message is padded so that all rows are the same length.
376
377 >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
378 'thequickbrownfox '
379 >>> scytale_decipher('tubnhirfecooqkwx', 4)
380 'thequickbrownfox'
381 >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
382 'thequickbrownfox '
383 >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
384 'thequickbrownfox '
385 >>> scytale_decipher('tqcrnx hukof eibwo ', 7)
386 'thequickbrownfox '
387 """
388 cols = round(len(message) / rows)
389 columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
390 return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
391
392
def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis: tries all 26
    shifts and keeps the one whose letter-frequency profile is closest
    (under metric) to target_counts.  Returns (best_shift, best_fit).

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
            'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
            'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.42152901235832...)
    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
            'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
    (13, 0.316029208075451...)
    """
    sanitised_message = sanitise(message)
    best_shift, best_fit = 0, float('inf')
    for trial_shift in range(26):
        trial_plaintext = caesar_decipher(sanitised_message, trial_shift)
        trial_counts = message_frequency_scaling(
            letter_frequencies(trial_plaintext))
        trial_fit = metric(target_counts, trial_counts)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(
                         trial_shift, trial_fit, trial_plaintext[:50]))
        if trial_fit < best_fit:
            best_shift, best_fit = trial_shift, trial_fit
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit
425
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis

    Exhaustively tries every decipherment key: both one-based
    conventions, every odd multiplier 1..25, and every adder 0..25,
    scoring each candidate plaintext by the metric-distance between its
    scaled letter frequencies and target_counts.
    Returns ((best_multiplier, best_adder, best_one_based), best_fit).

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
        'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
        'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
        'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
        'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
        'clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    for one_based in [True, False]:
        # Odd multipliers only; even ones share a factor with 26 and so
        # cannot be valid affine keys.  NOTE(review): 13 is also not
        # coprime with 26, so it is a wasted (but harmless) candidate.
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                counts = message_frequency_scaling(letter_frequencies(plaintext))
                fit = metric(target_counts, counts)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                # Smaller distance to the target distribution is better.
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit
467
def keyword_break(message,
                  wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    Tries every keyword in wordlist with each of the three wrap_alphabet
    conventions, scoring candidate plaintexts by the metric-distance of
    their scaled letter frequencies from target_counts.
    Returns ((best_keyword, best_wrap_alphabet), best_fit).
    Single-process version; see keyword_break_mp for the parallel one.

    >>> keyword_break(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    best_keyword = ''
    # NOTE(review): initialised to True rather than an int, but the first
    # loop iteration always overwrites it (any fit < inf).
    best_wrap_alphabet = True
    best_fit = float("inf")
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            counts = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_counts, counts)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(plaintext)[:50]))
            # Smaller distance to the target distribution is better.
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit
503
def keyword_break_mp(message,
                     wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis, fanning the candidate keys out over a process
    pool.  Returns ((best_keyword, best_wrap_alphabet), best_fit).

    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    with Pool() as pool:
        # Gotcha: the helper function here needs to be defined at the top level
        # (limitation of Pool.starmap)
        candidate_keys = [(message, word, wrap, metric, target_counts,
                           message_frequency_scaling)
                          for word in wordlist
                          for wrap in range(3)]
        scored = pool.starmap(keyword_break_one, candidate_keys, chunksize)
        return min(scored, key=lambda candidate: candidate[1])
526
def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    """Score one keyword/wrap_alphabet candidate for keyword_break_mp.
    Returns ((keyword, wrap_alphabet), fit); smaller fit is better.
    Must stay at module top level so Pool.starmap can pickle it."""
    plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    scaled_counts = message_frequency_scaling(letter_frequencies(plaintext))
    fit = metric(target_counts, scaled_counts)
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
    return (keyword, wrap_alphabet), fit
536
def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise,
                  max_key_length=20):
    """Breaks a Scytale cipher by trying every row count ("key") from 1
    up to (but not including) max_key_length, scoring each candidate
    decryption by the metric-distance between its scaled bigram
    frequencies and target_counts.  Only keys that divide the message
    length exactly are tried, since scytale_decipher assumes a fully
    padded message; key 1 always qualifies, so a best key is always found.
    Returns (best_key, best_fit).

    max_key_length generalises the previously hard-coded bound of 20 and
    defaults to it, so existing callers see identical behaviour.

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
        'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
        'aek') # doctest: +ELLIPSIS
    (6, 0.83453041115025...)
    """
    best_key = 0
    best_fit = float("inf")
    for key in range(1, max_key_length):
        if len(message) % key == 0:
            plaintext = scytale_decipher(message, key)
            counts = message_frequency_scaling(frequencies(
                ngrams(sanitise(plaintext), 2)))
            fit = metric(target_counts, counts)
            logger.debug('Scytale break attempt using key {0} gives fit of '
                         '{1} and decrypt starting: {2}'.format(key,
                             fit, sanitise(plaintext)[:50]))
            # Smaller distance to the target bigram distribution is better.
            if fit < best_fit:
                best_fit = fit
                best_key = key
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit
566
567
if __name__ == "__main__":
    # When run as a script, execute all the doctests in this module.
    import doctest
    doctest.testmod()