[cipher-tools.git] / cipher.py
import string
import collections
import norms
import logging
import math
from itertools import zip_longest, repeat
from segment import segment
from multiprocessing import Pool

# To time a run:
#
# import timeit
# c5a = open('2012/5a.ciphertext', 'r').read()
# timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)

logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)

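# Reference statistics and the keyword dictionary are loaded at import time:
# count_1l.txt and count_2l.txt hold tab-separated letter and bigram counts,
# and words.txt holds one candidate keyword per line.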
english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
    for line in f:
        (letter, count) = line.split("\t")
        english_counts[letter] = int(count)
normalised_english_counts = norms.normalise(english_counts)

english_bigram_counts = collections.defaultdict(int)
with open('count_2l.txt', 'r') as f:
    for line in f:
        (bigram, count) = line.split("\t")
        english_bigram_counts[bigram] = int(count)
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)

with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]

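# modular_division_table[b][c] holds the a for which (a * b) % 26 == c,
# i.e. "c divided by b, modulo 26". The entries are only unambiguous for
# multipliers coprime with 26 (odd numbers other than 13); the table is
# used by affine_decipher_letter below.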
modular_division_table = [[0]*26 for x in range(26)]
for a in range(26):
    for b in range(26):
        c = (a * b) % 26
        modular_division_table[b][c] = a


def sanitise(text):
    """Remove all non-alphabetic characters and convert the text to lowercase

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    sanitised = [c.lower() for c in text if c in string.ascii_letters]
    return ''.join(sanitised)

def ngrams(text, n):
    """Returns all n-grams of a text

    >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
     'nf', 'fo', 'ox']
    >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
    ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
     'rown', 'ownf', 'wnfo', 'nfox']
    """
    return [text[i:i+n] for i in range(len(text)-n+1)]

def every_nth(text, n, fillvalue=''):
    """Returns n strings, each of which consists of every nth character,
    starting with the 0th, 1st, 2nd, ... (n-1)th character

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
    """
    split_text = [text[i:i+n] for i in range(0, len(text), n)]
    return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]

def combine_every_nth(split_text):
    """Reforms a text split into every_nth strings

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    return ''.join([''.join(l)
                    for l in zip_longest(*split_text, fillvalue='')])

def transpose(items, transposition):
    """Moves items around according to the given transposition

    >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
    ['d', 'b', 'c', 'a']
    >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
    [13, 12, 14, 11, 15, 10]
    """
    transposed = list(repeat('', len(transposition)))
    for p, t in enumerate(transposition):
        transposed[p] = items[t]
    return transposed

def untranspose(items, transposition):
    """Undoes a transpose

    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
    ['a', 'b', 'c', 'd']
    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
    [10, 11, 12, 13, 14, 15]
    """
    transposed = list(repeat('', len(transposition)))
    for p, t in enumerate(transposition):
        transposed[t] = items[p]
    return transposed


def frequencies(text):
    """Count the number of occurrences of each character in text

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
            'dog').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
     ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
     ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
     ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
            '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
     ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
     ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
     ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
     ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
            'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    """
    counts = collections.defaultdict(int)
    for c in text:
        counts[c] += 1
    return counts
letter_frequencies = frequencies

def deduplicate(text):
    """Return the distinct elements of text, in order of first appearance

    >>> deduplicate('bayesbay')
    ['b', 'a', 'y', 'e', 's']
    """
    return list(collections.OrderedDict.fromkeys(text))


def caesar_encipher_letter(letter, shift):
    """Encipher a letter, given a shift amount

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('a', 2)
    'c'
    >>> caesar_encipher_letter('b', 2)
    'd'
    >>> caesar_encipher_letter('x', 2)
    'z'
    >>> caesar_encipher_letter('y', 2)
    'a'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('z', -1)
    'y'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        return chr(((ord(letter) - alphabet_start + shift) % 26) +
                   alphabet_start)
    else:
        return letter

def caesar_decipher_letter(letter, shift):
    """Decipher a letter, given a shift amount

    >>> caesar_decipher_letter('b', 1)
    'a'
    >>> caesar_decipher_letter('b', 2)
    'z'
    """
    return caesar_encipher_letter(letter, -shift)

def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('abc', 2)
    'cde'
    >>> caesar_encipher('abcxyz', 2)
    'cdezab'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    """
    enciphered = [caesar_encipher_letter(l, shift) for l in message]
    return ''.join(enciphered)

def caesar_decipher(message, shift):
    """Decipher a message enciphered with the Caesar cipher of given shift

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cde', 2)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    """
    return caesar_encipher(message, -shift)

def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Encipher a letter, given a multiplier and adder

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
            for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
            for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        letter_number = ord(letter) - alphabet_start
        if one_based: letter_number += 1
        cipher_number = (letter_number * multiplier + adder) % 26
        if one_based: cipher_number -= 1
        return chr(cipher_number % 26 + alphabet_start)
    else:
        return letter

def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter, given a multiplier and adder

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
            for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
            for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        cipher_number = ord(letter) - alphabet_start
        if one_based: cipher_number += 1
        plaintext_number = ( modular_division_table[multiplier]
                                 [(cipher_number - adder) % 26] )
        if one_based: plaintext_number -= 1
        return chr(plaintext_number % 26 + alphabet_start)
    else:
        return letter

def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a message with the affine cipher

    >>> affine_encipher('hours passed during which jerico tried every ' \
            'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(enciphered)

def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a message enciphered with the affine cipher

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
            'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(enciphered)


def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Find the cipher alphabet given a keyword.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 0)
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    if wrap_alphabet == 0:
        cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
                                              string.ascii_lowercase))
    else:
        if wrap_alphabet == 1:
            last_keyword_letter = deduplicate(sanitise(keyword))[-1]
        else:
            last_keyword_letter = sorted(sanitise(keyword))[-1]
        last_keyword_position = string.ascii_lowercase.find(
            last_keyword_letter) + 1
        cipher_alphabet = ''.join(
            deduplicate(sanitise(keyword) +
                        string.ascii_lowercase[last_keyword_position:] +
                        string.ascii_lowercase))
    return cipher_alphabet


def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Enciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 0)
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 1)
    'lskl dskkbus'
    >>> keyword_encipher('test message', 'bayes', 2)
    'qspq jsppbcs'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
    return message.lower().translate(cipher_translation)

def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Deciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
    'test message'
    >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
    'test message'
    >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
    'test message'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
    return message.lower().translate(cipher_translation)

def scytale_encipher(message, rows):
    """Enciphers using the scytale transposition cipher.
    Message is padded with spaces to allow all rows to be the same length.

    >>> scytale_encipher('thequickbrownfox', 3)
    'tcnhkfeboqrxuo iw '
    >>> scytale_encipher('thequickbrownfox', 4)
    'tubnhirfecooqkwx'
    >>> scytale_encipher('thequickbrownfox', 5)
    'tubn hirf ecoo qkwx '
    >>> scytale_encipher('thequickbrownfox', 6)
    'tqcrnxhukof eibwo '
    >>> scytale_encipher('thequickbrownfox', 7)
    'tqcrnx hukof  eibwo  '
    """
    if len(message) % rows != 0:
        message += ' '*(rows - len(message) % rows)
    row_length = round(len(message) / rows)
    slices = [message[i:i+row_length]
              for i in range(0, len(message), row_length)]
    return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])

def scytale_decipher(message, rows):
    """Deciphers using the scytale transposition cipher.
    Assumes the message is padded so that all rows are the same length.

    >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
    'thequickbrownfox  '
    >>> scytale_decipher('tubnhirfecooqkwx', 4)
    'thequickbrownfox'
    >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
    'thequickbrownfox    '
    >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
    'thequickbrownfox  '
    >>> scytale_decipher('tqcrnx hukof  eibwo  ', 7)
    'thequickbrownfox     '
    """
    cols = round(len(message) / rows)
    columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
    return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])


def transpositions_of(keyword):
    """Finds the transposition given by a keyword: for each letter of the
    deduplicated keyword in alphabetical order, the position of that letter
    in the keyword.

    >>> transpositions_of('clever')
    [0, 2, 1, 4, 3]
    """
    transpositions = []
    key = deduplicate(keyword)
    for l in sorted(key):
        transpositions += [key.index(l)]
    return transpositions

def column_transposition_encipher(message, keyword):
    """Enciphers with a column transposition cipher, reordering the columns
    of the message (written in rows as wide as the keyword) into the
    alphabetical order of the keyword's letters."""
    transpositions = transpositions_of(keyword)
    columns = every_nth(message, len(transpositions), fillvalue=' ')
    transposed_columns = transpose(columns, transpositions)
    return combine_every_nth(transposed_columns)
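
# A matching decipher is not defined in this file; a minimal sketch, assuming
# the encipher form above, would undo the column permutation with untranspose:
#
# def column_transposition_decipher(message, keyword):
#     transpositions = transpositions_of(keyword)
#     columns = every_nth(message, len(transpositions), fillvalue=' ')
#     untransposed_columns = untranspose(columns, transpositions)
#     return combine_every_nth(untransposed_columns)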
def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
            'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
            'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.42152901235832...)
    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
            'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
    (13, 0.316029208075451...)
    """
    sanitised_message = sanitise(message)
    best_shift = 0
    best_fit = float("inf")
    for shift in range(26):
        plaintext = caesar_decipher(sanitised_message, shift)
        counts = message_frequency_scaling(letter_frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
        if fit < best_fit:
            best_fit = fit
            best_shift = shift
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit

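# Example use of caesar_break (a sketch; `ciphertext` stands for any string
# holding the enciphered message):
#
# shift, fit = caesar_break(ciphertext)
# print(caesar_decipher(sanitise(ciphertext), shift))
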
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
            'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
            'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
            'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
            'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
            'clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    for one_based in [True, False]:
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                counts = message_frequency_scaling(letter_frequencies(plaintext))
                fit = metric(target_counts, counts)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit

def keyword_break(message,
                  wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break(keyword_encipher('this is a test message for the ' \
            'keyword decipherment', 'elephant', 1), \
            wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    best_keyword = ''
    best_wrap_alphabet = 0
    best_fit = float("inf")
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            counts = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_counts, counts)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit

def keyword_break_mp(message,
                     wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
            'keyword decipherment', 'elephant', 1), \
            wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    with Pool() as pool:
        helper_args = [(message, word, wrap, metric, target_counts,
                        message_frequency_scaling)
                       for word in wordlist for wrap in range(3)]
        # Gotcha: the helper function here needs to be defined at the top level
        # (limitation of Pool.starmap)
        breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
        return min(breaks, key=lambda k: k[1])

def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    counts = message_frequency_scaling(letter_frequencies(plaintext))
    fit = metric(target_counts, counts)
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
    return (keyword, wrap_alphabet), fit

def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a Scytale cipher

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
            'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
            'aek') # doctest: +ELLIPSIS
    (6, 0.83453041115025...)
    """
    best_key = 0
    best_fit = float("inf")
    for key in range(1, 20):
        if len(message) % key == 0:
            plaintext = scytale_decipher(message, key)
            counts = message_frequency_scaling(frequencies(
                ngrams(sanitise(plaintext), 2)))
            fit = metric(target_counts, counts)
            logger.debug('Scytale break attempt using key {0} gives fit of '
                         '{1} and decrypt starting: {2}'.format(key,
                             fit, sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_key = key
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit


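# Running this file directly executes the doctests above; doctest.testmod()
# is quiet unless a test fails (pass -v on the command line for a full report).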
if __name__ == "__main__":
    import doctest
    doctest.testmod()