Column transposition working, needs more tests
[cipher-tools.git] / cipher.py
1 import string
2 import collections
3 import norms
4 import logging
5 import math
6 from itertools import zip_longest, repeat
7 from segment import segment
8 from multiprocessing import Pool
9
10 # To time a run:
11 #
12 # import timeit
13 # c5a = open('2012/5a.ciphertext', 'r').read()
14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
16
# Module-level logging: everything goes to cipher.log; default threshold is
# WARNING (uncomment a line below for chattier output).
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)

# English single-letter frequency counts, loaded at import time from a
# tab-separated "letter<TAB>count" file in the working directory.
english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
    for line in f:
        (letter, count) = line.split("\t")
        english_counts[letter] = int(count)
normalised_english_counts = norms.normalise(english_counts)

# English bigram frequency counts, same file format.
english_bigram_counts = collections.defaultdict(int)
with open('count_2l.txt', 'r') as f:
    for line in f:
        (bigram, count) = line.split("\t")
        english_bigram_counts[bigram] = int(count)
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)

# Candidate keywords for the dictionary-based breakers, one word per line.
with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]

# Modular division lookup: modular_division_table[b][c] == a such that
# (a * b) % 26 == c.  Only values of b coprime with 26 have a true inverse;
# for other rows the last assignment wins, so those entries are not
# well-defined quotients (callers only use odd multipliers in practice).
modular_division_table = [[0]*26 for x in range(26)]
for a in range(26):
    for b in range(26):
        c = (a * b) % 26
        modular_division_table[b][c] = a
45
46
def sanitise(text):
    """Strip everything except letters from text and lowercase the result.

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    return ''.join(c.lower() for c in text if c in string.ascii_letters)
57
def ngrams(text, n):
    """Return every contiguous n-character slice of text, in order.

    >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
     'nf', 'fo', 'ox']
    >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
    ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
     'rown', 'ownf', 'wnfo', 'nfox']
    """
    # There are len(text) - n + 1 possible start positions.
    last_start = len(text) - n + 1
    return [text[start:start + n] for start in range(last_start)]
69
def every_nth(text, n, fillvalue=''):
    """Split text into n strings, the i-th holding characters i, i+n, i+2n, ...
    Short trailing columns are padded with fillvalue.

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
    """
    # Chop into consecutive chunks of n, then read columns across the chunks.
    chunks = (text[start:start + n] for start in range(0, len(text), n))
    return [''.join(column)
            for column in zip_longest(*chunks, fillvalue=fillvalue)]
86
def combine_every_nth(split_text):
    """Reassemble a text that was split apart by every_nth.

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    # Reading the columns of the split strings restores the original order.
    interleaved = zip_longest(*split_text, fillvalue='')
    return ''.join(''.join(chars) for chars in interleaved)
99
def transpose(items, transposition):
    """Moves items around according to the given transposition:
    position p of the result takes the item at index transposition[p].

    >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
    ['d', 'b', 'c', 'a']
    >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
    [13, 12, 14, 11, 15, 10]
    """
    # A gather is just a comprehension; no need to preallocate and assign.
    return [items[t] for t in transposition]
114
def untranspose(items, transposition):
    """Invert a transpose: item p of the input goes back to position
    transposition[p] of the result.

    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
    ['a', 'b', 'c', 'd']
    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
    [10, 11, 12, 13, 14, 15]
    """
    # Scatter back into place; assumes transposition is a permutation.
    result = [''] * len(transposition)
    for position, target in enumerate(transposition):
        result[target] = items[position]
    return result
129
130
def frequencies(text):
    """Count the number of occurrences of each character in text

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
         'dog').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
     ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
     ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
     ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
         'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    """
    # collections.Counter is the stdlib tally.  Like the defaultdict(int)
    # it replaces, a missing key reads as 0 -- but Counter does not insert
    # the key on lookup, so merely reading a count can no longer grow the
    # mapping (e.g. while another piece of code is iterating over it).
    return collections.Counter(text)
letter_frequencies = frequencies
161
def deduplicate(text):
    """Return the distinct elements of text as a list, keeping only the
    first occurrence of each and preserving that order."""
    seen = set()
    unique = []
    for item in text:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique
164
165
166
def caesar_encipher_letter(letter, shift):
    """Shift a single letter through the alphabet, wrapping at the ends.
    Case is preserved; non-letters pass through unchanged.

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('a', 2)
    'c'
    >>> caesar_encipher_letter('b', 2)
    'd'
    >>> caesar_encipher_letter('x', 2)
    'z'
    >>> caesar_encipher_letter('y', 2)
    'a'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('z', -1)
    'y'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    if letter not in string.ascii_letters:
        return letter
    # Pick the right alphabet origin so case survives the shift.
    base = ord('A') if letter in string.ascii_uppercase else ord('a')
    return chr((ord(letter) - base + shift) % 26 + base)
196
def caesar_decipher_letter(letter, shift):
    """Undo a Caesar shift on a single letter.

    >>> caesar_decipher_letter('b', 1)
    'a'
    >>> caesar_decipher_letter('b', 2)
    'z'
    """
    # Deciphering is just enciphering with the opposite shift.
    return caesar_encipher_letter(letter, -shift)
206
def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift.

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('abc', 2)
    'cde'
    >>> caesar_encipher('abcxyz', 2)
    'cdezab'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    """
    return ''.join(caesar_encipher_letter(character, shift)
                   for character in message)
221
def caesar_decipher(message, shift):
    """Decipher a message with the Caesar cipher of given shift
    (docstring previously said "Encipher" by copy-paste mistake).

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cde', 2)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    """
    # Deciphering is enciphering with the negated shift.
    return caesar_encipher(message, -shift)
233
def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Apply the affine map (multiplier * n + adder) mod 26 to one letter.
    one_based selects whether 'a' is numbered 1 (True) or 0 (False).
    Case is preserved; non-letters pass through unchanged.

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
            for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
            for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    if letter not in string.ascii_letters:
        return letter
    base = ord('A') if letter in string.ascii_uppercase else ord('a')
    # With one-based numbering, shift in before the map and out after it.
    offset = 1 if one_based else 0
    number = ord(letter) - base + offset
    enciphered = (number * multiplier + adder) % 26 - offset
    return chr(enciphered % 26 + base)
256
def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter, given a multiplier and adder
    (docstring previously said "Encipher" by copy-paste mistake).
    Case is preserved; non-letters pass through unchanged.

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
            for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
            for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        cipher_number = ord(letter) - alphabet_start
        if one_based: cipher_number += 1
        # Undo the affine map: subtract the adder, then "divide" by the
        # multiplier via the precomputed modular division lookup table.
        plaintext_number = ( modular_division_table[multiplier]
                                                   [(cipher_number - adder) % 26] )
        if one_based: plaintext_number -= 1
        return chr(plaintext_number % 26 + alphabet_start)
    else:
        return letter
280
def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a whole message with the affine cipher, letter by letter.

    >>> affine_encipher('hours passed during which jerico tried every ' \
            'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    return ''.join(affine_encipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
291
def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a whole affine-enciphered message, letter by letter.

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
            'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    return ''.join(affine_decipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
302
303
def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Build the substitution alphabet for a keyword cipher.
    wrap_alphabet selects where the remaining letters start:
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 0)
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    clean_keyword = sanitise(keyword)
    if wrap_alphabet == 0:
        return ''.join(deduplicate(clean_keyword + string.ascii_lowercase))
    # Find the letter after which the rest of the alphabet should resume.
    if wrap_alphabet == 1:
        wrap_from = deduplicate(clean_keyword)[-1]
    else:
        wrap_from = sorted(clean_keyword)[-1]
    resume_at = string.ascii_lowercase.find(wrap_from) + 1
    # Appending the full alphabet at the end fills in any letters that come
    # before the resume point; deduplicate keeps only first occurrences.
    return ''.join(deduplicate(clean_keyword +
                               string.ascii_lowercase[resume_at:] +
                               string.ascii_lowercase))
336
337
def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Encipher a message with a keyword substitution cipher.
    wrap_alphabet selects where the non-keyword letters start:
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 0)
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 1)
    'lskl dskkbus'
    >>> keyword_encipher('test message', 'bayes', 2)
    'qspq jsppbcs'
    """
    # Map plain alphabet -> cipher alphabet and translate in one pass.
    translation = ''.maketrans(
        string.ascii_lowercase,
        keyword_cipher_alphabet_of(keyword, wrap_alphabet))
    return message.lower().translate(translation)
358
def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Decipher a message enciphered with a keyword substitution cipher.
    wrap_alphabet selects where the non-keyword letters start:
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
    'test message'
    >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
    'test message'
    >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
    'test message'
    """
    # Reverse direction of keyword_encipher: cipher alphabet -> plain.
    translation = ''.maketrans(
        keyword_cipher_alphabet_of(keyword, wrap_alphabet),
        string.ascii_lowercase)
    return message.lower().translate(translation)
379
380 def scytale_encipher(message, rows):
381 """Enciphers using the scytale transposition cipher.
382 Message is padded with spaces to allow all rows to be the same length.
383
384 >>> scytale_encipher('thequickbrownfox', 3)
385 'tcnhkfeboqrxuo iw '
386 >>> scytale_encipher('thequickbrownfox', 4)
387 'tubnhirfecooqkwx'
388 >>> scytale_encipher('thequickbrownfox', 5)
389 'tubn hirf ecoo qkwx '
390 >>> scytale_encipher('thequickbrownfox', 6)
391 'tqcrnxhukof eibwo '
392 >>> scytale_encipher('thequickbrownfox', 7)
393 'tqcrnx hukof eibwo '
394 """
395 if len(message) % rows != 0:
396 message += ' '*(rows - len(message) % rows)
397 row_length = round(len(message) / rows)
398 slices = [message[i:i+row_length]
399 for i in range(0, len(message), row_length)]
400 return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])
401
402 def scytale_decipher(message, rows):
403 """Deciphers using the scytale transposition cipher.
404 Assumes the message is padded so that all rows are the same length.
405
406 >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
407 'thequickbrownfox '
408 >>> scytale_decipher('tubnhirfecooqkwx', 4)
409 'thequickbrownfox'
410 >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
411 'thequickbrownfox '
412 >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
413 'thequickbrownfox '
414 >>> scytale_decipher('tqcrnx hukof eibwo ', 7)
415 'thequickbrownfox '
416 """
417 cols = round(len(message) / rows)
418 columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
419 return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
420
421
def transpositions_of(keyword):
    """Derive a column transposition from a keyword: for each letter of the
    deduplicated keyword in alphabetical order, record that letter's index
    in the keyword.

    >>> transpositions_of('clever')
    [0, 2, 1, 4, 3]
    """
    key = deduplicate(keyword)
    return [key.index(letter) for letter in sorted(key)]
432
def column_transposition_encipher(message, keyword):
    """Encipher with a column transposition cipher keyed by a keyword.

    >>> column_transposition_encipher('hellothere', 'clever')
    'hleolteher'
    """
    order = transpositions_of(keyword)
    # Split into columns, shuffle them per the keyword, then re-interleave.
    columns = every_nth(message, len(order), fillvalue=' ')
    return combine_every_nth(transpose(columns, order))
442
def column_transposition_decipher(message, keyword):
    """Decipher a column transposition cipher keyed by a keyword.

    >>> column_transposition_decipher('hleolteher', 'clever')
    'hellothere'
    """
    order = transpositions_of(keyword)
    # Split into columns, undo the keyword shuffle, then re-interleave.
    columns = every_nth(message, len(order), fillvalue=' ')
    return combine_every_nth(untranspose(columns, order))
452
453
454
def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis: try all 26 shifts
    and keep the one whose letter frequencies best match English.

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
          'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
          'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.42152901235832...)
    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
          'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
    (13, 0.316029208075451...)
    """
    sanitised_message = sanitise(message)
    best_shift, best_fit = 0, float("inf")
    for shift in range(26):
        plaintext = caesar_decipher(sanitised_message, shift)
        # Lower metric value = closer match to English letter frequencies.
        counts = message_frequency_scaling(letter_frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
        if fit < best_fit:
            best_shift, best_fit = shift, fit
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit
487
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
          'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
          'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
          'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
          'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
          'clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    # Best candidate so far; these initial values are always overwritten on
    # the first loop iteration (0 is not itself a valid multiplier).
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    # Exhaustive search over both letter-numbering conventions, all odd
    # multipliers and all 26 adders, keeping the lowest-metric decrypt.
    # NOTE(review): range(1, 26, 2) includes 13, which is not coprime with
    # 26 and has no modular inverse -- confirm trying it is harmless.
    for one_based in [True, False]:
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                # Lower metric value = closer to English letter frequencies.
                counts = message_frequency_scaling(letter_frequencies(plaintext))
                fit = metric(target_counts, counts)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit
529
def keyword_break(message,
                  wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    best_keyword = ''
    # Fix: this sentinel was previously initialised to True even though it
    # holds a wrap-alphabet mode (an int in 0..2); start from 0 instead.
    best_wrap_alphabet = 0
    best_fit = float("inf")
    # Try every (wrap mode, keyword) pair, keeping the lowest-metric decrypt.
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            counts = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_counts, counts)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit
565
def keyword_break_mp(message,
                     wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    # Parallel version of keyword_break: fan the (keyword, wrap) candidates
    # out across a process pool and keep the result with the lowest fit.
    with Pool() as pool:
        helper_args = [(message, word, wrap, metric, target_counts,
                        message_frequency_scaling)
                       for word in wordlist for wrap in range(3)]
        # Gotcha: the helper function here needs to be defined at the top level
        #   (limitation of Pool.starmap)
        breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
        return min(breaks, key=lambda k: k[1])
588
def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    """Score a single (keyword, wrap_alphabet) candidate for keyword_break_mp.
    Lives at module top level so multiprocessing can pickle it."""
    plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    fit = metric(target_counts,
                 message_frequency_scaling(letter_frequencies(plaintext)))
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
    return (keyword, wrap_alphabet), fit
598
def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a Scytale cipher by trying row counts 1-19 and scoring each
    decrypt's bigram frequencies against English.

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
          'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
          'aek') # doctest: +ELLIPSIS
    (6, 0.83453041115025...)
    """
    best_key, best_fit = 0, float("inf")
    for key in range(1, 20):
        # Only row counts that divide the message length are candidates.
        if len(message) % key != 0:
            continue
        plaintext = scytale_decipher(message, key)
        counts = message_frequency_scaling(frequencies(
            ngrams(sanitise(plaintext), 2)))
        fit = metric(target_counts, counts)
        logger.debug('Scytale break attempt using key {0} gives fit of '
                     '{1} and decrypt starting: {2}'.format(key,
                         fit, sanitise(plaintext)[:50]))
        if fit < best_fit:
            best_key, best_fit = key, fit
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit
628
629
# Run the embedded doctests when this module is executed as a script.
if __name__ == "__main__":
    import doctest
    doctest.testmod()