Tidied use of flag in calling column transposition worker
[cipher-tools.git] / cipher.py
1 import string
2 import collections
3 import norms
4 import logging
5 import math
6 from itertools import zip_longest, repeat
7 from segment import segment
8 from multiprocessing import Pool
9
10 # To time a run:
11 #
12 # import timeit
13 # c5a = open('2012/5a.ciphertext', 'r').read()
14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
15 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
16
17 logger = logging.getLogger(__name__)
18 logger.addHandler(logging.FileHandler('cipher.log'))
19 logger.setLevel(logging.WARNING)
20 #logger.setLevel(logging.INFO)
21 #logger.setLevel(logging.DEBUG)
22
23 english_counts = collections.defaultdict(int)
24 with open('count_1l.txt', 'r') as f:
25 for line in f:
26 (letter, count) = line.split("\t")
27 english_counts[letter] = int(count)
28 normalised_english_counts = norms.normalise(english_counts)
29
30 english_bigram_counts = collections.defaultdict(int)
31 with open('count_2l.txt', 'r') as f:
32 for line in f:
33 (bigram, count) = line.split("\t")
34 english_bigram_counts[bigram] = int(count)
35 normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
36
37 with open('words.txt', 'r') as f:
38 keywords = [line.rstrip() for line in f]
39
40 modular_division_table = [[0]*26 for x in range(26)]
41 for a in range(26):
42 for b in range(26):
43 c = (a * b) % 26
44 modular_division_table[b][c] = a
45
46
47 def sanitise(text):
48 """Remove all non-alphabetic characters and convert the text to lowercase
49
50 >>> sanitise('The Quick')
51 'thequick'
52 >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
53 'thequickbrownfoxjumpedoverthelazydog'
54 """
55 sanitised = [c.lower() for c in text if c in string.ascii_letters]
56 return ''.join(sanitised)
57
58 def ngrams(text, n):
59 """Returns all n-grams of a text
60
61 >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
62 ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
63 'nf', 'fo', 'ox']
64 >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
65 ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
66 'rown', 'ownf', 'wnfo', 'nfox']
67 """
68 return [text[i:i+n] for i in range(len(text)-n+1)]
69
70 def every_nth(text, n, fillvalue=''):
71 """Returns n strings, each of which consists of every nth character,
72 starting with the 0th, 1st, 2nd, ... (n-1)th character
73
74 >>> every_nth(string.ascii_lowercase, 5)
75 ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
76 >>> every_nth(string.ascii_lowercase, 1)
77 ['abcdefghijklmnopqrstuvwxyz']
78 >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
79 ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
80 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
81 >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
82 ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
83 """
84 split_text = [text[i:i+n] for i in range(0, len(text), n)]
85 return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
86
87 def combine_every_nth(split_text):
88 """Reforms a text split into every_nth strings
89
90 >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
91 'abcdefghijklmnopqrstuvwxyz'
92 >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
93 'abcdefghijklmnopqrstuvwxyz'
94 >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
95 'abcdefghijklmnopqrstuvwxyz'
96 """
97 return ''.join([''.join(l)
98 for l in zip_longest(*split_text, fillvalue='')])
99
100 def transpose(items, transposition):
101 """Moves items around according to the given transposition
102
103 >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
104 ['a', 'b', 'c', 'd']
105 >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
106 ['d', 'b', 'c', 'a']
107 >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
108 [13, 12, 14, 11, 15, 10]
109 """
110 transposed = list(repeat('', len(transposition)))
111 for p, t in enumerate(transposition):
112 transposed[p] = items[t]
113 return transposed
114
115 def untranspose(items, transposition):
116 """Undoes a transpose
117
118 >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
119 ['a', 'b', 'c', 'd']
120 >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
121 ['a', 'b', 'c', 'd']
122 >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
123 [10, 11, 12, 13, 14, 15]
124 """
125 transposed = list(repeat('', len(transposition)))
126 for p, t in enumerate(transposition):
127 transposed[t] = items[p]
128 return transposed
129
130
131 def frequencies(text):
132 """Count the number of occurrences of each character in text
133
134 >>> sorted(frequencies('abcdefabc').items())
135 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
136 >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
137 'dog').items()) # doctest: +NORMALIZE_WHITESPACE
138 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
139 ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
140 ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
141 ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
142 >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
143 '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
144 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
145 ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
146 ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
147 ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
148 ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
149 >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
150 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
151 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
152 ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
153 ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
154 ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
155 """
156 counts = collections.defaultdict(int)
157 for c in text:
158 counts[c] += 1
159 return counts
160 letter_frequencies = frequencies
161
162 def deduplicate(text):
163 return list(collections.OrderedDict.fromkeys(text))
164
165
166
167 def caesar_encipher_letter(letter, shift):
168 """Encipher a letter, given a shift amount
169
170 >>> caesar_encipher_letter('a', 1)
171 'b'
172 >>> caesar_encipher_letter('a', 2)
173 'c'
174 >>> caesar_encipher_letter('b', 2)
175 'd'
176 >>> caesar_encipher_letter('x', 2)
177 'z'
178 >>> caesar_encipher_letter('y', 2)
179 'a'
180 >>> caesar_encipher_letter('z', 2)
181 'b'
182 >>> caesar_encipher_letter('z', -1)
183 'y'
184 >>> caesar_encipher_letter('a', -1)
185 'z'
186 """
187 if letter in string.ascii_letters:
188 if letter in string.ascii_uppercase:
189 alphabet_start = ord('A')
190 else:
191 alphabet_start = ord('a')
192 return chr(((ord(letter) - alphabet_start + shift) % 26) +
193 alphabet_start)
194 else:
195 return letter
196
197 def caesar_decipher_letter(letter, shift):
198 """Decipher a letter, given a shift amount
199
200 >>> caesar_decipher_letter('b', 1)
201 'a'
202 >>> caesar_decipher_letter('b', 2)
203 'z'
204 """
205 return caesar_encipher_letter(letter, -shift)
206
207 def caesar_encipher(message, shift):
208 """Encipher a message with the Caesar cipher of given shift
209
210 >>> caesar_encipher('abc', 1)
211 'bcd'
212 >>> caesar_encipher('abc', 2)
213 'cde'
214 >>> caesar_encipher('abcxyz', 2)
215 'cdezab'
216 >>> caesar_encipher('ab cx yz', 2)
217 'cd ez ab'
218 """
219 enciphered = [caesar_encipher_letter(l, shift) for l in message]
220 return ''.join(enciphered)
221
222 def caesar_decipher(message, shift):
223 """Encipher a message with the Caesar cipher of given shift
224
225 >>> caesar_decipher('bcd', 1)
226 'abc'
227 >>> caesar_decipher('cde', 2)
228 'abc'
229 >>> caesar_decipher('cd ez ab', 2)
230 'ab cx yz'
231 """
232 return caesar_encipher(message, -shift)
233
234 def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
235 """Encipher a letter, given a multiplier and adder
236
237 >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
238 for l in string.ascii_uppercase])
239 'HKNQTWZCFILORUXADGJMPSVYBE'
240 >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
241 for l in string.ascii_uppercase])
242 'FILORUXADGJMPSVYBEHKNQTWZC'
243 """
244 if letter in string.ascii_letters:
245 if letter in string.ascii_uppercase:
246 alphabet_start = ord('A')
247 else:
248 alphabet_start = ord('a')
249 letter_number = ord(letter) - alphabet_start
250 if one_based: letter_number += 1
251 cipher_number = (letter_number * multiplier + adder) % 26
252 if one_based: cipher_number -= 1
253 return chr(cipher_number % 26 + alphabet_start)
254 else:
255 return letter
256
257 def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
258 """Encipher a letter, given a multiplier and adder
259
260 >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
261 for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
262 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
263 >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
264 for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
265 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
266 """
267 if letter in string.ascii_letters:
268 if letter in string.ascii_uppercase:
269 alphabet_start = ord('A')
270 else:
271 alphabet_start = ord('a')
272 cipher_number = ord(letter) - alphabet_start
273 if one_based: cipher_number += 1
274 plaintext_number = ( modular_division_table[multiplier]
275 [(cipher_number - adder) % 26] )
276 if one_based: plaintext_number -= 1
277 return chr(plaintext_number % 26 + alphabet_start)
278 else:
279 return letter
280
281 def affine_encipher(message, multiplier=1, adder=0, one_based=True):
282 """Encipher a message
283
284 >>> affine_encipher('hours passed during which jerico tried every ' \
285 'trick he could think of', 15, 22, True)
286 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
287 """
288 enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
289 for l in message]
290 return ''.join(enciphered)
291
292 def affine_decipher(message, multiplier=1, adder=0, one_based=True):
293 """Decipher a message
294
295 >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
296 'jfaoe ls omytd jlaxe mh', 15, 22, True)
297 'hours passed during which jerico tried every trick he could think of'
298 """
299 enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
300 for l in message]
301 return ''.join(enciphered)
302
303
304 def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
305 """Find the cipher alphabet given a keyword.
306 wrap_alphabet controls how the rest of the alphabet is added
307 after the keyword.
308 0 : from 'a'
309 1 : from the last letter in the sanitised keyword
310 2 : from the largest letter in the sanitised keyword
311
312 >>> keyword_cipher_alphabet_of('bayes')
313 'bayescdfghijklmnopqrtuvwxz'
314 >>> keyword_cipher_alphabet_of('bayes', 0)
315 'bayescdfghijklmnopqrtuvwxz'
316 >>> keyword_cipher_alphabet_of('bayes', 1)
317 'bayestuvwxzcdfghijklmnopqr'
318 >>> keyword_cipher_alphabet_of('bayes', 2)
319 'bayeszcdfghijklmnopqrtuvwx'
320 """
321 if wrap_alphabet == 0:
322 cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
323 string.ascii_lowercase))
324 else:
325 if wrap_alphabet == 1:
326 last_keyword_letter = deduplicate(sanitise(keyword))[-1]
327 else:
328 last_keyword_letter = sorted(sanitise(keyword))[-1]
329 last_keyword_position = string.ascii_lowercase.find(
330 last_keyword_letter) + 1
331 cipher_alphabet = ''.join(
332 deduplicate(sanitise(keyword) +
333 string.ascii_lowercase[last_keyword_position:] +
334 string.ascii_lowercase))
335 return cipher_alphabet
336
337
338 def keyword_encipher(message, keyword, wrap_alphabet=0):
339 """Enciphers a message with a keyword substitution cipher.
340 wrap_alphabet controls how the rest of the alphabet is added
341 after the keyword.
342 0 : from 'a'
343 1 : from the last letter in the sanitised keyword
344 2 : from the largest letter in the sanitised keyword
345
346 >>> keyword_encipher('test message', 'bayes')
347 'rsqr ksqqbds'
348 >>> keyword_encipher('test message', 'bayes', 0)
349 'rsqr ksqqbds'
350 >>> keyword_encipher('test message', 'bayes', 1)
351 'lskl dskkbus'
352 >>> keyword_encipher('test message', 'bayes', 2)
353 'qspq jsppbcs'
354 """
355 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
356 cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
357 return message.lower().translate(cipher_translation)
358
359 def keyword_decipher(message, keyword, wrap_alphabet=0):
360 """Deciphers a message with a keyword substitution cipher.
361 wrap_alphabet controls how the rest of the alphabet is added
362 after the keyword.
363 0 : from 'a'
364 1 : from the last letter in the sanitised keyword
365 2 : from the largest letter in the sanitised keyword
366
367 >>> keyword_decipher('rsqr ksqqbds', 'bayes')
368 'test message'
369 >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
370 'test message'
371 >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
372 'test message'
373 >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
374 'test message'
375 """
376 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
377 cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
378 return message.lower().translate(cipher_translation)
379
380 def scytale_encipher(message, rows):
381 """Enciphers using the scytale transposition cipher.
382 Message is padded with spaces to allow all rows to be the same length.
383
384 >>> scytale_encipher('thequickbrownfox', 3)
385 'tcnhkfeboqrxuo iw '
386 >>> scytale_encipher('thequickbrownfox', 4)
387 'tubnhirfecooqkwx'
388 >>> scytale_encipher('thequickbrownfox', 5)
389 'tubn hirf ecoo qkwx '
390 >>> scytale_encipher('thequickbrownfox', 6)
391 'tqcrnxhukof eibwo '
392 >>> scytale_encipher('thequickbrownfox', 7)
393 'tqcrnx hukof eibwo '
394 """
395 if len(message) % rows != 0:
396 message += ' '*(rows - len(message) % rows)
397 row_length = round(len(message) / rows)
398 slices = [message[i:i+row_length]
399 for i in range(0, len(message), row_length)]
400 return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])
401
402 def scytale_decipher(message, rows):
403 """Deciphers using the scytale transposition cipher.
404 Assumes the message is padded so that all rows are the same length.
405
406 >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
407 'thequickbrownfox '
408 >>> scytale_decipher('tubnhirfecooqkwx', 4)
409 'thequickbrownfox'
410 >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
411 'thequickbrownfox '
412 >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
413 'thequickbrownfox '
414 >>> scytale_decipher('tqcrnx hukof eibwo ', 7)
415 'thequickbrownfox '
416 """
417 cols = round(len(message) / rows)
418 columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
419 return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
420
421
422 def transpositions_of(keyword):
423 """
424 >>> transpositions_of('clever')
425 [0, 2, 1, 4, 3]
426 """
427 key = deduplicate(keyword)
428 transpositions = [key.index(l) for l in sorted(key)]
429 return transpositions
430
431 def column_transposition_encipher(message, keyword):
432 """
433 >>> column_transposition_encipher('hellothere', 'clever')
434 'hleolteher'
435 """
436 return column_transposition_worker(message, keyword, encipher=True)
437
438 def column_transposition_decipher(message, keyword):
439 """
440 >>> column_transposition_decipher('hleolteher', 'clever')
441 'hellothere'
442 """
443 return column_transposition_worker(message, keyword, encipher=False)
444
445 def column_transposition_worker(message, keyword, encipher=True):
446 """
447 >>> column_transposition_worker('hellothere', 'clever')
448 'hleolteher'
449 >>> column_transposition_worker('hellothere', 'clever', encipher=True)
450 'hleolteher'
451 >>> column_transposition_worker('hleolteher', 'clever', encipher=False)
452 'hellothere'
453 """
454 transpositions = transpositions_of(keyword)
455 columns = every_nth(message, len(transpositions), fillvalue=' ')
456 if encipher:
457 transposed_columns = transpose(columns, transpositions)
458 else:
459 transposed_columns = untranspose(columns, transpositions)
460 return combine_every_nth(transposed_columns)
461
462
463
464 def caesar_break(message,
465 metric=norms.euclidean_distance,
466 target_counts=normalised_english_counts,
467 message_frequency_scaling=norms.normalise):
468 """Breaks a Caesar cipher using frequency analysis
469
470 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
471 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
472 (4, 0.31863952890183...)
473 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
474 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
475 (19, 0.42152901235832...)
476 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
477 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
478 (13, 0.316029208075451...)
479 """
480 sanitised_message = sanitise(message)
481 best_shift = 0
482 best_fit = float("inf")
483 for shift in range(26):
484 plaintext = caesar_decipher(sanitised_message, shift)
485 counts = message_frequency_scaling(letter_frequencies(plaintext))
486 fit = metric(target_counts, counts)
487 logger.debug('Caesar break attempt using key {0} gives fit of {1} '
488 'and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
489 if fit < best_fit:
490 best_fit = fit
491 best_shift = shift
492 logger.info('Caesar break best fit: key {0} gives fit of {1} and '
493 'decrypt starting: {2}'.format(best_shift, best_fit,
494 caesar_decipher(sanitised_message, best_shift)[:50]))
495 return best_shift, best_fit
496
497 def affine_break(message,
498 metric=norms.euclidean_distance,
499 target_counts=normalised_english_counts,
500 message_frequency_scaling=norms.normalise):
501 """Breaks an affine cipher using frequency analysis
502
503 >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
504 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
505 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
506 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
507 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
508 'clm ckuxj.') # doctest: +ELLIPSIS
509 ((15, 22, True), 0.23570361818655...)
510 """
511 sanitised_message = sanitise(message)
512 best_multiplier = 0
513 best_adder = 0
514 best_one_based = True
515 best_fit = float("inf")
516 for one_based in [True, False]:
517 for multiplier in range(1, 26, 2):
518 for adder in range(26):
519 plaintext = affine_decipher(sanitised_message,
520 multiplier, adder, one_based)
521 counts = message_frequency_scaling(letter_frequencies(plaintext))
522 fit = metric(target_counts, counts)
523 logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
524 'gives fit of {3} and decrypt starting: {4}'.
525 format(multiplier, adder, one_based, fit,
526 plaintext[:50]))
527 if fit < best_fit:
528 best_fit = fit
529 best_multiplier = multiplier
530 best_adder = adder
531 best_one_based = one_based
532 logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
533 'and decrypt starting: {4}'.format(
534 best_multiplier, best_adder, best_one_based, best_fit,
535 affine_decipher(sanitised_message, best_multiplier,
536 best_adder, best_one_based)[:50]))
537 return (best_multiplier, best_adder, best_one_based), best_fit
538
539 def keyword_break(message,
540 wordlist=keywords,
541 metric=norms.euclidean_distance,
542 target_counts=normalised_english_counts,
543 message_frequency_scaling=norms.normalise):
544 """Breaks a keyword substitution cipher using a dictionary and
545 frequency analysis
546
547 >>> keyword_break(keyword_encipher('this is a test message for the ' \
548 'keyword decipherment', 'elephant', 1), \
549 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
550 (('elephant', 1), 0.41643991598441...)
551 """
552 best_keyword = ''
553 best_wrap_alphabet = True
554 best_fit = float("inf")
555 for wrap_alphabet in range(3):
556 for keyword in wordlist:
557 plaintext = keyword_decipher(message, keyword, wrap_alphabet)
558 counts = message_frequency_scaling(letter_frequencies(plaintext))
559 fit = metric(target_counts, counts)
560 logger.debug('Keyword break attempt using key {0} (wrap={1}) '
561 'gives fit of {2} and decrypt starting: {3}'.format(
562 keyword, wrap_alphabet, fit,
563 sanitise(plaintext)[:50]))
564 if fit < best_fit:
565 best_fit = fit
566 best_keyword = keyword
567 best_wrap_alphabet = wrap_alphabet
568 logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
569 '{2} and decrypt starting: {3}'.format(best_keyword,
570 best_wrap_alphabet, best_fit, sanitise(
571 keyword_decipher(message, best_keyword,
572 best_wrap_alphabet))[:50]))
573 return (best_keyword, best_wrap_alphabet), best_fit
574
575 def keyword_break_mp(message,
576 wordlist=keywords,
577 metric=norms.euclidean_distance,
578 target_counts=normalised_english_counts,
579 message_frequency_scaling=norms.normalise,
580 chunksize=500):
581 """Breaks a keyword substitution cipher using a dictionary and
582 frequency analysis
583
584 >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
585 'keyword decipherment', 'elephant', 1), \
586 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
587 (('elephant', 1), 0.41643991598441...)
588 """
589 with Pool() as pool:
590 helper_args = [(message, word, wrap, metric, target_counts,
591 message_frequency_scaling)
592 for word in wordlist for wrap in range(3)]
593 # Gotcha: the helper function here needs to be defined at the top level
594 # (limitation of Pool.starmap)
595 breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
596 return min(breaks, key=lambda k: k[1])
597
598 def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
599 message_frequency_scaling):
600 plaintext = keyword_decipher(message, keyword, wrap_alphabet)
601 counts = message_frequency_scaling(letter_frequencies(plaintext))
602 fit = metric(target_counts, counts)
603 logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
604 '{2} and decrypt starting: {3}'.format(keyword,
605 wrap_alphabet, fit, sanitise(plaintext)[:50]))
606 return (keyword, wrap_alphabet), fit
607
608 def scytale_break(message,
609 metric=norms.euclidean_distance,
610 target_counts=normalised_english_bigram_counts,
611 message_frequency_scaling=norms.normalise):
612 """Breaks a Scytale cipher
613
614 >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
615 'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
616 'aek') # doctest: +ELLIPSIS
617 (6, 0.83453041115025...)
618 """
619 best_key = 0
620 best_fit = float("inf")
621 for key in range(1, 20):
622 if len(message) % key == 0:
623 plaintext = scytale_decipher(message, key)
624 counts = message_frequency_scaling(frequencies(
625 ngrams(sanitise(plaintext), 2)))
626 fit = metric(target_counts, counts)
627 logger.debug('Scytale break attempt using key {0} gives fit of '
628 '{1} and decrypt starting: {2}'.format(key,
629 fit, sanitise(plaintext)[:50]))
630 if fit < best_fit:
631 best_fit = fit
632 best_key = key
633 logger.info('Scytale break best fit with key {0} gives fit of {1} and '
634 'decrypt starting: {2}'.format(best_key, best_fit,
635 sanitise(scytale_decipher(message, best_key))[:50]))
636 return best_key, best_fit
637
638
639 if __name__ == "__main__":
640 import doctest
641 doctest.testmod()