import string
import collections
import norms
import logging
import math
from itertools import zip_longest, repeat
from segment import segment
from multiprocessing import Pool

# To time a run:
#
# import timeit
# c5a = open('2012/5a.ciphertext', 'r').read()
# timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)

logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)

english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
    for line in f:
        (letter, count) = line.split("\t")
        english_counts[letter] = int(count)
normalised_english_counts = norms.normalise(english_counts)

english_bigram_counts = collections.defaultdict(int)
with open('count_2l.txt', 'r') as f:
    for line in f:
        (bigram, count) = line.split("\t")
        english_bigram_counts[bigram] = int(count)
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)

with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]

modular_division_table = [[0]*26 for x in range(26)]
for a in range(26):
    for b in range(26):
        c = (a * b) % 26
        modular_division_table[b][c] = a

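# A worked example of how the table is used: for any affine multiplier b that
# is invertible mod 26, modular_division_table[b][c] recovers the a with
# (a * b) % 26 == c. For instance, (21 * 5) % 26 == 1, so
# modular_division_table[5][1] == 21.
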
def letters(text):
    """Remove all non-alphabetic characters from a text
    >>> letters('The Quick')
    'TheQuick'
    >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'TheQuickBROWNfoxjumpedoverthelazyDOG'
    """
    return ''.join([c for c in text if c in string.ascii_letters])

def sanitise(text):
    """Remove all non-alphabetic characters and convert the text to lowercase

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    # sanitised = [c.lower() for c in text if c in string.ascii_letters]
    # return ''.join(sanitised)
    return letters(text).lower()

def ngrams(text, n):
    """Returns all n-grams of a text

    >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
     'nf', 'fo', 'ox']
    >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
    ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
     'rown', 'ownf', 'wnfo', 'nfox']
    """
    return [text[i:i+n] for i in range(len(text)-n+1)]

def every_nth(text, n, fillvalue=''):
    """Returns n strings, each of which consists of every nth character,
    starting with the 0th, 1st, 2nd, ... (n-1)th character

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
    """
    split_text = [text[i:i+n] for i in range(0, len(text), n)]
    return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]

def combine_every_nth(split_text):
    """Reforms a text split into every_nth strings

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    return ''.join([''.join(l)
                    for l in zip_longest(*split_text, fillvalue='')])

def transpose(items, transposition):
    """Moves items around according to the given transposition

    >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
    ['d', 'b', 'c', 'a']
    >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
    [13, 12, 14, 11, 15, 10]
    """
    transposed = list(repeat('', len(transposition)))
    for p, t in enumerate(transposition):
        transposed[p] = items[t]
    return transposed

def untranspose(items, transposition):
    """Undoes a transpose

    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
    ['a', 'b', 'c', 'd']
    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
    [10, 11, 12, 13, 14, 15]
    """
    transposed = list(repeat('', len(transposition)))
    for p, t in enumerate(transposition):
        transposed[t] = items[p]
    return transposed

def frequencies(text):
    """Count the number of occurrences of each character in text

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
           'dog').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
     ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
     ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
     ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
           '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
    [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
     ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
     ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
     ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
     ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
           'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> frequencies('abcdefabcdef')['x']
    0
    """
    #counts = collections.defaultdict(int)
    #for c in text:
    #    counts[c] += 1
    #return counts
    return collections.Counter(c for c in text)
letter_frequencies = frequencies

def deduplicate(text):
    """Remove duplicate characters, keeping only the first occurrence of each"""
    return list(collections.OrderedDict.fromkeys(text))


def caesar_encipher_letter(letter, shift):
    """Encipher a letter, given a shift amount

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('a', 2)
    'c'
    >>> caesar_encipher_letter('b', 2)
    'd'
    >>> caesar_encipher_letter('x', 2)
    'z'
    >>> caesar_encipher_letter('y', 2)
    'a'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('z', -1)
    'y'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        return chr(((ord(letter) - alphabet_start + shift) % 26) +
                   alphabet_start)
    else:
        return letter

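# Tracing the arithmetic above for the wrap-around case caesar_encipher_letter('y', 2):
# ord('y') - ord('a') == 24, (24 + 2) % 26 == 0, and chr(0 + ord('a')) == 'a'.
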
def caesar_decipher_letter(letter, shift):
    """Decipher a letter, given a shift amount

    >>> caesar_decipher_letter('b', 1)
    'a'
    >>> caesar_decipher_letter('b', 2)
    'z'
    """
    return caesar_encipher_letter(letter, -shift)

def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('abc', 2)
    'cde'
    >>> caesar_encipher('abcxyz', 2)
    'cdezab'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    """
    enciphered = [caesar_encipher_letter(l, shift) for l in message]
    return ''.join(enciphered)

def caesar_decipher(message, shift):
    """Decipher a message enciphered with the Caesar cipher of given shift

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cde', 2)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    """
    return caesar_encipher(message, -shift)

def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Encipher a letter, given a multiplier and adder

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
            for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
            for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        letter_number = ord(letter) - alphabet_start
        if one_based: letter_number += 1
        cipher_number = (letter_number * multiplier + adder) % 26
        if one_based: cipher_number -= 1
        return chr(cipher_number % 26 + alphabet_start)
    else:
        return letter

def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter, given a multiplier and adder

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
            for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
            for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        cipher_number = ord(letter) - alphabet_start
        if one_based: cipher_number += 1
        plaintext_number = (modular_division_table[multiplier]
                                                  [(cipher_number - adder) % 26])
        if one_based: plaintext_number -= 1
        return chr(plaintext_number % 26 + alphabet_start)
    else:
        return letter

def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a message

    >>> affine_encipher('hours passed during which jerico tried every ' \
            'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(enciphered)

def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a message

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
            'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(enciphered)

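# The affine cipher maps a letter number p to (multiplier * p + adder) mod 26,
# counting letters from 1 rather than 0 when one_based is True; deciphering
# undoes this with the modular_division_table lookup above. A round-trip
# sketch with an arbitrarily chosen key of 5x+8 (any multiplier coprime to 26
# would do):
#
# ciphertext = affine_encipher('affine test', 5, 8, True)
# assert affine_decipher(ciphertext, 5, 8, True) == 'affine test'
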
def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Find the cipher alphabet given a keyword.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 0)
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    if wrap_alphabet == 0:
        cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
                                              string.ascii_lowercase))
    else:
        if wrap_alphabet == 1:
            last_keyword_letter = deduplicate(sanitise(keyword))[-1]
        else:
            last_keyword_letter = sorted(sanitise(keyword))[-1]
        last_keyword_position = string.ascii_lowercase.find(
            last_keyword_letter) + 1
        cipher_alphabet = ''.join(
            deduplicate(sanitise(keyword) +
                        string.ascii_lowercase[last_keyword_position:] +
                        string.ascii_lowercase))
    return cipher_alphabet


def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Enciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 0)
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 1)
    'lskl dskkbus'
    >>> keyword_encipher('test message', 'bayes', 2)
    'qspq jsppbcs'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
    return message.lower().translate(cipher_translation)

def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Deciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
    'test message'
    >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
    'test message'
    >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
    'test message'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
    return message.lower().translate(cipher_translation)

def scytale_encipher(message, rows):
    """Enciphers using the scytale transposition cipher.
    Message is padded with spaces to allow all rows to be the same length.

    >>> scytale_encipher('thequickbrownfox', 3)
    'tcnhkfeboqrxuo iw '
    >>> scytale_encipher('thequickbrownfox', 4)
    'tubnhirfecooqkwx'
    >>> scytale_encipher('thequickbrownfox', 5)
    'tubn hirf ecoo qkwx '
    >>> scytale_encipher('thequickbrownfox', 6)
    'tqcrnxhukof eibwo '
    >>> scytale_encipher('thequickbrownfox', 7)
    'tqcrnx hukof  eibwo  '
    """
    if len(message) % rows != 0:
        message += ' '*(rows - len(message) % rows)
    row_length = round(len(message) / rows)
    slices = [message[i:i+row_length]
              for i in range(0, len(message), row_length)]
    return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])

def scytale_decipher(message, rows):
    """Deciphers using the scytale transposition cipher.
    Assumes the message is padded so that all rows are the same length.

    >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
    'thequickbrownfox  '
    >>> scytale_decipher('tubnhirfecooqkwx', 4)
    'thequickbrownfox'
    >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
    'thequickbrownfox    '
    >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
    'thequickbrownfox  '
    >>> scytale_decipher('tqcrnx hukof  eibwo  ', 7)
    'thequickbrownfox     '
    """
    cols = round(len(message) / rows)
    columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
    return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])


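# Illustration of the scytale layout for scytale_encipher('thequickbrownfox', 4):
# the message is cut into the rows 'theq', 'uick', 'brow', 'nfox' and read off
# column by column, giving 'tubn' + 'hirf' + 'ecoo' + 'qkwx', i.e.
# 'tubnhirfecooqkwx'; scytale_decipher reverses the read-off.
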
def transpositions_of(keyword):
    """
    >>> transpositions_of('clever')
    [0, 2, 1, 4, 3]
    """
    key = deduplicate(keyword)
    transpositions = [key.index(l) for l in sorted(key)]
    return transpositions

def column_transposition_encipher(message, keyword):
    """
    >>> column_transposition_encipher('hellothere', 'clever')
    'hleolteher'
    """
    return column_transposition_worker(message, keyword, encipher=True)

def column_transposition_decipher(message, keyword):
    """
    >>> column_transposition_decipher('hleolteher', 'clever')
    'hellothere'
    """
    return column_transposition_worker(message, keyword, encipher=False)

def column_transposition_worker(message, keyword, encipher=True):
    """
    >>> column_transposition_worker('hellothere', 'clever')
    'hleolteher'
    >>> column_transposition_worker('hellothere', 'clever', encipher=True)
    'hleolteher'
    >>> column_transposition_worker('hleolteher', 'clever', encipher=False)
    'hellothere'
    """
    transpositions = transpositions_of(keyword)
    columns = every_nth(message, len(transpositions), fillvalue=' ')
    if encipher:
        transposed_columns = transpose(columns, transpositions)
    else:
        transposed_columns = untranspose(columns, transpositions)
    return combine_every_nth(transposed_columns)


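# Step-by-step view of column_transposition_worker('hellothere', 'clever'):
# transpositions_of('clever') == [0, 2, 1, 4, 3],
# every_nth('hellothere', 5, fillvalue=' ') == ['ht', 'eh', 'le', 'lr', 'oe'],
# transposing those columns gives ['ht', 'le', 'eh', 'oe', 'lr'],
# and combine_every_nth() reassembles them into 'hleolteher'.
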
def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
            'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
            'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.42152901235832...)
    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
            'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
    (13, 0.316029208075451...)
    """
    sanitised_message = sanitise(message)
    best_shift = 0
    best_fit = float("inf")
    for shift in range(26):
        plaintext = caesar_decipher(sanitised_message, shift)
        counts = message_frequency_scaling(letter_frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
        if fit < best_fit:
            best_fit = fit
            best_shift = shift
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit

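# Typical use of caesar_break (the ciphertext path below is only illustrative,
# following the pattern of the timing example at the top of this file):
#
# ciphertext = open('2012/1a.ciphertext', 'r').read()
# key, fit = caesar_break(ciphertext)
# plaintext = caesar_decipher(sanitise(ciphertext), key)
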
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
            'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
            'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
            'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
            'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
            'clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    for one_based in [True, False]:
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                counts = message_frequency_scaling(letter_frequencies(plaintext))
                fit = metric(target_counts, counts)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit

def keyword_break(message,
                  wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break(keyword_encipher('this is a test message for the ' \
            'keyword decipherment', 'elephant', 1), \
            wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    best_keyword = ''
    best_wrap_alphabet = 0
    best_fit = float("inf")
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            counts = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_counts, counts)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit

def keyword_break_mp(message,
                     wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
            'keyword decipherment', 'elephant', 1), \
            wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    with Pool() as pool:
        helper_args = [(message, word, wrap, metric, target_counts,
                        message_frequency_scaling)
                       for word in wordlist for wrap in range(3)]
        # Gotcha: the helper function here needs to be defined at the top level
        # (limitation of Pool.starmap)
        breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
        return min(breaks, key=lambda k: k[1])

def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    """Scores one keyword/wrap_alphabet candidate for keyword_break_mp"""
    plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    counts = message_frequency_scaling(letter_frequencies(plaintext))
    fit = metric(target_counts, counts)
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
    return (keyword, wrap_alphabet), fit

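# keyword_break_mp spreads the same dictionary attack as keyword_break over a
# Pool of worker processes, with keyword_break_one as the per-candidate helper
# it farms out.  A sketch of direct use (ciphertext is assumed to hold an
# enciphered message; the chunksize value is just an example):
#
# (keyword, wrap), fit = keyword_break_mp(ciphertext, chunksize=500)
# plaintext = keyword_decipher(ciphertext, keyword, wrap)
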
def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a Scytale cipher

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
            'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
            'aek') # doctest: +ELLIPSIS
    (6, 0.83453041115025...)
    """
    best_key = 0
    best_fit = float("inf")
    for key in range(1, 20):
        if len(message) % key == 0:
            plaintext = scytale_decipher(message, key)
            counts = message_frequency_scaling(frequencies(
                ngrams(sanitise(plaintext), 2)))
            fit = metric(target_counts, counts)
            logger.debug('Scytale break attempt using key {0} gives fit of '
                         '{1} and decrypt starting: {2}'.format(key,
                             fit, sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_key = key
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit


if __name__ == "__main__":
    import doctest
    doctest.testmod()