Tidying
[cipher-tools.git] / cipher.py
1 import string
2 import collections
3 import norms
4 import logging
5 import math
6 from itertools import zip_longest, repeat
7 from segment import segment
8 from multiprocessing import Pool
9
10 # To time a run:
11 #
12 # import timeit
13 # c5a = open('2012/5a.ciphertext', 'r').read()
14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
16
# Module-level logger: break attempts are recorded in cipher.log.
# NOTE(review): the FileHandler is attached at import time, so merely
# importing this module creates/opens cipher.log in the current directory.
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
# Uncomment one of these for per-attempt detail from the *_break functions.
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)
22
def _load_ngram_counts(filename):
    """Read a tab-separated "<ngram>\\t<count>" file into a defaultdict.

    Each line holds one n-gram and its occurrence count separated by a tab;
    n-grams absent from the file read as 0.
    """
    counts = collections.defaultdict(int)
    with open(filename, 'r') as f:
        for line in f:
            (ngram, count) = line.split("\t")
            counts[ngram] = int(count)
    return counts

# English letter, bigram and trigram frequencies (raw and normalised),
# used as targets by the frequency-analysis breaking functions below.
# (Previously three copy-pasted loading loops; now one shared helper.)
english_counts = _load_ngram_counts('count_1l.txt')
normalised_english_counts = norms.normalise(english_counts)

english_bigram_counts = _load_ngram_counts('count_2l.txt')
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)

english_trigram_counts = _load_ngram_counts('count_3l.txt')
normalised_english_trigram_counts = norms.normalise(english_trigram_counts)
43
44
# Candidate keys for the dictionary-driven breaking functions
# (keyword_break, column_transposition_break, and their _mp variants).
with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]
47
# Division table modulo 26: modular_division_table[b][c] == a whenever
# (a * b) % 26 == c, i.e. it answers "what do I multiply b by to get c,
# mod 26?".  Used by affine_decipher_letter to undo the multiplier.
modular_division_table = [[0]*26 for x in range(26)]
for a in range(26):
    for b in range(26):
        c = (a * b) % 26
        modular_division_table[b][c] = a
53
54
def sanitise(text):
    """Strip everything except ASCII letters from text and lowercase it.

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    return ''.join(c.lower() for c in text if c in string.ascii_letters)
65
def ngrams(text, n):
    """Return every contiguous length-n slice of text, in order.

    >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
     'nf', 'fo', 'ox']
    """
    window_starts = range(len(text) - n + 1)
    return [text[start:start + n] for start in window_starts]
77
def every_nth(text, n, fillvalue=''):
    """Return n strings, the i-th made of characters i, i+n, i+2n, ... of
    text.  Strings short of the maximum length are padded with fillvalue.

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
    """
    rows = (text[start:start + n] for start in range(0, len(text), n))
    return [''.join(column)
            for column in zip_longest(*rows, fillvalue=fillvalue)]
94
def combine_every_nth(split_text):
    """Invert every_nth: interleave the given strings back into one string.

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    """
    interleaved = zip_longest(*split_text, fillvalue='')
    return ''.join(''.join(chunk) for chunk in interleaved)
107
def transpose(items, transposition):
    """Rearrange items so that position p of the result holds
    items[transposition[p]].

    >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
    ['d', 'b', 'c', 'a']
    >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
    [13, 12, 14, 11, 15, 10]
    """
    return [items[source] for source in transposition]
122
def untranspose(items, transposition):
    """Undo transpose: position transposition[p] of the result holds items[p].

    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
    ['a', 'b', 'c', 'd']
    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
    [10, 11, 12, 13, 14, 15]
    """
    restored = [''] * len(transposition)
    for position, destination in enumerate(transposition):
        restored[destination] = items[position]
    return restored
137
138
def frequencies(text):
    """Tally how many times each character occurs in text.

    Returns a defaultdict(int), so characters absent from text read as 0.

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
            'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    """
    tallies = collections.defaultdict(int)
    for symbol in text:
        tallies[symbol] += 1
    return tallies
letter_frequencies = frequencies
169
def deduplicate(text):
    """Return the distinct elements of text as a list, keeping only the
    first occurrence of each and preserving that order.

    >>> deduplicate('happy')
    ['h', 'a', 'p', 'y']
    """
    seen = set()
    unique = []
    for element in text:
        if element not in seen:
            seen.add(element)
            unique.append(element)
    return unique
172
173
174
def caesar_encipher_letter(letter, shift):
    """Shift a single letter forward through the alphabet, wrapping around;
    any non-letter is returned unchanged.

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('x', 2)
    'z'
    >>> caesar_encipher_letter('y', 2)
    'a'
    >>> caesar_encipher_letter('z', -1)
    'y'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    if letter not in string.ascii_letters:
        return letter
    base = ord('A') if letter in string.ascii_uppercase else ord('a')
    return chr(base + (ord(letter) - base + shift) % 26)
204
def caesar_decipher_letter(letter, shift):
    """Shift a single letter backward through the alphabet.

    >>> caesar_decipher_letter('b', 1)
    'a'
    >>> caesar_decipher_letter('b', 2)
    'z'
    """
    # Deciphering is just enciphering with the opposite shift.
    reverse_shift = -shift
    return caesar_encipher_letter(letter, reverse_shift)
214
def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('abcxyz', 2)
    'cdezab'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    """
    return ''.join(caesar_encipher_letter(character, shift)
                   for character in message)
229
def caesar_decipher(message, shift):
    """Decipher a message enciphered with the Caesar cipher of given shift.

    (Docstring previously said "Encipher" — copy-paste error.)

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cde', 2)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    """
    return caesar_encipher(message, -shift)
241
def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Encipher a single letter with the affine map x -> multiplier*x + adder
    (mod 26).  one_based selects whether 'a' counts as 1 or 0.  Non-letters
    are returned unchanged.

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
            for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
            for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    if letter not in string.ascii_letters:
        return letter
    base = ord('A') if letter in string.ascii_uppercase else ord('a')
    offset = 1 if one_based else 0
    plain_number = ord(letter) - base + offset
    cipher_number = (plain_number * multiplier + adder) % 26 - offset
    return chr(cipher_number % 26 + base)
264
def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter enciphered with the affine cipher.

    (Docstring previously said "Encipher" — copy-paste error.)  The
    multiplicative step is undone with the precomputed
    modular_division_table (division modulo 26); non-letters are returned
    unchanged.

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
            for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
            for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        cipher_number = ord(letter) - alphabet_start
        if one_based: cipher_number += 1
        # Subtract the adder, then "divide" by the multiplier mod 26.
        plaintext_number = ( modular_division_table[multiplier]
                                                   [(cipher_number - adder) % 26] )
        if one_based: plaintext_number -= 1
        return chr(plaintext_number % 26 + alphabet_start)
    else:
        return letter
288
def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a message with the affine cipher, letter by letter.

    >>> affine_encipher('hours passed during which jerico tried every ' \
           'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    return ''.join(affine_encipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
299
def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a message enciphered with the affine cipher.

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
           'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    return ''.join(affine_decipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
310
311
def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Build the cipher alphabet for a keyword substitution cipher.
    wrap_alphabet controls where the remaining letters start:
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 0)
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    clean_keyword = sanitise(keyword)
    if wrap_alphabet == 0:
        filler = string.ascii_lowercase
    else:
        if wrap_alphabet == 1:
            anchor_letter = deduplicate(clean_keyword)[-1]
        else:
            anchor_letter = max(clean_keyword)
        resume_at = string.ascii_lowercase.find(anchor_letter) + 1
        filler = (string.ascii_lowercase[resume_at:] +
                  string.ascii_lowercase)
    return ''.join(deduplicate(clean_keyword + filler))
344
345
def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Encipher a message with a keyword substitution cipher.
    wrap_alphabet controls where the non-keyword letters start
    (0: from 'a'; 1: from the last keyword letter; 2: from the largest
    keyword letter).

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 1)
    'lskl dskkbus'
    >>> keyword_encipher('test message', 'bayes', 2)
    'qspq jsppbcs'
    """
    enciphering_map = str.maketrans(
        string.ascii_lowercase,
        keyword_cipher_alphabet_of(keyword, wrap_alphabet))
    return message.lower().translate(enciphering_map)
366
def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Decipher a message enciphered with a keyword substitution cipher.
    wrap_alphabet controls where the non-keyword letters start
    (0: from 'a'; 1: from the last keyword letter; 2: from the largest
    keyword letter).

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
    'test message'
    >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
    'test message'
    """
    deciphering_map = str.maketrans(
        keyword_cipher_alphabet_of(keyword, wrap_alphabet),
        string.ascii_lowercase)
    return message.lower().translate(deciphering_map)
387
def scytale_encipher(message, rows):
    """Encipher using the scytale transposition cipher: pad the message
    with spaces to a multiple of rows, then read it off column by column.

    >>> scytale_encipher('thequickbrownfox', 3)
    'tcnhkfeboqrxuo iw '
    >>> scytale_encipher('thequickbrownfox', 4)
    'tubnhirfecooqkwx'
    >>> scytale_encipher('thequickbrownfox', 5)
    'tubn hirf ecoo qkwx '
    """
    # -len % rows is the amount of padding needed (0 when already exact).
    padded = message + ' ' * (-len(message) % rows)
    row_length = len(padded) // rows
    # Column j of the rows x row_length grid is every row_length-th
    # character starting at j.
    return ''.join(padded[column::row_length] for column in range(row_length))
409
def scytale_decipher(message, rows):
    """Decipher the scytale transposition cipher.  Assumes the message was
    padded so that all rows are the same length.

    >>> scytale_decipher('tubnhirfecooqkwx', 4)
    'thequickbrownfox'
    """
    usable_length = round(len(message) / rows) * rows
    trimmed = message[:usable_length]
    # Row i of the original grid is every rows-th character starting at i.
    return ''.join(trimmed[row::rows] for row in range(rows))
428
429
def transpositions_of(keyword):
    """Return the column ordering implied by a keyword: for each letter of
    the deduplicated keyword taken in sorted order, the index that letter
    occupies in the keyword.  E.g. 'clever' deduplicates to 'clevr', which
    sorts to 'celrv'.

    >>> transpositions_of('clever')
    [0, 2, 1, 4, 3]
    """
    key_letters = deduplicate(keyword)
    return [key_letters.index(letter) for letter in sorted(key_letters)]
442
def column_transposition_encipher(message, keyword, fillvalue=' '):
    """Encipher with the column transposition cipher; the message is
    padded with fillvalue so all rows are the same length.

    >>> column_transposition_encipher('hellothere', 'clever')
    'hleolteher'
    >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
    'hleolthre!e!'
    """
    return column_transposition_worker(message, keyword, fillvalue=fillvalue,
                                       encipher=True)
454
def column_transposition_decipher(message, keyword, fillvalue=' '):
    """Decipher the column transposition cipher; the message is assumed to
    be padded so all rows are the same length.

    >>> column_transposition_decipher('hleolteher', 'clever')
    'hellothere'
    >>> column_transposition_decipher('hleolthre!e!', 'cleverly', fillvalue='?')
    'hellothere!!'
    """
    return column_transposition_worker(message, keyword, fillvalue=fillvalue,
                                       encipher=False)
466
def column_transposition_worker(message, keyword,
                                encipher=True, fillvalue=' '):
    """Shared implementation of the column transposition cipher: split the
    message into columns, shuffle (or unshuffle) them according to the
    keyword's ordering, and interleave them back into a string.

    >>> column_transposition_worker('hellothere', 'clever')
    'hleolteher'
    >>> column_transposition_worker('hleolteher', 'clever', encipher=False)
    'hellothere'
    """
    column_order = transpositions_of(keyword)
    columns = every_nth(message, len(column_order), fillvalue=fillvalue)
    shuffle = transpose if encipher else untranspose
    return combine_every_nth(shuffle(columns, column_order))
486
487
488
def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Break a Caesar cipher by frequency analysis: try all 26 shifts and
    return the (shift, fit) whose letter frequencies are closest to the
    target counts (lower fit is better).

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
        'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
        'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.42152901235832...)
    """
    sanitised_message = sanitise(message)
    trials = []
    for shift in range(26):
        plaintext = caesar_decipher(sanitised_message, shift)
        counts = message_frequency_scaling(letter_frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(shift, fit,
                         plaintext[:50]))
        trials.append((fit, shift))
    # min keeps the first of equal fits, matching a strict < comparison.
    best_fit, best_shift = min(trials, key=lambda trial: trial[0])
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit
521
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis

    Exhaustively tries every (multiplier, adder, one_based) key, scoring
    each decrypt's letter frequencies against target_counts with metric
    (lower is better).  Returns ((multiplier, adder, one_based), fit).

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
        'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
        'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
        'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
        'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
        'clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    # Best key found so far; fit starts at infinity so any real fit wins.
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    # Search both indexing conventions, the odd multipliers (the only
    # candidates that can be coprime with 26) and all 26 adders.
    for one_based in [True, False]:
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                counts = message_frequency_scaling(letter_frequencies(plaintext))
                fit = metric(target_counts, counts)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                # Strict < keeps the earliest key among equal fits.
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit
563
def keyword_break(message,
                  wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis

    Tries every keyword in wordlist with all three wrap_alphabet modes and
    returns ((keyword, wrap_alphabet), fit) for the best-scoring decrypt.

    >>> keyword_break(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    best_keyword = ''
    # Fix: wrap_alphabet is an int in {0, 1, 2}; it was initialised to the
    # boolean True, which leaked out unchanged for an empty wordlist.
    best_wrap_alphabet = 0
    best_fit = float("inf")
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            counts = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_counts, counts)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit
599
def keyword_break_mp(message,
                     wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis, spreading the candidate keys over a process pool.

    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    jobs = [(message, word, wrap, metric, target_counts,
             message_frequency_scaling)
            for word in wordlist
            for wrap in range(3)]
    with Pool() as pool:
        # Gotcha: the helper function here needs to be defined at the top
        # level (limitation of Pool.starmap)
        candidate_fits = pool.starmap(keyword_break_one, jobs, chunksize)
    return min(candidate_fits, key=lambda candidate: candidate[1])
622
def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    """Score one (keyword, wrap_alphabet) candidate for keyword_break_mp,
    returning ((keyword, wrap_alphabet), fit)."""
    candidate_plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    scaled_counts = message_frequency_scaling(
        letter_frequencies(candidate_plaintext))
    fit = metric(target_counts, scaled_counts)
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(candidate_plaintext)[:50]))
    return (keyword, wrap_alphabet), fit
632
def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise):
    """Break a scytale cipher by trying every row count from 1 to 19 that
    divides the message length, scoring each decrypt's bigram frequencies.

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
        'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
        'aek') # doctest: +ELLIPSIS
    (6, 0.83453041115025...)
    """
    trials = []
    for key in range(1, 20):
        if len(message) % key != 0:
            continue
        plaintext = scytale_decipher(message, key)
        counts = message_frequency_scaling(frequencies(
            ngrams(sanitise(plaintext), 2)))
        fit = metric(target_counts, counts)
        logger.debug('Scytale break attempt using key {0} gives fit of '
                     '{1} and decrypt starting: {2}'.format(key,
                         fit, sanitise(plaintext)[:50]))
        trials.append((fit, key))
    # key=1 always divides the length, so trials is never empty; min keeps
    # the first of equal fits, matching a strict < comparison.
    best_fit, best_key = min(trials, key=lambda trial: trial[0])
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit
662
def column_transposition_break(message,
                               wordlist=keywords,
                               metric=norms.euclidean_distance,
                               target_counts=normalised_english_bigram_counts,
                               message_frequency_scaling=norms.normalise):
    """Breaks a column transposition cipher using a dictionary and
    n-gram frequency analysis

    >>> column_transposition_break(column_transposition_encipher(sanitise( \
            "Turing's homosexuality resulted in a criminal prosecution in 1952, \
            when homosexual acts were still illegal in the United Kingdom. "), \
            'encipher'), \
            wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS
    ('encipher', 0.898128626285...)
    >>> column_transposition_break(column_transposition_encipher(sanitise( \
            "Turing's homosexuality resulted in a criminal prosecution in 1952, " \
            "when homosexual acts were still illegal in the United Kingdom."), \
            'encipher'), \
            wordlist=['encipher', 'keyword', 'fourteen'], \
            target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
    ('encipher', 1.1958792913127...)
    """
    best_keyword = ''
    best_fit = float("inf")
    # Infer the n-gram size to score with from the target counts' keys.
    ngram_length = len(next(iter(target_counts.keys())))
    for keyword in wordlist:
        # Only keywords whose deduplicated length divides the message
        # length can have produced it without ragged columns.
        if len(message) % len(deduplicate(keyword)) == 0:
            plaintext = column_transposition_decipher(message, keyword)
            counts = message_frequency_scaling(frequencies(
                ngrams(sanitise(plaintext), ngram_length)))
            fit = metric(target_counts, counts)
            logger.debug('Column transposition break attempt using key {0} '
                         'gives fit of {1} and decrypt starting: {2}'.format(
                             keyword, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
    # NOTE(review): if no keyword's length divides the message length,
    # best_keyword stays '' and the decipher call in the log arguments below
    # raises (every_nth is handed n=0) -- confirm this case cannot occur.
    logger.info('Column transposition break best fit with key {0} gives fit '
                'of {1} and decrypt starting: {2}'.format(best_keyword,
                    best_fit, sanitise(
                        column_transposition_decipher(message,
                                                      best_keyword))[:50]))
    return best_keyword, best_fit
707
708
def column_transposition_break_mp(message,
                                  wordlist=keywords,
                                  metric=norms.euclidean_distance,
                                  target_counts=normalised_english_bigram_counts,
                                  message_frequency_scaling=norms.normalise,
                                  chunksize=500):
    """Breaks a column transposition cipher using a dictionary and
    n-gram frequency analysis, spreading the candidate keys over a
    process pool.

    >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
            "Turing's homosexuality resulted in a criminal prosecution in 1952, \
            when homosexual acts were still illegal in the United Kingdom. "), \
            'encipher'), \
            wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS
    ('encipher', 0.898128626285...)
    """
    # Infer the n-gram size to score with from the target counts' keys.
    ngram_length = len(next(iter(target_counts.keys())))
    jobs = [(message, word, metric, target_counts, ngram_length,
             message_frequency_scaling)
            for word in wordlist]
    with Pool() as pool:
        # Gotcha: the helper function here needs to be defined at the top
        # level (limitation of Pool.starmap)
        candidate_fits = pool.starmap(column_transposition_break_worker,
                                      jobs, chunksize)
    return min(candidate_fits, key=lambda candidate: candidate[1])
741
def column_transposition_break_worker(message, keyword, metric, target_counts,
                                      ngram_length, message_frequency_scaling):
    """Score one keyword candidate for column_transposition_break_mp,
    returning (keyword, fit)."""
    candidate_plaintext = column_transposition_decipher(message, keyword)
    scaled_counts = message_frequency_scaling(frequencies(
        ngrams(sanitise(candidate_plaintext), ngram_length)))
    fit = metric(target_counts, scaled_counts)
    logger.debug('Column transposition break attempt using key {0} '
                 'gives fit of {1} and decrypt starting: {2}'.format(
                     keyword, fit,
                     sanitise(candidate_plaintext)[:50]))
    return keyword, fit
753
754
755
if __name__ == "__main__":
    # Run the embedded doctests when this module is executed as a script.
    import doctest
    doctest.testmod()