Added files for challenge 5
[cipher-tools.git] / cipher.py
1 import string
2 import collections
3 import norms
4 import logging
5 import math
6 from itertools import zip_longest, repeat
7 from segment import segment
8 from multiprocessing import Pool
9
10 # To time a run:
11 #
12 # import timeit
13 # c5a = open('2012/5a.ciphertext', 'r').read()
14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
15 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1
16
17 logger = logging.getLogger(__name__)
18 logger.addHandler(logging.FileHandler('cipher.log'))
19 logger.setLevel(logging.WARNING)
20 #logger.setLevel(logging.INFO)
21 #logger.setLevel(logging.DEBUG)
22
23 english_counts = collections.defaultdict(int)
24 with open('count_1l.txt', 'r') as f:
25 for line in f:
26 (letter, count) = line.split("\t")
27 english_counts[letter] = int(count)
28 normalised_english_counts = norms.normalise(english_counts)
29
30 english_bigram_counts = collections.defaultdict(int)
31 with open('count_2l.txt', 'r') as f:
32 for line in f:
33 (bigram, count) = line.split("\t")
34 english_bigram_counts[bigram] = int(count)
35 normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
36
37 english_trigram_counts = collections.defaultdict(int)
38 with open('count_3l.txt', 'r') as f:
39 for line in f:
40 (trigram, count) = line.split("\t")
41 english_trigram_counts[trigram] = int(count)
42 normalised_english_trigram_counts = norms.normalise(english_trigram_counts)
43
44
45 with open('words.txt', 'r') as f:
46 keywords = [line.rstrip() for line in f]
47
48 modular_division_table = [[0]*26 for x in range(26)]
49 for a in range(26):
50 for b in range(26):
51 c = (a * b) % 26
52 modular_division_table[b][c] = a
53
54 def letters(text):
55 """Remove all non-alphabetic characters from a text
56 >>> letters('The Quick')
57 'TheQuick'
58 >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
59 'TheQuickBROWNfoxjumpedoverthelazyDOG'
60 """
61 return ''.join([c for c in text if c in string.ascii_letters])
62
63 def sanitise(text):
64 """Remove all non-alphabetic characters and convert the text to lowercase
65
66 >>> sanitise('The Quick')
67 'thequick'
68 >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
69 'thequickbrownfoxjumpedoverthelazydog'
70 """
71 # sanitised = [c.lower() for c in text if c in string.ascii_letters]
72 # return ''.join(sanitised)
73 return letters(text).lower()
74
75 def ngrams(text, n):
76 """Returns all n-grams of a text
77
78 >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
79 ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
80 'nf', 'fo', 'ox']
81 >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
82 ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
83 'rown', 'ownf', 'wnfo', 'nfox']
84 """
85 return [text[i:i+n] for i in range(len(text)-n+1)]
86
87 def every_nth(text, n, fillvalue=''):
88 """Returns n strings, each of which consists of every nth character,
89 starting with the 0th, 1st, 2nd, ... (n-1)th character
90
91 >>> every_nth(string.ascii_lowercase, 5)
92 ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
93 >>> every_nth(string.ascii_lowercase, 1)
94 ['abcdefghijklmnopqrstuvwxyz']
95 >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
96 ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
97 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
98 >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
99 ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
100 """
101 split_text = [text[i:i+n] for i in range(0, len(text), n)]
102 return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
103
104 def combine_every_nth(split_text):
105 """Reforms a text split into every_nth strings
106
107 >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
108 'abcdefghijklmnopqrstuvwxyz'
109 >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
110 'abcdefghijklmnopqrstuvwxyz'
111 >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
112 'abcdefghijklmnopqrstuvwxyz'
113 """
114 return ''.join([''.join(l)
115 for l in zip_longest(*split_text, fillvalue='')])
116
117 def transpose(items, transposition):
118 """Moves items around according to the given transposition
119
120 >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
121 ['a', 'b', 'c', 'd']
122 >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
123 ['d', 'b', 'c', 'a']
124 >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
125 [13, 12, 14, 11, 15, 10]
126 """
127 transposed = list(repeat('', len(transposition)))
128 for p, t in enumerate(transposition):
129 transposed[p] = items[t]
130 return transposed
131
132 def untranspose(items, transposition):
133 """Undoes a transpose
134
135 >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
136 ['a', 'b', 'c', 'd']
137 >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
138 ['a', 'b', 'c', 'd']
139 >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
140 [10, 11, 12, 13, 14, 15]
141 """
142 transposed = list(repeat('', len(transposition)))
143 for p, t in enumerate(transposition):
144 transposed[t] = items[p]
145 return transposed
146
147
148 def frequencies(text):
149 """Count the number of occurrences of each character in text
150
151 >>> sorted(frequencies('abcdefabc').items())
152 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
153 >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
154 'dog').items()) # doctest: +NORMALIZE_WHITESPACE
155 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
156 ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
157 ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
158 ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
159 >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
160 '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
161 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
162 ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
163 ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
164 ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
165 ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
166 >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
167 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
168 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
169 ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
170 ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
171 ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
172 >>> frequencies('abcdefabcdef')['x']
173 0
174 """
175 #counts = collections.defaultdict(int)
176 #for c in text:
177 # counts[c] += 1
178 #return counts
179 return collections.Counter(c for c in text)
180 letter_frequencies = frequencies
181
182 def deduplicate(text):
183 return list(collections.OrderedDict.fromkeys(text))
184
185
186
187 def caesar_encipher_letter(letter, shift):
188 """Encipher a letter, given a shift amount
189
190 >>> caesar_encipher_letter('a', 1)
191 'b'
192 >>> caesar_encipher_letter('a', 2)
193 'c'
194 >>> caesar_encipher_letter('b', 2)
195 'd'
196 >>> caesar_encipher_letter('x', 2)
197 'z'
198 >>> caesar_encipher_letter('y', 2)
199 'a'
200 >>> caesar_encipher_letter('z', 2)
201 'b'
202 >>> caesar_encipher_letter('z', -1)
203 'y'
204 >>> caesar_encipher_letter('a', -1)
205 'z'
206 """
207 if letter in string.ascii_letters:
208 if letter in string.ascii_uppercase:
209 alphabet_start = ord('A')
210 else:
211 alphabet_start = ord('a')
212 return chr(((ord(letter) - alphabet_start + shift) % 26) +
213 alphabet_start)
214 else:
215 return letter
216
217 def caesar_decipher_letter(letter, shift):
218 """Decipher a letter, given a shift amount
219
220 >>> caesar_decipher_letter('b', 1)
221 'a'
222 >>> caesar_decipher_letter('b', 2)
223 'z'
224 """
225 return caesar_encipher_letter(letter, -shift)
226
227 def caesar_encipher(message, shift):
228 """Encipher a message with the Caesar cipher of given shift
229
230 >>> caesar_encipher('abc', 1)
231 'bcd'
232 >>> caesar_encipher('abc', 2)
233 'cde'
234 >>> caesar_encipher('abcxyz', 2)
235 'cdezab'
236 >>> caesar_encipher('ab cx yz', 2)
237 'cd ez ab'
238 """
239 enciphered = [caesar_encipher_letter(l, shift) for l in message]
240 return ''.join(enciphered)
241
242 def caesar_decipher(message, shift):
243 """Encipher a message with the Caesar cipher of given shift
244
245 >>> caesar_decipher('bcd', 1)
246 'abc'
247 >>> caesar_decipher('cde', 2)
248 'abc'
249 >>> caesar_decipher('cd ez ab', 2)
250 'ab cx yz'
251 """
252 return caesar_encipher(message, -shift)
253
254 def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
255 """Encipher a letter, given a multiplier and adder
256
257 >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
258 for l in string.ascii_uppercase])
259 'HKNQTWZCFILORUXADGJMPSVYBE'
260 >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
261 for l in string.ascii_uppercase])
262 'FILORUXADGJMPSVYBEHKNQTWZC'
263 """
264 if letter in string.ascii_letters:
265 if letter in string.ascii_uppercase:
266 alphabet_start = ord('A')
267 else:
268 alphabet_start = ord('a')
269 letter_number = ord(letter) - alphabet_start
270 if one_based: letter_number += 1
271 cipher_number = (letter_number * multiplier + adder) % 26
272 if one_based: cipher_number -= 1
273 return chr(cipher_number % 26 + alphabet_start)
274 else:
275 return letter
276
277 def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
278 """Encipher a letter, given a multiplier and adder
279
280 >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
281 for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
282 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
283 >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
284 for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
285 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
286 """
287 if letter in string.ascii_letters:
288 if letter in string.ascii_uppercase:
289 alphabet_start = ord('A')
290 else:
291 alphabet_start = ord('a')
292 cipher_number = ord(letter) - alphabet_start
293 if one_based: cipher_number += 1
294 plaintext_number = ( modular_division_table[multiplier]
295 [(cipher_number - adder) % 26] )
296 if one_based: plaintext_number -= 1
297 return chr(plaintext_number % 26 + alphabet_start)
298 else:
299 return letter
300
301 def affine_encipher(message, multiplier=1, adder=0, one_based=True):
302 """Encipher a message
303
304 >>> affine_encipher('hours passed during which jerico tried every ' \
305 'trick he could think of', 15, 22, True)
306 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
307 """
308 enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
309 for l in message]
310 return ''.join(enciphered)
311
312 def affine_decipher(message, multiplier=1, adder=0, one_based=True):
313 """Decipher a message
314
315 >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
316 'jfaoe ls omytd jlaxe mh', 15, 22, True)
317 'hours passed during which jerico tried every trick he could think of'
318 """
319 enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
320 for l in message]
321 return ''.join(enciphered)
322
323
324 def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
325 """Find the cipher alphabet given a keyword.
326 wrap_alphabet controls how the rest of the alphabet is added
327 after the keyword.
328 0 : from 'a'
329 1 : from the last letter in the sanitised keyword
330 2 : from the largest letter in the sanitised keyword
331
332 >>> keyword_cipher_alphabet_of('bayes')
333 'bayescdfghijklmnopqrtuvwxz'
334 >>> keyword_cipher_alphabet_of('bayes', 0)
335 'bayescdfghijklmnopqrtuvwxz'
336 >>> keyword_cipher_alphabet_of('bayes', 1)
337 'bayestuvwxzcdfghijklmnopqr'
338 >>> keyword_cipher_alphabet_of('bayes', 2)
339 'bayeszcdfghijklmnopqrtuvwx'
340 """
341 if wrap_alphabet == 0:
342 cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
343 string.ascii_lowercase))
344 else:
345 if wrap_alphabet == 1:
346 last_keyword_letter = deduplicate(sanitise(keyword))[-1]
347 else:
348 last_keyword_letter = sorted(sanitise(keyword))[-1]
349 last_keyword_position = string.ascii_lowercase.find(
350 last_keyword_letter) + 1
351 cipher_alphabet = ''.join(
352 deduplicate(sanitise(keyword) +
353 string.ascii_lowercase[last_keyword_position:] +
354 string.ascii_lowercase))
355 return cipher_alphabet
356
357
358 def keyword_encipher(message, keyword, wrap_alphabet=0):
359 """Enciphers a message with a keyword substitution cipher.
360 wrap_alphabet controls how the rest of the alphabet is added
361 after the keyword.
362 0 : from 'a'
363 1 : from the last letter in the sanitised keyword
364 2 : from the largest letter in the sanitised keyword
365
366 >>> keyword_encipher('test message', 'bayes')
367 'rsqr ksqqbds'
368 >>> keyword_encipher('test message', 'bayes', 0)
369 'rsqr ksqqbds'
370 >>> keyword_encipher('test message', 'bayes', 1)
371 'lskl dskkbus'
372 >>> keyword_encipher('test message', 'bayes', 2)
373 'qspq jsppbcs'
374 """
375 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
376 cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
377 return message.lower().translate(cipher_translation)
378
379 def keyword_decipher(message, keyword, wrap_alphabet=0):
380 """Deciphers a message with a keyword substitution cipher.
381 wrap_alphabet controls how the rest of the alphabet is added
382 after the keyword.
383 0 : from 'a'
384 1 : from the last letter in the sanitised keyword
385 2 : from the largest letter in the sanitised keyword
386
387 >>> keyword_decipher('rsqr ksqqbds', 'bayes')
388 'test message'
389 >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
390 'test message'
391 >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
392 'test message'
393 >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
394 'test message'
395 """
396 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
397 cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
398 return message.lower().translate(cipher_translation)
399
400 def scytale_encipher(message, rows):
401 """Enciphers using the scytale transposition cipher.
402 Message is padded with spaces to allow all rows to be the same length.
403
404 >>> scytale_encipher('thequickbrownfox', 3)
405 'tcnhkfeboqrxuo iw '
406 >>> scytale_encipher('thequickbrownfox', 4)
407 'tubnhirfecooqkwx'
408 >>> scytale_encipher('thequickbrownfox', 5)
409 'tubn hirf ecoo qkwx '
410 >>> scytale_encipher('thequickbrownfox', 6)
411 'tqcrnxhukof eibwo '
412 >>> scytale_encipher('thequickbrownfox', 7)
413 'tqcrnx hukof eibwo '
414 """
415 if len(message) % rows != 0:
416 message += ' '*(rows - len(message) % rows)
417 row_length = round(len(message) / rows)
418 slices = [message[i:i+row_length]
419 for i in range(0, len(message), row_length)]
420 return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])
421
422 def scytale_decipher(message, rows):
423 """Deciphers using the scytale transposition cipher.
424 Assumes the message is padded so that all rows are the same length.
425
426 >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
427 'thequickbrownfox '
428 >>> scytale_decipher('tubnhirfecooqkwx', 4)
429 'thequickbrownfox'
430 >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
431 'thequickbrownfox '
432 >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
433 'thequickbrownfox '
434 >>> scytale_decipher('tqcrnx hukof eibwo ', 7)
435 'thequickbrownfox '
436 """
437 cols = round(len(message) / rows)
438 columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
439 return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
440
441
442 def transpositions_of(keyword):
443 """Finds the transpostions given by a keyword. For instance, the keyword
444 'clever' rearranges to 'celrv', so the first column (0) stays first, the
445 second column (1) moves to third, the third column (2) moves to second,
446 and so on.
447
448 >>> transpositions_of('clever')
449 [0, 2, 1, 4, 3]
450 """
451 key = deduplicate(keyword)
452 transpositions = [key.index(l) for l in sorted(key)]
453 return transpositions
454
455 def column_transposition_encipher(message, keyword, fillvalue=' '):
456 """Enciphers using the column transposition cipher.
457 Message is padded to allow all rows to be the same length.
458
459 >>> column_transposition_encipher('hellothere', 'clever')
460 'hleolteher'
461 >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
462 'hleolthre!e!'
463 """
464 return column_transposition_worker(message, keyword, encipher=True,
465 fillvalue=fillvalue)
466
467 def column_transposition_decipher(message, keyword, fillvalue=' '):
468 """Deciphers using the column transposition cipher.
469 Message is padded to allow all rows to be the same length.
470
471 >>> column_transposition_decipher('hleolteher', 'clever')
472 'hellothere'
473 >>> column_transposition_decipher('hleolthre!e!', 'cleverly', fillvalue='?')
474 'hellothere!!'
475 """
476 return column_transposition_worker(message, keyword, encipher=False,
477 fillvalue=fillvalue)
478
479 def column_transposition_worker(message, keyword,
480 encipher=True, fillvalue=' '):
481 """Does the actual work of the column transposition cipher.
482 Message is padded with spaces to allow all rows to be the same length.
483
484 >>> column_transposition_worker('hellothere', 'clever')
485 'hleolteher'
486 >>> column_transposition_worker('hellothere', 'clever', encipher=True)
487 'hleolteher'
488 >>> column_transposition_worker('hleolteher', 'clever', encipher=False)
489 'hellothere'
490 """
491 transpositions = transpositions_of(keyword)
492 columns = every_nth(message, len(transpositions), fillvalue=fillvalue)
493 if encipher:
494 transposed_columns = transpose(columns, transpositions)
495 else:
496 transposed_columns = untranspose(columns, transpositions)
497 return combine_every_nth(transposed_columns)
498
499 def vigenere_encipher(message, keyword):
500
501
502
503
504 def caesar_break(message,
505 metric=norms.euclidean_distance,
506 target_counts=normalised_english_counts,
507 message_frequency_scaling=norms.normalise):
508 """Breaks a Caesar cipher using frequency analysis
509
510 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
511 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
512 (4, 0.31863952890183...)
513 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
514 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
515 (19, 0.42152901235832...)
516 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
517 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
518 (13, 0.316029208075451...)
519 """
520 sanitised_message = sanitise(message)
521 best_shift = 0
522 best_fit = float("inf")
523 for shift in range(26):
524 plaintext = caesar_decipher(sanitised_message, shift)
525 counts = message_frequency_scaling(letter_frequencies(plaintext))
526 fit = metric(target_counts, counts)
527 logger.debug('Caesar break attempt using key {0} gives fit of {1} '
528 'and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
529 if fit < best_fit:
530 best_fit = fit
531 best_shift = shift
532 logger.info('Caesar break best fit: key {0} gives fit of {1} and '
533 'decrypt starting: {2}'.format(best_shift, best_fit,
534 caesar_decipher(sanitised_message, best_shift)[:50]))
535 return best_shift, best_fit
536
537 def affine_break(message,
538 metric=norms.euclidean_distance,
539 target_counts=normalised_english_counts,
540 message_frequency_scaling=norms.normalise):
541 """Breaks an affine cipher using frequency analysis
542
543 >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
544 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
545 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
546 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
547 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
548 'clm ckuxj.') # doctest: +ELLIPSIS
549 ((15, 22, True), 0.23570361818655...)
550 """
551 sanitised_message = sanitise(message)
552 best_multiplier = 0
553 best_adder = 0
554 best_one_based = True
555 best_fit = float("inf")
556 for one_based in [True, False]:
557 for multiplier in range(1, 26, 2):
558 for adder in range(26):
559 plaintext = affine_decipher(sanitised_message,
560 multiplier, adder, one_based)
561 counts = message_frequency_scaling(letter_frequencies(plaintext))
562 fit = metric(target_counts, counts)
563 logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
564 'gives fit of {3} and decrypt starting: {4}'.
565 format(multiplier, adder, one_based, fit,
566 plaintext[:50]))
567 if fit < best_fit:
568 best_fit = fit
569 best_multiplier = multiplier
570 best_adder = adder
571 best_one_based = one_based
572 logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
573 'and decrypt starting: {4}'.format(
574 best_multiplier, best_adder, best_one_based, best_fit,
575 affine_decipher(sanitised_message, best_multiplier,
576 best_adder, best_one_based)[:50]))
577 return (best_multiplier, best_adder, best_one_based), best_fit
578
579 def keyword_break(message,
580 wordlist=keywords,
581 metric=norms.euclidean_distance,
582 target_counts=normalised_english_counts,
583 message_frequency_scaling=norms.normalise):
584 """Breaks a keyword substitution cipher using a dictionary and
585 frequency analysis
586
587 >>> keyword_break(keyword_encipher('this is a test message for the ' \
588 'keyword decipherment', 'elephant', 1), \
589 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
590 (('elephant', 1), 0.41643991598441...)
591 """
592 best_keyword = ''
593 best_wrap_alphabet = True
594 best_fit = float("inf")
595 for wrap_alphabet in range(3):
596 for keyword in wordlist:
597 plaintext = keyword_decipher(message, keyword, wrap_alphabet)
598 counts = message_frequency_scaling(letter_frequencies(plaintext))
599 fit = metric(target_counts, counts)
600 logger.debug('Keyword break attempt using key {0} (wrap={1}) '
601 'gives fit of {2} and decrypt starting: {3}'.format(
602 keyword, wrap_alphabet, fit,
603 sanitise(plaintext)[:50]))
604 if fit < best_fit:
605 best_fit = fit
606 best_keyword = keyword
607 best_wrap_alphabet = wrap_alphabet
608 logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
609 '{2} and decrypt starting: {3}'.format(best_keyword,
610 best_wrap_alphabet, best_fit, sanitise(
611 keyword_decipher(message, best_keyword,
612 best_wrap_alphabet))[:50]))
613 return (best_keyword, best_wrap_alphabet), best_fit
614
615 def keyword_break_mp(message,
616 wordlist=keywords,
617 metric=norms.euclidean_distance,
618 target_counts=normalised_english_counts,
619 message_frequency_scaling=norms.normalise,
620 chunksize=500):
621 """Breaks a keyword substitution cipher using a dictionary and
622 frequency analysis
623
624 >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
625 'keyword decipherment', 'elephant', 1), \
626 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
627 (('elephant', 1), 0.41643991598441...)
628 """
629 with Pool() as pool:
630 helper_args = [(message, word, wrap, metric, target_counts,
631 message_frequency_scaling)
632 for word in wordlist for wrap in range(3)]
633 # Gotcha: the helper function here needs to be defined at the top level
634 # (limitation of Pool.starmap)
635 breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
636 return min(breaks, key=lambda k: k[1])
637
638 def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
639 message_frequency_scaling):
640 plaintext = keyword_decipher(message, keyword, wrap_alphabet)
641 counts = message_frequency_scaling(letter_frequencies(plaintext))
642 fit = metric(target_counts, counts)
643 logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
644 '{2} and decrypt starting: {3}'.format(keyword,
645 wrap_alphabet, fit, sanitise(plaintext)[:50]))
646 return (keyword, wrap_alphabet), fit
647
648 def scytale_break(message,
649 metric=norms.euclidean_distance,
650 target_counts=normalised_english_bigram_counts,
651 message_frequency_scaling=norms.normalise):
652 """Breaks a Scytale cipher
653
654 >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
655 'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
656 'aek') # doctest: +ELLIPSIS
657 (6, 0.83453041115025...)
658 """
659 best_key = 0
660 best_fit = float("inf")
661 ngram_length = len(next(iter(target_counts.keys())))
662 for key in range(1, 20):
663 if len(message) % key == 0:
664 plaintext = scytale_decipher(message, key)
665 counts = message_frequency_scaling(frequencies(
666 ngrams(sanitise(plaintext), ngram_length)))
667 fit = metric(target_counts, counts)
668 logger.debug('Scytale break attempt using key {0} gives fit of '
669 '{1} and decrypt starting: {2}'.format(key,
670 fit, sanitise(plaintext)[:50]))
671 if fit < best_fit:
672 best_fit = fit
673 best_key = key
674 logger.info('Scytale break best fit with key {0} gives fit of {1} and '
675 'decrypt starting: {2}'.format(best_key, best_fit,
676 sanitise(scytale_decipher(message, best_key))[:50]))
677 return best_key, best_fit
678
679 def column_transposition_break(message,
680 wordlist=keywords,
681 metric=norms.euclidean_distance,
682 target_counts=normalised_english_bigram_counts,
683 message_frequency_scaling=norms.normalise):
684 """Breaks a column transposition cipher using a dictionary and
685 n-gram frequency analysis
686
687 >>> column_transposition_break(column_transposition_encipher(sanitise( \
688 "Turing's homosexuality resulted in a criminal prosecution in 1952, \
689 when homosexual acts were still illegal in the United Kingdom. "), \
690 'encipher'), \
691 wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS
692 ('encipher', 0.898128626285...)
693 >>> column_transposition_break(column_transposition_encipher(sanitise( \
694 "Turing's homosexuality resulted in a criminal prosecution in 1952, " \
695 "when homosexual acts were still illegal in the United Kingdom."), \
696 'encipher'), \
697 wordlist=['encipher', 'keyword', 'fourteen'], \
698 target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
699 ('encipher', 1.1958792913127...)
700 """
701 best_keyword = ''
702 best_fit = float("inf")
703 ngram_length = len(next(iter(target_counts.keys())))
704 for keyword in wordlist:
705 if len(message) % len(deduplicate(keyword)) == 0:
706 plaintext = column_transposition_decipher(message, keyword)
707 counts = message_frequency_scaling(frequencies(
708 ngrams(sanitise(plaintext), ngram_length)))
709 fit = metric(target_counts, counts)
710 logger.debug('Column transposition break attempt using key {0} '
711 'gives fit of {1} and decrypt starting: {2}'.format(
712 keyword, fit,
713 sanitise(plaintext)[:50]))
714 if fit < best_fit:
715 best_fit = fit
716 best_keyword = keyword
717 logger.info('Column transposition break best fit with key {0} gives fit '
718 'of {1} and decrypt starting: {2}'.format(best_keyword,
719 best_fit, sanitise(
720 column_transposition_decipher(message,
721 best_keyword))[:50]))
722 return best_keyword, best_fit
723
724
725 def column_transposition_break_mp(message,
726 wordlist=keywords,
727 metric=norms.euclidean_distance,
728 target_counts=normalised_english_bigram_counts,
729 message_frequency_scaling=norms.normalise,
730 chunksize=500):
731 """Breaks a column transposition cipher using a dictionary and
732 n-gram frequency analysis
733
734 >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
735 "Turing's homosexuality resulted in a criminal prosecution in 1952, \
736 when homosexual acts were still illegal in the United Kingdom. "), \
737 'encipher'), \
738 wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS
739 ('encipher', 0.898128626285...)
740 >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
741 "Turing's homosexuality resulted in a criminal prosecution in 1952, " \
742 "when homosexual acts were still illegal in the United Kingdom."), \
743 'encipher'), \
744 wordlist=['encipher', 'keyword', 'fourteen'], \
745 target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
746 ('encipher', 1.1958792913127...)
747 """
748 ngram_length = len(next(iter(target_counts.keys())))
749 with Pool() as pool:
750 helper_args = [(message, word, metric, target_counts, ngram_length,
751 message_frequency_scaling)
752 for word in wordlist]
753 # Gotcha: the helper function here needs to be defined at the top level
754 # (limitation of Pool.starmap)
755 breaks = pool.starmap(column_transposition_break_worker, helper_args, chunksize)
756 return min(breaks, key=lambda k: k[1])
757
758 def column_transposition_break_worker(message, keyword, metric, target_counts,
759 ngram_length, message_frequency_scaling):
760 plaintext = column_transposition_decipher(message, keyword)
761 counts = message_frequency_scaling(frequencies(
762 ngrams(sanitise(plaintext), ngram_length)))
763 fit = metric(target_counts, counts)
764 logger.debug('Column transposition break attempt using key {0} '
765 'gives fit of {1} and decrypt starting: {2}'.format(
766 keyword, fit,
767 sanitise(plaintext)[:50]))
768 return keyword, fit
769
770
771
772 if __name__ == "__main__":
773 import doctest
774 doctest.testmod()