6 from itertools
import zip_longest
, repeat
7 from segment
import segment
8 from multiprocessing
import Pool
13 # c5a = open('2012/5a.ciphertext', 'r').read()
14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
15 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1
# Module-level logger: warnings and worse are appended to cipher.log.
# Uncomment one of the lines below for chattier diagnostics.
logger = logging.getLogger(__name__)
_log_file_handler = logging.FileHandler('cipher.log')
logger.addHandler(_log_file_handler)
logger.setLevel(logging.WARNING)
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)
def _load_ngram_counts(filename):
    """Load a tab-separated 'ngram<TAB>count' file into a defaultdict(int)
    mapping each n-gram to its integer count of occurrences in English."""
    counts = collections.defaultdict(int)
    with open(filename, 'r') as f:
        for line in f:
            (ngram, count) = line.split("\t")
            counts[ngram] = int(count)
    return counts

# English letter, bigram and trigram frequency tables, plus normalised
# versions (scaled by norms.normalise so each forms a distribution).
english_counts = _load_ngram_counts('count_1l.txt')
normalised_english_counts = norms.normalise(english_counts)

english_bigram_counts = _load_ngram_counts('count_2l.txt')
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)

english_trigram_counts = _load_ngram_counts('count_3l.txt')
normalised_english_trigram_counts = norms.normalise(english_trigram_counts)
# Candidate keywords for the dictionary-based attacks, one word per line.
with open('words.txt', 'r') as f:
    keywords = [word.rstrip() for word in f]
# Lookup table for modular division, used to invert the multiplication
# step of the affine cipher: modular_division_table[b][c] is the a with
# (a * b) % 26 == c.
modular_division_table = [[0]*26 for _ in range(26)]
for a in range(26):
    for b in range(26):
        c = (a * b) % 26
        modular_division_table[b][c] = a
def sanitise(text):
    """Remove all non-alphabetic characters and convert the text to lowercase.

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    return ''.join(c.lower() for c in text if c in string.ascii_letters)
def ngrams(text, n):
    """Return all overlapping n-grams (length-n slices) of a text.

    >>> ngrams('abcde', 2)
    ['ab', 'bc', 'cd', 'de']
    """
    last_start = len(text) - n
    return [text[start:start + n] for start in range(last_start + 1)]
def every_nth(text, n, fillvalue=''):
    """Returns n strings, each of which consists of every nth character,
    starting with the 0th, 1st, 2nd, ... (n-1)th character.  Short final
    rows are padded with fillvalue.

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
    """
    rows = [text[start:start + n] for start in range(0, len(text), n)]
    columns = zip_longest(*rows, fillvalue=fillvalue)
    return [''.join(column) for column in columns]
def combine_every_nth(split_text):
    """Reforms a text that was split into every_nth strings.

    >>> combine_every_nth(['ace', 'bdf'])
    'abcdef'
    """
    interleaved = zip_longest(*split_text, fillvalue='')
    return ''.join(''.join(chars) for chars in interleaved)
def transpose(items, transposition):
    """Moves items around according to the given transposition: output
    position p receives items[transposition[p]].

    >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
    ['d', 'b', 'c', 'a']
    >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
    [13, 12, 14, 11, 15, 10]
    """
    # Filling transposed[p] = items[t] for each (p, t) pair is exactly a
    # gather by index, so a comprehension suffices.
    return [items[t] for t in transposition]
def untranspose(items, transposition):
    """Undoes a transpose: output position transposition[p] receives
    items[p], so untranspose(transpose(xs, t), t) == xs.

    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
    ['a', 'b', 'c', 'd']
    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
    [10, 11, 12, 13, 14, 15]
    """
    transposed = [''] * len(transposition)
    for p, t in enumerate(transposition):
        transposed[t] = items[p]
    return transposed
def frequencies(text):
    """Count the number of occurrences of each item in text (characters of
    a string, or elements of any iterable such as a list of n-grams).
    Returns a defaultdict(int), so absent items read as 0.

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    """
    counts = collections.defaultdict(int)
    for item in text:
        counts[item] += 1
    return counts

# Alias for readability at call sites that deal with single letters.
letter_frequencies = frequencies
def deduplicate(text):
    """Return the items of text as a list, keeping only the first
    occurrence of each and preserving the order of first appearance."""
    seen = set()
    unique = []
    for character in text:
        if character not in seen:
            seen.add(character)
            unique.append(character)
    return unique
def caesar_encipher_letter(letter, shift):
    """Encipher a letter, given a shift amount.  Case is preserved and
    non-alphabetic characters are returned unchanged; shifts may be
    negative and wrap around the alphabet.

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        return chr(((ord(letter) - alphabet_start + shift) % 26) +
                   alphabet_start)
    else:
        return letter
def caesar_decipher_letter(letter, shift):
    """Decipher a letter, given a shift amount: the inverse of
    caesar_encipher_letter with the same shift.

    >>> caesar_decipher_letter('b', 1)
    'a'
    """
    return caesar_encipher_letter(letter, -shift)
def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift; letters
    shift, everything else passes through unchanged.

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    """
    return ''.join(caesar_encipher_letter(character, shift)
                   for character in message)
def caesar_decipher(message, shift):
    """Decipher a message enciphered with the Caesar cipher of given shift.

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    """
    return caesar_encipher(message, -shift)
def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Encipher a letter, given a multiplier and adder.  one_based selects
    whether 'a' is numbered 1 (True) or 0 (False).  Case is preserved and
    non-alphabetic characters are returned unchanged.

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
                 for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
                 for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        letter_number = ord(letter) - alphabet_start
        if one_based: letter_number += 1
        cipher_number = (letter_number * multiplier + adder) % 26
        if one_based: cipher_number -= 1
        # cipher_number may be -1 after the one_based adjustment, so take
        # % 26 again before converting back to a character.
        return chr(cipher_number % 26 + alphabet_start)
    else:
        return letter
def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter: the inverse of affine_encipher_letter with the
    same key.  Case is preserved and non-alphabetic characters are
    returned unchanged.

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
                 for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
                 for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        cipher_number = ord(letter) - alphabet_start
        if one_based: cipher_number += 1
        # Undo the multiplication via the precomputed table:
        # modular_division_table[m][c] is the a with (a * m) % 26 == c.
        plaintext_number = ( modular_division_table[multiplier]
                                                   [(cipher_number - adder) % 26] )
        if one_based: plaintext_number -= 1
        return chr(plaintext_number % 26 + alphabet_start)
    else:
        return letter
def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a message with an affine cipher, applying
    affine_encipher_letter to each character.

    >>> affine_encipher('hours passed during which jerico tried every ' \
            'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(enciphered)
def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a message enciphered with an affine cipher, applying
    affine_decipher_letter to each character.

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
            'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    deciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(deciphered)
def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Find the cipher alphabet given a keyword.
    wrap_alphabet controls how the rest of the alphabet is added:
      0 : from 'a'
      1 : from the last letter in the sanitised keyword
      2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    if wrap_alphabet == 0:
        cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
                                              string.ascii_lowercase))
    else:
        if wrap_alphabet == 1:
            last_keyword_letter = deduplicate(sanitise(keyword))[-1]
        else:
            last_keyword_letter = sorted(sanitise(keyword))[-1]
        last_keyword_position = string.ascii_lowercase.find(
            last_keyword_letter) + 1
        # Appending the full alphabet at the end guarantees every letter
        # appears; deduplicate keeps only first occurrences.
        cipher_alphabet = ''.join(
            deduplicate(sanitise(keyword) +
                        string.ascii_lowercase[last_keyword_position:] +
                        string.ascii_lowercase))
    return cipher_alphabet
def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Enciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added:
      0 : from 'a'
      1 : from the last letter in the sanitised keyword
      2 : from the largest letter in the sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    """
    substitution_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    encipher_table = ''.maketrans(string.ascii_lowercase,
                                  substitution_alphabet)
    return message.lower().translate(encipher_table)
def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Deciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added:
      0 : from 'a'
      1 : from the last letter in the sanitised keyword
      2 : from the largest letter in the sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    """
    substitution_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    decipher_table = ''.maketrans(substitution_alphabet,
                                  string.ascii_lowercase)
    return message.lower().translate(decipher_table)
def scytale_encipher(message, rows):
    """Enciphers using the scytale transposition cipher.
    Message is padded with spaces to allow all rows to be the same length.

    >>> scytale_encipher('thequickbrownfox', 4)
    'tubnhirfecooqkwx'
    >>> scytale_encipher('thequickbrownfox', 5)
    'tubn hirf ecoo qkwx '
    """
    remainder = len(message) % rows
    if remainder != 0:
        message += ' ' * (rows - remainder)
    row_length = round(len(message) / rows)
    rows_of_text = [message[start:start + row_length]
                    for start in range(0, len(message), row_length)]
    # Reading the rows column-by-column produces the ciphertext.
    return ''.join(''.join(column)
                   for column in zip_longest(*rows_of_text, fillvalue=''))
def scytale_decipher(message, rows):
    """Deciphers using the scytale transposition cipher.
    Assumes the message is padded so that all rows are the same length.

    >>> scytale_decipher('tubnhirfecooqkwx', 4)
    'thequickbrownfox'
    """
    cols = round(len(message) / rows)
    # Cut the ciphertext into column strings, then read them row-by-row.
    columns = (message[start:start + rows]
               for start in range(0, cols * rows, rows))
    return ''.join(''.join(row) for row in zip_longest(*columns, fillvalue=''))
def transpositions_of(keyword):
    """Finds the transpositions given by a keyword.  The deduplicated
    keyword's letters are ranked alphabetically; entry i of the result is
    the original position of the i-th smallest letter.

    >>> transpositions_of('clever')
    [0, 2, 1, 4, 3]
    """
    key = deduplicate(keyword)
    return [key.index(letter) for letter in sorted(key)]
def column_transposition_encipher(message, keyword, fillvalue=' '):
    """Enciphers using the column transposition cipher.
    Message is padded with fillvalue to allow all rows to be the same
    length.
    """
    return column_transposition_worker(message, keyword, encipher=True,
                                       fillvalue=fillvalue)
def column_transposition_decipher(message, keyword, fillvalue=' '):
    """Deciphers using the column transposition cipher.
    Message is padded with fillvalue to allow all rows to be the same
    length.
    """
    return column_transposition_worker(message, keyword, encipher=False,
                                       fillvalue=fillvalue)
def column_transposition_worker(message, keyword,
                                encipher=True, fillvalue=' '):
    """Does the actual work of the column transposition cipher.
    Message is padded with fillvalue to allow all rows to be the same
    length.  encipher selects the forward (transpose) or inverse
    (untranspose) permutation of the columns.
    """
    transpositions = transpositions_of(keyword)
    columns = every_nth(message, len(transpositions), fillvalue=fillvalue)
    if encipher:
        transposed_columns = transpose(columns, transpositions)
    else:
        transposed_columns = untranspose(columns, transpositions)
    return combine_every_nth(transposed_columns)
def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis: tries all 26
    shifts and keeps the one whose letter frequencies are closest (by
    metric, smaller is better) to target_counts.
    Returns (best_shift, best_fit).
    """
    sanitised_message = sanitise(message)
    best_shift = 0
    best_fit = float("inf")
    for shift in range(26):
        plaintext = caesar_decipher(sanitised_message, shift)
        counts = message_frequency_scaling(letter_frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
        if fit < best_fit:
            best_fit = fit
            best_shift = shift
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis: tries every
    (multiplier, adder, one_based) key and keeps the one whose decrypt's
    letter frequencies are closest (by metric) to target_counts.
    Returns ((multiplier, adder, one_based), fit).
    """
    sanitised_message = sanitise(message)
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    # Only odd multipliers are coprime with 26 and hence invertible.
    for one_based in [True, False]:
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                counts = message_frequency_scaling(letter_frequencies(plaintext))
                fit = metric(target_counts, counts)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit
def keyword_break(message,
                  wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis: tries every keyword in wordlist with each of the
    three wrap_alphabet modes, keeping the decrypt whose letter
    frequencies are closest (by metric) to target_counts.
    Returns ((keyword, wrap_alphabet), fit).
    """
    best_keyword = ''
    best_wrap_alphabet = True
    best_fit = float("inf")
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            counts = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_counts, counts)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit
def keyword_break_mp(message,
                     wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis, spreading the scoring of candidates across a
    multiprocessing Pool.  Returns ((keyword, wrap_alphabet), fit).
    """
    with Pool() as pool:
        helper_args = [(message, word, wrap, metric, target_counts,
                        message_frequency_scaling)
                       for word in wordlist for wrap in range(3)]
        # Gotcha: the helper function here needs to be defined at the top level
        # (limitation of Pool.starmap)
        breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
        # Each result is ((keyword, wrap), fit); smallest fit wins.
        return min(breaks, key=lambda k: k[1])
def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    """Score a single (keyword, wrap_alphabet) candidate for
    keyword_break_mp.  Lives at module top level so Pool.starmap can
    pickle it.  Returns ((keyword, wrap_alphabet), fit).
    """
    decrypted = keyword_decipher(message, keyword, wrap_alphabet)
    scaled_counts = message_frequency_scaling(letter_frequencies(decrypted))
    fit = metric(target_counts, scaled_counts)
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(decrypted)[:50]))
    return (keyword, wrap_alphabet), fit
def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a Scytale cipher: tries each row count that divides the
    message length and keeps the decrypt whose bigram frequencies are
    closest (by metric) to target_counts.  Returns (key, fit).
    """
    best_key = 0
    best_fit = float("inf")
    for key in range(1, 20):
        if len(message) % key == 0:
            plaintext = scytale_decipher(message, key)
            counts = message_frequency_scaling(frequencies(
                ngrams(sanitise(plaintext), 2)))
            fit = metric(target_counts, counts)
            logger.debug('Scytale break attempt using key {0} gives fit of '
                         '{1} and decrypt starting: {2}'.format(key,
                             fit, sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_key = key
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit
def column_transposition_break(message,
                               wordlist=keywords,
                               metric=norms.euclidean_distance,
                               target_counts=normalised_english_bigram_counts,
                               message_frequency_scaling=norms.normalise):
    """Breaks a column transposition cipher using a dictionary and
    n-gram frequency analysis.  The n-gram length is inferred from the
    keys of target_counts, so passing trigram counts scores trigrams.
    Returns (keyword, fit).
    """
    best_keyword = ''
    best_fit = float("inf")
    ngram_length = len(next(iter(target_counts.keys())))
    for keyword in wordlist:
        # Only keywords whose (deduplicated) length divides the message
        # length can have produced this ciphertext.
        if len(message) % len(deduplicate(keyword)) == 0:
            plaintext = column_transposition_decipher(message, keyword)
            counts = message_frequency_scaling(frequencies(
                ngrams(sanitise(plaintext), ngram_length)))
            fit = metric(target_counts, counts)
            logger.debug('Column transposition break attempt using key {0} '
                         'gives fit of {1} and decrypt starting: {2}'.format(
                             keyword, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
    logger.info('Column transposition break best fit with key {0} gives fit '
                'of {1} and decrypt starting: {2}'.format(best_keyword,
                    best_fit, sanitise(
                        column_transposition_decipher(message,
                            best_keyword))[:50]))
    return best_keyword, best_fit
711 if __name__
== "__main__":