6 from itertools
import zip_longest
, repeat
7 from segment
import segment
8 from multiprocessing
import Pool
13 # c5a = open('2012/5a.ciphertext', 'r').read()
14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
15 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1
# Module-level logger: messages at WARNING and above are appended to
# cipher.log.  Uncomment one of the lines below for more verbose logging.
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)
# English letter (unigram) counts, loaded from a file of
# "letter<TAB>count" lines, then normalised into a frequency distribution
# used as the target profile by the frequency-analysis breakers.
english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
    for line in f:
        (letter, count) = line.split("\t")
        english_counts[letter] = int(count)
normalised_english_counts = norms.normalise(english_counts)
# English bigram counts, loaded from a file of "bigram<TAB>count" lines,
# then normalised.  Used as the target profile for scytale_break.
english_bigram_counts = collections.defaultdict(int)
with open('count_2l.txt', 'r') as f:
    for line in f:
        (bigram, count) = line.split("\t")
        english_bigram_counts[bigram] = int(count)
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
# Candidate keywords for the dictionary attacks, one word per line.
with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]
# Table for modular division: modular_division_table[b][c] == a
# whenever (a * b) % 26 == c.  Used by affine_decipher_letter to undo the
# multiplicative part of an affine cipher without computing inverses on
# the fly.
modular_division_table = [[0]*26 for x in range(26)]
for a in range(26):
    for b in range(26):
        c = (a * b) % 26
        modular_division_table[b][c] = a
def sanitise(text):
    """Remove all non-alphabetic characters and convert the text to lowercase

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    sanitised = [c.lower() for c in text if c in string.ascii_letters]
    return ''.join(sanitised)
def ngrams(text, n):
    """Returns all n-grams of a text

    >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
    ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
     'nf', 'fo', 'ox']
    >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
    ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
     'rown', 'ownf', 'wnfo', 'nfox']
    """
    # One window of width n at every starting position where it still fits.
    return [text[i:i+n] for i in range(len(text)-n+1)]
def every_nth(text, n, fillvalue=''):
    """Returns n strings, each of which consists of every nth character,
    starting with the 0th, 1st, 2nd, ... (n-1)th character

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
    """
    # Chop the text into rows of length n, then read down the columns.
    split_text = [text[i:i+n] for i in range(0, len(text), n)]
    return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
def combine_every_nth(split_text):
    """Reforms a text split into every_nth strings

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    # Interleave the strings character by character; shorter strings simply
    # stop contributing (fillvalue='').
    return ''.join([''.join(l)
                    for l in zip_longest(*split_text, fillvalue='')])
def transpose(items, transposition):
    """Moves items around according to the given transposition

    >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
    ['d', 'b', 'c', 'a']
    >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
    [13, 12, 14, 11, 15, 10]
    """
    transposed = list(repeat('', len(transposition)))
    for p, t in enumerate(transposition):
        # Output position p receives the item at input position t.
        transposed[p] = items[t]
    return transposed
def untranspose(items, transposition):
    """Undoes a transpose

    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
    ['a', 'b', 'c', 'd']
    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
    [10, 11, 12, 13, 14, 15]
    """
    transposed = list(repeat('', len(transposition)))
    for p, t in enumerate(transposition):
        # Inverse of transpose: the item now at position p came from t.
        transposed[t] = items[p]
    return transposed
def frequencies(text):
    """Count the number of occurrences of each character in text

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
        'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
     ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
     ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
     ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    """
    # Works on any iterable of hashable items (strings, lists of bigrams, ...).
    counts = collections.defaultdict(int)
    for c in text:
        counts[c] += 1
    return counts
# Alias: a more descriptive name for call sites that count letters.
letter_frequencies = frequencies
def deduplicate(text):
    """Return the characters of text as a list, in first-occurrence order,
    with duplicates removed."""
    seen = set()
    unique = []
    for character in text:
        if character not in seen:
            seen.add(character)
            unique.append(character)
    return unique
def caesar_encipher_letter(letter, shift):
    """Encipher a letter, given a shift amount

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('z', -1)
    'y'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    if letter in string.ascii_letters:
        # Work relative to the start of the relevant alphabet so case is
        # preserved; the % 26 handles wrap-around in both directions.
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        return chr(((ord(letter) - alphabet_start + shift) % 26) +
                   alphabet_start)
    else:
        # Non-letters pass through unchanged.
        return letter
def caesar_decipher_letter(letter, shift):
    """Decipher a letter, given a shift amount

    >>> caesar_decipher_letter('b', 1)
    'a'
    >>> caesar_decipher_letter('b', 2)
    'z'
    """
    # Deciphering is just enciphering with the opposite shift.
    return caesar_encipher_letter(letter, -shift)
def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('abcxyz', 2)
    'cdezab'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    """
    enciphered = [caesar_encipher_letter(l, shift) for l in message]
    return ''.join(enciphered)
def caesar_decipher(message, shift):
    """Decipher a message enciphered with the Caesar cipher of given shift

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    """
    return caesar_encipher(message, -shift)
def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Encipher a letter, given a multiplier and adder

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
            for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
            for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        letter_number = ord(letter) - alphabet_start
        # one_based ciphers number the alphabet a=1..z=26 rather than 0..25.
        if one_based: letter_number += 1
        cipher_number = (letter_number * multiplier + adder) % 26
        if one_based: cipher_number -= 1
        return chr(cipher_number % 26 + alphabet_start)
    else:
        # Non-letters pass through unchanged.
        return letter
def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter, given a multiplier and adder

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
            for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
            for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        cipher_number = ord(letter) - alphabet_start
        if one_based: cipher_number += 1
        # Undo the multiplication via the precomputed modular division table:
        # modular_division_table[m][c] is the a with (a * m) % 26 == c.
        plaintext_number = ( modular_division_table[multiplier]
                                                   [(cipher_number - adder) % 26] )
        if one_based: plaintext_number -= 1
        return chr(plaintext_number % 26 + alphabet_start)
    else:
        # Non-letters pass through unchanged.
        return letter
def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a message

    >>> affine_encipher('hours passed during which jerico tried every ' \
            'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(enciphered)
def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a message

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
            'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(enciphered)
def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Find the cipher alphabet given a keyword.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword:
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 0)
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    if wrap_alphabet == 0:
        # Keyword first, then the whole alphabet; deduplicate drops repeats.
        cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
                                              string.ascii_lowercase))
    else:
        if wrap_alphabet == 1:
            last_keyword_letter = deduplicate(sanitise(keyword))[-1]
        else:
            last_keyword_letter = sorted(sanitise(keyword))[-1]
        last_keyword_position = string.ascii_lowercase.find(
            last_keyword_letter) + 1
        # Continue from the chosen letter, wrapping round to 'a'.
        cipher_alphabet = ''.join(
            deduplicate(sanitise(keyword) +
                        string.ascii_lowercase[last_keyword_position:] +
                        string.ascii_lowercase))
    return cipher_alphabet
def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Enciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword:
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 1)
    'lskl dskkbus'
    >>> keyword_encipher('test message', 'bayes', 2)
    'qspq jsppbcs'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    # Map plain lowercase letters onto the keyword alphabet; other
    # characters are left alone by translate.
    cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
    return message.lower().translate(cipher_translation)
def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Deciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword:
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
    'test message'
    >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
    'test message'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    # Inverse translation: keyword alphabet back to plain lowercase.
    cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
    return message.lower().translate(cipher_translation)
380 def scytale_encipher(message
, rows
):
381 """Enciphers using the scytale transposition cipher.
382 Message is padded with spaces to allow all rows to be the same length.
384 >>> scytale_encipher('thequickbrownfox', 3)
386 >>> scytale_encipher('thequickbrownfox', 4)
388 >>> scytale_encipher('thequickbrownfox', 5)
389 'tubn hirf ecoo qkwx '
390 >>> scytale_encipher('thequickbrownfox', 6)
392 >>> scytale_encipher('thequickbrownfox', 7)
393 'tqcrnx hukof eibwo '
395 if len(message
) % rows
!= 0:
396 message
+= ' '*(rows
- len(message
) % rows
)
397 row_length
= round(len(message
) / rows
)
398 slices
= [message
[i
:i
+row_length
]
399 for i
in range(0, len(message
), row_length
)]
400 return ''.join([''.join(r
) for r
in zip_longest(*slices
, fillvalue
='')])
def scytale_decipher(message, rows):
    """Deciphers using the scytale transposition cipher.
    Assumes the message is padded so that all rows are the same length.

    >>> scytale_decipher('tubnhirfecooqkwx', 4)
    'thequickbrownfox'
    >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
    'thequickbrownfox    '
    """
    cols = round(len(message) / rows)
    # Cut the ciphertext back into its columns, then read across them.
    columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
    return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
def transpositions_of(keyword):
    """Finds the column-transposition order given by a keyword:
    the positions of the deduplicated keyword's letters in alphabetical order.

    >>> transpositions_of('clever')
    [0, 2, 1, 4, 3]
    """
    transpositions = []
    key = deduplicate(keyword)
    for l in sorted(key):
        transpositions += [key.index(l)]
    return transpositions
def column_transposition_encipher(message, keyword):
    """Enciphers using the column transposition cipher

    >>> column_transposition_encipher('hellothere', 'clever')
    'hleolteher'
    """
    transpositions = transpositions_of(keyword)
    # One column per distinct keyword letter, padded with spaces.
    columns = every_nth(message, len(transpositions), fillvalue=' ')
    transposed_columns = transpose(columns, transpositions)
    return combine_every_nth(transposed_columns)
def column_transposition_decipher(message, keyword):
    """Deciphers using the column transposition cipher

    >>> column_transposition_decipher('hleolteher', 'clever')
    'hellothere'
    """
    transpositions = transpositions_of(keyword)
    columns = every_nth(message, len(transpositions), fillvalue=' ')
    # untranspose reverses the column shuffle applied by the encipherer.
    transposed_columns = untranspose(columns, transpositions)
    return combine_every_nth(transposed_columns)
def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis.
    Returns (best_shift, fit) where a smaller fit is a better match to the
    target letter distribution.

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
        'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
        'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.42152901235832...)
    """
    sanitised_message = sanitise(message)
    best_shift = 0
    best_fit = float("inf")
    # Try every possible shift and keep the one whose letter-frequency
    # profile is closest to the target distribution.
    for shift in range(26):
        plaintext = caesar_decipher(sanitised_message, shift)
        counts = message_frequency_scaling(letter_frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(shift, fit,
                                                        plaintext[:50]))
        if fit < best_fit:
            best_fit = fit
            best_shift = shift
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis.
    Returns ((multiplier, adder, one_based), fit).

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
        'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
        'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
        'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
        'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
        'clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    # Exhaustive search: odd multipliers only, every adder, and both
    # alphabet-numbering conventions.
    for one_based in [True, False]:
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                counts = message_frequency_scaling(letter_frequencies(plaintext))
                fit = metric(target_counts, counts)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit
def keyword_break(message, wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis.  Returns ((keyword, wrap_alphabet), fit).

    >>> keyword_break(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    best_keyword = ''
    best_wrap_alphabet = True
    best_fit = float("inf")
    # Try every candidate keyword under each of the three wrap conventions.
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            counts = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_counts, counts)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit
def keyword_break_mp(message, wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis, scoring candidates in parallel across processes.

    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    with Pool() as pool:
        helper_args = [(message, word, wrap, metric, target_counts,
                        message_frequency_scaling)
                       for word in wordlist for wrap in range(3)]
        # Gotcha: the helper function here needs to be defined at the top level
        # (limitation of Pool.starmap)
        breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
        # Smallest fit wins.
        return min(breaks, key=lambda k: k[1])
def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    """Score a single (keyword, wrap_alphabet) candidate for keyword_break_mp,
    returning ((keyword, wrap_alphabet), fit).  Must live at module top level
    so that Pool.starmap can pickle it."""
    plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    counts = message_frequency_scaling(letter_frequencies(plaintext))
    fit = metric(target_counts, counts)
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
    return (keyword, wrap_alphabet), fit
def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a Scytale cipher by comparing bigram frequencies of each
    candidate decryption to English.  Returns (best_key, fit).

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
        'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
        'aek') # doctest: +ELLIPSIS
    (6, 0.83453041115025...)
    """
    best_key = 0
    best_fit = float("inf")
    # Only row counts that divide the message length evenly are tried,
    # since the message is assumed to be padded to a whole grid.
    for key in range(1, 20):
        if len(message) % key == 0:
            plaintext = scytale_decipher(message, key)
            counts = message_frequency_scaling(frequencies(
                ngrams(sanitise(plaintext), 2)))
            fit = metric(target_counts, counts)
            logger.debug('Scytale break attempt using key {0} gives fit of '
                         '{1} and decrypt starting: {2}'.format(key,
                             fit, sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_key = key
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit
630 if __name__
== "__main__":