6 from itertools
import zip_longest
, repeat
7 from segment
import segment
8 from multiprocessing
import Pool
13 # c5a = open('2012/5a.ciphertext', 'r').read()
14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
15 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1
# Module-wide logger: appends to cipher.log; default threshold is WARNING.
# Uncomment one of the lower levels below when debugging a break attempt.
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)
# Single-letter counts of English, loaded from count_1l.txt
# (tab-separated "letter<TAB>count" lines), plus a normalised copy.
english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
    for line in f:
        (letter, count) = line.split("\t")
        english_counts[letter] = int(count)
normalised_english_counts = norms.normalise(english_counts)
# Bigram (letter-pair) counts of English, loaded from count_2l.txt
# (tab-separated "bigram<TAB>count" lines), plus a normalised copy.
english_bigram_counts = collections.defaultdict(int)
with open('count_2l.txt', 'r') as f:
    for line in f:
        (bigram, count) = line.split("\t")
        english_bigram_counts[bigram] = int(count)
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
# Candidate keywords for the dictionary attacks below, one word per line.
with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]
# Division table modulo 26 for the affine cipher:
# modular_division_table[b][c] == a  whenever  (a * b) % 26 == c.
# (When gcd(b, 26) > 1 several a's share a (b, c) cell; the last one wins,
# but affine multipliers are always coprime with 26 so those cells are unused.)
modular_division_table = [[0]*26 for _ in range(26)]
for a in range(26):
    for b in range(26):
        c = (a * b) % 26
        modular_division_table[b][c] = a
def sanitise(text):
    """Remove all non-alphabetic characters and convert the text to lowercase

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    # Keep only ASCII letters, lowercased; everything else is dropped.
    return ''.join(c.lower() for c in text if c in string.ascii_letters)
def ngrams(text, n):
    """Returns all n-grams of a text

    >>> ngrams('abcde', 2)
    ['ab', 'bc', 'cd', 'de']
    >>> ngrams('abc', 3)
    ['abc']
    """
    # One slice per starting position; the last n-gram starts at len(text)-n.
    return [text[start:start + n] for start in range(len(text) - n + 1)]
def every_nth(text, n, fillvalue=''):
    """Returns n strings, each of which consists of every nth character,
    starting with the 0th, 1st, 2nd, ... (n-1)th character

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
    """
    # Cut the text into consecutive chunks of n, then read them column-wise.
    chunks = [text[start:start + n] for start in range(0, len(text), n)]
    return [''.join(column)
            for column in zip_longest(*chunks, fillvalue=fillvalue)]
def combine_every_nth(split_text):
    """Reforms a text split into every_nth strings

    >>> combine_every_nth(['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty'])
    'abcdefghijklmnopqrstuvwxyz'
    """
    # Reading the split strings column-wise restores the original order;
    # the empty fillvalue absorbs ragged (unequal-length) strings.
    return ''.join(''.join(column)
                   for column in zip_longest(*split_text, fillvalue=''))
def transpose(items, transposition):
    """Moves items around according to the given transposition

    >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
    ['d', 'b', 'c', 'a']
    >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
    [13, 12, 14, 11, 15, 10]
    """
    # Output position p receives the item at source index transposition[p].
    return [items[source] for source in transposition]
def untranspose(items, transposition):
    """Undoes a transpose

    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
    ['a', 'b', 'c', 'd']
    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
    [10, 11, 12, 13, 14, 15]
    """
    # Inverse of transpose: the item at position p goes back to
    # index transposition[p].
    restored = [''] * len(transposition)
    for position, target in enumerate(transposition):
        restored[target] = items[position]
    return restored
def frequencies(text):
    """Count the number of occurrences of each character in text

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    """
    # collections.Counter counts at C speed and behaves like the
    # defaultdict(int) accumulator it replaces: missing keys read as 0
    # and .items() yields the same (char, count) pairs.
    return collections.Counter(text)

# Alias so call sites that count letters specifically read naturally.
letter_frequencies = frequencies
def deduplicate(text):
    """Return the distinct characters of text as a list,
    keeping first-appearance order.
    """
    # An ordered mapping keyed by character keeps one slot per distinct
    # character, in the order each was first seen.
    seen = collections.OrderedDict()
    for character in text:
        seen[character] = None
    return list(seen)
def caesar_encipher_letter(letter, shift):
    """Encipher a letter, given a shift amount

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    # Non-letters pass through unchanged.
    if letter not in string.ascii_letters:
        return letter
    # Case is preserved by shifting relative to the matching alphabet start.
    if letter in string.ascii_uppercase:
        alphabet_start = ord('A')
    else:
        alphabet_start = ord('a')
    return chr((ord(letter) - alphabet_start + shift) % 26 + alphabet_start)
def caesar_decipher_letter(letter, shift):
    """Decipher a letter, given a shift amount

    >>> caesar_decipher_letter('b', 1)
    'a'
    """
    # Deciphering is just enciphering with the opposite shift.
    return caesar_encipher_letter(letter, -shift)
def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift

    >>> caesar_encipher('abc', 1)
    'bcd'
    """
    # Shift every character independently; non-letters are left alone
    # by caesar_encipher_letter.
    return ''.join(caesar_encipher_letter(character, shift)
                   for character in message)
def caesar_decipher(message, shift):
    """Decipher a message with the Caesar cipher of given shift

    >>> caesar_decipher('bcd', 1)
    'abc'
    """
    # Undo the shift by enciphering with its negation.
    return caesar_encipher(message, -shift)
def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Encipher a letter, given a multiplier and adder

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
                 for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
                 for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    # Non-letters pass through unchanged.
    if letter not in string.ascii_letters:
        return letter
    alphabet_start = ord('A') if letter in string.ascii_uppercase else ord('a')
    letter_number = ord(letter) - alphabet_start
    if one_based:
        # One-based scheme numbers the alphabet a=1 .. z=26.
        letter_number += 1
    cipher_number = (letter_number * multiplier + adder) % 26
    if one_based:
        cipher_number -= 1
    return chr(cipher_number % 26 + alphabet_start)
def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter, given a multiplier and adder

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
                 for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
                 for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    # Non-letters pass through unchanged.
    if letter not in string.ascii_letters:
        return letter
    alphabet_start = ord('A') if letter in string.ascii_uppercase else ord('a')
    cipher_number = ord(letter) - alphabet_start
    if one_based:
        cipher_number += 1
    # Undo the multiply with the precomputed mod-26 division table.
    plaintext_number = modular_division_table[multiplier][
        (cipher_number - adder) % 26]
    if one_based:
        plaintext_number -= 1
    return chr(plaintext_number % 26 + alphabet_start)
def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a message with the affine cipher

    >>> affine_encipher('hours passed during which jerico tried every ' \
        'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    return ''.join(affine_encipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a message enciphered with the affine cipher

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
        'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    return ''.join(affine_decipher_letter(character, multiplier, adder,
                                          one_based)
                   for character in message)
def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Find the cipher alphabet given a keyword.
    wrap_alphabet controls how the rest of the alphabet is added
      0 : from 'a'
      1 : from the last letter in the sanitised keyword
      2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    if wrap_alphabet == 0:
        # Keyword first, then the plain alphabet; duplicates collapse away.
        return ''.join(deduplicate(sanitise(keyword) +
                                   string.ascii_lowercase))
    if wrap_alphabet == 1:
        last_keyword_letter = deduplicate(sanitise(keyword))[-1]
    else:
        last_keyword_letter = sorted(sanitise(keyword))[-1]
    # Start the filler alphabet just after the chosen keyword letter,
    # then wrap round to the beginning; deduplicate removes the repeats.
    last_keyword_position = string.ascii_lowercase.find(
        last_keyword_letter) + 1
    return ''.join(
        deduplicate(sanitise(keyword) +
                    string.ascii_lowercase[last_keyword_position:] +
                    string.ascii_lowercase))
def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Enciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
      0 : from 'a'
      1 : from the last letter in the sanitised keyword
      2 : from the largest letter in the sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    """
    # Map plain lowercase letters onto the keyword-derived alphabet.
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
    return message.lower().translate(cipher_translation)
def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Deciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
      0 : from 'a'
      1 : from the last letter in the sanitised keyword
      2 : from the largest letter in the sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    """
    # Inverse mapping: keyword-derived alphabet back to plain lowercase.
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
    return message.lower().translate(cipher_translation)
def scytale_encipher(message, rows):
    """Enciphers using the scytale transposition cipher.
    Message is padded with spaces to allow all rows to be the same length.

    >>> scytale_encipher('thequickbrownfox', 4)
    'tubnhirfecooqkwx'
    >>> scytale_encipher('thequickbrownfox', 5)
    'tubn hirf ecoo qkwx '
    """
    # Pad with spaces so the message divides evenly into `rows` slices.
    if len(message) % rows != 0:
        message += ' '*(rows - len(message) % rows)
    row_length = round(len(message) / rows)
    slices = [message[start:start + row_length]
              for start in range(0, len(message), row_length)]
    # Reading the slices column-wise produces the ciphertext.
    return ''.join(''.join(column)
                   for column in zip_longest(*slices, fillvalue=''))
def scytale_decipher(message, rows):
    """Deciphers using the scytale transposition cipher.
    Assumes the message is padded so that all rows are the same length.

    >>> scytale_decipher('tubnhirfecooqkwx', 4)
    'thequickbrownfox'
    """
    # Split into `cols` pieces of `rows` characters, then read column-wise
    # to undo the enciphering transposition.
    cols = round(len(message) / rows)
    columns = [message[start:start + rows]
               for start in range(0, cols * rows, rows)]
    return ''.join(''.join(row)
                   for row in zip_longest(*columns, fillvalue=''))
def transpositions_of(keyword):
    """Return the column transposition defined by a keyword: for each of the
    keyword's distinct letters in alphabetical order, the index where that
    letter first appears in the deduplicated keyword.

    >>> transpositions_of('clever')
    [0, 2, 1, 4, 3]
    """
    key = deduplicate(keyword)
    return [key.index(letter) for letter in sorted(key)]
def column_transposition_encipher(message, keyword):
    """Enciphers using the column transposition cipher.

    >>> column_transposition_encipher('hellothere', 'clever')
    'hleolteher'
    """
    return column_transposition_worker(message, keyword, True)
def column_transposition_decipher(message, keyword):
    """Deciphers using the column transposition cipher.

    >>> column_transposition_decipher('hleolteher', 'clever')
    'hellothere'
    """
    return column_transposition_worker(message, keyword, False)
def column_transposition_worker(message, keyword, encipher=True):
    """Does the actual work of the column transposition cipher:
    enciphers when `encipher` is True, deciphers otherwise.

    >>> column_transposition_worker('hellothere', 'clever')
    'hleolteher'
    >>> column_transposition_worker('hleolteher', 'clever', False)
    'hellothere'
    """
    transpositions = transpositions_of(keyword)
    # Split the message into one column per keyword letter (space-padded),
    # permute the columns, then interleave them back together.
    columns = every_nth(message, len(transpositions), fillvalue=' ')
    if encipher:
        transposed_columns = transpose(columns, transpositions)
    else:
        transposed_columns = untranspose(columns, transpositions)
    return combine_every_nth(transposed_columns)
def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
        'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
        'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.42152901235832...)
    """
    sanitised_message = sanitise(message)
    best_shift = 0
    best_fit = float("inf")
    # Try all 26 shifts and keep the one whose letter frequencies are
    # closest (under `metric`) to the English target distribution.
    for shift in range(26):
        plaintext = caesar_decipher(sanitised_message, shift)
        counts = message_frequency_scaling(letter_frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(shift, fit,
                                                        plaintext[:50]))
        if fit < best_fit:
            best_fit = fit
            best_shift = shift
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
        'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
        'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
        'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
        'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
        'clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    # Exhaust the key space: odd multipliers (coprime with 26), all adders,
    # both numbering schemes; keep the key whose decrypt looks most English.
    for one_based in [True, False]:
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                counts = message_frequency_scaling(
                    letter_frequencies(plaintext))
                fit = metric(target_counts, counts)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit
def keyword_break(message,
                  wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis.

    >>> keyword_break(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    best_keyword = ''
    # Fix: wrap_alphabet holds ints from range(3); it was initialised to
    # the boolean True, which only mattered (confusingly) for an empty
    # wordlist. Initialise both trackers to well-typed defaults.
    best_wrap_alphabet = 0
    best_fit = float("inf")
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            counts = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_counts, counts)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit
def keyword_break_mp(message,
                     wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis, spreading the candidates over a process pool.

    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    with Pool() as pool:
        # One task per (word, wrap_alphabet) pair.
        helper_args = [(message, word, wrap, metric, target_counts,
                        message_frequency_scaling)
                       for word in wordlist for wrap in range(3)]
        # Gotcha: the helper function here needs to be defined at the top level
        # (limitation of Pool.starmap)
        breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
        # Lowest fit wins.
        return min(breaks, key=lambda k: k[1])
def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    """Score one (keyword, wrap_alphabet) candidate for keyword_break_mp.
    Kept at module top level so Pool.starmap can pickle it.
    """
    plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    counts = message_frequency_scaling(letter_frequencies(plaintext))
    fit = metric(target_counts, counts)
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
    return (keyword, wrap_alphabet), fit
def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a Scytale cipher

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
        'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
        'aek') # doctest: +ELLIPSIS
    (6, 0.83453041115025...)
    """
    best_key = 0
    best_fit = float("inf")
    # Only row counts that divide the message length are valid scytale keys;
    # bigram statistics pick out the decrypt that reads most like English.
    for key in range(1, 20):
        if len(message) % key == 0:
            plaintext = scytale_decipher(message, key)
            counts = message_frequency_scaling(frequencies(
                ngrams(sanitise(plaintext), 2)))
            fit = metric(target_counts, counts)
            logger.debug('Scytale break attempt using key {0} gives fit of '
                         '{1} and decrypt starting: {2}'.format(key,
                             fit, sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_key = key
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit
639 if __name__
== "__main__":