6 from itertools
import zip_longest
, repeat
7 from segment
import segment
8 from multiprocessing
import Pool
13 # c5a = open('2012/5a.ciphertext', 'r').read()
14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
15 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1
# Module-level logger: appends to 'cipher.log' in the working directory.
# Default level WARNING keeps the per-candidate debug/info chatter from the
# *_break functions quiet; uncomment one of the lines below for more detail.
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.DEBUG)
# English single-letter frequency counts, read from a tab-separated
# "letter<TAB>count" file, then normalised to a probability distribution
# for use as the target in the frequency-analysis breakers.
# NOTE: the `for line in f:` loop header was missing from the mangled
# source and is reconstructed here.
english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
    for line in f:
        (letter, count) = line.split("\t")
        english_counts[letter] = int(count)
normalised_english_counts = norms.normalise(english_counts)
# English bigram frequency counts, read from a tab-separated
# "bigram<TAB>count" file, then normalised; used as the target
# distribution by scytale_break.
# NOTE: the `for line in f:` loop header was missing from the mangled
# source and is reconstructed here.
english_bigram_counts = collections.defaultdict(int)
with open('count_2l.txt', 'r') as f:
    for line in f:
        (bigram, count) = line.split("\t")
        english_bigram_counts[bigram] = int(count)
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
# Candidate keywords (one per line) used as the default wordlist by the
# keyword_break functions; trailing newlines are stripped.
with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]
# Lookup table inverting multiplication mod 26:
# modular_division_table[b][c] == a  whenever  (a * b) % 26 == c.
# Used by affine_decipher_letter to undo the affine multiplier.
# NOTE: the two `for` loop headers and the `c = (a * b) % 26` line were
# missing from the mangled source and are reconstructed here.
modular_division_table = [[0]*26 for x in range(26)]
for a in range(26):
    for b in range(26):
        c = (a * b) % 26
        modular_division_table[b][c] = a
47 """Remove all non-alphabetic characters from a text
48 >>> letters('The Quick')
50 >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
51 'TheQuickBROWNfoxjumpedoverthelazyDOG'
53 return ''.join([c
for c
in text
if c
in string
.ascii_letters
])
56 """Remove all non-alphabetic characters and convert the text to lowercase
58 >>> sanitise('The Quick')
60 >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
61 'thequickbrownfoxjumpedoverthelazydog'
63 # sanitised = [c.lower() for c in text if c in string.ascii_letters]
64 # return ''.join(sanitised)
65 return letters(text
).lower()
68 """Returns all n-grams of a text
70 >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
71 ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
73 >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
74 ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
75 'rown', 'ownf', 'wnfo', 'nfox']
77 return [text
[i
:i
+n
] for i
in range(len(text
)-n
+1)]
def every_nth(text, n, fillvalue=''):
    """Returns n strings, each of which consists of every nth character,
    starting with the 0th, 1st, 2nd, ... (n-1)th character

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
    """
    # Cut the text into consecutive chunks of length n, then read down the
    # columns; a short final chunk is padded out with fillvalue.
    chunks = [text[start:start + n] for start in range(0, len(text), n)]
    columns = zip_longest(*chunks, fillvalue=fillvalue)
    return [''.join(column) for column in columns]
def combine_every_nth(split_text):
    """Reforms a text that was split into every_nth strings.

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    """
    # Interleave the strings character by character; shorter strings simply
    # run out (empty fillvalue), reversing the padding done by every_nth.
    interleaved = zip_longest(*split_text, fillvalue='')
    return ''.join(''.join(chars) for chars in interleaved)
def transpose(items, transposition):
    """Moves items around according to the given transposition:
    output position p receives the item at input position transposition[p].

    >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
    ['d', 'b', 'c', 'a']
    >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
    [13, 12, 14, 11, 15, 10]
    """
    transposed = list(repeat('', len(transposition)))
    for p, t in enumerate(transposition):
        transposed[p] = items[t]
    # NOTE: the `return` line was missing from the mangled source; without
    # it the function returned None, and is reconstructed here.
    return transposed
def untranspose(items, transposition):
    """Undoes a transpose: output position transposition[p] receives the
    item at input position p.

    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
    ['a', 'b', 'c', 'd']
    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
    [10, 11, 12, 13, 14, 15]
    """
    transposed = list(repeat('', len(transposition)))
    for p, t in enumerate(transposition):
        transposed[t] = items[p]
    # NOTE: the `return` line was missing from the mangled source; without
    # it the function returned None, and is reconstructed here.
    return transposed
def frequencies(text):
    """Count the number of occurrences of each character in text.
    Returns a Counter, so absent characters read as 0.

    >>> sorted(frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> frequencies('abcdefabcdef')['x']
    0
    """
    # Counter consumes the iterable directly; the original wrapped text in
    # a pointless generator expression `(c for c in text)`.
    return collections.Counter(text)

# Alias retained for backward compatibility with existing callers.
letter_frequencies = frequencies
def deduplicate(text):
    """Return the distinct items of text as a list, keeping only the first
    occurrence of each and preserving first-appearance order."""
    # OrderedDict keys act as an insertion-ordered set.
    seen = collections.OrderedDict()
    for item in text:
        seen[item] = None
    return list(seen)
def caesar_encipher_letter(letter, shift):
    """Encipher a letter, given a shift amount.
    Case is preserved; non-letters are returned unchanged.

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    # NOTE: the `else:` branches (lowercase start, non-letter passthrough)
    # were missing from the mangled source and are reconstructed here.
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        return chr(((ord(letter) - alphabet_start + shift) % 26) +
                   alphabet_start)
    else:
        return letter
def caesar_decipher_letter(letter, shift):
    """Decipher a letter, given a shift amount.

    >>> caesar_decipher_letter('b', 1)
    'a'
    """
    # Deciphering is just enciphering with the shift reversed.
    reverse_shift = -shift
    return caesar_encipher_letter(letter, reverse_shift)
def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift.
    Non-letters pass through unchanged.

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    """
    return ''.join(caesar_encipher_letter(character, shift)
                   for character in message)
def caesar_decipher(message, shift):
    """Decipher a message enciphered with the Caesar cipher of given shift.

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    """
    reverse_shift = -shift
    return caesar_encipher(message, reverse_shift)
def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Encipher a letter, given a multiplier and adder.
    one_based selects whether 'a' counts as 1 (True) or 0 (False).
    Non-letters are returned unchanged.

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
            for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
            for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    # NOTE: the `else:` branches (lowercase start, non-letter passthrough)
    # were missing from the mangled source and are reconstructed here.
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        letter_number = ord(letter) - alphabet_start
        if one_based: letter_number += 1
        cipher_number = (letter_number * multiplier + adder) % 26
        if one_based: cipher_number -= 1
        return chr(cipher_number % 26 + alphabet_start)
    else:
        return letter
def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter, given a multiplier and adder.
    Uses modular_division_table to invert the multiplication mod 26.
    Non-letters are returned unchanged.

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
            for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    # NOTE: the `else:` branches (lowercase start, non-letter passthrough)
    # were missing from the mangled source and are reconstructed here.
    if letter in string.ascii_letters:
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        cipher_number = ord(letter) - alphabet_start
        if one_based: cipher_number += 1
        plaintext_number = ( modular_division_table[multiplier]
                                                   [(cipher_number - adder) % 26] )
        if one_based: plaintext_number -= 1
        return chr(plaintext_number % 26 + alphabet_start)
    else:
        return letter
def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a message with an affine cipher, letter by letter.

    >>> affine_encipher('hours passed during which jerico tried every ' \
           'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    # NOTE: the `for l in message]` line of the comprehension was missing
    # from the mangled source and is reconstructed here.
    enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(enciphered)
def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a message enciphered with an affine cipher.

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
           'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    # NOTE: the `for l in message]` line of the comprehension was missing
    # from the mangled source and is reconstructed here.
    enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
                  for l in message]
    return ''.join(enciphered)
def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Find the cipher alphabet given a keyword.
    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword:
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    # NOTE: the two `else:` lines were missing from the mangled source and
    # are reconstructed here from the docstring's three wrap modes.
    if wrap_alphabet == 0:
        cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
                                              string.ascii_lowercase))
    else:
        if wrap_alphabet == 1:
            last_keyword_letter = deduplicate(sanitise(keyword))[-1]
        else:
            last_keyword_letter = sorted(sanitise(keyword))[-1]
        last_keyword_position = string.ascii_lowercase.find(
            last_keyword_letter) + 1
        cipher_alphabet = ''.join(
            deduplicate(sanitise(keyword) +
                        string.ascii_lowercase[last_keyword_position:] +
                        string.ascii_lowercase))
    return cipher_alphabet
def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Enciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added after
    the keyword (0: from 'a'; 1: from the last keyword letter;
    2: from the largest keyword letter).

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    """
    substitution_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    encipher_map = str.maketrans(string.ascii_lowercase, substitution_alphabet)
    lowered = message.lower()
    return lowered.translate(encipher_map)
def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Deciphers a message enciphered with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added after
    the keyword (0: from 'a'; 1: from the last keyword letter;
    2: from the largest keyword letter).

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    """
    substitution_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    decipher_map = str.maketrans(substitution_alphabet, string.ascii_lowercase)
    lowered = message.lower()
    return lowered.translate(decipher_map)
def scytale_encipher(message, rows):
    """Enciphers using the scytale transposition cipher.
    Message is padded with spaces to allow all rows to be the same length.

    >>> scytale_encipher('thequickbrownfox', 4)
    'tubnhirfecooqkwx'
    >>> scytale_encipher('thequickbrownfox', 5)
    'tubn hirf ecoo qkwx '
    """
    # Pad with spaces so the message fills the grid exactly.
    remainder = len(message) % rows
    if remainder != 0:
        message += ' ' * (rows - remainder)
    # Length is now an exact multiple of rows, so integer division is exact.
    row_length = len(message) // rows
    grid = [message[start:start + row_length]
            for start in range(0, len(message), row_length)]
    return ''.join(''.join(column) for column in zip_longest(*grid, fillvalue=''))
def scytale_decipher(message, rows):
    """Deciphers using the scytale transposition cipher.
    Assumes the message is padded so that all rows are the same length.

    >>> scytale_decipher('tubnhirfecooqkwx', 4)
    'thequickbrownfox'
    """
    cols = round(len(message) / rows)
    # Cut into cols pieces of length rows, then read across them.
    pieces = [message[start:start + rows]
              for start in range(0, cols * rows, rows)]
    return ''.join(''.join(group) for group in zip_longest(*pieces, fillvalue=''))
def transpositions_of(keyword):
    """Finds the column transposition given by a keyword: for each letter of
    the deduplicated keyword in sorted order, the index it occupies in the
    keyword.

    >>> transpositions_of('clever')
    [0, 2, 1, 4, 3]
    """
    # Deduplicate while keeping first-appearance order (inlined from the
    # module's deduplicate helper).
    key = list(collections.OrderedDict.fromkeys(keyword))
    return [key.index(letter) for letter in sorted(key)]
def column_transposition_encipher(message, keyword):
    """Enciphers a message with a column transposition keyed by keyword.

    >>> column_transposition_encipher('hellothere', 'clever')
    'hleolteher'
    """
    enciphered = column_transposition_worker(message, keyword, encipher=True)
    return enciphered
def column_transposition_decipher(message, keyword):
    """Deciphers a message enciphered with a column transposition keyed by
    keyword.

    >>> column_transposition_decipher('hleolteher', 'clever')
    'hellothere'
    """
    deciphered = column_transposition_worker(message, keyword, encipher=False)
    return deciphered
def column_transposition_worker(message, keyword, encipher=True):
    """Does the actual work of a column transposition: split the message
    into columns, permute them by the keyword's transposition (or undo
    the permutation when encipher=False), and reassemble.

    >>> column_transposition_worker('hellothere', 'clever')
    'hleolteher'
    >>> column_transposition_worker('hleolteher', 'clever', encipher=False)
    'hellothere'
    """
    transpositions = transpositions_of(keyword)
    columns = every_nth(message, len(transpositions), fillvalue=' ')
    # NOTE: the `if encipher:` / `else:` lines were missing from the
    # mangled source and are reconstructed here.
    if encipher:
        transposed_columns = transpose(columns, transpositions)
    else:
        transposed_columns = untranspose(columns, transpositions)
    return combine_every_nth(transposed_columns)
def caesar_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis: tries all 26 shifts
    and returns the (shift, fit) pair whose letter frequencies are closest
    to the target distribution under the given metric.

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
          'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    """
    sanitised_message = sanitise(message)
    best_shift = 0
    best_fit = float("inf")
    for shift in range(26):
        plaintext = caesar_decipher(sanitised_message, shift)
        counts = message_frequency_scaling(letter_frequencies(plaintext))
        fit = metric(target_counts, counts)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} '
                     'and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
        # NOTE: the best-candidate update lines were missing from the
        # mangled source and are reconstructed here (lower fit is better).
        if fit < best_fit:
            best_fit = fit
            best_shift = shift
    logger.info('Caesar break best fit: key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_shift, best_fit,
                    caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit
def affine_break(message,
                 metric=norms.euclidean_distance,
                 target_counts=normalised_english_counts,
                 message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis: tries every
    (multiplier, adder, one_based) combination and returns the key tuple
    and fit of the best-scoring decryption.

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
          'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
          'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
          'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
          'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
          'clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    # NOTE: the initialiser and best-candidate update lines were missing
    # from the mangled source and are reconstructed here.
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    for one_based in [True, False]:
        # Only odd multipliers are coprime-compatible candidates mod 26.
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message,
                                            multiplier, adder, one_based)
                counts = message_frequency_scaling(letter_frequencies(plaintext))
                fit = metric(target_counts, counts)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) '
                             'gives fit of {3} and decrypt starting: {4}'.
                             format(multiplier, adder, one_based, fit,
                                    plaintext[:50]))
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
                'and decrypt starting: {4}'.format(
                    best_multiplier, best_adder, best_one_based, best_fit,
                    affine_decipher(sanitised_message, best_multiplier,
                                    best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit
def keyword_break(message, wordlist=keywords,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis: tries every keyword in wordlist with each of the
    three alphabet-wrap modes.

    >>> keyword_break(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    # NOTE: the `wordlist=keywords` parameter (used by the doctest) and the
    # initialiser / best-candidate update lines were missing from the
    # mangled source and are reconstructed here.
    best_keyword = ''
    best_wrap_alphabet = True
    best_fit = float("inf")
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            counts = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_counts, counts)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) '
                         'gives fit of {2} and decrypt starting: {3}'.format(
                             keyword, wrap_alphabet, fit,
                             sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
                '{2} and decrypt starting: {3}'.format(best_keyword,
                    best_wrap_alphabet, best_fit, sanitise(
                        keyword_decipher(message, best_keyword,
                                         best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit
def keyword_break_mp(message, wordlist=keywords,
                     metric=norms.euclidean_distance,
                     target_counts=normalised_english_counts,
                     message_frequency_scaling=norms.normalise,
                     chunksize=500):
    """Breaks a keyword substitution cipher using a dictionary and
    frequency analysis, fanning the candidates out over a process pool.

    >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
          'keyword decipherment', 'elephant', 1), \
          wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    # NOTE: the `wordlist=keywords` and `chunksize=500` parameters and the
    # `with Pool() as pool:` line were missing from the mangled source and
    # are reconstructed here.
    with Pool() as pool:
        helper_args = [(message, word, wrap, metric, target_counts,
                        message_frequency_scaling)
                       for word in wordlist for wrap in range(3)]
        # Gotcha: the helper function here needs to be defined at the top level
        # (limitation of Pool.starmap)
        breaks = pool.starmap(keyword_break_one, helper_args, chunksize)
        # Lowest fit wins.
        return min(breaks, key=lambda k: k[1])
def keyword_break_one(message, keyword, wrap_alphabet, metric, target_counts,
                      message_frequency_scaling):
    """Score a single (keyword, wrap_alphabet) candidate for keyword_break_mp:
    decipher, measure the letter-frequency fit against the target, and return
    ((keyword, wrap_alphabet), fit). Must stay at module top level so
    Pool.starmap can pickle it."""
    plaintext = keyword_decipher(message, keyword, wrap_alphabet)
    scaled_counts = message_frequency_scaling(letter_frequencies(plaintext))
    fit = metric(target_counts, scaled_counts)
    logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
                 '{2} and decrypt starting: {3}'.format(keyword,
                     wrap_alphabet, fit, sanitise(plaintext)[:50]))
    return (keyword, wrap_alphabet), fit
def scytale_break(message,
                  metric=norms.euclidean_distance,
                  target_counts=normalised_english_bigram_counts,
                  message_frequency_scaling=norms.normalise):
    """Breaks a scytale cipher by trying each feasible row count and scoring
    the resulting plaintext's bigram frequencies against English.

    >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
          'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
          'aek') # doctest: +ELLIPSIS
    (6, 0.83453041115025...)
    """
    # NOTE: the initialiser and best-candidate update lines were missing
    # from the mangled source and are reconstructed here.
    best_key = 0
    best_fit = float("inf")
    for key in range(1, 20):
        # Only row counts that divide the message length are valid.
        if len(message) % key == 0:
            plaintext = scytale_decipher(message, key)
            counts = message_frequency_scaling(frequencies(
                         ngrams(sanitise(plaintext), 2)))
            fit = metric(target_counts, counts)
            logger.debug('Scytale break attempt using key {0} gives fit of '
                         '{1} and decrypt starting: {2}'.format(key,
                             fit, sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_key = key
    logger.info('Scytale break best fit with key {0} gives fit of {1} and '
                'decrypt starting: {2}'.format(best_key, best_fit,
                    sanitise(scytale_decipher(message, best_key))[:50]))
    return best_key, best_fit
651 if __name__
== "__main__":