6 from itertools
import zip_longest
, repeat
, cycle
7 from segment
import segment
8 from multiprocessing
import Pool
13 # c5a = open('2012/5a.ciphertext', 'r').read()
14 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
15 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1
17 logger
= logging
.getLogger(__name__
)
18 logger
.addHandler(logging
.FileHandler('cipher.log'))
19 logger
.setLevel(logging
.WARNING
)
20 #logger.setLevel(logging.INFO)
21 #logger.setLevel(logging.DEBUG)
23 english_counts
= collections
.defaultdict(int)
24 with
open('count_1l.txt', 'r') as f
:
26 (letter
, count
) = line
.split("\t")
27 english_counts
[letter
] = int(count
)
28 normalised_english_counts
= norms
.normalise(english_counts
)
30 english_bigram_counts
= collections
.defaultdict(int)
31 with
open('count_2l.txt', 'r') as f
:
33 (bigram
, count
) = line
.split("\t")
34 english_bigram_counts
[bigram
] = int(count
)
35 normalised_english_bigram_counts
= norms
.normalise(english_bigram_counts
)
37 english_trigram_counts
= collections
.defaultdict(int)
38 with
open('count_3l.txt', 'r') as f
:
40 (trigram
, count
) = line
.split("\t")
41 english_trigram_counts
[trigram
] = int(count
)
42 normalised_english_trigram_counts
= norms
.normalise(english_trigram_counts
)
45 with
open('words.txt', 'r') as f
:
46 keywords
= [line
.rstrip() for line
in f
]
48 modular_division_table
= [[0]*26 for x
in range(26)]
52 modular_division_table
[b
][c
] = a
55 """Remove all non-alphabetic characters from a text
56 >>> letters('The Quick')
58 >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
59 'TheQuickBROWNfoxjumpedoverthelazyDOG'
61 return ''.join([c
for c
in text
if c
in string
.ascii_letters
])
64 """Remove all non-alphabetic characters and convert the text to lowercase
66 >>> sanitise('The Quick')
68 >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
69 'thequickbrownfoxjumpedoverthelazydog'
71 # sanitised = [c.lower() for c in text if c in string.ascii_letters]
72 # return ''.join(sanitised)
73 return letters(text
).lower()
76 """Returns all n-grams of a text
78 >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
79 ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
81 >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
82 ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
83 'rown', 'ownf', 'wnfo', 'nfox']
85 return [text
[i
:i
+n
] for i
in range(len(text
)-n
+1)]
87 def every_nth(text
, n
, fillvalue
=''):
88 """Returns n strings, each of which consists of every nth character,
89 starting with the 0th, 1st, 2nd, ... (n-1)th character
91 >>> every_nth(string.ascii_lowercase, 5)
92 ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
93 >>> every_nth(string.ascii_lowercase, 1)
94 ['abcdefghijklmnopqrstuvwxyz']
95 >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
96 ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
97 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
98 >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
99 ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
101 split_text
= [text
[i
:i
+n
] for i
in range(0, len(text
), n
)]
102 return [''.join(l
) for l
in zip_longest(*split_text
, fillvalue
=fillvalue
)]
104 def combine_every_nth(split_text
):
105 """Reforms a text split into every_nth strings
107 >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
108 'abcdefghijklmnopqrstuvwxyz'
109 >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
110 'abcdefghijklmnopqrstuvwxyz'
111 >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
112 'abcdefghijklmnopqrstuvwxyz'
114 return ''.join([''.join(l
)
115 for l
in zip_longest(*split_text
, fillvalue
='')])
117 def transpose(items
, transposition
):
118 """Moves items around according to the given transposition
120 >>> transpose(['a', 'b', 'c', 'd'], [0,1,2,3])
122 >>> transpose(['a', 'b', 'c', 'd'], [3,1,2,0])
124 >>> transpose([10,11,12,13,14,15], [3,2,4,1,5,0])
125 [13, 12, 14, 11, 15, 10]
127 transposed
= list(repeat('', len(transposition
)))
128 for p
, t
in enumerate(transposition
):
129 transposed
[p
] = items
[t
]
132 def untranspose(items
, transposition
):
133 """Undoes a transpose
135 >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
137 >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
139 >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
140 [10, 11, 12, 13, 14, 15]
142 transposed
= list(repeat('', len(transposition
)))
143 for p
, t
in enumerate(transposition
):
144 transposed
[t
] = items
[p
]
148 def frequencies(text
):
149 """Count the number of occurrences of each character in text
151 >>> sorted(frequencies('abcdefabc').items())
152 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
153 >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
154 'dog').items()) # doctest: +NORMALIZE_WHITESPACE
155 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
156 ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
157 ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
158 ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
159 >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
160 '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
161 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
162 ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
163 ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
164 ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
165 ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
166 >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... ' \
167 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
168 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
169 ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
170 ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
171 ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
172 >>> frequencies('abcdefabcdef')['x']
175 #counts = collections.defaultdict(int)
179 return collections
.Counter(c
for c
in text
)
180 letter_frequencies
= frequencies
182 def deduplicate(text
):
183 return list(collections
.OrderedDict
.fromkeys(text
))
187 def caesar_encipher_letter(letter
, shift
):
188 """Encipher a letter, given a shift amount
190 >>> caesar_encipher_letter('a', 1)
192 >>> caesar_encipher_letter('a', 2)
194 >>> caesar_encipher_letter('b', 2)
196 >>> caesar_encipher_letter('x', 2)
198 >>> caesar_encipher_letter('y', 2)
200 >>> caesar_encipher_letter('z', 2)
202 >>> caesar_encipher_letter('z', -1)
204 >>> caesar_encipher_letter('a', -1)
207 if letter
in string
.ascii_letters
:
208 if letter
in string
.ascii_uppercase
:
209 alphabet_start
= ord('A')
211 alphabet_start
= ord('a')
212 return chr(((ord(letter
) - alphabet_start
+ shift
) % 26) +
217 def caesar_decipher_letter(letter
, shift
):
218 """Decipher a letter, given a shift amount
220 >>> caesar_decipher_letter('b', 1)
222 >>> caesar_decipher_letter('b', 2)
225 return caesar_encipher_letter(letter
, -shift
)
227 def caesar_encipher(message
, shift
):
228 """Encipher a message with the Caesar cipher of given shift
230 >>> caesar_encipher('abc', 1)
232 >>> caesar_encipher('abc', 2)
234 >>> caesar_encipher('abcxyz', 2)
236 >>> caesar_encipher('ab cx yz', 2)
239 enciphered
= [caesar_encipher_letter(l
, shift
) for l
in message
]
240 return ''.join(enciphered
)
242 def caesar_decipher(message
, shift
):
243 """Encipher a message with the Caesar cipher of given shift
245 >>> caesar_decipher('bcd', 1)
247 >>> caesar_decipher('cde', 2)
249 >>> caesar_decipher('cd ez ab', 2)
252 return caesar_encipher(message
, -shift
)
254 def affine_encipher_letter(letter
, multiplier
=1, adder
=0, one_based
=True):
255 """Encipher a letter, given a multiplier and adder
257 >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
258 for l in string.ascii_uppercase])
259 'HKNQTWZCFILORUXADGJMPSVYBE'
260 >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
261 for l in string.ascii_uppercase])
262 'FILORUXADGJMPSVYBEHKNQTWZC'
264 if letter
in string
.ascii_letters
:
265 if letter
in string
.ascii_uppercase
:
266 alphabet_start
= ord('A')
268 alphabet_start
= ord('a')
269 letter_number
= ord(letter
) - alphabet_start
270 if one_based
: letter_number
+= 1
271 cipher_number
= (letter_number
* multiplier
+ adder
) % 26
272 if one_based
: cipher_number
-= 1
273 return chr(cipher_number
% 26 + alphabet_start
)
277 def affine_decipher_letter(letter
, multiplier
=1, adder
=0, one_based
=True):
278 """Encipher a letter, given a multiplier and adder
280 >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
281 for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
282 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
283 >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
284 for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
285 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
287 if letter
in string
.ascii_letters
:
288 if letter
in string
.ascii_uppercase
:
289 alphabet_start
= ord('A')
291 alphabet_start
= ord('a')
292 cipher_number
= ord(letter
) - alphabet_start
293 if one_based
: cipher_number
+= 1
294 plaintext_number
= ( modular_division_table
[multiplier
]
295 [(cipher_number
- adder
) % 26] )
296 if one_based
: plaintext_number
-= 1
297 return chr(plaintext_number
% 26 + alphabet_start
)
301 def affine_encipher(message
, multiplier
=1, adder
=0, one_based
=True):
302 """Encipher a message
304 >>> affine_encipher('hours passed during which jerico tried every ' \
305 'trick he could think of', 15, 22, True)
306 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
308 enciphered
= [affine_encipher_letter(l
, multiplier
, adder
, one_based
)
310 return ''.join(enciphered
)
312 def affine_decipher(message
, multiplier
=1, adder
=0, one_based
=True):
313 """Decipher a message
315 >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
316 'jfaoe ls omytd jlaxe mh', 15, 22, True)
317 'hours passed during which jerico tried every trick he could think of'
319 enciphered
= [affine_decipher_letter(l
, multiplier
, adder
, one_based
)
321 return ''.join(enciphered
)
324 def keyword_cipher_alphabet_of(keyword
, wrap_alphabet
=0):
325 """Find the cipher alphabet given a keyword.
326 wrap_alphabet controls how the rest of the alphabet is added
329 1 : from the last letter in the sanitised keyword
330 2 : from the largest letter in the sanitised keyword
332 >>> keyword_cipher_alphabet_of('bayes')
333 'bayescdfghijklmnopqrtuvwxz'
334 >>> keyword_cipher_alphabet_of('bayes', 0)
335 'bayescdfghijklmnopqrtuvwxz'
336 >>> keyword_cipher_alphabet_of('bayes', 1)
337 'bayestuvwxzcdfghijklmnopqr'
338 >>> keyword_cipher_alphabet_of('bayes', 2)
339 'bayeszcdfghijklmnopqrtuvwx'
341 if wrap_alphabet
== 0:
342 cipher_alphabet
= ''.join(deduplicate(sanitise(keyword
) +
343 string
.ascii_lowercase
))
345 if wrap_alphabet
== 1:
346 last_keyword_letter
= deduplicate(sanitise(keyword
))[-1]
348 last_keyword_letter
= sorted(sanitise(keyword
))[-1]
349 last_keyword_position
= string
.ascii_lowercase
.find(
350 last_keyword_letter
) + 1
351 cipher_alphabet
= ''.join(
352 deduplicate(sanitise(keyword
) +
353 string
.ascii_lowercase
[last_keyword_position
:] +
354 string
.ascii_lowercase
))
355 return cipher_alphabet
358 def keyword_encipher(message
, keyword
, wrap_alphabet
=0):
359 """Enciphers a message with a keyword substitution cipher.
360 wrap_alphabet controls how the rest of the alphabet is added
363 1 : from the last letter in the sanitised keyword
364 2 : from the largest letter in the sanitised keyword
366 >>> keyword_encipher('test message', 'bayes')
368 >>> keyword_encipher('test message', 'bayes', 0)
370 >>> keyword_encipher('test message', 'bayes', 1)
372 >>> keyword_encipher('test message', 'bayes', 2)
375 cipher_alphabet
= keyword_cipher_alphabet_of(keyword
, wrap_alphabet
)
376 cipher_translation
= ''.maketrans(string
.ascii_lowercase
, cipher_alphabet
)
377 return message
.lower().translate(cipher_translation
)
379 def keyword_decipher(message
, keyword
, wrap_alphabet
=0):
380 """Deciphers a message with a keyword substitution cipher.
381 wrap_alphabet controls how the rest of the alphabet is added
384 1 : from the last letter in the sanitised keyword
385 2 : from the largest letter in the sanitised keyword
387 >>> keyword_decipher('rsqr ksqqbds', 'bayes')
389 >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
391 >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
393 >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
396 cipher_alphabet
= keyword_cipher_alphabet_of(keyword
, wrap_alphabet
)
397 cipher_translation
= ''.maketrans(cipher_alphabet
, string
.ascii_lowercase
)
398 return message
.lower().translate(cipher_translation
)
400 def scytale_encipher(message
, rows
):
401 """Enciphers using the scytale transposition cipher.
402 Message is padded with spaces to allow all rows to be the same length.
404 >>> scytale_encipher('thequickbrownfox', 3)
406 >>> scytale_encipher('thequickbrownfox', 4)
408 >>> scytale_encipher('thequickbrownfox', 5)
409 'tubn hirf ecoo qkwx '
410 >>> scytale_encipher('thequickbrownfox', 6)
412 >>> scytale_encipher('thequickbrownfox', 7)
413 'tqcrnx hukof eibwo '
415 if len(message
) % rows
!= 0:
416 message
+= ' '*(rows
- len(message
) % rows
)
417 row_length
= round(len(message
) / rows
)
418 slices
= [message
[i
:i
+row_length
]
419 for i
in range(0, len(message
), row_length
)]
420 return ''.join([''.join(r
) for r
in zip_longest(*slices
, fillvalue
='')])
422 def scytale_decipher(message
, rows
):
423 """Deciphers using the scytale transposition cipher.
424 Assumes the message is padded so that all rows are the same length.
426 >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
428 >>> scytale_decipher('tubnhirfecooqkwx', 4)
430 >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
432 >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
434 >>> scytale_decipher('tqcrnx hukof eibwo ', 7)
437 cols
= round(len(message
) / rows
)
438 columns
= [message
[i
:i
+rows
] for i
in range(0, cols
* rows
, rows
)]
439 return ''.join([''.join(c
) for c
in zip_longest(*columns
, fillvalue
='')])
442 def transpositions_of(keyword
):
443 """Finds the transpostions given by a keyword. For instance, the keyword
444 'clever' rearranges to 'celrv', so the first column (0) stays first, the
445 second column (1) moves to third, the third column (2) moves to second,
448 >>> transpositions_of('clever')
451 key
= deduplicate(keyword
)
452 transpositions
= [key
.index(l
) for l
in sorted(key
)]
453 return transpositions
455 def column_transposition_encipher(message
, keyword
, fillvalue
=' '):
456 """Enciphers using the column transposition cipher.
457 Message is padded to allow all rows to be the same length.
459 >>> column_transposition_encipher('hellothere', 'clever')
461 >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
464 return column_transposition_worker(message
, keyword
, encipher
=True,
467 def column_transposition_decipher(message
, keyword
, fillvalue
=' '):
468 """Deciphers using the column transposition cipher.
469 Message is padded to allow all rows to be the same length.
471 >>> column_transposition_decipher('hleolteher', 'clever')
473 >>> column_transposition_decipher('hleolthre!e!', 'cleverly', fillvalue='?')
476 return column_transposition_worker(message
, keyword
, encipher
=False,
479 def column_transposition_worker(message
, keyword
,
480 encipher
=True, fillvalue
=' '):
481 """Does the actual work of the column transposition cipher.
482 Message is padded with spaces to allow all rows to be the same length.
484 >>> column_transposition_worker('hellothere', 'clever')
486 >>> column_transposition_worker('hellothere', 'clever', encipher=True)
488 >>> column_transposition_worker('hleolteher', 'clever', encipher=False)
491 transpositions
= transpositions_of(keyword
)
492 columns
= every_nth(message
, len(transpositions
), fillvalue
=fillvalue
)
494 transposed_columns
= transpose(columns
, transpositions
)
496 transposed_columns
= untranspose(columns
, transpositions
)
497 return combine_every_nth(transposed_columns
)
499 def vigenere_encipher(message
, keyword
):
502 >>> vigenere_encipher('hello', 'abc')
505 shifts
= [ord(l
) - ord('a') for l
in sanitise(keyword
)]
506 pairs
= zip(message
, cycle(shifts
))
507 return ''.join([caesar_encipher_letter(l
, k
) for l
, k
in pairs
])
509 def vigenere_decipher(message
, keyword
):
512 >>> vigenere_decipher('hfnlp', 'abc')
515 shifts
= [ord(l
) - ord('a') for l
in sanitise(keyword
)]
516 pairs
= zip(message
, cycle(shifts
))
517 return ''.join([caesar_decipher_letter(l
, k
) for l
, k
in pairs
])
521 def caesar_break(message
,
522 metric
=norms
.euclidean_distance
,
523 target_counts
=normalised_english_counts
,
524 message_frequency_scaling
=norms
.normalise
):
525 """Breaks a Caesar cipher using frequency analysis
527 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
528 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
529 (4, 0.31863952890183...)
530 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
531 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
532 (19, 0.42152901235832...)
533 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
534 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
535 (13, 0.316029208075451...)
537 sanitised_message
= sanitise(message
)
539 best_fit
= float("inf")
540 for shift
in range(26):
541 plaintext
= caesar_decipher(sanitised_message
, shift
)
542 counts
= message_frequency_scaling(letter_frequencies(plaintext
))
543 fit
= metric(target_counts
, counts
)
544 logger
.debug('Caesar break attempt using key {0} gives fit of {1} '
545 'and decrypt starting: {2}'.format(shift
, fit
, plaintext
[:50]))
549 logger
.info('Caesar break best fit: key {0} gives fit of {1} and '
550 'decrypt starting: {2}'.format(best_shift
, best_fit
,
551 caesar_decipher(sanitised_message
, best_shift
)[:50]))
552 return best_shift
, best_fit
554 def affine_break(message
,
555 metric
=norms
.euclidean_distance
,
556 target_counts
=normalised_english_counts
,
557 message_frequency_scaling
=norms
.normalise
):
558 """Breaks an affine cipher using frequency analysis
560 >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
561 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
562 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
563 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
564 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd ' \
565 'clm ckuxj.') # doctest: +ELLIPSIS
566 ((15, 22, True), 0.23570361818655...)
568 sanitised_message
= sanitise(message
)
571 best_one_based
= True
572 best_fit
= float("inf")
573 for one_based
in [True, False]:
574 for multiplier
in range(1, 26, 2):
575 for adder
in range(26):
576 plaintext
= affine_decipher(sanitised_message
,
577 multiplier
, adder
, one_based
)
578 counts
= message_frequency_scaling(letter_frequencies(plaintext
))
579 fit
= metric(target_counts
, counts
)
580 logger
.debug('Affine break attempt using key {0}x+{1} ({2}) '
581 'gives fit of {3} and decrypt starting: {4}'.
582 format(multiplier
, adder
, one_based
, fit
,
586 best_multiplier
= multiplier
588 best_one_based
= one_based
589 logger
.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} '
590 'and decrypt starting: {4}'.format(
591 best_multiplier
, best_adder
, best_one_based
, best_fit
,
592 affine_decipher(sanitised_message
, best_multiplier
,
593 best_adder
, best_one_based
)[:50]))
594 return (best_multiplier
, best_adder
, best_one_based
), best_fit
596 def keyword_break(message
,
598 metric
=norms
.euclidean_distance
,
599 target_counts
=normalised_english_counts
,
600 message_frequency_scaling
=norms
.normalise
):
601 """Breaks a keyword substitution cipher using a dictionary and
604 >>> keyword_break(keyword_encipher('this is a test message for the ' \
605 'keyword decipherment', 'elephant', 1), \
606 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
607 (('elephant', 1), 0.41643991598441...)
610 best_wrap_alphabet
= True
611 best_fit
= float("inf")
612 for wrap_alphabet
in range(3):
613 for keyword
in wordlist
:
614 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
615 counts
= message_frequency_scaling(letter_frequencies(plaintext
))
616 fit
= metric(target_counts
, counts
)
617 logger
.debug('Keyword break attempt using key {0} (wrap={1}) '
618 'gives fit of {2} and decrypt starting: {3}'.format(
619 keyword
, wrap_alphabet
, fit
,
620 sanitise(plaintext
)[:50]))
623 best_keyword
= keyword
624 best_wrap_alphabet
= wrap_alphabet
625 logger
.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
626 '{2} and decrypt starting: {3}'.format(best_keyword
,
627 best_wrap_alphabet
, best_fit
, sanitise(
628 keyword_decipher(message
, best_keyword
,
629 best_wrap_alphabet
))[:50]))
630 return (best_keyword
, best_wrap_alphabet
), best_fit
632 def keyword_break_mp(message
,
634 metric
=norms
.euclidean_distance
,
635 target_counts
=normalised_english_counts
,
636 message_frequency_scaling
=norms
.normalise
,
638 """Breaks a keyword substitution cipher using a dictionary and
641 >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
642 'keyword decipherment', 'elephant', 1), \
643 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
644 (('elephant', 1), 0.41643991598441...)
647 helper_args
= [(message
, word
, wrap
, metric
, target_counts
,
648 message_frequency_scaling
)
649 for word
in wordlist
for wrap
in range(3)]
650 # Gotcha: the helper function here needs to be defined at the top level
651 # (limitation of Pool.starmap)
652 breaks
= pool
.starmap(keyword_break_worker
, helper_args
, chunksize
)
653 return min(breaks
, key
=lambda k
: k
[1])
655 def keyword_break_worker(message
, keyword
, wrap_alphabet
, metric
, target_counts
,
656 message_frequency_scaling
):
657 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
658 counts
= message_frequency_scaling(letter_frequencies(plaintext
))
659 fit
= metric(target_counts
, counts
)
660 logger
.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
661 '{2} and decrypt starting: {3}'.format(keyword
,
662 wrap_alphabet
, fit
, sanitise(plaintext
)[:50]))
663 return (keyword
, wrap_alphabet
), fit
665 def scytale_break(message
,
666 metric
=norms
.euclidean_distance
,
667 target_counts
=normalised_english_bigram_counts
,
668 message_frequency_scaling
=norms
.normalise
):
669 """Breaks a Scytale cipher
671 >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoere' \
672 'dcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriioto' \
673 'aek') # doctest: +ELLIPSIS
674 (6, 0.83453041115025...)
677 best_fit
= float("inf")
678 ngram_length
= len(next(iter(target_counts
.keys())))
679 for key
in range(1, 20):
680 if len(message
) % key
== 0:
681 plaintext
= scytale_decipher(message
, key
)
682 counts
= message_frequency_scaling(frequencies(
683 ngrams(sanitise(plaintext
), ngram_length
)))
684 fit
= metric(target_counts
, counts
)
685 logger
.debug('Scytale break attempt using key {0} gives fit of '
686 '{1} and decrypt starting: {2}'.format(key
,
687 fit
, sanitise(plaintext
)[:50]))
691 logger
.info('Scytale break best fit with key {0} gives fit of {1} and '
692 'decrypt starting: {2}'.format(best_key
, best_fit
,
693 sanitise(scytale_decipher(message
, best_key
))[:50]))
694 return best_key
, best_fit
696 def column_transposition_break(message
,
698 metric
=norms
.euclidean_distance
,
699 target_counts
=normalised_english_bigram_counts
,
700 message_frequency_scaling
=norms
.normalise
):
701 """Breaks a column transposition cipher using a dictionary and
702 n-gram frequency analysis
704 >>> column_transposition_break(column_transposition_encipher(sanitise( \
705 "Turing's homosexuality resulted in a criminal prosecution in 1952, \
706 when homosexual acts were still illegal in the United Kingdom. "), \
708 wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS
709 ('encipher', 0.898128626285...)
710 >>> column_transposition_break(column_transposition_encipher(sanitise( \
711 "Turing's homosexuality resulted in a criminal prosecution in 1952, " \
712 "when homosexual acts were still illegal in the United Kingdom."), \
714 wordlist=['encipher', 'keyword', 'fourteen'], \
715 target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
716 ('encipher', 1.1958792913127...)
719 best_fit
= float("inf")
720 ngram_length
= len(next(iter(target_counts
.keys())))
721 for keyword
in wordlist
:
722 if len(message
) % len(deduplicate(keyword
)) == 0:
723 plaintext
= column_transposition_decipher(message
, keyword
)
724 counts
= message_frequency_scaling(frequencies(
725 ngrams(sanitise(plaintext
), ngram_length
)))
726 fit
= metric(target_counts
, counts
)
727 logger
.debug('Column transposition break attempt using key {0} '
728 'gives fit of {1} and decrypt starting: {2}'.format(
730 sanitise(plaintext
)[:50]))
733 best_keyword
= keyword
734 logger
.info('Column transposition break best fit with key {0} gives fit '
735 'of {1} and decrypt starting: {2}'.format(best_keyword
,
737 column_transposition_decipher(message
,
738 best_keyword
))[:50]))
739 return best_keyword
, best_fit
742 def column_transposition_break_mp(message
,
744 metric
=norms
.euclidean_distance
,
745 target_counts
=normalised_english_bigram_counts
,
746 message_frequency_scaling
=norms
.normalise
,
748 """Breaks a column transposition cipher using a dictionary and
749 n-gram frequency analysis
751 >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
752 "Turing's homosexuality resulted in a criminal prosecution in 1952, \
753 when homosexual acts were still illegal in the United Kingdom. "), \
755 wordlist=['encipher', 'keyword', 'fourteen']) # doctest: +ELLIPSIS
756 ('encipher', 0.898128626285...)
757 >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
758 "Turing's homosexuality resulted in a criminal prosecution in 1952, " \
759 "when homosexual acts were still illegal in the United Kingdom."), \
761 wordlist=['encipher', 'keyword', 'fourteen'], \
762 target_counts=normalised_english_trigram_counts) # doctest: +ELLIPSIS
763 ('encipher', 1.1958792913127...)
765 ngram_length
= len(next(iter(target_counts
.keys())))
767 helper_args
= [(message
, word
, metric
, target_counts
, ngram_length
,
768 message_frequency_scaling
)
769 for word
in wordlist
]
770 # Gotcha: the helper function here needs to be defined at the top level
771 # (limitation of Pool.starmap)
772 breaks
= pool
.starmap(column_transposition_break_worker
, helper_args
, chunksize
)
773 return min(breaks
, key
=lambda k
: k
[1])
775 def column_transposition_break_worker(message
, keyword
, metric
, target_counts
,
776 ngram_length
, message_frequency_scaling
):
777 plaintext
= column_transposition_decipher(message
, keyword
)
778 counts
= message_frequency_scaling(frequencies(
779 ngrams(sanitise(plaintext
), ngram_length
)))
780 fit
= metric(target_counts
, counts
)
781 logger
.debug('Column transposition break attempt using key {0} '
782 'gives fit of {1} and decrypt starting: {2}'.format(
784 sanitise(plaintext
)[:50]))
787 def vigenere_keyword_break(message
,
789 metric
=norms
.euclidean_distance
,
790 target_counts
=normalised_english_counts
,
791 message_frequency_scaling
=norms
.normalise
):
792 """Breaks a vigenere cipher using a dictionary and
795 >>> vigenere_keyword_break(keyword_encipher('this is a test message for the ' \
796 'keyword decipherment', 'elephant', 1), \
797 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
798 ('elephant', 0.7166585201707...)
801 best_fit
= float("inf")
802 for keyword
in wordlist
:
803 plaintext
= vigenere_decipher(message
, keyword
)
804 counts
= message_frequency_scaling(letter_frequencies(plaintext
))
805 fit
= metric(target_counts
, counts
)
806 logger
.debug('Vigenere break attempt using key {0} '
807 'gives fit of {1} and decrypt starting: {2}'.format(
809 sanitise(plaintext
)[:50]))
812 best_keyword
= keyword
813 logger
.info('Vigenere break best fit with key {0} gives fit '
814 'of {1} and decrypt starting: {2}'.format(best_keyword
,
816 vigenere_decipher(message
, best_keyword
))[:50]))
817 return best_keyword
, best_fit
819 def vigenere_keyword_break_mp(message
,
821 metric
=norms
.euclidean_distance
,
822 target_counts
=normalised_english_counts
,
823 message_frequency_scaling
=norms
.normalise
,
825 """Breaks a vigenere cipher using a dictionary and
828 >>> vigenere_keyword_break_mp(keyword_encipher('this is a test message for the ' \
829 'keyword decipherment', 'elephant', 1), \
830 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
831 ('elephant', 0.7166585201707...)
834 helper_args
= [(message
, word
, metric
, target_counts
,
835 message_frequency_scaling
)
836 for word
in wordlist
]
837 # Gotcha: the helper function here needs to be defined at the top level
838 # (limitation of Pool.starmap)
839 breaks
= pool
.starmap(vigenere_keyword_break_worker
, helper_args
, chunksize
)
840 return min(breaks
, key
=lambda k
: k
[1])
842 def vigenere_keyword_break_worker(message
, keyword
, metric
, target_counts
,
843 message_frequency_scaling
):
844 plaintext
= vigenere_decipher(message
, keyword
)
845 counts
= message_frequency_scaling(letter_frequencies(plaintext
))
846 fit
= metric(target_counts
, counts
)
847 logger
.debug('Vigenere keyword break attempt using key {0} gives fit of '
848 '{1} and decrypt starting: {2}'.format(keyword
,
849 fit
, sanitise(plaintext
)[:50]))
854 if __name__
== "__main__":