1 """A set of functions to break the ciphers give in ciphers.py.
10 from itertools
import starmap
11 from segment
import segment
12 from multiprocessing
import Pool
14 import matplotlib
.pyplot
as plt
16 # logging.basicConfig(filename="cipher.log", level=logging.INFO)
17 # logger = logging.getLogger(__name__)
19 logger
= logging
.getLogger('cipherbreak')
20 logger
.setLevel(logging
.WARNING
)
21 # logger.setLevel(logging.INFO)
22 # logger.setLevel(logging.DEBUG)
24 # create the logging file handler
25 fh
= logging
.FileHandler("cipher.log")
26 formatter
= logging
.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
27 fh
.setFormatter(formatter
)
29 # add handler to logger object
34 from language_models
import *
39 # c5a = open('2012/5a.ciphertext', 'r').read()
40 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
41 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
44 def index_of_coincidence(text
):
45 stext
= sanitise(text
)
46 counts
= collections
.Counter(stext
)
47 denom
= len(stext
) * (len(text
) - 1) / 26
49 sum(max(counts
[l
] * counts
[l
] - 1, 0) for l
in string
.ascii_lowercase
)
55 transpositions
= collections
.defaultdict(list)
57 transpositions
[transpositions_of(word
)] += [word
]
59 def frequencies(text
):
60 """Count the number of occurrences of each character in text
62 >>> sorted(frequencies('abcdefabc').items())
63 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
64 >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
65 'dog').items()) # doctest: +NORMALIZE_WHITESPACE
66 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
67 ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
68 ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
69 ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
70 >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
71 '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
72 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
73 ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
74 ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
75 ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
76 ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
77 >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\
78 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
79 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
80 ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
81 ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
82 ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
83 >>> frequencies('abcdefabcdef')['x']
86 return collections
.Counter(c
for c
in text
)
89 def caesar_break(message
, fitness
=Pletters
):
90 """Breaks a Caesar cipher using frequency analysis
92 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
93 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
94 (4, -130.849989015...)
95 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
96 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
97 (19, -128.82410410...)
98 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
99 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
100 (13, -126.25403935...)
102 sanitised_message
= sanitise(message
)
104 best_fit
= float('-inf')
105 for shift
in range(26):
106 plaintext
= caesar_decipher(sanitised_message
, shift
)
107 fit
= fitness(plaintext
)
108 logger
.debug('Caesar break attempt using key {0} gives fit of {1} '
109 'and decrypt starting: {2}'.format(shift
, fit
,
114 logger
.info('Caesar break best fit: key {0} gives fit of {1} and '
115 'decrypt starting: {2}'.format(best_shift
, best_fit
,
116 caesar_decipher(sanitised_message
, best_shift
)[:50]))
117 return best_shift
, best_fit
119 def affine_break(message
, fitness
=Pletters
):
120 """Breaks an affine cipher using frequency analysis
122 >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
123 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
124 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
125 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
126 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai ' \
127 'kxd clm ckuxj.') # doctest: +ELLIPSIS
128 ((15, 22, True), -340.601181913...)
130 sanitised_message
= sanitise(message
)
133 best_one_based
= True
134 best_fit
= float("-inf")
135 for one_based
in [True, False]:
136 for multiplier
in [x
for x
in range(1, 26, 2) if x
!= 13]:
137 for adder
in range(26):
138 plaintext
= affine_decipher(sanitised_message
,
139 multiplier
, adder
, one_based
)
140 fit
= fitness(plaintext
)
141 logger
.debug('Affine break attempt using key {0}x+{1} ({2}) '
142 'gives fit of {3} and decrypt starting: {4}'.
143 format(multiplier
, adder
, one_based
, fit
,
147 best_multiplier
= multiplier
149 best_one_based
= one_based
150 logger
.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of '
151 '{3} and decrypt starting: {4}'.format(
152 best_multiplier
, best_adder
, best_one_based
, best_fit
,
153 affine_decipher(sanitised_message
, best_multiplier
,
154 best_adder
, best_one_based
)[:50]))
155 return (best_multiplier
, best_adder
, best_one_based
), best_fit
157 def keyword_break(message
, wordlist
=keywords
, fitness
=Pletters
):
158 """Breaks a keyword substitution cipher using a dictionary and
161 >>> keyword_break(keyword_encipher('this is a test message for the ' \
162 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
163 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
164 (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
167 best_wrap_alphabet
= True
168 best_fit
= float("-inf")
169 for wrap_alphabet
in KeywordWrapAlphabet
:
170 for keyword
in wordlist
:
171 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
172 fit
= fitness(plaintext
)
173 logger
.debug('Keyword break attempt using key {0} (wrap={1}) '
174 'gives fit of {2} and decrypt starting: {3}'.format(
175 keyword
, wrap_alphabet
, fit
,
176 sanitise(plaintext
)[:50]))
179 best_keyword
= keyword
180 best_wrap_alphabet
= wrap_alphabet
181 logger
.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
182 '{2} and decrypt starting: {3}'.format(best_keyword
,
183 best_wrap_alphabet
, best_fit
, sanitise(
184 keyword_decipher(message
, best_keyword
,
185 best_wrap_alphabet
))[:50]))
186 return (best_keyword
, best_wrap_alphabet
), best_fit
188 def keyword_break_mp(message
, wordlist
=keywords
, fitness
=Pletters
,
189 number_of_solutions
=1, chunksize
=500):
190 """Breaks a keyword substitution cipher using a dictionary and
193 >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
194 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
195 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
196 (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
197 >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
198 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
199 wordlist=['cat', 'elephant', 'kangaroo'], \
200 number_of_solutions=2) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
201 [(('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...),
202 (('elephant', <KeywordWrapAlphabet.from_largest: 3>), -52.834575011...)]
205 helper_args
= [(message
, word
, wrap
, fitness
)
207 for wrap
in KeywordWrapAlphabet
]
208 # Gotcha: the helper function here needs to be defined at the top level
209 # (limitation of Pool.starmap)
210 breaks
= pool
.starmap(keyword_break_worker
, helper_args
, chunksize
)
211 if number_of_solutions
== 1:
212 return max(breaks
, key
=lambda k
: k
[1])
214 return sorted(breaks
, key
=lambda k
: k
[1], reverse
=True)[:number_of_solutions
]
216 def keyword_break_worker(message
, keyword
, wrap_alphabet
, fitness
):
217 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
218 fit
= fitness(plaintext
)
219 logger
.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
220 '{2} and decrypt starting: {3}'.format(keyword
,
221 wrap_alphabet
, fit
, sanitise(plaintext
)[:50]))
222 return (keyword
, wrap_alphabet
), fit
224 # def monoalphabetic_break_hillclimbing(message, max_iterations=10000000,
225 # alphabet=None, fitness=Pletters):
226 # ciphertext = unaccent(message).lower()
228 # alphabet = list(string.ascii_lowercase)
229 # random.shuffle(alphabet)
230 # alphabet = cat(alphabet)
231 # return monoalphabetic_break_hillclimbing_worker(ciphertext, alphabet,
232 # max_iterations, fitness)
234 # def monoalphabetic_break_hillclimbing_mp(message, workers=10,
235 # max_iterations = 10000000, alphabet=None, fitness=Pletters, chunksize=1):
237 # ciphertext = unaccent(message).lower()
238 # for i in range(workers):
240 # this_alphabet = alphabet
242 # this_alphabet = list(string.ascii_lowercase)
243 # random.shuffle(this_alphabet)
244 # this_alphabet = cat(this_alphabet)
245 # worker_args.append((ciphertext, this_alphabet, max_iterations, fitness))
246 # with Pool() as pool:
247 # breaks = pool.starmap(monoalphabetic_break_hillclimbing_worker,
248 # worker_args, chunksize)
249 # return max(breaks, key=lambda k: k[1])
251 # def monoalphabetic_break_hillclimbing_worker(message, alphabet,
252 # max_iterations, fitness):
253 # def swap(letters, i, j):
259 # return (letters[:i] + letters[j] + letters[i+1:j] + letters[i] +
261 # best_alphabet = alphabet
262 # best_fitness = float('-inf')
263 # for i in range(max_iterations):
264 # alphabet = swap(best_alphabet, random.randrange(26), random.randrange(26))
265 # cipher_translation = ''.maketrans(string.ascii_lowercase, alphabet)
266 # plaintext = message.translate(cipher_translation)
267 # if fitness(plaintext) > best_fitness:
268 # best_fitness = fitness(plaintext)
269 # best_alphabet = alphabet
270 # print(i, best_alphabet, best_fitness, plaintext[:50])
271 # return best_alphabet, best_fitness
274 def monoalphabetic_break_hillclimbing(message
,
275 max_iterations
=20000,
277 cipher_alphabet
=None,
278 fitness
=Pletters
, chunksize
=1):
279 return simulated_annealing_break(message
,
281 initial_temperature
=0,
282 max_iterations
=max_iterations
,
283 plain_alphabet
=plain_alphabet
,
284 cipher_alphabet
=cipher_alphabet
,
285 fitness
=fitness
, chunksize
=chunksize
)
288 def monoalphabetic_break_hillclimbing_mp(message
,
290 max_iterations
=20000,
292 cipher_alphabet
=None,
293 fitness
=Pletters
, chunksize
=1):
294 return simulated_annealing_break(message
,
296 initial_temperature
=0,
297 max_iterations
=max_iterations
,
298 plain_alphabet
=plain_alphabet
,
299 cipher_alphabet
=cipher_alphabet
,
300 fitness
=fitness
, chunksize
=chunksize
)
303 def simulated_annealing_break(message
, workers
=10,
304 initial_temperature
=200,
305 max_iterations
=20000,
307 cipher_alphabet
=None,
308 fitness
=Pletters
, chunksize
=1):
310 ciphertext
= sanitise(message
)
311 for i
in range(workers
):
312 if not plain_alphabet
:
313 plain_alphabet
= string
.ascii_lowercase
314 if not cipher_alphabet
:
315 cipher_alphabet
= list(string
.ascii_lowercase
)
316 random
.shuffle(cipher_alphabet
)
317 cipher_alphabet
= cat(cipher_alphabet
)
318 worker_args
.append((ciphertext
, plain_alphabet
, cipher_alphabet
,
319 initial_temperature
, max_iterations
, fitness
))
321 breaks
= pool
.starmap(simulated_annealing_break_worker
,
322 worker_args
, chunksize
)
323 return max(breaks
, key
=lambda k
: k
[1])
326 def simulated_annealing_break_worker(message
, plain_alphabet
, cipher_alphabet
,
327 t0
, max_iterations
, fitness
):
328 def swap(letters
, i
, j
):
334 return (letters
[:i
] + letters
[j
] + letters
[i
+1:j
] + letters
[i
] +
339 dt
= t0
/ (0.9 * max_iterations
)
341 current_alphabet
= cipher_alphabet
342 alphabet
= current_alphabet
343 cipher_translation
= ''.maketrans(current_alphabet
, plain_alphabet
)
344 plaintext
= message
.translate(cipher_translation
)
345 current_fitness
= fitness(plaintext
)
347 best_alphabet
= current_alphabet
348 best_fitness
= current_fitness
349 best_plaintext
= plaintext
351 # print('starting for', max_iterations)
352 for i
in range(max_iterations
):
353 swap_a
= random
.randrange(26)
354 swap_b
= (swap_a
+ int(random
.gauss(0, 4))) % 26
355 alphabet
= swap(current_alphabet
, swap_a
, swap_b
)
356 cipher_translation
= ''.maketrans(alphabet
, plain_alphabet
)
357 plaintext
= message
.translate(cipher_translation
)
358 new_fitness
= fitness(plaintext
)
360 sa_chance
= math
.exp((new_fitness
- current_fitness
) / temperature
)
361 except (OverflowError, ZeroDivisionError):
362 # print('exception triggered: new_fit {}, current_fit {}, temp {}'.format(new_fitness, current_fitness, temperature))
364 if (new_fitness
> current_fitness
or random
.random() < sa_chance
):
365 # logger.debug('Simulated annealing: iteration {}, temperature {}, '
366 # 'current alphabet {}, current_fitness {}, '
367 # 'best_plaintext {}'.format(i, temperature, current_alphabet,
368 # current_fitness, best_plaintext[:50]))
370 # logger.debug('new_fit {}, current_fit {}, temp {}, sa_chance {}'.format(new_fitness, current_fitness, temperature, sa_chance))
371 current_fitness
= new_fitness
372 current_alphabet
= alphabet
374 if current_fitness
> best_fitness
:
375 best_alphabet
= current_alphabet
376 best_fitness
= current_fitness
377 best_plaintext
= plaintext
379 logger
.debug('Simulated annealing: iteration {}, temperature {}, '
380 'current alphabet {}, current_fitness {}, '
381 'best_plaintext {}'.format(i
, temperature
, current_alphabet
,
382 current_fitness
, plaintext
[:50]))
383 temperature
= max(temperature
- dt
, 0.001)
385 return best_alphabet
, best_fitness
# current_alphabet, current_fitness
388 def vigenere_keyword_break_mp(message
, wordlist
=keywords
, fitness
=Pletters
,
390 """Breaks a vigenere cipher using a dictionary and frequency analysis.
392 >>> vigenere_keyword_break_mp(vigenere_encipher(sanitise('this is a test ' \
393 'message for the vigenere decipherment'), 'cat'), \
394 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
395 ('cat', -52.9472712...)
398 helper_args
= [(message
, word
, fitness
)
399 for word
in wordlist
]
400 # Gotcha: the helper function here needs to be defined at the top level
401 # (limitation of Pool.starmap)
402 breaks
= pool
.starmap(vigenere_keyword_break_worker
, helper_args
,
404 return max(breaks
, key
=lambda k
: k
[1])
405 vigenere_keyword_break
= vigenere_keyword_break_mp
407 def vigenere_keyword_break_worker(message
, keyword
, fitness
):
408 plaintext
= vigenere_decipher(message
, keyword
)
409 fit
= fitness(plaintext
)
410 logger
.debug('Vigenere keyword break attempt using key {0} gives fit of '
411 '{1} and decrypt starting: {2}'.format(keyword
,
412 fit
, sanitise(plaintext
)[:50]))
416 def vigenere_frequency_break(message
, max_key_length
=20, fitness
=Pletters
):
417 """Breaks a Vigenere cipher with frequency analysis
419 >>> vigenere_frequency_break(vigenere_encipher(sanitise("It is time to " \
420 "run. She is ready and so am I. I stole Daniel's pocketbook this " \
421 "afternoon when he left his jacket hanging on the easel in the " \
422 "attic. I jump every time I hear a footstep on the stairs, " \
423 "certain that the theft has been discovered and that I will " \
424 "be caught. The SS officer visits less often now that he is " \
425 "sure"), 'florence')) # doctest: +ELLIPSIS
426 ('florence', -307.5473096...)
428 def worker(message
, key_length
, fitness
):
429 splits
= every_nth(sanitised_message
, key_length
)
430 key
= cat([unpos(caesar_break(s
)[0]) for s
in splits
])
431 plaintext
= vigenere_decipher(message
, key
)
432 fit
= fitness(plaintext
)
434 sanitised_message
= sanitise(message
)
435 results
= starmap(worker
, [(sanitised_message
, i
, fitness
)
436 for i
in range(1, max_key_length
+1)])
437 return max(results
, key
=lambda k
: k
[1])
440 def beaufort_sub_break(message
, fitness
=Pletters
):
441 """Breaks one chunk of a Beaufort cipher with frequency analysis
443 >>> beaufort_sub_break('samwpplggnnmmyaazgympjapopnwiywwomwspgpjmefwmawx' \
444 'jafjhxwwwdigxshnlywiamhyshtasxptwueahhytjwsn') # doctest: +ELLIPSIS
446 >>> beaufort_sub_break('eyprzjjzznxymrygryjqmqhznjrjjapenejznawngnnezgza' \
447 'dgndknaogpdjneadadazlhkhxkryevrronrmdjnndjlo') # doctest: +ELLIPSIS
451 best_fit
= float('-inf')
452 for key
in range(26):
453 plaintext
= [unpos(key
- pos(l
)) for l
in message
]
454 fit
= fitness(plaintext
)
455 logger
.debug('Beaufort sub break attempt using key {0} gives fit of {1} '
456 'and decrypt starting: {2}'.format(key
, fit
,
461 logger
.info('Beaufort sub break best fit: key {0} gives fit of {1} and '
462 'decrypt starting: {2}'.format(best_key
, best_fit
,
463 cat([unpos(best_key
- pos(l
)) for l
in message
[:50]])))
464 return best_key
, best_fit
467 def beaufort_frequency_break(message
, max_key_length
=20, fitness
=Pletters
):
468 """Breaks a Beaufort cipher with frequency analysis
470 >>> beaufort_frequency_break(beaufort_encipher(sanitise("It is time to " \
471 "run. She is ready and so am I. I stole Daniel's pocketbook this " \
472 "afternoon when he left his jacket hanging on the easel in the " \
473 "attic. I jump every time I hear a footstep on the stairs, " \
474 "certain that the theft has been discovered and that I will " \
475 "be caught. The SS officer visits less often now " \
476 "that he is sure"), 'florence')) # doctest: +ELLIPSIS
477 ('florence', -307.5473096791...)
479 def worker(message
, key_length
, fitness
):
480 splits
= every_nth(message
, key_length
)
481 key
= cat([unpos(beaufort_sub_break(s
)[0]) for s
in splits
])
482 plaintext
= beaufort_decipher(message
, key
)
483 fit
= fitness(plaintext
)
485 sanitised_message
= sanitise(message
)
486 results
= starmap(worker
, [(sanitised_message
, i
, fitness
)
487 for i
in range(1, max_key_length
+1)])
488 return max(results
, key
=lambda k
: k
[1])
491 def beaufort_variant_frequency_break(message
, max_key_length
=20, fitness
=Pletters
):
492 """Breaks a Beaufort cipher with frequency analysis
494 >>> beaufort_variant_frequency_break(beaufort_variant_encipher(sanitise("It is time to " \
495 "run. She is ready and so am I. I stole Daniel's pocketbook this " \
496 "afternoon when he left his jacket hanging on the easel in the " \
497 "attic. I jump every time I hear a footstep on the stairs, " \
498 "certain that the theft has been discovered and that I will " \
499 "be caught. The SS officer visits less often now " \
500 "that he is sure"), 'florence')) # doctest: +ELLIPSIS
501 ('florence', -307.5473096791...)
503 def worker(message
, key_length
, fitness
):
504 splits
= every_nth(sanitised_message
, key_length
)
505 key
= cat([unpos(-caesar_break(s
)[0]) for s
in splits
])
506 plaintext
= beaufort_variant_decipher(message
, key
)
507 fit
= fitness(plaintext
)
509 sanitised_message
= sanitise(message
)
510 results
= starmap(worker
, [(sanitised_message
, i
, fitness
)
511 for i
in range(1, max_key_length
+1)])
512 return max(results
, key
=lambda k
: k
[1])
514 def polybius_break_mp(message
, column_labels
, row_labels
,
515 letters_to_merge
=None,
516 wordlist
=keywords
, fitness
=Pletters
,
517 number_of_solutions
=1, chunksize
=500):
518 """Breaks a Polybius substitution cipher using a dictionary and
521 >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
522 'polybius decipherment', 'elephant', 'abcde', 'abcde'), \
524 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
525 (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'abcde', False), \
527 >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
528 'polybius decipherment', 'elephant', 'abcde', 'abcde', column_first=True), \
530 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
531 (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'abcde', True), \
533 >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
534 'polybius decipherment', 'elephant', 'abcde', 'abcde', column_first=False), \
536 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
537 (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'abcde', False), \
539 >>> polybius_break_mp(polybius_encipher('this is a test message for the ' \
540 'polybius decipherment', 'elephant', 'abcde', 'pqrst', column_first=True), \
542 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
543 (('elephant', <KeywordWrapAlphabet.from_a: 1>, 'abcde', 'pqrst', True), \
546 if letters_to_merge
is None:
547 letters_to_merge
= {'j': 'i'}
549 helper_args
= [(message
, word
, wrap
,
550 column_labels
, row_labels
, column_first
,
554 for wrap
in KeywordWrapAlphabet
555 for column_first
in [False, True]]
556 # Gotcha: the helper function here needs to be defined at the top level
557 # (limitation of Pool.starmap)
558 breaks
= pool
.starmap(polybius_break_worker
, helper_args
, chunksize
)
559 if number_of_solutions
== 1:
560 return max(breaks
, key
=lambda k
: k
[1])
562 return sorted(breaks
, key
=lambda k
: k
[1], reverse
=True)[:number_of_solutions
]
564 def polybius_break_worker(message
, keyword
, wrap_alphabet
,
565 column_order
, row_order
, column_first
,
568 plaintext
= polybius_decipher(message
, keyword
,
569 column_order
, row_order
,
570 column_first
=column_first
,
571 letters_to_merge
=letters_to_merge
,
572 wrap_alphabet
=wrap_alphabet
)
574 fit
= fitness(plaintext
)
577 logger
.debug('Polybius break attempt using key {0} (wrap={1}, merging {2}), '
578 'columns as {3}, rows as {4} (column_first={5}) '
579 'gives fit of {6} and decrypt starting: '
580 '{7}'.format(keyword
, wrap_alphabet
, letters_to_merge
,
581 column_order
, row_order
, column_first
,
582 fit
, sanitise(plaintext
)[:50]))
583 return (keyword
, wrap_alphabet
, column_order
, row_order
, column_first
), fit
586 def column_transposition_break_mp(message
, translist
=transpositions
,
587 fitness
=Pbigrams
, chunksize
=500):
588 """Breaks a column transposition cipher using a dictionary and
589 n-gram frequency analysis
591 >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
592 "It is a truth universally acknowledged, that a single man in \
593 possession of a good fortune, must be in want of a wife. However \
594 little known the feelings or views of such a man may be on his \
595 first entering a neighbourhood, this truth is so well fixed in \
596 the minds of the surrounding families, that he is considered the \
597 rightful property of some one or other of their daughters."), \
599 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
600 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
601 (6, 1, 0, 4, 5, 3, 2): ['keyword']}) # doctest: +ELLIPSIS
602 (((2, 0, 5, 3, 1, 4, 6), False, False), -709.4646722...)
603 >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
604 "It is a truth universally acknowledged, that a single man in \
605 possession of a good fortune, must be in want of a wife. However \
606 little known the feelings or views of such a man may be on his \
607 first entering a neighbourhood, this truth is so well fixed in \
608 the minds of the surrounding families, that he is considered the \
609 rightful property of some one or other of their daughters."), \
611 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
612 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
613 (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
614 fitness=Ptrigrams) # doctest: +ELLIPSIS
615 (((2, 0, 5, 3, 1, 4, 6), False, False), -997.0129085...)
618 helper_args
= [(message
, trans
, fillcolumnwise
, emptycolumnwise
,
620 for trans
in translist
621 for fillcolumnwise
in [True, False]
622 for emptycolumnwise
in [True, False]]
623 # Gotcha: the helper function here needs to be defined at the top level
624 # (limitation of Pool.starmap)
625 breaks
= pool
.starmap(column_transposition_break_worker
,
626 helper_args
, chunksize
)
627 return max(breaks
, key
=lambda k
: k
[1])
628 column_transposition_break
= column_transposition_break_mp
630 def column_transposition_break_worker(message
, transposition
,
631 fillcolumnwise
, emptycolumnwise
, fitness
):
632 plaintext
= column_transposition_decipher(message
, transposition
,
633 fillcolumnwise
=fillcolumnwise
, emptycolumnwise
=emptycolumnwise
)
634 fit
= fitness(sanitise(plaintext
))
635 logger
.debug('Column transposition break attempt using key {0} '
636 'gives fit of {1} and decrypt starting: {2}'.format(
638 sanitise(plaintext
)[:50]))
639 return (transposition
, fillcolumnwise
, emptycolumnwise
), fit
642 def scytale_break_mp(message
, max_key_length
=20,
643 fitness
=Pbigrams
, chunksize
=500):
644 """Breaks a scytale cipher using a range of lengths and
645 n-gram frequency analysis
647 >>> scytale_break_mp(scytale_encipher(sanitise( \
648 "It is a truth universally acknowledged, that a single man in \
649 possession of a good fortune, must be in want of a wife. However \
650 little known the feelings or views of such a man may be on his \
651 first entering a neighbourhood, this truth is so well fixed in \
652 the minds of the surrounding families, that he is considered the \
653 rightful property of some one or other of their daughters."), \
654 5)) # doctest: +ELLIPSIS
656 >>> scytale_break_mp(scytale_encipher(sanitise( \
657 "It is a truth universally acknowledged, that a single man in \
658 possession of a good fortune, must be in want of a wife. However \
659 little known the feelings or views of such a man may be on his \
660 first entering a neighbourhood, this truth is so well fixed in \
661 the minds of the surrounding families, that he is considered the \
662 rightful property of some one or other of their daughters."), \
664 fitness=Ptrigrams) # doctest: +ELLIPSIS
668 helper_args
= [(message
, trans
, False, True, fitness
)
670 [[col
for col
in range(math
.ceil(len(message
)/rows
))]
671 for rows
in range(1,max_key_length
+1)]]
672 # Gotcha: the helper function here needs to be defined at the top level
673 # (limitation of Pool.starmap)
674 breaks
= pool
.starmap(column_transposition_break_worker
,
675 helper_args
, chunksize
)
676 best
= max(breaks
, key
=lambda k
: k
[1])
677 return math
.trunc(len(message
) / len(best
[0][0])), best
[1]
678 scytale_break
= scytale_break_mp
681 def railfence_break(message
, max_key_length
=20,
682 fitness
=Pletters
, chunksize
=500):
683 """Breaks a railfence cipher using a matrix of given rank and letter frequencies
688 sanitised_message
= sanitise(message
)
689 results
= starmap(worker
, [(sanitised_message
, i
, fitness
)
690 for i
in range(2, max_key_length
+1)])
691 return max(results
, key
=lambda k
: k
[1])
694 def railfence_break(message
, max_key_length
=20,
695 fitness
=Pbigrams
, chunksize
=500):
696 """Breaks a railfence cipher using a range of lengths and
697 n-gram frequency analysis
699 >>> railfence_break(railfence_encipher(sanitise( \
700 "It is a truth universally acknowledged, that a single man in \
701 possession of a good fortune, must be in want of a wife. However \
702 little known the feelings or views of such a man may be on his \
703 first entering a neighbourhood, this truth is so well fixed in \
704 the minds of the surrounding families, that he is considered the \
705 rightful property of some one or other of their daughters."), \
706 7)) # doctest: +ELLIPSIS
707 (7, -709.46467226...)
708 >>> railfence_break(railfence_encipher(sanitise( \
709 "It is a truth universally acknowledged, that a single man in \
710 possession of a good fortune, must be in want of a wife. However \
711 little known the feelings or views of such a man may be on his \
712 first entering a neighbourhood, this truth is so well fixed in \
713 the minds of the surrounding families, that he is considered the \
714 rightful property of some one or other of their daughters."), \
716 fitness=Ptrigrams) # doctest: +ELLIPSIS
719 def worker(message
, height
, fitness
):
720 plaintext
= railfence_decipher(message
, height
)
721 fit
= fitness(plaintext
)
724 sanitised_message
= sanitise(message
)
725 results
= starmap(worker
, [(sanitised_message
, i
, fitness
)
726 for i
in range(2, max_key_length
+1)])
727 return max(results
, key
=lambda k
: k
[1])
729 def amsco_break(message
, translist
=transpositions
, patterns
= [(1, 2), (2, 1)],
730 fillstyles
= [AmscoFillStyle
.continuous
,
731 AmscoFillStyle
.same_each_row
,
732 AmscoFillStyle
.reverse_each_row
],
735 """Breaks an AMSCO transposition cipher using a dictionary and
736 n-gram frequency analysis
738 >>> amsco_break(amsco_transposition_encipher(sanitise( \
739 "It is a truth universally acknowledged, that a single man in \
740 possession of a good fortune, must be in want of a wife. However \
741 little known the feelings or views of such a man may be on his \
742 first entering a neighbourhood, this truth is so well fixed in \
743 the minds of the surrounding families, that he is considered the \
744 rightful property of some one or other of their daughters."), \
746 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
747 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
748 (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
749 patterns=[(1, 2)]) # doctest: +ELLIPSIS
750 (((2, 0, 5, 3, 1, 4, 6), (1, 2), <AmscoFillStyle.continuous: 1>), -709.4646722...)
751 >>> amsco_break(amsco_transposition_encipher(sanitise( \
752 "It is a truth universally acknowledged, that a single man in \
753 possession of a good fortune, must be in want of a wife. However \
754 little known the feelings or views of such a man may be on his \
755 first entering a neighbourhood, this truth is so well fixed in \
756 the minds of the surrounding families, that he is considered the \
757 rightful property of some one or other of their daughters."), \
758 'encipher', fillpattern=(2, 1)), \
759 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
760 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
761 (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
762 patterns=[(1, 2), (2, 1)], fitness=Ptrigrams) # doctest: +ELLIPSIS
763 (((2, 0, 5, 3, 1, 4, 6), (2, 1), <AmscoFillStyle.continuous: 1>), -997.0129085...)
766 helper_args
= [(message
, trans
, pattern
, fillstyle
, fitness
)
767 for trans
in translist
768 for pattern
in patterns
769 for fillstyle
in fillstyles
]
770 # Gotcha: the helper function here needs to be defined at the top level
771 # (limitation of Pool.starmap)
772 breaks
= pool
.starmap(amsco_break_worker
, helper_args
, chunksize
)
773 return max(breaks
, key
=lambda k
: k
[1])
775 def amsco_break_worker(message
, transposition
,
776 pattern
, fillstyle
, fitness
):
777 plaintext
= amsco_transposition_decipher(message
, transposition
,
778 fillpattern
=pattern
, fillstyle
=fillstyle
)
779 fit
= fitness(sanitise(plaintext
))
780 logger
.debug('AMSCO transposition break attempt using key {0} and pattern'
781 '{1} ({2}) gives fit of {3} and decrypt starting: '
783 transposition
, pattern
, fillstyle
, fit
,
784 sanitise(plaintext
)[:50]))
785 return (transposition
, pattern
, fillstyle
), fit
788 def hill_break(message
, matrix_size
=2, fitness
=Pletters
,
789 number_of_solutions
=1, chunksize
=500):
791 all_matrices
= [np
.matrix(list(m
))
792 for m
in itertools
.product([list(r
)
793 for r
in itertools
.product(range(26), repeat
=matrix_size
)],
795 valid_matrices
= [m
for m
, d
in
796 zip(all_matrices
, (int(round(linalg
.det(m
))) for m
in all_matrices
))
801 helper_args
= [(message
, matrix
, fitness
)
802 for matrix
in valid_matrices
]
803 # Gotcha: the helper function here needs to be defined at the top level
804 # (limitation of Pool.starmap)
805 breaks
= pool
.starmap(hill_break_worker
, helper_args
, chunksize
)
806 if number_of_solutions
== 1:
807 return max(breaks
, key
=lambda k
: k
[1])
809 return sorted(breaks
, key
=lambda k
: k
[1], reverse
=True)[:number_of_solutions
]
811 def hill_break_worker(message
, matrix
, fitness
):
812 plaintext
= hill_decipher(matrix
, message
)
813 fit
= fitness(plaintext
)
814 logger
.debug('Hill cipher break attempt using key {0} gives fit of '
815 '{1} and decrypt starting: {2}'.format(matrix
,
816 fit
, sanitise(plaintext
)[:50]))
819 def bifid_break_mp(message
, wordlist
=keywords
, fitness
=Pletters
, max_period
=10,
820 number_of_solutions
=1, chunksize
=500):
821 """Breaks a keyword substitution cipher using a dictionary and
824 >>> bifid_break_mp(bifid_encipher('this is a test message for the ' \
825 'keyword decipherment', 'elephant', wrap_alphabet=KeywordWrapAlphabet.from_last), \
826 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
827 (('elephant', <KeywordWrapAlphabet.from_last: 2>, 0), -52.834575011...)
828 >>> bifid_break_mp(bifid_encipher('this is a test message for the ' \
829 'keyword decipherment', 'elephant', wrap_alphabet=KeywordWrapAlphabet.from_last), \
830 wordlist=['cat', 'elephant', 'kangaroo'], \
831 number_of_solutions=2) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
832 [(('elephant', <KeywordWrapAlphabet.from_last: 2>, 0), -52.834575011...),
833 (('elephant', <KeywordWrapAlphabet.from_largest: 3>, 0), -52.834575011...)]
836 helper_args
= [(message
, word
, wrap
, period
, fitness
)
838 for wrap
in KeywordWrapAlphabet
839 for period
in range(max_period
+1)]
840 # Gotcha: the helper function here needs to be defined at the top level
841 # (limitation of Pool.starmap)
842 breaks
= pool
.starmap(bifid_break_worker
, helper_args
, chunksize
)
843 if number_of_solutions
== 1:
844 return max(breaks
, key
=lambda k
: k
[1])
846 return sorted(breaks
, key
=lambda k
: k
[1], reverse
=True)[:number_of_solutions
]
848 def bifid_break_worker(message
, keyword
, wrap_alphabet
, period
, fitness
):
849 plaintext
= bifid_decipher(message
, keyword
, wrap_alphabet
, period
=period
)
850 fit
= fitness(plaintext
)
851 logger
.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
852 '{2} and decrypt starting: {3}'.format(keyword
,
853 wrap_alphabet
, fit
, sanitise(plaintext
)[:50]))
854 return (keyword
, wrap_alphabet
, period
), fit
857 def autokey_sa_break( message
861 , initial_temperature
=200
862 , max_iterations
=20000
867 """Break an autokey cipher by simulated annealing
870 ciphertext
= sanitise(message
)
871 for keylength
in range(min_keylength
, max_keylength
+1):
872 for i
in range(workers
):
873 key
= cat(random
.choice(string
.ascii_lowercase
) for _
in range(keylength
))
874 worker_args
.append((ciphertext
, key
,
875 initial_temperature
, max_iterations
, fitness
))
878 breaks
= pool
.starmap(autokey_sa_break_worker
,
879 worker_args
, chunksize
)
880 if result_count
<= 1:
881 return max(breaks
, key
=lambda k
: k
[1])
883 return sorted(set(breaks
), key
=lambda k
: k
[1], reverse
=True)[:result_count
]
886 def autokey_sa_break_worker(message
, key
,
887 t0
, max_iterations
, fitness
):
891 dt
= t0
/ (0.9 * max_iterations
)
893 plaintext
= autokey_decipher(message
, key
)
894 current_fitness
= fitness(plaintext
)
897 best_key
= current_key
898 best_fitness
= current_fitness
899 best_plaintext
= plaintext
901 # print('starting for', max_iterations)
902 for i
in range(max_iterations
):
903 swap_pos
= random
.randrange(len(current_key
))
904 swap_char
= random
.choice(string
.ascii_lowercase
)
906 new_key
= current_key
[:swap_pos
] + swap_char
+ current_key
[swap_pos
+1:]
908 plaintext
= autokey_decipher(message
, new_key
)
909 new_fitness
= fitness(plaintext
)
911 sa_chance
= math
.exp((new_fitness
- current_fitness
) / temperature
)
912 except (OverflowError, ZeroDivisionError):
913 # print('exception triggered: new_fit {}, current_fit {}, temp {}'.format(new_fitness, current_fitness, temperature))
915 if (new_fitness
> current_fitness
or random
.random() < sa_chance
):
916 # logger.debug('Simulated annealing: iteration {}, temperature {}, '
917 # 'current alphabet {}, current_fitness {}, '
918 # 'best_plaintext {}'.format(i, temperature, current_alphabet,
919 # current_fitness, best_plaintext[:50]))
921 # logger.debug('new_fit {}, current_fit {}, temp {}, sa_chance {}'.format(new_fitness, current_fitness, temperature, sa_chance))
922 # print(new_fitness, new_key, plaintext[:100])
923 current_fitness
= new_fitness
924 current_key
= new_key
926 if current_fitness
> best_fitness
:
927 best_key
= current_key
928 best_fitness
= current_fitness
929 best_plaintext
= plaintext
931 logger
.debug('Simulated annealing: iteration {}, temperature {}, '
932 'current key {}, current_fitness {}, '
933 'best_plaintext {}'.format(i
, temperature
, current_key
,
934 current_fitness
, plaintext
[:50]))
935 temperature
= max(temperature
- dt
, 0.001)
937 # print(best_key, best_fitness, best_plaintext[:70])
938 return best_key
, best_fitness
# current_alphabet, current_fitness
941 def pocket_enigma_break_by_crib(message
, wheel_spec
, crib
, crib_position
):
942 """Break a pocket enigma using a crib (some plaintext that's expected to
943 be in a certain position). Returns a list of possible starting wheel
944 positions that could produce the crib.
946 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'h', 0)
948 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'he', 0)
950 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'll', 2)
952 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'l', 2)
954 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'l', 3)
956 >>> pocket_enigma_break_by_crib('aaaaa', 1, 'l', 3)
959 pe
= PocketEnigma(wheel
=wheel_spec
)
960 possible_positions
= []
961 for p
in string
.ascii_lowercase
:
963 plaintext
= pe
.decipher(message
)
964 if plaintext
[crib_position
:crib_position
+len(crib
)] == crib
:
965 possible_positions
+= [p
]
966 return possible_positions
969 def plot_frequency_histogram(freqs
, sort_key
=None):
970 x
= range(len(freqs
))
971 y
= [freqs
[l
] for l
in sorted(freqs
, key
=sort_key
)]
973 ax
= f
.add_axes([0.1, 0.1, 0.9, 0.9])
974 ax
.bar(x
, y
, align
='center')
976 ax
.set_xticklabels(sorted(freqs
, key
=sort_key
))
980 if __name__
== "__main__":