1 """A set of functions to break the ciphers give in ciphers.py.
10 from itertools
import starmap
11 from segment
import segment
12 from multiprocessing
import Pool
14 import matplotlib
.pyplot
as plt
16 logger
= logging
.getLogger(__name__
)
17 logger
.addHandler(logging
.FileHandler('cipher.log'))
18 logger
.setLevel(logging
.WARNING
)
19 #logger.setLevel(logging.INFO)
20 #logger.setLevel(logging.DEBUG)
23 from language_models
import *
28 # c5a = open('2012/5a.ciphertext', 'r').read()
29 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
30 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
32 transpositions
= collections
.defaultdict(list)
34 transpositions
[transpositions_of(word
)] += [word
]
36 def frequencies(text
):
37 """Count the number of occurrences of each character in text
39 >>> sorted(frequencies('abcdefabc').items())
40 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
41 >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
42 'dog').items()) # doctest: +NORMALIZE_WHITESPACE
43 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
44 ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
45 ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
46 ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
47 >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
48 '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
49 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
50 ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
51 ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
52 ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
53 ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
54 >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\
55 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
56 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
57 ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
58 ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
59 ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
60 >>> frequencies('abcdefabcdef')['x']
63 return collections
.Counter(c
for c
in text
)
66 def caesar_break(message
, fitness
=Pletters
):
67 """Breaks a Caesar cipher using frequency analysis
69 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
70 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
71 (4, -130.849989015...)
72 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
73 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
74 (19, -128.82410410...)
75 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
76 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
77 (13, -126.25403935...)
79 sanitised_message
= sanitise(message
)
81 best_fit
= float('-inf')
82 for shift
in range(26):
83 plaintext
= caesar_decipher(sanitised_message
, shift
)
84 fit
= fitness(plaintext
)
85 logger
.debug('Caesar break attempt using key {0} gives fit of {1} '
86 'and decrypt starting: {2}'.format(shift
, fit
,
91 logger
.info('Caesar break best fit: key {0} gives fit of {1} and '
92 'decrypt starting: {2}'.format(best_shift
, best_fit
,
93 caesar_decipher(sanitised_message
, best_shift
)[:50]))
94 return best_shift
, best_fit
96 def affine_break(message
, fitness
=Pletters
):
97 """Breaks an affine cipher using frequency analysis
99 >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
100 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
101 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
102 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
103 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai ' \
104 'kxd clm ckuxj.') # doctest: +ELLIPSIS
105 ((15, 22, True), -340.601181913...)
107 sanitised_message
= sanitise(message
)
110 best_one_based
= True
111 best_fit
= float("-inf")
112 for one_based
in [True, False]:
113 for multiplier
in [x
for x
in range(1, 26, 2) if x
!= 13]:
114 for adder
in range(26):
115 plaintext
= affine_decipher(sanitised_message
,
116 multiplier
, adder
, one_based
)
117 fit
= fitness(plaintext
)
118 logger
.debug('Affine break attempt using key {0}x+{1} ({2}) '
119 'gives fit of {3} and decrypt starting: {4}'.
120 format(multiplier
, adder
, one_based
, fit
,
124 best_multiplier
= multiplier
126 best_one_based
= one_based
127 logger
.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of '
128 '{3} and decrypt starting: {4}'.format(
129 best_multiplier
, best_adder
, best_one_based
, best_fit
,
130 affine_decipher(sanitised_message
, best_multiplier
,
131 best_adder
, best_one_based
)[:50]))
132 return (best_multiplier
, best_adder
, best_one_based
), best_fit
134 def keyword_break(message
, wordlist
=keywords
, fitness
=Pletters
):
135 """Breaks a keyword substitution cipher using a dictionary and
138 >>> keyword_break(keyword_encipher('this is a test message for the ' \
139 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
140 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
141 (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
144 best_wrap_alphabet
= True
145 best_fit
= float("-inf")
146 for wrap_alphabet
in KeywordWrapAlphabet
:
147 for keyword
in wordlist
:
148 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
149 fit
= fitness(plaintext
)
150 logger
.debug('Keyword break attempt using key {0} (wrap={1}) '
151 'gives fit of {2} and decrypt starting: {3}'.format(
152 keyword
, wrap_alphabet
, fit
,
153 sanitise(plaintext
)[:50]))
156 best_keyword
= keyword
157 best_wrap_alphabet
= wrap_alphabet
158 logger
.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
159 '{2} and decrypt starting: {3}'.format(best_keyword
,
160 best_wrap_alphabet
, best_fit
, sanitise(
161 keyword_decipher(message
, best_keyword
,
162 best_wrap_alphabet
))[:50]))
163 return (best_keyword
, best_wrap_alphabet
), best_fit
165 def keyword_break_mp(message
, wordlist
=keywords
, fitness
=Pletters
,
166 number_of_solutions
=1, chunksize
=500):
167 """Breaks a keyword substitution cipher using a dictionary and
170 >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
171 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
172 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
173 (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
174 >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
175 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
176 wordlist=['cat', 'elephant', 'kangaroo'], \
177 number_of_solutions=2) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
178 [(('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...),
179 (('elephant', <KeywordWrapAlphabet.from_largest: 3>), -52.834575011...)]
182 helper_args
= [(message
, word
, wrap
, fitness
)
184 for wrap
in KeywordWrapAlphabet
]
185 # Gotcha: the helper function here needs to be defined at the top level
186 # (limitation of Pool.starmap)
187 breaks
= pool
.starmap(keyword_break_worker
, helper_args
, chunksize
)
188 if number_of_solutions
== 1:
189 return max(breaks
, key
=lambda k
: k
[1])
191 return sorted(breaks
, key
=lambda k
: k
[1], reverse
=True)[:number_of_solutions
]
193 def keyword_break_worker(message
, keyword
, wrap_alphabet
, fitness
):
194 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
195 fit
= fitness(plaintext
)
196 logger
.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
197 '{2} and decrypt starting: {3}'.format(keyword
,
198 wrap_alphabet
, fit
, sanitise(plaintext
)[:50]))
199 return (keyword
, wrap_alphabet
), fit
201 def monoalphabetic_break_hillclimbing(message
, max_iterations
=10000000,
203 ciphertext
= unaccent(message
).lower()
204 alphabet
= list(string
.ascii_lowercase
)
205 random
.shuffle(alphabet
)
206 alphabet
= ''.join(alphabet
)
207 return monoalphabetic_break_hillclimbing_worker(ciphertext
, alphabet
,
208 max_iterations
, fitness
)
210 def monoalphabetic_break_hillclimbing_mp(message
, workers
=10,
211 max_iterations
= 10000000, fitness
=Pletters
, chunksize
=1):
213 ciphertext
= unaccent(message
).lower()
214 for i
in range(workers
):
215 alphabet
= list(string
.ascii_lowercase
)
216 random
.shuffle(alphabet
)
217 alphabet
= ''.join(alphabet
)
218 worker_args
.append((ciphertext
, alphabet
, max_iterations
, fitness
))
220 breaks
= pool
.starmap(monoalphabetic_break_hillclimbing_worker
,
221 worker_args
, chunksize
)
222 return max(breaks
, key
=lambda k
: k
[1])
224 def monoalphabetic_break_hillclimbing_worker(message
, alphabet
,
225 max_iterations
, fitness
):
226 def swap(letters
, i
, j
):
232 return (letters
[:i
] + letters
[j
] + letters
[i
+1:j
] + letters
[i
] +
234 best_alphabet
= alphabet
235 best_fitness
= float('-inf')
236 for i
in range(max_iterations
):
237 alphabet
= swap(alphabet
, random
.randrange(26), random
.randrange(26))
238 cipher_translation
= ''.maketrans(string
.ascii_lowercase
, alphabet
)
239 plaintext
= message
.translate(cipher_translation
)
240 if fitness(plaintext
) > best_fitness
:
241 best_fitness
= fitness(plaintext
)
242 best_alphabet
= alphabet
243 print(i
, best_alphabet
, best_fitness
, plaintext
)
244 return best_alphabet
, best_fitness
247 def vigenere_keyword_break_mp(message
, wordlist
=keywords
, fitness
=Pletters
,
249 """Breaks a vigenere cipher using a dictionary and frequency analysis.
251 >>> vigenere_keyword_break_mp(vigenere_encipher(sanitise('this is a test ' \
252 'message for the vigenere decipherment'), 'cat'), \
253 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
254 ('cat', -52.947271216...)
257 helper_args
= [(message
, word
, fitness
)
258 for word
in wordlist
]
259 # Gotcha: the helper function here needs to be defined at the top level
260 # (limitation of Pool.starmap)
261 breaks
= pool
.starmap(vigenere_keyword_break_worker
, helper_args
,
263 return max(breaks
, key
=lambda k
: k
[1])
264 vigenere_keyword_break
= vigenere_keyword_break_mp
266 def vigenere_keyword_break_worker(message
, keyword
, fitness
):
267 plaintext
= vigenere_decipher(message
, keyword
)
268 fit
= fitness(plaintext
)
269 logger
.debug('Vigenere keyword break attempt using key {0} gives fit of '
270 '{1} and decrypt starting: {2}'.format(keyword
,
271 fit
, sanitise(plaintext
)[:50]))
275 def vigenere_frequency_break(message
, max_key_length
=20, fitness
=Pletters
):
276 """Breaks a Vigenere cipher with frequency analysis
278 >>> vigenere_frequency_break(vigenere_encipher(sanitise("It is time to " \
279 "run. She is ready and so am I. I stole Daniel's pocketbook this " \
280 "afternoon when he left his jacket hanging on the easel in the " \
281 "attic. I jump every time I hear a footstep on the stairs, " \
282 "certain that the theft has been discovered and that I will " \
283 "be caught. The SS officer visits less often now that he is " \
284 "sure"), 'florence')) # doctest: +ELLIPSIS
285 ('florence', -307.5473096791...)
287 def worker(message
, key_length
, fitness
):
288 splits
= every_nth(sanitised_message
, key_length
)
289 key
= ''.join([chr(caesar_break(s
)[0] + ord('a')) for s
in splits
])
290 plaintext
= vigenere_decipher(message
, key
)
291 fit
= fitness(plaintext
)
293 sanitised_message
= sanitise(message
)
294 results
= starmap(worker
, [(sanitised_message
, i
, fitness
)
295 for i
in range(1, max_key_length
+1)])
296 return max(results
, key
=lambda k
: k
[1])
299 def beaufort_frequency_break(message
, max_key_length
=20, fitness
=Pletters
):
300 """Breaks a Beaufort cipher with frequency analysis
302 >>> beaufort_frequency_break(beaufort_encipher(sanitise("It is time to " \
303 "run. She is ready and so am I. I stole Daniel's pocketbook this " \
304 "afternoon when he left his jacket hanging on the easel in the " \
305 "attic. I jump every time I hear a footstep on the stairs, " \
306 "certain that the theft has been discovered and that I will " \
307 "be caught. The SS officer visits less often now " \
308 "that he is sure"), 'florence')) # doctest: +ELLIPSIS
309 ('florence', -307.5473096791...)
311 def worker(message
, key_length
, fitness
):
312 splits
= every_nth(sanitised_message
, key_length
)
313 key
= ''.join([chr(-caesar_break(s
)[0] % 26 + ord('a'))
315 plaintext
= beaufort_decipher(message
, key
)
316 fit
= fitness(plaintext
)
318 sanitised_message
= sanitise(message
)
319 results
= starmap(worker
, [(sanitised_message
, i
, fitness
)
320 for i
in range(1, max_key_length
+1)])
321 return max(results
, key
=lambda k
: k
[1])
324 def column_transposition_break_mp(message
, translist
=transpositions
,
325 fitness
=Pbigrams
, chunksize
=500):
326 """Breaks a column transposition cipher using a dictionary and
327 n-gram frequency analysis
329 >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
330 "It is a truth universally acknowledged, that a single man in \
331 possession of a good fortune, must be in want of a wife. However \
332 little known the feelings or views of such a man may be on his \
333 first entering a neighbourhood, this truth is so well fixed in \
334 the minds of the surrounding families, that he is considered the \
335 rightful property of some one or other of their daughters."), \
337 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
338 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
339 (6, 1, 0, 4, 5, 3, 2): ['keyword']}) # doctest: +ELLIPSIS
340 (((2, 0, 5, 3, 1, 4, 6), False, False), -709.4646722...)
341 >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
342 "It is a truth universally acknowledged, that a single man in \
343 possession of a good fortune, must be in want of a wife. However \
344 little known the feelings or views of such a man may be on his \
345 first entering a neighbourhood, this truth is so well fixed in \
346 the minds of the surrounding families, that he is considered the \
347 rightful property of some one or other of their daughters."), \
349 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
350 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
351 (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
352 fitness=Ptrigrams) # doctest: +ELLIPSIS
353 (((2, 0, 5, 3, 1, 4, 6), False, False), -997.0129085...)
356 helper_args
= [(message
, trans
, fillcolumnwise
, emptycolumnwise
,
358 for trans
in translist
.keys()
359 for fillcolumnwise
in [True, False]
360 for emptycolumnwise
in [True, False]]
361 # Gotcha: the helper function here needs to be defined at the top level
362 # (limitation of Pool.starmap)
363 breaks
= pool
.starmap(column_transposition_break_worker
,
364 helper_args
, chunksize
)
365 return max(breaks
, key
=lambda k
: k
[1])
366 column_transposition_break
= column_transposition_break_mp
368 def column_transposition_break_worker(message
, transposition
,
369 fillcolumnwise
, emptycolumnwise
, fitness
):
370 plaintext
= column_transposition_decipher(message
, transposition
,
371 fillcolumnwise
=fillcolumnwise
, emptycolumnwise
=emptycolumnwise
)
372 fit
= fitness(sanitise(plaintext
))
373 logger
.debug('Column transposition break attempt using key {0} '
374 'gives fit of {1} and decrypt starting: {2}'.format(
376 sanitise(plaintext
)[:50]))
377 return (transposition
, fillcolumnwise
, emptycolumnwise
), fit
380 def scytale_break_mp(message
, max_key_length
=20,
381 fitness
=Pbigrams
, chunksize
=500):
382 """Breaks a scytale cipher using a range of lengths and
383 n-gram frequency analysis
385 >>> scytale_break_mp(scytale_encipher(sanitise( \
386 "It is a truth universally acknowledged, that a single man in \
387 possession of a good fortune, must be in want of a wife. However \
388 little known the feelings or views of such a man may be on his \
389 first entering a neighbourhood, this truth is so well fixed in \
390 the minds of the surrounding families, that he is considered the \
391 rightful property of some one or other of their daughters."), \
392 5)) # doctest: +ELLIPSIS
394 >>> scytale_break_mp(scytale_encipher(sanitise( \
395 "It is a truth universally acknowledged, that a single man in \
396 possession of a good fortune, must be in want of a wife. However \
397 little known the feelings or views of such a man may be on his \
398 first entering a neighbourhood, this truth is so well fixed in \
399 the minds of the surrounding families, that he is considered the \
400 rightful property of some one or other of their daughters."), \
402 fitness=Ptrigrams) # doctest: +ELLIPSIS
406 helper_args
= [(message
, trans
, False, True, fitness
)
408 [[col
for col
in range(math
.ceil(len(message
)/rows
))]
409 for rows
in range(1,max_key_length
+1)]]
410 # Gotcha: the helper function here needs to be defined at the top level
411 # (limitation of Pool.starmap)
412 breaks
= pool
.starmap(column_transposition_break_worker
,
413 helper_args
, chunksize
)
414 best
= max(breaks
, key
=lambda k
: k
[1])
415 return math
.trunc(len(message
) / len(best
[0][0])), best
[1]
416 scytale_break
= scytale_break_mp
419 def railfence_break(message
, max_key_length
=20,
420 fitness
=Pbigrams
, chunksize
=500):
421 """Breaks a railfence cipher using a range of lengths and
422 n-gram frequency analysis
424 >>> railfence_break(railfence_encipher(sanitise( \
425 "It is a truth universally acknowledged, that a single man in \
426 possession of a good fortune, must be in want of a wife. However \
427 little known the feelings or views of such a man may be on his \
428 first entering a neighbourhood, this truth is so well fixed in \
429 the minds of the surrounding families, that he is considered the \
430 rightful property of some one or other of their daughters."), \
431 7)) # doctest: +ELLIPSIS
432 (7, -709.46467226...)
433 >>> railfence_break(railfence_encipher(sanitise( \
434 "It is a truth universally acknowledged, that a single man in \
435 possession of a good fortune, must be in want of a wife. However \
436 little known the feelings or views of such a man may be on his \
437 first entering a neighbourhood, this truth is so well fixed in \
438 the minds of the surrounding families, that he is considered the \
439 rightful property of some one or other of their daughters."), \
441 fitness=Ptrigrams) # doctest: +ELLIPSIS
444 def worker(message
, height
, fitness
):
445 plaintext
= railfence_decipher(message
, height
)
446 fit
= fitness(plaintext
)
449 sanitised_message
= sanitise(message
)
450 results
= starmap(worker
, [(sanitised_message
, i
, fitness
)
451 for i
in range(2, max_key_length
+1)])
452 return max(results
, key
=lambda k
: k
[1])
455 def pocket_enigma_break_by_crib(message
, wheel_spec
, crib
, crib_position
):
456 """Break a pocket enigma using a crib (some plaintext that's expected to
457 be in a certain position). Returns a list of possible starting wheel
458 positions that could produce the crib.
460 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'h', 0)
462 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'he', 0)
464 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'll', 2)
466 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'l', 2)
468 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'l', 3)
470 >>> pocket_enigma_break_by_crib('aaaaa', 1, 'l', 3)
473 pe
= PocketEnigma(wheel
=wheel_spec
)
474 possible_positions
= []
475 for p
in string
.ascii_lowercase
:
477 plaintext
= pe
.decipher(message
)
478 if plaintext
[crib_position
:crib_position
+len(crib
)] == crib
:
479 possible_positions
+= [p
]
480 return possible_positions
483 def plot_frequency_histogram(freqs
, sort_key
=None):
484 x
= range(len(freqs
.keys()))
485 y
= [freqs
[l
] for l
in sorted(freqs
.keys(), key
=sort_key
)]
487 ax
= f
.add_axes([0.1, 0.1, 0.9, 0.9])
488 ax
.bar(x
, y
, align
='center')
490 ax
.set_xticklabels(sorted(freqs
.keys(), key
=sort_key
))
494 if __name__
== "__main__":