1 """A set of functions to break the ciphers give in ciphers.py.
10 from itertools
import starmap
11 from segment
import segment
12 from multiprocessing
import Pool
14 import matplotlib
.pyplot
as plt
16 logger
= logging
.getLogger(__name__
)
17 logger
.addHandler(logging
.FileHandler('cipher.log'))
18 logger
.setLevel(logging
.WARNING
)
19 #logger.setLevel(logging.INFO)
20 #logger.setLevel(logging.DEBUG)
23 from language_models
import *
28 # c5a = open('2012/5a.ciphertext', 'r').read()
29 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
30 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
32 transpositions
= collections
.defaultdict(list)
34 transpositions
[transpositions_of(word
)] += [word
]
36 def frequencies(text
):
37 """Count the number of occurrences of each character in text
39 >>> sorted(frequencies('abcdefabc').items())
40 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
41 >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
42 'dog').items()) # doctest: +NORMALIZE_WHITESPACE
43 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
44 ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
45 ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
46 ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
47 >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
48 '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
49 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
50 ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
51 ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
52 ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
53 ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
54 >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\
55 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
56 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
57 ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
58 ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
59 ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
60 >>> frequencies('abcdefabcdef')['x']
63 return collections
.Counter(c
for c
in text
)
66 def caesar_break(message
, fitness
=Pletters
):
67 """Breaks a Caesar cipher using frequency analysis
69 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
70 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
71 (4, -130.849989015...)
72 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
73 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
74 (19, -128.82410410...)
75 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
76 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
77 (13, -126.25403935...)
79 sanitised_message
= sanitise(message
)
81 best_fit
= float('-inf')
82 for shift
in range(26):
83 plaintext
= caesar_decipher(sanitised_message
, shift
)
84 fit
= fitness(plaintext
)
85 logger
.debug('Caesar break attempt using key {0} gives fit of {1} '
86 'and decrypt starting: {2}'.format(shift
, fit
,
91 logger
.info('Caesar break best fit: key {0} gives fit of {1} and '
92 'decrypt starting: {2}'.format(best_shift
, best_fit
,
93 caesar_decipher(sanitised_message
, best_shift
)[:50]))
94 return best_shift
, best_fit
96 def affine_break(message
, fitness
=Pletters
):
97 """Breaks an affine cipher using frequency analysis
99 >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
100 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
101 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
102 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
103 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai ' \
104 'kxd clm ckuxj.') # doctest: +ELLIPSIS
105 ((15, 22, True), -340.601181913...)
107 sanitised_message
= sanitise(message
)
110 best_one_based
= True
111 best_fit
= float("-inf")
112 for one_based
in [True, False]:
113 for multiplier
in [x
for x
in range(1, 26, 2) if x
!= 13]:
114 for adder
in range(26):
115 plaintext
= affine_decipher(sanitised_message
,
116 multiplier
, adder
, one_based
)
117 fit
= fitness(plaintext
)
118 logger
.debug('Affine break attempt using key {0}x+{1} ({2}) '
119 'gives fit of {3} and decrypt starting: {4}'.
120 format(multiplier
, adder
, one_based
, fit
,
124 best_multiplier
= multiplier
126 best_one_based
= one_based
127 logger
.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of '
128 '{3} and decrypt starting: {4}'.format(
129 best_multiplier
, best_adder
, best_one_based
, best_fit
,
130 affine_decipher(sanitised_message
, best_multiplier
,
131 best_adder
, best_one_based
)[:50]))
132 return (best_multiplier
, best_adder
, best_one_based
), best_fit
134 def keyword_break(message
, wordlist
=keywords
, fitness
=Pletters
):
135 """Breaks a keyword substitution cipher using a dictionary and
138 >>> keyword_break(keyword_encipher('this is a test message for the ' \
139 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
140 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
141 (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
144 best_wrap_alphabet
= True
145 best_fit
= float("-inf")
146 for wrap_alphabet
in KeywordWrapAlphabet
:
147 for keyword
in wordlist
:
148 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
149 fit
= fitness(plaintext
)
150 logger
.debug('Keyword break attempt using key {0} (wrap={1}) '
151 'gives fit of {2} and decrypt starting: {3}'.format(
152 keyword
, wrap_alphabet
, fit
,
153 sanitise(plaintext
)[:50]))
156 best_keyword
= keyword
157 best_wrap_alphabet
= wrap_alphabet
158 logger
.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
159 '{2} and decrypt starting: {3}'.format(best_keyword
,
160 best_wrap_alphabet
, best_fit
, sanitise(
161 keyword_decipher(message
, best_keyword
,
162 best_wrap_alphabet
))[:50]))
163 return (best_keyword
, best_wrap_alphabet
), best_fit
165 def keyword_break_mp(message
, wordlist
=keywords
, fitness
=Pletters
,
166 number_of_solutions
=1, chunksize
=500):
167 """Breaks a keyword substitution cipher using a dictionary and
170 >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
171 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
172 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
173 (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
174 >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
175 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
176 wordlist=['cat', 'elephant', 'kangaroo'], \
177 number_of_solutions=2) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
178 [(('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...),
179 (('elephant', <KeywordWrapAlphabet.from_largest: 3>), -52.834575011...)]
182 helper_args
= [(message
, word
, wrap
, fitness
)
184 for wrap
in KeywordWrapAlphabet
]
185 # Gotcha: the helper function here needs to be defined at the top level
186 # (limitation of Pool.starmap)
187 breaks
= pool
.starmap(keyword_break_worker
, helper_args
, chunksize
)
188 if number_of_solutions
== 1:
189 return max(breaks
, key
=lambda k
: k
[1])
191 return sorted(breaks
, key
=lambda k
: k
[1], reverse
=True)[:number_of_solutions
]
193 def keyword_break_worker(message
, keyword
, wrap_alphabet
, fitness
):
194 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
195 fit
= fitness(plaintext
)
196 logger
.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
197 '{2} and decrypt starting: {3}'.format(keyword
,
198 wrap_alphabet
, fit
, sanitise(plaintext
)[:50]))
199 return (keyword
, wrap_alphabet
), fit
201 def monoalphabetic_break_hillclimbing(message
, max_iterations
=10000000,
203 ciphertext
= unaccent(message
).lower()
204 alphabet
= list(string
.ascii_lowercase
)
205 random
.shuffle(alphabet
)
206 alphabet
= ''.join(alphabet
)
207 return monoalphabetic_break_hillclimbing_worker(ciphertext
, alphabet
,
208 max_iterations
, fitness
)
210 def monoalphabetic_break_hillclimbing_mp(message
, workers
=10,
211 max_iterations
= 10000000, fitness
=Pletters
, chunksize
=1):
213 ciphertext
= unaccent(message
).lower()
214 for i
in range(workers
):
215 alphabet
= list(string
.ascii_lowercase
)
216 random
.shuffle(alphabet
)
217 alphabet
= ''.join(alphabet
)
218 worker_args
.append((ciphertext
, alphabet
, max_iterations
, fitness
))
220 breaks
= pool
.starmap(monoalphabetic_break_hillclimbing_worker
,
221 worker_args
, chunksize
)
222 return max(breaks
, key
=lambda k
: k
[1])
224 def monoalphabetic_break_hillclimbing_worker(message
, alphabet
,
225 max_iterations
, fitness
):
226 def swap(letters
, i
, j
):
232 return (letters
[:i
] + letters
[j
] + letters
[i
+1:j
] + letters
[i
] +
234 best_alphabet
= alphabet
235 best_fitness
= float('-inf')
236 for i
in range(max_iterations
):
237 alphabet
= swap(alphabet
, random
.randrange(26), random
.randrange(26))
238 cipher_translation
= ''.maketrans(string
.ascii_lowercase
, alphabet
)
239 plaintext
= message
.translate(cipher_translation
)
240 if fitness(plaintext
) > best_fitness
:
241 best_fitness
= fitness(plaintext
)
242 best_alphabet
= alphabet
243 print(i
, best_alphabet
, best_fitness
, plaintext
)
244 return best_alphabet
, best_fitness
247 def column_transposition_break_mp(message
, translist
=transpositions
,
248 fitness
=Pbigrams
, chunksize
=500):
249 """Breaks a column transposition cipher using a dictionary and
250 n-gram frequency analysis
252 >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
253 "It is a truth universally acknowledged, that a single man in \
254 possession of a good fortune, must be in want of a wife. However \
255 little known the feelings or views of such a man may be on his \
256 first entering a neighbourhood, this truth is so well fixed in \
257 the minds of the surrounding families, that he is considered the \
258 rightful property of some one or other of their daughters."), \
260 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
261 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
262 (6, 1, 0, 4, 5, 3, 2): ['keyword']}) # doctest: +ELLIPSIS
263 (((2, 0, 5, 3, 1, 4, 6), False, False), -709.4646722...)
264 >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
265 "It is a truth universally acknowledged, that a single man in \
266 possession of a good fortune, must be in want of a wife. However \
267 little known the feelings or views of such a man may be on his \
268 first entering a neighbourhood, this truth is so well fixed in \
269 the minds of the surrounding families, that he is considered the \
270 rightful property of some one or other of their daughters."), \
272 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
273 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
274 (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
275 fitness=Ptrigrams) # doctest: +ELLIPSIS
276 (((2, 0, 5, 3, 1, 4, 6), False, False), -997.0129085...)
279 helper_args
= [(message
, trans
, fillcolumnwise
, emptycolumnwise
,
281 for trans
in translist
.keys()
282 for fillcolumnwise
in [True, False]
283 for emptycolumnwise
in [True, False]]
284 # Gotcha: the helper function here needs to be defined at the top level
285 # (limitation of Pool.starmap)
286 breaks
= pool
.starmap(column_transposition_break_worker
,
287 helper_args
, chunksize
)
288 return max(breaks
, key
=lambda k
: k
[1])
289 column_transposition_break
= column_transposition_break_mp
291 def column_transposition_break_worker(message
, transposition
,
292 fillcolumnwise
, emptycolumnwise
, fitness
):
293 plaintext
= column_transposition_decipher(message
, transposition
,
294 fillcolumnwise
=fillcolumnwise
, emptycolumnwise
=emptycolumnwise
)
295 fit
= fitness(sanitise(plaintext
))
296 logger
.debug('Column transposition break attempt using key {0} '
297 'gives fit of {1} and decrypt starting: {2}'.format(
299 sanitise(plaintext
)[:50]))
300 return (transposition
, fillcolumnwise
, emptycolumnwise
), fit
303 def scytale_break_mp(message
, max_key_length
=20,
304 fitness
=Pbigrams
, chunksize
=500):
305 """Breaks a scytale cipher using a range of lengths and
306 n-gram frequency analysis
308 >>> scytale_break_mp(scytale_encipher(sanitise( \
309 "It is a truth universally acknowledged, that a single man in \
310 possession of a good fortune, must be in want of a wife. However \
311 little known the feelings or views of such a man may be on his \
312 first entering a neighbourhood, this truth is so well fixed in \
313 the minds of the surrounding families, that he is considered the \
314 rightful property of some one or other of their daughters."), \
315 5)) # doctest: +ELLIPSIS
317 >>> scytale_break_mp(scytale_encipher(sanitise( \
318 "It is a truth universally acknowledged, that a single man in \
319 possession of a good fortune, must be in want of a wife. However \
320 little known the feelings or views of such a man may be on his \
321 first entering a neighbourhood, this truth is so well fixed in \
322 the minds of the surrounding families, that he is considered the \
323 rightful property of some one or other of their daughters."), \
325 fitness=Ptrigrams) # doctest: +ELLIPSIS
329 helper_args
= [(message
, trans
, False, True, fitness
)
331 [[col
for col
in range(math
.ceil(len(message
)/rows
))]
332 for rows
in range(1,max_key_length
+1)]]
333 # Gotcha: the helper function here needs to be defined at the top level
334 # (limitation of Pool.starmap)
335 breaks
= pool
.starmap(column_transposition_break_worker
,
336 helper_args
, chunksize
)
337 best
= max(breaks
, key
=lambda k
: k
[1])
338 return math
.trunc(len(message
) / len(best
[0][0])), best
[1]
339 scytale_break
= scytale_break_mp
342 def vigenere_keyword_break_mp(message
, wordlist
=keywords
, fitness
=Pletters
,
344 """Breaks a vigenere cipher using a dictionary and frequency analysis.
346 >>> vigenere_keyword_break_mp(vigenere_encipher(sanitise('this is a test ' \
347 'message for the vigenere decipherment'), 'cat'), \
348 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
349 ('cat', -52.947271216...)
352 helper_args
= [(message
, word
, fitness
)
353 for word
in wordlist
]
354 # Gotcha: the helper function here needs to be defined at the top level
355 # (limitation of Pool.starmap)
356 breaks
= pool
.starmap(vigenere_keyword_break_worker
, helper_args
,
358 return max(breaks
, key
=lambda k
: k
[1])
359 vigenere_keyword_break
= vigenere_keyword_break_mp
361 def vigenere_keyword_break_worker(message
, keyword
, fitness
):
362 plaintext
= vigenere_decipher(message
, keyword
)
363 fit
= fitness(plaintext
)
364 logger
.debug('Vigenere keyword break attempt using key {0} gives fit of '
365 '{1} and decrypt starting: {2}'.format(keyword
,
366 fit
, sanitise(plaintext
)[:50]))
371 def vigenere_frequency_break(message
, max_key_length
=20, fitness
=Pletters
):
372 """Breaks a Vigenere cipher with frequency analysis
374 >>> vigenere_frequency_break(vigenere_encipher(sanitise("It is time to " \
375 "run. She is ready and so am I. I stole Daniel's pocketbook this " \
376 "afternoon when he left his jacket hanging on the easel in the " \
377 "attic. I jump every time I hear a footstep on the stairs, " \
378 "certain that the theft has been discovered and that I will " \
379 "be caught. The SS officer visits less often now that he is " \
380 "sure"), 'florence')) # doctest: +ELLIPSIS
381 ('florence', -307.5473096791...)
383 def worker(message
, key_length
, fitness
):
384 splits
= every_nth(sanitised_message
, key_length
)
385 key
= ''.join([chr(caesar_break(s
)[0] + ord('a')) for s
in splits
])
386 plaintext
= vigenere_decipher(message
, key
)
387 fit
= fitness(plaintext
)
389 sanitised_message
= sanitise(message
)
390 results
= starmap(worker
, [(sanitised_message
, i
, fitness
)
391 for i
in range(1, max_key_length
+1)])
392 return max(results
, key
=lambda k
: k
[1])
395 def beaufort_frequency_break(message
, max_key_length
=20, fitness
=Pletters
):
396 """Breaks a Beaufort cipher with frequency analysis
398 >>> beaufort_frequency_break(beaufort_encipher(sanitise("It is time to " \
399 "run. She is ready and so am I. I stole Daniel's pocketbook this " \
400 "afternoon when he left his jacket hanging on the easel in the " \
401 "attic. I jump every time I hear a footstep on the stairs, " \
402 "certain that the theft has been discovered and that I will " \
403 "be caught. The SS officer visits less often now " \
404 "that he is sure"), 'florence')) # doctest: +ELLIPSIS
405 ('florence', -307.5473096791...)
407 def worker(message
, key_length
, fitness
):
408 splits
= every_nth(sanitised_message
, key_length
)
409 key
= ''.join([chr(-caesar_break(s
)[0] % 26 + ord('a'))
411 plaintext
= beaufort_decipher(message
, key
)
412 fit
= fitness(plaintext
)
414 sanitised_message
= sanitise(message
)
415 results
= starmap(worker
, [(sanitised_message
, i
, fitness
)
416 for i
in range(1, max_key_length
+1)])
417 return max(results
, key
=lambda k
: k
[1])
420 def pocket_enigma_break_by_crib(message
, wheel_spec
, crib
, crib_position
):
421 """Break a pocket enigma using a crib (some plaintext that's expected to
422 be in a certain position). Returns a list of possible starting wheel
423 positions that could produce the crib.
425 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'h', 0)
427 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'he', 0)
429 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'll', 2)
431 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'l', 2)
433 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'l', 3)
435 >>> pocket_enigma_break_by_crib('aaaaa', 1, 'l', 3)
438 pe
= PocketEnigma(wheel
=wheel_spec
)
439 possible_positions
= []
440 for p
in string
.ascii_lowercase
:
442 plaintext
= pe
.decipher(message
)
443 if plaintext
[crib_position
:crib_position
+len(crib
)] == crib
:
444 possible_positions
+= [p
]
445 return possible_positions
448 def plot_frequency_histogram(freqs
, sort_key
=None):
449 x
= range(len(freqs
.keys()))
450 y
= [freqs
[l
] for l
in sorted(freqs
.keys(), key
=sort_key
)]
452 ax
= f
.add_axes([0.1, 0.1, 0.9, 0.9])
453 ax
.bar(x
, y
, align
='center')
455 ax
.set_xticklabels(sorted(freqs
.keys(), key
=sort_key
))
459 if __name__
== "__main__":