1 """A set of functions to break the ciphers give in ciphers.py.
10 from itertools
import starmap
11 from segment
import segment
12 from multiprocessing
import Pool
14 import matplotlib
.pyplot
as plt
16 logger
= logging
.getLogger(__name__
)
17 logger
.addHandler(logging
.FileHandler('cipher.log'))
18 logger
.setLevel(logging
.WARNING
)
19 #logger.setLevel(logging.INFO)
20 #logger.setLevel(logging.DEBUG)
23 from language_models
import *
28 # c5a = open('2012/5a.ciphertext', 'r').read()
29 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
30 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
32 transpositions
= collections
.defaultdict(list)
34 transpositions
[transpositions_of(word
)] += [word
]
36 def frequencies(text
):
37 """Count the number of occurrences of each character in text
39 >>> sorted(frequencies('abcdefabc').items())
40 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
41 >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
42 'dog').items()) # doctest: +NORMALIZE_WHITESPACE
43 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
44 ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
45 ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
46 ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
47 >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
48 '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
49 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
50 ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
51 ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
52 ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
53 ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
54 >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\
55 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
56 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
57 ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
58 ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
59 ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
60 >>> frequencies('abcdefabcdef')['x']
63 return collections
.Counter(c
for c
in text
)
66 def caesar_break(message
, fitness
=Pletters
):
67 """Breaks a Caesar cipher using frequency analysis
69 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
70 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
71 (4, -130.849989015...)
72 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
73 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
74 (19, -128.82410410...)
75 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
76 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
77 (13, -126.25403935...)
79 sanitised_message
= sanitise(message
)
81 best_fit
= float('-inf')
82 for shift
in range(26):
83 plaintext
= caesar_decipher(sanitised_message
, shift
)
84 fit
= fitness(plaintext
)
85 logger
.debug('Caesar break attempt using key {0} gives fit of {1} '
86 'and decrypt starting: {2}'.format(shift
, fit
,
91 logger
.info('Caesar break best fit: key {0} gives fit of {1} and '
92 'decrypt starting: {2}'.format(best_shift
, best_fit
,
93 caesar_decipher(sanitised_message
, best_shift
)[:50]))
94 return best_shift
, best_fit
96 def affine_break(message
, fitness
=Pletters
):
97 """Breaks an affine cipher using frequency analysis
99 >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
100 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
101 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
102 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
103 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai ' \
104 'kxd clm ckuxj.') # doctest: +ELLIPSIS
105 ((15, 22, True), -340.601181913...)
107 sanitised_message
= sanitise(message
)
110 best_one_based
= True
111 best_fit
= float("-inf")
112 for one_based
in [True, False]:
113 for multiplier
in [x
for x
in range(1, 26, 2) if x
!= 13]:
114 for adder
in range(26):
115 plaintext
= affine_decipher(sanitised_message
,
116 multiplier
, adder
, one_based
)
117 fit
= fitness(plaintext
)
118 logger
.debug('Affine break attempt using key {0}x+{1} ({2}) '
119 'gives fit of {3} and decrypt starting: {4}'.
120 format(multiplier
, adder
, one_based
, fit
,
124 best_multiplier
= multiplier
126 best_one_based
= one_based
127 logger
.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of '
128 '{3} and decrypt starting: {4}'.format(
129 best_multiplier
, best_adder
, best_one_based
, best_fit
,
130 affine_decipher(sanitised_message
, best_multiplier
,
131 best_adder
, best_one_based
)[:50]))
132 return (best_multiplier
, best_adder
, best_one_based
), best_fit
134 def keyword_break(message
, wordlist
=keywords
, fitness
=Pletters
):
135 """Breaks a keyword substitution cipher using a dictionary and
138 >>> keyword_break(keyword_encipher('this is a test message for the ' \
139 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
140 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
141 (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
144 best_wrap_alphabet
= True
145 best_fit
= float("-inf")
146 for wrap_alphabet
in KeywordWrapAlphabet
:
147 for keyword
in wordlist
:
148 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
149 fit
= fitness(plaintext
)
150 logger
.debug('Keyword break attempt using key {0} (wrap={1}) '
151 'gives fit of {2} and decrypt starting: {3}'.format(
152 keyword
, wrap_alphabet
, fit
,
153 sanitise(plaintext
)[:50]))
156 best_keyword
= keyword
157 best_wrap_alphabet
= wrap_alphabet
158 logger
.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
159 '{2} and decrypt starting: {3}'.format(best_keyword
,
160 best_wrap_alphabet
, best_fit
, sanitise(
161 keyword_decipher(message
, best_keyword
,
162 best_wrap_alphabet
))[:50]))
163 return (best_keyword
, best_wrap_alphabet
), best_fit
165 def keyword_break_mp(message
, wordlist
=keywords
, fitness
=Pletters
,
167 """Breaks a keyword substitution cipher using a dictionary and
170 >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
171 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
172 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
173 (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
176 helper_args
= [(message
, word
, wrap
, fitness
)
178 for wrap
in KeywordWrapAlphabet
]
179 # Gotcha: the helper function here needs to be defined at the top level
180 # (limitation of Pool.starmap)
181 breaks
= pool
.starmap(keyword_break_worker
, helper_args
, chunksize
)
182 return max(breaks
, key
=lambda k
: k
[1])
184 def keyword_break_worker(message
, keyword
, wrap_alphabet
, fitness
):
185 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
186 fit
= fitness(plaintext
)
187 logger
.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
188 '{2} and decrypt starting: {3}'.format(keyword
,
189 wrap_alphabet
, fit
, sanitise(plaintext
)[:50]))
190 return (keyword
, wrap_alphabet
), fit
192 def monoalphabetic_break_hillclimbing(message
, max_iterations
=10000000,
194 ciphertext
= unaccent(message
).lower()
195 alphabet
= list(string
.ascii_lowercase
)
196 random
.shuffle(alphabet
)
197 alphabet
= ''.join(alphabet
)
198 return monoalphabetic_break_hillclimbing_worker(ciphertext
, alphabet
,
199 max_iterations
, fitness
)
201 def monoalphabetic_break_hillclimbing_mp(message
, workers
=10,
202 max_iterations
= 10000000, fitness
=Pletters
, chunksize
=1):
204 ciphertext
= unaccent(message
).lower()
205 for i
in range(workers
):
206 alphabet
= list(string
.ascii_lowercase
)
207 random
.shuffle(alphabet
)
208 alphabet
= ''.join(alphabet
)
209 worker_args
.append((ciphertext
, alphabet
, max_iterations
, fitness
))
211 breaks
= pool
.starmap(monoalphabetic_break_hillclimbing_worker
,
212 worker_args
, chunksize
)
213 return max(breaks
, key
=lambda k
: k
[1])
215 def monoalphabetic_break_hillclimbing_worker(message
, alphabet
,
216 max_iterations
, fitness
):
217 def swap(letters
, i
, j
):
223 return (letters
[:i
] + letters
[j
] + letters
[i
+1:j
] + letters
[i
] +
225 best_alphabet
= alphabet
226 best_fitness
= float('-inf')
227 for i
in range(max_iterations
):
228 alphabet
= swap(alphabet
, random
.randrange(26), random
.randrange(26))
229 cipher_translation
= ''.maketrans(string
.ascii_lowercase
, alphabet
)
230 plaintext
= message
.translate(cipher_translation
)
231 if fitness(plaintext
) > best_fitness
:
232 best_fitness
= fitness(plaintext
)
233 best_alphabet
= alphabet
234 print(i
, best_alphabet
, best_fitness
, plaintext
)
235 return best_alphabet
, best_fitness
238 def column_transposition_break_mp(message
, translist
=transpositions
,
239 fitness
=Pbigrams
, chunksize
=500):
240 """Breaks a column transposition cipher using a dictionary and
241 n-gram frequency analysis
243 >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
244 "It is a truth universally acknowledged, that a single man in \
245 possession of a good fortune, must be in want of a wife. However \
246 little known the feelings or views of such a man may be on his \
247 first entering a neighbourhood, this truth is so well fixed in \
248 the minds of the surrounding families, that he is considered the \
249 rightful property of some one or other of their daughters."), \
251 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
252 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
253 (6, 1, 0, 4, 5, 3, 2): ['keyword']}) # doctest: +ELLIPSIS
254 (((2, 0, 5, 3, 1, 4, 6), False, False), -709.4646722...)
255 >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
256 "It is a truth universally acknowledged, that a single man in \
257 possession of a good fortune, must be in want of a wife. However \
258 little known the feelings or views of such a man may be on his \
259 first entering a neighbourhood, this truth is so well fixed in \
260 the minds of the surrounding families, that he is considered the \
261 rightful property of some one or other of their daughters."), \
263 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
264 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
265 (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
266 fitness=Ptrigrams) # doctest: +ELLIPSIS
267 (((2, 0, 5, 3, 1, 4, 6), False, False), -997.0129085...)
270 helper_args
= [(message
, trans
, fillcolumnwise
, emptycolumnwise
,
272 for trans
in translist
.keys()
273 for fillcolumnwise
in [True, False]
274 for emptycolumnwise
in [True, False]]
275 # Gotcha: the helper function here needs to be defined at the top level
276 # (limitation of Pool.starmap)
277 breaks
= pool
.starmap(column_transposition_break_worker
,
278 helper_args
, chunksize
)
279 return max(breaks
, key
=lambda k
: k
[1])
280 column_transposition_break
= column_transposition_break_mp
282 def column_transposition_break_worker(message
, transposition
,
283 fillcolumnwise
, emptycolumnwise
, fitness
):
284 plaintext
= column_transposition_decipher(message
, transposition
,
285 fillcolumnwise
=fillcolumnwise
, emptycolumnwise
=emptycolumnwise
)
286 fit
= fitness(sanitise(plaintext
))
287 logger
.debug('Column transposition break attempt using key {0} '
288 'gives fit of {1} and decrypt starting: {2}'.format(
290 sanitise(plaintext
)[:50]))
291 return (transposition
, fillcolumnwise
, emptycolumnwise
), fit
294 def scytale_break_mp(message
, max_key_length
=20,
295 fitness
=Pbigrams
, chunksize
=500):
296 """Breaks a scytale cipher using a range of lengths and
297 n-gram frequency analysis
299 >>> scytale_break_mp(scytale_encipher(sanitise( \
300 "It is a truth universally acknowledged, that a single man in \
301 possession of a good fortune, must be in want of a wife. However \
302 little known the feelings or views of such a man may be on his \
303 first entering a neighbourhood, this truth is so well fixed in \
304 the minds of the surrounding families, that he is considered the \
305 rightful property of some one or other of their daughters."), \
306 5)) # doctest: +ELLIPSIS
308 >>> scytale_break_mp(scytale_encipher(sanitise( \
309 "It is a truth universally acknowledged, that a single man in \
310 possession of a good fortune, must be in want of a wife. However \
311 little known the feelings or views of such a man may be on his \
312 first entering a neighbourhood, this truth is so well fixed in \
313 the minds of the surrounding families, that he is considered the \
314 rightful property of some one or other of their daughters."), \
316 fitness=Ptrigrams) # doctest: +ELLIPSIS
320 helper_args
= [(message
, trans
, False, True, fitness
)
322 [[col
for col
in range(math
.ceil(len(message
)/rows
))]
323 for rows
in range(1,max_key_length
+1)]]
324 # Gotcha: the helper function here needs to be defined at the top level
325 # (limitation of Pool.starmap)
326 breaks
= pool
.starmap(column_transposition_break_worker
,
327 helper_args
, chunksize
)
328 best
= max(breaks
, key
=lambda k
: k
[1])
329 return math
.trunc(len(message
) / len(best
[0][0])), best
[1]
330 scytale_break
= scytale_break_mp
333 def vigenere_keyword_break_mp(message
, wordlist
=keywords
, fitness
=Pletters
,
335 """Breaks a vigenere cipher using a dictionary and frequency analysis.
337 >>> vigenere_keyword_break_mp(vigenere_encipher(sanitise('this is a test ' \
338 'message for the vigenere decipherment'), 'cat'), \
339 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
340 ('cat', -52.947271216...)
343 helper_args
= [(message
, word
, fitness
)
344 for word
in wordlist
]
345 # Gotcha: the helper function here needs to be defined at the top level
346 # (limitation of Pool.starmap)
347 breaks
= pool
.starmap(vigenere_keyword_break_worker
, helper_args
,
349 return max(breaks
, key
=lambda k
: k
[1])
350 vigenere_keyword_break
= vigenere_keyword_break_mp
352 def vigenere_keyword_break_worker(message
, keyword
, fitness
):
353 plaintext
= vigenere_decipher(message
, keyword
)
354 fit
= fitness(plaintext
)
355 logger
.debug('Vigenere keyword break attempt using key {0} gives fit of '
356 '{1} and decrypt starting: {2}'.format(keyword
,
357 fit
, sanitise(plaintext
)[:50]))
362 def vigenere_frequency_break(message
, max_key_length
=20, fitness
=Pletters
):
363 """Breaks a Vigenere cipher with frequency analysis
365 >>> vigenere_frequency_break(vigenere_encipher(sanitise("It is time to " \
366 "run. She is ready and so am I. I stole Daniel's pocketbook this " \
367 "afternoon when he left his jacket hanging on the easel in the " \
368 "attic. I jump every time I hear a footstep on the stairs, " \
369 "certain that the theft has been discovered and that I will " \
370 "be caught. The SS officer visits less often now that he is " \
371 "sure"), 'florence')) # doctest: +ELLIPSIS
372 ('florence', -307.5473096791...)
374 def worker(message
, key_length
, fitness
):
375 splits
= every_nth(sanitised_message
, key_length
)
376 key
= ''.join([chr(caesar_break(s
)[0] + ord('a')) for s
in splits
])
377 plaintext
= vigenere_decipher(message
, key
)
378 fit
= fitness(plaintext
)
380 sanitised_message
= sanitise(message
)
381 results
= starmap(worker
, [(sanitised_message
, i
, fitness
)
382 for i
in range(1, max_key_length
+1)])
383 return max(results
, key
=lambda k
: k
[1])
386 def beaufort_frequency_break(message
, max_key_length
=20, fitness
=Pletters
):
387 """Breaks a Beaufort cipher with frequency analysis
389 >>> beaufort_frequency_break(beaufort_encipher(sanitise("It is time to " \
390 "run. She is ready and so am I. I stole Daniel's pocketbook this " \
391 "afternoon when he left his jacket hanging on the easel in the " \
392 "attic. I jump every time I hear a footstep on the stairs, " \
393 "certain that the theft has been discovered and that I will " \
394 "be caught. The SS officer visits less often now " \
395 "that he is sure"), 'florence')) # doctest: +ELLIPSIS
396 ('florence', -307.5473096791...)
398 def worker(message
, key_length
, fitness
):
399 splits
= every_nth(sanitised_message
, key_length
)
400 key
= ''.join([chr(-caesar_break(s
)[0] % 26 + ord('a'))
402 plaintext
= beaufort_decipher(message
, key
)
403 fit
= fitness(plaintext
)
405 sanitised_message
= sanitise(message
)
406 results
= starmap(worker
, [(sanitised_message
, i
, fitness
)
407 for i
in range(1, max_key_length
+1)])
408 return max(results
, key
=lambda k
: k
[1])
410 def plot_frequency_histogram(freqs
, sort_key
=None):
411 x
= range(len(freqs
.keys()))
412 y
= [freqs
[l
] for l
in sorted(freqs
.keys(), key
=sort_key
)]
414 ax
= f
.add_axes([0.1, 0.1, 0.9, 0.9])
415 ax
.bar(x
, y
, align
='center')
417 ax
.set_xticklabels(sorted(freqs
.keys(), key
=sort_key
))
421 if __name__
== "__main__":