1 """A set of functions to break the ciphers give in ciphers.py.
10 from itertools
import starmap
11 from segment
import segment
12 from multiprocessing
import Pool
14 import matplotlib
.pyplot
as plt
16 logger
= logging
.getLogger(__name__
)
17 logger
.addHandler(logging
.FileHandler('cipher.log'))
18 logger
.setLevel(logging
.WARNING
)
19 #logger.setLevel(logging.INFO)
20 #logger.setLevel(logging.DEBUG)
23 from language_models
import *
28 # c5a = open('2012/5a.ciphertext', 'r').read()
29 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
30 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
33 def frequencies(text
):
34 """Count the number of occurrences of each character in text
36 >>> sorted(frequencies('abcdefabc').items())
37 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
38 >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
39 'dog').items()) # doctest: +NORMALIZE_WHITESPACE
40 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
41 ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
42 ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
43 ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
44 >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
45 '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
46 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
47 ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
48 ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
49 ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
50 ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
51 >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\
52 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
53 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
54 ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
55 ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
56 ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
57 >>> frequencies('abcdefabcdef')['x']
60 return collections
.Counter(c
for c
in text
)
63 def caesar_break(message
, fitness
=Pletters
):
64 """Breaks a Caesar cipher using frequency analysis
66 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
67 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
68 (4, -130.849989015...)
69 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
70 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
71 (19, -128.82410410...)
72 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
73 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
74 (13, -126.25403935...)
76 sanitised_message
= sanitise(message
)
78 best_fit
= float('-inf')
79 for shift
in range(26):
80 plaintext
= caesar_decipher(sanitised_message
, shift
)
81 fit
= fitness(plaintext
)
82 logger
.debug('Caesar break attempt using key {0} gives fit of {1} '
83 'and decrypt starting: {2}'.format(shift
, fit
,
88 logger
.info('Caesar break best fit: key {0} gives fit of {1} and '
89 'decrypt starting: {2}'.format(best_shift
, best_fit
,
90 caesar_decipher(sanitised_message
, best_shift
)[:50]))
91 return best_shift
, best_fit
93 def affine_break(message
, fitness
=Pletters
):
94 """Breaks an affine cipher using frequency analysis
96 >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
97 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
98 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
99 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
100 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai ' \
101 'kxd clm ckuxj.') # doctest: +ELLIPSIS
102 ((15, 22, True), -340.601181913...)
104 sanitised_message
= sanitise(message
)
107 best_one_based
= True
108 best_fit
= float("-inf")
109 for one_based
in [True, False]:
110 for multiplier
in [x
for x
in range(1, 26, 2) if x
!= 13]:
111 for adder
in range(26):
112 plaintext
= affine_decipher(sanitised_message
,
113 multiplier
, adder
, one_based
)
114 fit
= fitness(plaintext
)
115 logger
.debug('Affine break attempt using key {0}x+{1} ({2}) '
116 'gives fit of {3} and decrypt starting: {4}'.
117 format(multiplier
, adder
, one_based
, fit
,
121 best_multiplier
= multiplier
123 best_one_based
= one_based
124 logger
.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of '
125 '{3} and decrypt starting: {4}'.format(
126 best_multiplier
, best_adder
, best_one_based
, best_fit
,
127 affine_decipher(sanitised_message
, best_multiplier
,
128 best_adder
, best_one_based
)[:50]))
129 return (best_multiplier
, best_adder
, best_one_based
), best_fit
131 def keyword_break(message
, wordlist
=keywords
, fitness
=Pletters
):
132 """Breaks a keyword substitution cipher using a dictionary and
135 >>> keyword_break(keyword_encipher('this is a test message for the ' \
136 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
137 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
138 (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
141 best_wrap_alphabet
= True
142 best_fit
= float("-inf")
143 for wrap_alphabet
in KeywordWrapAlphabet
:
144 for keyword
in wordlist
:
145 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
146 fit
= fitness(plaintext
)
147 logger
.debug('Keyword break attempt using key {0} (wrap={1}) '
148 'gives fit of {2} and decrypt starting: {3}'.format(
149 keyword
, wrap_alphabet
, fit
,
150 sanitise(plaintext
)[:50]))
153 best_keyword
= keyword
154 best_wrap_alphabet
= wrap_alphabet
155 logger
.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
156 '{2} and decrypt starting: {3}'.format(best_keyword
,
157 best_wrap_alphabet
, best_fit
, sanitise(
158 keyword_decipher(message
, best_keyword
,
159 best_wrap_alphabet
))[:50]))
160 return (best_keyword
, best_wrap_alphabet
), best_fit
162 def keyword_break_mp(message
, wordlist
=keywords
, fitness
=Pletters
,
164 """Breaks a keyword substitution cipher using a dictionary and
167 >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
168 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
169 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
170 (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
173 helper_args
= [(message
, word
, wrap
, fitness
)
175 for wrap
in KeywordWrapAlphabet
]
176 # Gotcha: the helper function here needs to be defined at the top level
177 # (limitation of Pool.starmap)
178 breaks
= pool
.starmap(keyword_break_worker
, helper_args
, chunksize
)
179 return max(breaks
, key
=lambda k
: k
[1])
181 def keyword_break_worker(message
, keyword
, wrap_alphabet
, fitness
):
182 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
183 fit
= fitness(plaintext
)
184 logger
.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
185 '{2} and decrypt starting: {3}'.format(keyword
,
186 wrap_alphabet
, fit
, sanitise(plaintext
)[:50]))
187 return (keyword
, wrap_alphabet
), fit
189 def monoalphabetic_break_hillclimbing(message
, max_iterations
=10000000,
191 ciphertext
= unaccent(message
).lower()
192 alphabet
= list(string
.ascii_lowercase
)
193 random
.shuffle(alphabet
)
194 alphabet
= ''.join(alphabet
)
195 return monoalphabetic_break_hillclimbing_worker(ciphertext
, alphabet
,
196 max_iterations
, fitness
)
198 def monoalphabetic_break_hillclimbing_mp(message
, workers
=10,
199 max_iterations
= 10000000, fitness
=Pletters
, chunksize
=1):
201 ciphertext
= unaccent(message
).lower()
202 for i
in range(workers
):
203 alphabet
= list(string
.ascii_lowercase
)
204 random
.shuffle(alphabet
)
205 alphabet
= ''.join(alphabet
)
206 worker_args
.append((ciphertext
, alphabet
, max_iterations
, fitness
))
208 breaks
= pool
.starmap(monoalphabetic_break_hillclimbing_worker
,
209 worker_args
, chunksize
)
210 return max(breaks
, key
=lambda k
: k
[1])
212 def monoalphabetic_break_hillclimbing_worker(message
, alphabet
,
213 max_iterations
, fitness
):
214 def swap(letters
, i
, j
):
220 return (letters
[:i
] + letters
[j
] + letters
[i
+1:j
] + letters
[i
] +
222 best_alphabet
= alphabet
223 best_fitness
= float('-inf')
224 for i
in range(max_iterations
):
225 alphabet
= swap(alphabet
, random
.randrange(26), random
.randrange(26))
226 cipher_translation
= ''.maketrans(string
.ascii_lowercase
, alphabet
)
227 plaintext
= message
.translate(cipher_translation
)
228 if fitness(plaintext
) > best_fitness
:
229 best_fitness
= fitness(plaintext
)
230 best_alphabet
= alphabet
231 print(i
, best_alphabet
, best_fitness
, plaintext
)
232 return best_alphabet
, best_fitness
235 def vigenere_keyword_break_mp(message
, wordlist
=keywords
, fitness
=Pletters
,
237 """Breaks a vigenere cipher using a dictionary and frequency analysis.
239 >>> vigenere_keyword_break_mp(vigenere_encipher(sanitise('this is a test ' \
240 'message for the vigenere decipherment'), 'cat'), \
241 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
242 ('cat', -52.947271216...)
245 helper_args
= [(message
, word
, fitness
)
246 for word
in wordlist
]
247 # Gotcha: the helper function here needs to be defined at the top level
248 # (limitation of Pool.starmap)
249 breaks
= pool
.starmap(vigenere_keyword_break_worker
, helper_args
,
251 return max(breaks
, key
=lambda k
: k
[1])
252 vigenere_keyword_break
= vigenere_keyword_break_mp
254 def vigenere_keyword_break_worker(message
, keyword
, fitness
):
255 plaintext
= vigenere_decipher(message
, keyword
)
256 fit
= fitness(plaintext
)
257 logger
.debug('Vigenere keyword break attempt using key {0} gives fit of '
258 '{1} and decrypt starting: {2}'.format(keyword
,
259 fit
, sanitise(plaintext
)[:50]))
263 def vigenere_frequency_break(message
, max_key_length
=20, fitness
=Pletters
):
264 """Breaks a Vigenere cipher with frequency analysis
266 >>> vigenere_frequency_break(vigenere_encipher(sanitise("It is time to " \
267 "run. She is ready and so am I. I stole Daniel's pocketbook this " \
268 "afternoon when he left his jacket hanging on the easel in the " \
269 "attic. I jump every time I hear a footstep on the stairs, " \
270 "certain that the theft has been discovered and that I will " \
271 "be caught. The SS officer visits less often now that he is " \
272 "sure"), 'florence')) # doctest: +ELLIPSIS
273 ('florence', -307.5473096791...)
275 def worker(message
, key_length
, fitness
):
276 splits
= every_nth(sanitised_message
, key_length
)
277 key
= ''.join([chr(caesar_break(s
)[0] + ord('a')) for s
in splits
])
278 plaintext
= vigenere_decipher(message
, key
)
279 fit
= fitness(plaintext
)
281 sanitised_message
= sanitise(message
)
282 results
= starmap(worker
, [(sanitised_message
, i
, fitness
)
283 for i
in range(1, max_key_length
+1)])
284 return max(results
, key
=lambda k
: k
[1])
287 def beaufort_frequency_break(message
, max_key_length
=20, fitness
=Pletters
):
288 """Breaks a Beaufort cipher with frequency analysis
290 >>> beaufort_frequency_break(beaufort_encipher(sanitise("It is time to " \
291 "run. She is ready and so am I. I stole Daniel's pocketbook this " \
292 "afternoon when he left his jacket hanging on the easel in the " \
293 "attic. I jump every time I hear a footstep on the stairs, " \
294 "certain that the theft has been discovered and that I will " \
295 "be caught. The SS officer visits less often now " \
296 "that he is sure"), 'florence')) # doctest: +ELLIPSIS
297 ('florence', -307.5473096791...)
299 def worker(message
, key_length
, fitness
):
300 splits
= every_nth(sanitised_message
, key_length
)
301 key
= ''.join([chr(-caesar_break(s
)[0] % 26 + ord('a'))
303 plaintext
= beaufort_decipher(message
, key
)
304 fit
= fitness(plaintext
)
306 sanitised_message
= sanitise(message
)
307 results
= starmap(worker
, [(sanitised_message
, i
, fitness
)
308 for i
in range(1, max_key_length
+1)])
309 return max(results
, key
=lambda k
: k
[1])
311 def plot_frequency_histogram(freqs
, sort_key
=None):
312 x
= range(len(freqs
.keys()))
313 y
= [freqs
[l
] for l
in sorted(freqs
.keys(), key
=sort_key
)]
315 ax
= f
.add_axes([0.1, 0.1, 0.9, 0.9])
316 ax
.bar(x
, y
, align
='center')
318 ax
.set_xticklabels(sorted(freqs
.keys(), key
=sort_key
))
322 if __name__
== "__main__":