1 """A set of functions to break the ciphers give in ciphers.py.
10 from itertools
import starmap
11 from segment
import segment
12 from multiprocessing
import Pool
14 import matplotlib
.pyplot
as plt
16 logger
= logging
.getLogger(__name__
)
17 logger
.addHandler(logging
.FileHandler('cipher.log'))
18 logger
.setLevel(logging
.WARNING
)
19 #logger.setLevel(logging.INFO)
20 #logger.setLevel(logging.DEBUG)
23 from language_models
import *
28 # c5a = open('2012/5a.ciphertext', 'r').read()
29 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
30 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
32 transpositions
= collections
.defaultdict(list)
34 transpositions
[transpositions_of(word
)] += [word
]
36 def frequencies(text
):
37 """Count the number of occurrences of each character in text
39 >>> sorted(frequencies('abcdefabc').items())
40 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
41 >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
42 'dog').items()) # doctest: +NORMALIZE_WHITESPACE
43 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
44 ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
45 ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
46 ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
47 >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
48 '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
49 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
50 ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
51 ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
52 ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
53 ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
54 >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\
55 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
56 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
57 ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
58 ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
59 ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
60 >>> frequencies('abcdefabcdef')['x']
63 return collections
.Counter(c
for c
in text
)
66 def caesar_break(message
, fitness
=Pletters
):
67 """Breaks a Caesar cipher using frequency analysis
69 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
70 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
71 (4, -130.849989015...)
72 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
73 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
74 (19, -128.82410410...)
75 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
76 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
77 (13, -126.25403935...)
79 sanitised_message
= sanitise(message
)
81 best_fit
= float('-inf')
82 for shift
in range(26):
83 plaintext
= caesar_decipher(sanitised_message
, shift
)
84 fit
= fitness(plaintext
)
85 logger
.debug('Caesar break attempt using key {0} gives fit of {1} '
86 'and decrypt starting: {2}'.format(shift
, fit
,
91 logger
.info('Caesar break best fit: key {0} gives fit of {1} and '
92 'decrypt starting: {2}'.format(best_shift
, best_fit
,
93 caesar_decipher(sanitised_message
, best_shift
)[:50]))
94 return best_shift
, best_fit
96 def affine_break(message
, fitness
=Pletters
):
97 """Breaks an affine cipher using frequency analysis
99 >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
100 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
101 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
102 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
103 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai ' \
104 'kxd clm ckuxj.') # doctest: +ELLIPSIS
105 ((15, 22, True), -340.601181913...)
107 sanitised_message
= sanitise(message
)
110 best_one_based
= True
111 best_fit
= float("-inf")
112 for one_based
in [True, False]:
113 for multiplier
in [x
for x
in range(1, 26, 2) if x
!= 13]:
114 for adder
in range(26):
115 plaintext
= affine_decipher(sanitised_message
,
116 multiplier
, adder
, one_based
)
117 fit
= fitness(plaintext
)
118 logger
.debug('Affine break attempt using key {0}x+{1} ({2}) '
119 'gives fit of {3} and decrypt starting: {4}'.
120 format(multiplier
, adder
, one_based
, fit
,
124 best_multiplier
= multiplier
126 best_one_based
= one_based
127 logger
.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of '
128 '{3} and decrypt starting: {4}'.format(
129 best_multiplier
, best_adder
, best_one_based
, best_fit
,
130 affine_decipher(sanitised_message
, best_multiplier
,
131 best_adder
, best_one_based
)[:50]))
132 return (best_multiplier
, best_adder
, best_one_based
), best_fit
134 def keyword_break(message
, wordlist
=keywords
, fitness
=Pletters
):
135 """Breaks a keyword substitution cipher using a dictionary and
138 >>> keyword_break(keyword_encipher('this is a test message for the ' \
139 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
140 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
141 (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
144 best_wrap_alphabet
= True
145 best_fit
= float("-inf")
146 for wrap_alphabet
in KeywordWrapAlphabet
:
147 for keyword
in wordlist
:
148 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
149 fit
= fitness(plaintext
)
150 logger
.debug('Keyword break attempt using key {0} (wrap={1}) '
151 'gives fit of {2} and decrypt starting: {3}'.format(
152 keyword
, wrap_alphabet
, fit
,
153 sanitise(plaintext
)[:50]))
156 best_keyword
= keyword
157 best_wrap_alphabet
= wrap_alphabet
158 logger
.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
159 '{2} and decrypt starting: {3}'.format(best_keyword
,
160 best_wrap_alphabet
, best_fit
, sanitise(
161 keyword_decipher(message
, best_keyword
,
162 best_wrap_alphabet
))[:50]))
163 return (best_keyword
, best_wrap_alphabet
), best_fit
165 def keyword_break_mp(message
, wordlist
=keywords
, fitness
=Pletters
,
167 """Breaks a keyword substitution cipher using a dictionary and
170 >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
171 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
172 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
173 (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
176 helper_args
= [(message
, word
, wrap
, fitness
)
178 for wrap
in KeywordWrapAlphabet
]
179 # Gotcha: the helper function here needs to be defined at the top level
180 # (limitation of Pool.starmap)
181 breaks
= pool
.starmap(keyword_break_worker
, helper_args
, chunksize
)
182 return max(breaks
, key
=lambda k
: k
[1])
184 def keyword_break_worker(message
, keyword
, wrap_alphabet
, fitness
):
185 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
186 fit
= fitness(plaintext
)
187 logger
.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
188 '{2} and decrypt starting: {3}'.format(keyword
,
189 wrap_alphabet
, fit
, sanitise(plaintext
)[:50]))
190 return (keyword
, wrap_alphabet
), fit
192 def monoalphabetic_break_hillclimbing(message
, max_iterations
=10000000,
194 ciphertext
= unaccent(message
).lower()
195 alphabet
= list(string
.ascii_lowercase
)
196 random
.shuffle(alphabet
)
197 alphabet
= ''.join(alphabet
)
198 return monoalphabetic_break_hillclimbing_worker(ciphertext
, alphabet
,
199 max_iterations
, fitness
)
201 def monoalphabetic_break_hillclimbing_mp(message
, workers
=10,
202 max_iterations
= 10000000, fitness
=Pletters
, chunksize
=1):
204 ciphertext
= unaccent(message
).lower()
205 for i
in range(workers
):
206 alphabet
= list(string
.ascii_lowercase
)
207 random
.shuffle(alphabet
)
208 alphabet
= ''.join(alphabet
)
209 worker_args
.append((ciphertext
, alphabet
, max_iterations
, fitness
))
211 breaks
= pool
.starmap(monoalphabetic_break_hillclimbing_worker
,
212 worker_args
, chunksize
)
213 return max(breaks
, key
=lambda k
: k
[1])
215 def monoalphabetic_break_hillclimbing_worker(message
, alphabet
,
216 max_iterations
, fitness
):
217 def swap(letters
, i
, j
):
223 return (letters
[:i
] + letters
[j
] + letters
[i
+1:j
] + letters
[i
] +
225 best_alphabet
= alphabet
226 best_fitness
= float('-inf')
227 for i
in range(max_iterations
):
228 alphabet
= swap(alphabet
, random
.randrange(26), random
.randrange(26))
229 cipher_translation
= ''.maketrans(string
.ascii_lowercase
, alphabet
)
230 plaintext
= message
.translate(cipher_translation
)
231 if fitness(plaintext
) > best_fitness
:
232 best_fitness
= fitness(plaintext
)
233 best_alphabet
= alphabet
234 print(i
, best_alphabet
, best_fitness
, plaintext
)
235 return best_alphabet
, best_fitness
238 def vigenere_keyword_break_mp(message
, wordlist
=keywords
, fitness
=Pletters
,
240 """Breaks a vigenere cipher using a dictionary and frequency analysis.
242 >>> vigenere_keyword_break_mp(vigenere_encipher(sanitise('this is a test ' \
243 'message for the vigenere decipherment'), 'cat'), \
244 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
245 ('cat', -52.947271216...)
248 helper_args
= [(message
, word
, fitness
)
249 for word
in wordlist
]
250 # Gotcha: the helper function here needs to be defined at the top level
251 # (limitation of Pool.starmap)
252 breaks
= pool
.starmap(vigenere_keyword_break_worker
, helper_args
,
254 return max(breaks
, key
=lambda k
: k
[1])
255 vigenere_keyword_break
= vigenere_keyword_break_mp
257 def vigenere_keyword_break_worker(message
, keyword
, fitness
):
258 plaintext
= vigenere_decipher(message
, keyword
)
259 fit
= fitness(plaintext
)
260 logger
.debug('Vigenere keyword break attempt using key {0} gives fit of '
261 '{1} and decrypt starting: {2}'.format(keyword
,
262 fit
, sanitise(plaintext
)[:50]))
266 def vigenere_frequency_break(message
, max_key_length
=20, fitness
=Pletters
):
267 """Breaks a Vigenere cipher with frequency analysis
269 >>> vigenere_frequency_break(vigenere_encipher(sanitise("It is time to " \
270 "run. She is ready and so am I. I stole Daniel's pocketbook this " \
271 "afternoon when he left his jacket hanging on the easel in the " \
272 "attic. I jump every time I hear a footstep on the stairs, " \
273 "certain that the theft has been discovered and that I will " \
274 "be caught. The SS officer visits less often now that he is " \
275 "sure"), 'florence')) # doctest: +ELLIPSIS
276 ('florence', -307.5473096791...)
278 def worker(message
, key_length
, fitness
):
279 splits
= every_nth(sanitised_message
, key_length
)
280 key
= ''.join([chr(caesar_break(s
)[0] + ord('a')) for s
in splits
])
281 plaintext
= vigenere_decipher(message
, key
)
282 fit
= fitness(plaintext
)
284 sanitised_message
= sanitise(message
)
285 results
= starmap(worker
, [(sanitised_message
, i
, fitness
)
286 for i
in range(1, max_key_length
+1)])
287 return max(results
, key
=lambda k
: k
[1])
290 def beaufort_frequency_break(message
, max_key_length
=20, fitness
=Pletters
):
291 """Breaks a Beaufort cipher with frequency analysis
293 >>> beaufort_frequency_break(beaufort_encipher(sanitise("It is time to " \
294 "run. She is ready and so am I. I stole Daniel's pocketbook this " \
295 "afternoon when he left his jacket hanging on the easel in the " \
296 "attic. I jump every time I hear a footstep on the stairs, " \
297 "certain that the theft has been discovered and that I will " \
298 "be caught. The SS officer visits less often now " \
299 "that he is sure"), 'florence')) # doctest: +ELLIPSIS
300 ('florence', -307.5473096791...)
302 def worker(message
, key_length
, fitness
):
303 splits
= every_nth(sanitised_message
, key_length
)
304 key
= ''.join([chr(-caesar_break(s
)[0] % 26 + ord('a'))
306 plaintext
= beaufort_decipher(message
, key
)
307 fit
= fitness(plaintext
)
309 sanitised_message
= sanitise(message
)
310 results
= starmap(worker
, [(sanitised_message
, i
, fitness
)
311 for i
in range(1, max_key_length
+1)])
312 return max(results
, key
=lambda k
: k
[1])
314 def plot_frequency_histogram(freqs
, sort_key
=None):
315 x
= range(len(freqs
.keys()))
316 y
= [freqs
[l
] for l
in sorted(freqs
.keys(), key
=sort_key
)]
318 ax
= f
.add_axes([0.1, 0.1, 0.9, 0.9])
319 ax
.bar(x
, y
, align
='center')
321 ax
.set_xticklabels(sorted(freqs
.keys(), key
=sort_key
))
325 if __name__
== "__main__":