1 """A set of functions to break the ciphers give in ciphers.py.
10 from itertools
import starmap
11 from segment
import segment
12 from multiprocessing
import Pool
14 import matplotlib
.pyplot
as plt
16 logger
= logging
.getLogger(__name__
)
17 logger
.addHandler(logging
.FileHandler('cipher.log'))
18 logger
.setLevel(logging
.WARNING
)
19 #logger.setLevel(logging.INFO)
20 #logger.setLevel(logging.DEBUG)
23 from language_models
import *
28 # c5a = open('2012/5a.ciphertext', 'r').read()
29 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
30 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
33 def frequencies(text
):
34 """Count the number of occurrences of each character in text
36 >>> sorted(frequencies('abcdefabc').items())
37 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
38 >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
39 'dog').items()) # doctest: +NORMALIZE_WHITESPACE
40 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
41 ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
42 ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
43 ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
44 >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
45 '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
46 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
47 ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
48 ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
49 ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
50 ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
51 >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\
52 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
53 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
54 ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
55 ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
56 ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
57 >>> frequencies('abcdefabcdef')['x']
60 return collections
.Counter(c
for c
in text
)
63 def caesar_break(message
, fitness
=Pletters
):
64 """Breaks a Caesar cipher using frequency analysis
66 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
67 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
68 (4, -130.849989015...)
69 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
70 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
71 (19, -128.82410410...)
72 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
73 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
74 (13, -126.25403935...)
76 sanitised_message
= sanitise(message
)
78 best_fit
= float('-inf')
79 for shift
in range(26):
80 plaintext
= caesar_decipher(sanitised_message
, shift
)
81 fit
= fitness(plaintext
)
82 logger
.debug('Caesar break attempt using key {0} gives fit of {1} '
83 'and decrypt starting: {2}'.format(shift
, fit
,
88 logger
.info('Caesar break best fit: key {0} gives fit of {1} and '
89 'decrypt starting: {2}'.format(best_shift
, best_fit
,
90 caesar_decipher(sanitised_message
, best_shift
)[:50]))
91 return best_shift
, best_fit
93 def affine_break(message
, fitness
=Pletters
):
94 """Breaks an affine cipher using frequency analysis
96 >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
97 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
98 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
99 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
100 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai ' \
101 'kxd clm ckuxj.') # doctest: +ELLIPSIS
102 ((15, 22, True), -340.601181913...)
104 sanitised_message
= sanitise(message
)
107 best_one_based
= True
108 best_fit
= float("-inf")
109 for one_based
in [True, False]:
110 for multiplier
in [x
for x
in range(1, 26, 2) if x
!= 13]:
111 for adder
in range(26):
112 plaintext
= affine_decipher(sanitised_message
,
113 multiplier
, adder
, one_based
)
114 fit
= fitness(plaintext
)
115 logger
.debug('Affine break attempt using key {0}x+{1} ({2}) '
116 'gives fit of {3} and decrypt starting: {4}'.
117 format(multiplier
, adder
, one_based
, fit
,
121 best_multiplier
= multiplier
123 best_one_based
= one_based
124 logger
.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of '
125 '{3} and decrypt starting: {4}'.format(
126 best_multiplier
, best_adder
, best_one_based
, best_fit
,
127 affine_decipher(sanitised_message
, best_multiplier
,
128 best_adder
, best_one_based
)[:50]))
129 return (best_multiplier
, best_adder
, best_one_based
), best_fit
131 def keyword_break(message
, wordlist
=keywords
, fitness
=Pletters
):
132 """Breaks a keyword substitution cipher using a dictionary and
135 >>> keyword_break(keyword_encipher('this is a test message for the ' \
136 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
137 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
138 (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
141 best_wrap_alphabet
= True
142 best_fit
= float("-inf")
143 for wrap_alphabet
in KeywordWrapAlphabet
:
144 for keyword
in wordlist
:
145 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
146 fit
= fitness(plaintext
)
147 logger
.debug('Keyword break attempt using key {0} (wrap={1}) '
148 'gives fit of {2} and decrypt starting: {3}'.format(
149 keyword
, wrap_alphabet
, fit
,
150 sanitise(plaintext
)[:50]))
153 best_keyword
= keyword
154 best_wrap_alphabet
= wrap_alphabet
155 logger
.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
156 '{2} and decrypt starting: {3}'.format(best_keyword
,
157 best_wrap_alphabet
, best_fit
, sanitise(
158 keyword_decipher(message
, best_keyword
,
159 best_wrap_alphabet
))[:50]))
160 return (best_keyword
, best_wrap_alphabet
), best_fit
162 def keyword_break_mp(message
, wordlist
=keywords
, fitness
=Pletters
,
164 """Breaks a keyword substitution cipher using a dictionary and
167 >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
168 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
169 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
170 (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
173 helper_args
= [(message
, word
, wrap
, fitness
)
175 for wrap
in KeywordWrapAlphabet
]
176 # Gotcha: the helper function here needs to be defined at the top level
177 # (limitation of Pool.starmap)
178 breaks
= pool
.starmap(keyword_break_worker
, helper_args
, chunksize
)
179 return max(breaks
, key
=lambda k
: k
[1])
181 def keyword_break_worker(message
, keyword
, wrap_alphabet
, fitness
):
182 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
183 fit
= fitness(plaintext
)
184 logger
.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
185 '{2} and decrypt starting: {3}'.format(keyword
,
186 wrap_alphabet
, fit
, sanitise(plaintext
)[:50]))
187 return (keyword
, wrap_alphabet
), fit
190 def vigenere_keyword_break_mp(message
, wordlist
=keywords
, fitness
=Pletters
,
192 """Breaks a vigenere cipher using a dictionary and frequency analysis.
194 >>> vigenere_keyword_break_mp(vigenere_encipher(sanitise('this is a test ' \
195 'message for the vigenere decipherment'), 'cat'), \
196 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
197 ('cat', -52.947271216...)
200 helper_args
= [(message
, word
, fitness
)
201 for word
in wordlist
]
202 # Gotcha: the helper function here needs to be defined at the top level
203 # (limitation of Pool.starmap)
204 breaks
= pool
.starmap(vigenere_keyword_break_worker
, helper_args
,
206 return max(breaks
, key
=lambda k
: k
[1])
207 vigenere_keyword_break
= vigenere_keyword_break_mp
209 def vigenere_keyword_break_worker(message
, keyword
, fitness
):
210 plaintext
= vigenere_decipher(message
, keyword
)
211 fit
= fitness(plaintext
)
212 logger
.debug('Vigenere keyword break attempt using key {0} gives fit of '
213 '{1} and decrypt starting: {2}'.format(keyword
,
214 fit
, sanitise(plaintext
)[:50]))
218 def vigenere_frequency_break(message
, max_key_length
=20, fitness
=Pletters
):
219 """Breaks a Vigenere cipher with frequency analysis
221 >>> vigenere_frequency_break(vigenere_encipher(sanitise("It is time to " \
222 "run. She is ready and so am I. I stole Daniel's pocketbook this " \
223 "afternoon when he left his jacket hanging on the easel in the " \
224 "attic. I jump every time I hear a footstep on the stairs, " \
225 "certain that the theft has been discovered and that I will " \
226 "be caught. The SS officer visits less often now that he is " \
227 "sure"), 'florence')) # doctest: +ELLIPSIS
228 ('florence', -307.5473096791...)
230 def worker(message
, key_length
, fitness
):
231 splits
= every_nth(sanitised_message
, key_length
)
232 key
= ''.join([chr(caesar_break(s
)[0] + ord('a')) for s
in splits
])
233 plaintext
= vigenere_decipher(message
, key
)
234 fit
= fitness(plaintext
)
236 sanitised_message
= sanitise(message
)
237 results
= starmap(worker
, [(sanitised_message
, i
, fitness
)
238 for i
in range(1, max_key_length
+1)])
239 return max(results
, key
=lambda k
: k
[1])
242 def beaufort_frequency_break(message
, max_key_length
=20, fitness
=Pletters
):
243 """Breaks a Beaufort cipher with frequency analysis
245 >>> beaufort_frequency_break(beaufort_encipher(sanitise("It is time to " \
246 "run. She is ready and so am I. I stole Daniel's pocketbook this " \
247 "afternoon when he left his jacket hanging on the easel in the " \
248 "attic. I jump every time I hear a footstep on the stairs, " \
249 "certain that the theft has been discovered and that I will " \
250 "be caught. The SS officer visits less often now " \
251 "that he is sure"), 'florence')) # doctest: +ELLIPSIS
252 ('florence', -307.5473096791...)
254 def worker(message
, key_length
, fitness
):
255 splits
= every_nth(sanitised_message
, key_length
)
256 key
= ''.join([chr(-caesar_break(s
)[0] % 26 + ord('a'))
258 plaintext
= beaufort_decipher(message
, key
)
259 fit
= fitness(plaintext
)
261 sanitised_message
= sanitise(message
)
262 results
= starmap(worker
, [(sanitised_message
, i
, fitness
)
263 for i
in range(1, max_key_length
+1)])
264 return max(results
, key
=lambda k
: k
[1])
266 def plot_frequency_histogram(freqs
, sort_key
=None):
267 x
= range(len(freqs
.keys()))
268 y
= [freqs
[l
] for l
in sorted(freqs
.keys(), key
=sort_key
)]
270 ax
= f
.add_axes([0.1, 0.1, 0.9, 0.9])
271 ax
.bar(x
, y
, align
='center')
273 ax
.set_xticklabels(sorted(freqs
.keys(), key
=sort_key
))
277 if __name__
== "__main__":