368d669a49b2741822d983243cd5225151764d95
[cipher-tools.git] / cipherbreak.py
1 """A set of functions to break the ciphers give in ciphers.py.
2 """
3
4 import string
5 import collections
6 import norms
7 import logging
8 import random
9 import math
10 from itertools import starmap
11 from segment import segment
12 from multiprocessing import Pool
13
14 import matplotlib.pyplot as plt
15
16
17 from cipher import *
18 from language_models import *
19
20 # To time a run:
21 #
22 # import timeit
23 # c5a = open('2012/5a.ciphertext', 'r').read()
24 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
25 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
26
27
28
29
30
31 def amsco_break(message, translist=transpositions, patterns = [(1, 2), (2, 1)],
32 fillstyles = [AmscoFillStyle.continuous,
33 AmscoFillStyle.same_each_row,
34 AmscoFillStyle.reverse_each_row],
35 fitness=Pbigrams,
36 chunksize=500):
37 """Breaks an AMSCO transposition cipher using a dictionary and
38 n-gram frequency analysis
39
40 >>> amsco_break(amsco_transposition_encipher(sanitise( \
41 "It is a truth universally acknowledged, that a single man in \
42 possession of a good fortune, must be in want of a wife. However \
43 little known the feelings or views of such a man may be on his \
44 first entering a neighbourhood, this truth is so well fixed in \
45 the minds of the surrounding families, that he is considered the \
46 rightful property of some one or other of their daughters."), \
47 'encipher'), \
48 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
49 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
50 (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
51 patterns=[(1, 2)]) # doctest: +ELLIPSIS
52 (((2, 0, 5, 3, 1, 4, 6), (1, 2), <AmscoFillStyle.continuous: 1>), -709.4646722...)
53 >>> amsco_break(amsco_transposition_encipher(sanitise( \
54 "It is a truth universally acknowledged, that a single man in \
55 possession of a good fortune, must be in want of a wife. However \
56 little known the feelings or views of such a man may be on his \
57 first entering a neighbourhood, this truth is so well fixed in \
58 the minds of the surrounding families, that he is considered the \
59 rightful property of some one or other of their daughters."), \
60 'encipher', fillpattern=(2, 1)), \
61 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
62 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
63 (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
64 patterns=[(1, 2), (2, 1)], fitness=Ptrigrams) # doctest: +ELLIPSIS
65 (((2, 0, 5, 3, 1, 4, 6), (2, 1), <AmscoFillStyle.continuous: 1>), -997.0129085...)
66 """
67 with Pool() as pool:
68 helper_args = [(message, trans, pattern, fillstyle, fitness)
69 for trans in translist
70 for pattern in patterns
71 for fillstyle in fillstyles]
72 # Gotcha: the helper function here needs to be defined at the top level
73 # (limitation of Pool.starmap)
74 breaks = pool.starmap(amsco_break_worker, helper_args, chunksize)
75 return max(breaks, key=lambda k: k[1])
76
77 def amsco_break_worker(message, transposition,
78 pattern, fillstyle, fitness):
79 plaintext = amsco_transposition_decipher(message, transposition,
80 fillpattern=pattern, fillstyle=fillstyle)
81 fit = fitness(sanitise(plaintext))
82 logger.debug('AMSCO transposition break attempt using key {0} and pattern'
83 '{1} ({2}) gives fit of {3} and decrypt starting: '
84 '{4}'.format(
85 transposition, pattern, fillstyle, fit,
86 sanitise(plaintext)[:50]))
87 return (transposition, pattern, fillstyle), fit
88
89
90 def hill_break(message, matrix_size=2, fitness=Pletters,
91 number_of_solutions=1, chunksize=500):
92
93 all_matrices = [np.matrix(list(m))
94 for m in itertools.product([list(r)
95 for r in itertools.product(range(26), repeat=matrix_size)],
96 repeat=matrix_size)]
97 valid_matrices = [m for m, d in
98 zip(all_matrices, (int(round(linalg.det(m))) for m in all_matrices))
99 if d != 0
100 if d % 2 != 0
101 if d % 13 != 0 ]
102 with Pool() as pool:
103 helper_args = [(message, matrix, fitness)
104 for matrix in valid_matrices]
105 # Gotcha: the helper function here needs to be defined at the top level
106 # (limitation of Pool.starmap)
107 breaks = pool.starmap(hill_break_worker, helper_args, chunksize)
108 if number_of_solutions == 1:
109 return max(breaks, key=lambda k: k[1])
110 else:
111 return sorted(breaks, key=lambda k: k[1], reverse=True)[:number_of_solutions]
112
113 def hill_break_worker(message, matrix, fitness):
114 plaintext = hill_decipher(matrix, message)
115 fit = fitness(plaintext)
116 logger.debug('Hill cipher break attempt using key {0} gives fit of '
117 '{1} and decrypt starting: {2}'.format(matrix,
118 fit, sanitise(plaintext)[:50]))
119 return matrix, fit
120
121 def bifid_break_mp(message, wordlist=keywords, fitness=Pletters, max_period=10,
122 number_of_solutions=1, chunksize=500):
123 """Breaks a keyword substitution cipher using a dictionary and
124 frequency analysis
125
126 >>> bifid_break_mp(bifid_encipher('this is a test message for the ' \
127 'keyword decipherment', 'elephant', wrap_alphabet=KeywordWrapAlphabet.from_last), \
128 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
129 (('elephant', <KeywordWrapAlphabet.from_last: 2>, 0), -52.834575011...)
130 >>> bifid_break_mp(bifid_encipher('this is a test message for the ' \
131 'keyword decipherment', 'elephant', wrap_alphabet=KeywordWrapAlphabet.from_last), \
132 wordlist=['cat', 'elephant', 'kangaroo'], \
133 number_of_solutions=2) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
134 [(('elephant', <KeywordWrapAlphabet.from_last: 2>, 0), -52.834575011...),
135 (('elephant', <KeywordWrapAlphabet.from_largest: 3>, 0), -52.834575011...)]
136 """
137 with Pool() as pool:
138 helper_args = [(message, word, wrap, period, fitness)
139 for word in wordlist
140 for wrap in KeywordWrapAlphabet
141 for period in range(max_period+1)]
142 # Gotcha: the helper function here needs to be defined at the top level
143 # (limitation of Pool.starmap)
144 breaks = pool.starmap(bifid_break_worker, helper_args, chunksize)
145 if number_of_solutions == 1:
146 return max(breaks, key=lambda k: k[1])
147 else:
148 return sorted(breaks, key=lambda k: k[1], reverse=True)[:number_of_solutions]
149
150 def bifid_break_worker(message, keyword, wrap_alphabet, period, fitness):
151 plaintext = bifid_decipher(message, keyword, wrap_alphabet, period=period)
152 fit = fitness(plaintext)
153 logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
154 '{2} and decrypt starting: {3}'.format(keyword,
155 wrap_alphabet, fit, sanitise(plaintext)[:50]))
156 return (keyword, wrap_alphabet, period), fit
157
158
159 def autokey_sa_break( message
160 , min_keylength=2
161 , max_keylength=20
162 , workers=10
163 , initial_temperature=200
164 , max_iterations=20000
165 , fitness=Pletters
166 , chunksize=1
167 , result_count=1
168 ):
169 """Break an autokey cipher by simulated annealing
170 """
171 worker_args = []
172 ciphertext = sanitise(message)
173 for keylength in range(min_keylength, max_keylength+1):
174 for i in range(workers):
175 key = cat(random.choice(string.ascii_lowercase) for _ in range(keylength))
176 worker_args.append((ciphertext, key,
177 initial_temperature, max_iterations, fitness))
178
179 with Pool() as pool:
180 breaks = pool.starmap(autokey_sa_break_worker,
181 worker_args, chunksize)
182 if result_count <= 1:
183 return max(breaks, key=lambda k: k[1])
184 else:
185 return sorted(set(breaks), key=lambda k: k[1], reverse=True)[:result_count]
186
187
188 def autokey_sa_break_worker(message, key,
189 t0, max_iterations, fitness):
190
191 temperature = t0
192
193 dt = t0 / (0.9 * max_iterations)
194
195 plaintext = autokey_decipher(message, key)
196 current_fitness = fitness(plaintext)
197 current_key = key
198
199 best_key = current_key
200 best_fitness = current_fitness
201 best_plaintext = plaintext
202
203 # print('starting for', max_iterations)
204 for i in range(max_iterations):
205 swap_pos = random.randrange(len(current_key))
206 swap_char = random.choice(string.ascii_lowercase)
207
208 new_key = current_key[:swap_pos] + swap_char + current_key[swap_pos+1:]
209
210 plaintext = autokey_decipher(message, new_key)
211 new_fitness = fitness(plaintext)
212 try:
213 sa_chance = math.exp((new_fitness - current_fitness) / temperature)
214 except (OverflowError, ZeroDivisionError):
215 # print('exception triggered: new_fit {}, current_fit {}, temp {}'.format(new_fitness, current_fitness, temperature))
216 sa_chance = 0
217 if (new_fitness > current_fitness or random.random() < sa_chance):
218 # logger.debug('Simulated annealing: iteration {}, temperature {}, '
219 # 'current alphabet {}, current_fitness {}, '
220 # 'best_plaintext {}'.format(i, temperature, current_alphabet,
221 # current_fitness, best_plaintext[:50]))
222
223 # logger.debug('new_fit {}, current_fit {}, temp {}, sa_chance {}'.format(new_fitness, current_fitness, temperature, sa_chance))
224 # print(new_fitness, new_key, plaintext[:100])
225 current_fitness = new_fitness
226 current_key = new_key
227
228 if current_fitness > best_fitness:
229 best_key = current_key
230 best_fitness = current_fitness
231 best_plaintext = plaintext
232 if i % 500 == 0:
233 logger.debug('Simulated annealing: iteration {}, temperature {}, '
234 'current key {}, current_fitness {}, '
235 'best_plaintext {}'.format(i, temperature, current_key,
236 current_fitness, plaintext[:50]))
237 temperature = max(temperature - dt, 0.001)
238
239 # print(best_key, best_fitness, best_plaintext[:70])
240 return best_key, best_fitness # current_alphabet, current_fitness
241
242
243 def pocket_enigma_break_by_crib(message, wheel_spec, crib, crib_position):
244 """Break a pocket enigma using a crib (some plaintext that's expected to
245 be in a certain position). Returns a list of possible starting wheel
246 positions that could produce the crib.
247
248 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'h', 0)
249 ['a', 'f', 'q']
250 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'he', 0)
251 ['a']
252 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'll', 2)
253 ['a']
254 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'l', 2)
255 ['a']
256 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'l', 3)
257 ['a', 'j', 'n']
258 >>> pocket_enigma_break_by_crib('aaaaa', 1, 'l', 3)
259 []
260 """
261 pe = PocketEnigma(wheel=wheel_spec)
262 possible_positions = []
263 for p in string.ascii_lowercase:
264 pe.set_position(p)
265 plaintext = pe.decipher(message)
266 if plaintext[crib_position:crib_position+len(crib)] == crib:
267 possible_positions += [p]
268 return possible_positions
269
270
271 def plot_frequency_histogram(freqs, sort_key=None):
272 x = range(len(freqs))
273 y = [freqs[l] for l in sorted(freqs, key=sort_key)]
274 f = plt.figure()
275 ax = f.add_axes([0.1, 0.1, 0.9, 0.9])
276 ax.bar(x, y, align='center')
277 ax.set_xticks(x)
278 ax.set_xticklabels(sorted(freqs, key=sort_key))
279 f.show()
280
281
282 if __name__ == "__main__":
283 import doctest
284 doctest.testmod()