1 """A set of functions to break the ciphers give in ciphers.py.
10 from itertools
import starmap
11 from segment
import segment
12 from multiprocessing
import Pool
14 import matplotlib
.pyplot
as plt
18 from language_models
import *
23 # c5a = open('2012/5a.ciphertext', 'r').read()
24 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
25 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
31 def amsco_break(message
, translist
=transpositions
, patterns
= [(1, 2), (2, 1)],
32 fillstyles
= [AmscoFillStyle
.continuous
,
33 AmscoFillStyle
.same_each_row
,
34 AmscoFillStyle
.reverse_each_row
],
37 """Breaks an AMSCO transposition cipher using a dictionary and
38 n-gram frequency analysis
40 >>> amsco_break(amsco_transposition_encipher(sanitise( \
41 "It is a truth universally acknowledged, that a single man in \
42 possession of a good fortune, must be in want of a wife. However \
43 little known the feelings or views of such a man may be on his \
44 first entering a neighbourhood, this truth is so well fixed in \
45 the minds of the surrounding families, that he is considered the \
46 rightful property of some one or other of their daughters."), \
48 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
49 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
50 (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
51 patterns=[(1, 2)]) # doctest: +ELLIPSIS
52 (((2, 0, 5, 3, 1, 4, 6), (1, 2), <AmscoFillStyle.continuous: 1>), -709.4646722...)
53 >>> amsco_break(amsco_transposition_encipher(sanitise( \
54 "It is a truth universally acknowledged, that a single man in \
55 possession of a good fortune, must be in want of a wife. However \
56 little known the feelings or views of such a man may be on his \
57 first entering a neighbourhood, this truth is so well fixed in \
58 the minds of the surrounding families, that he is considered the \
59 rightful property of some one or other of their daughters."), \
60 'encipher', fillpattern=(2, 1)), \
61 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
62 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
63 (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
64 patterns=[(1, 2), (2, 1)], fitness=Ptrigrams) # doctest: +ELLIPSIS
65 (((2, 0, 5, 3, 1, 4, 6), (2, 1), <AmscoFillStyle.continuous: 1>), -997.0129085...)
68 helper_args
= [(message
, trans
, pattern
, fillstyle
, fitness
)
69 for trans
in translist
70 for pattern
in patterns
71 for fillstyle
in fillstyles
]
72 # Gotcha: the helper function here needs to be defined at the top level
73 # (limitation of Pool.starmap)
74 breaks
= pool
.starmap(amsco_break_worker
, helper_args
, chunksize
)
75 return max(breaks
, key
=lambda k
: k
[1])
77 def amsco_break_worker(message
, transposition
,
78 pattern
, fillstyle
, fitness
):
79 plaintext
= amsco_transposition_decipher(message
, transposition
,
80 fillpattern
=pattern
, fillstyle
=fillstyle
)
81 fit
= fitness(sanitise(plaintext
))
82 logger
.debug('AMSCO transposition break attempt using key {0} and pattern'
83 '{1} ({2}) gives fit of {3} and decrypt starting: '
85 transposition
, pattern
, fillstyle
, fit
,
86 sanitise(plaintext
)[:50]))
87 return (transposition
, pattern
, fillstyle
), fit
90 def hill_break(message
, matrix_size
=2, fitness
=Pletters
,
91 number_of_solutions
=1, chunksize
=500):
93 all_matrices
= [np
.matrix(list(m
))
94 for m
in itertools
.product([list(r
)
95 for r
in itertools
.product(range(26), repeat
=matrix_size
)],
97 valid_matrices
= [m
for m
, d
in
98 zip(all_matrices
, (int(round(linalg
.det(m
))) for m
in all_matrices
))
103 helper_args
= [(message
, matrix
, fitness
)
104 for matrix
in valid_matrices
]
105 # Gotcha: the helper function here needs to be defined at the top level
106 # (limitation of Pool.starmap)
107 breaks
= pool
.starmap(hill_break_worker
, helper_args
, chunksize
)
108 if number_of_solutions
== 1:
109 return max(breaks
, key
=lambda k
: k
[1])
111 return sorted(breaks
, key
=lambda k
: k
[1], reverse
=True)[:number_of_solutions
]
113 def hill_break_worker(message
, matrix
, fitness
):
114 plaintext
= hill_decipher(matrix
, message
)
115 fit
= fitness(plaintext
)
116 logger
.debug('Hill cipher break attempt using key {0} gives fit of '
117 '{1} and decrypt starting: {2}'.format(matrix
,
118 fit
, sanitise(plaintext
)[:50]))
121 def bifid_break_mp(message
, wordlist
=keywords
, fitness
=Pletters
, max_period
=10,
122 number_of_solutions
=1, chunksize
=500):
123 """Breaks a keyword substitution cipher using a dictionary and
126 >>> bifid_break_mp(bifid_encipher('this is a test message for the ' \
127 'keyword decipherment', 'elephant', wrap_alphabet=KeywordWrapAlphabet.from_last), \
128 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
129 (('elephant', <KeywordWrapAlphabet.from_last: 2>, 0), -52.834575011...)
130 >>> bifid_break_mp(bifid_encipher('this is a test message for the ' \
131 'keyword decipherment', 'elephant', wrap_alphabet=KeywordWrapAlphabet.from_last), \
132 wordlist=['cat', 'elephant', 'kangaroo'], \
133 number_of_solutions=2) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
134 [(('elephant', <KeywordWrapAlphabet.from_last: 2>, 0), -52.834575011...),
135 (('elephant', <KeywordWrapAlphabet.from_largest: 3>, 0), -52.834575011...)]
138 helper_args
= [(message
, word
, wrap
, period
, fitness
)
140 for wrap
in KeywordWrapAlphabet
141 for period
in range(max_period
+1)]
142 # Gotcha: the helper function here needs to be defined at the top level
143 # (limitation of Pool.starmap)
144 breaks
= pool
.starmap(bifid_break_worker
, helper_args
, chunksize
)
145 if number_of_solutions
== 1:
146 return max(breaks
, key
=lambda k
: k
[1])
148 return sorted(breaks
, key
=lambda k
: k
[1], reverse
=True)[:number_of_solutions
]
150 def bifid_break_worker(message
, keyword
, wrap_alphabet
, period
, fitness
):
151 plaintext
= bifid_decipher(message
, keyword
, wrap_alphabet
, period
=period
)
152 fit
= fitness(plaintext
)
153 logger
.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
154 '{2} and decrypt starting: {3}'.format(keyword
,
155 wrap_alphabet
, fit
, sanitise(plaintext
)[:50]))
156 return (keyword
, wrap_alphabet
, period
), fit
159 def autokey_sa_break( message
163 , initial_temperature
=200
164 , max_iterations
=20000
169 """Break an autokey cipher by simulated annealing
172 ciphertext
= sanitise(message
)
173 for keylength
in range(min_keylength
, max_keylength
+1):
174 for i
in range(workers
):
175 key
= cat(random
.choice(string
.ascii_lowercase
) for _
in range(keylength
))
176 worker_args
.append((ciphertext
, key
,
177 initial_temperature
, max_iterations
, fitness
))
180 breaks
= pool
.starmap(autokey_sa_break_worker
,
181 worker_args
, chunksize
)
182 if result_count
<= 1:
183 return max(breaks
, key
=lambda k
: k
[1])
185 return sorted(set(breaks
), key
=lambda k
: k
[1], reverse
=True)[:result_count
]
188 def autokey_sa_break_worker(message
, key
,
189 t0
, max_iterations
, fitness
):
193 dt
= t0
/ (0.9 * max_iterations
)
195 plaintext
= autokey_decipher(message
, key
)
196 current_fitness
= fitness(plaintext
)
199 best_key
= current_key
200 best_fitness
= current_fitness
201 best_plaintext
= plaintext
203 # print('starting for', max_iterations)
204 for i
in range(max_iterations
):
205 swap_pos
= random
.randrange(len(current_key
))
206 swap_char
= random
.choice(string
.ascii_lowercase
)
208 new_key
= current_key
[:swap_pos
] + swap_char
+ current_key
[swap_pos
+1:]
210 plaintext
= autokey_decipher(message
, new_key
)
211 new_fitness
= fitness(plaintext
)
213 sa_chance
= math
.exp((new_fitness
- current_fitness
) / temperature
)
214 except (OverflowError, ZeroDivisionError):
215 # print('exception triggered: new_fit {}, current_fit {}, temp {}'.format(new_fitness, current_fitness, temperature))
217 if (new_fitness
> current_fitness
or random
.random() < sa_chance
):
218 # logger.debug('Simulated annealing: iteration {}, temperature {}, '
219 # 'current alphabet {}, current_fitness {}, '
220 # 'best_plaintext {}'.format(i, temperature, current_alphabet,
221 # current_fitness, best_plaintext[:50]))
223 # logger.debug('new_fit {}, current_fit {}, temp {}, sa_chance {}'.format(new_fitness, current_fitness, temperature, sa_chance))
224 # print(new_fitness, new_key, plaintext[:100])
225 current_fitness
= new_fitness
226 current_key
= new_key
228 if current_fitness
> best_fitness
:
229 best_key
= current_key
230 best_fitness
= current_fitness
231 best_plaintext
= plaintext
233 logger
.debug('Simulated annealing: iteration {}, temperature {}, '
234 'current key {}, current_fitness {}, '
235 'best_plaintext {}'.format(i
, temperature
, current_key
,
236 current_fitness
, plaintext
[:50]))
237 temperature
= max(temperature
- dt
, 0.001)
239 # print(best_key, best_fitness, best_plaintext[:70])
240 return best_key
, best_fitness
# current_alphabet, current_fitness
243 def pocket_enigma_break_by_crib(message
, wheel_spec
, crib
, crib_position
):
244 """Break a pocket enigma using a crib (some plaintext that's expected to
245 be in a certain position). Returns a list of possible starting wheel
246 positions that could produce the crib.
248 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'h', 0)
250 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'he', 0)
252 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'll', 2)
254 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'l', 2)
256 >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'l', 3)
258 >>> pocket_enigma_break_by_crib('aaaaa', 1, 'l', 3)
261 pe
= PocketEnigma(wheel
=wheel_spec
)
262 possible_positions
= []
263 for p
in string
.ascii_lowercase
:
265 plaintext
= pe
.decipher(message
)
266 if plaintext
[crib_position
:crib_position
+len(crib
)] == crib
:
267 possible_positions
+= [p
]
268 return possible_positions
271 def plot_frequency_histogram(freqs
, sort_key
=None):
272 x
= range(len(freqs
))
273 y
= [freqs
[l
] for l
in sorted(freqs
, key
=sort_key
)]
275 ax
= f
.add_axes([0.1, 0.1, 0.9, 0.9])
276 ax
.bar(x
, y
, align
='center')
278 ax
.set_xticklabels(sorted(freqs
, key
=sort_key
))
282 if __name__
== "__main__":