1 """A set of functions to break the ciphers give in ciphers.py.
10 from itertools
import starmap
11 from segment
import segment
12 from multiprocessing
import Pool
14 import matplotlib
.pyplot
as plt
16 logger
= logging
.getLogger(__name__
)
17 logger
.addHandler(logging
.FileHandler('cipher.log'))
18 logger
.setLevel(logging
.WARNING
)
19 #logger.setLevel(logging.INFO)
20 #logger.setLevel(logging.DEBUG)
23 from language_models
import *
28 # c5a = open('2012/5a.ciphertext', 'r').read()
29 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
30 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
33 def frequencies(text
):
34 """Count the number of occurrences of each character in text
36 >>> sorted(frequencies('abcdefabc').items())
37 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
38 >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
39 'dog').items()) # doctest: +NORMALIZE_WHITESPACE
40 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
41 ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
42 ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
43 ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
44 >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
45 '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
46 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
47 ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
48 ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
49 ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
50 ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
51 >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\
52 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
53 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
54 ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
55 ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
56 ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
57 >>> frequencies('abcdefabcdef')['x']
60 return collections
.Counter(c
for c
in text
)
63 def caesar_break(message
, fitness
=Pletters
):
64 """Breaks a Caesar cipher using frequency analysis
66 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
67 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
68 (4, -130.849989015...)
69 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
70 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
71 (19, -128.82410410...)
72 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
73 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
74 (13, -126.25403935...)
76 sanitised_message
= sanitise(message
)
78 best_fit
= float('-inf')
79 for shift
in range(26):
80 plaintext
= caesar_decipher(sanitised_message
, shift
)
81 fit
= fitness(plaintext
)
82 logger
.debug('Caesar break attempt using key {0} gives fit of {1} '
83 'and decrypt starting: {2}'.format(shift
, fit
,
88 logger
.info('Caesar break best fit: key {0} gives fit of {1} and '
89 'decrypt starting: {2}'.format(best_shift
, best_fit
,
90 caesar_decipher(sanitised_message
, best_shift
)[:50]))
91 return best_shift
, best_fit
93 def affine_break(message
, fitness
=Pletters
):
94 """Breaks an affine cipher using frequency analysis
96 >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
97 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
98 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
99 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
100 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai ' \
101 'kxd clm ckuxj.') # doctest: +ELLIPSIS
102 ((15, 22, True), -340.601181913...)
104 sanitised_message
= sanitise(message
)
107 best_one_based
= True
108 best_fit
= float("-inf")
109 for one_based
in [True, False]:
110 for multiplier
in [x
for x
in range(1, 26, 2) if x
!= 13]:
111 for adder
in range(26):
112 plaintext
= affine_decipher(sanitised_message
,
113 multiplier
, adder
, one_based
)
114 fit
= fitness(plaintext
)
115 logger
.debug('Affine break attempt using key {0}x+{1} ({2}) '
116 'gives fit of {3} and decrypt starting: {4}'.
117 format(multiplier
, adder
, one_based
, fit
,
121 best_multiplier
= multiplier
123 best_one_based
= one_based
124 logger
.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of '
125 '{3} and decrypt starting: {4}'.format(
126 best_multiplier
, best_adder
, best_one_based
, best_fit
,
127 affine_decipher(sanitised_message
, best_multiplier
,
128 best_adder
, best_one_based
)[:50]))
129 return (best_multiplier
, best_adder
, best_one_based
), best_fit
131 def keyword_break(message
, wordlist
=keywords
, fitness
=Pletters
):
132 """Breaks a keyword substitution cipher using a dictionary and
135 >>> keyword_break(keyword_encipher('this is a test message for the ' \
136 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
137 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
138 (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
141 best_wrap_alphabet
= True
142 best_fit
= float("-inf")
143 for wrap_alphabet
in KeywordWrapAlphabet
:
144 for keyword
in wordlist
:
145 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
146 fit
= fitness(plaintext
)
147 logger
.debug('Keyword break attempt using key {0} (wrap={1}) '
148 'gives fit of {2} and decrypt starting: {3}'.format(
149 keyword
, wrap_alphabet
, fit
,
150 sanitise(plaintext
)[:50]))
153 best_keyword
= keyword
154 best_wrap_alphabet
= wrap_alphabet
155 logger
.info('Keyword break best fit with key {0} (wrap={1}) gives fit of '
156 '{2} and decrypt starting: {3}'.format(best_keyword
,
157 best_wrap_alphabet
, best_fit
, sanitise(
158 keyword_decipher(message
, best_keyword
,
159 best_wrap_alphabet
))[:50]))
160 return (best_keyword
, best_wrap_alphabet
), best_fit
162 def keyword_break_mp(message
, wordlist
=keywords
, fitness
=Pletters
,
164 """Breaks a keyword substitution cipher using a dictionary and
167 >>> keyword_break_mp(keyword_encipher('this is a test message for the ' \
168 'keyword decipherment', 'elephant', KeywordWrapAlphabet.from_last), \
169 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
170 (('elephant', <KeywordWrapAlphabet.from_last: 2>), -52.834575011...)
173 helper_args
= [(message
, word
, wrap
, fitness
)
175 for wrap
in KeywordWrapAlphabet
]
176 # Gotcha: the helper function here needs to be defined at the top level
177 # (limitation of Pool.starmap)
178 breaks
= pool
.starmap(keyword_break_worker
, helper_args
, chunksize
)
179 return max(breaks
, key
=lambda k
: k
[1])
181 def keyword_break_worker(message
, keyword
, wrap_alphabet
, fitness
):
182 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
183 fit
= fitness(plaintext
)
184 logger
.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
185 '{2} and decrypt starting: {3}'.format(keyword
,
186 wrap_alphabet
, fit
, sanitise(plaintext
)[:50]))
187 return (keyword
, wrap_alphabet
), fit
191 def plot_frequency_histogram(freqs
, sort_key
=None):
192 x
= range(len(freqs
.keys()))
193 y
= [freqs
[l
] for l
in sorted(freqs
.keys(), key
=sort_key
)]
195 ax
= f
.add_axes([0.1, 0.1, 0.9, 0.9])
196 ax
.bar(x
, y
, align
='center')
198 ax
.set_xticklabels(sorted(freqs
.keys(), key
=sort_key
))
202 if __name__
== "__main__":