1 """A set of functions to break the ciphers give in ciphers.py.
10 from itertools
import starmap
11 from segment
import segment
12 from multiprocessing
import Pool
14 import matplotlib
.pyplot
as plt
16 logger
= logging
.getLogger(__name__
)
17 logger
.addHandler(logging
.FileHandler('cipher.log'))
18 logger
.setLevel(logging
.WARNING
)
19 #logger.setLevel(logging.INFO)
20 #logger.setLevel(logging.DEBUG)
23 from language_models
import *
28 # c5a = open('2012/5a.ciphertext', 'r').read()
29 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
30 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
33 def frequencies(text
):
34 """Count the number of occurrences of each character in text
36 >>> sorted(frequencies('abcdefabc').items())
37 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
38 >>> sorted(frequencies('the quick brown fox jumped over the lazy ' \
39 'dog').items()) # doctest: +NORMALIZE_WHITESPACE
40 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1),
41 ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1),
42 ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2),
43 ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
44 >>> sorted(frequencies('The Quick BROWN fox jumped! over... the ' \
45 '(9lazy) DOG').items()) # doctest: +NORMALIZE_WHITESPACE
46 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1),
47 ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1),
48 ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2),
49 ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1),
50 ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
51 >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... '\
52 'the (9lazy) DOG')).items()) # doctest: +NORMALIZE_WHITESPACE
53 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1),
54 ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1),
55 ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1),
56 ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
57 >>> frequencies('abcdefabcdef')['x']
60 return collections
.Counter(c
for c
in text
)
63 def caesar_break(message
, fitness
=Pletters
):
64 """Breaks a Caesar cipher using frequency analysis
66 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrh' \
67 'ecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
68 (4, -130.849989015...)
69 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgtee' \
70 'raxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
71 (19, -128.82410410...)
72 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurer' \
73 'svaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
74 (13, -126.25403935...)
76 sanitised_message
= sanitise(message
)
78 best_fit
= float('-inf')
79 for shift
in range(26):
80 plaintext
= caesar_decipher(sanitised_message
, shift
)
81 fit
= fitness(plaintext
)
82 logger
.debug('Caesar break attempt using key {0} gives fit of {1} '
83 'and decrypt starting: {2}'.format(shift
, fit
,
88 logger
.info('Caesar break best fit: key {0} gives fit of {1} and '
89 'decrypt starting: {2}'.format(best_shift
, best_fit
,
90 caesar_decipher(sanitised_message
, best_shift
)[:50]))
91 return best_shift
, best_fit
93 def affine_break(message
, fitness
=Pletters
):
94 """Breaks an affine cipher using frequency analysis
96 >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls ' \
97 'omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ' \
98 'ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ' \
99 'ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls ' \
100 'bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai ' \
101 'kxd clm ckuxj.') # doctest: +ELLIPSIS
102 ((15, 22, True), -340.601181913...)
104 sanitised_message
= sanitise(message
)
107 best_one_based
= True
108 best_fit
= float("-inf")
109 for one_based
in [True, False]:
110 for multiplier
in [x
for x
in range(1, 26, 2) if x
!= 13]:
111 for adder
in range(26):
112 plaintext
= affine_decipher(sanitised_message
,
113 multiplier
, adder
, one_based
)
114 fit
= fitness(plaintext
)
115 logger
.debug('Affine break attempt using key {0}x+{1} ({2}) '
116 'gives fit of {3} and decrypt starting: {4}'.
117 format(multiplier
, adder
, one_based
, fit
,
121 best_multiplier
= multiplier
123 best_one_based
= one_based
124 logger
.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of '
125 '{3} and decrypt starting: {4}'.format(
126 best_multiplier
, best_adder
, best_one_based
, best_fit
,
127 affine_decipher(sanitised_message
, best_multiplier
,
128 best_adder
, best_one_based
)[:50]))
129 return (best_multiplier
, best_adder
, best_one_based
), best_fit
132 def plot_frequency_histogram(freqs
, sort_key
=None):
133 x
= range(len(freqs
.keys()))
134 y
= [freqs
[l
] for l
in sorted(freqs
.keys(), key
=sort_key
)]
136 ax
= f
.add_axes([0.1, 0.1, 0.9, 0.9])
137 ax
.bar(x
, y
, align
='center')
139 ax
.set_xticklabels(sorted(freqs
.keys(), key
=sort_key
))
143 if __name__
== "__main__":