Challenge 1 done
[cipher-tools.git] / cipher.py
1 import string
2 import collections
3 import norms
4 import logging
5
6 logger = logging.getLogger(__name__)
7 logger.addHandler(logging.FileHandler('cipher.log'))
8 # logging.basicConfig(filename='cipher.log',level=logging.WARNING)
9 logger.setLevel(logging.INFO)
10
11 english_counts = collections.defaultdict(int)
12 with open('count_1l.txt', 'r') as f:
13 for line in f:
14 (letter, count) = line.split("\t")
15 english_counts[letter] = int(count)
16 normalised_english_counts = norms.normalise(english_counts)
17
18
19 modular_division_table = [[0]*26 for x in range(26)]
20 for a in range(26):
21 for b in range(26):
22 c = (a * b) % 26
23 modular_division_table[b][c] = a
24
25
26 def sanitise(text):
27 """Remove all non-alphabetic characters and convert the text to lowercase
28
29 >>> sanitise('The Quick')
30 'thequick'
31 >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
32 'thequickbrownfoxjumpedoverthelazydog'
33 """
34 sanitised = [c.lower() for c in text if c in string.ascii_letters]
35 return ''.join(sanitised)
36
37 def ngrams(text, n):
38 return [tuple(text[i:i+n]) for i in range(len(text)-n+1)]
39
40 def letter_frequencies(text):
41 """Count the number of occurrences of each character in text
42
43 >>> sorted(letter_frequencies('abcdefabc').items())
44 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
45 >>> sorted(letter_frequencies('the quick brown fox jumped over the lazy dog').items())
46 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
47 >>> sorted(letter_frequencies('The Quick BROWN fox jumped! over... the (9lazy) DOG').items())
48 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
49 >>> sorted(letter_frequencies(sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')).items())
50 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
51 """
52 counts = collections.defaultdict(int)
53 for c in text:
54 counts[c] += 1
55 return counts
56
57 def caesar_encipher_letter(letter, shift):
58 """Encipher a letter, given a shift amount
59
60 >>> caesar_encipher_letter('a', 1)
61 'b'
62 >>> caesar_encipher_letter('a', 2)
63 'c'
64 >>> caesar_encipher_letter('b', 2)
65 'd'
66 >>> caesar_encipher_letter('x', 2)
67 'z'
68 >>> caesar_encipher_letter('y', 2)
69 'a'
70 >>> caesar_encipher_letter('z', 2)
71 'b'
72 >>> caesar_encipher_letter('z', -1)
73 'y'
74 >>> caesar_encipher_letter('a', -1)
75 'z'
76 """
77 if letter in string.ascii_letters:
78 if letter in string.ascii_uppercase:
79 alphabet_start = ord('A')
80 else:
81 alphabet_start = ord('a')
82 return chr(((ord(letter) - alphabet_start + shift) % 26) + alphabet_start)
83 else:
84 return letter
85
86 def caesar_decipher_letter(letter, shift):
87 """Decipher a letter, given a shift amount
88
89 >>> caesar_decipher_letter('b', 1)
90 'a'
91 >>> caesar_decipher_letter('b', 2)
92 'z'
93 """
94 return caesar_encipher_letter(letter, -shift)
95
96 def caesar_encipher(message, shift):
97 """Encipher a message with the Caesar cipher of given shift
98
99 >>> caesar_encipher('abc', 1)
100 'bcd'
101 >>> caesar_encipher('abc', 2)
102 'cde'
103 >>> caesar_encipher('abcxyz', 2)
104 'cdezab'
105 >>> caesar_encipher('ab cx yz', 2)
106 'cd ez ab'
107 """
108 enciphered = [caesar_encipher_letter(l, shift) for l in message]
109 return ''.join(enciphered)
110
111 def caesar_decipher(message, shift):
112 """Encipher a message with the Caesar cipher of given shift
113
114 >>> caesar_decipher('bcd', 1)
115 'abc'
116 >>> caesar_decipher('cde', 2)
117 'abc'
118 >>> caesar_decipher('cd ez ab', 2)
119 'ab cx yz'
120 """
121 return caesar_encipher(message, -shift)
122
123 def affine_encipher_letter(letter, multiplier, adder, multiply_then_add=True):
124 if letter in string.ascii_letters:
125 if letter in string.ascii_uppercase:
126 alphabet_start = ord('A')
127 else:
128 alphabet_start = ord('a')
129 letter_number = ord(letter) - alphabet_start
130 cipher_number = 0
131 if multiply_then_add:
132 cipher_number = (letter_number * multiplier + adder) % 26
133 else:
134 cipher_number = ((letter_number + adder) * multiplier) % 26
135 return chr(cipher_number + alphabet_start)
136 else:
137 return letter
138
139 def affine_decipher_letter(letter, multiplier, adder, multiply_then_add=True):
140 if letter in string.ascii_letters:
141 if letter in string.ascii_uppercase:
142 alphabet_start = ord('A')
143 else:
144 alphabet_start = ord('a')
145 cipher_number = ord(letter) - alphabet_start
146 plaintext_number = 0
147 if multiply_then_add:
148 plaintext_number = modular_division_table[multiplier][(cipher_number - adder + 26) % 26]
149 else:
150 plaintext_number = (modular_division_table[multiplier][cipher_number] - adder) % 26
151 return chr(plaintext_number + alphabet_start)
152 else:
153 return letter
154
155 def affine_encipher(message, multiplier, adder, multiply_then_add=True):
156 enciphered = [affine_encipher_letter(l, multiplier, adder, multiply_then_add) for l in message]
157 return ''.join(enciphered)
158
159 def affine_decipher(message, multiplier, adder, multiply_then_add=True):
160 enciphered = [affine_decipher_letter(l, multiplier, adder, multiply_then_add) for l in message]
161 return ''.join(enciphered)
162
163
164 def caesar_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
165 """Breaks a Caesar cipher using frequency analysis
166
167 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm')
168 (4, 0.3186395289018361)
169 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert')
170 (19, 0.4215290123583277)
171 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur')
172 (13, 0.31602920807545154)
173 """
174 sanitised_message = sanitise(message)
175 best_shift = 0
176 best_fit = float("inf")
177 for shift in range(26):
178 plaintext = caesar_decipher(sanitised_message, shift)
179 frequencies = message_frequency_scaling(letter_frequencies(plaintext))
180 fit = metric(target_frequencies, frequencies)
181 logger.info('Caesar break attempt using key {0} gives fit of {1} and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
182 if fit < best_fit:
183 best_fit = fit
184 best_shift = shift
185 logger.info('Caesar break best fit: key {0} gives fit of {1} and decrypt starting: {2}'.format(best_shift, best_fit, caesar_decipher(sanitised_message, best_shift)[:50]))
186 return best_shift, best_fit
187
188 def affine_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
189 """Breaks an affine cipher using frequency analysis
190 """
191 sanitised_message = sanitise(message)
192 best_multiplier = 0
193 best_adder = 0
194 best_multiply_then_add = True
195 best_fit = float("inf")
196 for multiply_then_add in [True, False]:
197 for multiplier in range(1, 26, 2):
198 for adder in range(26):
199 plaintext = affine_decipher(sanitised_message, multiplier, adder, multiply_then_add)
200 frequencies = message_frequency_scaling(letter_frequencies(plaintext))
201 fit = metric(target_frequencies, frequencies)
202 logger.info('Affine break attempt using key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(multiplier, adder, multiply_then_add, fit, plaintext[:50]))
203 if fit < best_fit:
204 best_fit = fit
205 best_multiplier = multiplier
206 best_adder = adder
207 best_multiply_then_add = multiply_then_add
208 logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(best_multiplier, best_adder, best_multiply_then_add, best_fit, affine_decipher(sanitised_message, best_multiplier, best_adder, best_multiply_then_add)[:50]))
209 return (best_multiplier, best_adder, best_multiply_then_add), best_fit
210
211
212 if __name__ == "__main__":
213 import doctest
214 doctest.testmod()