Removed spurious affine tests
[cipher-tools.git] / cipher.py
1 import string
2 import collections
3 import norms
4
5 english_counts = collections.defaultdict(int)
6 with open('count_1l.txt', 'r') as f:
7 for line in f:
8 (letter, count) = line.split("\t")
9 english_counts[letter] = int(count)
10 normalised_english_counts = norms.normalise(english_counts)
11
12
13 modular_division_table = [[0]*26 for x in range(26)]
14 for a in range(26):
15 for b in range(26):
16 c = (a * b) % 26
17 modular_division_table[b][c] = a
18
19
20 def sanitise(text):
21 """Remove all non-alphabetic characters and convert the text to lowercase
22
23 >>> sanitise('The Quick')
24 'thequick'
25 >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
26 'thequickbrownfoxjumpedoverthelazydog'
27 """
28 sanitised = [c.lower() for c in text if c in string.ascii_letters]
29 return ''.join(sanitised)
30
31 def ngrams(text, n):
32 return [tuple(text[i:i+n]) for i in range(len(text)-n+1)]
33
34 def letter_frequencies(text):
35 """Count the number of occurrences of each character in text
36
37 >>> sorted(letter_frequencies('abcdefabc').items())
38 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
39 >>> sorted(letter_frequencies('the quick brown fox jumped over the lazy dog').items())
40 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
41 >>> sorted(letter_frequencies('The Quick BROWN fox jumped! over... the (9lazy) DOG').items())
42 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
43 >>> sorted(letter_frequencies(sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')).items())
44 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
45 """
46 counts = collections.defaultdict(int)
47 for c in text:
48 counts[c] += 1
49 return counts
50
51 def caesar_encipher_letter(letter, shift):
52 """Encipher a letter, given a shift amount
53
54 >>> caesar_encipher_letter('a', 1)
55 'b'
56 >>> caesar_encipher_letter('a', 2)
57 'c'
58 >>> caesar_encipher_letter('b', 2)
59 'd'
60 >>> caesar_encipher_letter('x', 2)
61 'z'
62 >>> caesar_encipher_letter('y', 2)
63 'a'
64 >>> caesar_encipher_letter('z', 2)
65 'b'
66 >>> caesar_encipher_letter('z', -1)
67 'y'
68 >>> caesar_encipher_letter('a', -1)
69 'z'
70 """
71 if letter in string.ascii_letters:
72 if letter in string.ascii_uppercase:
73 alphabet_start = ord('A')
74 else:
75 alphabet_start = ord('a')
76 return chr(((ord(letter) - alphabet_start + shift) % 26) + alphabet_start)
77 else:
78 return letter
79
80 def caesar_decipher_letter(letter, shift):
81 """Decipher a letter, given a shift amount
82
83 >>> caesar_decipher_letter('b', 1)
84 'a'
85 >>> caesar_decipher_letter('b', 2)
86 'z'
87 """
88 return caesar_encipher_letter(letter, -shift)
89
90 def caesar_encipher(message, shift):
91 """Encipher a message with the Caesar cipher of given shift
92
93 >>> caesar_encipher('abc', 1)
94 'bcd'
95 >>> caesar_encipher('abc', 2)
96 'cde'
97 >>> caesar_encipher('abcxyz', 2)
98 'cdezab'
99 >>> caesar_encipher('ab cx yz', 2)
100 'cd ez ab'
101 """
102 enciphered = [caesar_encipher_letter(l, shift) for l in message]
103 return ''.join(enciphered)
104
105 def caesar_decipher(message, shift):
106 """Encipher a message with the Caesar cipher of given shift
107
108 >>> caesar_decipher('bcd', 1)
109 'abc'
110 >>> caesar_decipher('cde', 2)
111 'abc'
112 >>> caesar_decipher('cd ez ab', 2)
113 'ab cx yz'
114 """
115 return caesar_encipher(message, -shift)
116
117 def affine_encipher_letter(letter, multiplier, adder, multiply_then_add=True):
118 if letter in string.ascii_letters:
119 if letter in string.ascii_uppercase:
120 alphabet_start = ord('A')
121 else:
122 alphabet_start = ord('a')
123 letter_number = ord(letter) - alphabet_start
124 cipher_number = 0
125 if multiply_then_add:
126 cipher_number = (letter_number * multiplier + adder) % 26
127 else:
128 cipher_number = ((letter_number + adder) * multiplier) % 26
129 return chr(cipher_number + alphabet_start)
130 else:
131 return letter
132
133 def affine_decipher_letter(letter, multiplier, adder, multiply_then_add=True):
134 if letter in string.ascii_letters:
135 if letter in string.ascii_uppercase:
136 alphabet_start = ord('A')
137 else:
138 alphabet_start = ord('a')
139 cipher_number = ord(letter) - alphabet_start
140 plaintext_number = 0
141 if multiply_then_add:
142 plaintext_number = modular_division_table[multiplier][(cipher_number - adder + 26) % 26]
143 else:
144 plaintext_number = (modular_division_table[multiplier][cipher_number] - adder) % 26
145 return chr(plaintext_number + alphabet_start)
146 else:
147 return letter
148
149 def affine_encipher(message, multiplier, adder, multiply_then_add=True):
150 enciphered = [affine_encipher_letter(l, multiplier, adder, multiply_then_add) for l in message]
151 return ''.join(enciphered)
152
153 def affine_decipher(message, multiplier, adder, multiply_then_add=True):
154 enciphered = [affine_decipher_letter(l, multiplier, adder, multiply_then_add) for l in message]
155 return ''.join(enciphered)
156
157
158 def caesar_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
159 """Breaks a Caesar cipher using frequency analysis
160
161 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm')
162 (4, 0.3186395289018361)
163 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert')
164 (19, 0.4215290123583277)
165 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur')
166 (13, 0.31602920807545154)
167 """
168 sanitised_message = sanitise(message)
169 best_shift = 0
170 best_fit = float("inf")
171 for shift in range(26):
172 plaintext = caesar_decipher(sanitised_message, shift)
173 frequencies = message_frequency_scaling(letter_frequencies(plaintext))
174 fit = metric(target_frequencies, frequencies)
175 if fit < best_fit:
176 best_fit = fit
177 best_shift = shift
178 return best_shift, best_fit
179
180 def affine_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
181 """Breaks an affine cipher using frequency analysis
182 """
183 sanitised_message = sanitise(message)
184 best_multiplier = 0
185 best_adder = 0
186 best_fit = float("inf")
187 for multiplier in range(1, 26, 2):
188 for adder in range(26):
189 plaintext = affine_decipher(sanitised_message, multiplier, adder)
190 frequencies = message_frequency_scaling(letter_frequencies(plaintext))
191 fit = metric(target_frequencies, frequencies)
192 if fit < best_fit:
193 best_fit = fit
194 best_multiplier = multiplier
195 best_adder = adder
196 return (best_multiplier, best_adder), best_fit
197
198
199 if __name__ == "__main__":
200 import doctest
201 doctest.testmod()