Done one-based affine ciphers, and breaking them.
[cipher-tools.git] / cipher.py
1 import string
2 import collections
3 import norms
4 import logging
5
6 logger = logging.getLogger(__name__)
7 logger.addHandler(logging.FileHandler('cipher.log'))
8 logger.setLevel(logging.WARNING)
9 logger.setLevel(logging.INFO)
10
11 english_counts = collections.defaultdict(int)
12 with open('count_1l.txt', 'r') as f:
13 for line in f:
14 (letter, count) = line.split("\t")
15 english_counts[letter] = int(count)
16 normalised_english_counts = norms.normalise(english_counts)
17
18
19 modular_division_table = [[0]*26 for x in range(26)]
20 for a in range(26):
21 for b in range(26):
22 c = (a * b) % 26
23 modular_division_table[b][c] = a
24
25 modular_division_table_one_based = [[0]*27 for x in range(27)]
26 for a in range(27):
27 for b in range(27):
28 c = ((a * b)-1) % 26 + 1
29 modular_division_table_one_based[b][c] = a
30
31
32
33 def sanitise(text):
34 """Remove all non-alphabetic characters and convert the text to lowercase
35
36 >>> sanitise('The Quick')
37 'thequick'
38 >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
39 'thequickbrownfoxjumpedoverthelazydog'
40 """
41 sanitised = [c.lower() for c in text if c in string.ascii_letters]
42 return ''.join(sanitised)
43
44 def ngrams(text, n):
45 return [tuple(text[i:i+n]) for i in range(len(text)-n+1)]
46
47 def letter_frequencies(text):
48 """Count the number of occurrences of each character in text
49
50 >>> sorted(letter_frequencies('abcdefabc').items())
51 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
52 >>> sorted(letter_frequencies('the quick brown fox jumped over the lazy dog').items())
53 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
54 >>> sorted(letter_frequencies('The Quick BROWN fox jumped! over... the (9lazy) DOG').items())
55 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
56 >>> sorted(letter_frequencies(sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')).items())
57 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
58 """
59 counts = collections.defaultdict(int)
60 for c in text:
61 counts[c] += 1
62 return counts
63
64 def caesar_encipher_letter(letter, shift):
65 """Encipher a letter, given a shift amount
66
67 >>> caesar_encipher_letter('a', 1)
68 'b'
69 >>> caesar_encipher_letter('a', 2)
70 'c'
71 >>> caesar_encipher_letter('b', 2)
72 'd'
73 >>> caesar_encipher_letter('x', 2)
74 'z'
75 >>> caesar_encipher_letter('y', 2)
76 'a'
77 >>> caesar_encipher_letter('z', 2)
78 'b'
79 >>> caesar_encipher_letter('z', -1)
80 'y'
81 >>> caesar_encipher_letter('a', -1)
82 'z'
83 """
84 if letter in string.ascii_letters:
85 if letter in string.ascii_uppercase:
86 alphabet_start = ord('A')
87 else:
88 alphabet_start = ord('a')
89 return chr(((ord(letter) - alphabet_start + shift) % 26) + alphabet_start)
90 else:
91 return letter
92
93 def caesar_decipher_letter(letter, shift):
94 """Decipher a letter, given a shift amount
95
96 >>> caesar_decipher_letter('b', 1)
97 'a'
98 >>> caesar_decipher_letter('b', 2)
99 'z'
100 """
101 return caesar_encipher_letter(letter, -shift)
102
103 def caesar_encipher(message, shift):
104 """Encipher a message with the Caesar cipher of given shift
105
106 >>> caesar_encipher('abc', 1)
107 'bcd'
108 >>> caesar_encipher('abc', 2)
109 'cde'
110 >>> caesar_encipher('abcxyz', 2)
111 'cdezab'
112 >>> caesar_encipher('ab cx yz', 2)
113 'cd ez ab'
114 """
115 enciphered = [caesar_encipher_letter(l, shift) for l in message]
116 return ''.join(enciphered)
117
118 def caesar_decipher(message, shift):
119 """Encipher a message with the Caesar cipher of given shift
120
121 >>> caesar_decipher('bcd', 1)
122 'abc'
123 >>> caesar_decipher('cde', 2)
124 'abc'
125 >>> caesar_decipher('cd ez ab', 2)
126 'ab cx yz'
127 """
128 return caesar_encipher(message, -shift)
129
130 def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
131 """Encipher a letter, given a multiplier and adder
132
133 >>> ''.join([affine_encipher_letter(l, 3, 5, True) for l in string.ascii_uppercase])
134 'HKNQTWZCFILORUXADGJMPSVYBE'
135 >>> ''.join([affine_encipher_letter(l, 3, 5, False) for l in string.ascii_uppercase])
136 'FILORUXADGJMPSVYBEHKNQTWZC'
137 """
138 if letter in string.ascii_letters:
139 if letter in string.ascii_uppercase:
140 alphabet_start = ord('A')
141 else:
142 alphabet_start = ord('a')
143 letter_number = ord(letter) - alphabet_start
144 if one_based: letter_number += 1
145 raw_cipher_number = (letter_number * multiplier + adder)
146 cipher_number = 0
147 if one_based:
148 cipher_number = (raw_cipher_number - 1) % 26
149 else:
150 cipher_number = raw_cipher_number % 26
151 return chr(cipher_number + alphabet_start)
152 else:
153 return letter
154
155 def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
156 """Encipher a letter, given a multiplier and adder
157
158 >>> ''.join([affine_decipher_letter(l, 3, 5, True) for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
159 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
160 >>> ''.join([affine_decipher_letter(l, 3, 5, False) for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
161 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
162 """
163 if letter in string.ascii_letters:
164 if letter in string.ascii_uppercase:
165 alphabet_start = ord('A')
166 else:
167 alphabet_start = ord('a')
168 cipher_number = ord(letter) - alphabet_start
169 if one_based: cipher_number += 1
170 plaintext_number = 0
171 if one_based:
172 plaintext_number = (modular_division_table_one_based[multiplier][(cipher_number - adder + 26) % 26] - 1) % 26
173 else:
174 #plaintext_number = (modular_division_table[multiplier][cipher_number] - adder) % 26
175 plaintext_number = modular_division_table[multiplier][(cipher_number - adder + 26) % 26]
176 return chr(plaintext_number + alphabet_start)
177 else:
178 return letter
179
180 def affine_encipher(message, multiplier=1, adder=0, one_based=True):
181 """Encipher a message
182
183 >>> affine_encipher('hours passed during which jerico tried every trick he could think of', 15, 22, True)
184 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
185 """
186
187 enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) for l in message]
188 return ''.join(enciphered)
189
190 def affine_decipher(message, multiplier=1, adder=0, one_based=True):
191 """Decipher a message
192
193 >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh', 15, 22, True)
194 'hours passed during which jerico tried every trick he could think of'
195 """
196 enciphered = [affine_decipher_letter(l, multiplier, adder, one_based) for l in message]
197 return ''.join(enciphered)
198
199
200 def caesar_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
201 """Breaks a Caesar cipher using frequency analysis
202
203 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm')
204 (4, 0.3186395289018361)
205 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert')
206 (19, 0.4215290123583277)
207 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur')
208 (13, 0.31602920807545154)
209 """
210 sanitised_message = sanitise(message)
211 best_shift = 0
212 best_fit = float("inf")
213 for shift in range(26):
214 plaintext = caesar_decipher(sanitised_message, shift)
215 frequencies = message_frequency_scaling(letter_frequencies(plaintext))
216 fit = metric(target_frequencies, frequencies)
217 logger.info('Caesar break attempt using key {0} gives fit of {1} and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
218 if fit < best_fit:
219 best_fit = fit
220 best_shift = shift
221 logger.info('Caesar break best fit: key {0} gives fit of {1} and decrypt starting: {2}'.format(best_shift, best_fit, caesar_decipher(sanitised_message, best_shift)[:50]))
222 return best_shift, best_fit
223
224 def affine_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
225 """Breaks an affine cipher using frequency analysis
226
227 >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd clm ckuxj.')
228 ((15, 22, True), 0.2357036181865554)
229 """
230 sanitised_message = sanitise(message)
231 best_multiplier = 0
232 best_adder = 0
233 best_one_based = True
234 best_fit = float("inf")
235 for one_based in [True, False]:
236 for multiplier in range(1, 26, 2):
237 for adder in range(26):
238 plaintext = affine_decipher(sanitised_message, multiplier, adder, one_based)
239 frequencies = message_frequency_scaling(letter_frequencies(plaintext))
240 fit = metric(target_frequencies, frequencies)
241 logger.info('Affine break attempt using key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(multiplier, adder, one_based, fit, plaintext[:50]))
242 if fit < best_fit:
243 best_fit = fit
244 best_multiplier = multiplier
245 best_adder = adder
246 best_one_based = one_based
247 logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(best_multiplier, best_adder, best_one_based, best_fit, affine_decipher(sanitised_message, best_multiplier, best_adder, best_one_based)[:50]))
248 return (best_multiplier, best_adder, best_one_based), best_fit
249
250
251 if __name__ == "__main__":
252 import doctest
253 doctest.testmod()