Hillclimbing to solve monosubstitution ciphers
[cipher-training.git] / cipher.py
1 """A set of ciphers with implementations for both enciphering and deciphering
2 them. See cipherbreak for automatic breaking of these ciphers
3 """
4
5 import string
6 import collections
7 from enum import Enum
8 from itertools import zip_longest, cycle, chain
9 from language_models import unaccent, sanitise
10
11
12 modular_division_table = [[0]*26 for _ in range(26)]
13 for a in range(26):
14 for b in range(26):
15 c = (a * b) % 26
16 modular_division_table[b][c] = a
17
18
19 def every_nth(text, n, fillvalue=''):
20 """Returns n strings, each of which consists of every nth character,
21 starting with the 0th, 1st, 2nd, ... (n-1)th character
22
23 >>> every_nth(string.ascii_lowercase, 5)
24 ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
25 >>> every_nth(string.ascii_lowercase, 1)
26 ['abcdefghijklmnopqrstuvwxyz']
27 >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
28 ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
29 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
30 >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
31 ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
32 """
33 split_text = chunks(text, n, fillvalue)
34 return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
35
36 def combine_every_nth(split_text):
37 """Reforms a text split into every_nth strings
38
39 >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
40 'abcdefghijklmnopqrstuvwxyz'
41 >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
42 'abcdefghijklmnopqrstuvwxyz'
43 >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
44 'abcdefghijklmnopqrstuvwxyz'
45 """
46 return ''.join([''.join(l)
47 for l in zip_longest(*split_text, fillvalue='')])
48
49 def chunks(text, n, fillvalue=None):
50 """Split a text into chunks of n characters
51
52 >>> chunks('abcdefghi', 3)
53 ['abc', 'def', 'ghi']
54 >>> chunks('abcdefghi', 4)
55 ['abcd', 'efgh', 'i']
56 >>> chunks('abcdefghi', 4, fillvalue='!')
57 ['abcd', 'efgh', 'i!!!']
58 """
59 if fillvalue:
60 padding = fillvalue[0] * (n - len(text) % n)
61 else:
62 padding = ''
63 return [(text+padding)[i:i+n] for i in range(0, len(text), n)]
64
65
66 def deduplicate(text):
67 """If a string contains duplicate letters, remove all but the first. Retain
68 the order of the letters.
69
70 >>> deduplicate('cat')
71 ['c', 'a', 't']
72 >>> deduplicate('happy')
73 ['h', 'a', 'p', 'y']
74 >>> deduplicate('cattca')
75 ['c', 'a', 't']
76 """
77 return list(collections.OrderedDict.fromkeys(text))
78
79
80 def caesar_encipher_letter(accented_letter, shift):
81 """Encipher a letter, given a shift amount
82
83 >>> caesar_encipher_letter('a', 1)
84 'b'
85 >>> caesar_encipher_letter('a', 2)
86 'c'
87 >>> caesar_encipher_letter('b', 2)
88 'd'
89 >>> caesar_encipher_letter('x', 2)
90 'z'
91 >>> caesar_encipher_letter('y', 2)
92 'a'
93 >>> caesar_encipher_letter('z', 2)
94 'b'
95 >>> caesar_encipher_letter('z', -1)
96 'y'
97 >>> caesar_encipher_letter('a', -1)
98 'z'
99 >>> caesar_encipher_letter('A', 1)
100 'B'
101 >>> caesar_encipher_letter('é', 1)
102 'f'
103 """
104 letter = unaccent(accented_letter)
105 if letter in string.ascii_letters:
106 if letter in string.ascii_uppercase:
107 alphabet_start = ord('A')
108 else:
109 alphabet_start = ord('a')
110 return chr(((ord(letter) - alphabet_start + shift) % 26) +
111 alphabet_start)
112 else:
113 return letter
114
115 def caesar_decipher_letter(letter, shift):
116 """Decipher a letter, given a shift amount
117
118 >>> caesar_decipher_letter('b', 1)
119 'a'
120 >>> caesar_decipher_letter('b', 2)
121 'z'
122 """
123 return caesar_encipher_letter(letter, -shift)
124
125 def caesar_encipher(message, shift):
126 """Encipher a message with the Caesar cipher of given shift
127
128 >>> caesar_encipher('abc', 1)
129 'bcd'
130 >>> caesar_encipher('abc', 2)
131 'cde'
132 >>> caesar_encipher('abcxyz', 2)
133 'cdezab'
134 >>> caesar_encipher('ab cx yz', 2)
135 'cd ez ab'
136 >>> caesar_encipher('Héllo World!', 2)
137 'Jgnnq Yqtnf!'
138 """
139 enciphered = [caesar_encipher_letter(l, shift) for l in message]
140 return ''.join(enciphered)
141
142 def caesar_decipher(message, shift):
143 """Decipher a message with the Caesar cipher of given shift
144
145 >>> caesar_decipher('bcd', 1)
146 'abc'
147 >>> caesar_decipher('cde', 2)
148 'abc'
149 >>> caesar_decipher('cd ez ab', 2)
150 'ab cx yz'
151 >>> caesar_decipher('Jgnnq Yqtnf!', 2)
152 'Hello World!'
153 """
154 return caesar_encipher(message, -shift)
155
156 def affine_encipher_letter(accented_letter, multiplier=1, adder=0,
157 one_based=True):
158 """Encipher a letter, given a multiplier and adder
159 >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
160 for l in string.ascii_uppercase])
161 'HKNQTWZCFILORUXADGJMPSVYBE'
162 >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
163 for l in string.ascii_uppercase])
164 'FILORUXADGJMPSVYBEHKNQTWZC'
165 """
166 letter = unaccent(accented_letter)
167 if letter in string.ascii_letters:
168 if letter in string.ascii_uppercase:
169 alphabet_start = ord('A')
170 else:
171 alphabet_start = ord('a')
172 letter_number = ord(letter) - alphabet_start
173 if one_based: letter_number += 1
174 cipher_number = (letter_number * multiplier + adder) % 26
175 if one_based: cipher_number -= 1
176 return chr(cipher_number % 26 + alphabet_start)
177 else:
178 return letter
179
180 def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
181 """Encipher a letter, given a multiplier and adder
182
183 >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
184 for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
185 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
186 >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
187 for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
188 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
189 """
190 if letter in string.ascii_letters:
191 if letter in string.ascii_uppercase:
192 alphabet_start = ord('A')
193 else:
194 alphabet_start = ord('a')
195 cipher_number = ord(letter) - alphabet_start
196 if one_based: cipher_number += 1
197 plaintext_number = (
198 modular_division_table[multiplier]
199 [(cipher_number - adder) % 26]
200 )
201 if one_based: plaintext_number -= 1
202 return chr(plaintext_number % 26 + alphabet_start)
203 else:
204 return letter
205
206 def affine_encipher(message, multiplier=1, adder=0, one_based=True):
207 """Encipher a message
208
209 >>> affine_encipher('hours passed during which jerico tried every ' \
210 'trick he could think of', 15, 22, True)
211 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
212 """
213 enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
214 for l in message]
215 return ''.join(enciphered)
216
217 def affine_decipher(message, multiplier=1, adder=0, one_based=True):
218 """Decipher a message
219
220 >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
221 'jfaoe ls omytd jlaxe mh', 15, 22, True)
222 'hours passed during which jerico tried every trick he could think of'
223 """
224 enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
225 for l in message]
226 return ''.join(enciphered)
227
228
229 class KeywordWrapAlphabet(Enum):
230 """Ways of wrapping the alphabet for keyword-based substitution ciphers."""
231 from_a = 1
232 from_last = 2
233 from_largest = 3
234
235
236 def keyword_cipher_alphabet_of(keyword,
237 wrap_alphabet=KeywordWrapAlphabet.from_a):
238 """Find the cipher alphabet given a keyword.
239 wrap_alphabet controls how the rest of the alphabet is added
240 after the keyword.
241
242 >>> keyword_cipher_alphabet_of('bayes')
243 'bayescdfghijklmnopqrtuvwxz'
244 >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_a)
245 'bayescdfghijklmnopqrtuvwxz'
246 >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_last)
247 'bayestuvwxzcdfghijklmnopqr'
248 >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_largest)
249 'bayeszcdfghijklmnopqrtuvwx'
250 """
251 if wrap_alphabet == KeywordWrapAlphabet.from_a:
252 cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
253 string.ascii_lowercase))
254 else:
255 if wrap_alphabet == KeywordWrapAlphabet.from_last:
256 last_keyword_letter = deduplicate(sanitise(keyword))[-1]
257 else:
258 last_keyword_letter = sorted(sanitise(keyword))[-1]
259 last_keyword_position = string.ascii_lowercase.find(
260 last_keyword_letter) + 1
261 cipher_alphabet = ''.join(
262 deduplicate(sanitise(keyword) +
263 string.ascii_lowercase[last_keyword_position:] +
264 string.ascii_lowercase))
265 return cipher_alphabet
266
267
268 def keyword_encipher(message, keyword,
269 wrap_alphabet=KeywordWrapAlphabet.from_a):
270 """Enciphers a message with a keyword substitution cipher.
271 wrap_alphabet controls how the rest of the alphabet is added
272 after the keyword.
273 0 : from 'a'
274 1 : from the last letter in the sanitised keyword
275 2 : from the largest letter in the sanitised keyword
276
277 >>> keyword_encipher('test message', 'bayes')
278 'rsqr ksqqbds'
279 >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_a)
280 'rsqr ksqqbds'
281 >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_last)
282 'lskl dskkbus'
283 >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_largest)
284 'qspq jsppbcs'
285 """
286 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
287 cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
288 return unaccent(message).lower().translate(cipher_translation)
289
290 def keyword_decipher(message, keyword,
291 wrap_alphabet=KeywordWrapAlphabet.from_a):
292 """Deciphers a message with a keyword substitution cipher.
293 wrap_alphabet controls how the rest of the alphabet is added
294 after the keyword.
295 0 : from 'a'
296 1 : from the last letter in the sanitised keyword
297 2 : from the largest letter in the sanitised keyword
298
299 >>> keyword_decipher('rsqr ksqqbds', 'bayes')
300 'test message'
301 >>> keyword_decipher('rsqr ksqqbds', 'bayes', KeywordWrapAlphabet.from_a)
302 'test message'
303 >>> keyword_decipher('lskl dskkbus', 'bayes', KeywordWrapAlphabet.from_last)
304 'test message'
305 >>> keyword_decipher('qspq jsppbcs', 'bayes', KeywordWrapAlphabet.from_largest)
306 'test message'
307 """
308 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
309 cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
310 return message.lower().translate(cipher_translation)
311
312
313 def vigenere_encipher(message, keyword):
314 """Vigenere encipher
315
316 >>> vigenere_encipher('hello', 'abc')
317 'hfnlp'
318 """
319 shifts = [ord(l) - ord('a') for l in sanitise(keyword)]
320 pairs = zip(message, cycle(shifts))
321 return ''.join([caesar_encipher_letter(l, k) for l, k in pairs])
322
323 def vigenere_decipher(message, keyword):
324 """Vigenere decipher
325
326 >>> vigenere_decipher('hfnlp', 'abc')
327 'hello'
328 """
329 shifts = [ord(l) - ord('a') for l in sanitise(keyword)]
330 pairs = zip(message, cycle(shifts))
331 return ''.join([caesar_decipher_letter(l, k) for l, k in pairs])
332
333 beaufort_encipher = vigenere_decipher
334 beaufort_decipher = vigenere_encipher
335
336
337 if __name__ == "__main__":
338 import doctest
339 doctest.testmod()