Breaking keyword ciphers
[cipher-training.git] / cipher.py
1 """A set of ciphers with implementations for both enciphering and deciphering
2 them. See cipherbreak for automatic breaking of these ciphers
3 """
4
5 import string
6 import collections
7 from enum import Enum
8 from itertools import zip_longest, cycle, chain
9 from language_models import unaccent, sanitise
10
11
12 modular_division_table = [[0]*26 for _ in range(26)]
13 for a in range(26):
14 for b in range(26):
15 c = (a * b) % 26
16 modular_division_table[b][c] = a
17
18
19 def every_nth(text, n, fillvalue=''):
20 """Returns n strings, each of which consists of every nth character,
21 starting with the 0th, 1st, 2nd, ... (n-1)th character
22
23 >>> every_nth(string.ascii_lowercase, 5)
24 ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
25 >>> every_nth(string.ascii_lowercase, 1)
26 ['abcdefghijklmnopqrstuvwxyz']
27 >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
28 ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
29 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
30 >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
31 ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
32 """
33 split_text = chunks(text, n, fillvalue)
34 return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
35
36 def combine_every_nth(split_text):
37 """Reforms a text split into every_nth strings
38
39 >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
40 'abcdefghijklmnopqrstuvwxyz'
41 >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
42 'abcdefghijklmnopqrstuvwxyz'
43 >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
44 'abcdefghijklmnopqrstuvwxyz'
45 """
46 return ''.join([''.join(l)
47 for l in zip_longest(*split_text, fillvalue='')])
48
49 def chunks(text, n, fillvalue=None):
50 """Split a text into chunks of n characters
51
52 >>> chunks('abcdefghi', 3)
53 ['abc', 'def', 'ghi']
54 >>> chunks('abcdefghi', 4)
55 ['abcd', 'efgh', 'i']
56 >>> chunks('abcdefghi', 4, fillvalue='!')
57 ['abcd', 'efgh', 'i!!!']
58 """
59 if fillvalue:
60 padding = fillvalue[0] * (n - len(text) % n)
61 else:
62 padding = ''
63 return [(text+padding)[i:i+n] for i in range(0, len(text), n)]
64
65 def deduplicate(text):
66 """If a string contains duplicate letters, remove all but the first. Retain
67 the order of the letters.
68
69 >>> deduplicate('cat')
70 ['c', 'a', 't']
71 >>> deduplicate('happy')
72 ['h', 'a', 'p', 'y']
73 >>> deduplicate('cattca')
74 ['c', 'a', 't']
75 """
76 return list(collections.OrderedDict.fromkeys(text))
77
78
79 def caesar_encipher_letter(accented_letter, shift):
80 """Encipher a letter, given a shift amount
81
82 >>> caesar_encipher_letter('a', 1)
83 'b'
84 >>> caesar_encipher_letter('a', 2)
85 'c'
86 >>> caesar_encipher_letter('b', 2)
87 'd'
88 >>> caesar_encipher_letter('x', 2)
89 'z'
90 >>> caesar_encipher_letter('y', 2)
91 'a'
92 >>> caesar_encipher_letter('z', 2)
93 'b'
94 >>> caesar_encipher_letter('z', -1)
95 'y'
96 >>> caesar_encipher_letter('a', -1)
97 'z'
98 >>> caesar_encipher_letter('A', 1)
99 'B'
100 >>> caesar_encipher_letter('é', 1)
101 'f'
102 """
103 letter = unaccent(accented_letter)
104 if letter in string.ascii_letters:
105 if letter in string.ascii_uppercase:
106 alphabet_start = ord('A')
107 else:
108 alphabet_start = ord('a')
109 return chr(((ord(letter) - alphabet_start + shift) % 26) +
110 alphabet_start)
111 else:
112 return letter
113
114 def caesar_decipher_letter(letter, shift):
115 """Decipher a letter, given a shift amount
116
117 >>> caesar_decipher_letter('b', 1)
118 'a'
119 >>> caesar_decipher_letter('b', 2)
120 'z'
121 """
122 return caesar_encipher_letter(letter, -shift)
123
124 def caesar_encipher(message, shift):
125 """Encipher a message with the Caesar cipher of given shift
126
127 >>> caesar_encipher('abc', 1)
128 'bcd'
129 >>> caesar_encipher('abc', 2)
130 'cde'
131 >>> caesar_encipher('abcxyz', 2)
132 'cdezab'
133 >>> caesar_encipher('ab cx yz', 2)
134 'cd ez ab'
135 >>> caesar_encipher('Héllo World!', 2)
136 'Jgnnq Yqtnf!'
137 """
138 enciphered = [caesar_encipher_letter(l, shift) for l in message]
139 return ''.join(enciphered)
140
141 def caesar_decipher(message, shift):
142 """Decipher a message with the Caesar cipher of given shift
143
144 >>> caesar_decipher('bcd', 1)
145 'abc'
146 >>> caesar_decipher('cde', 2)
147 'abc'
148 >>> caesar_decipher('cd ez ab', 2)
149 'ab cx yz'
150 >>> caesar_decipher('Jgnnq Yqtnf!', 2)
151 'Hello World!'
152 """
153 return caesar_encipher(message, -shift)
154
155 def affine_encipher_letter(accented_letter, multiplier=1, adder=0,
156 one_based=True):
157 """Encipher a letter, given a multiplier and adder
158 >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
159 for l in string.ascii_uppercase])
160 'HKNQTWZCFILORUXADGJMPSVYBE'
161 >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
162 for l in string.ascii_uppercase])
163 'FILORUXADGJMPSVYBEHKNQTWZC'
164 """
165 letter = unaccent(accented_letter)
166 if letter in string.ascii_letters:
167 if letter in string.ascii_uppercase:
168 alphabet_start = ord('A')
169 else:
170 alphabet_start = ord('a')
171 letter_number = ord(letter) - alphabet_start
172 if one_based: letter_number += 1
173 cipher_number = (letter_number * multiplier + adder) % 26
174 if one_based: cipher_number -= 1
175 return chr(cipher_number % 26 + alphabet_start)
176 else:
177 return letter
178
179 def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
180 """Encipher a letter, given a multiplier and adder
181
182 >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
183 for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
184 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
185 >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
186 for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
187 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
188 """
189 if letter in string.ascii_letters:
190 if letter in string.ascii_uppercase:
191 alphabet_start = ord('A')
192 else:
193 alphabet_start = ord('a')
194 cipher_number = ord(letter) - alphabet_start
195 if one_based: cipher_number += 1
196 plaintext_number = (
197 modular_division_table[multiplier]
198 [(cipher_number - adder) % 26]
199 )
200 if one_based: plaintext_number -= 1
201 return chr(plaintext_number % 26 + alphabet_start)
202 else:
203 return letter
204
205 def affine_encipher(message, multiplier=1, adder=0, one_based=True):
206 """Encipher a message
207
208 >>> affine_encipher('hours passed during which jerico tried every ' \
209 'trick he could think of', 15, 22, True)
210 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
211 """
212 enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
213 for l in message]
214 return ''.join(enciphered)
215
216 def affine_decipher(message, multiplier=1, adder=0, one_based=True):
217 """Decipher a message
218
219 >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
220 'jfaoe ls omytd jlaxe mh', 15, 22, True)
221 'hours passed during which jerico tried every trick he could think of'
222 """
223 enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
224 for l in message]
225 return ''.join(enciphered)
226
227
228 class KeywordWrapAlphabet(Enum):
229 """Ways of wrapping the alphabet for keyword-based substitution ciphers."""
230 from_a = 1
231 from_last = 2
232 from_largest = 3
233
234
235 def keyword_cipher_alphabet_of(keyword,
236 wrap_alphabet=KeywordWrapAlphabet.from_a):
237 """Find the cipher alphabet given a keyword.
238 wrap_alphabet controls how the rest of the alphabet is added
239 after the keyword.
240
241 >>> keyword_cipher_alphabet_of('bayes')
242 'bayescdfghijklmnopqrtuvwxz'
243 >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_a)
244 'bayescdfghijklmnopqrtuvwxz'
245 >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_last)
246 'bayestuvwxzcdfghijklmnopqr'
247 >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_largest)
248 'bayeszcdfghijklmnopqrtuvwx'
249 """
250 if wrap_alphabet == KeywordWrapAlphabet.from_a:
251 cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
252 string.ascii_lowercase))
253 else:
254 if wrap_alphabet == KeywordWrapAlphabet.from_last:
255 last_keyword_letter = deduplicate(sanitise(keyword))[-1]
256 else:
257 last_keyword_letter = sorted(sanitise(keyword))[-1]
258 last_keyword_position = string.ascii_lowercase.find(
259 last_keyword_letter) + 1
260 cipher_alphabet = ''.join(
261 deduplicate(sanitise(keyword) +
262 string.ascii_lowercase[last_keyword_position:] +
263 string.ascii_lowercase))
264 return cipher_alphabet
265
266
267 def keyword_encipher(message, keyword,
268 wrap_alphabet=KeywordWrapAlphabet.from_a):
269 """Enciphers a message with a keyword substitution cipher.
270 wrap_alphabet controls how the rest of the alphabet is added
271 after the keyword.
272 0 : from 'a'
273 1 : from the last letter in the sanitised keyword
274 2 : from the largest letter in the sanitised keyword
275
276 >>> keyword_encipher('test message', 'bayes')
277 'rsqr ksqqbds'
278 >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_a)
279 'rsqr ksqqbds'
280 >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_last)
281 'lskl dskkbus'
282 >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_largest)
283 'qspq jsppbcs'
284 """
285 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
286 cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
287 return unaccent(message).lower().translate(cipher_translation)
288
289 def keyword_decipher(message, keyword,
290 wrap_alphabet=KeywordWrapAlphabet.from_a):
291 """Deciphers a message with a keyword substitution cipher.
292 wrap_alphabet controls how the rest of the alphabet is added
293 after the keyword.
294 0 : from 'a'
295 1 : from the last letter in the sanitised keyword
296 2 : from the largest letter in the sanitised keyword
297
298 >>> keyword_decipher('rsqr ksqqbds', 'bayes')
299 'test message'
300 >>> keyword_decipher('rsqr ksqqbds', 'bayes', KeywordWrapAlphabet.from_a)
301 'test message'
302 >>> keyword_decipher('lskl dskkbus', 'bayes', KeywordWrapAlphabet.from_last)
303 'test message'
304 >>> keyword_decipher('qspq jsppbcs', 'bayes', KeywordWrapAlphabet.from_largest)
305 'test message'
306 """
307 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
308 cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
309 return message.lower().translate(cipher_translation)
310
311
312 if __name__ == "__main__":
313 import doctest
314 doctest.testmod()