Breaking keyword ciphers
[cipher-training.git] / cipher.py
1 """A set of ciphers with implementations for both enciphering and deciphering
2 them. See cipherbreak for automatic breaking of these ciphers
3 """
4
5 import string
6 import collections
7 from enum import Enum
8 from itertools import zip_longest, cycle, chain
9 from language_models import unaccent, sanitise
10
11
12 modular_division_table = [[0]*26 for _ in range(26)]
13 for a in range(26):
14 for b in range(26):
15 c = (a * b) % 26
16 modular_division_table[b][c] = a
17
18
19 def deduplicate(text):
20 """If a string contains duplicate letters, remove all but the first. Retain
21 the order of the letters.
22
23 >>> deduplicate('cat')
24 ['c', 'a', 't']
25 >>> deduplicate('happy')
26 ['h', 'a', 'p', 'y']
27 >>> deduplicate('cattca')
28 ['c', 'a', 't']
29 """
30 return list(collections.OrderedDict.fromkeys(text))
31
32
33 def caesar_encipher_letter(accented_letter, shift):
34 """Encipher a letter, given a shift amount
35
36 >>> caesar_encipher_letter('a', 1)
37 'b'
38 >>> caesar_encipher_letter('a', 2)
39 'c'
40 >>> caesar_encipher_letter('b', 2)
41 'd'
42 >>> caesar_encipher_letter('x', 2)
43 'z'
44 >>> caesar_encipher_letter('y', 2)
45 'a'
46 >>> caesar_encipher_letter('z', 2)
47 'b'
48 >>> caesar_encipher_letter('z', -1)
49 'y'
50 >>> caesar_encipher_letter('a', -1)
51 'z'
52 >>> caesar_encipher_letter('A', 1)
53 'B'
54 >>> caesar_encipher_letter('é', 1)
55 'f'
56 """
57 letter = unaccent(accented_letter)
58 if letter in string.ascii_letters:
59 if letter in string.ascii_uppercase:
60 alphabet_start = ord('A')
61 else:
62 alphabet_start = ord('a')
63 return chr(((ord(letter) - alphabet_start + shift) % 26) +
64 alphabet_start)
65 else:
66 return letter
67
68 def caesar_decipher_letter(letter, shift):
69 """Decipher a letter, given a shift amount
70
71 >>> caesar_decipher_letter('b', 1)
72 'a'
73 >>> caesar_decipher_letter('b', 2)
74 'z'
75 """
76 return caesar_encipher_letter(letter, -shift)
77
78 def caesar_encipher(message, shift):
79 """Encipher a message with the Caesar cipher of given shift
80
81 >>> caesar_encipher('abc', 1)
82 'bcd'
83 >>> caesar_encipher('abc', 2)
84 'cde'
85 >>> caesar_encipher('abcxyz', 2)
86 'cdezab'
87 >>> caesar_encipher('ab cx yz', 2)
88 'cd ez ab'
89 >>> caesar_encipher('Héllo World!', 2)
90 'Jgnnq Yqtnf!'
91 """
92 enciphered = [caesar_encipher_letter(l, shift) for l in message]
93 return ''.join(enciphered)
94
95 def caesar_decipher(message, shift):
96 """Decipher a message with the Caesar cipher of given shift
97
98 >>> caesar_decipher('bcd', 1)
99 'abc'
100 >>> caesar_decipher('cde', 2)
101 'abc'
102 >>> caesar_decipher('cd ez ab', 2)
103 'ab cx yz'
104 >>> caesar_decipher('Jgnnq Yqtnf!', 2)
105 'Hello World!'
106 """
107 return caesar_encipher(message, -shift)
108
109 def affine_encipher_letter(accented_letter, multiplier=1, adder=0,
110 one_based=True):
111 """Encipher a letter, given a multiplier and adder
112 >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
113 for l in string.ascii_uppercase])
114 'HKNQTWZCFILORUXADGJMPSVYBE'
115 >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
116 for l in string.ascii_uppercase])
117 'FILORUXADGJMPSVYBEHKNQTWZC'
118 """
119 letter = unaccent(accented_letter)
120 if letter in string.ascii_letters:
121 if letter in string.ascii_uppercase:
122 alphabet_start = ord('A')
123 else:
124 alphabet_start = ord('a')
125 letter_number = ord(letter) - alphabet_start
126 if one_based: letter_number += 1
127 cipher_number = (letter_number * multiplier + adder) % 26
128 if one_based: cipher_number -= 1
129 return chr(cipher_number % 26 + alphabet_start)
130 else:
131 return letter
132
133 def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
134 """Encipher a letter, given a multiplier and adder
135
136 >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
137 for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
138 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
139 >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
140 for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
141 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
142 """
143 if letter in string.ascii_letters:
144 if letter in string.ascii_uppercase:
145 alphabet_start = ord('A')
146 else:
147 alphabet_start = ord('a')
148 cipher_number = ord(letter) - alphabet_start
149 if one_based: cipher_number += 1
150 plaintext_number = (
151 modular_division_table[multiplier]
152 [(cipher_number - adder) % 26]
153 )
154 if one_based: plaintext_number -= 1
155 return chr(plaintext_number % 26 + alphabet_start)
156 else:
157 return letter
158
159 def affine_encipher(message, multiplier=1, adder=0, one_based=True):
160 """Encipher a message
161
162 >>> affine_encipher('hours passed during which jerico tried every ' \
163 'trick he could think of', 15, 22, True)
164 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
165 """
166 enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
167 for l in message]
168 return ''.join(enciphered)
169
170 def affine_decipher(message, multiplier=1, adder=0, one_based=True):
171 """Decipher a message
172
173 >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
174 'jfaoe ls omytd jlaxe mh', 15, 22, True)
175 'hours passed during which jerico tried every trick he could think of'
176 """
177 enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
178 for l in message]
179 return ''.join(enciphered)
180
181
182 class KeywordWrapAlphabet(Enum):
183 """Ways of wrapping the alphabet for keyword-based substitution ciphers."""
184 from_a = 1
185 from_last = 2
186 from_largest = 3
187
188
189 def keyword_cipher_alphabet_of(keyword,
190 wrap_alphabet=KeywordWrapAlphabet.from_a):
191 """Find the cipher alphabet given a keyword.
192 wrap_alphabet controls how the rest of the alphabet is added
193 after the keyword.
194
195 >>> keyword_cipher_alphabet_of('bayes')
196 'bayescdfghijklmnopqrtuvwxz'
197 >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_a)
198 'bayescdfghijklmnopqrtuvwxz'
199 >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_last)
200 'bayestuvwxzcdfghijklmnopqr'
201 >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_largest)
202 'bayeszcdfghijklmnopqrtuvwx'
203 """
204 if wrap_alphabet == KeywordWrapAlphabet.from_a:
205 cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
206 string.ascii_lowercase))
207 else:
208 if wrap_alphabet == KeywordWrapAlphabet.from_last:
209 last_keyword_letter = deduplicate(sanitise(keyword))[-1]
210 else:
211 last_keyword_letter = sorted(sanitise(keyword))[-1]
212 last_keyword_position = string.ascii_lowercase.find(
213 last_keyword_letter) + 1
214 cipher_alphabet = ''.join(
215 deduplicate(sanitise(keyword) +
216 string.ascii_lowercase[last_keyword_position:] +
217 string.ascii_lowercase))
218 return cipher_alphabet
219
220
221 def keyword_encipher(message, keyword,
222 wrap_alphabet=KeywordWrapAlphabet.from_a):
223 """Enciphers a message with a keyword substitution cipher.
224 wrap_alphabet controls how the rest of the alphabet is added
225 after the keyword.
226 0 : from 'a'
227 1 : from the last letter in the sanitised keyword
228 2 : from the largest letter in the sanitised keyword
229
230 >>> keyword_encipher('test message', 'bayes')
231 'rsqr ksqqbds'
232 >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_a)
233 'rsqr ksqqbds'
234 >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_last)
235 'lskl dskkbus'
236 >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_largest)
237 'qspq jsppbcs'
238 """
239 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
240 cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
241 return unaccent(message).lower().translate(cipher_translation)
242
243 def keyword_decipher(message, keyword,
244 wrap_alphabet=KeywordWrapAlphabet.from_a):
245 """Deciphers a message with a keyword substitution cipher.
246 wrap_alphabet controls how the rest of the alphabet is added
247 after the keyword.
248 0 : from 'a'
249 1 : from the last letter in the sanitised keyword
250 2 : from the largest letter in the sanitised keyword
251
252 >>> keyword_decipher('rsqr ksqqbds', 'bayes')
253 'test message'
254 >>> keyword_decipher('rsqr ksqqbds', 'bayes', KeywordWrapAlphabet.from_a)
255 'test message'
256 >>> keyword_decipher('lskl dskkbus', 'bayes', KeywordWrapAlphabet.from_last)
257 'test message'
258 >>> keyword_decipher('qspq jsppbcs', 'bayes', KeywordWrapAlphabet.from_largest)
259 'test message'
260 """
261 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
262 cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
263 return message.lower().translate(cipher_translation)
264
265
266 def vigenere_encipher(message, keyword):
267 """Vigenere encipher
268
269 >>> vigenere_encipher('hello', 'abc')
270 'hfnlp'
271 """
272 shifts = [ord(l) - ord('a') for l in sanitise(keyword)]
273 pairs = zip(message, cycle(shifts))
274 return ''.join([caesar_encipher_letter(l, k) for l, k in pairs])
275
276 def vigenere_decipher(message, keyword):
277 """Vigenere decipher
278
279 >>> vigenere_decipher('hfnlp', 'abc')
280 'hello'
281 """
282 shifts = [ord(l) - ord('a') for l in sanitise(keyword)]
283 pairs = zip(message, cycle(shifts))
284 return ''.join([caesar_decipher_letter(l, k) for l, k in pairs])
285
286 beaufort_encipher = vigenere_decipher
287 beaufort_decipher = vigenere_encipher
288
289
290 if __name__ == "__main__":
291 import doctest
292 doctest.testmod()