Unaccent letters before enciphering
[cipher-training.git] / cipher.py
1 import string
2 import collections
3 import logging
4 import math
5 from itertools import zip_longest, cycle, chain
6 from language_models import *
7
8 logger = logging.getLogger(__name__)
9 logger.addHandler(logging.FileHandler('cipher.log'))
10 logger.setLevel(logging.WARNING)
11 #logger.setLevel(logging.INFO)
12 #logger.setLevel(logging.DEBUG)
13
14
15 modular_division_table = [[0]*26 for _ in range(26)]
16 for a in range(26):
17 for b in range(26):
18 c = (a * b) % 26
19 modular_division_table[b][c] = a
20
21
22 def every_nth(text, n, fillvalue=''):
23 """Returns n strings, each of which consists of every nth character,
24 starting with the 0th, 1st, 2nd, ... (n-1)th character
25
26 >>> every_nth(string.ascii_lowercase, 5)
27 ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
28 >>> every_nth(string.ascii_lowercase, 1)
29 ['abcdefghijklmnopqrstuvwxyz']
30 >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
31 ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
32 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
33 >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
34 ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
35 """
36 split_text = [text[i:i+n] for i in range(0, len(text), n)]
37 return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
38
39 def combine_every_nth(split_text):
40 """Reforms a text split into every_nth strings
41
42 >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
43 'abcdefghijklmnopqrstuvwxyz'
44 >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
45 'abcdefghijklmnopqrstuvwxyz'
46 >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
47 'abcdefghijklmnopqrstuvwxyz'
48 """
49 return ''.join([''.join(l)
50 for l in zip_longest(*split_text, fillvalue='')])
51
52 def chunks(text, n, fillvalue=None):
53 """Split a text into chunks of n characters
54
55 >>> chunks('abcdefghi', 3)
56 ['abc', 'def', 'ghi']
57 >>> chunks('abcdefghi', 4)
58 ['abcd', 'efgh', 'i']
59 >>> chunks('abcdefghi', 4, fillvalue='!')
60 ['abcd', 'efgh', 'i!!!']
61 """
62 if fillvalue:
63 padding = fillvalue[0] * (n - len(text) % n)
64 else:
65 padding = ''
66 return [(text+padding)[i:i+n] for i in range(0, len(text), n)]
67
68 def transpose(items, transposition):
69 """Moves items around according to the given transposition
70
71 >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3))
72 ['a', 'b', 'c', 'd']
73 >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0))
74 ['d', 'b', 'c', 'a']
75 >>> transpose([10,11,12,13,14,15], (3,2,4,1,5,0))
76 [13, 12, 14, 11, 15, 10]
77 """
78 transposed = [''] * len(transposition)
79 for p, t in enumerate(transposition):
80 transposed[p] = items[t]
81 return transposed
82
83 def untranspose(items, transposition):
84 """Undoes a transpose
85
86 >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
87 ['a', 'b', 'c', 'd']
88 >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
89 ['a', 'b', 'c', 'd']
90 >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
91 [10, 11, 12, 13, 14, 15]
92 """
93 transposed = [''] * len(transposition)
94 for p, t in enumerate(transposition):
95 transposed[t] = items[p]
96 return transposed
97
98 def deduplicate(text):
99 return list(collections.OrderedDict.fromkeys(text))
100
101
102 def caesar_encipher_letter(accented_letter, shift):
103 """Encipher a letter, given a shift amount
104
105 >>> caesar_encipher_letter('a', 1)
106 'b'
107 >>> caesar_encipher_letter('a', 2)
108 'c'
109 >>> caesar_encipher_letter('b', 2)
110 'd'
111 >>> caesar_encipher_letter('x', 2)
112 'z'
113 >>> caesar_encipher_letter('y', 2)
114 'a'
115 >>> caesar_encipher_letter('z', 2)
116 'b'
117 >>> caesar_encipher_letter('z', -1)
118 'y'
119 >>> caesar_encipher_letter('a', -1)
120 'z'
121 >>> caesar_encipher_letter('A', 1)
122 'B'
123 >>> caesar_encipher_letter('é', 1)
124 'f'
125 """
126 letter = unaccent(accented_letter)
127 if letter in string.ascii_letters:
128 if letter in string.ascii_uppercase:
129 alphabet_start = ord('A')
130 else:
131 alphabet_start = ord('a')
132 return chr(((ord(letter) - alphabet_start + shift) % 26) +
133 alphabet_start)
134 else:
135 return letter
136
137 def caesar_decipher_letter(letter, shift):
138 """Decipher a letter, given a shift amount
139
140 >>> caesar_decipher_letter('b', 1)
141 'a'
142 >>> caesar_decipher_letter('b', 2)
143 'z'
144 """
145 return caesar_encipher_letter(letter, -shift)
146
147 def caesar_encipher(message, shift):
148 """Encipher a message with the Caesar cipher of given shift
149
150 >>> caesar_encipher('abc', 1)
151 'bcd'
152 >>> caesar_encipher('abc', 2)
153 'cde'
154 >>> caesar_encipher('abcxyz', 2)
155 'cdezab'
156 >>> caesar_encipher('ab cx yz', 2)
157 'cd ez ab'
158 >>> caesar_encipher('Héllo World!', 2)
159 'Jgnnq Yqtnf!'
160 """
161 enciphered = [caesar_encipher_letter(l, shift) for l in message]
162 return ''.join(enciphered)
163
164 def caesar_decipher(message, shift):
165 """Decipher a message with the Caesar cipher of given shift
166
167 >>> caesar_decipher('bcd', 1)
168 'abc'
169 >>> caesar_decipher('cde', 2)
170 'abc'
171 >>> caesar_decipher('cd ez ab', 2)
172 'ab cx yz'
173 """
174 return caesar_encipher(message, -shift)
175
176 def affine_encipher_letter(accented_letter, multiplier=1, adder=0, one_based=True):
177 """Encipher a letter, given a multiplier and adder
178
179 >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
180 for l in string.ascii_uppercase])
181 'HKNQTWZCFILORUXADGJMPSVYBE'
182 >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
183 for l in string.ascii_uppercase])
184 'FILORUXADGJMPSVYBEHKNQTWZC'
185 """
186 letter = unaccent(accented_letter)
187 if letter in string.ascii_letters:
188 if letter in string.ascii_uppercase:
189 alphabet_start = ord('A')
190 else:
191 alphabet_start = ord('a')
192 letter_number = ord(letter) - alphabet_start
193 if one_based: letter_number += 1
194 cipher_number = (letter_number * multiplier + adder) % 26
195 if one_based: cipher_number -= 1
196 return chr(cipher_number % 26 + alphabet_start)
197 else:
198 return letter
199
200 def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
201 """Encipher a letter, given a multiplier and adder
202
203 >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
204 for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
205 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
206 >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
207 for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
208 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
209 """
210 if letter in string.ascii_letters:
211 if letter in string.ascii_uppercase:
212 alphabet_start = ord('A')
213 else:
214 alphabet_start = ord('a')
215 cipher_number = ord(letter) - alphabet_start
216 if one_based: cipher_number += 1
217 plaintext_number = (
218 modular_division_table[multiplier]
219 [(cipher_number - adder) % 26] )
220 if one_based: plaintext_number -= 1
221 return chr(plaintext_number % 26 + alphabet_start)
222 else:
223 return letter
224
225 def affine_encipher(message, multiplier=1, adder=0, one_based=True):
226 """Encipher a message
227
228 >>> affine_encipher('hours passed during which jerico tried every ' \
229 'trick he could think of', 15, 22, True)
230 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
231 """
232 enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
233 for l in message]
234 return ''.join(enciphered)
235
236 def affine_decipher(message, multiplier=1, adder=0, one_based=True):
237 """Decipher a message
238
239 >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
240 'jfaoe ls omytd jlaxe mh', 15, 22, True)
241 'hours passed during which jerico tried every trick he could think of'
242 """
243 enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
244 for l in message]
245 return ''.join(enciphered)
246
247
248 def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
249 """Find the cipher alphabet given a keyword.
250 wrap_alphabet controls how the rest of the alphabet is added
251 after the keyword.
252 0 : from 'a'
253 1 : from the last letter in the sanitised keyword
254 2 : from the largest letter in the sanitised keyword
255
256 >>> keyword_cipher_alphabet_of('bayes')
257 'bayescdfghijklmnopqrtuvwxz'
258 >>> keyword_cipher_alphabet_of('bayes', 0)
259 'bayescdfghijklmnopqrtuvwxz'
260 >>> keyword_cipher_alphabet_of('bayes', 1)
261 'bayestuvwxzcdfghijklmnopqr'
262 >>> keyword_cipher_alphabet_of('bayes', 2)
263 'bayeszcdfghijklmnopqrtuvwx'
264 """
265 if wrap_alphabet == 0:
266 cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
267 string.ascii_lowercase))
268 else:
269 if wrap_alphabet == 1:
270 last_keyword_letter = deduplicate(sanitise(keyword))[-1]
271 else:
272 last_keyword_letter = sorted(sanitise(keyword))[-1]
273 last_keyword_position = string.ascii_lowercase.find(
274 last_keyword_letter) + 1
275 cipher_alphabet = ''.join(
276 deduplicate(sanitise(keyword) +
277 string.ascii_lowercase[last_keyword_position:] +
278 string.ascii_lowercase))
279 return cipher_alphabet
280
281
282 def keyword_encipher(message, keyword, wrap_alphabet=0):
283 """Enciphers a message with a keyword substitution cipher.
284 wrap_alphabet controls how the rest of the alphabet is added
285 after the keyword.
286 0 : from 'a'
287 1 : from the last letter in the sanitised keyword
288 2 : from the largest letter in the sanitised keyword
289
290 >>> keyword_encipher('test message', 'bayes')
291 'rsqr ksqqbds'
292 >>> keyword_encipher('test message', 'bayes', 0)
293 'rsqr ksqqbds'
294 >>> keyword_encipher('test message', 'bayes', 1)
295 'lskl dskkbus'
296 >>> keyword_encipher('test message', 'bayes', 2)
297 'qspq jsppbcs'
298 """
299 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
300 cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
301 return unaccent(message).lower().translate(cipher_translation)
302
303 def keyword_decipher(message, keyword, wrap_alphabet=0):
304 """Deciphers a message with a keyword substitution cipher.
305 wrap_alphabet controls how the rest of the alphabet is added
306 after the keyword.
307 0 : from 'a'
308 1 : from the last letter in the sanitised keyword
309 2 : from the largest letter in the sanitised keyword
310
311 >>> keyword_decipher('rsqr ksqqbds', 'bayes')
312 'test message'
313 >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
314 'test message'
315 >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
316 'test message'
317 >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
318 'test message'
319 """
320 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
321 cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
322 return message.lower().translate(cipher_translation)
323
324
325 def vigenere_encipher(message, keyword):
326 """Vigenere encipher
327
328 >>> vigenere_encipher('hello', 'abc')
329 'hfnlp'
330 """
331 shifts = [ord(l) - ord('a') for l in sanitise(keyword)]
332 pairs = zip(message, cycle(shifts))
333 return ''.join([caesar_encipher_letter(l, k) for l, k in pairs])
334
335 def vigenere_decipher(message, keyword):
336 """Vigenere decipher
337
338 >>> vigenere_decipher('hfnlp', 'abc')
339 'hello'
340 """
341 shifts = [ord(l) - ord('a') for l in sanitise(keyword)]
342 pairs = zip(message, cycle(shifts))
343 return ''.join([caesar_decipher_letter(l, k) for l, k in pairs])
344
345 beaufort_encipher=vigenere_decipher
346 beaufort_decipher=vigenere_encipher
347
348
349 def transpositions_of(keyword):
350 """Finds the transpostions given by a keyword. For instance, the keyword
351 'clever' rearranges to 'celrv', so the first column (0) stays first, the
352 second column (1) moves to third, the third column (2) moves to second,
353 and so on.
354
355 If passed a tuple, assume it's already a transposition and just return it.
356
357 >>> transpositions_of('clever')
358 (0, 2, 1, 4, 3)
359 >>> transpositions_of('fred')
360 (3, 2, 0, 1)
361 >>> transpositions_of((3, 2, 0, 1))
362 (3, 2, 0, 1)
363 """
364 if isinstance(keyword, tuple):
365 return keyword
366 else:
367 key = deduplicate(keyword)
368 transpositions = tuple(key.index(l) for l in sorted(key))
369 return transpositions
370
371 def pad(message_len, group_len, fillvalue):
372 padding_length = group_len - message_len % group_len
373 if padding_length == group_len: padding_length = 0
374 padding = ''
375 for i in range(padding_length):
376 if callable(fillvalue):
377 padding += fillvalue()
378 else:
379 padding += fillvalue
380 return padding
381
382 def column_transposition_encipher(message, keyword, fillvalue=' ',
383 fillcolumnwise=False,
384 emptycolumnwise=False):
385 """Enciphers using the column transposition cipher.
386 Message is padded to allow all rows to be the same length.
387
388 >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True)
389 'hlohr eltee '
390 >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True, emptycolumnwise=True)
391 'hellothere '
392 >>> column_transposition_encipher('hellothere', 'abcdef')
393 'hellothere '
394 >>> column_transposition_encipher('hellothere', 'abcde')
395 'hellothere'
396 >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
397 'hellothere'
398 >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
399 'hlohreltee'
400 >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
401 'htehlelroe'
402 >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
403 'hellothere'
404 >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=True)
405 'heotllrehe'
406 >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=False)
407 'holrhetlee'
408 >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=True)
409 'htleehoelr'
410 >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=False)
411 'hleolteher'
412 >>> column_transposition_encipher('hellothere', 'cleverly')
413 'hleolthre e '
414 >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
415 'hleolthre!e!'
416 >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue=lambda: '*')
417 'hleolthre*e*'
418 """
419 transpositions = transpositions_of(keyword)
420 message += pad(len(message), len(transpositions), fillvalue)
421 if fillcolumnwise:
422 rows = every_nth(message, len(message) // len(transpositions))
423 else:
424 rows = chunks(message, len(transpositions))
425 transposed = [transpose(r, transpositions) for r in rows]
426 if emptycolumnwise:
427 return combine_every_nth(transposed)
428 else:
429 return ''.join(chain(*transposed))
430
431 def column_transposition_decipher(message, keyword, fillvalue=' ',
432 fillcolumnwise=False,
433 emptycolumnwise=False):
434 """Deciphers using the column transposition cipher.
435 Message is padded to allow all rows to be the same length.
436
437 >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
438 'hellothere'
439 >>> column_transposition_decipher('hlohreltee', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
440 'hellothere'
441 >>> column_transposition_decipher('htehlelroe', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
442 'hellothere'
443 >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
444 'hellothere'
445 >>> column_transposition_decipher('heotllrehe', 'clever', fillcolumnwise=True, emptycolumnwise=True)
446 'hellothere'
447 >>> column_transposition_decipher('holrhetlee', 'clever', fillcolumnwise=True, emptycolumnwise=False)
448 'hellothere'
449 >>> column_transposition_decipher('htleehoelr', 'clever', fillcolumnwise=False, emptycolumnwise=True)
450 'hellothere'
451 >>> column_transposition_decipher('hleolteher', 'clever', fillcolumnwise=False, emptycolumnwise=False)
452 'hellothere'
453 """
454 transpositions = transpositions_of(keyword)
455 message += pad(len(message), len(transpositions), '*')
456 if emptycolumnwise:
457 rows = every_nth(message, len(message) // len(transpositions))
458 else:
459 rows = chunks(message, len(transpositions))
460 untransposed = [untranspose(r, transpositions) for r in rows]
461 if fillcolumnwise:
462 return combine_every_nth(untransposed)
463 else:
464 return ''.join(chain(*untransposed))
465
466 def scytale_encipher(message, rows, fillvalue=' '):
467 """Enciphers using the scytale transposition cipher.
468 Message is padded with spaces to allow all rows to be the same length.
469
470 >>> scytale_encipher('thequickbrownfox', 3)
471 'tcnhkfeboqrxuo iw '
472 >>> scytale_encipher('thequickbrownfox', 4)
473 'tubnhirfecooqkwx'
474 >>> scytale_encipher('thequickbrownfox', 5)
475 'tubnhirfecooqkwx'
476 >>> scytale_encipher('thequickbrownfox', 6)
477 'tqcrnxhukof eibwo '
478 >>> scytale_encipher('thequickbrownfox', 7)
479 'tqcrnxhukof eibwo '
480 """
481 transpositions = [i for i in range(math.ceil(len(message) / rows))]
482 return column_transposition_encipher(message, transpositions,
483 fillcolumnwise=False, emptycolumnwise=True)
484
485 def scytale_decipher(message, rows):
486 """Deciphers using the scytale transposition cipher.
487 Assumes the message is padded so that all rows are the same length.
488
489 >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
490 'thequickbrownfox '
491 >>> scytale_decipher('tubnhirfecooqkwx', 4)
492 'thequickbrownfox'
493 >>> scytale_decipher('tubnhirfecooqkwx', 5)
494 'thequickbrownfox'
495 >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
496 'thequickbrownfox '
497 >>> scytale_decipher('tqcrnxhukof eibwo ', 7)
498 'thequickbrownfox '
499 """
500 transpositions = [i for i in range(math.ceil(len(message) / rows))]
501 return column_transposition_decipher(message, transpositions,
502 fillcolumnwise=False, emptycolumnwise=True)
503
504
505 if __name__ == "__main__":
506 import doctest
507 doctest.testmod()