Caching word segmentation
[cipher-training.git] / cipher.py
1 import string
2 import collections
3 import math
4 from itertools import zip_longest, cycle, chain
5 from language_models import *
6
7
8 modular_division_table = [[0]*26 for _ in range(26)]
9 for a in range(26):
10 for b in range(26):
11 c = (a * b) % 26
12 modular_division_table[b][c] = a
13
14
15 def every_nth(text, n, fillvalue=''):
16 """Returns n strings, each of which consists of every nth character,
17 starting with the 0th, 1st, 2nd, ... (n-1)th character
18
19 >>> every_nth(string.ascii_lowercase, 5)
20 ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
21 >>> every_nth(string.ascii_lowercase, 1)
22 ['abcdefghijklmnopqrstuvwxyz']
23 >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
24 ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
25 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
26 >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
27 ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
28 """
29 split_text = [text[i:i+n] for i in range(0, len(text), n)]
30 return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
31
32 def combine_every_nth(split_text):
33 """Reforms a text split into every_nth strings
34
35 >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
36 'abcdefghijklmnopqrstuvwxyz'
37 >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
38 'abcdefghijklmnopqrstuvwxyz'
39 >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
40 'abcdefghijklmnopqrstuvwxyz'
41 """
42 return ''.join([''.join(l)
43 for l in zip_longest(*split_text, fillvalue='')])
44
45 def chunks(text, n, fillvalue=None):
46 """Split a text into chunks of n characters
47
48 >>> chunks('abcdefghi', 3)
49 ['abc', 'def', 'ghi']
50 >>> chunks('abcdefghi', 4)
51 ['abcd', 'efgh', 'i']
52 >>> chunks('abcdefghi', 4, fillvalue='!')
53 ['abcd', 'efgh', 'i!!!']
54 """
55 if fillvalue:
56 padding = fillvalue[0] * (n - len(text) % n)
57 else:
58 padding = ''
59 return [(text+padding)[i:i+n] for i in range(0, len(text), n)]
60
61 def transpose(items, transposition):
62 """Moves items around according to the given transposition
63
64 >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3))
65 ['a', 'b', 'c', 'd']
66 >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0))
67 ['d', 'b', 'c', 'a']
68 >>> transpose([10,11,12,13,14,15], (3,2,4,1,5,0))
69 [13, 12, 14, 11, 15, 10]
70 """
71 transposed = [''] * len(transposition)
72 for p, t in enumerate(transposition):
73 transposed[p] = items[t]
74 return transposed
75
76 def untranspose(items, transposition):
77 """Undoes a transpose
78
79 >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
80 ['a', 'b', 'c', 'd']
81 >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
82 ['a', 'b', 'c', 'd']
83 >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
84 [10, 11, 12, 13, 14, 15]
85 """
86 transposed = [''] * len(transposition)
87 for p, t in enumerate(transposition):
88 transposed[t] = items[p]
89 return transposed
90
91 def deduplicate(text):
92 return list(collections.OrderedDict.fromkeys(text))
93
94
95 def caesar_encipher_letter(accented_letter, shift):
96 """Encipher a letter, given a shift amount
97
98 >>> caesar_encipher_letter('a', 1)
99 'b'
100 >>> caesar_encipher_letter('a', 2)
101 'c'
102 >>> caesar_encipher_letter('b', 2)
103 'd'
104 >>> caesar_encipher_letter('x', 2)
105 'z'
106 >>> caesar_encipher_letter('y', 2)
107 'a'
108 >>> caesar_encipher_letter('z', 2)
109 'b'
110 >>> caesar_encipher_letter('z', -1)
111 'y'
112 >>> caesar_encipher_letter('a', -1)
113 'z'
114 >>> caesar_encipher_letter('A', 1)
115 'B'
116 >>> caesar_encipher_letter('é', 1)
117 'f'
118 """
119 letter = unaccent(accented_letter)
120 if letter in string.ascii_letters:
121 if letter in string.ascii_uppercase:
122 alphabet_start = ord('A')
123 else:
124 alphabet_start = ord('a')
125 return chr(((ord(letter) - alphabet_start + shift) % 26) +
126 alphabet_start)
127 else:
128 return letter
129
130 def caesar_decipher_letter(letter, shift):
131 """Decipher a letter, given a shift amount
132
133 >>> caesar_decipher_letter('b', 1)
134 'a'
135 >>> caesar_decipher_letter('b', 2)
136 'z'
137 """
138 return caesar_encipher_letter(letter, -shift)
139
140 def caesar_encipher(message, shift):
141 """Encipher a message with the Caesar cipher of given shift
142
143 >>> caesar_encipher('abc', 1)
144 'bcd'
145 >>> caesar_encipher('abc', 2)
146 'cde'
147 >>> caesar_encipher('abcxyz', 2)
148 'cdezab'
149 >>> caesar_encipher('ab cx yz', 2)
150 'cd ez ab'
151 >>> caesar_encipher('Héllo World!', 2)
152 'Jgnnq Yqtnf!'
153 """
154 enciphered = [caesar_encipher_letter(l, shift) for l in message]
155 return ''.join(enciphered)
156
157 def caesar_decipher(message, shift):
158 """Decipher a message with the Caesar cipher of given shift
159
160 >>> caesar_decipher('bcd', 1)
161 'abc'
162 >>> caesar_decipher('cde', 2)
163 'abc'
164 >>> caesar_decipher('cd ez ab', 2)
165 'ab cx yz'
166 >>> caesar_decipher('Jgnnq Yqtnf!', 2)
167 'Hello World!'
168 """
169 return caesar_encipher(message, -shift)
170
171 def affine_encipher_letter(accented_letter, multiplier=1, adder=0, one_based=True):
172 """Encipher a letter, given a multiplier and adder
173
174 >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
175 for l in string.ascii_uppercase])
176 'HKNQTWZCFILORUXADGJMPSVYBE'
177 >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
178 for l in string.ascii_uppercase])
179 'FILORUXADGJMPSVYBEHKNQTWZC'
180 """
181 letter = unaccent(accented_letter)
182 if letter in string.ascii_letters:
183 if letter in string.ascii_uppercase:
184 alphabet_start = ord('A')
185 else:
186 alphabet_start = ord('a')
187 letter_number = ord(letter) - alphabet_start
188 if one_based: letter_number += 1
189 cipher_number = (letter_number * multiplier + adder) % 26
190 if one_based: cipher_number -= 1
191 return chr(cipher_number % 26 + alphabet_start)
192 else:
193 return letter
194
195 def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
196 """Encipher a letter, given a multiplier and adder
197
198 >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
199 for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
200 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
201 >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
202 for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
203 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
204 """
205 if letter in string.ascii_letters:
206 if letter in string.ascii_uppercase:
207 alphabet_start = ord('A')
208 else:
209 alphabet_start = ord('a')
210 cipher_number = ord(letter) - alphabet_start
211 if one_based: cipher_number += 1
212 plaintext_number = (
213 modular_division_table[multiplier]
214 [(cipher_number - adder) % 26] )
215 if one_based: plaintext_number -= 1
216 return chr(plaintext_number % 26 + alphabet_start)
217 else:
218 return letter
219
220 def affine_encipher(message, multiplier=1, adder=0, one_based=True):
221 """Encipher a message
222
223 >>> affine_encipher('hours passed during which jerico tried every ' \
224 'trick he could think of', 15, 22, True)
225 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
226 """
227 enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
228 for l in message]
229 return ''.join(enciphered)
230
231 def affine_decipher(message, multiplier=1, adder=0, one_based=True):
232 """Decipher a message
233
234 >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
235 'jfaoe ls omytd jlaxe mh', 15, 22, True)
236 'hours passed during which jerico tried every trick he could think of'
237 """
238 enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
239 for l in message]
240 return ''.join(enciphered)
241
242
243 def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
244 """Find the cipher alphabet given a keyword.
245 wrap_alphabet controls how the rest of the alphabet is added
246 after the keyword.
247 0 : from 'a'
248 1 : from the last letter in the sanitised keyword
249 2 : from the largest letter in the sanitised keyword
250
251 >>> keyword_cipher_alphabet_of('bayes')
252 'bayescdfghijklmnopqrtuvwxz'
253 >>> keyword_cipher_alphabet_of('bayes', 0)
254 'bayescdfghijklmnopqrtuvwxz'
255 >>> keyword_cipher_alphabet_of('bayes', 1)
256 'bayestuvwxzcdfghijklmnopqr'
257 >>> keyword_cipher_alphabet_of('bayes', 2)
258 'bayeszcdfghijklmnopqrtuvwx'
259 """
260 if wrap_alphabet == 0:
261 cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
262 string.ascii_lowercase))
263 else:
264 if wrap_alphabet == 1:
265 last_keyword_letter = deduplicate(sanitise(keyword))[-1]
266 else:
267 last_keyword_letter = sorted(sanitise(keyword))[-1]
268 last_keyword_position = string.ascii_lowercase.find(
269 last_keyword_letter) + 1
270 cipher_alphabet = ''.join(
271 deduplicate(sanitise(keyword) +
272 string.ascii_lowercase[last_keyword_position:] +
273 string.ascii_lowercase))
274 return cipher_alphabet
275
276
277 def keyword_encipher(message, keyword, wrap_alphabet=0):
278 """Enciphers a message with a keyword substitution cipher.
279 wrap_alphabet controls how the rest of the alphabet is added
280 after the keyword.
281 0 : from 'a'
282 1 : from the last letter in the sanitised keyword
283 2 : from the largest letter in the sanitised keyword
284
285 >>> keyword_encipher('test message', 'bayes')
286 'rsqr ksqqbds'
287 >>> keyword_encipher('test message', 'bayes', 0)
288 'rsqr ksqqbds'
289 >>> keyword_encipher('test message', 'bayes', 1)
290 'lskl dskkbus'
291 >>> keyword_encipher('test message', 'bayes', 2)
292 'qspq jsppbcs'
293 """
294 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
295 cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
296 return unaccent(message).lower().translate(cipher_translation)
297
298 def keyword_decipher(message, keyword, wrap_alphabet=0):
299 """Deciphers a message with a keyword substitution cipher.
300 wrap_alphabet controls how the rest of the alphabet is added
301 after the keyword.
302 0 : from 'a'
303 1 : from the last letter in the sanitised keyword
304 2 : from the largest letter in the sanitised keyword
305
306 >>> keyword_decipher('rsqr ksqqbds', 'bayes')
307 'test message'
308 >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
309 'test message'
310 >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
311 'test message'
312 >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
313 'test message'
314 """
315 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
316 cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
317 return message.lower().translate(cipher_translation)
318
319
320 def vigenere_encipher(message, keyword):
321 """Vigenere encipher
322
323 >>> vigenere_encipher('hello', 'abc')
324 'hfnlp'
325 """
326 shifts = [ord(l) - ord('a') for l in sanitise(keyword)]
327 pairs = zip(message, cycle(shifts))
328 return ''.join([caesar_encipher_letter(l, k) for l, k in pairs])
329
330 def vigenere_decipher(message, keyword):
331 """Vigenere decipher
332
333 >>> vigenere_decipher('hfnlp', 'abc')
334 'hello'
335 """
336 shifts = [ord(l) - ord('a') for l in sanitise(keyword)]
337 pairs = zip(message, cycle(shifts))
338 return ''.join([caesar_decipher_letter(l, k) for l, k in pairs])
339
340 beaufort_encipher=vigenere_decipher
341 beaufort_decipher=vigenere_encipher
342
343
344 def transpositions_of(keyword):
345 """Finds the transpostions given by a keyword. For instance, the keyword
346 'clever' rearranges to 'celrv', so the first column (0) stays first, the
347 second column (1) moves to third, the third column (2) moves to second,
348 and so on.
349
350 If passed a tuple, assume it's already a transposition and just return it.
351
352 >>> transpositions_of('clever')
353 (0, 2, 1, 4, 3)
354 >>> transpositions_of('fred')
355 (3, 2, 0, 1)
356 >>> transpositions_of((3, 2, 0, 1))
357 (3, 2, 0, 1)
358 """
359 if isinstance(keyword, tuple):
360 return keyword
361 else:
362 key = deduplicate(keyword)
363 transpositions = tuple(key.index(l) for l in sorted(key))
364 return transpositions
365
366 def pad(message_len, group_len, fillvalue):
367 padding_length = group_len - message_len % group_len
368 if padding_length == group_len: padding_length = 0
369 padding = ''
370 for i in range(padding_length):
371 if callable(fillvalue):
372 padding += fillvalue()
373 else:
374 padding += fillvalue
375 return padding
376
377 def column_transposition_encipher(message, keyword, fillvalue=' ',
378 fillcolumnwise=False,
379 emptycolumnwise=False):
380 """Enciphers using the column transposition cipher.
381 Message is padded to allow all rows to be the same length.
382
383 >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True)
384 'hlohr eltee '
385 >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True, emptycolumnwise=True)
386 'hellothere '
387 >>> column_transposition_encipher('hellothere', 'abcdef')
388 'hellothere '
389 >>> column_transposition_encipher('hellothere', 'abcde')
390 'hellothere'
391 >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
392 'hellothere'
393 >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
394 'hlohreltee'
395 >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
396 'htehlelroe'
397 >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
398 'hellothere'
399 >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=True)
400 'heotllrehe'
401 >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=False)
402 'holrhetlee'
403 >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=True)
404 'htleehoelr'
405 >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=False)
406 'hleolteher'
407 >>> column_transposition_encipher('hellothere', 'cleverly')
408 'hleolthre e '
409 >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
410 'hleolthre!e!'
411 >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue=lambda: '*')
412 'hleolthre*e*'
413 """
414 transpositions = transpositions_of(keyword)
415 message += pad(len(message), len(transpositions), fillvalue)
416 if fillcolumnwise:
417 rows = every_nth(message, len(message) // len(transpositions))
418 else:
419 rows = chunks(message, len(transpositions))
420 transposed = [transpose(r, transpositions) for r in rows]
421 if emptycolumnwise:
422 return combine_every_nth(transposed)
423 else:
424 return ''.join(chain(*transposed))
425
426 def column_transposition_decipher(message, keyword, fillvalue=' ',
427 fillcolumnwise=False,
428 emptycolumnwise=False):
429 """Deciphers using the column transposition cipher.
430 Message is padded to allow all rows to be the same length.
431
432 >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
433 'hellothere'
434 >>> column_transposition_decipher('hlohreltee', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
435 'hellothere'
436 >>> column_transposition_decipher('htehlelroe', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
437 'hellothere'
438 >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
439 'hellothere'
440 >>> column_transposition_decipher('heotllrehe', 'clever', fillcolumnwise=True, emptycolumnwise=True)
441 'hellothere'
442 >>> column_transposition_decipher('holrhetlee', 'clever', fillcolumnwise=True, emptycolumnwise=False)
443 'hellothere'
444 >>> column_transposition_decipher('htleehoelr', 'clever', fillcolumnwise=False, emptycolumnwise=True)
445 'hellothere'
446 >>> column_transposition_decipher('hleolteher', 'clever', fillcolumnwise=False, emptycolumnwise=False)
447 'hellothere'
448 """
449 transpositions = transpositions_of(keyword)
450 message += pad(len(message), len(transpositions), '*')
451 if emptycolumnwise:
452 rows = every_nth(message, len(message) // len(transpositions))
453 else:
454 rows = chunks(message, len(transpositions))
455 untransposed = [untranspose(r, transpositions) for r in rows]
456 if fillcolumnwise:
457 return combine_every_nth(untransposed)
458 else:
459 return ''.join(chain(*untransposed))
460
461 def scytale_encipher(message, rows, fillvalue=' '):
462 """Enciphers using the scytale transposition cipher.
463 Message is padded with spaces to allow all rows to be the same length.
464
465 >>> scytale_encipher('thequickbrownfox', 3)
466 'tcnhkfeboqrxuo iw '
467 >>> scytale_encipher('thequickbrownfox', 4)
468 'tubnhirfecooqkwx'
469 >>> scytale_encipher('thequickbrownfox', 5)
470 'tubnhirfecooqkwx'
471 >>> scytale_encipher('thequickbrownfox', 6)
472 'tqcrnxhukof eibwo '
473 >>> scytale_encipher('thequickbrownfox', 7)
474 'tqcrnxhukof eibwo '
475 """
476 transpositions = [i for i in range(math.ceil(len(message) / rows))]
477 return column_transposition_encipher(message, transpositions,
478 fillcolumnwise=False, emptycolumnwise=True)
479
480 def scytale_decipher(message, rows):
481 """Deciphers using the scytale transposition cipher.
482 Assumes the message is padded so that all rows are the same length.
483
484 >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
485 'thequickbrownfox '
486 >>> scytale_decipher('tubnhirfecooqkwx', 4)
487 'thequickbrownfox'
488 >>> scytale_decipher('tubnhirfecooqkwx', 5)
489 'thequickbrownfox'
490 >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
491 'thequickbrownfox '
492 >>> scytale_decipher('tqcrnxhukof eibwo ', 7)
493 'thequickbrownfox '
494 """
495 transpositions = [i for i in range(math.ceil(len(message) / rows))]
496 return column_transposition_decipher(message, transpositions,
497 fillcolumnwise=False, emptycolumnwise=True)
498
499
500 if __name__ == "__main__":
501 import doctest
502 doctest.testmod()