Done some work on word segmentation
[cipher-training.git] / cipher.py
1 import string
2 import collections
3 import math
4 from enum import Enum
5 from itertools import zip_longest, cycle, chain
6 from language_models import *
7
8
# modular_division_table[b][c] holds a value `a` in 0..25 for which
# (a * b) % 26 == c, i.e. the result of "dividing" c by b modulo 26.
# When several values of `a` satisfy this (b shares a factor with 26) the
# largest one wins because later iterations overwrite earlier ones; cells
# that no product reaches keep their initial 0.
modular_division_table = [[0 for _ in range(26)] for _ in range(26)]
for a in range(26):
    for b, row in enumerate(modular_division_table):
        c = (a * b) % 26
        row[c] = a
14
15
def every_nth(text, n, fillvalue=''):
    """Split text into interleaved strands, taking every nth character.

    The first strand holds characters 0, n, 2n, ...; the second holds
    characters 1, n+1, 2n+1, ...; and so on. Strands that would come out
    shorter than the longest one are extended with fillvalue.

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
    ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
    """
    # Cut the text into consecutive runs of n characters ...
    runs = []
    for start in range(0, len(text), n):
        runs.append(text[start:start + n])
    # ... then read those runs column by column.
    columns = zip_longest(*runs, fillvalue=fillvalue)
    return list(map(''.join, columns))
32
def combine_every_nth(split_text):
    """Interleave the strands produced by every_nth back into one string.

    Takes the first character of each strand in turn, then the second of
    each, and so on; strands that run out early simply contribute nothing.

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    interleaved = zip_longest(*split_text, fillvalue='')
    return ''.join(chain.from_iterable(interleaved))
45
def chunks(text, n, fillvalue=None):
    """Split a text into consecutive chunks of n characters.

    text: the string to split.
    n: the chunk length.
    fillvalue: if truthy, its first character is used to pad the final
        chunk up to n characters; otherwise the final chunk may be short.

    Returns a list of strings. An empty text gives an empty list.

    >>> chunks('abcdefghi', 3)
    ['abc', 'def', 'ghi']
    >>> chunks('abcdefghi', 4)
    ['abcd', 'efgh', 'i']
    >>> chunks('abcdefghi', 4, fillvalue='!')
    ['abcd', 'efgh', 'i!!!']
    """
    if fillvalue:
        # -len(text) % n is exactly the number of characters missing from
        # the final chunk (0 when the text already divides evenly).
        padded = text + fillvalue[0] * (-len(text) % n)
    else:
        padded = text
    # Build the padded text once rather than once per chunk. The range is
    # still driven by the unpadded length so no all-padding chunk appears.
    return [padded[i:i + n] for i in range(0, len(text), n)]
61
def transpose(items, transposition):
    """Rearrange items so that position p of the result holds
    items[transposition[p]].

    >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3))
    ['a', 'b', 'c', 'd']
    >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0))
    ['d', 'b', 'c', 'a']
    >>> transpose([10,11,12,13,14,15], (3,2,4,1,5,0))
    [13, 12, 14, 11, 15, 10]
    """
    return [items[source] for source in transposition]
76
def untranspose(items, transposition):
    """Reverse a transpose: items[p] is moved back to position
    transposition[p] of the result.

    Result slots that no entry of the transposition points at are left
    as the empty string.

    >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
    ['a', 'b', 'c', 'd']
    >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
    ['a', 'b', 'c', 'd']
    >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
    [10, 11, 12, 13, 14, 15]
    """
    restored = [''] * len(transposition)
    for current, original in enumerate(transposition):
        restored[original] = items[current]
    return restored
91
def deduplicate(text):
    """Return the distinct items of text as a list, keeping the order in
    which each item first appears.

    >>> deduplicate('clever')
    ['c', 'l', 'e', 'v', 'r']
    """
    first_seen = collections.OrderedDict.fromkeys(text)
    return list(first_seen.keys())
94
95
def caesar_encipher_letter(accented_letter, shift):
    """Encipher a letter, given a shift amount.

    The character is first passed through unaccent(). An ASCII letter is
    then rotated by shift places within its own case (wrapping round the
    alphabet); anything else is returned as unaccent() produced it.

    accented_letter: the character to encipher.
    shift: number of places to move; may be negative.

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('a', 2)
    'c'
    >>> caesar_encipher_letter('b', 2)
    'd'
    >>> caesar_encipher_letter('x', 2)
    'z'
    >>> caesar_encipher_letter('y', 2)
    'a'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('z', -1)
    'y'
    >>> caesar_encipher_letter('a', -1)
    'z'
    >>> caesar_encipher_letter('A', 1)
    'B'
    >>> caesar_encipher_letter('é', 1)
    'f'
    """
    letter = unaccent(accented_letter)
    # `x in some_string` is a substring test, so '' and runs such as 'ab'
    # would pass it and then break ord(). Require exactly one character.
    # NOTE(review): whether unaccent() can return '' or several characters
    # is not visible from here; the guard is harmless if it cannot.
    if len(letter) != 1 or letter not in string.ascii_letters:
        return letter
    if letter in string.ascii_uppercase:
        alphabet_start = ord('A')
    else:
        alphabet_start = ord('a')
    offset = (ord(letter) - alphabet_start + shift) % 26
    return chr(offset + alphabet_start)
130
def caesar_decipher_letter(letter, shift):
    """Undo a Caesar shift on a single letter by enciphering it with the
    opposite shift.

    >>> caesar_decipher_letter('b', 1)
    'a'
    >>> caesar_decipher_letter('b', 2)
    'z'
    """
    reverse_shift = -shift
    return caesar_encipher_letter(letter, reverse_shift)
140
def caesar_encipher(message, shift):
    """Apply the Caesar cipher with the given shift to every character of
    message and return the resulting string.

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('abc', 2)
    'cde'
    >>> caesar_encipher('abcxyz', 2)
    'cdezab'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    >>> caesar_encipher('Héllo World!', 2)
    'Jgnnq Yqtnf!'
    """
    return ''.join(caesar_encipher_letter(character, shift)
                   for character in message)
157
def caesar_decipher(message, shift):
    """Reverse a Caesar cipher of the given shift by enciphering the
    message with the opposite shift.

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cde', 2)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    >>> caesar_decipher('Jgnnq Yqtnf!', 2)
    'Hello World!'
    """
    reverse_shift = -shift
    return caesar_encipher(message, reverse_shift)
171
def affine_encipher_letter(accented_letter, multiplier=1, adder=0, one_based=True):
    """Encipher a letter, given a multiplier and adder.

    The character is first passed through unaccent(). An ASCII letter is
    numbered within its own case, mapped to
    (number * multiplier + adder) mod 26, and turned back into a letter of
    the same case. Anything else is returned as unaccent() produced it.

    accented_letter: the character to encipher.
    multiplier, adder: the affine key.
    one_based: if true, letters are numbered a=1 .. z=26 for the
        arithmetic; if false, a=0 .. z=25.

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
            for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
            for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    letter = unaccent(accented_letter)
    # `x in some_string` is a substring test, so '' and runs such as 'ab'
    # would pass it and then break ord(). Require exactly one character.
    # NOTE(review): whether unaccent() can return '' or several characters
    # is not visible from here; the guard is harmless if it cannot.
    if len(letter) != 1 or letter not in string.ascii_letters:
        return letter
    if letter in string.ascii_uppercase:
        alphabet_start = ord('A')
    else:
        alphabet_start = ord('a')
    letter_number = ord(letter) - alphabet_start
    if one_based:
        letter_number += 1
    cipher_number = (letter_number * multiplier + adder) % 26
    if one_based:
        cipher_number -= 1
    # The second reduction folds the -1 produced by the one-based
    # adjustment (when cipher_number was 0) back round to 25.
    return chr(cipher_number % 26 + alphabet_start)
195
def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter, given a multiplier and adder.

    Inverts affine_encipher_letter for the same key: the adder is
    subtracted and the result is divided by the multiplier modulo 26 using
    modular_division_table. Characters that are not a single ASCII letter
    are returned unchanged.

    letter: the ciphertext character.
    multiplier, adder: the affine key used to encipher. The multiplier is
        reduced modulo 26 before the table lookup, so any integer accepted
        when enciphering is accepted here too. Division is only unambiguous
        when the multiplier shares no factor with 26.
    one_based: must match the value used when enciphering.

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
            for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
            for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    # `x in some_string` is a substring test, so '' and runs such as 'ab'
    # would pass it and then break ord(). Require exactly one character.
    if len(letter) != 1 or letter not in string.ascii_letters:
        return letter
    if letter in string.ascii_uppercase:
        alphabet_start = ord('A')
    else:
        alphabet_start = ord('a')
    cipher_number = ord(letter) - alphabet_start
    if one_based:
        cipher_number += 1
    # Reduce the multiplier so values outside 0..25 index the table the
    # same way they behave in the enciphering arithmetic.
    division_row = modular_division_table[multiplier % 26]
    plaintext_number = division_row[(cipher_number - adder) % 26]
    if one_based:
        plaintext_number -= 1
    return chr(plaintext_number % 26 + alphabet_start)
220
def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher every character of message with the affine cipher defined
    by multiplier and adder, returning the resulting string.

    >>> affine_encipher('hours passed during which jerico tried every ' \
            'trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    return ''.join(
        affine_encipher_letter(character, multiplier, adder, one_based)
        for character in message)
231
def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher every character of message with the affine cipher defined
    by multiplier and adder, returning the resulting string.

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
            'jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    return ''.join(
        affine_decipher_letter(character, multiplier, adder, one_based)
        for character in message)
242
243
class Keyword_wrap_alphabet(Enum):
    """Ways of filling in the rest of a keyword cipher alphabet once the
    keyword's own letters have been placed."""
    # Continue from 'a'.
    from_a = 1
    # Continue from the letter after the keyword's last distinct letter.
    from_last = 2
    # Continue from the letter after the keyword's alphabetically largest
    # letter.
    from_largest = 3
248
249
def keyword_cipher_alphabet_of(keyword, wrap_alphabet=Keyword_wrap_alphabet.from_a):
    """Find the cipher alphabet given a keyword.

    The alphabet starts with the distinct letters of the sanitised
    keyword. wrap_alphabet (a Keyword_wrap_alphabet member) controls where
    the remaining letters start:
      from_a       : from 'a'
      from_last    : from the letter after the keyword's last distinct
                     letter
      from_largest : from the letter after the keyword's alphabetically
                     largest letter
    wrapping round to 'a' as needed. A keyword with nothing left after
    sanitising gives the plain alphabet whatever the mode.

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', Keyword_wrap_alphabet.from_a)
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', Keyword_wrap_alphabet.from_last)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', Keyword_wrap_alphabet.from_largest)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    # Sanitise once; the original recomputed this for every use.
    # NOTE(review): assumes sanitise() returns a string of lowercase
    # letters - confirm against its definition.
    key_letters = sanitise(keyword)
    if wrap_alphabet == Keyword_wrap_alphabet.from_a or not key_letters:
        # Nothing to wrap from (or an empty key, which has no last or
        # largest letter to index): the rest is just the alphabet.
        wrapped_tail = ''
    else:
        if wrap_alphabet == Keyword_wrap_alphabet.from_last:
            # Last *distinct* letter, which can differ from the keyword's
            # final character when that character appeared earlier.
            pivot_letter = deduplicate(key_letters)[-1]
        else:
            pivot_letter = max(key_letters)
        pivot_position = string.ascii_lowercase.find(pivot_letter) + 1
        wrapped_tail = string.ascii_lowercase[pivot_position:]
    # Appending the whole alphabet and deduplicating supplies every letter
    # not already used, in order.
    return ''.join(
        deduplicate(key_letters + wrapped_tail + string.ascii_lowercase))
279
280
def keyword_encipher(message, keyword, wrap_alphabet=Keyword_wrap_alphabet.from_a):
    """Encipher a message with a keyword substitution cipher.

    The message is unaccented and lower-cased, then each lowercase letter
    is replaced by the letter at the same position in the keyword's cipher
    alphabet. wrap_alphabet is a Keyword_wrap_alphabet member controlling
    how the rest of the alphabet is added after the keyword:
      from_a       : from 'a'
      from_last    : from the last letter in the sanitised keyword
      from_largest : from the largest letter in the sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', Keyword_wrap_alphabet.from_a)
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', Keyword_wrap_alphabet.from_last)
    'lskl dskkbus'
    >>> keyword_encipher('test message', 'bayes', Keyword_wrap_alphabet.from_largest)
    'qspq jsppbcs'
    """
    substitution = str.maketrans(
        string.ascii_lowercase,
        keyword_cipher_alphabet_of(keyword, wrap_alphabet))
    plain = unaccent(message).lower()
    return plain.translate(substitution)
301
def keyword_decipher(message, keyword, wrap_alphabet=Keyword_wrap_alphabet.from_a):
    """Decipher a message with a keyword substitution cipher.

    The message is lower-cased, then each letter of the keyword's cipher
    alphabet is replaced by the plain letter at the same position.
    wrap_alphabet is a Keyword_wrap_alphabet member controlling how the
    rest of the alphabet is added after the keyword:
      from_a       : from 'a'
      from_last    : from the last letter in the sanitised keyword
      from_largest : from the largest letter in the sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    >>> keyword_decipher('rsqr ksqqbds', 'bayes', Keyword_wrap_alphabet.from_a)
    'test message'
    >>> keyword_decipher('lskl dskkbus', 'bayes', Keyword_wrap_alphabet.from_last)
    'test message'
    >>> keyword_decipher('qspq jsppbcs', 'bayes', Keyword_wrap_alphabet.from_largest)
    'test message'
    """
    reverse_substitution = str.maketrans(
        keyword_cipher_alphabet_of(keyword, wrap_alphabet),
        string.ascii_lowercase)
    ciphertext = message.lower()
    return ciphertext.translate(reverse_substitution)
322
323
def vigenere_encipher(message, keyword):
    """Vigenere encipher.

    Each character of message is Caesar-enciphered with a shift taken from
    the next letter of the sanitised keyword ('a' = 0, 'b' = 1, ...), the
    keyword repeating as often as needed. Every character of the message
    consumes a keyword letter, including ones the Caesar step leaves
    unchanged.

    A keyword with nothing left after sanitising is treated as a shift of
    zero, so the message is passed through the Caesar step unshifted
    rather than being dropped.

    >>> vigenere_encipher('hello', 'abc')
    'hfnlp'
    """
    # NOTE(review): assumes sanitise() yields lowercase letters only -
    # confirm against its definition.
    shifts = [ord(l) - ord('a') for l in sanitise(keyword)]
    if not shifts:
        # cycle([]) is empty, which would make zip() yield nothing and the
        # whole message silently vanish.
        shifts = [0]
    pairs = zip(message, cycle(shifts))
    return ''.join(caesar_encipher_letter(l, k) for l, k in pairs)
333
def vigenere_decipher(message, keyword):
    """Vigenere decipher.

    Each character of message is Caesar-deciphered with a shift taken from
    the next letter of the sanitised keyword ('a' = 0, 'b' = 1, ...), the
    keyword repeating as often as needed. Every character of the message
    consumes a keyword letter, including ones the Caesar step leaves
    unchanged.

    A keyword with nothing left after sanitising is treated as a shift of
    zero, so the message is passed through the Caesar step unshifted
    rather than being dropped.

    >>> vigenere_decipher('hfnlp', 'abc')
    'hello'
    """
    # NOTE(review): assumes sanitise() yields lowercase letters only -
    # confirm against its definition.
    shifts = [ord(l) - ord('a') for l in sanitise(keyword)]
    if not shifts:
        # cycle([]) is empty, which would make zip() yield nothing and the
        # whole message silently vanish.
        shifts = [0]
    pairs = zip(message, cycle(shifts))
    return ''.join(caesar_decipher_letter(l, k) for l, k in pairs)
343
# The Beaufort functions are the Vigenere functions with their roles
# swapped: enciphering subtracts the key, deciphering adds it.
beaufort_encipher = vigenere_decipher
beaufort_decipher = vigenere_encipher
346
347
def transpositions_of(keyword):
    """Find the transpositions given by a keyword.

    The keyword's distinct letters are sorted, and each sorted letter is
    reported by its position among the distinct letters. For instance,
    'clever' has distinct letters 'clevr', which sort to 'celrv': 'c' was
    at position 0, 'e' at 2, 'l' at 1, 'r' at 4 and 'v' at 3.

    If passed a tuple, assume it's already a transposition and just
    return it.

    >>> transpositions_of('clever')
    (0, 2, 1, 4, 3)
    >>> transpositions_of('fred')
    (3, 2, 0, 1)
    >>> transpositions_of((3, 2, 0, 1))
    (3, 2, 0, 1)
    """
    if isinstance(keyword, tuple):
        return keyword
    distinct = deduplicate(keyword)
    # Every entry is unique, so a position lookup table gives the same
    # answer as searching the list for each letter.
    position_of = {letter: place for place, letter in enumerate(distinct)}
    return tuple(position_of[letter] for letter in sorted(distinct))
369
def pad(message_len, group_len, fillvalue):
    """Return the padding needed to bring a message up to a whole number
    of groups.

    message_len: length of the message to be padded.
    group_len: size of each group; must not be zero.
    fillvalue: either a string to repeat, or a callable taking no
        arguments that is called once per padding position and returns the
        string to use there.

    Returns the padding string, which is empty when message_len is already
    a multiple of group_len.

    >>> pad(10, 6, ' ')
    '  '
    >>> pad(10, 5, '!')
    ''
    >>> pad(10, 8, lambda: '*')
    '******'
    """
    # Number of positions missing from the last group; 0 on an exact fit.
    padding_length = -message_len % group_len
    if callable(fillvalue):
        return ''.join(fillvalue() for _ in range(padding_length))
    return ''.join(fillvalue for _ in range(padding_length))
380
def column_transposition_encipher(message, keyword, fillvalue=' ',
      fillcolumnwise=False,
      emptycolumnwise=False):
    """Encipher using the column transposition cipher.

    The message is padded with fillvalue (a string, or a callable
    returning one) so that it fills a whole number of rows, one column per
    entry of the keyword's transposition. fillcolumnwise writes the
    message into the grid down the columns rather than along the rows;
    emptycolumnwise reads the result out down the columns rather than
    along the rows.

    >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True)
    'hlohr eltee '
    >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True, emptycolumnwise=True)
    'hellothere  '
    >>> column_transposition_encipher('hellothere', 'abcdef')
    'hellothere  '
    >>> column_transposition_encipher('hellothere', 'abcde')
    'hellothere'
    >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
    'hellothere'
    >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
    'hlohreltee'
    >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
    'htehlelroe'
    >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
    'hellothere'
    >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=True)
    'heotllrehe'
    >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=False)
    'holrhetlee'
    >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=True)
    'htleehoelr'
    >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=False)
    'hleolteher'
    >>> column_transposition_encipher('hellothere', 'cleverly')
    'hleolthre e '
    >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
    'hleolthre!e!'
    >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue=lambda: '*')
    'hleolthre*e*'
    """
    transpositions = transpositions_of(keyword)
    width = len(transpositions)
    padded = message + pad(len(message), width, fillvalue)
    if not fillcolumnwise:
        rows = chunks(padded, width)
    else:
        # Taking every (row count)th character lays the text down columns.
        rows = every_nth(padded, len(padded) // width)
    shuffled = [transpose(row, transpositions) for row in rows]
    if not emptycolumnwise:
        return ''.join(chain.from_iterable(shuffled))
    return combine_every_nth(shuffled)
429
def column_transposition_decipher(message, keyword, fillvalue=' ',
      fillcolumnwise=False,
      emptycolumnwise=False):
    """Decipher using the column transposition cipher.

    fillcolumnwise and emptycolumnwise must match the values used when
    enciphering. A message that does not fill a whole number of rows is
    padded with '*' first; the fillvalue parameter is accepted but not
    used.

    >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
    'hellothere'
    >>> column_transposition_decipher('hlohreltee', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
    'hellothere'
    >>> column_transposition_decipher('htehlelroe', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
    'hellothere'
    >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
    'hellothere'
    >>> column_transposition_decipher('heotllrehe', 'clever', fillcolumnwise=True, emptycolumnwise=True)
    'hellothere'
    >>> column_transposition_decipher('holrhetlee', 'clever', fillcolumnwise=True, emptycolumnwise=False)
    'hellothere'
    >>> column_transposition_decipher('htleehoelr', 'clever', fillcolumnwise=False, emptycolumnwise=True)
    'hellothere'
    >>> column_transposition_decipher('hleolteher', 'clever', fillcolumnwise=False, emptycolumnwise=False)
    'hellothere'
    """
    transpositions = transpositions_of(keyword)
    width = len(transpositions)
    padded = message + pad(len(message), width, '*')
    if not emptycolumnwise:
        rows = chunks(padded, width)
    else:
        # The ciphertext was read out down the columns, so rebuild the
        # rows by taking every (row count)th character.
        rows = every_nth(padded, len(padded) // width)
    restored = [untranspose(row, transpositions) for row in rows]
    if not fillcolumnwise:
        return ''.join(chain.from_iterable(restored))
    return combine_every_nth(restored)
464
def scytale_encipher(message, rows, fillvalue=' '):
    """Encipher using the scytale transposition cipher.

    The message is written along rows of ceil(len(message) / rows)
    characters and read off down the columns.

    message: the text to encipher.
    rows: the number of rows the message is divided by to find the row
        width.
    fillvalue: padding used to complete the final row - a string, or a
        callable returning one. Defaults to a space.

    >>> scytale_encipher('thequickbrownfox', 3)
    'tcnhkfeboqrxuo iw '
    >>> scytale_encipher('thequickbrownfox', 4)
    'tubnhirfecooqkwx'
    >>> scytale_encipher('thequickbrownfox', 5)
    'tubnhirfecooqkwx'
    >>> scytale_encipher('thequickbrownfox', 6)
    'tqcrnxhukof eibwo '
    >>> scytale_encipher('thequickbrownfox', 7)
    'tqcrnxhukof eibwo '
    >>> scytale_encipher('thequickbrownfox', 3, fillvalue='!')
    'tcnhkfeboqrxuo!iw!'
    """
    # An identity transposition: the columns keep their order and only
    # the row-wise fill / column-wise read does the scrambling. Passing a
    # tuple means it is used as given.
    transpositions = tuple(range(math.ceil(len(message) / rows)))
    # fillvalue was previously accepted but never forwarded, so callers
    # always got space padding regardless of what they asked for.
    return column_transposition_encipher(message, transpositions,
        fillvalue=fillvalue,
        fillcolumnwise=False, emptycolumnwise=True)
483
def scytale_decipher(message, rows):
    """Decipher using the scytale transposition cipher.

    Assumes the message is padded so that all rows are the same length;
    any padding is left on the returned text.

    >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
    'thequickbrownfox  '
    >>> scytale_decipher('tubnhirfecooqkwx', 4)
    'thequickbrownfox'
    >>> scytale_decipher('tubnhirfecooqkwx', 5)
    'thequickbrownfox'
    >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
    'thequickbrownfox  '
    >>> scytale_decipher('tqcrnxhukof eibwo ', 7)
    'thequickbrownfox  '
    """
    row_width = math.ceil(len(message) / rows)
    # Column order is left untouched; only the read direction is undone.
    column_order = list(range(row_width))
    return column_transposition_decipher(
        message, column_order,
        fillcolumnwise=False, emptycolumnwise=True)
502
503
if __name__ == "__main__":
    # Running the module directly executes the doctests in its docstrings.
    import doctest
    doctest.testmod()