Fixed accent removal
[cipher-tools.git] / cipher.py
1 import string
2 import collections
3 import logging
4 from itertools import zip_longest, cycle, chain
5 from language_models import *
6
7 logger = logging.getLogger(__name__)
8 logger.addHandler(logging.FileHandler('cipher.log'))
9 logger.setLevel(logging.WARNING)
10 #logger.setLevel(logging.INFO)
11 #logger.setLevel(logging.DEBUG)
12
13
14 modular_division_table = [[0]*26 for x in range(26)]
15 for a in range(26):
16 for b in range(26):
17 c = (a * b) % 26
18 modular_division_table[b][c] = a
19
20 def letters(text):
21 """Remove all non-alphabetic characters from a text
22 >>> letters('The Quick')
23 'TheQuick'
24 >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
25 'TheQuickBROWNfoxjumpedoverthelazyDOG'
26 """
27 return ''.join([c for c in text if c in string.ascii_letters])
28
29 def sanitise(text):
30 """Remove all non-alphabetic characters and convert the text to lowercase
31
32 >>> sanitise('The Quick')
33 'thequick'
34 >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
35 'thequickbrownfoxjumpedoverthelazydog'
36 """
37 # sanitised = [c.lower() for c in text if c in string.ascii_letters]
38 # return ''.join(sanitised)
39 return letters(text).lower()
40
41 def ngrams(text, n):
42 """Returns all n-grams of a text
43
44 >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
45 ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
46 'nf', 'fo', 'ox']
47 >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
48 ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
49 'rown', 'ownf', 'wnfo', 'nfox']
50 """
51 return [text[i:i+n] for i in range(len(text)-n+1)]
52
53 def every_nth(text, n, fillvalue=''):
54 """Returns n strings, each of which consists of every nth character,
55 starting with the 0th, 1st, 2nd, ... (n-1)th character
56
57 >>> every_nth(string.ascii_lowercase, 5)
58 ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
59 >>> every_nth(string.ascii_lowercase, 1)
60 ['abcdefghijklmnopqrstuvwxyz']
61 >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
62 ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
63 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
64 >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
65 ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
66 """
67 split_text = [text[i:i+n] for i in range(0, len(text), n)]
68 return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
69
70 def combine_every_nth(split_text):
71 """Reforms a text split into every_nth strings
72
73 >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
74 'abcdefghijklmnopqrstuvwxyz'
75 >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
76 'abcdefghijklmnopqrstuvwxyz'
77 >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
78 'abcdefghijklmnopqrstuvwxyz'
79 """
80 return ''.join([''.join(l)
81 for l in zip_longest(*split_text, fillvalue='')])
82
83 def chunks(text, n, fillvalue=None):
84 """Split a text into chunks of n characters
85
86 >>> chunks('abcdefghi', 3)
87 ['abc', 'def', 'ghi']
88 >>> chunks('abcdefghi', 4)
89 ['abcd', 'efgh', 'i']
90 >>> chunks('abcdefghi', 4, fillvalue='!')
91 ['abcd', 'efgh', 'i!!!']
92 """
93 if fillvalue:
94 padding = fillvalue[0] * (n - len(text) % n)
95 else:
96 padding = ''
97 return [(text+padding)[i:i+n] for i in range(0, len(text), n)]
98
99 def transpose(items, transposition):
100 """Moves items around according to the given transposition
101
102 >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3))
103 ['a', 'b', 'c', 'd']
104 >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0))
105 ['d', 'b', 'c', 'a']
106 >>> transpose([10,11,12,13,14,15], (3,2,4,1,5,0))
107 [13, 12, 14, 11, 15, 10]
108 """
109 transposed = [''] * len(transposition)
110 for p, t in enumerate(transposition):
111 transposed[p] = items[t]
112 return transposed
113
114 def untranspose(items, transposition):
115 """Undoes a transpose
116
117 >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
118 ['a', 'b', 'c', 'd']
119 >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
120 ['a', 'b', 'c', 'd']
121 >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
122 [10, 11, 12, 13, 14, 15]
123 """
124 transposed = [''] * len(transposition)
125 for p, t in enumerate(transposition):
126 transposed[t] = items[p]
127 return transposed
128
129 def deduplicate(text):
130 return list(collections.OrderedDict.fromkeys(text))
131
132
133 def caesar_encipher_letter(letter, shift):
134 """Encipher a letter, given a shift amount
135
136 >>> caesar_encipher_letter('a', 1)
137 'b'
138 >>> caesar_encipher_letter('a', 2)
139 'c'
140 >>> caesar_encipher_letter('b', 2)
141 'd'
142 >>> caesar_encipher_letter('x', 2)
143 'z'
144 >>> caesar_encipher_letter('y', 2)
145 'a'
146 >>> caesar_encipher_letter('z', 2)
147 'b'
148 >>> caesar_encipher_letter('z', -1)
149 'y'
150 >>> caesar_encipher_letter('a', -1)
151 'z'
152 """
153 if letter in string.ascii_letters:
154 if letter in string.ascii_uppercase:
155 alphabet_start = ord('A')
156 else:
157 alphabet_start = ord('a')
158 return chr(((ord(letter) - alphabet_start + shift) % 26) +
159 alphabet_start)
160 else:
161 return letter
162
163 def caesar_decipher_letter(letter, shift):
164 """Decipher a letter, given a shift amount
165
166 >>> caesar_decipher_letter('b', 1)
167 'a'
168 >>> caesar_decipher_letter('b', 2)
169 'z'
170 """
171 return caesar_encipher_letter(letter, -shift)
172
173 def caesar_encipher(message, shift):
174 """Encipher a message with the Caesar cipher of given shift
175
176 >>> caesar_encipher('abc', 1)
177 'bcd'
178 >>> caesar_encipher('abc', 2)
179 'cde'
180 >>> caesar_encipher('abcxyz', 2)
181 'cdezab'
182 >>> caesar_encipher('ab cx yz', 2)
183 'cd ez ab'
184 """
185 enciphered = [caesar_encipher_letter(l, shift) for l in message]
186 return ''.join(enciphered)
187
188 def caesar_decipher(message, shift):
189 """Encipher a message with the Caesar cipher of given shift
190
191 >>> caesar_decipher('bcd', 1)
192 'abc'
193 >>> caesar_decipher('cde', 2)
194 'abc'
195 >>> caesar_decipher('cd ez ab', 2)
196 'ab cx yz'
197 """
198 return caesar_encipher(message, -shift)
199
200 def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
201 """Encipher a letter, given a multiplier and adder
202
203 >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
204 for l in string.ascii_uppercase])
205 'HKNQTWZCFILORUXADGJMPSVYBE'
206 >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
207 for l in string.ascii_uppercase])
208 'FILORUXADGJMPSVYBEHKNQTWZC'
209 """
210 if letter in string.ascii_letters:
211 if letter in string.ascii_uppercase:
212 alphabet_start = ord('A')
213 else:
214 alphabet_start = ord('a')
215 letter_number = ord(letter) - alphabet_start
216 if one_based: letter_number += 1
217 cipher_number = (letter_number * multiplier + adder) % 26
218 if one_based: cipher_number -= 1
219 return chr(cipher_number % 26 + alphabet_start)
220 else:
221 return letter
222
223 def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
224 """Encipher a letter, given a multiplier and adder
225
226 >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
227 for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
228 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
229 >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
230 for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
231 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
232 """
233 if letter in string.ascii_letters:
234 if letter in string.ascii_uppercase:
235 alphabet_start = ord('A')
236 else:
237 alphabet_start = ord('a')
238 cipher_number = ord(letter) - alphabet_start
239 if one_based: cipher_number += 1
240 plaintext_number = (
241 modular_division_table[multiplier]
242 [(cipher_number - adder) % 26] )
243 if one_based: plaintext_number -= 1
244 return chr(plaintext_number % 26 + alphabet_start)
245 else:
246 return letter
247
248 def affine_encipher(message, multiplier=1, adder=0, one_based=True):
249 """Encipher a message
250
251 >>> affine_encipher('hours passed during which jerico tried every ' \
252 'trick he could think of', 15, 22, True)
253 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
254 """
255 enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
256 for l in message]
257 return ''.join(enciphered)
258
259 def affine_decipher(message, multiplier=1, adder=0, one_based=True):
260 """Decipher a message
261
262 >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
263 'jfaoe ls omytd jlaxe mh', 15, 22, True)
264 'hours passed during which jerico tried every trick he could think of'
265 """
266 enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
267 for l in message]
268 return ''.join(enciphered)
269
270
271 def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
272 """Find the cipher alphabet given a keyword.
273 wrap_alphabet controls how the rest of the alphabet is added
274 after the keyword.
275 0 : from 'a'
276 1 : from the last letter in the sanitised keyword
277 2 : from the largest letter in the sanitised keyword
278
279 >>> keyword_cipher_alphabet_of('bayes')
280 'bayescdfghijklmnopqrtuvwxz'
281 >>> keyword_cipher_alphabet_of('bayes', 0)
282 'bayescdfghijklmnopqrtuvwxz'
283 >>> keyword_cipher_alphabet_of('bayes', 1)
284 'bayestuvwxzcdfghijklmnopqr'
285 >>> keyword_cipher_alphabet_of('bayes', 2)
286 'bayeszcdfghijklmnopqrtuvwx'
287 """
288 if wrap_alphabet == 0:
289 cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
290 string.ascii_lowercase))
291 else:
292 if wrap_alphabet == 1:
293 last_keyword_letter = deduplicate(sanitise(keyword))[-1]
294 else:
295 last_keyword_letter = sorted(sanitise(keyword))[-1]
296 last_keyword_position = string.ascii_lowercase.find(
297 last_keyword_letter) + 1
298 cipher_alphabet = ''.join(
299 deduplicate(sanitise(keyword) +
300 string.ascii_lowercase[last_keyword_position:] +
301 string.ascii_lowercase))
302 return cipher_alphabet
303
304
305 def keyword_encipher(message, keyword, wrap_alphabet=0):
306 """Enciphers a message with a keyword substitution cipher.
307 wrap_alphabet controls how the rest of the alphabet is added
308 after the keyword.
309 0 : from 'a'
310 1 : from the last letter in the sanitised keyword
311 2 : from the largest letter in the sanitised keyword
312
313 >>> keyword_encipher('test message', 'bayes')
314 'rsqr ksqqbds'
315 >>> keyword_encipher('test message', 'bayes', 0)
316 'rsqr ksqqbds'
317 >>> keyword_encipher('test message', 'bayes', 1)
318 'lskl dskkbus'
319 >>> keyword_encipher('test message', 'bayes', 2)
320 'qspq jsppbcs'
321 """
322 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
323 cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
324 return message.lower().translate(cipher_translation)
325
326 def keyword_decipher(message, keyword, wrap_alphabet=0):
327 """Deciphers a message with a keyword substitution cipher.
328 wrap_alphabet controls how the rest of the alphabet is added
329 after the keyword.
330 0 : from 'a'
331 1 : from the last letter in the sanitised keyword
332 2 : from the largest letter in the sanitised keyword
333
334 >>> keyword_decipher('rsqr ksqqbds', 'bayes')
335 'test message'
336 >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
337 'test message'
338 >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
339 'test message'
340 >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
341 'test message'
342 """
343 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
344 cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
345 return message.lower().translate(cipher_translation)
346
347 def scytale_encipher(message, rows):
348 """Enciphers using the scytale transposition cipher.
349 Message is padded with spaces to allow all rows to be the same length.
350
351 >>> scytale_encipher('thequickbrownfox', 3)
352 'tcnhkfeboqrxuo iw '
353 >>> scytale_encipher('thequickbrownfox', 4)
354 'tubnhirfecooqkwx'
355 >>> scytale_encipher('thequickbrownfox', 5)
356 'tubn hirf ecoo qkwx '
357 >>> scytale_encipher('thequickbrownfox', 6)
358 'tqcrnxhukof eibwo '
359 >>> scytale_encipher('thequickbrownfox', 7)
360 'tqcrnx hukof eibwo '
361 """
362 if len(message) % rows != 0:
363 message += ' '*(rows - len(message) % rows)
364 row_length = round(len(message) / rows)
365 slices = [message[i:i+row_length]
366 for i in range(0, len(message), row_length)]
367 return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])
368
369 def scytale_decipher(message, rows):
370 """Deciphers using the scytale transposition cipher.
371 Assumes the message is padded so that all rows are the same length.
372
373 >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
374 'thequickbrownfox '
375 >>> scytale_decipher('tubnhirfecooqkwx', 4)
376 'thequickbrownfox'
377 >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
378 'thequickbrownfox '
379 >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
380 'thequickbrownfox '
381 >>> scytale_decipher('tqcrnx hukof eibwo ', 7)
382 'thequickbrownfox '
383 """
384 cols = round(len(message) / rows)
385 columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
386 return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
387
388
389 def transpositions_of(keyword):
390 """Finds the transpostions given by a keyword. For instance, the keyword
391 'clever' rearranges to 'celrv', so the first column (0) stays first, the
392 second column (1) moves to third, the third column (2) moves to second,
393 and so on.
394
395 If passed a tuple, assume it's already a transposition and just return it.
396
397 >>> transpositions_of('clever')
398 (0, 2, 1, 4, 3)
399 >>> transpositions_of('fred')
400 (3, 2, 0, 1)
401 >>> transpositions_of((3, 2, 0, 1))
402 (3, 2, 0, 1)
403 """
404 if isinstance(keyword, tuple):
405 return keyword
406 else:
407 key = deduplicate(keyword)
408 transpositions = tuple(key.index(l) for l in sorted(key))
409 return transpositions
410
411 def pad(message_len, group_len, fillvalue):
412 padding_length = group_len - message_len % group_len
413 if padding_length == group_len: padding_length = 0
414 padding = ''
415 for i in range(padding_length):
416 if callable(fillvalue):
417 padding += fillvalue()
418 else:
419 padding += fillvalue
420 return padding
421
422 def column_transposition_encipher(message, keyword, fillvalue=' ',
423 fillcolumnwise=False,
424 emptycolumnwise=False):
425 """Enciphers using the column transposition cipher.
426 Message is padded to allow all rows to be the same length.
427 """
428 # >>> column_transposition_encipher('hellothere', 'clever')
429 # 'hleolteher'
430 # >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
431 # 'hleolthre!e!'
432 # >>> column_transposition_encipher('hellothere', 'clever', columnwise=True)
433 # 'htleehoelr'
434 # """
435 transpositions = transpositions_of(keyword)
436 message += pad(len(message), len(transpositions), fillvalue)
437 if fillcolumnwise:
438 rows = every_nth(message, len(transpositions))
439 else:
440 rows = chunks(mesage, len(transpositions))
441 columns = every_nth(message, len(transpositions), fillvalue=fillvalue)
442 transposed = [transpose(r, transpositions) for r in rows]
443 if emptycolumnwise:
444 return combine_every_nth(transposed)
445 else:
446 return ''.join(chain(*transposed))
447
448 def column_transposition_decipher(message, keyword, fillvalue=' ',
449 columnwise=False):
450 """Deciphers using the column transposition cipher.
451 Message is padded to allow all rows to be the same length.
452 """
453 # >>> column_transposition_decipher('hleolteher', 'clever')
454 # 'hellothere'
455 # >>> column_transposition_decipher('hleolthre!e!', 'cleverly', fillvalue='?')
456 # 'hellothere!!'
457 # >>> column_transposition_decipher('htleehoelr', 'clever', columnwise=True)
458 # 'hellothere'
459 # """
460 transpositions = transpositions_of(keyword)
461 if columnwise:
462 columns = chunks(message, int(len(message) / len(transpositions)))
463 else:
464 columns = every_nth(message, len(transpositions), fillvalue=fillvalue)
465 untransposed_columns = untranspose(columns, transpositions)
466 return combine_every_nth(untransposed_columns)
467
468
469 def vigenere_encipher(message, keyword):
470 """Vigenere encipher
471
472 >>> vigenere_encipher('hello', 'abc')
473 'hfnlp'
474 """
475 shifts = [ord(l) - ord('a') for l in sanitise(keyword)]
476 pairs = zip(message, cycle(shifts))
477 return ''.join([caesar_encipher_letter(l, k) for l, k in pairs])
478
479 def vigenere_decipher(message, keyword):
480 """Vigenere decipher
481
482 >>> vigenere_decipher('hfnlp', 'abc')
483 'hello'
484 """
485 shifts = [ord(l) - ord('a') for l in sanitise(keyword)]
486 pairs = zip(message, cycle(shifts))
487 return ''.join([caesar_decipher_letter(l, k) for l, k in pairs])
488
489 beaufort_encipher=vigenere_decipher
490 beaufort_decipher=vigenere_encipher
491
492
493 if __name__ == "__main__":
494 import doctest
495 doctest.testmod()