Merge branch 'transpositions' of git.njae.me.uk:national-cipher-challenge into transp...
[cipher-tools.git] / cipher.py
1 import string
2 import collections
3 import logging
4 from itertools import zip_longest, cycle, chain
5 from language_models import *
6
7 logger = logging.getLogger(__name__)
8 logger.addHandler(logging.FileHandler('cipher.log'))
9 logger.setLevel(logging.WARNING)
10 #logger.setLevel(logging.INFO)
11 #logger.setLevel(logging.DEBUG)
12
13
14 modular_division_table = [[0]*26 for _ in range(26)]
15 for a in range(26):
16 for b in range(26):
17 c = (a * b) % 26
18 modular_division_table[b][c] = a
19
20
21 def ngrams(text, n):
22 """Returns all n-grams of a text
23
24 >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
25 ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
26 'nf', 'fo', 'ox']
27 >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
28 ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
29 'rown', 'ownf', 'wnfo', 'nfox']
30 """
31 return [text[i:i+n] for i in range(len(text)-n+1)]
32
33 def every_nth(text, n, fillvalue=''):
34 """Returns n strings, each of which consists of every nth character,
35 starting with the 0th, 1st, 2nd, ... (n-1)th character
36
37 >>> every_nth(string.ascii_lowercase, 5)
38 ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
39 >>> every_nth(string.ascii_lowercase, 1)
40 ['abcdefghijklmnopqrstuvwxyz']
41 >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
42 ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
43 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
44 >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
45 ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
46 """
47 split_text = [text[i:i+n] for i in range(0, len(text), n)]
48 return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
49
50 def combine_every_nth(split_text):
51 """Reforms a text split into every_nth strings
52
53 >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
54 'abcdefghijklmnopqrstuvwxyz'
55 >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
56 'abcdefghijklmnopqrstuvwxyz'
57 >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
58 'abcdefghijklmnopqrstuvwxyz'
59 """
60 return ''.join([''.join(l)
61 for l in zip_longest(*split_text, fillvalue='')])
62
63 def chunks(text, n, fillvalue=None):
64 """Split a text into chunks of n characters
65
66 >>> chunks('abcdefghi', 3)
67 ['abc', 'def', 'ghi']
68 >>> chunks('abcdefghi', 4)
69 ['abcd', 'efgh', 'i']
70 >>> chunks('abcdefghi', 4, fillvalue='!')
71 ['abcd', 'efgh', 'i!!!']
72 """
73 if fillvalue:
74 padding = fillvalue[0] * (n - len(text) % n)
75 else:
76 padding = ''
77 return [(text+padding)[i:i+n] for i in range(0, len(text), n)]
78
79 def transpose(items, transposition):
80 """Moves items around according to the given transposition
81
82 >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3))
83 ['a', 'b', 'c', 'd']
84 >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0))
85 ['d', 'b', 'c', 'a']
86 >>> transpose([10,11,12,13,14,15], (3,2,4,1,5,0))
87 [13, 12, 14, 11, 15, 10]
88 """
89 transposed = [''] * len(transposition)
90 for p, t in enumerate(transposition):
91 transposed[p] = items[t]
92 return transposed
93
94 def untranspose(items, transposition):
95 """Undoes a transpose
96
97 >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
98 ['a', 'b', 'c', 'd']
99 >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
100 ['a', 'b', 'c', 'd']
101 >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
102 [10, 11, 12, 13, 14, 15]
103 """
104 transposed = [''] * len(transposition)
105 for p, t in enumerate(transposition):
106 transposed[t] = items[p]
107 return transposed
108
109 def deduplicate(text):
110 return list(collections.OrderedDict.fromkeys(text))
111
112
113 def caesar_encipher_letter(letter, shift):
114 """Encipher a letter, given a shift amount
115
116 >>> caesar_encipher_letter('a', 1)
117 'b'
118 >>> caesar_encipher_letter('a', 2)
119 'c'
120 >>> caesar_encipher_letter('b', 2)
121 'd'
122 >>> caesar_encipher_letter('x', 2)
123 'z'
124 >>> caesar_encipher_letter('y', 2)
125 'a'
126 >>> caesar_encipher_letter('z', 2)
127 'b'
128 >>> caesar_encipher_letter('z', -1)
129 'y'
130 >>> caesar_encipher_letter('a', -1)
131 'z'
132 """
133 if letter in string.ascii_letters:
134 if letter in string.ascii_uppercase:
135 alphabet_start = ord('A')
136 else:
137 alphabet_start = ord('a')
138 return chr(((ord(letter) - alphabet_start + shift) % 26) +
139 alphabet_start)
140 else:
141 return letter
142
143 def caesar_decipher_letter(letter, shift):
144 """Decipher a letter, given a shift amount
145
146 >>> caesar_decipher_letter('b', 1)
147 'a'
148 >>> caesar_decipher_letter('b', 2)
149 'z'
150 """
151 return caesar_encipher_letter(letter, -shift)
152
153 def caesar_encipher(message, shift):
154 """Encipher a message with the Caesar cipher of given shift
155
156 >>> caesar_encipher('abc', 1)
157 'bcd'
158 >>> caesar_encipher('abc', 2)
159 'cde'
160 >>> caesar_encipher('abcxyz', 2)
161 'cdezab'
162 >>> caesar_encipher('ab cx yz', 2)
163 'cd ez ab'
164 """
165 enciphered = [caesar_encipher_letter(l, shift) for l in message]
166 return ''.join(enciphered)
167
168 def caesar_decipher(message, shift):
169 """Encipher a message with the Caesar cipher of given shift
170
171 >>> caesar_decipher('bcd', 1)
172 'abc'
173 >>> caesar_decipher('cde', 2)
174 'abc'
175 >>> caesar_decipher('cd ez ab', 2)
176 'ab cx yz'
177 """
178 return caesar_encipher(message, -shift)
179
180 def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
181 """Encipher a letter, given a multiplier and adder
182
183 >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
184 for l in string.ascii_uppercase])
185 'HKNQTWZCFILORUXADGJMPSVYBE'
186 >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
187 for l in string.ascii_uppercase])
188 'FILORUXADGJMPSVYBEHKNQTWZC'
189 """
190 if letter in string.ascii_letters:
191 if letter in string.ascii_uppercase:
192 alphabet_start = ord('A')
193 else:
194 alphabet_start = ord('a')
195 letter_number = ord(letter) - alphabet_start
196 if one_based: letter_number += 1
197 cipher_number = (letter_number * multiplier + adder) % 26
198 if one_based: cipher_number -= 1
199 return chr(cipher_number % 26 + alphabet_start)
200 else:
201 return letter
202
203 def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
204 """Encipher a letter, given a multiplier and adder
205
206 >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
207 for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
208 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
209 >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
210 for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
211 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
212 """
213 if letter in string.ascii_letters:
214 if letter in string.ascii_uppercase:
215 alphabet_start = ord('A')
216 else:
217 alphabet_start = ord('a')
218 cipher_number = ord(letter) - alphabet_start
219 if one_based: cipher_number += 1
220 plaintext_number = (
221 modular_division_table[multiplier]
222 [(cipher_number - adder) % 26] )
223 if one_based: plaintext_number -= 1
224 return chr(plaintext_number % 26 + alphabet_start)
225 else:
226 return letter
227
228 def affine_encipher(message, multiplier=1, adder=0, one_based=True):
229 """Encipher a message
230
231 >>> affine_encipher('hours passed during which jerico tried every ' \
232 'trick he could think of', 15, 22, True)
233 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
234 """
235 enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
236 for l in message]
237 return ''.join(enciphered)
238
239 def affine_decipher(message, multiplier=1, adder=0, one_based=True):
240 """Decipher a message
241
242 >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
243 'jfaoe ls omytd jlaxe mh', 15, 22, True)
244 'hours passed during which jerico tried every trick he could think of'
245 """
246 enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
247 for l in message]
248 return ''.join(enciphered)
249
250
251 def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
252 """Find the cipher alphabet given a keyword.
253 wrap_alphabet controls how the rest of the alphabet is added
254 after the keyword.
255 0 : from 'a'
256 1 : from the last letter in the sanitised keyword
257 2 : from the largest letter in the sanitised keyword
258
259 >>> keyword_cipher_alphabet_of('bayes')
260 'bayescdfghijklmnopqrtuvwxz'
261 >>> keyword_cipher_alphabet_of('bayes', 0)
262 'bayescdfghijklmnopqrtuvwxz'
263 >>> keyword_cipher_alphabet_of('bayes', 1)
264 'bayestuvwxzcdfghijklmnopqr'
265 >>> keyword_cipher_alphabet_of('bayes', 2)
266 'bayeszcdfghijklmnopqrtuvwx'
267 """
268 if wrap_alphabet == 0:
269 cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
270 string.ascii_lowercase))
271 else:
272 if wrap_alphabet == 1:
273 last_keyword_letter = deduplicate(sanitise(keyword))[-1]
274 else:
275 last_keyword_letter = sorted(sanitise(keyword))[-1]
276 last_keyword_position = string.ascii_lowercase.find(
277 last_keyword_letter) + 1
278 cipher_alphabet = ''.join(
279 deduplicate(sanitise(keyword) +
280 string.ascii_lowercase[last_keyword_position:] +
281 string.ascii_lowercase))
282 return cipher_alphabet
283
284
285 def keyword_encipher(message, keyword, wrap_alphabet=0):
286 """Enciphers a message with a keyword substitution cipher.
287 wrap_alphabet controls how the rest of the alphabet is added
288 after the keyword.
289 0 : from 'a'
290 1 : from the last letter in the sanitised keyword
291 2 : from the largest letter in the sanitised keyword
292
293 >>> keyword_encipher('test message', 'bayes')
294 'rsqr ksqqbds'
295 >>> keyword_encipher('test message', 'bayes', 0)
296 'rsqr ksqqbds'
297 >>> keyword_encipher('test message', 'bayes', 1)
298 'lskl dskkbus'
299 >>> keyword_encipher('test message', 'bayes', 2)
300 'qspq jsppbcs'
301 """
302 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
303 cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
304 return message.lower().translate(cipher_translation)
305
306 def keyword_decipher(message, keyword, wrap_alphabet=0):
307 """Deciphers a message with a keyword substitution cipher.
308 wrap_alphabet controls how the rest of the alphabet is added
309 after the keyword.
310 0 : from 'a'
311 1 : from the last letter in the sanitised keyword
312 2 : from the largest letter in the sanitised keyword
313
314 >>> keyword_decipher('rsqr ksqqbds', 'bayes')
315 'test message'
316 >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
317 'test message'
318 >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
319 'test message'
320 >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
321 'test message'
322 """
323 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
324 cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
325 return message.lower().translate(cipher_translation)
326
327 def scytale_encipher(message, rows):
328 """Enciphers using the scytale transposition cipher.
329 Message is padded with spaces to allow all rows to be the same length.
330
331 >>> scytale_encipher('thequickbrownfox', 3)
332 'tcnhkfeboqrxuo iw '
333 >>> scytale_encipher('thequickbrownfox', 4)
334 'tubnhirfecooqkwx'
335 >>> scytale_encipher('thequickbrownfox', 5)
336 'tubn hirf ecoo qkwx '
337 >>> scytale_encipher('thequickbrownfox', 6)
338 'tqcrnxhukof eibwo '
339 >>> scytale_encipher('thequickbrownfox', 7)
340 'tqcrnx hukof eibwo '
341 """
342 if len(message) % rows != 0:
343 message += ' '*(rows - len(message) % rows)
344 row_length = round(len(message) / rows)
345 slices = [message[i:i+row_length]
346 for i in range(0, len(message), row_length)]
347 return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])
348
349 def scytale_decipher(message, rows):
350 """Deciphers using the scytale transposition cipher.
351 Assumes the message is padded so that all rows are the same length.
352
353 >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
354 'thequickbrownfox '
355 >>> scytale_decipher('tubnhirfecooqkwx', 4)
356 'thequickbrownfox'
357 >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
358 'thequickbrownfox '
359 >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
360 'thequickbrownfox '
361 >>> scytale_decipher('tqcrnx hukof eibwo ', 7)
362 'thequickbrownfox '
363 """
364 cols = round(len(message) / rows)
365 columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
366 return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
367
368
369 def transpositions_of(keyword):
370 """Finds the transpostions given by a keyword. For instance, the keyword
371 'clever' rearranges to 'celrv', so the first column (0) stays first, the
372 second column (1) moves to third, the third column (2) moves to second,
373 and so on.
374
375 If passed a tuple, assume it's already a transposition and just return it.
376
377 >>> transpositions_of('clever')
378 (0, 2, 1, 4, 3)
379 >>> transpositions_of('fred')
380 (3, 2, 0, 1)
381 >>> transpositions_of((3, 2, 0, 1))
382 (3, 2, 0, 1)
383 """
384 if isinstance(keyword, tuple):
385 return keyword
386 else:
387 key = deduplicate(keyword)
388 transpositions = tuple(key.index(l) for l in sorted(key))
389 return transpositions
390
391 def pad(message_len, group_len, fillvalue):
392 padding_length = group_len - message_len % group_len
393 if padding_length == group_len: padding_length = 0
394 padding = ''
395 for i in range(padding_length):
396 if callable(fillvalue):
397 padding += fillvalue()
398 else:
399 padding += fillvalue
400 return padding
401
402 def column_transposition_encipher(message, keyword, fillvalue=' ',
403 fillcolumnwise=False,
404 emptycolumnwise=False):
405 """Enciphers using the column transposition cipher.
406 Message is padded to allow all rows to be the same length.
407 """
408 # >>> column_transposition_encipher('hellothere', 'clever')
409 # 'hleolteher'
410 # >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
411 # 'hleolthre!e!'
412 # >>> column_transposition_encipher('hellothere', 'clever', columnwise=True)
413 # 'htleehoelr'
414 # """
415 transpositions = transpositions_of(keyword)
416 message += pad(len(message), len(transpositions), fillvalue)
417 if fillcolumnwise:
418 rows = every_nth(message, len(transpositions))
419 else:
420 rows = chunks(mesage, len(transpositions))
421 columns = every_nth(message, len(transpositions), fillvalue=fillvalue)
422 transposed = [transpose(r, transpositions) for r in rows]
423 if emptycolumnwise:
424 return combine_every_nth(transposed)
425 else:
426 return ''.join(chain(*transposed))
427
428 def column_transposition_decipher(message, keyword, fillvalue=' ',
429 columnwise=False):
430 """Deciphers using the column transposition cipher.
431 Message is padded to allow all rows to be the same length.
432 """
433 # >>> column_transposition_decipher('hleolteher', 'clever')
434 # 'hellothere'
435 # >>> column_transposition_decipher('hleolthre!e!', 'cleverly', fillvalue='?')
436 # 'hellothere!!'
437 # >>> column_transposition_decipher('htleehoelr', 'clever', columnwise=True)
438 # 'hellothere'
439 # """
440 transpositions = transpositions_of(keyword)
441 if columnwise:
442 columns = chunks(message, int(len(message) / len(transpositions)))
443 else:
444 columns = every_nth(message, len(transpositions), fillvalue=fillvalue)
445 untransposed_columns = untranspose(columns, transpositions)
446 return combine_every_nth(untransposed_columns)
447
448
449 def vigenere_encipher(message, keyword):
450 """Vigenere encipher
451
452 >>> vigenere_encipher('hello', 'abc')
453 'hfnlp'
454 """
455 shifts = [ord(l) - ord('a') for l in sanitise(keyword)]
456 pairs = zip(message, cycle(shifts))
457 return ''.join([caesar_encipher_letter(l, k) for l, k in pairs])
458
459 def vigenere_decipher(message, keyword):
460 """Vigenere decipher
461
462 >>> vigenere_decipher('hfnlp', 'abc')
463 'hello'
464 """
465 shifts = [ord(l) - ord('a') for l in sanitise(keyword)]
466 pairs = zip(message, cycle(shifts))
467 return ''.join([caesar_decipher_letter(l, k) for l, k in pairs])
468
469 beaufort_encipher=vigenere_decipher
470 beaufort_decipher=vigenere_encipher
471
472
473 if __name__ == "__main__":
474 import doctest
475 doctest.testmod()