Moving to a different machine
[cipher-tools.git] / cipher.py
1 import string
2 import collections
3 import logging
4 from itertools import zip_longest, cycle, chain
5 from counts import *
6
7
8 logger = logging.getLogger(__name__)
9 logger.addHandler(logging.FileHandler('cipher.log'))
10 logger.setLevel(logging.WARNING)
11 #logger.setLevel(logging.INFO)
12 #logger.setLevel(logging.DEBUG)
13
14
15 english_counts = collections.defaultdict(int)
16 with open('count_1l.txt', 'r') as f:
17 for line in f:
18 (letter, count) = line.split("\t")
19 english_counts[letter] = int(count)
20 normalised_english_counts = norms.normalise(english_counts)
21
22 choices, weights = zip(*weighted_choices)
23 cumdist = list(itertools.accumulate(weights))
24 x = random.random() * cumdist[-1]
25 choices[bisect.bisect(cumdist, x)]
26
27
28 modular_division_table = [[0]*26 for x in range(26)]
29 for a in range(26):
30 for b in range(26):
31 c = (a * b) % 26
32 modular_division_table[b][c] = a
33
34 def letters(text):
35 """Remove all non-alphabetic characters from a text
36 >>> letters('The Quick')
37 'TheQuick'
38 >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
39 'TheQuickBROWNfoxjumpedoverthelazyDOG'
40 """
41 return ''.join([c for c in text if c in string.ascii_letters])
42
43 def sanitise(text):
44 """Remove all non-alphabetic characters and convert the text to lowercase
45
46 >>> sanitise('The Quick')
47 'thequick'
48 >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
49 'thequickbrownfoxjumpedoverthelazydog'
50 """
51 # sanitised = [c.lower() for c in text if c in string.ascii_letters]
52 # return ''.join(sanitised)
53 return letters(text).lower()
54
55 def ngrams(text, n):
56 """Returns all n-grams of a text
57
58 >>> ngrams(sanitise('the quick brown fox'), 2) # doctest: +NORMALIZE_WHITESPACE
59 ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn',
60 'nf', 'fo', 'ox']
61 >>> ngrams(sanitise('the quick brown fox'), 4) # doctest: +NORMALIZE_WHITESPACE
62 ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow',
63 'rown', 'ownf', 'wnfo', 'nfox']
64 """
65 return [text[i:i+n] for i in range(len(text)-n+1)]
66
67 def every_nth(text, n, fillvalue=''):
68 """Returns n strings, each of which consists of every nth character,
69 starting with the 0th, 1st, 2nd, ... (n-1)th character
70
71 >>> every_nth(string.ascii_lowercase, 5)
72 ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
73 >>> every_nth(string.ascii_lowercase, 1)
74 ['abcdefghijklmnopqrstuvwxyz']
75 >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
76 ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
77 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
78 >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
79 ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
80 """
81 split_text = [text[i:i+n] for i in range(0, len(text), n)]
82 return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
83
84 def combine_every_nth(split_text):
85 """Reforms a text split into every_nth strings
86
87 >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
88 'abcdefghijklmnopqrstuvwxyz'
89 >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
90 'abcdefghijklmnopqrstuvwxyz'
91 >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
92 'abcdefghijklmnopqrstuvwxyz'
93 """
94 return ''.join([''.join(l)
95 for l in zip_longest(*split_text, fillvalue='')])
96
97 def chunks(text, n, fillvalue=None):
98 """Split a text into chunks of n characters
99
100 >>> chunks('abcdefghi', 3)
101 ['abc', 'def', 'ghi']
102 >>> chunks('abcdefghi', 4)
103 ['abcd', 'efgh', 'i']
104 >>> chunks('abcdefghi', 4, fillvalue='!')
105 ['abcd', 'efgh', 'i!!!']
106 """
107 if fillvalue:
108 padding = fillvalue[0] * (n - len(text) % n)
109 else:
110 padding = ''
111 return [(text+padding)[i:i+n] for i in range(0, len(text), n)]
112
113 def transpose(items, transposition):
114 """Moves items around according to the given transposition
115
116 >>> transpose(['a', 'b', 'c', 'd'], (0,1,2,3))
117 ['a', 'b', 'c', 'd']
118 >>> transpose(['a', 'b', 'c', 'd'], (3,1,2,0))
119 ['d', 'b', 'c', 'a']
120 >>> transpose([10,11,12,13,14,15], (3,2,4,1,5,0))
121 [13, 12, 14, 11, 15, 10]
122 """
123 transposed = [''] * len(transposition)
124 for p, t in enumerate(transposition):
125 transposed[p] = items[t]
126 return transposed
127
128 def untranspose(items, transposition):
129 """Undoes a transpose
130
131 >>> untranspose(['a', 'b', 'c', 'd'], [0,1,2,3])
132 ['a', 'b', 'c', 'd']
133 >>> untranspose(['d', 'b', 'c', 'a'], [3,1,2,0])
134 ['a', 'b', 'c', 'd']
135 >>> untranspose([13, 12, 14, 11, 15, 10], [3,2,4,1,5,0])
136 [10, 11, 12, 13, 14, 15]
137 """
138 transposed = [''] * len(transposition)
139 for p, t in enumerate(transposition):
140 transposed[t] = items[p]
141 return transposed
142
143 def deduplicate(text):
144 return list(collections.OrderedDict.fromkeys(text))
145
146
147 def caesar_encipher_letter(letter, shift):
148 """Encipher a letter, given a shift amount
149
150 >>> caesar_encipher_letter('a', 1)
151 'b'
152 >>> caesar_encipher_letter('a', 2)
153 'c'
154 >>> caesar_encipher_letter('b', 2)
155 'd'
156 >>> caesar_encipher_letter('x', 2)
157 'z'
158 >>> caesar_encipher_letter('y', 2)
159 'a'
160 >>> caesar_encipher_letter('z', 2)
161 'b'
162 >>> caesar_encipher_letter('z', -1)
163 'y'
164 >>> caesar_encipher_letter('a', -1)
165 'z'
166 """
167 if letter in string.ascii_letters:
168 if letter in string.ascii_uppercase:
169 alphabet_start = ord('A')
170 else:
171 alphabet_start = ord('a')
172 return chr(((ord(letter) - alphabet_start + shift) % 26) +
173 alphabet_start)
174 else:
175 return letter
176
177 def caesar_decipher_letter(letter, shift):
178 """Decipher a letter, given a shift amount
179
180 >>> caesar_decipher_letter('b', 1)
181 'a'
182 >>> caesar_decipher_letter('b', 2)
183 'z'
184 """
185 return caesar_encipher_letter(letter, -shift)
186
187 def caesar_encipher(message, shift):
188 """Encipher a message with the Caesar cipher of given shift
189
190 >>> caesar_encipher('abc', 1)
191 'bcd'
192 >>> caesar_encipher('abc', 2)
193 'cde'
194 >>> caesar_encipher('abcxyz', 2)
195 'cdezab'
196 >>> caesar_encipher('ab cx yz', 2)
197 'cd ez ab'
198 """
199 enciphered = [caesar_encipher_letter(l, shift) for l in message]
200 return ''.join(enciphered)
201
202 def caesar_decipher(message, shift):
203 """Encipher a message with the Caesar cipher of given shift
204
205 >>> caesar_decipher('bcd', 1)
206 'abc'
207 >>> caesar_decipher('cde', 2)
208 'abc'
209 >>> caesar_decipher('cd ez ab', 2)
210 'ab cx yz'
211 """
212 return caesar_encipher(message, -shift)
213
214 def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
215 """Encipher a letter, given a multiplier and adder
216
217 >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
218 for l in string.ascii_uppercase])
219 'HKNQTWZCFILORUXADGJMPSVYBE'
220 >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
221 for l in string.ascii_uppercase])
222 'FILORUXADGJMPSVYBEHKNQTWZC'
223 """
224 if letter in string.ascii_letters:
225 if letter in string.ascii_uppercase:
226 alphabet_start = ord('A')
227 else:
228 alphabet_start = ord('a')
229 letter_number = ord(letter) - alphabet_start
230 if one_based: letter_number += 1
231 cipher_number = (letter_number * multiplier + adder) % 26
232 if one_based: cipher_number -= 1
233 return chr(cipher_number % 26 + alphabet_start)
234 else:
235 return letter
236
237 def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
238 """Encipher a letter, given a multiplier and adder
239
240 >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
241 for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
242 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
243 >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
244 for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
245 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
246 """
247 if letter in string.ascii_letters:
248 if letter in string.ascii_uppercase:
249 alphabet_start = ord('A')
250 else:
251 alphabet_start = ord('a')
252 cipher_number = ord(letter) - alphabet_start
253 if one_based: cipher_number += 1
254 plaintext_number = (
255 modular_division_table[multiplier]
256 [(cipher_number - adder) % 26] )
257 if one_based: plaintext_number -= 1
258 return chr(plaintext_number % 26 + alphabet_start)
259 else:
260 return letter
261
262 def affine_encipher(message, multiplier=1, adder=0, one_based=True):
263 """Encipher a message
264
265 >>> affine_encipher('hours passed during which jerico tried every ' \
266 'trick he could think of', 15, 22, True)
267 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
268 """
269 enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
270 for l in message]
271 return ''.join(enciphered)
272
273 def affine_decipher(message, multiplier=1, adder=0, one_based=True):
274 """Decipher a message
275
276 >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
277 'jfaoe ls omytd jlaxe mh', 15, 22, True)
278 'hours passed during which jerico tried every trick he could think of'
279 """
280 enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
281 for l in message]
282 return ''.join(enciphered)
283
284
285 def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
286 """Find the cipher alphabet given a keyword.
287 wrap_alphabet controls how the rest of the alphabet is added
288 after the keyword.
289 0 : from 'a'
290 1 : from the last letter in the sanitised keyword
291 2 : from the largest letter in the sanitised keyword
292
293 >>> keyword_cipher_alphabet_of('bayes')
294 'bayescdfghijklmnopqrtuvwxz'
295 >>> keyword_cipher_alphabet_of('bayes', 0)
296 'bayescdfghijklmnopqrtuvwxz'
297 >>> keyword_cipher_alphabet_of('bayes', 1)
298 'bayestuvwxzcdfghijklmnopqr'
299 >>> keyword_cipher_alphabet_of('bayes', 2)
300 'bayeszcdfghijklmnopqrtuvwx'
301 """
302 if wrap_alphabet == 0:
303 cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
304 string.ascii_lowercase))
305 else:
306 if wrap_alphabet == 1:
307 last_keyword_letter = deduplicate(sanitise(keyword))[-1]
308 else:
309 last_keyword_letter = sorted(sanitise(keyword))[-1]
310 last_keyword_position = string.ascii_lowercase.find(
311 last_keyword_letter) + 1
312 cipher_alphabet = ''.join(
313 deduplicate(sanitise(keyword) +
314 string.ascii_lowercase[last_keyword_position:] +
315 string.ascii_lowercase))
316 return cipher_alphabet
317
318
319 def keyword_encipher(message, keyword, wrap_alphabet=0):
320 """Enciphers a message with a keyword substitution cipher.
321 wrap_alphabet controls how the rest of the alphabet is added
322 after the keyword.
323 0 : from 'a'
324 1 : from the last letter in the sanitised keyword
325 2 : from the largest letter in the sanitised keyword
326
327 >>> keyword_encipher('test message', 'bayes')
328 'rsqr ksqqbds'
329 >>> keyword_encipher('test message', 'bayes', 0)
330 'rsqr ksqqbds'
331 >>> keyword_encipher('test message', 'bayes', 1)
332 'lskl dskkbus'
333 >>> keyword_encipher('test message', 'bayes', 2)
334 'qspq jsppbcs'
335 """
336 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
337 cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
338 return message.lower().translate(cipher_translation)
339
340 def keyword_decipher(message, keyword, wrap_alphabet=0):
341 """Deciphers a message with a keyword substitution cipher.
342 wrap_alphabet controls how the rest of the alphabet is added
343 after the keyword.
344 0 : from 'a'
345 1 : from the last letter in the sanitised keyword
346 2 : from the largest letter in the sanitised keyword
347
348 >>> keyword_decipher('rsqr ksqqbds', 'bayes')
349 'test message'
350 >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
351 'test message'
352 >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
353 'test message'
354 >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
355 'test message'
356 """
357 cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
358 cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
359 return message.lower().translate(cipher_translation)
360
361 def scytale_encipher(message, rows):
362 """Enciphers using the scytale transposition cipher.
363 Message is padded with spaces to allow all rows to be the same length.
364
365 >>> scytale_encipher('thequickbrownfox', 3)
366 'tcnhkfeboqrxuo iw '
367 >>> scytale_encipher('thequickbrownfox', 4)
368 'tubnhirfecooqkwx'
369 >>> scytale_encipher('thequickbrownfox', 5)
370 'tubn hirf ecoo qkwx '
371 >>> scytale_encipher('thequickbrownfox', 6)
372 'tqcrnxhukof eibwo '
373 >>> scytale_encipher('thequickbrownfox', 7)
374 'tqcrnx hukof eibwo '
375 """
376 if len(message) % rows != 0:
377 message += ' '*(rows - len(message) % rows)
378 row_length = round(len(message) / rows)
379 slices = [message[i:i+row_length]
380 for i in range(0, len(message), row_length)]
381 return ''.join([''.join(r) for r in zip_longest(*slices, fillvalue='')])
382
383 def scytale_decipher(message, rows):
384 """Deciphers using the scytale transposition cipher.
385 Assumes the message is padded so that all rows are the same length.
386
387 >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
388 'thequickbrownfox '
389 >>> scytale_decipher('tubnhirfecooqkwx', 4)
390 'thequickbrownfox'
391 >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
392 'thequickbrownfox '
393 >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
394 'thequickbrownfox '
395 >>> scytale_decipher('tqcrnx hukof eibwo ', 7)
396 'thequickbrownfox '
397 """
398 cols = round(len(message) / rows)
399 columns = [message[i:i+rows] for i in range(0, cols * rows, rows)]
400 return ''.join([''.join(c) for c in zip_longest(*columns, fillvalue='')])
401
402
403 def transpositions_of(keyword):
404 """Finds the transpostions given by a keyword. For instance, the keyword
405 'clever' rearranges to 'celrv', so the first column (0) stays first, the
406 second column (1) moves to third, the third column (2) moves to second,
407 and so on.
408
409 If passed a tuple, assume it's already a transposition and just return it.
410
411 >>> transpositions_of('clever')
412 (0, 2, 1, 4, 3)
413 >>> transpositions_of('fred')
414 (3, 2, 0, 1)
415 >>> transpositions_of((3, 2, 0, 1))
416 (3, 2, 0, 1)
417 """
418 if isinstance(keyword, tuple):
419 return keyword
420 else:
421 key = deduplicate(keyword)
422 transpositions = tuple(key.index(l) for l in sorted(key))
423 return transpositions
424
425 def pad(message_len, group_len, fillvalue):
426 padding_length = group_len - message_len % group_len
427 if padding_length == group_len: padding_length = 0
428 padding = ''
429 for i in range(padding_length):
430 if callable(fillvalue):
431 padding += fillvalue()
432 else:
433 padding += fillvalue
434 return padding
435
436 def column_transposition_encipher(message, keyword, fillvalue=' ',
437 fillcolumnwise=False,
438 emptycolumnwise=False):
439 """Enciphers using the column transposition cipher.
440 Message is padded to allow all rows to be the same length.
441
442 >>> column_transposition_encipher('hellothere', 'clever')
443 'hleolteher'
444 >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
445 'hleolthre!e!'
446 >>> column_transposition_encipher('hellothere', 'clever', columnwise=True)
447 'htleehoelr'
448 """
449 transpositions = transpositions_of(keyword)
450 message += pad(len(message), len(transpositions), fillvalue)
451 if fillcolumnwise:
452 rows = every_nth(message, len(transpositions))
453 else:
454 rows = chunks(mesage, len(transpositions))
455 columns = every_nth(message, len(transpositions), fillvalue=fillvalue)
456 transposed = [transpose(r, transpositions) for r in rows]
457 if emptycolumnwise:
458 return combine_every_nth(transposed)
459 else:
460 return ''.join(chain(*transposed))
461
462 def column_transposition_decipher(message, keyword, fillvalue=' ',
463 columnwise=False):
464 """Deciphers using the column transposition cipher.
465 Message is padded to allow all rows to be the same length.
466
467 >>> column_transposition_decipher('hleolteher', 'clever')
468 'hellothere'
469 >>> column_transposition_decipher('hleolthre!e!', 'cleverly', fillvalue='?')
470 'hellothere!!'
471 >>> column_transposition_decipher('htleehoelr', 'clever', columnwise=True)
472 'hellothere'
473 """
474 transpositions = transpositions_of(keyword)
475 if columnwise:
476 columns = chunks(message, int(len(message) / len(transpositions)))
477 else:
478 columns = every_nth(message, len(transpositions), fillvalue=fillvalue)
479 untransposed_columns = untranspose(columns, transpositions)
480 return combine_every_nth(untransposed_columns)
481
482
483 def vigenere_encipher(message, keyword):
484 """Vigenere encipher
485
486 >>> vigenere_encipher('hello', 'abc')
487 'hfnlp'
488 """
489 shifts = [ord(l) - ord('a') for l in sanitise(keyword)]
490 pairs = zip(message, cycle(shifts))
491 return ''.join([caesar_encipher_letter(l, k) for l, k in pairs])
492
493 def vigenere_decipher(message, keyword):
494 """Vigenere decipher
495
496 >>> vigenere_decipher('hfnlp', 'abc')
497 'hello'
498 """
499 shifts = [ord(l) - ord('a') for l in sanitise(keyword)]
500 pairs = zip(message, cycle(shifts))
501 return ''.join([caesar_decipher_letter(l, k) for l, k in pairs])
502
503 beaufort_encipher=vigenere_decipher
504 beaufort_decipher=vigenere_encipher
505
506
507 if __name__ == "__main__":
508 import doctest
509 doctest.testmod()