Word segmentation
[cipher-training.git] / cipher.py
1 """A set of ciphers with implementations for both enciphering and deciphering
2 them. See cipherbreak for automatic breaking of these ciphers
3 """
4
5 import string
6 import collections
7 from enum import Enum
8 from itertools import zip_longest, cycle, chain
9 from language_models import unaccent, sanitise
10
11
12 modular_division_table = [[0]*26 for _ in range(26)]
13 for a in range(26):
14 for b in range(26):
15 c = (a * b) % 26
16 modular_division_table[b][c] = a
17
18
19 def every_nth(text, n, fillvalue=''):
20 """Returns n strings, each of which consists of every nth character,
21 starting with the 0th, 1st, 2nd, ... (n-1)th character
22
23 >>> every_nth(string.ascii_lowercase, 5)
24 ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
25 >>> every_nth(string.ascii_lowercase, 1)
26 ['abcdefghijklmnopqrstuvwxyz']
27 >>> every_nth(string.ascii_lowercase, 26) # doctest: +NORMALIZE_WHITESPACE
28 ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
29 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
30 >>> every_nth(string.ascii_lowercase, 5, fillvalue='!')
31 ['afkpuz', 'bglqv!', 'chmrw!', 'dinsx!', 'ejoty!']
32 """
33 split_text = chunks(text, n, fillvalue)
34 return [''.join(l) for l in zip_longest(*split_text, fillvalue=fillvalue)]
35
36 def combine_every_nth(split_text):
37 """Reforms a text split into every_nth strings
38
39 >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
40 'abcdefghijklmnopqrstuvwxyz'
41 >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
42 'abcdefghijklmnopqrstuvwxyz'
43 >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
44 'abcdefghijklmnopqrstuvwxyz'
45 """
46 return ''.join([''.join(l)
47 for l in zip_longest(*split_text, fillvalue='')])
48
49 def chunks(text, n, fillvalue=None):
50 """Split a text into chunks of n characters
51
52 >>> chunks('abcdefghi', 3)
53 ['abc', 'def', 'ghi']
54 >>> chunks('abcdefghi', 4)
55 ['abcd', 'efgh', 'i']
56 >>> chunks('abcdefghi', 4, fillvalue='!')
57 ['abcd', 'efgh', 'i!!!']
58 """
59 if fillvalue:
60 padding = fillvalue[0] * (n - len(text) % n)
61 else:
62 padding = ''
63 return [(text+padding)[i:i+n] for i in range(0, len(text), n)]
64
65 def deduplicate(text):
66 """If a string contains duplicate letters, remove all but the first. Retain
67 the order of the letters.
68
69 >>> deduplicate('cat')
70 ['c', 'a', 't']
71 >>> deduplicate('happy')
72 ['h', 'a', 'p', 'y']
73 >>> deduplicate('cattca')
74 ['c', 'a', 't']
75 """
76 return list(collections.OrderedDict.fromkeys(text))
77
78
79 def caesar_encipher_letter(accented_letter, shift):
80 """Encipher a letter, given a shift amount
81
82 >>> caesar_encipher_letter('a', 1)
83 'b'
84 >>> caesar_encipher_letter('a', 2)
85 'c'
86 >>> caesar_encipher_letter('b', 2)
87 'd'
88 >>> caesar_encipher_letter('x', 2)
89 'z'
90 >>> caesar_encipher_letter('y', 2)
91 'a'
92 >>> caesar_encipher_letter('z', 2)
93 'b'
94 >>> caesar_encipher_letter('z', -1)
95 'y'
96 >>> caesar_encipher_letter('a', -1)
97 'z'
98 >>> caesar_encipher_letter('A', 1)
99 'B'
100 >>> caesar_encipher_letter('é', 1)
101 'f'
102 """
103 letter = unaccent(accented_letter)
104 if letter in string.ascii_letters:
105 if letter in string.ascii_uppercase:
106 alphabet_start = ord('A')
107 else:
108 alphabet_start = ord('a')
109 return chr(((ord(letter) - alphabet_start + shift) % 26) +
110 alphabet_start)
111 else:
112 return letter
113
114 def caesar_decipher_letter(letter, shift):
115 """Decipher a letter, given a shift amount
116
117 >>> caesar_decipher_letter('b', 1)
118 'a'
119 >>> caesar_decipher_letter('b', 2)
120 'z'
121 """
122 return caesar_encipher_letter(letter, -shift)
123
124 def caesar_encipher(message, shift):
125 """Encipher a message with the Caesar cipher of given shift
126
127 >>> caesar_encipher('abc', 1)
128 'bcd'
129 >>> caesar_encipher('abc', 2)
130 'cde'
131 >>> caesar_encipher('abcxyz', 2)
132 'cdezab'
133 >>> caesar_encipher('ab cx yz', 2)
134 'cd ez ab'
135 >>> caesar_encipher('Héllo World!', 2)
136 'Jgnnq Yqtnf!'
137 """
138 enciphered = [caesar_encipher_letter(l, shift) for l in message]
139 return ''.join(enciphered)
140
141 def caesar_decipher(message, shift):
142 """Decipher a message with the Caesar cipher of given shift
143
144 >>> caesar_decipher('bcd', 1)
145 'abc'
146 >>> caesar_decipher('cde', 2)
147 'abc'
148 >>> caesar_decipher('cd ez ab', 2)
149 'ab cx yz'
150 >>> caesar_decipher('Jgnnq Yqtnf!', 2)
151 'Hello World!'
152 """
153 return caesar_encipher(message, -shift)
154
155 def affine_encipher_letter(accented_letter, multiplier=1, adder=0,
156 one_based=True):
157 """Encipher a letter, given a multiplier and adder
158 >>> ''.join([affine_encipher_letter(l, 3, 5, True) \
159 for l in string.ascii_uppercase])
160 'HKNQTWZCFILORUXADGJMPSVYBE'
161 >>> ''.join([affine_encipher_letter(l, 3, 5, False) \
162 for l in string.ascii_uppercase])
163 'FILORUXADGJMPSVYBEHKNQTWZC'
164 """
165 letter = unaccent(accented_letter)
166 if letter in string.ascii_letters:
167 if letter in string.ascii_uppercase:
168 alphabet_start = ord('A')
169 else:
170 alphabet_start = ord('a')
171 letter_number = ord(letter) - alphabet_start
172 if one_based: letter_number += 1
173 cipher_number = (letter_number * multiplier + adder) % 26
174 if one_based: cipher_number -= 1
175 return chr(cipher_number % 26 + alphabet_start)
176 else:
177 return letter
178
179 def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
180 """Encipher a letter, given a multiplier and adder
181
182 >>> ''.join([affine_decipher_letter(l, 3, 5, True) \
183 for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
184 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
185 >>> ''.join([affine_decipher_letter(l, 3, 5, False) \
186 for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
187 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
188 """
189 if letter in string.ascii_letters:
190 if letter in string.ascii_uppercase:
191 alphabet_start = ord('A')
192 else:
193 alphabet_start = ord('a')
194 cipher_number = ord(letter) - alphabet_start
195 if one_based: cipher_number += 1
196 plaintext_number = (
197 modular_division_table[multiplier]
198 [(cipher_number - adder) % 26]
199 )
200 if one_based: plaintext_number -= 1
201 return chr(plaintext_number % 26 + alphabet_start)
202 else:
203 return letter
204
205 def affine_encipher(message, multiplier=1, adder=0, one_based=True):
206 """Encipher a message
207
208 >>> affine_encipher('hours passed during which jerico tried every ' \
209 'trick he could think of', 15, 22, True)
210 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
211 """
212 enciphered = [affine_encipher_letter(l, multiplier, adder, one_based)
213 for l in message]
214 return ''.join(enciphered)
215
216 def affine_decipher(message, multiplier=1, adder=0, one_based=True):
217 """Decipher a message
218
219 >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg ' \
220 'jfaoe ls omytd jlaxe mh', 15, 22, True)
221 'hours passed during which jerico tried every trick he could think of'
222 """
223 enciphered = [affine_decipher_letter(l, multiplier, adder, one_based)
224 for l in message]
225 return ''.join(enciphered)
226
227
228 if __name__ == "__main__":
229 import doctest
230 doctest.testmod()