a141ff2c1f8fcc263a43884f307bfdb3f8003d8a
[cipher-tools.git] / cipher / column_transposition.py
1 from utilities import *
2 from language_models import *
3 import multiprocessing
4 from itertools import chain
5
6 from logger import logger
7
8 def transpositions_of(keyword):
9 """Finds the transpostions given by a keyword. For instance, the keyword
10 'clever' rearranges to 'celrv', so the first column (0) stays first, the
11 second column (1) moves to third, the third column (2) moves to second,
12 and so on.
13
14 If passed a tuple, assume it's already a transposition and just return it.
15
16 >>> transpositions_of('clever')
17 (0, 2, 1, 4, 3)
18 >>> transpositions_of('fred')
19 (3, 2, 0, 1)
20 >>> transpositions_of((3, 2, 0, 1))
21 (3, 2, 0, 1)
22 """
23 if isinstance(keyword, tuple):
24 return keyword
25 else:
26 key = deduplicate(keyword)
27 transpositions = tuple(key.index(l) for l in sorted(key))
28 return transpositions
29
30
31 transpositions = collections.defaultdict(list)
32 for word in keywords:
33 transpositions[transpositions_of(word)] += [word]
34
35
36 def pad(message_len, group_len, fillvalue):
37 padding_length = group_len - message_len % group_len
38 if padding_length == group_len: padding_length = 0
39 padding = ''
40 for i in range(padding_length):
41 if callable(fillvalue):
42 padding += fillvalue()
43 else:
44 padding += fillvalue
45 return padding
46
47 def column_transposition_encipher(message, keyword, fillvalue=' ',
48 fillcolumnwise=False,
49 emptycolumnwise=False):
50 """Enciphers using the column transposition cipher.
51 Message is padded to allow all rows to be the same length.
52
53 >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True)
54 'hlohr eltee '
55 >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True, emptycolumnwise=True)
56 'hellothere '
57 >>> column_transposition_encipher('hellothere', 'abcdef')
58 'hellothere '
59 >>> column_transposition_encipher('hellothere', 'abcde')
60 'hellothere'
61 >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
62 'hellothere'
63 >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
64 'hlohreltee'
65 >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
66 'htehlelroe'
67 >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
68 'hellothere'
69 >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=True)
70 'heotllrehe'
71 >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=False)
72 'holrhetlee'
73 >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=True)
74 'htleehoelr'
75 >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=False)
76 'hleolteher'
77 >>> column_transposition_encipher('hellothere', 'cleverly')
78 'hleolthre e '
79 >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
80 'hleolthre!e!'
81 >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue=lambda: '*')
82 'hleolthre*e*'
83 """
84 transpositions = transpositions_of(keyword)
85 message += pad(len(message), len(transpositions), fillvalue)
86 if fillcolumnwise:
87 rows = every_nth(message, len(message) // len(transpositions))
88 else:
89 rows = chunks(message, len(transpositions))
90 transposed = [transpose(r, transpositions) for r in rows]
91 if emptycolumnwise:
92 return combine_every_nth(transposed)
93 else:
94 return cat(chain(*transposed))
95
96 def column_transposition_decipher(message, keyword, fillvalue=' ',
97 fillcolumnwise=False,
98 emptycolumnwise=False):
99 """Deciphers using the column transposition cipher.
100 Message is padded to allow all rows to be the same length.
101
102 >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
103 'hellothere'
104 >>> column_transposition_decipher('hlohreltee', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
105 'hellothere'
106 >>> column_transposition_decipher('htehlelroe', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
107 'hellothere'
108 >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
109 'hellothere'
110 >>> column_transposition_decipher('heotllrehe', 'clever', fillcolumnwise=True, emptycolumnwise=True)
111 'hellothere'
112 >>> column_transposition_decipher('holrhetlee', 'clever', fillcolumnwise=True, emptycolumnwise=False)
113 'hellothere'
114 >>> column_transposition_decipher('htleehoelr', 'clever', fillcolumnwise=False, emptycolumnwise=True)
115 'hellothere'
116 >>> column_transposition_decipher('hleolteher', 'clever', fillcolumnwise=False, emptycolumnwise=False)
117 'hellothere'
118 """
119 transpositions = transpositions_of(keyword)
120 message += pad(len(message), len(transpositions), fillvalue)
121 if emptycolumnwise:
122 rows = every_nth(message, len(message) // len(transpositions))
123 else:
124 rows = chunks(message, len(transpositions))
125 untransposed = [untranspose(r, transpositions) for r in rows]
126 if fillcolumnwise:
127 return combine_every_nth(untransposed)
128 else:
129 return cat(chain(*untransposed))
130
131 def scytale_encipher(message, rows, fillvalue=' '):
132 """Enciphers using the scytale transposition cipher.
133 Message is padded with spaces to allow all rows to be the same length.
134
135 >>> scytale_encipher('thequickbrownfox', 3)
136 'tcnhkfeboqrxuo iw '
137 >>> scytale_encipher('thequickbrownfox', 4)
138 'tubnhirfecooqkwx'
139 >>> scytale_encipher('thequickbrownfox', 5)
140 'tubn hirf ecoo qkwx '
141 >>> scytale_encipher('thequickbrownfox', 6)
142 'tqcrnxhukof eibwo '
143 >>> scytale_encipher('thequickbrownfox', 7)
144 'tqcrnx hukof eibwo '
145 """
146 # transpositions = [i for i in range(math.ceil(len(message) / rows))]
147 # return column_transposition_encipher(message, transpositions,
148 # fillvalue=fillvalue, fillcolumnwise=False, emptycolumnwise=True)
149 transpositions = [i for i in range(rows)]
150 return column_transposition_encipher(message, transpositions,
151 fillvalue=fillvalue, fillcolumnwise=True, emptycolumnwise=False)
152
153 def scytale_decipher(message, rows):
154 """Deciphers using the scytale transposition cipher.
155 Assumes the message is padded so that all rows are the same length.
156
157 >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
158 'thequickbrownfox '
159 >>> scytale_decipher('tubnhirfecooqkwx', 4)
160 'thequickbrownfox'
161 >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
162 'thequickbrownfox '
163 >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
164 'thequickbrownfox '
165 >>> scytale_decipher('tqcrnx hukof eibwo ', 7)
166 'thequickbrownfox '
167 """
168 # transpositions = [i for i in range(math.ceil(len(message) / rows))]
169 # return column_transposition_decipher(message, transpositions,
170 # fillcolumnwise=False, emptycolumnwise=True)
171 transpositions = [i for i in range(rows)]
172 return column_transposition_decipher(message, transpositions,
173 fillcolumnwise=True, emptycolumnwise=False)
174
175
176 def column_transposition_break_mp(message, translist=transpositions,
177 fitness=Pbigrams, chunksize=500):
178 """Breaks a column transposition cipher using a dictionary and
179 n-gram frequency analysis
180
181 >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
182 "It is a truth universally acknowledged, that a single man in \
183 possession of a good fortune, must be in want of a wife. However \
184 little known the feelings or views of such a man may be on his \
185 first entering a neighbourhood, this truth is so well fixed in \
186 the minds of the surrounding families, that he is considered the \
187 rightful property of some one or other of their daughters."), \
188 'encipher'), \
189 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
190 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
191 (6, 1, 0, 4, 5, 3, 2): ['keyword']}) # doctest: +ELLIPSIS
192 (((2, 0, 5, 3, 1, 4, 6), False, False), -709.4646722...)
193 >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
194 "It is a truth universally acknowledged, that a single man in \
195 possession of a good fortune, must be in want of a wife. However \
196 little known the feelings or views of such a man may be on his \
197 first entering a neighbourhood, this truth is so well fixed in \
198 the minds of the surrounding families, that he is considered the \
199 rightful property of some one or other of their daughters."), \
200 'encipher'), \
201 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
202 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
203 (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
204 fitness=Ptrigrams) # doctest: +ELLIPSIS
205 (((2, 0, 5, 3, 1, 4, 6), False, False), -997.0129085...)
206 """
207 with multiprocessing.Pool() as pool:
208 helper_args = [(message, trans, fillcolumnwise, emptycolumnwise,
209 fitness)
210 for trans in translist
211 for fillcolumnwise in [True, False]
212 for emptycolumnwise in [True, False]]
213 # Gotcha: the helper function here needs to be defined at the top level
214 # (limitation of Pool.starmap)
215 breaks = pool.starmap(column_transposition_break_worker,
216 helper_args, chunksize)
217 return max(breaks, key=lambda k: k[1])
218 column_transposition_break = column_transposition_break_mp
219
220 def column_transposition_break_worker(message, transposition,
221 fillcolumnwise, emptycolumnwise, fitness):
222 plaintext = column_transposition_decipher(message, transposition,
223 fillcolumnwise=fillcolumnwise, emptycolumnwise=emptycolumnwise)
224 fit = fitness(sanitise(plaintext))
225 logger.debug('Column transposition break attempt using key {0} '
226 'gives fit of {1} and decrypt starting: {2}'.format(
227 transposition, fit,
228 sanitise(plaintext)[:50]))
229 return (transposition, fillcolumnwise, emptycolumnwise), fit
230
231
232 def scytale_break_mp(message, max_key_length=20,
233 fitness=Pbigrams, chunksize=500):
234 """Breaks a scytale cipher using a range of lengths and
235 n-gram frequency analysis
236
237 >>> scytale_break_mp(scytale_encipher(sanitise( \
238 "It is a truth universally acknowledged, that a single man in \
239 possession of a good fortune, must be in want of a wife. However \
240 little known the feelings or views of such a man may be on his \
241 first entering a neighbourhood, this truth is so well fixed in \
242 the minds of the surrounding families, that he is considered the \
243 rightful property of some one or other of their daughters."), \
244 5)) # doctest: +ELLIPSIS
245 (5, -709.4646722...)
246 >>> scytale_break_mp(scytale_encipher(sanitise( \
247 "It is a truth universally acknowledged, that a single man in \
248 possession of a good fortune, must be in want of a wife. However \
249 little known the feelings or views of such a man may be on his \
250 first entering a neighbourhood, this truth is so well fixed in \
251 the minds of the surrounding families, that he is considered the \
252 rightful property of some one or other of their daughters."), \
253 5), \
254 fitness=Ptrigrams) # doctest: +ELLIPSIS
255 (5, -997.0129085...)
256 """
257 with Pool() as pool:
258 helper_args = [(message, trans, False, True, fitness)
259 for trans in
260 [[col for col in range(math.ceil(len(message)/rows))]
261 for rows in range(1,max_key_length+1)]]
262 # Gotcha: the helper function here needs to be defined at the top level
263 # (limitation of Pool.starmap)
264 breaks = pool.starmap(column_transposition_break_worker,
265 helper_args, chunksize)
266 best = max(breaks, key=lambda k: k[1])
267 return math.trunc(len(message) / len(best[0][0])), best[1]
268 scytale_break = scytale_break_mp
269
270 if __name__ == "__main__":
271 import doctest