d81903e9bf3cf7f8b93bb7a1e6a5caaad69272e7
[szyfrow.git] / column_transposition.py
1 import math
2 import multiprocessing
3 from itertools import chain
4 from szyfrow.support.utilities import *
5 from szyfrow.support.language_models import *
6
7 def transpositions_of(keyword):
8 """Finds the transpostions given by a keyword. For instance, the keyword
9 'clever' rearranges to 'celrv', so the first column (0) stays first, the
10 second column (1) moves to third, the third column (2) moves to second,
11 and so on.
12
13 If passed a tuple, assume it's already a transposition and just return it.
14
15 >>> transpositions_of('clever')
16 (0, 2, 1, 4, 3)
17 >>> transpositions_of('fred')
18 (3, 2, 0, 1)
19 >>> transpositions_of((3, 2, 0, 1))
20 (3, 2, 0, 1)
21 """
22 if isinstance(keyword, tuple):
23 return keyword
24 else:
25 key = deduplicate(keyword)
26 transpositions = tuple(key.index(l) for l in sorted(key))
27 return transpositions
28
29
30 transpositions = collections.defaultdict(list)
31 for word in keywords:
32 transpositions[transpositions_of(word)] += [word]
33
34
35 def pad(message_len, group_len, fillvalue):
36 padding_length = group_len - message_len % group_len
37 if padding_length == group_len: padding_length = 0
38 padding = ''
39 for i in range(padding_length):
40 if callable(fillvalue):
41 padding += fillvalue()
42 else:
43 padding += fillvalue
44 return padding
45
46 def column_transposition_encipher(message, keyword, fillvalue=' ',
47 fillcolumnwise=False,
48 emptycolumnwise=False):
49 """Enciphers using the column transposition cipher.
50 Message is padded to allow all rows to be the same length.
51
52 >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True)
53 'hlohr eltee '
54 >>> column_transposition_encipher('hellothere', 'abcdef', fillcolumnwise=True, emptycolumnwise=True)
55 'hellothere '
56 >>> column_transposition_encipher('hellothere', 'abcdef')
57 'hellothere '
58 >>> column_transposition_encipher('hellothere', 'abcde')
59 'hellothere'
60 >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
61 'hellothere'
62 >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
63 'hlohreltee'
64 >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
65 'htehlelroe'
66 >>> column_transposition_encipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
67 'hellothere'
68 >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=True)
69 'heotllrehe'
70 >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=True, emptycolumnwise=False)
71 'holrhetlee'
72 >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=True)
73 'htleehoelr'
74 >>> column_transposition_encipher('hellothere', 'clever', fillcolumnwise=False, emptycolumnwise=False)
75 'hleolteher'
76 >>> column_transposition_encipher('hellothere', 'cleverly')
77 'hleolthre e '
78 >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue='!')
79 'hleolthre!e!'
80 >>> column_transposition_encipher('hellothere', 'cleverly', fillvalue=lambda: '*')
81 'hleolthre*e*'
82 """
83 transpositions = transpositions_of(keyword)
84 message += pad(len(message), len(transpositions), fillvalue)
85 if fillcolumnwise:
86 rows = every_nth(message, len(message) // len(transpositions))
87 else:
88 rows = chunks(message, len(transpositions))
89 transposed = [transpose(r, transpositions) for r in rows]
90 if emptycolumnwise:
91 return combine_every_nth(transposed)
92 else:
93 return cat(chain(*transposed))
94
95 def column_transposition_decipher(message, keyword, fillvalue=' ',
96 fillcolumnwise=False,
97 emptycolumnwise=False):
98 """Deciphers using the column transposition cipher.
99 Message is padded to allow all rows to be the same length.
100
101 >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=True, emptycolumnwise=True)
102 'hellothere'
103 >>> column_transposition_decipher('hlohreltee', 'abcde', fillcolumnwise=True, emptycolumnwise=False)
104 'hellothere'
105 >>> column_transposition_decipher('htehlelroe', 'abcde', fillcolumnwise=False, emptycolumnwise=True)
106 'hellothere'
107 >>> column_transposition_decipher('hellothere', 'abcde', fillcolumnwise=False, emptycolumnwise=False)
108 'hellothere'
109 >>> column_transposition_decipher('heotllrehe', 'clever', fillcolumnwise=True, emptycolumnwise=True)
110 'hellothere'
111 >>> column_transposition_decipher('holrhetlee', 'clever', fillcolumnwise=True, emptycolumnwise=False)
112 'hellothere'
113 >>> column_transposition_decipher('htleehoelr', 'clever', fillcolumnwise=False, emptycolumnwise=True)
114 'hellothere'
115 >>> column_transposition_decipher('hleolteher', 'clever', fillcolumnwise=False, emptycolumnwise=False)
116 'hellothere'
117 """
118 transpositions = transpositions_of(keyword)
119 message += pad(len(message), len(transpositions), fillvalue)
120 if emptycolumnwise:
121 rows = every_nth(message, len(message) // len(transpositions))
122 else:
123 rows = chunks(message, len(transpositions))
124 untransposed = [untranspose(r, transpositions) for r in rows]
125 if fillcolumnwise:
126 return combine_every_nth(untransposed)
127 else:
128 return cat(chain(*untransposed))
129
130 def scytale_encipher(message, rows, fillvalue=' '):
131 """Enciphers using the scytale transposition cipher.
132 Message is padded with spaces to allow all rows to be the same length.
133
134 >>> scytale_encipher('thequickbrownfox', 3)
135 'tcnhkfeboqrxuo iw '
136 >>> scytale_encipher('thequickbrownfox', 4)
137 'tubnhirfecooqkwx'
138 >>> scytale_encipher('thequickbrownfox', 5)
139 'tubn hirf ecoo qkwx '
140 >>> scytale_encipher('thequickbrownfox', 6)
141 'tqcrnxhukof eibwo '
142 >>> scytale_encipher('thequickbrownfox', 7)
143 'tqcrnx hukof eibwo '
144 """
145 # transpositions = [i for i in range(math.ceil(len(message) / rows))]
146 # return column_transposition_encipher(message, transpositions,
147 # fillvalue=fillvalue, fillcolumnwise=False, emptycolumnwise=True)
148 transpositions = [i for i in range(rows)]
149 return column_transposition_encipher(message, transpositions,
150 fillvalue=fillvalue, fillcolumnwise=True, emptycolumnwise=False)
151
152 def scytale_decipher(message, rows):
153 """Deciphers using the scytale transposition cipher.
154 Assumes the message is padded so that all rows are the same length.
155
156 >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
157 'thequickbrownfox '
158 >>> scytale_decipher('tubnhirfecooqkwx', 4)
159 'thequickbrownfox'
160 >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
161 'thequickbrownfox '
162 >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
163 'thequickbrownfox '
164 >>> scytale_decipher('tqcrnx hukof eibwo ', 7)
165 'thequickbrownfox '
166 """
167 # transpositions = [i for i in range(math.ceil(len(message) / rows))]
168 # return column_transposition_decipher(message, transpositions,
169 # fillcolumnwise=False, emptycolumnwise=True)
170 transpositions = [i for i in range(rows)]
171 return column_transposition_decipher(message, transpositions,
172 fillcolumnwise=True, emptycolumnwise=False)
173
174
175 def column_transposition_break_mp(message, translist=transpositions,
176 fitness=Pbigrams, chunksize=500):
177 """Breaks a column transposition cipher using a dictionary and
178 n-gram frequency analysis
179
180 >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
181 "It is a truth universally acknowledged, that a single man in \
182 possession of a good fortune, must be in want of a wife. However \
183 little known the feelings or views of such a man may be on his \
184 first entering a neighbourhood, this truth is so well fixed in \
185 the minds of the surrounding families, that he is considered the \
186 rightful property of some one or other of their daughters."), \
187 'encipher'), \
188 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
189 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
190 (6, 1, 0, 4, 5, 3, 2): ['keyword']}) # doctest: +ELLIPSIS
191 (((2, 0, 5, 3, 1, 4, 6), False, False), -709.4646722...)
192 >>> column_transposition_break_mp(column_transposition_encipher(sanitise( \
193 "It is a truth universally acknowledged, that a single man in \
194 possession of a good fortune, must be in want of a wife. However \
195 little known the feelings or views of such a man may be on his \
196 first entering a neighbourhood, this truth is so well fixed in \
197 the minds of the surrounding families, that he is considered the \
198 rightful property of some one or other of their daughters."), \
199 'encipher'), \
200 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
201 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
202 (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
203 fitness=Ptrigrams) # doctest: +ELLIPSIS
204 (((2, 0, 5, 3, 1, 4, 6), False, False), -997.0129085...)
205 """
206 with multiprocessing.Pool() as pool:
207 helper_args = [(message, trans, fillcolumnwise, emptycolumnwise,
208 fitness)
209 for trans in translist
210 for fillcolumnwise in [True, False]
211 for emptycolumnwise in [True, False]]
212 # Gotcha: the helper function here needs to be defined at the top level
213 # (limitation of Pool.starmap)
214 breaks = pool.starmap(column_transposition_break_worker,
215 helper_args, chunksize)
216 return max(breaks, key=lambda k: k[1])
217 column_transposition_break = column_transposition_break_mp
218
219 def column_transposition_break_worker(message, transposition,
220 fillcolumnwise, emptycolumnwise, fitness):
221 plaintext = column_transposition_decipher(message, transposition,
222 fillcolumnwise=fillcolumnwise, emptycolumnwise=emptycolumnwise)
223 fit = fitness(sanitise(plaintext))
224 return (transposition, fillcolumnwise, emptycolumnwise), fit
225
226
227 def scytale_break_mp(message, max_key_length=20,
228 fitness=Pbigrams, chunksize=500):
229 """Breaks a scytale cipher using a range of lengths and
230 n-gram frequency analysis
231
232 >>> scytale_break_mp(scytale_encipher(sanitise( \
233 "It is a truth universally acknowledged, that a single man in \
234 possession of a good fortune, must be in want of a wife. However \
235 little known the feelings or views of such a man may be on his \
236 first entering a neighbourhood, this truth is so well fixed in \
237 the minds of the surrounding families, that he is considered the \
238 rightful property of some one or other of their daughters."), \
239 5)) # doctest: +ELLIPSIS
240 (5, -709.4646722...)
241 >>> scytale_break_mp(scytale_encipher(sanitise( \
242 "It is a truth universally acknowledged, that a single man in \
243 possession of a good fortune, must be in want of a wife. However \
244 little known the feelings or views of such a man may be on his \
245 first entering a neighbourhood, this truth is so well fixed in \
246 the minds of the surrounding families, that he is considered the \
247 rightful property of some one or other of their daughters."), \
248 5), \
249 fitness=Ptrigrams) # doctest: +ELLIPSIS
250 (5, -997.0129085...)
251 """
252 with multiprocessing.Pool() as pool:
253 helper_args = [(message, trans, False, True, fitness)
254 for trans in
255 [[col for col in range(math.ceil(len(message)/rows))]
256 for rows in range(1,max_key_length+1)]]
257 # Gotcha: the helper function here needs to be defined at the top level
258 # (limitation of Pool.starmap)
259 breaks = pool.starmap(column_transposition_break_worker,
260 helper_args, chunksize)
261 best = max(breaks, key=lambda k: k[1])
262 return math.trunc(len(message) / len(best[0][0])), best[1]
263 scytale_break = scytale_break_mp
264
265 if __name__ == "__main__":
266 import doctest