Minor documentation updates
[szyfrow.git] / szyfrow / amsco.py
1 """Enciphering and deciphering using the [Amsco cipher](http://ericbrandel.com/2016/10/09/the-amsco-cipher/).
2 Also attempts to break messages that use an Amsco cipher.
3
4 The Amsco cipher is a column transpositoin cipher. The plaintext is laid out,
5 row by row, into columns. However, different numbers of letters are laid out
6 in each cell, typically in a 1-2 pattern.
7
8 It's clearer with an example. Consider we're using the keyword "perceptive",
9 which turns into "perctiv". The text ""It is a truth universally
10 acknowledged, that a single man in, possession of a good fortune, must be in
11 want of a wife." is laid out in seven columns like this:
12
13 p e r c t i v
14 --------------------
15 i ti s at r ut h
16 un i ve r sa l ly
17 a ck n ow l ed g
18 ed t ha t as i ng
19 l em a ni n po s
20 se s si o no f ag
21 o od f or t un e
22 mu s tb e in w an
23 t of a wi f e
24
25 The ciphertext is read out in columns, according to the order of the keyword.
26 In this example, the "c" column is read first, then the "e" column, and so on.
27 That gives the ciphertext of "atrowtnioorewi tiicktemsodsof utledipofunwe
28 iunaedlseomut svenhaasiftba rsalasnnotinf hlygngsagean".
29 """
30
31 from enum import Enum
32 import multiprocessing
33 import itertools
34
35 from szyfrow.support.utilities import *
36 from szyfrow.support.language_models import *
37
38 __pdoc__ = {}
39
40 AmscoSlice = collections.namedtuple('AmscoSlice', ['index', 'start', 'end'])
41 __pdoc__['AmscoSlice'] = """Where each piece of plainatext ends up in the AMSCO
42 transpositon cipher."""
43 __pdoc__['AmscoSlice.index'] = """Where the slice appears in the plaintext"""
44 __pdoc__['AmscoSlice.start'] = """Where the slice starts in the plaintext"""
45 __pdoc__['AmscoSlice.end'] = """Where the slice ends in the plaintext"""
46
47 class AmscoFillStyle(Enum):
48 """Different methods of filling the grid.
49 * `continuous`: continue the fillpattern unbroken by row boundaries
50 * `same_each_row`: each row has the same fillpattern
51 * `reverse_each_row`: each row has the reversed fillpattern to the row above
52 """
53 continuous = 1
54 same_each_row = 2
55 reverse_each_row = 3
56
57 def amsco_positions(message, keyword,
58 fillpattern=(1, 2),
59 fillstyle=AmscoFillStyle.continuous,
60 fillcolumnwise=False,
61 emptycolumnwise=True):
62 """Creates the grid for the AMSCO transposition cipher. Each element in the
63 grid shows the index of that slice and the start and end positions of the
64 plaintext that go to make it up.
65
66 >>> amsco_positions(string.ascii_lowercase, 'freddy', \
67 fillpattern=(1, 2)) # doctest: +NORMALIZE_WHITESPACE
68 [[AmscoSlice(index=3, start=4, end=6),
69 AmscoSlice(index=2, start=3, end=4),
70 AmscoSlice(index=0, start=0, end=1),
71 AmscoSlice(index=1, start=1, end=3),
72 AmscoSlice(index=4, start=6, end=7)],
73 [AmscoSlice(index=8, start=12, end=13),
74 AmscoSlice(index=7, start=10, end=12),
75 AmscoSlice(index=5, start=7, end=9),
76 AmscoSlice(index=6, start=9, end=10),
77 AmscoSlice(index=9, start=13, end=15)],
78 [AmscoSlice(index=13, start=19, end=21),
79 AmscoSlice(index=12, start=18, end=19),
80 AmscoSlice(index=10, start=15, end=16),
81 AmscoSlice(index=11, start=16, end=18),
82 AmscoSlice(index=14, start=21, end=22)],
83 [AmscoSlice(index=18, start=27, end=28),
84 AmscoSlice(index=17, start=25, end=27),
85 AmscoSlice(index=15, start=22, end=24),
86 AmscoSlice(index=16, start=24, end=25),
87 AmscoSlice(index=19, start=28, end=30)]]
88 """
89 transpositions = transpositions_of(keyword)
90 fill_iterator = itertools.cycle(fillpattern)
91 indices = itertools.count()
92 message_length = len(message)
93
94 current_position = 0
95 grid = []
96 current_fillpattern = fillpattern
97 while current_position < message_length:
98 row = []
99 if fillstyle == AmscoFillStyle.same_each_row:
100 fill_iterator = itertools.cycle(fillpattern)
101 if fillstyle == AmscoFillStyle.reverse_each_row:
102 fill_iterator = itertools.cycle(current_fillpattern)
103 for _ in range(len(transpositions)):
104 index = next(indices)
105 gap = next(fill_iterator)
106 row += [AmscoSlice(index, current_position, current_position + gap)]
107 current_position += gap
108 grid += [row]
109 if fillstyle == AmscoFillStyle.reverse_each_row:
110 current_fillpattern = list(reversed(current_fillpattern))
111 return [transpose(r, transpositions) for r in grid]
112
113 def amsco_encipher(message, keyword,
114 fillpattern=(1,2), fillstyle=AmscoFillStyle.reverse_each_row):
115 """AMSCO transposition encipher.
116
117 >>> amsco_encipher('hellothere', 'abc', fillpattern=(1, 2))
118 'hoteelhler'
119 >>> amsco_encipher('hellothere', 'abc', fillpattern=(2, 1))
120 'hetelhelor'
121 >>> amsco_encipher('hellothere', 'acb', fillpattern=(1, 2))
122 'hotelerelh'
123 >>> amsco_encipher('hellothere', 'acb', fillpattern=(2, 1))
124 'hetelorlhe'
125 >>> amsco_encipher('hereissometexttoencipher', 'encode')
126 'etecstthhomoerereenisxip'
127 >>> amsco_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 2))
128 'hetcsoeisterereipexthomn'
129 >>> amsco_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 2), fillstyle=AmscoFillStyle.continuous)
130 'hecsoisttererteipexhomen'
131 >>> amsco_encipher('hereissometexttoencipher', 'cipher', fillpattern=(2, 1))
132 'heecisoosttrrtepeixhemen'
133 >>> amsco_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 3, 2))
134 'hxtomephescieretoeisnter'
135 >>> amsco_encipher('hereissometexttoencipher', 'cipher', fillpattern=(1, 3, 2), fillstyle=AmscoFillStyle.continuous)
136 'hxomeiphscerettoisenteer'
137 """
138 grid = amsco_positions(message, keyword,
139 fillpattern=fillpattern, fillstyle=fillstyle)
140 ct_as_grid = [[message[s.start:s.end] for s in r] for r in grid]
141 return combine_every_nth(ct_as_grid)
142
143
144 def amsco_decipher(message, keyword,
145 fillpattern=(1,2), fillstyle=AmscoFillStyle.reverse_each_row):
146 """AMSCO transposition decipher
147
148 >>> amsco_decipher('hoteelhler', 'abc', fillpattern=(1, 2))
149 'hellothere'
150 >>> amsco_decipher('hetelhelor', 'abc', fillpattern=(2, 1))
151 'hellothere'
152 >>> amsco_decipher('hotelerelh', 'acb', fillpattern=(1, 2))
153 'hellothere'
154 >>> amsco_decipher('hetelorlhe', 'acb', fillpattern=(2, 1))
155 'hellothere'
156 >>> amsco_decipher('etecstthhomoerereenisxip', 'encode')
157 'hereissometexttoencipher'
158 >>> amsco_decipher('hetcsoeisterereipexthomn', 'cipher', fillpattern=(1, 2))
159 'hereissometexttoencipher'
160 >>> amsco_decipher('hecsoisttererteipexhomen', 'cipher', fillpattern=(1, 2), fillstyle=AmscoFillStyle.continuous)
161 'hereissometexttoencipher'
162 >>> amsco_decipher('heecisoosttrrtepeixhemen', 'cipher', fillpattern=(2, 1))
163 'hereissometexttoencipher'
164 >>> amsco_decipher('hxtomephescieretoeisnter', 'cipher', fillpattern=(1, 3, 2))
165 'hereissometexttoencipher'
166 >>> amsco_decipher('hxomeiphscerettoisenteer', 'cipher', fillpattern=(1, 3, 2), fillstyle=AmscoFillStyle.continuous)
167 'hereissometexttoencipher'
168 """
169
170 grid = amsco_positions(message, keyword,
171 fillpattern=fillpattern, fillstyle=fillstyle)
172 transposed_sections = [s for c in [l for l in zip(*grid)] for s in c]
173 plaintext_list = [''] * len(transposed_sections)
174 current_pos = 0
175 for slice in transposed_sections:
176 plaintext_list[slice.index] = message[current_pos:current_pos-slice.start+slice.end][:len(message[slice.start:slice.end])]
177 current_pos += len(message[slice.start:slice.end])
178 return cat(plaintext_list)
179
180
181 def amsco_break(message, translist=None, patterns = [(1, 2), (2, 1)],
182 fillstyles = [AmscoFillStyle.continuous,
183 AmscoFillStyle.same_each_row,
184 AmscoFillStyle.reverse_each_row],
185 fitness=Pbigrams,
186 chunksize=500):
187 """Breaks an AMSCO transposition cipher using a dictionary and
188 n-gram frequency analysis.
189
190 If `translist` is not specified, use
191 [`szyfrow.support.langauge_models.transpositions`](support/language_models.html#szyfrow.support.language_models.transpositions).
192
193 >>> amsco_break(amsco_encipher(sanitise( \
194 "It is a truth universally acknowledged, that a single man in \
195 possession of a good fortune, must be in want of a wife. However \
196 little known the feelings or views of such a man may be on his \
197 first entering a neighbourhood, this truth is so well fixed in \
198 the minds of the surrounding families, that he is considered the \
199 rightful property of some one or other of their daughters."), \
200 'encipher'), \
201 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
202 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
203 (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
204 patterns=[(1, 2)]) # doctest: +ELLIPSIS
205 (((2, 0, 5, 3, 1, 4, 6), (1, 2), <AmscoFillStyle.continuous: 1>), -709.4646722...)
206 >>> amsco_break(amsco_encipher(sanitise( \
207 "It is a truth universally acknowledged, that a single man in \
208 possession of a good fortune, must be in want of a wife. However \
209 little known the feelings or views of such a man may be on his \
210 first entering a neighbourhood, this truth is so well fixed in \
211 the minds of the surrounding families, that he is considered the \
212 rightful property of some one or other of their daughters."), \
213 'encipher', fillpattern=(2, 1)), \
214 translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
215 (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
216 (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
217 patterns=[(1, 2), (2, 1)], fitness=Ptrigrams) # doctest: +ELLIPSIS
218 (((2, 0, 5, 3, 1, 4, 6), (2, 1), <AmscoFillStyle.continuous: 1>), -997.0129085...)
219 """
220 if translist is None:
221 translist = transpositions
222
223 with multiprocessing.Pool() as pool:
224 helper_args = [(message, trans, pattern, fillstyle, fitness)
225 for trans in translist
226 for pattern in patterns
227 for fillstyle in fillstyles]
228 # Gotcha: the helper function here needs to be defined at the top level
229 # (limitation of Pool.starmap)
230 breaks = pool.starmap(amsco_break_worker, helper_args, chunksize)
231 return max(breaks, key=lambda k: k[1])
232
233 def amsco_break_worker(message, transposition,
234 pattern, fillstyle, fitness):
235 plaintext = amsco_decipher(message, transposition,
236 fillpattern=pattern, fillstyle=fillstyle)
237 fit = fitness(sanitise(plaintext))
238 return (transposition, pattern, fillstyle), fit
239
240 if __name__ == "__main__":
241 import doctest