Started on documentation
[szyfrow.git] / szyfrow / cadenus.py
1 """Enciphering and deciphering using the [Cadenus cipher](https://www.thonky.com/kryptos/cadenus-cipher).
2 Also attempts to break messages that use a Cadenus cipher.
3
4 The plaintext is written out in a grid, with one column per letter of the
5 keyword. The plaintext is written out left to right in rows. The plaintext
6 needs to fill 25 rows: if it is shorter, the text is padded; if longer, it is
7 broken into 25-row chunks.
8
9 For instance, the 100 letter chunk:
10
11 > Whoever has made a voyage up the Hudson must remember the Kaatskill mountains.
12 > They are a dismembered branch of the great
13
14 and the keyword "wink" would be written out as the leftmost grid below.
15
16 The columns are then rotated according to the _keycolumn_. For each column, the
17 keyword letter in that column is found in the keycolumn. This identifies a
18 specific row in the grid. That column only is rotated upwards until the selected
19 row is at the top of the column. Each column is rotated independently, according
20 to its keyword letter.
21
22 For instance, the middle grid below is formed from the leftmost grid by
23 rotating the first column up four positions, the second column up 17 positions,
24 and so on. (The letters chosen to head the new columns are capitalised in the
25 leftmost grid.)
26
27 Finally, each row is transposed given the alphabetic order of the keyword (as
28 seen in the rightmost grid below).
29
30 The ciphertext is read out in rows, starting with the now-leftmost column. For
31 the example, the ciphertext would be
32
33 > antodeleeeuhrsidrbhmhdrrhnimefmthgeaetakseomehetyaasuvoyegrastmmuuaeenabbtpchehtarorikswosmvaleatned
34
35 ```
36 w i n k w i n k i k n w
37 ------- ------- -------
38 w h o e a o a t n a n t o
39 v e r h z e d l e d e l e
40 a s m a y h e u e e e u h
41 d e a v x d r i s r s i d
42 O y a g vw m r h b r b h m
43 e u p t u r h r d h d r r
44 h e h u t m h i n h n i m
45 d s o n s t e m f e f m t
46 m u s t r a h e g h g e a
47 r e m e q k e a t e t a k
48 m b e r p m s o e s e o m
49 t h e k o t e e h e h e t
50 a a T s n s y a a y a a s
51 k i l l m y u o v u v o y
52 m o u n l a e r g e g r a
53 t a i N k m s m t s t m m
54 s t h e j e u a u u u a e
55 y A r e i b e a n e n a b
56 a d i s h c b p t b t p c
57 m e m b g t h h e h e h t
58 e r e d f r a o r a r o r
59 b r a n e w i s k i k s w
60 c h o f d v o m s o s m v
61 t h e g c a a e l a l e a
62 r e a t b d t e n t n e d
63 ```
64
65 """
66 from itertools import chain
67 import multiprocessing
68 from szyfrow.support.utilities import *
69 from szyfrow.support.language_models import *
70
71
def make_cadenus_keycolumn(doubled_letters='vw', start='a', reverse=False):
    """Makes the key column for a Cadenus cipher (the column down between the
    rows of letters)

    The key column is a 25-letter alphabet: the first letter of
    `doubled_letters` is dropped from the alphabet and then given the same
    position as the second. The alphabet is reversed if `reverse` is true,
    then rotated so that `start` sits at position 0.

    Returns a dict mapping each lowercase letter to its position in the
    key column.

    `start` is expected to be a lowercase ASCII letter. If `start` is the
    first of the doubled letters, it is treated as the second, as the two
    letters share a position.

    >>> make_cadenus_keycolumn()['a']
    0
    >>> make_cadenus_keycolumn()['b']
    1
    >>> make_cadenus_keycolumn()['c']
    2
    >>> make_cadenus_keycolumn()['v']
    21
    >>> make_cadenus_keycolumn()['w']
    21
    >>> make_cadenus_keycolumn()['z']
    24
    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['a']
    1
    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['b']
    0
    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['c']
    24
    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['i']
    18
    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['j']
    18
    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['v']
    6
    >>> make_cadenus_keycolumn(doubled_letters='ij', start='b', reverse=True)['z']
    2
    >>> make_cadenus_keycolumn(doubled_letters='vw', start='v')['w']
    0
    >>> make_cadenus_keycolumn(doubled_letters='vw', start='v')['v']
    0
    """
    dropped, kept = doubled_letters[0], doubled_letters[1]

    # The dropped letter is absent from the 25-letter alphabet, so looking it
    # up would give -1 and silently rotate the alphabet to begin at its last
    # letter. It shares a row with its partner, so start from the partner.
    if start == dropped:
        start = kept

    short_alphabet = string.ascii_lowercase.replace(dropped, '')
    if reverse:
        short_alphabet = short_alphabet[::-1]

    start_pos = short_alphabet.find(start)
    rotated_alphabet = short_alphabet[start_pos:] + short_alphabet[:start_pos]

    keycolumn = {letter: pos for pos, letter in enumerate(rotated_alphabet)}
    # Both doubled letters occupy the same row of the key column.
    keycolumn[dropped] = keycolumn[kept]
    return keycolumn
112
def cadenus_encipher(message, keyword, keycolumn, fillvalue='a'):
    """Encipher a message with the Cadenus cipher.

    The message is split into chunks of 25 rows, each row being as wide as
    the number of transpositions derived from `keyword`; short chunks and
    rows are padded with `fillvalue`. Within each chunk every column is
    rotated upwards by the `keycolumn` position of its keyword letter, then
    each row is transposed according to the keyword. The enciphered chunks
    are concatenated and returned.

    >>> cadenus_encipher(sanitise('Whoever has made a voyage up the Hudson ' \
                                  'must remember the Kaatskill mountains. ' \
                                  'They are a dismembered branch of the great'), \
                'wink', \
                make_cadenus_keycolumn(doubled_letters='vw', start='a', reverse=True))
    'antodeleeeuhrsidrbhmhdrrhnimefmthgeaetakseomehetyaasuvoyegrastmmuuaeenabbtpchehtarorikswosmvaleatned'
    >>> cadenus_encipher(sanitise('a severe limitation on the usefulness of ' \
                                  'the cadenus is that every message must be ' \
                                  'a multiple of twenty-five letters long'), \
                'easy', \
                make_cadenus_keycolumn(doubled_letters='vw', start='a', reverse=True))
    'systretomtattlusoatleeesfiyheasdfnmschbhneuvsnpmtofarenuseieeieltarlmentieetogevesitfaisltngeeuvowul'
    """
    transpositions = transpositions_of(keyword)
    width = len(transpositions)
    ciphertext_parts = []
    for block in chunks(message, width * 25, fillvalue=fillvalue):
        grid_rows = chunks(block, width, fillvalue=fillvalue)
        grid_columns = zip(*grid_rows)
        # NOTE(review): offsets are taken from the raw keyword letters; if
        # transpositions_of() drops repeated letters, offsets and columns
        # may pair up differently than intended -- confirm.
        offsets = [keycolumn[letter] for letter in keyword]
        shifted_columns = []
        for offset, column in zip(offsets, grid_columns):
            # Rotate the column upwards so row `offset` becomes the top row.
            shifted_columns.append(column[offset:] + column[:offset])
        shuffled_rows = [transpose(row, transpositions)
                         for row in zip(*shifted_columns)]
        ciphertext_parts.append(cat(chain.from_iterable(shuffled_rows)))
    return cat(ciphertext_parts)
140
def cadenus_decipher(message, keyword, keycolumn, fillvalue='a'):
    """Decipher a message with the Cadenus cipher.

    Reverses the steps of `cadenus_encipher`: the message is split into
    chunks of 25 rows (padded with `fillvalue` if short), each row has the
    keyword transposition undone, and each column is then rotated back
    downwards by the `keycolumn` position of its keyword letter. The
    deciphered chunks are concatenated and returned.

    >>> cadenus_decipher('antodeleeeuhrsidrbhmhdrrhnimefmthgeaetakseomehetyaa' \
                         'suvoyegrastmmuuaeenabbtpchehtarorikswosmvaleatned', \
                 'wink', \
                 make_cadenus_keycolumn(reverse=True))
    'whoeverhasmadeavoyageupthehudsonmustrememberthekaatskillmountainstheyareadismemberedbranchofthegreat'
    >>> cadenus_decipher('systretomtattlusoatleeesfiyheasdfnmschbhneuvsnpmtof' \
                         'arenuseieeieltarlmentieetogevesitfaisltngeeuvowul', \
                 'easy', \
                 make_cadenus_keycolumn(reverse=True))
    'aseverelimitationontheusefulnessofthecadenusisthateverymessagemustbeamultipleoftwentyfiveletterslong'
    """
    transpositions = transpositions_of(keyword)
    width = len(transpositions)
    plaintext_parts = []
    for block in chunks(message, width * 25, fillvalue=fillvalue):
        cipher_rows = chunks(block, width, fillvalue=fillvalue)
        restored_rows = [untranspose(row, transpositions)
                         for row in cipher_rows]
        grid_columns = zip(*restored_rows)
        offsets = [keycolumn[letter] for letter in keyword]
        unshifted_columns = []
        for offset, column in zip(offsets, grid_columns):
            # Rotate the column downwards by `offset`, undoing the upward
            # rotation applied when enciphering.
            unshifted_columns.append(column[-offset:] + column[:-offset])
        plain_rows = zip(*unshifted_columns)
        plaintext_parts.append(cat(chain.from_iterable(plain_rows)))
    return cat(plaintext_parts)
165
166
167
def cadenus_break(message, wordlist=None,
                  doubled_letters='vw', fitness=Pbigrams):
    """Breaks a Cadenus cipher using a dictionary and
    frequency analysis

    Every word in `wordlist` is tried as the keyword, combined with every
    lowercase letter as the start of the key column and with both key column
    directions. Each trial is deciphered and scored with `fitness` in a
    multiprocessing pool. No pre-filtering of keywords by length is applied.

    Returns the best-scoring `((keyword, keycolumn), fitness)` pair.

    If `wordlist` is not specified, use
    [`szyfrow.support.language_models.keywords`](support/language_models.html#szyfrow.support.language_models.keywords).
    """
    if wordlist is None:
        wordlist = keywords

    with multiprocessing.Pool() as pool:
        # One argument tuple per (keyword, start letter, direction) trial.
        trials = []
        for candidate in wordlist:
            for start_letter in string.ascii_lowercase:
                for reversed_column in [True, False]:
                    keycolumn = make_cadenus_keycolumn(
                        doubled_letters=doubled_letters,
                        start=start_letter,
                        reverse=reversed_column)
                    trials.append((message, candidate, keycolumn, fitness))
        scored = pool.starmap(cadenus_break_worker, trials)
    return max(scored, key=lambda outcome: outcome[1])
197
def cadenus_break_worker(message, keyword, keycolumn, fitness):
    """Score one candidate key for `cadenus_break`.

    Deciphers `message` with the given `keyword` and `keycolumn`, then
    applies `fitness` to the resulting plaintext.

    Returns `((keyword, keycolumn), fitness_score)`.
    """
    candidate_plaintext = cadenus_decipher(message, keyword, keycolumn)
    return (keyword, keycolumn), fitness(candidate_plaintext)
204
205 if __name__ == "__main__":
206 import doctest