Minor documentation updates
[szyfrow.git] / szyfrow / vigenere.py
1 """[Vigenère polyalphabetic substitution ciphers](https://en.wikipedia.org/wiki/Vigen%C3%A8re_cipher),
2 mainly done by keyword. Also does Beaufort ciphers and Variant Beaufort
3 ciphers.
4
5 Enciphering and deciphering, and a couple of ways to break these ciphers.
6 """
7 from enum import Enum
8 from itertools import starmap, cycle
9 import multiprocessing
10 from szyfrow.caesar import *
11 from szyfrow.support.utilities import *
12 from szyfrow.support.language_models import *
13
14 def vigenere_encipher(message, keyword):
15 """Vigenere encipher
16
17 >>> vigenere_encipher('hello', 'abc')
18 'hfnlp'
19 """
20 shifts = [pos(l) for l in sanitise(keyword)]
21 pairs = zip(message, cycle(shifts))
22 return cat([caesar_encipher_letter(l, k) for l, k in pairs])
23
24 def vigenere_decipher(message, keyword):
25 """Vigenere decipher
26
27 >>> vigenere_decipher('hfnlp', 'abc')
28 'hello'
29 """
30 shifts = [pos(l) for l in sanitise(keyword)]
31 pairs = zip(message, cycle(shifts))
32 return cat([caesar_decipher_letter(l, k) for l, k in pairs])
33
34
35 def beaufort_encipher(message, keyword):
36 """Beaufort encipher. A symmetrical cipher, so enciphering and deciphering
37 follow the same procedure.
38
39 >>> beaufort_encipher('inhisjournaldatedtheidesofoctober', 'arcanaimperii')
40 'sevsvrusyrrxfayyxuteemazudmpjmmwr'
41 """
42 shifts = [pos(l) for l in sanitise(keyword)]
43 pairs = zip(message, cycle(shifts))
44 return cat([unpos(k - pos(l)) for l, k in pairs])
45
46 beaufort_decipher = beaufort_encipher
47
48 beaufort_variant_encipher=vigenere_decipher
49 beaufort_variant_decipher=vigenere_encipher
50
51
52 def index_of_coincidence_scan(text, max_key_length=20):
53 """Finds the index of coincidence of the text, using different chunk sizes."""
54 stext = sanitise(text)
55 iocs = {}
56 for i in range(1, max_key_length + 1):
57 splits = every_nth(stext, i)
58 mean_ioc = sum(index_of_coincidence(s) for s in splits) / i
59 iocs[i] = mean_ioc
60 return iocs
61
62 def vigenere_keyword_break(message, wordlist=keywords, fitness=Pletters,
63 chunksize=500):
64 """Breaks a vigenere cipher using a dictionary and frequency analysis.
65
66 >>> vigenere_keyword_break(vigenere_encipher(sanitise('this is a test ' \
67 'message for the vigenere decipherment'), 'cat'), \
68 wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
69 ('cat', -52.9472712...)
70 """
71 with multiprocessing.Pool() as pool:
72 helper_args = [(message, word, fitness)
73 for word in wordlist]
74 # Gotcha: the helper function here needs to be defined at the top level
75 # (limitation of Pool.starmap)
76 breaks = pool.starmap(vigenere_keyword_break_worker, helper_args,
77 chunksize)
78 return max(breaks, key=lambda k: k[1])
79
80 def vigenere_keyword_break_worker(message, keyword, fitness):
81 plaintext = vigenere_decipher(message, keyword)
82 fit = fitness(plaintext)
83 return keyword, fit
84
85
86 def vigenere_frequency_break(message, max_key_length=20, fitness=Pletters):
87 """Breaks a Vigenere cipher with frequency analysis
88
89 >>> vigenere_frequency_break(vigenere_encipher(sanitise("It is time to " \
90 "run. She is ready and so am I. I stole Daniel's pocketbook this " \
91 "afternoon when he left his jacket hanging on the easel in the " \
92 "attic. I jump every time I hear a footstep on the stairs, " \
93 "certain that the theft has been discovered and that I will " \
94 "be caught. The SS officer visits less often now that he is " \
95 "sure"), 'florence')) # doctest: +ELLIPSIS
96 ('florence', -307.5473096...)
97 """
98 def worker(message, key_length, fitness):
99 splits = every_nth(sanitised_message, key_length)
100 key = cat([unpos(caesar_break(s)[0]) for s in splits])
101 plaintext = vigenere_decipher(message, key)
102 fit = fitness(plaintext)
103 return key, fit
104 sanitised_message = sanitise(message)
105 results = starmap(worker, [(sanitised_message, i, fitness)
106 for i in range(1, max_key_length+1)])
107 return max(results, key=lambda k: k[1])
108
109
110 def beaufort_sub_break(message, fitness=Pletters):
111 """Breaks one chunk of a Beaufort cipher with frequency analysis
112
113 >>> beaufort_sub_break('samwpplggnnmmyaazgympjapopnwiywwomwspgpjmefwmawx' \
114 'jafjhxwwwdigxshnlywiamhyshtasxptwueahhytjwsn') # doctest: +ELLIPSIS
115 (0, -117.4492...)
116 >>> beaufort_sub_break('eyprzjjzznxymrygryjqmqhznjrjjapenejznawngnnezgza' \
117 'dgndknaogpdjneadadazlhkhxkryevrronrmdjnndjlo') # doctest: +ELLIPSIS
118 (17, -114.9598...)
119 """
120 best_shift = 0
121 best_fit = float('-inf')
122 for key in range(26):
123 plaintext = [unpos(key - pos(l)) for l in message]
124 fit = fitness(plaintext)
125 if fit > best_fit:
126 best_fit = fit
127 best_key = key
128 return best_key, best_fit
129
130
131 def beaufort_frequency_break(message, max_key_length=20, fitness=Pletters):
132 """Breaks a Beaufort cipher with frequency analysis
133
134 >>> beaufort_frequency_break(beaufort_encipher(sanitise("It is time to " \
135 "run. She is ready and so am I. I stole Daniel's pocketbook this " \
136 "afternoon when he left his jacket hanging on the easel in the " \
137 "attic. I jump every time I hear a footstep on the stairs, " \
138 "certain that the theft has been discovered and that I will " \
139 "be caught. The SS officer visits less often now " \
140 "that he is sure"), 'florence')) # doctest: +ELLIPSIS
141 ('florence', -307.5473096791...)
142 """
143 def worker(message, key_length, fitness):
144 splits = every_nth(message, key_length)
145 key = cat([unpos(beaufort_sub_break(s)[0]) for s in splits])
146 plaintext = beaufort_decipher(message, key)
147 fit = fitness(plaintext)
148 return key, fit
149 sanitised_message = sanitise(message)
150 results = starmap(worker, [(sanitised_message, i, fitness)
151 for i in range(1, max_key_length+1)])
152 return max(results, key=lambda k: k[1])
153
154
155 def beaufort_variant_frequency_break(message, max_key_length=20, fitness=Pletters):
156 """Breaks a Beaufort cipher with frequency analysis
157
158 >>> beaufort_variant_frequency_break(beaufort_variant_encipher(sanitise("It is time to " \
159 "run. She is ready and so am I. I stole Daniel's pocketbook this " \
160 "afternoon when he left his jacket hanging on the easel in the " \
161 "attic. I jump every time I hear a footstep on the stairs, " \
162 "certain that the theft has been discovered and that I will " \
163 "be caught. The SS officer visits less often now " \
164 "that he is sure"), 'florence')) # doctest: +ELLIPSIS
165 ('florence', -307.5473096791...)
166 """
167 def worker(message, key_length, fitness):
168 splits = every_nth(sanitised_message, key_length)
169 key = cat([unpos(-caesar_break(s)[0]) for s in splits])
170 plaintext = beaufort_variant_decipher(message, key)
171 fit = fitness(plaintext)
172 return key, fit
173 sanitised_message = sanitise(message)
174 results = starmap(worker, [(sanitised_message, i, fitness)
175 for i in range(1, max_key_length+1)])
176 return max(results, key=lambda k: k[1])
177
178 if __name__ == "__main__":
179 import doctest