e340e5641693d98597cd0fc24ee3a9e8d469eb82
[szyfrow.git] / vigenere.py
1 from enum import Enum
2 from itertools import starmap, cycle
3 import multiprocessing
4 from szyfrow.caesar import *
5 from szyfrow.support.utilities import *
6 from szyfrow.support.language_models import *
7
def vigenere_encipher(message, keyword):
    """Encipher a message with the Vigenere cipher.

    The keyword is sanitised and each of its letters is converted to a
    shift with `pos`. The shifts are repeated for as long as the message
    lasts, and each character of `message` is enciphered with
    `caesar_encipher_letter` using the shift it is paired with.

    >>> vigenere_encipher('hello', 'abc')
    'hfnlp'
    """
    offsets = list(map(pos, sanitise(keyword)))
    enciphered = []
    for letter, offset in zip(message, cycle(offsets)):
        enciphered.append(caesar_encipher_letter(letter, offset))
    return cat(enciphered)
17
def vigenere_decipher(message, keyword):
    """Decipher a message that was enciphered with the Vigenere cipher.

    The keyword is sanitised and each of its letters is converted to a
    shift with `pos`. The shifts are repeated for as long as the message
    lasts, and each character of `message` is deciphered with
    `caesar_decipher_letter` using the shift it is paired with.

    >>> vigenere_decipher('hfnlp', 'abc')
    'hello'
    """
    offsets = list(map(pos, sanitise(keyword)))
    deciphered = []
    for letter, offset in zip(message, cycle(offsets)):
        deciphered.append(caesar_decipher_letter(letter, offset))
    return cat(deciphered)
27
28
def beaufort_encipher(message, keyword):
    """Encipher a message with the Beaufort cipher.

    The keyword is sanitised and each of its letters is converted to a
    number with `pos`; these numbers are repeated for as long as the
    message lasts. Each output letter is `unpos(key - pos(letter))`, i.e.
    the message letter's position is subtracted from the key value.

    >>> beaufort_encipher('inhisjournaldatedtheidesofoctober', 'arcanaimperii')
    'sevsvrusyrrxfayyxuteemazudmpjmmwr'
    """
    key_values = list(map(pos, sanitise(keyword)))
    output = []
    for letter, key_value in zip(message, cycle(key_values)):
        output.append(unpos(key_value - pos(letter)))
    return cat(output)
38
# Beaufort deciphering reuses the enciphering routine unchanged.
beaufort_decipher = beaufort_encipher

# The variant Beaufort cipher is defined here as Vigenere with the two
# directions swapped: enciphering is Vigenere deciphering and vice versa.
beaufort_variant_encipher=vigenere_decipher
beaufort_variant_decipher=vigenere_encipher
43
44
def index_of_coincidence_scan(text, max_key_length=20):
    """Scan candidate key lengths using the index of coincidence.

    The text is sanitised once. For every candidate length from 1 to
    `max_key_length` inclusive, the text is split with `every_nth`, the
    index of coincidence of each split is summed, and the sum is divided
    by the candidate length.

    Returns a dict mapping each candidate length to that value.
    """
    cleaned = sanitise(text)
    return {
        length: sum(index_of_coincidence(chunk)
                    for chunk in every_nth(cleaned, length)) / length
        for length in range(1, max_key_length + 1)
    }
54
def vigenere_keyword_break_mp(message, wordlist=keywords, fitness=Pletters,
                              chunksize=500):
    """Break a Vigenere cipher by trying every keyword in a word list.

    Each word in `wordlist` is used to decipher `message` and the result
    is scored with `fitness`. The trials are run through a
    `multiprocessing.Pool`, with `chunksize` passed on to `Pool.starmap`.

    Returns the (keyword, fitness) pair with the highest fitness.

    >>> vigenere_keyword_break_mp(vigenere_encipher(sanitise('this is a test ' \
    'message for the vigenere decipherment'), 'cat'), \
    wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    ('cat', -52.9472712...)
    """
    with multiprocessing.Pool() as workers:
        jobs = [(message, candidate, fitness) for candidate in wordlist]
        # Gotcha: the worker function has to be defined at the top level
        # of the module (limitation of Pool.starmap).
        scored = workers.starmap(vigenere_keyword_break_worker, jobs,
                                 chunksize)
    best = max(scored, key=lambda result: result[1])
    return best


# Default keyword breaker: the multiprocessing implementation.
vigenere_keyword_break = vigenere_keyword_break_mp
73
def vigenere_keyword_break_worker(message, keyword, fitness):
    """Score a single candidate keyword against a Vigenere ciphertext.

    Deciphers `message` with `keyword` and returns the pair
    (keyword, fitness of the resulting plaintext).
    """
    return keyword, fitness(vigenere_decipher(message, keyword))
78
79
def vigenere_frequency_break(message, max_key_length=20, fitness=Pletters):
    """Break a Vigenere cipher with frequency analysis.

    The message is sanitised, then every key length from 1 to
    `max_key_length` inclusive is tried. For each length the text is
    split with `every_nth`, each split is broken with `caesar_break`, and
    the resulting shifts are turned into key letters with `unpos`. The
    whole sanitised message is then deciphered with that key and scored
    with `fitness`.

    Returns the (key, fitness) pair with the highest fitness.

    >>> vigenere_frequency_break(vigenere_encipher(sanitise("It is time to " \
    "run. She is ready and so am I. I stole Daniel's pocketbook this " \
    "afternoon when he left his jacket hanging on the easel in the " \
    "attic. I jump every time I hear a footstep on the stairs, " \
    "certain that the theft has been discovered and that I will " \
    "be caught. The SS officer visits less often now that he is " \
    "sure"), 'florence')) # doctest: +ELLIPSIS
    ('florence', -307.5473096...)
    """
    cleaned = sanitise(message)
    candidates = []
    for key_length in range(1, max_key_length + 1):
        columns = every_nth(cleaned, key_length)
        # NOTE(review): caesar_break is called without `fitness`, so each
        # column is broken with whatever caesar_break uses by default.
        key = cat([unpos(caesar_break(column)[0]) for column in columns])
        score = fitness(vigenere_decipher(cleaned, key))
        candidates.append((key, score))
    return max(candidates, key=lambda candidate: candidate[1])
102
103
def beaufort_sub_break(message, fitness=Pletters):
    """Break one chunk of a Beaufort cipher with frequency analysis.

    Every key value from 0 to 25 is tried in turn: each letter of
    `message` is transformed to `unpos(key - pos(letter))` and the result
    is scored with `fitness`. The first key that achieves the highest
    score wins.

    Returns the pair (best_key, best_fit). If no key scores above
    negative infinity, the result is (0, -inf).

    >>> beaufort_sub_break('samwpplggnnmmyaazgympjapopnwiywwomwspgpjmefwmawx' \
    'jafjhxwwwdigxshnlywiamhyshtasxptwueahhytjwsn') # doctest: +ELLIPSIS
    (0, -117.4492...)
    >>> beaufort_sub_break('eyprzjjzznxymrygryjqmqhznjrjjapenejznawngnnezgza' \
    'dgndknaogpdjneadadazlhkhxkryevrronrmdjnndjlo') # doctest: +ELLIPSIS
    (17, -114.9598...)
    """
    # Initialise the name that is actually returned, so the function
    # cannot fail with UnboundLocalError when no key beats -inf.
    best_key = 0
    best_fit = float('-inf')
    for key in range(26):
        # The trial plaintext is handed to `fitness` as a list of letters,
        # not as a joined string.
        plaintext = [unpos(key - pos(l)) for l in message]
        fit = fitness(plaintext)
        # Strict comparison keeps the earliest key when scores tie.
        if fit > best_fit:
            best_fit = fit
            best_key = key
    return best_key, best_fit
123
124
def beaufort_frequency_break(message, max_key_length=20, fitness=Pletters):
    """Break a Beaufort cipher with frequency analysis.

    The message is sanitised, then every key length from 1 to
    `max_key_length` inclusive is tried. For each length the text is
    split with `every_nth`, each split is broken with
    `beaufort_sub_break`, and the resulting key values are turned into
    key letters with `unpos`. The whole sanitised message is then
    deciphered with that key and scored with `fitness`.

    Returns the (key, fitness) pair with the highest fitness.

    >>> beaufort_frequency_break(beaufort_encipher(sanitise("It is time to " \
    "run. She is ready and so am I. I stole Daniel's pocketbook this " \
    "afternoon when he left his jacket hanging on the easel in the " \
    "attic. I jump every time I hear a footstep on the stairs, " \
    "certain that the theft has been discovered and that I will " \
    "be caught. The SS officer visits less often now " \
    "that he is sure"), 'florence')) # doctest: +ELLIPSIS
    ('florence', -307.5473096791...)
    """
    cleaned = sanitise(message)
    candidates = []
    for key_length in range(1, max_key_length + 1):
        columns = every_nth(cleaned, key_length)
        # NOTE(review): beaufort_sub_break is called without `fitness`, so
        # each column is scored with that function's default.
        key = cat([unpos(beaufort_sub_break(column)[0])
                   for column in columns])
        score = fitness(beaufort_decipher(cleaned, key))
        candidates.append((key, score))
    return max(candidates, key=lambda candidate: candidate[1])
147
148
def beaufort_variant_frequency_break(message, max_key_length=20, fitness=Pletters):
    """Break a variant Beaufort cipher with frequency analysis.

    The message is sanitised, then every key length from 1 to
    `max_key_length` inclusive is tried. For each length the text is
    split with `every_nth`, each split is broken with `caesar_break`, and
    the negated shifts are turned into key letters with `unpos`. The
    whole sanitised message is then deciphered with
    `beaufort_variant_decipher` using that key and scored with `fitness`.

    Returns the (key, fitness) pair with the highest fitness.

    >>> beaufort_variant_frequency_break(beaufort_variant_encipher(sanitise("It is time to " \
    "run. She is ready and so am I. I stole Daniel's pocketbook this " \
    "afternoon when he left his jacket hanging on the easel in the " \
    "attic. I jump every time I hear a footstep on the stairs, " \
    "certain that the theft has been discovered and that I will " \
    "be caught. The SS officer visits less often now " \
    "that he is sure"), 'florence')) # doctest: +ELLIPSIS
    ('florence', -307.5473096791...)
    """
    cleaned = sanitise(message)
    candidates = []
    for key_length in range(1, max_key_length + 1):
        columns = every_nth(cleaned, key_length)
        # The shift found by caesar_break is negated to obtain the key
        # letter for the variant cipher.
        key = cat([unpos(-caesar_break(column)[0]) for column in columns])
        score = fitness(beaufort_variant_decipher(cleaned, key))
        candidates.append((key, score))
    return max(candidates, key=lambda candidate: candidate[1])
171
172 if __name__ == "__main__":
173 import doctest