6 logger
= logging
.getLogger(__name__
)
7 logger
.addHandler(logging
.FileHandler('cipher.log'))
8 logger
.setLevel(logging
.WARNING
)
9 #logger.setLevel(logging.INFO)
11 english_counts
= collections
.defaultdict(int)
12 with
open('count_1l.txt', 'r') as f
:
14 (letter
, count
) = line
.split("\t")
15 english_counts
[letter
] = int(count
)
16 normalised_english_counts
= norms
.normalise(english_counts
)
18 with
open('words.txt', 'r') as f
:
19 keywords
= [line
.rstrip() for line
in f
]
21 modular_division_table
= [[0]*26 for x
in range(26)]
25 modular_division_table
[b
][c
] = a
27 modular_division_table_one_based
= [[0]*27 for x
in range(27)]
30 c
= ((a
* b
)-1) % 26 + 1
31 modular_division_table_one_based
[b
][c
] = a
36 """Remove all non-alphabetic characters and convert the text to lowercase
38 >>> sanitise('The Quick')
40 >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
41 'thequickbrownfoxjumpedoverthelazydog'
43 sanitised
= [c
.lower() for c
in text
if c
in string
.ascii_letters
]
44 return ''.join(sanitised
)
47 """Returns all n-grams of a text
49 >>> ngrams(sanitise('the quick brown fox'), 2)
50 [('t', 'h'), ('h', 'e'), ('e', 'q'), ('q', 'u'), ('u', 'i'), ('i', 'c'), ('c', 'k'), ('k', 'b'), ('b', 'r'), ('r', 'o'), ('o', 'w'), ('w', 'n'), ('n', 'f'), ('f', 'o'), ('o', 'x')]
51 >>> ngrams(sanitise('the quick brown fox'), 4)
52 [('t', 'h', 'e', 'q'), ('h', 'e', 'q', 'u'), ('e', 'q', 'u', 'i'), ('q', 'u', 'i', 'c'), ('u', 'i', 'c', 'k'), ('i', 'c', 'k', 'b'), ('c', 'k', 'b', 'r'), ('k', 'b', 'r', 'o'), ('b', 'r', 'o', 'w'), ('r', 'o', 'w', 'n'), ('o', 'w', 'n', 'f'), ('w', 'n', 'f', 'o'), ('n', 'f', 'o', 'x')]
54 return [tuple(text
[i
:i
+n
]) for i
in range(len(text
)-n
+1)]
56 def letter_frequencies(text
):
57 """Count the number of occurrences of each character in text
59 >>> sorted(letter_frequencies('abcdefabc').items())
60 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
61 >>> sorted(letter_frequencies('the quick brown fox jumped over the lazy dog').items())
62 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
63 >>> sorted(letter_frequencies('The Quick BROWN fox jumped! over... the (9lazy) DOG').items())
64 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
65 >>> sorted(letter_frequencies(sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')).items())
66 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
68 counts
= collections
.defaultdict(int)
73 def deduplicate(text
):
74 return list(collections
.OrderedDict
.fromkeys(text
))
78 def caesar_encipher_letter(letter
, shift
):
79 """Encipher a letter, given a shift amount
81 >>> caesar_encipher_letter('a', 1)
83 >>> caesar_encipher_letter('a', 2)
85 >>> caesar_encipher_letter('b', 2)
87 >>> caesar_encipher_letter('x', 2)
89 >>> caesar_encipher_letter('y', 2)
91 >>> caesar_encipher_letter('z', 2)
93 >>> caesar_encipher_letter('z', -1)
95 >>> caesar_encipher_letter('a', -1)
98 if letter
in string
.ascii_letters
:
99 if letter
in string
.ascii_uppercase
:
100 alphabet_start
= ord('A')
102 alphabet_start
= ord('a')
103 return chr(((ord(letter
) - alphabet_start
+ shift
) % 26) + alphabet_start
)
107 def caesar_decipher_letter(letter
, shift
):
108 """Decipher a letter, given a shift amount
110 >>> caesar_decipher_letter('b', 1)
112 >>> caesar_decipher_letter('b', 2)
115 return caesar_encipher_letter(letter
, -shift
)
117 def caesar_encipher(message
, shift
):
118 """Encipher a message with the Caesar cipher of given shift
120 >>> caesar_encipher('abc', 1)
122 >>> caesar_encipher('abc', 2)
124 >>> caesar_encipher('abcxyz', 2)
126 >>> caesar_encipher('ab cx yz', 2)
129 enciphered
= [caesar_encipher_letter(l
, shift
) for l
in message
]
130 return ''.join(enciphered
)
132 def caesar_decipher(message
, shift
):
133 """Encipher a message with the Caesar cipher of given shift
135 >>> caesar_decipher('bcd', 1)
137 >>> caesar_decipher('cde', 2)
139 >>> caesar_decipher('cd ez ab', 2)
142 return caesar_encipher(message
, -shift
)
144 def affine_encipher_letter(letter
, multiplier
=1, adder
=0, one_based
=True):
145 """Encipher a letter, given a multiplier and adder
147 >>> ''.join([affine_encipher_letter(l, 3, 5, True) for l in string.ascii_uppercase])
148 'HKNQTWZCFILORUXADGJMPSVYBE'
149 >>> ''.join([affine_encipher_letter(l, 3, 5, False) for l in string.ascii_uppercase])
150 'FILORUXADGJMPSVYBEHKNQTWZC'
152 if letter
in string
.ascii_letters
:
153 if letter
in string
.ascii_uppercase
:
154 alphabet_start
= ord('A')
156 alphabet_start
= ord('a')
157 letter_number
= ord(letter
) - alphabet_start
160 raw_cipher_number
= (letter_number
* multiplier
+ adder
)
163 cipher_number
= (raw_cipher_number
- 1) % 26
165 cipher_number
= raw_cipher_number
% 26
166 return chr(cipher_number
+ alphabet_start
)
170 def affine_decipher_letter(letter
, multiplier
=1, adder
=0, one_based
=True):
171 """Encipher a letter, given a multiplier and adder
173 >>> ''.join([affine_decipher_letter(l, 3, 5, True) for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
174 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
175 >>> ''.join([affine_decipher_letter(l, 3, 5, False) for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
176 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
178 if letter
in string
.ascii_letters
:
179 if letter
in string
.ascii_uppercase
:
180 alphabet_start
= ord('A')
182 alphabet_start
= ord('a')
183 cipher_number
= ord(letter
) - alphabet_start
188 plaintext_number
= (modular_division_table_one_based
[multiplier
][(cipher_number
- adder
+ 26) % 26] - 1) % 26
190 #plaintext_number = (modular_division_table[multiplier][cipher_number] - adder) % 26
191 plaintext_number
= modular_division_table
[multiplier
][(cipher_number
- adder
+ 26) % 26]
192 return chr(plaintext_number
+ alphabet_start
)
196 def affine_encipher(message
, multiplier
=1, adder
=0, one_based
=True):
197 """Encipher a message
199 >>> affine_encipher('hours passed during which jerico tried every trick he could think of', 15, 22, True)
200 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
202 enciphered
= [affine_encipher_letter(l
, multiplier
, adder
, one_based
) for l
in message
]
203 return ''.join(enciphered
)
205 def affine_decipher(message
, multiplier
=1, adder
=0, one_based
=True):
206 """Decipher a message
208 >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh', 15, 22, True)
209 'hours passed during which jerico tried every trick he could think of'
211 enciphered
= [affine_decipher_letter(l
, multiplier
, adder
, one_based
) for l
in message
]
212 return ''.join(enciphered
)
215 def keyword_cipher_alphabet_of(keyword
, wrap_alphabet
=0):
216 """Find the cipher alphabet given a keyword.
217 wrap_alphabet controls how the rest of the alphabet is added
220 1 : from the last letter in the sanitised keyword
221 2 : from the largest letter in the sanitised keyword
223 >>> keyword_cipher_alphabet_of('bayes')
224 'bayescdfghijklmnopqrtuvwxz'
225 >>> keyword_cipher_alphabet_of('bayes', 0)
226 'bayescdfghijklmnopqrtuvwxz'
227 >>> keyword_cipher_alphabet_of('bayes', 1)
228 'bayestuvwxzcdfghijklmnopqr'
229 >>> keyword_cipher_alphabet_of('bayes', 2)
230 'bayeszcdfghijklmnopqrtuvwx'
232 if wrap_alphabet
== 0:
233 cipher_alphabet
= ''.join(deduplicate(sanitise(keyword
) + string
.ascii_lowercase
))
235 if wrap_alphabet
== 1:
236 last_keyword_letter
= deduplicate(sanitise(keyword
))[-1]
238 last_keyword_letter
= sorted(sanitise(keyword
))[-1]
239 last_keyword_position
= string
.ascii_lowercase
.find(last_keyword_letter
) + 1
240 cipher_alphabet
= ''.join(deduplicate(sanitise(keyword
) +
241 string
.ascii_lowercase
[last_keyword_position
:] +
242 string
.ascii_lowercase
))
243 return cipher_alphabet
246 def keyword_encipher(message
, keyword
, wrap_alphabet
=0):
247 """Enciphers a message with a keyword substitution cipher.
248 wrap_alphabet controls how the rest of the alphabet is added
251 1 : from the last letter in the sanitised keyword
252 2 : from the largest letter in the sanitised keyword
254 >>> keyword_encipher('test message', 'bayes')
256 >>> keyword_encipher('test message', 'bayes', 0)
258 >>> keyword_encipher('test message', 'bayes', 1)
260 >>> keyword_encipher('test message', 'bayes', 2)
263 cipher_alphabet
= keyword_cipher_alphabet_of(keyword
, wrap_alphabet
)
264 cipher_translation
= ''.maketrans(string
.ascii_lowercase
, cipher_alphabet
)
265 return message
.lower().translate(cipher_translation
)
267 def keyword_decipher(message
, keyword
, wrap_alphabet
=0):
268 """Deciphers a message with a keyword substitution cipher.
269 wrap_alphabet controls how the rest of the alphabet is added
272 1 : from the last letter in the sanitised keyword
273 2 : from the largest letter in the sanitised keyword
275 >>> keyword_decipher('rsqr ksqqbds', 'bayes')
277 >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
279 >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
281 >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
284 cipher_alphabet
= keyword_cipher_alphabet_of(keyword
, wrap_alphabet
)
285 cipher_translation
= ''.maketrans(cipher_alphabet
, string
.ascii_lowercase
)
286 return message
.lower().translate(cipher_translation
)
289 def caesar_break(message
, metric
=norms
.euclidean_distance
, target_frequencies
=normalised_english_counts
, message_frequency_scaling
=norms
.normalise
):
290 """Breaks a Caesar cipher using frequency analysis
292 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
293 (4, 0.31863952890183...)
294 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
295 (19, 0.42152901235832...)
296 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
297 (13, 0.316029208075451...)
299 sanitised_message
= sanitise(message
)
301 best_fit
= float("inf")
302 for shift
in range(26):
303 plaintext
= caesar_decipher(sanitised_message
, shift
)
304 frequencies
= message_frequency_scaling(letter_frequencies(plaintext
))
305 fit
= metric(target_frequencies
, frequencies
)
306 logger
.debug('Caesar break attempt using key {0} gives fit of {1} and decrypt starting: {2}'.format(shift
, fit
, plaintext
[:50]))
310 logger
.info('Caesar break best fit: key {0} gives fit of {1} and decrypt starting: {2}'.format(best_shift
, best_fit
, caesar_decipher(sanitised_message
, best_shift
)[:50]))
311 return best_shift
, best_fit
313 def affine_break(message
, metric
=norms
.euclidean_distance
, target_frequencies
=normalised_english_counts
, message_frequency_scaling
=norms
.normalise
):
314 """Breaks an affine cipher using frequency analysis
316 >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd clm ckuxj.') # doctest: +ELLIPSIS
317 ((15, 22, True), 0.23570361818655...)
319 sanitised_message
= sanitise(message
)
322 best_one_based
= True
323 best_fit
= float("inf")
324 for one_based
in [True, False]:
325 for multiplier
in range(1, 26, 2):
326 for adder
in range(26):
327 plaintext
= affine_decipher(sanitised_message
, multiplier
, adder
, one_based
)
328 frequencies
= message_frequency_scaling(letter_frequencies(plaintext
))
329 fit
= metric(target_frequencies
, frequencies
)
330 logger
.debug('Affine break attempt using key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(multiplier
, adder
, one_based
, fit
, plaintext
[:50]))
333 best_multiplier
= multiplier
335 best_one_based
= one_based
336 logger
.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(best_multiplier
, best_adder
, best_one_based
, best_fit
, affine_decipher(sanitised_message
, best_multiplier
, best_adder
, best_one_based
)[:50]))
337 return (best_multiplier
, best_adder
, best_one_based
), best_fit
340 def keyword_break(message
, wordlist
=keywords
, metric
=norms
.euclidean_distance
, target_frequencies
=normalised_english_counts
, message_frequency_scaling
=norms
.normalise
):
341 """Breaks a keyword substitution cipher using a dictionary and frequency analysis
343 >>> keyword_break(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', 1), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
344 (('elephant', 1), 0.41643991598441...)
347 best_wrap_alphabet
= True
348 best_fit
= float("inf")
349 for wrap_alphabet
in range(3):
350 for keyword
in wordlist
:
351 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
352 frequencies
= message_frequency_scaling(letter_frequencies(plaintext
))
353 fit
= metric(target_frequencies
, frequencies
)
354 logger
.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(keyword
, wrap_alphabet
, fit
, sanitise(plaintext
)[:50]))
357 best_keyword
= keyword
358 best_wrap_alphabet
= wrap_alphabet
359 logger
.info('Keyword break best fit with key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(best_keyword
, best_wrap_alphabet
, best_fit
, sanitise(keyword_decipher(message
, best_keyword
))[:50]))
360 return (best_keyword
, best_wrap_alphabet
), best_fit
363 if __name__
== "__main__":