6 from itertools
import zip_longest
7 from segment
import segment
12 # c5a = open('2012/5a.ciphertext', 'r').read()
13 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
16 logger
= logging
.getLogger(__name__
)
17 logger
.addHandler(logging
.FileHandler('cipher.log'))
18 logger
.setLevel(logging
.WARNING
)
19 #logger.setLevel(logging.INFO)
21 english_counts
= collections
.defaultdict(int)
22 with
open('count_1l.txt', 'r') as f
:
24 (letter
, count
) = line
.split("\t")
25 english_counts
[letter
] = int(count
)
26 normalised_english_counts
= norms
.normalise(english_counts
)
28 with
open('words.txt', 'r') as f
:
29 keywords
= [line
.rstrip() for line
in f
]
31 modular_division_table
= [[0]*26 for x
in range(26)]
35 modular_division_table
[b
][c
] = a
39 """Remove all non-alphabetic characters and convert the text to lowercase
41 >>> sanitise('The Quick')
43 >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
44 'thequickbrownfoxjumpedoverthelazydog'
46 sanitised
= [c
.lower() for c
in text
if c
in string
.ascii_letters
]
47 return ''.join(sanitised
)
50 """Returns all n-grams of a text
52 >>> ngrams(sanitise('the quick brown fox'), 2)
53 [('t', 'h'), ('h', 'e'), ('e', 'q'), ('q', 'u'), ('u', 'i'), ('i', 'c'), ('c', 'k'), ('k', 'b'), ('b', 'r'), ('r', 'o'), ('o', 'w'), ('w', 'n'), ('n', 'f'), ('f', 'o'), ('o', 'x')]
54 >>> ngrams(sanitise('the quick brown fox'), 4)
55 [('t', 'h', 'e', 'q'), ('h', 'e', 'q', 'u'), ('e', 'q', 'u', 'i'), ('q', 'u', 'i', 'c'), ('u', 'i', 'c', 'k'), ('i', 'c', 'k', 'b'), ('c', 'k', 'b', 'r'), ('k', 'b', 'r', 'o'), ('b', 'r', 'o', 'w'), ('r', 'o', 'w', 'n'), ('o', 'w', 'n', 'f'), ('w', 'n', 'f', 'o'), ('n', 'f', 'o', 'x')]
57 return [tuple(text
[i
:i
+n
]) for i
in range(len(text
)-n
+1)]
59 def every_nth(text
, n
):
60 """Returns n strings, each of which consists of every nth character,
61 starting with the 0th, 1st, 2nd, ... (n-1)th character
63 >>> every_nth(string.ascii_lowercase, 5)
64 ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
65 >>> every_nth(string.ascii_lowercase, 1)
66 ['abcdefghijklmnopqrstuvwxyz']
67 >>> every_nth(string.ascii_lowercase, 26)
68 ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
70 split_text
= [text
[i
:i
+n
] for i
in range(0, len(text
), n
)]
71 return [''.join(l
) for l
in zip_longest(*split_text
, fillvalue
='')]
73 def combine_every_nth(split_text
):
74 """Reforms a text split into every_nth strings
76 >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
77 'abcdefghijklmnopqrstuvwxyz'
78 >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
79 'abcdefghijklmnopqrstuvwxyz'
80 >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
81 'abcdefghijklmnopqrstuvwxyz'
83 return ''.join([''.join(l
) for l
in zip_longest(*split_text
, fillvalue
='')])
86 def letter_frequencies(text
):
87 """Count the number of occurrences of each character in text
89 >>> sorted(letter_frequencies('abcdefabc').items())
90 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
91 >>> sorted(letter_frequencies('the quick brown fox jumped over the lazy dog').items())
92 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
93 >>> sorted(letter_frequencies('The Quick BROWN fox jumped! over... the (9lazy) DOG').items())
94 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
95 >>> sorted(letter_frequencies(sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')).items())
96 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
98 counts
= collections
.defaultdict(int)
103 def deduplicate(text
):
104 return list(collections
.OrderedDict
.fromkeys(text
))
108 def caesar_encipher_letter(letter
, shift
):
109 """Encipher a letter, given a shift amount
111 >>> caesar_encipher_letter('a', 1)
113 >>> caesar_encipher_letter('a', 2)
115 >>> caesar_encipher_letter('b', 2)
117 >>> caesar_encipher_letter('x', 2)
119 >>> caesar_encipher_letter('y', 2)
121 >>> caesar_encipher_letter('z', 2)
123 >>> caesar_encipher_letter('z', -1)
125 >>> caesar_encipher_letter('a', -1)
128 if letter
in string
.ascii_letters
:
129 if letter
in string
.ascii_uppercase
:
130 alphabet_start
= ord('A')
132 alphabet_start
= ord('a')
133 return chr(((ord(letter
) - alphabet_start
+ shift
) % 26) + alphabet_start
)
137 def caesar_decipher_letter(letter
, shift
):
138 """Decipher a letter, given a shift amount
140 >>> caesar_decipher_letter('b', 1)
142 >>> caesar_decipher_letter('b', 2)
145 return caesar_encipher_letter(letter
, -shift
)
147 def caesar_encipher(message
, shift
):
148 """Encipher a message with the Caesar cipher of given shift
150 >>> caesar_encipher('abc', 1)
152 >>> caesar_encipher('abc', 2)
154 >>> caesar_encipher('abcxyz', 2)
156 >>> caesar_encipher('ab cx yz', 2)
159 enciphered
= [caesar_encipher_letter(l
, shift
) for l
in message
]
160 return ''.join(enciphered
)
162 def caesar_decipher(message
, shift
):
163 """Encipher a message with the Caesar cipher of given shift
165 >>> caesar_decipher('bcd', 1)
167 >>> caesar_decipher('cde', 2)
169 >>> caesar_decipher('cd ez ab', 2)
172 return caesar_encipher(message
, -shift
)
174 def affine_encipher_letter(letter
, multiplier
=1, adder
=0, one_based
=True):
175 """Encipher a letter, given a multiplier and adder
177 >>> ''.join([affine_encipher_letter(l, 3, 5, True) for l in string.ascii_uppercase])
178 'HKNQTWZCFILORUXADGJMPSVYBE'
179 >>> ''.join([affine_encipher_letter(l, 3, 5, False) for l in string.ascii_uppercase])
180 'FILORUXADGJMPSVYBEHKNQTWZC'
182 if letter
in string
.ascii_letters
:
183 if letter
in string
.ascii_uppercase
:
184 alphabet_start
= ord('A')
186 alphabet_start
= ord('a')
187 letter_number
= ord(letter
) - alphabet_start
188 if one_based
: letter_number
+= 1
189 cipher_number
= (letter_number
* multiplier
+ adder
) % 26
190 if one_based
: cipher_number
-= 1
191 return chr(cipher_number
% 26 + alphabet_start
)
195 def affine_decipher_letter(letter
, multiplier
=1, adder
=0, one_based
=True):
196 """Encipher a letter, given a multiplier and adder
198 >>> ''.join([affine_decipher_letter(l, 3, 5, True) for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
199 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
200 >>> ''.join([affine_decipher_letter(l, 3, 5, False) for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
201 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
203 if letter
in string
.ascii_letters
:
204 if letter
in string
.ascii_uppercase
:
205 alphabet_start
= ord('A')
207 alphabet_start
= ord('a')
208 cipher_number
= ord(letter
) - alphabet_start
209 if one_based
: cipher_number
+= 1
210 plaintext_number
= modular_division_table
[multiplier
][(cipher_number
- adder
) % 26]
211 if one_based
: plaintext_number
-= 1
212 return chr(plaintext_number
% 26 + alphabet_start
)
216 def affine_encipher(message
, multiplier
=1, adder
=0, one_based
=True):
217 """Encipher a message
219 >>> affine_encipher('hours passed during which jerico tried every trick he could think of', 15, 22, True)
220 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
222 enciphered
= [affine_encipher_letter(l
, multiplier
, adder
, one_based
) for l
in message
]
223 return ''.join(enciphered
)
225 def affine_decipher(message
, multiplier
=1, adder
=0, one_based
=True):
226 """Decipher a message
228 >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh', 15, 22, True)
229 'hours passed during which jerico tried every trick he could think of'
231 enciphered
= [affine_decipher_letter(l
, multiplier
, adder
, one_based
) for l
in message
]
232 return ''.join(enciphered
)
235 def keyword_cipher_alphabet_of(keyword
, wrap_alphabet
=0):
236 """Find the cipher alphabet given a keyword.
237 wrap_alphabet controls how the rest of the alphabet is added
240 1 : from the last letter in the sanitised keyword
241 2 : from the largest letter in the sanitised keyword
243 >>> keyword_cipher_alphabet_of('bayes')
244 'bayescdfghijklmnopqrtuvwxz'
245 >>> keyword_cipher_alphabet_of('bayes', 0)
246 'bayescdfghijklmnopqrtuvwxz'
247 >>> keyword_cipher_alphabet_of('bayes', 1)
248 'bayestuvwxzcdfghijklmnopqr'
249 >>> keyword_cipher_alphabet_of('bayes', 2)
250 'bayeszcdfghijklmnopqrtuvwx'
252 if wrap_alphabet
== 0:
253 cipher_alphabet
= ''.join(deduplicate(sanitise(keyword
) + string
.ascii_lowercase
))
255 if wrap_alphabet
== 1:
256 last_keyword_letter
= deduplicate(sanitise(keyword
))[-1]
258 last_keyword_letter
= sorted(sanitise(keyword
))[-1]
259 last_keyword_position
= string
.ascii_lowercase
.find(last_keyword_letter
) + 1
260 cipher_alphabet
= ''.join(deduplicate(sanitise(keyword
) +
261 string
.ascii_lowercase
[last_keyword_position
:] +
262 string
.ascii_lowercase
))
263 return cipher_alphabet
266 def keyword_encipher(message
, keyword
, wrap_alphabet
=0):
267 """Enciphers a message with a keyword substitution cipher.
268 wrap_alphabet controls how the rest of the alphabet is added
271 1 : from the last letter in the sanitised keyword
272 2 : from the largest letter in the sanitised keyword
274 >>> keyword_encipher('test message', 'bayes')
276 >>> keyword_encipher('test message', 'bayes', 0)
278 >>> keyword_encipher('test message', 'bayes', 1)
280 >>> keyword_encipher('test message', 'bayes', 2)
283 cipher_alphabet
= keyword_cipher_alphabet_of(keyword
, wrap_alphabet
)
284 cipher_translation
= ''.maketrans(string
.ascii_lowercase
, cipher_alphabet
)
285 return message
.lower().translate(cipher_translation
)
287 def keyword_decipher(message
, keyword
, wrap_alphabet
=0):
288 """Deciphers a message with a keyword substitution cipher.
289 wrap_alphabet controls how the rest of the alphabet is added
292 1 : from the last letter in the sanitised keyword
293 2 : from the largest letter in the sanitised keyword
295 >>> keyword_decipher('rsqr ksqqbds', 'bayes')
297 >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
299 >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
301 >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
304 cipher_alphabet
= keyword_cipher_alphabet_of(keyword
, wrap_alphabet
)
305 cipher_translation
= ''.maketrans(cipher_alphabet
, string
.ascii_lowercase
)
306 return message
.lower().translate(cipher_translation
)
308 def scytale_encipher(message
, rows
):
309 """Enciphers using the scytale transposition cipher
311 >>> scytale_encipher('thequickbrownfox', 3)
313 >>> scytale_encipher('thequickbrownfox', 4)
315 >>> scytale_encipher('thequickbrownfox', 5)
317 >>> scytale_encipher('thequickbrownfox', 6)
319 >>> scytale_encipher('thequickbrownfox', 7)
322 row_length
= math
.ceil(len(message
) / rows
)
323 slices
= [message
[i
:i
+row_length
] for i
in range(0, len(message
), row_length
)]
324 return ''.join([''.join(r
) for r
in zip_longest(*slices
, fillvalue
='')])
326 def scytale_decipher(message
, rows
):
327 """Deciphers using the scytale transposition cipher
329 >>> scytale_decipher('tcnhkfeboqrxuoiw', 3)
332 cols
= math
.ceil(len(message
) / rows
)
333 if len(message
) % rows
== 0:
336 part_cols
= rows
- len(message
) % rows
337 full_cols
= cols
- part_cols
338 columns
= [message
[i
:i
+rows
] for i
in range(0, full_cols
* rows
, rows
)] + \
339 [message
[i
:i
+rows
-1] for i
in range(full_cols
* rows
, len(message
), rows
- 1)]
340 return ''.join([''.join(c
) for c
in zip_longest(*columns
, fillvalue
='')])
343 def caesar_break(message
, metric
=norms
.euclidean_distance
, target_frequencies
=normalised_english_counts
, message_frequency_scaling
=norms
.normalise
):
344 """Breaks a Caesar cipher using frequency analysis
346 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
347 (4, 0.31863952890183...)
348 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
349 (19, 0.42152901235832...)
350 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
351 (13, 0.316029208075451...)
353 sanitised_message
= sanitise(message
)
355 best_fit
= float("inf")
356 for shift
in range(26):
357 plaintext
= caesar_decipher(sanitised_message
, shift
)
358 frequencies
= message_frequency_scaling(letter_frequencies(plaintext
))
359 fit
= metric(target_frequencies
, frequencies
)
360 logger
.debug('Caesar break attempt using key {0} gives fit of {1} and decrypt starting: {2}'.format(shift
, fit
, plaintext
[:50]))
364 logger
.info('Caesar break best fit: key {0} gives fit of {1} and decrypt starting: {2}'.format(best_shift
, best_fit
, caesar_decipher(sanitised_message
, best_shift
)[:50]))
365 return best_shift
, best_fit
367 def affine_break(message
, metric
=norms
.euclidean_distance
, target_frequencies
=normalised_english_counts
, message_frequency_scaling
=norms
.normalise
):
368 """Breaks an affine cipher using frequency analysis
370 >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd clm ckuxj.') # doctest: +ELLIPSIS
371 ((15, 22, True), 0.23570361818655...)
373 sanitised_message
= sanitise(message
)
376 best_one_based
= True
377 best_fit
= float("inf")
378 for one_based
in [True, False]:
379 for multiplier
in range(1, 26, 2):
380 for adder
in range(26):
381 plaintext
= affine_decipher(sanitised_message
, multiplier
, adder
, one_based
)
382 frequencies
= message_frequency_scaling(letter_frequencies(plaintext
))
383 fit
= metric(target_frequencies
, frequencies
)
384 logger
.debug('Affine break attempt using key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(multiplier
, adder
, one_based
, fit
, plaintext
[:50]))
387 best_multiplier
= multiplier
389 best_one_based
= one_based
390 logger
.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(best_multiplier
, best_adder
, best_one_based
, best_fit
, affine_decipher(sanitised_message
, best_multiplier
, best_adder
, best_one_based
)[:50]))
391 return (best_multiplier
, best_adder
, best_one_based
), best_fit
394 def keyword_break(message
, wordlist
=keywords
, metric
=norms
.euclidean_distance
, target_frequencies
=normalised_english_counts
, message_frequency_scaling
=norms
.normalise
):
395 """Breaks a keyword substitution cipher using a dictionary and frequency analysis
397 >>> keyword_break(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', 1), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
398 (('elephant', 1), 0.41643991598441...)
401 best_wrap_alphabet
= True
402 best_fit
= float("inf")
403 for wrap_alphabet
in range(3):
404 for keyword
in wordlist
:
405 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
406 frequencies
= message_frequency_scaling(letter_frequencies(plaintext
))
407 fit
= metric(target_frequencies
, frequencies
)
408 logger
.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(keyword
, wrap_alphabet
, fit
, sanitise(plaintext
)[:50]))
411 best_keyword
= keyword
412 best_wrap_alphabet
= wrap_alphabet
413 logger
.info('Keyword break best fit with key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(best_keyword
, best_wrap_alphabet
, best_fit
, sanitise(keyword_decipher(message
, best_keyword
))[:50]))
414 return (best_keyword
, best_wrap_alphabet
), best_fit
417 if __name__
== "__main__":