6 from itertools
import zip_longest
7 from segment
import segment
12 # c5a = open('2012/5a.ciphertext', 'r').read()
13 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
16 logger
= logging
.getLogger(__name__
)
17 logger
.addHandler(logging
.FileHandler('cipher.log'))
18 logger
.setLevel(logging
.WARNING
)
19 #logger.setLevel(logging.INFO)
20 #logger.setLevel(logging.DEBUG)
22 english_counts
= collections
.defaultdict(int)
23 with
open('count_1l.txt', 'r') as f
:
25 (letter
, count
) = line
.split("\t")
26 english_counts
[letter
] = int(count
)
27 normalised_english_counts
= norms
.normalise(english_counts
)
29 english_bigram_counts
= collections
.defaultdict(int)
30 with
open('count_2l.txt', 'r') as f
:
32 (bigram
, count
) = line
.split("\t")
33 english_bigram_counts
[bigram
] = int(count
)
34 normalised_english_bigram_counts
= norms
.normalise(english_bigram_counts
)
36 with
open('words.txt', 'r') as f
:
37 keywords
= [line
.rstrip() for line
in f
]
39 modular_division_table
= [[0]*26 for x
in range(26)]
43 modular_division_table
[b
][c
] = a
47 """Remove all non-alphabetic characters and convert the text to lowercase
49 >>> sanitise('The Quick')
51 >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
52 'thequickbrownfoxjumpedoverthelazydog'
54 sanitised
= [c
.lower() for c
in text
if c
in string
.ascii_letters
]
55 return ''.join(sanitised
)
58 """Returns all n-grams of a text
60 >>> ngrams(sanitise('the quick brown fox'), 2)
61 ['th', 'he', 'eq', 'qu', 'ui', 'ic', 'ck', 'kb', 'br', 'ro', 'ow', 'wn', 'nf', 'fo', 'ox']
62 >>> ngrams(sanitise('the quick brown fox'), 4)
63 ['theq', 'hequ', 'equi', 'quic', 'uick', 'ickb', 'ckbr', 'kbro', 'brow', 'rown', 'ownf', 'wnfo', 'nfox']
65 return [text
[i
:i
+n
] for i
in range(len(text
)-n
+1)]
67 def every_nth(text
, n
):
68 """Returns n strings, each of which consists of every nth character,
69 starting with the 0th, 1st, 2nd, ... (n-1)th character
71 >>> every_nth(string.ascii_lowercase, 5)
72 ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
73 >>> every_nth(string.ascii_lowercase, 1)
74 ['abcdefghijklmnopqrstuvwxyz']
75 >>> every_nth(string.ascii_lowercase, 26)
76 ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
78 split_text
= [text
[i
:i
+n
] for i
in range(0, len(text
), n
)]
79 return [''.join(l
) for l
in zip_longest(*split_text
, fillvalue
='')]
81 def combine_every_nth(split_text
):
82 """Reforms a text split into every_nth strings
84 >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
85 'abcdefghijklmnopqrstuvwxyz'
86 >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
87 'abcdefghijklmnopqrstuvwxyz'
88 >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
89 'abcdefghijklmnopqrstuvwxyz'
91 return ''.join([''.join(l
) for l
in zip_longest(*split_text
, fillvalue
='')])
94 def frequencies(text
):
95 """Count the number of occurrences of each character in text
97 >>> sorted(frequencies('abcdefabc').items())
98 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
99 >>> sorted(frequencies('the quick brown fox jumped over the lazy dog').items())
100 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
101 >>> sorted(frequencies('The Quick BROWN fox jumped! over... the (9lazy) DOG').items())
102 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
103 >>> sorted(frequencies(sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')).items())
104 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
106 counts
= collections
.defaultdict(int)
110 letter_frequencies
= frequencies
112 def deduplicate(text
):
113 return list(collections
.OrderedDict
.fromkeys(text
))
117 def caesar_encipher_letter(letter
, shift
):
118 """Encipher a letter, given a shift amount
120 >>> caesar_encipher_letter('a', 1)
122 >>> caesar_encipher_letter('a', 2)
124 >>> caesar_encipher_letter('b', 2)
126 >>> caesar_encipher_letter('x', 2)
128 >>> caesar_encipher_letter('y', 2)
130 >>> caesar_encipher_letter('z', 2)
132 >>> caesar_encipher_letter('z', -1)
134 >>> caesar_encipher_letter('a', -1)
137 if letter
in string
.ascii_letters
:
138 if letter
in string
.ascii_uppercase
:
139 alphabet_start
= ord('A')
141 alphabet_start
= ord('a')
142 return chr(((ord(letter
) - alphabet_start
+ shift
) % 26) + alphabet_start
)
146 def caesar_decipher_letter(letter
, shift
):
147 """Decipher a letter, given a shift amount
149 >>> caesar_decipher_letter('b', 1)
151 >>> caesar_decipher_letter('b', 2)
154 return caesar_encipher_letter(letter
, -shift
)
156 def caesar_encipher(message
, shift
):
157 """Encipher a message with the Caesar cipher of given shift
159 >>> caesar_encipher('abc', 1)
161 >>> caesar_encipher('abc', 2)
163 >>> caesar_encipher('abcxyz', 2)
165 >>> caesar_encipher('ab cx yz', 2)
168 enciphered
= [caesar_encipher_letter(l
, shift
) for l
in message
]
169 return ''.join(enciphered
)
171 def caesar_decipher(message
, shift
):
172 """Encipher a message with the Caesar cipher of given shift
174 >>> caesar_decipher('bcd', 1)
176 >>> caesar_decipher('cde', 2)
178 >>> caesar_decipher('cd ez ab', 2)
181 return caesar_encipher(message
, -shift
)
183 def affine_encipher_letter(letter
, multiplier
=1, adder
=0, one_based
=True):
184 """Encipher a letter, given a multiplier and adder
186 >>> ''.join([affine_encipher_letter(l, 3, 5, True) for l in string.ascii_uppercase])
187 'HKNQTWZCFILORUXADGJMPSVYBE'
188 >>> ''.join([affine_encipher_letter(l, 3, 5, False) for l in string.ascii_uppercase])
189 'FILORUXADGJMPSVYBEHKNQTWZC'
191 if letter
in string
.ascii_letters
:
192 if letter
in string
.ascii_uppercase
:
193 alphabet_start
= ord('A')
195 alphabet_start
= ord('a')
196 letter_number
= ord(letter
) - alphabet_start
197 if one_based
: letter_number
+= 1
198 cipher_number
= (letter_number
* multiplier
+ adder
) % 26
199 if one_based
: cipher_number
-= 1
200 return chr(cipher_number
% 26 + alphabet_start
)
204 def affine_decipher_letter(letter
, multiplier
=1, adder
=0, one_based
=True):
205 """Encipher a letter, given a multiplier and adder
207 >>> ''.join([affine_decipher_letter(l, 3, 5, True) for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
208 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
209 >>> ''.join([affine_decipher_letter(l, 3, 5, False) for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
210 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
212 if letter
in string
.ascii_letters
:
213 if letter
in string
.ascii_uppercase
:
214 alphabet_start
= ord('A')
216 alphabet_start
= ord('a')
217 cipher_number
= ord(letter
) - alphabet_start
218 if one_based
: cipher_number
+= 1
219 plaintext_number
= modular_division_table
[multiplier
][(cipher_number
- adder
) % 26]
220 if one_based
: plaintext_number
-= 1
221 return chr(plaintext_number
% 26 + alphabet_start
)
225 def affine_encipher(message
, multiplier
=1, adder
=0, one_based
=True):
226 """Encipher a message
228 >>> affine_encipher('hours passed during which jerico tried every trick he could think of', 15, 22, True)
229 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
231 enciphered
= [affine_encipher_letter(l
, multiplier
, adder
, one_based
) for l
in message
]
232 return ''.join(enciphered
)
234 def affine_decipher(message
, multiplier
=1, adder
=0, one_based
=True):
235 """Decipher a message
237 >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh', 15, 22, True)
238 'hours passed during which jerico tried every trick he could think of'
240 enciphered
= [affine_decipher_letter(l
, multiplier
, adder
, one_based
) for l
in message
]
241 return ''.join(enciphered
)
244 def keyword_cipher_alphabet_of(keyword
, wrap_alphabet
=0):
245 """Find the cipher alphabet given a keyword.
246 wrap_alphabet controls how the rest of the alphabet is added
249 1 : from the last letter in the sanitised keyword
250 2 : from the largest letter in the sanitised keyword
252 >>> keyword_cipher_alphabet_of('bayes')
253 'bayescdfghijklmnopqrtuvwxz'
254 >>> keyword_cipher_alphabet_of('bayes', 0)
255 'bayescdfghijklmnopqrtuvwxz'
256 >>> keyword_cipher_alphabet_of('bayes', 1)
257 'bayestuvwxzcdfghijklmnopqr'
258 >>> keyword_cipher_alphabet_of('bayes', 2)
259 'bayeszcdfghijklmnopqrtuvwx'
261 if wrap_alphabet
== 0:
262 cipher_alphabet
= ''.join(deduplicate(sanitise(keyword
) + string
.ascii_lowercase
))
264 if wrap_alphabet
== 1:
265 last_keyword_letter
= deduplicate(sanitise(keyword
))[-1]
267 last_keyword_letter
= sorted(sanitise(keyword
))[-1]
268 last_keyword_position
= string
.ascii_lowercase
.find(last_keyword_letter
) + 1
269 cipher_alphabet
= ''.join(deduplicate(sanitise(keyword
) +
270 string
.ascii_lowercase
[last_keyword_position
:] +
271 string
.ascii_lowercase
))
272 return cipher_alphabet
275 def keyword_encipher(message
, keyword
, wrap_alphabet
=0):
276 """Enciphers a message with a keyword substitution cipher.
277 wrap_alphabet controls how the rest of the alphabet is added
280 1 : from the last letter in the sanitised keyword
281 2 : from the largest letter in the sanitised keyword
283 >>> keyword_encipher('test message', 'bayes')
285 >>> keyword_encipher('test message', 'bayes', 0)
287 >>> keyword_encipher('test message', 'bayes', 1)
289 >>> keyword_encipher('test message', 'bayes', 2)
292 cipher_alphabet
= keyword_cipher_alphabet_of(keyword
, wrap_alphabet
)
293 cipher_translation
= ''.maketrans(string
.ascii_lowercase
, cipher_alphabet
)
294 return message
.lower().translate(cipher_translation
)
296 def keyword_decipher(message
, keyword
, wrap_alphabet
=0):
297 """Deciphers a message with a keyword substitution cipher.
298 wrap_alphabet controls how the rest of the alphabet is added
301 1 : from the last letter in the sanitised keyword
302 2 : from the largest letter in the sanitised keyword
304 >>> keyword_decipher('rsqr ksqqbds', 'bayes')
306 >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
308 >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
310 >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
313 cipher_alphabet
= keyword_cipher_alphabet_of(keyword
, wrap_alphabet
)
314 cipher_translation
= ''.maketrans(cipher_alphabet
, string
.ascii_lowercase
)
315 return message
.lower().translate(cipher_translation
)
317 def scytale_encipher(message
, rows
):
318 """Enciphers using the scytale transposition cipher.
319 Message is padded with spaces to allow all rows to be the same length.
321 >>> scytale_encipher('thequickbrownfox', 3)
323 >>> scytale_encipher('thequickbrownfox', 4)
325 >>> scytale_encipher('thequickbrownfox', 5)
326 'tubn hirf ecoo qkwx '
327 >>> scytale_encipher('thequickbrownfox', 6)
329 >>> scytale_encipher('thequickbrownfox', 7)
330 'tqcrnx hukof eibwo '
332 if len(message
) % rows
!= 0:
333 message
+= ' '*(rows
- len(message
) % rows
)
334 row_length
= round(len(message
) / rows
)
335 slices
= [message
[i
:i
+row_length
] for i
in range(0, len(message
), row_length
)]
336 return ''.join([''.join(r
) for r
in zip_longest(*slices
, fillvalue
='')])
338 def scytale_decipher(message
, rows
):
339 """Deciphers using the scytale transposition cipher.
340 Assumes the message is padded so that all rows are the same length.
342 >>> scytale_decipher('tcnhkfeboqrxuo iw ', 3)
344 >>> scytale_decipher('tubnhirfecooqkwx', 4)
346 >>> scytale_decipher('tubn hirf ecoo qkwx ', 5)
348 >>> scytale_decipher('tqcrnxhukof eibwo ', 6)
350 >>> scytale_decipher('tqcrnx hukof eibwo ', 7)
353 cols
= round(len(message
) / rows
)
354 columns
= [message
[i
:i
+rows
] for i
in range(0, cols
* rows
, rows
)]
355 return ''.join([''.join(c
) for c
in zip_longest(*columns
, fillvalue
='')])
358 def caesar_break(message
, metric
=norms
.euclidean_distance
, target_counts
=normalised_english_counts
, message_frequency_scaling
=norms
.normalise
):
359 """Breaks a Caesar cipher using frequency analysis
361 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
362 (4, 0.31863952890183...)
363 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
364 (19, 0.42152901235832...)
365 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
366 (13, 0.316029208075451...)
368 sanitised_message
= sanitise(message
)
370 best_fit
= float("inf")
371 for shift
in range(26):
372 plaintext
= caesar_decipher(sanitised_message
, shift
)
373 counts
= message_frequency_scaling(letter_frequencies(plaintext
))
374 fit
= metric(target_counts
, counts
)
375 logger
.debug('Caesar break attempt using key {0} gives fit of {1} and decrypt starting: {2}'.format(shift
, fit
, plaintext
[:50]))
379 logger
.info('Caesar break best fit: key {0} gives fit of {1} and decrypt starting: {2}'.format(best_shift
, best_fit
, caesar_decipher(sanitised_message
, best_shift
)[:50]))
380 return best_shift
, best_fit
382 def affine_break(message
, metric
=norms
.euclidean_distance
, target_counts
=normalised_english_counts
, message_frequency_scaling
=norms
.normalise
):
383 """Breaks an affine cipher using frequency analysis
385 >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd clm ckuxj.') # doctest: +ELLIPSIS
386 ((15, 22, True), 0.23570361818655...)
388 sanitised_message
= sanitise(message
)
391 best_one_based
= True
392 best_fit
= float("inf")
393 for one_based
in [True, False]:
394 for multiplier
in range(1, 26, 2):
395 for adder
in range(26):
396 plaintext
= affine_decipher(sanitised_message
, multiplier
, adder
, one_based
)
397 counts
= message_frequency_scaling(letter_frequencies(plaintext
))
398 fit
= metric(target_counts
, counts
)
399 logger
.debug('Affine break attempt using key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(multiplier
, adder
, one_based
, fit
, plaintext
[:50]))
402 best_multiplier
= multiplier
404 best_one_based
= one_based
405 logger
.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(best_multiplier
, best_adder
, best_one_based
, best_fit
, affine_decipher(sanitised_message
, best_multiplier
, best_adder
, best_one_based
)[:50]))
406 return (best_multiplier
, best_adder
, best_one_based
), best_fit
409 def keyword_break(message
, wordlist
=keywords
, metric
=norms
.euclidean_distance
, target_counts
=normalised_english_counts
, message_frequency_scaling
=norms
.normalise
):
410 """Breaks a keyword substitution cipher using a dictionary and frequency analysis
412 >>> keyword_break(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', 1), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
413 (('elephant', 1), 0.41643991598441...)
416 best_wrap_alphabet
= True
417 best_fit
= float("inf")
418 for wrap_alphabet
in range(3):
419 for keyword
in wordlist
:
420 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
421 counts
= message_frequency_scaling(letter_frequencies(plaintext
))
422 fit
= metric(target_counts
, counts
)
423 logger
.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(keyword
, wrap_alphabet
, fit
, sanitise(plaintext
)[:50]))
426 best_keyword
= keyword
427 best_wrap_alphabet
= wrap_alphabet
428 logger
.info('Keyword break best fit with key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(best_keyword
, best_wrap_alphabet
, best_fit
, sanitise(keyword_decipher(message
, best_keyword
))[:50]))
429 return (best_keyword
, best_wrap_alphabet
), best_fit
431 def scytale_break(message
, metric
=norms
.euclidean_distance
, target_counts
=normalised_english_bigram_counts
, message_frequency_scaling
=norms
.normalise
):
432 """Breaks a Scytale cipher
434 >>> scytale_break('tfeulchtrtteehwahsdehneoifeayfsondmwpltmaoalhikotoeredcweatehiplwxsnhooacgorrcrcraotohsgullasenylrendaianeplscdriiotoaek') # doctest: +ELLIPSIS
435 (6, 0.83453041115025...)
438 best_fit
= float("inf")
439 for key
in range(1, 20):
440 if len(message
) % key
== 0:
441 plaintext
= scytale_decipher(message
, key
)
442 counts
= message_frequency_scaling(frequencies(ngrams(sanitise(plaintext
), 2)))
443 fit
= metric(target_counts
, counts
)
444 logger
.debug('Scytale break attempt using key {0} gives fit of {1} and decrypt starting: {2}'.format(key
, fit
, sanitise(plaintext
)[:50]))
448 logger
.info('Scytale break best fit with key {0} gives fit of {1} and decrypt starting: {2}'.format(best_key
, best_fit
, sanitise(scytale_decipher(message
, best_key
))[:50]))
449 return best_key
, best_fit
452 if __name__
== "__main__":