5 from itertools
import zip_longest
6 from segment
import segment
11 # c5a = open('2012/5a.ciphertext', 'r').read()
12 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
# Module-level logger: all cipher-breaking progress is appended to cipher.log.
# Default level is WARNING so the per-attempt debug/info chatter from the
# *_break functions stays quiet; uncomment the INFO line when diagnosing.
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('cipher.log'))
logger.setLevel(logging.WARNING)
#logger.setLevel(logging.INFO)
# Reference English letter counts, read from a tab-separated
# "letter<TAB>count" file. Used as the target frequency distribution
# by the cipher-breaking functions below.
english_counts = collections.defaultdict(int)
with open('count_1l.txt', 'r') as f:
    for line in f:
        (letter, count) = line.split("\t")
        english_counts[letter] = int(count)
normalised_english_counts = norms.normalise(english_counts)

# Candidate keywords for the dictionary attack in keyword_break.
with open('words.txt', 'r') as f:
    keywords = [line.rstrip() for line in f]
# Lookup tables for modular division: modular_division_table[b][c] == a
# whenever (a * b) % 26 == c. Deciphering an affine cipher needs division
# modulo 26, which these tables answer in O(1).
modular_division_table = [[0]*26 for x in range(26)]
for a in range(26):
    for b in range(26):
        c = (a * b) % 26
        modular_division_table[b][c] = a

# One-based variant: letters are numbered 1..26 and products are folded
# into 1..26 with ((a * b) - 1) % 26 + 1.
modular_division_table_one_based = [[0]*27 for x in range(27)]
for a in range(1, 27):
    for b in range(1, 27):
        c = ((a * b) - 1) % 26 + 1
        modular_division_table_one_based[b][c] = a
45 """Remove all non-alphabetic characters and convert the text to lowercase
47 >>> sanitise('The Quick')
49 >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
50 'thequickbrownfoxjumpedoverthelazydog'
52 sanitised
= [c
.lower() for c
in text
if c
in string
.ascii_letters
]
53 return ''.join(sanitised
)
56 """Returns all n-grams of a text
58 >>> ngrams(sanitise('the quick brown fox'), 2)
59 [('t', 'h'), ('h', 'e'), ('e', 'q'), ('q', 'u'), ('u', 'i'), ('i', 'c'), ('c', 'k'), ('k', 'b'), ('b', 'r'), ('r', 'o'), ('o', 'w'), ('w', 'n'), ('n', 'f'), ('f', 'o'), ('o', 'x')]
60 >>> ngrams(sanitise('the quick brown fox'), 4)
61 [('t', 'h', 'e', 'q'), ('h', 'e', 'q', 'u'), ('e', 'q', 'u', 'i'), ('q', 'u', 'i', 'c'), ('u', 'i', 'c', 'k'), ('i', 'c', 'k', 'b'), ('c', 'k', 'b', 'r'), ('k', 'b', 'r', 'o'), ('b', 'r', 'o', 'w'), ('r', 'o', 'w', 'n'), ('o', 'w', 'n', 'f'), ('w', 'n', 'f', 'o'), ('n', 'f', 'o', 'x')]
63 return [tuple(text
[i
:i
+n
]) for i
in range(len(text
)-n
+1)]
def every_nth(text, n):
    """Returns n strings, each of which consists of every nth character,
    starting with the 0th, 1st, 2nd, ... (n-1)th character

    >>> every_nth(string.ascii_lowercase, 5)
    ['afkpuz', 'bglqv', 'chmrw', 'dinsx', 'ejoty']
    >>> every_nth(string.ascii_lowercase, 1)
    ['abcdefghijklmnopqrstuvwxyz']
    >>> every_nth(string.ascii_lowercase, 26)
    ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
    """
    # Cut the text into consecutive rows of length n, then read columns:
    # column i collects character i of every row, i.e. every nth character.
    rows = []
    for start in range(0, len(text), n):
        rows.append(text[start:start + n])
    return [''.join(column) for column in zip_longest(*rows, fillvalue='')]
def combine_every_nth(split_text):
    """Reforms a text split into every_nth strings

    >>> combine_every_nth(every_nth(string.ascii_lowercase, 5))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 1))
    'abcdefghijklmnopqrstuvwxyz'
    >>> combine_every_nth(every_nth(string.ascii_lowercase, 26))
    'abcdefghijklmnopqrstuvwxyz'
    """
    # Transposing the strands interleaves their characters back into
    # original order; fillvalue='' absorbs strands of unequal length.
    interleaved = zip_longest(*split_text, fillvalue='')
    return ''.join(''.join(group) for group in interleaved)
def letter_frequencies(text):
    """Count the number of occurrences of each character in text

    >>> sorted(letter_frequencies('abcdefabc').items())
    [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
    >>> sorted(letter_frequencies('the quick brown fox jumped over the lazy dog').items())
    [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    >>> sorted(letter_frequencies(sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')).items())
    [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
    """
    # Counter is the stdlib idiom for frequency counting. Like the
    # defaultdict(int) it replaces, a missing character looks up as 0,
    # and .items() yields only characters actually present in text.
    return collections.Counter(text)
def deduplicate(text):
    """Return the characters of text as a list, in first-occurrence order,
    with repeats removed.

    >>> deduplicate('testing')
    ['t', 'e', 's', 'i', 'n', 'g']
    """
    # An OrderedDict keyed by character keeps only the first occurrence
    # of each one while preserving insertion order.
    seen = collections.OrderedDict()
    for character in text:
        seen[character] = None
    return list(seen)
def caesar_encipher_letter(letter, shift):
    """Encipher a letter, given a shift amount

    >>> caesar_encipher_letter('a', 1)
    'b'
    >>> caesar_encipher_letter('a', 2)
    'c'
    >>> caesar_encipher_letter('b', 2)
    'd'
    >>> caesar_encipher_letter('x', 2)
    'z'
    >>> caesar_encipher_letter('y', 2)
    'a'
    >>> caesar_encipher_letter('z', 2)
    'b'
    >>> caesar_encipher_letter('z', -1)
    'y'
    >>> caesar_encipher_letter('a', -1)
    'z'
    """
    if letter in string.ascii_letters:
        # Preserve the letter's case by shifting within its own alphabet.
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        # % 26 wraps the shift in both directions (z + 2 -> b, a - 1 -> z).
        return chr(((ord(letter) - alphabet_start + shift) % 26) + alphabet_start)
    else:
        # Non-letters pass through unchanged.
        return letter
def caesar_decipher_letter(letter, shift):
    """Decipher a letter, given a shift amount

    >>> caesar_decipher_letter('b', 1)
    'a'
    >>> caesar_decipher_letter('b', 2)
    'z'
    """
    # Deciphering is just enciphering with the shift reversed.
    reverse_shift = -shift
    return caesar_encipher_letter(letter, reverse_shift)
def caesar_encipher(message, shift):
    """Encipher a message with the Caesar cipher of given shift

    >>> caesar_encipher('abc', 1)
    'bcd'
    >>> caesar_encipher('abc', 2)
    'cde'
    >>> caesar_encipher('abcxyz', 2)
    'cdezab'
    >>> caesar_encipher('ab cx yz', 2)
    'cd ez ab'
    """
    # Shift every character; non-letters pass through unchanged.
    return ''.join(caesar_encipher_letter(character, shift)
                   for character in message)
def caesar_decipher(message, shift):
    """Decipher a message enciphered with the Caesar cipher of given shift

    >>> caesar_decipher('bcd', 1)
    'abc'
    >>> caesar_decipher('cde', 2)
    'abc'
    >>> caesar_decipher('cd ez ab', 2)
    'ab cx yz'
    """
    # Undo the encipherment by applying the opposite shift.
    return caesar_encipher(message, -shift)
def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Encipher a letter, given a multiplier and adder.
    one_based selects whether letters are numbered 1..26 (True) or 0..25.

    >>> ''.join([affine_encipher_letter(l, 3, 5, True) for l in string.ascii_uppercase])
    'HKNQTWZCFILORUXADGJMPSVYBE'
    >>> ''.join([affine_encipher_letter(l, 3, 5, False) for l in string.ascii_uppercase])
    'FILORUXADGJMPSVYBEHKNQTWZC'
    """
    if letter in string.ascii_letters:
        # Preserve the letter's case.
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        letter_number = ord(letter) - alphabet_start
        if one_based:
            # One-based scheme counts letters 1..26.
            letter_number += 1
        raw_cipher_number = (letter_number * multiplier + adder)
        if one_based:
            # Fold back from 1..26 numbering to a 0..25 alphabet offset.
            cipher_number = (raw_cipher_number - 1) % 26
        else:
            cipher_number = raw_cipher_number % 26
        return chr(cipher_number + alphabet_start)
    else:
        # Non-letters pass through unchanged.
        return letter
def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
    """Decipher a letter enciphered with the given multiplier and adder.
    one_based selects whether letters are numbered 1..26 (True) or 0..25.

    >>> ''.join([affine_decipher_letter(l, 3, 5, True) for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    >>> ''.join([affine_decipher_letter(l, 3, 5, False) for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    """
    if letter in string.ascii_letters:
        # Preserve the letter's case.
        if letter in string.ascii_uppercase:
            alphabet_start = ord('A')
        else:
            alphabet_start = ord('a')
        cipher_number = ord(letter) - alphabet_start
        if one_based:
            # One-based scheme counts letters 1..26; division by the
            # multiplier is answered by the precomputed one-based table.
            cipher_number += 1
            plaintext_number = (modular_division_table_one_based[multiplier][(cipher_number - adder + 26) % 26] - 1) % 26
        else:
            plaintext_number = modular_division_table[multiplier][(cipher_number - adder + 26) % 26]
        return chr(plaintext_number + alphabet_start)
    else:
        # Non-letters pass through unchanged.
        return letter
def affine_encipher(message, multiplier=1, adder=0, one_based=True):
    """Encipher a message with an affine cipher

    >>> affine_encipher('hours passed during which jerico tried every trick he could think of', 15, 22, True)
    'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
    """
    # Encipher character by character; non-letters pass through unchanged.
    return ''.join(affine_encipher_letter(character, multiplier, adder, one_based)
                   for character in message)
def affine_decipher(message, multiplier=1, adder=0, one_based=True):
    """Decipher a message enciphered with an affine cipher

    >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh', 15, 22, True)
    'hours passed during which jerico tried every trick he could think of'
    """
    # Decipher character by character; non-letters pass through unchanged.
    return ''.join(affine_decipher_letter(character, multiplier, adder, one_based)
                   for character in message)
def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
    """Find the cipher alphabet given a keyword.
    wrap_alphabet controls how the rest of the alphabet is added
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 0)
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', 1)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', 2)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    if wrap_alphabet == 0:
        # Keyword first, then the remaining letters in a..z order;
        # deduplicate drops letters already used by the keyword.
        cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
                                              string.ascii_lowercase))
    else:
        if wrap_alphabet == 1:
            last_keyword_letter = deduplicate(sanitise(keyword))[-1]
        else:
            last_keyword_letter = sorted(sanitise(keyword))[-1]
        last_keyword_position = string.ascii_lowercase.find(
            last_keyword_letter) + 1
        # Continue from the chosen letter, wrapping round to 'a'; the final
        # full alphabet catches any letters skipped by the wrap point.
        cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
                                              string.ascii_lowercase[last_keyword_position:] +
                                              string.ascii_lowercase))
    return cipher_alphabet
def keyword_encipher(message, keyword, wrap_alphabet=0):
    """Enciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 0)
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', 1)
    'lskl dskkbus'
    >>> keyword_encipher('test message', 'bayes', 2)
    'qspq jsppbcs'
    """
    # Map plain a..z onto the keyword-derived alphabet in one table,
    # then translate the lowercased message through it.
    translation = ''.maketrans(string.ascii_lowercase,
                               keyword_cipher_alphabet_of(keyword, wrap_alphabet))
    return message.lower().translate(translation)
def keyword_decipher(message, keyword, wrap_alphabet=0):
    """Deciphers a message with a keyword substitution cipher.
    wrap_alphabet controls how the rest of the alphabet is added
    0 : from 'a'
    1 : from the last letter in the sanitised keyword
    2 : from the largest letter in the sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
    'test message'
    >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
    'test message'
    >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
    'test message'
    """
    # Same table as keyword_encipher, built in the reverse direction:
    # keyword-derived alphabet back onto plain a..z.
    translation = ''.maketrans(keyword_cipher_alphabet_of(keyword, wrap_alphabet),
                               string.ascii_lowercase)
    return message.lower().translate(translation)
def caesar_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
    """Breaks a Caesar cipher using frequency analysis.
    Tries all 26 shifts and returns the (shift, fit) pair whose deciphered
    letter frequencies are closest to target_frequencies under metric
    (lower fit is better).

    >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
    (4, 0.31863952890183...)
    >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
    (19, 0.42152901235832...)
    >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
    (13, 0.316029208075451...)
    """
    sanitised_message = sanitise(message)
    best_shift = 0
    best_fit = float("inf")
    for shift in range(26):
        plaintext = caesar_decipher(sanitised_message, shift)
        frequencies = message_frequency_scaling(letter_frequencies(plaintext))
        fit = metric(target_frequencies, frequencies)
        logger.debug('Caesar break attempt using key {0} gives fit of {1} and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
        if fit < best_fit:
            best_fit = fit
            best_shift = shift
    logger.info('Caesar break best fit: key {0} gives fit of {1} and decrypt starting: {2}'.format(best_shift, best_fit, caesar_decipher(sanitised_message, best_shift)[:50]))
    return best_shift, best_fit
def affine_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
    """Breaks an affine cipher using frequency analysis.
    Tries every odd multiplier (candidates coprime with 26), every adder,
    and both numbering conventions; returns ((multiplier, adder, one_based),
    fit) for the decipherment closest to target_frequencies under metric.

    >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd clm ckuxj.') # doctest: +ELLIPSIS
    ((15, 22, True), 0.23570361818655...)
    """
    sanitised_message = sanitise(message)
    best_multiplier = 0
    best_adder = 0
    best_one_based = True
    best_fit = float("inf")
    for one_based in [True, False]:
        for multiplier in range(1, 26, 2):
            for adder in range(26):
                plaintext = affine_decipher(sanitised_message, multiplier, adder, one_based)
                frequencies = message_frequency_scaling(letter_frequencies(plaintext))
                fit = metric(target_frequencies, frequencies)
                logger.debug('Affine break attempt using key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(multiplier, adder, one_based, fit, plaintext[:50]))
                if fit < best_fit:
                    best_fit = fit
                    best_multiplier = multiplier
                    best_adder = adder
                    best_one_based = one_based
    logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(best_multiplier, best_adder, best_one_based, best_fit, affine_decipher(sanitised_message, best_multiplier, best_adder, best_one_based)[:50]))
    return (best_multiplier, best_adder, best_one_based), best_fit
def keyword_break(message, wordlist=keywords, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
    """Breaks a keyword substitution cipher using a dictionary and frequency
    analysis. Tries every keyword in wordlist with each of the three
    wrap_alphabet styles; returns ((keyword, wrap_alphabet), fit) for the
    decipherment closest to target_frequencies under metric.

    >>> keyword_break(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', 1), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
    (('elephant', 1), 0.41643991598441...)
    """
    best_keyword = ''
    best_wrap_alphabet = 0
    best_fit = float("inf")
    for wrap_alphabet in range(3):
        for keyword in wordlist:
            plaintext = keyword_decipher(message, keyword, wrap_alphabet)
            frequencies = message_frequency_scaling(letter_frequencies(plaintext))
            fit = metric(target_frequencies, frequencies)
            logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(keyword, wrap_alphabet, fit, sanitise(plaintext)[:50]))
            if fit < best_fit:
                best_fit = fit
                best_keyword = keyword
                best_wrap_alphabet = wrap_alphabet
    # Bug fix: the final decipher previously omitted best_wrap_alphabet,
    # so the logged "best" decrypt could differ from the reported key.
    logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(best_keyword, best_wrap_alphabet, best_fit, sanitise(keyword_decipher(message, best_keyword, best_wrap_alphabet))[:50]))
    return (best_keyword, best_wrap_alphabet), best_fit
399 if __name__
== "__main__":