5 from segment
import segment
7 logger
= logging
.getLogger(__name__
)
8 logger
.addHandler(logging
.FileHandler('cipher.log'))
9 logger
.setLevel(logging
.WARNING
)
10 #logger.setLevel(logging.INFO)
12 english_counts
= collections
.defaultdict(int)
13 with
open('count_1l.txt', 'r') as f
:
15 (letter
, count
) = line
.split("\t")
16 english_counts
[letter
] = int(count
)
17 normalised_english_counts
= norms
.normalise(english_counts
)
19 with
open('words.txt', 'r') as f
:
20 keywords
= [line
.rstrip() for line
in f
]
22 modular_division_table
= [[0]*26 for x
in range(26)]
26 modular_division_table
[b
][c
] = a
28 modular_division_table_one_based
= [[0]*27 for x
in range(27)]
31 c
= ((a
* b
)-1) % 26 + 1
32 modular_division_table_one_based
[b
][c
] = a
37 """Remove all non-alphabetic characters and convert the text to lowercase
39 >>> sanitise('The Quick')
41 >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
42 'thequickbrownfoxjumpedoverthelazydog'
44 sanitised
= [c
.lower() for c
in text
if c
in string
.ascii_letters
]
45 return ''.join(sanitised
)
48 """Returns all n-grams of a text
50 >>> ngrams(sanitise('the quick brown fox'), 2)
51 [('t', 'h'), ('h', 'e'), ('e', 'q'), ('q', 'u'), ('u', 'i'), ('i', 'c'), ('c', 'k'), ('k', 'b'), ('b', 'r'), ('r', 'o'), ('o', 'w'), ('w', 'n'), ('n', 'f'), ('f', 'o'), ('o', 'x')]
52 >>> ngrams(sanitise('the quick brown fox'), 4)
53 [('t', 'h', 'e', 'q'), ('h', 'e', 'q', 'u'), ('e', 'q', 'u', 'i'), ('q', 'u', 'i', 'c'), ('u', 'i', 'c', 'k'), ('i', 'c', 'k', 'b'), ('c', 'k', 'b', 'r'), ('k', 'b', 'r', 'o'), ('b', 'r', 'o', 'w'), ('r', 'o', 'w', 'n'), ('o', 'w', 'n', 'f'), ('w', 'n', 'f', 'o'), ('n', 'f', 'o', 'x')]
55 return [tuple(text
[i
:i
+n
]) for i
in range(len(text
)-n
+1)]
57 def letter_frequencies(text
):
58 """Count the number of occurrences of each character in text
60 >>> sorted(letter_frequencies('abcdefabc').items())
61 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
62 >>> sorted(letter_frequencies('the quick brown fox jumped over the lazy dog').items())
63 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
64 >>> sorted(letter_frequencies('The Quick BROWN fox jumped! over... the (9lazy) DOG').items())
65 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
66 >>> sorted(letter_frequencies(sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')).items())
67 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
69 counts
= collections
.defaultdict(int)
74 def deduplicate(text
):
75 return list(collections
.OrderedDict
.fromkeys(text
))
79 def caesar_encipher_letter(letter
, shift
):
80 """Encipher a letter, given a shift amount
82 >>> caesar_encipher_letter('a', 1)
84 >>> caesar_encipher_letter('a', 2)
86 >>> caesar_encipher_letter('b', 2)
88 >>> caesar_encipher_letter('x', 2)
90 >>> caesar_encipher_letter('y', 2)
92 >>> caesar_encipher_letter('z', 2)
94 >>> caesar_encipher_letter('z', -1)
96 >>> caesar_encipher_letter('a', -1)
99 if letter
in string
.ascii_letters
:
100 if letter
in string
.ascii_uppercase
:
101 alphabet_start
= ord('A')
103 alphabet_start
= ord('a')
104 return chr(((ord(letter
) - alphabet_start
+ shift
) % 26) + alphabet_start
)
108 def caesar_decipher_letter(letter
, shift
):
109 """Decipher a letter, given a shift amount
111 >>> caesar_decipher_letter('b', 1)
113 >>> caesar_decipher_letter('b', 2)
116 return caesar_encipher_letter(letter
, -shift
)
118 def caesar_encipher(message
, shift
):
119 """Encipher a message with the Caesar cipher of given shift
121 >>> caesar_encipher('abc', 1)
123 >>> caesar_encipher('abc', 2)
125 >>> caesar_encipher('abcxyz', 2)
127 >>> caesar_encipher('ab cx yz', 2)
130 enciphered
= [caesar_encipher_letter(l
, shift
) for l
in message
]
131 return ''.join(enciphered
)
133 def caesar_decipher(message
, shift
):
134 """Encipher a message with the Caesar cipher of given shift
136 >>> caesar_decipher('bcd', 1)
138 >>> caesar_decipher('cde', 2)
140 >>> caesar_decipher('cd ez ab', 2)
143 return caesar_encipher(message
, -shift
)
145 def affine_encipher_letter(letter
, multiplier
=1, adder
=0, one_based
=True):
146 """Encipher a letter, given a multiplier and adder
148 >>> ''.join([affine_encipher_letter(l, 3, 5, True) for l in string.ascii_uppercase])
149 'HKNQTWZCFILORUXADGJMPSVYBE'
150 >>> ''.join([affine_encipher_letter(l, 3, 5, False) for l in string.ascii_uppercase])
151 'FILORUXADGJMPSVYBEHKNQTWZC'
153 if letter
in string
.ascii_letters
:
154 if letter
in string
.ascii_uppercase
:
155 alphabet_start
= ord('A')
157 alphabet_start
= ord('a')
158 letter_number
= ord(letter
) - alphabet_start
161 raw_cipher_number
= (letter_number
* multiplier
+ adder
)
164 cipher_number
= (raw_cipher_number
- 1) % 26
166 cipher_number
= raw_cipher_number
% 26
167 return chr(cipher_number
+ alphabet_start
)
171 def affine_decipher_letter(letter
, multiplier
=1, adder
=0, one_based
=True):
172 """Encipher a letter, given a multiplier and adder
174 >>> ''.join([affine_decipher_letter(l, 3, 5, True) for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
175 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
176 >>> ''.join([affine_decipher_letter(l, 3, 5, False) for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
177 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
179 if letter
in string
.ascii_letters
:
180 if letter
in string
.ascii_uppercase
:
181 alphabet_start
= ord('A')
183 alphabet_start
= ord('a')
184 cipher_number
= ord(letter
) - alphabet_start
189 plaintext_number
= (modular_division_table_one_based
[multiplier
][(cipher_number
- adder
+ 26) % 26] - 1) % 26
191 #plaintext_number = (modular_division_table[multiplier][cipher_number] - adder) % 26
192 plaintext_number
= modular_division_table
[multiplier
][(cipher_number
- adder
+ 26) % 26]
193 return chr(plaintext_number
+ alphabet_start
)
197 def affine_encipher(message
, multiplier
=1, adder
=0, one_based
=True):
198 """Encipher a message
200 >>> affine_encipher('hours passed during which jerico tried every trick he could think of', 15, 22, True)
201 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
203 enciphered
= [affine_encipher_letter(l
, multiplier
, adder
, one_based
) for l
in message
]
204 return ''.join(enciphered
)
206 def affine_decipher(message
, multiplier
=1, adder
=0, one_based
=True):
207 """Decipher a message
209 >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh', 15, 22, True)
210 'hours passed during which jerico tried every trick he could think of'
212 enciphered
= [affine_decipher_letter(l
, multiplier
, adder
, one_based
) for l
in message
]
213 return ''.join(enciphered
)
216 def keyword_cipher_alphabet_of(keyword
, wrap_alphabet
=0):
217 """Find the cipher alphabet given a keyword.
218 wrap_alphabet controls how the rest of the alphabet is added
221 1 : from the last letter in the sanitised keyword
222 2 : from the largest letter in the sanitised keyword
224 >>> keyword_cipher_alphabet_of('bayes')
225 'bayescdfghijklmnopqrtuvwxz'
226 >>> keyword_cipher_alphabet_of('bayes', 0)
227 'bayescdfghijklmnopqrtuvwxz'
228 >>> keyword_cipher_alphabet_of('bayes', 1)
229 'bayestuvwxzcdfghijklmnopqr'
230 >>> keyword_cipher_alphabet_of('bayes', 2)
231 'bayeszcdfghijklmnopqrtuvwx'
233 if wrap_alphabet
== 0:
234 cipher_alphabet
= ''.join(deduplicate(sanitise(keyword
) + string
.ascii_lowercase
))
236 if wrap_alphabet
== 1:
237 last_keyword_letter
= deduplicate(sanitise(keyword
))[-1]
239 last_keyword_letter
= sorted(sanitise(keyword
))[-1]
240 last_keyword_position
= string
.ascii_lowercase
.find(last_keyword_letter
) + 1
241 cipher_alphabet
= ''.join(deduplicate(sanitise(keyword
) +
242 string
.ascii_lowercase
[last_keyword_position
:] +
243 string
.ascii_lowercase
))
244 return cipher_alphabet
247 def keyword_encipher(message
, keyword
, wrap_alphabet
=0):
248 """Enciphers a message with a keyword substitution cipher.
249 wrap_alphabet controls how the rest of the alphabet is added
252 1 : from the last letter in the sanitised keyword
253 2 : from the largest letter in the sanitised keyword
255 >>> keyword_encipher('test message', 'bayes')
257 >>> keyword_encipher('test message', 'bayes', 0)
259 >>> keyword_encipher('test message', 'bayes', 1)
261 >>> keyword_encipher('test message', 'bayes', 2)
264 cipher_alphabet
= keyword_cipher_alphabet_of(keyword
, wrap_alphabet
)
265 cipher_translation
= ''.maketrans(string
.ascii_lowercase
, cipher_alphabet
)
266 return message
.lower().translate(cipher_translation
)
268 def keyword_decipher(message
, keyword
, wrap_alphabet
=0):
269 """Deciphers a message with a keyword substitution cipher.
270 wrap_alphabet controls how the rest of the alphabet is added
273 1 : from the last letter in the sanitised keyword
274 2 : from the largest letter in the sanitised keyword
276 >>> keyword_decipher('rsqr ksqqbds', 'bayes')
278 >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
280 >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
282 >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
285 cipher_alphabet
= keyword_cipher_alphabet_of(keyword
, wrap_alphabet
)
286 cipher_translation
= ''.maketrans(cipher_alphabet
, string
.ascii_lowercase
)
287 return message
.lower().translate(cipher_translation
)
290 def caesar_break(message
, metric
=norms
.euclidean_distance
, target_frequencies
=normalised_english_counts
, message_frequency_scaling
=norms
.normalise
):
291 """Breaks a Caesar cipher using frequency analysis
293 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
294 (4, 0.31863952890183...)
295 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
296 (19, 0.42152901235832...)
297 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
298 (13, 0.316029208075451...)
300 sanitised_message
= sanitise(message
)
302 best_fit
= float("inf")
303 for shift
in range(26):
304 plaintext
= caesar_decipher(sanitised_message
, shift
)
305 frequencies
= message_frequency_scaling(letter_frequencies(plaintext
))
306 fit
= metric(target_frequencies
, frequencies
)
307 logger
.debug('Caesar break attempt using key {0} gives fit of {1} and decrypt starting: {2}'.format(shift
, fit
, plaintext
[:50]))
311 logger
.info('Caesar break best fit: key {0} gives fit of {1} and decrypt starting: {2}'.format(best_shift
, best_fit
, caesar_decipher(sanitised_message
, best_shift
)[:50]))
312 return best_shift
, best_fit
314 def affine_break(message
, metric
=norms
.euclidean_distance
, target_frequencies
=normalised_english_counts
, message_frequency_scaling
=norms
.normalise
):
315 """Breaks an affine cipher using frequency analysis
317 >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd clm ckuxj.') # doctest: +ELLIPSIS
318 ((15, 22, True), 0.23570361818655...)
320 sanitised_message
= sanitise(message
)
323 best_one_based
= True
324 best_fit
= float("inf")
325 for one_based
in [True, False]:
326 for multiplier
in range(1, 26, 2):
327 for adder
in range(26):
328 plaintext
= affine_decipher(sanitised_message
, multiplier
, adder
, one_based
)
329 frequencies
= message_frequency_scaling(letter_frequencies(plaintext
))
330 fit
= metric(target_frequencies
, frequencies
)
331 logger
.debug('Affine break attempt using key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(multiplier
, adder
, one_based
, fit
, plaintext
[:50]))
334 best_multiplier
= multiplier
336 best_one_based
= one_based
337 logger
.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(best_multiplier
, best_adder
, best_one_based
, best_fit
, affine_decipher(sanitised_message
, best_multiplier
, best_adder
, best_one_based
)[:50]))
338 return (best_multiplier
, best_adder
, best_one_based
), best_fit
341 def keyword_break(message
, wordlist
=keywords
, metric
=norms
.euclidean_distance
, target_frequencies
=normalised_english_counts
, message_frequency_scaling
=norms
.normalise
):
342 """Breaks a keyword substitution cipher using a dictionary and frequency analysis
344 >>> keyword_break(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', 1), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
345 (('elephant', 1), 0.41643991598441...)
348 best_wrap_alphabet
= True
349 best_fit
= float("inf")
350 for wrap_alphabet
in range(3):
351 for keyword
in wordlist
:
352 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
353 frequencies
= message_frequency_scaling(letter_frequencies(plaintext
))
354 fit
= metric(target_frequencies
, frequencies
)
355 logger
.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(keyword
, wrap_alphabet
, fit
, sanitise(plaintext
)[:50]))
358 best_keyword
= keyword
359 best_wrap_alphabet
= wrap_alphabet
360 logger
.info('Keyword break best fit with key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(best_keyword
, best_wrap_alphabet
, best_fit
, sanitise(keyword_decipher(message
, best_keyword
))[:50]))
361 return (best_keyword
, best_wrap_alphabet
), best_fit
364 if __name__
== "__main__":