6 logger
= logging
.getLogger(__name__
)
7 logger
.addHandler(logging
.FileHandler('cipher.log'))
8 logger
.setLevel(logging
.WARNING
)
9 #logger.setLevel(logging.INFO)
11 english_counts
= collections
.defaultdict(int)
12 with
open('count_1l.txt', 'r') as f
:
14 (letter
, count
) = line
.split("\t")
15 english_counts
[letter
] = int(count
)
16 normalised_english_counts
= norms
.normalise(english_counts
)
19 with
open('words.txt', 'r') as f
:
20 keywords
= [line
.rstrip() for line
in f
]
23 modular_division_table
= [[0]*26 for x
in range(26)]
27 modular_division_table
[b
][c
] = a
29 modular_division_table_one_based
= [[0]*27 for x
in range(27)]
32 c
= ((a
* b
)-1) % 26 + 1
33 modular_division_table_one_based
[b
][c
] = a
38 """Remove all non-alphabetic characters and convert the text to lowercase
40 >>> sanitise('The Quick')
42 >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
43 'thequickbrownfoxjumpedoverthelazydog'
45 sanitised
= [c
.lower() for c
in text
if c
in string
.ascii_letters
]
46 return ''.join(sanitised
)
49 """Returns all n-grams of a text
51 >>> ngrams(sanitise('the quick brown fox'), 2)
52 [('t', 'h'), ('h', 'e'), ('e', 'q'), ('q', 'u'), ('u', 'i'), ('i', 'c'), ('c', 'k'), ('k', 'b'), ('b', 'r'), ('r', 'o'), ('o', 'w'), ('w', 'n'), ('n', 'f'), ('f', 'o'), ('o', 'x')]
53 >>> ngrams(sanitise('the quick brown fox'), 4)
54 [('t', 'h', 'e', 'q'), ('h', 'e', 'q', 'u'), ('e', 'q', 'u', 'i'), ('q', 'u', 'i', 'c'), ('u', 'i', 'c', 'k'), ('i', 'c', 'k', 'b'), ('c', 'k', 'b', 'r'), ('k', 'b', 'r', 'o'), ('b', 'r', 'o', 'w'), ('r', 'o', 'w', 'n'), ('o', 'w', 'n', 'f'), ('w', 'n', 'f', 'o'), ('n', 'f', 'o', 'x')]
56 return [tuple(text
[i
:i
+n
]) for i
in range(len(text
)-n
+1)]
58 def letter_frequencies(text
):
59 """Count the number of occurrences of each character in text
61 >>> sorted(letter_frequencies('abcdefabc').items())
62 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
63 >>> sorted(letter_frequencies('the quick brown fox jumped over the lazy dog').items())
64 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
65 >>> sorted(letter_frequencies('The Quick BROWN fox jumped! over... the (9lazy) DOG').items())
66 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
67 >>> sorted(letter_frequencies(sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')).items())
68 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
70 counts
= collections
.defaultdict(int)
75 def deduplicate(text
):
76 return list(collections
.OrderedDict
.fromkeys(text
))
80 def caesar_encipher_letter(letter
, shift
):
81 """Encipher a letter, given a shift amount
83 >>> caesar_encipher_letter('a', 1)
85 >>> caesar_encipher_letter('a', 2)
87 >>> caesar_encipher_letter('b', 2)
89 >>> caesar_encipher_letter('x', 2)
91 >>> caesar_encipher_letter('y', 2)
93 >>> caesar_encipher_letter('z', 2)
95 >>> caesar_encipher_letter('z', -1)
97 >>> caesar_encipher_letter('a', -1)
100 if letter
in string
.ascii_letters
:
101 if letter
in string
.ascii_uppercase
:
102 alphabet_start
= ord('A')
104 alphabet_start
= ord('a')
105 return chr(((ord(letter
) - alphabet_start
+ shift
) % 26) + alphabet_start
)
109 def caesar_decipher_letter(letter
, shift
):
110 """Decipher a letter, given a shift amount
112 >>> caesar_decipher_letter('b', 1)
114 >>> caesar_decipher_letter('b', 2)
117 return caesar_encipher_letter(letter
, -shift
)
119 def caesar_encipher(message
, shift
):
120 """Encipher a message with the Caesar cipher of given shift
122 >>> caesar_encipher('abc', 1)
124 >>> caesar_encipher('abc', 2)
126 >>> caesar_encipher('abcxyz', 2)
128 >>> caesar_encipher('ab cx yz', 2)
131 enciphered
= [caesar_encipher_letter(l
, shift
) for l
in message
]
132 return ''.join(enciphered
)
134 def caesar_decipher(message
, shift
):
135 """Encipher a message with the Caesar cipher of given shift
137 >>> caesar_decipher('bcd', 1)
139 >>> caesar_decipher('cde', 2)
141 >>> caesar_decipher('cd ez ab', 2)
144 return caesar_encipher(message
, -shift
)
146 def affine_encipher_letter(letter
, multiplier
=1, adder
=0, one_based
=True):
147 """Encipher a letter, given a multiplier and adder
149 >>> ''.join([affine_encipher_letter(l, 3, 5, True) for l in string.ascii_uppercase])
150 'HKNQTWZCFILORUXADGJMPSVYBE'
151 >>> ''.join([affine_encipher_letter(l, 3, 5, False) for l in string.ascii_uppercase])
152 'FILORUXADGJMPSVYBEHKNQTWZC'
154 if letter
in string
.ascii_letters
:
155 if letter
in string
.ascii_uppercase
:
156 alphabet_start
= ord('A')
158 alphabet_start
= ord('a')
159 letter_number
= ord(letter
) - alphabet_start
160 if one_based
: letter_number
+= 1
161 raw_cipher_number
= (letter_number
* multiplier
+ adder
)
164 cipher_number
= (raw_cipher_number
- 1) % 26
166 cipher_number
= raw_cipher_number
% 26
167 return chr(cipher_number
+ alphabet_start
)
171 def affine_decipher_letter(letter
, multiplier
=1, adder
=0, one_based
=True):
172 """Encipher a letter, given a multiplier and adder
174 >>> ''.join([affine_decipher_letter(l, 3, 5, True) for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
175 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
176 >>> ''.join([affine_decipher_letter(l, 3, 5, False) for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
177 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
179 if letter
in string
.ascii_letters
:
180 if letter
in string
.ascii_uppercase
:
181 alphabet_start
= ord('A')
183 alphabet_start
= ord('a')
184 cipher_number
= ord(letter
) - alphabet_start
185 if one_based
: cipher_number
+= 1
188 plaintext_number
= (modular_division_table_one_based
[multiplier
][(cipher_number
- adder
+ 26) % 26] - 1) % 26
190 #plaintext_number = (modular_division_table[multiplier][cipher_number] - adder) % 26
191 plaintext_number
= modular_division_table
[multiplier
][(cipher_number
- adder
+ 26) % 26]
192 return chr(plaintext_number
+ alphabet_start
)
196 def affine_encipher(message
, multiplier
=1, adder
=0, one_based
=True):
197 """Encipher a message
199 >>> affine_encipher('hours passed during which jerico tried every trick he could think of', 15, 22, True)
200 'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
203 enciphered
= [affine_encipher_letter(l
, multiplier
, adder
, one_based
) for l
in message
]
204 return ''.join(enciphered
)
206 def affine_decipher(message
, multiplier
=1, adder
=0, one_based
=True):
207 """Decipher a message
209 >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh', 15, 22, True)
210 'hours passed during which jerico tried every trick he could think of'
212 enciphered
= [affine_decipher_letter(l
, multiplier
, adder
, one_based
) for l
in message
]
213 return ''.join(enciphered
)
216 def keyword_encipher(message
, keyword
, wrap_alphabet
=False):
219 last_keyword_letter
= deduplicate(sanitise(keyword
))[-1]
220 last_keyword_position
= string
.ascii_lowercase
.find(last_keyword_letter
) + 1
221 cipher_alphabet
= ''.join(deduplicate(sanitise(keyword
) + string
.ascii_lowercase
[last_keyword_position
:] + string
.ascii_lowercase
[:last_keyword_position
]))
223 cipher_alphabet
= ''.join(deduplicate(sanitise(keyword
) + string
.ascii_lowercase
))
224 cipher_translation
= ''.maketrans(string
.ascii_lowercase
, cipher_alphabet
)
225 return message
.lower().translate(cipher_translation
)
227 def keyword_decipher(message
, keyword
, wrap_alphabet
=False):
230 last_keyword_letter
= deduplicate(sanitise(keyword
))[-1]
231 last_keyword_position
= string
.ascii_lowercase
.find(last_keyword_letter
) + 1
232 cipher_alphabet
= ''.join(deduplicate(sanitise(keyword
) + string
.ascii_lowercase
[last_keyword_position
:] + string
.ascii_lowercase
[:last_keyword_position
]))
234 cipher_alphabet
= ''.join(deduplicate(sanitise(keyword
) + string
.ascii_lowercase
))
235 #cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + string.ascii_lowercase))
236 cipher_translation
= ''.maketrans(cipher_alphabet
, string
.ascii_lowercase
)
237 return message
.lower().translate(cipher_translation
)
240 def caesar_break(message
, metric
=norms
.euclidean_distance
, target_frequencies
=normalised_english_counts
, message_frequency_scaling
=norms
.normalise
):
241 """Breaks a Caesar cipher using frequency analysis
243 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm')
244 (4, 0.31863952890183...)
245 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert')
246 (19, 0.42152901235832...)
247 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur')
248 (13, 0.316029208075451...)
250 sanitised_message
= sanitise(message
)
252 best_fit
= float("inf")
253 for shift
in range(26):
254 plaintext
= caesar_decipher(sanitised_message
, shift
)
255 frequencies
= message_frequency_scaling(letter_frequencies(plaintext
))
256 fit
= metric(target_frequencies
, frequencies
)
257 logger
.info('Caesar break attempt using key {0} gives fit of {1} and decrypt starting: {2}'.format(shift
, fit
, plaintext
[:50]))
261 logger
.info('Caesar break best fit: key {0} gives fit of {1} and decrypt starting: {2}'.format(best_shift
, best_fit
, caesar_decipher(sanitised_message
, best_shift
)[:50]))
262 return best_shift
, best_fit
264 def affine_break(message
, metric
=norms
.euclidean_distance
, target_frequencies
=normalised_english_counts
, message_frequency_scaling
=norms
.normalise
):
265 """Breaks an affine cipher using frequency analysis
267 >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd clm ckuxj.')
268 ((15, 22, True), 0.23570361818655...)
270 sanitised_message
= sanitise(message
)
273 best_one_based
= True
274 best_fit
= float("inf")
275 for one_based
in [True, False]:
276 for multiplier
in range(1, 26, 2):
277 for adder
in range(26):
278 plaintext
= affine_decipher(sanitised_message
, multiplier
, adder
, one_based
)
279 frequencies
= message_frequency_scaling(letter_frequencies(plaintext
))
280 fit
= metric(target_frequencies
, frequencies
)
281 logger
.info('Affine break attempt using key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(multiplier
, adder
, one_based
, fit
, plaintext
[:50]))
284 best_multiplier
= multiplier
286 best_one_based
= one_based
287 logger
.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(best_multiplier
, best_adder
, best_one_based
, best_fit
, affine_decipher(sanitised_message
, best_multiplier
, best_adder
, best_one_based
)[:50]))
288 return (best_multiplier
, best_adder
, best_one_based
), best_fit
291 def keyword_break(message
, metric
=norms
.euclidean_distance
, target_frequencies
=normalised_english_counts
, message_frequency_scaling
=norms
.normalise
):
293 best_wrap_alphabet
= True
294 best_fit
= float("inf")
295 for wrap_alphabet
in [True, False]:
296 for keyword
in keywords
:
297 plaintext
= keyword_decipher(message
, keyword
, wrap_alphabet
)
298 frequencies
= message_frequency_scaling(letter_frequencies(plaintext
))
299 fit
= metric(target_frequencies
, frequencies
)
300 logger
.info('Keyword break attempt using key {0} ({1}) gives fit of {2} and decrypt starting: {3}'.format(keyword
, wrap_alphabet
, fit
, sanitise(plaintext
)[:50]))
303 best_keyword
= keyword
304 best_wrap_alphabet
= wrap_alphabet
305 logger
.info('Keyword break best fit with key {0} ({1}) gives fit of {2} and decrypt starting: {3}'.format(best_keyword
, best_wrap_alphabet
, best_fit
, sanitise(keyword_decipher(message
, best_keyword
))[:50]))
306 return (best_keyword
, best_wrap_alphabet
), best_fit
309 if __name__
== "__main__":