5 english_counts
= collections
.defaultdict(int)
6 with
open('count_1l.txt', 'r') as f
:
8 (letter
, count
) = line
.split("\t")
9 english_counts
[letter
] = int(count
)
10 normalised_english_counts
= norms
.normalise(english_counts
)
14 """Remove all non-alphabetic characters and convert the text to lowercase
16 >>> sanitise('The Quick')
18 >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
19 'thequickbrownfoxjumpedoverthelazydog'
21 sanitised
= [c
.lower() for c
in text
if c
in string
.ascii_letters
]
22 return ''.join(sanitised
)
25 return [tuple(text
[i
:i
+n
]) for i
in range(len(text
)-n
+1)]
27 def letter_frequencies(text
):
28 """Count the number of occurrences of each character in text
30 >>> sorted(letter_frequencies('abcdefabc').items())
31 [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
32 >>> sorted(letter_frequencies('the quick brown fox jumped over the lazy dog').items())
33 [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
34 >>> sorted(letter_frequencies('The Quick BROWN fox jumped! over... the (9lazy) DOG').items())
35 [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
36 >>> sorted(letter_frequencies(sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')).items())
37 [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
39 counts
= collections
.defaultdict(int)
44 def caesar_encipher_letter(letter
, shift
):
45 """Encipher a letter, given a shift amount
47 >>> caesar_encipher_letter('a', 1)
49 >>> caesar_encipher_letter('a', 2)
51 >>> caesar_encipher_letter('b', 2)
53 >>> caesar_encipher_letter('x', 2)
55 >>> caesar_encipher_letter('y', 2)
57 >>> caesar_encipher_letter('z', 2)
59 >>> caesar_encipher_letter('z', -1)
61 >>> caesar_encipher_letter('a', -1)
64 if letter
in string
.ascii_letters
:
65 if letter
in string
.ascii_uppercase
:
66 alphabet_start
= ord('A')
68 alphabet_start
= ord('a')
69 return chr(((ord(letter
) - alphabet_start
+ shift
) % 26) + alphabet_start
)
73 def caesar_decipher_letter(letter
, shift
):
74 """Decipher a letter, given a shift amount
76 >>> caesar_decipher_letter('b', 1)
78 >>> caesar_decipher_letter('b', 2)
81 return caesar_encipher_letter(letter
, -shift
)
83 def caesar_encipher(message
, shift
):
84 """Encipher a message with the Caesar cipher of given shift
86 >>> caesar_encipher('abc', 1)
88 >>> caesar_encipher('abc', 2)
90 >>> caesar_encipher('abcxyz', 2)
92 >>> caesar_encipher('ab cx yz', 2)
95 enciphered
= [caesar_encipher_letter(l
, shift
) for l
in message
]
96 return ''.join(enciphered
)
98 def caesar_decipher(message
, shift
):
99 """Encipher a message with the Caesar cipher of given shift
101 >>> caesar_decipher('bcd', 1)
103 >>> caesar_decipher('cde', 2)
105 >>> caesar_decipher('cd ez ab', 2)
108 return caesar_encipher(message
, -shift
)
110 def caesar_break(message
, metric
=norms
.euclidean_distance
, target_frequencies
=normalised_english_counts
, message_frequency_scaling
=norms
.normalise
):
111 """Breaks a Caesar cipher using frequency analysis
114 >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm')
115 (4, 0.3186395289018361)
116 >>> caesar_break('jhzhuhfrqilqhgwrdevwudfwuhdvrqlqjwkhqkdylqjvxemhfwhgwrfulwlflvpwkhhasodqdwlrqrisrzhuwkdwmxulglfdovfl')
117 (3, 0.3290204286173084)
118 >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert')
119 (19, 0.4215290123583277)
120 >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur')
121 (13, 0.31602920807545154)
123 sanitised_message
= sanitise(message
)
125 best_fit
= float("inf")
126 for shift
in range(26):
127 plaintext
= caesar_decipher(sanitised_message
, shift
)
128 frequencies
= message_frequency_scaling(letter_frequencies(plaintext
))
129 fit
= metric(target_frequencies
, frequencies
)
133 return best_shift
, best_fit
136 if __name__
== "__main__":