Refactored out sanitise, added loading of English letter counts
author Neil Smith <neil.github@njae.me.uk>
Fri, 11 Oct 2013 10:14:05 +0000 (11:14 +0100)
committer Neil Smith <neil.github@njae.me.uk>
Fri, 11 Oct 2013 10:14:05 +0000 (11:14 +0100)
cipher.py

index c4b28ba321ea4a55953af22f914c5b94a7843b6f..b3ab488db74c3af8090aefcc2396c2d503725064 100644 (file)
--- a/cipher.py
+++ b/cipher.py
@@ -1,6 +1,21 @@
 import string
 import collections
 
+english_counts = collections.defaultdict(int)
+with open('count_1l.txt', 'r') as f:
+    for line in f:
+        (letter, count) = line.split("\t")
+        english_counts[letter] = int(count)
+
+def sanitise(text):
+    sanitised = [c.lower() for c in text if c in string.ascii_letters]
+    return ''.join(sanitised)
+
+def letter_frequencies(message):
+    frequencies = collections.defaultdict(int)
+    for letter in sanitise(message): 
+        frequencies[letter]+=1
+    return frequencies
 
 def caesar_cipher_letter(letter, shift):
     if letter in string.ascii_letters:
@@ -22,11 +37,3 @@ def caesar_cipher_message(message, shift):
 
 def caesar_decipher_message(message, shift):
     return caesar_cipher_message(message, -shift)
-
-def letter_frequencies(message):
-    frequencies = collections.defaultdict(int)
-    for letter in message: 
-        if letter in  string.ascii_letters:
-            frequencies[letter.lower()]+=1
-    return frequencies
-