X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=lettercount.py;fp=lettercount.py;h=4a7082d1068669762d1c8526c761382d07ed6182;hb=8e6bdd888ded10a3f7abb660b241f168d617e58d;hp=0000000000000000000000000000000000000000;hpb=e354c8aa351b8b9c0d9f28aa5708883c209809fe;p=cipher-tools.git

diff --git a/lettercount.py b/lettercount.py
new file mode 100644
index 0000000..4a7082d
--- /dev/null
+++ b/lettercount.py
@@ -0,0 +1,31 @@
+import collections
+import string
+
+def sanitise(text):
+    """Return the ASCII letters of text, lowercased, as a list.
+
+    Characters that are not in string.ascii_letters (digits, punctuation,
+    whitespace, accented letters) are dropped.
+    """
+    return [l.lower() for l in text if l in string.ascii_letters]
+
+# Corpus files, given as paths relative to the current working directory.
+corpora = ['shakespeare.txt', 'sherlock-holmes.txt', 'war-and-peace.txt']
+# Running total of occurrences of each lowercase letter across all corpora.
+counts = collections.defaultdict(int)
+
+for corpus in corpora:
+    # Use a context manager so each corpus file is closed as soon as it has
+    # been read, instead of whenever the file object happens to be collected.
+    with open(corpus, 'r') as corpus_file:
+        text = sanitise(corpus_file.read())
+    for letter in text:
+        counts[letter] += 1
+
+# Letters ordered from most to least frequent.
+sorted_letters = sorted(counts, key=counts.get, reverse=True)
+
+# Write one "letter<TAB>count" line per letter, most frequent first.
+with open('count_1l.txt', 'w') as f:
+    for l in sorted_letters:
+        f.write("{0}\t{1}\n".format(l, counts[l]))