Added development files
[cipher-training.git] / lettercount.py
1 import collections
2 import string
3
def sanitise(text):
    """Return the ASCII letters of *text*, lowercased, as a list.

    Each item of *text* is kept only if it is found in
    ``string.ascii_letters``; everything else (digits, punctuation,
    whitespace, non-ASCII characters) is dropped. Order is preserved.
    """
    return [l.lower() for l in text if l in string.ascii_letters]
6
# Text files whose letters are tallied; paths are relative to the working
# directory the script is run from.
corpora = ['shakespeare.txt', 'sherlock-holmes.txt', 'war-and-peace.txt']
# Maps each lowercase letter to its total number of occurrences across all
# corpora. defaultdict(int) lets a first-seen letter start from 0.
counts = collections.defaultdict(int)

for corpus in corpora:
    # Read inside a context manager so the corpus file is closed as soon as
    # its contents have been read, instead of leaking the handle.
    with open(corpus, 'r') as corpus_file:
        text = sanitise(corpus_file.read())
    for letter in text:
        counts[letter] += 1

# Letters ordered from most to least frequent. sorted() is stable, so letters
# with equal counts keep the order in which `counts` iterates them.
sorted_letters = sorted(counts, key=counts.get, reverse=True)

# Write one "<letter><TAB><count>" line per letter, most frequent first.
with open('count_1l.txt', 'w') as f:
    for letter in sorted_letters:
        f.write("{0}\t{1}\n".format(letter, counts[letter]))
20
21