All tests now running and passing
[cipher-tools.git] / support / lettercount.py
1 import collections
2 import string
3 from utilities import sanitise
4
5 # def sanitise(text):
6 # return [l.lower() for l in text if l in string.ascii_letters]
7
# Corpus files, opened relative to the current working directory, whose
# sanitised contents are tallied together.
corpora = ['shakespeare.txt', 'sherlock-holmes.txt', 'war-and-peace.txt']

# Combined occurrence count of every item produced by sanitise() across all
# of the corpora.
counts = collections.Counter()

for corpus in corpora:
    # A context manager closes each corpus file as soon as it has been read,
    # rather than leaving the handle open until garbage collection.
    with open(corpus, 'r') as corpus_file:
        # NOTE(review): sanitise is imported from utilities; presumably it
        # yields only lowercase ASCII letters -- confirm against that module.
        counts.update(sanitise(corpus_file.read()))

# Keys ordered from most to least frequent. The sort is stable, so keys with
# equal counts keep the order in which they were first seen.
sorted_letters = sorted(counts, key=counts.get, reverse=True)

# Emit one "<letter><TAB><count>" line per key, most frequent first.
with open('count_1l.txt', 'w') as f:
    for l in sorted_letters:
        f.write("{0}\t{1}\n".format(l, counts[l]))
21
22