Updated lettercount.py
[cipher-training.git] / lettercount.py
"""Count how often each letter occurs across the training corpora.

Each corpus file is read in full, passed through ``sanitise`` and the
elements of the result are tallied in a single ``Counter`` (presumably one
entry per letter -- ``sanitise`` is defined in ``language_models``, verify
there).  The totals are written to ``count_1l.txt`` as tab-separated
``letter<TAB>count`` lines, highest count first.
"""
from language_models import sanitise
import collections

corpora = ['shakespeare.txt', 'sherlock-holmes.txt', 'war-and-peace.txt']
counts = collections.Counter()

for corpus in corpora:
    # Read inside a context manager so every corpus file is closed as soon
    # as its contents are in memory instead of leaking the handle.
    with open(corpus) as corpus_file:
        text = sanitise(corpus_file.read())
    # Counter.update on an iterable adds one to the tally of each element.
    counts.update(text)

# Keys of the counter ordered by descending count.
sorted_letters = sorted(counts, key=counts.get, reverse=True)

with open('count_1l.txt', 'w') as f:
    for letter in sorted_letters:
        f.write("{}\t{}\n".format(letter, counts[letter]))