Added doc redirect
[szyfrow.git] / raw_data_processing / lettercount.py
1 import collections
2 import string
3 from szyfrow.support.utilities import sanitise
4
# Build a single-letter frequency table from the reference corpora and write
# it to szyfrow/support/count_1l.txt as tab-separated "<letter>\t<count>" lines.

# Corpus file names, resolved relative to szyfrow/support/.
corpora = ['shakespeare.txt', 'sherlock-holmes.txt', 'war-and-peace.txt']
counts = collections.Counter()

for corpus in corpora:
    # Read inside a context manager so the file handle is closed as soon as
    # the text has been loaded, rather than being left to the garbage collector.
    # NOTE(review): no explicit encoding is given, so the platform default is
    # used -- confirm the corpora's encoding and pin it if it is known.
    with open('szyfrow/support/' + corpus, 'r') as corpus_file:
        text = sanitise(corpus_file.read())
    # Tally every element of the sanitised text (one per character, assuming
    # sanitise returns a str -- verify against szyfrow.support.utilities).
    counts.update(text)

# Most frequent first; sorted() is stable, so ties keep the Counter's
# insertion order.
sorted_letters = sorted(counts, key=counts.get, reverse=True)

with open('szyfrow/support/count_1l.txt', 'w') as f:
    # One "<letter>\t<count>" record per line, written in a single batch.
    f.writelines(
        "{0}\t{1}\n".format(letter, counts[letter])
        for letter in sorted_letters
    )
17
18