lettercount.py

   1 from language_models import sanitise
   2 import collections
   3
   4 corpora = ['shakespeare.txt', 'sherlock-holmes.txt', 'war-and-peace.txt']
   5 counts = collections.Counter()
   6
   7 for corpus in corpora:
   8     text = sanitise(open(corpus).read())
   9     counts.update(text)
  10
  11 with open('count_1l.txt', 'w') as f:
  12     for l, c in counts.most_common():
  13         f.write("{}\t{}\n".format(l, c))