# 956eca1a5b2ba469d66c80612446726cdf8307d4
# (identifier carried over from the original text; it looks like a revision
# hash and is kept here for reference only)
"""Tally item frequencies over a fixed set of corpora.

Each corpus file is read, passed through ``sanitise`` and folded into a
single ``collections.Counter``.  The totals are then written to
``count_1l.txt``, one ``item<TAB>count`` line per entry, most frequent first.
"""
import collections

from language_models import sanitise

# Input files, opened relative to the current working directory.
corpora = ['shakespeare.txt', 'sherlock-holmes.txt', 'war-and-peace.txt']
counts = collections.Counter()

# NOTE(review): the loop header was missing from the text this was rebuilt
# from; it is inferred from `corpus` being read below while only `corpora`
# is defined -- confirm against the upstream file.
for corpus in corpora:
    # Use a context manager so each corpus file is closed promptly instead
    # of being left for the garbage collector.
    with open(corpus) as corpus_file:
        text = sanitise(corpus_file.read())
    # NOTE(review): the accumulation step was also missing; presumably
    # `sanitise` returns an iterable of the items to count (e.g. a string of
    # letters) -- verify against language_models.sanitise.
    counts.update(text)

with open('count_1l.txt', 'w') as f:
    # most_common() with no argument yields every entry, highest count first.
    for l, c in counts.most_common():
        f.write("{}\t{}\n".format(l, c))