X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=lettercount.py;fp=lettercount.py;h=0000000000000000000000000000000000000000;hb=92ae3192f1ac20bfefacbefa7c2d68f843553e80;hp=956eca1a5b2ba469d66c80612446726cdf8307d4;hpb=20aff345391c6ae2d00a55c586fda31f6f4315d5;p=cipher-training.git diff --git a/lettercount.py b/lettercount.py deleted file mode 100644 index 956eca1..0000000 --- a/lettercount.py +++ /dev/null @@ -1,13 +0,0 @@ -from language_models import sanitise -import collections - -corpora = ['shakespeare.txt', 'sherlock-holmes.txt', 'war-and-peace.txt'] -counts = collections.Counter() - -for corpus in corpora: - text = sanitise(open(corpus).read()) - counts.update(text) - -with open('count_1l.txt', 'w') as f: - for l, c in counts.most_common(): - f.write("{}\t{}\n".format(l, c))