From: Neil Smith
Date: Fri, 14 Mar 2014 11:54:05 +0000 (+0000)
Subject: Updated lettercount.py
X-Git-Url: https://git.njae.me.uk/?a=commitdiff_plain;h=cab0d48b8fa6a93ddbdc2ceb8687b14a5377e084;p=cipher-training.git

Updated lettercount.py
---

diff --git a/lettercount.py b/lettercount.py
index 4a7082d..18f75c4 100644
--- a/lettercount.py
+++ b/lettercount.py
@@ -1,21 +1,15 @@
+from language_models import sanitise
 import collections
-import string
-
-def sanitise(text):
-    return [l.lower() for l in text if l in string.ascii_letters]
 
 corpora = ['shakespeare.txt', 'sherlock-holmes.txt', 'war-and-peace.txt']
-counts = collections.defaultdict(int)
+counts = collections.Counter()
 
 for corpus in corpora:
-    text = sanitise(open(corpus, 'r').read())
-    for letter in text:
-        counts[letter] += 1
+    text = sanitise(open(corpus).read())
+    counts.update(text)
 
 sorted_letters = sorted(counts, key=counts.get, reverse=True)
 
 with open('count_1l.txt', 'w') as f:
     for l in sorted_letters:
-        f.write("{0}\t{1}\n".format(l, counts[l]))
-        
-        
\ No newline at end of file
+        f.write("{}\t{}\n".format(l, counts[l]))