# support/lettercount.py (cipher-tools.git)
"""Count single-letter frequencies across a set of text corpora.

Run as a script, this reads every file named in ``corpora``, counts how
often each ASCII letter occurs (case-insensitively) and writes the totals
to ``count_1l.txt``, one ``letter<TAB>count`` line per letter, most
frequent letter first.
"""

import collections
import string

# Corpus files to read; paths are resolved relative to the working directory.
corpora = ['shakespeare.txt', 'sherlock-holmes.txt', 'war-and-peace.txt']

# Destination for the tab-separated letter counts.
OUTPUT_FILE = 'count_1l.txt'


def sanitise(text):
    """Return the ASCII letters of *text*, lower-cased, as a list.

    Any character not in ``string.ascii_letters`` (digits, punctuation,
    whitespace, accented letters, ...) is dropped.
    """
    return [ch.lower() for ch in text if ch in string.ascii_letters]


def count_letters(texts):
    """Return a ``Counter`` of the sanitised letters in all of *texts*.

    *texts* is any iterable of strings; an empty iterable gives an empty
    counter.
    """
    counts = collections.Counter()
    for text in texts:
        counts.update(sanitise(text))
    return counts


def format_counts(counts):
    """Return the ``letter<TAB>count`` output lines for *counts*.

    Lines are ordered by descending count.  The sort is stable, so letters
    with equal counts keep the order in which they were first seen.
    """
    ranked = sorted(counts, key=counts.get, reverse=True)
    return ["{0}\t{1}\n".format(letter, counts[letter]) for letter in ranked]


def read_corpus(path):
    """Return the full text of the file at *path*, closing it afterwards."""
    # The context manager guarantees the handle is released even if the
    # read fails; the encoding is left at the platform default.
    with open(path, 'r') as f:
        return f.read()


def main():
    """Count the letters in every corpus and write the results to disk."""
    # All corpora are read and counted before the output file is opened, so
    # a missing corpus raises without touching any existing output.
    counts = count_letters(read_corpus(path) for path in corpora)
    with open(OUTPUT_FILE, 'w') as f:
        f.writelines(format_counts(counts))


if __name__ == '__main__':
    main()