projects
/
cipher-tools.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Added a couple of other experiments
[cipher-tools.git]
/
support
/
lettercount.py
diff --git a/support/lettercount.py b/support/lettercount.py
index c095cefb6a67ab9a9e21d97f85b0370c3a66e0fa..fe1ea08b655d6ff6051520e18486acf5dceb6e95 100644
(file)
--- a/support/lettercount.py
+++ b/support/lettercount.py
@@ -2,16 +2,12 @@ import collections
import string
from utilities import sanitise
import string
from utilities import sanitise
-# def sanitise(text):
-# return [l.lower() for l in text if l in string.ascii_letters]
-
corpora = ['shakespeare.txt', 'sherlock-holmes.txt', 'war-and-peace.txt']
corpora = ['shakespeare.txt', 'sherlock-holmes.txt', 'war-and-peace.txt']
-counts = collections.defaultdict(int)
+counts = collections.Counter()
for corpus in corpora:
text = sanitise(open(corpus, 'r').read())
for corpus in corpora:
text = sanitise(open(corpus, 'r').read())
- for letter in text:
- counts[letter] += 1
+ counts.update(text)
sorted_letters = sorted(counts, key=counts.get, reverse=True)
sorted_letters = sorted(counts, key=counts.get, reverse=True)