projects
/
cipher-tools.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Added a couple of other experiments
[cipher-tools.git]
/
support
/
lettercount.py
diff --git a/support/lettercount.py b/support/lettercount.py
index 4a7082d1068669762d1c8526c761382d07ed6182..fe1ea08b655d6ff6051520e18486acf5dceb6e95 100644
(file)
--- a/support/lettercount.py
+++ b/support/lettercount.py
@@ -1,16 +1,13 @@
import collections
import string
import collections
import string
-
-def sanitise(text):
- return [l.lower() for l in text if l in string.ascii_letters]
+from utilities import sanitise
corpora = ['shakespeare.txt', 'sherlock-holmes.txt', 'war-and-peace.txt']
corpora = ['shakespeare.txt', 'sherlock-holmes.txt', 'war-and-peace.txt']
-counts = collections.defaultdict(int)
+counts = collections.Counter()
for corpus in corpora:
text = sanitise(open(corpus, 'r').read())
for corpus in corpora:
text = sanitise(open(corpus, 'r').read())
- for letter in text:
- counts[letter] += 1
+ counts.update(text)
sorted_letters = sorted(counts, key=counts.get, reverse=True)
sorted_letters = sorted(counts, key=counts.get, reverse=True)