projects
/
cipher-training.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
5bcbe17
)
Updated lettercount.py
author
Neil Smith
<neil.git@njae.me.uk>
Fri, 14 Mar 2014 11:54:05 +0000
(11:54 +0000)
committer
Neil Smith
<neil.git@njae.me.uk>
Fri, 14 Mar 2014 11:54:05 +0000
(11:54 +0000)
lettercount.py
patch
|
blob
|
history
diff --git a/lettercount.py b/lettercount.py
index 4a7082d1068669762d1c8526c761382d07ed6182..18f75c48db4d8f141f19a645267a86f58b704d5e 100644
(file)
--- a/lettercount.py
+++ b/lettercount.py
@@ -1,21 +1,15 @@
+from language_models import sanitise
import collections
import collections
-import string
-
-def sanitise(text):
- return [l.lower() for l in text if l in string.ascii_letters]
corpora = ['shakespeare.txt', 'sherlock-holmes.txt', 'war-and-peace.txt']
corpora = ['shakespeare.txt', 'sherlock-holmes.txt', 'war-and-peace.txt']
-counts = collections.defaultdict(int)
+counts = collections.Counter()
for corpus in corpora:
for corpus in corpora:
- text = sanitise(open(corpus, 'r').read())
- for letter in text:
- counts[letter] += 1
+ text = sanitise(open(corpus).read())
+ counts.update(text)
sorted_letters = sorted(counts, key=counts.get, reverse=True)
with open('count_1l.txt', 'w') as f:
for l in sorted_letters:
sorted_letters = sorted(counts, key=counts.get, reverse=True)
with open('count_1l.txt', 'w') as f:
for l in sorted_letters:
- f.write("{0}\t{1}\n".format(l, counts[l]))
-
-
\ No newline at end of file
+ f.write("{}\t{}\n".format(l, counts[l]))