projects
/
cipher-tools.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Fiddling with cracking dictionaries
[cipher-tools.git]
/
segment.py
diff --git a/segment.py b/segment.py
index 08bc0aa84244bf222655defba7b4a0773f87ebcf..dd0b2a8347ee800c4addf996f369ea0293b47bb7 100644
(file)
--- a/segment.py
+++ b/segment.py
@@ -1,22 +1,12 @@
-# import re, string, random, glob, operator, heapq
import string
import collections
from math import log10
import itertools
import sys
import string
import collections
from math import log10
import itertools
import sys
+from functools import lru_cache
sys.setrecursionlimit(1000000)
sys.setrecursionlimit(1000000)
-def memo(f):
- "Memoize function f."
- table = {}
- def fmemo(*args):
- if args not in table:
- table[args] = f(*args)
- return table[args]
- fmemo.memo = table
- return fmemo
-
-@memo
+@lru_cache()
def segment(text):
"""Return a list of words that is the best segmentation of text.
"""
def segment(text):
"""Return a list of words that is the best segmentation of text.
"""
@@ -33,7 +23,7 @@ def splits(text, L=20):
def Pwords(words):
"""The Naive Bayes log probability of a sequence of words.
"""
def Pwords(words):
"""The Naive Bayes log probability of a sequence of words.
"""
- return sum(Pw[w] for w in words)
+ return sum(Pw[w.lower()] for w in words)
class Pdist(dict):
"""A probability distribution estimated from counts in datafile.
class Pdist(dict):
"""A probability distribution estimated from counts in datafile.