import collections
from math import log10
import itertools
+import sys
+from functools import lru_cache
+sys.setrecursionlimit(1000000)
-def memo(f):
- "Memoize function f."
- table = {}
- def fmemo(*args):
- if args not in table:
- table[args] = f(*args)
- return table[args]
- fmemo.memo = table
- return fmemo
-
-@memo
+# Unbounded cache: the memo() helper this replaces stored every result in a
+# dict and never evicted; lru_cache()'s default maxsize=128 would evict.
+@lru_cache(maxsize=None)
def segment(text):
"""Return a list of words that is the best segmentation of text.
"""
def Pwords(words):
"""The Naive Bayes log probability of a sequence of words.
"""
- return sum(Pw[w] for w in words)
+ return sum(Pw[w.lower()] for w in words)
class Pdist(dict):
"""A probability distribution estimated from counts in datafile.
"""
return -log10((N * 10**(len(key) - 2)))
-# N = 1024908267229 ## Number of tokens
-
Pw = Pdist(datafile('count_1w.txt'), avoid_long_words)