1 """Segment a collection of letters into words"""
5 from functools
import lru_cache
# The segmentation routines in this module recurse on the remainder of the
# text after peeling off each candidate first word, so long inputs need far
# more stack frames than the interpreter's default limit allows.
sys.setrecursionlimit(10 ** 6)
10 """Return a list of words that is the best segmentation of text.
12 if not text
: return []
13 candidates
= ([first
]+segment(rest
) for first
, rest
in splits(text
))
14 return max(candidates
, key
=language_models
.Pwords
)
def segment_wrong(text):
    """Pick the candidate segmentation of text that Pwords_wrong scores highest.

    An empty text yields an empty list.  Otherwise every (first, rest) pair
    produced by splits(text) is turned into the candidate word list
    ``[first] + segment(rest)``, and the candidate with the largest
    ``language_models.Pwords_wrong`` value is returned.
    """
    if text:
        pairs = splits(text)
        # NOTE(review): the remainder is segmented with segment(), not with
        # segment_wrong(), so only this top-level choice is scored by
        # Pwords_wrong -- confirm that this is intended.
        options = ([head] + segment(tail) for head, tail in pairs)
        return max(options, key=language_models.Pwords_wrong)
    return []
def splits(text, L=20):
    """Enumerate the ways to cut text into a non-empty head and its remainder.

    Each result is a ``(first, rest)`` tuple with ``first + rest == text``.
    ``first`` is never empty and is at most ``L`` items long; ``rest`` may be
    empty.  Pairs are listed from the shortest ``first`` to the longest.
    An empty text (or a non-positive ``L``) gives an empty list.
    """
    longest_head = min(len(text), L)
    pairs = []
    for cut in range(1, longest_head + 1):
        pairs.append((text[:cut], text[cut:]))
    return pairs