77fb5bae6809406b99918dec984f6df3d6001ff0
"""Segments a string of characters into words, following a language model.
3 Implementation taken from [Peter Norvig](https://norvig.com/ngrams/ch14.pdf)
7 from functools
import lru_cache
8 from szyfrow
.support
.language_models
import Pwords
# segment() calls itself on the remainder of the text after each candidate
# first word, so long inputs need many more stack frames than Python's
# default recursion limit allows.
sys.setrecursionlimit(1000000)
14 """Return a list of words that is the best segmentation of text.
16 if not text
: return []
17 candidates
= ([first
]+segment(rest
) for first
,rest
in splits(text
))
18 return max(candidates
, key
=Pwords
)
def splits(text, L=20):
    """Return every way to cut `text` into a non-empty prefix and the rest.

    Args:
        text: The string to split.
        L: Maximum length of the prefix (`first`) part. Defaults to 20.

    Returns:
        A list of `(first, rest)` tuples, ordered by increasing prefix
        length, where `first + rest == text` and `1 <= len(first) <= L`.
        The list is empty when `text` is empty or `L` is not positive.
    """
    # The prefix can be no longer than the text itself, nor longer than L.
    longest_prefix = min(len(text), L)
    return [(text[:cut], text[cut:]) for cut in range(1, longest_prefix + 1)]