1 """Language-specific functions, including models of languages based on data of
9 """Remove all non-alphabetic characters from a text
10 >>> letters('The Quick')
12 >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
13 'TheQuickBROWNfoxjumpedoverthelazyDOG'
15 return ''.join([c
for c
in text
if c
in string
.ascii_letters
])
18 """Remove all accents from letters.
19 It does this by converting the unicode string to decomposed compatibility
20 form, dropping all the combining accents, then re-encoding the bytes.
33 return unicodedata
.normalize('NFKD', text
).\
34 encode('ascii', 'ignore').\
38 """Remove all non-alphabetic characters and convert the text to lowercase
40 >>> sanitise('The Quick')
42 >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
43 'thequickbrownfoxjumpedoverthelazydog'
47 # sanitised = [c.lower() for c in text if c in string.ascii_letters]
48 # return ''.join(sanitised)
49 return letters(unaccent(text
)).lower()
53 if __name__
== "__main__":