1 """Language-specific functions, including models of languages based on data of
def letters(text):
    """Remove all non-alphabetic characters from a text.

    Only characters found in ``string.ascii_letters`` are kept; their case
    and relative order are preserved.

    >>> letters('The Quick')
    'TheQuick'
    >>> letters('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'TheQuickBROWNfoxjumpedoverthelazyDOG'
    """
    return ''.join([c for c in text if c in string.ascii_letters])
def unaccent(text):
    """Remove all accents from letters.

    It does this by converting the unicode string to decomposed compatibility
    form, dropping all the combining accents, then re-encoding the bytes.

    >>> unaccent('hello')
    'hello'
    >>> unaccent('h\\u00e9llo')
    'hello'
    """
    decomposed = unicodedata.normalize('NFKD', text)
    # errors='ignore' drops every non-ASCII code point: the combining marks
    # produced by the decomposition, and also any character that has no
    # ASCII decomposition at all.
    ascii_bytes = decomposed.encode('ascii', 'ignore')
    # Decode back to str so callers can keep treating the result as text.
    return ascii_bytes.decode('ascii')
def sanitise(text):
    """Remove all non-alphabetic characters and convert the text to lowercase.

    >>> sanitise('The Quick')
    'thequick'
    >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
    'thequickbrownfoxjumpedoverthelazydog'
    """
    # Accents are stripped before filtering so that accented letters are
    # reduced to their base letter rather than discarded by letters().
    return letters(unaccent(text)).lower()
54 if __name__
== "__main__":