X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=language_models.py;fp=language_models.py;h=8c98a2e27906ed959b5820f2ef58f38f5dbd157a;hb=833724c23cc4f479a185cdd8dae89ce35964cf7c;hp=5626edbfd03bd802877e7114699437af7dd6164a;hpb=5e31b8928eb08839244c2c36981b50e0f20959a2;p=cipher-tools.git diff --git a/language_models.py b/language_models.py index 5626edb..8c98a2e 100644 --- a/language_models.py +++ b/language_models.py @@ -70,7 +70,9 @@ def unaccent(text): >>> unaccent('HÉLLÖ') 'HELLO' """ - return unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8') + return unicodedata.normalize('NFKD', text).\ + encode('ascii', 'ignore').\ + decode('utf-8') def sanitise(text): """Remove all non-alphabetic characters and convert the text to lowercase