From: Neil Smith Date: Mon, 10 Mar 2014 20:33:05 +0000 (-0400) Subject: Unaccent letters before enciphering X-Git-Url: https://git.njae.me.uk/?a=commitdiff_plain;h=69c9038aaf5cc2f0a758435713f18f3b51bbbe4a;p=cipher-training.git Unaccent letters before enciphering --- diff --git a/cipher.py b/cipher.py index e3c183d..4a12703 100644 --- a/cipher.py +++ b/cipher.py @@ -1,6 +1,7 @@ import string +from language_models import * -def caesar_encipher_letter(letter, shift): +def caesar_encipher_letter(accented_letter, shift): """Encipher a letter, given a shift amount >>> caesar_encipher_letter('a', 1) @@ -19,7 +20,12 @@ def caesar_encipher_letter(letter, shift): 'y' >>> caesar_encipher_letter('a', -1) 'z' + >>> caesar_encipher_letter('A', 1) + 'B' + >>> caesar_encipher_letter('é', 1) + 'f' """ + letter = unaccent(accented_letter) if letter in string.ascii_letters: if letter in string.ascii_uppercase: alphabet_start = ord('A') @@ -51,6 +57,8 @@ def caesar_encipher(message, shift): 'cdezab' >>> caesar_encipher('ab cx yz', 2) 'cd ez ab' + >>> caesar_encipher('Héllo World!', 2) + 'Jgnnq Yqtnf!' """ enciphered = [caesar_encipher_letter(l, shift) for l in message] return ''.join(enciphered) @@ -64,6 +72,8 @@ def caesar_decipher(message, shift): 'abc' >>> caesar_decipher('cd ez ab', 2) 'ab cx yz' + >>> caesar_decipher('Jgnnq Yqtnf!', 2) + 'Hello World!' """ return caesar_encipher(message, -shift) diff --git a/language_models.py b/language_models.py new file mode 100644 index 0000000..1786ebe --- /dev/null +++ b/language_models.py @@ -0,0 +1,26 @@ +import unicodedata + +def unaccent(text): + """Remove all accents from letters. + It does this by converting the Unicode string to decomposed compatibility + form, dropping all the combining accents, then re-encoding the bytes. 
+ + >>> unaccent('hello') + 'hello' + >>> unaccent('HELLO') + 'HELLO' + >>> unaccent('héllo') + 'hello' + >>> unaccent('héllö') + 'hello' + >>> unaccent('HÉLLÖ') + 'HELLO' + """ + return unicodedata.normalize('NFKD', text).\ + encode('ascii', 'ignore').\ + decode('utf-8') + + +if __name__ == "__main__": + import doctest + doctest.testmod()