# Recovered from the "Word segmentation" commit by Neil Smith
# (Mon, 14 Jul 2014 19:31:03 +0000 (+0100)):
# https://git.njae.me.uk/?a=commitdiff_plain;h=d1fc579636b7f9bda757beccd1d5d4fcdf1fa71a;p=cipher-training.git
#
# The hunk "@@ -225,90 +225,6 @@" of cipher.py (index c5e3938..5398714) deleted
# the keyword-cipher definitions below.  In the pre-commit file they sat
# between the end of affine_decipher() (`return ''.join(enciphered)`) and the
# doctest `__main__` guard, both of which the commit left untouched.
#
# The code uses names that are not defined in this hunk: Enum, string,
# sanitise, deduplicate and unaccent.


class KeywordWrapAlphabet(Enum):
    """Ways of wrapping the alphabet for keyword-based substitution ciphers."""
    # Fill in the rest of the alphabet starting again from 'a'.
    from_a = 1
    # Fill in starting just after the last letter of the keyword.
    from_last = 2
    # Fill in starting just after the largest (latest in the alphabet)
    # letter of the keyword.
    from_largest = 3


def keyword_cipher_alphabet_of(keyword,
                               wrap_alphabet=KeywordWrapAlphabet.from_a):
    """Find the cipher alphabet given a keyword.

    The keyword is passed through sanitise() and its letters, without
    repeats, start the cipher alphabet.  wrap_alphabet (a
    KeywordWrapAlphabet member) controls how the rest of the alphabet is
    added after the keyword.  Any value other than from_a or from_last is
    handled as from_largest.

    If sanitise() leaves nothing of the keyword there is no last or largest
    letter to wrap from, so the keyword is handled as if
    KeywordWrapAlphabet.from_a had been requested.

    >>> keyword_cipher_alphabet_of('bayes')
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_a)
    'bayescdfghijklmnopqrtuvwxz'
    >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_last)
    'bayestuvwxzcdfghijklmnopqr'
    >>> keyword_cipher_alphabet_of('bayes', KeywordWrapAlphabet.from_largest)
    'bayeszcdfghijklmnopqrtuvwx'
    """
    keyword_letters = sanitise(keyword)

    # The emptiness check guards the [-1] / max() lookups below, which
    # cannot work on a keyword with no letters.
    if wrap_alphabet == KeywordWrapAlphabet.from_a or not keyword_letters:
        return ''.join(deduplicate(keyword_letters + string.ascii_lowercase))

    if wrap_alphabet == KeywordWrapAlphabet.from_last:
        wrap_letter = deduplicate(keyword_letters)[-1]
    else:
        wrap_letter = max(keyword_letters)

    # find() gives -1 for a letter outside a-z, so the slice then starts at 0
    # and the whole alphabet is appended.
    resume_position = string.ascii_lowercase.find(wrap_letter) + 1

    # The full alphabet is appended last so that the letters before the wrap
    # point are still included; deduplicate() drops everything already seen.
    return ''.join(deduplicate(keyword_letters +
                               string.ascii_lowercase[resume_position:] +
                               string.ascii_lowercase))


def keyword_encipher(message, keyword,
                     wrap_alphabet=KeywordWrapAlphabet.from_a):
    """Enciphers a message with a keyword substitution cipher.

    The message is passed through unaccent() and lowercased, then each
    letter a-z is replaced by the letter in the same position of the cipher
    alphabet built by keyword_cipher_alphabet_of().  Other characters are
    left as they are.

    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    KeywordWrapAlphabet.from_a       : from 'a'
    KeywordWrapAlphabet.from_last    : from the last letter in the
                                       sanitised keyword
    KeywordWrapAlphabet.from_largest : from the largest letter in the
                                       sanitised keyword

    >>> keyword_encipher('test message', 'bayes')
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_a)
    'rsqr ksqqbds'
    >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_last)
    'lskl dskkbus'
    >>> keyword_encipher('test message', 'bayes', KeywordWrapAlphabet.from_largest)
    'qspq jsppbcs'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    cipher_translation = str.maketrans(string.ascii_lowercase, cipher_alphabet)
    return unaccent(message).lower().translate(cipher_translation)


def keyword_decipher(message, keyword,
                     wrap_alphabet=KeywordWrapAlphabet.from_a):
    """Deciphers a message with a keyword substitution cipher.

    The message is lowercased, then each letter of the cipher alphabet
    built by keyword_cipher_alphabet_of() is replaced by the plain letter
    a-z in the same position.  Other characters are left as they are.

    wrap_alphabet controls how the rest of the alphabet is added
    after the keyword.
    KeywordWrapAlphabet.from_a       : from 'a'
    KeywordWrapAlphabet.from_last    : from the last letter in the
                                       sanitised keyword
    KeywordWrapAlphabet.from_largest : from the largest letter in the
                                       sanitised keyword

    >>> keyword_decipher('rsqr ksqqbds', 'bayes')
    'test message'
    >>> keyword_decipher('rsqr ksqqbds', 'bayes', KeywordWrapAlphabet.from_a)
    'test message'
    >>> keyword_decipher('lskl dskkbus', 'bayes', KeywordWrapAlphabet.from_last)
    'test message'
    >>> keyword_decipher('qspq jsppbcs', 'bayes', KeywordWrapAlphabet.from_largest)
    'test message'
    """
    cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
    # Same table as keyword_encipher() with the two alphabets swapped.
    cipher_translation = str.maketrans(cipher_alphabet, string.ascii_lowercase)
    return message.lower().translate(cipher_translation)


if __name__ == "__main__":
    import doctest
    doctest.testmod()