X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=cipher.py;h=5a8cfa22bd788981557c5fe733467e5ec2dc116e;hb=269665fe76e7aeb87165a87d3a1cbb755a5e3768;hp=3f9e29087408202e91b0294f231bea9edfc0054a;hpb=d304015036dd0b45572d52a0a9acf3bf3f1e7e63;p=cipher-tools.git diff --git a/cipher.py b/cipher.py index 3f9e290..5a8cfa2 100644 --- a/cipher.py +++ b/cipher.py @@ -6,7 +6,7 @@ import logging logger = logging.getLogger(__name__) logger.addHandler(logging.FileHandler('cipher.log')) logger.setLevel(logging.WARNING) -logger.setLevel(logging.INFO) +#logger.setLevel(logging.INFO) english_counts = collections.defaultdict(int) with open('count_1l.txt', 'r') as f: @@ -15,6 +15,10 @@ with open('count_1l.txt', 'r') as f: english_counts[letter] = int(count) normalised_english_counts = norms.normalise(english_counts) +keywords = [] +with open('words.txt', 'r') as f: + keywords = [line.rstrip() for line in f] + modular_division_table = [[0]*26 for x in range(26)] for a in range(26): @@ -42,6 +46,13 @@ def sanitise(text): return ''.join(sanitised) def ngrams(text, n): + """Returns all n-grams of a text + + >>> ngrams(sanitise('the quick brown fox'), 2) + [('t', 'h'), ('h', 'e'), ('e', 'q'), ('q', 'u'), ('u', 'i'), ('i', 'c'), ('c', 'k'), ('k', 'b'), ('b', 'r'), ('r', 'o'), ('o', 'w'), ('w', 'n'), ('n', 'f'), ('f', 'o'), ('o', 'x')] + >>> ngrams(sanitise('the quick brown fox'), 4) + [('t', 'h', 'e', 'q'), ('h', 'e', 'q', 'u'), ('e', 'q', 'u', 'i'), ('q', 'u', 'i', 'c'), ('u', 'i', 'c', 'k'), ('i', 'c', 'k', 'b'), ('c', 'k', 'b', 'r'), ('k', 'b', 'r', 'o'), ('b', 'r', 'o', 'w'), ('r', 'o', 'w', 'n'), ('o', 'w', 'n', 'f'), ('w', 'n', 'f', 'o'), ('n', 'f', 'o', 'x')] + """ return [tuple(text[i:i+n]) for i in range(len(text)-n+1)] def letter_frequencies(text): @@ -61,6 +72,11 @@ def letter_frequencies(text): counts[c] += 1 return counts +def deduplicate(text): + return list(collections.OrderedDict.fromkeys(text)) + + + def caesar_encipher_letter(letter, shift): """Encipher a letter, given a shift amount @@ -197,6 +213,17 @@ def affine_decipher(message, multiplier=1, adder=0, one_based=True): return ''.join(enciphered) +def keyword_encipher(message, keyword): + cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + string.ascii_lowercase)) + cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet) + return message.lower().translate(cipher_translation) + +def keyword_decipher(message, keyword): + cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + string.ascii_lowercase)) + cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase) + return message.lower().translate(cipher_translation) + + def caesar_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise): """Breaks a Caesar cipher using frequency analysis @@ -248,6 +275,21 @@ def affine_break(message, metric=norms.euclidean_distance, target_frequencies=no return (best_multiplier, best_adder, best_one_based), best_fit +def keyword_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise): + best_keyword = '' + best_fit = float("inf") + for keyword in keywords: + plaintext = keyword_decipher(message, keyword) + frequencies = message_frequency_scaling(letter_frequencies(plaintext)) + fit = metric(target_frequencies, frequencies) + logger.info('Keyword break attempt using key {0} gives fit of {1} and decrypt starting: {2}'.format(keyword, fit, plaintext[:50])) + if fit < best_fit: + best_fit = fit + best_keyword = keyword + logger.info('Keyword break best fit with key {0} gives fit of {1} and decrypt starting: {2}'.format(best_keyword, best_fit, keyword_decipher(message, best_keyword)[:50])) + return best_keyword, best_fit + + if __name__ == "__main__": import doctest doctest.testmod()