X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=cipher.py;h=55d99b6773bef3aac5b752ee8e0395856e3c75a8;hb=5141b041bd618e0339ce04f6abef56665d829b95;hp=053635008df52ccdaba6b354dd5c4ddd11462ba5;hpb=26f5de0a23dd94ded412f6f507910ac5e26ea2b6;p=cipher-tools.git diff --git a/cipher.py b/cipher.py index 0536350..55d99b6 100644 --- a/cipher.py +++ b/cipher.py @@ -21,6 +21,9 @@ def sanitise(text): sanitised = [c.lower() for c in text if c in string.ascii_letters] return ''.join(sanitised) +def ngrams(text, n): + return [tuple(text[i:i+n]) for i in range(len(text)-n+1)] + def letter_frequencies(text): """Count the number of occurrences of each character in text @@ -105,10 +108,21 @@ def caesar_decipher(message, shift): return caesar_encipher(message, -shift) def caesar_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise): + """Breaks a Caesar cipher using frequency analysis + + >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') + (4, 0.3186395289018361) + >>> caesar_break('jhzhuhfrqilqhgwrdevwudfwuhdvrqlqjwkhqkdylqjvxemhfwhgwrfulwlflvpwkhhasodqdwlrqrisrzhuwkdwmxulglfdovfl') + (3, 0.3290204286173084) + >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') + (19, 0.4215290123583277) + >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') + (13, 0.31602920807545154) + """ sanitised_message = sanitise(message) best_shift = 0 best_fit = float("inf") - for shift in range(1, 25): + for shift in range(26): plaintext = caesar_decipher(sanitised_message, shift) frequencies = message_frequency_scaling(letter_frequencies(plaintext)) fit = metric(target_frequencies, frequencies)