X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=cipher.py;h=85de0a52c33b880653b83b573ff87ee109ec2970;hb=fec4faced2cebd32cc97a844ff416bf46441379a;hp=053635008df52ccdaba6b354dd5c4ddd11462ba5;hpb=26f5de0a23dd94ded412f6f507910ac5e26ea2b6;p=cipher-tools.git diff --git a/cipher.py b/cipher.py index 0536350..85de0a5 100644 --- a/cipher.py +++ b/cipher.py @@ -21,6 +21,9 @@ def sanitise(text): sanitised = [c.lower() for c in text if c in string.ascii_letters] return ''.join(sanitised) +def ngrams(text, n): + return [tuple(text[i:i+n]) for i in range(len(text)-n+1)] + def letter_frequencies(text): """Count the number of occurrences of each character in text @@ -105,10 +108,22 @@ def caesar_decipher(message, shift): return caesar_encipher(message, -shift) def caesar_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise): + """Breaks a Caesar cipher using frequency analysis + + + >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') + (4, 0.3186395289018361) + >>> caesar_break('jhzhuhfrqilqhgwrdevwudfwuhdvrqlqjwkhqkdylqjvxemhfwhgwrfulwlflvpwkhhasodqdwlrqrisrzhuwkdwmxulglfdovfl') + (3, 0.3290204286173084) + >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') + (19, 0.4215290123583277) + >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') + (13, 0.31602920807545154) + """ sanitised_message = sanitise(message) best_shift = 0 best_fit = float("inf") - for shift in range(1, 25): + for shift in range(26): plaintext = caesar_decipher(sanitised_message, shift) frequencies = message_frequency_scaling(letter_frequencies(plaintext)) fit = metric(target_frequencies, frequencies)