X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=cipher.py;h=b07cd250fb8263e338a1b35686f28a583f9bf2a2;hb=0a8d248c149a8499fc9b92e8ea7a5f4780be4d76;hp=c4b28ba321ea4a55953af22f914c5b94a7843b6f;hpb=e6aea9194297e503e6b2887f1756ce5dc4d6f078;p=cipher-tools.git

diff --git a/cipher.py b/cipher.py
index c4b28ba..b07cd25 100644
--- a/cipher.py
+++ b/cipher.py
@@ -1,6 +1,36 @@
 import string
 import collections
 
+english_counts = collections.defaultdict(int)
+with open('count_1l.txt', 'r') as f:
+    for line in f:
+        (letter, count) = line.split("\t")
+        english_counts[letter] = int(count)
+
+def sanitise(text):
+    sanitised = [c.lower() for c in text if c in string.ascii_letters]
+    return ''.join(sanitised)
+
+def letter_frequencies(message):
+    frequencies = collections.defaultdict(int)
+    for letter in sanitise(message):
+        frequencies[letter]+=1
+    return frequencies
+
+def scale_freq(frequencies):
+    total= sum(frequencies.values())
+    scaled_frequencies = collections.defaultdict(int)
+    for letter in frequencies.keys():
+        scaled_frequencies[letter] = frequencies[letter] / total
+    return scaled_frequencies
+
+def value_diff(frequencies1, frequencies2):
+    total= 0
+    for letter in frequencies1.keys():
+        total += abs(frequencies1[letter]-frequencies2[letter])
+    return total
+
+
 
 def caesar_cipher_letter(letter, shift):
     if letter in string.ascii_letters:
@@ -23,10 +53,16 @@ def caesar_cipher_message(message, shift):
 def caesar_decipher_message(message, shift):
     return caesar_cipher_message(message, -shift)
 
-def letter_frequencies(message):
-    frequencies = collections.defaultdict(int)
-    for letter in message:
-        if letter in string.ascii_letters:
-            frequencies[letter.lower()]+=1
-    return frequencies
-
+def caesar_break(message):
+    best_key = 0
+    best_fit = float("inf")
+    for shift in range(26):
+        plaintxt = caesar_decipher_message(message, shift)
+        lettertxt = letter_frequencies(plaintxt)
+        total1 = scale_freq(lettertxt)
+        total2 = scale_freq(english_counts)
+        fit = value_diff(total2, total1)
+        if fit < best_fit:
+            best_key = shift
+            best_fit = fit
+    return best_key