cipher.py

   1 import string
   2 import collections
   3 import norms
   4
   5 english_counts = collections.defaultdict(int)
   6 with open('count_1l.txt', 'r') as f:
   7     for line in f:
   8         (letter, count) = line.split("\t")
   9         english_counts[letter] = int(count)
  10 normalised_english_counts = norms.normalise(english_counts)
  11
  12
  13 def sanitise(text):
  14     """Remove all non-alphabetic characters and convert the text to lowercase
  15
  16     >>> sanitise('The Quick')
  17     'thequick'
  18     >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
  19     'thequickbrownfoxjumpedoverthelazydog'
  20     """
  21     sanitised = [c.lower() for c in text if c in string.ascii_letters]
  22     return ''.join(sanitised)
  23
  24 def ngrams(text, n):
  25     return [tuple(text[i:i+n]) for i in range(len(text)-n+1)]
  26
  27 def letter_frequencies(text):
  28     """Count the number of occurrences of each character in text
  29
  30     >>> sorted(letter_frequencies('abcdefabc').items())
  31     [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
  32     >>> sorted(letter_frequencies('the quick brown fox jumped over the lazy dog').items())
  33     [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
  34     >>> sorted(letter_frequencies('The Quick BROWN fox jumped! over... the (9lazy) DOG').items())
  35     [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
  36     >>> sorted(letter_frequencies(sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')).items())
  37     [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
  38     """
  39     counts = collections.defaultdict(int)
  40     for c in text:
  41         counts[c] += 1
  42     return counts
  43
  44 def caesar_encipher_letter(letter, shift):
  45     """Encipher a letter, given a shift amount
  46
  47     >>> caesar_encipher_letter('a', 1)
  48     'b'
  49     >>> caesar_encipher_letter('a', 2)
  50     'c'
  51     >>> caesar_encipher_letter('b', 2)
  52     'd'
  53     >>> caesar_encipher_letter('x', 2)
  54     'z'
  55     >>> caesar_encipher_letter('y', 2)
  56     'a'
  57     >>> caesar_encipher_letter('z', 2)
  58     'b'
  59     >>> caesar_encipher_letter('z', -1)
  60     'y'
  61     >>> caesar_encipher_letter('a', -1)
  62     'z'
  63     """
  64     if letter in string.ascii_letters:
  65         if letter in string.ascii_uppercase:
  66             alphabet_start = ord('A')
  67         else:
  68             alphabet_start = ord('a')
  69         return chr(((ord(letter) - alphabet_start + shift) % 26) + alphabet_start)
  70     else:
  71         return letter
  72
  73 def caesar_decipher_letter(letter, shift):
  74     """Decipher a letter, given a shift amount
  75
  76     >>> caesar_decipher_letter('b', 1)
  77     'a'
  78     >>> caesar_decipher_letter('b', 2)
  79     'z'
  80     """
  81     return caesar_encipher_letter(letter, -shift)
  82
  83 def caesar_encipher(message, shift):
  84     """Encipher a message with the Caesar cipher of given shift
  85
  86     >>> caesar_encipher('abc', 1)
  87     'bcd'
  88     >>> caesar_encipher('abc', 2)
  89     'cde'
  90     >>> caesar_encipher('abcxyz', 2)
  91     'cdezab'
  92     >>> caesar_encipher('ab cx yz', 2)
  93     'cd ez ab'
  94     """
  95     enciphered = [caesar_encipher_letter(l, shift) for l in message]
  96     return ''.join(enciphered)
  97
  98 def caesar_decipher(message, shift):
  99     """Encipher a message with the Caesar cipher of given shift
 100
 101     >>> caesar_decipher('bcd', 1)
 102     'abc'
 103     >>> caesar_decipher('cde', 2)
 104     'abc'
 105     >>> caesar_decipher('cd ez ab', 2)
 106     'ab cx yz'
 107     """
 108     return caesar_encipher(message, -shift)
 109
 110 def caesar_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
 111     """Breaks a Caesar cipher using frequency analysis
 112
 113
 114     >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm')
 115     (4, 0.3186395289018361)
 116     >>> caesar_break('jhzhuhfrqilqhgwrdevwudfwuhdvrqlqjwkhqkdylqjvxemhfwhgwrfulwlflvpwkhhasodqdwlrqrisrzhuwkdwmxulglfdovfl')
 117     (3, 0.32902042861730835)
 118     >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert')
 119     (19, 0.4215290123583277)
 120     >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur')
 121     (13, 0.31602920807545154)
 122     """
 123     sanitised_message = sanitise(message)
 124     best_shift = 0
 125     best_fit = float("inf")
 126     for shift in range(26):
 127         plaintext = caesar_decipher(sanitised_message, shift)
 128         frequencies = message_frequency_scaling(letter_frequencies(plaintext))
 129         fit = metric(target_frequencies, frequencies)
 130         if fit < best_fit:
 131             best_fit = fit
 132             best_shift = shift
 133     return best_shift, best_fit
 134
 135
 136 if __name__ == "__main__":
 137     import doctest
 138     doctest.testmod()