cipher.py

   1 import string
   2 import collections
   3 import norms
   4 import logging
   5
   6 logger = logging.getLogger(__name__)
   7 logger.addHandler(logging.FileHandler('cipher.log'))
   8 logger.setLevel(logging.WARNING)
   9 #logger.setLevel(logging.INFO)
  10
  11 english_counts = collections.defaultdict(int)
  12 with open('count_1l.txt', 'r') as f:
  13     for line in f:
  14         (letter, count) = line.split("\t")
  15         english_counts[letter] = int(count)
  16 normalised_english_counts = norms.normalise(english_counts)
  17
  18 keywords = []
  19 with open('words.txt', 'r') as f:
  20     keywords = [line.rstrip() for line in f]
  21
  22
  23 modular_division_table = [[0]*26 for x in range(26)]
  24 for a in range(26):
  25     for b in range(26):
  26         c = (a * b) % 26
  27         modular_division_table[b][c] = a
  28
  29 modular_division_table_one_based = [[0]*27 for x in range(27)]
  30 for a in range(27):
  31     for b in range(27):
  32         c = ((a * b)-1) % 26 + 1
  33         modular_division_table_one_based[b][c] = a
  34
  35
  36
  37 def sanitise(text):
  38     """Remove all non-alphabetic characters and convert the text to lowercase
  39
  40     >>> sanitise('The Quick')
  41     'thequick'
  42     >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
  43     'thequickbrownfoxjumpedoverthelazydog'
  44     """
  45     sanitised = [c.lower() for c in text if c in string.ascii_letters]
  46     return ''.join(sanitised)
  47
  48 def ngrams(text, n):
  49     """Returns all n-grams of a text
  50
  51     >>> ngrams(sanitise('the quick brown fox'), 2)
  52     [('t', 'h'), ('h', 'e'), ('e', 'q'), ('q', 'u'), ('u', 'i'), ('i', 'c'), ('c', 'k'), ('k', 'b'), ('b', 'r'), ('r', 'o'), ('o', 'w'), ('w', 'n'), ('n', 'f'), ('f', 'o'), ('o', 'x')]
  53     >>> ngrams(sanitise('the quick brown fox'), 4)
  54     [('t', 'h', 'e', 'q'), ('h', 'e', 'q', 'u'), ('e', 'q', 'u', 'i'), ('q', 'u', 'i', 'c'), ('u', 'i', 'c', 'k'), ('i', 'c', 'k', 'b'), ('c', 'k', 'b', 'r'), ('k', 'b', 'r', 'o'), ('b', 'r', 'o', 'w'), ('r', 'o', 'w', 'n'), ('o', 'w', 'n', 'f'), ('w', 'n', 'f', 'o'), ('n', 'f', 'o', 'x')]
  55     """
  56     return [tuple(text[i:i+n]) for i in range(len(text)-n+1)]
  57
  58 def letter_frequencies(text):
  59     """Count the number of occurrences of each character in text
  60
  61     >>> sorted(letter_frequencies('abcdefabc').items())
  62     [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
  63     >>> sorted(letter_frequencies('the quick brown fox jumped over the lazy dog').items())
  64     [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
  65     >>> sorted(letter_frequencies('The Quick BROWN fox jumped! over... the (9lazy) DOG').items())
  66     [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
  67     >>> sorted(letter_frequencies(sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')).items())
  68     [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
  69     """
  70     counts = collections.defaultdict(int)
  71     for c in text:
  72         counts[c] += 1
  73     return counts
  74
  75 def deduplicate(text):
  76     return list(collections.OrderedDict.fromkeys(text))
  77
  78
  79
  80 def caesar_encipher_letter(letter, shift):
  81     """Encipher a letter, given a shift amount
  82
  83     >>> caesar_encipher_letter('a', 1)
  84     'b'
  85     >>> caesar_encipher_letter('a', 2)
  86     'c'
  87     >>> caesar_encipher_letter('b', 2)
  88     'd'
  89     >>> caesar_encipher_letter('x', 2)
  90     'z'
  91     >>> caesar_encipher_letter('y', 2)
  92     'a'
  93     >>> caesar_encipher_letter('z', 2)
  94     'b'
  95     >>> caesar_encipher_letter('z', -1)
  96     'y'
  97     >>> caesar_encipher_letter('a', -1)
  98     'z'
  99     """
 100     if letter in string.ascii_letters:
 101         if letter in string.ascii_uppercase:
 102             alphabet_start = ord('A')
 103         else:
 104             alphabet_start = ord('a')
 105         return chr(((ord(letter) - alphabet_start + shift) % 26) + alphabet_start)
 106     else:
 107         return letter
 108
 109 def caesar_decipher_letter(letter, shift):
 110     """Decipher a letter, given a shift amount
 111
 112     >>> caesar_decipher_letter('b', 1)
 113     'a'
 114     >>> caesar_decipher_letter('b', 2)
 115     'z'
 116     """
 117     return caesar_encipher_letter(letter, -shift)
 118
 119 def caesar_encipher(message, shift):
 120     """Encipher a message with the Caesar cipher of given shift
 121
 122     >>> caesar_encipher('abc', 1)
 123     'bcd'
 124     >>> caesar_encipher('abc', 2)
 125     'cde'
 126     >>> caesar_encipher('abcxyz', 2)
 127     'cdezab'
 128     >>> caesar_encipher('ab cx yz', 2)
 129     'cd ez ab'
 130     """
 131     enciphered = [caesar_encipher_letter(l, shift) for l in message]
 132     return ''.join(enciphered)
 133
 134 def caesar_decipher(message, shift):
 135     """Encipher a message with the Caesar cipher of given shift
 136
 137     >>> caesar_decipher('bcd', 1)
 138     'abc'
 139     >>> caesar_decipher('cde', 2)
 140     'abc'
 141     >>> caesar_decipher('cd ez ab', 2)
 142     'ab cx yz'
 143     """
 144     return caesar_encipher(message, -shift)
 145
 146 def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
 147     """Encipher a letter, given a multiplier and adder
 148
 149     >>> ''.join([affine_encipher_letter(l, 3, 5, True) for l in string.ascii_uppercase])
 150     'HKNQTWZCFILORUXADGJMPSVYBE'
 151     >>> ''.join([affine_encipher_letter(l, 3, 5, False) for l in string.ascii_uppercase])
 152     'FILORUXADGJMPSVYBEHKNQTWZC'
 153     """
 154     if letter in string.ascii_letters:
 155         if letter in string.ascii_uppercase:
 156             alphabet_start = ord('A')
 157         else:
 158             alphabet_start = ord('a')
 159         letter_number = ord(letter) - alphabet_start
 160         if one_based: letter_number += 1
 161         raw_cipher_number = (letter_number * multiplier + adder)
 162         cipher_number = 0
 163         if one_based:
 164             cipher_number = (raw_cipher_number - 1) % 26
 165         else:
 166             cipher_number = raw_cipher_number % 26
 167         return chr(cipher_number + alphabet_start)
 168     else:
 169         return letter
 170
 171 def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
 172     """Encipher a letter, given a multiplier and adder
 173
 174     >>> ''.join([affine_decipher_letter(l, 3, 5, True) for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
 175     'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
 176     >>> ''.join([affine_decipher_letter(l, 3, 5, False) for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
 177     'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
 178     """
 179     if letter in string.ascii_letters:
 180         if letter in string.ascii_uppercase:
 181             alphabet_start = ord('A')
 182         else:
 183             alphabet_start = ord('a')
 184         cipher_number = ord(letter) - alphabet_start
 185         if one_based: cipher_number += 1
 186         plaintext_number = 0
 187         if one_based:
 188             plaintext_number = (modular_division_table_one_based[multiplier][(cipher_number - adder + 26) % 26] - 1) % 26
 189         else:
 190             #plaintext_number = (modular_division_table[multiplier][cipher_number] - adder) % 26
 191             plaintext_number = modular_division_table[multiplier][(cipher_number - adder + 26) % 26]
 192         return chr(plaintext_number + alphabet_start)
 193     else:
 194         return letter
 195
 196 def affine_encipher(message, multiplier=1, adder=0, one_based=True):
 197     """Encipher a message
 198
 199     >>> affine_encipher('hours passed during which jerico tried every trick he could think of', 15, 22, True)
 200     'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
 201     """
 202
 203     enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) for l in message]
 204     return ''.join(enciphered)
 205
 206 def affine_decipher(message, multiplier=1, adder=0, one_based=True):
 207     """Decipher a message
 208
 209     >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh', 15, 22, True)
 210     'hours passed during which jerico tried every trick he could think of'
 211     """
 212     enciphered = [affine_decipher_letter(l, multiplier, adder, one_based) for l in message]
 213     return ''.join(enciphered)
 214
 215
 216 def keyword_cipher_alphabet_of(keyword, wrap_alphabet=False):
 217     """Find the cipher alphabet given a keyword
 218
 219     >>> keyword_cipher_alphabet_of('harry')
 220     'harybcdefgijklmnopqstuvwxz'
 221     >>> keyword_cipher_alphabet_of('harry', True)
 222     'haryzbcdefgijklmnopqstuvwx'
 223     >>> keyword_cipher_alphabet_of('harry', False)
 224     'harybcdefgijklmnopqstuvwxz'
 225     """
 226     cipher_alphabet = ''
 227     if wrap_alphabet:
 228         last_keyword_letter = deduplicate(sanitise(keyword))[-1]
 229         last_keyword_position = string.ascii_lowercase.find(last_keyword_letter) + 1
 230         cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + string.ascii_lowercase[last_keyword_position:] + string.ascii_lowercase))
 231     else:
 232         cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + string.ascii_lowercase))
 233     return cipher_alphabet
 234
 235
 236 def keyword_encipher(message, keyword, wrap_alphabet=False):
 237     """Enciphers a message with a keyword substitution cipher
 238
 239     >>> keyword_encipher('test message', 'harry')
 240     'sbqs kbqqhdb'
 241     >>> keyword_encipher('test message', 'harry', True)
 242     'qzpq jzpphcz'
 243     >>> keyword_encipher('test message', 'harry', False)
 244     'sbqs kbqqhdb'
 245     """
 246     cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
 247     cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
 248     return message.lower().translate(cipher_translation)
 249
 250 def keyword_decipher(message, keyword, wrap_alphabet=False):
 251     """Deciphers a message with a keyword substitution cipher
 252
 253     >>> keyword_decipher('sbqs kbqqhdb', 'harry')
 254     'test message'
 255     >>> keyword_decipher('qzpq jzpphcz', 'harry', True)
 256     'test message'
 257     >>> keyword_decipher('sbqs kbqqhdb', 'harry', False)
 258     'test message'
 259     """
 260     cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
 261     cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
 262     return message.lower().translate(cipher_translation)
 263
 264
 265 def caesar_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
 266     """Breaks a Caesar cipher using frequency analysis
 267
 268     >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
 269     (4, 0.31863952890183...)
 270     >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
 271     (19, 0.42152901235832...)
 272     >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
 273     (13, 0.316029208075451...)
 274     """
 275     sanitised_message = sanitise(message)
 276     best_shift = 0
 277     best_fit = float("inf")
 278     for shift in range(26):
 279         plaintext = caesar_decipher(sanitised_message, shift)
 280         frequencies = message_frequency_scaling(letter_frequencies(plaintext))
 281         fit = metric(target_frequencies, frequencies)
 282         logger.debug('Caesar break attempt using key {0} gives fit of {1} and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
 283         if fit < best_fit:
 284             best_fit = fit
 285             best_shift = shift
 286     logger.info('Caesar break best fit: key {0} gives fit of {1} and decrypt starting: {2}'.format(best_shift, best_fit, caesar_decipher(sanitised_message, best_shift)[:50]))
 287     return best_shift, best_fit
 288
 289 def affine_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
 290     """Breaks an affine cipher using frequency analysis
 291
 292     >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd clm ckuxj.') # doctest: +ELLIPSIS
 293     ((15, 22, True), 0.23570361818655...)
 294     """
 295     sanitised_message = sanitise(message)
 296     best_multiplier = 0
 297     best_adder = 0
 298     best_one_based = True
 299     best_fit = float("inf")
 300     for one_based in [True, False]:
 301         for multiplier in range(1, 26, 2):
 302             for adder in range(26):
 303                 plaintext = affine_decipher(sanitised_message, multiplier, adder, one_based)
 304                 frequencies = message_frequency_scaling(letter_frequencies(plaintext))
 305                 fit = metric(target_frequencies, frequencies)
 306                 logger.debug('Affine break attempt using key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(multiplier, adder, one_based, fit, plaintext[:50]))
 307                 if fit < best_fit:
 308                     best_fit = fit
 309                     best_multiplier = multiplier
 310                     best_adder = adder
 311                     best_one_based = one_based
 312     logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(best_multiplier, best_adder, best_one_based, best_fit, affine_decipher(sanitised_message, best_multiplier, best_adder, best_one_based)[:50]))
 313     return (best_multiplier, best_adder, best_one_based), best_fit
 314
 315
 316 def keyword_break(message, wordlist=keywords, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
 317     """Breaks a keyword substitution cipher using a dictionary and frequency analysis
 318
 319     >>> keyword_break(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', True), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
 320     (('elephant', True), 0.41643991598441...)
 321     """
 322     best_keyword = ''
 323     best_wrap_alphabet = True
 324     best_fit = float("inf")
 325     for wrap_alphabet in [True, False]:
 326         for keyword in wordlist:
 327             plaintext = keyword_decipher(message, keyword, wrap_alphabet)
 328             frequencies = message_frequency_scaling(letter_frequencies(plaintext))
 329             fit = metric(target_frequencies, frequencies)
 330             logger.debug('Keyword break attempt using key {0} ({1}) gives fit of {2} and decrypt starting: {3}'.format(keyword, wrap_alphabet, fit, sanitise(plaintext)[:50]))
 331             if fit < best_fit:
 332                 best_fit = fit
 333                 best_keyword = keyword
 334                 best_wrap_alphabet = wrap_alphabet
 335     logger.info('Keyword break best fit with key {0} ({1}) gives fit of {2} and decrypt starting: {3}'.format(best_keyword, best_wrap_alphabet, best_fit, sanitise(keyword_decipher(message, best_keyword))[:50]))
 336     return (best_keyword, best_wrap_alphabet), best_fit
 337
 338
 339 if __name__ == "__main__":
 340     import doctest
 341     doctest.testmod()