cipher.py

   1 import string
   2 import collections
   3 import norms
   4 import logging
   5
   6 logger = logging.getLogger(__name__)
   7 logger.addHandler(logging.FileHandler('cipher.log'))
   8 logger.setLevel(logging.WARNING)
   9 #logger.setLevel(logging.INFO)
  10
  11 english_counts = collections.defaultdict(int)
  12 with open('count_1l.txt', 'r') as f:
  13     for line in f:
  14         (letter, count) = line.split("\t")
  15         english_counts[letter] = int(count)
  16 normalised_english_counts = norms.normalise(english_counts)
  17
  18 with open('words.txt', 'r') as f:
  19     keywords = [line.rstrip() for line in f]
  20
  21 modular_division_table = [[0]*26 for x in range(26)]
  22 for a in range(26):
  23     for b in range(26):
  24         c = (a * b) % 26
  25         modular_division_table[b][c] = a
  26
  27 modular_division_table_one_based = [[0]*27 for x in range(27)]
  28 for a in range(27):
  29     for b in range(27):
  30         c = ((a * b)-1) % 26 + 1
  31         modular_division_table_one_based[b][c] = a
  32
  33
  34
  35 def sanitise(text):
  36     """Remove all non-alphabetic characters and convert the text to lowercase
  37
  38     >>> sanitise('The Quick')
  39     'thequick'
  40     >>> sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')
  41     'thequickbrownfoxjumpedoverthelazydog'
  42     """
  43     sanitised = [c.lower() for c in text if c in string.ascii_letters]
  44     return ''.join(sanitised)
  45
  46 def ngrams(text, n):
  47     """Returns all n-grams of a text
  48
  49     >>> ngrams(sanitise('the quick brown fox'), 2)
  50     [('t', 'h'), ('h', 'e'), ('e', 'q'), ('q', 'u'), ('u', 'i'), ('i', 'c'), ('c', 'k'), ('k', 'b'), ('b', 'r'), ('r', 'o'), ('o', 'w'), ('w', 'n'), ('n', 'f'), ('f', 'o'), ('o', 'x')]
  51     >>> ngrams(sanitise('the quick brown fox'), 4)
  52     [('t', 'h', 'e', 'q'), ('h', 'e', 'q', 'u'), ('e', 'q', 'u', 'i'), ('q', 'u', 'i', 'c'), ('u', 'i', 'c', 'k'), ('i', 'c', 'k', 'b'), ('c', 'k', 'b', 'r'), ('k', 'b', 'r', 'o'), ('b', 'r', 'o', 'w'), ('r', 'o', 'w', 'n'), ('o', 'w', 'n', 'f'), ('w', 'n', 'f', 'o'), ('n', 'f', 'o', 'x')]
  53     """
  54     return [tuple(text[i:i+n]) for i in range(len(text)-n+1)]
  55
  56 def letter_frequencies(text):
  57     """Count the number of occurrences of each character in text
  58
  59     >>> sorted(letter_frequencies('abcdefabc').items())
  60     [('a', 2), ('b', 2), ('c', 2), ('d', 1), ('e', 1), ('f', 1)]
  61     >>> sorted(letter_frequencies('the quick brown fox jumped over the lazy dog').items())
  62     [(' ', 8), ('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
  63     >>> sorted(letter_frequencies('The Quick BROWN fox jumped! over... the (9lazy) DOG').items())
  64     [(' ', 8), ('!', 1), ('(', 1), (')', 1), ('.', 3), ('9', 1), ('B', 1), ('D', 1), ('G', 1), ('N', 1), ('O', 2), ('Q', 1), ('R', 1), ('T', 1), ('W', 1), ('a', 1), ('c', 1), ('d', 1), ('e', 4), ('f', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('o', 2), ('p', 1), ('r', 1), ('t', 1), ('u', 2), ('v', 1), ('x', 1), ('y', 1), ('z', 1)]
  65     >>> sorted(letter_frequencies(sanitise('The Quick BROWN fox jumped! over... the (9lazy) DOG')).items())
  66     [('a', 1), ('b', 1), ('c', 1), ('d', 2), ('e', 4), ('f', 1), ('g', 1), ('h', 2), ('i', 1), ('j', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1), ('o', 4), ('p', 1), ('q', 1), ('r', 2), ('t', 2), ('u', 2), ('v', 1), ('w', 1), ('x', 1), ('y', 1), ('z', 1)]
  67     """
  68     counts = collections.defaultdict(int)
  69     for c in text:
  70         counts[c] += 1
  71     return counts
  72
  73 def deduplicate(text):
  74     return list(collections.OrderedDict.fromkeys(text))
  75
  76
  77
  78 def caesar_encipher_letter(letter, shift):
  79     """Encipher a letter, given a shift amount
  80
  81     >>> caesar_encipher_letter('a', 1)
  82     'b'
  83     >>> caesar_encipher_letter('a', 2)
  84     'c'
  85     >>> caesar_encipher_letter('b', 2)
  86     'd'
  87     >>> caesar_encipher_letter('x', 2)
  88     'z'
  89     >>> caesar_encipher_letter('y', 2)
  90     'a'
  91     >>> caesar_encipher_letter('z', 2)
  92     'b'
  93     >>> caesar_encipher_letter('z', -1)
  94     'y'
  95     >>> caesar_encipher_letter('a', -1)
  96     'z'
  97     """
  98     if letter in string.ascii_letters:
  99         if letter in string.ascii_uppercase:
 100             alphabet_start = ord('A')
 101         else:
 102             alphabet_start = ord('a')
 103         return chr(((ord(letter) - alphabet_start + shift) % 26) + alphabet_start)
 104     else:
 105         return letter
 106
 107 def caesar_decipher_letter(letter, shift):
 108     """Decipher a letter, given a shift amount
 109
 110     >>> caesar_decipher_letter('b', 1)
 111     'a'
 112     >>> caesar_decipher_letter('b', 2)
 113     'z'
 114     """
 115     return caesar_encipher_letter(letter, -shift)
 116
 117 def caesar_encipher(message, shift):
 118     """Encipher a message with the Caesar cipher of given shift
 119
 120     >>> caesar_encipher('abc', 1)
 121     'bcd'
 122     >>> caesar_encipher('abc', 2)
 123     'cde'
 124     >>> caesar_encipher('abcxyz', 2)
 125     'cdezab'
 126     >>> caesar_encipher('ab cx yz', 2)
 127     'cd ez ab'
 128     """
 129     enciphered = [caesar_encipher_letter(l, shift) for l in message]
 130     return ''.join(enciphered)
 131
 132 def caesar_decipher(message, shift):
 133     """Encipher a message with the Caesar cipher of given shift
 134
 135     >>> caesar_decipher('bcd', 1)
 136     'abc'
 137     >>> caesar_decipher('cde', 2)
 138     'abc'
 139     >>> caesar_decipher('cd ez ab', 2)
 140     'ab cx yz'
 141     """
 142     return caesar_encipher(message, -shift)
 143
 144 def affine_encipher_letter(letter, multiplier=1, adder=0, one_based=True):
 145     """Encipher a letter, given a multiplier and adder
 146
 147     >>> ''.join([affine_encipher_letter(l, 3, 5, True) for l in string.ascii_uppercase])
 148     'HKNQTWZCFILORUXADGJMPSVYBE'
 149     >>> ''.join([affine_encipher_letter(l, 3, 5, False) for l in string.ascii_uppercase])
 150     'FILORUXADGJMPSVYBEHKNQTWZC'
 151     """
 152     if letter in string.ascii_letters:
 153         if letter in string.ascii_uppercase:
 154             alphabet_start = ord('A')
 155         else:
 156             alphabet_start = ord('a')
 157         letter_number = ord(letter) - alphabet_start
 158         if one_based:
 159             letter_number += 1
 160         raw_cipher_number = (letter_number * multiplier + adder)
 161         cipher_number = 0
 162         if one_based:
 163             cipher_number = (raw_cipher_number - 1) % 26
 164         else:
 165             cipher_number = raw_cipher_number % 26
 166         return chr(cipher_number + alphabet_start)
 167     else:
 168         return letter
 169
 170 def affine_decipher_letter(letter, multiplier=1, adder=0, one_based=True):
 171     """Encipher a letter, given a multiplier and adder
 172
 173     >>> ''.join([affine_decipher_letter(l, 3, 5, True) for l in 'HKNQTWZCFILORUXADGJMPSVYBE'])
 174     'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
 175     >>> ''.join([affine_decipher_letter(l, 3, 5, False) for l in 'FILORUXADGJMPSVYBEHKNQTWZC'])
 176     'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
 177     """
 178     if letter in string.ascii_letters:
 179         if letter in string.ascii_uppercase:
 180             alphabet_start = ord('A')
 181         else:
 182             alphabet_start = ord('a')
 183         cipher_number = ord(letter) - alphabet_start
 184         if one_based:
 185             cipher_number += 1
 186         plaintext_number = 0
 187         if one_based:
 188             plaintext_number = (modular_division_table_one_based[multiplier][(cipher_number - adder + 26) % 26] - 1) % 26
 189         else:
 190             #plaintext_number = (modular_division_table[multiplier][cipher_number] - adder) % 26
 191             plaintext_number = modular_division_table[multiplier][(cipher_number - adder + 26) % 26]
 192         return chr(plaintext_number + alphabet_start)
 193     else:
 194         return letter
 195
 196 def affine_encipher(message, multiplier=1, adder=0, one_based=True):
 197     """Encipher a message
 198
 199     >>> affine_encipher('hours passed during which jerico tried every trick he could think of', 15, 22, True)
 200     'lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh'
 201     """
 202     enciphered = [affine_encipher_letter(l, multiplier, adder, one_based) for l in message]
 203     return ''.join(enciphered)
 204
 205 def affine_decipher(message, multiplier=1, adder=0, one_based=True):
 206     """Decipher a message
 207
 208     >>> affine_decipher('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh', 15, 22, True)
 209     'hours passed during which jerico tried every trick he could think of'
 210     """
 211     enciphered = [affine_decipher_letter(l, multiplier, adder, one_based) for l in message]
 212     return ''.join(enciphered)
 213
 214
 215 def keyword_cipher_alphabet_of(keyword, wrap_alphabet=0):
 216     """Find the cipher alphabet given a keyword.
 217     wrap_alphabet controls how the rest of the alphabet is added
 218     after the keyword.
 219     0 : from 'a'
 220     1 : from the last letter in the sanitised keyword
 221     2 : from the largest letter in the sanitised keyword
 222
 223     >>> keyword_cipher_alphabet_of('bayes')
 224     'bayescdfghijklmnopqrtuvwxz'
 225     >>> keyword_cipher_alphabet_of('bayes', 0)
 226     'bayescdfghijklmnopqrtuvwxz'
 227     >>> keyword_cipher_alphabet_of('bayes', 1)
 228     'bayestuvwxzcdfghijklmnopqr'
 229     >>> keyword_cipher_alphabet_of('bayes', 2)
 230     'bayeszcdfghijklmnopqrtuvwx'
 231     """
 232     if wrap_alphabet == 0:
 233         cipher_alphabet = ''.join(deduplicate(sanitise(keyword) + string.ascii_lowercase))
 234     else:
 235         if wrap_alphabet == 1:
 236             last_keyword_letter = deduplicate(sanitise(keyword))[-1]
 237         else:
 238             last_keyword_letter = sorted(sanitise(keyword))[-1]
 239         last_keyword_position = string.ascii_lowercase.find(last_keyword_letter) + 1
 240         cipher_alphabet = ''.join(deduplicate(sanitise(keyword) +
 241                                               string.ascii_lowercase[last_keyword_position:] +
 242                                               string.ascii_lowercase))
 243     return cipher_alphabet
 244
 245
 246 def keyword_encipher(message, keyword, wrap_alphabet=0):
 247     """Enciphers a message with a keyword substitution cipher.
 248     wrap_alphabet controls how the rest of the alphabet is added
 249     after the keyword.
 250     0 : from 'a'
 251     1 : from the last letter in the sanitised keyword
 252     2 : from the largest letter in the sanitised keyword
 253
 254     >>> keyword_encipher('test message', 'bayes')
 255     'rsqr ksqqbds'
 256     >>> keyword_encipher('test message', 'bayes', 0)
 257     'rsqr ksqqbds'
 258     >>> keyword_encipher('test message', 'bayes', 1)
 259     'lskl dskkbus'
 260     >>> keyword_encipher('test message', 'bayes', 2)
 261     'qspq jsppbcs'
 262     """
 263     cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
 264     cipher_translation = ''.maketrans(string.ascii_lowercase, cipher_alphabet)
 265     return message.lower().translate(cipher_translation)
 266
 267 def keyword_decipher(message, keyword, wrap_alphabet=0):
 268     """Deciphers a message with a keyword substitution cipher.
 269     wrap_alphabet controls how the rest of the alphabet is added
 270     after the keyword.
 271     0 : from 'a'
 272     1 : from the last letter in the sanitised keyword
 273     2 : from the largest letter in the sanitised keyword
 274
 275     >>> keyword_decipher('rsqr ksqqbds', 'bayes')
 276     'test message'
 277     >>> keyword_decipher('rsqr ksqqbds', 'bayes', 0)
 278     'test message'
 279     >>> keyword_decipher('lskl dskkbus', 'bayes', 1)
 280     'test message'
 281     >>> keyword_decipher('qspq jsppbcs', 'bayes', 2)
 282     'test message'
 283     """
 284     cipher_alphabet = keyword_cipher_alphabet_of(keyword, wrap_alphabet)
 285     cipher_translation = ''.maketrans(cipher_alphabet, string.ascii_lowercase)
 286     return message.lower().translate(cipher_translation)
 287
 288
 289 def caesar_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
 290     """Breaks a Caesar cipher using frequency analysis
 291
 292     >>> caesar_break('ibxcsyorsaqcheyklxivoexlevmrimwxsfiqevvmihrsasrxliwyrhecjsppsamrkwleppfmergefifvmhixscsymjcsyqeoixlm') # doctest: +ELLIPSIS
 293     (4, 0.31863952890183...)
 294     >>> caesar_break('wxwmaxdgheetgwuxztgptedbgznitgwwhpguxyhkxbmhvvtlbhgteeraxlmhiixweblmxgxwmhmaxybkbgztgwztsxwbgmxgmert') # doctest: +ELLIPSIS
 295     (19, 0.42152901235832...)
 296     >>> caesar_break('yltbbqnqnzvguvaxurorgenafsbezqvagbnornfgsbevpnaabjurersvaquvzyvxrnznazlybequrvfohgriraabjtbaruraprur') # doctest: +ELLIPSIS
 297     (13, 0.316029208075451...)
 298     """
 299     sanitised_message = sanitise(message)
 300     best_shift = 0
 301     best_fit = float("inf")
 302     for shift in range(26):
 303         plaintext = caesar_decipher(sanitised_message, shift)
 304         frequencies = message_frequency_scaling(letter_frequencies(plaintext))
 305         fit = metric(target_frequencies, frequencies)
 306         logger.debug('Caesar break attempt using key {0} gives fit of {1} and decrypt starting: {2}'.format(shift, fit, plaintext[:50]))
 307         if fit < best_fit:
 308             best_fit = fit
 309             best_shift = shift
 310     logger.info('Caesar break best fit: key {0} gives fit of {1} and decrypt starting: {2}'.format(best_shift, best_fit, caesar_decipher(sanitised_message, best_shift)[:50]))
 311     return best_shift, best_fit
 312
 313 def affine_break(message, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
 314     """Breaks an affine cipher using frequency analysis
 315
 316     >>> affine_break('lmyfu bkuusd dyfaxw claol psfaom jfasd snsfg jfaoe ls omytd jlaxe mh jm bfmibj umis hfsul axubafkjamx. ls kffkxwsd jls ofgbjmwfkiu olfmxmtmwaokttg jlsx ls kffkxwsd jlsi zg tsxwjl. jlsx ls umfjsd jlsi zg hfsqysxog. ls dmmdtsd mx jls bats mh bkbsf. ls bfmctsd kfmyxd jls lyj, mztanamyu xmc jm clm cku tmmeaxw kj lai kxd clm ckuxj.') # doctest: +ELLIPSIS
 317     ((15, 22, True), 0.23570361818655...)
 318     """
 319     sanitised_message = sanitise(message)
 320     best_multiplier = 0
 321     best_adder = 0
 322     best_one_based = True
 323     best_fit = float("inf")
 324     for one_based in [True, False]:
 325         for multiplier in range(1, 26, 2):
 326             for adder in range(26):
 327                 plaintext = affine_decipher(sanitised_message, multiplier, adder, one_based)
 328                 frequencies = message_frequency_scaling(letter_frequencies(plaintext))
 329                 fit = metric(target_frequencies, frequencies)
 330                 logger.debug('Affine break attempt using key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(multiplier, adder, one_based, fit, plaintext[:50]))
 331                 if fit < best_fit:
 332                     best_fit = fit
 333                     best_multiplier = multiplier
 334                     best_adder = adder
 335                     best_one_based = one_based
 336     logger.info('Affine break best fit with key {0}x+{1} ({2}) gives fit of {3} and decrypt starting: {4}'.format(best_multiplier, best_adder, best_one_based, best_fit, affine_decipher(sanitised_message, best_multiplier, best_adder, best_one_based)[:50]))
 337     return (best_multiplier, best_adder, best_one_based), best_fit
 338
 339
 340 def keyword_break(message, wordlist=keywords, metric=norms.euclidean_distance, target_frequencies=normalised_english_counts, message_frequency_scaling=norms.normalise):
 341     """Breaks a keyword substitution cipher using a dictionary and frequency analysis
 342
 343     >>> keyword_break(keyword_encipher('this is a test message for the keyword decipherment', 'elephant', 1), wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
 344     (('elephant', 1), 0.41643991598441...)
 345     """
 346     best_keyword = ''
 347     best_wrap_alphabet = True
 348     best_fit = float("inf")
 349     for wrap_alphabet in range(3):
 350         for keyword in wordlist:
 351             plaintext = keyword_decipher(message, keyword, wrap_alphabet)
 352             frequencies = message_frequency_scaling(letter_frequencies(plaintext))
 353             fit = metric(target_frequencies, frequencies)
 354             logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(keyword, wrap_alphabet, fit, sanitise(plaintext)[:50]))
 355             if fit < best_fit:
 356                 best_fit = fit
 357                 best_keyword = keyword
 358                 best_wrap_alphabet = wrap_alphabet
 359     logger.info('Keyword break best fit with key {0} (wrap={1}) gives fit of {2} and decrypt starting: {3}'.format(best_keyword, best_wrap_alphabet, best_fit, sanitise(keyword_decipher(message, best_keyword))[:50]))
 360     return (best_keyword, best_wrap_alphabet), best_fit
 361
 362
 363 if __name__ == "__main__":
 364     import doctest
 365     doctest.testmod()