cipherbreak.py

   1 """A set of functions to break the ciphers give in ciphers.py.
   2 """
   3
   4 import string
   5 import collections
   6 import norms
   7 import logging
   8 import random
   9 import math
  10 from itertools import starmap
  11 from segment import segment
  12 from multiprocessing import Pool
  13
  14 import matplotlib.pyplot as plt
  15
  16
  17 from cipher import *
  18 from language_models import *
  19
  20 # To time a run:
  21 #
  22 # import timeit
  23 # c5a = open('2012/5a.ciphertext', 'r').read()
  24 # timeit.timeit('keyword_break(c5a)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break', number=1)
  25 # timeit.repeat('keyword_break_mp(c5a, chunksize=500)', setup='gc.enable() ; from __main__ import c5a ; from cipher import keyword_break_mp', repeat=5, number=1)
  26
  27
  28
  29
  30
  31 def amsco_break(message, translist=transpositions, patterns = [(1, 2), (2, 1)],
  32                                   fillstyles = [AmscoFillStyle.continuous,
  33                                                 AmscoFillStyle.same_each_row,
  34                                                 AmscoFillStyle.reverse_each_row],
  35                                   fitness=Pbigrams,
  36                                   chunksize=500):
  37     """Breaks an AMSCO transposition cipher using a dictionary and
  38     n-gram frequency analysis
  39
  40     >>> amsco_break(amsco_transposition_encipher(sanitise( \
  41             "It is a truth universally acknowledged, that a single man in \
  42              possession of a good fortune, must be in want of a wife. However \
  43              little known the feelings or views of such a man may be on his \
  44              first entering a neighbourhood, this truth is so well fixed in \
  45              the minds of the surrounding families, that he is considered the \
  46              rightful property of some one or other of their daughters."), \
  47         'encipher'), \
  48         translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
  49                    (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
  50                    (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
  51         patterns=[(1, 2)]) # doctest: +ELLIPSIS
  52     (((2, 0, 5, 3, 1, 4, 6), (1, 2), <AmscoFillStyle.continuous: 1>), -709.4646722...)
  53     >>> amsco_break(amsco_transposition_encipher(sanitise( \
  54             "It is a truth universally acknowledged, that a single man in \
  55              possession of a good fortune, must be in want of a wife. However \
  56              little known the feelings or views of such a man may be on his \
  57              first entering a neighbourhood, this truth is so well fixed in \
  58              the minds of the surrounding families, that he is considered the \
  59              rightful property of some one or other of their daughters."), \
  60         'encipher', fillpattern=(2, 1)), \
  61         translist={(2, 0, 5, 3, 1, 4, 6): ['encipher'], \
  62                    (5, 0, 6, 1, 3, 4, 2): ['fourteen'], \
  63                    (6, 1, 0, 4, 5, 3, 2): ['keyword']}, \
  64         patterns=[(1, 2), (2, 1)], fitness=Ptrigrams) # doctest: +ELLIPSIS
  65     (((2, 0, 5, 3, 1, 4, 6), (2, 1), <AmscoFillStyle.continuous: 1>), -997.0129085...)
  66     """
  67     with Pool() as pool:
  68         helper_args = [(message, trans, pattern, fillstyle, fitness)
  69                        for trans in translist
  70                        for pattern in patterns
  71                        for fillstyle in fillstyles]
  72         # Gotcha: the helper function here needs to be defined at the top level
  73         #   (limitation of Pool.starmap)
  74         breaks = pool.starmap(amsco_break_worker, helper_args, chunksize)
  75         return max(breaks, key=lambda k: k[1])
  76
  77 def amsco_break_worker(message, transposition,
  78         pattern, fillstyle, fitness):
  79     plaintext = amsco_transposition_decipher(message, transposition,
  80         fillpattern=pattern, fillstyle=fillstyle)
  81     fit = fitness(sanitise(plaintext))
  82     logger.debug('AMSCO transposition break attempt using key {0} and pattern'
  83                          '{1} ({2}) gives fit of {3} and decrypt starting: '
  84                          '{4}'.format(
  85                              transposition, pattern, fillstyle, fit,
  86                              sanitise(plaintext)[:50]))
  87     return (transposition, pattern, fillstyle), fit
  88
  89
  90 def hill_break(message, matrix_size=2, fitness=Pletters,
  91     number_of_solutions=1, chunksize=500):
  92
  93     all_matrices = [np.matrix(list(m))
  94         for m in itertools.product([list(r)
  95             for r in itertools.product(range(26), repeat=matrix_size)],
  96         repeat=matrix_size)]
  97     valid_matrices = [m for m, d in
  98         zip(all_matrices, (int(round(linalg.det(m))) for m in all_matrices))
  99                   if d != 0
 100                   if d % 2 != 0
 101                   if d % 13 != 0 ]
 102     with Pool() as pool:
 103         helper_args = [(message, matrix, fitness)
 104                        for matrix in valid_matrices]
 105         # Gotcha: the helper function here needs to be defined at the top level
 106         #   (limitation of Pool.starmap)
 107         breaks = pool.starmap(hill_break_worker, helper_args, chunksize)
 108         if number_of_solutions == 1:
 109             return max(breaks, key=lambda k: k[1])
 110         else:
 111             return sorted(breaks, key=lambda k: k[1], reverse=True)[:number_of_solutions]
 112
 113 def hill_break_worker(message, matrix, fitness):
 114     plaintext = hill_decipher(matrix, message)
 115     fit = fitness(plaintext)
 116     logger.debug('Hill cipher break attempt using key {0} gives fit of '
 117                  '{1} and decrypt starting: {2}'.format(matrix,
 118                      fit, sanitise(plaintext)[:50]))
 119     return matrix, fit
 120
 121 def bifid_break_mp(message, wordlist=keywords, fitness=Pletters, max_period=10,
 122                      number_of_solutions=1, chunksize=500):
 123     """Breaks a keyword substitution cipher using a dictionary and
 124     frequency analysis
 125
 126     >>> bifid_break_mp(bifid_encipher('this is a test message for the ' \
 127           'keyword decipherment', 'elephant', wrap_alphabet=KeywordWrapAlphabet.from_last), \
 128           wordlist=['cat', 'elephant', 'kangaroo']) # doctest: +ELLIPSIS
 129     (('elephant', <KeywordWrapAlphabet.from_last: 2>, 0), -52.834575011...)
 130     >>> bifid_break_mp(bifid_encipher('this is a test message for the ' \
 131           'keyword decipherment', 'elephant', wrap_alphabet=KeywordWrapAlphabet.from_last), \
 132           wordlist=['cat', 'elephant', 'kangaroo'], \
 133           number_of_solutions=2) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
 134     [(('elephant', <KeywordWrapAlphabet.from_last: 2>, 0), -52.834575011...),
 135     (('elephant', <KeywordWrapAlphabet.from_largest: 3>, 0), -52.834575011...)]
 136     """
 137     with Pool() as pool:
 138         helper_args = [(message, word, wrap, period, fitness)
 139                        for word in wordlist
 140                        for wrap in KeywordWrapAlphabet
 141                        for period in range(max_period+1)]
 142         # Gotcha: the helper function here needs to be defined at the top level
 143         #   (limitation of Pool.starmap)
 144         breaks = pool.starmap(bifid_break_worker, helper_args, chunksize)
 145         if number_of_solutions == 1:
 146             return max(breaks, key=lambda k: k[1])
 147         else:
 148             return sorted(breaks, key=lambda k: k[1], reverse=True)[:number_of_solutions]
 149
 150 def bifid_break_worker(message, keyword, wrap_alphabet, period, fitness):
 151     plaintext = bifid_decipher(message, keyword, wrap_alphabet, period=period)
 152     fit = fitness(plaintext)
 153     logger.debug('Keyword break attempt using key {0} (wrap={1}) gives fit of '
 154                  '{2} and decrypt starting: {3}'.format(keyword,
 155                      wrap_alphabet, fit, sanitise(plaintext)[:50]))
 156     return (keyword, wrap_alphabet, period), fit
 157
 158
 159 def autokey_sa_break( message
 160                     , min_keylength=2
 161                     , max_keylength=20
 162                     , workers=10
 163                     , initial_temperature=200
 164                     , max_iterations=20000
 165                     , fitness=Pletters
 166                     , chunksize=1
 167                     , result_count=1
 168                     ):
 169     """Break an autokey cipher by simulated annealing
 170     """
 171     worker_args = []
 172     ciphertext = sanitise(message)
 173     for keylength in range(min_keylength, max_keylength+1):
 174         for i in range(workers):
 175             key = cat(random.choice(string.ascii_lowercase) for _ in range(keylength))
 176             worker_args.append((ciphertext, key,
 177                             initial_temperature, max_iterations, fitness))
 178
 179     with Pool() as pool:
 180         breaks = pool.starmap(autokey_sa_break_worker,
 181                               worker_args, chunksize)
 182     if result_count <= 1:
 183         return max(breaks, key=lambda k: k[1])
 184     else:
 185         return sorted(set(breaks), key=lambda k: k[1], reverse=True)[:result_count]
 186
 187
 188 def autokey_sa_break_worker(message, key,
 189                                      t0, max_iterations, fitness):
 190
 191     temperature = t0
 192
 193     dt = t0 / (0.9 * max_iterations)
 194
 195     plaintext = autokey_decipher(message, key)
 196     current_fitness = fitness(plaintext)
 197     current_key = key
 198
 199     best_key = current_key
 200     best_fitness = current_fitness
 201     best_plaintext = plaintext
 202
 203     # print('starting for', max_iterations)
 204     for i in range(max_iterations):
 205         swap_pos = random.randrange(len(current_key))
 206         swap_char = random.choice(string.ascii_lowercase)
 207
 208         new_key = current_key[:swap_pos] + swap_char + current_key[swap_pos+1:]
 209
 210         plaintext = autokey_decipher(message, new_key)
 211         new_fitness = fitness(plaintext)
 212         try:
 213             sa_chance = math.exp((new_fitness - current_fitness) / temperature)
 214         except (OverflowError, ZeroDivisionError):
 215             # print('exception triggered: new_fit {}, current_fit {}, temp {}'.format(new_fitness, current_fitness, temperature))
 216             sa_chance = 0
 217         if (new_fitness > current_fitness or random.random() < sa_chance):
 218             # logger.debug('Simulated annealing: iteration {}, temperature {}, '
 219             #     'current alphabet {}, current_fitness {}, '
 220             #     'best_plaintext {}'.format(i, temperature, current_alphabet,
 221             #     current_fitness, best_plaintext[:50]))
 222
 223             # logger.debug('new_fit {}, current_fit {}, temp {}, sa_chance {}'.format(new_fitness, current_fitness, temperature, sa_chance))
 224 #             print(new_fitness, new_key, plaintext[:100])
 225             current_fitness = new_fitness
 226             current_key = new_key
 227
 228         if current_fitness > best_fitness:
 229             best_key = current_key
 230             best_fitness = current_fitness
 231             best_plaintext = plaintext
 232         if i % 500 == 0:
 233             logger.debug('Simulated annealing: iteration {}, temperature {}, '
 234                 'current key {}, current_fitness {}, '
 235                 'best_plaintext {}'.format(i, temperature, current_key,
 236                 current_fitness, plaintext[:50]))
 237         temperature = max(temperature - dt, 0.001)
 238
 239 #     print(best_key, best_fitness, best_plaintext[:70])
 240     return best_key, best_fitness # current_alphabet, current_fitness
 241
 242
 243 def pocket_enigma_break_by_crib(message, wheel_spec, crib, crib_position):
 244     """Break a pocket enigma using a crib (some plaintext that's expected to
 245     be in a certain position). Returns a list of possible starting wheel
 246     positions that could produce the crib.
 247
 248     >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'h', 0)
 249     ['a', 'f', 'q']
 250     >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'he', 0)
 251     ['a']
 252     >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'll', 2)
 253     ['a']
 254     >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'l', 2)
 255     ['a']
 256     >>> pocket_enigma_break_by_crib('kzpjlzmoga', 1, 'l', 3)
 257     ['a', 'j', 'n']
 258     >>> pocket_enigma_break_by_crib('aaaaa', 1, 'l', 3)
 259     []
 260     """
 261     pe = PocketEnigma(wheel=wheel_spec)
 262     possible_positions = []
 263     for p in string.ascii_lowercase:
 264         pe.set_position(p)
 265         plaintext = pe.decipher(message)
 266         if plaintext[crib_position:crib_position+len(crib)] == crib:
 267             possible_positions += [p]
 268     return possible_positions
 269
 270
 271 def plot_frequency_histogram(freqs, sort_key=None):
 272     x = range(len(freqs))
 273     y = [freqs[l] for l in sorted(freqs, key=sort_key)]
 274     f = plt.figure()
 275     ax = f.add_axes([0.1, 0.1, 0.9, 0.9])
 276     ax.bar(x, y, align='center')
 277     ax.set_xticks(x)
 278     ax.set_xticklabels(sorted(freqs, key=sort_key))
 279     f.show()
 280
 281
 282 if __name__ == "__main__":
 283     import doctest
 284     doctest.testmod()