# caesar_break_parameter_trials.py (cipher-tools.git) — updated for challenge 9
1 import random
2 import csv
3 from support.utilities import *
4 from support.language_models import *
5 from support.norms import *
6 from cipher.caesar import *
7
# Number of random ciphertext samples per (model, message-length) cell.
trials = 100

# One large plaintext corpus built from three public-domain texts.
# NOTE(review): sanitise/cat come from support.utilities; presumably this
# lowercases and strips non-letters — confirm against their definitions.
corpus = sanitise(cat([
    open('support/shakespeare.txt').read(),
    open('support/sherlock-holmes.txt').read(),
    open('support/war-and-peace.txt').read()
]))
corpus_length = len(corpus)

# English letter counts scaled to unit Euclidean length, precomputed once
# so individual trials don't repeat the scaling.
euclidean_scaled_english_counts = euclidean_scale(english_counts)

# Metrics for comparing a ciphertext's letter-frequency vector against
# English. 'invert' is True for distances (smaller is better, so the score
# gets negated) and False for similarities (larger is better).
metrics = [{'func': l1, 'invert': True, 'name': 'l1'},
           {'func': l2, 'invert': True, 'name': 'l2'},
           {'func': l3, 'invert': True, 'name': 'l3'},
           {'func': cosine_similarity, 'invert': False, 'name': 'cosine_similarity'}]

# Frequency-vector scalings, each paired with the matching pre-scaled
# English reference counts.
scalings = [{'corpus_frequency': normalised_english_counts,
             'scaling': normalise,
             'name': 'normalised'},
            {'corpus_frequency': euclidean_scaled_english_counts,
             'scaling': euclidean_scale,
             'name': 'euclidean_scaled'}]

# Ciphertext lengths (in letters) at which each model is trialled.
message_lengths = [100, 50, 30, 20, 10, 5]
32
def make_frequency_compare_function(
        target_frequency, frequency_scaling, metric, invert):
    """Build a scoring closure for caesar_break.

    The returned function scores a candidate plaintext by comparing its
    (scaled) letter frequencies against target_frequency using metric.
    When invert is true the metric is a distance (smaller is better), so
    its result is negated to turn it into a higher-is-better score.
    """
    sign = -1 if invert else 1

    def frequency_compare(text):
        scaled_counts = frequency_scaling(frequencies(text))
        return sign * metric(target_frequency, scaled_counts)

    return frequency_compare
43
# Candidate scoring models: the cross product of every metric with every
# scaling (named "metric + scaling"), plus the three n-gram
# log-probability models from support.language_models.
models = (
    [ {'func': make_frequency_compare_function(
            s['corpus_frequency'], s['scaling'],
            m['func'], m['invert']),
       'name': '{} + {}'.format(m['name'], s['name'])}
      for m in metrics
      for s in scalings ]
    +
    [{'func': Pletters, 'name': 'Pletters'},
     {'func': Pbigrams, 'name': 'Pbigrams'},
     {'func': Ptrigrams, 'name': 'Ptrigrams'}]
)
56
def random_ciphertext(message_length):
    """Encipher a random corpus excerpt with a random Caesar key.

    Picks a uniformly random slice of `message_length` letters from the
    module-level corpus and a non-zero key (1..25), then returns the pair
    (key, ciphertext).
    """
    start = random.randint(0, corpus_length - message_length)
    plaintext = corpus[start:start + message_length]
    shift = random.randint(1, 25)
    return shift, caesar_encipher(plaintext, shift)
63
64
def eval_models():
    """Trial every model at every message length.

    Returns a nested dict: model name -> {message length: success count}.
    """
    results = {}
    for model in models:
        results[model['name']] = {
            length: eval_one_model(model, length)
            for length in message_lengths}
    return results
68
def eval_one_model(model, message_length):
    """Count successful key recoveries for one model at one length.

    Runs `trials` independent random ciphertexts of `message_length`
    letters through caesar_break scored by model['func'], and returns how
    many times the recovered key equals the true key. Prints a progress
    line first.
    """
    print(model['name'], message_length)
    hits = 0
    for _ in range(trials):
        true_key, ciphertext = random_ciphertext(message_length)
        guessed_key, _ = caesar_break(ciphertext, model['func'])
        hits += guessed_key == true_key
    return hits
78
def write_results(scores, lengths=None):
    """Write per-model success counts to caesar_break_parameter_trials.csv.

    scores: mapping of model name -> {message length: success count}.
    lengths: column order for the message-length fields; defaults to the
        module-level message_lengths (backward compatible).

    Rows are emitted sorted by model name, with non-numeric fields quoted.
    Fixes vs. the original: the file is opened with newline='' as the csv
    module requires (prevents blank rows on Windows), and the caller's
    inner score dicts are no longer mutated by injecting a 'name' key.
    """
    if lengths is None:
        lengths = message_lengths
    with open('caesar_break_parameter_trials.csv', 'w', newline='') as f:
        writer = csv.DictWriter(f, ['name'] + list(lengths),
                                quoting=csv.QUOTE_NONNUMERIC)
        writer.writeheader()
        for name in sorted(scores):
            # Build the row instead of mutating scores[name] in place.
            writer.writerow({'name': name, **scores[name]})
87
# Script entry point: trial every model at every message length, then
# dump the success counts to caesar_break_parameter_trials.csv.
scores = eval_models()
write_results(scores)