# caesar_break_parameter_trials.py (cipher-tools.git) — updated for challenge 9
1 import random
2 import csv
3 from support.utilities import *
4 from support.language_models import *
5 from support.norms import *
6 from cipher.caesar import *
7
# Number of random ciphertext samples per (model, message-length) cell.
trials = 100

# One large plaintext corpus built from three public-domain texts.
# NOTE(review): sanitise/cat come from support.utilities; presumably this
# lowercases and strips non-letters — confirm against their definitions.
corpus = sanitise(cat([
    open('support/shakespeare.txt').read(),
    open('support/sherlock-holmes.txt').read(),
    open('support/war-and-peace.txt').read()
]))
corpus_length = len(corpus)

# English letter counts scaled to unit Euclidean length, precomputed once
# so individual trials don't repeat the scaling.
euclidean_scaled_english_counts = euclidean_scale(english_counts)

# Metrics for comparing a ciphertext's letter-frequency vector against
# English. 'invert' is True for distances (smaller is better, so the score
# gets negated) and False for similarities (larger is better).
metrics = [{'func': l1, 'invert': True, 'name': 'l1'},
           {'func': l2, 'invert': True, 'name': 'l2'},
           {'func': l3, 'invert': True, 'name': 'l3'},
           {'func': cosine_similarity, 'invert': False, 'name': 'cosine_similarity'}]

# Frequency-vector scalings, each paired with the matching pre-scaled
# English reference counts.
scalings = [{'corpus_frequency': normalised_english_counts,
             'scaling': normalise,
             'name': 'normalised'},
            {'corpus_frequency': euclidean_scaled_english_counts,
             'scaling': euclidean_scale,
             'name': 'euclidean_scaled'}]

# Ciphertext lengths (in letters) at which each model is trialled.
message_lengths = [100, 50, 30, 20, 10, 5]
32
def make_frequency_compare_function(
        target_frequency, frequency_scaling, metric, invert):
    """Build a scoring closure for caesar_break.

    The returned function scores a candidate plaintext by comparing its
    (scaled) letter frequencies against target_frequency using metric.
    When invert is true the metric is a distance (smaller is better), so
    its result is negated to turn it into a higher-is-better score.
    """
    sign = -1 if invert else 1

    def frequency_compare(text):
        scaled_counts = frequency_scaling(frequencies(text))
        return sign * metric(target_frequency, scaled_counts)

    return frequency_compare
43
# Candidate scoring models: the cross product of every metric with every
# scaling (named "metric + scaling"), plus the three n-gram
# log-probability models from support.language_models.
models = (
    [ {'func': make_frequency_compare_function(
            s['corpus_frequency'], s['scaling'],
            m['func'], m['invert']),
       'name': '{} + {}'.format(m['name'], s['name'])}
      for m in metrics
      for s in scalings ]
    +
    [{'func': Pletters, 'name': 'Pletters'},
     {'func': Pbigrams, 'name': 'Pbigrams'},
     {'func': Ptrigrams, 'name': 'Ptrigrams'}]
)
56
def random_ciphertext(message_length):
    """Encipher a random corpus excerpt with a random Caesar key.

    Picks a uniformly random slice of `message_length` letters from the
    module-level corpus and a non-zero key (1..25), then returns the pair
    (key, ciphertext).
    """
    start = random.randint(0, corpus_length - message_length)
    plaintext = corpus[start:start + message_length]
    shift = random.randint(1, 25)
    return shift, caesar_encipher(plaintext, shift)
63
64
def eval_models():
    """Trial every model at every message length.

    Returns a nested dict: model name -> {message length: success count}.
    """
    results = {}
    for model in models:
        results[model['name']] = {
            length: eval_one_model(model, length)
            for length in message_lengths}
    return results
68
def eval_one_model(model, message_length):
    """Count successful key recoveries for one model at one length.

    Runs `trials` independent random ciphertexts of `message_length`
    letters through caesar_break scored by model['func'], and returns how
    many times the recovered key equals the true key. Prints a progress
    line first.
    """
    print(model['name'], message_length)
    hits = 0
    for _ in range(trials):
        true_key, ciphertext = random_ciphertext(message_length)
        guessed_key, _ = caesar_break(ciphertext, model['func'])
        hits += guessed_key == true_key
    return hits
78
def write_results(scores, lengths=None):
    """Write per-model success counts to caesar_break_parameter_trials.csv.

    scores: mapping of model name -> {message length: success count}.
    lengths: column order for the message-length fields; defaults to the
        module-level message_lengths (backward compatible).

    Rows are emitted sorted by model name, with non-numeric fields quoted.
    Fixes vs. the original: the file is opened with newline='' as the csv
    module requires (prevents blank rows on Windows), and the caller's
    inner score dicts are no longer mutated by injecting a 'name' key.
    """
    if lengths is None:
        lengths = message_lengths
    with open('caesar_break_parameter_trials.csv', 'w', newline='') as f:
        writer = csv.DictWriter(f, ['name'] + list(lengths),
                                quoting=csv.QUOTE_NONNUMERIC)
        writer.writeheader()
        for name in sorted(scores):
            # Build the row instead of mutating scores[name] in place.
            writer.writerow({'name': name, **scores[name]})
87
# Script entry point: trial every model at every message length, then
# dump the success counts to caesar_break_parameter_trials.csv.
scores = eval_models()
write_results(scores)