3 from support
.utilities
import *
4 from support
.language_models
import *
5 from support
.norms
import *
6 from cipher
.caesar
import *
# Plain-text sources that are concatenated to form the sampling corpus.
_CORPUS_PATHS = (
    'support/shakespeare.txt',
    'support/sherlock-holmes.txt',
    'support/war-and-peace.txt',
)


def _read_text(path):
    """Return the whole contents of the text file at *path*.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the text has been read, instead of being left to the garbage
    collector.
    """
    with open(path) as f:
        return f.read()


# Result of sanitise(cat([...])) over the source texts; random_ciphertext()
# slices its plaintext samples out of this string.
corpus = sanitise(cat([_read_text(path) for path in _CORPUS_PATHS]))
corpus_length = len(corpus)
euclidean_scaled_english_counts = euclidean_scale(english_counts)

# Distance / similarity functions to trial.  Each entry's 'func' and 'invert'
# values are handed to make_frequency_compare_function() as its `metric` and
# `invert` arguments; 'name' is used to label the resulting model.
metrics = [
    {'func': l1, 'invert': True, 'name': 'l1'},
    {'func': l2, 'invert': True, 'name': 'l2'},
    {'func': l3, 'invert': True, 'name': 'l3'},
    {'func': cosine_similarity, 'invert': False, 'name': 'cosine_similarity'},
]

# Reference letter counts paired with the scaling function that is applied to
# a candidate text's letter counts before the two are compared.
scalings = [
    {'corpus_frequency': normalised_english_counts,
     # NOTE(review): this entry had no 'scaling' key in the reviewed text,
     # but the model list reads s['scaling'] for every entry and would raise
     # KeyError without it.  `normalise` is presumed to be the function that
     # produced normalised_english_counts -- confirm against support.norms.
     'scaling': normalise,
     'name': 'normalised'},
    {'corpus_frequency': euclidean_scaled_english_counts,
     'scaling': euclidean_scale,
     'name': 'euclidean_scaled'},
]
# Ciphertext lengths (in characters) trialled for every model.  Kept as a
# list because write_results() concatenates it onto ['name'] to form the CSV
# column names.
message_lengths = [100, 50, 30, 20, 10, 5]
def make_frequency_compare_function(
        target_frequency, frequency_scaling, metric, invert):
    """Build a function that scores a text against a reference frequency table.

    Args:
        target_frequency: reference frequencies, passed as the first
            argument to *metric*.
        frequency_scaling: callable applied to ``frequencies(text)`` before
            the comparison.
        metric: callable ``metric(target_frequency, counts)`` returning a
            number.
        invert: when true, the metric's value is negated before it is
            returned; when false it is returned unchanged.

    Returns:
        A one-argument function ``frequency_compare(text)`` returning the
        (possibly negated) metric value for *text*.
    """
    def frequency_compare(text):
        counts = frequency_scaling(frequencies(text))
        score = metric(target_frequency, counts)
        # Only one of the two score forms may apply per call; the choice is
        # fixed by `invert` when the comparer is built.
        if invert:
            return -1 * score
        return score

    return frequency_compare
# Every model to trial: one frequency-comparison model per (metric, scaling)
# pair, followed by the three n-gram probability models.  Each model is a
# dict with a scoring 'func' and a display 'name'.
#
# NOTE(review): the assignment target, the two `for` clauses and the `+`
# joining the two lists were not present in the reviewed text.  The name
# `models` and the metrics x scalings product are inferred from the m[...] /
# s[...] references below -- confirm against the original file.
models = (
    [{'func': make_frequency_compare_function(
          s['corpus_frequency'], s['scaling'],
          m['func'], m['invert']),
      'name': '{} + {}'.format(m['name'], s['name'])}
     for m in metrics
     for s in scalings]
    + [{'func': Pletters, 'name': 'Pletters'},
       {'func': Pbigrams, 'name': 'Pbigrams'},
       {'func': Ptrigrams, 'name': 'Ptrigrams'}]
)
def random_ciphertext(message_length):
    """Encipher a random slice of the corpus with a random Caesar key.

    Args:
        message_length: number of corpus characters to take as plaintext.

    Returns:
        A ``(key, ciphertext)`` tuple, where ``key`` is the integer drawn
        from ``random.randint(1, 25)`` and ``ciphertext`` is
        ``caesar_encipher(sample, key)``.

    Raises:
        ValueError: if *message_length* is larger than the corpus, so no
            start position exists.  ``random.randint`` would raise the same
            exception type with a less helpful message.
    """
    if message_length > corpus_length:
        raise ValueError(
            'message_length {} exceeds corpus length {}'.format(
                message_length, corpus_length))
    # randint is inclusive at both ends, so the last admissible start still
    # leaves room for a full-length sample.
    sample_start = random.randint(0, corpus_length - message_length)
    sample = corpus[sample_start:(sample_start + message_length)]
    key = random.randint(1, 25)
    ciphertext = caesar_encipher(sample, key)
    return key, ciphertext
def eval_models():
    """Evaluate every model at every message length.

    Returns:
        A dict keyed by model name; each value is a dict mapping a message
        length to the result of ``eval_one_model(model, length)``.

    NOTE(review): the ``def`` line and the outer ``for`` clause were not
    present in the reviewed text.  The function name comes from the
    ``scores = eval_models()`` call; iterating over ``models`` is inferred
    from the ``m['name']`` lookups -- confirm against the original file.
    """
    return {m['name']: {length: eval_one_model(m, length)
                        for length in message_lengths}
            for m in models}
def eval_one_model(model, message_length):
    """Run the configured number of trials for one model and message length.

    Each trial enciphers a random corpus sample with a random key, asks
    ``caesar_break`` to recover the key using the model's scoring function,
    and compares the recovered key with the true one.

    Args:
        model: dict with a scoring callable under 'func' and a label under
            'name'.
        message_length: length of each random ciphertext.

    Returns:
        The number of trials in which the recovered key equalled the true
        key.

    NOTE(review): the tallying and return statements were not present in
    the reviewed text; a plain success count is presumed, because the value
    is later written out as a CSV cell -- confirm against the original file.
    """
    # Progress indicator: one line per (model, length) combination.
    print(model['name'], message_length)
    successes = 0
    for _ in range(trials):
        key, ciphertext = random_ciphertext(message_length)
        # caesar_break returns a pair; only the recovered key is needed here.
        found_key, _fit = caesar_break(ciphertext, model['func'])
        if found_key == key:
            successes += 1
    return successes
def write_results(scores, filename='caesar_break_parameter_trials.csv',
                  lengths=None):
    """Write the score table to a CSV file, one row per scoring model.

    Args:
        scores: dict mapping a model name to a dict of
            ``{message_length: result}``.  It is not modified.
        filename: path of the CSV file to (over)write.  Defaults to the
            file name this script has always used.
        lengths: column keys to write after the 'name' column.  Defaults
            to the module-level ``message_lengths``.

    Rows are written in sorted order of model name.  Non-numeric fields
    are quoted (``csv.QUOTE_NONNUMERIC``).
    """
    if lengths is None:
        lengths = message_lengths
    # newline='' is required by the csv module; without it, platforms that
    # translate '\n' on write end up with an extra blank line per row.
    with open(filename, 'w', newline='') as f:
        writer = csv.DictWriter(f, ['name'] + list(lengths),
                                quoting=csv.QUOTE_NONNUMERIC)
        # NOTE(review): the statement between the writer's construction and
        # the row loop was not present in the reviewed text; a header row
        # is presumed -- confirm against the original file.
        writer.writeheader()
        for scoring in sorted(scores):
            # Build a fresh row rather than inserting 'name' into the
            # caller's dict, so `scores` is left untouched.
            writer.writerow({**scores[scoring], 'name': scoring})
# Run every model at every message length; the resulting table is keyed by
# model name.
scores = eval_models()