4 from cipherbreak
import *
def _read_text(filename):
    """Return the entire contents of the text file *filename*.

    The file is opened inside a ``with`` block so its handle is closed as
    soon as the read finishes, rather than whenever the garbage collector
    gets round to it.
    """
    with open(filename, 'r') as text_file:
        return text_file.read()


# The corpus is the three source texts concatenated in this order and then
# passed through sanitise(); random samples are sliced out of it later.
corpus = sanitise(''.join([_read_text('shakespeare.txt'),
                           _read_text('sherlock-holmes.txt'),
                           _read_text('war-and-peace.txt')]))
# Cached so the sampling code does not call len() on the corpus every trial.
corpus_length = len(corpus)
# English letter counts passed through norms.euclidean_scale; used as the
# target counts for the 'euclidean_scaled' entry in `scalings`.
euclidean_scaled_english_counts = norms.euclidean_scale(english_counts)

# Every metric under trial: the callable looked up on `norms`, plus the label
# that identifies it in the results.
metrics = [
    {'func': getattr(norms, metric_name), 'name': metric_name}
    for metric_name in (
        'l1',
        'l2',
        'l3',
        'cosine_distance',
        'harmonic_mean',
        'geometric_mean',
        'inverse_log_pl',
    )
]

# Every scaling under trial. 'corpus_frequency' is handed to caesar_break as
# target_counts and 'scaling' as message_frequency_scaling.
scalings = [
    dict(corpus_frequency=normalised_english_counts,
         scaling=norms.normalise,
         name='normalised'),
    dict(corpus_frequency=euclidean_scaled_english_counts,
         scaling=norms.euclidean_scale,
         name='euclidean_scaled'),
    dict(corpus_frequency=normalised_english_counts,
         scaling=norms.identity_scale,
         name='normalised_with_identity'),
]

# Sample sizes (in characters of the sanitised corpus) to try, longest first.
message_lengths = [300, 100, 50, 30, 20, 10, 5]

# Tally keyed by (metric name, scaling name, message length); missing keys
# start at 0.
scores = collections.defaultdict(int)
35 with
open('caesar_break_parameter_trials.csv', 'w') as f
:
36 print('metric,scaling,message_length,score', file = f
)
37 for metric
in metrics
:
38 for scaling
in scalings
:
39 for message_length
in message_lengths
:
40 for i
in range(trials
):
41 sample_start
= random
.randint(0, corpus_length
- message_length
)
42 sample
= corpus
[sample_start
:(sample_start
+ message_length
)]
43 key
= random
.randint(1, 25)
44 sample_ciphertext
= caesar_encipher(sample
, key
)
45 (found_key
, score
) = caesar_break(sample_ciphertext
,
46 metric
=metric
['func'],
47 target_counts
=scaling
['corpus_frequency'],
48 message_frequency_scaling
=scaling
['scaling'])
50 scores
[(metric
['name'], scaling
['name'], message_length
)] += 1
51 print(', '.join([metric
['name'],
54 str(scores
[(metric
['name'], scaling
['name'], message_length
)] / trials
) ]),