# Build the sample corpus by concatenating the three source texts (in this
# order) and passing the combined text through sanitise().  Each file is read
# inside a `with` block so its handle is closed as soon as it has been read,
# rather than being left open until garbage collection.
corpus_texts = []
for corpus_filename in ('shakespeare.txt',
                        'sherlock-holmes.txt',
                        'war-and-peace.txt'):
    with open(corpus_filename, 'r') as corpus_file:
        corpus_texts.append(corpus_file.read())
corpus = sanitise(''.join(corpus_texts))

# Length of the sanitised corpus; used below to bound random sample starts.
corpus_length = len(corpus)

# english_counts rescaled with norms.scale; used as one of the candidate
# target-frequency tables in the trials below.
scaled_english_counts = norms.scale(english_counts)
# Parameter space explored by the trials.  Every candidate is written next to
# the name used for it in the results, and the parallel value/name lists are
# derived from those pairs so the two always stay in the same order.
_metric_options = [
    ('l1', norms.l1),
    ('l2', norms.l2),
    ('l3', norms.l3),
    ('cosine_distance', norms.cosine_distance),
    ('harmonic_mean', norms.harmonic_mean),
    ('geometric_mean', norms.geometric_mean),
]
_corpus_frequency_options = [
    ('normalised_english_counts', normalised_english_counts),
    ('scaled_english_counts', scaled_english_counts),
]
_scaling_options = [
    ('normalise', norms.normalise),
    ('scale', norms.scale),
]

# Candidate values, indexed in step with the *_names lists below.
metrics = [option for label, option in _metric_options]
corpus_frequencies = [option for label, option in _corpus_frequency_options]
scalings = [option for label, option in _scaling_options]

# Lengths of the corpus slices that are enciphered and then broken.
message_lengths = [300, 100, 50, 30, 20, 10, 5]

# Names used as keys in the results and in the printed rows.
metric_names = [label for label, option in _metric_options]
corpus_frequency_names = [label for label, option in _corpus_frequency_options]
scaling_names = [label for label, option in _scaling_options]
# Run the Caesar-break trials for every combination of metric, target
# frequency table and message-frequency scaling.
#
# Results are stored as
#     scores[metric_name][corpus_frequency_name][scaling_name][message_length]
# holding the number of trials (out of `trials`) in which caesar_break
# returned the key that was actually used to encipher the sample.
scores = collections.defaultdict(int)
for metric_name, metric in zip(metric_names, metrics):
    scores[metric_name] = collections.defaultdict(int)
    for corpus_frequency_name, target_frequencies in zip(
            corpus_frequency_names, corpus_frequencies):
        scores[metric_name][corpus_frequency_name] = \
            collections.defaultdict(int)
        for scaling_name, scaling in zip(scaling_names, scalings):
            # Success counts for this parameter combination, keyed by
            # message length.
            successes = collections.defaultdict(int)
            scores[metric_name][corpus_frequency_name][scaling_name] = \
                successes
            for message_length in message_lengths:
                for _ in range(trials):
                    # random.randint is inclusive at both ends, so the last
                    # permitted start still leaves a full-length sample.
                    sample_start = random.randint(
                        0, corpus_length - message_length)
                    sample = corpus[
                        sample_start:(sample_start + message_length)]
                    key = random.randint(1, 25)
                    sample_ciphertext = caesar_encipher(sample, key)
                    (found_key, _fit) = caesar_break(
                        sample_ciphertext,
                        metric=metric,
                        target_frequencies=target_frequencies,
                        message_frequency_scaling=scaling)
                    # Count the trial only when the break recovered the real
                    # key.  Incrementing unconditionally would make every
                    # reported success rate exactly 1.0.
                    if found_key == key:
                        successes[message_length] += 1
                # Progress row: parameter names and the success rate for this
                # message length.
                # NOTE(review): the row does not include message_length
                # itself, so rows for different lengths look alike -- confirm
                # whether that is intended.
                print(', '.join([metric_name,
                                 corpus_frequency_name,
                                 scaling_name,
                                 str(successes[message_length] / trials)]))
48 with
open('caesar_break_parameter_trials.csv', 'w') as f
:
49 for metric
in range(len(metrics
)):
50 for corpus_freqency
in range(len(corpus_frequencies
)):
51 for scaling
in range(len(scalings
)):
52 for message_length
in message_lengths
:
53 print(', '.join([metric_names
[metric
],
54 corpus_frequency_names
[corpus_freqency
],
55 scaling_names
[scaling
],
57 str(scores
[metric_names
[metric
]][corpus_frequency_names
[corpus_freqency
]][scaling_names
[scaling
]][message_length
] / trials
) ]),