4 from cipherbreak
import *
# Build the text corpus that trial messages are sampled from: the three source
# texts are read in full, concatenated in this order, and passed through
# sanitise() (imported from the project; presumably strips the text down to
# plain letters -- confirm against its definition).
corpus_texts = []
for corpus_filename in ('shakespeare.txt',
                        'sherlock-holmes.txt',
                        'war-and-peace.txt'):
    # Use a context manager so each file handle is closed as soon as it has
    # been read, rather than being left open until garbage collection.
    with open(corpus_filename, 'r') as corpus_file:
        corpus_texts.append(corpus_file.read())
corpus = sanitise(''.join(corpus_texts))

# Cached once because it bounds the random start position of every sample.
corpus_length = len(corpus)

# English letter counts rescaled with norms.euclidean_scale, for use as the
# reference frequencies of the 'euclidean_scaled' scaling option.
euclidean_scaled_english_counts = norms.euclidean_scale(english_counts)
# Candidate comparison functions to trial.  For each entry, 'func' is passed to
# caesar_break() as its `metric` argument and 'name' is used both as the key
# into `scores` and as the label written to the results file, so every name
# must be unique and must match its function.
metrics = [
    {'func': norms.l1, 'name': 'l1'},
    {'func': norms.l2, 'name': 'l2'},
    # Previously labelled 'l2', which collided with the entry above and made
    # the l3 results overwrite (and be reported as) the l2 results.
    {'func': norms.l3, 'name': 'l3'},
    {'func': norms.cosine_distance, 'name': 'cosine_distance'},
    # Label corrected from the misspelt 'harminic_mean'.
    {'func': norms.harmonic_mean, 'name': 'harmonic_mean'},
    {'func': norms.geometric_mean, 'name': 'geometric_mean'},
    {'func': norms.inverse_log_pl, 'name': 'inverse_log_pl'},
]
# Scaling options to trial, written as
# (reference frequencies, message scaling function, label) and expanded into
# the dict form the trial loop reads: 'corpus_frequency' is passed to
# caesar_break() as `target_counts`, 'scaling' as `message_frequency_scaling`,
# and 'name' keys the `scores` table.
scalings = [
    {'corpus_frequency': reference_counts,
     'scaling': scale_function,
     'name': label}
    for reference_counts, scale_function, label in (
        (normalised_english_counts, norms.normalise, 'normalised'),
        (euclidean_scaled_english_counts, norms.euclidean_scale,
         'euclidean_scaled'),
        (normalised_english_counts, norms.identity_scale,
         'normalised_with_identity'),
    )
]

# Lengths (in characters of the sanitised corpus) of the sample messages to
# encipher and break, longest first.
message_lengths = [300, 100, 50, 30, 20, 10, 5]
# rebuild with itertools.product and itertools.starmap
# e.g. results = starmap(one_trial, product(metrics, scalings, message_lengths))
# ... which would then be easy to parallelise.
# Progress banner: no trailing newline, and flushed immediately, so the
# per-batch '.' markers printed later appear on the same line as they happen.
print('Starting:', end='', flush=True)
40 with
open('caesar_break_parameter_trials.csv', 'w') as f
:
41 print('metric,scaling,message_length,score', file = f
)
42 scores
= collections
.defaultdict(int)
43 for metric
in metrics
:
44 scores
[metric
['name']] = collections
.defaultdict(int)
45 for scaling
in scalings
:
46 scores
[metric
['name']][scaling
['name']] = collections
.defaultdict(int)
47 for message_length
in message_lengths
:
48 for i
in range(trials
):
49 sample_start
= random
.randint(0, corpus_length
- message_length
)
50 sample
= corpus
[sample_start
:(sample_start
+ message_length
)]
51 key
= random
.randint(1, 25)
52 sample_ciphertext
= caesar_encipher(sample
, key
)
53 (found_key
, score
) = caesar_break(sample_ciphertext
,
54 metric
=metric
['func'],
55 target_counts
=scaling
['corpus_frequency'],
56 message_frequency_scaling
=scaling
['scaling'])
58 scores
[metric
['name']][scaling
['name']][message_length
] += 1
59 print('.', end
='', flush
=True)
60 print(', '.join([metric
['name'],
63 str(scores
[metric
['name']][scaling
['name']][message_length
] / trials
) ]),