4 from cipherbreak
import *
def _read_text(path):
    """Return the whole contents of the text file at *path*.

    The file is opened inside a ``with`` block so the handle is closed as
    soon as the text has been read, instead of being left to the garbage
    collector.
    """
    with open(path, 'r') as source:
        return source.read()


# Sample text for the trials: the three source files are concatenated in
# this order and the result is passed through sanitise().
corpus = sanitise(''.join([_read_text(filename)
                           for filename in ('shakespeare.txt',
                                            'sherlock-holmes.txt',
                                            'war-and-peace.txt')]))
# Length of the sanitised corpus, computed once; used to bound the random
# sample start position.
corpus_length = len(corpus)
# english_counts passed through norms.euclidean_scale; this is the reference
# frequency table paired with the 'euclidean_scaled' scaling.
euclidean_scaled_english_counts = norms.euclidean_scale(english_counts)

# Candidate comparison metrics, one dict per metric:
#   'func'   - the function from norms that compares two frequency tables
#   'invert' - whether the metric's result is negated when scoring
#   'name'   - label used when naming the scoring function
metrics = [
    {'func': func, 'invert': invert, 'name': name}
    for func, invert, name in (
        (norms.l1, True, 'l1'),
        (norms.l2, True, 'l2'),
        (norms.l3, True, 'l3'),
        (norms.cosine_similarity, False, 'cosine_similarity'),
        # Disabled candidates, kept for reference:
        # (norms.harmonic_mean, True, 'harmonic_mean'),
        # (norms.geometric_mean, True, 'geometric_mean'),
    )
]
# Frequency scalings to try, one dict per scaling:
#   'corpus_frequency' - reference table handed to the metric as its target
#   'scaling'          - function applied to a text's letter frequencies
#   'name'             - label used when naming the scoring function
scalings = [
    dict(corpus_frequency=normalised_english_counts,
         scaling=norms.normalise,
         name='normalised'),
    dict(corpus_frequency=euclidean_scaled_english_counts,
         scaling=norms.euclidean_scale,
         name='euclidean_scaled'),
]

# Sample lengths (in characters of the sanitised corpus) to test; these are
# also the numeric column names of the results CSV.
message_lengths = [100, 50, 30, 20, 10, 5]
def make_frequency_compare_function(target_frequency, frequency_scaling,
                                    metric, invert):
    """Build a scoring function that compares a text with target_frequency.

    Parameters:
        target_frequency: reference frequency table, passed to ``metric`` as
            its first argument.
        frequency_scaling: function applied to ``frequencies(text)`` before
            the comparison.
        metric: two-argument function called as
            ``metric(target_frequency, counts)``.
        invert: when true, the metric's result is multiplied by -1.

    Returns:
        A one-argument function ``frequency_compare(text)`` that returns the
        (possibly negated) metric value for ``text``.
    """
    def frequency_compare(text):
        counts = frequency_scaling(frequencies(text))
        score = metric(target_frequency, counts)
        # Negate only when asked to.  NOTE(review): presumably this makes a
        # larger score mean a closer match for distance-style metrics;
        # confirm against how caesar_break ranks scores.
        if invert:
            score = -1 * score
        return score
    return frequency_compare
def scoring_functions():
    """Return the list of scoring functions to evaluate.

    One entry is built for every (metric, scaling) pair drawn from the
    module-level ``metrics`` and ``scalings`` lists, followed by a final
    entry for ``Pletters``.

    Returns:
        A list of dicts, each with:
            'func': the scoring callable,
            'name': '<metric name> + <scaling name>', or 'Pletters'.
    """
    candidates = [
        {'func': make_frequency_compare_function(s['corpus_frequency'],
                                                 s['scaling'],
                                                 m['func'],
                                                 m['invert']),
         'name': '{} + {}'.format(m['name'], s['name'])}
        # Metrics vary slowest, scalings fastest.
        for m in metrics
        for s in scalings
    ]
    candidates.append({'func': Pletters, 'name': 'Pletters'})
    return candidates
53 for f
in scoring_functions()
54 for l
in message_lengths
]
def eval_one_score(scoring_function, message_length):
    """Run the trials for one scoring function at one message length.

    Each of the ``trials`` iterations takes a random slice of ``corpus`` of
    ``message_length`` characters, enciphers it with a random Caesar key in
    1..25, and asks ``caesar_break`` to recover the key using
    ``scoring_function['func']``.  A trial counts as a success only when the
    recovered key equals the key that was used.

    Parameters:
        scoring_function: dict with a 'name' label and a 'func' callable.
        message_length: length of each sampled plaintext.

    Returns:
        The success count stored in
        ``scores[scoring_function['name']][message_length]`` after the run.

    Side effects:
        Prints the name and length being evaluated, and updates the
        module-level ``scores`` mapping.
    """
    name = scoring_function['name']
    print(name, message_length)
    if name not in scores:
        scores[name] = collections.defaultdict(int)
    for _ in range(trials):
        # randint is inclusive at both ends, so the last possible sample
        # ends exactly at the end of the corpus.
        sample_start = random.randint(0, corpus_length - message_length)
        sample = corpus[sample_start:sample_start + message_length]
        key = random.randint(1, 25)
        ciphertext = caesar_encipher(sample, key)
        # The second element of caesar_break's result is not needed here.
        found_key, _unused = caesar_break(ciphertext, scoring_function['func'])
        if found_key == key:
            scores[name][message_length] += 1
    # Reading through the defaultdict also records a 0 when nothing matched.
    return scores[name][message_length]
# Write the results as CSV: one row per scoring function (sorted by name),
# with a 'name' column followed by one column per message length.
# newline='' is what the csv module requires of files given to a writer;
# without it, extra blank lines can appear on platforms that translate '\n'.
with open('caesar_break_parameter_trials.csv', 'w', newline='') as f:
    writer = csv.DictWriter(f, ['name'] + message_lengths,
                            quoting=csv.QUOTE_NONNUMERIC)
    # Header row, so the numeric message-length columns are identifiable.
    writer.writeheader()
    for scoring in sorted(scores):
        # The label is stored in the row dict itself (this mutates scores)
        # so that DictWriter can fill the 'name' column.
        scores[scoring]['name'] = scoring
        writer.writerow(scores[scoring])