from cipher import *
from cipherbreak import *
import itertools
+import csv
corpus = sanitise(''.join([open('shakespeare.txt', 'r').read(),
open('sherlock-holmes.txt', 'r').read(),
euclidean_scaled_english_counts = norms.euclidean_scale(english_counts)
-# def frequency_compare(text, target_frequency, frequency_scaling, metric):
-# counts = frequency_scaling(frequencies(text))
-# return -1 * metric(target_frequency, counts)
-
-# def euclidean_compare(text):
-# return frequency_compare(text, norms.euclidean_scale(english_counts),
-# norms.euclidean_scale, norms.euclidean_distance)
-
metrics = [{'func': norms.l1, 'invert': True, 'name': 'l1'},
{'func': norms.l2, 'invert': True, 'name': 'l2'},
{'func': norms.l3, 'invert': True, 'name': 'l3'},
return score
return frequency_compare
-
def scoring_functions():
return [{'func': make_frequency_compare_function(s['corpus_frequency'],
s['scaling'], m['func'], m['invert']),
print(scoring_function['name'], message_length)
if scoring_function['name'] not in scores:
scores[scoring_function['name']] = collections.defaultdict(int)
+ scores[scoring_function['name']]['name'] = scoring_function['name']
for _ in range(trials):
sample_start = random.randint(0, corpus_length - message_length)
sample = corpus[sample_start:(sample_start + message_length)]
def show_results():
    """Write the collected scores to 'caesar_break_parameter_trials.csv'.

    The file gets one header row ('name' followed by each entry of the
    module-level `message_lengths`) and then one row per key of the
    module-level `scores`, in sorted key order. Each row is the mapping
    stored under that key, written as-is: no normalisation (for example
    dividing by `trials`) is applied here.
    """
    # The csv module emits its own line terminators, so the file has to be
    # opened with newline=''; otherwise platforms that translate newlines on
    # write produce doubled carriage returns (blank rows between records).
    with open('caesar_break_parameter_trials.csv', 'w', newline='') as f:
        writer = csv.DictWriter(f, ['name'] + message_lengths,
                                quoting=csv.QUOTE_NONNUMERIC)
        writer.writeheader()
        # Iterating a dict yields its keys, so .keys() is unnecessary.
        for scoring in sorted(scores):
            writer.writerow(scores[scoring])
# Script driver: run eval_scores() (defined outside this view; presumably it
# fills `scores` -- confirm), then write the results CSV via show_results().
eval_scores()
show_results()