from cipher import *
from cipherbreak import *
import itertools
+import csv
corpus = sanitise(''.join([open('shakespeare.txt', 'r').read(),
open('sherlock-holmes.txt', 'r').read(),
euclidean_scaled_english_counts = norms.euclidean_scale(english_counts)
-# def frequency_compare(text, target_frequency, frequency_scaling, metric):
-# counts = frequency_scaling(frequencies(text))
-# return -1 * metric(target_frequency, counts)
-
-# def euclidean_compare(text):
-# return frequency_compare(text, norms.euclidean_scale(english_counts),
-# norms.euclidean_scale, norms.euclidean_distance)
-
# Candidate distance/similarity measures for the experiment. Each entry's
# 'func' and 'invert' are handed to make_frequency_compare_function by
# scoring_functions(); 'name' labels the measure.
# NOTE(review): 'invert' presumably flips the sign for measures where smaller
# means a better match -- confirm against make_frequency_compare_function.
metrics = [
    {'func': measure, 'invert': inverted, 'name': label}
    for measure, inverted, label in (
        (norms.l1, True, 'l1'),
        (norms.l2, True, 'l2'),
        (norms.l3, True, 'l3'),
        (norms.cosine_similarity, False, 'cosine_similarity'),
        # Disabled for this run:
        # (norms.harmonic_mean, True, 'harmonic_mean'),
        # (norms.geometric_mean, True, 'geometric_mean'),
    )
]

# Ways of scaling letter counts, each paired with the reference English
# counts already scaled the same way.
scalings = [
    {'corpus_frequency': reference, 'scaling': scale, 'name': label}
    for reference, scale, label in (
        (normalised_english_counts, norms.normalise, 'normalised'),
        (euclidean_scaled_english_counts, norms.euclidean_scale,
         'euclidean_scaled'),
    )
]

# Message lengths tried; these are also the numeric column headings of the
# results CSV written by show_results().
message_lengths = [100, 50, 30, 20, 10, 5]

# Trial count for the experiment (used outside this view of the file).
trials = 5000
return score
return frequency_compare
-
def scoring_functions():
return [{'func': make_frequency_compare_function(s['corpus_frequency'],
s['scaling'], m['func'], m['invert']),
def show_results():
    """Write the collected trial scores to a CSV file.

    Creates (or overwrites) ``caesar_break_parameter_trials.csv`` in the
    current working directory. The header row is ``name`` followed by every
    entry of the module-level ``message_lengths``; one row follows for each
    key of the module-level ``scores`` mapping, in sorted key order.

    Raises:
        ValueError: raised by ``csv.DictWriter`` if a row of ``scores``
            contains a key that is neither ``'name'`` nor one of
            ``message_lengths``.
    """
    # newline='' hands line-ending control to the csv module; without it,
    # platforms that translate '\n' produce a blank line between rows.
    with open('caesar_break_parameter_trials.csv', 'w', newline='') as f:
        writer = csv.DictWriter(f, ['name'] + message_lengths,
                                quoting=csv.QUOTE_NONNUMERIC)
        writer.writeheader()
        for scoring in sorted(scores):
            # Work on a copy so the shared results in `scores` are not
            # polluted with a non-numeric 'name' entry as a side effect of
            # reporting.
            row = dict(scores[scoring])
            row['name'] = scoring
            # NOTE(review): values are written exactly as stored in `scores`;
            # the earlier print-based output divided each one by `trials`.
            # Confirm raw values are what downstream consumers expect.
            writer.writerow(row)
# Script entry: run the evaluation, then write the results CSV.
# NOTE(review): eval_scores is defined outside this view -- presumably it
# fills the `scores` mapping that show_results reads; verify before reordering.
eval_scores()
show_results()