# Recovered from a gitweb "blobdiff_plain" export (new file
# find_best_caesar_break_parameters-2.py, index 0000000..88ca06c):
# https://git.njae.me.uk/?a=blobdiff_plain;f=find_best_caesar_break_parameters-2.py;fp=find_best_caesar_break_parameters-2.py;h=88ca06c3f806288a0352c605c62650e4f00d3396;hb=c71e96eb2f7ee215802f30b60dc4c136a60d701d;hp=0000000000000000000000000000000000000000;hpb=36820d02361529d5327ad040432d0198b72baed2;p=cipher-tools.git
"""Measure how often ``caesar_break`` recovers the right key.

For every combination of distance metric, frequency scaling and message
length, ``trials`` random samples are cut from the corpus, enciphered with
a random Caesar key, and handed to ``caesar_break``.  The number of correct
recoveries per combination is written to
``caesar_break_parameter_trials.csv``.
"""

import random
import collections
from cipher import *
from cipherbreak import *
import itertools

print('Loading...')

# Texts concatenated (in this order) to form the sampling corpus.
_CORPUS_FILES = ('shakespeare.txt',
                 'sherlock-holmes.txt',
                 'war-and-peace.txt')


def _read_text(path):
    """Return the full contents of the text file at *path*, closing it after."""
    with open(path, 'r') as f:
        return f.read()


# NOTE(review): sanitise comes from the star imports above; presumably it
# reduces the text to plain letters -- confirm against cipher.py.
corpus = sanitise(''.join([_read_text(path) for path in _CORPUS_FILES]))
corpus_length = len(corpus)

euclidean_scaled_english_counts = norms.euclidean_scale(english_counts)

# Each metric's 'name' is part of the key in `scores`, so names must be
# unique: two metrics sharing a name would have their tallies merged.
metrics = [{'func': norms.l1, 'name': 'l1'},
           {'func': norms.l2, 'name': 'l2'},
           {'func': norms.l3, 'name': 'l3'},
           {'func': norms.cosine_distance, 'name': 'cosine_distance'},
           {'func': norms.harmonic_mean, 'name': 'harmonic_mean'},
           {'func': norms.geometric_mean, 'name': 'geometric_mean'},
           {'func': norms.inverse_log_pl, 'name': 'inverse_log_pl'}]

# Each scaling pairs the reference letter counts handed to caesar_break as
# target_counts with the function it is given as message_frequency_scaling.
scalings = [{'corpus_frequency': normalised_english_counts,
             'scaling': norms.normalise,
             'name': 'normalised'},
            {'corpus_frequency': euclidean_scaled_english_counts,
             'scaling': norms.euclidean_scale,
             'name': 'euclidean_scaled'},
            {'corpus_frequency': normalised_english_counts,
             'scaling': norms.identity_scale,
             'name': 'normalised_with_identity'}]

message_lengths = [300, 100, 50, 30, 20, 10, 5]

# Number of random samples tried per (metric, scaling, length) combination.
trials = 5000

# Maps (metric name, scaling name, message length) -> count of trials in
# which caesar_break returned the key that was actually used.
scores = collections.defaultdict(int)


def eval_all():
    """Run eval_one_parameter_set for every metric/scaling/length combination.

    Results accumulate in the module-level ``scores``; nothing is returned.
    """
    for metric, scaling, message_length in itertools.product(
            metrics, scalings, message_lengths):
        eval_one_parameter_set(metric, scaling, message_length)


def eval_one_parameter_set(metric, scaling, message_length):
    """Run ``trials`` break attempts for one parameter combination.

    Args:
        metric: an entry of ``metrics`` (dict with 'func' and 'name').
        scaling: an entry of ``scalings`` (dict with 'corpus_frequency',
            'scaling' and 'name').
        message_length: number of corpus characters in each sample.

    Returns:
        The number of successful breaks recorded in ``scores`` for this
        combination.
    """
    result_key = (metric['name'], scaling['name'], message_length)
    for _ in range(trials):
        # randint is inclusive at both ends; the largest start still leaves
        # exactly message_length characters before the end of the corpus.
        sample_start = random.randint(0, corpus_length - message_length)
        sample = corpus[sample_start:(sample_start + message_length)]
        key = random.randint(1, 25)
        sample_ciphertext = caesar_encipher(sample, key)
        # caesar_break returns a (key, score) pair; only the key is needed.
        found_key, _ = caesar_break(
            sample_ciphertext,
            metric=metric['func'],
            target_counts=scaling['corpus_frequency'],
            message_frequency_scaling=scaling['scaling'])
        if found_key == key:
            scores[result_key] += 1
    # Reading through the defaultdict also creates a 0 entry when no trial
    # succeeded, so every combination appears in the results file.
    return scores[result_key]


def show_results():
    """Write ``scores`` to 'caesar_break_parameter_trials.csv'.

    Each row is the key tuple's ``str()`` with the surrounding parentheses
    removed, then a comma and the count, e.g. ``'l1', 'normalised', 300,4321``.
    """
    with open('caesar_break_parameter_trials.csv', 'w') as f:
        for (k, v) in scores.items():
            print(str(k)[1:-1], v, sep=",", file=f)


eval_all()
show_results()