"""English letter, bigram and trigram frequency tables.

Recovered from a gitweb blobdiff of ``counts.py`` (cipher-tools.git, added as
a new file).

At import time this module reads three tab-separated count files, resolved
relative to the current working directory, and exposes both the raw counts
and the result of passing them through ``norms.normalise``:

* ``count_1l.txt`` -> ``english_counts`` / ``normalised_english_counts``
* ``count_2l.txt`` -> ``english_bigram_counts`` /
  ``normalised_english_bigram_counts``
* ``count_3l.txt`` -> ``english_trigram_counts`` /
  ``normalised_english_trigram_counts``

NOTE(review): ``norms.normalise`` is defined in another module; its exact
scaling is not visible from here.
"""
import bisect
import collections
import itertools
import random

import norms


def _load_counts(filename):
    """Read a tab-separated count file into a ``defaultdict(int)``.

    Each non-blank line must hold exactly two tab-separated fields: the
    n-gram and its integer count.

    Args:
        filename: Path of the count file to read.

    Returns:
        A ``collections.defaultdict(int)`` mapping each n-gram to its count;
        n-grams absent from the file read as 0.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line does not have exactly two
            tab-separated fields, or the count is not an integer.
    """
    counts = collections.defaultdict(int)
    with open(filename, 'r') as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing empty line at EOF), which
            # would otherwise break the two-field unpacking below.
            if not line.strip():
                continue
            (ngram, count) = line.split("\t")
            # int() ignores the trailing newline left on the count field.
            counts[ngram] = int(count)
    return counts


english_counts = _load_counts('count_1l.txt')
normalised_english_counts = norms.normalise(english_counts)

english_bigram_counts = _load_counts('count_2l.txt')
normalised_english_bigram_counts = norms.normalise(english_bigram_counts)

english_trigram_counts = _load_counts('count_3l.txt')
normalised_english_trigram_counts = norms.normalise(english_trigram_counts)


# Sketch (not executed): weighted random choice using a cumulative
# distribution. Presumably the reason itertools, random and bisect are
# imported above -- confirm before removing either.
#
# choices, weights = zip(*weighted_choices)
# cumdist = list(itertools.accumulate(weights))
# x = random.random() * cumdist[-1]
# choices[bisect.bisect(cumdist, x)]