X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=szyfrow%2Fsupport%2Flanguage_models.py;h=95dbeb277b8c828f62cc6e8ac83938e683491ed0;hb=3350a462f460e81d96c587466f5b6a88cbba1f7e;hp=6898eb4688031c1e21a31cbef64b5a66c3400b83;hpb=6dae779d336388fdec43f684e9607c25423d1572;p=szyfrow.git diff --git a/szyfrow/support/language_models.py b/szyfrow/support/language_models.py index 6898eb4..95dbeb2 100644 --- a/szyfrow/support/language_models.py +++ b/szyfrow/support/language_models.py @@ -4,13 +4,10 @@ import collections import itertools from math import log10 import os - import importlib.resources as pkg_resources import szyfrow.support.norms -from szyfrow.support.utilities import sanitise - - +from szyfrow.support.utilities import sanitise, deduplicate from szyfrow import language_model_files @@ -36,6 +33,33 @@ with pkg_resources.open_text(language_model_files, 'words.txt') as f: keywords = [line.rstrip() for line in f] +def transpositions_of(keyword): + """Finds the transpositions given by a keyword. For instance, the keyword + 'clever' rearranges to 'celrv', so the first column (0) stays first, the + second column (1) moves to third, the third column (2) moves to second, + and so on. + + If passed a tuple, assume it's already a transposition and just return it. + + >>> transpositions_of('clever') + (0, 2, 1, 4, 3) + >>> transpositions_of('fred') + (3, 2, 0, 1) + >>> transpositions_of((3, 2, 0, 1)) + (3, 2, 0, 1) + """ + if isinstance(keyword, tuple): + return keyword + else: + key = deduplicate(keyword) + transpositions = tuple(key.index(l) for l in sorted(key)) + return transpositions + +transpositions = collections.defaultdict(list) +for word in keywords: + transpositions[transpositions_of(word)] += [word] + + def weighted_choice(d): """Generate random item from a dictionary of item counts """