X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=szyfrow%2Fsupport%2Flanguage_models.py;h=6898eb4688031c1e21a31cbef64b5a66c3400b83;hb=deed628b25281baff05b8ec8310a7458ac298d7b;hp=271b8dba023f93f35ab9c6b93d51e10d36fcdcb4;hpb=27c8005f6dea0026887b80a01b5f93a8f1b3c2b2;p=szyfrow.git diff --git a/szyfrow/support/language_models.py b/szyfrow/support/language_models.py index 271b8db..6898eb4 100644 --- a/szyfrow/support/language_models.py +++ b/szyfrow/support/language_models.py @@ -5,13 +5,20 @@ import itertools from math import log10 import os +import importlib.resources as pkg_resources + import szyfrow.support.norms from szyfrow.support.utilities import sanitise + +from szyfrow import language_model_files + + def datafile(name, sep='\t'): """Read key,value pairs from file. """ - with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), name), 'r') as f: + with pkg_resources.open_text(language_model_files, name) as f: + # with open(p name), 'r') as f: for line in f: splits = line.split(sep) yield [splits[0], int(splits[1])] @@ -25,7 +32,7 @@ normalised_english_bigram_counts = szyfrow.support.norms.normalise(english_bigra english_trigram_counts = collections.Counter(dict(datafile('count_3l.txt'))) normalised_english_trigram_counts = szyfrow.support.norms.normalise(english_trigram_counts) -with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'words.txt'), 'r') as f: +with pkg_resources.open_text(language_model_files, 'words.txt') as f: keywords = [line.rstrip() for line in f]