diff --git a/language_models.py b/language_models.py
index 19f886f..8824bca 100644
--- a/language_models.py
+++ b/language_models.py
@@ -5,8 +5,9 @@ import collections
 import unicodedata
 import itertools
 from math import log10
+import os
 
-unaccent_specials = ''.maketrans({"’": "'"})
+unaccent_specials = ''.maketrans({"’": "'", '“': '"', '”': '"'})
 
 def letters(text):
     """Remove all non-alphabetic characters from a text
@@ -56,7 +57,7 @@ def sanitise(text):
 def datafile(name, sep='\t'):
     """Read key,value pairs from file.
     """
-    with open(name, 'r') as f:
+    with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), name), 'r') as f:
         for line in f:
             splits = line.split(sep)
             yield [splits[0], int(splits[1])]
@@ -70,7 +71,7 @@ normalised_english_bigram_counts = norms.normalise(english_bigram_counts)
 english_trigram_counts = collections.Counter(dict(datafile('count_3l.txt')))
 normalised_english_trigram_counts = norms.normalise(english_trigram_counts)
 
-with open('words.txt', 'r') as f:
+with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'words.txt'), 'r') as f:
     keywords = [line.rstrip() for line in f]
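
The two changes above are independent: the translation table now also straightens curly double quotes, and each data file (e.g. count_3l.txt, words.txt) is resolved relative to the module itself rather than the caller's working directory, so importing language_models from elsewhere no longer fails with FileNotFoundError. A minimal sketch of the same idea, using a hypothetical module_relative helper (not part of the patch) that stands in for the repeated os.path.join(...) expression:

    import os

    def module_relative(name):
        # Hypothetical helper: the same os.path.join/dirname/realpath expression
        # the patch writes inline, factored out here for illustration only.
        return os.path.join(os.path.dirname(os.path.realpath(__file__)), name)

    # Same mapping as the extended unaccent_specials table: straighten curly quotes.
    unaccent_specials = ''.maketrans({"’": "'", '“': '"', '”': '"'})

    if __name__ == '__main__':
        print(module_relative('count_3l.txt'))        # absolute path next to this file
        print('“don’t”'.translate(unaccent_specials)) # -> "don't"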