X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=norms.py;h=2c8eb70e0401b163ba1ecce6858aec82820b9d53;hb=36820d02361529d5327ad040432d0198b72baed2;hp=c9cafc4f718b05b697a20e4f8b5f55085336bc88;hpb=d86a492d92a9dfa8f27db824ac67acd641b24714;p=cipher-tools.git diff --git a/norms.py b/norms.py index c9cafc4..2c8eb70 100644 --- a/norms.py +++ b/norms.py @@ -1,4 +1,5 @@ import collections +from math import log10 def normalise(frequencies): """Scale a set of frequencies so they sum to one @@ -32,23 +33,9 @@ def euclidean_scale(frequencies): return collections.defaultdict(int, ((k, v / length) for (k, v) in frequencies.items())) - -def scale(frequencies): - """Scale a set of frequencies so the largest is 1 - - >>> sorted(scale({1: 1, 2: 0}).items()) - [(1, 1.0), (2, 0.0)] - >>> sorted(scale({1: 1, 2: 1}).items()) - [(1, 1.0), (2, 1.0)] - >>> sorted(scale({1: 1, 2: 1, 3: 1}).items()) - [(1, 1.0), (2, 1.0), (3, 1.0)] - >>> sorted(scale({1: 1, 2: 2, 3: 1}).items()) - [(1, 0.5), (2, 1.0), (3, 0.5)] - """ - largest = max(frequencies.values()) - return collections.defaultdict(int, ((k, v / largest) - for (k, v) in frequencies.items())) - +def identity_scale(frequencies): + return frequencies + def l2(frequencies1, frequencies2): """Finds the distances between two frequency profiles, expressed as dictionaries. @@ -196,6 +183,14 @@ def cosine_distance(frequencies1, frequencies2): return 1 - (numerator / (length1 ** 0.5 * length2 ** 0.5)) +def log_pl(frequencies1, frequencies2): + return sum([frequencies2[l] * log10(frequencies1[l]) for l in frequencies1.keys()]) + +def inverse_log_pl(frequencies1, frequencies2): + return -log_pl(frequencies1, frequencies2) + + + def index_of_coincidence(frequencies): """Finds the (expected) index of coincidence given a set of frequencies """