X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=norms.py;h=66452940c376658e446bb704c7198ee1ace2be22;hb=3ba8a3b82ccec64a2939708f428354176af6746e;hp=36e7fa4440b41942386e550b4403d2bf7d59e058;hpb=74b5187560b137a68d8d4b8b7f510517dbf51d6a;p=cipher-training.git diff --git a/norms.py b/norms.py index 36e7fa4..6645294 100644 --- a/norms.py +++ b/norms.py @@ -1,10 +1,9 @@ -"""Define a variety of norms for finding distances between vectors""" - import collections +from math import log10 def normalise(frequencies): """Scale a set of frequencies so they sum to one - + >>> sorted(normalise({1: 1, 2: 0}).items()) [(1, 1.0), (2, 0.0)] >>> sorted(normalise({1: 1, 2: 1}).items()) @@ -14,13 +13,13 @@ def normalise(frequencies): >>> sorted(normalise({1: 1, 2: 2, 3: 1}).items()) [(1, 0.25), (2, 0.5), (3, 0.25)] """ - length = sum([f for f in frequencies.values()]) - return collections.defaultdict(int, ((k, v / length) + length = sum(f for f in frequencies.values()) + return collections.defaultdict(int, ((k, v / length) for (k, v) in frequencies.items())) def euclidean_scale(frequencies): """Scale a set of frequencies so they have a unit euclidean length - + >>> sorted(euclidean_scale({1: 1, 2: 0}).items()) [(1, 1.0), (2, 0.0)] >>> sorted(euclidean_scale({1: 1, 2: 1}).items()) # doctest: +ELLIPSIS @@ -31,21 +30,17 @@ def euclidean_scale(frequencies): [(1, 0.408248...), (2, 0.81649658...), (3, 0.408248...)] """ length = sum([f ** 2 for f in frequencies.values()]) ** 0.5 - return collections.defaultdict(int, ((k, v / length) + return collections.defaultdict(int, ((k, v / length) for (k, v) in frequencies.items())) def identity_scale(frequencies): - """Don't scale a set of frequencies. (For use when a function expects a - scaling function but you don't want to supply one.) - """ return frequencies def l2(frequencies1, frequencies2): - """Finds the distances between two frequency profiles, expressed as - dictionaries. + """Finds the distances between two frequency profiles, expressed as dictionaries. Assumes every key in frequencies1 is also in frequencies2 - + >>> l2({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) 0.0 >>> l2({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS @@ -67,7 +62,7 @@ def l2(frequencies1, frequencies2): euclidean_distance = l2 def l1(frequencies1, frequencies2): - """Finds the distances between two frequency profiles, expressed as + """Finds the distances between two frequency profiles, expressed as dictionaries. Assumes every key in frequencies1 is also in frequencies2 >>> l1({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) @@ -87,7 +82,7 @@ def l1(frequencies1, frequencies2): return total def l3(frequencies1, frequencies2): - """Finds the distances between two frequency profiles, expressed as + """Finds the distances between two frequency profiles, expressed as dictionaries. Assumes every key in frequencies1 is also in frequencies2 >>> l3({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) @@ -110,10 +105,10 @@ def l3(frequencies1, frequencies2): return total ** (1/3) def geometric_mean(frequencies1, frequencies2): - """Finds the geometric mean of the absolute differences between two - frequency profiles, expressed as dictionaries. + """Finds the geometric mean of the absolute differences between two frequency profiles, + expressed as dictionaries. Assumes every key in frequencies1 is also in frequencies2 - + >>> geometric_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) 1 >>> geometric_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) @@ -136,8 +131,8 @@ def geometric_mean(frequencies1, frequencies2): return total def harmonic_mean(frequencies1, frequencies2): - """Finds the harmonic mean of the absolute differences between two - frequency profiles, expressed as dictionaries. + """Finds the harmonic mean of the absolute differences between two frequency profiles, + expressed as dictionaries. Assumes every key in frequencies1 is also in frequencies2 >>> harmonic_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) @@ -165,8 +160,7 @@ def harmonic_mean(frequencies1, frequencies2): def cosine_similarity(frequencies1, frequencies2): - """Finds the distances between two frequency profiles, expressed as - dictionaries. + """Finds the distances between two frequency profiles, expressed as dictionaries. Assumes every key in frequencies1 is also in frequencies2 >>> cosine_similarity({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS