X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=norms.py;h=f9fc1d73aebde753a52cf5038bfa6a85db2f54ed;hb=e6332a16567643e66c2a491b94994f9482384d34;hp=744cbe4d9d4336f574d7b0885fed973189acd7d9;hpb=26f5de0a23dd94ded412f6f507910ac5e26ea2b6;p=cipher-tools.git diff --git a/norms.py b/norms.py index 744cbe4..f9fc1d7 100644 --- a/norms.py +++ b/norms.py @@ -13,7 +13,8 @@ def normalise(frequencies): [(1, 0.4082482904638631), (2, 0.8164965809277261), (3, 0.4082482904638631)] """ length = sum([f ** 2 for f in frequencies.values()]) ** 0.5 - return collections.defaultdict(int, ((k, v / length) for (k, v) in frequencies.items())) + return collections.defaultdict(int, ((k, v / length) + for (k, v) in frequencies.items())) def scale(frequencies): """Scale a set of frequencies so the largest is 1 @@ -28,7 +29,8 @@ def scale(frequencies): [(1, 0.5), (2, 1.0), (3, 0.5)] """ largest = max(frequencies.values()) - return collections.defaultdict(int, ((k, v / largest) for (k, v) in frequencies.items())) + return collections.defaultdict(int, ((k, v / largest) + for (k, v) in frequencies.items())) def l2(frequencies1, frequencies2): @@ -96,6 +98,55 @@ def l3(frequencies1, frequencies2): total += abs(frequencies1[k] - frequencies2[k]) ** 3 return total ** (1/3) +def geometric_mean(frequencies1, frequencies2): + """Finds the geometric mean of the absolute differences between two frequency profiles, + expressed as dictionaries. + Assumes every key in frequencies1 is also in frequencies2 + + >>> geometric_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) + 1 + >>> geometric_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) + 1 + >>> geometric_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':5, 'c':1}) + 3 + >>> geometric_mean(normalise({'a':2, 'b':2, 'c':2}), normalise({'a':1, 'b':5, 'c':1})) + 0.057022248808851934 + >>> geometric_mean(normalise({'a':2, 'b':2, 'c':2}), normalise({'a':1, 'b':1, 'c':1})) + 0.0 + >>> geometric_mean(normalise({'a':2, 'b':2, 'c':2}), normalise({'a':1, 'b':1, 'c':0})) + 0.009720703533656434 + """ + total = 1 + for k in frequencies1.keys(): + total *= abs(frequencies1[k] - frequencies2[k]) + return total + +def harmonic_mean(frequencies1, frequencies2): + """Finds the harmonic mean of the absolute differences between two frequency profiles, + expressed as dictionaries. + Assumes every key in frequencies1 is also in frequencies2 + + >>> harmonic_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) + 1.0 + >>> harmonic_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) + 1.0 + >>> harmonic_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':5, 'c':1}) + 1.2857142857142858 + >>> harmonic_mean(normalise({'a':2, 'b':2, 'c':2}), normalise({'a':1, 'b':5, 'c':1})) + 0.3849001794597505 + >>> harmonic_mean(normalise({'a':2, 'b':2, 'c':2}), normalise({'a':1, 'b':1, 'c':1})) + 0 + >>> harmonic_mean(normalise({'a':2, 'b':2, 'c':2}), normalise({'a':1, 'b':1, 'c':0})) + 0.17497266360581604 + """ + total = 0 + for k in frequencies1.keys(): + if abs(frequencies1[k] - frequencies2[k]) == 0: + return 0 + total += 1 / abs(frequencies1[k] - frequencies2[k]) + return len(frequencies1) / total + + def cosine_distance(frequencies1, frequencies2): """Finds the distances between two frequency profiles, expressed as dictionaries. Assumes every key in frequencies1 is also in frequencies2