Added keybase signature
[cipher-tools.git] / norms.py
index 2c8eb70e0401b163ba1ecce6858aec82820b9d53..3d6d37df7f2e4f9f576c9cd4ef1a2341aa48d016 100644 (file)
--- a/norms.py
+++ b/norms.py
@@ -56,7 +56,7 @@ def l2(frequencies1, frequencies2):
     1.0
     """
     total = 0
-    for k in frequencies1.keys():
+    for k in frequencies1:
         total += (frequencies1[k] - frequencies2[k]) ** 2
     return total ** 0.5
 euclidean_distance = l2
@@ -77,7 +77,7 @@ def l1(frequencies1, frequencies2):
     1
     """
     total = 0
-    for k in frequencies1.keys():
+    for k in frequencies1:
         total += abs(frequencies1[k] - frequencies2[k])
     return total
 
@@ -100,7 +100,7 @@ def l3(frequencies1, frequencies2):
     0.6299605249...
     """
     total = 0
-    for k in frequencies1.keys():
+    for k in frequencies1:
         total += abs(frequencies1[k] - frequencies2[k]) ** 3
     return total ** (1/3)
 
@@ -126,7 +126,7 @@ def geometric_mean(frequencies1, frequencies2):
     0.009259259...
     """
     total = 1
-    for k in frequencies1.keys():
+    for k in frequencies1:
         total *= abs(frequencies1[k] - frequencies2[k])
     return total
 
@@ -152,7 +152,7 @@ def harmonic_mean(frequencies1, frequencies2):
     0.2
     """
     total = 0
-    for k in frequencies1.keys():
+    for k in frequencies1:
         if abs(frequencies1[k] - frequencies2[k]) == 0:
             return 0
         total += 1 / abs(frequencies1[k] - frequencies2[k])
@@ -164,38 +164,25 @@ def cosine_distance(frequencies1, frequencies2):
     Assumes every key in frequencies1 is also in frequencies2
 
     >>> cosine_distance({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
-    -2.22044604...e-16
+    1.0000000000...
     >>> cosine_distance({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
-    -2.22044604...e-16
+    1.0000000000...
     >>> cosine_distance({'a':0, 'b':2, 'c':0}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
-    0.4226497308...
+    0.5773502691...
     >>> cosine_distance({'a':0, 'b':1}, {'a':1, 'b':1}) # doctest: +ELLIPSIS
-    0.29289321881...
+    0.7071067811...
     """
     numerator = 0
     length1 = 0
     length2 = 0
-    for k in frequencies1.keys():
+    for k in frequencies1:
         numerator += frequencies1[k] * frequencies2[k]
         length1 += frequencies1[k]**2
     for k in frequencies2.keys():
         length2 += frequencies2[k]
-    return 1 - (numerator / (length1 ** 0.5 * length2 ** 0.5))
+    return numerator / (length1 ** 0.5 * length2 ** 0.5)
 
 
-def log_pl(frequencies1, frequencies2):
-    return sum([frequencies2[l] * log10(frequencies1[l])  for l in frequencies1.keys()])
-
-def inverse_log_pl(frequencies1, frequencies2):
-    return -log_pl(frequencies1, frequencies2)
-
-
-
-def index_of_coincidence(frequencies):
-    """Finds the (expected) index of coincidence given a set of frequencies
-    """
-    return sum([f ** 2 for f in frequencies.values()]) * len(frequencies.keys())
-
 
 if __name__ == "__main__":
     import doctest