Done challenge 4
[cipher-tools.git] / norms.py
index 2c8eb70e0401b163ba1ecce6858aec82820b9d53..eb436c3b8163141a3ada1f1f02f8be741d6f47fb 100644 (file)
--- a/norms.py
+++ b/norms.py
@@ -13,7 +13,7 @@ def normalise(frequencies):
     >>> sorted(normalise({1: 1, 2: 2, 3: 1}).items())
     [(1, 0.25), (2, 0.5), (3, 0.25)]
     """
     >>> sorted(normalise({1: 1, 2: 2, 3: 1}).items())
     [(1, 0.25), (2, 0.5), (3, 0.25)]
     """
-    length = sum([f for f in frequencies.values()])
+    length = sum(f for f in frequencies.values())
     return collections.defaultdict(int, ((k, v / length) 
         for (k, v) in frequencies.items()))
 
     return collections.defaultdict(int, ((k, v / length) 
         for (k, v) in frequencies.items()))
 
@@ -56,7 +56,7 @@ def l2(frequencies1, frequencies2):
     1.0
     """
     total = 0
     1.0
     """
     total = 0
-    for k in frequencies1.keys():
+    for k in frequencies1:
         total += (frequencies1[k] - frequencies2[k]) ** 2
     return total ** 0.5
 euclidean_distance = l2
         total += (frequencies1[k] - frequencies2[k]) ** 2
     return total ** 0.5
 euclidean_distance = l2
@@ -77,7 +77,7 @@ def l1(frequencies1, frequencies2):
     1
     """
     total = 0
     1
     """
     total = 0
-    for k in frequencies1.keys():
+    for k in frequencies1:
         total += abs(frequencies1[k] - frequencies2[k])
     return total
 
         total += abs(frequencies1[k] - frequencies2[k])
     return total
 
@@ -100,7 +100,7 @@ def l3(frequencies1, frequencies2):
     0.6299605249...
     """
     total = 0
     0.6299605249...
     """
     total = 0
-    for k in frequencies1.keys():
+    for k in frequencies1:
         total += abs(frequencies1[k] - frequencies2[k]) ** 3
     return total ** (1/3)
 
         total += abs(frequencies1[k] - frequencies2[k]) ** 3
     return total ** (1/3)
 
@@ -126,7 +126,7 @@ def geometric_mean(frequencies1, frequencies2):
     0.009259259...
     """
     total = 1
     0.009259259...
     """
     total = 1
-    for k in frequencies1.keys():
+    for k in frequencies1:
         total *= abs(frequencies1[k] - frequencies2[k])
     return total
 
         total *= abs(frequencies1[k] - frequencies2[k])
     return total
 
@@ -152,50 +152,37 @@ def harmonic_mean(frequencies1, frequencies2):
     0.2
     """
     total = 0
     0.2
     """
     total = 0
-    for k in frequencies1.keys():
+    for k in frequencies1:
         if abs(frequencies1[k] - frequencies2[k]) == 0:
             return 0
         total += 1 / abs(frequencies1[k] - frequencies2[k])
     return len(frequencies1) / total
 
 
         if abs(frequencies1[k] - frequencies2[k]) == 0:
             return 0
         total += 1 / abs(frequencies1[k] - frequencies2[k])
     return len(frequencies1) / total
 
 
-def cosine_distance(frequencies1, frequencies2):
+def cosine_similarity(frequencies1, frequencies2):
     """Finds the distances between two frequency profiles, expressed as dictionaries.
     Assumes every key in frequencies1 is also in frequencies2
 
     """Finds the distances between two frequency profiles, expressed as dictionaries.
     Assumes every key in frequencies1 is also in frequencies2
 
-    >>> cosine_distance({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
-    -2.22044604...e-16
-    >>> cosine_distance({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
-    -2.22044604...e-16
-    >>> cosine_distance({'a':0, 'b':2, 'c':0}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
-    0.4226497308...
-    >>> cosine_distance({'a':0, 'b':1}, {'a':1, 'b':1}) # doctest: +ELLIPSIS
-    0.29289321881...
+    >>> cosine_similarity({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
+    1.0000000000...
+    >>> cosine_similarity({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
+    1.0000000000...
+    >>> cosine_similarity({'a':0, 'b':2, 'c':0}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
+    0.5773502691...
+    >>> cosine_similarity({'a':0, 'b':1}, {'a':1, 'b':1}) # doctest: +ELLIPSIS
+    0.7071067811...
     """
     numerator = 0
     length1 = 0
     length2 = 0
     """
     numerator = 0
     length1 = 0
     length2 = 0
-    for k in frequencies1.keys():
+    for k in frequencies1:
         numerator += frequencies1[k] * frequencies2[k]
         length1 += frequencies1[k]**2
         numerator += frequencies1[k] * frequencies2[k]
         length1 += frequencies1[k]**2
-    for k in frequencies2.keys():
-        length2 += frequencies2[k]
-    return 1 - (numerator / (length1 ** 0.5 * length2 ** 0.5))
+    for k in frequencies2:
+        length2 += frequencies2[k]**2
+    return numerator / (length1 ** 0.5 * length2 ** 0.5)
 
 
 
 
-def log_pl(frequencies1, frequencies2):
-    return sum([frequencies2[l] * log10(frequencies1[l])  for l in frequencies1.keys()])
-
-def inverse_log_pl(frequencies1, frequencies2):
-    return -log_pl(frequencies1, frequencies2)
-
-
-
-def index_of_coincidence(frequencies):
-    """Finds the (expected) index of coincidence given a set of frequencies
-    """
-    return sum([f ** 2 for f in frequencies.values()]) * len(frequencies.keys())
-
 
 if __name__ == "__main__":
     import doctest
 
 if __name__ == "__main__":
     import doctest