projects
/
cipher-training.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Moved a slide around, added project directories
[cipher-training.git]
/
norms.py
diff --git a/norms.py b/norms.py
index 3d6d37df7f2e4f9f576c9cd4ef1a2341aa48d016..b8e4bf1ef82c8b8ea6f85c0e3c3597d90b7d9b8b 100644
(file)
--- a/norms.py
+++ b/norms.py
@@ -159,17 +159,17 @@ def harmonic_mean(frequencies1, frequencies2):
return len(frequencies1) / total
return len(frequencies1) / total
-def cosine_distance(frequencies1, frequencies2):
+def cosine_similarity(frequencies1, frequencies2):
"""Finds the distances between two frequency profiles, expressed as dictionaries.
Assumes every key in frequencies1 is also in frequencies2
"""Finds the distances between two frequency profiles, expressed as dictionaries.
Assumes every key in frequencies1 is also in frequencies2
- >>> cosine_distance({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
+ >>> cosine_similarity({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
1.0000000000...
1.0000000000...
- >>> cosine_distance({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
+ >>> cosine_similarity({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
1.0000000000...
1.0000000000...
- >>> cosine_distance({'a':0, 'b':2, 'c':0}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
+ >>> cosine_similarity({'a':0, 'b':2, 'c':0}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
0.5773502691...
0.5773502691...
- >>> cosine_distance({'a':0, 'b':1}, {'a':1, 'b':1}) # doctest: +ELLIPSIS
+ >>> cosine_similarity({'a':0, 'b':1}, {'a':1, 'b':1}) # doctest: +ELLIPSIS
0.7071067811...
"""
numerator = 0
0.7071067811...
"""
numerator = 0
@@ -179,7 +179,7 @@ def cosine_distance(frequencies1, frequencies2):
numerator += frequencies1[k] * frequencies2[k]
length1 += frequencies1[k]**2
for k in frequencies2.keys():
numerator += frequencies1[k] * frequencies2[k]
length1 += frequencies1[k]**2
for k in frequencies2.keys():
- length2 += frequencies2[k]
+ length2 += frequencies2[k]**2
return numerator / (length1 ** 0.5 * length2 ** 0.5)
return numerator / (length1 ** 0.5 * length2 ** 0.5)