5 <meta name=
"viewport" content=
"width=device-width, initial-scale=1, minimum-scale=1" />
6 <meta name=
"generator" content=
"pdoc 0.9.2" />
7 <title>szyfrow.support.norms API documentation
</title>
8 <meta name=
"description" content=
"Various norms, for calculating the distances between two frequency
10 <link rel=
"preload stylesheet" as=
"style" href=
"https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/11.0.1/sanitize.min.css" integrity=
"sha256-PK9q560IAAa6WVRRh76LtCaI8pjTJ2z11v0miyNNjrs=" crossorigin
>
11 <link rel=
"preload stylesheet" as=
"style" href=
"https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/11.0.1/typography.min.css" integrity=
"sha256-7l/o7C8jubJiy74VsKTidCy1yBkRtiUGbVkYBylBqUg=" crossorigin
>
12 <link rel=
"stylesheet preload" as=
"style" href=
"https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.1.1/styles/github.min.css" crossorigin
>
13 <style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:
1.5em}#content{padding:
20px}#sidebar{padding:
30px;overflow:hidden}#sidebar
> *:last-child{margin-bottom:
2cm}.http-server-breadcrumbs{font-size:
130%;margin:
0 0 15px
0}#footer{font-size:
.75em;padding:
5px
30px;border-top:
1px solid #ddd;text-align:right}#footer p{margin:
0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:
30px}h1,h2,h3,h4,h5{font-weight:
300}h1{font-size:
2.5em;line-height:
1.1em}h2{font-size:
1.75em;margin:
1em
0 .50em
0}h3{font-size:
1.4em;margin:
25px
0 10px
0}h4{margin:
0;font-size:
105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:
.2em
0}a{color:#
058;text-decoration:none;transition:color
.3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^=
"header-"]{margin-top:
2em}.ident{color:#
900}pre code{background:#f8f8f8;font-size:
.8em;line-height:
1.4em}code{background:#f2f2f1;padding:
1px
4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:
0;border-top:
1px solid #ccc;border-bottom:
1px solid #ccc;margin:
1em
0;padding:
1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:
10%}#http-server-module-list p{margin-top:
0}.toc ul,#index{list-style-type:none;margin:
0;padding:
0}#index code{background:transparent}#index h3{border-bottom:
1px solid #ddd}#index ul{padding:
0}#index h4{margin-top:
.6em;font-weight:bold}@media (min-width:
200ex){#index .two-column{column-count:
2}}@media (min-width:
300ex){#index .two-column{column-count:
3}}dl{margin-bottom:
2em}dl dl:last-child{margin-bottom:
4em}dd{margin:
0 0 1em
3em}#header-classes + dl
> dd{margin-bottom:
3em}dd dd{margin-left:
2em}dd p{margin:
10px
0}.name{background:#eee;font-weight:bold;font-size:
.85em;padding:
5px
10px;display:inline-block;min-width:
40%}.name:hover{background:#e0e0e0}dt:target .name{background:var(--highlight-color)}.name
> span:first-child{white-space:nowrap}.name.class
> span:nth-child(
2){margin-left:
.4em}.inherited{color:#
999;border-left:
5px solid #eee;padding-left:
1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:
400;font-size:
1.25em}.desc h3{font-size:
1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#
666;text-align:right;font-weight:
400;font-size:
.8em;text-transform:uppercase}.source summary
> *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:
1em}.source pre{max-height:
500px;overflow:auto;margin:
0}.source pre code{font-size:
12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\
2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:
1em}img{max-width:
100%}td{padding:
0 .5em}.admonition{padding:
.1em
.5em;margin-bottom:
1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}
</style>
14 <style media=
"screen and (min-width: 700px)">@media screen and (min-width:
700px){#sidebar{width:
30%;height:
100vh;overflow:auto;position:sticky;top:
0}#content{width:
70%;max-width:
100ch;padding:
3em
4em;border-left:
1px solid #ddd}pre code{font-size:
1em}.item .name{font-size:
1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:
1.5em}.toc
> ul
> li{margin-top:
.5em}}
</style>
15 <style media=
"print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#
000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:
" (" attr(href)
")";font-size:
90%}a[href][title]:after{content:none}abbr[title]:after{content:
" (" attr(title)
")"}.ir a:after,a[href^=
"javascript:"]:after,a[href^=
"#"]:after{content:
""}pre,blockquote{border:
1px solid #
999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:
100% !important}@page{margin:
0.5cm}p,h2,h3{orphans:
3;widows:
3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}
</style>
16 <script defer
src=
"https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.1.1/highlight.min.js" integrity=
"sha256-Uv3H6lx7dJmRfRvH8TH6kJD1TSK1aFcwgx+mdg3epi8=" crossorigin
></script>
17 <script>window.addEventListener('DOMContentLoaded', () =
> hljs.initHighlighting())
</script>
21 <article id=
"content">
23 <h1 class=
"title">Module
<code>szyfrow.support.norms
</code></h1>
25 <section id=
"section-intro">
26 <p>Various norms, for calculating the distances between two frequency
28 <details class=
"source">
30 <span>Expand source code
</span>
32 <pre><code class=
"python">"""Various norms, for calculating the distances between two frequency
37 from math import log10
39 def lp(v1, v2=None, p=
2):
40 """Find the L_p norm. If passed one vector, find the length of that vector.
41 If passed two vectors, find the length of the difference between them.
44 vec = {k: abs(v1[k] - v2[k]) for k in (v1.keys() | v2.keys())}
47 return sum(v ** p for v in vec.values()) ** (
1.0 / p)
50 """Finds the distances between two frequency profiles, expressed as
51 dictionaries. Assumes every key in frequencies1 is also in frequencies2
53 >>> l1({
'a
':
1,
'b
':
1,
'c
':
1}, {
'a
':
1,
'b
':
1,
'c
':
1})
55 >>> l1({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
1,
'c
':
1})
57 >>> l1(normalise({
'a
':
2,
'b
':
2,
'c
':
2}), normalise({
'a
':
1,
'b
':
1,
'c
':
1}))
59 >>> l1({
'a
':
0,
'b
':
2,
'c
':
0}, {
'a
':
1,
'b
':
1,
'c
':
1})
61 >>> l1({
'a
':
0,
'b
':
1}, {
'a
':
1,
'b
':
1})
67 """Finds the distances between two frequency profiles, expressed as dictionaries.
68 Assumes every key in frequencies1 is also in frequencies2
70 >>> l2({
'a
':
1,
'b
':
1,
'c
':
1}, {
'a
':
1,
'b
':
1,
'c
':
1})
72 >>> l2({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
1,
'c
':
1}) # doctest: +ELLIPSIS
74 >>> l2(normalise({
'a
':
2,
'b
':
2,
'c
':
2}), normalise({
'a
':
1,
'b
':
1,
'c
':
1}))
76 >>> l2({
'a
':
0,
'b
':
2,
'c
':
0}, {
'a
':
1,
'b
':
1,
'c
':
1}) # doctest: +ELLIPSIS
78 >>> l2(normalise({
'a
':
0,
'b
':
2,
'c
':
0}), \
79 normalise({
'a
':
1,
'b
':
1,
'c
':
1})) # doctest: +ELLIPSIS
81 >>> l2({
'a
':
0,
'b
':
1}, {
'a
':
1,
'b
':
1})
87 """Finds the distances between two frequency profiles, expressed as
88 dictionaries. Assumes every key in frequencies1 is also in frequencies2
90 >>> l3({
'a
':
1,
'b
':
1,
'c
':
1}, {
'a
':
1,
'b
':
1,
'c
':
1})
92 >>> l3({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
1,
'c
':
1}) # doctest: +ELLIPSIS
94 >>> l3({
'a
':
0,
'b
':
2,
'c
':
0}, {
'a
':
1,
'b
':
1,
'c
':
1}) # doctest: +ELLIPSIS
96 >>> l3(normalise({
'a
':
0,
'b
':
2,
'c
':
0}), \
97 normalise({
'a
':
1,
'b
':
1,
'c
':
1})) # doctest: +ELLIPSIS
99 >>> l3({
'a
':
0,
'b
':
1}, {
'a
':
1,
'b
':
1})
101 >>> l3(normalise({
'a
':
0,
'b
':
1}), normalise({
'a
':
1,
'b
':
1})) # doctest: +ELLIPSIS
106 def linf(v1, v2=None):
107 """Finds the distances between two frequency profiles, expressed as
108 dictionaries. Assumes every key in frequencies1 is also in frequencies2
"""
110 vec = {k: abs(v1[k] - v2[k]) for k in (v1.keys() | v2.keys())}
113 return max(v for v in vec.values())
116 def scale(frequencies, norm=l2):
117 length = norm(frequencies)
118 return collections.defaultdict(int,
119 {k: v / length for k, v in frequencies.items()})
122 """Scale a set of frequencies so they have a unit euclidean length
124 >>> sorted(euclidean_scale({
1:
1,
2:
0}).items())
126 >>> sorted(euclidean_scale({
1:
1,
2:
1}).items()) # doctest: +ELLIPSIS
127 [(
1,
0.7071067...), (
2,
0.7071067...)]
128 >>> sorted(euclidean_scale({
1:
1,
2:
1,
3:
1}).items()) # doctest: +ELLIPSIS
129 [(
1,
0.577350...), (
2,
0.577350...), (
3,
0.577350...)]
130 >>> sorted(euclidean_scale({
1:
1,
2:
2,
3:
1}).items()) # doctest: +ELLIPSIS
131 [(
1,
0.408248...), (
2,
0.81649658...), (
3,
0.408248...)]
136 """Scale a set of frequencies so they sum to one
138 >>> sorted(normalise({
1:
1,
2:
0}).items())
140 >>> sorted(normalise({
1:
1,
2:
1}).items())
142 >>> sorted(normalise({
1:
1,
2:
1,
3:
1}).items()) # doctest: +ELLIPSIS
143 [(
1,
0.333...), (
2,
0.333...), (
3,
0.333...)]
144 >>> sorted(normalise({
1:
1,
2:
2,
3:
1}).items())
145 [(
1,
0.25), (
2,
0.5), (
3,
0.25)]
150 euclidean_distance = l2
151 euclidean_scale = l2_scale
154 def geometric_mean(frequencies1, frequencies2):
155 """Finds the geometric mean of the absolute differences between two frequency profiles,
156 expressed as dictionaries.
157 Assumes every key in frequencies1 is also in frequencies2
159 >>> geometric_mean({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
1,
'c
':
1})
161 >>> geometric_mean({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
1,
'c
':
1})
163 >>> geometric_mean({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
5,
'c
':
1})
165 >>> geometric_mean(normalise({
'a
':
2,
'b
':
2,
'c
':
2}), \
166 normalise({
'a
':
1,
'b
':
5,
'c
':
1})) # doctest: +ELLIPSIS
168 >>> geometric_mean(normalise({
'a
':
2,
'b
':
2,
'c
':
2}), \
169 normalise({
'a
':
1,
'b
':
1,
'c
':
1})) # doctest: +ELLIPSIS
171 >>> geometric_mean(normalise({
'a
':
2,
'b
':
2,
'c
':
2}), \
172 normalise({
'a
':
1,
'b
':
1,
'c
':
0})) # doctest: +ELLIPSIS
176 for k in frequencies1:
177 total *= abs(frequencies1[k] - frequencies2[k])
180 def harmonic_mean(frequencies1, frequencies2):
181 """Finds the harmonic mean of the absolute differences between two frequency profiles,
182 expressed as dictionaries.
183 Assumes every key in frequencies1 is also in frequencies2
185 >>> harmonic_mean({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
1,
'c
':
1})
187 >>> harmonic_mean({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
1,
'c
':
1})
189 >>> harmonic_mean({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
5,
'c
':
1}) # doctest: +ELLIPSIS
191 >>> harmonic_mean(normalise({
'a
':
2,
'b
':
2,
'c
':
2}), \
192 normalise({
'a
':
1,
'b
':
5,
'c
':
1})) # doctest: +ELLIPSIS
194 >>> harmonic_mean(normalise({
'a
':
2,
'b
':
2,
'c
':
2}), \
195 normalise({
'a
':
1,
'b
':
1,
'c
':
1})) # doctest: +ELLIPSIS
197 >>> harmonic_mean(normalise({
'a
':
2,
'b
':
2,
'c
':
2}), \
198 normalise({
'a
':
1,
'b
':
1,
'c
':
0})) # doctest: +ELLIPSIS
202 for k in frequencies1:
203 if abs(frequencies1[k] - frequencies2[k]) ==
0:
205 total +=
1.0 / abs(frequencies1[k] - frequencies2[k])
206 return len(frequencies1) / total
209 def cosine_similarity(frequencies1, frequencies2):
210 """Finds the distances between two frequency profiles, expressed as dictionaries.
211 Assumes every key in frequencies1 is also in frequencies2
213 >>> cosine_similarity({
'a
':
1,
'b
':
1,
'c
':
1}, {
'a
':
1,
'b
':
1,
'c
':
1}) # doctest: +ELLIPSIS
215 >>> cosine_similarity({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
1,
'c
':
1}) # doctest: +ELLIPSIS
217 >>> cosine_similarity({
'a
':
0,
'b
':
2,
'c
':
0}, {
'a
':
1,
'b
':
1,
'c
':
1}) # doctest: +ELLIPSIS
219 >>> cosine_similarity({
'a
':
0,
'b
':
1}, {
'a
':
1,
'b
':
1}) # doctest: +ELLIPSIS
225 for k in frequencies1:
226 numerator += frequencies1[k] * frequencies2[k]
227 length1 += frequencies1[k]**
2
228 for k in frequencies2:
229 length2 += frequencies2[k]**
2
230 return numerator / (length1 **
0.5 * length2 **
0.5)
234 if __name__ ==
"__main__
":
236 doctest.testmod()
</code></pre>
244 <h2 class=
"section-title" id=
"header-functions">Functions
</h2>
246 <dt id=
"szyfrow.support.norms.cosine_similarity"><code class=
"name flex">
247 <span>def
<span class=
"ident">cosine_similarity
</span></span>(
<span>frequencies1, frequencies2)
</span>
250 <div class=
"desc"><p>Finds the distances between two frequency profiles, expressed as dictionaries.
251 Assumes every key in frequencies1 is also in frequencies2
</p>
252 <pre><code class=
"language-python-repl">>>> cosine_similarity({'a':
1, 'b':
1, 'c':
1}, {'a':
1, 'b':
1, 'c':
1}) # doctest: +ELLIPSIS
254 >>> cosine_similarity({'a':
2, 'b':
2, 'c':
2}, {'a':
1, 'b':
1, 'c':
1}) # doctest: +ELLIPSIS
256 >>> cosine_similarity({'a':
0, 'b':
2, 'c':
0}, {'a':
1, 'b':
1, 'c':
1}) # doctest: +ELLIPSIS
258 >>> cosine_similarity({'a':
0, 'b':
1}, {'a':
1, 'b':
1}) # doctest: +ELLIPSIS
261 <details class=
"source">
263 <span>Expand source code
</span>
265 <pre><code class=
"python">def cosine_similarity(frequencies1, frequencies2):
266 """Finds the distances between two frequency profiles, expressed as dictionaries.
267 Assumes every key in frequencies1 is also in frequencies2
269 >>> cosine_similarity({
'a
':
1,
'b
':
1,
'c
':
1}, {
'a
':
1,
'b
':
1,
'c
':
1}) # doctest: +ELLIPSIS
271 >>> cosine_similarity({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
1,
'c
':
1}) # doctest: +ELLIPSIS
273 >>> cosine_similarity({
'a
':
0,
'b
':
2,
'c
':
0}, {
'a
':
1,
'b
':
1,
'c
':
1}) # doctest: +ELLIPSIS
275 >>> cosine_similarity({
'a
':
0,
'b
':
1}, {
'a
':
1,
'b
':
1}) # doctest: +ELLIPSIS
281 for k in frequencies1:
282 numerator += frequencies1[k] * frequencies2[k]
283 length1 += frequencies1[k]**
2
284 for k in frequencies2:
285 length2 += frequencies2[k]**
2
286 return numerator / (length1 **
0.5 * length2 **
0.5)
</code></pre>
289 <dt id=
"szyfrow.support.norms.euclidean_distance"><code class=
"name flex">
290 <span>def
<span class=
"ident">euclidean_distance
</span></span>(
<span>v1, v2=None)
</span>
293 <div class=
"desc"><p>Finds the distances between two frequency profiles, expressed as dictionaries.
294 Assumes every key in frequencies1 is also in frequencies2
</p>
295 <pre><code class=
"language-python-repl">>>> l2({'a':
1, 'b':
1, 'c':
1}, {'a':
1, 'b':
1, 'c':
1})
297 >>> l2({'a':
2, 'b':
2, 'c':
2}, {'a':
1, 'b':
1, 'c':
1}) # doctest: +ELLIPSIS
299 >>> l2(normalise({'a':
2, 'b':
2, 'c':
2}), normalise({'a':
1, 'b':
1, 'c':
1}))
301 >>> l2({'a':
0, 'b':
2, 'c':
0}, {'a':
1, 'b':
1, 'c':
1}) # doctest: +ELLIPSIS
303 >>> l2(normalise({'a':
0, 'b':
2, 'c':
0}), normalise({'a':
1, 'b':
1, 'c':
1})) # doctest: +ELLIPSIS
305 >>> l2({'a':
0, 'b':
1}, {'a':
1, 'b':
1})
308 <details class=
"source">
310 <span>Expand source code
</span>
312 <pre><code class=
"python">def l2(v1, v2=None):
313 """Finds the distances between two frequency profiles, expressed as dictionaries.
314 Assumes every key in frequencies1 is also in frequencies2
316 >>> l2({
'a
':
1,
'b
':
1,
'c
':
1}, {
'a
':
1,
'b
':
1,
'c
':
1})
318 >>> l2({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
1,
'c
':
1}) # doctest: +ELLIPSIS
320 >>> l2(normalise({
'a
':
2,
'b
':
2,
'c
':
2}), normalise({
'a
':
1,
'b
':
1,
'c
':
1}))
322 >>> l2({
'a
':
0,
'b
':
2,
'c
':
0}, {
'a
':
1,
'b
':
1,
'c
':
1}) # doctest: +ELLIPSIS
324 >>> l2(normalise({
'a
':
0,
'b
':
2,
'c
':
0}), \
325 normalise({
'a
':
1,
'b
':
1,
'c
':
1})) # doctest: +ELLIPSIS
327 >>> l2({
'a
':
0,
'b
':
1}, {
'a
':
1,
'b
':
1})
330 return lp(v1, v2,
2)
</code></pre>
333 <dt id=
"szyfrow.support.norms.euclidean_scale"><code class=
"name flex">
334 <span>def
<span class=
"ident">euclidean_scale
</span></span>(
<span>f)
</span>
337 <div class=
"desc"><p>Scale a set of frequencies so they have a unit euclidean length
</p>
338 <pre><code class=
"language-python-repl">>>> sorted(euclidean_scale({
1:
1,
2:
0}).items())
340 >>> sorted(euclidean_scale({
1:
1,
2:
1}).items()) # doctest: +ELLIPSIS
341 [(
1,
0.7071067...), (
2,
0.7071067...)]
342 >>> sorted(euclidean_scale({
1:
1,
2:
1,
3:
1}).items()) # doctest: +ELLIPSIS
343 [(
1,
0.577350...), (
2,
0.577350...), (
3,
0.577350...)]
344 >>> sorted(euclidean_scale({
1:
1,
2:
2,
3:
1}).items()) # doctest: +ELLIPSIS
345 [(
1,
0.408248...), (
2,
0.81649658...), (
3,
0.408248...)]
347 <details class=
"source">
349 <span>Expand source code
</span>
351 <pre><code class=
"python">def l2_scale(f):
352 """Scale a set of frequencies so they have a unit euclidean length
354 >>> sorted(euclidean_scale({
1:
1,
2:
0}).items())
356 >>> sorted(euclidean_scale({
1:
1,
2:
1}).items()) # doctest: +ELLIPSIS
357 [(
1,
0.7071067...), (
2,
0.7071067...)]
358 >>> sorted(euclidean_scale({
1:
1,
2:
1,
3:
1}).items()) # doctest: +ELLIPSIS
359 [(
1,
0.577350...), (
2,
0.577350...), (
3,
0.577350...)]
360 >>> sorted(euclidean_scale({
1:
1,
2:
2,
3:
1}).items()) # doctest: +ELLIPSIS
361 [(
1,
0.408248...), (
2,
0.81649658...), (
3,
0.408248...)]
363 return scale(f, l2)
</code></pre>
366 <dt id=
"szyfrow.support.norms.geometric_mean"><code class=
"name flex">
367 <span>def
<span class=
"ident">geometric_mean
</span></span>(
<span>frequencies1, frequencies2)
</span>
370 <div class=
"desc"><p>Finds the geometric mean of the absolute differences between two frequency profiles,
371 expressed as dictionaries.
372 Assumes every key in frequencies1 is also in frequencies2
</p>
373 <pre><code class=
"language-python-repl">>>> geometric_mean({'a':
2, 'b':
2, 'c':
2}, {'a':
1, 'b':
1, 'c':
1})
375 >>> geometric_mean({'a':
2, 'b':
2, 'c':
2}, {'a':
1, 'b':
1, 'c':
1})
377 >>> geometric_mean({'a':
2, 'b':
2, 'c':
2}, {'a':
1, 'b':
5, 'c':
1})
379 >>> geometric_mean(normalise({'a':
2, 'b':
2, 'c':
2}), normalise({'a':
1, 'b':
5, 'c':
1})) # doctest: +ELLIPSIS
381 >>> geometric_mean(normalise({'a':
2, 'b':
2, 'c':
2}), normalise({'a':
1, 'b':
1, 'c':
1})) # doctest: +ELLIPSIS
383 >>> geometric_mean(normalise({'a':
2, 'b':
2, 'c':
2}), normalise({'a':
1, 'b':
1, 'c':
0})) # doctest: +ELLIPSIS
386 <details class=
"source">
388 <span>Expand source code
</span>
390 <pre><code class=
"python">def geometric_mean(frequencies1, frequencies2):
391 """Finds the geometric mean of the absolute differences between two frequency profiles,
392 expressed as dictionaries.
393 Assumes every key in frequencies1 is also in frequencies2
395 >>> geometric_mean({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
1,
'c
':
1})
397 >>> geometric_mean({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
1,
'c
':
1})
399 >>> geometric_mean({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
5,
'c
':
1})
401 >>> geometric_mean(normalise({
'a
':
2,
'b
':
2,
'c
':
2}), \
402 normalise({
'a
':
1,
'b
':
5,
'c
':
1})) # doctest: +ELLIPSIS
404 >>> geometric_mean(normalise({
'a
':
2,
'b
':
2,
'c
':
2}), \
405 normalise({
'a
':
1,
'b
':
1,
'c
':
1})) # doctest: +ELLIPSIS
407 >>> geometric_mean(normalise({
'a
':
2,
'b
':
2,
'c
':
2}), \
408 normalise({
'a
':
1,
'b
':
1,
'c
':
0})) # doctest: +ELLIPSIS
412 for k in frequencies1:
413 total *= abs(frequencies1[k] - frequencies2[k])
414 return total
</code></pre>
417 <dt id=
"szyfrow.support.norms.harmonic_mean"><code class=
"name flex">
418 <span>def
<span class=
"ident">harmonic_mean
</span></span>(
<span>frequencies1, frequencies2)
</span>
421 <div class=
"desc"><p>Finds the harmonic mean of the absolute differences between two frequency profiles,
422 expressed as dictionaries.
423 Assumes every key in frequencies1 is also in frequencies2
</p>
424 <pre><code class=
"language-python-repl">>>> harmonic_mean({'a':
2, 'b':
2, 'c':
2}, {'a':
1, 'b':
1, 'c':
1})
426 >>> harmonic_mean({'a':
2, 'b':
2, 'c':
2}, {'a':
1, 'b':
1, 'c':
1})
428 >>> harmonic_mean({'a':
2, 'b':
2, 'c':
2}, {'a':
1, 'b':
5, 'c':
1}) # doctest: +ELLIPSIS
430 >>> harmonic_mean(normalise({'a':
2, 'b':
2, 'c':
2}), normalise({'a':
1, 'b':
5, 'c':
1})) # doctest: +ELLIPSIS
432 >>> harmonic_mean(normalise({'a':
2, 'b':
2, 'c':
2}), normalise({'a':
1, 'b':
1, 'c':
1})) # doctest: +ELLIPSIS
434 >>> harmonic_mean(normalise({'a':
2, 'b':
2, 'c':
2}), normalise({'a':
1, 'b':
1, 'c':
0})) # doctest: +ELLIPSIS
437 <details class=
"source">
439 <span>Expand source code
</span>
441 <pre><code class=
"python">def harmonic_mean(frequencies1, frequencies2):
442 """Finds the harmonic mean of the absolute differences between two frequency profiles,
443 expressed as dictionaries.
444 Assumes every key in frequencies1 is also in frequencies2
446 >>> harmonic_mean({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
1,
'c
':
1})
448 >>> harmonic_mean({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
1,
'c
':
1})
450 >>> harmonic_mean({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
5,
'c
':
1}) # doctest: +ELLIPSIS
452 >>> harmonic_mean(normalise({
'a
':
2,
'b
':
2,
'c
':
2}), \
453 normalise({
'a
':
1,
'b
':
5,
'c
':
1})) # doctest: +ELLIPSIS
455 >>> harmonic_mean(normalise({
'a
':
2,
'b
':
2,
'c
':
2}), \
456 normalise({
'a
':
1,
'b
':
1,
'c
':
1})) # doctest: +ELLIPSIS
458 >>> harmonic_mean(normalise({
'a
':
2,
'b
':
2,
'c
':
2}), \
459 normalise({
'a
':
1,
'b
':
1,
'c
':
0})) # doctest: +ELLIPSIS
463 for k in frequencies1:
464 if abs(frequencies1[k] - frequencies2[k]) ==
0:
466 total +=
1.0 / abs(frequencies1[k] - frequencies2[k])
467 return len(frequencies1) / total
</code></pre>
470 <dt id=
"szyfrow.support.norms.l1"><code class=
"name flex">
471 <span>def
<span class=
"ident">l1
</span></span>(
<span>v1, v2=None)
</span>
474 <div class=
"desc"><p>Finds the distances between two frequency profiles, expressed as
475 dictionaries. Assumes every key in frequencies1 is also in frequencies2
</p>
476 <pre><code class=
"language-python-repl">>>> l1({'a':
1, 'b':
1, 'c':
1}, {'a':
1, 'b':
1, 'c':
1})
478 >>> l1({'a':
2, 'b':
2, 'c':
2}, {'a':
1, 'b':
1, 'c':
1})
480 >>> l1(normalise({'a':
2, 'b':
2, 'c':
2}), normalise({'a':
1, 'b':
1, 'c':
1}))
482 >>> l1({'a':
0, 'b':
2, 'c':
0}, {'a':
1, 'b':
1, 'c':
1})
484 >>> l1({'a':
0, 'b':
1}, {'a':
1, 'b':
1})
487 <details class=
"source">
489 <span>Expand source code
</span>
491 <pre><code class=
"python">def l1(v1, v2=None):
492 """Finds the distances between two frequency profiles, expressed as
493 dictionaries. Assumes every key in frequencies1 is also in frequencies2
495 >>> l1({
'a
':
1,
'b
':
1,
'c
':
1}, {
'a
':
1,
'b
':
1,
'c
':
1})
497 >>> l1({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
1,
'c
':
1})
499 >>> l1(normalise({
'a
':
2,
'b
':
2,
'c
':
2}), normalise({
'a
':
1,
'b
':
1,
'c
':
1}))
501 >>> l1({
'a
':
0,
'b
':
2,
'c
':
0}, {
'a
':
1,
'b
':
1,
'c
':
1})
503 >>> l1({
'a
':
0,
'b
':
1}, {
'a
':
1,
'b
':
1})
506 return lp(v1, v2,
1)
</code></pre>
509 <dt id=
"szyfrow.support.norms.l1_scale"><code class=
"name flex">
510 <span>def
<span class=
"ident">l1_scale
</span></span>(
<span>f)
</span>
513 <div class=
"desc"><p>Scale a set of frequencies so they sum to one
</p>
514 <pre><code class=
"language-python-repl">>>> sorted(normalise({
1:
1,
2:
0}).items())
516 >>> sorted(normalise({
1:
1,
2:
1}).items())
518 >>> sorted(normalise({
1:
1,
2:
1,
3:
1}).items()) # doctest: +ELLIPSIS
519 [(
1,
0.333...), (
2,
0.333...), (
3,
0.333...)]
520 >>> sorted(normalise({
1:
1,
2:
2,
3:
1}).items())
521 [(
1,
0.25), (
2,
0.5), (
3,
0.25)]
523 <details class=
"source">
525 <span>Expand source code
</span>
527 <pre><code class=
"python">def l1_scale(f):
528 """Scale a set of frequencies so they sum to one
530 >>> sorted(normalise({
1:
1,
2:
0}).items())
532 >>> sorted(normalise({
1:
1,
2:
1}).items())
534 >>> sorted(normalise({
1:
1,
2:
1,
3:
1}).items()) # doctest: +ELLIPSIS
535 [(
1,
0.333...), (
2,
0.333...), (
3,
0.333...)]
536 >>> sorted(normalise({
1:
1,
2:
2,
3:
1}).items())
537 [(
1,
0.25), (
2,
0.5), (
3,
0.25)]
539 return scale(f, l1)
</code></pre>
542 <dt id=
"szyfrow.support.norms.l2"><code class=
"name flex">
543 <span>def
<span class=
"ident">l2
</span></span>(
<span>v1, v2=None)
</span>
546 <div class=
"desc"><p>Finds the distances between two frequency profiles, expressed as dictionaries.
547 Assumes every key in frequencies1 is also in frequencies2
</p>
548 <pre><code class=
"language-python-repl">>>> l2({'a':
1, 'b':
1, 'c':
1}, {'a':
1, 'b':
1, 'c':
1})
550 >>> l2({'a':
2, 'b':
2, 'c':
2}, {'a':
1, 'b':
1, 'c':
1}) # doctest: +ELLIPSIS
552 >>> l2(normalise({'a':
2, 'b':
2, 'c':
2}), normalise({'a':
1, 'b':
1, 'c':
1}))
554 >>> l2({'a':
0, 'b':
2, 'c':
0}, {'a':
1, 'b':
1, 'c':
1}) # doctest: +ELLIPSIS
556 >>> l2(normalise({'a':
0, 'b':
2, 'c':
0}), normalise({'a':
1, 'b':
1, 'c':
1})) # doctest: +ELLIPSIS
558 >>> l2({'a':
0, 'b':
1}, {'a':
1, 'b':
1})
561 <details class=
"source">
563 <span>Expand source code
</span>
565 <pre><code class=
"python">def l2(v1, v2=None):
566 """Finds the distances between two frequency profiles, expressed as dictionaries.
567 Assumes every key in frequencies1 is also in frequencies2
569 >>> l2({
'a
':
1,
'b
':
1,
'c
':
1}, {
'a
':
1,
'b
':
1,
'c
':
1})
571 >>> l2({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
1,
'c
':
1}) # doctest: +ELLIPSIS
573 >>> l2(normalise({
'a
':
2,
'b
':
2,
'c
':
2}), normalise({
'a
':
1,
'b
':
1,
'c
':
1}))
575 >>> l2({
'a
':
0,
'b
':
2,
'c
':
0}, {
'a
':
1,
'b
':
1,
'c
':
1}) # doctest: +ELLIPSIS
577 >>> l2(normalise({
'a
':
0,
'b
':
2,
'c
':
0}), \
578 normalise({
'a
':
1,
'b
':
1,
'c
':
1})) # doctest: +ELLIPSIS
580 >>> l2({
'a
':
0,
'b
':
1}, {
'a
':
1,
'b
':
1})
583 return lp(v1, v2,
2)
</code></pre>
586 <dt id=
"szyfrow.support.norms.l2_scale"><code class=
"name flex">
587 <span>def
<span class=
"ident">l2_scale
</span></span>(
<span>f)
</span>
590 <div class=
"desc"><p>Scale a set of frequencies so they have a unit euclidean length
</p>
591 <pre><code class=
"language-python-repl">>>> sorted(euclidean_scale({
1:
1,
2:
0}).items())
593 >>> sorted(euclidean_scale({
1:
1,
2:
1}).items()) # doctest: +ELLIPSIS
594 [(
1,
0.7071067...), (
2,
0.7071067...)]
595 >>> sorted(euclidean_scale({
1:
1,
2:
1,
3:
1}).items()) # doctest: +ELLIPSIS
596 [(
1,
0.577350...), (
2,
0.577350...), (
3,
0.577350...)]
597 >>> sorted(euclidean_scale({
1:
1,
2:
2,
3:
1}).items()) # doctest: +ELLIPSIS
598 [(
1,
0.408248...), (
2,
0.81649658...), (
3,
0.408248...)]
600 <details class=
"source">
602 <span>Expand source code
</span>
604 <pre><code class=
"python">def l2_scale(f):
605 """Scale a set of frequencies so they have a unit euclidean length
607 >>> sorted(euclidean_scale({
1:
1,
2:
0}).items())
609 >>> sorted(euclidean_scale({
1:
1,
2:
1}).items()) # doctest: +ELLIPSIS
610 [(
1,
0.7071067...), (
2,
0.7071067...)]
611 >>> sorted(euclidean_scale({
1:
1,
2:
1,
3:
1}).items()) # doctest: +ELLIPSIS
612 [(
1,
0.577350...), (
2,
0.577350...), (
3,
0.577350...)]
613 >>> sorted(euclidean_scale({
1:
1,
2:
2,
3:
1}).items()) # doctest: +ELLIPSIS
614 [(
1,
0.408248...), (
2,
0.81649658...), (
3,
0.408248...)]
616 return scale(f, l2)
</code></pre>
619 <dt id=
"szyfrow.support.norms.l3"><code class=
"name flex">
620 <span>def
<span class=
"ident">l3
</span></span>(
<span>v1, v2=None)
</span>
623 <div class=
"desc"><p>Finds the distances between two frequency profiles, expressed as
624 dictionaries. Assumes every key in frequencies1 is also in frequencies2
</p>
625 <pre><code class=
"language-python-repl">>>> l3({'a':
1, 'b':
1, 'c':
1}, {'a':
1, 'b':
1, 'c':
1})
627 >>> l3({'a':
2, 'b':
2, 'c':
2}, {'a':
1, 'b':
1, 'c':
1}) # doctest: +ELLIPSIS
629 >>> l3({'a':
0, 'b':
2, 'c':
0}, {'a':
1, 'b':
1, 'c':
1}) # doctest: +ELLIPSIS
631 >>> l3(normalise({'a':
0, 'b':
2, 'c':
0}), normalise({'a':
1, 'b':
1, 'c':
1})) # doctest: +ELLIPSIS
633 >>> l3({'a':
0, 'b':
1}, {'a':
1, 'b':
1})
635 >>> l3(normalise({'a':
0, 'b':
1}), normalise({'a':
1, 'b':
1})) # doctest: +ELLIPSIS
638 <details class=
"source">
640 <span>Expand source code
</span>
642 <pre><code class=
"python">def l3(v1, v2=None):
643 """Finds the distances between two frequency profiles, expressed as
644 dictionaries. Assumes every key in frequencies1 is also in frequencies2
646 >>> l3({
'a
':
1,
'b
':
1,
'c
':
1}, {
'a
':
1,
'b
':
1,
'c
':
1})
648 >>> l3({
'a
':
2,
'b
':
2,
'c
':
2}, {
'a
':
1,
'b
':
1,
'c
':
1}) # doctest: +ELLIPSIS
650 >>> l3({
'a
':
0,
'b
':
2,
'c
':
0}, {
'a
':
1,
'b
':
1,
'c
':
1}) # doctest: +ELLIPSIS
652 >>> l3(normalise({
'a
':
0,
'b
':
2,
'c
':
0}), \
653 normalise({
'a
':
1,
'b
':
1,
'c
':
1})) # doctest: +ELLIPSIS
655 >>> l3({
'a
':
0,
'b
':
1}, {
'a
':
1,
'b
':
1})
657 >>> l3(normalise({
'a
':
0,
'b
':
1}), normalise({
'a
':
1,
'b
':
1})) # doctest: +ELLIPSIS
660 return lp(v1, v2,
3)
</code></pre>
663 <dt id=
"szyfrow.support.norms.linf"><code class=
"name flex">
664 <span>def
<span class=
"ident">linf
</span></span>(
<span>v1, v2=None)
</span>
667 <div class=
"desc"><p>Finds the distances between two frequency profiles, expressed as
668 dictionaries. Assumes every key in frequencies1 is also in frequencies2
</p></div>
669 <details class=
"source">
671 <span>Expand source code
</span>
673 <pre><code class=
"python">def linf(v1, v2=None):
674 """Finds the distances between two frequency profiles, expressed as
675 dictionaries. Assumes every key in frequencies1 is also in frequencies2
"""
677 vec = {k: abs(v1[k] - v2[k]) for k in (v1.keys() | v2.keys())}
680 return max(v for v in vec.values())
</code></pre>
683 <dt id=
"szyfrow.support.norms.lp"><code class=
"name flex">
684 <span>def
<span class=
"ident">lp
</span></span>(
<span>v1, v2=None, p=
2)
</span>
687 <div class=
"desc"><p>Find the L_p norm. If passed one vector, find the length of that vector.
688 If passed two vectors, find the length of the difference between them.
</p></div>
689 <details class=
"source">
691 <span>Expand source code
</span>
693 <pre><code class=
"python">def lp(v1, v2=None, p=
2):
694 """Find the L_p norm. If passed one vector, find the length of that vector.
695 If passed two vectors, find the length of the difference between them.
698 vec = {k: abs(v1[k] - v2[k]) for k in (v1.keys() | v2.keys())}
701 return sum(v ** p for v in vec.values()) ** (
1.0 / p)
</code></pre>
704 <dt id=
"szyfrow.support.norms.normalise"><code class=
"name flex">
705 <span>def
<span class=
"ident">normalise
</span></span>(
<span>f)
</span>
708 <div class=
"desc"><p>Scale a set of frequencies so they sum to one
</p>
709 <pre><code class=
"language-python-repl">>>> sorted(normalise({
1:
1,
2:
0}).items())
711 >>> sorted(normalise({
1:
1,
2:
1}).items())
713 >>> sorted(normalise({
1:
1,
2:
1,
3:
1}).items()) # doctest: +ELLIPSIS
714 [(
1,
0.333...), (
2,
0.333...), (
3,
0.333...)]
715 >>> sorted(normalise({
1:
1,
2:
2,
3:
1}).items())
716 [(
1,
0.25), (
2,
0.5), (
3,
0.25)]
718 <details class=
"source">
720 <span>Expand source code
</span>
722 <pre><code class=
"python">def l1_scale(f):
723 """Scale a set of frequencies so they sum to one
725 >>> sorted(normalise({
1:
1,
2:
0}).items())
727 >>> sorted(normalise({
1:
1,
2:
1}).items())
729 >>> sorted(normalise({
1:
1,
2:
1,
3:
1}).items()) # doctest: +ELLIPSIS
730 [(
1,
0.333...), (
2,
0.333...), (
3,
0.333...)]
731 >>> sorted(normalise({
1:
1,
2:
2,
3:
1}).items())
732 [(
1,
0.25), (
2,
0.5), (
3,
0.25)]
734 return scale(f, l1)
</code></pre>
737 <dt id=
"szyfrow.support.norms.scale"><code class=
"name flex">
738 <span>def
<span class=
"ident">scale
</span></span>(
<span>frequencies, norm=
<function l2
>)
</span>
741 <div class=
"desc"></div>
742 <details class=
"source">
744 <span>Expand source code
</span>
746 <pre><code class=
"python">def scale(frequencies, norm=l2):
747 length = norm(frequencies)
748 return collections.defaultdict(int,
749 {k: v / length for k, v in frequencies.items()})
</code></pre>
763 <li><h3>Super-module
</h3>
765 <li><code><a title=
"szyfrow.support" href=
"index.html">szyfrow.support
</a></code></li>
768 <li><h3><a href=
"#header-functions">Functions
</a></h3>
769 <ul class=
"two-column">
770 <li><code><a title=
"szyfrow.support.norms.cosine_similarity" href=
"#szyfrow.support.norms.cosine_similarity">cosine_similarity
</a></code></li>
771 <li><code><a title=
"szyfrow.support.norms.euclidean_distance" href=
"#szyfrow.support.norms.euclidean_distance">euclidean_distance
</a></code></li>
772 <li><code><a title=
"szyfrow.support.norms.euclidean_scale" href=
"#szyfrow.support.norms.euclidean_scale">euclidean_scale
</a></code></li>
773 <li><code><a title=
"szyfrow.support.norms.geometric_mean" href=
"#szyfrow.support.norms.geometric_mean">geometric_mean
</a></code></li>
774 <li><code><a title=
"szyfrow.support.norms.harmonic_mean" href=
"#szyfrow.support.norms.harmonic_mean">harmonic_mean
</a></code></li>
775 <li><code><a title=
"szyfrow.support.norms.l1" href=
"#szyfrow.support.norms.l1">l1
</a></code></li>
776 <li><code><a title=
"szyfrow.support.norms.l1_scale" href=
"#szyfrow.support.norms.l1_scale">l1_scale
</a></code></li>
777 <li><code><a title=
"szyfrow.support.norms.l2" href=
"#szyfrow.support.norms.l2">l2
</a></code></li>
778 <li><code><a title=
"szyfrow.support.norms.l2_scale" href=
"#szyfrow.support.norms.l2_scale">l2_scale
</a></code></li>
779 <li><code><a title=
"szyfrow.support.norms.l3" href=
"#szyfrow.support.norms.l3">l3
</a></code></li>
780 <li><code><a title=
"szyfrow.support.norms.linf" href=
"#szyfrow.support.norms.linf">linf
</a></code></li>
781 <li><code><a title=
"szyfrow.support.norms.lp" href=
"#szyfrow.support.norms.lp">lp
</a></code></li>
782 <li><code><a title=
"szyfrow.support.norms.normalise" href=
"#szyfrow.support.norms.normalise">normalise
</a></code></li>
783 <li><code><a title=
"szyfrow.support.norms.scale" href=
"#szyfrow.support.norms.scale">scale
</a></code></li>
790 <p>Generated by
<a href=
"https://pdoc3.github.io/pdoc"><cite>pdoc
</cite> 0.9.2</a>.
</p>