Started on documentation
[szyfrow.git] / docs / szyfrow / support / norms.html
1 <!doctype html>
2 <html lang="en">
3 <head>
4 <meta charset="utf-8">
5 <meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" />
6 <meta name="generator" content="pdoc 0.9.2" />
7 <title>szyfrow.support.norms API documentation</title>
8 <meta name="description" content="Various norms, for calculating the distances between two frequency
9 profiles." />
10 <link rel="preload stylesheet" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/11.0.1/sanitize.min.css" integrity="sha256-PK9q560IAAa6WVRRh76LtCaI8pjTJ2z11v0miyNNjrs=" crossorigin>
11 <link rel="preload stylesheet" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/11.0.1/typography.min.css" integrity="sha256-7l/o7C8jubJiy74VsKTidCy1yBkRtiUGbVkYBylBqUg=" crossorigin>
12 <link rel="stylesheet preload" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.1.1/styles/github.min.css" crossorigin>
13 <style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target .name{background:var(--highlight-color)}.name > 
span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
14 <style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style>
15 <style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
16 <script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.1.1/highlight.min.js" integrity="sha256-Uv3H6lx7dJmRfRvH8TH6kJD1TSK1aFcwgx+mdg3epi8=" crossorigin></script>
17 <script>window.addEventListener('DOMContentLoaded', () => hljs.initHighlighting())</script>
18 </head>
19 <body>
20 <main>
21 <article id="content">
22 <header>
23 <h1 class="title">Module <code>szyfrow.support.norms</code></h1>
24 </header>
25 <section id="section-intro">
26 <p>Various norms, for calculating the distances between two frequency
27 profiles.</p>
28 <details class="source">
29 <summary>
30 <span>Expand source code</span>
31 </summary>
32 <pre><code class="python">&#34;&#34;&#34;Various norms, for calculating the distances between two frequency
33 profiles.
34 &#34;&#34;&#34;
35
36 import collections
37 from math import log10
38
39 def lp(v1, v2=None, p=2):
40 &#34;&#34;&#34;Find the L_p norm. If passed one vector, find the length of that vector.
41 If passed two vectors, find the length of the difference between them.
42 &#34;&#34;&#34;
43 if v2:
44 vec = {k: abs(v1[k] - v2[k]) for k in (v1.keys() | v2.keys())}
45 else:
46 vec = v1
47 return sum(v ** p for v in vec.values()) ** (1.0 / p)
48
49 def l1(v1, v2=None):
50 &#34;&#34;&#34;Finds the distances between two frequency profiles, expressed as
51 dictionaries. Assumes every key in frequencies1 is also in frequencies2
52
53 &gt;&gt;&gt; l1({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})
54 0.0
55 &gt;&gt;&gt; l1({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})
56 3.0
57 &gt;&gt;&gt; l1(normalise({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}), normalise({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}))
58 0.0
59 &gt;&gt;&gt; l1({&#39;a&#39;:0, &#39;b&#39;:2, &#39;c&#39;:0}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})
60 3.0
61 &gt;&gt;&gt; l1({&#39;a&#39;:0, &#39;b&#39;:1}, {&#39;a&#39;:1, &#39;b&#39;:1})
62 1.0
63 &#34;&#34;&#34;
64 return lp(v1, v2, 1)
65
66 def l2(v1, v2=None):
67 &#34;&#34;&#34;Finds the distances between two frequency profiles, expressed as dictionaries.
68 Assumes every key in frequencies1 is also in frequencies2
69
70 &gt;&gt;&gt; l2({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})
71 0.0
72 &gt;&gt;&gt; l2({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}) # doctest: +ELLIPSIS
73 1.73205080...
74 &gt;&gt;&gt; l2(normalise({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}), normalise({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}))
75 0.0
76 &gt;&gt;&gt; l2({&#39;a&#39;:0, &#39;b&#39;:2, &#39;c&#39;:0}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}) # doctest: +ELLIPSIS
77 1.732050807...
78 &gt;&gt;&gt; l2(normalise({&#39;a&#39;:0, &#39;b&#39;:2, &#39;c&#39;:0}), \
79 normalise({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})) # doctest: +ELLIPSIS
80 0.81649658...
81 &gt;&gt;&gt; l2({&#39;a&#39;:0, &#39;b&#39;:1}, {&#39;a&#39;:1, &#39;b&#39;:1})
82 1.0
83 &#34;&#34;&#34;
84 return lp(v1, v2, 2)
85
86 def l3(v1, v2=None):
87 &#34;&#34;&#34;Finds the distances between two frequency profiles, expressed as
88 dictionaries. Assumes every key in frequencies1 is also in frequencies2
89
90 &gt;&gt;&gt; l3({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})
91 0.0
92 &gt;&gt;&gt; l3({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}) # doctest: +ELLIPSIS
93 1.44224957...
94 &gt;&gt;&gt; l3({&#39;a&#39;:0, &#39;b&#39;:2, &#39;c&#39;:0}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}) # doctest: +ELLIPSIS
95 1.4422495703...
96 &gt;&gt;&gt; l3(normalise({&#39;a&#39;:0, &#39;b&#39;:2, &#39;c&#39;:0}), \
97 normalise({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})) # doctest: +ELLIPSIS
98 0.718144896...
99 &gt;&gt;&gt; l3({&#39;a&#39;:0, &#39;b&#39;:1}, {&#39;a&#39;:1, &#39;b&#39;:1})
100 1.0
101 &gt;&gt;&gt; l3(normalise({&#39;a&#39;:0, &#39;b&#39;:1}), normalise({&#39;a&#39;:1, &#39;b&#39;:1})) # doctest: +ELLIPSIS
102 0.6299605249...
103 &#34;&#34;&#34;
104 return lp(v1, v2, 3)
105
106 def linf(v1, v2=None):
107 &#34;&#34;&#34;Finds the distances between two frequency profiles, expressed as
108 dictionaries. Assumes every key in frequencies1 is also in frequencies2&#34;&#34;&#34;
109 if v2:
110 vec = {k: abs(v1[k] - v2[k]) for k in (v1.keys() | v2.keys())}
111 else:
112 vec = v1
113 return max(v for v in vec.values())
114
115
116 def scale(frequencies, norm=l2):
117 length = norm(frequencies)
118 return collections.defaultdict(int,
119 {k: v / length for k, v in frequencies.items()})
120
121 def l2_scale(f):
122 &#34;&#34;&#34;Scale a set of frequencies so they have a unit euclidean length
123
124 &gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 0}).items())
125 [(1, 1.0), (2, 0.0)]
126 &gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 1}).items()) # doctest: +ELLIPSIS
127 [(1, 0.7071067...), (2, 0.7071067...)]
128 &gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 1, 3: 1}).items()) # doctest: +ELLIPSIS
129 [(1, 0.577350...), (2, 0.577350...), (3, 0.577350...)]
130 &gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 2, 3: 1}).items()) # doctest: +ELLIPSIS
131 [(1, 0.408248...), (2, 0.81649658...), (3, 0.408248...)]
132 &#34;&#34;&#34;
133 return scale(f, l2)
134
135 def l1_scale(f):
136 &#34;&#34;&#34;Scale a set of frequencies so they sum to one
137
138 &gt;&gt;&gt; sorted(normalise({1: 1, 2: 0}).items())
139 [(1, 1.0), (2, 0.0)]
140 &gt;&gt;&gt; sorted(normalise({1: 1, 2: 1}).items())
141 [(1, 0.5), (2, 0.5)]
142 &gt;&gt;&gt; sorted(normalise({1: 1, 2: 1, 3: 1}).items()) # doctest: +ELLIPSIS
143 [(1, 0.333...), (2, 0.333...), (3, 0.333...)]
144 &gt;&gt;&gt; sorted(normalise({1: 1, 2: 2, 3: 1}).items())
145 [(1, 0.25), (2, 0.5), (3, 0.25)]
146 &#34;&#34;&#34;
147 return scale(f, l1)
148
149 normalise = l1_scale
150 euclidean_distance = l2
151 euclidean_scale = l2_scale
152
153
154 def geometric_mean(frequencies1, frequencies2):
155 &#34;&#34;&#34;Finds the geometric mean of the absolute differences between two frequency profiles,
156 expressed as dictionaries.
157 Assumes every key in frequencies1 is also in frequencies2
158
159 &gt;&gt;&gt; geometric_mean({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})
160 1.0
161 &gt;&gt;&gt; geometric_mean({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})
162 1.0
163 &gt;&gt;&gt; geometric_mean({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:5, &#39;c&#39;:1})
164 3.0
165 &gt;&gt;&gt; geometric_mean(normalise({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}), \
166 normalise({&#39;a&#39;:1, &#39;b&#39;:5, &#39;c&#39;:1})) # doctest: +ELLIPSIS
167 0.01382140...
168 &gt;&gt;&gt; geometric_mean(normalise({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}), \
169 normalise({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})) # doctest: +ELLIPSIS
170 0.0
171 &gt;&gt;&gt; geometric_mean(normalise({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}), \
172 normalise({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:0})) # doctest: +ELLIPSIS
173 0.009259259...
174 &#34;&#34;&#34;
175 total = 1.0
176 for k in frequencies1:
177 total *= abs(frequencies1[k] - frequencies2[k])
178 return total
179
180 def harmonic_mean(frequencies1, frequencies2):
181 &#34;&#34;&#34;Finds the harmonic mean of the absolute differences between two frequency profiles,
182 expressed as dictionaries.
183 Assumes every key in frequencies1 is also in frequencies2
184
185 &gt;&gt;&gt; harmonic_mean({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})
186 1.0
187 &gt;&gt;&gt; harmonic_mean({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})
188 1.0
189 &gt;&gt;&gt; harmonic_mean({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:5, &#39;c&#39;:1}) # doctest: +ELLIPSIS
190 1.285714285...
191 &gt;&gt;&gt; harmonic_mean(normalise({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}), \
192 normalise({&#39;a&#39;:1, &#39;b&#39;:5, &#39;c&#39;:1})) # doctest: +ELLIPSIS
193 0.228571428571...
194 &gt;&gt;&gt; harmonic_mean(normalise({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}), \
195 normalise({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})) # doctest: +ELLIPSIS
196 0.0
197 &gt;&gt;&gt; harmonic_mean(normalise({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}), \
198 normalise({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:0})) # doctest: +ELLIPSIS
199 0.2
200 &#34;&#34;&#34;
201 total = 0.0
202 for k in frequencies1:
203 if abs(frequencies1[k] - frequencies2[k]) == 0:
204 return 0.0
205 total += 1.0 / abs(frequencies1[k] - frequencies2[k])
206 return len(frequencies1) / total
207
208
209 def cosine_similarity(frequencies1, frequencies2):
210 &#34;&#34;&#34;Finds the distances between two frequency profiles, expressed as dictionaries.
211 Assumes every key in frequencies1 is also in frequencies2
212
213 &gt;&gt;&gt; cosine_similarity({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}) # doctest: +ELLIPSIS
214 1.0000000000...
215 &gt;&gt;&gt; cosine_similarity({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}) # doctest: +ELLIPSIS
216 1.0000000000...
217 &gt;&gt;&gt; cosine_similarity({&#39;a&#39;:0, &#39;b&#39;:2, &#39;c&#39;:0}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}) # doctest: +ELLIPSIS
218 0.5773502691...
219 &gt;&gt;&gt; cosine_similarity({&#39;a&#39;:0, &#39;b&#39;:1}, {&#39;a&#39;:1, &#39;b&#39;:1}) # doctest: +ELLIPSIS
220 0.7071067811...
221 &#34;&#34;&#34;
222 numerator = 0
223 length1 = 0
224 length2 = 0
225 for k in frequencies1:
226 numerator += frequencies1[k] * frequencies2[k]
227 length1 += frequencies1[k]**2
228 for k in frequencies2:
229 length2 += frequencies2[k]**2
230 return numerator / (length1 ** 0.5 * length2 ** 0.5)
231
232
233
234 if __name__ == &#34;__main__&#34;:
235 import doctest
236 doctest.testmod()</code></pre>
237 </details>
238 </section>
239 <section>
240 </section>
241 <section>
242 </section>
243 <section>
244 <h2 class="section-title" id="header-functions">Functions</h2>
245 <dl>
246 <dt id="szyfrow.support.norms.cosine_similarity"><code class="name flex">
247 <span>def <span class="ident">cosine_similarity</span></span>(<span>frequencies1, frequencies2)</span>
248 </code></dt>
249 <dd>
250 <div class="desc"><p>Finds the distances between two frequency profiles, expressed as dictionaries.
251 Assumes every key in frequencies1 is also in frequencies2</p>
252 <pre><code class="language-python-repl">&gt;&gt;&gt; cosine_similarity({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
253 1.0000000000...
254 &gt;&gt;&gt; cosine_similarity({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
255 1.0000000000...
256 &gt;&gt;&gt; cosine_similarity({'a':0, 'b':2, 'c':0}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
257 0.5773502691...
258 &gt;&gt;&gt; cosine_similarity({'a':0, 'b':1}, {'a':1, 'b':1}) # doctest: +ELLIPSIS
259 0.7071067811...
260 </code></pre></div>
261 <details class="source">
262 <summary>
263 <span>Expand source code</span>
264 </summary>
265 <pre><code class="python">def cosine_similarity(frequencies1, frequencies2):
266 &#34;&#34;&#34;Finds the distances between two frequency profiles, expressed as dictionaries.
267 Assumes every key in frequencies1 is also in frequencies2
268
269 &gt;&gt;&gt; cosine_similarity({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}) # doctest: +ELLIPSIS
270 1.0000000000...
271 &gt;&gt;&gt; cosine_similarity({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}) # doctest: +ELLIPSIS
272 1.0000000000...
273 &gt;&gt;&gt; cosine_similarity({&#39;a&#39;:0, &#39;b&#39;:2, &#39;c&#39;:0}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}) # doctest: +ELLIPSIS
274 0.5773502691...
275 &gt;&gt;&gt; cosine_similarity({&#39;a&#39;:0, &#39;b&#39;:1}, {&#39;a&#39;:1, &#39;b&#39;:1}) # doctest: +ELLIPSIS
276 0.7071067811...
277 &#34;&#34;&#34;
278 numerator = 0
279 length1 = 0
280 length2 = 0
281 for k in frequencies1:
282 numerator += frequencies1[k] * frequencies2[k]
283 length1 += frequencies1[k]**2
284 for k in frequencies2:
285 length2 += frequencies2[k]**2
286 return numerator / (length1 ** 0.5 * length2 ** 0.5)</code></pre>
287 </details>
288 </dd>
289 <dt id="szyfrow.support.norms.euclidean_distance"><code class="name flex">
290 <span>def <span class="ident">euclidean_distance</span></span>(<span>v1, v2=None)</span>
291 </code></dt>
292 <dd>
293 <div class="desc"><p>Finds the distances between two frequency profiles, expressed as dictionaries.
294 Assumes every key in frequencies1 is also in frequencies2</p>
295 <pre><code class="language-python-repl">&gt;&gt;&gt; l2({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1})
296 0.0
297 &gt;&gt;&gt; l2({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
298 1.73205080...
299 &gt;&gt;&gt; l2(normalise({'a':2, 'b':2, 'c':2}), normalise({'a':1, 'b':1, 'c':1}))
300 0.0
301 &gt;&gt;&gt; l2({'a':0, 'b':2, 'c':0}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
302 1.732050807...
303 &gt;&gt;&gt; l2(normalise({'a':0, 'b':2, 'c':0}), normalise({'a':1, 'b':1, 'c':1})) # doctest: +ELLIPSIS
304 0.81649658...
305 &gt;&gt;&gt; l2({'a':0, 'b':1}, {'a':1, 'b':1})
306 1.0
307 </code></pre></div>
308 <details class="source">
309 <summary>
310 <span>Expand source code</span>
311 </summary>
312 <pre><code class="python">def l2(v1, v2=None):
313 &#34;&#34;&#34;Finds the distances between two frequency profiles, expressed as dictionaries.
314 Assumes every key in frequencies1 is also in frequencies2
315
316 &gt;&gt;&gt; l2({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})
317 0.0
318 &gt;&gt;&gt; l2({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}) # doctest: +ELLIPSIS
319 1.73205080...
320 &gt;&gt;&gt; l2(normalise({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}), normalise({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}))
321 0.0
322 &gt;&gt;&gt; l2({&#39;a&#39;:0, &#39;b&#39;:2, &#39;c&#39;:0}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}) # doctest: +ELLIPSIS
323 1.732050807...
324 &gt;&gt;&gt; l2(normalise({&#39;a&#39;:0, &#39;b&#39;:2, &#39;c&#39;:0}), \
325 normalise({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})) # doctest: +ELLIPSIS
326 0.81649658...
327 &gt;&gt;&gt; l2({&#39;a&#39;:0, &#39;b&#39;:1}, {&#39;a&#39;:1, &#39;b&#39;:1})
328 1.0
329 &#34;&#34;&#34;
330 return lp(v1, v2, 2)</code></pre>
331 </details>
332 </dd>
333 <dt id="szyfrow.support.norms.euclidean_scale"><code class="name flex">
334 <span>def <span class="ident">euclidean_scale</span></span>(<span>f)</span>
335 </code></dt>
336 <dd>
337 <div class="desc"><p>Scale a set of frequencies so they have a unit euclidean length</p>
338 <pre><code class="language-python-repl">&gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 0}).items())
339 [(1, 1.0), (2, 0.0)]
340 &gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 1}).items()) # doctest: +ELLIPSIS
341 [(1, 0.7071067...), (2, 0.7071067...)]
342 &gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 1, 3: 1}).items()) # doctest: +ELLIPSIS
343 [(1, 0.577350...), (2, 0.577350...), (3, 0.577350...)]
344 &gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 2, 3: 1}).items()) # doctest: +ELLIPSIS
345 [(1, 0.408248...), (2, 0.81649658...), (3, 0.408248...)]
346 </code></pre></div>
347 <details class="source">
348 <summary>
349 <span>Expand source code</span>
350 </summary>
351 <pre><code class="python">def l2_scale(f):
352 &#34;&#34;&#34;Scale a set of frequencies so they have a unit euclidean length
353
354 &gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 0}).items())
355 [(1, 1.0), (2, 0.0)]
356 &gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 1}).items()) # doctest: +ELLIPSIS
357 [(1, 0.7071067...), (2, 0.7071067...)]
358 &gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 1, 3: 1}).items()) # doctest: +ELLIPSIS
359 [(1, 0.577350...), (2, 0.577350...), (3, 0.577350...)]
360 &gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 2, 3: 1}).items()) # doctest: +ELLIPSIS
361 [(1, 0.408248...), (2, 0.81649658...), (3, 0.408248...)]
362 &#34;&#34;&#34;
363 return scale(f, l2)</code></pre>
364 </details>
365 </dd>
366 <dt id="szyfrow.support.norms.geometric_mean"><code class="name flex">
367 <span>def <span class="ident">geometric_mean</span></span>(<span>frequencies1, frequencies2)</span>
368 </code></dt>
369 <dd>
370 <div class="desc"><p>Finds the geometric mean of the absolute differences between two frequency profiles,
371 expressed as dictionaries.
372 Assumes every key in frequencies1 is also in frequencies2</p>
373 <pre><code class="language-python-repl">&gt;&gt;&gt; geometric_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1})
374 1.0
375 &gt;&gt;&gt; geometric_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1})
376 1.0
377 &gt;&gt;&gt; geometric_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':5, 'c':1})
378 3.0
379 &gt;&gt;&gt; geometric_mean(normalise({'a':2, 'b':2, 'c':2}), normalise({'a':1, 'b':5, 'c':1})) # doctest: +ELLIPSIS
380 0.01382140...
381 &gt;&gt;&gt; geometric_mean(normalise({'a':2, 'b':2, 'c':2}), normalise({'a':1, 'b':1, 'c':1})) # doctest: +ELLIPSIS
382 0.0
383 &gt;&gt;&gt; geometric_mean(normalise({'a':2, 'b':2, 'c':2}), normalise({'a':1, 'b':1, 'c':0})) # doctest: +ELLIPSIS
384 0.009259259...
385 </code></pre></div>
386 <details class="source">
387 <summary>
388 <span>Expand source code</span>
389 </summary>
390 <pre><code class="python">def geometric_mean(frequencies1, frequencies2):
391 &#34;&#34;&#34;Finds the geometric mean of the absolute differences between two frequency profiles,
392 expressed as dictionaries.
393 Assumes every key in frequencies1 is also in frequencies2
394
395 &gt;&gt;&gt; geometric_mean({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})
396 1.0
397 &gt;&gt;&gt; geometric_mean({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})
398 1.0
399 &gt;&gt;&gt; geometric_mean({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:5, &#39;c&#39;:1})
400 3.0
401 &gt;&gt;&gt; geometric_mean(normalise({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}), \
402 normalise({&#39;a&#39;:1, &#39;b&#39;:5, &#39;c&#39;:1})) # doctest: +ELLIPSIS
403 0.01382140...
404 &gt;&gt;&gt; geometric_mean(normalise({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}), \
405 normalise({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})) # doctest: +ELLIPSIS
406 0.0
407 &gt;&gt;&gt; geometric_mean(normalise({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}), \
408 normalise({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:0})) # doctest: +ELLIPSIS
409 0.009259259...
410 &#34;&#34;&#34;
411 total = 1.0
412 for k in frequencies1:
413 total *= abs(frequencies1[k] - frequencies2[k])
414 return total</code></pre>
415 </details>
416 </dd>
417 <dt id="szyfrow.support.norms.harmonic_mean"><code class="name flex">
418 <span>def <span class="ident">harmonic_mean</span></span>(<span>frequencies1, frequencies2)</span>
419 </code></dt>
420 <dd>
421 <div class="desc"><p>Finds the harmonic mean of the absolute differences between two frequency profiles,
422 expressed as dictionaries.
423 Assumes every key in frequencies1 is also in frequencies2</p>
424 <pre><code class="language-python-repl">&gt;&gt;&gt; harmonic_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1})
425 1.0
426 &gt;&gt;&gt; harmonic_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1})
427 1.0
428 &gt;&gt;&gt; harmonic_mean({'a':2, 'b':2, 'c':2}, {'a':1, 'b':5, 'c':1}) # doctest: +ELLIPSIS
429 1.285714285...
430 &gt;&gt;&gt; harmonic_mean(normalise({'a':2, 'b':2, 'c':2}), normalise({'a':1, 'b':5, 'c':1})) # doctest: +ELLIPSIS
431 0.228571428571...
432 &gt;&gt;&gt; harmonic_mean(normalise({'a':2, 'b':2, 'c':2}), normalise({'a':1, 'b':1, 'c':1})) # doctest: +ELLIPSIS
433 0.0
434 &gt;&gt;&gt; harmonic_mean(normalise({'a':2, 'b':2, 'c':2}), normalise({'a':1, 'b':1, 'c':0})) # doctest: +ELLIPSIS
435 0.2
436 </code></pre></div>
437 <details class="source">
438 <summary>
439 <span>Expand source code</span>
440 </summary>
441 <pre><code class="python">def harmonic_mean(frequencies1, frequencies2):
442 &#34;&#34;&#34;Finds the harmonic mean of the absolute differences between two frequency profiles,
443 expressed as dictionaries.
444 Assumes every key in frequencies1 is also in frequencies2
445
446 &gt;&gt;&gt; harmonic_mean({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})
447 1.0
448 &gt;&gt;&gt; harmonic_mean({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})
449 1.0
450 &gt;&gt;&gt; harmonic_mean({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:5, &#39;c&#39;:1}) # doctest: +ELLIPSIS
451 1.285714285...
452 &gt;&gt;&gt; harmonic_mean(normalise({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}), \
453 normalise({&#39;a&#39;:1, &#39;b&#39;:5, &#39;c&#39;:1})) # doctest: +ELLIPSIS
454 0.228571428571...
455 &gt;&gt;&gt; harmonic_mean(normalise({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}), \
456 normalise({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})) # doctest: +ELLIPSIS
457 0.0
458 &gt;&gt;&gt; harmonic_mean(normalise({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}), \
459 normalise({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:0})) # doctest: +ELLIPSIS
460 0.2
461 &#34;&#34;&#34;
462 total = 0.0
463 for k in frequencies1:
464 if abs(frequencies1[k] - frequencies2[k]) == 0:
465 return 0.0
466 total += 1.0 / abs(frequencies1[k] - frequencies2[k])
467 return len(frequencies1) / total</code></pre>
468 </details>
469 </dd>
470 <dt id="szyfrow.support.norms.l1"><code class="name flex">
471 <span>def <span class="ident">l1</span></span>(<span>v1, v2=None)</span>
472 </code></dt>
473 <dd>
474 <div class="desc"><p>Finds the distances between two frequency profiles, expressed as
475 dictionaries. Assumes every key in frequencies1 is also in frequencies2</p>
476 <pre><code class="language-python-repl">&gt;&gt;&gt; l1({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1})
477 0.0
478 &gt;&gt;&gt; l1({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1})
479 3.0
480 &gt;&gt;&gt; l1(normalise({'a':2, 'b':2, 'c':2}), normalise({'a':1, 'b':1, 'c':1}))
481 0.0
482 &gt;&gt;&gt; l1({'a':0, 'b':2, 'c':0}, {'a':1, 'b':1, 'c':1})
483 3.0
484 &gt;&gt;&gt; l1({'a':0, 'b':1}, {'a':1, 'b':1})
485 1.0
486 </code></pre></div>
487 <details class="source">
488 <summary>
489 <span>Expand source code</span>
490 </summary>
491 <pre><code class="python">def l1(v1, v2=None):
492 &#34;&#34;&#34;Finds the distances between two frequency profiles, expressed as
493 dictionaries. Assumes every key in frequencies1 is also in frequencies2
494
495 &gt;&gt;&gt; l1({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})
496 0.0
497 &gt;&gt;&gt; l1({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})
498 3.0
499 &gt;&gt;&gt; l1(normalise({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}), normalise({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}))
500 0.0
501 &gt;&gt;&gt; l1({&#39;a&#39;:0, &#39;b&#39;:2, &#39;c&#39;:0}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})
502 3.0
503 &gt;&gt;&gt; l1({&#39;a&#39;:0, &#39;b&#39;:1}, {&#39;a&#39;:1, &#39;b&#39;:1})
504 1.0
505 &#34;&#34;&#34;
506 return lp(v1, v2, 1)</code></pre>
507 </details>
508 </dd>
509 <dt id="szyfrow.support.norms.l1_scale"><code class="name flex">
510 <span>def <span class="ident">l1_scale</span></span>(<span>f)</span>
511 </code></dt>
512 <dd>
513 <div class="desc"><p>Scale a set of frequencies so they sum to one</p>
514 <pre><code class="language-python-repl">&gt;&gt;&gt; sorted(normalise({1: 1, 2: 0}).items())
515 [(1, 1.0), (2, 0.0)]
516 &gt;&gt;&gt; sorted(normalise({1: 1, 2: 1}).items())
517 [(1, 0.5), (2, 0.5)]
518 &gt;&gt;&gt; sorted(normalise({1: 1, 2: 1, 3: 1}).items()) # doctest: +ELLIPSIS
519 [(1, 0.333...), (2, 0.333...), (3, 0.333...)]
520 &gt;&gt;&gt; sorted(normalise({1: 1, 2: 2, 3: 1}).items())
521 [(1, 0.25), (2, 0.5), (3, 0.25)]
522 </code></pre></div>
523 <details class="source">
524 <summary>
525 <span>Expand source code</span>
526 </summary>
527 <pre><code class="python">def l1_scale(f):
528 &#34;&#34;&#34;Scale a set of frequencies so they sum to one
529
530 &gt;&gt;&gt; sorted(normalise({1: 1, 2: 0}).items())
531 [(1, 1.0), (2, 0.0)]
532 &gt;&gt;&gt; sorted(normalise({1: 1, 2: 1}).items())
533 [(1, 0.5), (2, 0.5)]
534 &gt;&gt;&gt; sorted(normalise({1: 1, 2: 1, 3: 1}).items()) # doctest: +ELLIPSIS
535 [(1, 0.333...), (2, 0.333...), (3, 0.333...)]
536 &gt;&gt;&gt; sorted(normalise({1: 1, 2: 2, 3: 1}).items())
537 [(1, 0.25), (2, 0.5), (3, 0.25)]
538 &#34;&#34;&#34;
539 return scale(f, l1)</code></pre>
540 </details>
541 </dd>
542 <dt id="szyfrow.support.norms.l2"><code class="name flex">
543 <span>def <span class="ident">l2</span></span>(<span>v1, v2=None)</span>
544 </code></dt>
545 <dd>
546 <div class="desc"><p>Finds the distances between two frequency profiles, expressed as dictionaries.
547 Assumes every key in frequencies1 is also in frequencies2</p>
548 <pre><code class="language-python-repl">&gt;&gt;&gt; l2({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1})
549 0.0
550 &gt;&gt;&gt; l2({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
551 1.73205080...
552 &gt;&gt;&gt; l2(normalise({'a':2, 'b':2, 'c':2}), normalise({'a':1, 'b':1, 'c':1}))
553 0.0
554 &gt;&gt;&gt; l2({'a':0, 'b':2, 'c':0}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
555 1.732050807...
556 &gt;&gt;&gt; l2(normalise({'a':0, 'b':2, 'c':0}), normalise({'a':1, 'b':1, 'c':1})) # doctest: +ELLIPSIS
557 0.81649658...
558 &gt;&gt;&gt; l2({'a':0, 'b':1}, {'a':1, 'b':1})
559 1.0
560 </code></pre></div>
561 <details class="source">
562 <summary>
563 <span>Expand source code</span>
564 </summary>
565 <pre><code class="python">def l2(v1, v2=None):
566 &#34;&#34;&#34;Finds the distances between two frequency profiles, expressed as dictionaries.
567 Assumes every key in frequencies1 is also in frequencies2
568
569 &gt;&gt;&gt; l2({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})
570 0.0
571 &gt;&gt;&gt; l2({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}) # doctest: +ELLIPSIS
572 1.73205080...
573 &gt;&gt;&gt; l2(normalise({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}), normalise({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}))
574 0.0
575 &gt;&gt;&gt; l2({&#39;a&#39;:0, &#39;b&#39;:2, &#39;c&#39;:0}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}) # doctest: +ELLIPSIS
576 1.732050807...
577 &gt;&gt;&gt; l2(normalise({&#39;a&#39;:0, &#39;b&#39;:2, &#39;c&#39;:0}), \
578 normalise({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})) # doctest: +ELLIPSIS
579 0.81649658...
580 &gt;&gt;&gt; l2({&#39;a&#39;:0, &#39;b&#39;:1}, {&#39;a&#39;:1, &#39;b&#39;:1})
581 1.0
582 &#34;&#34;&#34;
583 return lp(v1, v2, 2)</code></pre>
584 </details>
585 </dd>
586 <dt id="szyfrow.support.norms.l2_scale"><code class="name flex">
587 <span>def <span class="ident">l2_scale</span></span>(<span>f)</span>
588 </code></dt>
589 <dd>
590 <div class="desc"><p>Scale a set of frequencies so they have a unit euclidean length</p>
591 <pre><code class="language-python-repl">&gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 0}).items())
592 [(1, 1.0), (2, 0.0)]
593 &gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 1}).items()) # doctest: +ELLIPSIS
594 [(1, 0.7071067...), (2, 0.7071067...)]
595 &gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 1, 3: 1}).items()) # doctest: +ELLIPSIS
596 [(1, 0.577350...), (2, 0.577350...), (3, 0.577350...)]
597 &gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 2, 3: 1}).items()) # doctest: +ELLIPSIS
598 [(1, 0.408248...), (2, 0.81649658...), (3, 0.408248...)]
599 </code></pre></div>
600 <details class="source">
601 <summary>
602 <span>Expand source code</span>
603 </summary>
604 <pre><code class="python">def l2_scale(f):
605 &#34;&#34;&#34;Scale a set of frequencies so they have a unit euclidean length
606
607 &gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 0}).items())
608 [(1, 1.0), (2, 0.0)]
609 &gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 1}).items()) # doctest: +ELLIPSIS
610 [(1, 0.7071067...), (2, 0.7071067...)]
611 &gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 1, 3: 1}).items()) # doctest: +ELLIPSIS
612 [(1, 0.577350...), (2, 0.577350...), (3, 0.577350...)]
613 &gt;&gt;&gt; sorted(euclidean_scale({1: 1, 2: 2, 3: 1}).items()) # doctest: +ELLIPSIS
614 [(1, 0.408248...), (2, 0.81649658...), (3, 0.408248...)]
615 &#34;&#34;&#34;
616 return scale(f, l2)</code></pre>
617 </details>
618 </dd>
619 <dt id="szyfrow.support.norms.l3"><code class="name flex">
620 <span>def <span class="ident">l3</span></span>(<span>v1, v2=None)</span>
621 </code></dt>
622 <dd>
623 <div class="desc"><p>Finds the distance between two frequency profiles, expressed as
624 dictionaries. Assumes every key in frequencies1 is also in frequencies2</p>
625 <pre><code class="language-python-repl">&gt;&gt;&gt; l3({'a':1, 'b':1, 'c':1}, {'a':1, 'b':1, 'c':1})
626 0.0
627 &gt;&gt;&gt; l3({'a':2, 'b':2, 'c':2}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
628 1.44224957...
629 &gt;&gt;&gt; l3({'a':0, 'b':2, 'c':0}, {'a':1, 'b':1, 'c':1}) # doctest: +ELLIPSIS
630 1.4422495703...
631 &gt;&gt;&gt; l3(normalise({'a':0, 'b':2, 'c':0}), normalise({'a':1, 'b':1, 'c':1})) # doctest: +ELLIPSIS
632 0.718144896...
633 &gt;&gt;&gt; l3({'a':0, 'b':1}, {'a':1, 'b':1})
634 1.0
635 &gt;&gt;&gt; l3(normalise({'a':0, 'b':1}), normalise({'a':1, 'b':1})) # doctest: +ELLIPSIS
636 0.6299605249...
637 </code></pre></div>
638 <details class="source">
639 <summary>
640 <span>Expand source code</span>
641 </summary>
642 <pre><code class="python">def l3(v1, v2=None):
643 &#34;&#34;&#34;Finds the distance between two frequency profiles, expressed as
644 dictionaries. Assumes every key in frequencies1 is also in frequencies2
645
646 &gt;&gt;&gt; l3({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})
647 0.0
648 &gt;&gt;&gt; l3({&#39;a&#39;:2, &#39;b&#39;:2, &#39;c&#39;:2}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}) # doctest: +ELLIPSIS
649 1.44224957...
650 &gt;&gt;&gt; l3({&#39;a&#39;:0, &#39;b&#39;:2, &#39;c&#39;:0}, {&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1}) # doctest: +ELLIPSIS
651 1.4422495703...
652 &gt;&gt;&gt; l3(normalise({&#39;a&#39;:0, &#39;b&#39;:2, &#39;c&#39;:0}), \
653 normalise({&#39;a&#39;:1, &#39;b&#39;:1, &#39;c&#39;:1})) # doctest: +ELLIPSIS
654 0.718144896...
655 &gt;&gt;&gt; l3({&#39;a&#39;:0, &#39;b&#39;:1}, {&#39;a&#39;:1, &#39;b&#39;:1})
656 1.0
657 &gt;&gt;&gt; l3(normalise({&#39;a&#39;:0, &#39;b&#39;:1}), normalise({&#39;a&#39;:1, &#39;b&#39;:1})) # doctest: +ELLIPSIS
658 0.6299605249...
659 &#34;&#34;&#34;
660 return lp(v1, v2, 3)</code></pre>
661 </details>
662 </dd>
663 <dt id="szyfrow.support.norms.linf"><code class="name flex">
664 <span>def <span class="ident">linf</span></span>(<span>v1, v2=None)</span>
665 </code></dt>
666 <dd>
667 <div class="desc"><p>Finds the distance between two frequency profiles, expressed as
668 dictionaries. Assumes every key in frequencies1 is also in frequencies2</p></div>
669 <details class="source">
670 <summary>
671 <span>Expand source code</span>
672 </summary>
673 <pre><code class="python">def linf(v1, v2=None):
674 &#34;&#34;&#34;Finds the distance between two frequency profiles, expressed as
675 dictionaries. Assumes every key in frequencies1 is also in frequencies2&#34;&#34;&#34;
676 if v2:
677 vec = {k: abs(v1[k] - v2[k]) for k in (v1.keys() | v2.keys())}
678 else:
679 vec = v1
680 return max(v for v in vec.values())</code></pre>
681 </details>
682 </dd>
683 <dt id="szyfrow.support.norms.lp"><code class="name flex">
684 <span>def <span class="ident">lp</span></span>(<span>v1, v2=None, p=2)</span>
685 </code></dt>
686 <dd>
687 <div class="desc"><p>Find the L_p norm. If passed one vector, find the length of that vector.
688 If passed two vectors, find the length of the difference between them.</p></div>
689 <details class="source">
690 <summary>
691 <span>Expand source code</span>
692 </summary>
693 <pre><code class="python">def lp(v1, v2=None, p=2):
694 &#34;&#34;&#34;Find the L_p norm. If passed one vector, find the length of that vector.
695 If passed two vectors, find the length of the difference between them.
696 &#34;&#34;&#34;
697 if v2:
698 vec = {k: abs(v1[k] - v2[k]) for k in (v1.keys() | v2.keys())}
699 else:
700 vec = v1
701 return sum(v ** p for v in vec.values()) ** (1.0 / p)</code></pre>
702 </details>
703 </dd>
704 <dt id="szyfrow.support.norms.normalise"><code class="name flex">
705 <span>def <span class="ident">normalise</span></span>(<span>f)</span>
706 </code></dt>
707 <dd>
708 <div class="desc"><p>Scale a set of frequencies so they sum to one</p>
709 <pre><code class="language-python-repl">&gt;&gt;&gt; sorted(normalise({1: 1, 2: 0}).items())
710 [(1, 1.0), (2, 0.0)]
711 &gt;&gt;&gt; sorted(normalise({1: 1, 2: 1}).items())
712 [(1, 0.5), (2, 0.5)]
713 &gt;&gt;&gt; sorted(normalise({1: 1, 2: 1, 3: 1}).items()) # doctest: +ELLIPSIS
714 [(1, 0.333...), (2, 0.333...), (3, 0.333...)]
715 &gt;&gt;&gt; sorted(normalise({1: 1, 2: 2, 3: 1}).items())
716 [(1, 0.25), (2, 0.5), (3, 0.25)]
717 </code></pre></div>
718 <details class="source">
719 <summary>
720 <span>Expand source code</span>
721 </summary>
722 <pre><code class="python">def l1_scale(f):
723 &#34;&#34;&#34;Scale a set of frequencies so they sum to one
724
725 &gt;&gt;&gt; sorted(normalise({1: 1, 2: 0}).items())
726 [(1, 1.0), (2, 0.0)]
727 &gt;&gt;&gt; sorted(normalise({1: 1, 2: 1}).items())
728 [(1, 0.5), (2, 0.5)]
729 &gt;&gt;&gt; sorted(normalise({1: 1, 2: 1, 3: 1}).items()) # doctest: +ELLIPSIS
730 [(1, 0.333...), (2, 0.333...), (3, 0.333...)]
731 &gt;&gt;&gt; sorted(normalise({1: 1, 2: 2, 3: 1}).items())
732 [(1, 0.25), (2, 0.5), (3, 0.25)]
733 &#34;&#34;&#34;
734 return scale(f, l1)</code></pre>
735 </details>
736 </dd>
737 <dt id="szyfrow.support.norms.scale"><code class="name flex">
738 <span>def <span class="ident">scale</span></span>(<span>frequencies, norm=&lt;function l2&gt;)</span>
739 </code></dt>
740 <dd>
741 <div class="desc"></div>
742 <details class="source">
743 <summary>
744 <span>Expand source code</span>
745 </summary>
746 <pre><code class="python">def scale(frequencies, norm=l2):
747 length = norm(frequencies)
748 return collections.defaultdict(int,
749 {k: v / length for k, v in frequencies.items()})</code></pre>
750 </details>
751 </dd>
752 </dl>
753 </section>
754 <section>
755 </section>
756 </article>
757 <nav id="sidebar">
758 <h1>Index</h1>
759 <div class="toc">
760 <ul></ul>
761 </div>
762 <ul id="index">
763 <li><h3>Super-module</h3>
764 <ul>
765 <li><code><a title="szyfrow.support" href="index.html">szyfrow.support</a></code></li>
766 </ul>
767 </li>
768 <li><h3><a href="#header-functions">Functions</a></h3>
769 <ul class="two-column">
770 <li><code><a title="szyfrow.support.norms.cosine_similarity" href="#szyfrow.support.norms.cosine_similarity">cosine_similarity</a></code></li>
771 <li><code><a title="szyfrow.support.norms.euclidean_distance" href="#szyfrow.support.norms.euclidean_distance">euclidean_distance</a></code></li>
772 <li><code><a title="szyfrow.support.norms.euclidean_scale" href="#szyfrow.support.norms.euclidean_scale">euclidean_scale</a></code></li>
773 <li><code><a title="szyfrow.support.norms.geometric_mean" href="#szyfrow.support.norms.geometric_mean">geometric_mean</a></code></li>
774 <li><code><a title="szyfrow.support.norms.harmonic_mean" href="#szyfrow.support.norms.harmonic_mean">harmonic_mean</a></code></li>
775 <li><code><a title="szyfrow.support.norms.l1" href="#szyfrow.support.norms.l1">l1</a></code></li>
776 <li><code><a title="szyfrow.support.norms.l1_scale" href="#szyfrow.support.norms.l1_scale">l1_scale</a></code></li>
777 <li><code><a title="szyfrow.support.norms.l2" href="#szyfrow.support.norms.l2">l2</a></code></li>
778 <li><code><a title="szyfrow.support.norms.l2_scale" href="#szyfrow.support.norms.l2_scale">l2_scale</a></code></li>
779 <li><code><a title="szyfrow.support.norms.l3" href="#szyfrow.support.norms.l3">l3</a></code></li>
780 <li><code><a title="szyfrow.support.norms.linf" href="#szyfrow.support.norms.linf">linf</a></code></li>
781 <li><code><a title="szyfrow.support.norms.lp" href="#szyfrow.support.norms.lp">lp</a></code></li>
782 <li><code><a title="szyfrow.support.norms.normalise" href="#szyfrow.support.norms.normalise">normalise</a></code></li>
783 <li><code><a title="szyfrow.support.norms.scale" href="#szyfrow.support.norms.scale">scale</a></code></li>
784 </ul>
785 </li>
786 </ul>
787 </nav>
788 </main>
789 <footer id="footer">
790 <p>Generated by <a href="https://pdoc3.github.io/pdoc"><cite>pdoc</cite> 0.9.2</a>.</p>
791 </footer>
792 </body>
793 </html>