Removed neighbour generation out of the core library
[riddle-generator.git] / creation_analysis.md
1 ---
2 jupyter:
3 jupytext:
4 formats: ipynb,md
5 text_representation:
6 extension: .md
7 format_name: markdown
8 format_version: '1.3'
9 jupytext_version: 1.14.5
10 kernelspec:
11 display_name: Python 3 (ipykernel)
12 language: python
13 name: python3
14 ---
15
16 ```python
17 import pandas as pd
18 import matplotlib as mpl
19 import matplotlib.pyplot as plt
20 %matplotlib inline
21 import pstats
22 from pstats import SortKey
23
24 from riddle_definitions import *
25 import random
26 ```
27
28 ```python
# Sample 10,000 random pairs of dictionary entries and summarise the edit
# distance between the two members of each pair.
# NOTE(review): `edit_distance` and `dictionary` are presumably supplied by
# the star import from riddle_definitions -- confirm.
distances = pd.Series(
    [edit_distance(random.choice(dictionary), random.choice(dictionary))
     for _ in range(10_000)]
)
distances.describe()
34 ```
35
36 ```python
# Fraction of the sampled pairs whose edit distance is at most 3.
(distances <= 3).sum() / distances.count()
38 ```
39
40 ```python
# Load the metrics recorded for the "original" run and display the table.
metrics_original = pd.read_csv('metrics_original.csv')
metrics_original
43 ```
44
45 ```python
# Load the metrics recorded for the "related" run and display the table.
metrics_related = pd.read_csv('metrics_related.csv')
metrics_related
48 ```
49
50 ```python
# Load the metrics recorded for the "lazy" run and display the table.
metrics_lazy = pd.read_csv('metrics_lazy.csv')
metrics_lazy
53 ```
54
55 ```python
# Summary statistics of the original-run metrics as loaded.
metrics_original.describe()
57 ```
58
59 ```python
# Summary statistics of the related-run metrics as loaded.
metrics_related.describe()
61 ```
62
63 ```python
# Summary statistics of the lazy-run metrics as loaded.
metrics_lazy.describe()
65 ```
66
67 ```python
# Overlay semi-transparent cpu_time histograms of the three runs on one
# explicitly created set of axes.
fig, ax = plt.subplots(1, 1)
for run_metrics in (metrics_original, metrics_related, metrics_lazy):
    run_metrics['cpu_time'].plot.hist(bins=20, ax=ax, alpha=0.5)
73 ```
74
75 ```python
# Overlay cpu_time histograms of the three runs; the first call creates the
# axes and the remaining two draw onto them.
ax = metrics_original['cpu_time'].plot.hist(bins=20, alpha=0.5)
for run_metrics in (metrics_related, metrics_lazy):
    run_metrics['cpu_time'].plot.hist(bins=20, ax=ax, alpha=0.5)
79 ```
80
81 ```python
# Kernel density estimates of cpu_time for the three runs on shared axes,
# with the x-axis limited to 0-10.
ax = metrics_original['cpu_time'].plot.kde(xlim=(0, 10))
for run_metrics in (metrics_related, metrics_lazy):
    run_metrics['cpu_time'].plot.kde(ax=ax)
85 ```
86
87 ```python
# Histogram of cpu_time for the original run only, saved to
# original_time_histogram.png.
fig, ax = plt.subplots()
ax.hist([metrics_original.cpu_time], bins=20)
# Only one series is plotted, so the legend carries exactly one label
# (the previous three-label list did not match the plotted data).
ax.legend(['Original'])
ax.set_title('Times taken to generate a riddle')
ax.set_xlabel('Time (s)')
plt.savefig('original_time_histogram.png')
94 ```
95
96 ```python
# Grouped histogram of cpu_time for the original and related runs, saved to
# original_related_time_histogram.png.
fig, ax = plt.subplots()
ax.hist([run_metrics['cpu_time']
         for run_metrics in (metrics_original, metrics_related)],
        bins=20)
ax.set_xlabel('Time (s)')
ax.set_title('Times taken to generate a riddle')
ax.legend(['Original', 'Related'])
plt.savefig('original_related_time_histogram.png')
104 ```
105
106 ```python
# Grouped histogram of cpu_time for all three runs, saved to
# original_related_lazy_time_histogram.png.
fig, ax = plt.subplots()
ax.hist([run_metrics['cpu_time']
         for run_metrics in (metrics_original, metrics_related, metrics_lazy)],
        bins=20)
ax.set_xlabel('Time (s)')
ax.set_title('Times taken to generate a riddle')
ax.legend(['Original', 'Related', 'Lazy'])
plt.savefig('original_related_lazy_time_histogram.png')
115 ```
116
117 ```python
# Grouped histogram of cpu_time for the related and lazy runs with explicit
# bar colours, saved to related_lazy_time_histogram.png.
fig, ax = plt.subplots()
ax.hist([run_metrics['cpu_time']
         for run_metrics in (metrics_related, metrics_lazy)],
        bins=20,
        color=['#ff7f0e', '#2ca02c'])
ax.set_xlabel('Time (s)')
ax.set_title('Times taken to generate a riddle')
ax.legend(['Related', 'Lazy'])
plt.savefig('related_lazy_time_histogram.png')
126 ```
127
128 ```python
# Add two derived columns to the original-run metrics -- generated_lines and
# cpu_time, each divided by riddle_lines -- then display the table.
metrics_original['generated_per_line'] = (
    metrics_original['generated_lines'].div(metrics_original['riddle_lines']))
metrics_original['time_per_line'] = (
    metrics_original['cpu_time'].div(metrics_original['riddle_lines']))
metrics_original
132 ```
133
134 ```python
# Add two derived columns to the related-run metrics -- generated_lines and
# cpu_time, each divided by riddle_lines -- then display the table.
metrics_related['generated_per_line'] = (
    metrics_related['generated_lines'].div(metrics_related['riddle_lines']))
metrics_related['time_per_line'] = (
    metrics_related['cpu_time'].div(metrics_related['riddle_lines']))
metrics_related
138 ```
139
140 ```python
# Add two derived columns to the lazy-run metrics -- generated_lines and
# cpu_time, each divided by riddle_lines -- then display the table.
metrics_lazy['generated_per_line'] = (
    metrics_lazy['generated_lines'].div(metrics_lazy['riddle_lines']))
metrics_lazy['time_per_line'] = (
    metrics_lazy['cpu_time'].div(metrics_lazy['riddle_lines']))
metrics_lazy
144 ```
145
146 ```python
# Histogram of time_per_line for the original run only.
fig, ax = plt.subplots()
ax.hist([metrics_original['time_per_line']], bins=20)
ax.legend(['Original']);
150 ```
151
152 ```python
# Grouped histogram of time_per_line for the original and related runs.
fig, ax = plt.subplots()
ax.hist([run_metrics['time_per_line']
         for run_metrics in (metrics_original, metrics_related)],
        bins=20)
ax.legend(['Original', 'Related']);
157 ```
158
159 ```python
# Grouped histogram of time_per_line for all three runs.
fig, ax = plt.subplots()
ax.hist([metrics_original.time_per_line,
         metrics_related.time_per_line,
         metrics_lazy.time_per_line], bins=20)
# Trailing semicolon stops the notebook from echoing the Legend object,
# matching the other plotting cells.
ax.legend(['Original', 'Related', 'Lazy']);
165 ```
166
167 ```python
# Show the colours of matplotlib's current property cycle.
# NOTE(review): presumably used to look up the hex codes passed as `color=`
# in the two-series histograms -- confirm.
plt.rcParams['axes.prop_cycle'].by_key()['color']
169 ```
170
171 ```python
# Print each legend entry's label together with the face colour of the
# first bar in its handle.
# NOTE(review): `ax` is whichever axes the most recently run plotting cell
# left behind, and get_legend_handles_labels() only reports artists that
# carry a label -- the hist() calls in this notebook pass none, so this loop
# may print nothing; confirm.
for bars, column in zip(*ax.get_legend_handles_labels()):
    color = bars[0].get_facecolor()
    print(column, color)
175 ```
176
177 ```python
# Grouped histogram of time_per_line for the related and lazy runs with
# explicit bar colours.
fig, ax = plt.subplots()
ax.hist([run_metrics['time_per_line']
         for run_metrics in (metrics_related, metrics_lazy)],
        bins=20,
        color=['#ff7f0e', '#2ca02c'])
ax.legend(['Related', 'Lazy']);
183 ```
184
185 ```python
# Kernel density estimates of time_per_line for the three runs on shared
# axes, with the x-axis limited to 0-2.
ax = metrics_original['time_per_line'].plot.kde(xlim=(0, 2))
for run_metrics in (metrics_related, metrics_lazy):
    run_metrics['time_per_line'].plot.kde(ax=ax)
189 ```
190
191 ```python
# Summary statistics of the original-run metrics; by this point the table
# also holds the generated_per_line and time_per_line columns.
metrics_original.describe()
193 ```
194
195 ```python
# Summary statistics of the related-run metrics; by this point the table
# also holds the generated_per_line and time_per_line columns.
metrics_related.describe()
197 ```
198
199 ```python
# Summary statistics of the lazy-run metrics; by this point the table also
# holds the generated_per_line and time_per_line columns.
metrics_lazy.describe()
201 ```
202
203 ```python
# Ratio of mean time_per_line: original run over related run.
metrics_original.time_per_line.mean() / metrics_related.time_per_line.mean()
205 ```
206
207 ```python
# Ratio of median time_per_line: original run over related run.
metrics_original.time_per_line.median() / metrics_related.time_per_line.median()
209 ```
210
211 ```python
# Ratio of mean time_per_line: related run over lazy run.
metrics_related.time_per_line.mean() / metrics_lazy.time_per_line.mean()
213 ```
214
215 ```python
# Ratio of median time_per_line: related run over lazy run.
metrics_related.time_per_line.median() / metrics_lazy.time_per_line.median()
217 ```
218
219 ```python
# Ratio of mean time_per_line: original run over lazy run.
metrics_original.time_per_line.mean() / metrics_lazy.time_per_line.mean()
221 ```
222
223 ```python
# Ratio of median time_per_line: original run over lazy run.
metrics_original.time_per_line.median() / metrics_lazy.time_per_line.median()
225 ```
226
227 ```python
# Ratio of mean wall_time: original run over related run.
metrics_original.wall_time.mean() / metrics_related.wall_time.mean()
229 ```
230
231 ```python
# Ratio of mean wall_time: related run over lazy run.
metrics_related.wall_time.mean() / metrics_lazy.wall_time.mean()
233 ```
234
235 ```python
# Ratio of mean wall_time: original run over lazy run.
metrics_original.wall_time.mean() / metrics_lazy.wall_time.mean()
237 ```
238
239 ```python
# Load the profile saved in filtered.stats and print the ten entries with
# the largest internal time, with directory prefixes stripped from file names.
stats = pstats.Stats('filtered.stats')
stats.strip_dirs()
stats.sort_stats(SortKey.TIME)
stats.print_stats(10)
242 ```
243
244 ```python
# Load the profile saved in lazy.stats and print the ten entries with the
# largest internal time, with directory prefixes stripped from file names.
stats2 = pstats.Stats('lazy.stats')
stats2.strip_dirs()
stats2.sort_stats(SortKey.TIME)
stats2.print_stats(10)
247 ```
248
249 ```python
250
251 ```