Optimised riddle creation
[riddle-generator.git] / creation_analysis.md
diff --git a/creation_analysis.md b/creation_analysis.md
new file mode 100644 (file)
index 0000000..405d559
--- /dev/null
@@ -0,0 +1,251 @@
+---
+jupyter:
+  jupytext:
+    formats: ipynb,md
+    text_representation:
+      extension: .md
+      format_name: markdown
+      format_version: '1.3'
+      jupytext_version: 1.14.5
+  kernelspec:
+    display_name: Python 3 (ipykernel)
+    language: python
+    name: python3
+---
+
+```python
+import pandas as pd
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+%matplotlib inline
+import pstats
+from pstats import SortKey
+
+from riddle_definitions import *
+import random
+```
+
+```python
+distances = [edit_distance(random.choice(dictionary), 
+                           random.choice(dictionary))  # both words drawn independently, so a pair may repeat a word
+             for _ in range(10000)]  # 10,000 sampled pairs
+distances = pd.Series(distances)  # wrap in a Series to get describe()/boolean filtering
+distances.describe()  # summary statistics of the sampled edit_distance values
+```
+
+```python
+distances[distances <= 3].count() / distances.count()  # fraction of sampled pairs whose distance is at most 3
+```
+
+```python
+metrics_original = pd.read_csv('metrics_original.csv')  # relative path: resolved against the working directory
+metrics_original  # bare trailing expression so the notebook displays the frame
+```
+
+```python
+metrics_related = pd.read_csv('metrics_related.csv')  # relative path: resolved against the working directory
+metrics_related  # bare trailing expression so the notebook displays the frame
+```
+
+```python
+metrics_lazy = pd.read_csv('metrics_lazy.csv')  # relative path: resolved against the working directory
+metrics_lazy  # bare trailing expression so the notebook displays the frame
+```
+
+```python
+metrics_original.describe()  # per-column summary statistics (count, mean, std, min, quartiles, max)
+```
+
+```python
+metrics_related.describe()  # per-column summary statistics (count, mean, std, min, quartiles, max)
+```
+
+```python
+metrics_lazy.describe()  # per-column summary statistics (count, mean, std, min, quartiles, max)
+```
+
+```python
+fig, ax = plt.subplots(1, 1)  # a single axes shared by all three histograms
+
+metrics_original.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5)  # alpha=0.5 keeps overlapping bars visible
+metrics_related.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5)  # each call bins its own series, so bin edges differ between series
+metrics_lazy.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5);  # trailing ';' suppresses the cell's text output
+```
+
+```python
+ax = metrics_original.cpu_time.plot.hist(bins=20, alpha=0.5)  # no ax given: keep the axes pandas returns and reuse it below
+metrics_related.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5)  # overlaid on the same axes
+metrics_lazy.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5);  # trailing ';' suppresses the cell's text output
+```
+
+```python
+ax = metrics_original.cpu_time.plot.kde(xlim=(0, 10))  # kernel density estimate; x-axis limited to 0..10
+metrics_related.cpu_time.plot.kde(ax=ax)  # drawn on the same axes for comparison
+metrics_lazy.cpu_time.plot.kde(ax=ax);  # trailing ';' suppresses the cell's text output
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_original.cpu_time], bins=20)
+ax.legend(['Original', 'Related', 'Related 2'])
+ax.set_title('Times taken to generate a riddle')
+ax.set_xlabel('Time (s)')
+plt.savefig('original_time_histogram.png')
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_original.cpu_time, 
+         metrics_related.cpu_time], bins=20)  # a list of series is drawn as grouped bars over one shared set of 20 bins
+ax.legend(['Original', 'Related'])  # labels follow the order of the series passed to hist
+ax.set_title('Times taken to generate a riddle')
+ax.set_xlabel('Time (s)')
+plt.savefig('original_related_time_histogram.png')  # saved as a PNG under this relative path
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_original.cpu_time, 
+         metrics_related.cpu_time, 
+         metrics_lazy.cpu_time], bins=20)  # three series drawn as grouped bars over one shared set of 20 bins
+ax.legend(['Original', 'Related', 'Lazy'])  # labels follow the order of the series passed to hist
+ax.set_title('Times taken to generate a riddle')
+ax.set_xlabel('Time (s)')
+plt.savefig('original_related_lazy_time_histogram.png')  # saved as a PNG under this relative path
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_related.cpu_time, 
+         metrics_lazy.cpu_time], bins=20,
+        color=['#ff7f0e', '#2ca02c'])  # explicit colours, one per series; presumably chosen to match the three-series plot (these are the 2nd/3rd colours of matplotlib's default cycle)
+ax.legend(['Related', 'Lazy'])  # labels follow the order of the series passed to hist
+ax.set_title('Times taken to generate a riddle')
+ax.set_xlabel('Time (s)')
+plt.savefig('related_lazy_time_histogram.png')  # saved as a PNG under this relative path
+```
+
+```python
+metrics_original['generated_per_line'] = metrics_original.generated_lines / metrics_original.riddle_lines  # row-wise ratio, stored as a new column
+metrics_original['time_per_line'] = metrics_original.cpu_time / metrics_original.riddle_lines  # cpu_time normalised by riddle_lines, row-wise
+metrics_original  # display the frame including the two new columns
+```
+
+```python
+metrics_related['generated_per_line'] = metrics_related.generated_lines / metrics_related.riddle_lines  # row-wise ratio, stored as a new column
+metrics_related['time_per_line'] = metrics_related.cpu_time / metrics_related.riddle_lines  # cpu_time normalised by riddle_lines, row-wise
+metrics_related  # display the frame including the two new columns
+```
+
+```python
+metrics_lazy['generated_per_line'] = metrics_lazy.generated_lines / metrics_lazy.riddle_lines  # row-wise ratio, stored as a new column
+metrics_lazy['time_per_line'] = metrics_lazy.cpu_time / metrics_lazy.riddle_lines  # cpu_time normalised by riddle_lines, row-wise
+metrics_lazy  # display the frame including the two new columns
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_original.time_per_line], bins=20)  # distribution of time_per_line for the original metrics only
+ax.legend(['Original']);  # trailing ';' suppresses the Legend repr in the cell output
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_original.time_per_line, 
+         metrics_related.time_per_line], bins=20)  # two series drawn as grouped bars over one shared set of 20 bins
+ax.legend(['Original', 'Related']);  # trailing ';' suppresses the Legend repr in the cell output
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_original.time_per_line, 
+         metrics_related.time_per_line, 
+         metrics_lazy.time_per_line], bins=20)
+ax.legend(['Original', 'Related', 'Lazy'])
+```
+
+```python
+plt.rcParams['axes.prop_cycle'].by_key()['color']  # colours of the active property cycle, in the order matplotlib assigns them
+```
+
+```python
+for bars, column in zip(*ax.get_legend_handles_labels()):
+  color = bars[0].get_facecolor()
+  print(column, color)
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_related.time_per_line, 
+         metrics_lazy.time_per_line], bins=20,
+       color=['#ff7f0e', '#2ca02c'])  # explicit colours, one per series; presumably chosen to match the three-series plot (2nd/3rd colours of matplotlib's default cycle)
+ax.legend(['Related', 'Lazy']);  # trailing ';' suppresses the Legend repr in the cell output
+```
+
+```python
+ax = metrics_original.time_per_line.plot.kde(xlim=(0, 2))  # kernel density estimate; x-axis limited to 0..2
+metrics_related.time_per_line.plot.kde(ax=ax)  # drawn on the same axes for comparison
+metrics_lazy.time_per_line.plot.kde(ax=ax);  # trailing ';' suppresses the cell's text output
+```
+
+```python
+metrics_original.describe()  # repeated after generated_per_line / time_per_line were added, so the summary now includes them
+```
+
+```python
+metrics_related.describe()  # repeated after generated_per_line / time_per_line were added, so the summary now includes them
+```
+
+```python
+metrics_lazy.describe()  # repeated after generated_per_line / time_per_line were added, so the summary now includes them
+```
+
+```python
+metrics_original.time_per_line.mean() / metrics_related.time_per_line.mean()  # ratio of mean time_per_line, original over related (>1: original's mean is larger)
+```
+
+```python
+metrics_original.time_per_line.median() / metrics_related.time_per_line.median()  # same comparison using medians, which are less sensitive to outliers
+```
+
+```python
+metrics_related.time_per_line.mean() / metrics_lazy.time_per_line.mean()  # ratio of mean time_per_line, related over lazy (>1: related's mean is larger)
+```
+
+```python
+metrics_related.time_per_line.median() / metrics_lazy.time_per_line.median()  # same comparison using medians, which are less sensitive to outliers
+```
+
+```python
+metrics_original.time_per_line.mean() / metrics_lazy.time_per_line.mean()  # ratio of mean time_per_line, original over lazy (>1: original's mean is larger)
+```
+
+```python
+metrics_original.time_per_line.median() / metrics_lazy.time_per_line.median()  # same comparison using medians, which are less sensitive to outliers
+```
+
+```python
+metrics_original.wall_time.mean() / metrics_related.wall_time.mean()  # ratio of mean wall_time, original over related; not normalised by riddle_lines
+```
+
+```python
+metrics_related.wall_time.mean() / metrics_lazy.wall_time.mean()  # ratio of mean wall_time, related over lazy; not normalised by riddle_lines
+```
+
+```python
+metrics_original.wall_time.mean() / metrics_lazy.wall_time.mean()  # ratio of mean wall_time, original over lazy; not normalised by riddle_lines
+```
+
+```python
+stats = pstats.Stats('filtered.stats')  # load a saved profiler dump from this relative path
+stats.strip_dirs().sort_stats(SortKey.TIME).print_stats(10)  # drop directory prefixes, sort by internal time, print the top 10 entries
+```
+
+```python
+stats2 = pstats.Stats('lazy.stats')  # load a saved profiler dump from this relative path
+stats2.strip_dirs().sort_stats(SortKey.TIME).print_stats(10)  # drop directory prefixes, sort by internal time, print the top 10 entries
+```
+
+```python
+
+```