X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=creation_analysis.md;fp=creation_analysis.md;h=405d559986f70ccb9be55831f78a28402c76bcb5;hb=95c4c545a4abe7ef5f222b674da8535739ef1fcb;hp=0000000000000000000000000000000000000000;hpb=52008779b0281639e17a6570271dc7d5a3227b03;p=riddle-generator.git

diff --git a/creation_analysis.md b/creation_analysis.md
new file mode 100644
index 0000000..405d559
--- /dev/null
+++ b/creation_analysis.md
@@ -0,0 +1,259 @@
+---
+jupyter:
+  jupytext:
+    formats: ipynb,md
+    text_representation:
+      extension: .md
+      format_name: markdown
+      format_version: '1.3'
+      jupytext_version: 1.14.5
+  kernelspec:
+    display_name: Python 3 (ipykernel)
+    language: python
+    name: python3
+---
+
+```python
+import pandas as pd
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+%matplotlib inline
+import pstats
+from pstats import SortKey
+
+from riddle_definitions import *
+import random
+```
+
+```python
+# Edit distances between 10,000 randomly chosen pairs of entries from `dictionary`.
+distances = [edit_distance(random.choice(dictionary),
+                           random.choice(dictionary))
+             for _ in range(10000)]
+distances = pd.Series(distances)
+distances.describe()
+```
+
+```python
+# Fraction of the sampled pairs whose edit distance is at most 3.
+distances[distances <= 3].count() / distances.count()
+```
+
+```python
+# Load the recorded metrics for each of the three runs (original, related, lazy).
+metrics_original = pd.read_csv('metrics_original.csv')
+metrics_original
+```
+
+```python
+metrics_related = pd.read_csv('metrics_related.csv')
+metrics_related
+```
+
+```python
+metrics_lazy = pd.read_csv('metrics_lazy.csv')
+metrics_lazy
+```
+
+```python
+metrics_original.describe()
+```
+
+```python
+metrics_related.describe()
+```
+
+```python
+metrics_lazy.describe()
+```
+
+```python
+# Overlay the cpu_time histograms of all three runs on a single axes.
+fig, ax = plt.subplots(1, 1)
+
+metrics_original.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5)
+metrics_related.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5)
+metrics_lazy.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5);
+```
+
+```python
+ax = metrics_original.cpu_time.plot.hist(bins=20, alpha=0.5)
+metrics_related.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5)
+metrics_lazy.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5);
+```
+
+```python
+ax = metrics_original.cpu_time.plot.kde(xlim=(0, 10))
+metrics_related.cpu_time.plot.kde(ax=ax)
+metrics_lazy.cpu_time.plot.kde(ax=ax);
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_original.cpu_time], bins=20)
+# Only the original series is plotted here, so the legend has a single entry.
+ax.legend(['Original'])
+ax.set_title('Times taken to generate a riddle')
+ax.set_xlabel('Time (s)')
+plt.savefig('original_time_histogram.png')
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_original.cpu_time,
+         metrics_related.cpu_time], bins=20)
+ax.legend(['Original', 'Related'])
+ax.set_title('Times taken to generate a riddle')
+ax.set_xlabel('Time (s)')
+plt.savefig('original_related_time_histogram.png')
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_original.cpu_time,
+         metrics_related.cpu_time,
+         metrics_lazy.cpu_time], bins=20)
+ax.legend(['Original', 'Related', 'Lazy'])
+ax.set_title('Times taken to generate a riddle')
+ax.set_xlabel('Time (s)')
+plt.savefig('original_related_lazy_time_histogram.png')
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_related.cpu_time,
+         metrics_lazy.cpu_time], bins=20,
+        color=['#ff7f0e', '#2ca02c'])
+ax.legend(['Related', 'Lazy'])
+ax.set_title('Times taken to generate a riddle')
+ax.set_xlabel('Time (s)')
+plt.savefig('related_lazy_time_histogram.png')
+```
+
+```python
+# Per-riddle-line figures: generated_lines and cpu_time divided by riddle_lines.
+metrics_original['generated_per_line'] = metrics_original.generated_lines / metrics_original.riddle_lines
+metrics_original['time_per_line'] = metrics_original.cpu_time / metrics_original.riddle_lines
+metrics_original
+```
+
+```python
+metrics_related['generated_per_line'] = metrics_related.generated_lines / metrics_related.riddle_lines
+metrics_related['time_per_line'] = metrics_related.cpu_time / metrics_related.riddle_lines
+metrics_related
+```
+
+```python
+metrics_lazy['generated_per_line'] = metrics_lazy.generated_lines / metrics_lazy.riddle_lines
+metrics_lazy['time_per_line'] = metrics_lazy.cpu_time / metrics_lazy.riddle_lines
+metrics_lazy
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_original.time_per_line], bins=20)
+ax.legend(['Original']);
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_original.time_per_line,
+         metrics_related.time_per_line], bins=20)
+ax.legend(['Original', 'Related']);
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_original.time_per_line,
+         metrics_related.time_per_line,
+         metrics_lazy.time_per_line], bins=20)
+ax.legend(['Original', 'Related', 'Lazy'])
+```
+
+```python
+plt.rcParams['axes.prop_cycle'].by_key()['color']
+```
+
+```python
+for bars, column in zip(*ax.get_legend_handles_labels()):
+    color = bars[0].get_facecolor()
+    print(column, color)
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_related.time_per_line,
+         metrics_lazy.time_per_line], bins=20,
+        color=['#ff7f0e', '#2ca02c'])
+ax.legend(['Related', 'Lazy']);
+```
+
+```python
+ax = metrics_original.time_per_line.plot.kde(xlim=(0, 2))
+metrics_related.time_per_line.plot.kde(ax=ax)
+metrics_lazy.time_per_line.plot.kde(ax=ax);
+```
+
+```python
+metrics_original.describe()
+```
+
+```python
+metrics_related.describe()
+```
+
+```python
+metrics_lazy.describe()
+```
+
+```python
+# Ratios between runs; a value above 1 means the run in the numerator took longer.
+metrics_original.time_per_line.mean() / metrics_related.time_per_line.mean()
+```
+
+```python
+metrics_original.time_per_line.median() / metrics_related.time_per_line.median()
+```
+
+```python
+metrics_related.time_per_line.mean() / metrics_lazy.time_per_line.mean()
+```
+
+```python
+metrics_related.time_per_line.median() / metrics_lazy.time_per_line.median()
+```
+
+```python
+metrics_original.time_per_line.mean() / metrics_lazy.time_per_line.mean()
+```
+
+```python
+metrics_original.time_per_line.median() / metrics_lazy.time_per_line.median()
+```
+
+```python
+metrics_original.wall_time.mean() / metrics_related.wall_time.mean()
+```
+
+```python
+metrics_related.wall_time.mean() / metrics_lazy.wall_time.mean()
+```
+
+```python
+metrics_original.wall_time.mean() / metrics_lazy.wall_time.mean()
+```
+
+```python
+# Print the ten entries with the largest internal time from each saved profile.
+stats = pstats.Stats('filtered.stats')
+stats.strip_dirs().sort_stats(SortKey.TIME).print_stats(10)
+```
+
+```python
+stats2 = pstats.Stats('lazy.stats')
+stats2.strip_dirs().sort_stats(SortKey.TIME).print_stats(10)
+```
+
+```python
+
+```