Added profiling
[advent-of-code-22.git] / profiling / profiling.md
diff --git a/profiling/profiling.md b/profiling/profiling.md
new file mode 100644 (file)
index 0000000..88811ae
--- /dev/null
@@ -0,0 +1,360 @@
+---
+jupyter:
+  jupytext:
+    formats: ipynb,md
+    text_representation:
+      extension: .md
+      format_name: markdown
+      format_version: '1.3'
+      jupytext_version: 1.11.1
+  kernelspec:
+    display_name: Python 3 (ipykernel)
+    language: python
+    name: python3
+---
+
+```python Collapsed="false"
+import glob
+import json
+import pandas as pd
+import numpy as np
+import datetime
+import re
+
+import matplotlib.pyplot as plt
+%matplotlib inline
+```
+
+```python
+! cd .. && cabal install
+```
+
+```python Collapsed="false" tags=[]
+! cd .. && for i in {01..25}; do cabal run advent${i} --enable-profiling -- +RTS -N -pj -s -hT ; done
+```
+
+```python
+! rm ../times.csv
+! rm ../times_raw.csv
+```
+
+```python Collapsed="false" tags=[]
+! cd .. && for i in {01..25}; do /usr/bin/time -f "%C,%S,%E,%M" -o times.csv -a cabal run advent${i}; done
+```
+
+```python Collapsed="false" tags=[]
+! cd .. && for i in {01..25}; do /usr/bin/time -f "%C,%S,%E,%M" -o times_raw.csv -a advent${i}; done
+```
+
+```python
+!mv ../*prof .
+```
+
+```python
+!mv ../times.csv .
+```
+
+```python
+!mv ../times_raw.csv .
+```
+
+```python
+!mv ../*hp .
+```
+
+```python
+! for f in *hp ; do hp2ps ${f} ; done
+```
+
+```python Collapsed="false"
+glob.glob('*prof')
+```
+
+```python Collapsed="false"
+profs = []
+for fn in glob.glob('*prof'):
+    with open(fn) as f:
+        j = json.load(f)
+        prof = {}
+        for n in 'program total_time total_alloc total_ticks initial_capabilities'.split():
+            prof[n] = j[n]
+        profs.append(prof)
+profs
+```
+
+```python Collapsed="false"
+performance = pd.DataFrame(profs).set_index('program').sort_index()
+performance
+```
+
+```python Collapsed="false"
+performance.total_ticks.plot.bar()
+```
+
+```python Collapsed="false"
+performance.total_ticks.plot.bar(logy=True)
+```
+
+```python Collapsed="false"
+performance.total_alloc.plot.bar()
+```
+
+```python Collapsed="false"
+performance.total_alloc.plot.bar(logy=True)
+```
+
+```python Collapsed="false"
+performance[['total_ticks', 'total_alloc']].plot.bar(
+    logy=True, secondary_y=['total_alloc'], 
+    figsize=(8, 6), title="Internal time and memory")
+plt.savefig('internal_time_and_memory_log.png')
+```
+
+```python Collapsed="false"
+performance[['total_ticks', 'total_alloc']].plot.bar(
+    logy=False, secondary_y=['total_alloc'], 
+    figsize=(8, 6), title="Internal time and memory")
+plt.savefig('internal_time_and_memory_linear.png')
+```
+
+```python
+# times = pd.read_csv('times.csv', 
+#                     names=['program', 'system', 'elapsed', 'memory'], 
+#                     index_col='program')
+# times.index = times.index.str.slice(start=len('cabal run '))
+# times.elapsed = pd.to_numeric(times.elapsed.str.slice(start=2))
+# times
+```
+
+```python
+today = datetime.date.today()
+today = datetime.datetime(year=today.year, month=today.month, day=today.day) - datetime.timedelta(seconds=1)
+today
+```
+
+```python
+epoch = datetime.datetime(year=1900, month=1, day=1)
+epoch
+```
+
+```python
+times = pd.read_csv('times_raw.csv', 
+                    names=['program', 'system', 'elapsed', 'memory'], 
+                    index_col='program')
+times.elapsed = (pd.to_datetime(times.elapsed, format="%M:%S.%f") - epoch)
+times.elapsed = times.elapsed.apply(lambda x: x.total_seconds())
+times
+```
+
+```python
+times.dtypes
+```
+
+```python Collapsed="false"
+times.describe()
+```
+
+```python Collapsed="false"
+performance = performance.merge(times, left_index=True, right_index=True)
+# performance.drop(index='advent15loop', inplace=True)
+performance
+```
+
+```python Collapsed="false"
+performance.columns
+```
+
+```python
+# performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
+performance.elapsed.plot.bar(
+    figsize=(8, 6), title="External time")
+plt.savefig('external_time.png')
+```
+
+```python
+# performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
+performance[['elapsed', 'memory']].plot.bar(
+    logy=False, secondary_y=['memory'], 
+    figsize=(8, 6), title="External time and memory")
+plt.savefig('external_time_and_memory.png')
+```
+
+```python
+# performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
+performance[['elapsed', 'memory']].plot.bar(
+    logy=True, secondary_y=['memory'], 
+    figsize=(8, 6), title="External time and memory")
+plt.savefig('external_time_and_memory_log.png')
+```
+
+```python Collapsed="false"
+# performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
+performance[['elapsed', 'memory']].plot.bar(
+    logy=False, secondary_y=['memory'], 
+    figsize=(8, 6), title="External time and memory")
+plt.savefig('external_time_and_memory_linear.png')
+```
+
+```python Collapsed="false"
+# performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
+performance[['total_ticks', 'elapsed']].plot.bar(
+    logy=True, secondary_y=['elapsed'], 
+    figsize=(8, 6), title="Internal vs external time")
+plt.savefig('internal_external_time.png')
+```
+
+```python Collapsed="false"
+# performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
+performance[['total_ticks', 'elapsed']].plot.bar(
+    logy=False, secondary_y=['elapsed'], 
+    figsize=(8, 6), title="Internal vs external time")
+plt.savefig('internal_external_time_linear.png')
+```
+
+```python Collapsed="false"
+# performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
+performance[['total_alloc', 'memory']].plot.bar(
+    logy=True, secondary_y=['memory'], 
+    figsize=(8, 6), title="Internal vs external memory")
+plt.savefig('internal_external_memory_log.png')
+```
+
+```python Collapsed="false"
+# performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
+performance[['total_alloc', 'memory']].plot.bar(
+    logy=False, secondary_y=['memory'], 
+    figsize=(8, 6), title="Internal vs external memory")
+plt.savefig('internal_external_memory_linear.png')
+```
+
+```python Collapsed="false"
+# performance['elapsed_adj'] = performance['elapsed'] - 0.28
+# performance
+```
+
+```python Collapsed="false"
+# performance[['total_time', 'elapsed_adj']].plot.bar(logy=True)
+```
+
+```python Collapsed="false"
+fig, ax = plt.subplots(ncols=3, figsize=(20,5))
+
+performance['elapsed'].plot.bar(ax=ax[2],
+    logy=True, 
+    title="Run times (wall clock), log scale",
+#     figsize=(10,8)
+    )
+ax[2].set_xlabel('Program')
+
+performance['elapsed'].plot.bar(ax=ax[0],
+    logy=False, 
+    title="Run times (wall clock), linear scale",
+#     figsize=(10,8)
+    )
+ax[0].set_xlabel('Program')
+
+performance['elapsed'].plot.bar(ax=ax[1],
+    logy=False, 
+    ylim=(0, 22),
+    title="Run times (wall clock), truncated linear scale",
+#     figsize=(10,8)
+    )
+ax[1].set_xlabel('Program')
+
+plt.savefig('run_times_combined.png')
+```
+
+```python Collapsed="false"
+fig, ax = plt.subplots(ncols=2, figsize=(13,5))
+
+performance['memory'].plot.bar(ax=ax[0],
+    logy=True, 
+    title="Memory used, log scale",
+#     figsize=(10,8)
+    )
+ax[0].set_xlabel('Program')
+
+performance['memory'].plot.bar(ax=ax[1],
+    logy=False, 
+    title="Memory used, linear scale",
+#     figsize=(10,8)
+    )
+ax[1].set_xlabel('Program')
+
+plt.savefig('memory_combined.png')
+```
+
+```python
+fig, ax = plt.subplots(ncols=2, figsize=(13,5))
+
+performance[['total_alloc', 'memory']].plot.bar(ax=ax[0],
+    logy=False, secondary_y=['memory'], 
+    title="Internal vs external memory, linear scale")
+ax[0].set_xlabel('Program')
+
+performance[['total_alloc', 'memory']].plot.bar(ax=ax[1],
+    logy=True, secondary_y=['memory'], 
+    title="Internal vs external memory. log scale")
+
+plt.savefig('internal_external_memory_combined.png')
+```
+
+```python Collapsed="false"
+# ax = performance['elapsed_adj'].plot.bar(logy=False, 
+#     title="Run times (wall clock), linear scale",
+#     figsize=(10,8))
+# ax.set_xlabel('Program')
+# plt.savefig('run_times_linear.png')
+```
+
+```python Collapsed="false"
+performance.columns
+```
+
+```python Collapsed="false"
+performance['memory'].plot.bar()
+```
+
+```python Collapsed="false"
+performance.plot.scatter('elapsed', 'total_alloc', logx=True, logy=True)
+```
+
+```python Collapsed="false"
+performance.plot.scatter('memory', 'total_alloc', logx=True, logy=True)
+```
+
+```python Collapsed="false"
+performance.plot.scatter('elapsed', 'total_ticks', logx=True, logy=True)
+```
+
+```python Collapsed="false"
+performance[['total_alloc', 'memory', 'elapsed']].to_csv('performance.csv')
+```
+
+```python Collapsed="false"
+print(performance[['total_alloc', 'elapsed', 'memory']].to_markdown(floatfmt=['0.0f', '0.0f', '.2f', '0.0f']))
+```
+
+```python
+line_counts = ! find .. -path ../dist-newstyle -prune -o -type f -name "Main.hs" -exec wc -l {} \;
+count_names = [re.search("(\d+) \.\./([^/]+)", l).groups([2, 1]) for l in line_counts if 'advent' in l if 'Main' in l]
+program_counts = pd.Series({n: int(c) for n, c in sorted([(c, n) for n, c in count_names])})
+program_counts
+```
+
+```python
+program_counts[::-1].plot.barh(figsize=(6, 9))
+plt.savefig('lines_of_code.png')
+```
+
+```python
+print(program_counts.to_markdown())
+```
+
+```python
+program_counts.median()
+```
+
+```python
+
+```