Added profiling
[advent-of-code-22.git] / profiling / modules.md
diff --git a/profiling/modules.md b/profiling/modules.md
new file mode 100644 (file)
index 0000000..7af56ce
--- /dev/null
@@ -0,0 +1,232 @@
+---
+jupyter:
+  jupytext:
+    formats: ipynb,md
+    text_representation:
+      extension: .md
+      format_name: markdown
+      format_version: '1.3'
+      jupytext_version: 1.11.1
+  kernelspec:
+    display_name: Python 3 (ipykernel)
+    language: python
+    name: python3
+---
+
+```python
+import os, glob
+import collections
+import pandas as pd
+import numpy as np
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+%matplotlib inline
+```
+
+```python
+with open('../advent-of-code22.cabal') as f:
+    build_depends = [l for l in f.readlines() if 'build-depends' in l]
+build_depends
+```
+
+```python
+cabal_file = open('../advent-of-code22.cabal').read()
+executables = cabal_file.split('executable')[2:]
+executables[:3]
+```
+
+```python
+e = executables[1]
+e.strip().split('build-depends: ')[1].split(',')
+```
+
+```python
+def extract(line):
+    parts = line.strip().split('build-depends: ')
+    name = parts[0].split()[0]
+    if len(parts) > 1:
+        depends = [p.strip() for p in parts[1].split('\n')[0].split(',') if 'base' not in p]
+    else:
+        depends = []
+    return name, depends       
+```
+
+```python
+modules = {e: ms for e, ms in [extract(e) for e in executables] if e.endswith(tuple(str(i) for i in range(10)))}
+modules
+```
+
+```python
+all_modules = set(m for p in modules for m in modules[p])
+modules_df = pd.DataFrame.from_dict({p: {m: m in modules[p] for m in sorted(all_modules)} for p in modules}, orient='index').sort_index()
+modules_df
+```
+
+```python
+print(modules_df.sum().sort_values(ascending=False).to_markdown())
+```
+
+```python tags=[]
+sorted_modules = modules_df.sum().sort_values(ascending=False).index.values
+sorted_modules
+```
+
+```python tags=[]
+modules_sorted_cols = modules_df[sorted_modules]
+modules_sorted_cols
+```
+
+```python
+modules_scatter = modules_df.stack().reset_index()
+modules_scatter.columns = ['program', 'module', 'present']
+modules_scatter = modules_scatter[modules_scatter.present]
+modules_scatter
+```
+
+```python tags=[]
+modules_scatter.plot.scatter(x='program', y='module', s=80, rot=45, figsize=(10, 6))
+```
+
+```python
+cmap = mpl.colors.ListedColormap(['white', 'blue'])
+
+fig, ax = plt.subplots(figsize=(10, 10))
+ax.imshow(modules_df.to_numpy().T, cmap=cmap)
+plt.xticks(range(modules_df.index.size), labels=modules_df.index.values, rotation=90);
+plt.yticks(range(modules_df.columns.size), labels=modules_df.columns.values);
+
+ax.xaxis.set_minor_locator(mpl.ticker.MultipleLocator(0.5))
+ax.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(0.5))
+ax.grid(which='minor', axis='both', linestyle='-', color='silver', linewidth=1.5);
+plt.savefig('packages.png');
+```
+
+```python
+cmap = mpl.colors.ListedColormap(['white', 'blue'])
+
+fig, ax = plt.subplots(figsize=(10, 10))
+ax.imshow(modules_sorted_cols.to_numpy().T, cmap=cmap)
+plt.xticks(range(modules_sorted_cols.index.size), labels=modules_sorted_cols.index.values, rotation=90);
+plt.yticks(range(modules_sorted_cols.columns.size), labels=modules_sorted_cols.columns.values);
+
+ax.xaxis.set_minor_locator(mpl.ticker.MultipleLocator(0.5))
+ax.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(0.5))
+ax.grid(which='minor', axis='both', linestyle='-', color='silver', linewidth=1.5);
+plt.savefig('packages_sorted.png');
+```
+
+```python
+mains = list(sorted(f for f in glob.glob('../advent*/Main.hs')))
+mains
+```
+
+```python
+main_imports = {}
+
+for m in mains:
+    with open(m) as f:
+        lines = f.readlines()
+        import_lines = [l for l in lines if l.strip().startswith('import') if 'Debug.Trace' not in l]
+        imports = []
+        for i in import_lines:
+            words = i.strip().split()
+            if 'qualified' in i:
+                imports.append((words[2], True))
+            else:
+                imports.append((words[1], False))
+    main_imports[m.split('/')[1]] = imports
+
+main_imports
+```
+
+```python
+import_counts = collections.Counter(l for ls in main_imports.values() for l in ls)
+import_counts.most_common()
+```
+
+```python
+main_imports_unqualified = {m: set(i[0] for i in main_imports[m]) for m in main_imports}
+main_imports_unqualified
+```
+
+```python
+import_counts_unqualified = collections.Counter(l for ls in main_imports_unqualified.values() for l in ls)
+import_counts_unqualified.most_common()
+```
+
+```python
+all_imports = set(m for p in main_imports_unqualified for m in main_imports_unqualified[p])
+imports_df = pd.DataFrame.from_dict(
+    {p: {m: m in main_imports_unqualified[p] 
+         for m in sorted(all_imports)} 
+     for p in main_imports_unqualified}, 
+    orient='index').sort_index()
+imports_df
+```
+
+```python
+print(imports_df.sum().sort_values(ascending=False).to_markdown())
+```
+
+```python
+imports_scatter = imports_df.stack().reset_index()
+imports_scatter.columns = ['program', 'module', 'present']
+imports_scatter = imports_scatter[imports_scatter.present]
+imports_scatter
+```
+
+```python tags=[]
+imports_scatter.plot.scatter(x='program', y='module', s=80, rot=45, figsize=(10, 10))
+```
+
+```python
+imports_df.columns.size
+```
+
+```python tags=[]
+sorted_imports = imports_df.sum().sort_values(ascending=False).index.values
+sorted_imports
+```
+
+```python tags=[]
+imports_sorted_cols = imports_df[sorted_imports]
+imports_sorted_cols
+```
+
+```python
+cmap = mpl.colors.ListedColormap(['white', 'blue'])
+
+fig, ax = plt.subplots(figsize=(10, 10))
+ax.imshow(imports_df.to_numpy().T, cmap=cmap)
+plt.xticks(range(imports_df.index.size), labels=imports_df.index.values, rotation=90);
+plt.yticks(range(imports_df.columns.size), labels=imports_df.columns.values);
+
+ax.xaxis.set_minor_locator(mpl.ticker.MultipleLocator(0.5))
+ax.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(0.5))
+ax.grid(which='minor', axis='both', linestyle='-', color='silver', linewidth=1.5);
+plt.savefig('imports.png');
+```
+
+```python
+cmap = mpl.colors.ListedColormap(['white', 'blue'])
+
+fig, ax = plt.subplots(figsize=(10, 10))
+ax.imshow(imports_sorted_cols.to_numpy().T, cmap=cmap)
+plt.xticks(range(imports_sorted_cols.index.size), labels=imports_sorted_cols.index.values, rotation=90);
+plt.yticks(range(imports_sorted_cols.columns.size), labels=imports_sorted_cols.columns.values);
+
+ax.xaxis.set_minor_locator(mpl.ticker.MultipleLocator(0.5))
+ax.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(0.5))
+ax.grid(which='minor', axis='both', linestyle='-', color='silver', linewidth=1.5);
+plt.savefig('imports_sorted.png');
+```
+
+```python
+# Show the version of matplotlib loaded in this kernel.
+import matplotlib as mpl
+mpl.__version__
+```
+
+```python
+
+```