Tweaked some parsing code
[advent-of-code-21.git] / profiling / modules.md
1 ---
2 jupyter:
3 jupytext:
4 formats: ipynb,md
5 text_representation:
6 extension: .md
7 format_name: markdown
8 format_version: '1.3'
9 jupytext_version: 1.11.1
10 kernelspec:
11 display_name: Python 3 (ipykernel)
12 language: python
13 name: python3
14 ---
15
16 ```python
17 import os, glob
18 import collections
19 import pandas as pd
20 import numpy as np
21
22 import matplotlib as mpl
23 import matplotlib.pyplot as plt
24 %matplotlib inline
25 ```
26
27 ```python
# Gather every line of the cabal file that mentions build-depends.
with open('../advent-of-code21.cabal') as f:
    build_depends = [row for row in f if 'build-depends' in row]
build_depends
31 ```
32
33 ```python
# Read the whole cabal file. The context manager closes the handle as soon as
# the text is loaded instead of leaving it open until garbage collection.
with open('../advent-of-code21.cabal') as f:
    cabal_file = f.read()

# Cut the text at every occurrence of the word 'executable'. The first two
# pieces are discarded (presumably the file preamble and a stanza that is not
# one of the daily programs -- confirm against the cabal file).
executables = cabal_file.split('executable')[2:]
executables[:3]
37 ```
38
39 ```python
# Try the dependency split on a single stanza before wrapping it in a function.
e = executables[1]
(
    e.strip()
    .split('build-depends: ')[1]
    .split(',')
)
42 ```
43
44 ```python
def extract(line):
    """Return the program name and non-``base`` dependencies of one stanza.

    ``line`` is the text of a single stanza, i.e. one piece obtained by
    splitting the cabal file on the word ``executable``.

    Returns a ``(name, depends)`` pair:

    * ``name`` is the first whitespace-separated word before
      ``'build-depends: '``, or ``''`` when the stanza has no such word.
    * ``depends`` lists the comma-separated entries found on the first line
      after ``'build-depends: '`` (continuation lines are not read).  Each
      entry is stripped but otherwise kept as written, version constraint
      included.  The ``base`` package and blank entries are left out.  The
      list is empty when the stanza has no ``build-depends`` field.
    """
    import re

    parts = line.strip().split('build-depends: ')
    header_words = parts[0].split()
    name = header_words[0] if header_words else ''
    if len(parts) < 2:
        return name, []

    depends = []
    for raw_entry in parts[1].split('\n')[0].split(','):
        entry = raw_entry.strip()
        if not entry:
            # A trailing comma leaves an empty piece behind; it is not a package.
            continue
        # Compare the package *name* rather than searching for the substring
        # 'base', so packages such as `base-compat` are not thrown away.
        package = re.match(r'[A-Za-z0-9-]*', entry).group(0)
        if package == 'base':
            continue
        depends.append(entry)
    return name, depends
53 ```
54
55 ```python
# Map each program to its dependency list, keeping only the stanzas whose
# name ends in a decimal digit.
modules = {
    program: packages
    for program, packages in map(extract, executables)
    if program.endswith(tuple('0123456789'))
}
modules
58 ```
59
60 ```python
# Every package named by at least one program.
all_modules = set().union(*modules.values())

# Boolean table: one row per program, one column per package, True where the
# program lists that package.
modules_df = pd.DataFrame.from_dict(
    {
        program: {package: package in packages for package in sorted(all_modules)}
        for program, packages in modules.items()
    },
    orient='index',
).sort_index()
modules_df
64 ```
65
66 ```python
# Markdown table of how many programs use each package, most used first.
print(
    modules_df
    .sum()
    .sort_values(ascending=False)
    .to_markdown()
)
68 ```
69
70 ```python
# Long format: one row per (program, package) cell of the boolean table.
modules_scatter = modules_df.stack().reset_index()
modules_scatter.columns = ['program', 'module', 'present']
# Keep only the pairs where the flag is True.
modules_scatter = modules_scatter.loc[modules_scatter['present']]
modules_scatter
75 ```
76
77 ```python tags=[]
# One dot per (program, package) pair that is in use.
modules_scatter.plot(
    kind='scatter',
    x='program',
    y='module',
    s=80,
    rot=45,
    figsize=(10, 6),
)
79 ```
80
81 ```python
# Two-entry colour map for the boolean table (intended: False -> white,
# True -> blue).
cmap = mpl.colors.ListedColormap(['white', 'blue'])

fig, ax = plt.subplots(figsize=(10, 10))
# Transposed so programs run along the x axis and packages along the y axis.
ax.imshow(modules_df.T.to_numpy(), cmap=cmap)
plt.xticks(range(len(modules_df.index)), labels=modules_df.index.values, rotation=90);
plt.yticks(range(len(modules_df.columns)), labels=modules_df.columns.values);

# Minor ticks at multiples of 0.5 carry the silver grid drawn below.
ax.xaxis.set_minor_locator(mpl.ticker.MultipleLocator(0.5))
ax.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(0.5))
ax.grid(which='minor', axis='both', linestyle='-', color='silver', linewidth=1.5)
plt.savefig('packages.png')
93 ```
94
95 ```python
# Sorted paths of every program's Main.hs, one directory level up.
mains = sorted(glob.glob('../advent*/Main.hs'))
mains
98 ```
99
100 ```python
# For each Main.hs, record its imports as (module, is_qualified) pairs, keyed
# by the second '/'-separated component of the path (the program directory
# for paths shaped like '../advent01/Main.hs').
main_imports = {}

for m in mains:
    with open(m) as f:
        lines = f.readlines()

    imports = []
    for l in lines:
        words = l.split()
        # Require `import` as a whole word with something after it, so that
        # identifiers merely starting with "import" are not mistaken for
        # import statements and a bare `import` cannot raise IndexError.
        if len(words) < 2 or words[0] != 'import':
            continue
        if 'Debug.Trace' in l:
            # Debugging imports are deliberately left out of the statistics.
            continue
        if words[1] == 'qualified':
            # Prefix form: import qualified Data.Map as M
            if len(words) > 2:
                imports.append((words[2], True))
        else:
            # Plain import, or the postfix form `import Data.Map qualified as M`
            # (ImportQualifiedPost), where the module is still the second word.
            imports.append((words[1], words[2:3] == ['qualified']))
    main_imports[m.split('/')[1]] = imports

main_imports
117 ```
118
119 ```python
# Tally each (module, is_qualified) pair across all programs.
import_counts = collections.Counter()
for program_imports in main_imports.values():
    import_counts.update(program_imports)
import_counts.most_common()
122 ```
123
124 ```python
# Drop the qualified flag: each program maps to the set of module names it imports.
main_imports_unqualified = {
    program: {module for module, _ in entries}
    for program, entries in main_imports.items()
}
main_imports_unqualified
127 ```
128
129 ```python
# Count how many programs import each module, ignoring qualification.
import_counts_unqualified = collections.Counter()
for imported_modules in main_imports_unqualified.values():
    import_counts_unqualified.update(imported_modules)
import_counts_unqualified.most_common()
132 ```
133
134 ```python
# Every module imported by at least one program.
all_imports = set().union(*main_imports_unqualified.values())

# Boolean table: one row per program, one column per module, True where the
# program imports that module.
imports_df = pd.DataFrame.from_dict(
    {
        program: {module: module in imported for module in sorted(all_imports)}
        for program, imported in main_imports_unqualified.items()
    },
    orient='index',
).sort_index()
imports_df
142 ```
143
144 ```python
# Markdown table of how many programs import each module, most used first.
print(
    imports_df
    .sum()
    .sort_values(ascending=False)
    .to_markdown()
)
146 ```
147
148 ```python
# Long format: one row per (program, module) cell of the boolean table.
imports_scatter = imports_df.stack().reset_index()
imports_scatter.columns = ['program', 'module', 'present']
# Keep only the pairs where the flag is True.
imports_scatter = imports_scatter.loc[imports_scatter['present']]
imports_scatter
153 ```
154
155 ```python tags=[]
# One dot per (program, module) pair that is in use.
imports_scatter.plot(
    kind='scatter',
    x='program',
    y='module',
    s=80,
    rot=45,
    figsize=(10, 10),
)
157 ```
158
159 ```python
# Number of distinct modules, i.e. columns of the boolean table.
len(imports_df.columns)
161 ```
162
163 ```python
# Two-entry colour map for the boolean table (intended: False -> white,
# True -> blue).
cmap = mpl.colors.ListedColormap(['white', 'blue'])

fig, ax = plt.subplots(figsize=(10, 10))
# Transposed so programs run along the x axis and modules along the y axis.
ax.imshow(imports_df.T.to_numpy(), cmap=cmap)
plt.xticks(range(len(imports_df.index)), labels=imports_df.index.values, rotation=90);
plt.yticks(range(len(imports_df.columns)), labels=imports_df.columns.values);

# Minor ticks at multiples of 0.5 carry the silver grid drawn below.
ax.xaxis.set_minor_locator(mpl.ticker.MultipleLocator(0.5))
ax.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(0.5))
ax.grid(which='minor', axis='both', linestyle='-', color='silver', linewidth=1.5)
plt.savefig('imports.png')
175 ```
176
177 ```python
# Show which matplotlib release this notebook is running against.
import matplotlib as mpl

mpl.__version__
180 ```
181
182 ```python
183
184 ```