Day 23 now using arrays
[advent-of-code-22.git] / profiling / profiling.md
1 ---
2 jupyter:
3 jupytext:
4 formats: ipynb,md
5 text_representation:
6 extension: .md
7 format_name: markdown
8 format_version: '1.3'
9 jupytext_version: 1.11.1
10 kernelspec:
11 display_name: Python 3 (ipykernel)
12 language: python
13 name: python3
14 ---
15
16 ```python Collapsed="false"
17 import glob
18 import json
19 import pandas as pd
20 import numpy as np
21 import datetime
22 import re
23
24 import matplotlib.pyplot as plt
25 %matplotlib inline
26 ```
27
28 ```python
# Build every solution and install the executables. A later timing cell calls
# advent01..advent25 by bare name, so they presumably need to be on PATH.
29 ! cd .. && cabal install
30 ```
31
32 ```python Collapsed="false" tags=[]
# Run each day's solution with profiling enabled. Everything after `--` goes
# to the GHC runtime: -pj asks for a JSON time profile and -hT for a heap
# profile (the *prof and *hp files collected by later cells), -s prints
# run statistics and -N lets the runtime use multiple cores.
33 ! cd .. && for i in {01..25}; do cabal run advent${i} --enable-profiling -- +RTS -N -pj -s -hT ; done
34 ```
35
36 ```python
37 ! rm ../times.csv
38 ! rm ../times_raw.csv
39 ```
40
41 ```python Collapsed="false" tags=[]
# Time each solution when launched through `cabal run`. The format string
# writes one CSV row per run: command (%C), system CPU seconds (%S), elapsed
# wall-clock time (%E) and maximum resident set size (%M).
# NOTE(review): times.csv is only read by a commented-out cell further down.
42 ! cd .. && for i in {01..25}; do /usr/bin/time -f "%C,%S,%E,%M" -o times.csv -a cabal run advent${i}; done
43 ```
44
45 ```python Collapsed="false" tags=[]
# Time the installed executables directly, without cabal's start-up cost.
# These rows (times_raw.csv) are the ones analysed below.
46 ! cd .. && for i in {01..25}; do /usr/bin/time -f "%C,%S,%E,%M" -o times_raw.csv -a advent${i}; done
47 ```
48
49 ```python
# Gather the generated profiles and timing files into this directory.
50 !mv ../*prof .
51 ```
52
53 ```python
54 !mv ../times.csv .
55 ```
56
57 ```python
58 !mv ../times_raw.csv .
59 ```
60
61 ```python
62 !mv ../*hp .
63 ```
64
65 ```python
# Render each heap profile to PostScript with hp2ps.
66 ! for f in *hp ; do hp2ps ${f} ; done
67 ```
68
69 ```python Collapsed="false"
# List the profile files now in this directory, as a check before parsing them.
70 glob.glob('*prof')
71 ```
72
73 ```python Collapsed="false"
# Pull the headline figures out of every JSON profile in this directory,
# giving one summary dict per profile file.
wanted = 'program total_time total_alloc total_ticks initial_capabilities'.split()


def summarise_profile(path):
    """Return the summary fields from the JSON profile stored at `path`."""
    with open(path) as handle:
        report = json.load(handle)
    return {field: report[field] for field in wanted}


profs = [summarise_profile(path) for path in glob.glob('*prof')]
profs
83 ```
84
85 ```python Collapsed="false"
# Tabulate the profile summaries: one row per program, ordered by program name.
profile_table = pd.DataFrame(profs)
performance = profile_table.set_index('program').sort_index()
performance
88 ```
89
90 ```python Collapsed="false"
# Profiler tick counts per program, linear scale.
91 performance.total_ticks.plot.bar()
92 ```
93
94 ```python Collapsed="false"
# The same tick counts on a log axis.
95 performance.total_ticks.plot.bar(logy=True)
96 ```
97
98 ```python Collapsed="false"
# Total allocation reported by the profiler per program, linear scale.
99 performance.total_alloc.plot.bar()
100 ```
101
102 ```python Collapsed="false"
# Allocation again, on a log axis.
103 performance.total_alloc.plot.bar(logy=True)
104 ```
105
106 ```python Collapsed="false"
# Profiler ticks and allocation side by side on log axes; allocation is drawn
# against the secondary (right-hand) y-axis so each series has its own scale.
internal = performance[['total_ticks', 'total_alloc']]
internal.plot.bar(title="Internal time and memory", figsize=(8, 6),
                  secondary_y=['total_alloc'], logy=True)
plt.savefig('internal_time_and_memory_log.png')
111 ```
112
113 ```python Collapsed="false"
# Profiler ticks and allocation on linear axes, allocation on the secondary
# y-axis.
internal = performance[['total_ticks', 'total_alloc']]
internal.plot.bar(title="Internal time and memory", figsize=(8, 6),
                  secondary_y=['total_alloc'], logy=False)
plt.savefig('internal_time_and_memory_linear.png')
118 ```
119
120 ```python
121 # times = pd.read_csv('times.csv',
122 # names=['program', 'system', 'elapsed', 'memory'],
123 # index_col='program')
124 # times.index = times.index.str.slice(start=len('cabal run '))
125 # times.elapsed = pd.to_numeric(times.elapsed.str.slice(start=2))
126 # times
127 ```
128
129 ```python
# The last second of yesterday: midnight at the start of today, less one second.
midnight = datetime.datetime.combine(datetime.date.today(), datetime.time())
today = midnight - datetime.timedelta(seconds=1)
today
133 ```
134
135 ```python
# Reference instant subtracted from parsed clock times to turn them into
# durations. NOTE(review): 1900-01-01 appears to be the date the parser fills
# in when only minutes and seconds are supplied — confirm.
epoch = datetime.datetime(1900, 1, 1)
epoch
138 ```
139
140 ```python
def elapsed_to_seconds(text):
    """Convert a GNU time `%E` value to a number of seconds.

    `%E` is written as `[hours:]minutes:seconds`, so the colon-separated
    fields are folded left to right in base 60. Unlike a fixed "%M:%S.%f"
    pattern, this also accepts runs that last an hour or more.
    """
    total = 0.0
    for field in str(text).strip().split(':'):
        total = total * 60 + float(field)
    return total


# One row per timed run, in the "%C,%S,%E,%M" layout written by the timing
# cell. The command name becomes the index so that it can be matched against
# the index of `performance`.
times = pd.read_csv('times_raw.csv',
                    names=['program', 'system', 'elapsed', 'memory'],
                    index_col='program')
times.elapsed = times.elapsed.apply(elapsed_to_seconds)
times
147 ```
148
149 ```python
# Check the column types after parsing (elapsed should now be numeric).
150 times.dtypes
151 ```
152
153 ```python Collapsed="false"
154 times.describe()
155 ```
156
157 ```python Collapsed="false"
# Attach the external measurements to the profiler figures, matching rows on
# the program name held in both indexes. merge() defaults to an inner join, so
# a program missing from either table is dropped without warning.
# NOTE(review): re-running this cell merges a second time and produces
# suffixed duplicate columns — rebuild `performance` first.
158 performance = performance.merge(times, left_index=True, right_index=True)
159 # performance.drop(index='advent15loop', inplace=True)
160 performance
161 ```
162
163 ```python Collapsed="false"
164 performance.columns
165 ```
166
167 ```python
168 # performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
# Bar chart of elapsed (wall-clock) time per program, saved as a PNG.
wall_clock = performance['elapsed']
wall_clock.plot.bar(title="External time", figsize=(8, 6))
plt.savefig('external_time.png')
172 ```
173
174 ```python
175 # performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
# Elapsed time and memory on linear axes; memory is drawn against the
# secondary (right-hand) y-axis.
external = performance[['elapsed', 'memory']]
external.plot.bar(title="External time and memory", figsize=(8, 6),
                  secondary_y=['memory'], logy=False)
plt.savefig('external_time_and_memory.png')
180 ```
181
182 ```python
183 # performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
# Elapsed time and memory on log axes, memory on the secondary y-axis.
external = performance[['elapsed', 'memory']]
external.plot.bar(title="External time and memory", figsize=(8, 6),
                  secondary_y=['memory'], logy=True)
plt.savefig('external_time_and_memory_log.png')
188 ```
189
190 ```python Collapsed="false"
191 # performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
# Linear-scale chart of elapsed time and memory, written under the explicit
# "_linear" file name (same plot settings as 'external_time_and_memory.png').
external = performance[['elapsed', 'memory']]
external.plot.bar(title="External time and memory", figsize=(8, 6),
                  secondary_y=['memory'], logy=False)
plt.savefig('external_time_and_memory_linear.png')
196 ```
197
198 ```python Collapsed="false"
199 # performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
# Profiler ticks against elapsed wall-clock time on log axes; elapsed time is
# drawn against the secondary y-axis.
timings = performance[['total_ticks', 'elapsed']]
timings.plot.bar(title="Internal vs external time", figsize=(8, 6),
                 secondary_y=['elapsed'], logy=True)
plt.savefig('internal_external_time.png')
204 ```
205
206 ```python Collapsed="false"
207 # performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
# Profiler ticks against elapsed wall-clock time on linear axes; elapsed time
# is drawn against the secondary y-axis.
timings = performance[['total_ticks', 'elapsed']]
timings.plot.bar(title="Internal vs external time", figsize=(8, 6),
                 secondary_y=['elapsed'], logy=False)
plt.savefig('internal_external_time_linear.png')
212 ```
213
214 ```python Collapsed="false"
215 # performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
# Profiler-reported allocation against externally measured memory on log
# axes; memory is drawn against the secondary y-axis.
memory_pair = performance[['total_alloc', 'memory']]
memory_pair.plot.bar(title="Internal vs external memory", figsize=(8, 6),
                     secondary_y=['memory'], logy=True)
plt.savefig('internal_external_memory_log.png')
220 ```
221
222 ```python Collapsed="false"
223 # performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
# Profiler-reported allocation against externally measured memory on linear
# axes; memory is drawn against the secondary y-axis.
memory_pair = performance[['total_alloc', 'memory']]
memory_pair.plot.bar(title="Internal vs external memory", figsize=(8, 6),
                     secondary_y=['memory'], logy=False)
plt.savefig('internal_external_memory_linear.png')
228 ```
229
230 ```python Collapsed="false"
231 # performance['elapsed_adj'] = performance['elapsed'] - 0.28
232 # performance
233 ```
234
235 ```python Collapsed="false"
236 # performance[['total_time', 'elapsed_adj']].plot.bar(logy=True)
237 ```
238
239 ```python Collapsed="false"
# Three views of the wall-clock times in one figure: full linear scale (left),
# linear scale with the y-axis capped at 22 (middle) and log scale (right).
fig, ax = plt.subplots(ncols=3, figsize=(20,5))

# (panel index, plot options), listed in the order the panels are drawn.
panels = [
    (2, dict(logy=True,
             title="Run times (wall clock), log scale")),
    (0, dict(logy=False,
             title="Run times (wall clock), linear scale")),
    (1, dict(logy=False, ylim=(0, 22),
             title="Run times (wall clock), truncated linear scale")),
]
for position, options in panels:
    performance['elapsed'].plot.bar(ax=ax[position], **options)
    ax[position].set_xlabel('Program')

plt.savefig('run_times_combined.png')
265 ```
266
267 ```python Collapsed="false"
# Memory use per program shown twice in one figure: log scale on the left,
# linear scale on the right.
fig, ax = plt.subplots(ncols=2, figsize=(13,5))

scales = [
    (True, "Memory used, log scale"),
    (False, "Memory used, linear scale"),
]
for panel, (use_log, heading) in zip(ax, scales):
    performance['memory'].plot.bar(ax=panel, logy=use_log, title=heading)
    panel.set_xlabel('Program')

plt.savefig('memory_combined.png')
285 ```
286
287 ```python
# Profiler-reported allocation against externally measured memory, on linear
# (left) and log (right) axes. `memory` is drawn against the secondary y-axis
# so each series gets its own scale.
fig, ax = plt.subplots(ncols=2, figsize=(13,5))

performance[['total_alloc', 'memory']].plot.bar(
    ax=ax[0],
    logy=False, secondary_y=['memory'],
    title="Internal vs external memory, linear scale")
ax[0].set_xlabel('Program')

# Title punctuation now matches the linear panel ("memory, log scale").
performance[['total_alloc', 'memory']].plot.bar(
    ax=ax[1],
    logy=True, secondary_y=['memory'],
    title="Internal vs external memory, log scale")
# Label the second panel as well, so both panels read the same way.
ax[1].set_xlabel('Program')

plt.savefig('internal_external_memory_combined.png')
300 ```
301
302 ```python Collapsed="false"
303 # ax = performance['elapsed_adj'].plot.bar(logy=False,
304 # title="Run times (wall clock), linear scale",
305 # figsize=(10,8))
306 # ax.set_xlabel('Program')
307 # plt.savefig('run_times_linear.png')
308 ```
309
310 ```python Collapsed="false"
# Reminder of the columns available after the merge.
311 performance.columns
312 ```
313
314 ```python Collapsed="false"
# Memory per program as recorded by /usr/bin/time.
315 performance['memory'].plot.bar()
316 ```
317
318 ```python Collapsed="false"
# Log-log scatter plots to eyeball how the measurements relate to each other:
# elapsed time against allocation, ...
319 performance.plot.scatter('elapsed', 'total_alloc', logx=True, logy=True)
320 ```
321
322 ```python Collapsed="false"
# ... measured memory against allocation, ...
323 performance.plot.scatter('memory', 'total_alloc', logx=True, logy=True)
324 ```
325
326 ```python Collapsed="false"
# ... and elapsed time against profiler ticks.
327 performance.plot.scatter('elapsed', 'total_ticks', logx=True, logy=True)
328 ```
329
330 ```python Collapsed="false"
# Save the headline columns to performance.csv.
331 performance[['total_alloc', 'memory', 'elapsed']].to_csv('performance.csv')
332 ```
333
334 ```python Collapsed="false"
# Print the headline columns as a Markdown table. floatfmt lists four formats
# for three data columns; presumably the first applies to the index column,
# leaving elapsed with two decimals and the others with none — confirm.
335 print(performance[['total_alloc', 'elapsed', 'memory']].to_markdown(floatfmt=['0.0f', '0.0f', '.2f', '0.0f']))
336 ```
337
338 ```python
# Find every Main.hs outside cabal's dist-newstyle build tree and run `wc -l`
# on each; the shell output is captured as a list of "<count> <path>" lines.
339 line_counts = ! find .. -path ../dist-newstyle -prune -o -type f -name "Main.hs" -exec wc -l {} \;
# Each useful line looks like "<count> ../<program dir>/...". Capture the count
# and the first path component after "../".
count_pattern = re.compile(r"(\d+) \.\./([^/]+)")

# Only lines that mention both 'advent' and 'Main' are considered; any of
# those that still fail to match the pattern are skipped rather than crashing.
count_matches = (count_pattern.search(l) for l in line_counts
                 if 'advent' in l and 'Main' in l)

# group(2, 1) yields (program name, line count) in that order.
count_names = [m.group(2, 1) for m in count_matches if m is not None]

# Series of line counts indexed by program name, in name order.
program_counts = pd.Series({name: int(count) for name, count in sorted(count_names)})
program_counts
343 ```
344
345 ```python
# Horizontal bar chart of lines of code per program. The series is reversed
# first, presumably so the first program ends up at the top of the chart.
346 program_counts[::-1].plot.barh(figsize=(6, 9))
347 plt.savefig('lines_of_code.png')
348 ```
349
350 ```python
# The same counts as a Markdown table.
351 print(program_counts.to_markdown())
352 ```
353
354 ```python
# Median number of lines across the programs.
355 program_counts.median()
356 ```
357
358 ```python
359
360 ```