X-Git-Url: https://git.njae.me.uk/?p=advent-of-code-22.git;a=blobdiff_plain;f=profiling%2Fprofiling.md;fp=profiling%2Fprofiling.md;h=88811aebd5824378f0938a992c856ccda762410e;hp=0000000000000000000000000000000000000000;hb=89eb500db478502b125606aa4ffbf8c2cc515ddf;hpb=7556dfa39ef3eec2bc5e55ff2cfaad101a6cfb5f

diff --git a/profiling/profiling.md b/profiling/profiling.md
new file mode 100644
index 0000000..88811ae
--- /dev/null
+++ b/profiling/profiling.md
@@ -0,0 +1,372 @@
+---
+jupyter:
+  jupytext:
+    formats: ipynb,md
+    text_representation:
+      extension: .md
+      format_name: markdown
+      format_version: '1.3'
+      jupytext_version: 1.11.1
+  kernelspec:
+    display_name: Python 3 (ipykernel)
+    language: python
+    name: python3
+---
+
+```python Collapsed="false"
+import glob
+import json
+import pandas as pd
+import numpy as np
+import datetime
+import re
+
+import matplotlib.pyplot as plt
+%matplotlib inline
+```
+
+```python
+# Build and install the advent executables; a later cell runs them directly by name.
+! cd .. && cabal install
+```
+
+```python Collapsed="false" tags=[]
+# Profiled runs. RTS flags: -pj writes a JSON .prof file, -hT a .hp heap profile, -s GC statistics.
+! cd .. && for i in {01..25}; do cabal run advent${i} --enable-profiling -- +RTS -N -pj -s -hT ; done
+```
+
+```python
+# time -a (below) appends, so clear out old results first; -f keeps a first run from complaining.
+! rm -f ../times.csv
+! rm -f ../times_raw.csv
+```
+
+```python Collapsed="false" tags=[]
+# GNU time format: %C = command line, %S = system CPU seconds, %E = elapsed [h:]m:s, %M = max RSS (KB).
+! cd .. && for i in {01..25}; do /usr/bin/time -f "%C,%S,%E,%M" -o times.csv -a cabal run advent${i}; done
+```
+
+```python Collapsed="false" tags=[]
+# Same measurements, but running the installed executables directly rather than through `cabal run`.
+! cd .. && for i in {01..25}; do /usr/bin/time -f "%C,%S,%E,%M" -o times_raw.csv -a advent${i}; done
+```
+
+```python
+!mv ../*prof .
+```
+
+```python
+!mv ../times.csv .
+```
+
+```python
+!mv ../times_raw.csv .
+```
+
+```python
+!mv ../*hp .
+```
+
+```python
+# Render each heap profile (.hp) as PostScript.
+! for f in *hp ; do hp2ps ${f} ; done
+```
+
+```python Collapsed="false"
+glob.glob('*prof')
+```
+
+```python Collapsed="false"
+# Keep only the headline fields from each JSON profile.
+profs = []
+for fn in glob.glob('*prof'):
+    with open(fn) as f:
+        j = json.load(f)
+        prof = {}
+        for n in 'program total_time total_alloc total_ticks initial_capabilities'.split():
+            prof[n] = j[n]
+        profs.append(prof)
+profs
+```
+
+```python Collapsed="false"
+performance = pd.DataFrame(profs).set_index('program').sort_index()
+performance
+```
+
+```python Collapsed="false"
+performance.total_ticks.plot.bar()
+```
+
+```python Collapsed="false"
+performance.total_ticks.plot.bar(logy=True)
+```
+
+```python Collapsed="false"
+performance.total_alloc.plot.bar()
+```
+
+```python Collapsed="false"
+performance.total_alloc.plot.bar(logy=True)
+```
+
+```python Collapsed="false"
+performance[['total_ticks', 'total_alloc']].plot.bar(
+    logy=True, secondary_y=['total_alloc'],
+    figsize=(8, 6), title="Internal time and memory")
+plt.savefig('internal_time_and_memory_log.png')
+```
+
+```python Collapsed="false"
+performance[['total_ticks', 'total_alloc']].plot.bar(
+    logy=False, secondary_y=['total_alloc'],
+    figsize=(8, 6), title="Internal time and memory")
+plt.savefig('internal_time_and_memory_linear.png')
+```
+
+```python
+# times = pd.read_csv('times.csv',
+#                     names=['program', 'system', 'elapsed', 'memory'],
+#                     index_col='program')
+# times.index = times.index.str.slice(start=len('cabal run '))
+# times.elapsed = pd.to_numeric(times.elapsed.str.slice(start=2))
+# times
+```
+
+```python
+# NOTE(review): `today` (really the last second of yesterday) is not referenced by any later cell.
+today = datetime.date.today()
+today = datetime.datetime(year=today.year, month=today.month, day=today.day) - datetime.timedelta(seconds=1)
+today
+```
+
+```python
+# Parsing a time with no date part yields a timestamp on 1900-01-01; subtracting this leaves a duration.
+epoch = datetime.datetime(year=1900, month=1, day=1)
+epoch
+```
+
+```python
+times = pd.read_csv('times_raw.csv',
+                    names=['program', 'system', 'elapsed', 'memory'],
+                    index_col='program')
+# NOTE(review): the format assumes every run took under an hour (%E becomes h:mm:ss beyond that).
+times.elapsed = (pd.to_datetime(times.elapsed, format="%M:%S.%f") - epoch)
+times.elapsed = times.elapsed.apply(lambda x: x.total_seconds())
+times
+```
+
+```python
+times.dtypes
+```
+
+```python Collapsed="false"
+times.describe()
+```
+
+```python Collapsed="false"
+# Default inner join on the program-name index: programs missing from either table are dropped.
+performance = performance.merge(times, left_index=True, right_index=True)
+# performance.drop(index='advent15loop', inplace=True)
+performance
+```
+
+```python Collapsed="false"
+performance.columns
+```
+
+```python
+# performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
+performance.elapsed.plot.bar(
+    figsize=(8, 6), title="External time")
+plt.savefig('external_time.png')
+```
+
+```python
+# performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
+performance[['elapsed', 'memory']].plot.bar(
+    logy=False, secondary_y=['memory'],
+    figsize=(8, 6), title="External time and memory")
+plt.savefig('external_time_and_memory.png')
+```
+
+```python
+# performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
+performance[['elapsed', 'memory']].plot.bar(
+    logy=True, secondary_y=['memory'],
+    figsize=(8, 6), title="External time and memory")
+plt.savefig('external_time_and_memory_log.png')
+```
+
+```python Collapsed="false"
+# performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
+performance[['elapsed', 'memory']].plot.bar(
+    logy=False, secondary_y=['memory'],
+    figsize=(8, 6), title="External time and memory")
+plt.savefig('external_time_and_memory_linear.png')
+```
+
+```python Collapsed="false"
+# performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
+performance[['total_ticks', 'elapsed']].plot.bar(
+    logy=True, secondary_y=['elapsed'],
+    figsize=(8, 6), title="Internal vs external time")
+plt.savefig('internal_external_time.png')
+```
+
+```python Collapsed="false"
+# performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
+performance[['total_ticks', 'elapsed']].plot.bar(
+    logy=False, secondary_y=['elapsed'],
+    figsize=(8, 6), title="Internal vs external time")
+plt.savefig('internal_external_time_linear.png')
+```
+
+```python Collapsed="false"
+# performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
+performance[['total_alloc', 'memory']].plot.bar(
+    logy=True, secondary_y=['memory'],
+    figsize=(8, 6), title="Internal vs external memory")
+plt.savefig('internal_external_memory_log.png')
+```
+
+```python Collapsed="false"
+# performance[['total_ticks', 'elapsed']].plot.bar(logy=True)
+performance[['total_alloc', 'memory']].plot.bar(
+    logy=False, secondary_y=['memory'],
+    figsize=(8, 6), title="Internal vs external memory")
+plt.savefig('internal_external_memory_linear.png')
+```
+
+```python Collapsed="false"
+# performance['elapsed_adj'] = performance['elapsed'] - 0.28
+# performance
+```
+
+```python Collapsed="false"
+# performance[['total_time', 'elapsed_adj']].plot.bar(logy=True)
+```
+
+```python Collapsed="false"
+fig, ax = plt.subplots(ncols=3, figsize=(20,5))
+
+performance['elapsed'].plot.bar(ax=ax[2],
+                                logy=True,
+                                title="Run times (wall clock), log scale",
+#                               figsize=(10,8)
+                                )
+ax[2].set_xlabel('Program')
+
+performance['elapsed'].plot.bar(ax=ax[0],
+                                logy=False,
+                                title="Run times (wall clock), linear scale",
+#                               figsize=(10,8)
+                                )
+ax[0].set_xlabel('Program')
+
+performance['elapsed'].plot.bar(ax=ax[1],
+                                logy=False,
+                                ylim=(0, 22),
+                                title="Run times (wall clock), truncated linear scale",
+#                               figsize=(10,8)
+                                )
+ax[1].set_xlabel('Program')
+
+plt.savefig('run_times_combined.png')
+```
+
+```python Collapsed="false"
+fig, ax = plt.subplots(ncols=2, figsize=(13,5))
+
+performance['memory'].plot.bar(ax=ax[0],
+                               logy=True,
+                               title="Memory used, log scale",
+#                              figsize=(10,8)
+                               )
+ax[0].set_xlabel('Program')
+
+performance['memory'].plot.bar(ax=ax[1],
+                               logy=False,
+                               title="Memory used, linear scale",
+#                              figsize=(10,8)
+                               )
+ax[1].set_xlabel('Program')
+
+plt.savefig('memory_combined.png')
+```
+
+```python
+fig, ax = plt.subplots(ncols=2, figsize=(13,5))
+
+performance[['total_alloc', 'memory']].plot.bar(ax=ax[0],
+    logy=False, secondary_y=['memory'],
+    title="Internal vs external memory, linear scale")
+ax[0].set_xlabel('Program')
+
+performance[['total_alloc', 'memory']].plot.bar(ax=ax[1],
+    logy=True, secondary_y=['memory'],
+    title="Internal vs external memory, log scale")
+
+plt.savefig('internal_external_memory_combined.png')
+```
+
+```python Collapsed="false"
+# ax = performance['elapsed_adj'].plot.bar(logy=False,
+#     title="Run times (wall clock), linear scale",
+#     figsize=(10,8))
+# ax.set_xlabel('Program')
+# plt.savefig('run_times_linear.png')
+```
+
+```python Collapsed="false"
+performance.columns
+```
+
+```python Collapsed="false"
+performance['memory'].plot.bar()
+```
+
+```python Collapsed="false"
+performance.plot.scatter('elapsed', 'total_alloc', logx=True, logy=True)
+```
+
+```python Collapsed="false"
+performance.plot.scatter('memory', 'total_alloc', logx=True, logy=True)
+```
+
+```python Collapsed="false"
+performance.plot.scatter('elapsed', 'total_ticks', logx=True, logy=True)
+```
+
+```python Collapsed="false"
+performance[['total_alloc', 'memory', 'elapsed']].to_csv('performance.csv')
+```
+
+```python Collapsed="false"
+print(performance[['total_alloc', 'elapsed', 'memory']].to_markdown(floatfmt=['0.0f', '0.0f', '.2f', '0.0f']))
+```
+
+```python
+line_counts = ! find .. -path ../dist-newstyle -prune -o -type f -name "Main.hs" -exec wc -l {} \;
+# Each `wc -l` line reads "<count> ../<program>/..."; capture (count, program), then key the Series by program.
+count_names = [re.search(r"(\d+) \.\./([^/]+)", l).groups() for l in line_counts if 'advent' in l and 'Main' in l]
+program_counts = pd.Series({name: int(count) for name, count in sorted((name, count) for count, name in count_names)})
+program_counts
+```
+
+```python
+program_counts[::-1].plot.barh(figsize=(6, 9))
+plt.savefig('lines_of_code.png')
+```
+
+```python
+print(program_counts.to_markdown())
+```
+
+```python
+program_counts.median()
+```
+
+```python
+
+```