Rerun notebooks with output
[summerofcode2018soln.git] / src / task5 / task5-re.ipynb
1 {
2 "cells": [
3 {
4 "cell_type": "code",
5 "execution_count": 1,
6 "metadata": {},
7 "outputs": [],
8 "source": [
9 "import re\n",
10 "import string"
11 ]
12 },
13 {
14 "cell_type": "code",
15 "execution_count": 2,
16 "metadata": {},
17 "outputs": [],
18 "source": [
"# Comment: a '<', then any run of characters other than '>', then the closing '>'.\n",
"comment_mark = re.compile('<[^>]*>')\n",
"# Expansion marker: ':' digits ':' digits ':'; both digit runs are captured.\n",
"# Raw string, so that \\d reaches the regex engine without an invalid-escape warning.\n",
"expand_mark = re.compile(r':(\\d+):(\\d+):')"
21 ]
22 },
23 {
24 "cell_type": "code",
25 "execution_count": 3,
26 "metadata": {},
27 "outputs": [],
28 "source": [
"def decomment(text):\n",
"    \"\"\"Return `text` with every `<...>` comment removed.\n",
"\n",
"    A comment is whatever `comment_mark` matches: a `<`, any characters\n",
"    other than `>` (newlines included), and the closing `>`.\n",
"\n",
"    A single `sub` pass gives the same result as repeatedly cutting out the\n",
"    leftmost match and rescanning from the start: the text before a leftmost\n",
"    match cannot contain a `<`, so removing a comment never creates a new one.\n",
"    Rescanning after every removal was quadratic on long inputs.\n",
"    \"\"\"\n",
"    return comment_mark.sub('', text)"
37 ]
38 },
39 {
40 "cell_type": "code",
41 "execution_count": 4,
42 "metadata": {},
43 "outputs": [],
44 "source": [
"def expand(text, expansion_limit=None):\n",
"    \"\"\"Expand `:L:N:` markers in `text` and return the result.\n",
"\n",
"    For the leftmost marker, the L characters just before it end up present\n",
"    N times in total and the marker itself is dropped. L is capped at the\n",
"    number of characters available before the marker. The search restarts\n",
"    from the beginning of the text after every expansion.\n",
"\n",
"    If `expansion_limit` is not None, at most that many markers are expanded.\n",
"    \"\"\"\n",
"    attempt = 1\n",
"    found = expand_mark.search(text)\n",
"    while found and (expansion_limit is None or attempt <= expansion_limit):\n",
"        start, end = found.span()\n",
"        width = min(int(found.group(1)), start)  # cannot reach back past the start of the text\n",
"        repeats = int(found.group(2))\n",
"        head, unit, tail = text[:start - width], text[start - width:start], text[end:]\n",
"        text = head + unit * repeats + tail\n",
"        found = expand_mark.search(text)\n",
"        attempt += 1\n",
"    return text"
59 ]
60 },
61 {
62 "cell_type": "code",
63 "execution_count": 5,
64 "metadata": {},
65 "outputs": [],
66 "source": [
"# Read the instructions one entry per line, stripped of surrounding whitespace.\n",
"# The with-block closes the file; a bare open() inside the comprehension left it open.\n",
"with open('../../data/05-instructions.txt') as instructions_file:\n",
"    data_p_t = [line.strip() for line in instructions_file]"
68 ]
69 },
70 {
71 "cell_type": "code",
72 "execution_count": 6,
73 "metadata": {},
74 "outputs": [],
75 "source": [
"# Per line: strip the comments first, then expand the markers in what is left.\n",
"data_p = list(map(decomment, data_p_t))\n",
"data = list(map(expand, data_p))"
78 ]
79 },
80 {
81 "cell_type": "code",
82 "execution_count": 7,
83 "metadata": {},
84 "outputs": [
85 {
86 "data": {
87 "text/plain": [
88 "149043"
89 ]
90 },
91 "execution_count": 7,
92 "metadata": {},
93 "output_type": "execute_result"
94 }
95 ],
96 "source": [
"# Non-whitespace character count after comment removal, before marker expansion.\n",
"len(''.join(data_p).translate(str.maketrans('', '', string.whitespace)))"
98 ]
99 },
100 {
101 "cell_type": "code",
102 "execution_count": 8,
103 "metadata": {},
104 "outputs": [
105 {
106 "data": {
107 "text/plain": [
108 "302266"
109 ]
110 },
111 "execution_count": 8,
112 "metadata": {},
113 "output_type": "execute_result"
114 }
115 ],
116 "source": [
"# Non-whitespace character count after both comment removal and marker expansion.\n",
"len(''.join(data).translate(str.maketrans('', '', string.whitespace)))"
118 ]
119 },
120 {
121 "cell_type": "code",
122 "execution_count": 9,
123 "metadata": {},
124 "outputs": [],
125 "source": [
"# Same pipeline, applied to the whole file as one string instead of line by line.\n",
"# The with-block closes the file once it has been read.\n",
"with open('../../data/05-instructions.txt') as instructions_file:\n",
"    data_s_p_t = instructions_file.read()\n",
"data_s_p = decomment(data_s_p_t)\n",
"data_s = expand(data_s_p)"
129 ]
130 },
131 {
132 "cell_type": "code",
133 "execution_count": 10,
134 "metadata": {},
135 "outputs": [
136 {
137 "data": {
138 "text/plain": [
139 "149043"
140 ]
141 },
142 "execution_count": 10,
143 "metadata": {},
144 "output_type": "execute_result"
145 }
146 ],
147 "source": [
"# Whole-file variant: non-whitespace characters left after comment removal.\n",
"sum(ch not in string.whitespace for ch in data_s_p)"
149 ]
150 },
151 {
152 "cell_type": "code",
153 "execution_count": 11,
154 "metadata": {},
155 "outputs": [
156 {
157 "data": {
158 "text/plain": [
159 "302266"
160 ]
161 },
162 "execution_count": 11,
163 "metadata": {},
164 "output_type": "execute_result"
165 }
166 ],
167 "source": [
"# Whole-file variant: non-whitespace characters left after marker expansion.\n",
"sum(ch not in string.whitespace for ch in data_s)"
169 ]
170 },
171 {
172 "cell_type": "code",
173 "execution_count": 12,
174 "metadata": {},
175 "outputs": [
176 {
177 "name": "stdout",
178 "output_type": "stream",
179 "text": [
180 "27.7 ms ± 908 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
181 ]
182 }
183 ],
184 "source": [
"%%timeit\n",
"# Time the line-by-line pipeline: decomment every line, then expand every line.\n",
"data_p = list(map(decomment, data_p_t))\n",
"data = list(map(expand, data_p))"
188 ]
189 },
190 {
191 "cell_type": "code",
192 "execution_count": 13,
193 "metadata": {},
194 "outputs": [
195 {
196 "name": "stdout",
197 "output_type": "stream",
198 "text": [
199 "861 ms ± 7.25 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
200 ]
201 }
202 ],
203 "source": [
"%%timeit\n",
"# Time the whole-file pipeline: one decomment pass, then one expand pass, over the full text.\n",
"data_s = expand(decomment(data_s_p_t))"
207 ]
208 },
209 {
210 "cell_type": "code",
211 "execution_count": null,
212 "metadata": {},
213 "outputs": [],
214 "source": []
215 }
216 ],
217 "metadata": {
218 "kernelspec": {
219 "display_name": "Python 3",
220 "language": "python",
221 "name": "python3"
222 },
223 "language_info": {
224 "codemirror_mode": {
225 "name": "ipython",
226 "version": 3
227 },
228 "file_extension": ".py",
229 "mimetype": "text/x-python",
230 "name": "python",
231 "nbconvert_exporter": "python",
232 "pygments_lexer": "ipython3",
233 "version": "3.6.6"
234 }
235 },
236 "nbformat": 4,
237 "nbformat_minor": 2
238 }