Done task 5
[summerofcode2018soln.git] / src / task5 / task5-re.ipynb
diff --git a/src/task5/task5-re.ipynb b/src/task5/task5-re.ipynb
new file mode 100644 (file)
index 0000000..fc14587
--- /dev/null
@@ -0,0 +1,238 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import re\n",
+    "import string"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "comment_mark = re.compile('<[^>]*>')\n",
+    "expand_mark = re.compile(':(\\d+):(\\d+):')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def decomment(text):\n",
+    "    m = comment_mark.search(text)\n",
+    "    while m:\n",
+    "        s = m.start()\n",
+    "        e = m.end()\n",
+    "        text = text[:s] + text[e:]\n",
+    "        m = comment_mark.search(text)\n",
+    "    return text"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def expand(text, expansion_limit=None):\n",
+    "    i = 1\n",
+    "    m = expand_mark.search(text)\n",
+    "    while m and (expansion_limit is None or (expansion_limit is not None and i <= expansion_limit)):\n",
+    "        l = int(m.group(1))\n",
+    "        n = int(m.group(2))\n",
+    "        s = m.start()\n",
+    "        e = m.end()\n",
+    "        if l > s:\n",
+    "            l = s\n",
+    "        text = text[:s-l] + text[s-l:s] * n + text[e:]\n",
+    "        m = expand_mark.search(text)\n",
+    "        i += 1\n",
+    "    return text"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_p_t = [l.strip() for l in open('../../data/05-instructions.txt')]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_p = [decomment(l) for l in data_p_t]\n",
+    "data = [expand(l) for l in data_p]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "149043"
+      ]
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len([c for c in ''.join(data_p) if c not in string.whitespace])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "302266"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len([c for c in ''.join(data) if c not in string.whitespace])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_s_p_t = open('../../data/05-instructions.txt').read()\n",
+    "data_s_p = decomment(data_s_p_t)\n",
+    "data_s = expand(data_s_p)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "149043"
+      ]
+     },
+     "execution_count": 34,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sum(1 for c in data_s_p if c not in string.whitespace)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "302266"
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sum(1 for c in data_s if c not in string.whitespace)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "27.5 ms ± 951 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "data_p = [decomment(l) for l in data_p_t]\n",
+    "data = [expand(l) for l in data_p]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "875 ms ± 5.02 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "data_s_p = decomment(data_s_p_t)\n",
+    "data_s = expand(data_s_p)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}