"""Task 5: strip comments from, then expand, an instruction text.

Script form of the notebook ``src/task5/task5-re.ipynb``.

* A comment is any ``<...>`` span and is removed outright.
* An expansion marker has the form ``:L:N:``.  It is replaced by ``N``
  copies of the ``L`` characters immediately preceding it (fewer if the
  marker is closer than ``L`` characters to the start of the text).

Results recorded in the notebook for ``../../data/05-instructions.txt``:
149043 non-whitespace characters after comment removal and 302266 after
expansion, for both the per-line and the whole-text processing.
"""

import re
import string

# Default input location used by the notebook cells.
DATA_PATH = '../../data/05-instructions.txt'

# Raw strings keep ``\d`` a regex escape rather than an (invalid) string escape.
comment_mark = re.compile(r'<[^>]*>')
expand_mark = re.compile(r':(\d+):(\d+):')


def decomment(text):
    """Return *text* with every ``<...>`` comment removed.

    A comment runs from a ``<`` to the first following ``>``; it may
    contain further ``<`` characters and may span line breaks.
    """
    # One left-to-right pass gives the same result as repeatedly deleting
    # the leftmost match: the text before a leftmost match never holds a
    # '<', so a deletion cannot create a new comment.
    return comment_mark.sub('', text)


def expand(text, expansion_limit=None):
    """Return *text* with its ``:L:N:`` expansion markers applied.

    Each marker is replaced by ``N`` copies of the ``L`` characters just
    before it; ``L`` is clipped to the number of characters available.

    Parameters:
        text: the string to expand.
        expansion_limit: maximum number of markers to process, or ``None``
            (the default) to process markers until none remain.
    """
    expansions_done = 0
    match = expand_mark.search(text)
    while match and (expansion_limit is None
                     or expansions_done < expansion_limit):
        length = int(match.group(1))
        copies = int(match.group(2))
        start, end = match.span()
        # Cannot copy more characters than exist before the marker.
        length = min(length, start)
        text = (text[:start - length]
                + text[start - length:start] * copies
                + text[end:])
        # Search again from the beginning: an expansion can produce a new
        # marker that starts before the one just processed.
        match = expand_mark.search(text)
        expansions_done += 1
    return text


def count_non_whitespace(text):
    """Return how many characters of *text* are not in ``string.whitespace``."""
    return sum(1 for char in text if char not in string.whitespace)


def main(path=DATA_PATH):
    """Process the instruction file line by line and as one text; print counts."""
    with open(path) as source:
        raw_lines = source.readlines()
    raw_text = ''.join(raw_lines)

    # Line-by-line processing (notebook timing: 27.5 ms per loop).
    lines_decommented = [decomment(line.strip()) for line in raw_lines]
    lines_expanded = [expand(line) for line in lines_decommented]
    print(count_non_whitespace(''.join(lines_decommented)))
    print(count_non_whitespace(''.join(lines_expanded)))

    # Whole-text processing (notebook timing: 875 ms per loop) --
    # presumably slower because expand() rescans the full text after
    # every expansion.
    text_decommented = decomment(raw_text)
    text_expanded = expand(text_decommented)
    print(count_non_whitespace(text_decommented))
    print(count_non_whitespace(text_expanded))


if __name__ == '__main__':
    main()