X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=09-resolving-the-bill%2Fresolving-the-bill-solution.ipynb;h=3643807022dbde7f6e593940e8571f66b3fa171b;hb=34c17b05a50db9b3e5f55496251aab9409241021;hp=379cfc931a6699cb8c6179b7ec3674c48c7c71d9;hpb=689ae4d9136e0481282731c4d7136577bcfeabf7;p=ou-summer-of-code-2017.git diff --git a/09-resolving-the-bill/resolving-the-bill-solution.ipynb b/09-resolving-the-bill/resolving-the-bill-solution.ipynb index 379cfc9..3643807 100644 --- a/09-resolving-the-bill/resolving-the-bill-solution.ipynb +++ b/09-resolving-the-bill/resolving-the-bill-solution.ipynb @@ -78,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -87,7 +87,7 @@ "148" ] }, - "execution_count": 26, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -100,7 +100,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 2, "metadata": { "collapsed": true }, @@ -114,7 +114,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 3, "metadata": { "collapsed": true }, @@ -134,7 +134,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 4, "metadata": { "collapsed": true }, @@ -158,7 +158,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 5, "metadata": { "collapsed": true }, @@ -178,7 +178,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 6, "metadata": { "collapsed": true }, @@ -233,7 +233,58 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 35, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def is_subseq_rows(seq1, seq2, return_backpointers=False, debug=False):\n", + " \"\"\"Return true if seq1 is a subsequence of seq2.\n", + " If return_backpointers, also return the set of backpointers to\n", + " reconstruct the subsequence\"\"\"\n", + " \n", + " # dp_table[i, j] is True if first i characters of seq1 can\n", + " # be found in the first j characters of seq2\n", + " \n", + " backpointers = {}\n", + " \n", + " for i in range(len(seq1)+1):\n", + " row = [False] * (len(seq2)+1)\n", + " for j in range(i, len(seq2)+1):\n", + " if i == 0:\n", + " row[j] = True\n", + " if debug: print('aa', i, j, '!', '!', row[j])\n", + " elif j == 0:\n", + " row[j] = False\n", + " if debug: print('zz', i, j, '!', '!', row[j])\n", + " else:\n", + " # extend by character from s2\n", + " if row[j-1]:\n", + " row[j] = True\n", + " backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n", + " if debug: print('s2', i, j, seq1[i-1], seq2[j-1], row[j]) \n", + " # extend by character from s1\n", + " if previous_row[j-1] and seq1[i-1] == seq2[j-1]:\n", + " row[j] = True\n", + " backpointers[i, j] = (i-1, j-1, seq1[i-1], 'seq1') \n", + " if debug: print('s1', i, j, seq1[i-1], seq2[j-1], row[j])\n", + " if not row[j]:\n", + " if debug: print('xx', i, j, seq1[i-1], seq2[j-1], row[j])\n", + " previous_row = row\n", + " \n", + " if return_backpointers:\n", + " retval = [row[-1]]\n", + " if return_backpointers:\n", + " retval += [backpointers]\n", + " return tuple(retval)\n", + " else:\n", + " return row[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -242,7 +293,7 @@ "22" ] }, - "execution_count": 54, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -255,11 +306,39 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "22" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sum(1 for s in bills\n", + " if s != 0\n", + " if is_subseq_rows(bills[0], bills[s]))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 loop, best of 3: 8.02 s per loop\n" + ] + } + ], "source": [ "%%timeit\n", "sum(1 for s in bills\n", @@ -269,7 +348,27 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 loop, best of 3: 3.04 s per loop\n" + ] + } + ], + "source": [ + "%%timeit\n", + "sum(1 for s in bills\n", + " if s != 0\n", + " if is_subseq_rows(bills[0], bills[s]))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "metadata": { "scrolled": true }, @@ -301,7 +400,7 @@ " 146]" ] }, - "execution_count": 36, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -678,6 +777,159 @@ " return dp_table[len(seq1), len(seq2)]" ] }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def is_interleave_rows(seq1, seq2, seq3, return_backpointers=False, debug=False):\n", + " \"\"\"Return true if seq3 is some interleaved merge of seq1 and seq2.\n", + " If return_backpointers, also return the set of backpointers to\n", + " reconstruct the interleaving.\n", + " \n", + " This version doesn't build the whole table, just keeps the current and previous rows\"\"\"\n", + " \n", + " # dp_table[i, j] is True if first i+j characters of seq is made up of \n", + " # an interleaving of the first i characters of seq1 and the \n", + " # first j characters of seq2\n", + " \n", + " if len(seq1) + len(seq2) != len(seq3):\n", + " if return_backpointers:\n", + " retval = [False]\n", + " if return_backpointers:\n", + " retval += [{}]\n", + " return tuple(retval)\n", + " else:\n", + " return False\n", + " \n", + "\n", + " backpointers = {}\n", + "\n", + " for i in range(len(seq1)+1):\n", + " row = [False] * (len(seq2)+1)\n", + " for j in range(len(seq2)+1):\n", + " if i == 0 and j == 0:\n", + " row[j] = True\n", + " if debug: print('xxxx', i, j, '!', '!', '!', row[j])\n", + " elif i == 0:\n", + " # extend by character from seq2\n", + " if row[j-1] and seq2[j-1] == seq3[i+j-1]:\n", + " row[j] = True\n", + " backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n", + " if debug: print('seq2', i, j, '!', seq2[j-1], seq3[i+j-1], row[j])\n", + " elif j == 0:\n", + " # extend by character from seq1\n", + " if previous_row[j] and seq1[i-1] == seq3[i+j-1]:\n", + " row[j] = True\n", + " backpointers[i, j] = (i-1, j, seq1[i-1], 'seq1')\n", + " if debug: print('seq1', i, j, seq1[i-1], '!', seq3[i+j-1], row[j])\n", + " else:\n", + " # extend by character from seq2\n", + " if row[j-1] and seq2[j-1] == seq3[i+j-1]:\n", + " row[j] = True\n", + " backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n", + " if debug: print('seq2', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j]) \n", + " # extend by character from seq1\n", + " if previous_row[j] and seq1[i-1] == seq3[i+j-1]:\n", + " row[j] = True\n", + " backpointers[i, j] = (i-1, j, seq1[i-1], 'seq1') \n", + " if debug: print('seq1', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])\n", + " if not row[j]:\n", + " if debug: print('xxxx', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])\n", + " previous_row = row\n", + "\n", + " if return_backpointers:\n", + " retval = [row[-1]]\n", + " if return_backpointers:\n", + " retval += [backpointers]\n", + " return tuple(retval)\n", + " else:\n", + " return row[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def is_interleave_rows2(seq1, seq2, seq3, return_backpointers=False, debug=False):\n", + " \"\"\"Return true if seq3 is some interleaved merge of seq1 and seq2.\n", + " If return_backpointers, also return the set of backpointers to\n", + " reconstruct the interleaving.\n", + " \n", + " This version doesn't keep the whole table, just the current and previous\n", + " rows. It also builds the current row as it goes along, rather than\n", + " building the whole row and updating elements as required.\"\"\"\n", + " \n", + " # dp_table[i, j] is True if first i+j characters of seq is made up of \n", + " # an interleaving of the first i characters of seq1 and the \n", + " # first j characters of seq2\n", + " \n", + " if len(seq1) + len(seq2) != len(seq3):\n", + " if return_backpointers:\n", + " retval = [False]\n", + " if return_backpointers:\n", + " retval += [{}]\n", + " return tuple(retval)\n", + " else:\n", + " return False\n", + " \n", + "\n", + " backpointers = {}\n", + "\n", + " for i in range(len(seq1)+1):\n", + " row = []\n", + " for j in range(len(seq2)+1):\n", + " if i == 0 and j == 0:\n", + " row += [True]\n", + " if debug: print('xxxx', i, j, '!', '!', '!', row[j])\n", + " elif i == 0:\n", + " # extend by character from seq2\n", + " if row[j-1] and seq2[j-1] == seq3[i+j-1]:\n", + " row += [True]\n", + " backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n", + " else:\n", + " row += [False]\n", + " if debug: print('seq2', i, j, '!', seq2[j-1], seq3[i+j-1], row[j])\n", + " elif j == 0:\n", + " # extend by character from seq1\n", + " if previous_row[j] and seq1[i-1] == seq3[i+j-1]:\n", + " row += [True]\n", + " backpointers[i, j] = (i-1, j, seq1[i-1], 'seq1')\n", + " else:\n", + " row += [False]\n", + " if debug: print('seq1', i, j, seq1[i-1], '!', seq3[i+j-1], row[j])\n", + " else:\n", + " # extend by character from seq2\n", + " if row[j-1] and seq2[j-1] == seq3[i+j-1]:\n", + " row += [True]\n", + " backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n", + " if debug: print('seq2', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j]) \n", + " # extend by character from seq1\n", + " elif previous_row[j] and seq1[i-1] == seq3[i+j-1]:\n", + " row += [True]\n", + " backpointers[i, j] = (i-1, j, seq1[i-1], 'seq1') \n", + " if debug: print('seq1', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])\n", + " else:\n", + " row += [False]\n", + " if debug: print('xxxx', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])\n", + " previous_row = row\n", + "\n", + " if return_backpointers:\n", + " retval = [row[-1]]\n", + " if return_backpointers:\n", + " retval += [backpointers]\n", + " return tuple(retval)\n", + " else:\n", + " return row[-1]" + ] + }, { "cell_type": "code", "execution_count": 41, @@ -723,6 +975,48 @@ " if is_interleave(bills[0], bills[1], bills[s])]" ] }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[30]" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[s for s in bills\n", + " if is_interleave_rows(bills[0], bills[1], bills[s])]" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[30]" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[s for s in bills\n", + " if is_interleave_rows2(bills[0], bills[1], bills[s])]" + ] + }, { "cell_type": "code", "execution_count": 45, @@ -994,6 +1288,44 @@ " if is_interleave(bills[0], bills[1], bills[s])]" ] }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 loop, best of 3: 1.06 s per loop\n" + ] + } + ], + "source": [ + "%%timeit\n", + "[s for s in bills\n", + " if is_interleave_rows(bills[0], bills[1], bills[s])]" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 loop, best of 3: 1.56 s per loop\n" + ] + } + ], + "source": [ + "%%timeit\n", + "[s for s in bills\n", + " if is_interleave_rows2(bills[0], bills[1], bills[s])]" + ] + }, { "cell_type": "code", "execution_count": 71, @@ -1013,6 +1345,96 @@ " if is_interleave_recursive(bills[0], bills[1], bills[s])]" ] }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Sense solution\n", + "Took \n", + "* 22.9 seconds to load file\n", + "* 14906.8 seconds to find subsequences (4.14 hours; 4 hours, 8 minutes, 26.8 seconds)\n", + "* 41726.9 seconds to check all interleavings (11.59 hours; 11 hours, 35 minutes, 26.9 seconds)\n", + "\n", + "Total of 15 hours 44 minutes 16.6 seconds." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(4.140777777777777, 4, 8, 26.799999999999272)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rtime = 14906.8\n", + "(rtime / 3600,\n", + " int(rtime / 3600), \n", + " int(rtime / 60 - int(rtime / 3600) * 60), \n", + " rtime - int(rtime / 60) * 60\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(11.590805555555557, 11, 35, 26.900000000001455)" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rtime = 41726.9 \n", + "(rtime / 3600,\n", + " int(rtime / 3600), \n", + " int(rtime / 60 - int(rtime / 3600) * 60), \n", + " rtime - int(rtime / 60) * 60\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(15.737944444444445, 15, 44, 16.599999999998545)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rtime = 22.9 + 14906.8 + 41726.9 \n", + "(rtime / 3600,\n", + " int(rtime / 3600), \n", + " int(rtime / 60 - int(rtime / 3600) * 60), \n", + " rtime - int(rtime / 60) * 60\n", + ")" + ] + }, { "cell_type": "code", "execution_count": null,