Variant implementations for day 9
[ou-summer-of-code-2017.git] / 09-resolving-the-bill / resolving-the-bill-solution.ipynb
index 379cfc931a6699cb8c6179b7ec3674c48c7c71d9..3643807022dbde7f6e593940e8571f66b3fa171b 100644 (file)
@@ -78,7 +78,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
@@ -87,7 +87,7 @@
        "148"
       ]
      },
-     "execution_count": 26,
+     "execution_count": 1,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 2,
    "metadata": {
     "collapsed": true
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 3,
    "metadata": {
     "collapsed": true
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 4,
    "metadata": {
     "collapsed": true
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 5,
    "metadata": {
     "collapsed": true
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 66,
+   "execution_count": 6,
    "metadata": {
     "collapsed": true
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 35,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def is_subseq_rows(seq1, seq2, return_backpointers=False, debug=False):\n",
+    "    \"\"\"Return true if seq1 is a subsequence of seq2.\n",
+    "    If return_backpointers, also return the set of backpointers to\n",
+    "    reconstruct the subsequence\"\"\"\n",
+    "    \n",
+    "    # dp_table[i, j] is True if first i characters of seq1 can\n",
+    "    # be found in the first j characters of seq2\n",
+    "  \n",
+    "    backpointers = {}\n",
+    "    \n",
+    "    for i in range(len(seq1)+1):\n",
+    "        row = [False] * (len(seq2)+1)\n",
+    "        for j in range(i, len(seq2)+1):\n",
+    "            if i == 0:\n",
+    "                row[j] = True\n",
+    "                if debug: print('aa', i, j, '!', '!', row[j])\n",
+    "            elif j == 0:\n",
+    "                row[j] = False\n",
+    "                if debug: print('zz', i, j, '!', '!', row[j])\n",
+    "            else:\n",
+    "                # extend by character from s2\n",
+    "                if row[j-1]:\n",
+    "                    row[j] = True\n",
+    "                    backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n",
+    "                    if debug: print('s2', i, j, seq1[i-1], seq2[j-1], row[j])                \n",
+    "                # extend by character from s1\n",
+    "                if previous_row[j-1] and seq1[i-1] == seq2[j-1]:\n",
+    "                    row[j] = True\n",
+    "                    backpointers[i, j] = (i-1, j-1, seq1[i-1], 'seq1')                \n",
+    "                    if debug: print('s1', i, j, seq1[i-1], seq2[j-1], row[j])\n",
+    "                if not row[j]:\n",
+    "                    if debug: print('xx', i, j, seq1[i-1], seq2[j-1], row[j])\n",
+    "        previous_row = row\n",
+    "    \n",
+    "    if return_backpointers:\n",
+    "        retval = [row[-1]]\n",
+    "        if return_backpointers:\n",
+    "            retval += [backpointers]\n",
+    "        return tuple(retval)\n",
+    "    else:\n",
+    "        return row[-1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
        "22"
       ]
      },
-     "execution_count": 54,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "22"
+      ]
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sum(1 for s in bills\n",
+    "   if s != 0\n",
+    "   if is_subseq_rows(bills[0], bills[s]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 loop, best of 3: 8.02 s per loop\n"
+     ]
+    }
+   ],
    "source": [
     "%%timeit\n",
     "sum(1 for s in bills\n",
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 loop, best of 3: 3.04 s per loop\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "sum(1 for s in bills\n",
+    "   if s != 0\n",
+    "   if is_subseq_rows(bills[0], bills[s]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
    "metadata": {
     "scrolled": true
    },
        " 146]"
       ]
      },
-     "execution_count": 36,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
     "        return dp_table[len(seq1), len(seq2)]"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def is_interleave_rows(seq1, seq2, seq3, return_backpointers=False, debug=False):\n",
+    "    \"\"\"Return true if seq3 is some interleaved merge of seq1 and seq2.\n",
+    "    If return_backpointers, also return the set of backpointers to\n",
+    "    reconstruct the interleaving.\n",
+    "    \n",
+    "    This version doesn't build the whole table, just keeps the current and previous rows\"\"\"\n",
+    "    \n",
+    "    # dp_table[i, j] is True if first i+j characters of seq is made up of \n",
+    "    # an interleaving of the first i characters of seq1 and the \n",
+    "    # first j characters of seq2\n",
+    "    \n",
+    "    if len(seq1) + len(seq2) != len(seq3):\n",
+    "        if return_backpointers:\n",
+    "            retval = [False]\n",
+    "            if return_backpointers:\n",
+    "                retval += [{}]\n",
+    "            return tuple(retval)\n",
+    "        else:\n",
+    "            return False\n",
+    "    \n",
+    "\n",
+    "    backpointers = {}\n",
+    "\n",
+    "    for i in range(len(seq1)+1):\n",
+    "        row = [False] * (len(seq2)+1)\n",
+    "        for j in range(len(seq2)+1):\n",
+    "            if i == 0 and j == 0:\n",
+    "                row[j] = True\n",
+    "                if debug: print('xxxx', i, j, '!', '!', '!', row[j])\n",
+    "            elif i == 0:\n",
+    "                # extend by character from seq2\n",
+    "                if row[j-1] and seq2[j-1] == seq3[i+j-1]:\n",
+    "                    row[j] = True\n",
+    "                    backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n",
+    "                if debug: print('seq2', i, j, '!', seq2[j-1], seq3[i+j-1], row[j])\n",
+    "            elif j == 0:\n",
+    "                # extend by character from seq1\n",
+    "                if previous_row[j] and seq1[i-1] == seq3[i+j-1]:\n",
+    "                    row[j] = True\n",
+    "                    backpointers[i, j] = (i-1, j, seq1[i-1], 'seq1')\n",
+    "                if debug: print('seq1', i, j, seq1[i-1], '!', seq3[i+j-1], row[j])\n",
+    "            else:\n",
+    "                # extend by character from seq2\n",
+    "                if row[j-1] and seq2[j-1] == seq3[i+j-1]:\n",
+    "                    row[j] = True\n",
+    "                    backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n",
+    "                    if debug: print('seq2', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])                \n",
+    "                # extend by character from seq1\n",
+    "                if previous_row[j] and seq1[i-1] == seq3[i+j-1]:\n",
+    "                    row[j] = True\n",
+    "                    backpointers[i, j] = (i-1, j, seq1[i-1], 'seq1')                \n",
+    "                    if debug: print('seq1', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])\n",
+    "                if not row[j]:\n",
+    "                    if debug: print('xxxx', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])\n",
+    "        previous_row = row\n",
+    "\n",
+    "    if return_backpointers:\n",
+    "        retval = [row[-1]]\n",
+    "        if return_backpointers:\n",
+    "            retval += [backpointers]\n",
+    "        return tuple(retval)\n",
+    "    else:\n",
+    "        return row[-1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def is_interleave_rows2(seq1, seq2, seq3, return_backpointers=False, debug=False):\n",
+    "    \"\"\"Return true if seq3 is some interleaved merge of seq1 and seq2.\n",
+    "    If return_backpointers, also return the set of backpointers to\n",
+    "    reconstruct the interleaving.\n",
+    "    \n",
+    "    This version doesn't keep the whole table, just the current and previous\n",
+    "    rows. It also builds the current row as it goes along, rather than\n",
+    "    building the whole row and updating elements as required.\"\"\"\n",
+    "    \n",
+    "    # dp_table[i, j] is True if first i+j characters of seq is made up of \n",
+    "    # an interleaving of the first i characters of seq1 and the \n",
+    "    # first j characters of seq2\n",
+    "    \n",
+    "    if len(seq1) + len(seq2) != len(seq3):\n",
+    "        if return_backpointers:\n",
+    "            retval = [False]\n",
+    "            if return_backpointers:\n",
+    "                retval += [{}]\n",
+    "            return tuple(retval)\n",
+    "        else:\n",
+    "            return False\n",
+    "    \n",
+    "\n",
+    "    backpointers = {}\n",
+    "\n",
+    "    for i in range(len(seq1)+1):\n",
+    "        row = []\n",
+    "        for j in range(len(seq2)+1):\n",
+    "            if i == 0 and j == 0:\n",
+    "                row += [True]\n",
+    "                if debug: print('xxxx', i, j, '!', '!', '!', row[j])\n",
+    "            elif i == 0:\n",
+    "                # extend by character from seq2\n",
+    "                if row[j-1] and seq2[j-1] == seq3[i+j-1]:\n",
+    "                    row += [True]\n",
+    "                    backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n",
+    "                else:\n",
+    "                    row += [False]\n",
+    "                if debug: print('seq2', i, j, '!', seq2[j-1], seq3[i+j-1], row[j])\n",
+    "            elif j == 0:\n",
+    "                # extend by character from seq1\n",
+    "                if previous_row[j] and seq1[i-1] == seq3[i+j-1]:\n",
+    "                    row += [True]\n",
+    "                    backpointers[i, j] = (i-1, j, seq1[i-1], 'seq1')\n",
+    "                else:\n",
+    "                    row += [False]\n",
+    "                if debug: print('seq1', i, j, seq1[i-1], '!', seq3[i+j-1], row[j])\n",
+    "            else:\n",
+    "                # extend by character from seq2\n",
+    "                if row[j-1] and seq2[j-1] == seq3[i+j-1]:\n",
+    "                    row += [True]\n",
+    "                    backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n",
+    "                    if debug: print('seq2', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])                \n",
+    "                # extend by character from seq1\n",
+    "                elif previous_row[j] and seq1[i-1] == seq3[i+j-1]:\n",
+    "                    row += [True]\n",
+    "                    backpointers[i, j] = (i-1, j, seq1[i-1], 'seq1')                \n",
+    "                    if debug: print('seq1', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])\n",
+    "                else:\n",
+    "                    row += [False]\n",
+    "                    if debug: print('xxxx', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])\n",
+    "        previous_row = row\n",
+    "\n",
+    "    if return_backpointers:\n",
+    "        retval = [row[-1]]\n",
+    "        if return_backpointers:\n",
+    "            retval += [backpointers]\n",
+    "        return tuple(retval)\n",
+    "    else:\n",
+    "        return row[-1]"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 41,
     "   if is_interleave(bills[0], bills[1], bills[s])]"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[30]"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "[s for s in bills\n",
+    "   if is_interleave_rows(bills[0], bills[1], bills[s])]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[30]"
+      ]
+     },
+     "execution_count": 48,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "[s for s in bills\n",
+    "   if is_interleave_rows2(bills[0], bills[1], bills[s])]"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 45,
     "   if is_interleave(bills[0], bills[1], bills[s])]"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 loop, best of 3: 1.06 s per loop\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "[s for s in bills\n",
+    "   if is_interleave_rows(bills[0], bills[1], bills[s])]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 loop, best of 3: 1.56 s per loop\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "[s for s in bills\n",
+    "   if is_interleave_rows2(bills[0], bills[1], bills[s])]"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 71,
     "   if is_interleave_recursive(bills[0], bills[1], bills[s])]"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "collapsed": true
+   },
+   "source": [
+    "# Sense solution\n",
+    "Took \n",
+    "* 22.9 seconds to load file\n",
+    "* 14906.8 seconds to find subsequences (4.14 hours; 4 hours, 8 minutes, 26.8 seconds)\n",
+    "* 41726.9 seconds to check all interleavings (11.59 hours; 11 hours, 35 minutes, 26.9 seconds)\n",
+    "\n",
+    "Total of 15 hours 44 minutes 16.6 seconds."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(4.140777777777777, 4, 8, 26.799999999999272)"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "rtime = 14906.8\n",
+    "(rtime / 3600,\n",
+    " int(rtime / 3600), \n",
+    " int(rtime / 60 - int(rtime / 3600) * 60), \n",
+    " rtime - int(rtime / 60) * 60\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(11.590805555555557, 11, 35, 26.900000000001455)"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "rtime = 41726.9 \n",
+    "(rtime / 3600,\n",
+    " int(rtime / 3600), \n",
+    " int(rtime / 60 - int(rtime / 3600) * 60), \n",
+    " rtime - int(rtime / 60) * 60\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(15.737944444444445, 15, 44, 16.599999999998545)"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "rtime = 22.9 + 14906.8 + 41726.9 \n",
+    "(rtime / 3600,\n",
+    " int(rtime / 3600), \n",
+    " int(rtime / 60 - int(rtime / 3600) * 60), \n",
+    " rtime - int(rtime / 60) * 60\n",
+    ")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,