Tweaking of polyglot languages
[ou-summer-of-code-2017.git] / 09-resolving-the-bill / resolving-the-bill-solution.ipynb
index 5b04383b8c8abe67310371a40c4a4aef39183a4e..3643807022dbde7f6e593940e8571f66b3fa171b 100644 (file)
@@ -24,7 +24,7 @@
     "6: aacccabaddcdaddaabdc\n",
     "```\n",
     "\n",
-    "You know what you spent, so you've helpfully added your bill as the first line of the file, at index 0. \n",
+    "You know what you spent, so you've helpfully included your bill as the first line of the file, at index 0. \n",
     "\n",
     "You can find your bill in the mixed-up line at index 4, like this:\n",
     "\n",
@@ -78,7 +78,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
@@ -87,7 +87,7 @@
        "148"
       ]
      },
-     "execution_count": 26,
+     "execution_count": 1,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 2,
    "metadata": {
     "collapsed": true
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 3,
    "metadata": {
     "collapsed": true
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 4,
    "metadata": {
     "collapsed": true
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 5,
    "metadata": {
     "collapsed": true
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 66,
+   "execution_count": 6,
    "metadata": {
     "collapsed": true
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 35,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def is_subseq_rows(seq1, seq2, return_backpointers=False, debug=False):\n",
+    "    \"\"\"Return true if seq1 is a subsequence of seq2.\n",
+    "    If return_backpointers, also return the set of backpointers to\n",
+    "    reconstruct the subsequence\"\"\"\n",
+    "    \n",
+    "    # dp_table[i, j] is True if first i characters of seq1 can\n",
+    "    # be found in the first j characters of seq2\n",
+    "  \n",
+    "    backpointers = {}\n",
+    "    \n",
+    "    for i in range(len(seq1)+1):\n",
+    "        row = [False] * (len(seq2)+1)\n",
+    "        for j in range(i, len(seq2)+1):\n",
+    "            if i == 0:\n",
+    "                row[j] = True\n",
+    "                if debug: print('aa', i, j, '!', '!', row[j])\n",
+    "            elif j == 0:\n",
+    "                row[j] = False\n",
+    "                if debug: print('zz', i, j, '!', '!', row[j])\n",
+    "            else:\n",
+    "                # extend by character from s2\n",
+    "                if row[j-1]:\n",
+    "                    row[j] = True\n",
+    "                    backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n",
+    "                    if debug: print('s2', i, j, seq1[i-1], seq2[j-1], row[j])                \n",
+    "                # extend by character from s1\n",
+    "                if previous_row[j-1] and seq1[i-1] == seq2[j-1]:\n",
+    "                    row[j] = True\n",
+    "                    backpointers[i, j] = (i-1, j-1, seq1[i-1], 'seq1')                \n",
+    "                    if debug: print('s1', i, j, seq1[i-1], seq2[j-1], row[j])\n",
+    "                if not row[j]:\n",
+    "                    if debug: print('xx', i, j, seq1[i-1], seq2[j-1], row[j])\n",
+    "        previous_row = row\n",
+    "    \n",
+    "    if return_backpointers:\n",
+    "        retval = [row[-1]]\n",
+    "        if return_backpointers:\n",
+    "            retval += [backpointers]\n",
+    "        return tuple(retval)\n",
+    "    else:\n",
+    "        return row[-1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
        "22"
       ]
      },
-     "execution_count": 54,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "22"
+      ]
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sum(1 for s in bills\n",
+    "   if s != 0\n",
+    "   if is_subseq_rows(bills[0], bills[s]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 loop, best of 3: 8.02 s per loop\n"
+     ]
+    }
+   ],
    "source": [
     "%%timeit\n",
     "sum(1 for s in bills\n",
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 loop, best of 3: 3.04 s per loop\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "sum(1 for s in bills\n",
+    "   if s != 0\n",
+    "   if is_subseq_rows(bills[0], bills[s]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
    "metadata": {
     "scrolled": true
    },
        " 146]"
       ]
      },
-     "execution_count": 36,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
     "        return dp_table[len(seq1), len(seq2)]"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def is_interleave_rows(seq1, seq2, seq3, return_backpointers=False, debug=False):\n",
+    "    \"\"\"Return true if seq3 is some interleaved merge of seq1 and seq2.\n",
+    "    If return_backpointers, also return the set of backpointers to\n",
+    "    reconstruct the interleaving.\n",
+    "    \n",
+    "    This version doesn't build the whole table, just keeps the current and previous rows\"\"\"\n",
+    "    \n",
+    "    # dp_table[i, j] is True if first i+j characters of seq is made up of \n",
+    "    # an interleaving of the first i characters of seq1 and the \n",
+    "    # first j characters of seq2\n",
+    "    \n",
+    "    if len(seq1) + len(seq2) != len(seq3):\n",
+    "        if return_backpointers:\n",
+    "            retval = [False]\n",
+    "            if return_backpointers:\n",
+    "                retval += [{}]\n",
+    "            return tuple(retval)\n",
+    "        else:\n",
+    "            return False\n",
+    "    \n",
+    "\n",
+    "    backpointers = {}\n",
+    "\n",
+    "    for i in range(len(seq1)+1):\n",
+    "        row = [False] * (len(seq2)+1)\n",
+    "        for j in range(len(seq2)+1):\n",
+    "            if i == 0 and j == 0:\n",
+    "                row[j] = True\n",
+    "                if debug: print('xxxx', i, j, '!', '!', '!', row[j])\n",
+    "            elif i == 0:\n",
+    "                # extend by character from seq2\n",
+    "                if row[j-1] and seq2[j-1] == seq3[i+j-1]:\n",
+    "                    row[j] = True\n",
+    "                    backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n",
+    "                if debug: print('seq2', i, j, '!', seq2[j-1], seq3[i+j-1], row[j])\n",
+    "            elif j == 0:\n",
+    "                # extend by character from seq1\n",
+    "                if previous_row[j] and seq1[i-1] == seq3[i+j-1]:\n",
+    "                    row[j] = True\n",
+    "                    backpointers[i, j] = (i-1, j, seq1[i-1], 'seq1')\n",
+    "                if debug: print('seq1', i, j, seq1[i-1], '!', seq3[i+j-1], row[j])\n",
+    "            else:\n",
+    "                # extend by character from seq2\n",
+    "                if row[j-1] and seq2[j-1] == seq3[i+j-1]:\n",
+    "                    row[j] = True\n",
+    "                    backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n",
+    "                    if debug: print('seq2', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])                \n",
+    "                # extend by character from seq1\n",
+    "                if previous_row[j] and seq1[i-1] == seq3[i+j-1]:\n",
+    "                    row[j] = True\n",
+    "                    backpointers[i, j] = (i-1, j, seq1[i-1], 'seq1')                \n",
+    "                    if debug: print('seq1', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])\n",
+    "                if not row[j]:\n",
+    "                    if debug: print('xxxx', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])\n",
+    "        previous_row = row\n",
+    "\n",
+    "    if return_backpointers:\n",
+    "        retval = [row[-1]]\n",
+    "        if return_backpointers:\n",
+    "            retval += [backpointers]\n",
+    "        return tuple(retval)\n",
+    "    else:\n",
+    "        return row[-1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def is_interleave_rows2(seq1, seq2, seq3, return_backpointers=False, debug=False):\n",
+    "    \"\"\"Return true if seq3 is some interleaved merge of seq1 and seq2.\n",
+    "    If return_backpointers, also return the set of backpointers to\n",
+    "    reconstruct the interleaving.\n",
+    "    \n",
+    "    This version doesn't keep the whole table, just the current and previous\n",
+    "    rows. It also builds the current row as it goes along, rather than\n",
+    "    building the whole row and updating elements as required.\"\"\"\n",
+    "    \n",
+    "    # dp_table[i, j] is True if first i+j characters of seq is made up of \n",
+    "    # an interleaving of the first i characters of seq1 and the \n",
+    "    # first j characters of seq2\n",
+    "    \n",
+    "    if len(seq1) + len(seq2) != len(seq3):\n",
+    "        if return_backpointers:\n",
+    "            retval = [False]\n",
+    "            if return_backpointers:\n",
+    "                retval += [{}]\n",
+    "            return tuple(retval)\n",
+    "        else:\n",
+    "            return False\n",
+    "    \n",
+    "\n",
+    "    backpointers = {}\n",
+    "\n",
+    "    for i in range(len(seq1)+1):\n",
+    "        row = []\n",
+    "        for j in range(len(seq2)+1):\n",
+    "            if i == 0 and j == 0:\n",
+    "                row += [True]\n",
+    "                if debug: print('xxxx', i, j, '!', '!', '!', row[j])\n",
+    "            elif i == 0:\n",
+    "                # extend by character from seq2\n",
+    "                if row[j-1] and seq2[j-1] == seq3[i+j-1]:\n",
+    "                    row += [True]\n",
+    "                    backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n",
+    "                else:\n",
+    "                    row += [False]\n",
+    "                if debug: print('seq2', i, j, '!', seq2[j-1], seq3[i+j-1], row[j])\n",
+    "            elif j == 0:\n",
+    "                # extend by character from seq1\n",
+    "                if previous_row[j] and seq1[i-1] == seq3[i+j-1]:\n",
+    "                    row += [True]\n",
+    "                    backpointers[i, j] = (i-1, j, seq1[i-1], 'seq1')\n",
+    "                else:\n",
+    "                    row += [False]\n",
+    "                if debug: print('seq1', i, j, seq1[i-1], '!', seq3[i+j-1], row[j])\n",
+    "            else:\n",
+    "                # extend by character from seq2\n",
+    "                if row[j-1] and seq2[j-1] == seq3[i+j-1]:\n",
+    "                    row += [True]\n",
+    "                    backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n",
+    "                    if debug: print('seq2', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])                \n",
+    "                # extend by character from seq1\n",
+    "                elif previous_row[j] and seq1[i-1] == seq3[i+j-1]:\n",
+    "                    row += [True]\n",
+    "                    backpointers[i, j] = (i-1, j, seq1[i-1], 'seq1')                \n",
+    "                    if debug: print('seq1', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])\n",
+    "                else:\n",
+    "                    row += [False]\n",
+    "                    if debug: print('xxxx', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], row[j])\n",
+    "        previous_row = row\n",
+    "\n",
+    "    if return_backpointers:\n",
+    "        retval = [row[-1]]\n",
+    "        if return_backpointers:\n",
+    "            retval += [backpointers]\n",
+    "        return tuple(retval)\n",
+    "    else:\n",
+    "        return row[-1]"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 41,
     "   if is_interleave(bills[0], bills[1], bills[s])]"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[30]"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "[s for s in bills\n",
+    "   if is_interleave_rows(bills[0], bills[1], bills[s])]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[30]"
+      ]
+     },
+     "execution_count": 48,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "[s for s in bills\n",
+    "   if is_interleave_rows2(bills[0], bills[1], bills[s])]"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 45,
     "   if is_interleave(bills[0], bills[1], bills[s])]"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 loop, best of 3: 1.06 s per loop\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "[s for s in bills\n",
+    "   if is_interleave_rows(bills[0], bills[1], bills[s])]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 loop, best of 3: 1.56 s per loop\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "[s for s in bills\n",
+    "   if is_interleave_rows2(bills[0], bills[1], bills[s])]"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 71,
     "   if is_interleave_recursive(bills[0], bills[1], bills[s])]"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "collapsed": true
+   },
+   "source": [
+    "# Sense solution\n",
+    "Took \n",
+    "* 22.9 seconds to load file\n",
+    "* 14906.8 seconds to find subsequences (4.14 hours; 4 hours, 8 minutes, 26.8 seconds)\n",
+    "* 41726.9 seconds to check all interleavings (11.59 hours; 11 hours, 35 minutes, 26.9 seconds)\n",
+    "\n",
+    "Total of 15 hours 44 minutes 16.6 seconds."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(4.140777777777777, 4, 8, 26.799999999999272)"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "rtime = 14906.8\n",
+    "(rtime / 3600,\n",
+    " int(rtime / 3600), \n",
+    " int(rtime / 60 - int(rtime / 3600) * 60), \n",
+    " rtime - int(rtime / 60) * 60\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(11.590805555555557, 11, 35, 26.900000000001455)"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "rtime = 41726.9 \n",
+    "(rtime / 3600,\n",
+    " int(rtime / 3600), \n",
+    " int(rtime / 60 - int(rtime / 3600) * 60), \n",
+    " rtime - int(rtime / 60) * 60\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(15.737944444444445, 15, 44, 16.599999999998545)"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "rtime = 22.9 + 14906.8 + 41726.9 \n",
+    "(rtime / 3600,\n",
+    " int(rtime / 3600), \n",
+    " int(rtime / 60 - int(rtime / 3600) * 60), \n",
+    " rtime - int(rtime / 60) * 60\n",
+    ")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,