Added recursive solutions
[ou-summer-of-code-2017.git] / 09-resolving-the-bill / subsequence.ipynb
index 4484bfb6c6a1fa5fe979607ccc7d6c4c086d1b26..79d837f8f830cff9c312a37eefd447148c31ceeb 100644 (file)
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 33,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import random"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
    "metadata": {
     "collapsed": true
    },
@@ -41,7 +52,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 4,
    "metadata": {
     "scrolled": true
    },
        " (5, 10): False}"
       ]
      },
-     "execution_count": 2,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 5,
    "metadata": {
     "collapsed": true
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
        " (4, 7): (4, 6, 'b', 's2')}"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 8,
    "metadata": {
     "collapsed": true
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
        "'AAAbAbb'"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
+   "execution_count": 11,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def is_subseq(seq1, seq2, return_backpointers=False, return_table=False, debug=False):\n",
+    "    \"\"\"Return true if seq1 is a subsequence of seq2.\n",
+    "    If return_backpointers, also return the set of backpointers to\n",
+    "    reconstruct the subsequence\"\"\"\n",
+    "    \n",
+    "    # dp_table[i, j] is True if first i characters of seq1 can\n",
+    "    # be found in the first j characters of seq2\n",
+    "  \n",
+    "    dp_table = {(i, j): False\n",
+    "               for i in range(len(seq1)+1)\n",
+    "               for j in range(len(seq2)+1)}\n",
+    "\n",
+    "    backpointers = {}\n",
+    "    \n",
+    "    for i in range(len(seq1)+1):\n",
+    "        for j in range(i, len(seq2)+1):\n",
+    "            if i == 0 or j == 0:\n",
+    "                dp_table[i, j] = True\n",
+    "                if debug: print('aa', i, j, '!', '!', dp_table[i, j])\n",
+    "            else:\n",
+    "                # extend by character from s2\n",
+    "                if dp_table[i, j-1]:\n",
+    "                    dp_table[i, j] = True\n",
+    "                    backpointers[i, j] = (i, j-1, seq2[j-1], 's2')\n",
+    "                    if debug: print('s2', i, j, seq1[i-1], seq2[j-1], dp_table[i, j])                \n",
+    "                # extend by character from s1\n",
+    "                if dp_table[i-1, j-1] and seq1[i-1] == seq2[j-1]:\n",
+    "                    dp_table[i, j] = True\n",
+    "                    backpointers[i, j] = (i-1, j-1, seq1[i-1], 's1')                \n",
+    "                    if debug: print('s1', i, j, seq1[i-1], seq2[j-1], dp_table[i, j])\n",
+    "                if not dp_table[i, j]:\n",
+    "                    if debug: print('xx', i, j, seq1[i-1], seq2[j-1], dp_table[i, j]) \n",
+    "   \n",
+    "#     if return_backpointers:\n",
+    "#         return dp_table[len(seq1), len(seq2)], backpointers\n",
+    "#     else:\n",
+    "#         return dp_table[len(seq1), len(seq2)]\n",
+    "    \n",
+    "    if return_backpointers or return_table:\n",
+    "        retval = [dp_table[len(seq1), len(seq2)]]\n",
+    "        if return_backpointers:\n",
+    "            retval += [backpointers]\n",
+    "        if return_table:\n",
+    "            retval += [dp_table]\n",
+    "        return tuple(retval)\n",
+    "    else:\n",
+    "        return dp_table[len(seq1), len(seq2)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [
     {
      "name": "stdout",
        "True"
       ]
      },
-     "execution_count": 28,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "def is_subseq(seq1, seq2, return_backpointers=False, return_table=False, debug=False):\n",
-    "    \"\"\"Return true if seq1 is a subsequence of seq2.\n",
-    "    If return_backpointers, also return the set of backpointers to\n",
-    "    reconstruct the subsequence\"\"\"\n",
-    "    \n",
-    "    # dp_table[i, j] is True if first i characters of seq1 can\n",
-    "    # be found in the first j characters of seq2\n",
-    "  \n",
-    "    dp_table = {(i, j): False\n",
-    "               for i in range(len(seq1)+1)\n",
-    "               for j in range(len(seq2)+1)}\n",
-    "\n",
-    "    backpointers = {}\n",
-    "    \n",
-    "    for i in range(len(seq1)+1):\n",
-    "        for j in range(i, len(seq2)+1):\n",
-    "            if i == 0 or j == 0:\n",
-    "                dp_table[i, j] = True\n",
-    "                if debug: print('aa', i, j, '!', '!', dp_table[i, j])\n",
-    "            else:\n",
-    "                # extend by character from s2\n",
-    "                if dp_table[i, j-1]:\n",
-    "                    dp_table[i, j] = True\n",
-    "                    backpointers[i, j] = (i, j-1, seq2[j-1], 's2')\n",
-    "                    if debug: print('s2', i, j, seq1[i-1], seq2[j-1], dp_table[i, j])                \n",
-    "                # extend by character from s1\n",
-    "                if dp_table[i-1, j-1] and seq1[i-1] == seq2[j-1]:\n",
-    "                    dp_table[i, j] = True\n",
-    "                    backpointers[i, j] = (i-1, j-1, seq1[i-1], 's1')                \n",
-    "                    if debug: print('s1', i, j, seq1[i-1], seq2[j-1], dp_table[i, j])\n",
-    "                if not dp_table[i, j]:\n",
-    "                    if debug: print('xx', i, j, seq1[i-1], seq2[j-1], dp_table[i, j]) \n",
-    "   \n",
-    "#     if return_backpointers:\n",
-    "#         return dp_table[len(seq1), len(seq2)], backpointers\n",
-    "#     else:\n",
-    "#         return dp_table[len(seq1), len(seq2)]\n",
-    "    \n",
-    "    if return_backpointers or return_table:\n",
-    "        retval = [dp_table[len(seq1), len(seq2)]]\n",
-    "        if return_backpointers:\n",
-    "            retval += [backpointers]\n",
-    "        if return_table:\n",
-    "            retval += [dp_table]\n",
-    "        return tuple(retval)\n",
-    "    else:\n",
-    "        return dp_table[len(seq1), len(seq2)]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 13,
    "metadata": {
     "collapsed": true
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
        "True"
       ]
      },
-     "execution_count": 29,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
        "'AbAAbcAcb'"
       ]
      },
-     "execution_count": 20,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
        "False"
       ]
      },
-     "execution_count": 21,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
        "('aaaa', 'dabaabcacb')"
       ]
      },
-     "execution_count": 24,
+     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
        " (4, 10): (4, 9, 'b', 's2')}"
       ]
      },
-     "execution_count": 23,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 21,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "False"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "is_subseq(s1, s2f)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "aa 0 0 ! ! True\n",
+      "aa 0 1 ! ! True\n",
+      "aa 0 2 ! ! True\n",
+      "aa 0 3 ! ! True\n",
+      "aa 0 4 ! ! True\n",
+      "aa 0 5 ! ! True\n",
+      "aa 0 6 ! ! True\n",
+      "aa 0 7 ! ! True\n",
+      "s1 1 1 a a True\n",
+      "s2 1 2 a a True\n",
+      "s1 1 2 a a True\n",
+      "s2 1 3 a a True\n",
+      "s1 1 3 a a True\n",
+      "s2 1 4 a b True\n",
+      "s2 1 5 a a True\n",
+      "s1 1 5 a a True\n",
+      "s2 1 6 a b True\n",
+      "s2 1 7 a b True\n",
+      "s1 2 2 a a True\n",
+      "s2 2 3 a a True\n",
+      "s1 2 3 a a True\n",
+      "s2 2 4 a b True\n",
+      "s2 2 5 a a True\n",
+      "s1 2 5 a a True\n",
+      "s2 2 6 a b True\n",
+      "s2 2 7 a b True\n",
+      "s1 3 3 a a True\n",
+      "s2 3 4 a b True\n",
+      "s2 3 5 a a True\n",
+      "s1 3 5 a a True\n",
+      "s2 3 6 a b True\n",
+      "s2 3 7 a b True\n",
+      "xx 4 4 a b False\n",
+      "s1 4 5 a a True\n",
+      "s2 4 6 a b True\n",
+      "s2 4 7 a b True\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "(True,\n",
+       " {(1, 1): (0, 0, 'a', 's1'),\n",
+       "  (1, 2): (0, 1, 'a', 's1'),\n",
+       "  (1, 3): (0, 2, 'a', 's1'),\n",
+       "  (1, 4): (1, 3, 'b', 's2'),\n",
+       "  (1, 5): (0, 4, 'a', 's1'),\n",
+       "  (1, 6): (1, 5, 'b', 's2'),\n",
+       "  (1, 7): (1, 6, 'b', 's2'),\n",
+       "  (2, 2): (1, 1, 'a', 's1'),\n",
+       "  (2, 3): (1, 2, 'a', 's1'),\n",
+       "  (2, 4): (2, 3, 'b', 's2'),\n",
+       "  (2, 5): (1, 4, 'a', 's1'),\n",
+       "  (2, 6): (2, 5, 'b', 's2'),\n",
+       "  (2, 7): (2, 6, 'b', 's2'),\n",
+       "  (3, 3): (2, 2, 'a', 's1'),\n",
+       "  (3, 4): (3, 3, 'b', 's2'),\n",
+       "  (3, 5): (2, 4, 'a', 's1'),\n",
+       "  (3, 6): (3, 5, 'b', 's2'),\n",
+       "  (3, 7): (3, 6, 'b', 's2'),\n",
+       "  (4, 5): (3, 4, 'a', 's1'),\n",
+       "  (4, 6): (4, 5, 'b', 's2'),\n",
+       "  (4, 7): (4, 6, 'b', 's2')})"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "is_subseq('aaaa', 'aaababb', return_backpointers=True, debug=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def is_subseq_recursive(s1, s2):\n",
+    "    if not s1:\n",
+    "        return True\n",
+    "    elif len(s1) > len(s2):\n",
+    "        return False\n",
+    "    else:\n",
+    "        if s1[-1] == s2[-1]:\n",
+    "            return is_subseq_recursive(s1[:-1], s2[:-1]) or is_subseq_recursive(s1, s2[:-1])\n",
+    "        else:\n",
+    "            return is_subseq_recursive(s1, s2[:-1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "is_subseq_recursive(s1, s2t)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "False"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "is_subseq_recursive(s1, s2f)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def make_string(length, alphabet=None):\n",
+    "    if not alphabet:\n",
+    "        alphabet = 'abcdefgh'\n",
+    "    return ''.join(random.choice(alphabet) for _ in range(length)) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def interleave(s1, s2, wander_limit=10, debug=False):\n",
+    "    i1 = i2 = wander = 0\n",
+    "    interleaved = []\n",
+    "    while i1 <= len(s1) and i2 <= len(s2):\n",
+    "        if i1 == len(s1):\n",
+    "            if debug: print(i1, i2, wander, 'remaining s2', s2[i2:])\n",
+    "            interleaved += s2[i2:]\n",
+    "            i2 = len(s2) + 1\n",
+    "        elif i2 == len(s2):\n",
+    "            if debug: print(i1, i2, wander, 'remaining s1', s1[i1:])\n",
+    "            interleaved += s1[i1:]\n",
+    "            i1 = len(s1) + 1\n",
+    "        else:\n",
+    "            if wander == wander_limit:\n",
+    "                step = -1\n",
+    "            elif wander == -wander_limit:\n",
+    "                step = +1\n",
+    "            else:\n",
+    "                step = random.choice([+1, -1])\n",
+    "            if step == +1:\n",
+    "                if debug: print(i1, i2, wander, 'adding', s1[i1])\n",
+    "                interleaved += s1[i1]\n",
+    "                i1 += 1\n",
+    "                wander += 1\n",
+    "            else:\n",
+    "                if debug: print(i1, i2, wander, 'adding', s2[i2])\n",
+    "                interleaved += s2[i2]\n",
+    "                i2 += 1\n",
+    "                wander -= 1\n",
+    "    return ''.join(interleaved)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "sl1 = make_string(200)\n",
+    "sl2 = make_string(200)\n",
+    "sl3 = make_string(200)\n",
+    "sl12 = interleave(sl1, sl2)\n",
+    "sl23 = interleave(sl2, sl3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(True, False)"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "is_subseq(sl1, sl12), is_subseq(sl1, sl23)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "is_subseq('aaaa', 'aaababb', return_backpointers=True, debug=True)"
+    "is_subseq_recursive(sl1, sl12), is_subseq_recursive(sl1, sl23)"
    ]
   },
   {