X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=09-resolving-the-bill%2Fsubsequence.ipynb;h=79d837f8f830cff9c312a37eefd447148c31ceeb;hb=690b9aa61348310a072473b58ebbbd424dce2b45;hp=4484bfb6c6a1fa5fe979607ccc7d6c4c086d1b26;hpb=130c870164e49bc33d63a887ecbaee95e4fa9997;p=ou-summer-of-code-2017.git diff --git a/09-resolving-the-bill/subsequence.ipynb b/09-resolving-the-bill/subsequence.ipynb index 4484bfb..79d837f 100644 --- a/09-resolving-the-bill/subsequence.ipynb +++ b/09-resolving-the-bill/subsequence.ipynb @@ -19,7 +19,18 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 33, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import random" + ] + }, + { + "cell_type": "code", + "execution_count": 3, "metadata": { "collapsed": true }, @@ -41,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": { "scrolled": true }, @@ -117,7 +128,7 @@ " (5, 10): False}" ] }, - "execution_count": 2, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -131,7 +142,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": { "collapsed": true }, @@ -145,7 +156,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -167,7 +178,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -244,7 +255,7 @@ " (4, 7): (4, 6, 'b', 's2')}" ] }, - "execution_count": 5, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -288,7 +299,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 8, "metadata": { "collapsed": true }, @@ -312,7 +323,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -321,7 +332,7 @@ "'AAAbAbb'" ] }, - "execution_count": 7, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -332,8 +343,67 @@ }, { "cell_type": "code", - "execution_count": 28, - "metadata": {}, + "execution_count": 11, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def is_subseq(seq1, seq2, return_backpointers=False, return_table=False, debug=False):\n", + " \"\"\"Return true if seq1 is a subsequence of seq2.\n", + " If return_backpointers, also return the set of backpointers to\n", + " reconstruct the subsequence\"\"\"\n", + " \n", + " # dp_table[i, j] is True if first i characters of seq1 can\n", + " # be found in the first j characters of seq2\n", + " \n", + " dp_table = {(i, j): False\n", + " for i in range(len(seq1)+1)\n", + " for j in range(len(seq2)+1)}\n", + "\n", + " backpointers = {}\n", + " \n", + " for i in range(len(seq1)+1):\n", + " for j in range(i, len(seq2)+1):\n", + " if i == 0 or j == 0:\n", + " dp_table[i, j] = True\n", + " if debug: print('aa', i, j, '!', '!', dp_table[i, j])\n", + " else:\n", + " # extend by character from s2\n", + " if dp_table[i, j-1]:\n", + " dp_table[i, j] = True\n", + " backpointers[i, j] = (i, j-1, seq2[j-1], 's2')\n", + " if debug: print('s2', i, j, seq1[i-1], seq2[j-1], dp_table[i, j]) \n", + " # extend by character from s1\n", + " if dp_table[i-1, j-1] and seq1[i-1] == seq2[j-1]:\n", + " dp_table[i, j] = True\n", + " backpointers[i, j] = (i-1, j-1, seq1[i-1], 's1') \n", + " if debug: print('s1', i, j, seq1[i-1], seq2[j-1], dp_table[i, j])\n", + " if not dp_table[i, j]:\n", + " if debug: print('xx', i, j, seq1[i-1], seq2[j-1], dp_table[i, j]) \n", + " \n", + "# if return_backpointers:\n", + "# return dp_table[len(seq1), len(seq2)], backpointers\n", + "# else:\n", + "# return dp_table[len(seq1), len(seq2)]\n", + " \n", + " if return_backpointers or return_table:\n", + " retval = [dp_table[len(seq1), len(seq2)]]\n", + " if return_backpointers:\n", + " retval += [backpointers]\n", + " if return_table:\n", + " retval += [dp_table]\n", + " return tuple(retval)\n", + " else:\n", + " return dp_table[len(seq1), len(seq2)]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout", @@ -398,7 +468,7 @@ "True" ] }, - "execution_count": 28, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -409,64 +479,7 @@ }, { "cell_type": "code", - "execution_count": 27, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def is_subseq(seq1, seq2, return_backpointers=False, return_table=False, debug=False):\n", - " \"\"\"Return true if seq1 is a subsequence of seq2.\n", - " If return_backpointers, also return the set of backpointers to\n", - " reconstruct the subsequence\"\"\"\n", - " \n", - " # dp_table[i, j] is True if first i characters of seq1 can\n", - " # be found in the first j characters of seq2\n", - " \n", - " dp_table = {(i, j): False\n", - " for i in range(len(seq1)+1)\n", - " for j in range(len(seq2)+1)}\n", - "\n", - " backpointers = {}\n", - " \n", - " for i in range(len(seq1)+1):\n", - " for j in range(i, len(seq2)+1):\n", - " if i == 0 or j == 0:\n", - " dp_table[i, j] = True\n", - " if debug: print('aa', i, j, '!', '!', dp_table[i, j])\n", - " else:\n", - " # extend by character from s2\n", - " if dp_table[i, j-1]:\n", - " dp_table[i, j] = True\n", - " backpointers[i, j] = (i, j-1, seq2[j-1], 's2')\n", - " if debug: print('s2', i, j, seq1[i-1], seq2[j-1], dp_table[i, j]) \n", - " # extend by character from s1\n", - " if dp_table[i-1, j-1] and seq1[i-1] == seq2[j-1]:\n", - " dp_table[i, j] = True\n", - " backpointers[i, j] = (i-1, j-1, seq1[i-1], 's1') \n", - " if debug: print('s1', i, j, seq1[i-1], seq2[j-1], dp_table[i, j])\n", - " if not dp_table[i, j]:\n", - " if debug: print('xx', i, j, seq1[i-1], seq2[j-1], dp_table[i, j]) \n", - " \n", - "# if return_backpointers:\n", - "# return dp_table[len(seq1), len(seq2)], backpointers\n", - "# else:\n", - "# return dp_table[len(seq1), len(seq2)]\n", - " \n", - " if return_backpointers or return_table:\n", - " retval = [dp_table[len(seq1), len(seq2)]]\n", - " if return_backpointers:\n", - " retval += [backpointers]\n", - " if return_table:\n", - " retval += [dp_table]\n", - " return tuple(retval)\n", - " else:\n", - " return dp_table[len(seq1), len(seq2)]" - ] - }, - { - "cell_type": "code", - "execution_count": 37, + "execution_count": 13, "metadata": { "collapsed": true }, @@ -479,7 +492,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -488,7 +501,7 @@ "True" ] }, - "execution_count": 29, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -500,7 +513,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -509,7 +522,7 @@ "'AbAAbcAcb'" ] }, - "execution_count": 20, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -520,7 +533,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -541,7 +554,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -562,7 +575,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -571,7 +584,7 @@ "False" ] }, - "execution_count": 21, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -582,7 +595,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -591,7 +604,7 @@ "('aaaa', 'dabaabcacb')" ] }, - "execution_count": 24, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -602,7 +615,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -635,7 +648,7 @@ " (4, 10): (4, 9, 'b', 's2')}" ] }, - "execution_count": 23, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -646,20 +659,265 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "is_subseq(s1, s2f)" ] }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "aa 0 0 ! ! True\n", + "aa 0 1 ! ! True\n", + "aa 0 2 ! ! True\n", + "aa 0 3 ! ! True\n", + "aa 0 4 ! ! True\n", + "aa 0 5 ! ! True\n", + "aa 0 6 ! ! True\n", + "aa 0 7 ! ! True\n", + "s1 1 1 a a True\n", + "s2 1 2 a a True\n", + "s1 1 2 a a True\n", + "s2 1 3 a a True\n", + "s1 1 3 a a True\n", + "s2 1 4 a b True\n", + "s2 1 5 a a True\n", + "s1 1 5 a a True\n", + "s2 1 6 a b True\n", + "s2 1 7 a b True\n", + "s1 2 2 a a True\n", + "s2 2 3 a a True\n", + "s1 2 3 a a True\n", + "s2 2 4 a b True\n", + "s2 2 5 a a True\n", + "s1 2 5 a a True\n", + "s2 2 6 a b True\n", + "s2 2 7 a b True\n", + "s1 3 3 a a True\n", + "s2 3 4 a b True\n", + "s2 3 5 a a True\n", + "s1 3 5 a a True\n", + "s2 3 6 a b True\n", + "s2 3 7 a b True\n", + "xx 4 4 a b False\n", + "s1 4 5 a a True\n", + "s2 4 6 a b True\n", + "s2 4 7 a b True\n" + ] + }, + { + "data": { + "text/plain": [ + "(True,\n", + " {(1, 1): (0, 0, 'a', 's1'),\n", + " (1, 2): (0, 1, 'a', 's1'),\n", + " (1, 3): (0, 2, 'a', 's1'),\n", + " (1, 4): (1, 3, 'b', 's2'),\n", + " (1, 5): (0, 4, 'a', 's1'),\n", + " (1, 6): (1, 5, 'b', 's2'),\n", + " (1, 7): (1, 6, 'b', 's2'),\n", + " (2, 2): (1, 1, 'a', 's1'),\n", + " (2, 3): (1, 2, 'a', 's1'),\n", + " (2, 4): (2, 3, 'b', 's2'),\n", + " (2, 5): (1, 4, 'a', 's1'),\n", + " (2, 6): (2, 5, 'b', 's2'),\n", + " (2, 7): (2, 6, 'b', 's2'),\n", + " (3, 3): (2, 2, 'a', 's1'),\n", + " (3, 4): (3, 3, 'b', 's2'),\n", + " (3, 5): (2, 4, 'a', 's1'),\n", + " (3, 6): (3, 5, 'b', 's2'),\n", + " (3, 7): (3, 6, 'b', 's2'),\n", + " (4, 5): (3, 4, 'a', 's1'),\n", + " (4, 6): (4, 5, 'b', 's2'),\n", + " (4, 7): (4, 6, 'b', 's2')})" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "is_subseq('aaaa', 'aaababb', return_backpointers=True, debug=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def is_subseq_recursive(s1, s2):\n", + " if not s1:\n", + " return True\n", + " elif len(s1) > len(s2):\n", + " return False\n", + " else:\n", + " if s1[-1] == s2[-1]:\n", + " return is_subseq_recursive(s1[:-1], s2[:-1]) or is_subseq_recursive(s1, s2[:-1])\n", + " else:\n", + " return is_subseq_recursive(s1, s2[:-1])" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "is_subseq_recursive(s1, s2t)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "is_subseq_recursive(s1, s2f)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def make_string(length, alphabet=None):\n", + " if not alphabet:\n", + " alphabet = 'abcdefgh'\n", + " return ''.join(random.choice(alphabet) for _ in range(length)) " + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def interleave(s1, s2, wander_limit=10, debug=False):\n", + " i1 = i2 = wander = 0\n", + " interleaved = []\n", + " while i1 <= len(s1) and i2 <= len(s2):\n", + " if i1 == len(s1):\n", + " if debug: print(i1, i2, wander, 'remaining s2', s2[i2:])\n", + " interleaved += s2[i2:]\n", + " i2 = len(s2) + 1\n", + " elif i2 == len(s2):\n", + " if debug: print(i1, i2, wander, 'remaining s1', s1[i1:])\n", + " interleaved += s1[i1:]\n", + " i1 = len(s1) + 1\n", + " else:\n", + " if wander == wander_limit:\n", + " step = -1\n", + " elif wander == -wander_limit:\n", + " step = +1\n", + " else:\n", + " step = random.choice([+1, -1])\n", + " if step == +1:\n", + " if debug: print(i1, i2, wander, 'adding', s1[i1])\n", + " interleaved += s1[i1]\n", + " i1 += 1\n", + " wander += 1\n", + " else:\n", + " if debug: print(i1, i2, wander, 'adding', s2[i2])\n", + " interleaved += s2[i2]\n", + " i2 += 1\n", + " wander -= 1\n", + " return ''.join(interleaved)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "sl1 = make_string(200)\n", + "sl2 = make_string(200)\n", + "sl3 = make_string(200)\n", + "sl12 = interleave(sl1, sl2)\n", + "sl23 = interleave(sl2, sl3)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(True, False)" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "is_subseq(sl1, sl12), is_subseq(sl1, sl23)" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "is_subseq('aaaa', 'aaababb', return_backpointers=True, debug=True)" + "is_subseq_recursive(sl1, sl12), is_subseq_recursive(sl1, sl23)" ] }, {