},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 33,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import random"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
"metadata": {
"collapsed": true
},
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 4,
"metadata": {
"scrolled": true
},
" (5, 10): False}"
]
},
- "execution_count": 2,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 5,
"metadata": {
"collapsed": true
},
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
" (4, 7): (4, 6, 'b', 's2')}"
]
},
- "execution_count": 5,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 8,
"metadata": {
"collapsed": true
},
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
"'AAAbAbb'"
]
},
- "execution_count": 7,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
},
{
"cell_type": "code",
- "execution_count": 28,
- "metadata": {},
+ "execution_count": 11,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "def is_subseq(seq1, seq2, return_backpointers=False, return_table=False, debug=False):\n",
+ " \"\"\"Return true if seq1 is a subsequence of seq2.\n",
+ " If return_backpointers, also return the set of backpointers to\n",
+ " reconstruct the subsequence\"\"\"\n",
+ " \n",
+ " # dp_table[i, j] is True if first i characters of seq1 can\n",
+ " # be found in the first j characters of seq2\n",
+ " \n",
+ " dp_table = {(i, j): False\n",
+ " for i in range(len(seq1)+1)\n",
+ " for j in range(len(seq2)+1)}\n",
+ "\n",
+ " backpointers = {}\n",
+ " \n",
+ " for i in range(len(seq1)+1):\n",
+ " for j in range(i, len(seq2)+1):\n",
+ " if i == 0 or j == 0:\n",
+ " dp_table[i, j] = True\n",
+ " if debug: print('aa', i, j, '!', '!', dp_table[i, j])\n",
+ " else:\n",
+ " # extend by character from s2\n",
+ " if dp_table[i, j-1]:\n",
+ " dp_table[i, j] = True\n",
+ " backpointers[i, j] = (i, j-1, seq2[j-1], 's2')\n",
+ " if debug: print('s2', i, j, seq1[i-1], seq2[j-1], dp_table[i, j]) \n",
+ " # extend by character from s1\n",
+ " if dp_table[i-1, j-1] and seq1[i-1] == seq2[j-1]:\n",
+ " dp_table[i, j] = True\n",
+ " backpointers[i, j] = (i-1, j-1, seq1[i-1], 's1') \n",
+ " if debug: print('s1', i, j, seq1[i-1], seq2[j-1], dp_table[i, j])\n",
+ " if not dp_table[i, j]:\n",
+ " if debug: print('xx', i, j, seq1[i-1], seq2[j-1], dp_table[i, j]) \n",
+ " \n",
+ "# if return_backpointers:\n",
+ "# return dp_table[len(seq1), len(seq2)], backpointers\n",
+ "# else:\n",
+ "# return dp_table[len(seq1), len(seq2)]\n",
+ " \n",
+ " if return_backpointers or return_table:\n",
+ " retval = [dp_table[len(seq1), len(seq2)]]\n",
+ " if return_backpointers:\n",
+ " retval += [backpointers]\n",
+ " if return_table:\n",
+ " retval += [dp_table]\n",
+ " return tuple(retval)\n",
+ " else:\n",
+ " return dp_table[len(seq1), len(seq2)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "scrolled": true
+ },
"outputs": [
{
"name": "stdout",
"True"
]
},
- "execution_count": 28,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
},
{
"cell_type": "code",
- "execution_count": 27,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "def is_subseq(seq1, seq2, return_backpointers=False, return_table=False, debug=False):\n",
- " \"\"\"Return true if seq1 is a subsequence of seq2.\n",
- " If return_backpointers, also return the set of backpointers to\n",
- " reconstruct the subsequence\"\"\"\n",
- " \n",
- " # dp_table[i, j] is True if first i characters of seq1 can\n",
- " # be found in the first j characters of seq2\n",
- " \n",
- " dp_table = {(i, j): False\n",
- " for i in range(len(seq1)+1)\n",
- " for j in range(len(seq2)+1)}\n",
- "\n",
- " backpointers = {}\n",
- " \n",
- " for i in range(len(seq1)+1):\n",
- " for j in range(i, len(seq2)+1):\n",
- " if i == 0 or j == 0:\n",
- " dp_table[i, j] = True\n",
- " if debug: print('aa', i, j, '!', '!', dp_table[i, j])\n",
- " else:\n",
- " # extend by character from s2\n",
- " if dp_table[i, j-1]:\n",
- " dp_table[i, j] = True\n",
- " backpointers[i, j] = (i, j-1, seq2[j-1], 's2')\n",
- " if debug: print('s2', i, j, seq1[i-1], seq2[j-1], dp_table[i, j]) \n",
- " # extend by character from s1\n",
- " if dp_table[i-1, j-1] and seq1[i-1] == seq2[j-1]:\n",
- " dp_table[i, j] = True\n",
- " backpointers[i, j] = (i-1, j-1, seq1[i-1], 's1') \n",
- " if debug: print('s1', i, j, seq1[i-1], seq2[j-1], dp_table[i, j])\n",
- " if not dp_table[i, j]:\n",
- " if debug: print('xx', i, j, seq1[i-1], seq2[j-1], dp_table[i, j]) \n",
- " \n",
- "# if return_backpointers:\n",
- "# return dp_table[len(seq1), len(seq2)], backpointers\n",
- "# else:\n",
- "# return dp_table[len(seq1), len(seq2)]\n",
- " \n",
- " if return_backpointers or return_table:\n",
- " retval = [dp_table[len(seq1), len(seq2)]]\n",
- " if return_backpointers:\n",
- " retval += [backpointers]\n",
- " if return_table:\n",
- " retval += [dp_table]\n",
- " return tuple(retval)\n",
- " else:\n",
- " return dp_table[len(seq1), len(seq2)]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 37,
+ "execution_count": 13,
"metadata": {
"collapsed": true
},
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
"True"
]
},
- "execution_count": 29,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 15,
"metadata": {},
"outputs": [
{
"'AbAAbcAcb'"
]
},
- "execution_count": 20,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 17,
"metadata": {},
"outputs": [
{
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 18,
"metadata": {},
"outputs": [
{
"False"
]
},
- "execution_count": 21,
+ "execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
"('aaaa', 'dabaabcacb')"
]
},
- "execution_count": 24,
+ "execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 20,
"metadata": {},
"outputs": [
{
" (4, 10): (4, 9, 'b', 's2')}"
]
},
- "execution_count": 23,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 21,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "False"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"is_subseq(s1, s2f)"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "aa 0 0 ! ! True\n",
+ "aa 0 1 ! ! True\n",
+ "aa 0 2 ! ! True\n",
+ "aa 0 3 ! ! True\n",
+ "aa 0 4 ! ! True\n",
+ "aa 0 5 ! ! True\n",
+ "aa 0 6 ! ! True\n",
+ "aa 0 7 ! ! True\n",
+ "s1 1 1 a a True\n",
+ "s2 1 2 a a True\n",
+ "s1 1 2 a a True\n",
+ "s2 1 3 a a True\n",
+ "s1 1 3 a a True\n",
+ "s2 1 4 a b True\n",
+ "s2 1 5 a a True\n",
+ "s1 1 5 a a True\n",
+ "s2 1 6 a b True\n",
+ "s2 1 7 a b True\n",
+ "s1 2 2 a a True\n",
+ "s2 2 3 a a True\n",
+ "s1 2 3 a a True\n",
+ "s2 2 4 a b True\n",
+ "s2 2 5 a a True\n",
+ "s1 2 5 a a True\n",
+ "s2 2 6 a b True\n",
+ "s2 2 7 a b True\n",
+ "s1 3 3 a a True\n",
+ "s2 3 4 a b True\n",
+ "s2 3 5 a a True\n",
+ "s1 3 5 a a True\n",
+ "s2 3 6 a b True\n",
+ "s2 3 7 a b True\n",
+ "xx 4 4 a b False\n",
+ "s1 4 5 a a True\n",
+ "s2 4 6 a b True\n",
+ "s2 4 7 a b True\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "(True,\n",
+ " {(1, 1): (0, 0, 'a', 's1'),\n",
+ " (1, 2): (0, 1, 'a', 's1'),\n",
+ " (1, 3): (0, 2, 'a', 's1'),\n",
+ " (1, 4): (1, 3, 'b', 's2'),\n",
+ " (1, 5): (0, 4, 'a', 's1'),\n",
+ " (1, 6): (1, 5, 'b', 's2'),\n",
+ " (1, 7): (1, 6, 'b', 's2'),\n",
+ " (2, 2): (1, 1, 'a', 's1'),\n",
+ " (2, 3): (1, 2, 'a', 's1'),\n",
+ " (2, 4): (2, 3, 'b', 's2'),\n",
+ " (2, 5): (1, 4, 'a', 's1'),\n",
+ " (2, 6): (2, 5, 'b', 's2'),\n",
+ " (2, 7): (2, 6, 'b', 's2'),\n",
+ " (3, 3): (2, 2, 'a', 's1'),\n",
+ " (3, 4): (3, 3, 'b', 's2'),\n",
+ " (3, 5): (2, 4, 'a', 's1'),\n",
+ " (3, 6): (3, 5, 'b', 's2'),\n",
+ " (3, 7): (3, 6, 'b', 's2'),\n",
+ " (4, 5): (3, 4, 'a', 's1'),\n",
+ " (4, 6): (4, 5, 'b', 's2'),\n",
+ " (4, 7): (4, 6, 'b', 's2')})"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "is_subseq('aaaa', 'aaababb', return_backpointers=True, debug=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "def is_subseq_recursive(s1, s2):\n",
+ " if not s1:\n",
+ " return True\n",
+ " elif len(s1) > len(s2):\n",
+ " return False\n",
+ " else:\n",
+ " if s1[-1] == s2[-1]:\n",
+ " return is_subseq_recursive(s1[:-1], s2[:-1]) or is_subseq_recursive(s1, s2[:-1])\n",
+ " else:\n",
+ " return is_subseq_recursive(s1, s2[:-1])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "is_subseq_recursive(s1, s2t)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "False"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "is_subseq_recursive(s1, s2f)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "def make_string(length, alphabet=None):\n",
+ " if not alphabet:\n",
+ " alphabet = 'abcdefgh'\n",
+ " return ''.join(random.choice(alphabet) for _ in range(length)) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "def interleave(s1, s2, wander_limit=10, debug=False):\n",
+ " i1 = i2 = wander = 0\n",
+ " interleaved = []\n",
+ " while i1 <= len(s1) and i2 <= len(s2):\n",
+ " if i1 == len(s1):\n",
+ " if debug: print(i1, i2, wander, 'remaining s2', s2[i2:])\n",
+ " interleaved += s2[i2:]\n",
+ " i2 = len(s2) + 1\n",
+ " elif i2 == len(s2):\n",
+ " if debug: print(i1, i2, wander, 'remaining s1', s1[i1:])\n",
+ " interleaved += s1[i1:]\n",
+ " i1 = len(s1) + 1\n",
+ " else:\n",
+ " if wander == wander_limit:\n",
+ " step = -1\n",
+ " elif wander == -wander_limit:\n",
+ " step = +1\n",
+ " else:\n",
+ " step = random.choice([+1, -1])\n",
+ " if step == +1:\n",
+ " if debug: print(i1, i2, wander, 'adding', s1[i1])\n",
+ " interleaved += s1[i1]\n",
+ " i1 += 1\n",
+ " wander += 1\n",
+ " else:\n",
+ " if debug: print(i1, i2, wander, 'adding', s2[i2])\n",
+ " interleaved += s2[i2]\n",
+ " i2 += 1\n",
+ " wander -= 1\n",
+ " return ''.join(interleaved)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "sl1 = make_string(200)\n",
+ "sl2 = make_string(200)\n",
+ "sl3 = make_string(200)\n",
+ "sl12 = interleave(sl1, sl2)\n",
+ "sl23 = interleave(sl2, sl3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(True, False)"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "is_subseq(sl1, sl12), is_subseq(sl1, sl23)"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "is_subseq('aaaa', 'aaababb', return_backpointers=True, debug=True)"
+ "is_subseq_recursive(sl1, sl12), is_subseq_recursive(sl1, sl23)"
]
},
{