4 "cell_type": "markdown",
7 "# Subsequence strings\n",
9 "Given two strings `a` and `b`, is `a` a subsequence of `b`?\n",
14 "b = \"dabaabcacb\",\n",
16 "return True : dAbAaBCaCb\n",
22 "execution_count": 54,
29 "s2t = \"dabaabcacb\"\n",
36 "cell_type": "markdown",
39 "`dp_table[i, j]` is True if first `i` characters of `s1` can be found in first `j` characters of `s2`."
106 " (4, 10): False,\n",
120 "execution_count": 4,
122 "output_type": "execute_result"
126 "dp_table = {(i, j): False\n",
127 " for i in range(len(s1)+1)\n",
128 " for j in range(len(s2)+1)}\n",
134 "execution_count": 5,
140 "def show_table(table):\n",
141 " return '\\n'.join(\n",
142 " ' '.join(str(table[i, j])[0] for j in sorted(set([k[1] for k in table])))\n",
143 " for i in sorted(set([k[0] for k in table]))) "
148 "execution_count": 6,
153 "output_type": "stream",
155 "F F F F F F F F F F F\n",
156 "F F F F F F F F F F F\n",
157 "F F F F F F F F F F F\n",
158 "F F F F F F F F F F F\n",
159 "F F F F F F F F F F F\n",
160 "F F F F F F F F F F F\n"
165 "print(show_table(dp_table))"
170 "execution_count": 47,
175 "output_type": "stream",
210 "xx 4 4 a b False\n",
224 "{(1, 1): (0, 0, 'a', 's1'),\n",
225 " (1, 2): (0, 1, 'a', 's1'),\n",
226 " (1, 3): (0, 2, 'a', 's1'),\n",
227 " (1, 4): (1, 3, 'b', 's2'),\n",
228 " (1, 5): (0, 4, 'a', 's1'),\n",
229 " (1, 6): (1, 5, 'b', 's2'),\n",
230 " (1, 7): (1, 6, 'b', 's2'),\n",
231 " (2, 2): (1, 1, 'a', 's1'),\n",
232 " (2, 3): (1, 2, 'a', 's1'),\n",
233 " (2, 4): (2, 3, 'b', 's2'),\n",
234 " (2, 5): (1, 4, 'a', 's1'),\n",
235 " (2, 6): (2, 5, 'b', 's2'),\n",
236 " (2, 7): (2, 6, 'b', 's2'),\n",
237 " (3, 3): (2, 2, 'a', 's1'),\n",
238 " (3, 4): (3, 3, 'b', 's2'),\n",
239 " (3, 5): (2, 4, 'a', 's1'),\n",
240 " (3, 6): (3, 5, 'b', 's2'),\n",
241 " (3, 7): (3, 6, 'b', 's2'),\n",
242 " (4, 5): (3, 4, 'a', 's1'),\n",
243 " (4, 6): (4, 5, 'b', 's2'),\n",
244 " (4, 7): (4, 6, 'b', 's2')}"
247 "execution_count": 47,
249 "output_type": "execute_result"
260 "dp_table = {(i, j): False\n",
261 " for i in range(len(s1)+1)\n",
262 " for j in range(len(s2)+1)}\n",
264 "backpointers = {}\n",
266 "for i in range(len(s1)+1):\n",
267 " for j in range(i, len(s2)+1):\n",
268 " if i == 0 or j == 0:\n",
269 " dp_table[i, j] = True\n",
270 " print('aa', i, j, '!', '!', dp_table[i, j])\n",
272 " # extend by character from s2\n",
273 " if dp_table[i, j-1]:\n",
274 " dp_table[i, j] = True\n",
275 " backpointers[i, j] = (i, j-1, s2[j-1], 's2')\n",
276 " print('s2', i, j, s1[i-1], s2[j-1], dp_table[i, j]) \n",
277 " # extend by character from s1\n",
278 " if dp_table[i-1, j-1] and s1[i-1] == s2[j-1]:\n",
279 " dp_table[i, j] = True\n",
280 " backpointers[i, j] = (i-1, j-1, s1[i-1], 's1') \n",
281 " print('s1', i, j, s1[i-1], s2[j-1], dp_table[i, j])\n",
282 " if not dp_table[i, j]:\n",
283 " print('xx', i, j, s1[i-1], s2[j-1], dp_table[i, j])\n",
285 "print(show_table(dp_table))\n",
291 "execution_count": 59,
297 "def show_backtrace(bps):\n",
298 " i = max([0] + [k[0] for k in bps])\n",
299 " j = max([0] + [k[1] for k in bps])\n",
301 " if (i, j) in bps:\n",
302 " while i != 0 or j != 0:\n",
303 " if bps[i, j][3] == 's1':\n",
304 " chars += bps[i, j][2].upper()\n",
306 " chars += bps[i, j][2]\n",
307 " i, j = bps[i, j][0], bps[i, j][1] \n",
308 " return ''.join(list(reversed(chars)))\n",
315 "execution_count": 60,
324 "execution_count": 60,
326 "output_type": "execute_result"
330 "show_backtrace(backpointers)"
335 "execution_count": 49,
339 "def is_subseq(seq1, seq2, return_backpointers=False, debug=False):\n",
340 " \"\"\"Return true if seq1 is a subsequence of seq2.\n",
341 " If return_backpointers, also return the set of backpointers to\n",
342 " reconstruct the subsequence\"\"\"\n",
344 " # dp_table[i, j] is True if first i characters of seq1 can\n",
345 " # be found in the first j characters of seq2\n",
347 " dp_table = {(i, j): False\n",
348 " for i in range(len(seq1)+1)\n",
349 " for j in range(len(seq2)+1)}\n",
351 " backpointers = {}\n",
353 " for i in range(len(seq1)+1):\n",
354 " for j in range(i, len(seq2)+1):\n",
355 " if i == 0 or j == 0:\n",
356 " dp_table[i, j] = True\n",
357 " if debug: print('aa', i, j, '!', '!', dp_table[i, j])\n",
359 " # extend by character from s2\n",
360 " if dp_table[i, j-1]:\n",
361 " dp_table[i, j] = True\n",
362 " backpointers[i, j] = (i, j-1, seq2[j-1], 's2')\n",
363 " if debug: print('s2', i, j, seq1[i-1], seq2[j-1], dp_table[i, j]) \n",
364 " # extend by character from s1\n",
365 " if dp_table[i-1, j-1] and seq1[i-1] == seq2[j-1]:\n",
366 " dp_table[i, j] = True\n",
367 " backpointers[i, j] = (i-1, j-1, seq1[i-1], 's1') \n",
368 " if debug: print('s1', i, j, seq1[i-1], seq2[j-1], dp_table[i, j])\n",
369 " if not dp_table[i, j]:\n",
370 " if debug: print('xx', i, j, seq1[i-1], seq2[j-1], dp_table[i, j]) \n",
372 " if return_backpointers:\n",
373 " return dp_table[len(seq1), len(seq2)], backpointers\n",
375 " return dp_table[len(seq1), len(seq2)]"
380 "execution_count": 39,
389 "execution_count": 39,
391 "output_type": "execute_result"
400 "execution_count": 55,
409 "execution_count": 55,
411 "output_type": "execute_result"
415 "tf, bps = is_subseq(s1, s2t, return_backpointers=True)\n",
421 "execution_count": 56,
430 "execution_count": 56,
432 "output_type": "execute_result"
436 "show_backtrace(bps)"
441 "execution_count": 42,
450 "execution_count": 42,
452 "output_type": "execute_result"
461 "execution_count": 43,
466 "output_type": "stream",
500 "xx 4 4 a b False\n",
510 " {(1, 1): (0, 1, 'a', 's1'),\n",
511 " (1, 2): (0, 2, 'a', 's1'),\n",
512 " (1, 3): (0, 3, 'a', 's1'),\n",
513 " (1, 4): (1, 3, 'b', 's2'),\n",
514 " (1, 5): (0, 5, 'a', 's1'),\n",
515 " (1, 6): (1, 5, 'b', 's2'),\n",
516 " (1, 7): (1, 6, 'b', 's2'),\n",
517 " (2, 2): (1, 2, 'a', 's1'),\n",
518 " (2, 3): (1, 3, 'a', 's1'),\n",
519 " (2, 4): (2, 3, 'b', 's2'),\n",
520 " (2, 5): (1, 5, 'a', 's1'),\n",
521 " (2, 6): (2, 5, 'b', 's2'),\n",
522 " (2, 7): (2, 6, 'b', 's2'),\n",
523 " (3, 3): (2, 3, 'a', 's1'),\n",
524 " (3, 4): (3, 3, 'b', 's2'),\n",
525 " (3, 5): (2, 5, 'a', 's1'),\n",
526 " (3, 6): (3, 5, 'b', 's2'),\n",
527 " (3, 7): (3, 6, 'b', 's2'),\n",
528 " (4, 5): (3, 5, 'a', 's1'),\n",
529 " (4, 6): (4, 5, 'b', 's2'),\n",
530 " (4, 7): (4, 6, 'b', 's2')})"
533 "execution_count": 43,
535 "output_type": "execute_result"
539 "is_subseq('aaaa', 'aaababb', return_backpointers=True, debug=True)"
544 "execution_count": null,
554 "display_name": "Python 3",
555 "language": "python",
563 "file_extension": ".py",
564 "mimetype": "text/x-python",
566 "nbconvert_exporter": "python",
567 "pygments_lexer": "ipython3",