Added recursive solutions
[ou-summer-of-code-2017.git] / 09-resolving-the-bill / subsequence.ipynb
1 {
2 "cells": [
3 {
4 "cell_type": "markdown",
5 "metadata": {},
6 "source": [
7 "# Subsequence strings\n",
8 "\n",
9 "Given two strings `a` and `b`, is `a` a subsequence of `b`?\n",
10 "\n",
11 "For example,\n",
12 "Given:\n",
13 "a = \"aabcc\",\n",
14 "b = \"dabaabcacb\",\n",
15 "\n",
16 "return True : dAbAaBCaCb\n",
17 "\n"
18 ]
19 },
20 {
21 "cell_type": "code",
22 "execution_count": 33,
23 "metadata": {
24 "collapsed": true
25 },
26 "outputs": [],
27 "source": [
28 "import random"
29 ]
30 },
31 {
32 "cell_type": "code",
33 "execution_count": 3,
34 "metadata": {
35 "collapsed": true
36 },
37 "outputs": [],
38 "source": [
39 "s1 = \"aabcc\"\n",
40 "s2t = \"dabaabcacb\"\n",
41 "s2f = \"dabacc\"\n",
42 "\n",
43 "s2 = s2t"
44 ]
45 },
46 {
47 "cell_type": "markdown",
48 "metadata": {},
49 "source": [
50 "`dp_table[i, j]` is True if first `i` characters of `s1` can be found in first `j` characters of `s2`."
51 ]
52 },
53 {
54 "cell_type": "code",
55 "execution_count": 4,
56 "metadata": {
57 "scrolled": true
58 },
59 "outputs": [
60 {
61 "data": {
62 "text/plain": [
63 "{(0, 0): False,\n",
64 " (0, 1): False,\n",
65 " (0, 2): False,\n",
66 " (0, 3): False,\n",
67 " (0, 4): False,\n",
68 " (0, 5): False,\n",
69 " (0, 6): False,\n",
70 " (0, 7): False,\n",
71 " (0, 8): False,\n",
72 " (0, 9): False,\n",
73 " (0, 10): False,\n",
74 " (1, 0): False,\n",
75 " (1, 1): False,\n",
76 " (1, 2): False,\n",
77 " (1, 3): False,\n",
78 " (1, 4): False,\n",
79 " (1, 5): False,\n",
80 " (1, 6): False,\n",
81 " (1, 7): False,\n",
82 " (1, 8): False,\n",
83 " (1, 9): False,\n",
84 " (1, 10): False,\n",
85 " (2, 0): False,\n",
86 " (2, 1): False,\n",
87 " (2, 2): False,\n",
88 " (2, 3): False,\n",
89 " (2, 4): False,\n",
90 " (2, 5): False,\n",
91 " (2, 6): False,\n",
92 " (2, 7): False,\n",
93 " (2, 8): False,\n",
94 " (2, 9): False,\n",
95 " (2, 10): False,\n",
96 " (3, 0): False,\n",
97 " (3, 1): False,\n",
98 " (3, 2): False,\n",
99 " (3, 3): False,\n",
100 " (3, 4): False,\n",
101 " (3, 5): False,\n",
102 " (3, 6): False,\n",
103 " (3, 7): False,\n",
104 " (3, 8): False,\n",
105 " (3, 9): False,\n",
106 " (3, 10): False,\n",
107 " (4, 0): False,\n",
108 " (4, 1): False,\n",
109 " (4, 2): False,\n",
110 " (4, 3): False,\n",
111 " (4, 4): False,\n",
112 " (4, 5): False,\n",
113 " (4, 6): False,\n",
114 " (4, 7): False,\n",
115 " (4, 8): False,\n",
116 " (4, 9): False,\n",
117 " (4, 10): False,\n",
118 " (5, 0): False,\n",
119 " (5, 1): False,\n",
120 " (5, 2): False,\n",
121 " (5, 3): False,\n",
122 " (5, 4): False,\n",
123 " (5, 5): False,\n",
124 " (5, 6): False,\n",
125 " (5, 7): False,\n",
126 " (5, 8): False,\n",
127 " (5, 9): False,\n",
128 " (5, 10): False}"
129 ]
130 },
131 "execution_count": 4,
132 "metadata": {},
133 "output_type": "execute_result"
134 }
135 ],
136 "source": [
137 "dp_table = {(i, j): False\n",
138 " for i in range(len(s1)+1)\n",
139 " for j in range(len(s2)+1)}\n",
140 "dp_table"
141 ]
142 },
143 {
144 "cell_type": "code",
145 "execution_count": 5,
146 "metadata": {
147 "collapsed": true
148 },
149 "outputs": [],
150 "source": [
151 "def show_table(table):\n",
152 " return '\\n'.join(\n",
153 " ' '.join(str(table[i, j])[0] for j in sorted(set([k[1] for k in table])))\n",
154 " for i in sorted(set([k[0] for k in table]))) "
155 ]
156 },
157 {
158 "cell_type": "code",
159 "execution_count": 6,
160 "metadata": {},
161 "outputs": [
162 {
163 "name": "stdout",
164 "output_type": "stream",
165 "text": [
166 "F F F F F F F F F F F\n",
167 "F F F F F F F F F F F\n",
168 "F F F F F F F F F F F\n",
169 "F F F F F F F F F F F\n",
170 "F F F F F F F F F F F\n",
171 "F F F F F F F F F F F\n"
172 ]
173 }
174 ],
175 "source": [
176 "print(show_table(dp_table))"
177 ]
178 },
179 {
180 "cell_type": "code",
181 "execution_count": 7,
182 "metadata": {},
183 "outputs": [
184 {
185 "name": "stdout",
186 "output_type": "stream",
187 "text": [
188 "aaaa aaababb\n",
189 "aa 0 0 ! ! True\n",
190 "aa 0 1 ! ! True\n",
191 "aa 0 2 ! ! True\n",
192 "aa 0 3 ! ! True\n",
193 "aa 0 4 ! ! True\n",
194 "aa 0 5 ! ! True\n",
195 "aa 0 6 ! ! True\n",
196 "aa 0 7 ! ! True\n",
197 "s1 1 1 a a True\n",
198 "s2 1 2 a a True\n",
199 "s1 1 2 a a True\n",
200 "s2 1 3 a a True\n",
201 "s1 1 3 a a True\n",
202 "s2 1 4 a b True\n",
203 "s2 1 5 a a True\n",
204 "s1 1 5 a a True\n",
205 "s2 1 6 a b True\n",
206 "s2 1 7 a b True\n",
207 "s1 2 2 a a True\n",
208 "s2 2 3 a a True\n",
209 "s1 2 3 a a True\n",
210 "s2 2 4 a b True\n",
211 "s2 2 5 a a True\n",
212 "s1 2 5 a a True\n",
213 "s2 2 6 a b True\n",
214 "s2 2 7 a b True\n",
215 "s1 3 3 a a True\n",
216 "s2 3 4 a b True\n",
217 "s2 3 5 a a True\n",
218 "s1 3 5 a a True\n",
219 "s2 3 6 a b True\n",
220 "s2 3 7 a b True\n",
221 "xx 4 4 a b False\n",
222 "s1 4 5 a a True\n",
223 "s2 4 6 a b True\n",
224 "s2 4 7 a b True\n",
225 "T T T T T T T T\n",
226 "F T T T T T T T\n",
227 "F F T T T T T T\n",
228 "F F F T T T T T\n",
229 "F F F F F T T T\n"
230 ]
231 },
232 {
233 "data": {
234 "text/plain": [
235 "{(1, 1): (0, 0, 'a', 's1'),\n",
236 " (1, 2): (0, 1, 'a', 's1'),\n",
237 " (1, 3): (0, 2, 'a', 's1'),\n",
238 " (1, 4): (1, 3, 'b', 's2'),\n",
239 " (1, 5): (0, 4, 'a', 's1'),\n",
240 " (1, 6): (1, 5, 'b', 's2'),\n",
241 " (1, 7): (1, 6, 'b', 's2'),\n",
242 " (2, 2): (1, 1, 'a', 's1'),\n",
243 " (2, 3): (1, 2, 'a', 's1'),\n",
244 " (2, 4): (2, 3, 'b', 's2'),\n",
245 " (2, 5): (1, 4, 'a', 's1'),\n",
246 " (2, 6): (2, 5, 'b', 's2'),\n",
247 " (2, 7): (2, 6, 'b', 's2'),\n",
248 " (3, 3): (2, 2, 'a', 's1'),\n",
249 " (3, 4): (3, 3, 'b', 's2'),\n",
250 " (3, 5): (2, 4, 'a', 's1'),\n",
251 " (3, 6): (3, 5, 'b', 's2'),\n",
252 " (3, 7): (3, 6, 'b', 's2'),\n",
253 " (4, 5): (3, 4, 'a', 's1'),\n",
254 " (4, 6): (4, 5, 'b', 's2'),\n",
255 " (4, 7): (4, 6, 'b', 's2')}"
256 ]
257 },
258 "execution_count": 7,
259 "metadata": {},
260 "output_type": "execute_result"
261 }
262 ],
263 "source": [
264 "s2 = s2f\n",
265 "\n",
266 "s1 = 'aaaa'\n",
267 "s2 = 'aaababb'\n",
268 "\n",
269 "print(s1, s2)\n",
270 "\n",
271 "dp_table = {(i, j): False\n",
272 " for i in range(len(s1)+1)\n",
273 " for j in range(len(s2)+1)}\n",
274 "\n",
275 "backpointers = {}\n",
276 "\n",
277 "for i in range(len(s1)+1):\n",
278 " for j in range(i, len(s2)+1):\n",
279 " if i == 0 or j == 0:\n",
280 " dp_table[i, j] = True\n",
281 " print('aa', i, j, '!', '!', dp_table[i, j])\n",
282 " else:\n",
283 " # extend by character from s2\n",
284 " if dp_table[i, j-1]:\n",
285 " dp_table[i, j] = True\n",
286 " backpointers[i, j] = (i, j-1, s2[j-1], 's2')\n",
287 " print('s2', i, j, s1[i-1], s2[j-1], dp_table[i, j]) \n",
288 " # extend by character from s1\n",
289 " if dp_table[i-1, j-1] and s1[i-1] == s2[j-1]:\n",
290 " dp_table[i, j] = True\n",
291 " backpointers[i, j] = (i-1, j-1, s1[i-1], 's1') \n",
292 " print('s1', i, j, s1[i-1], s2[j-1], dp_table[i, j])\n",
293 " if not dp_table[i, j]:\n",
294 " print('xx', i, j, s1[i-1], s2[j-1], dp_table[i, j])\n",
295 "\n",
296 "print(show_table(dp_table))\n",
297 "backpointers"
298 ]
299 },
300 {
301 "cell_type": "code",
302 "execution_count": 8,
303 "metadata": {
304 "collapsed": true
305 },
306 "outputs": [],
307 "source": [
308 "def show_backtrace(bps):\n",
309 " i = max([0] + [k[0] for k in bps])\n",
310 " j = max([0] + [k[1] for k in bps])\n",
311 " chars = ''\n",
312 " if (i, j) in bps:\n",
313 " while i != 0 and j != 0:\n",
314 " if bps[i, j][3] == 's1':\n",
315 " chars += bps[i, j][2].upper()\n",
316 " else:\n",
317 " chars += bps[i, j][2]\n",
318 " i, j = bps[i, j][0], bps[i, j][1] \n",
319 " return ''.join(list(reversed(chars)))\n",
320 " else:\n",
321 " return ''"
322 ]
323 },
324 {
325 "cell_type": "code",
326 "execution_count": 9,
327 "metadata": {},
328 "outputs": [
329 {
330 "data": {
331 "text/plain": [
332 "'AAAbAbb'"
333 ]
334 },
335 "execution_count": 9,
336 "metadata": {},
337 "output_type": "execute_result"
338 }
339 ],
340 "source": [
341 "show_backtrace(backpointers)"
342 ]
343 },
344 {
345 "cell_type": "code",
346 "execution_count": 11,
347 "metadata": {
348 "collapsed": true
349 },
350 "outputs": [],
351 "source": [
352 "def is_subseq(seq1, seq2, return_backpointers=False, return_table=False, debug=False):\n",
353 " \"\"\"Return true if seq1 is a subsequence of seq2.\n",
354 " If return_backpointers, also return the set of backpointers to\n",
355 " reconstruct the subsequence\"\"\"\n",
356 " \n",
357 " # dp_table[i, j] is True if first i characters of seq1 can\n",
358 " # be found in the first j characters of seq2\n",
359 " \n",
360 " dp_table = {(i, j): False\n",
361 " for i in range(len(seq1)+1)\n",
362 " for j in range(len(seq2)+1)}\n",
363 "\n",
364 " backpointers = {}\n",
365 " \n",
366 " for i in range(len(seq1)+1):\n",
367 " for j in range(i, len(seq2)+1):\n",
368 " if i == 0 or j == 0:\n",
369 " dp_table[i, j] = True\n",
370 " if debug: print('aa', i, j, '!', '!', dp_table[i, j])\n",
371 " else:\n",
372 " # extend by character from s2\n",
373 " if dp_table[i, j-1]:\n",
374 " dp_table[i, j] = True\n",
375 " backpointers[i, j] = (i, j-1, seq2[j-1], 's2')\n",
376 " if debug: print('s2', i, j, seq1[i-1], seq2[j-1], dp_table[i, j]) \n",
377 " # extend by character from s1\n",
378 " if dp_table[i-1, j-1] and seq1[i-1] == seq2[j-1]:\n",
379 " dp_table[i, j] = True\n",
380 " backpointers[i, j] = (i-1, j-1, seq1[i-1], 's1') \n",
381 " if debug: print('s1', i, j, seq1[i-1], seq2[j-1], dp_table[i, j])\n",
382 " if not dp_table[i, j]:\n",
383 " if debug: print('xx', i, j, seq1[i-1], seq2[j-1], dp_table[i, j]) \n",
384 " \n",
385 "# if return_backpointers:\n",
386 "# return dp_table[len(seq1), len(seq2)], backpointers\n",
387 "# else:\n",
388 "# return dp_table[len(seq1), len(seq2)]\n",
389 " \n",
390 " if return_backpointers or return_table:\n",
391 " retval = [dp_table[len(seq1), len(seq2)]]\n",
392 " if return_backpointers:\n",
393 " retval += [backpointers]\n",
394 " if return_table:\n",
395 " retval += [dp_table]\n",
396 " return tuple(retval)\n",
397 " else:\n",
398 " return dp_table[len(seq1), len(seq2)]"
399 ]
400 },
401 {
402 "cell_type": "code",
403 "execution_count": 12,
404 "metadata": {
405 "scrolled": true
406 },
407 "outputs": [
408 {
409 "name": "stdout",
410 "output_type": "stream",
411 "text": [
412 "aa 0 0 ! ! True\n",
413 "aa 0 1 ! ! True\n",
414 "aa 0 2 ! ! True\n",
415 "aa 0 3 ! ! True\n",
416 "aa 0 4 ! ! True\n",
417 "aa 0 5 ! ! True\n",
418 "aa 0 6 ! ! True\n",
419 "aa 0 7 ! ! True\n",
420 "aa 0 8 ! ! True\n",
421 "aa 0 9 ! ! True\n",
422 "aa 0 10 ! ! True\n",
423 "xx 1 1 a d False\n",
424 "s1 1 2 a a True\n",
425 "s2 1 3 a b True\n",
426 "s2 1 4 a a True\n",
427 "s1 1 4 a a True\n",
428 "s2 1 5 a a True\n",
429 "s1 1 5 a a True\n",
430 "s2 1 6 a b True\n",
431 "s2 1 7 a c True\n",
432 "s2 1 8 a a True\n",
433 "s1 1 8 a a True\n",
434 "s2 1 9 a c True\n",
435 "s2 1 10 a b True\n",
436 "xx 2 2 a a False\n",
437 "xx 2 3 a b False\n",
438 "s1 2 4 a a True\n",
439 "s2 2 5 a a True\n",
440 "s1 2 5 a a True\n",
441 "s2 2 6 a b True\n",
442 "s2 2 7 a c True\n",
443 "s2 2 8 a a True\n",
444 "s1 2 8 a a True\n",
445 "s2 2 9 a c True\n",
446 "s2 2 10 a b True\n",
447 "xx 3 3 a b False\n",
448 "xx 3 4 a a False\n",
449 "s1 3 5 a a True\n",
450 "s2 3 6 a b True\n",
451 "s2 3 7 a c True\n",
452 "s2 3 8 a a True\n",
453 "s1 3 8 a a True\n",
454 "s2 3 9 a c True\n",
455 "s2 3 10 a b True\n",
456 "xx 4 4 a a False\n",
457 "xx 4 5 a a False\n",
458 "xx 4 6 a b False\n",
459 "xx 4 7 a c False\n",
460 "s1 4 8 a a True\n",
461 "s2 4 9 a c True\n",
462 "s2 4 10 a b True\n"
463 ]
464 },
465 {
466 "data": {
467 "text/plain": [
468 "True"
469 ]
470 },
471 "execution_count": 12,
472 "metadata": {},
473 "output_type": "execute_result"
474 }
475 ],
476 "source": [
477 "is_subseq(s1, s2t, debug=True)"
478 ]
479 },
480 {
481 "cell_type": "code",
482 "execution_count": 13,
483 "metadata": {
484 "collapsed": true
485 },
486 "outputs": [],
487 "source": [
488 "def show_annotated_table(table, bps):\n",
489 " return '\\n'.join(' '.join('.' if (i, j) not in bps else bps[i, j][2] if table[i, j] else '.' for j in sorted(set([k[1] for k in table])))\n",
490 " for i in sorted(set([k[0] for k in table])))"
491 ]
492 },
493 {
494 "cell_type": "code",
495 "execution_count": 14,
496 "metadata": {},
497 "outputs": [
498 {
499 "data": {
500 "text/plain": [
501 "True"
502 ]
503 },
504 "execution_count": 14,
505 "metadata": {},
506 "output_type": "execute_result"
507 }
508 ],
509 "source": [
510 "tf, bps, tb = is_subseq(s1, s2t, return_backpointers=True, return_table=True)\n",
511 "tf"
512 ]
513 },
514 {
515 "cell_type": "code",
516 "execution_count": 15,
517 "metadata": {},
518 "outputs": [
519 {
520 "data": {
521 "text/plain": [
522 "'AbAAbcAcb'"
523 ]
524 },
525 "execution_count": 15,
526 "metadata": {},
527 "output_type": "execute_result"
528 }
529 ],
530 "source": [
531 "show_backtrace(bps)"
532 ]
533 },
534 {
535 "cell_type": "code",
536 "execution_count": 16,
537 "metadata": {},
538 "outputs": [
539 {
540 "name": "stdout",
541 "output_type": "stream",
542 "text": [
543 "T T T T T T T T T T T\n",
544 "F F T T T T T T T T T\n",
545 "F F F F T T T T T T T\n",
546 "F F F F F T T T T T T\n",
547 "F F F F F F F F T T T\n"
548 ]
549 }
550 ],
551 "source": [
552 "print(show_table(tb))"
553 ]
554 },
555 {
556 "cell_type": "code",
557 "execution_count": 17,
558 "metadata": {},
559 "outputs": [
560 {
561 "name": "stdout",
562 "output_type": "stream",
563 "text": [
564 ". . . . . . . . . . .\n",
565 ". . a b a a b c a c b\n",
566 ". . . . a a b c a c b\n",
567 ". . . . . a b c a c b\n",
568 ". . . . . . . . a c b\n"
569 ]
570 }
571 ],
572 "source": [
573 "print(show_annotated_table(tb, bps))"
574 ]
575 },
576 {
577 "cell_type": "code",
578 "execution_count": 18,
579 "metadata": {},
580 "outputs": [
581 {
582 "data": {
583 "text/plain": [
584 "False"
585 ]
586 },
587 "execution_count": 18,
588 "metadata": {},
589 "output_type": "execute_result"
590 }
591 ],
592 "source": [
593 "len(show_backtrace(bps)) == len(s2t)"
594 ]
595 },
596 {
597 "cell_type": "code",
598 "execution_count": 19,
599 "metadata": {},
600 "outputs": [
601 {
602 "data": {
603 "text/plain": [
604 "('aaaa', 'dabaabcacb')"
605 ]
606 },
607 "execution_count": 19,
608 "metadata": {},
609 "output_type": "execute_result"
610 }
611 ],
612 "source": [
613 "s1, s2t"
614 ]
615 },
616 {
617 "cell_type": "code",
618 "execution_count": 20,
619 "metadata": {},
620 "outputs": [
621 {
622 "data": {
623 "text/plain": [
624 "{(1, 2): (0, 1, 'a', 's1'),\n",
625 " (1, 3): (1, 2, 'b', 's2'),\n",
626 " (1, 4): (0, 3, 'a', 's1'),\n",
627 " (1, 5): (0, 4, 'a', 's1'),\n",
628 " (1, 6): (1, 5, 'b', 's2'),\n",
629 " (1, 7): (1, 6, 'c', 's2'),\n",
630 " (1, 8): (0, 7, 'a', 's1'),\n",
631 " (1, 9): (1, 8, 'c', 's2'),\n",
632 " (1, 10): (1, 9, 'b', 's2'),\n",
633 " (2, 4): (1, 3, 'a', 's1'),\n",
634 " (2, 5): (1, 4, 'a', 's1'),\n",
635 " (2, 6): (2, 5, 'b', 's2'),\n",
636 " (2, 7): (2, 6, 'c', 's2'),\n",
637 " (2, 8): (1, 7, 'a', 's1'),\n",
638 " (2, 9): (2, 8, 'c', 's2'),\n",
639 " (2, 10): (2, 9, 'b', 's2'),\n",
640 " (3, 5): (2, 4, 'a', 's1'),\n",
641 " (3, 6): (3, 5, 'b', 's2'),\n",
642 " (3, 7): (3, 6, 'c', 's2'),\n",
643 " (3, 8): (2, 7, 'a', 's1'),\n",
644 " (3, 9): (3, 8, 'c', 's2'),\n",
645 " (3, 10): (3, 9, 'b', 's2'),\n",
646 " (4, 8): (3, 7, 'a', 's1'),\n",
647 " (4, 9): (4, 8, 'c', 's2'),\n",
648 " (4, 10): (4, 9, 'b', 's2')}"
649 ]
650 },
651 "execution_count": 20,
652 "metadata": {},
653 "output_type": "execute_result"
654 }
655 ],
656 "source": [
657 "bps"
658 ]
659 },
660 {
661 "cell_type": "code",
662 "execution_count": 21,
663 "metadata": {},
664 "outputs": [
665 {
666 "data": {
667 "text/plain": [
668 "False"
669 ]
670 },
671 "execution_count": 21,
672 "metadata": {},
673 "output_type": "execute_result"
674 }
675 ],
676 "source": [
677 "is_subseq(s1, s2f)"
678 ]
679 },
680 {
681 "cell_type": "code",
682 "execution_count": 22,
683 "metadata": {
684 "scrolled": true
685 },
686 "outputs": [
687 {
688 "name": "stdout",
689 "output_type": "stream",
690 "text": [
691 "aa 0 0 ! ! True\n",
692 "aa 0 1 ! ! True\n",
693 "aa 0 2 ! ! True\n",
694 "aa 0 3 ! ! True\n",
695 "aa 0 4 ! ! True\n",
696 "aa 0 5 ! ! True\n",
697 "aa 0 6 ! ! True\n",
698 "aa 0 7 ! ! True\n",
699 "s1 1 1 a a True\n",
700 "s2 1 2 a a True\n",
701 "s1 1 2 a a True\n",
702 "s2 1 3 a a True\n",
703 "s1 1 3 a a True\n",
704 "s2 1 4 a b True\n",
705 "s2 1 5 a a True\n",
706 "s1 1 5 a a True\n",
707 "s2 1 6 a b True\n",
708 "s2 1 7 a b True\n",
709 "s1 2 2 a a True\n",
710 "s2 2 3 a a True\n",
711 "s1 2 3 a a True\n",
712 "s2 2 4 a b True\n",
713 "s2 2 5 a a True\n",
714 "s1 2 5 a a True\n",
715 "s2 2 6 a b True\n",
716 "s2 2 7 a b True\n",
717 "s1 3 3 a a True\n",
718 "s2 3 4 a b True\n",
719 "s2 3 5 a a True\n",
720 "s1 3 5 a a True\n",
721 "s2 3 6 a b True\n",
722 "s2 3 7 a b True\n",
723 "xx 4 4 a b False\n",
724 "s1 4 5 a a True\n",
725 "s2 4 6 a b True\n",
726 "s2 4 7 a b True\n"
727 ]
728 },
729 {
730 "data": {
731 "text/plain": [
732 "(True,\n",
733 " {(1, 1): (0, 0, 'a', 's1'),\n",
734 " (1, 2): (0, 1, 'a', 's1'),\n",
735 " (1, 3): (0, 2, 'a', 's1'),\n",
736 " (1, 4): (1, 3, 'b', 's2'),\n",
737 " (1, 5): (0, 4, 'a', 's1'),\n",
738 " (1, 6): (1, 5, 'b', 's2'),\n",
739 " (1, 7): (1, 6, 'b', 's2'),\n",
740 " (2, 2): (1, 1, 'a', 's1'),\n",
741 " (2, 3): (1, 2, 'a', 's1'),\n",
742 " (2, 4): (2, 3, 'b', 's2'),\n",
743 " (2, 5): (1, 4, 'a', 's1'),\n",
744 " (2, 6): (2, 5, 'b', 's2'),\n",
745 " (2, 7): (2, 6, 'b', 's2'),\n",
746 " (3, 3): (2, 2, 'a', 's1'),\n",
747 " (3, 4): (3, 3, 'b', 's2'),\n",
748 " (3, 5): (2, 4, 'a', 's1'),\n",
749 " (3, 6): (3, 5, 'b', 's2'),\n",
750 " (3, 7): (3, 6, 'b', 's2'),\n",
751 " (4, 5): (3, 4, 'a', 's1'),\n",
752 " (4, 6): (4, 5, 'b', 's2'),\n",
753 " (4, 7): (4, 6, 'b', 's2')})"
754 ]
755 },
756 "execution_count": 22,
757 "metadata": {},
758 "output_type": "execute_result"
759 }
760 ],
761 "source": [
762 "is_subseq('aaaa', 'aaababb', return_backpointers=True, debug=True)"
763 ]
764 },
765 {
766 "cell_type": "code",
767 "execution_count": 27,
768 "metadata": {
769 "collapsed": true
770 },
771 "outputs": [],
772 "source": [
773 "def is_subseq_recursive(s1, s2):\n",
774 " if not s1:\n",
775 " return True\n",
776 " elif len(s1) > len(s2):\n",
777 " return False\n",
778 " else:\n",
779 " if s1[-1] == s2[-1]:\n",
780 " return is_subseq_recursive(s1[:-1], s2[:-1]) or is_subseq_recursive(s1, s2[:-1])\n",
781 " else:\n",
782 " return is_subseq_recursive(s1, s2[:-1])"
783 ]
784 },
785 {
786 "cell_type": "code",
787 "execution_count": 29,
788 "metadata": {},
789 "outputs": [
790 {
791 "data": {
792 "text/plain": [
793 "True"
794 ]
795 },
796 "execution_count": 29,
797 "metadata": {},
798 "output_type": "execute_result"
799 }
800 ],
801 "source": [
802 "is_subseq_recursive(s1, s2t)"
803 ]
804 },
805 {
806 "cell_type": "code",
807 "execution_count": 28,
808 "metadata": {},
809 "outputs": [
810 {
811 "data": {
812 "text/plain": [
813 "False"
814 ]
815 },
816 "execution_count": 28,
817 "metadata": {},
818 "output_type": "execute_result"
819 }
820 ],
821 "source": [
822 "is_subseq_recursive(s1, s2f)"
823 ]
824 },
825 {
826 "cell_type": "code",
827 "execution_count": 30,
828 "metadata": {
829 "collapsed": true
830 },
831 "outputs": [],
832 "source": [
833 "def make_string(length, alphabet=None):\n",
834 " if not alphabet:\n",
835 " alphabet = 'abcdefgh'\n",
836 " return ''.join(random.choice(alphabet) for _ in range(length)) "
837 ]
838 },
839 {
840 "cell_type": "code",
841 "execution_count": 31,
842 "metadata": {
843 "collapsed": true
844 },
845 "outputs": [],
846 "source": [
847 "def interleave(s1, s2, wander_limit=10, debug=False):\n",
848 " i1 = i2 = wander = 0\n",
849 " interleaved = []\n",
850 " while i1 <= len(s1) and i2 <= len(s2):\n",
851 " if i1 == len(s1):\n",
852 " if debug: print(i1, i2, wander, 'remaining s2', s2[i2:])\n",
853 " interleaved += s2[i2:]\n",
854 " i2 = len(s2) + 1\n",
855 " elif i2 == len(s2):\n",
856 " if debug: print(i1, i2, wander, 'remaining s1', s1[i1:])\n",
857 " interleaved += s1[i1:]\n",
858 " i1 = len(s1) + 1\n",
859 " else:\n",
860 " if wander == wander_limit:\n",
861 " step = -1\n",
862 " elif wander == -wander_limit:\n",
863 " step = +1\n",
864 " else:\n",
865 " step = random.choice([+1, -1])\n",
866 " if step == +1:\n",
867 " if debug: print(i1, i2, wander, 'adding', s1[i1])\n",
868 " interleaved += s1[i1]\n",
869 " i1 += 1\n",
870 " wander += 1\n",
871 " else:\n",
872 " if debug: print(i1, i2, wander, 'adding', s2[i2])\n",
873 " interleaved += s2[i2]\n",
874 " i2 += 1\n",
875 " wander -= 1\n",
876 " return ''.join(interleaved)"
877 ]
878 },
879 {
880 "cell_type": "code",
881 "execution_count": 40,
882 "metadata": {
883 "collapsed": true
884 },
885 "outputs": [],
886 "source": [
887 "sl1 = make_string(200)\n",
888 "sl2 = make_string(200)\n",
889 "sl3 = make_string(200)\n",
890 "sl12 = interleave(sl1, sl2)\n",
891 "sl23 = interleave(sl2, sl3)"
892 ]
893 },
894 {
895 "cell_type": "code",
896 "execution_count": 41,
897 "metadata": {},
898 "outputs": [
899 {
900 "data": {
901 "text/plain": [
902 "(True, False)"
903 ]
904 },
905 "execution_count": 41,
906 "metadata": {},
907 "output_type": "execute_result"
908 }
909 ],
910 "source": [
911 "is_subseq(sl1, sl12), is_subseq(sl1, sl23)"
912 ]
913 },
914 {
915 "cell_type": "code",
916 "execution_count": null,
917 "metadata": {},
918 "outputs": [],
919 "source": [
920 "is_subseq_recursive(sl1, sl12), is_subseq_recursive(sl1, sl23)"
921 ]
922 },
923 {
924 "cell_type": "code",
925 "execution_count": null,
926 "metadata": {
927 "collapsed": true
928 },
929 "outputs": [],
930 "source": []
931 }
932 ],
933 "metadata": {
934 "kernelspec": {
935 "display_name": "Python 3",
936 "language": "python",
937 "name": "python3"
938 },
939 "language_info": {
940 "codemirror_mode": {
941 "name": "ipython",
942 "version": 3
943 },
944 "file_extension": ".py",
945 "mimetype": "text/x-python",
946 "name": "python",
947 "nbconvert_exporter": "python",
948 "pygments_lexer": "ipython3",
949 "version": "3.5.2+"
950 }
951 },
952 "nbformat": 4,
953 "nbformat_minor": 1
954 }