22140c9a938e2bfdc41609a07961e01a9c254c43
[ou-summer-of-code-2017.git] / 09-resolving-the-bill / interleaving.ipynb
1 {
2 "cells": [
3 {
4 "cell_type": "markdown",
5 "metadata": {},
6 "source": [
7 "# Interleaved strings\n",
8 "\n",
9 "Given two strings a and b and a target c, could c be formed form some interleaving/merge of a and b?\n",
10 "\n",
11 "For example,\n",
12 "Given:\n",
13 "s1 = \"aabcc\",\n",
14 "s2 = \"dbbca\",\n",
15 "\n",
16 "When s3 = \"aadbbcbcac\", return true.\n",
17 "When s3 = \"aadbbbaccc\", return false."
18 ]
19 },
20 {
21 "cell_type": "code",
22 "execution_count": 2,
23 "metadata": {
24 "collapsed": true
25 },
26 "outputs": [],
27 "source": [
28 "s1 = \"aabcc\"\n",
29 "s2 = \"dbbca\"\n",
30 "\n",
31 "s3t = \"aadbbcbcac\"\n",
32 "s3f = \"aadbbbaccc\""
33 ]
34 },
35 {
36 "cell_type": "code",
37 "execution_count": 8,
38 "metadata": {
39 "collapsed": false
40 },
41 "outputs": [
42 {
43 "data": {
44 "text/plain": [
45 "[(0, ''), (1, 'a'), (2, 'aa'), (3, 'aab'), (4, 'aabc'), (5, 'aabcc')]"
46 ]
47 },
48 "execution_count": 8,
49 "metadata": {},
50 "output_type": "execute_result"
51 }
52 ],
53 "source": [
54 "[(i, s1[:i]) for i in range(len(s1)+1)]"
55 ]
56 },
57 {
58 "cell_type": "markdown",
59 "metadata": {},
60 "source": [
61 "`dp_table[i, j]` is True if `s3[:i+j]` can be formed from interleaving `s1[:i]` and `s2[:j]`."
62 ]
63 },
64 {
65 "cell_type": "code",
66 "execution_count": 11,
67 "metadata": {
68 "collapsed": false
69 },
70 "outputs": [
71 {
72 "data": {
73 "text/plain": [
74 "[[True, False, False, False, False, False],\n",
75 " [False, False, False, False, False, False],\n",
76 " [False, False, False, False, False, False],\n",
77 " [False, False, False, False, False, False],\n",
78 " [False, False, False, False, False, False],\n",
79 " [False, False, False, False, False, False]]"
80 ]
81 },
82 "execution_count": 11,
83 "metadata": {},
84 "output_type": "execute_result"
85 }
86 ],
87 "source": [
88 "dp_table = [[False] * (len(s1) + 1) for _ in range(len(s2) + 1)]\n",
89 "dp_table[0][0] = True\n",
90 "dp_table"
91 ]
92 },
93 {
94 "cell_type": "code",
95 "execution_count": 24,
96 "metadata": {
97 "collapsed": false,
98 "scrolled": true
99 },
100 "outputs": [
101 {
102 "data": {
103 "text/plain": [
104 "{(0, 0): False,\n",
105 " (0, 1): False,\n",
106 " (0, 2): False,\n",
107 " (0, 3): False,\n",
108 " (0, 4): False,\n",
109 " (0, 5): False,\n",
110 " (1, 0): False,\n",
111 " (1, 1): False,\n",
112 " (1, 2): False,\n",
113 " (1, 3): False,\n",
114 " (1, 4): False,\n",
115 " (1, 5): False,\n",
116 " (2, 0): False,\n",
117 " (2, 1): False,\n",
118 " (2, 2): False,\n",
119 " (2, 3): False,\n",
120 " (2, 4): False,\n",
121 " (2, 5): False,\n",
122 " (3, 0): False,\n",
123 " (3, 1): False,\n",
124 " (3, 2): False,\n",
125 " (3, 3): False,\n",
126 " (3, 4): False,\n",
127 " (3, 5): False,\n",
128 " (4, 0): False,\n",
129 " (4, 1): False,\n",
130 " (4, 2): False,\n",
131 " (4, 3): False,\n",
132 " (4, 4): False,\n",
133 " (4, 5): False,\n",
134 " (5, 0): False,\n",
135 " (5, 1): False,\n",
136 " (5, 2): False,\n",
137 " (5, 3): False,\n",
138 " (5, 4): False,\n",
139 " (5, 5): False}"
140 ]
141 },
142 "execution_count": 24,
143 "metadata": {},
144 "output_type": "execute_result"
145 }
146 ],
147 "source": [
148 "dp_table = {(i, j): False\n",
149 " for i in range(len(s1)+1)\n",
150 " for j in range(len(s2)+1)}\n",
151 "dp_table"
152 ]
153 },
154 {
155 "cell_type": "code",
156 "execution_count": 25,
157 "metadata": {
158 "collapsed": true
159 },
160 "outputs": [],
161 "source": [
162 "def show_table(table):\n",
163 " return '\\n'.join(\n",
164 " ' '.join(str(table[i, j])[0] for j in sorted(set([k[1] for k in table])))\n",
165 " for i in sorted(set([k[0] for k in table]))) "
166 ]
167 },
168 {
169 "cell_type": "code",
170 "execution_count": 26,
171 "metadata": {
172 "collapsed": false
173 },
174 "outputs": [
175 {
176 "name": "stdout",
177 "output_type": "stream",
178 "text": [
179 "F F F F F F\n",
180 "F F F F F F\n",
181 "F F F F F F\n",
182 "F F F F F F\n",
183 "F F F F F F\n",
184 "F F F F F F\n"
185 ]
186 }
187 ],
188 "source": [
189 "print(show_table(dp_table))"
190 ]
191 },
192 {
193 "cell_type": "code",
194 "execution_count": 39,
195 "metadata": {
196 "collapsed": false
197 },
198 "outputs": [
199 {
200 "name": "stdout",
201 "output_type": "stream",
202 "text": [
203 "aabcc dbbca aadbbbaccc\n",
204 "aa 0 0 ! ! ! True\n",
205 "s2 0 1 ! d a False\n",
206 "s2 0 2 ! b a False\n",
207 "s2 0 3 ! b d False\n",
208 "s2 0 4 ! c b False\n",
209 "s2 0 5 ! a b False\n",
210 "s1 1 0 a ! a True\n",
211 "xx 1 1 a d a False\n",
212 "xx 1 2 a b d False\n",
213 "xx 1 3 a b b False\n",
214 "xx 1 4 a c b False\n",
215 "xx 1 5 a a b False\n",
216 "s1 2 0 a ! a True\n",
217 "s2 2 1 a d d True\n",
218 "s2 2 2 a b b True\n",
219 "s2 2 3 a b b True\n",
220 "xx 2 4 a c b False\n",
221 "xx 2 5 a a a False\n",
222 "s1 3 0 b ! d False\n",
223 "s1 3 1 b d b True\n",
224 "s2 3 2 b b b True\n",
225 "s1 3 2 b b b True\n",
226 "s2 3 3 b b b True\n",
227 "s1 3 3 b b b True\n",
228 "xx 3 4 b c a False\n",
229 "xx 3 5 b a c False\n",
230 "s1 4 0 c ! b False\n",
231 "xx 4 1 c d b False\n",
232 "xx 4 2 c b b False\n",
233 "xx 4 3 c b a False\n",
234 "xx 4 4 c c c False\n",
235 "xx 4 5 c a c False\n",
236 "s1 5 0 c ! b False\n",
237 "xx 5 1 c d b False\n",
238 "xx 5 2 c b a False\n",
239 "xx 5 3 c b c False\n",
240 "xx 5 4 c c c False\n",
241 "xx 5 5 c a c False\n",
242 "T F F F F F\n",
243 "T F F F F F\n",
244 "T T T T F F\n",
245 "F T T T F F\n",
246 "F F F F F F\n",
247 "F F F F F F\n"
248 ]
249 },
250 {
251 "data": {
252 "text/plain": [
253 "{(1, 0): (0, 0, 'a', 's1'),\n",
254 " (2, 0): (1, 0, 'a', 's1'),\n",
255 " (2, 1): (2, 0, 'd', 's2'),\n",
256 " (2, 2): (2, 1, 'b', 's2'),\n",
257 " (2, 3): (2, 2, 'b', 's2'),\n",
258 " (3, 1): (2, 1, 'b', 's1'),\n",
259 " (3, 2): (2, 2, 'b', 's1'),\n",
260 " (3, 3): (2, 3, 'b', 's1')}"
261 ]
262 },
263 "execution_count": 39,
264 "metadata": {},
265 "output_type": "execute_result"
266 }
267 ],
268 "source": [
269 "s3 = s3f\n",
270 "\n",
271 "print(s1, s2, s3)\n",
272 "\n",
273 "dp_table = {(i, j): False\n",
274 " for i in range(len(s1)+1)\n",
275 " for j in range(len(s2)+1)}\n",
276 "\n",
277 "backpointers = {}\n",
278 "\n",
279 "for i in range(len(s1)+1):\n",
280 " for j in range(len(s2)+1):\n",
281 " if i == 0 and j == 0:\n",
282 " dp_table[i, j] = True\n",
283 " print('aa', i, j, '!', '!', '!', dp_table[i, j])\n",
284 " elif i == 0:\n",
285 " # extend by character from s2\n",
286 " if dp_table[i, j-1] and s2[j-1] == s3[i+j-1]:\n",
287 " dp_table[i, j] = True\n",
288 " backpointers[i, j] = (i, j-1, s2[j-1], 's2')\n",
289 " print('s2', i, j, '!', s2[j-1], s3[i+j-1], dp_table[i, j])\n",
290 " elif j == 0:\n",
291 " # extend by character from s1\n",
292 " if dp_table[i-1, j] and s1[i-1] == s3[i+j-1]:\n",
293 " dp_table[i, j] = True\n",
294 " backpointers[i, j] = (i-1, j, s1[i-1], 's1')\n",
295 " print('s1', i, j, s1[i-1], '!', s3[i+j-1], dp_table[i, j])\n",
296 " else:\n",
297 " # extend by character from s2\n",
298 " if dp_table[i, j-1] and s2[j-1] == s3[i+j-1]:\n",
299 " dp_table[i, j] = True\n",
300 " backpointers[i, j] = (i, j-1, s2[j-1], 's2')\n",
301 " print('s2', i, j, s1[i-1], s2[j-1], s3[i+j-1], dp_table[i, j]) \n",
302 " # extend by character from s1\n",
303 " if dp_table[i-1, j] and s1[i-1] == s3[i+j-1]:\n",
304 " dp_table[i, j] = True\n",
305 " backpointers[i, j] = (i-1, j, s1[i-1], 's1') \n",
306 " print('s1', i, j, s1[i-1], s2[j-1], s3[i+j-1], dp_table[i, j])\n",
307 " if not dp_table[i, j]:\n",
308 " print('xx', i, j, s1[i-1], s2[j-1], s3[i+j-1], dp_table[i, j])\n",
309 "\n",
310 "print(show_table(dp_table))\n",
311 "backpointers"
312 ]
313 },
314 {
315 "cell_type": "code",
316 "execution_count": 49,
317 "metadata": {
318 "collapsed": false
319 },
320 "outputs": [],
321 "source": [
322 "def is_interleave(seq1, seq2, seq3, return_backpointers=False, debug=False):\n",
323 " \"\"\"Return true if seq3 is some interleaved merge of seq1 and seq2.\n",
324 " If return_backpointers, also return the set of backpointers to\n",
325 " reconstruct the interleaving\"\"\"\n",
326 " \n",
327 " # dp_table[i, j] is True if seq3[:i+j-1] is made up of an interleaving\n",
328 " # of the first i characters of seq1 and the first j characters of seq2\n",
329 " \n",
330 " if len(seq1) + len(seq2) != len(seq3):\n",
331 " if return_backpointers:\n",
332 " return False, {}\n",
333 " else:\n",
334 " return False\n",
335 " \n",
336 " dp_table = {(i, j): False\n",
337 " for i in range(len(seq1)+1)\n",
338 " for j in range(len(seq2)+1)}\n",
339 "\n",
340 " backpointers = {}\n",
341 "\n",
342 " for i in range(len(seq1)+1):\n",
343 " for j in range(len(seq2)+1):\n",
344 " if i == 0 and j == 0:\n",
345 " dp_table[i, j] = True\n",
346 " if debug: print('aa', i, j, '!', '!', '!', dp_table[i, j])\n",
347 " elif i == 0:\n",
348 " # extend by character from seq2\n",
349 " if dp_table[i, j-1] and seq2[j-1] == seq3[i+j-1]:\n",
350 " dp_table[i, j] = True\n",
351 " backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n",
352 " if debug: print('seq2', i, j, '!', seq2[j-1], seq3[i+j-1], dp_table[i, j])\n",
353 " elif j == 0:\n",
354 " # extend by character from seq1\n",
355 " if dp_table[i-1, j] and seq1[i-1] == seq3[i+j-1]:\n",
356 " dp_table[i, j] = True\n",
357 " backpointers[i, j] = (i-1, j, seq1[i-1], 'seq1')\n",
358 " if debug: print('seq1', i, j, seq1[i-1], '!', seq3[i+j-1], dp_table[i, j])\n",
359 " else:\n",
360 " # extend by character from seq2\n",
361 " if dp_table[i, j-1] and seq2[j-1] == seq3[i+j-1]:\n",
362 " dp_table[i, j] = True\n",
363 " backpointers[i, j] = (i, j-1, seq2[j-1], 'seq2')\n",
364 " if debug: print('seq2', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], dp_table[i, j]) \n",
365 " # extend by character from seq1\n",
366 " if dp_table[i-1, j] and seq1[i-1] == seq3[i+j-1]:\n",
367 " dp_table[i, j] = True\n",
368 " backpointers[i, j] = (i-1, j, seq1[i-1], 'seq1') \n",
369 " if debug: print('seq1', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], dp_table[i, j])\n",
370 " if not dp_table[i, j]:\n",
371 " if debug: print('xx', i, j, seq1[i-1], seq2[j-1], seq3[i+j-1], dp_table[i, j])\n",
372 "\n",
373 " if return_backpointers:\n",
374 " return dp_table[len(seq1), len(seq2)], backpointers\n",
375 " else:\n",
376 " return dp_table[len(seq1), len(seq2)]"
377 ]
378 },
379 {
380 "cell_type": "code",
381 "execution_count": 50,
382 "metadata": {
383 "collapsed": false
384 },
385 "outputs": [
386 {
387 "data": {
388 "text/plain": [
389 "True"
390 ]
391 },
392 "execution_count": 50,
393 "metadata": {},
394 "output_type": "execute_result"
395 }
396 ],
397 "source": [
398 "is_interleave(s1, s2, s3t)"
399 ]
400 },
401 {
402 "cell_type": "code",
403 "execution_count": 51,
404 "metadata": {
405 "collapsed": false
406 },
407 "outputs": [
408 {
409 "data": {
410 "text/plain": [
411 "(True,\n",
412 " {(1, 0): (0, 0, 'a', 'seq1'),\n",
413 " (2, 0): (1, 0, 'a', 'seq1'),\n",
414 " (2, 1): (2, 0, 'd', 'seq2'),\n",
415 " (2, 2): (2, 1, 'b', 'seq2'),\n",
416 " (2, 3): (2, 2, 'b', 'seq2'),\n",
417 " (2, 4): (2, 3, 'c', 'seq2'),\n",
418 " (3, 1): (2, 1, 'b', 'seq1'),\n",
419 " (3, 2): (2, 2, 'b', 'seq1'),\n",
420 " (3, 4): (2, 4, 'b', 'seq1'),\n",
421 " (4, 2): (3, 2, 'c', 'seq1'),\n",
422 " (4, 3): (4, 2, 'b', 'seq2'),\n",
423 " (4, 4): (3, 4, 'c', 'seq1'),\n",
424 " (4, 5): (4, 4, 'a', 'seq2'),\n",
425 " (5, 3): (4, 3, 'c', 'seq1'),\n",
426 " (5, 5): (4, 5, 'c', 'seq1')})"
427 ]
428 },
429 "execution_count": 51,
430 "metadata": {},
431 "output_type": "execute_result"
432 }
433 ],
434 "source": [
435 "is_interleave(s1, s2, s3t, return_backpointers=True)"
436 ]
437 },
438 {
439 "cell_type": "code",
440 "execution_count": 52,
441 "metadata": {
442 "collapsed": false
443 },
444 "outputs": [
445 {
446 "data": {
447 "text/plain": [
448 "False"
449 ]
450 },
451 "execution_count": 52,
452 "metadata": {},
453 "output_type": "execute_result"
454 }
455 ],
456 "source": [
457 "is_interleave(s1, s2, s3f)"
458 ]
459 },
460 {
461 "cell_type": "code",
462 "execution_count": 54,
463 "metadata": {
464 "collapsed": false
465 },
466 "outputs": [
467 {
468 "data": {
469 "text/plain": [
470 "(True,\n",
471 " {(1, 0): (0, 0, 'a', 'seq1'),\n",
472 " (2, 0): (1, 0, 'a', 'seq1'),\n",
473 " (3, 0): (2, 0, 'a', 'seq1'),\n",
474 " (3, 1): (3, 0, 'b', 'seq2'),\n",
475 " (4, 1): (3, 1, 'a', 'seq1'),\n",
476 " (4, 2): (4, 1, 'b', 'seq2'),\n",
477 " (4, 3): (4, 2, 'b', 'seq2')})"
478 ]
479 },
480 "execution_count": 54,
481 "metadata": {},
482 "output_type": "execute_result"
483 }
484 ],
485 "source": [
486 "is_interleave('aaaa', 'bbb', 'aaababb', return_backpointers=True)"
487 ]
488 },
489 {
490 "cell_type": "code",
491 "execution_count": null,
492 "metadata": {
493 "collapsed": true
494 },
495 "outputs": [],
496 "source": []
497 }
498 ],
499 "metadata": {
500 "kernelspec": {
501 "display_name": "Python 3",
502 "language": "python",
503 "name": "python3"
504 },
505 "language_info": {
506 "codemirror_mode": {
507 "name": "ipython",
508 "version": 3
509 },
510 "file_extension": ".py",
511 "mimetype": "text/x-python",
512 "name": "python",
513 "nbconvert_exporter": "python",
514 "pygments_lexer": "ipython3",
515 "version": "3.5.2"
516 }
517 },
518 "nbformat": 4,
519 "nbformat_minor": 1
520 }