Fixed typo
[ou-summer-of-code-2017.git] / word-chains / word-chain-solution.ipynb
1 {
2 "cells": [
3 {
4 "cell_type": "markdown",
5 "metadata": {},
6 "source": [
7 "# Word chains\n",
8 "\n",
9 "\"Word chain\" puzzles are where you transform one word into another, by changing one letter at a time, with all the intermediate steps being valid words. \n",
10 "\n",
11 "For instance, you can transform 'rash' to 'jags' like this:\n",
12 "\n",
13 "```\n",
14 "rash\n",
15 "Bash\n",
16 "basS\n",
17 "baGs\n",
18 "Jags\n",
19 "```\n",
20 "\n",
21 "(the capital letter is the one changed in each step).\n",
22 "\n",
23 "## Part 1\n",
24 "\n",
25 "Given this [list of words](words4.txt), what is the minimum number of steps to go from `vice` to `wars`?"
26 ]
27 },
28 {
29 "cell_type": "code",
30 "execution_count": 1,
31 "metadata": {
32 "collapsed": true
33 },
34 "outputs": [],
35 "source": [
36 "import string\n",
37 "import heapq"
38 ]
39 },
40 {
41 "cell_type": "code",
42 "execution_count": 2,
43 "metadata": {},
44 "outputs": [
45 {
46 "data": {
47 "text/plain": [
48 "2336"
49 ]
50 },
51 "execution_count": 2,
52 "metadata": {},
53 "output_type": "execute_result"
54 }
55 ],
56 "source": [
57 "words = [w.strip() for w in open('words4.txt').readlines()]\n",
58 "len(words)"
59 ]
60 },
61 {
62 "cell_type": "code",
63 "execution_count": 3,
64 "metadata": {
65 "collapsed": true
66 },
67 "outputs": [],
68 "source": [
69 "def adjacents(word):\n",
70 " return [word[0:i] + l + word[i+1:]\n",
71 " for i in range(len(word))\n",
72 " for l in string.ascii_lowercase\n",
73 " if l != word[i]]"
74 ]
75 },
76 {
77 "cell_type": "code",
78 "execution_count": 4,
79 "metadata": {
80 "collapsed": true
81 },
82 "outputs": [],
83 "source": [
84 "neighbours = {w: [n for n in adjacents(w) if n in words]\n",
85 " for w in words}"
86 ]
87 },
88 {
89 "cell_type": "code",
90 "execution_count": 5,
91 "metadata": {
92 "collapsed": true
93 },
94 "outputs": [],
95 "source": [
96 "def distance(w1, w2):\n",
97 " return sum(1 for i in range(len(w1))\n",
98 " if w1[i] != w2[i])"
99 ]
100 },
101 {
102 "cell_type": "code",
103 "execution_count": 6,
104 "metadata": {
105 "collapsed": true
106 },
107 "outputs": [],
108 "source": [
109 "def extend(chain):\n",
110 " return [chain + [s] for s in neighbours[chain[-1]]\n",
111 " if s not in chain]"
112 ]
113 },
114 {
115 "cell_type": "code",
116 "execution_count": 7,
117 "metadata": {
118 "collapsed": true
119 },
120 "outputs": [],
121 "source": [
122 "def extend_raw(chain):\n",
123 " nbrs = [w for w in adjacents(chain[-1]) if w in words]\n",
124 " return [chain + [s] for s in nbrs\n",
125 " if s not in chain]"
126 ]
127 },
128 {
129 "cell_type": "code",
130 "execution_count": 8,
131 "metadata": {
132 "collapsed": true
133 },
134 "outputs": [],
135 "source": [
136 "def bfs_search(start, target, debug=False):\n",
137 " return bfs([[start]], target, debug=debug)"
138 ]
139 },
140 {
141 "cell_type": "code",
142 "execution_count": 9,
143 "metadata": {
144 "collapsed": true
145 },
146 "outputs": [],
147 "source": [
148 "def bfs(agenda, goal, debug=False):\n",
149 " finished = False\n",
150 " while not finished and agenda:\n",
151 " current = agenda[0]\n",
152 " if debug:\n",
153 " print(current)\n",
154 " if current[-1] == goal:\n",
155 " finished = True\n",
156 " else:\n",
157 " successors = extend(current)\n",
158 " agenda = agenda[1:] + successors\n",
159 " if agenda:\n",
160 " return current\n",
161 " else:\n",
162 " return None "
163 ]
164 },
165 {
166 "cell_type": "code",
167 "execution_count": 10,
168 "metadata": {
169 "collapsed": true
170 },
171 "outputs": [],
172 "source": [
173 "def dfs_search(start, target, debug=False):\n",
174 " return dfs([[start]], target, debug=debug)"
175 ]
176 },
177 {
178 "cell_type": "code",
179 "execution_count": 11,
180 "metadata": {
181 "collapsed": true
182 },
183 "outputs": [],
184 "source": [
185 "def dfs(agenda, goal, debug=False):\n",
186 " finished = False\n",
187 " while not finished and agenda:\n",
188 " current = agenda[0]\n",
189 " if debug:\n",
190 " print(current)\n",
191 " if current[-1] == goal:\n",
192 " finished = True\n",
193 " else:\n",
194 " successors = extend(current)\n",
195 " agenda = successors + agenda[1:]\n",
196 " if agenda:\n",
197 " return current\n",
198 " else:\n",
199 " return None "
200 ]
201 },
202 {
203 "cell_type": "code",
204 "execution_count": 12,
205 "metadata": {
206 "collapsed": true
207 },
208 "outputs": [],
209 "source": [
210 "def astar_search(start, target, debug=False):\n",
211 " agenda = [(distance(start, target), [start])]\n",
212 " heapq.heapify(agenda)\n",
213 " return astar(agenda, target, debug=debug)"
214 ]
215 },
216 {
217 "cell_type": "code",
218 "execution_count": 13,
219 "metadata": {
220 "collapsed": true
221 },
222 "outputs": [],
223 "source": [
224 "def astar(agenda, goal, debug=False):\n",
225 " finished = False\n",
226 " while not finished and agenda:\n",
227 " _, current = heapq.heappop(agenda)\n",
228 " if debug:\n",
229 " print(current)\n",
230 " if current[-1] == goal:\n",
231 " finished = True\n",
232 " else:\n",
233 " successors = extend(current)\n",
234 " for s in successors:\n",
235 " heapq.heappush(agenda, (len(current) + distance(s[-1], goal) - 1, s))\n",
236 " if agenda:\n",
237 " return current\n",
238 " else:\n",
239 " return None "
240 ]
241 },
242 {
243 "cell_type": "code",
244 "execution_count": 14,
245 "metadata": {
246 "collapsed": true
247 },
248 "outputs": [],
249 "source": [
250 "def astar_search_raw(start, target, debug=False):\n",
251 " agenda = [(distance(start, target), [start])]\n",
252 " heapq.heapify(agenda)\n",
253 " return astar_raw(agenda, target, debug=debug)"
254 ]
255 },
256 {
257 "cell_type": "code",
258 "execution_count": 15,
259 "metadata": {
260 "collapsed": true
261 },
262 "outputs": [],
263 "source": [
264 "def astar_raw(agenda, goal, debug=False):\n",
265 " finished = False\n",
266 " while not finished and agenda:\n",
267 " _, current = heapq.heappop(agenda)\n",
268 " if debug:\n",
269 " print(current)\n",
270 " if current[-1] == goal:\n",
271 " finished = True\n",
272 " else:\n",
273 " successors = extend_raw(current)\n",
274 " for s in successors:\n",
275 " heapq.heappush(agenda, (len(current) + distance(s[-1], goal) - 1, s))\n",
276 " if agenda:\n",
277 " return current\n",
278 " else:\n",
279 " return None "
280 ]
281 },
282 {
283 "cell_type": "code",
284 "execution_count": 16,
285 "metadata": {},
286 "outputs": [
287 {
288 "data": {
289 "text/plain": [
290 "['vice', 'dice', 'dire', 'dare', 'ware', 'wars']"
291 ]
292 },
293 "execution_count": 16,
294 "metadata": {},
295 "output_type": "execute_result"
296 }
297 ],
298 "source": [
299 "astar_search('vice', 'wars')"
300 ]
301 },
302 {
303 "cell_type": "code",
304 "execution_count": 17,
305 "metadata": {},
306 "outputs": [
307 {
308 "data": {
309 "text/plain": [
310 "['vice', 'dice', 'dire', 'dare', 'ware', 'wars']"
311 ]
312 },
313 "execution_count": 17,
314 "metadata": {},
315 "output_type": "execute_result"
316 }
317 ],
318 "source": [
319 "astar_search_raw('vice', 'wars')"
320 ]
321 },
322 {
323 "cell_type": "code",
324 "execution_count": 18,
325 "metadata": {},
326 "outputs": [
327 {
328 "data": {
329 "text/plain": [
330 "6"
331 ]
332 },
333 "execution_count": 18,
334 "metadata": {},
335 "output_type": "execute_result"
336 }
337 ],
338 "source": [
339 "len(astar_search('vice', 'wars'))"
340 ]
341 },
342 {
343 "cell_type": "code",
344 "execution_count": 19,
345 "metadata": {},
346 "outputs": [
347 {
348 "data": {
349 "text/plain": [
350 "6"
351 ]
352 },
353 "execution_count": 19,
354 "metadata": {},
355 "output_type": "execute_result"
356 }
357 ],
358 "source": [
359 "len(bfs_search('vice', 'wars'))"
360 ]
361 },
362 {
363 "cell_type": "code",
364 "execution_count": 20,
365 "metadata": {},
366 "outputs": [
367 {
368 "data": {
369 "text/plain": [
370 "793"
371 ]
372 },
373 "execution_count": 20,
374 "metadata": {},
375 "output_type": "execute_result"
376 }
377 ],
378 "source": [
379 "len(dfs_search('vice', 'wars'))"
380 ]
381 },
382 {
383 "cell_type": "code",
384 "execution_count": 21,
385 "metadata": {},
386 "outputs": [
387 {
388 "name": "stdout",
389 "output_type": "stream",
390 "text": [
391 "10000 loops, best of 3: 153 µs per loop\n"
392 ]
393 }
394 ],
395 "source": [
396 "%%timeit\n",
397 "astar_search('vice', 'wars')"
398 ]
399 },
400 {
401 "cell_type": "code",
402 "execution_count": 22,
403 "metadata": {},
404 "outputs": [
405 {
406 "name": "stdout",
407 "output_type": "stream",
408 "text": [
409 "100 loops, best of 3: 15.8 ms per loop\n"
410 ]
411 }
412 ],
413 "source": [
414 "%%timeit\n",
415 "astar_search_raw('vice', 'wars')"
416 ]
417 },
418 {
419 "cell_type": "code",
420 "execution_count": 23,
421 "metadata": {},
422 "outputs": [
423 {
424 "name": "stdout",
425 "output_type": "stream",
426 "text": [
427 "1 loop, best of 3: 1min 42s per loop\n"
428 ]
429 }
430 ],
431 "source": [
432 "%%timeit\n",
433 "bfs_search('vice', 'wars')"
434 ]
435 },
436 {
437 "cell_type": "code",
438 "execution_count": 24,
439 "metadata": {},
440 "outputs": [
441 {
442 "name": "stdout",
443 "output_type": "stream",
444 "text": [
445 "10 loops, best of 3: 88 ms per loop\n"
446 ]
447 }
448 ],
449 "source": [
450 "%%timeit\n",
451 "dfs_search('vice', 'wars')"
452 ]
453 },
454 {
455 "cell_type": "markdown",
456 "metadata": {},
457 "source": [
458 "## Part 2\n",
459 "\n",
460 "The example shows that `jags` is reachable in four steps from `rash`. There are 11 words one step away from `rash`: \n",
461 "`bash`, `cash`, `dash`, `gash`, `hash`, `lash`, `mash`, `rasp`, `rush`, `sash`, and `wash`. \n",
462 "\n",
463 "There are 47 words reachable in one or two steps from `rash`. They are `base`, `bash`, `bask`, `bass`, `bast`, `bath`, `bosh`, `bush`, `case`, `cash`, `cask`, `cast`, `dash`, `dish`, `gash`, `gasp`, `gosh`, `gush`, `hash`, `hasp`, `hath`, `hush`, `lash`, `lass`, `last`, `lath`, `lush`, `mash`, `mask`, `mass`, `mast`, `math`, `mesh`, `mush`, `push`, `ramp`, `rasp`, `ruse`, `rush`, `rusk`, `rust`, `sash`, `sass`, `tush`, `wash`, `wasp`, and `wish`.\n",
464 "\n",
465 "There are 180 words reachable in up to three steps from `rash`.\n",
466 "\n",
467 "How many words are reachable in up to ten steps from `vice`?"
468 ]
469 },
470 {
471 "cell_type": "code",
472 "execution_count": 25,
473 "metadata": {
474 "collapsed": true
475 },
476 "outputs": [],
477 "source": [
478 "def reachable_in(word, n, trim_extras=False):\n",
479 " reachable = set()\n",
480 " boundary = set([word])\n",
481 " for i in range(n):\n",
482 " extras = set()\n",
483 " for w in boundary:\n",
484 " extras.update(neighbours[w])\n",
485 " if trim_extras:\n",
486 " extras.difference_update(reachable)\n",
487 " reachable.update(boundary)\n",
488 " boundary = extras.copy()\n",
489 " return reachable.union(extras).difference(set([word]))"
490 ]
491 },
492 {
493 "cell_type": "code",
494 "execution_count": 26,
495 "metadata": {
496 "scrolled": true
497 },
498 "outputs": [
499 {
500 "data": {
501 "text/plain": [
502 "(11,\n",
503 " '`bash`, `cash`, `dash`, `gash`, `hash`, `lash`, `mash`, `rasp`, `rush`, `sash`, `wash`')"
504 ]
505 },
506 "execution_count": 26,
507 "metadata": {},
508 "output_type": "execute_result"
509 }
510 ],
511 "source": [
512 "len(reachable_in('rash', 1)), ', '.join(sorted('`{}`'.format(r) for r in reachable_in('rash', 1)))"
513 ]
514 },
515 {
516 "cell_type": "code",
517 "execution_count": 27,
518 "metadata": {
519 "scrolled": true
520 },
521 "outputs": [
522 {
523 "data": {
524 "text/plain": [
525 "(47,\n",
526 " '`base`, `bash`, `bask`, `bass`, `bast`, `bath`, `bosh`, `bush`, `case`, `cash`, `cask`, `cast`, `dash`, `dish`, `gash`, `gasp`, `gosh`, `gush`, `hash`, `hasp`, `hath`, `hush`, `lash`, `lass`, `last`, `lath`, `lush`, `mash`, `mask`, `mass`, `mast`, `math`, `mesh`, `mush`, `push`, `ramp`, `rasp`, `ruse`, `rush`, `rusk`, `rust`, `sash`, `sass`, `tush`, `wash`, `wasp`, `wish`')"
527 ]
528 },
529 "execution_count": 27,
530 "metadata": {},
531 "output_type": "execute_result"
532 }
533 ],
534 "source": [
535 "len(reachable_in('rash', 2)), ', '.join(sorted('`{}`'.format(r) for r in reachable_in('rash', 2)))"
536 ]
537 },
538 {
539 "cell_type": "code",
540 "execution_count": 28,
541 "metadata": {
542 "scrolled": true
543 },
544 "outputs": [
545 {
546 "data": {
547 "text/plain": [
548 "180"
549 ]
550 },
551 "execution_count": 28,
552 "metadata": {},
553 "output_type": "execute_result"
554 }
555 ],
556 "source": [
557 "len(reachable_in('rash', 3))"
558 ]
559 },
560 {
561 "cell_type": "code",
562 "execution_count": 29,
563 "metadata": {
564 "scrolled": true
565 },
566 "outputs": [
567 {
568 "data": {
569 "text/plain": [
570 "2195"
571 ]
572 },
573 "execution_count": 29,
574 "metadata": {},
575 "output_type": "execute_result"
576 }
577 ],
578 "source": [
579 "len(reachable_in('rash', 10))"
580 ]
581 },
582 {
583 "cell_type": "code",
584 "execution_count": 30,
585 "metadata": {
586 "scrolled": true
587 },
588 "outputs": [
589 {
590 "data": {
591 "text/plain": [
592 "2192"
593 ]
594 },
595 "execution_count": 30,
596 "metadata": {},
597 "output_type": "execute_result"
598 }
599 ],
600 "source": [
601 "len(reachable_in('vice', 10))"
602 ]
603 },
604 {
605 "cell_type": "code",
606 "execution_count": 31,
607 "metadata": {
608 "scrolled": true
609 },
610 "outputs": [
611 {
612 "name": "stdout",
613 "output_type": "stream",
614 "text": [
615 "100 loops, best of 3: 5.96 ms per loop\n"
616 ]
617 }
618 ],
619 "source": [
620 "%%timeit\n",
621 "len(reachable_in('rash', 10))"
622 ]
623 },
624 {
625 "cell_type": "code",
626 "execution_count": 32,
627 "metadata": {
628 "scrolled": true
629 },
630 "outputs": [
631 {
632 "name": "stdout",
633 "output_type": "stream",
634 "text": [
635 "100 loops, best of 3: 2.75 ms per loop\n"
636 ]
637 }
638 ],
639 "source": [
640 "%%timeit\n",
641 "len(reachable_in('rash', 10, trim_extras=True))"
642 ]
643 },
644 {
645 "cell_type": "code",
646 "execution_count": null,
647 "metadata": {
648 "collapsed": true
649 },
650 "outputs": [],
651 "source": []
652 }
653 ],
654 "metadata": {
655 "kernelspec": {
656 "display_name": "Python 3",
657 "language": "python",
658 "name": "python3"
659 },
660 "language_info": {
661 "codemirror_mode": {
662 "name": "ipython",
663 "version": 3
664 },
665 "file_extension": ".py",
666 "mimetype": "text/x-python",
667 "name": "python",
668 "nbconvert_exporter": "python",
669 "pygments_lexer": "ipython3",
670 "version": "3.5.2+"
671 }
672 },
673 "nbformat": 4,
674 "nbformat_minor": 2
675 }