Renamed wordsearch, added example problem
[ou-summer-of-code-2017.git] / 07-word-chains / word-chain-solution.ipynb
1 {
2 "cells": [
3 {
4 "cell_type": "markdown",
5 "metadata": {},
6 "source": [
7 "# Word chains\n",
8 "\n",
9 "\"Word chain\" puzzles are where you transform one word into another, by changing one letter at a time, with all the intermediate steps being valid words. \n",
10 "\n",
11 "For instance, you can transform 'rash' to 'jags' like this:\n",
12 "\n",
13 "```\n",
14 "rash\n",
15 "Bash\n",
16 "basS\n",
17 "baGs\n",
18 "Jags\n",
19 "```\n",
20 "\n",
21 "(the capital letter is the one changed in each step).\n",
22 "\n",
23 "## Part 1\n",
24 "\n",
25 "Given this [list of words](words4.txt), what is the minimum number of steps to go from `vice` to `wars`?"
26 ]
27 },
28 {
29 "cell_type": "code",
30 "execution_count": 1,
31 "metadata": {
32 "collapsed": true
33 },
34 "outputs": [],
35 "source": [
36 "import string\n",
37 "import heapq"
38 ]
39 },
40 {
41 "cell_type": "code",
42 "execution_count": 2,
43 "metadata": {},
44 "outputs": [
45 {
46 "data": {
47 "text/plain": [
48 "2336"
49 ]
50 },
51 "execution_count": 2,
52 "metadata": {},
53 "output_type": "execute_result"
54 }
55 ],
56 "source": [
57 "words = [w.strip() for w in open('words4.txt').readlines()]\n",
58 "len(words)"
59 ]
60 },
61 {
62 "cell_type": "code",
63 "execution_count": 3,
64 "metadata": {
65 "collapsed": true
66 },
67 "outputs": [],
68 "source": [
69 "def adjacents(word):\n",
70 " return [word[0:i] + l + word[i+1:]\n",
71 " for i in range(len(word))\n",
72 " for l in string.ascii_lowercase\n",
73 " if l != word[i]]"
74 ]
75 },
76 {
77 "cell_type": "code",
78 "execution_count": 4,
79 "metadata": {
80 "collapsed": true
81 },
82 "outputs": [],
83 "source": [
84 "neighbours = {w: [n for n in adjacents(w) if n in words]\n",
85 " for w in words}"
86 ]
87 },
88 {
89 "cell_type": "code",
90 "execution_count": 5,
91 "metadata": {
92 "collapsed": true
93 },
94 "outputs": [],
95 "source": [
96 "def distance(w1, w2):\n",
97 " return sum(1 for i in range(len(w1))\n",
98 " if w1[i] != w2[i])"
99 ]
100 },
101 {
102 "cell_type": "code",
103 "execution_count": 6,
104 "metadata": {
105 "collapsed": true
106 },
107 "outputs": [],
108 "source": [
109 "# def extend(chain):\n",
110 "# return [chain + [s] for s in neighbours[chain[-1]]\n",
111 "# if s not in chain]"
112 ]
113 },
114 {
115 "cell_type": "code",
116 "execution_count": 7,
117 "metadata": {
118 "collapsed": true
119 },
120 "outputs": [],
121 "source": [
122 "def extend(chain, closed=None):\n",
123 " if closed:\n",
124 " nbrs = set(neighbours[chain[-1]]) - closed\n",
125 " else:\n",
126 " nbrs = neighbours[chain[-1]]\n",
127 " return [chain + [s] for s in nbrs\n",
128 " if s not in chain]"
129 ]
130 },
131 {
132 "cell_type": "code",
133 "execution_count": 8,
134 "metadata": {
135 "collapsed": true
136 },
137 "outputs": [],
138 "source": [
139 "def extend_raw(chain):\n",
140 " nbrs = [w for w in adjacents(chain[-1]) if w in words]\n",
141 " return [chain + [s] for s in nbrs\n",
142 " if s not in chain]"
143 ]
144 },
145 {
146 "cell_type": "code",
147 "execution_count": 9,
148 "metadata": {
149 "collapsed": true
150 },
151 "outputs": [],
152 "source": [
153 "def bfs_search(start, goal, debug=False):\n",
154 " agenda = [[start]]\n",
155 " finished = False\n",
156 " while not finished and agenda:\n",
157 " current = agenda[0]\n",
158 " if debug:\n",
159 " print(current)\n",
160 " if current[-1] == goal:\n",
161 " finished = True\n",
162 " else:\n",
163 " successors = extend(current)\n",
164 " agenda = agenda[1:] + successors\n",
165 " if agenda:\n",
166 " return current\n",
167 " else:\n",
168 " return None "
169 ]
170 },
171 {
172 "cell_type": "code",
173 "execution_count": 19,
174 "metadata": {
175 "collapsed": true
176 },
177 "outputs": [],
178 "source": [
179 "def bfs_search_closed(start, goal, debug=False):\n",
180 " agenda = [[start]]\n",
181 " closed = set()\n",
182 " finished = False\n",
183 " while not finished and agenda:\n",
184 " current = agenda[0]\n",
185 " if debug:\n",
186 " print(current)\n",
187 " if current[-1] == goal:\n",
188 " finished = True\n",
189 " else:\n",
190 " closed.add(current[-1])\n",
191 " successors = extend(current, closed)\n",
192 " agenda = agenda[1:] + successors\n",
193 " if agenda:\n",
194 " return current\n",
195 " else:\n",
196 " return None "
197 ]
198 },
199 {
200 "cell_type": "code",
201 "execution_count": 10,
202 "metadata": {
203 "collapsed": true
204 },
205 "outputs": [],
206 "source": [
207 "def dfs_search(start, goal, debug=False):\n",
208 " agenda = [[start]]\n",
209 " finished = False\n",
210 " while not finished and agenda:\n",
211 " current = agenda[0]\n",
212 " if debug:\n",
213 " print(current)\n",
214 " if current[-1] == goal:\n",
215 " finished = True\n",
216 " else:\n",
217 " successors = extend(current)\n",
218 " agenda = successors + agenda[1:]\n",
219 " if agenda:\n",
220 " return current\n",
221 " else:\n",
222 " return None "
223 ]
224 },
225 {
226 "cell_type": "code",
227 "execution_count": 11,
228 "metadata": {
229 "collapsed": true
230 },
231 "outputs": [],
232 "source": [
233 "def astar_search(start, goal, debug=False):\n",
234 " agenda = [(distance(start, goal), [start])]\n",
235 " heapq.heapify(agenda)\n",
236 " finished = False\n",
237 " while not finished and agenda:\n",
238 " _, current = heapq.heappop(agenda)\n",
239 " if debug:\n",
240 " print(current)\n",
241 " if current[-1] == goal:\n",
242 " finished = True\n",
243 " else:\n",
244 " successors = extend(current)\n",
245 " for s in successors:\n",
246 " heapq.heappush(agenda, (len(current) + distance(s[-1], goal) - 1, s))\n",
247 " if agenda:\n",
248 " return current\n",
249 " else:\n",
250 " return None "
251 ]
252 },
253 {
254 "cell_type": "code",
255 "execution_count": 12,
256 "metadata": {
257 "collapsed": true
258 },
259 "outputs": [],
260 "source": [
261 "# Uses direct lookup of successors, rather than using cached neighbours in the dict\n",
262 "def astar_search_raw(start, goal, debug=False):\n",
263 " agenda = [(distance(start, goal), [start])]\n",
264 " heapq.heapify(agenda)\n",
265 " finished = False\n",
266 " while not finished and agenda:\n",
267 " _, current = heapq.heappop(agenda)\n",
268 " if debug:\n",
269 " print(current)\n",
270 " if current[-1] == goal:\n",
271 " finished = True\n",
272 " else:\n",
273 " successors = extend_raw(current) # Difference here\n",
274 " for s in successors:\n",
275 " heapq.heappush(agenda, (len(current) + distance(s[-1], goal) - 1, s))\n",
276 " if agenda:\n",
277 " return current\n",
278 " else:\n",
279 " return None "
280 ]
281 },
282 {
283 "cell_type": "code",
284 "execution_count": 13,
285 "metadata": {
286 "collapsed": true
287 },
288 "outputs": [],
289 "source": [
290 "def astar_search_closed(start, goal, debug=False):\n",
291 " agenda = [(distance(start, goal), [start])]\n",
292 " heapq.heapify(agenda)\n",
293 " closed = set()\n",
294 " finished = False\n",
295 " while not finished and agenda:\n",
296 " _, current = heapq.heappop(agenda)\n",
297 " if debug:\n",
298 " print(current)\n",
299 " if current[-1] == goal:\n",
300 " finished = True\n",
301 " else:\n",
302 " closed.add(current[-1])\n",
303 " successors = extend(current, closed)\n",
304 " for s in successors:\n",
305 " heapq.heappush(agenda, (len(current) + distance(s[-1], goal) - 1, s))\n",
306 " if agenda:\n",
307 " return current\n",
308 " else:\n",
309 " return None "
310 ]
311 },
312 {
313 "cell_type": "code",
314 "execution_count": 14,
315 "metadata": {},
316 "outputs": [
317 {
318 "data": {
319 "text/plain": [
320 "['vice', 'dice', 'dire', 'dare', 'ware', 'wars']"
321 ]
322 },
323 "execution_count": 14,
324 "metadata": {},
325 "output_type": "execute_result"
326 }
327 ],
328 "source": [
329 "astar_search('vice', 'wars')"
330 ]
331 },
332 {
333 "cell_type": "code",
334 "execution_count": 15,
335 "metadata": {},
336 "outputs": [
337 {
338 "data": {
339 "text/plain": [
340 "['vice', 'dice', 'dire', 'dare', 'ware', 'wars']"
341 ]
342 },
343 "execution_count": 15,
344 "metadata": {},
345 "output_type": "execute_result"
346 }
347 ],
348 "source": [
349 "astar_search_raw('vice', 'wars')"
350 ]
351 },
352 {
353 "cell_type": "code",
354 "execution_count": 16,
355 "metadata": {},
356 "outputs": [
357 {
358 "data": {
359 "text/plain": [
360 "6"
361 ]
362 },
363 "execution_count": 16,
364 "metadata": {},
365 "output_type": "execute_result"
366 }
367 ],
368 "source": [
369 "len(astar_search('vice', 'wars'))"
370 ]
371 },
372 {
373 "cell_type": "code",
374 "execution_count": 17,
375 "metadata": {},
376 "outputs": [
377 {
378 "data": {
379 "text/plain": [
380 "6"
381 ]
382 },
383 "execution_count": 17,
384 "metadata": {},
385 "output_type": "execute_result"
386 }
387 ],
388 "source": [
389 "len(bfs_search('vice', 'wars'))"
390 ]
391 },
392 {
393 "cell_type": "code",
394 "execution_count": 20,
395 "metadata": {},
396 "outputs": [
397 {
398 "data": {
399 "text/plain": [
400 "6"
401 ]
402 },
403 "execution_count": 20,
404 "metadata": {},
405 "output_type": "execute_result"
406 }
407 ],
408 "source": [
409 "len(bfs_search_closed('vice', 'wars'))"
410 ]
411 },
412 {
413 "cell_type": "code",
414 "execution_count": 21,
415 "metadata": {},
416 "outputs": [
417 {
418 "data": {
419 "text/plain": [
420 "793"
421 ]
422 },
423 "execution_count": 21,
424 "metadata": {},
425 "output_type": "execute_result"
426 }
427 ],
428 "source": [
429 "len(dfs_search('vice', 'wars'))"
430 ]
431 },
432 {
433 "cell_type": "code",
434 "execution_count": 22,
435 "metadata": {},
436 "outputs": [
437 {
438 "name": "stdout",
439 "output_type": "stream",
440 "text": [
441 "10000 loops, best of 3: 158 µs per loop\n"
442 ]
443 }
444 ],
445 "source": [
446 "%%timeit\n",
447 "astar_search('vice', 'wars')"
448 ]
449 },
450 {
451 "cell_type": "code",
452 "execution_count": 23,
453 "metadata": {},
454 "outputs": [
455 {
456 "name": "stdout",
457 "output_type": "stream",
458 "text": [
459 "100 loops, best of 3: 15.6 ms per loop\n"
460 ]
461 }
462 ],
463 "source": [
464 "%%timeit\n",
465 "astar_search_raw('vice', 'wars')"
466 ]
467 },
468 {
469 "cell_type": "code",
470 "execution_count": 24,
471 "metadata": {},
472 "outputs": [
473 {
474 "name": "stdout",
475 "output_type": "stream",
476 "text": [
477 "10000 loops, best of 3: 168 µs per loop\n"
478 ]
479 }
480 ],
481 "source": [
482 "%%timeit\n",
483 "astar_search_closed('vice', 'wars')"
484 ]
485 },
486 {
487 "cell_type": "code",
488 "execution_count": 25,
489 "metadata": {},
490 "outputs": [
491 {
492 "name": "stdout",
493 "output_type": "stream",
494 "text": [
495 "1 loop, best of 3: 1min 40s per loop\n"
496 ]
497 }
498 ],
499 "source": [
500 "%%timeit\n",
501 "bfs_search('vice', 'wars')"
502 ]
503 },
504 {
505 "cell_type": "code",
506 "execution_count": 26,
507 "metadata": {},
508 "outputs": [
509 {
510 "name": "stdout",
511 "output_type": "stream",
512 "text": [
513 "1 loop, best of 3: 597 ms per loop\n"
514 ]
515 }
516 ],
517 "source": [
518 "%%timeit\n",
519 "bfs_search_closed('vice', 'wars')"
520 ]
521 },
522 {
523 "cell_type": "code",
524 "execution_count": 27,
525 "metadata": {},
526 "outputs": [
527 {
528 "name": "stdout",
529 "output_type": "stream",
530 "text": [
531 "10 loops, best of 3: 85.5 ms per loop\n"
532 ]
533 }
534 ],
535 "source": [
536 "%%timeit\n",
537 "dfs_search('vice', 'wars')"
538 ]
539 },
540 {
541 "cell_type": "markdown",
542 "metadata": {},
543 "source": [
544 "## Part 2\n",
545 "\n",
546 "The example shows that `jags` is reachable in four steps from `rash`. There are 11 words one step away from `rash`: \n",
547 "`bash`, `cash`, `dash`, `gash`, `hash`, `lash`, `mash`, `rasp`, `rush`, `sash`, and `wash`. \n",
548 "\n",
549 "There are 47 words reachable in one or two steps from `rash`. They are `base`, `bash`, `bask`, `bass`, `bast`, `bath`, `bosh`, `bush`, `case`, `cash`, `cask`, `cast`, `dash`, `dish`, `gash`, `gasp`, `gosh`, `gush`, `hash`, `hasp`, `hath`, `hush`, `lash`, `lass`, `last`, `lath`, `lush`, `mash`, `mask`, `mass`, `mast`, `math`, `mesh`, `mush`, `push`, `ramp`, `rasp`, `ruse`, `rush`, `rusk`, `rust`, `sash`, `sass`, `tush`, `wash`, `wasp`, and `wish`.\n",
550 "\n",
551 "There are 180 words reachable in up to three steps from `rash`.\n",
552 "\n",
553 "How many words are reachable in up to ten steps from `vice`?"
554 ]
555 },
556 {
557 "cell_type": "code",
558 "execution_count": 28,
559 "metadata": {
560 "collapsed": true
561 },
562 "outputs": [],
563 "source": [
564 "def reachable_in(word, n, trim_extras=False):\n",
565 " reachable = set()\n",
566 " boundary = set([word])\n",
567 " for i in range(n):\n",
568 " extras = set()\n",
569 " for w in boundary:\n",
570 " extras.update(neighbours[w])\n",
571 " if trim_extras:\n",
572 " extras.difference_update(reachable)\n",
573 " reachable.update(boundary)\n",
574 " boundary = extras.copy()\n",
575 " return reachable.union(extras).difference(set([word]))"
576 ]
577 },
578 {
579 "cell_type": "code",
580 "execution_count": 29,
581 "metadata": {
582 "scrolled": true
583 },
584 "outputs": [
585 {
586 "data": {
587 "text/plain": [
588 "(11,\n",
589 " '`bash`, `cash`, `dash`, `gash`, `hash`, `lash`, `mash`, `rasp`, `rush`, `sash`, `wash`')"
590 ]
591 },
592 "execution_count": 29,
593 "metadata": {},
594 "output_type": "execute_result"
595 }
596 ],
597 "source": [
598 "len(reachable_in('rash', 1)), ', '.join(sorted('`{}`'.format(r) for r in reachable_in('rash', 1)))"
599 ]
600 },
601 {
602 "cell_type": "code",
603 "execution_count": 30,
604 "metadata": {
605 "scrolled": true
606 },
607 "outputs": [
608 {
609 "data": {
610 "text/plain": [
611 "(47,\n",
612 " '`base`, `bash`, `bask`, `bass`, `bast`, `bath`, `bosh`, `bush`, `case`, `cash`, `cask`, `cast`, `dash`, `dish`, `gash`, `gasp`, `gosh`, `gush`, `hash`, `hasp`, `hath`, `hush`, `lash`, `lass`, `last`, `lath`, `lush`, `mash`, `mask`, `mass`, `mast`, `math`, `mesh`, `mush`, `push`, `ramp`, `rasp`, `ruse`, `rush`, `rusk`, `rust`, `sash`, `sass`, `tush`, `wash`, `wasp`, `wish`')"
613 ]
614 },
615 "execution_count": 30,
616 "metadata": {},
617 "output_type": "execute_result"
618 }
619 ],
620 "source": [
621 "len(reachable_in('rash', 2)), ', '.join(sorted('`{}`'.format(r) for r in reachable_in('rash', 2)))"
622 ]
623 },
624 {
625 "cell_type": "code",
626 "execution_count": 31,
627 "metadata": {
628 "scrolled": true
629 },
630 "outputs": [
631 {
632 "data": {
633 "text/plain": [
634 "180"
635 ]
636 },
637 "execution_count": 31,
638 "metadata": {},
639 "output_type": "execute_result"
640 }
641 ],
642 "source": [
643 "len(reachable_in('rash', 3))"
644 ]
645 },
646 {
647 "cell_type": "code",
648 "execution_count": 32,
649 "metadata": {
650 "scrolled": true
651 },
652 "outputs": [
653 {
654 "data": {
655 "text/plain": [
656 "2195"
657 ]
658 },
659 "execution_count": 32,
660 "metadata": {},
661 "output_type": "execute_result"
662 }
663 ],
664 "source": [
665 "len(reachable_in('rash', 10))"
666 ]
667 },
668 {
669 "cell_type": "code",
670 "execution_count": 33,
671 "metadata": {
672 "scrolled": true
673 },
674 "outputs": [
675 {
676 "data": {
677 "text/plain": [
678 "2192"
679 ]
680 },
681 "execution_count": 33,
682 "metadata": {},
683 "output_type": "execute_result"
684 }
685 ],
686 "source": [
687 "len(reachable_in('vice', 10))"
688 ]
689 },
690 {
691 "cell_type": "code",
692 "execution_count": 34,
693 "metadata": {
694 "scrolled": true
695 },
696 "outputs": [
697 {
698 "name": "stdout",
699 "output_type": "stream",
700 "text": [
701 "100 loops, best of 3: 5.82 ms per loop\n"
702 ]
703 }
704 ],
705 "source": [
706 "%%timeit\n",
707 "len(reachable_in('rash', 10))"
708 ]
709 },
710 {
711 "cell_type": "code",
712 "execution_count": 35,
713 "metadata": {
714 "scrolled": true
715 },
716 "outputs": [
717 {
718 "name": "stdout",
719 "output_type": "stream",
720 "text": [
721 "100 loops, best of 3: 2.75 ms per loop\n"
722 ]
723 }
724 ],
725 "source": [
726 "%%timeit\n",
727 "len(reachable_in('rash', 10, trim_extras=True))"
728 ]
729 },
730 {
731 "cell_type": "code",
732 "execution_count": null,
733 "metadata": {
734 "collapsed": true
735 },
736 "outputs": [],
737 "source": []
738 }
739 ],
740 "metadata": {
741 "kernelspec": {
742 "display_name": "Python 3",
743 "language": "python",
744 "name": "python3"
745 },
746 "language_info": {
747 "codemirror_mode": {
748 "name": "ipython",
749 "version": 3
750 },
751 "file_extension": ".py",
752 "mimetype": "text/x-python",
753 "name": "python",
754 "nbconvert_exporter": "python",
755 "pygments_lexer": "ipython3",
756 "version": "3.5.2+"
757 }
758 },
759 "nbformat": 4,
760 "nbformat_minor": 2
761 }