X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=07-word-chains%2Fword-chain-solution.ipynb;fp=07-word-chains%2Fword-chain-solution.ipynb;h=8a87922bb6b57a44ab864fc9365df8122424c93f;hb=3afec0b916cae5ebd717b3d15c71ff9205e144f1;hp=0000000000000000000000000000000000000000;hpb=9df2a294ec8aeb0b48cdbc0e60a2ef88fd91dedf;p=ou-summer-of-code-2017.git diff --git a/07-word-chains/word-chain-solution.ipynb b/07-word-chains/word-chain-solution.ipynb new file mode 100644 index 0000000..8a87922 --- /dev/null +++ b/07-word-chains/word-chain-solution.ipynb @@ -0,0 +1,761 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Word chains\n", + "\n", + "\"Word chain\" puzzles are where you transform one word into another, by changing one letter at a time, with all the intermediate steps being valid words. \n", + "\n", + "For instance, you can transform 'rash' to 'jags' like this:\n", + "\n", + "```\n", + "rash\n", + "Bash\n", + "basS\n", + "baGs\n", + "Jags\n", + "```\n", + "\n", + "(the capital letter is the one changed in each step).\n", + "\n", + "## Part 1\n", + "\n", + "Given this [list of words](words4.txt), what is the minimum number of steps to go from `vice` to `wars`?" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import string\n", + "import heapq" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2336" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "words = [w.strip() for w in open('words4.txt').readlines()]\n", + "len(words)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def adjacents(word):\n", + " return [word[0:i] + l + word[i+1:]\n", + " for i in range(len(word))\n", + " for l in string.ascii_lowercase\n", + " if l != word[i]]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "neighbours = {w: [n for n in adjacents(w) if n in words]\n", + " for w in words}" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def distance(w1, w2):\n", + " return sum(1 for i in range(len(w1))\n", + " if w1[i] != w2[i])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# def extend(chain):\n", + "# return [chain + [s] for s in neighbours[chain[-1]]\n", + "# if s not in chain]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def extend(chain, closed=None):\n", + " if closed:\n", + " nbrs = set(neighbours[chain[-1]]) - closed\n", + " else:\n", + " nbrs = neighbours[chain[-1]]\n", + " return [chain + [s] for s in nbrs\n", + " if s not in chain]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def extend_raw(chain):\n", + " nbrs = [w for w in adjacents(chain[-1]) if w in words]\n", + " return [chain + [s] for s in nbrs\n", + " if s not in chain]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def bfs_search(start, goal, debug=False):\n", + " agenda = [[start]]\n", + " finished = False\n", + " while not finished and agenda:\n", + " current = agenda[0]\n", + " if debug:\n", + " print(current)\n", + " if current[-1] == goal:\n", + " finished = True\n", + " else:\n", + " successors = extend(current)\n", + " agenda = agenda[1:] + successors\n", + " if agenda:\n", + " return current\n", + " else:\n", + " return None " + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def bfs_search_closed(start, goal, debug=False):\n", + " agenda = [[start]]\n", + " closed = set()\n", + " finished = False\n", + " while not finished and agenda:\n", + " current = agenda[0]\n", + " if debug:\n", + " print(current)\n", + " if current[-1] == goal:\n", + " finished = True\n", + " else:\n", + " closed.add(current[-1])\n", + " successors = extend(current, closed)\n", + " agenda = agenda[1:] + successors\n", + " if agenda:\n", + " return current\n", + " else:\n", + " return None " + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def dfs_search(start, goal, debug=False):\n", + " agenda = [[start]]\n", + " finished = False\n", + " while not finished and agenda:\n", + " current = agenda[0]\n", + " if debug:\n", + " print(current)\n", + " if current[-1] == goal:\n", + " finished = True\n", + " else:\n", + " successors = extend(current)\n", + " agenda = successors + agenda[1:]\n", + " if agenda:\n", + " return current\n", + " else:\n", + " return None " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def astar_search(start, goal, debug=False):\n", + " agenda = [(distance(start, goal), [start])]\n", + " heapq.heapify(agenda)\n", + " finished = False\n", + " while not finished and agenda:\n", + " _, current = heapq.heappop(agenda)\n", + " if debug:\n", + " print(current)\n", + " if current[-1] == goal:\n", + " finished = True\n", + " else:\n", + " successors = extend(current)\n", + " for s in successors:\n", + " heapq.heappush(agenda, (len(current) + distance(s[-1], goal) - 1, s))\n", + " if agenda:\n", + " return current\n", + " else:\n", + " return None " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Uses direct lookup of successors, rather than using cached neighbours in the dict\n", + "def astar_search_raw(start, goal, debug=False):\n", + " agenda = [(distance(start, goal), [start])]\n", + " heapq.heapify(agenda)\n", + " finished = False\n", + " while not finished and agenda:\n", + " _, current = heapq.heappop(agenda)\n", + " if debug:\n", + " print(current)\n", + " if current[-1] == goal:\n", + " finished = True\n", + " else:\n", + " successors = extend_raw(current) # Difference here\n", + " for s in successors:\n", + " heapq.heappush(agenda, (len(current) + distance(s[-1], goal) - 1, s))\n", + " if agenda:\n", + " return current\n", + " else:\n", + " return None " + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def astar_search_closed(start, goal, debug=False):\n", + " agenda = [(distance(start, goal), [start])]\n", + " heapq.heapify(agenda)\n", + " closed = set()\n", + " finished = False\n", + " while not finished and agenda:\n", + " _, current = heapq.heappop(agenda)\n", + " if debug:\n", + " print(current)\n", + " if current[-1] == goal:\n", + " finished = True\n", + " else:\n", + " closed.add(current[-1])\n", + " successors = extend(current, closed)\n", + " for s in successors:\n", + " heapq.heappush(agenda, (len(current) + distance(s[-1], goal) - 1, s))\n", + " if agenda:\n", + " return current\n", + " else:\n", + " return None " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['vice', 'dice', 'dire', 'dare', 'ware', 'wars']" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "astar_search('vice', 'wars')" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['vice', 'dice', 'dire', 'dare', 'ware', 'wars']" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "astar_search_raw('vice', 'wars')" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(astar_search('vice', 'wars'))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(bfs_search('vice', 'wars'))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(bfs_search_closed('vice', 'wars'))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "793" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(dfs_search('vice', 'wars'))" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10000 loops, best of 3: 158 µs per loop\n" + ] + } + ], + "source": [ + "%%timeit\n", + "astar_search('vice', 'wars')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "100 loops, best of 3: 15.6 ms per loop\n" + ] + } + ], + "source": [ + "%%timeit\n", + "astar_search_raw('vice', 'wars')" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10000 loops, best of 3: 168 µs per loop\n" + ] + } + ], + "source": [ + "%%timeit\n", + "astar_search_closed('vice', 'wars')" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 loop, best of 3: 1min 40s per loop\n" + ] + } + ], + "source": [ + "%%timeit\n", + "bfs_search('vice', 'wars')" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 loop, best of 3: 597 ms per loop\n" + ] + } + ], + "source": [ + "%%timeit\n", + "bfs_search_closed('vice', 'wars')" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10 loops, best of 3: 85.5 ms per loop\n" + ] + } + ], + "source": [ + "%%timeit\n", + "dfs_search('vice', 'wars')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Part 2\n", + "\n", + "The example shows that `jags` is reachable in four steps from `rash`. There are 11 words one step away from `rash`: \n", + "`bash`, `cash`, `dash`, `gash`, `hash`, `lash`, `mash`, `rasp`, `rush`, `sash`, and `wash`. \n", + "\n", + "There are 47 words reachable in one or two steps from `rash`. They are `base`, `bash`, `bask`, `bass`, `bast`, `bath`, `bosh`, `bush`, `case`, `cash`, `cask`, `cast`, `dash`, `dish`, `gash`, `gasp`, `gosh`, `gush`, `hash`, `hasp`, `hath`, `hush`, `lash`, `lass`, `last`, `lath`, `lush`, `mash`, `mask`, `mass`, `mast`, `math`, `mesh`, `mush`, `push`, `ramp`, `rasp`, `ruse`, `rush`, `rusk`, `rust`, `sash`, `sass`, `tush`, `wash`, `wasp`, and `wish`.\n", + "\n", + "There are 180 words reachable in up to three steps from `rash`.\n", + "\n", + "How many words are reachable in up to ten steps from `vice`?" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def reachable_in(word, n, trim_extras=False):\n", + " reachable = set()\n", + " boundary = set([word])\n", + " for i in range(n):\n", + " extras = set()\n", + " for w in boundary:\n", + " extras.update(neighbours[w])\n", + " if trim_extras:\n", + " extras.difference_update(reachable)\n", + " reachable.update(boundary)\n", + " boundary = extras.copy()\n", + " return reachable.union(extras).difference(set([word]))" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(11,\n", + " '`bash`, `cash`, `dash`, `gash`, `hash`, `lash`, `mash`, `rasp`, `rush`, `sash`, `wash`')" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(reachable_in('rash', 1)), ', '.join(sorted('`{}`'.format(r) for r in reachable_in('rash', 1)))" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(47,\n", + " '`base`, `bash`, `bask`, `bass`, `bast`, `bath`, `bosh`, `bush`, `case`, `cash`, `cask`, `cast`, `dash`, `dish`, `gash`, `gasp`, `gosh`, `gush`, `hash`, `hasp`, `hath`, `hush`, `lash`, `lass`, `last`, `lath`, `lush`, `mash`, `mask`, `mass`, `mast`, `math`, `mesh`, `mush`, `push`, `ramp`, `rasp`, `ruse`, `rush`, `rusk`, `rust`, `sash`, `sass`, `tush`, `wash`, `wasp`, `wish`')" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(reachable_in('rash', 2)), ', '.join(sorted('`{}`'.format(r) for r in reachable_in('rash', 2)))" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "180" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(reachable_in('rash', 3))" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2195" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(reachable_in('rash', 10))" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2192" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(reachable_in('vice', 10))" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "100 loops, best of 3: 5.82 ms per loop\n" + ] + } + ], + "source": [ + "%%timeit\n", + "len(reachable_in('rash', 10))" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "100 loops, best of 3: 2.75 ms per loop\n" + ] + } + ], + "source": [ + "%%timeit\n", + "len(reachable_in('rash', 10, trim_extras=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2+" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}