Added a new day 3, rearranged days
[ou-summer-of-code-2017.git] / 07-word-chains / word-chain-solution.ipynb
diff --git a/07-word-chains/word-chain-solution.ipynb b/07-word-chains/word-chain-solution.ipynb
new file mode 100644 (file)
index 0000000..8a87922
--- /dev/null
@@ -0,0 +1,761 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Word chains\n",
+    "\n",
+    "\"Word chain\" puzzles are where you transform one word into another, by changing one letter at a time, with all the intermediate steps being valid words. \n",
+    "\n",
+    "For instance, you can transform 'rash' to 'jags' like this:\n",
+    "\n",
+    "```\n",
+    "rash\n",
+    "Bash\n",
+    "basS\n",
+    "baGs\n",
+    "Jags\n",
+    "```\n",
+    "\n",
+    "(the capital letter is the one changed in each step).\n",
+    "\n",
+    "## Part 1\n",
+    "\n",
+    "Given this [list of words](words4.txt), what is the minimum number of steps to go from `vice` to `wars`?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import string\n",
+    "import heapq"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2336"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "words = [w.strip() for w in open('words4.txt').readlines()]\n",
+    "len(words)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def adjacents(word):\n",
+    "    return [word[0:i] + l + word[i+1:]\n",
+    "           for i in range(len(word))\n",
+    "           for l in string.ascii_lowercase\n",
+    "           if l != word[i]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "neighbours = {w: [n for n in adjacents(w) if n in words]\n",
+    "             for w in words}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def distance(w1, w2):\n",
+    "    return sum(1 for i in range(len(w1))\n",
+    "               if w1[i] != w2[i])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# def extend(chain):\n",
+    "#     return [chain + [s] for s in neighbours[chain[-1]]\n",
+    "#            if s not in chain]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def extend(chain, closed=None):\n",
+    "    if closed:\n",
+    "        nbrs = set(neighbours[chain[-1]]) - closed\n",
+    "    else:\n",
+    "        nbrs = neighbours[chain[-1]]\n",
+    "    return [chain + [s] for s in nbrs\n",
+    "           if s not in chain]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def extend_raw(chain):\n",
+    "    nbrs = [w for w in adjacents(chain[-1]) if w in words]\n",
+    "    return [chain + [s] for s in nbrs\n",
+    "           if s not in chain]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def bfs_search(start, goal, debug=False):\n",
+    "    agenda = [[start]]\n",
+    "    finished = False\n",
+    "    while not finished and agenda:\n",
+    "        current = agenda[0]\n",
+    "        if debug:\n",
+    "            print(current)\n",
+    "        if current[-1] == goal:\n",
+    "            finished = True\n",
+    "        else:\n",
+    "            successors = extend(current)\n",
+    "            agenda = agenda[1:] + successors\n",
+    "    if agenda:\n",
+    "        return current\n",
+    "    else:\n",
+    "        return None        "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def bfs_search_closed(start, goal, debug=False):\n",
+    "    agenda = [[start]]\n",
+    "    closed = set()\n",
+    "    finished = False\n",
+    "    while not finished and agenda:\n",
+    "        current = agenda[0]\n",
+    "        if debug:\n",
+    "            print(current)\n",
+    "        if current[-1] == goal:\n",
+    "            finished = True\n",
+    "        else:\n",
+    "            closed.add(current[-1])\n",
+    "            successors = extend(current, closed)\n",
+    "            agenda = agenda[1:] + successors\n",
+    "    if agenda:\n",
+    "        return current\n",
+    "    else:\n",
+    "        return None   "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def dfs_search(start, goal, debug=False):\n",
+    "    agenda = [[start]]\n",
+    "    finished = False\n",
+    "    while not finished and agenda:\n",
+    "        current = agenda[0]\n",
+    "        if debug:\n",
+    "            print(current)\n",
+    "        if current[-1] == goal:\n",
+    "            finished = True\n",
+    "        else:\n",
+    "            successors = extend(current)\n",
+    "            agenda = successors + agenda[1:]\n",
+    "    if agenda:\n",
+    "        return current\n",
+    "    else:\n",
+    "        return None        "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def astar_search(start, goal, debug=False):\n",
+    "    agenda = [(distance(start, goal), [start])]\n",
+    "    heapq.heapify(agenda)\n",
+    "    finished = False\n",
+    "    while not finished and agenda:\n",
+    "        _, current = heapq.heappop(agenda)\n",
+    "        if debug:\n",
+    "            print(current)\n",
+    "        if current[-1] == goal:\n",
+    "            finished = True\n",
+    "        else:\n",
+    "            successors = extend(current)\n",
+    "            for s in successors:\n",
+    "                heapq.heappush(agenda, (len(current) + distance(s[-1], goal) - 1, s))\n",
+    "    if agenda:\n",
+    "        return current\n",
+    "    else:\n",
+    "        return None        "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Uses direct lookup of successors, rather than using cached neighbours in the dict\n",
+    "def astar_search_raw(start, goal, debug=False):\n",
+    "    agenda = [(distance(start, goal), [start])]\n",
+    "    heapq.heapify(agenda)\n",
+    "    finished = False\n",
+    "    while not finished and agenda:\n",
+    "        _, current = heapq.heappop(agenda)\n",
+    "        if debug:\n",
+    "            print(current)\n",
+    "        if current[-1] == goal:\n",
+    "            finished = True\n",
+    "        else:\n",
+    "            successors = extend_raw(current) # Difference here\n",
+    "            for s in successors:\n",
+    "                heapq.heappush(agenda, (len(current) + distance(s[-1], goal) - 1, s))\n",
+    "    if agenda:\n",
+    "        return current\n",
+    "    else:\n",
+    "        return None        "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def astar_search_closed(start, goal, debug=False):\n",
+    "    agenda = [(distance(start, goal), [start])]\n",
+    "    heapq.heapify(agenda)\n",
+    "    closed = set()\n",
+    "    finished = False\n",
+    "    while not finished and agenda:\n",
+    "        _, current = heapq.heappop(agenda)\n",
+    "        if debug:\n",
+    "            print(current)\n",
+    "        if current[-1] == goal:\n",
+    "            finished = True\n",
+    "        else:\n",
+    "            closed.add(current[-1])\n",
+    "            successors = extend(current, closed)\n",
+    "            for s in successors:\n",
+    "                heapq.heappush(agenda, (len(current) + distance(s[-1], goal) - 1, s))\n",
+    "    if agenda:\n",
+    "        return current\n",
+    "    else:\n",
+    "        return None   "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['vice', 'dice', 'dire', 'dare', 'ware', 'wars']"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "astar_search('vice', 'wars')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['vice', 'dice', 'dire', 'dare', 'ware', 'wars']"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "astar_search_raw('vice', 'wars')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "6"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(astar_search('vice', 'wars'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "6"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(bfs_search('vice', 'wars'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "6"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(bfs_search_closed('vice', 'wars'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "793"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(dfs_search('vice', 'wars'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "10000 loops, best of 3: 158 µs per loop\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "astar_search('vice', 'wars')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "100 loops, best of 3: 15.6 ms per loop\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "astar_search_raw('vice', 'wars')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "10000 loops, best of 3: 168 µs per loop\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "astar_search_closed('vice', 'wars')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 loop, best of 3: 1min 40s per loop\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "bfs_search('vice', 'wars')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 loop, best of 3: 597 ms per loop\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "bfs_search_closed('vice', 'wars')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "10 loops, best of 3: 85.5 ms per loop\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "dfs_search('vice', 'wars')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Part 2\n",
+    "\n",
+    "The example shows that `jags` is reachable in four steps from `rash`. There are 11 words one step away from `rash`: \n",
+    "`bash`, `cash`, `dash`, `gash`, `hash`, `lash`, `mash`, `rasp`, `rush`, `sash`, and `wash`. \n",
+    "\n",
+    "There are 47 words reachable in one or two steps from `rash`. They are `base`, `bash`, `bask`, `bass`, `bast`, `bath`, `bosh`, `bush`, `case`, `cash`, `cask`, `cast`, `dash`, `dish`, `gash`, `gasp`, `gosh`, `gush`, `hash`, `hasp`, `hath`, `hush`, `lash`, `lass`, `last`, `lath`, `lush`, `mash`, `mask`, `mass`, `mast`, `math`, `mesh`, `mush`, `push`, `ramp`, `rasp`, `ruse`, `rush`, `rusk`, `rust`, `sash`, `sass`, `tush`, `wash`, `wasp`, and `wish`.\n",
+    "\n",
+    "There are 180 words reachable in up to three steps from `rash`.\n",
+    "\n",
+    "How many words are reachable in up to ten steps from `vice`?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def reachable_in(word, n, trim_extras=False):\n",
+    "    reachable = set()\n",
+    "    boundary = set([word])\n",
+    "    for i in range(n):\n",
+    "        extras = set()\n",
+    "        for w in boundary:\n",
+    "            extras.update(neighbours[w])\n",
+    "        if trim_extras:\n",
+    "            extras.difference_update(reachable)\n",
+    "        reachable.update(boundary)\n",
+    "        boundary = extras.copy()\n",
+    "    return reachable.union(extras).difference(set([word]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(11,\n",
+       " '`bash`, `cash`, `dash`, `gash`, `hash`, `lash`, `mash`, `rasp`, `rush`, `sash`, `wash`')"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(reachable_in('rash', 1)), ', '.join(sorted('`{}`'.format(r) for r in reachable_in('rash', 1)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(47,\n",
+       " '`base`, `bash`, `bask`, `bass`, `bast`, `bath`, `bosh`, `bush`, `case`, `cash`, `cask`, `cast`, `dash`, `dish`, `gash`, `gasp`, `gosh`, `gush`, `hash`, `hasp`, `hath`, `hush`, `lash`, `lass`, `last`, `lath`, `lush`, `mash`, `mask`, `mass`, `mast`, `math`, `mesh`, `mush`, `push`, `ramp`, `rasp`, `ruse`, `rush`, `rusk`, `rust`, `sash`, `sass`, `tush`, `wash`, `wasp`, `wish`')"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(reachable_in('rash', 2)), ', '.join(sorted('`{}`'.format(r) for r in reachable_in('rash', 2)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "180"
+      ]
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(reachable_in('rash', 3))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2195"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(reachable_in('rash', 10))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2192"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(reachable_in('vice', 10))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "100 loops, best of 3: 5.82 ms per loop\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "len(reachable_in('rash', 10))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "100 loops, best of 3: 2.75 ms per loop\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "len(reachable_in('rash', 10, trim_extras=True))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2+"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}