Added word filter comparisons
[cas-master-teacher-training.git] / hangman-better.ipynb
index 96c61fe786d71be1d42661beb4e49de12ccf2f16..7691ed14b7530fbf1cf00f6c45c88c0b9fbd6f6d 100644 (file)
@@ -1,7 +1,7 @@
 {
  "metadata": {
   "name": "",
-  "signature": "sha256:b93ed7f85302bd806b23e539f31a6d8afd7554cafe052439a06de6aa5a19b08d"
+  "signature": "sha256:07f366ec645d178071697c25c43d438fd80cf3a676ad9bd8d7613f5bfa36adf5"
  },
  "nbformat": 3,
  "nbformat_minor": 0,
@@ -44,7 +44,7 @@
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 12
+     "prompt_number": 3
     },
     {
      "cell_type": "code",
       "        return guess\n",
       "    \n",
       "    def play_game(self):\n",
-      "        self.do_turn()\n",
       "        while not self.game_finished:\n",
       "            self.do_turn()\n",
       "        if not self.player:\n",
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "55\n"
+        "43\n"
        ]
       }
      ],
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "336\n"
+        "316\n"
        ]
       }
      ],
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "4\n"
+        "5\n"
        ]
       }
      ],
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 17
+     "prompt_number": 11
     },
     {
      "cell_type": "code",
       {
        "metadata": {},
        "output_type": "pyout",
-       "prompt_number": 18,
+       "prompt_number": 12,
        "text": [
         "Counter({'s': 91332, 'e': 88692, 'i': 66900, 'a': 64468, 'r': 57460, 'n': 57128, 't': 52949, 'o': 49121, 'l': 40995, 'c': 31854, 'd': 28505, 'u': 26372, 'g': 22693, 'm': 22549, 'p': 22249, 'h': 19337, 'b': 15540, 'y': 12652, 'f': 10679, 'k': 8386, 'v': 8000, 'w': 7505, 'x': 2125, 'z': 2058, 'j': 1950, 'q': 1536})"
        ]
       }
      ],
-     "prompt_number": 18
+     "prompt_number": 12
     },
     {
      "cell_type": "code",
        ]
       }
      ],
-     "prompt_number": 19
+     "prompt_number": 13
     },
     {
      "cell_type": "code",
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "451\n"
+        "473\n"
        ]
       }
      ],
-     "prompt_number": 20
+     "prompt_number": 14
     },
     {
      "cell_type": "code",
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 33
+     "prompt_number": 15
     },
     {
      "cell_type": "code",
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "485\n"
+        "489\n"
        ]
       }
      ],
-     "prompt_number": 34
+     "prompt_number": 16
     },
     {
      "cell_type": "code",
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 51
+     "prompt_number": 17
     },
     {
      "cell_type": "code",
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "985\n"
+        "979\n"
        ]
       }
      ],
-     "prompt_number": 52
+     "prompt_number": 18
     },
     {
      "cell_type": "code",
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 50
+     "prompt_number": 19
     },
     {
      "cell_type": "code",
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 62
+     "prompt_number": 20
     },
     {
      "cell_type": "code",
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "491\n"
+        "512\n"
        ]
       }
      ],
-     "prompt_number": 63
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "g.player.candidate_words"
-     ],
-     "language": "python",
-     "metadata": {},
-     "outputs": [
-      {
-       "metadata": {},
-       "output_type": "pyout",
-       "prompt_number": 59,
-       "text": [
-        "['a']"
-       ]
-      }
-     ],
-     "prompt_number": 59
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "g.wrong_letters"
-     ],
-     "language": "python",
-     "metadata": {},
-     "outputs": [
-      {
-       "metadata": {},
-       "output_type": "pyout",
-       "prompt_number": 61,
-       "text": [
-        "['a']"
-       ]
-      }
-     ],
-     "prompt_number": 61
+     "prompt_number": 21
     },
     {
      "cell_type": "code",
       "                self.ordered_letters = self.ordered_letters[:ri] + self.ordered_letters[ri+1:]\n",
       "    \n",
       "    def filter_candidate_words(self, discovered, missed):\n",
-      "        if missed:\n",
-      "            exclusion_pattern = '(?!.*[' + ''.join(missed) + '])'\n",
+      "        attempted_letters = list(set(l.lower() for l in discovered + missed if l in string.ascii_letters))\n",
+      "        if attempted_letters:\n",
+      "            exclusion_pattern = '[^' + ''.join(attempted_letters) + ']'\n",
       "        else:\n",
-      "            exclusion_pattern = ''\n",
-      "        exp = re.compile('^' + exclusion_pattern + ''.join(discovered).replace('_', '.') + '$')\n",
+      "            exclusion_pattern = '.'\n",
+      "        exp = re.compile('^' + ''.join(discovered).replace('_', exclusion_pattern) + '$')\n",
       "        self.candidate_words = [w for w in self.candidate_words if exp.match(w)]\n",
       "        \n",
       "    def set_ordered_letters(self):\n",
       "        counts = collections.Counter(l.lower() for l in ''.join(self.candidate_words) if l in string.ascii_letters)\n",
-      "        self.ordered_letters = [p[0] for p in counts.most_common()]"
-     ],
-     "language": "python",
-     "metadata": {},
-     "outputs": [],
-     "prompt_number": 109
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "def fcw(words, discovered, missed):\n",
-      "    if missed:\n",
-      "        exclusion_pattern = '(?!.*[' + ''.join(missed) + '])'\n",
-      "    else:\n",
-      "        exclusion_pattern = ''\n",
-      "    exp = re.compile('^' + exclusion_pattern + ''.join(discovered).replace('_', '.') + '$')\n",
-      "    return [w for w in words if exp.match(w)]"
-     ],
-     "language": "python",
-     "metadata": {},
-     "outputs": [],
-     "prompt_number": 97
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "def fcwp(discovered, missed):\n",
-      "    if missed:\n",
-      "        exclusion_pattern = '(?!.*[' + ''.join(missed) + '])'\n",
-      "    else:\n",
-      "        exclusion_pattern = ''\n",
-      "    return '^' + exclusion_pattern + ''.join(discovered).replace('_', '.') + '$'"
+      "        self.ordered_letters = [p[0] for p in counts.most_common()]\n"
      ],
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 102
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "fcwp(['h', '_', 'p', '_'], ['x', 'w'])"
-     ],
-     "language": "python",
-     "metadata": {},
-     "outputs": [
-      {
-       "metadata": {},
-       "output_type": "pyout",
-       "prompt_number": 103,
-       "text": [
-        "'^(?!.*[xw])h.p.$'"
-       ]
-      }
-     ],
-     "prompt_number": 103
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "re.match('^(?!.*[xw])h.p.$', 'hwpe')"
-     ],
-     "language": "python",
-     "metadata": {},
-     "outputs": [],
-     "prompt_number": 101
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "re.match('^(?!.*[xw])h.p.$', 'hape')"
-     ],
-     "language": "python",
-     "metadata": {},
-     "outputs": [
-      {
-       "metadata": {},
-       "output_type": "pyout",
-       "prompt_number": 104,
-       "text": [
-        "<_sre.SRE_Match object; span=(0, 4), match='hape'>"
-       ]
-      }
-     ],
-     "prompt_number": 104
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "fcw(WORDS, ['h', '_', 'p', '_'], ['x', 'w', 's'])"
-     ],
-     "language": "python",
-     "metadata": {},
-     "outputs": [
-      {
-       "metadata": {},
-       "output_type": "pyout",
-       "prompt_number": 108,
-       "text": [
-        "['hope', 'hype', 'hypo']"
-       ]
-      }
-     ],
-     "prompt_number": 108
+     "prompt_number": 22
     },
     {
      "cell_type": "code",
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "992\n"
+        "990\n"
        ]
       }
      ],
-     "prompt_number": 110
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "%%timeit\n",
-      "\n",
-      "wins = 0\n",
-      "for _ in range(1000):\n",
-      "    g = Game(random.choice(WORDS), player=PlayerAdaptiveIncludedLetters(WORDS))\n",
-      "    g.play_game()\n",
-      "    if g.game_won:\n",
-      "        wins += 1\n",
-      "print(wins)"
-     ],
-     "language": "python",
-     "metadata": {},
-     "outputs": [
-      {
-       "output_type": "stream",
-       "stream": "stdout",
-       "text": [
-        "984\n",
-        "979"
-       ]
-      },
-      {
-       "output_type": "stream",
-       "stream": "stdout",
-       "text": [
-        "\n",
-        "982"
-       ]
-      },
-      {
-       "output_type": "stream",
-       "stream": "stdout",
-       "text": [
-        "\n",
-        "979"
-       ]
-      },
-      {
-       "output_type": "stream",
-       "stream": "stdout",
-       "text": [
-        "\n",
-        "1 loops, best of 3: 52.9 s per loop\n"
-       ]
-      }
-     ],
-     "prompt_number": 111
+     "prompt_number": 23
     },
     {
      "cell_type": "code",
        "stream": "stdout",
        "text": [
         "986\n",
-        "991"
+        "996"
        ]
       },
       {
        "stream": "stdout",
        "text": [
         "\n",
-        "989"
+        "991"
        ]
       },
       {
        "stream": "stdout",
        "text": [
         "\n",
-        "989"
+        "990"
        ]
       },
       {
        "stream": "stdout",
        "text": [
         "\n",
-        "1 loops, best of 3: 44.7 s per loop\n"
+        "1 loops, best of 3: 57.2 s per loop\n"
        ]
       }
      ],
-     "prompt_number": 112
+     "prompt_number": 24
     },
     {
      "cell_type": "code",
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 174
+     "prompt_number": 25
     },
     {
      "cell_type": "code",
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 175
+     "prompt_number": 26
     },
     {
      "cell_type": "code",
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 176
+     "prompt_number": 27
     },
     {
      "cell_type": "code",
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 177
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "class PlayerAdaptivePatternNegLookahead(PlayerAdaptive):\n",
-      "    def filter_candidate_words(self, discovered, missed):\n",
-      "        if missed:\n",
-      "            exclusion_pattern = '(?!.*[' + ''.join(missed) + '])'\n",
-      "        else:\n",
-      "            exclusion_pattern = ''\n",
-      "        exp = re.compile('^' + exclusion_pattern + ''.join(discovered).replace('_', '.') + '$')\n",
-      "        self.candidate_words = [w for w in self.candidate_words if exp.match(w)]"
-     ],
-     "language": "python",
-     "metadata": {},
-     "outputs": [],
-     "prompt_number": 195
+     "prompt_number": 28
     },
     {
      "cell_type": "code",
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 196
+     "prompt_number": 29
     },
     {
      "cell_type": "code",
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "453\n",
-        "494"
+        "463\n",
+        "492"
        ]
       },
       {
        "stream": "stdout",
        "text": [
         "\n",
-        "505"
+        "451"
        ]
       },
       {
        "stream": "stdout",
        "text": [
         "\n",
-        "477"
+        "448"
        ]
       },
       {
        "stream": "stdout",
        "text": [
         "\n",
-        "1 loops, best of 3: 24.3 s per loop\n"
+        "1 loops, best of 3: 30.9 s per loop\n"
        ]
       }
      ],
-     "prompt_number": 179
+     "prompt_number": 30
     },
     {
      "cell_type": "code",
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "984\n",
-        "983"
+        "985\n",
+        "972"
        ]
       },
       {
        "stream": "stdout",
        "text": [
         "\n",
-        "985"
+        "976"
        ]
       },
       {
        "stream": "stdout",
        "text": [
         "\n",
-        "982"
+        "986"
        ]
       },
       {
        "stream": "stdout",
        "text": [
         "\n",
-        "1 loops, best of 3: 52.9 s per loop\n"
+        "1 loops, best of 3: 1min 8s per loop\n"
        ]
       }
      ],
-     "prompt_number": 180
+     "prompt_number": 31
     },
     {
      "cell_type": "code",
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "535\n",
-        "509"
+        "502\n",
+        "491"
        ]
       },
       {
        "stream": "stdout",
        "text": [
         "\n",
-        "519"
+        "537"
        ]
       },
       {
        "stream": "stdout",
        "text": [
         "\n",
-        "507"
+        "524"
        ]
       },
       {
        "stream": "stdout",
        "text": [
         "\n",
-        "1 loops, best of 3: 11min 14s per loop\n"
+        "1 loops, best of 3: 13min 6s per loop\n"
        ]
       }
      ],
-     "prompt_number": 181
+     "prompt_number": 32
     },
     {
      "cell_type": "code",
        "stream": "stdout",
        "text": [
         "993\n",
-        "990"
+        "991"
        ]
       },
       {
        "stream": "stdout",
        "text": [
         "\n",
-        "992"
+        "994"
        ]
       },
       {
        "stream": "stdout",
        "text": [
         "\n",
-        "994"
+        "989"
        ]
       },
       {
        "stream": "stdout",
        "text": [
         "\n",
-        "1 loops, best of 3: 44.1 s per loop\n"
+        "1 loops, best of 3: 56.6 s per loop\n"
        ]
       }
      ],
-     "prompt_number": 197
+     "prompt_number": 33
     },
     {
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "%%timeit\n",
-      "\n",
-      "wins = 0\n",
       "for _ in range(1000):\n",
-      "    g = Game(random.choice(WORDS), player=PlayerAdaptivePatternNegLookahead(WORDS))\n",
+      "    g = Game(random.choice(WORDS), player=PlayerAdaptivePattern(WORDS))\n",
       "    g.play_game()\n",
-      "    if g.game_won:\n",
-      "        wins += 1\n",
-      "print(wins)"
+      "    if not g.game_won:\n",
+      "        print(g.target, g.discovered, g.wrong_letters)"
      ],
      "language": "python",
      "metadata": {},
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "989\n",
-        "993"
+        "jutting ['_', 'u', 't', 't', 'i', 'n', 'g'] ['e', 'a', 'o', 'l', 's', 'f', 'p', 'b', 'c', 'r']\n",
+        "faze"
        ]
       },
       {
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "\n",
-        "994"
+        " ['_', 'a', '_', 'e'] ['r', 'l', 'm', 'p', 's', 'g', 'b', 'd', 'v', 'k']\n",
+        "fate"
        ]
       },
       {
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "\n",
-        "993"
+        " ['_', 'a', '_', 'e'] ['r', 'l', 'm', 'p', 's', 'g', 'b', 'd', 'v', 'k']\n",
+        "dunk"
        ]
       },
       {
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "\n",
-        "1 loops, best of 3: 46 s per loop\n"
+        " ['_', 'u', 'n', 'k'] ['e', 's', 'o', 'a', 'i', 'l', 'f', 'r', 'j', 'p']\n",
+        "loons"
        ]
-      }
-     ],
-     "prompt_number": 198
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "for _ in range(1000):\n",
-      "    g = Game(random.choice(WORDS), player=PlayerAdaptivePattern(WORDS))\n",
-      "    g.play_game()\n",
-      "    if not g.game_won:\n",
-      "        print(g.target, g.discovered, g.wrong_letters)"
-     ],
-     "language": "python",
-     "metadata": {},
-     "outputs": [
+      },
       {
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "rutted ['_', 'u', 't', 't', 'e', 'd'] ['a', 'o', 'i', 'l', 's', 'g', 'b', 'j', 'n', 'p']\n",
-        "cur"
+        " ['_', 'o', 'o', 'n', 's'] ['e', 't', 'k', 'm', 'p', 'd', 'f', 'c', 'b', 'g']\n",
+        "lab"
        ]
       },
       {
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        " ['_', 'u', '_'] ['a', 'o', 'e', 'i', 'b', 'g', 'n', 'm', 'p', 't']\n",
-        "wiles"
+        " ['_', 'a', 'b'] ['t', 'p', 'g', 'w', 'm', 'd', 'y', 'r', 'c', 'n']\n",
+        "joked"
        ]
       },
       {
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        " ['_', 'i', '_', 'e', 's'] ['a', 'm', 'n', 'v', 't', 'r', 'k', 'f', 'p', 'd']\n",
-        "oak"
+        " ['_', 'o', 'k', 'e', 'd'] ['s', 'a', 'w', 'p', 't', 'r', 'h', 'b', 'y', 'c']\n"
        ]
-      },
+      }
+     ],
+     "prompt_number": 34
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "iterations = 10000\n",
+      "wins = 0\n",
+      "for _ in range(iterations):\n",
+      "    g = Game(random.choice(WORDS), player=PlayerAdaptivePattern(WORDS))\n",
+      "    g.play_game()\n",
+      "    if g.game_won:\n",
+      "        wins += 1\n",
+      "print(wins / iterations)"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
       {
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        " ['_', 'a', '_'] ['t', 'p', 'g', 'w', 'd', 'y', 'r', 'm', 'b', 's']\n"
+        "0.9923\n"
        ]
       }
      ],
-     "prompt_number": 217
+     "prompt_number": 35
     },
     {
      "cell_type": "code",