Updated notebook versions
[cas-master-teacher-training.git] / hangman / 04-hangman-better.ipynb
1 {
2 "cells": [
3 {
4 "cell_type": "markdown",
5 "metadata": {},
6 "source": [
7 "## Better strategies\n",
8 "We've got the plumbing all working. Now to play and come up with some better playing strategies. \n",
9 "\n",
10 "First, some repetition from the previous section."
11 ]
12 },
13 {
14 "cell_type": "code",
15 "execution_count": 1,
16 "metadata": {
17 "collapsed": false
18 },
19 "outputs": [],
20 "source": [
21 "import re\n",
22 "import random\n",
23 "import string\n",
24 "import collections"
25 ]
26 },
27 {
28 "cell_type": "code",
29 "execution_count": 2,
30 "metadata": {
31 "collapsed": false
32 },
33 "outputs": [],
34 "source": [
35 "WORDS = [w.strip() for w in open('/usr/share/dict/british-english').readlines() \n",
36 " if re.match(r'^[a-z]*$', w.strip())]"
37 ]
38 },
39 {
40 "cell_type": "code",
41 "execution_count": 3,
42 "metadata": {
43 "collapsed": false
44 },
45 "outputs": [],
46 "source": [
47 "LETTER_COUNTS = collections.Counter(l.lower() for l in open('../sherlock-holmes.txt').read() if l in string.ascii_letters)\n",
48 "LETTERS_IN_ORDER = [p[0] for p in LETTER_COUNTS.most_common()]"
49 ]
50 },
51 {
52 "cell_type": "code",
53 "execution_count": 4,
54 "metadata": {
55 "collapsed": false
56 },
57 "outputs": [],
58 "source": [
59 "STARTING_LIVES = 10"
60 ]
61 },
62 {
63 "cell_type": "code",
64 "execution_count": 5,
65 "metadata": {
66 "collapsed": false
67 },
68 "outputs": [],
69 "source": [
70 "class Game:\n",
71 " def __init__(self, target, player=None, lives=STARTING_LIVES):\n",
72 " self.lives = lives\n",
73 " self.player = player\n",
74 " self.target = target\n",
75 " self.discovered = list('_' * len(target))\n",
76 " self.wrong_letters = []\n",
77 " self.game_finished = False\n",
78 " self.game_won = False\n",
79 " self.game_lost = False\n",
80 " \n",
81 " def find_all(self, letter):\n",
82 " return [p for p, l in enumerate(self.target) if l == letter]\n",
83 " \n",
84 " def update_discovered_word(self, guessed_letter):\n",
85 " locations = self.find_all(guessed_letter)\n",
86 " for location in locations:\n",
87 " self.discovered[location] = guessed_letter\n",
88 " return self.discovered\n",
89 " \n",
90 " def do_turn(self):\n",
91 " if self.player:\n",
92 " guess = self.player.guess(self.discovered, self.wrong_letters, self.lives)\n",
93 " else:\n",
94 " guess = self.ask_for_guess()\n",
95 " if guess in self.target:\n",
96 " self.update_discovered_word(guess)\n",
97 " else:\n",
98 " self.lives -= 1\n",
99 " if guess not in self.wrong_letters:\n",
100 " self.wrong_letters += [guess]\n",
101 " if self.lives == 0:\n",
102 " self.game_finished = True\n",
103 " self.game_lost = True\n",
104 " if '_' not in self.discovered:\n",
105 " self.game_finished = True\n",
106 " self.game_won = True\n",
107 " \n",
108 " def ask_for_guess(self):\n",
109 " print('Word:', ' '.join(self.discovered), \n",
110 " ' : Lives =', self.lives, \n",
111 " ', wrong guesses:', ' '.join(sorted(self.wrong_letters)))\n",
112 " guess = input('Enter letter: ').strip().lower()[0]\n",
113 " return guess\n",
114 " \n",
115 " def play_game(self):\n",
116 " while not self.game_finished:\n",
117 " self.do_turn()\n",
118 " if not self.player:\n",
119 " self.report_on_game()\n",
120 " return self.game_won\n",
121 " \n",
122 " def report_on_game(self):\n",
123 " if self.game_won:\n",
124 " print('You won! The word was', self.target)\n",
125 " else:\n",
126 " print('You lost. The word was', self.target)\n",
127 " return self.game_won"
128 ]
129 },
130 {
131 "cell_type": "code",
132 "execution_count": 6,
133 "metadata": {
134 "collapsed": false
135 },
136 "outputs": [],
137 "source": [
138 "class PlayerFixedOrder:\n",
139 " def __init__(self, ordered_letters):\n",
140 " self.ordered_letters = ordered_letters\n",
141 " \n",
142 " def guess(self, discovered, missed, lives):\n",
143 " return [l for l in self.ordered_letters if l not in discovered + missed][0]"
144 ]
145 },
146 {
147 "cell_type": "code",
148 "execution_count": 7,
149 "metadata": {
150 "collapsed": false
151 },
152 "outputs": [],
153 "source": [
154 "class PlayerAlphabetical(PlayerFixedOrder):\n",
155 " def __init__(self):\n",
156 " super().__init__(string.ascii_lowercase)\n",
157 "\n",
158 "class PlayerFreqOrdered(PlayerFixedOrder):\n",
159 " def __init__(self):\n",
160 " super().__init__(LETTERS_IN_ORDER)\n"
161 ]
162 },
163 {
164 "cell_type": "markdown",
165 "metadata": {},
166 "source": [
167 "## Timing runs\n",
168 "Use the IPython `timeit` cellmagic to time runs."
169 ]
170 },
171 {
172 "cell_type": "code",
173 "execution_count": 8,
174 "metadata": {
175 "collapsed": false
176 },
177 "outputs": [
178 {
179 "name": "stdout",
180 "output_type": "stream",
181 "text": [
182 "49\n",
183 "45\n",
184 "54\n",
185 "54\n",
186 "1 loops, best of 3: 207 ms per loop\n"
187 ]
188 }
189 ],
190 "source": [
191 "%%timeit\n",
192 "\n",
193 "wins = 0\n",
194 "for _ in range(1000):\n",
195 " g = Game(random.choice(WORDS), player=PlayerAlphabetical())\n",
196 " g.play_game()\n",
197 " if g.game_won:\n",
198 " wins += 1\n",
199 "print(wins)"
200 ]
201 },
202 {
203 "cell_type": "code",
204 "execution_count": 9,
205 "metadata": {
206 "collapsed": false
207 },
208 "outputs": [
209 {
210 "name": "stdout",
211 "output_type": "stream",
212 "text": [
213 "305\n",
214 "331\n",
215 "327\n",
216 "316\n",
217 "1 loops, best of 3: 212 ms per loop\n"
218 ]
219 }
220 ],
221 "source": [
222 "%%timeit\n",
223 "\n",
224 "wins = 0\n",
225 "for _ in range(1000):\n",
226 " g = Game(random.choice(WORDS), player=PlayerFreqOrdered())\n",
227 " g.play_game()\n",
228 " if g.game_won:\n",
229 " wins += 1\n",
230 "print(wins)"
231 ]
232 },
233 {
234 "cell_type": "code",
235 "execution_count": 10,
236 "metadata": {
237 "collapsed": false
238 },
239 "outputs": [
240 {
241 "name": "stdout",
242 "output_type": "stream",
243 "text": [
244 "8\n",
245 "8\n",
246 "15\n",
247 "8\n",
248 "1 loops, best of 3: 198 ms per loop\n"
249 ]
250 }
251 ],
252 "source": [
253 "%%timeit\n",
254 "\n",
255 "wins = 0\n",
256 "for _ in range(1000):\n",
257 " g = Game(random.choice(WORDS), player=PlayerFixedOrder(list(reversed(string.ascii_lowercase))))\n",
258 " g.play_game()\n",
259 " if g.game_won:\n",
260 " wins += 1\n",
261 "print(wins)"
262 ]
263 },
264 {
265 "cell_type": "markdown",
266 "metadata": {},
267 "source": [
268 "## Better strategy 1: letters in dictionary frequency order\n",
269 "When we're looking at letter frequencies, we've been looking at running English text. But the words being preseted to the player aren't drawn randomly from novels: they're taken from the dictionary. That means our frequencies are off because we're looking at the wrong distribution of words.\n",
270 "\n",
271 "For example, 'e' and 't' are common in running text because words like 'the' occur very frequently. But 'the' only occurs once in the dictionary. Let's sample letters from the dictionary and see if that makes a difference."
272 ]
273 },
274 {
275 "cell_type": "code",
276 "execution_count": 11,
277 "metadata": {
278 "collapsed": false
279 },
280 "outputs": [],
281 "source": [
282 "DICT_COUNTS = collections.Counter(''.join(WORDS))\n",
283 "DICT_LETTERS_IN_ORDER = [p[0] for p in DICT_COUNTS.most_common()]"
284 ]
285 },
286 {
287 "cell_type": "code",
288 "execution_count": 12,
289 "metadata": {
290 "collapsed": false
291 },
292 "outputs": [
293 {
294 "data": {
295 "text/plain": [
296 "Counter({'e': 60732, 's': 47997, 'i': 45470, 'a': 38268, 'r': 37522, 'n': 36868, 't': 36009, 'o': 31004, 'l': 26902, 'c': 21061, 'd': 20764, 'u': 17682, 'g': 16560, 'p': 15240, 'm': 13855, 'h': 11662, 'b': 9866, 'y': 7877, 'f': 7430, 'v': 5325, 'k': 4829, 'w': 4757, 'x': 1425, 'q': 1020, 'z': 979, 'j': 965})"
297 ]
298 },
299 "execution_count": 12,
300 "metadata": {},
301 "output_type": "execute_result"
302 }
303 ],
304 "source": [
305 "DICT_COUNTS"
306 ]
307 },
308 {
309 "cell_type": "code",
310 "execution_count": 13,
311 "metadata": {
312 "collapsed": false
313 },
314 "outputs": [
315 {
316 "name": "stdout",
317 "output_type": "stream",
318 "text": [
319 "['e', 's', 'i', 'a', 'r', 'n', 't', 'o', 'l', 'c', 'd', 'u', 'g', 'p', 'm', 'h', 'b', 'y', 'f', 'v', 'k', 'w', 'x', 'q', 'z', 'j']\n",
320 "['e', 't', 'a', 'o', 'i', 'h', 'n', 's', 'r', 'd', 'l', 'u', 'm', 'w', 'c', 'y', 'f', 'g', 'p', 'b', 'v', 'k', 'x', 'j', 'q', 'z']\n"
321 ]
322 }
323 ],
324 "source": [
325 "print(DICT_LETTERS_IN_ORDER)\n",
326 "print(LETTERS_IN_ORDER)"
327 ]
328 },
329 {
330 "cell_type": "markdown",
331 "metadata": {},
332 "source": [
333 "The letter orders are certainly different. Let's see if it make the player more effective than the ~30% of `FreqOrder`."
334 ]
335 },
336 {
337 "cell_type": "code",
338 "execution_count": 14,
339 "metadata": {
340 "collapsed": false
341 },
342 "outputs": [
343 {
344 "name": "stdout",
345 "output_type": "stream",
346 "text": [
347 "463\n"
348 ]
349 }
350 ],
351 "source": [
352 "wins = 0\n",
353 "for _ in range(1000):\n",
354 " g = Game(random.choice(WORDS), player=PlayerFixedOrder(DICT_LETTERS_IN_ORDER))\n",
355 " g.play_game()\n",
356 " if g.game_won:\n",
357 " wins += 1\n",
358 "print(wins)"
359 ]
360 },
361 {
362 "cell_type": "markdown",
363 "metadata": {},
364 "source": [
365 "For convenience, we can wrap this new behaviour up in its own class."
366 ]
367 },
368 {
369 "cell_type": "code",
370 "execution_count": 15,
371 "metadata": {
372 "collapsed": false
373 },
374 "outputs": [],
375 "source": [
376 "class PlayerDictOrder(PlayerFixedOrder):\n",
377 " def __init__(self):\n",
378 " super().__init__(DICT_LETTERS_IN_ORDER)"
379 ]
380 },
381 {
382 "cell_type": "code",
383 "execution_count": 16,
384 "metadata": {
385 "collapsed": false
386 },
387 "outputs": [
388 {
389 "name": "stdout",
390 "output_type": "stream",
391 "text": [
392 "474\n",
393 "460\n",
394 "490\n",
395 "467\n",
396 "1 loops, best of 3: 220 ms per loop\n"
397 ]
398 }
399 ],
400 "source": [
401 "%%timeit\n",
402 "\n",
403 "wins = 0\n",
404 "for _ in range(1000):\n",
405 " g = Game(random.choice(WORDS), player=PlayerDictOrder())\n",
406 " g.play_game()\n",
407 " if g.game_won:\n",
408 " wins += 1\n",
409 "print(wins)"
410 ]
411 },
412 {
413 "cell_type": "markdown",
414 "metadata": {},
415 "source": [
416 "That was worth it!"
417 ]
418 },
419 {
420 "cell_type": "markdown",
421 "metadata": {},
422 "source": [
423 "## Better strategy 2: paying attention the target word\n",
424 "The strategies so far haven't paid any attention to the target word. Can we use information about the target to help refine our guesses?\n",
425 "\n",
426 "We've just seen that sampling letters from the dictionary works better than other approaches. But if we're presented with a seven letter word, we don't need to pay attention to the letter occurences in three- or ten-letter words. \n",
427 "\n",
428 "Let's build our first \"adaptive\" player, one that adapts its behaviour to what's happening in this game. \n",
429 "\n",
430 "The idea is that the player will hold internally a set of `candidate_words` that could match what it knows about the target. It will then count the letters in that set and pick the most frequent, but so-far-unguessed, letter.\n",
431 "\n",
432 "When the player is created, the it's give `all_words` it can chose from, but `candidate_words` is non-existent. As soon as it's given a word, it can filter `all_words` so that `candidate_words` are just those words with the same length as the `discovered` word. From the `candidate_words`, the player can create its own set of `ordered_letters`. Guesses are pulled from the `ordered_letters` the same as before."
433 ]
434 },
435 {
436 "cell_type": "code",
437 "execution_count": 17,
438 "metadata": {
439 "collapsed": false
440 },
441 "outputs": [],
442 "source": [
443 "class PlayerAdaptiveLength:\n",
444 " def __init__(self, words):\n",
445 " self.all_words = words\n",
446 " self.candidate_words = None\n",
447 " \n",
448 " def guess(self, discovered, missed, lives):\n",
449 " if not self.candidate_words:\n",
450 " self.set_ordered_letters(len(discovered))\n",
451 " return [l for l in self.ordered_letters if l not in discovered + missed][0]\n",
452 " \n",
453 " def set_ordered_letters(self, word_len):\n",
454 " self.candidate_words = [w for w in self.all_words if len(w) == word_len]\n",
455 " counts = collections.Counter(l.lower() for l in ''.join(self.candidate_words) if l in string.ascii_letters)\n",
456 " self.ordered_letters = [p[0] for p in counts.most_common()]"
457 ]
458 },
459 {
460 "cell_type": "code",
461 "execution_count": 18,
462 "metadata": {
463 "collapsed": false
464 },
465 "outputs": [
466 {
467 "name": "stdout",
468 "output_type": "stream",
469 "text": [
470 "452\n"
471 ]
472 }
473 ],
474 "source": [
475 "wins = 0\n",
476 "for _ in range(1000):\n",
477 " g = Game(random.choice(WORDS), player=PlayerAdaptiveLength(WORDS))\n",
478 " g.play_game()\n",
479 " if g.game_won:\n",
480 " wins += 1\n",
481 "print(wins)"
482 ]
483 },
484 {
485 "cell_type": "code",
486 "execution_count": 19,
487 "metadata": {
488 "collapsed": false
489 },
490 "outputs": [
491 {
492 "name": "stdout",
493 "output_type": "stream",
494 "text": [
495 "480\n",
496 "444\n",
497 "504\n",
498 "463\n",
499 "1 loops, best of 3: 18.5 s per loop\n"
500 ]
501 }
502 ],
503 "source": [
504 "%%timeit\n",
505 "\n",
506 "wins = 0\n",
507 "for _ in range(1000):\n",
508 " g = Game(random.choice(WORDS), player=PlayerAdaptiveLength(WORDS))\n",
509 " g.play_game()\n",
510 " if g.game_won:\n",
511 " wins += 1\n",
512 "print(wins)"
513 ]
514 },
515 {
516 "cell_type": "markdown",
517 "metadata": {},
518 "source": [
519 "It's a bit better, but quite a lot slower. Can we improve the performance, to offset the cost of the slower game?"
520 ]
521 },
522 {
523 "cell_type": "markdown",
524 "metadata": {},
525 "source": [
526 "## Better strategy 3: using discovered letters\n",
527 "Once we've made some successful guesses, we know more about the `target` and we can use that information to further filter the `candiate_words`. For instance, if we know the `target` is seven letters with an 'e' in the second and fourth positions, there are probably very few candiate words to choose from.\n",
528 "\n",
529 "The challenge here is to filter the `candidate_words` depending on the letter pattern. That's non-trivial, so we'll look at how to do that bit first.\n",
530 "\n",
531 "(This matching is the sort of thing that regular expressions are designed for, but I think there are enough new ideas in this material so far. See the end of this notebook for implementations of the adaptive players that use regexes.)"
532 ]
533 },
534 {
535 "cell_type": "markdown",
536 "metadata": {},
537 "source": [
538 "### Matching\n",
539 "The problem: given a pattern and a target word, determine if the word matches the pattern. The return value of `match` should be a Boolean.\n",
540 "\n",
541 "A target is a sequence of letters. \n",
542 "\n",
543 "A pattern is a sequence of either a letter or an underscore.\n",
544 "\n",
545 "A target matches a pattern if the corresponding elements of both match.\n",
546 "\n",
547 "* A target letter always matches an underscore.\n",
548 "\n",
549 "* A target letter matches a pattern letter if they are the same.\n",
550 "\n",
551 "If any element doesn't match, the whole match fails.\n",
552 "\n",
553 "If the target and pattern are different lengths, the match fails.\n",
554 "\n",
555 "The built-in `zip` will allow us to pair up the elements of the pattern and target."
556 ]
557 },
558 {
559 "cell_type": "code",
560 "execution_count": 20,
561 "metadata": {
562 "collapsed": false
563 },
564 "outputs": [],
565 "source": [
566 "def match(pattern, target):\n",
567 " if len(pattern) != len(target):\n",
568 " return False\n",
569 " for m, c in zip(pattern, target):\n",
570 " if m == '_':\n",
571 " # true\n",
572 " pass\n",
573 " elif m != '_' and m == c:\n",
574 " # true\n",
575 " pass\n",
576 " else:\n",
577 " return False\n",
578 " return True "
579 ]
580 },
581 {
582 "cell_type": "code",
583 "execution_count": 21,
584 "metadata": {
585 "collapsed": false
586 },
587 "outputs": [
588 {
589 "data": {
590 "text/plain": [
591 "True"
592 ]
593 },
594 "execution_count": 21,
595 "metadata": {},
596 "output_type": "execute_result"
597 }
598 ],
599 "source": [
600 "match('__pp_', 'happy')"
601 ]
602 },
603 {
604 "cell_type": "code",
605 "execution_count": 22,
606 "metadata": {
607 "collapsed": false
608 },
609 "outputs": [
610 {
611 "data": {
612 "text/plain": [
613 "True"
614 ]
615 },
616 "execution_count": 22,
617 "metadata": {},
618 "output_type": "execute_result"
619 }
620 ],
621 "source": [
622 "match('__pp_', 'hippo')"
623 ]
624 },
625 {
626 "cell_type": "code",
627 "execution_count": 23,
628 "metadata": {
629 "collapsed": false
630 },
631 "outputs": [
632 {
633 "data": {
634 "text/plain": [
635 "False"
636 ]
637 },
638 "execution_count": 23,
639 "metadata": {},
640 "output_type": "execute_result"
641 }
642 ],
643 "source": [
644 "match('__pp_', 'hello')"
645 ]
646 },
647 {
648 "cell_type": "code",
649 "execution_count": 24,
650 "metadata": {
651 "collapsed": false
652 },
653 "outputs": [
654 {
655 "data": {
656 "text/plain": [
657 "False"
658 ]
659 },
660 "execution_count": 24,
661 "metadata": {},
662 "output_type": "execute_result"
663 }
664 ],
665 "source": [
666 "match('__pp_', 'happier')"
667 ]
668 },
669 {
670 "cell_type": "markdown",
671 "metadata": {},
672 "source": [
673 "That all seems to be working. Let's put it in a `player` object."
674 ]
675 },
676 {
677 "cell_type": "code",
678 "execution_count": 25,
679 "metadata": {
680 "collapsed": false
681 },
682 "outputs": [],
683 "source": [
684 "class PlayerAdaptiveIncludedLetters:\n",
685 " def __init__(self, words):\n",
686 " self.candidate_words = words\n",
687 " \n",
688 " def guess(self, discovered, missed, lives):\n",
689 " self.filter_candidate_words(discovered)\n",
690 " self.set_ordered_letters()\n",
691 " return [l for l in self.ordered_letters if l not in discovered + missed][0]\n",
692 " \n",
693 " def filter_candidate_words(self, discovered):\n",
694 " self.candidate_words = [w for w in self.candidate_words if self.match(discovered, w)]\n",
695 " \n",
696 " def set_ordered_letters(self):\n",
697 " counts = collections.Counter(l.lower() for l in ''.join(self.candidate_words) if l in string.ascii_letters)\n",
698 " self.ordered_letters = [p[0] for p in counts.most_common()]\n",
699 " \n",
700 " def match(self, pattern, target):\n",
701 " if len(pattern) != len(target):\n",
702 " return False\n",
703 " for m, c in zip(pattern, target):\n",
704 " if m == '_':\n",
705 " # true\n",
706 " pass\n",
707 " elif m != '_' and m == c:\n",
708 " # true\n",
709 " pass\n",
710 " else:\n",
711 " return False\n",
712 " return True "
713 ]
714 },
715 {
716 "cell_type": "code",
717 "execution_count": 26,
718 "metadata": {
719 "collapsed": false
720 },
721 "outputs": [
722 {
723 "name": "stdout",
724 "output_type": "stream",
725 "text": [
726 "981\n"
727 ]
728 }
729 ],
730 "source": [
731 "wins = 0\n",
732 "for _ in range(1000):\n",
733 " g = Game(random.choice(WORDS), player=PlayerAdaptiveIncludedLetters(WORDS))\n",
734 " g.play_game()\n",
735 " if g.game_won:\n",
736 " wins += 1\n",
737 "print(wins)"
738 ]
739 },
740 {
741 "cell_type": "code",
742 "execution_count": 27,
743 "metadata": {
744 "collapsed": false
745 },
746 "outputs": [
747 {
748 "name": "stdout",
749 "output_type": "stream",
750 "text": [
751 "982\n",
752 "987\n",
753 "984\n",
754 "982\n",
755 "1 loops, best of 3: 51.7 s per loop\n"
756 ]
757 }
758 ],
759 "source": [
760 "%%timeit\n",
761 "\n",
762 "wins = 0\n",
763 "for _ in range(1000):\n",
764 " g = Game(random.choice(WORDS), player=PlayerAdaptiveIncludedLetters(WORDS))\n",
765 " g.play_game()\n",
766 " if g.game_won:\n",
767 " wins += 1\n",
768 "print(wins)"
769 ]
770 },
771 {
772 "cell_type": "markdown",
773 "metadata": {},
774 "source": [
775 "Quite an improvement!\n",
776 "\n",
777 "But let's see if we can do better."
778 ]
779 },
780 {
781 "cell_type": "markdown",
782 "metadata": {},
783 "source": [
784 "## Better strategy 4: using wrong guesses\n",
785 "If we make a guess that's wrong, we can also use that to eliminate `candidate_words`. If we know the `target` doesn't contain an 'e', we can eliminate all `candidate_words` that contain an 'e'. If we also know that the word contains an 'e' in the third positon, we can also eliminate all words that contain and 'e' in any other position.\n",
786 "\n",
787 "This requires an different verion of `match`, so that a word matches a pattern if an underscore in the pattern does not match any of the known-incorrect letters."
788 ]
789 },
790 {
791 "cell_type": "code",
792 "execution_count": 28,
793 "metadata": {
794 "collapsed": false
795 },
796 "outputs": [],
797 "source": [
798 "def match_exclusion(pattern, target, excluded=None):\n",
799 " if not excluded:\n",
800 " excluded = ''\n",
801 " if len(pattern) != len(target):\n",
802 " return False\n",
803 " for m, c in zip(pattern, target):\n",
804 " if m == '_' and c not in excluded:\n",
805 " # true\n",
806 " pass\n",
807 " elif m != '_':\n",
808 " # true\n",
809 " pass\n",
810 " else:\n",
811 " return False\n",
812 " return True "
813 ]
814 },
815 {
816 "cell_type": "code",
817 "execution_count": 29,
818 "metadata": {
819 "collapsed": false
820 },
821 "outputs": [
822 {
823 "data": {
824 "text/plain": [
825 "True"
826 ]
827 },
828 "execution_count": 29,
829 "metadata": {},
830 "output_type": "execute_result"
831 }
832 ],
833 "source": [
834 "match_exclusion('__pp_', 'happy', 'x')"
835 ]
836 },
837 {
838 "cell_type": "code",
839 "execution_count": 30,
840 "metadata": {
841 "collapsed": false
842 },
843 "outputs": [
844 {
845 "data": {
846 "text/plain": [
847 "False"
848 ]
849 },
850 "execution_count": 30,
851 "metadata": {},
852 "output_type": "execute_result"
853 }
854 ],
855 "source": [
856 "match_exclusion('__pp_', 'happy', 'a')"
857 ]
858 },
859 {
860 "cell_type": "code",
861 "execution_count": 31,
862 "metadata": {
863 "collapsed": false
864 },
865 "outputs": [
866 {
867 "data": {
868 "text/plain": [
869 "True"
870 ]
871 },
872 "execution_count": 31,
873 "metadata": {},
874 "output_type": "execute_result"
875 }
876 ],
877 "source": [
878 "match_exclusion('__pp_', 'happy', 'p')"
879 ]
880 },
881 {
882 "cell_type": "code",
883 "execution_count": 32,
884 "metadata": {
885 "collapsed": false
886 },
887 "outputs": [
888 {
889 "data": {
890 "text/plain": [
891 "False"
892 ]
893 },
894 "execution_count": 32,
895 "metadata": {},
896 "output_type": "execute_result"
897 }
898 ],
899 "source": [
900 "match_exclusion('__pp_', 'happy', 'hp')"
901 ]
902 },
903 {
904 "cell_type": "code",
905 "execution_count": 33,
906 "metadata": {
907 "collapsed": false
908 },
909 "outputs": [],
910 "source": [
911 "class PlayerAdaptiveExcludedLetters:\n",
912 " def __init__(self, words):\n",
913 " self.candidate_words = words\n",
914 " \n",
915 " def guess(self, discovered, missed, lives):\n",
916 " self.filter_candidate_words(discovered, missed)\n",
917 " self.set_ordered_letters()\n",
918 " return [l for l in self.ordered_letters if l not in discovered + missed][0]\n",
919 " \n",
920 " def filter_candidate_words(self, discovered, missed):\n",
921 " attempted_letters = list(set(l.lower() for l in discovered + missed if l in string.ascii_letters))\n",
922 " self.candidate_words = [w for w in self.candidate_words if self.match_exclusion(discovered, w, attempted_letters)]\n",
923 " \n",
924 " def set_ordered_letters(self):\n",
925 " counts = collections.Counter(l.lower() for l in ''.join(self.candidate_words) if l in string.ascii_letters)\n",
926 " self.ordered_letters = [p[0] for p in counts.most_common()]\n",
927 " \n",
928 " def match_exclusion(self, pattern, target, excluded=None):\n",
929 " if not excluded:\n",
930 " excluded = ''\n",
931 " if len(pattern) != len(target):\n",
932 " return False\n",
933 " for m, c in zip(pattern, target):\n",
934 " if m == '_' and c not in excluded:\n",
935 " # true\n",
936 " pass\n",
937 " elif m != '_':\n",
938 " # true\n",
939 " pass\n",
940 " else:\n",
941 " return False\n",
942 " return True "
943 ]
944 },
945 {
946 "cell_type": "code",
947 "execution_count": 34,
948 "metadata": {
949 "collapsed": false
950 },
951 "outputs": [
952 {
953 "name": "stdout",
954 "output_type": "stream",
955 "text": [
956 "737\n"
957 ]
958 }
959 ],
960 "source": [
961 "wins = 0\n",
962 "for _ in range(1000):\n",
963 " g = Game(random.choice(WORDS), player=PlayerAdaptiveExcludedLetters(WORDS))\n",
964 " g.play_game()\n",
965 " if g.game_won:\n",
966 " wins += 1\n",
967 "print(wins)"
968 ]
969 },
970 {
971 "cell_type": "code",
972 "execution_count": 35,
973 "metadata": {
974 "collapsed": false
975 },
976 "outputs": [
977 {
978 "name": "stdout",
979 "output_type": "stream",
980 "text": [
981 "735\n",
982 "748\n",
983 "742\n",
984 "748\n",
985 "1 loops, best of 3: 1min 4s per loop\n"
986 ]
987 }
988 ],
989 "source": [
990 "%%timeit\n",
991 "\n",
992 "wins = 0\n",
993 "for _ in range(1000):\n",
994 " g = Game(random.choice(WORDS), player=PlayerAdaptiveExcludedLetters(WORDS))\n",
995 " g.play_game()\n",
996 " if g.game_won:\n",
997 " wins += 1\n",
998 "print(wins)"
999 ]
1000 },
1001 {
1002 "cell_type": "markdown",
1003 "metadata": {},
1004 "source": [
1005 "That's better than the non-adaptive players, but not as good as the `IncludedLetters` player. But we could combine the two approaches. Will that work better?"
1006 ]
1007 },
1008 {
1009 "cell_type": "markdown",
1010 "metadata": {},
1011 "source": [
1012 "## Better strategy 5: using both correct and wrong guesses\n",
1013 "Not much to say. Let's just merge the two previous approaches."
1014 ]
1015 },
1016 {
1017 "cell_type": "code",
1018 "execution_count": 36,
1019 "metadata": {
1020 "collapsed": false
1021 },
1022 "outputs": [],
1023 "source": [
1024 "class PlayerAdaptivePattern:\n",
1025 " def __init__(self, words):\n",
1026 " self.candidate_words = words\n",
1027 " \n",
1028 " def guess(self, discovered, missed, lives):\n",
1029 " self.filter_candidate_words(discovered, missed)\n",
1030 " self.set_ordered_letters()\n",
1031 " return [l for l in self.ordered_letters if l not in discovered + missed][0]\n",
1032 " \n",
1033 " def filter_candidate_words(self, discovered, missed):\n",
1034 " attempted_letters = list(set(l.lower() for l in discovered + missed if l in string.ascii_letters))\n",
1035 " self.candidate_words = [w for w in self.candidate_words if self.match(discovered, w, attempted_letters)]\n",
1036 " \n",
1037 " def set_ordered_letters(self):\n",
1038 " counts = collections.Counter(l.lower() for l in ''.join(self.candidate_words) if l in string.ascii_letters)\n",
1039 " self.ordered_letters = [p[0] for p in counts.most_common()]\n",
1040 " \n",
1041 " def match(self, pattern, target, excluded=None):\n",
1042 " if not excluded:\n",
1043 " excluded = ''\n",
1044 " if len(pattern) != len(target):\n",
1045 " return False\n",
1046 " for m, c in zip(pattern, target):\n",
1047 " if m == '_' and c not in excluded:\n",
1048 " # true\n",
1049 " pass\n",
1050 " elif m != '_' and m == c:\n",
1051 " # true\n",
1052 " pass\n",
1053 " else:\n",
1054 " return False\n",
1055 " return True"
1056 ]
1057 },
1058 {
1059 "cell_type": "code",
1060 "execution_count": 37,
1061 "metadata": {
1062 "collapsed": false
1063 },
1064 "outputs": [
1065 {
1066 "name": "stdout",
1067 "output_type": "stream",
1068 "text": [
1069 "993\n"
1070 ]
1071 }
1072 ],
1073 "source": [
1074 "wins = 0\n",
1075 "for _ in range(1000):\n",
1076 " g = Game(random.choice(WORDS), player=PlayerAdaptivePattern(WORDS))\n",
1077 " g.play_game()\n",
1078 " if g.game_won:\n",
1079 " wins += 1\n",
1080 "print(wins)"
1081 ]
1082 },
1083 {
1084 "cell_type": "code",
1085 "execution_count": 38,
1086 "metadata": {
1087 "collapsed": false
1088 },
1089 "outputs": [
1090 {
1091 "name": "stdout",
1092 "output_type": "stream",
1093 "text": [
1094 "990\n",
1095 "995\n",
1096 "992\n",
1097 "994\n",
1098 "1 loops, best of 3: 47.1 s per loop\n"
1099 ]
1100 }
1101 ],
1102 "source": [
1103 "%%timeit\n",
1104 "\n",
1105 "wins = 0\n",
1106 "for _ in range(1000):\n",
1107 " g = Game(random.choice(WORDS), player=PlayerAdaptivePattern(WORDS))\n",
1108 " g.play_game()\n",
1109 " if g.game_won:\n",
1110 " wins += 1\n",
1111 "print(wins)"
1112 ]
1113 },
1114 {
1115 "cell_type": "markdown",
1116 "metadata": {},
1117 "source": [
1118 "## DRYing it up\n",
1119 "You've probably noticed some duplicate code above. "
1120 ]
1121 },
1122 {
1123 "cell_type": "code",
1124 "execution_count": 39,
1125 "metadata": {
1126 "collapsed": false
1127 },
1128 "outputs": [],
1129 "source": [
1130 "class PlayerAdaptive:\n",
1131 " def __init__(self, words):\n",
1132 " self.candidate_words = words\n",
1133 " \n",
1134 " def guess(self, discovered, missed, lives):\n",
1135 " self.filter_candidate_words(discovered, missed)\n",
1136 " self.set_ordered_letters()\n",
1137 " return [l for l in self.ordered_letters if l not in discovered + missed][0]\n",
1138 " \n",
1139 " def filter_candidate_words(self, discovered, missed):\n",
1140 " pass\n",
1141 " \n",
1142 " def set_ordered_letters(self):\n",
1143 " counts = collections.Counter(l.lower() \n",
1144 " for l in ''.join(self.candidate_words) + string.ascii_lowercase \n",
1145 " if l in string.ascii_letters)\n",
1146 " self.ordered_letters = [p[0] for p in counts.most_common()]\n",
1147 "\n",
1148 " def match(self, pattern, target, excluded=None):\n",
1149 " if not excluded:\n",
1150 " excluded = ''\n",
1151 " if len(pattern) != len(target):\n",
1152 " return False\n",
1153 " for m, c in zip(pattern, target):\n",
1154 " if m == '_' and c not in excluded:\n",
1155 " # true\n",
1156 " pass\n",
1157 " elif m != '_' and m == c:\n",
1158 " # true\n",
1159 " pass\n",
1160 " else:\n",
1161 " return False\n",
1162 " return True "
1163 ]
1164 },
1165 {
1166 "cell_type": "code",
1167 "execution_count": 40,
1168 "metadata": {
1169 "collapsed": false
1170 },
1171 "outputs": [],
1172 "source": [
1173 "class PlayerAdaptiveLength(PlayerAdaptive):\n",
1174 " def __init__(self, words):\n",
1175 " super().__init__(words)\n",
1176 " self.word_len = None\n",
1177 " self.ordered_letters = None\n",
1178 " \n",
1179 " def filter_candidate_words(self, discovered, missed):\n",
1180 " if not self.word_len:\n",
1181 " self.word_len = len(discovered)\n",
1182 " self.candidate_words = [w for w in self.candidate_words if len(w) == self.word_len]\n",
1183 " \n",
1184 " def set_ordered_letters(self):\n",
1185 " if not self.ordered_letters:\n",
1186 " super().set_ordered_letters()"
1187 ]
1188 },
1189 {
1190 "cell_type": "code",
1191 "execution_count": 41,
1192 "metadata": {
1193 "collapsed": false
1194 },
1195 "outputs": [],
1196 "source": [
1197 "class PlayerAdaptiveIncludedLetters(PlayerAdaptive):\n",
1198 " def filter_candidate_words(self, discovered, missed):\n",
1199 " self.candidate_words = [w for w in self.candidate_words if self.match(discovered, w)]"
1200 ]
1201 },
1202 {
1203 "cell_type": "code",
1204 "execution_count": 42,
1205 "metadata": {
1206 "collapsed": false
1207 },
1208 "outputs": [],
1209 "source": [
1210 "class PlayerAdaptiveExcludedLetters(PlayerAdaptive):\n",
1211 " def filter_candidate_words(self, discovered, missed):\n",
1212 " if missed:\n",
1213 " empty_target = '_' * len(discovered)\n",
1214 " self.candidate_words = [w for w in self.candidate_words if self.match(empty_target, w, missed)] "
1215 ]
1216 },
1217 {
1218 "cell_type": "code",
1219 "execution_count": 43,
1220 "metadata": {
1221 "collapsed": false
1222 },
1223 "outputs": [],
1224 "source": [
1225 "class PlayerAdaptivePattern(PlayerAdaptive):\n",
1226 " def filter_candidate_words(self, discovered, missed):\n",
1227 " attempted_letters = [l for l in discovered if l != '_'] + missed\n",
1228 " self.candidate_words = [w for w in self.candidate_words if self.match(discovered, w, attempted_letters)]"
1229 ]
1230 },
1231 {
1232 "cell_type": "code",
1233 "execution_count": 44,
1234 "metadata": {
1235 "collapsed": false
1236 },
1237 "outputs": [
1238 {
1239 "name": "stdout",
1240 "output_type": "stream",
1241 "text": [
1242 "472\n",
1243 "460\n",
1244 "462\n",
1245 "484\n",
1246 "1 loops, best of 3: 17 s per loop\n"
1247 ]
1248 }
1249 ],
1250 "source": [
1251 "%%timeit\n",
1252 "\n",
1253 "wins = 0\n",
1254 "for _ in range(1000):\n",
1255 " g = Game(random.choice(WORDS), player=PlayerAdaptiveLength(WORDS))\n",
1256 " g.play_game()\n",
1257 " if g.game_won:\n",
1258 " wins += 1\n",
1259 "print(wins)"
1260 ]
1261 },
1262 {
1263 "cell_type": "code",
1264 "execution_count": 45,
1265 "metadata": {
1266 "collapsed": false
1267 },
1268 "outputs": [
1269 {
1270 "name": "stdout",
1271 "output_type": "stream",
1272 "text": [
1273 "988\n",
1274 "985\n",
1275 "987\n",
1276 "988\n",
1277 "1 loops, best of 3: 49.3 s per loop\n"
1278 ]
1279 }
1280 ],
1281 "source": [
1282 "%%timeit\n",
1283 "\n",
1284 "wins = 0\n",
1285 "for _ in range(1000):\n",
1286 " g = Game(random.choice(WORDS), player=PlayerAdaptiveIncludedLetters(WORDS))\n",
1287 " g.play_game()\n",
1288 " if g.game_won:\n",
1289 " wins += 1\n",
1290 "print(wins)"
1291 ]
1292 },
1293 {
1294 "cell_type": "code",
1295 "execution_count": 46,
1296 "metadata": {
1297 "collapsed": false
1298 },
1299 "outputs": [
1300 {
1301 "name": "stdout",
1302 "output_type": "stream",
1303 "text": [
1304 "575\n",
1305 "611\n",
1306 "583\n",
1307 "607\n",
1308 "1 loops, best of 3: 5min 10s per loop\n"
1309 ]
1310 }
1311 ],
1312 "source": [
1313 "%%timeit\n",
1314 "\n",
1315 "wins = 0\n",
1316 "for _ in range(1000):\n",
1317 " g = Game(random.choice(WORDS), player=PlayerAdaptiveExcludedLetters(WORDS))\n",
1318 " g.play_game()\n",
1319 " if g.game_won:\n",
1320 " wins += 1\n",
1321 "print(wins)"
1322 ]
1323 },
1324 {
1325 "cell_type": "code",
1326 "execution_count": 47,
1327 "metadata": {
1328 "collapsed": false
1329 },
1330 "outputs": [
1331 {
1332 "name": "stdout",
1333 "output_type": "stream",
1334 "text": [
1335 "990\n",
1336 "992\n",
1337 "995\n",
1338 "991\n",
1339 "1 loops, best of 3: 39.1 s per loop\n"
1340 ]
1341 }
1342 ],
1343 "source": [
1344 "%%timeit\n",
1345 "\n",
1346 "wins = 0\n",
1347 "for _ in range(1000):\n",
1348 " g = Game(random.choice(WORDS), player=PlayerAdaptivePattern(WORDS))\n",
1349 " g.play_game()\n",
1350 " if g.game_won:\n",
1351 " wins += 1\n",
1352 "print(wins)"
1353 ]
1354 },
1355 {
1356 "cell_type": "code",
1357 "execution_count": 48,
1358 "metadata": {
1359 "collapsed": false
1360 },
1361 "outputs": [
1362 {
1363 "name": "stdout",
1364 "output_type": "stream",
1365 "text": [
1366 "puffy ['p', 'u', '_', '_', 'y'] ['s', 'e', 'a', 'o', 'i', 'm', 'l', 'n', 'r', 't']\n",
1367 "jar ['_', 'a', 'r'] ['t', 'p', 'g', 'w', 'd', 'm', 'b', 'y', 'f', 'o']\n",
1368 "fate ['_', 'a', '_', 'e'] ['l', 'r', 'm', 'p', 's', 'g', 'd', 'k', 'v', 'b']\n",
1369 "robs ['_', 'o', 'b', 's'] ['e', 'a', 't', 'h', 'g', 'f', 'l', 'c', 'm', 'j']\n"
1370 ]
1371 }
1372 ],
1373 "source": [
1374 "for _ in range(1000):\n",
1375 " g = Game(random.choice(WORDS), player=PlayerAdaptivePattern(WORDS))\n",
1376 " g.play_game()\n",
1377 " if not g.game_won:\n",
1378 " print(g.target, g.discovered, g.wrong_letters)"
1379 ]
1380 },
1381 {
1382 "cell_type": "markdown",
1383 "metadata": {},
1384 "source": [
1385 "## Better strategy 6: divide and conquer\n",
1386 "This is a general approach to solving big problems: divide the candidate set into two equal halves, so that a simple test can discard half the candidates. \n",
1387 "\n",
1388 "For Hangman, we want to guess the letter that will eliminate half the `candidate_words`. For each letter that's present in all of the candidate words, score the letter +1 for every word it appears in, and -1 for every word it doesn't appear in. The closer the total is to zero, the closer letter is to being in half the `candidate_words`. We then pick the unguesses letter with the score closest to zero.\n",
1389 "\n",
1390 "As the `PlayerAdaptivePattern` seems to work well, we'll use that to keep track of the `candidate_words`. This variant changes the order of the letters, so it's a rewrite of `set_ordered_letters()`."
1391 ]
1392 },
1393 {
1394 "cell_type": "code",
1395 "execution_count": 49,
1396 "metadata": {
1397 "collapsed": false
1398 },
1399 "outputs": [],
1400 "source": [
1401 "class PlayerAdaptiveSplit(PlayerAdaptivePattern):\n",
1402 " def set_ordered_letters(self):\n",
1403 " def letter_diff(l):\n",
1404 " total = 0\n",
1405 " for w in self.candidate_words:\n",
1406 " if l in w:\n",
1407 " total += 1\n",
1408 " else:\n",
1409 " total -= 1\n",
1410 " return total\n",
1411 " # return abs(sum(1 if l in w else -1 for w in self.candidate_words))\n",
1412 " possible_letters = set(''.join(self.candidate_words))\n",
1413 " letter_diffs = [(l, letter_diff(l)) for l in possible_letters]\n",
1414 " self.ordered_letters = [p[0] for p in sorted(letter_diffs, key=lambda p: p[1])]"
1415 ]
1416 },
1417 {
1418 "cell_type": "code",
1419 "execution_count": 50,
1420 "metadata": {
1421 "collapsed": false
1422 },
1423 "outputs": [
1424 {
1425 "name": "stdout",
1426 "output_type": "stream",
1427 "text": [
1428 "113\n",
1429 "114\n",
1430 "131\n",
1431 "132\n",
1432 "1 loops, best of 3: 2min 28s per loop\n"
1433 ]
1434 }
1435 ],
1436 "source": [
1437 "%%timeit\n",
1438 "\n",
1439 "wins = 0\n",
1440 "for _ in range(1000):\n",
1441 " g = Game(random.choice(WORDS), player=PlayerAdaptiveSplit(WORDS))\n",
1442 " g.play_game()\n",
1443 " if g.game_won:\n",
1444 " wins += 1\n",
1445 "print(wins)"
1446 ]
1447 },
1448 {
1449 "cell_type": "markdown",
1450 "metadata": {},
1451 "source": [
1452 "Oh.\n",
1453 "\n",
1454 "Why is this happening?"
1455 ]
1456 },
1457 {
1458 "cell_type": "markdown",
1459 "metadata": {},
1460 "source": [
1461 "## See below for the regex-using versions"
1462 ]
1463 },
1464 {
1465 "cell_type": "code",
1466 "execution_count": 51,
1467 "metadata": {
1468 "collapsed": false
1469 },
1470 "outputs": [],
1471 "source": [
1472 "class PlayerAdaptiveIncludedLettersRegex:\n",
1473 " def __init__(self, words):\n",
1474 " self.candidate_words = words\n",
1475 " \n",
1476 " def guess(self, discovered, missed, lives):\n",
1477 " self.filter_candidate_words(discovered)\n",
1478 " self.set_ordered_letters()\n",
1479 " return [l for l in self.ordered_letters if l not in discovered + missed][0]\n",
1480 " \n",
1481 " def filter_candidate_words(self, discovered):\n",
1482 " exp = re.compile('^' + ''.join(discovered).replace('_', '.') + '$')\n",
1483 " self.candidate_words = [w for w in self.candidate_words if exp.match(w)]\n",
1484 " \n",
1485 " def set_ordered_letters(self):\n",
1486 " counts = collections.Counter(l.lower() for l in ''.join(self.candidate_words) if l in string.ascii_letters)\n",
1487 " self.ordered_letters = [p[0] for p in counts.most_common()]"
1488 ]
1489 },
1490 {
1491 "cell_type": "code",
1492 "execution_count": 52,
1493 "metadata": {
1494 "collapsed": false
1495 },
1496 "outputs": [],
1497 "source": [
1498 "class PlayerAdaptiveExcludedLettersRegex:\n",
1499 " def __init__(self, words):\n",
1500 " self.candidate_words = words\n",
1501 " \n",
1502 " def guess(self, discovered, missed, lives):\n",
1503 " self.filter_candidate_words(missed)\n",
1504 " self.set_ordered_letters()\n",
1505 " return [l for l in self.ordered_letters if l not in discovered + missed][0]\n",
1506 " \n",
1507 " def filter_candidate_words(self, missed):\n",
1508 " if missed:\n",
1509 " exp = re.compile('^[^' + ''.join(missed) + ']*$')\n",
1510 " self.candidate_words = [w for w in self.candidate_words if exp.match(w)]\n",
1511 " \n",
1512 " def set_ordered_letters(self):\n",
1513 " counts = collections.Counter(l.lower() for l in ''.join(self.candidate_words) if l in string.ascii_letters)\n",
1514 " self.ordered_letters = [p[0] for p in counts.most_common()]"
1515 ]
1516 },
1517 {
1518 "cell_type": "code",
1519 "execution_count": 53,
1520 "metadata": {
1521 "collapsed": false
1522 },
1523 "outputs": [],
1524 "source": [
1525 "re.match('^[^xaz]*$', 'happy')"
1526 ]
1527 },
1528 {
1529 "cell_type": "code",
1530 "execution_count": 54,
1531 "metadata": {
1532 "collapsed": false
1533 },
1534 "outputs": [],
1535 "source": [
1536 "class PlayerAdaptivePatternRegex:\n",
1537 " def __init__(self, words):\n",
1538 " self.candidate_words = words\n",
1539 " \n",
1540 " def guess(self, discovered, missed, lives):\n",
1541 " self.filter_candidate_words(discovered, missed)\n",
1542 " self.set_ordered_letters()\n",
1543 " return [l for l in self.ordered_letters if l not in discovered + missed][0]\n",
1544 " \n",
1545 " def filter_candidate_words(self, discovered, missed):\n",
1546 " attempted_letters = list(set(l.lower() for l in discovered + missed if l in string.ascii_letters))\n",
1547 " if attempted_letters:\n",
1548 " exclusion_pattern = '[^' + ''.join(attempted_letters) + ']'\n",
1549 " else:\n",
1550 " exclusion_pattern = '.'\n",
1551 " exp = re.compile('^' + ''.join(discovered).replace('_', exclusion_pattern) + '$')\n",
1552 " self.candidate_words = [w for w in self.candidate_words if exp.match(w)]\n",
1553 " \n",
1554 " def set_ordered_letters(self):\n",
1555 " counts = collections.Counter(l.lower() for l in ''.join(self.candidate_words) if l in string.ascii_letters)\n",
1556 " self.ordered_letters = [p[0] for p in counts.most_common()]\n"
1557 ]
1558 },
1559 {
1560 "cell_type": "code",
1561 "execution_count": 55,
1562 "metadata": {
1563 "collapsed": false
1564 },
1565 "outputs": [],
1566 "source": [
1567 "class PlayerAdaptiveRegex:\n",
1568 " def __init__(self, words):\n",
1569 " self.candidate_words = words\n",
1570 " \n",
1571 " def guess(self, discovered, missed, lives):\n",
1572 " self.filter_candidate_words(discovered, missed)\n",
1573 " self.set_ordered_letters()\n",
1574 " return [l for l in self.ordered_letters if l not in discovered + missed][0]\n",
1575 " \n",
1576 " def filter_candidate_words(self, discovered, missed):\n",
1577 " pass\n",
1578 " \n",
1579 " def set_ordered_letters(self):\n",
1580 " counts = collections.Counter(l.lower() \n",
1581 " for l in ''.join(self.candidate_words) + string.ascii_lowercase \n",
1582 " if l in string.ascii_letters)\n",
1583 " self.ordered_letters = [p[0] for p in counts.most_common()]"
1584 ]
1585 },
1586 {
1587 "cell_type": "code",
1588 "execution_count": 56,
1589 "metadata": {
1590 "collapsed": false
1591 },
1592 "outputs": [],
1593 "source": [
1594 "class PlayerAdaptiveLengthRegex(PlayerAdaptiveRegex):\n",
1595 " def __init__(self, words):\n",
1596 " super().__init__(words)\n",
1597 " self.word_len = None\n",
1598 " self.ordered_letters = None\n",
1599 " \n",
1600 " def filter_candidate_words(self, discovered, missed):\n",
1601 " if not self.word_len:\n",
1602 " self.word_len = len(discovered)\n",
1603 " self.candidate_words = [w for w in self.candidate_words if len(w) == self.word_len]\n",
1604 " \n",
1605 " def set_ordered_letters(self):\n",
1606 " if not self.ordered_letters:\n",
1607 " super().set_ordered_letters()"
1608 ]
1609 },
1610 {
1611 "cell_type": "code",
1612 "execution_count": 57,
1613 "metadata": {
1614 "collapsed": false
1615 },
1616 "outputs": [],
1617 "source": [
1618 "class PlayerAdaptiveIncludedLettersRegex(PlayerAdaptiveRegex):\n",
1619 " def filter_candidate_words(self, discovered, missed):\n",
1620 " exp = re.compile('^' + ''.join(discovered).replace('_', '.') + '$')\n",
1621 " self.candidate_words = [w for w in self.candidate_words if exp.match(w)]"
1622 ]
1623 },
1624 {
1625 "cell_type": "code",
1626 "execution_count": 58,
1627 "metadata": {
1628 "collapsed": false
1629 },
1630 "outputs": [],
1631 "source": [
1632 "class PlayerAdaptiveExcludedLettersRegex(PlayerAdaptiveRegex):\n",
1633 " def filter_candidate_words(self, discovered, missed):\n",
1634 " if missed:\n",
1635 " exp = re.compile('^[^' + ''.join(missed) + ']*$')\n",
1636 " self.candidate_words = [w for w in self.candidate_words if exp.match(w)] "
1637 ]
1638 },
1639 {
1640 "cell_type": "code",
1641 "execution_count": 59,
1642 "metadata": {
1643 "collapsed": false
1644 },
1645 "outputs": [],
1646 "source": [
1647 "class PlayerAdaptivePatternRegex(PlayerAdaptiveRegex):\n",
1648 " def filter_candidate_words(self, discovered, missed):\n",
1649 " attempted_letters = [l for l in discovered if l != '_'] + missed\n",
1650 " if attempted_letters:\n",
1651 " exclusion_pattern = '[^' + ''.join(attempted_letters) + ']'\n",
1652 " else:\n",
1653 " exclusion_pattern = '.'\n",
1654 " exp = re.compile('^' + ''.join(discovered).replace('_', exclusion_pattern) + '$')\n",
1655 " self.candidate_words = [w for w in self.candidate_words if exp.match(w)]"
1656 ]
1657 }
1658 ],
1659 "metadata": {
1660 "kernelspec": {
1661 "display_name": "Python 3",
1662 "language": "python",
1663 "name": "python3"
1664 },
1665 "language_info": {
1666 "codemirror_mode": {
1667 "name": "ipython",
1668 "version": 3
1669 },
1670 "file_extension": ".py",
1671 "mimetype": "text/x-python",
1672 "name": "python",
1673 "nbconvert_exporter": "python",
1674 "pygments_lexer": "ipython3",
1675 "version": "3.5.2+"
1676 }
1677 },
1678 "nbformat": 4,
1679 "nbformat_minor": 0
1680 }