riddle-solver.md

   1 ---
   2 jupyter:
   3   jupytext:
   4     formats: ipynb,md
   5     text_representation:
   6       extension: .md
   7       format_name: markdown
   8       format_version: '1.3'
   9       jupytext_version: 1.14.5
  10   kernelspec:
  11     display_name: Python 3 (ipykernel)
  12     language: python
  13     name: python3
  14 ---
  15
  16 ```python
  17 import unicodedata
  18 import re
  19 from dataclasses import dataclass
  20 from typing import Dict, Tuple, List, Set
  21 from enum import Enum, auto
  22 ```
  23
  24 ```python
  25 dictionary : List[str] = [unicodedata.normalize('NFKD', w.strip()).\
  26                  encode('ascii', 'ignore').\
  27                  decode('utf-8')
  28               for w in open('/usr/share/dict/british-english').readlines()
  29               if w.strip().islower()
  30               if w.strip().isalpha()
  31              ]
  32 dictionary[:5]
  33 ```
  34
  35 ```python
  36 ordinals : Dict[str, int] =  { 'last': -1
  37             , 'first': 1
  38             , 'second': 2
  39             , 'third': 3
  40             , 'fourth': 4
  41             , 'fifth': 5
  42             , 'sixth': 6
  43             , 'seventh': 7
  44             , 'eighth': 8
  45             , 'ninth': 9
  46             , 'tenth': 10
  47             , 'eleventh': 11
  48             , 'twelfth': 12
  49             }
  50
  51 # reverse_ordinals : Dict[int, str] = {n: w for w, n in ordinals.items()}
  52
  53 def from_ordinal(word: str) -> int:
  54   return ordinals[word]
  55
  56 # def to_ordinal(number: int) -> str:
  57 #   return reverse_ordinals[number]
  58 ```
  59
  60 ```python
# Quick check of the lookup: 'seventh' maps to 7 in `ordinals`.
from_ordinal('seventh')
  62 ```
  63
  64 ```python
  65 def tokenise(phrase: str) -> List[str]:
  66   return [w.lower() for w in re.split(r'\W+', phrase) if w]
  67 ```
  68
  69 ```python
# Example: punctuation is dropped and every word is lowercased, giving
# ['my', 'first', 'is', 'in', 'apple', 'but', 'not', 'in', 'fish'].
tokenise("My first is in apple, but not in fish.")
  71 ```
  72
  73 ```python
class RiddleValence(Enum):
  """Whether a clue's letters are allowed or forbidden at a position."""
  # the answer's letter must be one of the clue's letters
  Include = auto()
  # the answer's letter must not be any of the clue's letters
  Exclude = auto()
  77
@dataclass
class RiddleElement:
  """A constraint on one letter: a set of letters plus how to apply it."""
  valence : RiddleValence  # Include = letter must be in `letters`, Exclude = must not be
  letters : Set[str]       # the individual characters the clue refers to
  82
# Type alias: a riddle maps a letter position (from `ordinals`, so 'last'
# is -1) to the constraint on the letter at that position.
Riddle = Dict[int, RiddleElement]
  84 ```
  85
  86 ```python
  87 stop_words = set('my is in within lies and also always you will find always the found'.split())
  88 negative_words = set('but not never'.split())
  89 ```
  90
  91 ```python
  92 def parse_line(tokens: List[str]) -> Tuple[int, RiddleElement, RiddleElement]:
  93   stripped_tokens = [t for t in tokens if t not in stop_words]
  94
  95   position_word = [t for t in stripped_tokens if t in ordinals][0]
  96   pos = from_ordinal(position_word)
  97
  98   first_index, first_word = [(i, t) for i, t in enumerate(stripped_tokens)
  99                             if t not in ordinals
 100                             if t not in negative_words][0]
 101   second_index, second_word = [(i, t) for i, t in enumerate(stripped_tokens)
 102                             if t not in ordinals
 103                             if t not in negative_words][1]
 104   neg_indices = [i for i, t in enumerate(stripped_tokens) if t in negative_words]
 105
 106   first_clue = None
 107   second_clue = None
 108
 109   if neg_indices:
 110     if neg_indices[0] < first_index:
 111       first_clue = RiddleElement(valence = RiddleValence.Exclude,
 112                              letters = set(first_word))
 113       if len(neg_indices) > 1:
 114           second_clue = RiddleElement(valence = RiddleValence.Exclude,
 115                              letters = set(second_word))
 116     elif neg_indices[0] < second_index:
 117       second_clue = RiddleElement(valence = RiddleValence.Exclude,
 118                              letters = set(second_word))
 119
 120   if first_clue is None:
 121     first_clue = RiddleElement(valence = RiddleValence.Include,
 122                              letters = set(first_word))
 123
 124   if second_clue is None:
 125     second_clue = RiddleElement(valence = RiddleValence.Include,
 126                              letters = set(second_word))
 127
 128   return (pos, first_clue, second_clue)
 129 ```
 130
 131 ```python
# Example with a negated second clue: position 1, an Include on the letters
# of 'apple' and an Exclude on the letters of 'pad'.
e1 = parse_line(tokenise("My first is in apple, but not in pad."))
e1
 134 ```
 135
 136 ```python
# Example with no negation: position 2, with Include clues for both
# 'apple' and 'banana'.
e2 = parse_line(tokenise("My second is in apple and also in banana."))
e2
 139 ```
 140
 141 ```python
 142 def collapse_riddle_elements(elems : List[Tuple[int, RiddleElement, RiddleElement]]) -> Dict[int, RiddleElement]:
 143   def combine_elements(a: RiddleElement, b: RiddleElement) -> RiddleElement:
 144     if a.valence == b.valence:
 145       return RiddleElement(letters = a.letters | b.letters, valence = a.valence)
 146     else:
 147       if a.valence == RiddleValence.Include:
 148         p, q = a, b
 149       else:
 150         p, q = b, a
 151       return RiddleElement(letters = p.letters - q.letters, valence = RiddleValence.Include)
 152
 153   return {i: combine_elements(a, b) for i, a, b in elems}
 154 ```
 155
 156 ```python
# Collapse the two parsed example lines into a position -> element mapping.
collapse_riddle_elements([e1, e2])
 158 ```
 159
 160 ```python
# A six-line sample riddle, one clue line per letter position (first to sixth).
sample_riddle_text = """My first is in shoat but not in oath
My second is in orate but not in ratter
My third is in preposition but not in osteoporosis
My fourth is in astern but not in taster
My fifth is in conscientiousness but not in suction
My sixth is in immorality but not in immorally"""

# Tokenise and parse each line into a (position, first clue, second clue) tuple.
sample_riddle_lines = [parse_line(tokenise(l)) for l in sample_riddle_text.split('\n')]
sample_riddle_lines
 170 ```
 171
 172 ```python
# Reduce the parsed sample lines to one constraint per letter position.
sample_riddle = collapse_riddle_elements(sample_riddle_lines)
sample_riddle
 175 ```
 176
 177 ```python
 178 def parse_riddle(riddle_text: str) -> Dict[int, RiddleElement]:
 179   riddle_lines = [parse_line(tokenise(l)) for l in riddle_text.split('\n')]
 180   return collapse_riddle_elements(riddle_lines)
 181 ```
 182
 183 ```python
 184 def matches_element(pos: int, elem: RiddleElement, word: str) -> bool:
 185   if len(word) < pos:
 186     return False
 187   if elem.valence == RiddleValence.Include:
 188     return word[pos-1] in elem.letters
 189   else:
 190     return word[pos-1] not in elem.letters
 191 ```
 192
 193 ```python
 194 def matches_all_elements(riddle: Dict[int, RiddleElement], word: str) -> bool:
 195   if -1 in riddle:
 196     last_elem = riddle[-1]
 197     new_riddle = {p: e for p, e in riddle.items() if p != -1}
 198     new_riddle[len(word)] = last_elem
 199   else:
 200     new_riddle = riddle
 201   return all(matches_element(i, elem, word) for i, elem in new_riddle.items())
 202 ```
 203
 204 ```python
 205 def solve_riddle(riddle: Dict[int, RiddleElement]) -> str:
 206   return [w for w in dictionary
 207           if len(w) == len(riddle)
 208           if matches_all_elements(riddle, w)]
 209 ```
 210
 211 ```python
# List the dictionary words that satisfy the sample riddle.
solve_riddle(sample_riddle)
 213 ```
 214
 215 ```python
 216 def parse_and_solve_riddle(riddle_text: str) -> List[str]:
 217   riddle = parse_riddle(riddle_text)
 218   return solve_riddle(riddle)
 219 ```
 220
 221 ```python
 222 sample_riddles = open('sample-riddles.txt').read().split('\n\n')
 223 sample_riddles
 224 ```
 225
 226 ```python
# Parse and solve every riddle loaded from the sample file.
[parse_and_solve_riddle(r) for r in sample_riddles]
 228 ```
 229
 230 ```python
 231
 232 ```