Initial commit: solves riddles
[riddle-generator.git] / riddle-solver.md
1 ---
2 jupyter:
3 jupytext:
4 formats: ipynb,md
5 text_representation:
6 extension: .md
7 format_name: markdown
8 format_version: '1.3'
9 jupytext_version: 1.14.5
10 kernelspec:
11 display_name: Python 3 (ipykernel)
12 language: python
13 name: python3
14 ---
15
16 ```python
17 import unicodedata
18 import re
19 from dataclasses import dataclass
20 from typing import Dict, Tuple, List, Set
21 from enum import Enum, auto
22 ```
23
24 ```python
# Load the system word list. Only entries that are entirely lowercase and
# alphabetic are kept (this drops proper nouns, abbreviations and words with
# apostrophes). The checks run on the raw entry, so accented lowercase words
# pass; they are then decomposed (NFKD) and encoded to ASCII with errors
# ignored, which drops the non-ASCII parts such as combining accents.
# The file is opened in a `with` block so the handle is closed after reading.
with open('/usr/share/dict/british-english') as word_file:
    dictionary : List[str] = [
        unicodedata.normalize('NFKD', word).encode('ascii', 'ignore').decode('utf-8')
        for word in (line.strip() for line in word_file)
        if word.islower()
        if word.isalpha()
    ]
dictionary[:5]
33 ```
34
35 ```python
# Ordinal words recognised in riddle lines, mapped to 1-based letter
# positions. 'last' is stored as the sentinel -1.
ordinals : Dict[str, int] = {
    'last': -1,
    **{name: number
       for number, name in enumerate(
           ('first second third fourth fifth sixth '
            'seventh eighth ninth tenth eleventh twelfth').split(),
           start=1)},
}

# reverse_ordinals : Dict[int, str] = {n: w for w, n in ordinals.items()}

def from_ordinal(word: str) -> int:
    """Return the position number for the ordinal *word*.

    Raises KeyError if *word* is not a key of ``ordinals``.
    """
    position = ordinals[word]
    return position
55
56 # def to_ordinal(number: int) -> str:
57 # return reverse_ordinals[number]
58 ```
59
60 ```python
# Quick check of the lookup: 'seventh' is mapped to 7 in the ordinals table.
from_ordinal('seventh')
62 ```
63
64 ```python
def tokenise(phrase: str) -> List[str]:
    """Break *phrase* into lowercase word tokens.

    Each maximal run of regex word characters becomes one token; anything
    else (spaces, punctuation) acts only as a separator and is discarded.
    """
    return [run.lower() for run in re.findall(r'\w+', phrase)]
67 ```
68
69 ```python
# Example: the comma and full stop are dropped and 'My' is lowercased.
tokenise("My first is in apple, but not in fish.")
71 ```
72
73 ```python
class RiddleValence(Enum):
    """How a clue's letters constrain a position: allowed or forbidden."""
    Include = 1
    Exclude = 2


@dataclass
class RiddleElement:
    """A single clue: a set of letters plus the way it applies."""
    valence : RiddleValence  # Include or Exclude
    letters : Set[str]       # the letters the clue refers to


# A riddle maps a letter position to the clue for that position.
Riddle = Dict[int, RiddleElement]
84 ```
85
86 ```python
# Filler words that carry no information in a riddle line.
stop_words = {
    'my', 'is', 'in', 'within', 'lies', 'and', 'also', 'always',
    'you', 'will', 'find', 'the', 'found',
}
# Words that signal a negated clue.
negative_words = {'but', 'not', 'never'}
89 ```
90
91 ```python
def parse_line(tokens: List[str]) -> Tuple[int, RiddleElement, RiddleElement]:
    """Turn one tokenised riddle line into (position, first clue, second clue).

    Stop words are discarded. The first ordinal word gives the position, and
    the first two remaining words that are neither ordinals nor negative
    words are the clue words. A clue becomes Exclude according to where the
    negative words sit relative to the clue words; otherwise it is Include.

    Raises IndexError if the line has no ordinal word or fewer than two
    clue words.
    """
    content = [tok for tok in tokens if tok not in stop_words]

    # The first ordinal on the line names the letter position.
    pos = from_ordinal([tok for tok in content if tok in ordinals][0])

    # Candidate clue words, each paired with its index in the stripped line.
    clue_words = [(idx, tok) for idx, tok in enumerate(content)
                  if not (tok in ordinals or tok in negative_words)]
    first_index, first_word = clue_words[0]
    second_index, second_word = clue_words[1]

    negations = [idx for idx, tok in enumerate(content) if tok in negative_words]

    first_valence = second_valence = RiddleValence.Include
    if negations:
        earliest = negations[0]
        if earliest < first_index:
            # A negation before the first clue word negates that clue...
            first_valence = RiddleValence.Exclude
            # ...and any further negative word on the line negates the second.
            # NOTE(review): every negative word counts here, so if 'but' is
            # one of them, "not in X but in Y" excludes both clues - confirm
            # that this is intended.
            if len(negations) > 1:
                second_valence = RiddleValence.Exclude
        elif earliest < second_index:
            # The negation sits between the two clue words.
            second_valence = RiddleValence.Exclude

    return (pos,
            RiddleElement(valence=first_valence, letters=set(first_word)),
            RiddleElement(valence=second_valence, letters=set(second_word)))
129 ```
130
131 ```python
# Example line: an included word ('apple') followed by a negated one ('pad').
e1 = parse_line(tokenise("My first is in apple, but not in pad."))
e1
134 ```
135
136 ```python
# Example line: two clue words and no negative word.
e2 = parse_line(tokenise("My second is in apple and also in banana."))
e2
139 ```
140
141 ```python
def collapse_riddle_elements(elems : List[Tuple[int, RiddleElement, RiddleElement]]) -> Dict[int, RiddleElement]:
    """Merge the two clues of every parsed line into one clue per position.

    elems: (position, first clue, second clue) tuples, one per riddle line.
    Returns a dict from position to the merged clue. If a position occurs
    more than once, the later line replaces the earlier one.

    Merging rules:
      * Include + Include -> Include the letters common to both words,
        because the letter has to appear in each of them.
      * Exclude + Exclude -> Exclude the letters of either word.
      * Include + Exclude -> Include the included word's letters minus the
        excluded word's letters.
    """
    def combine_elements(a: RiddleElement, b: RiddleElement) -> RiddleElement:
        if a.valence != b.valence:
            included, excluded = (a, b) if a.valence == RiddleValence.Include else (b, a)
            return RiddleElement(valence = RiddleValence.Include,
                                 letters = included.letters - excluded.letters)
        if a.valence == RiddleValence.Include:
            # "in X and also in Y": only letters present in both words qualify.
            # (A union would wrongly accept letters found in just one of them.)
            return RiddleElement(valence = RiddleValence.Include,
                                 letters = a.letters & b.letters)
        # Both clues exclude: a letter from either word is ruled out.
        return RiddleElement(valence = a.valence, letters = a.letters | b.letters)

    return {i: combine_elements(a, b) for i, a, b in elems}
154 ```
155
156 ```python
# Merge the two example lines into one clue per position.
collapse_riddle_elements([e1, e2])
158 ```
159
160 ```python
# A six-line sample riddle: each line gives one word that contains the letter
# and one word that does not.
sample_riddle_text = """My first is in shoat but not in oath
My second is in orate but not in ratter
My third is in preposition but not in osteoporosis
My fourth is in astern but not in taster
My fifth is in conscientiousness but not in suction
My sixth is in immorality but not in immorally"""

# Tokenise and parse the riddle line by line.
sample_riddle_lines = list(map(parse_line,
                               map(tokenise, sample_riddle_text.split('\n'))))
sample_riddle_lines
170 ```
171
172 ```python
# Reduce the parsed sample lines to a single clue per letter position.
sample_riddle = collapse_riddle_elements(sample_riddle_lines)
sample_riddle
175 ```
176
177 ```python
def parse_riddle(riddle_text: str) -> Dict[int, RiddleElement]:
    """Parse a whole riddle, one clue line per text line, into position -> clue.

    riddle_text: the riddle, with lines separated by newline characters.
    Lines that produce no tokens (blank lines, a trailing newline, or lines
    of punctuation only) are skipped, so they do not make parse_line fail
    on an empty token list.

    Raises IndexError (from parse_line) for a non-empty line that lacks an
    ordinal word or has fewer than two clue words.
    """
    riddle_lines = []
    for line in riddle_text.split('\n'):
        tokens = tokenise(line)
        if tokens:
            riddle_lines.append(parse_line(tokens))
    return collapse_riddle_elements(riddle_lines)
181 ```
182
183 ```python
def matches_element(pos: int, elem: RiddleElement, word: str) -> bool:
    """Check whether *word* satisfies the clue *elem* at 1-based position *pos*.

    A word shorter than *pos* never matches. Otherwise an Include clue needs
    the letter at that position to be among the clue's letters, and any other
    valence needs it not to be.
    """
    if pos > len(word):
        return False
    must_be_listed = elem.valence == RiddleValence.Include
    # True when "letter is listed" agrees with what the valence asks for.
    return (word[pos - 1] in elem.letters) == must_be_listed
191 ```
192
193 ```python
def matches_all_elements(riddle: Dict[int, RiddleElement], word: str) -> bool:
    """Return True when *word* satisfies every clue in *riddle*.

    riddle: position -> clue, where key -1 stands for "the last letter".
    word: the candidate word.

    The 'last' clue is checked against the final letter in addition to the
    numbered clues. It is not written to the slot numbered len(word), because
    that would overwrite any numbered clue already at that position. An empty
    word cannot satisfy a 'last' clue, so it is rejected without indexing.
    """
    numbered_ok = all(matches_element(position, elem, word)
                      for position, elem in riddle.items()
                      if position != -1)
    if not numbered_ok:
        return False
    if -1 not in riddle:
        return True
    if not word:
        # No final letter to test; avoids an IndexError on word[-1].
        return False
    return matches_element(len(word), riddle[-1], word)
202 ```
203
204 ```python
def solve_riddle(riddle: Dict[int, RiddleElement]) -> List[str]:
    """Return every dictionary word that satisfies *riddle*.

    Only words whose length equals the number of clues in the riddle are
    considered, so the riddle is expected to hold one clue per letter.
    The return annotation is List[str] to match the list that is returned.
    """
    target_length = len(riddle)  # loop-invariant, so computed once
    return [w for w in dictionary
            if len(w) == target_length
            if matches_all_elements(riddle, w)]
209 ```
210
211 ```python
# Look up the dictionary words that fit the sample riddle.
solve_riddle(sample_riddle)
213 ```
214
215 ```python
def parse_and_solve_riddle(riddle_text: str) -> List[str]:
    """Parse *riddle_text* and return the words that solve_riddle finds for it."""
    return solve_riddle(parse_riddle(riddle_text))
219 ```
220
221 ```python
# Read the sample riddles; riddles in the file are separated by a blank line.
# The file is opened in a `with` block so the handle is closed after reading.
with open('sample-riddles.txt') as riddle_file:
    sample_riddles = riddle_file.read().split('\n\n')
sample_riddles
224 ```
225
226 ```python
# Solve each sample riddle in turn.
[parse_and_solve_riddle(r) for r in sample_riddles]
228 ```
229
230 ```python
231
232 ```