General updates
[honeycomb-puzzle.git] / honeycomb_puzzle.ipynb
1 {
2 "cells": [
3 {
4 "cell_type": "markdown",
5 "id": "951180ec",
6 "metadata": {},
7 "source": [
8 "# Honeycomb letter puzzle\n",
9 "Solving the puzzle as posted on [FiveThirtyEight](https://fivethirtyeight.com/features/can-you-solve-the-vexing-vexillology/), also solved by [David Robinson](http://varianceexplained.org/r/honeycomb-puzzle/).\n"
10 ]
11 },
12 {
13 "cell_type": "code",
14 "execution_count": 45,
15 "id": "6f6fa3e7",
16 "metadata": {},
17 "outputs": [],
18 "source": [
19 "import string\n",
20 "import collections\n",
21 "from dataclasses import dataclass\n",
22 "import itertools\n",
23 "import cProfile"
24 ]
25 },
26 {
27 "cell_type": "code",
28 "execution_count": 5,
29 "id": "88f00e4c",
30 "metadata": {},
31 "outputs": [
32 {
33 "data": {
34 "text/plain": [
35 "172820"
36 ]
37 },
38 "execution_count": 5,
39 "metadata": {},
40 "output_type": "execute_result"
41 }
42 ],
43 "source": [
44 "with open('enable1.txt') as f:\n",
45 " words = [line.strip() for line in f]"
46 ]
47 },
48 {
49 "cell_type": "code",
50 "execution_count": 6,
51 "id": "1e75fba2",
52 "metadata": {},
53 "outputs": [],
54 "source": [
55 "words = set(w for w in words \n",
56 " if len(w) >= 4\n",
57 " if len(frozenset(w)) <= 7\n",
58 " if 's' not in w)"
59 ]
60 },
61 {
62 "cell_type": "code",
63 "execution_count": 8,
64 "id": "e1f9b35e",
65 "metadata": {},
66 "outputs": [
67 {
68 "data": {
69 "text/plain": [
70 "21661"
71 ]
72 },
73 "execution_count": 8,
74 "metadata": {},
75 "output_type": "execute_result"
76 }
77 ],
78 "source": [
79 "word_sets = collections.defaultdict(set)\n",
80 "for w in words:\n",
81 " letters = frozenset(w)\n",
82 " word_sets[letters].add(w)"
83 ]
84 },
85 {
86 "cell_type": "code",
87 "execution_count": 11,
88 "id": "267130ba",
89 "metadata": {},
90 "outputs": [],
91 "source": [
92 "@dataclass(frozen=True)\n",
93 "class Honeycomb():\n",
94 " mandatory: str\n",
95 " letters: frozenset\n",
96 " \n",
97 " def __repr__(self):\n",
98 " return f'{self.mandatory}|{\"\".join(sorted(self.letters - set(self.mandatory)))}'"
99 ]
100 },
101 {
102 "cell_type": "code",
103 "execution_count": 13,
104 "id": "bb848e88",
105 "metadata": {},
106 "outputs": [],
107 "source": [
108 "def present(target, honeycomb):\n",
109 " return ( (honeycomb.mandatory in target) \n",
110 " and target.issubset(honeycomb.letters)\n",
111 " )"
112 ]
113 },
114 {
115 "cell_type": "code",
116 "execution_count": 17,
117 "id": "3c6f212e",
118 "metadata": {},
119 "outputs": [],
120 "source": [
121 "def score(present_set):\n",
122 " score = 0\n",
123 " for w in word_sets[present_set]:\n",
124 " if len(w) == 4:\n",
125 " score += 1\n",
126 " else:\n",
127 " score += len(w)\n",
128 " if len(present_set) == 7:\n",
129 " score += len(word_sets[present_set]) * 7\n",
130 " return score"
131 ]
132 },
133 {
134 "cell_type": "code",
135 "execution_count": 22,
136 "id": "78d423a5",
137 "metadata": {},
138 "outputs": [],
139 "source": [
140 "def score_honeycomb(honeycomb):\n",
141 " return sum(sc for letters, sc in scored_sets.items() \n",
142 " # if honeycomb.mandatory in letters\n",
143 " # if letters.issubset(honeycomb.letters)\n",
144 " if present(letters, honeycomb)\n",
145 " )"
146 ]
147 },
148 {
149 "cell_type": "code",
150 "execution_count": 30,
151 "id": "febee1c1",
152 "metadata": {},
153 "outputs": [
154 {
155 "name": "stdout",
156 "output_type": "stream",
157 "text": [
158 "pangram sets: 7986\n"
159 ]
160 }
161 ],
162 "source": [
163 "pangram_sets = [s for s in word_sets.keys() if len(s) == 7 if 's' not in s]\n",
164 "print(\"pangram sets:\", len(pangram_sets))"
165 ]
166 },
167 {
168 "cell_type": "code",
169 "execution_count": 31,
170 "id": "54a06bdf",
171 "metadata": {},
172 "outputs": [],
173 "source": [
174 "with open('pangaram_words.txt', 'w') as f:\n",
175 " for s in pangram_sets:\n",
176 " for w in word_sets[s]:\n",
177 " f.write(f'{w}\\n')"
178 ]
179 },
180 {
181 "cell_type": "code",
182 "execution_count": 32,
183 "id": "007d3404",
184 "metadata": {},
185 "outputs": [
186 {
187 "name": "stdout",
188 "output_type": "stream",
189 "text": [
190 "55902 honeycombs\n"
191 ]
192 }
193 ],
194 "source": [
195 "# ps_honeycombs = []\n",
196 "\n",
197 "# for ps in pangram_sets:\n",
198 "# for mand in ps:\n",
199 "# ps_honeycombs.append(Honeycomb(mandatory=mand, letters=ps))\n",
200 "\n",
201 "ps_honeycombs = [Honeycomb(mandatory=mand, letters=ps) \n",
202 " for ps in pangram_sets\n",
203 " for mand in ps]\n",
204 "print(len(ps_honeycombs), \"honeycombs\")"
205 ]
206 },
207 {
208 "cell_type": "code",
209 "execution_count": 38,
210 "id": "914fece8",
211 "metadata": {},
212 "outputs": [
213 {
214 "data": {
215 "text/plain": [
216 "26"
217 ]
218 },
219 "execution_count": 38,
220 "metadata": {},
221 "output_type": "execute_result"
222 }
223 ],
224 "source": [
225 "partitioned_scored_sets = {l: {s: scored_sets[s] for s in scored_sets if l in s} \n",
226 " for l in string.ascii_lowercase}"
227 ]
228 },
229 {
230 "cell_type": "code",
231 "execution_count": 39,
232 "id": "f1454ccd",
233 "metadata": {},
234 "outputs": [],
235 "source": [
236 "def partitioned_score_honeycomb(honeycomb):\n",
237 " return sum(sc for letters, sc in partitioned_scored_sets[honeycomb.mandatory].items() \n",
238 " if letters.issubset(honeycomb.letters)\n",
239 " )"
240 ]
241 },
242 {
243 "cell_type": "code",
244 "execution_count": 40,
245 "id": "7380dbd6",
246 "metadata": {},
247 "outputs": [
248 {
249 "name": "stdout",
250 "output_type": "stream",
251 "text": [
252 "r|aegint 3898\n",
253 "r|aegint 3898\n",
254 "r|aegint 3898\n",
255 "r|aegint 3898\n",
256 "r|aegint 3898\n",
257 "r|aegint 3898\n",
258 "r|aegint 3898\n",
259 "r|aegint 3898\n",
260 "1min 25s ± 2.51 s per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
261 ]
262 }
263 ],
264 "source": [
265 "# # %%timeit\n",
266 "# best_honyecomb = max(ps_honeycombs, key=partitioned_score_honeycomb)\n",
267 "# print(best_honyecomb, partitioned_score_honeycomb(best_honyecomb))"
268 ]
269 },
270 {
271 "cell_type": "code",
272 "execution_count": 44,
273 "id": "0239b444-1240-4744-8547-c41367527785",
274 "metadata": {},
275 "outputs": [
276 {
277 "name": "stdout",
278 "output_type": "stream",
279 "text": [
280 "r|aegint 3898\n",
281 "r|aegint 3898\n",
282 "r|aegint 3898\n",
283 "r|aegint 3898\n",
284 "r|aegint 3898\n",
285 "r|aegint 3898\n",
286 "r|aegint 3898\n",
287 "r|aegint 3898\n",
288 "5min 38s ± 4.58 s per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
289 ]
290 }
291 ],
292 "source": [
293 "# %%timeit\n",
294 "best_honyecomb = max(ps_honeycombs, key=score_honeycomb)\n",
295 "print(best_honyecomb, partitioned_score_honeycomb(best_honyecomb))"
296 ]
297 },
298 {
299 "cell_type": "code",
300 "execution_count": 46,
301 "id": "7e4173b4-26a9-4198-b572-d57db321fe94",
302 "metadata": {
303 "lines_to_next_cell": 2
304 },
305 "outputs": [
306 {
307 "name": "stdout",
308 "output_type": "stream",
309 "text": [
310 " 1612136594 function calls in 589.284 seconds\n",
311 "\n",
312 " Ordered by: standard name\n",
313 "\n",
314 " ncalls tottime percall cumtime percall filename:lineno(function)\n",
315 " 55902 0.316 0.000 589.070 0.011 1101428662.py:1(score_honeycomb)\n",
316 " 1846420 257.034 0.000 588.167 0.000 1101428662.py:2(<genexpr>)\n",
317 "1210893222 276.103 0.000 331.132 0.000 1250517269.py:1(present)\n",
318 " 1 0.000 0.000 589.284 589.284 <string>:1(<module>)\n",
319 " 1 0.000 0.000 589.284 589.284 {built-in method builtins.exec}\n",
320 " 1 0.214 0.214 589.284 589.284 {built-in method builtins.max}\n",
321 " 55902 0.567 0.000 588.733 0.011 {built-in method builtins.sum}\n",
322 " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
323 "399229242 55.030 0.000 55.030 0.000 {method 'issubset' of 'frozenset' objects}\n",
324 " 55902 0.021 0.000 0.021 0.000 {method 'items' of 'dict' objects}\n",
325 "\n",
326 "\n"
327 ]
328 }
329 ],
330 "source": [
331 "# cProfile.run('max(ps_honeycombs, key=score_honeycomb)')"
332 ]
333 },
334 {
335 "cell_type": "code",
336 "execution_count": 47,
337 "id": "8f2de1b8-6a09-44eb-af7b-3e16c902859f",
338 "metadata": {
339 "lines_to_next_cell": 2
340 },
341 "outputs": [
342 {
343 "name": "stdout",
344 "output_type": "stream",
345 "text": [
346 " 401243372 function calls in 150.954 seconds\n",
347 "\n",
348 " Ordered by: standard name\n",
349 "\n",
350 " ncalls tottime percall cumtime percall filename:lineno(function)\n",
351 " 55902 0.226 0.000 150.835 0.003 346452243.py:1(partitioned_score_honeycomb)\n",
352 " 1846420 86.829 0.000 150.189 0.000 346452243.py:2(<genexpr>)\n",
353 " 1 0.000 0.000 150.954 150.954 <string>:1(<module>)\n",
354 " 1 0.000 0.000 150.954 150.954 {built-in method builtins.exec}\n",
355 " 1 0.119 0.119 150.954 150.954 {built-in method builtins.max}\n",
356 " 55902 0.407 0.000 150.596 0.003 {built-in method builtins.sum}\n",
357 " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
358 "399229242 63.360 0.000 63.360 0.000 {method 'issubset' of 'frozenset' objects}\n",
359 " 55902 0.013 0.000 0.013 0.000 {method 'items' of 'dict' objects}\n",
360 "\n",
361 "\n"
362 ]
363 }
364 ],
365 "source": [
366 "# cProfile.run('max(ps_honeycombs, key=partitioned_score_honeycomb)')"
367 ]
368 },
369 {
370 "cell_type": "code",
371 "execution_count": null,
372 "id": "4c3a5684-346b-4cf7-b238-391f9a6ba2dd",
373 "metadata": {},
374 "outputs": [],
375 "source": []
376 }
377 ],
378 "metadata": {
379 "jupytext": {
380 "formats": "ipynb,py:light"
381 },
382 "kernelspec": {
383 "display_name": "Python 3 (ipykernel)",
384 "language": "python",
385 "name": "python3"
386 },
387 "language_info": {
388 "codemirror_mode": {
389 "name": "ipython",
390 "version": 3
391 },
392 "file_extension": ".py",
393 "mimetype": "text/x-python",
394 "name": "python",
395 "nbconvert_exporter": "python",
396 "pygments_lexer": "ipython3",
397 "version": "3.8.8"
398 }
399 },
400 "nbformat": 4,
401 "nbformat_minor": 5
402 }