88e69cb354055b854790c384021824015a0d34e3
[honeycomb-puzzle.git] / honeycomb_puzzle.ipynb
1 {
2 "cells": [
3 {
4 "cell_type": "markdown",
5 "id": "951180ec",
6 "metadata": {},
7 "source": [
8 "# Honeycomb letter puzzle\n",
9 "Solving the puzzle as posted on [FiveThirtyEight](https://fivethirtyeight.com/features/can-you-solve-the-vexing-vexillology/), also solved by [David Robinson](http://varianceexplained.org/r/honeycomb-puzzle/).\n"
10 ]
11 },
12 {
13 "cell_type": "code",
14 "execution_count": 1,
15 "id": "6f6fa3e7",
16 "metadata": {},
17 "outputs": [],
18 "source": [
19 "import string\n",
20 "import collections\n",
21 "from dataclasses import dataclass\n",
22 "import itertools"
23 ]
24 },
25 {
26 "cell_type": "code",
27 "execution_count": 2,
28 "id": "7f00bc22",
29 "metadata": {},
30 "outputs": [],
31 "source": [
32 "def only_letters(text):\n",
33 " return ''.join([c.lower() for c in text if c in string.ascii_letters])"
34 ]
35 },
36 {
37 "cell_type": "code",
38 "execution_count": 3,
39 "id": "cb0b4230",
40 "metadata": {},
41 "outputs": [
42 {
43 "data": {
44 "text/plain": [
45 "'hello'"
46 ]
47 },
48 "execution_count": 3,
49 "metadata": {},
50 "output_type": "execute_result"
51 }
52 ],
53 "source": [
54 "only_letters('Hell!o')"
55 ]
56 },
57 {
58 "cell_type": "code",
59 "execution_count": 4,
60 "id": "9546868c",
61 "metadata": {},
62 "outputs": [
63 {
64 "data": {
65 "text/plain": [
66 "101924"
67 ]
68 },
69 "execution_count": 4,
70 "metadata": {},
71 "output_type": "execute_result"
72 }
73 ],
74 "source": [
75 "with open('/usr/share/dict/british-english') as f:\n",
76 " words = [line.rstrip() for line in f]\n",
77 "len(words)"
78 ]
79 },
80 {
81 "cell_type": "code",
82 "execution_count": 79,
83 "id": "88f00e4c",
84 "metadata": {},
85 "outputs": [
86 {
87 "data": {
88 "text/plain": [
89 "172820"
90 ]
91 },
92 "execution_count": 79,
93 "metadata": {},
94 "output_type": "execute_result"
95 }
96 ],
97 "source": [
98 "with open('enable1.txt') as f:\n",
99 " words = [line.rstrip() for line in f]\n",
100 "len(words)"
101 ]
102 },
103 {
104 "cell_type": "code",
105 "execution_count": 80,
106 "id": "1e75fba2",
107 "metadata": {},
108 "outputs": [],
109 "source": [
110 "words = set(only_letters(w) \n",
111 " for w in words \n",
112 " if len(only_letters(w)) >= 4\n",
113 " if len(frozenset(only_letters(w))) <= 7\n",
114 " if 's' not in w)"
115 ]
116 },
117 {
118 "cell_type": "code",
119 "execution_count": 81,
120 "id": "49473123",
121 "metadata": {},
122 "outputs": [
123 {
124 "data": {
125 "text/plain": [
126 "44585"
127 ]
128 },
129 "execution_count": 81,
130 "metadata": {},
131 "output_type": "execute_result"
132 }
133 ],
134 "source": [
135 "len(words)"
136 ]
137 },
138 {
139 "cell_type": "code",
140 "execution_count": 82,
141 "id": "e1f9b35e",
142 "metadata": {},
143 "outputs": [
144 {
145 "data": {
146 "text/plain": [
147 "21661"
148 ]
149 },
150 "execution_count": 82,
151 "metadata": {},
152 "output_type": "execute_result"
153 }
154 ],
155 "source": [
156 "word_sets = collections.defaultdict(set)\n",
157 "for w in words:\n",
158 " letters = frozenset(w)\n",
159 " word_sets[letters].add(w)\n",
160 "len(word_sets)"
161 ]
162 },
163 {
164 "cell_type": "code",
165 "execution_count": 86,
166 "id": "63121d2b",
167 "metadata": {},
168 "outputs": [
169 {
170 "data": {
171 "text/plain": [
172 "{'elephant', 'naphthalene', 'pentathlete'}"
173 ]
174 },
175 "execution_count": 86,
176 "metadata": {},
177 "output_type": "execute_result"
178 }
179 ],
180 "source": [
181 "word_sets[frozenset('elephant')]"
182 ]
183 },
184 {
185 "cell_type": "code",
186 "execution_count": 87,
187 "id": "267130ba",
188 "metadata": {},
189 "outputs": [],
190 "source": [
191 "@dataclass(frozen=True)\n",
192 "class Honeycomb():\n",
193 " mandatory: str\n",
194 " letters: frozenset\n",
195 " \n",
196 " def __repr__(self):\n",
197 " return f'{self.mandatory}|{\"\".join(sorted(self.letters - set(self.mandatory)))}'"
198 ]
199 },
200 {
201 "cell_type": "code",
202 "execution_count": 88,
203 "id": "0ca00165",
204 "metadata": {},
205 "outputs": [
206 {
207 "data": {
208 "text/plain": [
209 "g|aelmpx"
210 ]
211 },
212 "execution_count": 88,
213 "metadata": {},
214 "output_type": "execute_result"
215 }
216 ],
217 "source": [
218 "honeycomb = Honeycomb(mandatory='g', letters=frozenset('apxmelg'))\n",
219 "honeycomb"
220 ]
221 },
222 {
223 "cell_type": "code",
224 "execution_count": 89,
225 "id": "bb848e88",
226 "metadata": {},
227 "outputs": [],
228 "source": [
229 "def present(honeycomb, target):\n",
230 " return (honeycomb.mandatory in target) and target.issubset(honeycomb.letters)"
231 ]
232 },
233 {
234 "cell_type": "code",
235 "execution_count": 90,
236 "id": "add4f445",
237 "metadata": {},
238 "outputs": [
239 {
240 "data": {
241 "text/plain": [
242 "14"
243 ]
244 },
245 "execution_count": 90,
246 "metadata": {},
247 "output_type": "execute_result"
248 }
249 ],
250 "source": [
251 "present_sets = [s for s in word_sets if present(honeycomb, s)]\n",
252 "len(present_sets)"
253 ]
254 },
255 {
256 "cell_type": "code",
257 "execution_count": 91,
258 "id": "3354f6d7",
259 "metadata": {},
260 "outputs": [
261 {
262 "data": {
263 "text/plain": [
264 "[frozenset({'a', 'e', 'g', 'p'}),\n",
265 " frozenset({'a', 'e', 'g', 'm'}),\n",
266 " frozenset({'a', 'g', 'm'}),\n",
267 " frozenset({'a', 'e', 'g', 'l', 'p'}),\n",
268 " frozenset({'a', 'e', 'g', 'l'}),\n",
269 " frozenset({'a', 'e', 'g'}),\n",
270 " frozenset({'a', 'g', 'l'}),\n",
271 " frozenset({'a', 'g', 'l', 'm'}),\n",
272 " frozenset({'a', 'e', 'g', 'l', 'm'}),\n",
273 " frozenset({'a', 'g'}),\n",
274 " frozenset({'a', 'g', 'l', 'x'}),\n",
275 " frozenset({'e', 'g', 'l'}),\n",
276 " frozenset({'a', 'g', 'l', 'p'}),\n",
277 " frozenset({'a', 'g', 'm', 'p'})]"
278 ]
279 },
280 "execution_count": 91,
281 "metadata": {},
282 "output_type": "execute_result"
283 }
284 ],
285 "source": [
286 "present_sets"
287 ]
288 },
289 {
290 "cell_type": "code",
291 "execution_count": 92,
292 "id": "2e85ce06",
293 "metadata": {},
294 "outputs": [
295 {
296 "name": "stdout",
297 "output_type": "stream",
298 "text": [
299 "{'peag', 'agapae', 'peage', 'gape', 'agape', 'page'}\n",
300 "{'game', 'gemmae', 'mage', 'gemma'}\n",
301 "{'magma', 'agma', 'agama', 'gamma', 'gama'}\n",
302 "{'pelage', 'plage'}\n",
303 "{'eagle', 'algae', 'galeae', 'aglee', 'allege', 'legal', 'galea', 'gale', 'gaggle', 'egal'}\n",
304 "{'gage', 'agee'}\n",
305 "{'gala', 'gall', 'alga', 'algal'}\n",
306 "{'amalgam'}\n",
307 "{'agleam', 'gleam'}\n",
308 "{'gaga'}\n",
309 "{'galax'}\n",
310 "{'glee', 'gelee', 'gleg'}\n",
311 "{'plagal'}\n",
312 "{'gamp'}\n"
313 ]
314 }
315 ],
316 "source": [
317 "for s in present_sets:\n",
318 " print(word_sets[s])"
319 ]
320 },
321 {
322 "cell_type": "code",
323 "execution_count": 93,
324 "id": "3c6f212e",
325 "metadata": {},
326 "outputs": [],
327 "source": [
328 "def score(present_set):\n",
329 " score = 0\n",
330 " for w in word_sets[present_set]:\n",
331 " if len(w) == 4:\n",
332 " score += 1\n",
333 " else:\n",
334 " score += len(w)\n",
335 " if len(present_set) == 7:\n",
336 " score += len(word_sets[present_set]) * 7\n",
337 " return score"
338 ]
339 },
340 {
341 "cell_type": "code",
342 "execution_count": 94,
343 "id": "01c3743c",
344 "metadata": {},
345 "outputs": [
346 {
347 "data": {
348 "text/plain": [
349 "153"
350 ]
351 },
352 "execution_count": 94,
353 "metadata": {},
354 "output_type": "execute_result"
355 }
356 ],
357 "source": [
358 "sum(score(present_set) for present_set in present_sets)"
359 ]
360 },
361 {
362 "cell_type": "code",
363 "execution_count": 95,
364 "id": "1f45f6f5",
365 "metadata": {},
366 "outputs": [
367 {
368 "data": {
369 "text/plain": [
370 "False"
371 ]
372 },
373 "execution_count": 95,
374 "metadata": {},
375 "output_type": "execute_result"
376 }
377 ],
378 "source": [
379 "set('megaplex') in words"
380 ]
381 },
382 {
383 "cell_type": "code",
384 "execution_count": 96,
385 "id": "979d9ed5",
386 "metadata": {},
387 "outputs": [],
388 "source": [
389 "scored_sets = {s: score(s) for s in word_sets}"
390 ]
391 },
392 {
393 "cell_type": "code",
394 "execution_count": 97,
395 "id": "790b7303",
396 "metadata": {},
397 "outputs": [
398 {
399 "name": "stdout",
400 "output_type": "stream",
401 "text": [
402 "frozenset({'e', 'a', 'p', 'g'}) {'peag', 'agapae', 'peage', 'gape', 'agape', 'page'} 19 19\n",
403 "frozenset({'m', 'a', 'e', 'g'}) {'game', 'gemmae', 'mage', 'gemma'} 13 13\n",
404 "frozenset({'m', 'a', 'g'}) {'magma', 'agma', 'agama', 'gamma', 'gama'} 17 17\n",
405 "frozenset({'p', 'l', 'a', 'g', 'e'}) {'pelage', 'plage'} 11 11\n",
406 "frozenset({'a', 'l', 'e', 'g'}) {'eagle', 'algae', 'galeae', 'aglee', 'allege', 'legal', 'galea', 'gale', 'gaggle', 'egal'} 45 45\n",
407 "frozenset({'a', 'e', 'g'}) {'gage', 'agee'} 2 2\n",
408 "frozenset({'a', 'l', 'g'}) {'gala', 'gall', 'alga', 'algal'} 8 8\n",
409 "frozenset({'m', 'a', 'l', 'g'}) {'amalgam'} 7 7\n",
410 "frozenset({'m', 'a', 'l', 'g', 'e'}) {'agleam', 'gleam'} 11 11\n",
411 "frozenset({'a', 'g'}) {'gaga'} 1 1\n",
412 "frozenset({'a', 'l', 'x', 'g'}) {'galax'} 5 5\n",
413 "frozenset({'l', 'e', 'g'}) {'glee', 'gelee', 'gleg'} 7 7\n",
414 "frozenset({'l', 'a', 'p', 'g'}) {'plagal'} 6 6\n",
415 "frozenset({'m', 'a', 'p', 'g'}) {'gamp'} 1 1\n"
416 ]
417 }
418 ],
419 "source": [
420 "for s in present_sets:\n",
421 " print(s, word_sets[s], score(s), scored_sets[s])"
422 ]
423 },
424 {
425 "cell_type": "code",
426 "execution_count": 98,
427 "id": "78d423a5",
428 "metadata": {},
429 "outputs": [],
430 "source": [
431 "def score_honeycomb(honeycomb):\n",
432 " return sum(sc for letters, sc in scored_sets.items() \n",
433 " if honeycomb.mandatory in letters\n",
434 " if letters.issubset(honeycomb.letters)\n",
435 "# if present(honeycomb, letters)\n",
436 " )"
437 ]
438 },
439 {
440 "cell_type": "code",
441 "execution_count": 99,
442 "id": "3c7c7a83",
443 "metadata": {},
444 "outputs": [
445 {
446 "data": {
447 "text/plain": [
448 "153"
449 ]
450 },
451 "execution_count": 99,
452 "metadata": {},
453 "output_type": "execute_result"
454 }
455 ],
456 "source": [
457 "score_honeycomb(honeycomb)"
458 ]
459 },
460 {
461 "cell_type": "code",
462 "execution_count": 100,
463 "id": "d53c4d64",
464 "metadata": {},
465 "outputs": [],
466 "source": [
467 "# hcs = []\n",
468 "\n",
469 "# for mand in 'abcde':\n",
470 "# remaining = set('abcde') - set(mand)\n",
471 "# for others in itertools.combinations(remaining, r=3):\n",
472 "# hcs.append(Honeycomb(mandatory=mand, letters=frozenset(others) | frozenset(mand)))\n",
473 "\n",
474 "# print(len(hcs))"
475 ]
476 },
477 {
478 "cell_type": "code",
479 "execution_count": 101,
480 "id": "d967a3df",
481 "metadata": {},
482 "outputs": [],
483 "source": [
484 "# hcs"
485 ]
486 },
487 {
488 "cell_type": "code",
489 "execution_count": 102,
490 "id": "4a07a6b7",
491 "metadata": {},
492 "outputs": [],
493 "source": [
494 "# honeycombs = []\n",
495 "\n",
496 "# for mand in string.ascii_lowercase:\n",
497 "# remaining = set(string.ascii_lowercase) - set(mand)\n",
498 "# for others in itertools.combinations(remaining, r=6):\n",
499 "# honeycombs.append(Honeycomb(mandatory=mand, letters=frozenset(others) | frozenset(mand)))\n",
500 "\n",
501 "# print(len(honeycombs))"
502 ]
503 },
504 {
505 "cell_type": "code",
506 "execution_count": 103,
507 "id": "14c38054",
508 "metadata": {},
509 "outputs": [],
510 "source": [
511 "# honeycombs = []\n",
512 "\n",
513 "# candidate_letters = set(string.ascii_lowercase)\n",
514 "# candidate_letters.remove('s')\n",
515 "# candidate_letters = frozenset(candidate_letters)\n",
516 "\n",
517 "# for mand in candidate_letters:\n",
518 "# remaining = candidate_letters - set(mand)\n",
519 "# for others in itertools.combinations(remaining, r=6):\n",
520 "# honeycombs.append(Honeycomb(mandatory=mand, letters=frozenset(others) | frozenset(mand)))\n",
521 "\n",
522 "# print(len(honeycombs))"
523 ]
524 },
525 {
526 "cell_type": "code",
527 "execution_count": 104,
528 "id": "f8ea5307",
529 "metadata": {},
530 "outputs": [],
531 "source": [
532 "# [(h, score_honeycomb(h)) for h in honeycombs[:5]]"
533 ]
534 },
535 {
536 "cell_type": "code",
537 "execution_count": 105,
538 "id": "4f2118dc",
539 "metadata": {},
540 "outputs": [],
541 "source": [
542 "# %%timeit\n",
543 "# max(honeycombs, key=score_honeycomb)"
544 ]
545 },
546 {
547 "cell_type": "code",
548 "execution_count": 106,
549 "id": "febee1c1",
550 "metadata": {},
551 "outputs": [
552 {
553 "data": {
554 "text/plain": [
555 "7986"
556 ]
557 },
558 "execution_count": 106,
559 "metadata": {},
560 "output_type": "execute_result"
561 }
562 ],
563 "source": [
564 "pangram_sets = [s for s in word_sets.keys() if len(s) == 7 if 's' not in s]\n",
565 "len(pangram_sets)"
566 ]
567 },
568 {
569 "cell_type": "code",
570 "execution_count": 107,
571 "id": "54a06bdf",
572 "metadata": {},
573 "outputs": [
574 {
575 "name": "stdout",
576 "output_type": "stream",
577 "text": [
578 "55902\n"
579 ]
580 }
581 ],
582 "source": [
583 "ps_honeycombs = []\n",
584 "\n",
585 "for ps in pangram_sets:\n",
586 " for mand in ps:\n",
587 " ps_honeycombs.append(Honeycomb(mandatory=mand, letters=ps))\n",
588 "\n",
589 "print(len(ps_honeycombs))"
590 ]
591 },
592 {
593 "cell_type": "code",
594 "execution_count": 108,
595 "id": "301b3cd0",
596 "metadata": {},
597 "outputs": [],
598 "source": [
599 "# %%timeit\n",
600 "# max(ps_honeycombs, key=score_honeycomb)"
601 ]
602 },
603 {
604 "cell_type": "code",
605 "execution_count": 109,
606 "id": "653613ac",
607 "metadata": {},
608 "outputs": [],
609 "source": [
610 "## 1 a,e,g,i,n,r,t r 3898\n",
611 "## 2 a,e,g,i,n,r,t n 3782\n",
612 "## 3 a,e,g,i,n,r,t e 3769"
613 ]
614 },
615 {
616 "cell_type": "code",
617 "execution_count": 110,
618 "id": "3cdbd956",
619 "metadata": {},
620 "outputs": [
621 {
622 "data": {
623 "text/plain": [
624 "3898"
625 ]
626 },
627 "execution_count": 110,
628 "metadata": {},
629 "output_type": "execute_result"
630 }
631 ],
632 "source": [
633 "score_honeycomb(Honeycomb('r', frozenset('aeginrt')))"
634 ]
635 },
636 {
637 "cell_type": "code",
638 "execution_count": 111,
639 "id": "5f06f87b",
640 "metadata": {},
641 "outputs": [
642 {
643 "data": {
644 "text/plain": [
645 "3782"
646 ]
647 },
648 "execution_count": 111,
649 "metadata": {},
650 "output_type": "execute_result"
651 }
652 ],
653 "source": [
654 "score_honeycomb(Honeycomb('n', frozenset('aeginrtn')))"
655 ]
656 },
657 {
658 "cell_type": "code",
659 "execution_count": 112,
660 "id": "ab6bc64e",
661 "metadata": {},
662 "outputs": [
663 {
664 "data": {
665 "text/plain": [
666 "3769"
667 ]
668 },
669 "execution_count": 112,
670 "metadata": {},
671 "output_type": "execute_result"
672 }
673 ],
674 "source": [
675 "score_honeycomb(Honeycomb('e', frozenset('aeginrte')))"
676 ]
677 },
678 {
679 "cell_type": "code",
680 "execution_count": 113,
681 "id": "914fece8",
682 "metadata": {},
683 "outputs": [
684 {
685 "data": {
686 "text/plain": [
687 "26"
688 ]
689 },
690 "execution_count": 113,
691 "metadata": {},
692 "output_type": "execute_result"
693 }
694 ],
695 "source": [
696 "partitioned_scored_sets = {l: {s: scored_sets[s] for s in scored_sets if l in s} \n",
697 " for l in string.ascii_lowercase}\n",
698 "len(partitioned_scored_sets)"
699 ]
700 },
701 {
702 "cell_type": "code",
703 "execution_count": 114,
704 "id": "f1454ccd",
705 "metadata": {},
706 "outputs": [],
707 "source": [
708 "def partitioned_score_honeycomb(honeycomb):\n",
709 " return sum(sc for letters, sc in partitioned_scored_sets[honeycomb.mandatory].items() \n",
710 " if letters.issubset(honeycomb.letters)\n",
711 " )"
712 ]
713 },
714 {
715 "cell_type": "code",
716 "execution_count": 115,
717 "id": "7380dbd6",
718 "metadata": {},
719 "outputs": [
720 {
721 "name": "stdout",
722 "output_type": "stream",
723 "text": [
724 "r|aegint\n"
725 ]
726 }
727 ],
728 "source": [
729 "# %%timeit\n",
730 "print(max(ps_honeycombs, key=partitioned_score_honeycomb))"
731 ]
732 },
733 {
734 "cell_type": "code",
735 "execution_count": 74,
736 "id": "51d0317c",
737 "metadata": {},
738 "outputs": [
739 {
740 "data": {
741 "text/plain": [
742 "13"
743 ]
744 },
745 "execution_count": 74,
746 "metadata": {},
747 "output_type": "execute_result"
748 }
749 ],
750 "source": [
751 "# partitioned_score_honeycomb(Honeycomb('a', 'abc'))"
752 ]
753 },
754 {
755 "cell_type": "code",
756 "execution_count": 76,
757 "id": "77d74a74-55e9-498b-abdf-b02c1f7f01a3",
758 "metadata": {},
759 "outputs": [
760 {
761 "data": {
762 "text/plain": [
763 "19941"
764 ]
765 },
766 "execution_count": 76,
767 "metadata": {},
768 "output_type": "execute_result"
769 }
770 ],
771 "source": [
772 "# max(len(partitioned_scored_sets[k]) for k in partitioned_scored_sets)"
773 ]
774 },
775 {
776 "cell_type": "code",
777 "execution_count": 77,
778 "id": "35b58dec-7138-4fdb-86cc-80f3eb6a555a",
779 "metadata": {},
780 "outputs": [
781 {
782 "data": {
783 "text/plain": [
784 "37313"
785 ]
786 },
787 "execution_count": 77,
788 "metadata": {},
789 "output_type": "execute_result"
790 }
791 ],
792 "source": [
793 "# len(scored_sets)"
794 ]
795 },
796 {
797 "cell_type": "code",
798 "execution_count": null,
799 "id": "7e4173b4-26a9-4198-b572-d57db321fe94",
800 "metadata": {},
801 "outputs": [],
802 "source": []
803 }
804 ],
805 "metadata": {
806 "jupytext": {
807 "formats": "ipynb,auto:light"
808 },
809 "kernelspec": {
810 "display_name": "Python 3 (ipykernel)",
811 "language": "python",
812 "name": "python3"
813 },
814 "language_info": {
815 "codemirror_mode": {
816 "name": "ipython",
817 "version": 3
818 },
819 "file_extension": ".py",
820 "mimetype": "text/x-python",
821 "name": "python",
822 "nbconvert_exporter": "python",
823 "pygments_lexer": "ipython3",
824 "version": "3.8.8"
825 }
826 },
827 "nbformat": 4,
828 "nbformat_minor": 5
829 }