+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "951180ec",
+ "metadata": {},
+ "source": [
+ "# Honeycomb letter puzzle\n",
+ "Solving the puzzle as posted on [FiveThirtyEight](https://fivethirtyeight.com/features/can-you-solve-the-vexing-vexillology/), also solved by [David Robinson](http://varianceexplained.org/r/honeycomb-puzzle/).\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "6f6fa3e7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import string\n",
+ "import collections\n",
+ "from dataclasses import dataclass\n",
+ "import itertools"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "7f00bc22",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def only_letters(text):\n",
+ " return ''.join([c.lower() for c in text if c in string.ascii_letters])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "cb0b4230",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'hello'"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "only_letters('Hell!o')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "9546868c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "101924"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "with open('/usr/share/dict/british-english') as f:\n",
+ " words = [line.rstrip() for line in f]\n",
+ "len(words)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 79,
+ "id": "88f00e4c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "172820"
+ ]
+ },
+ "execution_count": 79,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "with open('enable1.txt') as f:\n",
+ " words = [line.rstrip() for line in f]\n",
+ "len(words)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "id": "1e75fba2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "words = set(only_letters(w) \n",
+ " for w in words \n",
+ " if len(only_letters(w)) >= 4\n",
+ " if len(frozenset(only_letters(w))) <= 7\n",
+ " if 's' not in w)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "id": "49473123",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "44585"
+ ]
+ },
+ "execution_count": 81,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(words)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "id": "e1f9b35e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "21661"
+ ]
+ },
+ "execution_count": 82,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "word_sets = collections.defaultdict(set)\n",
+ "for w in words:\n",
+ " letters = frozenset(w)\n",
+ " word_sets[letters].add(w)\n",
+ "len(word_sets)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 86,
+ "id": "63121d2b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'elephant', 'naphthalene', 'pentathlete'}"
+ ]
+ },
+ "execution_count": 86,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "word_sets[frozenset('elephant')]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 87,
+ "id": "267130ba",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "@dataclass(frozen=True)\n",
+ "class Honeycomb():\n",
+ " mandatory: str\n",
+ " letters: frozenset\n",
+ " \n",
+ " def __repr__(self):\n",
+ " return f'{self.mandatory}|{\"\".join(sorted(self.letters - set(self.mandatory)))}'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 88,
+ "id": "0ca00165",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "g|aelmpx"
+ ]
+ },
+ "execution_count": 88,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "honeycomb = Honeycomb(mandatory='g', letters=frozenset('apxmelg'))\n",
+ "honeycomb"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 89,
+ "id": "bb848e88",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def present(honeycomb, target):\n",
+ " return (honeycomb.mandatory in target) and target.issubset(honeycomb.letters)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 90,
+ "id": "add4f445",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "14"
+ ]
+ },
+ "execution_count": 90,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "present_sets = [s for s in word_sets if present(honeycomb, s)]\n",
+ "len(present_sets)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 91,
+ "id": "3354f6d7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[frozenset({'a', 'e', 'g', 'p'}),\n",
+ " frozenset({'a', 'e', 'g', 'm'}),\n",
+ " frozenset({'a', 'g', 'm'}),\n",
+ " frozenset({'a', 'e', 'g', 'l', 'p'}),\n",
+ " frozenset({'a', 'e', 'g', 'l'}),\n",
+ " frozenset({'a', 'e', 'g'}),\n",
+ " frozenset({'a', 'g', 'l'}),\n",
+ " frozenset({'a', 'g', 'l', 'm'}),\n",
+ " frozenset({'a', 'e', 'g', 'l', 'm'}),\n",
+ " frozenset({'a', 'g'}),\n",
+ " frozenset({'a', 'g', 'l', 'x'}),\n",
+ " frozenset({'e', 'g', 'l'}),\n",
+ " frozenset({'a', 'g', 'l', 'p'}),\n",
+ " frozenset({'a', 'g', 'm', 'p'})]"
+ ]
+ },
+ "execution_count": 91,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "present_sets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 92,
+ "id": "2e85ce06",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'peag', 'agapae', 'peage', 'gape', 'agape', 'page'}\n",
+ "{'game', 'gemmae', 'mage', 'gemma'}\n",
+ "{'magma', 'agma', 'agama', 'gamma', 'gama'}\n",
+ "{'pelage', 'plage'}\n",
+ "{'eagle', 'algae', 'galeae', 'aglee', 'allege', 'legal', 'galea', 'gale', 'gaggle', 'egal'}\n",
+ "{'gage', 'agee'}\n",
+ "{'gala', 'gall', 'alga', 'algal'}\n",
+ "{'amalgam'}\n",
+ "{'agleam', 'gleam'}\n",
+ "{'gaga'}\n",
+ "{'galax'}\n",
+ "{'glee', 'gelee', 'gleg'}\n",
+ "{'plagal'}\n",
+ "{'gamp'}\n"
+ ]
+ }
+ ],
+ "source": [
+ "for s in present_sets:\n",
+ " print(word_sets[s])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 93,
+ "id": "3c6f212e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def score(present_set):\n",
+ " score = 0\n",
+ " for w in word_sets[present_set]:\n",
+ " if len(w) == 4:\n",
+ " score += 1\n",
+ " else:\n",
+ " score += len(w)\n",
+ " if len(present_set) == 7:\n",
+ " score += len(word_sets[present_set]) * 7\n",
+ " return score"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 94,
+ "id": "01c3743c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "153"
+ ]
+ },
+ "execution_count": 94,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sum(score(present_set) for present_set in present_sets)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 95,
+ "id": "1f45f6f5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "False"
+ ]
+ },
+ "execution_count": 95,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "set('megaplex') in words"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 96,
+ "id": "979d9ed5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "scored_sets = {s: score(s) for s in word_sets}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 97,
+ "id": "790b7303",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "frozenset({'e', 'a', 'p', 'g'}) {'peag', 'agapae', 'peage', 'gape', 'agape', 'page'} 19 19\n",
+ "frozenset({'m', 'a', 'e', 'g'}) {'game', 'gemmae', 'mage', 'gemma'} 13 13\n",
+ "frozenset({'m', 'a', 'g'}) {'magma', 'agma', 'agama', 'gamma', 'gama'} 17 17\n",
+ "frozenset({'p', 'l', 'a', 'g', 'e'}) {'pelage', 'plage'} 11 11\n",
+ "frozenset({'a', 'l', 'e', 'g'}) {'eagle', 'algae', 'galeae', 'aglee', 'allege', 'legal', 'galea', 'gale', 'gaggle', 'egal'} 45 45\n",
+ "frozenset({'a', 'e', 'g'}) {'gage', 'agee'} 2 2\n",
+ "frozenset({'a', 'l', 'g'}) {'gala', 'gall', 'alga', 'algal'} 8 8\n",
+ "frozenset({'m', 'a', 'l', 'g'}) {'amalgam'} 7 7\n",
+ "frozenset({'m', 'a', 'l', 'g', 'e'}) {'agleam', 'gleam'} 11 11\n",
+ "frozenset({'a', 'g'}) {'gaga'} 1 1\n",
+ "frozenset({'a', 'l', 'x', 'g'}) {'galax'} 5 5\n",
+ "frozenset({'l', 'e', 'g'}) {'glee', 'gelee', 'gleg'} 7 7\n",
+ "frozenset({'l', 'a', 'p', 'g'}) {'plagal'} 6 6\n",
+ "frozenset({'m', 'a', 'p', 'g'}) {'gamp'} 1 1\n"
+ ]
+ }
+ ],
+ "source": [
+ "for s in present_sets:\n",
+ " print(s, word_sets[s], score(s), scored_sets[s])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 98,
+ "id": "78d423a5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def score_honeycomb(honeycomb):\n",
+ " return sum(sc for letters, sc in scored_sets.items() \n",
+ " if honeycomb.mandatory in letters\n",
+ " if letters.issubset(honeycomb.letters)\n",
+ "# if present(honeycomb, letters)\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 99,
+ "id": "3c7c7a83",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "153"
+ ]
+ },
+ "execution_count": 99,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "score_honeycomb(honeycomb)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 100,
+ "id": "d53c4d64",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# hcs = []\n",
+ "\n",
+ "# for mand in 'abcde':\n",
+ "# remaining = set('abcde') - set(mand)\n",
+ "# for others in itertools.combinations(remaining, r=3):\n",
+ "# hcs.append(Honeycomb(mandatory=mand, letters=frozenset(others) | frozenset(mand)))\n",
+ "\n",
+ "# print(len(hcs))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 101,
+ "id": "d967a3df",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# hcs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 102,
+ "id": "4a07a6b7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# honeycombs = []\n",
+ "\n",
+ "# for mand in string.ascii_lowercase:\n",
+ "# remaining = set(string.ascii_lowercase) - set(mand)\n",
+ "# for others in itertools.combinations(remaining, r=6):\n",
+ "# honeycombs.append(Honeycomb(mandatory=mand, letters=frozenset(others) | frozenset(mand)))\n",
+ "\n",
+ "# print(len(honeycombs))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 103,
+ "id": "14c38054",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# honeycombs = []\n",
+ "\n",
+ "# candidate_letters = set(string.ascii_lowercase)\n",
+ "# candidate_letters.remove('s')\n",
+ "# candidate_letters = frozenset(candidate_letters)\n",
+ "\n",
+ "# for mand in candidate_letters:\n",
+ "# remaining = candidate_letters - set(mand)\n",
+ "# for others in itertools.combinations(remaining, r=6):\n",
+ "# honeycombs.append(Honeycomb(mandatory=mand, letters=frozenset(others) | frozenset(mand)))\n",
+ "\n",
+ "# print(len(honeycombs))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 104,
+ "id": "f8ea5307",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# [(h, score_honeycomb(h)) for h in honeycombs[:5]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 105,
+ "id": "4f2118dc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# %%timeit\n",
+ "# max(honeycombs, key=score_honeycomb)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 106,
+ "id": "febee1c1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "7986"
+ ]
+ },
+ "execution_count": 106,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pangram_sets = [s for s in word_sets.keys() if len(s) == 7 if 's' not in s]\n",
+ "len(pangram_sets)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 107,
+ "id": "54a06bdf",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "55902\n"
+ ]
+ }
+ ],
+ "source": [
+ "ps_honeycombs = []\n",
+ "\n",
+ "for ps in pangram_sets:\n",
+ " for mand in ps:\n",
+ " ps_honeycombs.append(Honeycomb(mandatory=mand, letters=ps))\n",
+ "\n",
+ "print(len(ps_honeycombs))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 108,
+ "id": "301b3cd0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# %%timeit\n",
+ "# max(ps_honeycombs, key=score_honeycomb)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 109,
+ "id": "653613ac",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "## 1 a,e,g,i,n,r,t r 3898\n",
+ "## 2 a,e,g,i,n,r,t n 3782\n",
+ "## 3 a,e,g,i,n,r,t e 3769"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 110,
+ "id": "3cdbd956",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3898"
+ ]
+ },
+ "execution_count": 110,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "score_honeycomb(Honeycomb('r', frozenset('aeginrt')))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 111,
+ "id": "5f06f87b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3782"
+ ]
+ },
+ "execution_count": 111,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "score_honeycomb(Honeycomb('n', frozenset('aeginrtn')))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 112,
+ "id": "ab6bc64e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3769"
+ ]
+ },
+ "execution_count": 112,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "score_honeycomb(Honeycomb('e', frozenset('aeginrte')))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 113,
+ "id": "914fece8",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "26"
+ ]
+ },
+ "execution_count": 113,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "partitioned_scored_sets = {l: {s: scored_sets[s] for s in scored_sets if l in s} \n",
+ " for l in string.ascii_lowercase}\n",
+ "len(partitioned_scored_sets)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 114,
+ "id": "f1454ccd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def partitioned_score_honeycomb(honeycomb):\n",
+ " return sum(sc for letters, sc in partitioned_scored_sets[honeycomb.mandatory].items() \n",
+ " if letters.issubset(honeycomb.letters)\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 115,
+ "id": "7380dbd6",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "r|aegint\n"
+ ]
+ }
+ ],
+ "source": [
+ "# %%timeit\n",
+ "print(max(ps_honeycombs, key=partitioned_score_honeycomb))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "id": "51d0317c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "13"
+ ]
+ },
+ "execution_count": 74,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# partitioned_score_honeycomb(Honeycomb('a', 'abc'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 76,
+ "id": "77d74a74-55e9-498b-abdf-b02c1f7f01a3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "19941"
+ ]
+ },
+ "execution_count": 76,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# max(len(partitioned_scored_sets[k]) for k in partitioned_scored_sets)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "id": "35b58dec-7138-4fdb-86cc-80f3eb6a555a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "37313"
+ ]
+ },
+ "execution_count": 77,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# len(scored_sets)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7e4173b4-26a9-4198-b572-d57db321fe94",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "jupytext": {
+ "formats": "ipynb,auto:light"
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}