{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "951180ec",
   "metadata": {},
   "source": [
    "# Honeycomb letter puzzle\n",
    "Solving the puzzle as posted on [FiveThirtyEight](https://fivethirtyeight.com/features/can-you-solve-the-vexing-vexillology/), also solved by [David Robinson](http://varianceexplained.org/r/honeycomb-puzzle/).\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "6f6fa3e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "import string\n",
    "import collections\n",
    "from dataclasses import dataclass\n",
    "import itertools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "7f00bc22",
   "metadata": {},
   "outputs": [],
   "source": [
    "def only_letters(text):\n",
    "    return ''.join([c.lower() for c in text if c in string.ascii_letters])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "cb0b4230",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'hello'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "only_letters('Hell!o')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "9546868c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "101924"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "with open('/usr/share/dict/british-english') as f:\n",
    "    words = [line.rstrip() for line in f]\n",
    "len(words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "88f00e4c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "172820"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "with open('enable1.txt') as f:\n",
    "    words = [line.rstrip() for line in f]\n",
    "len(words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "1e75fba2",
   "metadata": {},
   "outputs": [],
   "source": [
    "words = set(only_letters(w) \n",
    "            for w in words \n",
    "            if len(only_letters(w)) >= 4\n",
    "            if len(frozenset(only_letters(w))) <= 7\n",
    "            if 's' not in w)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "id": "49473123",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "44585"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "e1f9b35e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "21661"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "word_sets = collections.defaultdict(set)\n",
    "for w in words:\n",
    "    letters = frozenset(w)\n",
    "    word_sets[letters].add(w)\n",
    "len(word_sets)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "63121d2b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'elephant', 'naphthalene', 'pentathlete'}"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "word_sets[frozenset('elephant')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "id": "267130ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "@dataclass(frozen=True)\n",
    "class Honeycomb():\n",
    "    mandatory: str\n",
    "    letters: frozenset\n",
    "      \n",
    "    def __repr__(self):\n",
    "        return f'{self.mandatory}|{\"\".join(sorted(self.letters - set(self.mandatory)))}'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "id": "0ca00165",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "g|aelmpx"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "honeycomb = Honeycomb(mandatory='g', letters=frozenset('apxmelg'))\n",
    "honeycomb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "id": "bb848e88",
   "metadata": {},
   "outputs": [],
   "source": [
    "def present(honeycomb, target):\n",
    "    return (honeycomb.mandatory in target) and target.issubset(honeycomb.letters)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "id": "add4f445",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "14"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "present_sets = [s for s in word_sets if present(honeycomb, s)]\n",
    "len(present_sets)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "id": "3354f6d7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[frozenset({'a', 'e', 'g', 'p'}),\n",
       " frozenset({'a', 'e', 'g', 'm'}),\n",
       " frozenset({'a', 'g', 'm'}),\n",
       " frozenset({'a', 'e', 'g', 'l', 'p'}),\n",
       " frozenset({'a', 'e', 'g', 'l'}),\n",
       " frozenset({'a', 'e', 'g'}),\n",
       " frozenset({'a', 'g', 'l'}),\n",
       " frozenset({'a', 'g', 'l', 'm'}),\n",
       " frozenset({'a', 'e', 'g', 'l', 'm'}),\n",
       " frozenset({'a', 'g'}),\n",
       " frozenset({'a', 'g', 'l', 'x'}),\n",
       " frozenset({'e', 'g', 'l'}),\n",
       " frozenset({'a', 'g', 'l', 'p'}),\n",
       " frozenset({'a', 'g', 'm', 'p'})]"
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "present_sets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "id": "2e85ce06",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'peag', 'agapae', 'peage', 'gape', 'agape', 'page'}\n",
      "{'game', 'gemmae', 'mage', 'gemma'}\n",
      "{'magma', 'agma', 'agama', 'gamma', 'gama'}\n",
      "{'pelage', 'plage'}\n",
      "{'eagle', 'algae', 'galeae', 'aglee', 'allege', 'legal', 'galea', 'gale', 'gaggle', 'egal'}\n",
      "{'gage', 'agee'}\n",
      "{'gala', 'gall', 'alga', 'algal'}\n",
      "{'amalgam'}\n",
      "{'agleam', 'gleam'}\n",
      "{'gaga'}\n",
      "{'galax'}\n",
      "{'glee', 'gelee', 'gleg'}\n",
      "{'plagal'}\n",
      "{'gamp'}\n"
     ]
    }
   ],
   "source": [
    "for s in present_sets:\n",
    "    print(word_sets[s])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "id": "3c6f212e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def score(present_set):\n",
    "    score = 0\n",
    "    for w in word_sets[present_set]:\n",
    "        if len(w) == 4:\n",
    "            score += 1\n",
    "        else:\n",
    "            score += len(w)\n",
    "    if len(present_set) == 7:\n",
    "        score += len(word_sets[present_set]) * 7\n",
    "    return score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "id": "01c3743c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "153"
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(score(present_set) for present_set in present_sets)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "id": "1f45f6f5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set('megaplex') in words"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "id": "979d9ed5",
   "metadata": {},
   "outputs": [],
   "source": [
    "scored_sets = {s: score(s) for s in word_sets}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "id": "790b7303",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frozenset({'e', 'a', 'p', 'g'}) {'peag', 'agapae', 'peage', 'gape', 'agape', 'page'} 19 19\n",
      "frozenset({'m', 'a', 'e', 'g'}) {'game', 'gemmae', 'mage', 'gemma'} 13 13\n",
      "frozenset({'m', 'a', 'g'}) {'magma', 'agma', 'agama', 'gamma', 'gama'} 17 17\n",
      "frozenset({'p', 'l', 'a', 'g', 'e'}) {'pelage', 'plage'} 11 11\n",
      "frozenset({'a', 'l', 'e', 'g'}) {'eagle', 'algae', 'galeae', 'aglee', 'allege', 'legal', 'galea', 'gale', 'gaggle', 'egal'} 45 45\n",
      "frozenset({'a', 'e', 'g'}) {'gage', 'agee'} 2 2\n",
      "frozenset({'a', 'l', 'g'}) {'gala', 'gall', 'alga', 'algal'} 8 8\n",
      "frozenset({'m', 'a', 'l', 'g'}) {'amalgam'} 7 7\n",
      "frozenset({'m', 'a', 'l', 'g', 'e'}) {'agleam', 'gleam'} 11 11\n",
      "frozenset({'a', 'g'}) {'gaga'} 1 1\n",
      "frozenset({'a', 'l', 'x', 'g'}) {'galax'} 5 5\n",
      "frozenset({'l', 'e', 'g'}) {'glee', 'gelee', 'gleg'} 7 7\n",
      "frozenset({'l', 'a', 'p', 'g'}) {'plagal'} 6 6\n",
      "frozenset({'m', 'a', 'p', 'g'}) {'gamp'} 1 1\n"
     ]
    }
   ],
   "source": [
    "for s in present_sets:\n",
    "    print(s, word_sets[s], score(s), scored_sets[s])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "id": "78d423a5",
   "metadata": {},
   "outputs": [],
   "source": [
    "def score_honeycomb(honeycomb):\n",
    "    return sum(sc for letters, sc in scored_sets.items() \n",
    "               if honeycomb.mandatory in letters\n",
    "               if letters.issubset(honeycomb.letters)\n",
    "#                if present(honeycomb, letters)\n",
    "              )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "id": "3c7c7a83",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "153"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "score_honeycomb(honeycomb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "id": "d53c4d64",
   "metadata": {},
   "outputs": [],
   "source": [
    "# hcs = []\n",
    "\n",
    "# for mand in 'abcde':\n",
    "#     remaining = set('abcde') - set(mand)\n",
    "#     for others in itertools.combinations(remaining, r=3):\n",
    "#         hcs.append(Honeycomb(mandatory=mand, letters=frozenset(others) | frozenset(mand)))\n",
    "\n",
    "# print(len(hcs))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "id": "d967a3df",
   "metadata": {},
   "outputs": [],
   "source": [
    "# hcs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "id": "4a07a6b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# honeycombs = []\n",
    "\n",
    "# for mand in string.ascii_lowercase:\n",
    "#     remaining = set(string.ascii_lowercase) - set(mand)\n",
    "#     for others in itertools.combinations(remaining, r=6):\n",
    "#         honeycombs.append(Honeycomb(mandatory=mand, letters=frozenset(others) | frozenset(mand)))\n",
    "\n",
    "# print(len(honeycombs))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "id": "14c38054",
   "metadata": {},
   "outputs": [],
   "source": [
    "# honeycombs = []\n",
    "\n",
    "# candidate_letters = set(string.ascii_lowercase)\n",
    "# candidate_letters.remove('s')\n",
    "# candidate_letters = frozenset(candidate_letters)\n",
    "\n",
    "# for mand in candidate_letters:\n",
    "#     remaining = candidate_letters - set(mand)\n",
    "#     for others in itertools.combinations(remaining, r=6):\n",
    "#         honeycombs.append(Honeycomb(mandatory=mand, letters=frozenset(others) | frozenset(mand)))\n",
    "\n",
    "# print(len(honeycombs))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "id": "f8ea5307",
   "metadata": {},
   "outputs": [],
   "source": [
    "# [(h, score_honeycomb(h)) for h in honeycombs[:5]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "id": "4f2118dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%timeit\n",
    "# max(honeycombs, key=score_honeycomb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "id": "febee1c1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7986"
      ]
     },
     "execution_count": 106,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pangram_sets = [s for s in word_sets.keys() if len(s) == 7 if 's' not in s]\n",
    "len(pangram_sets)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "id": "54a06bdf",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "55902\n"
     ]
    }
   ],
   "source": [
    "ps_honeycombs = []\n",
    "\n",
    "for ps in pangram_sets:\n",
    "    for mand in ps:\n",
    "        ps_honeycombs.append(Honeycomb(mandatory=mand, letters=ps))\n",
    "\n",
    "print(len(ps_honeycombs))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "id": "301b3cd0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%timeit\n",
    "# max(ps_honeycombs, key=score_honeycomb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "id": "653613ac",
   "metadata": {},
   "outputs": [],
   "source": [
    "##  1 a,e,g,i,n,r,t r       3898\n",
    "##  2 a,e,g,i,n,r,t n       3782\n",
    "##  3 a,e,g,i,n,r,t e       3769"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "id": "3cdbd956",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3898"
      ]
     },
     "execution_count": 110,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "score_honeycomb(Honeycomb('r', frozenset('aeginrt')))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "id": "5f06f87b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3782"
      ]
     },
     "execution_count": 111,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "score_honeycomb(Honeycomb('n', frozenset('aeginrtn')))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "id": "ab6bc64e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3769"
      ]
     },
     "execution_count": 112,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "score_honeycomb(Honeycomb('e', frozenset('aeginrte')))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "id": "914fece8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "26"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "partitioned_scored_sets = {l: {s: scored_sets[s] for s in scored_sets if l in s} \n",
    "                           for l in string.ascii_lowercase}\n",
    "len(partitioned_scored_sets)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "id": "f1454ccd",
   "metadata": {},
   "outputs": [],
   "source": [
    "def partitioned_score_honeycomb(honeycomb):\n",
    "    return sum(sc for letters, sc in partitioned_scored_sets[honeycomb.mandatory].items() \n",
    "               if letters.issubset(honeycomb.letters)\n",
    "              )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "id": "7380dbd6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "r|aegint\n"
     ]
    }
   ],
   "source": [
    "# %%timeit\n",
    "print(max(ps_honeycombs, key=partitioned_score_honeycomb))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "51d0317c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "13"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# partitioned_score_honeycomb(Honeycomb('a', 'abc'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "77d74a74-55e9-498b-abdf-b02c1f7f01a3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "19941"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# max(len(partitioned_scored_sets[k]) for k in partitioned_scored_sets)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "35b58dec-7138-4fdb-86cc-80f3eb6a555a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "37313"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# len(scored_sets)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7e4173b4-26a9-4198-b572-d57db321fe94",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "jupytext": {
   "formats": "ipynb,auto:light"
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}