honeycomb_puzzle.ipynb

   1 {
   2  "cells": [
   3   {
   4    "cell_type": "markdown",
   5    "id": "951180ec",
   6    "metadata": {},
   7    "source": [
   8     "# Honeycomb letter puzzle\n",
   9     "Solving the puzzle as posted on [FiveThirtyEight](https://fivethirtyeight.com/features/can-you-solve-the-vexing-vexillology/), also solved by [David Robinson](http://varianceexplained.org/r/honeycomb-puzzle/).\n"
  10    ]
  11   },
  12   {
  13    "cell_type": "code",
  14    "execution_count": 45,
  15    "id": "6f6fa3e7",
  16    "metadata": {},
  17    "outputs": [],
  18    "source": [
  19     "import string\n",
  20     "import collections\n",
  21     "from dataclasses import dataclass\n",
  22     "import itertools\n",
  23     "import cProfile"
  24    ]
  25   },
  26   {
  27    "cell_type": "code",
  28    "execution_count": 5,
  29    "id": "88f00e4c",
  30    "metadata": {},
  31    "outputs": [
  32     {
  33      "data": {
  34       "text/plain": [
  35        "172820"
  36       ]
  37      },
  38      "execution_count": 5,
  39      "metadata": {},
  40      "output_type": "execute_result"
  41     }
  42    ],
  43    "source": [
  44     "with open('enable1.txt') as f:\n",
  45     "    words = [line.strip() for line in f]"
  46    ]
  47   },
  48   {
  49    "cell_type": "code",
  50    "execution_count": 6,
  51    "id": "1e75fba2",
  52    "metadata": {},
  53    "outputs": [],
  54    "source": [
  55     "words = set(w for w in words \n",
  56     "            if len(w) >= 4\n",
  57     "            if len(frozenset(w)) <= 7\n",
  58     "            if 's' not in w)"
  59    ]
  60   },
  61   {
  62    "cell_type": "code",
  63    "execution_count": 8,
  64    "id": "e1f9b35e",
  65    "metadata": {},
  66    "outputs": [
  67     {
  68      "data": {
  69       "text/plain": [
  70        "21661"
  71       ]
  72      },
  73      "execution_count": 8,
  74      "metadata": {},
  75      "output_type": "execute_result"
  76     }
  77    ],
  78    "source": [
  79     "word_sets = collections.defaultdict(set)\n",
  80     "for w in words:\n",
  81     "    letters = frozenset(w)\n",
  82     "    word_sets[letters].add(w)"
  83    ]
  84   },
  85   {
  86    "cell_type": "code",
  87    "execution_count": 11,
  88    "id": "267130ba",
  89    "metadata": {},
  90    "outputs": [],
  91    "source": [
  92     "@dataclass(frozen=True)\n",
  93     "class Honeycomb():\n",
  94     "    mandatory: str\n",
  95     "    letters: frozenset\n",
  96     "      \n",
  97     "    def __repr__(self):\n",
  98     "        return f'{self.mandatory}|{\"\".join(sorted(self.letters - set(self.mandatory)))}'"
  99    ]
 100   },
 101   {
 102    "cell_type": "code",
 103    "execution_count": 13,
 104    "id": "bb848e88",
 105    "metadata": {},
 106    "outputs": [],
 107    "source": [
 108     "def present(target, honeycomb):\n",
 109     "    return (   (honeycomb.mandatory in target) \n",
 110     "           and target.issubset(honeycomb.letters)\n",
 111     "           )"
 112    ]
 113   },
 114   {
 115    "cell_type": "code",
 116    "execution_count": 17,
 117    "id": "3c6f212e",
 118    "metadata": {},
 119    "outputs": [],
 120    "source": [
 121     "def score(present_set):\n",
 122     "    score = 0\n",
 123     "    for w in word_sets[present_set]:\n",
 124     "        if len(w) == 4:\n",
 125     "            score += 1\n",
 126     "        else:\n",
 127     "            score += len(w)\n",
 128     "    if len(present_set) == 7:\n",
 129     "        score += len(word_sets[present_set]) * 7\n",
 130     "    return score"
 131    ]
 132   },
 133   {
 134    "cell_type": "code",
 135    "execution_count": 22,
 136    "id": "78d423a5",
 137    "metadata": {},
 138    "outputs": [],
 139    "source": [
 140     "def score_honeycomb(honeycomb):\n",
 141     "    return sum(sc for letters, sc in scored_sets.items() \n",
 142     "               # if honeycomb.mandatory in letters\n",
 143     "               # if letters.issubset(honeycomb.letters)\n",
 144     "               if present(letters, honeycomb)\n",
 145     "              )"
 146    ]
 147   },
 148   {
 149    "cell_type": "code",
 150    "execution_count": 30,
 151    "id": "febee1c1",
 152    "metadata": {},
 153    "outputs": [
 154     {
 155      "name": "stdout",
 156      "output_type": "stream",
 157      "text": [
 158       "pangram sets: 7986\n"
 159      ]
 160     }
 161    ],
 162    "source": [
 163     "pangram_sets = [s for s in word_sets.keys() if len(s) == 7 if 's' not in s]\n",
 164     "print(\"pangram sets:\", len(pangram_sets))"
 165    ]
 166   },
 167   {
 168    "cell_type": "code",
 169    "execution_count": 31,
 170    "id": "54a06bdf",
 171    "metadata": {},
 172    "outputs": [],
 173    "source": [
 174     "with open('pangaram_words.txt', 'w') as f:\n",
 175     "    for s in pangram_sets:\n",
 176     "        for w in word_sets[s]:\n",
 177     "            f.write(f'{w}\\n')"
 178    ]
 179   },
 180   {
 181    "cell_type": "code",
 182    "execution_count": 32,
 183    "id": "007d3404",
 184    "metadata": {},
 185    "outputs": [
 186     {
 187      "name": "stdout",
 188      "output_type": "stream",
 189      "text": [
 190       "55902 honeycombs\n"
 191      ]
 192     }
 193    ],
 194    "source": [
 195     "# ps_honeycombs = []\n",
 196     "\n",
 197     "# for ps in pangram_sets:\n",
 198     "#     for mand in ps:\n",
 199     "#         ps_honeycombs.append(Honeycomb(mandatory=mand, letters=ps))\n",
 200     "\n",
 201     "ps_honeycombs = [Honeycomb(mandatory=mand, letters=ps) \n",
 202     "                 for ps in pangram_sets\n",
 203     "                 for mand in ps]\n",
 204     "print(len(ps_honeycombs), \"honeycombs\")"
 205    ]
 206   },
 207   {
 208    "cell_type": "code",
 209    "execution_count": 38,
 210    "id": "914fece8",
 211    "metadata": {},
 212    "outputs": [
 213     {
 214      "data": {
 215       "text/plain": [
 216        "26"
 217       ]
 218      },
 219      "execution_count": 38,
 220      "metadata": {},
 221      "output_type": "execute_result"
 222     }
 223    ],
 224    "source": [
 225     "partitioned_scored_sets = {l: {s: scored_sets[s] for s in scored_sets if l in s} \n",
 226     "                           for l in string.ascii_lowercase}"
 227    ]
 228   },
 229   {
 230    "cell_type": "code",
 231    "execution_count": 39,
 232    "id": "f1454ccd",
 233    "metadata": {},
 234    "outputs": [],
 235    "source": [
 236     "def partitioned_score_honeycomb(honeycomb):\n",
 237     "    return sum(sc for letters, sc in partitioned_scored_sets[honeycomb.mandatory].items() \n",
 238     "               if letters.issubset(honeycomb.letters)\n",
 239     "              )"
 240    ]
 241   },
 242   {
 243    "cell_type": "code",
 244    "execution_count": 40,
 245    "id": "7380dbd6",
 246    "metadata": {},
 247    "outputs": [
 248     {
 249      "name": "stdout",
 250      "output_type": "stream",
 251      "text": [
 252       "r|aegint 3898\n",
 253       "r|aegint 3898\n",
 254       "r|aegint 3898\n",
 255       "r|aegint 3898\n",
 256       "r|aegint 3898\n",
 257       "r|aegint 3898\n",
 258       "r|aegint 3898\n",
 259       "r|aegint 3898\n",
 260       "1min 25s ± 2.51 s per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
 261      ]
 262     }
 263    ],
 264    "source": [
 265     "# # %%timeit\n",
 266     "# best_honyecomb = max(ps_honeycombs, key=partitioned_score_honeycomb)\n",
 267     "# print(best_honyecomb, partitioned_score_honeycomb(best_honyecomb))"
 268    ]
 269   },
 270   {
 271    "cell_type": "code",
 272    "execution_count": 44,
 273    "id": "0239b444-1240-4744-8547-c41367527785",
 274    "metadata": {},
 275    "outputs": [
 276     {
 277      "name": "stdout",
 278      "output_type": "stream",
 279      "text": [
 280       "r|aegint 3898\n",
 281       "r|aegint 3898\n",
 282       "r|aegint 3898\n",
 283       "r|aegint 3898\n",
 284       "r|aegint 3898\n",
 285       "r|aegint 3898\n",
 286       "r|aegint 3898\n",
 287       "r|aegint 3898\n",
 288       "5min 38s ± 4.58 s per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
 289      ]
 290     }
 291    ],
 292    "source": [
 293     "# %%timeit\n",
 294     "best_honyecomb = max(ps_honeycombs, key=score_honeycomb)\n",
 295     "print(best_honyecomb, partitioned_score_honeycomb(best_honyecomb))"
 296    ]
 297   },
 298   {
 299    "cell_type": "code",
 300    "execution_count": 46,
 301    "id": "7e4173b4-26a9-4198-b572-d57db321fe94",
 302    "metadata": {
 303     "lines_to_next_cell": 2
 304    },
 305    "outputs": [
 306     {
 307      "name": "stdout",
 308      "output_type": "stream",
 309      "text": [
 310       "         1612136594 function calls in 589.284 seconds\n",
 311       "\n",
 312       "   Ordered by: standard name\n",
 313       "\n",
 314       "   ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n",
 315       "    55902    0.316    0.000  589.070    0.011 1101428662.py:1(score_honeycomb)\n",
 316       "  1846420  257.034    0.000  588.167    0.000 1101428662.py:2(<genexpr>)\n",
 317       "1210893222  276.103    0.000  331.132    0.000 1250517269.py:1(present)\n",
 318       "        1    0.000    0.000  589.284  589.284 <string>:1(<module>)\n",
 319       "        1    0.000    0.000  589.284  589.284 {built-in method builtins.exec}\n",
 320       "        1    0.214    0.214  589.284  589.284 {built-in method builtins.max}\n",
 321       "    55902    0.567    0.000  588.733    0.011 {built-in method builtins.sum}\n",
 322       "        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
 323       "399229242   55.030    0.000   55.030    0.000 {method 'issubset' of 'frozenset' objects}\n",
 324       "    55902    0.021    0.000    0.021    0.000 {method 'items' of 'dict' objects}\n",
 325       "\n",
 326       "\n"
 327      ]
 328     }
 329    ],
 330    "source": [
 331     "# cProfile.run('max(ps_honeycombs, key=score_honeycomb)')"
 332    ]
 333   },
 334   {
 335    "cell_type": "code",
 336    "execution_count": 47,
 337    "id": "8f2de1b8-6a09-44eb-af7b-3e16c902859f",
 338    "metadata": {
 339     "lines_to_next_cell": 2
 340    },
 341    "outputs": [
 342     {
 343      "name": "stdout",
 344      "output_type": "stream",
 345      "text": [
 346       "         401243372 function calls in 150.954 seconds\n",
 347       "\n",
 348       "   Ordered by: standard name\n",
 349       "\n",
 350       "   ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n",
 351       "    55902    0.226    0.000  150.835    0.003 346452243.py:1(partitioned_score_honeycomb)\n",
 352       "  1846420   86.829    0.000  150.189    0.000 346452243.py:2(<genexpr>)\n",
 353       "        1    0.000    0.000  150.954  150.954 <string>:1(<module>)\n",
 354       "        1    0.000    0.000  150.954  150.954 {built-in method builtins.exec}\n",
 355       "        1    0.119    0.119  150.954  150.954 {built-in method builtins.max}\n",
 356       "    55902    0.407    0.000  150.596    0.003 {built-in method builtins.sum}\n",
 357       "        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}\n",
 358       "399229242   63.360    0.000   63.360    0.000 {method 'issubset' of 'frozenset' objects}\n",
 359       "    55902    0.013    0.000    0.013    0.000 {method 'items' of 'dict' objects}\n",
 360       "\n",
 361       "\n"
 362      ]
 363     }
 364    ],
 365    "source": [
 366     "# cProfile.run('max(ps_honeycombs, key=partitioned_score_honeycomb)')"
 367    ]
 368   },
 369   {
 370    "cell_type": "code",
 371    "execution_count": null,
 372    "id": "4c3a5684-346b-4cf7-b238-391f9a6ba2dd",
 373    "metadata": {},
 374    "outputs": [],
 375    "source": []
 376   }
 377  ],
 378  "metadata": {
 379   "jupytext": {
 380    "formats": "ipynb,py:light"
 381   },
 382   "kernelspec": {
 383    "display_name": "Python 3 (ipykernel)",
 384    "language": "python",
 385    "name": "python3"
 386   },
 387   "language_info": {
 388    "codemirror_mode": {
 389     "name": "ipython",
 390     "version": 3
 391    },
 392    "file_extension": ".py",
 393    "mimetype": "text/x-python",
 394    "name": "python",
 395    "nbconvert_exporter": "python",
 396    "pygments_lexer": "ipython3",
 397    "version": "3.8.8"
 398   }
 399  },
 400  "nbformat": 4,
 401  "nbformat_minor": 5
 402 }