--- /dev/null
+{-# LANGUAGE DeriveGeneric #-}
+
+import GHC.Generics (Generic)
+import Data.Hashable
+import qualified Data.HashSet as S
+import qualified Data.HashMap.Strict as M
+import Data.HashMap.Strict ((!))
+import Data.List
+import Data.Function
+
+type LetterSet = S.HashSet Char
+type WordSet = M.HashMap LetterSet (S.HashSet String)
+type ScoredSet = M.HashMap LetterSet Int
+type PartitionedScoredSet = M.HashMap Char ScoredSet
+
+data Honeycomb = Honeycomb Char LetterSet
+ deriving (Show, Eq, Generic)
+instance Hashable Honeycomb
+
+main = do
+ lines <- readFile "enable1.txt"
+ let allWords = [w | w <- words lines,
+ length w >= 4,
+ (S.size $ S.fromList w) <= 7,
+ 's' `notElem` w]
+ print $ length allWords
+ print $ head allWords
+ let wordSets = mkWordSets allWords
+ print $ M.size wordSets
+ print $ wordSets ! (S.fromList "elephant")
+ let hc = Honeycomb 'g' (S.fromList "apxmelg")
+ let ps = filter (\t -> present t hc) $ M.keys wordSets
+ print ps
+ print $ sum $ map (scoreLetterSet wordSets) ps
+ let scoredSets = M.mapWithKey (\ls _ -> scoreLetterSet wordSets ls) wordSets
+ let partScoredSets = mkPartitionedScoredSets scoredSets
+ print $ scoreHoneycomb scoredSets hc
+ print $ M.size partScoredSets
+ let pangramSets = S.filter (\k -> (S.size k == 7) && (not ('s' `S.member` k))) $ M.keysSet scoredSets
+ print $ S.size pangramSets
+ let plausibleHoneycombs = mkPlausibleHoneycombs pangramSets
+ print $ S.size plausibleHoneycombs
+ -- let bestHoneycomb = maximumBy (compare `on` (scoreHoneycomb scoredSets))
+ -- (S.toList plausibleHoneycombs)
+ -- let bestHoneycomb = maximumBy (compare `on` (scoreHoneycombP partScoredSets))
+ -- (S.toList plausibleHoneycombs)
+ let bestHoneycomb = findBestHoneycomb partScoredSets plausibleHoneycombs
+ print bestHoneycomb
+
+
+mkWordSets :: [String] -> WordSet
+mkWordSets ws = foldr addWord M.empty ws
+ where addWord w = M.insertWith S.union (S.fromList w) (S.singleton w)
+
+present :: LetterSet -> Honeycomb -> Bool
+present target (Honeycomb mandatory letters) =
+ (mandatory `S.member` target) && (target `isSubsetOf` letters)
+
+isSubsetOf smaller larger =
+ S.null $ smaller `S.difference` larger
+ -- S.foldr (\elem val -> val && (elem `S.member` larger))
+ -- True smaller
+
+scoreLetterSet :: WordSet -> LetterSet -> Int
+scoreLetterSet wordSets letterSet = bonus + (sum $ fmap scoreAWord (S.toList scoringWords))
+ where scoringWords = wordSets ! letterSet
+ scoreAWord w = if length w == 4 then 1 else length w
+ bonus = if (S.size letterSet) == 7 then (S.size scoringWords) * 7 else 0
+
+mkPartitionedScoredSets scoredSets = M.fromList [(c, scoreSetWithLetter c) | c <- ['a'..'z']]
+ where scoreSetWithLetter c = M.filterWithKey (\k _ -> c `S.member` k) scoredSets
+
+scoreHoneycomb :: ScoredSet -> Honeycomb -> Int
+scoreHoneycomb scoredSets (Honeycomb mandatory letters) = sum(validScores)
+ where hasMand = M.filterWithKey (\k _ -> mandatory `S.member` k) scoredSets
+ hasLetters = M.filterWithKey (\k _ -> k `isSubsetOf` letters) hasMand
+ validScores = M.elems hasLetters
+
+scoreHoneycombP :: PartitionedScoredSet -> Honeycomb -> Int
+scoreHoneycombP scoredSets (Honeycomb mandatory letters) = sum(validScores)
+ where hasMand = scoredSets ! mandatory
+ hasLetters = M.filterWithKey (\k _ -> k `isSubsetOf` letters) hasMand
+ validScores = M.elems hasLetters
+
+mkPlausibleHoneycombs :: S.HashSet LetterSet -> S.HashSet Honeycomb
+mkPlausibleHoneycombs pangramSets = S.foldr S.union S.empty honeycombSets
+ where honeycombSets = S.map honeycombsOfLetters pangramSets
+ honeycombsOfLetters ls = S.map (\l -> Honeycomb l ls) ls
+
+
+findBestHoneycomb partScoredSets honeycombs = S.foldr (betterHc partScoredSets) (0, initHc) honeycombs
+ where initHc = Honeycomb 'a' $ S.singleton 'a'
+
+betterHc partScoredSets hc (bestScore, bestHc) =
+ if thisScore > bestScore
+ then (thisScore, hc)
+ else (bestScore, bestHc)
+ where thisScore = scoreHoneycombP partScoredSets hc
--- /dev/null
+import qualified Data.Set as S
+import qualified Data.Map.Strict as M
+import Data.Map.Strict ((!))
+import Data.List
+import Data.Function
+
+type LetterSet = S.Set Char
+type WordSet = M.Map LetterSet (S.Set String)
+type ScoredSet = M.Map LetterSet Int
+type PartitionedScoredSet = M.Map Char ScoredSet
+
+data Honeycomb = Honeycomb Char LetterSet
+ deriving (Show, Eq, Ord)
+
+main = do
+ lines <- readFile "enable1.txt"
+ let allWords = [w | w <- words lines,
+ length w >= 4,
+ (S.size $ S.fromList w) <= 7,
+ 's' `notElem` w]
+ let wordSets = mkWordSets allWords
+ let scoredSets = M.mapWithKey (\ls _ -> scoreLetterSet wordSets ls) wordSets
+ let partScoredSets = mkPartitionedScoredSets scoredSets
+ let pangramSets = S.filter (\k -> (S.size k == 7) && (not ('s' `S.member` k))) $ M.keysSet scoredSets
+ let plausibleHoneycombs = mkPlausibleHoneycombs pangramSets
+ -- this takes 6 minutes to execute
+ -- let bestHoneycomb = maximumBy (compare `on` (scoreHoneycombP partScoredSets))
+ -- (S.toList plausibleHoneycombs)
+
+ -- this takes 2 minutes to execute
+ let bestHoneycomb = findBestHoneycomb partScoredSets plausibleHoneycombs
+ print bestHoneycomb
+
+
+mkWordSets :: [String] -> WordSet
+mkWordSets ws = foldr addWord M.empty ws
+ where addWord w = M.insertWith S.union (S.fromList w) (S.singleton w)
+
+present :: LetterSet -> Honeycomb -> Bool
+present target (Honeycomb mandatory letters) =
+ (mandatory `S.member` target) && (target `S.isSubsetOf` letters)
+
+scoreLetterSet :: WordSet -> LetterSet -> Int
+scoreLetterSet wordSets letterSet = bonus + (sum $ fmap scoreAWord (S.toList scoringWords))
+ where scoringWords = wordSets ! letterSet
+ scoreAWord w = if length w == 4 then 1 else length w
+ bonus = if (S.size letterSet) == 7 then (S.size scoringWords) * 7 else 0
+
+mkPartitionedScoredSets scoredSets = M.fromList [(c, scoreSetWithLetter c) | c <- ['a'..'z']]
+ where scoreSetWithLetter c = M.filterWithKey (\k _ -> c `S.member` k) scoredSets
+
+scoreHoneycomb :: ScoredSet -> Honeycomb -> Int
+scoreHoneycomb scoredSets (Honeycomb mandatory letters) = sum(validScores)
+ where hasMand = M.filterWithKey (\k _ -> mandatory `S.member` k) scoredSets
+ hasLetters = M.filterWithKey (\k _ -> k `S.isSubsetOf` letters) hasMand
+ validScores = M.elems hasLetters
+
+scoreHoneycombP :: PartitionedScoredSet -> Honeycomb -> Int
+scoreHoneycombP scoredSets (Honeycomb mandatory letters) = sum(validScores)
+ where hasMand = scoredSets ! mandatory
+ hasLetters = M.filterWithKey (\k _ -> k `S.isSubsetOf` letters) hasMand
+ validScores = M.elems hasLetters
+
+mkPlausibleHoneycombs :: S.Set LetterSet -> S.Set Honeycomb
+mkPlausibleHoneycombs pangramSets = S.foldr S.union S.empty honeycombSets
+ where honeycombSets = S.map honeycombsOfLetters pangramSets
+ honeycombsOfLetters ls = S.map (\l -> Honeycomb l ls) ls
+
+
+findBestHoneycomb partScoredSets honeycombs =
+ S.foldr (betterHc partScoredSets) (0, initHc) honeycombs
+ where initHc = Honeycomb 'a' $ S.singleton 'a'
+
+betterHc partScoredSets hc (bestScore, bestHc) =
+ if thisScore > bestScore
+ then (thisScore, hc)
+ else (bestScore, bestHc)
+ where thisScore = scoreHoneycombP partScoredSets hc
--- /dev/null
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "951180ec",
+ "metadata": {},
+ "source": [
+ "# Honeycomb letter puzzle\n",
+ "Solving the puzzle as posted on [FiveThirtyEight](https://fivethirtyeight.com/features/can-you-solve-the-vexing-vexillology/), also solved by [David Robinson](http://varianceexplained.org/r/honeycomb-puzzle/).\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "6f6fa3e7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import string\n",
+ "import collections\n",
+ "from dataclasses import dataclass\n",
+ "import itertools"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "7f00bc22",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def only_letters(text):\n",
+ " return ''.join([c.lower() for c in text if c in string.ascii_letters])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "cb0b4230",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'hello'"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "only_letters('Hell!o')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "9546868c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "101924"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "with open('/usr/share/dict/british-english') as f:\n",
+ " words = [line.rstrip() for line in f]\n",
+ "len(words)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 79,
+ "id": "88f00e4c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "172820"
+ ]
+ },
+ "execution_count": 79,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "with open('enable1.txt') as f:\n",
+ " words = [line.rstrip() for line in f]\n",
+ "len(words)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "id": "1e75fba2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "words = set(only_letters(w) \n",
+ " for w in words \n",
+ " if len(only_letters(w)) >= 4\n",
+ " if len(frozenset(only_letters(w))) <= 7\n",
+ " if 's' not in w)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "id": "49473123",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "44585"
+ ]
+ },
+ "execution_count": 81,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(words)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "id": "e1f9b35e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "21661"
+ ]
+ },
+ "execution_count": 82,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "word_sets = collections.defaultdict(set)\n",
+ "for w in words:\n",
+ " letters = frozenset(w)\n",
+ " word_sets[letters].add(w)\n",
+ "len(word_sets)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 86,
+ "id": "63121d2b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'elephant', 'naphthalene', 'pentathlete'}"
+ ]
+ },
+ "execution_count": 86,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "word_sets[frozenset('elephant')]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 87,
+ "id": "267130ba",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "@dataclass(frozen=True)\n",
+ "class Honeycomb():\n",
+ " mandatory: str\n",
+ " letters: frozenset\n",
+ " \n",
+ " def __repr__(self):\n",
+ " return f'{self.mandatory}|{\"\".join(sorted(self.letters - set(self.mandatory)))}'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 88,
+ "id": "0ca00165",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "g|aelmpx"
+ ]
+ },
+ "execution_count": 88,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "honeycomb = Honeycomb(mandatory='g', letters=frozenset('apxmelg'))\n",
+ "honeycomb"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 89,
+ "id": "bb848e88",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def present(honeycomb, target):\n",
+ " return (honeycomb.mandatory in target) and target.issubset(honeycomb.letters)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 90,
+ "id": "add4f445",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "14"
+ ]
+ },
+ "execution_count": 90,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "present_sets = [s for s in word_sets if present(honeycomb, s)]\n",
+ "len(present_sets)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 91,
+ "id": "3354f6d7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[frozenset({'a', 'e', 'g', 'p'}),\n",
+ " frozenset({'a', 'e', 'g', 'm'}),\n",
+ " frozenset({'a', 'g', 'm'}),\n",
+ " frozenset({'a', 'e', 'g', 'l', 'p'}),\n",
+ " frozenset({'a', 'e', 'g', 'l'}),\n",
+ " frozenset({'a', 'e', 'g'}),\n",
+ " frozenset({'a', 'g', 'l'}),\n",
+ " frozenset({'a', 'g', 'l', 'm'}),\n",
+ " frozenset({'a', 'e', 'g', 'l', 'm'}),\n",
+ " frozenset({'a', 'g'}),\n",
+ " frozenset({'a', 'g', 'l', 'x'}),\n",
+ " frozenset({'e', 'g', 'l'}),\n",
+ " frozenset({'a', 'g', 'l', 'p'}),\n",
+ " frozenset({'a', 'g', 'm', 'p'})]"
+ ]
+ },
+ "execution_count": 91,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "present_sets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 92,
+ "id": "2e85ce06",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'peag', 'agapae', 'peage', 'gape', 'agape', 'page'}\n",
+ "{'game', 'gemmae', 'mage', 'gemma'}\n",
+ "{'magma', 'agma', 'agama', 'gamma', 'gama'}\n",
+ "{'pelage', 'plage'}\n",
+ "{'eagle', 'algae', 'galeae', 'aglee', 'allege', 'legal', 'galea', 'gale', 'gaggle', 'egal'}\n",
+ "{'gage', 'agee'}\n",
+ "{'gala', 'gall', 'alga', 'algal'}\n",
+ "{'amalgam'}\n",
+ "{'agleam', 'gleam'}\n",
+ "{'gaga'}\n",
+ "{'galax'}\n",
+ "{'glee', 'gelee', 'gleg'}\n",
+ "{'plagal'}\n",
+ "{'gamp'}\n"
+ ]
+ }
+ ],
+ "source": [
+ "for s in present_sets:\n",
+ " print(word_sets[s])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 93,
+ "id": "3c6f212e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def score(present_set):\n",
+ " score = 0\n",
+ " for w in word_sets[present_set]:\n",
+ " if len(w) == 4:\n",
+ " score += 1\n",
+ " else:\n",
+ " score += len(w)\n",
+ " if len(present_set) == 7:\n",
+ " score += len(word_sets[present_set]) * 7\n",
+ " return score"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 94,
+ "id": "01c3743c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "153"
+ ]
+ },
+ "execution_count": 94,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sum(score(present_set) for present_set in present_sets)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 95,
+ "id": "1f45f6f5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "False"
+ ]
+ },
+ "execution_count": 95,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "set('megaplex') in words"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 96,
+ "id": "979d9ed5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "scored_sets = {s: score(s) for s in word_sets}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 97,
+ "id": "790b7303",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "frozenset({'e', 'a', 'p', 'g'}) {'peag', 'agapae', 'peage', 'gape', 'agape', 'page'} 19 19\n",
+ "frozenset({'m', 'a', 'e', 'g'}) {'game', 'gemmae', 'mage', 'gemma'} 13 13\n",
+ "frozenset({'m', 'a', 'g'}) {'magma', 'agma', 'agama', 'gamma', 'gama'} 17 17\n",
+ "frozenset({'p', 'l', 'a', 'g', 'e'}) {'pelage', 'plage'} 11 11\n",
+ "frozenset({'a', 'l', 'e', 'g'}) {'eagle', 'algae', 'galeae', 'aglee', 'allege', 'legal', 'galea', 'gale', 'gaggle', 'egal'} 45 45\n",
+ "frozenset({'a', 'e', 'g'}) {'gage', 'agee'} 2 2\n",
+ "frozenset({'a', 'l', 'g'}) {'gala', 'gall', 'alga', 'algal'} 8 8\n",
+ "frozenset({'m', 'a', 'l', 'g'}) {'amalgam'} 7 7\n",
+ "frozenset({'m', 'a', 'l', 'g', 'e'}) {'agleam', 'gleam'} 11 11\n",
+ "frozenset({'a', 'g'}) {'gaga'} 1 1\n",
+ "frozenset({'a', 'l', 'x', 'g'}) {'galax'} 5 5\n",
+ "frozenset({'l', 'e', 'g'}) {'glee', 'gelee', 'gleg'} 7 7\n",
+ "frozenset({'l', 'a', 'p', 'g'}) {'plagal'} 6 6\n",
+ "frozenset({'m', 'a', 'p', 'g'}) {'gamp'} 1 1\n"
+ ]
+ }
+ ],
+ "source": [
+ "for s in present_sets:\n",
+ " print(s, word_sets[s], score(s), scored_sets[s])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 98,
+ "id": "78d423a5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def score_honeycomb(honeycomb):\n",
+ " return sum(sc for letters, sc in scored_sets.items() \n",
+ " if honeycomb.mandatory in letters\n",
+ " if letters.issubset(honeycomb.letters)\n",
+ "# if present(honeycomb, letters)\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 99,
+ "id": "3c7c7a83",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "153"
+ ]
+ },
+ "execution_count": 99,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "score_honeycomb(honeycomb)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 100,
+ "id": "d53c4d64",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# hcs = []\n",
+ "\n",
+ "# for mand in 'abcde':\n",
+ "# remaining = set('abcde') - set(mand)\n",
+ "# for others in itertools.combinations(remaining, r=3):\n",
+ "# hcs.append(Honeycomb(mandatory=mand, letters=frozenset(others) | frozenset(mand)))\n",
+ "\n",
+ "# print(len(hcs))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 101,
+ "id": "d967a3df",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# hcs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 102,
+ "id": "4a07a6b7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# honeycombs = []\n",
+ "\n",
+ "# for mand in string.ascii_lowercase:\n",
+ "# remaining = set(string.ascii_lowercase) - set(mand)\n",
+ "# for others in itertools.combinations(remaining, r=6):\n",
+ "# honeycombs.append(Honeycomb(mandatory=mand, letters=frozenset(others) | frozenset(mand)))\n",
+ "\n",
+ "# print(len(honeycombs))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 103,
+ "id": "14c38054",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# honeycombs = []\n",
+ "\n",
+ "# candidate_letters = set(string.ascii_lowercase)\n",
+ "# candidate_letters.remove('s')\n",
+ "# candidate_letters = frozenset(candidate_letters)\n",
+ "\n",
+ "# for mand in candidate_letters:\n",
+ "# remaining = candidate_letters - set(mand)\n",
+ "# for others in itertools.combinations(remaining, r=6):\n",
+ "# honeycombs.append(Honeycomb(mandatory=mand, letters=frozenset(others) | frozenset(mand)))\n",
+ "\n",
+ "# print(len(honeycombs))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 104,
+ "id": "f8ea5307",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# [(h, score_honeycomb(h)) for h in honeycombs[:5]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 105,
+ "id": "4f2118dc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# %%timeit\n",
+ "# max(honeycombs, key=score_honeycomb)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 106,
+ "id": "febee1c1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "7986"
+ ]
+ },
+ "execution_count": 106,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pangram_sets = [s for s in word_sets.keys() if len(s) == 7 if 's' not in s]\n",
+ "len(pangram_sets)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 107,
+ "id": "54a06bdf",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "55902\n"
+ ]
+ }
+ ],
+ "source": [
+ "ps_honeycombs = []\n",
+ "\n",
+ "for ps in pangram_sets:\n",
+ " for mand in ps:\n",
+ " ps_honeycombs.append(Honeycomb(mandatory=mand, letters=ps))\n",
+ "\n",
+ "print(len(ps_honeycombs))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 108,
+ "id": "301b3cd0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# %%timeit\n",
+ "# max(ps_honeycombs, key=score_honeycomb)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 109,
+ "id": "653613ac",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "## 1 a,e,g,i,n,r,t r 3898\n",
+ "## 2 a,e,g,i,n,r,t n 3782\n",
+ "## 3 a,e,g,i,n,r,t e 3769"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 110,
+ "id": "3cdbd956",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3898"
+ ]
+ },
+ "execution_count": 110,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "score_honeycomb(Honeycomb('r', frozenset('aeginrt')))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 111,
+ "id": "5f06f87b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3782"
+ ]
+ },
+ "execution_count": 111,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "score_honeycomb(Honeycomb('n', frozenset('aeginrtn')))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 112,
+ "id": "ab6bc64e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3769"
+ ]
+ },
+ "execution_count": 112,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "score_honeycomb(Honeycomb('e', frozenset('aeginrte')))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 113,
+ "id": "914fece8",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "26"
+ ]
+ },
+ "execution_count": 113,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "partitioned_scored_sets = {l: {s: scored_sets[s] for s in scored_sets if l in s} \n",
+ " for l in string.ascii_lowercase}\n",
+ "len(partitioned_scored_sets)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 114,
+ "id": "f1454ccd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def partitioned_score_honeycomb(honeycomb):\n",
+ " return sum(sc for letters, sc in partitioned_scored_sets[honeycomb.mandatory].items() \n",
+ " if letters.issubset(honeycomb.letters)\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 115,
+ "id": "7380dbd6",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "r|aegint\n"
+ ]
+ }
+ ],
+ "source": [
+ "# %%timeit\n",
+ "print(max(ps_honeycombs, key=partitioned_score_honeycomb))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "id": "51d0317c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "13"
+ ]
+ },
+ "execution_count": 74,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# partitioned_score_honeycomb(Honeycomb('a', 'abc'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 76,
+ "id": "77d74a74-55e9-498b-abdf-b02c1f7f01a3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "19941"
+ ]
+ },
+ "execution_count": 76,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# max(len(partitioned_scored_sets[k]) for k in partitioned_scored_sets)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "id": "35b58dec-7138-4fdb-86cc-80f3eb6a555a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "37313"
+ ]
+ },
+ "execution_count": 77,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# len(scored_sets)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7e4173b4-26a9-4198-b572-d57db321fe94",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "jupytext": {
+ "formats": "ipynb,auto:light"
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- /dev/null
+# ---
+# jupyter:
+# jupytext:
+# formats: ipynb,py:light
+# text_representation:
+# extension: .py
+# format_name: light
+# format_version: '1.5'
+# jupytext_version: 1.11.1
+# kernelspec:
+# display_name: Python 3 (ipykernel)
+# language: python
+# name: python3
+# ---
+
+# # Honeycomb letter puzzle
+# Solving the puzzle as posted on [FiveThirtyEight](https://fivethirtyeight.com/features/can-you-solve-the-vexing-vexillology/), also solved by [David Robinson](http://varianceexplained.org/r/honeycomb-puzzle/).
+#
+
+import string
+import collections
+from dataclasses import dataclass
+import itertools
+
+
+def only_letters(text):
+ return ''.join([c.lower() for c in text if c in string.ascii_letters])
+
+
+only_letters('Hell!o')
+
+with open('/usr/share/dict/british-english') as f:
+ words = [line.rstrip() for line in f]
+len(words)
+
+with open('enable1.txt') as f:
+ words = [line.rstrip() for line in f]
+len(words)
+
+words = set(only_letters(w)
+ for w in words
+ if len(only_letters(w)) >= 4
+ if len(frozenset(only_letters(w))) <= 7
+ if 's' not in w)
+
+len(words)
+
+word_sets = collections.defaultdict(set)
+for w in words:
+ letters = frozenset(w)
+ word_sets[letters].add(w)
+len(word_sets)
+
+word_sets[frozenset('elephant')]
+
+
+@dataclass(frozen=True)
+class Honeycomb():
+ mandatory: str
+ letters: frozenset
+
+ def __repr__(self):
+ return f'{self.mandatory}|{"".join(sorted(self.letters - set(self.mandatory)))}'
+
+
+honeycomb = Honeycomb(mandatory='g', letters=frozenset('apxmelg'))
+honeycomb
+
+
+def present(honeycomb, target):
+ return (honeycomb.mandatory in target) and target.issubset(honeycomb.letters)
+
+
+present_sets = [s for s in word_sets if present(honeycomb, s)]
+len(present_sets)
+
+present_sets
+
+for s in present_sets:
+ print(word_sets[s])
+
+
+def score(present_set):
+ score = 0
+ for w in word_sets[present_set]:
+ if len(w) == 4:
+ score += 1
+ else:
+ score += len(w)
+ if len(present_set) == 7:
+ score += len(word_sets[present_set]) * 7
+ return score
+
+
+sum(score(present_set) for present_set in present_sets)
+
+set('megaplex') in words
+
+scored_sets = {s: score(s) for s in word_sets}
+
+for s in present_sets:
+ print(s, word_sets[s], score(s), scored_sets[s])
+
+
+def score_honeycomb(honeycomb):
+ return sum(sc for letters, sc in scored_sets.items()
+ if honeycomb.mandatory in letters
+ if letters.issubset(honeycomb.letters)
+# if present(honeycomb, letters)
+ )
+
+
+score_honeycomb(honeycomb)
+
+# +
+# hcs = []
+
+# for mand in 'abcde':
+# remaining = set('abcde') - set(mand)
+# for others in itertools.combinations(remaining, r=3):
+# hcs.append(Honeycomb(mandatory=mand, letters=frozenset(others) | frozenset(mand)))
+
+# print(len(hcs))
+
+# +
+# hcs
+
+# +
+# honeycombs = []
+
+# for mand in string.ascii_lowercase:
+# remaining = set(string.ascii_lowercase) - set(mand)
+# for others in itertools.combinations(remaining, r=6):
+# honeycombs.append(Honeycomb(mandatory=mand, letters=frozenset(others) | frozenset(mand)))
+
+# print(len(honeycombs))
+
+# +
+# honeycombs = []
+
+# candidate_letters = set(string.ascii_lowercase)
+# candidate_letters.remove('s')
+# candidate_letters = frozenset(candidate_letters)
+
+# for mand in candidate_letters:
+# remaining = candidate_letters - set(mand)
+# for others in itertools.combinations(remaining, r=6):
+# honeycombs.append(Honeycomb(mandatory=mand, letters=frozenset(others) | frozenset(mand)))
+
+# print(len(honeycombs))
+
+# +
+# [(h, score_honeycomb(h)) for h in honeycombs[:5]]
+
+# +
+# # %%timeit
+# max(honeycombs, key=score_honeycomb)
+# -
+
+pangram_sets = [s for s in word_sets.keys() if len(s) == 7 if 's' not in s]
+len(pangram_sets)
+
+# +
+ps_honeycombs = []
+
+for ps in pangram_sets:
+ for mand in ps:
+ ps_honeycombs.append(Honeycomb(mandatory=mand, letters=ps))
+
+print(len(ps_honeycombs))
+
+# +
+# # %%timeit
+# max(ps_honeycombs, key=score_honeycomb)
+
+# +
+## 1 a,e,g,i,n,r,t r 3898
+## 2 a,e,g,i,n,r,t n 3782
+## 3 a,e,g,i,n,r,t e 3769
+# -
+
+score_honeycomb(Honeycomb('r', frozenset('aeginrt')))
+
+score_honeycomb(Honeycomb('n', frozenset('aeginrtn')))
+
+score_honeycomb(Honeycomb('e', frozenset('aeginrte')))
+
+partitioned_scored_sets = {l: {s: scored_sets[s] for s in scored_sets if l in s}
+ for l in string.ascii_lowercase}
+len(partitioned_scored_sets)
+
+
+def partitioned_score_honeycomb(honeycomb):
+ return sum(sc for letters, sc in partitioned_scored_sets[honeycomb.mandatory].items()
+ if letters.issubset(honeycomb.letters)
+ )
+
+
+# # %%timeit
+print(max(ps_honeycombs, key=partitioned_score_honeycomb))
+
+# +
+# partitioned_score_honeycomb(Honeycomb('a', 'abc'))
+
+# +
+# max(len(partitioned_scored_sets[k]) for k in partitioned_scored_sets)
+
+# +
+# len(scored_sets)
+# -
+
+