X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=honeycomb_puzzle.ipynb;fp=honeycomb_puzzle.ipynb;h=2daf2b2c8bf4ff1f2b53d73ca5941e6186f597a6;hb=978aa4af1b4a8753d0cd04f7fcceaf6d899bca79;hp=88e69cb354055b854790c384021824015a0d34e3;hpb=68b4df6b83ce0f6bea2379d3f46d7296dd36a3c9;p=honeycomb-puzzle.git diff --git a/honeycomb_puzzle.ipynb b/honeycomb_puzzle.ipynb index 88e69cb..2daf2b2 100644 --- a/honeycomb_puzzle.ipynb +++ b/honeycomb_puzzle.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 45, "id": "6f6fa3e7", "metadata": {}, "outputs": [], @@ -19,67 +19,13 @@ "import string\n", "import collections\n", "from dataclasses import dataclass\n", - "import itertools" + "import itertools\n", + "import cProfile" ] }, { "cell_type": "code", - "execution_count": 2, - "id": "7f00bc22", - "metadata": {}, - "outputs": [], - "source": [ - "def only_letters(text):\n", - " return ''.join([c.lower() for c in text if c in string.ascii_letters])" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "cb0b4230", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'hello'" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "only_letters('Hell!o')" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "9546868c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "101924" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "with open('/usr/share/dict/british-english') as f:\n", - " words = [line.rstrip() for line in f]\n", - "len(words)" - ] - }, - { - "cell_type": "code", - "execution_count": 79, + "execution_count": 5, "id": "88f00e4c", "metadata": {}, "outputs": [ @@ -89,55 +35,32 @@ "172820" ] }, - "execution_count": 79, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with open('enable1.txt') as f:\n", - " words = [line.rstrip() for line in f]\n", - "len(words)" + " words = [line.strip() for line in f]" ] }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 6, "id": "1e75fba2", "metadata": {}, "outputs": [], "source": [ - "words = set(only_letters(w) \n", - " for w in words \n", - " if len(only_letters(w)) >= 4\n", - " if len(frozenset(only_letters(w))) <= 7\n", + "words = set(w for w in words \n", + " if len(w) >= 4\n", + " if len(frozenset(w)) <= 7\n", " if 's' not in w)" ] }, { "cell_type": "code", - "execution_count": 81, - "id": "49473123", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "44585" - ] - }, - "execution_count": 81, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(words)" - ] - }, - { - "cell_type": "code", - "execution_count": 82, + "execution_count": 8, "id": "e1f9b35e", "metadata": {}, "outputs": [ @@ -147,7 +70,7 @@ "21661" ] }, - "execution_count": 82, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -156,34 +79,12 @@ "word_sets = collections.defaultdict(set)\n", "for w in words:\n", " letters = frozenset(w)\n", - " word_sets[letters].add(w)\n", - "len(word_sets)" + " word_sets[letters].add(w)" ] }, { "cell_type": "code", - "execution_count": 86, - "id": "63121d2b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'elephant', 'naphthalene', 'pentathlete'}" - ] - }, - "execution_count": 86, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "word_sets[frozenset('elephant')]" - ] - }, - { - "cell_type": "code", - "execution_count": 87, + "execution_count": 11, "id": "267130ba", "metadata": {}, "outputs": [], @@ -199,128 +100,20 @@ }, { "cell_type": "code", - "execution_count": 88, - "id": "0ca00165", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "g|aelmpx" - ] - }, - "execution_count": 88, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "honeycomb = Honeycomb(mandatory='g', letters=frozenset('apxmelg'))\n", - "honeycomb" - ] - }, - { - "cell_type": "code", - "execution_count": 89, + "execution_count": 13, "id": "bb848e88", "metadata": {}, "outputs": [], "source": [ - "def present(honeycomb, target):\n", - " return (honeycomb.mandatory in target) and target.issubset(honeycomb.letters)" - ] - }, - { - "cell_type": "code", - "execution_count": 90, - "id": "add4f445", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "14" - ] - }, - "execution_count": 90, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "present_sets = [s for s in word_sets if present(honeycomb, s)]\n", - "len(present_sets)" - ] - }, - { - "cell_type": "code", - "execution_count": 91, - "id": "3354f6d7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[frozenset({'a', 'e', 'g', 'p'}),\n", - " frozenset({'a', 'e', 'g', 'm'}),\n", - " frozenset({'a', 'g', 'm'}),\n", - " frozenset({'a', 'e', 'g', 'l', 'p'}),\n", - " frozenset({'a', 'e', 'g', 'l'}),\n", - " frozenset({'a', 'e', 'g'}),\n", - " frozenset({'a', 'g', 'l'}),\n", - " frozenset({'a', 'g', 'l', 'm'}),\n", - " frozenset({'a', 'e', 'g', 'l', 'm'}),\n", - " frozenset({'a', 'g'}),\n", - " frozenset({'a', 'g', 'l', 'x'}),\n", - " frozenset({'e', 'g', 'l'}),\n", - " frozenset({'a', 'g', 'l', 'p'}),\n", - " frozenset({'a', 'g', 'm', 'p'})]" - ] - }, - "execution_count": 91, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "present_sets" + "def present(target, honeycomb):\n", + " return ( (honeycomb.mandatory in target) \n", + " and target.issubset(honeycomb.letters)\n", + " )" ] }, { "cell_type": "code", - "execution_count": 92, - "id": "2e85ce06", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'peag', 'agapae', 'peage', 'gape', 'agape', 'page'}\n", - "{'game', 'gemmae', 'mage', 'gemma'}\n", - "{'magma', 'agma', 'agama', 'gamma', 'gama'}\n", - "{'pelage', 'plage'}\n", - "{'eagle', 'algae', 'galeae', 'aglee', 'allege', 'legal', 'galea', 'gale', 'gaggle', 'egal'}\n", - "{'gage', 'agee'}\n", - "{'gala', 'gall', 'alga', 'algal'}\n", - "{'amalgam'}\n", - "{'agleam', 'gleam'}\n", - "{'gaga'}\n", - "{'galax'}\n", - "{'glee', 'gelee', 'gleg'}\n", - "{'plagal'}\n", - "{'gamp'}\n" - ] - } - ], - "source": [ - "for s in present_sets:\n", - " print(word_sets[s])" - ] - }, - { - "cell_type": "code", - "execution_count": 93, + "execution_count": 17, "id": "3c6f212e", "metadata": {}, "outputs": [], @@ -339,345 +132,81 @@ }, { "cell_type": "code", - "execution_count": 94, - "id": "01c3743c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "153" - ] - }, - "execution_count": 94, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sum(score(present_set) for present_set in present_sets)" - ] - }, - { - "cell_type": "code", - "execution_count": 95, - "id": "1f45f6f5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "False" - ] - }, - "execution_count": 95, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "set('megaplex') in words" - ] - }, - { - "cell_type": "code", - "execution_count": 96, - "id": "979d9ed5", - "metadata": {}, - "outputs": [], - "source": [ - "scored_sets = {s: score(s) for s in word_sets}" - ] - }, - { - "cell_type": "code", - "execution_count": 97, - "id": "790b7303", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "frozenset({'e', 'a', 'p', 'g'}) {'peag', 'agapae', 'peage', 'gape', 'agape', 'page'} 19 19\n", - "frozenset({'m', 'a', 'e', 'g'}) {'game', 'gemmae', 'mage', 'gemma'} 13 13\n", - "frozenset({'m', 'a', 'g'}) {'magma', 'agma', 'agama', 'gamma', 'gama'} 17 17\n", - "frozenset({'p', 'l', 'a', 'g', 'e'}) {'pelage', 'plage'} 11 11\n", - "frozenset({'a', 'l', 'e', 'g'}) {'eagle', 'algae', 'galeae', 'aglee', 'allege', 'legal', 'galea', 'gale', 'gaggle', 'egal'} 45 45\n", - "frozenset({'a', 'e', 'g'}) {'gage', 'agee'} 2 2\n", - "frozenset({'a', 'l', 'g'}) {'gala', 'gall', 'alga', 'algal'} 8 8\n", - "frozenset({'m', 'a', 'l', 'g'}) {'amalgam'} 7 7\n", - "frozenset({'m', 'a', 'l', 'g', 'e'}) {'agleam', 'gleam'} 11 11\n", - "frozenset({'a', 'g'}) {'gaga'} 1 1\n", - "frozenset({'a', 'l', 'x', 'g'}) {'galax'} 5 5\n", - "frozenset({'l', 'e', 'g'}) {'glee', 'gelee', 'gleg'} 7 7\n", - "frozenset({'l', 'a', 'p', 'g'}) {'plagal'} 6 6\n", - "frozenset({'m', 'a', 'p', 'g'}) {'gamp'} 1 1\n" - ] - } - ], - "source": [ - "for s in present_sets:\n", - " print(s, word_sets[s], score(s), scored_sets[s])" - ] - }, - { - "cell_type": "code", - "execution_count": 98, + "execution_count": 22, "id": "78d423a5", "metadata": {}, "outputs": [], "source": [ "def score_honeycomb(honeycomb):\n", " return sum(sc for letters, sc in scored_sets.items() \n", - " if honeycomb.mandatory in letters\n", - " if letters.issubset(honeycomb.letters)\n", - "# if present(honeycomb, letters)\n", + " # if honeycomb.mandatory in letters\n", + " # if letters.issubset(honeycomb.letters)\n", + " if present(letters, honeycomb)\n", " )" ] }, { "cell_type": "code", - "execution_count": 99, - "id": "3c7c7a83", + "execution_count": 30, + "id": "febee1c1", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "153" - ] - }, - "execution_count": 99, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "pangram sets: 7986\n" + ] } ], "source": [ - "score_honeycomb(honeycomb)" - ] - }, - { - "cell_type": "code", - "execution_count": 100, - "id": "d53c4d64", - "metadata": {}, - "outputs": [], - "source": [ - "# hcs = []\n", - "\n", - "# for mand in 'abcde':\n", - "# remaining = set('abcde') - set(mand)\n", - "# for others in itertools.combinations(remaining, r=3):\n", - "# hcs.append(Honeycomb(mandatory=mand, letters=frozenset(others) | frozenset(mand)))\n", - "\n", - "# print(len(hcs))" - ] - }, - { - "cell_type": "code", - "execution_count": 101, - "id": "d967a3df", - "metadata": {}, - "outputs": [], - "source": [ - "# hcs" - ] - }, - { - "cell_type": "code", - "execution_count": 102, - "id": "4a07a6b7", - "metadata": {}, - "outputs": [], - "source": [ - "# honeycombs = []\n", - "\n", - "# for mand in string.ascii_lowercase:\n", - "# remaining = set(string.ascii_lowercase) - set(mand)\n", - "# for others in itertools.combinations(remaining, r=6):\n", - "# honeycombs.append(Honeycomb(mandatory=mand, letters=frozenset(others) | frozenset(mand)))\n", - "\n", - "# print(len(honeycombs))" - ] - }, - { - "cell_type": "code", - "execution_count": 103, - "id": "14c38054", - "metadata": {}, - "outputs": [], - "source": [ - "# honeycombs = []\n", - "\n", - "# candidate_letters = set(string.ascii_lowercase)\n", - "# candidate_letters.remove('s')\n", - "# candidate_letters = frozenset(candidate_letters)\n", - "\n", - "# for mand in candidate_letters:\n", - "# remaining = candidate_letters - set(mand)\n", - "# for others in itertools.combinations(remaining, r=6):\n", - "# honeycombs.append(Honeycomb(mandatory=mand, letters=frozenset(others) | frozenset(mand)))\n", - "\n", - "# print(len(honeycombs))" - ] - }, - { - "cell_type": "code", - "execution_count": 104, - "id": "f8ea5307", - "metadata": {}, - "outputs": [], - "source": [ - "# [(h, score_honeycomb(h)) for h in honeycombs[:5]]" + "pangram_sets = [s for s in word_sets.keys() if len(s) == 7 if 's' not in s]\n", + "print(\"pangram sets:\", len(pangram_sets))" ] }, { "cell_type": "code", - "execution_count": 105, - "id": "4f2118dc", + "execution_count": 31, + "id": "54a06bdf", "metadata": {}, "outputs": [], "source": [ - "# %%timeit\n", - "# max(honeycombs, key=score_honeycomb)" + "with open('pangaram_words.txt', 'w') as f:\n", + " for s in pangram_sets:\n", + " for w in word_sets[s]:\n", + " f.write(f'{w}\\n')" ] }, { "cell_type": "code", - "execution_count": 106, - "id": "febee1c1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "7986" - ] - }, - "execution_count": 106, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pangram_sets = [s for s in word_sets.keys() if len(s) == 7 if 's' not in s]\n", - "len(pangram_sets)" - ] - }, - { - "cell_type": "code", - "execution_count": 107, - "id": "54a06bdf", + "execution_count": 32, + "id": "007d3404", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "55902\n" + "55902 honeycombs\n" ] } ], "source": [ - "ps_honeycombs = []\n", + "# ps_honeycombs = []\n", "\n", - "for ps in pangram_sets:\n", - " for mand in ps:\n", - " ps_honeycombs.append(Honeycomb(mandatory=mand, letters=ps))\n", + "# for ps in pangram_sets:\n", + "# for mand in ps:\n", + "# ps_honeycombs.append(Honeycomb(mandatory=mand, letters=ps))\n", "\n", - "print(len(ps_honeycombs))" + "ps_honeycombs = [Honeycomb(mandatory=mand, letters=ps) \n", + " for ps in pangram_sets\n", + " for mand in ps]\n", + "print(len(ps_honeycombs), \"honeycombs\")" ] }, { "cell_type": "code", - "execution_count": 108, - "id": "301b3cd0", - "metadata": {}, - "outputs": [], - "source": [ - "# %%timeit\n", - "# max(ps_honeycombs, key=score_honeycomb)" - ] - }, - { - "cell_type": "code", - "execution_count": 109, - "id": "653613ac", - "metadata": {}, - "outputs": [], - "source": [ - "## 1 a,e,g,i,n,r,t r 3898\n", - "## 2 a,e,g,i,n,r,t n 3782\n", - "## 3 a,e,g,i,n,r,t e 3769" - ] - }, - { - "cell_type": "code", - "execution_count": 110, - "id": "3cdbd956", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "3898" - ] - }, - "execution_count": 110, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "score_honeycomb(Honeycomb('r', frozenset('aeginrt')))" - ] - }, - { - "cell_type": "code", - "execution_count": 111, - "id": "5f06f87b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "3782" - ] - }, - "execution_count": 111, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "score_honeycomb(Honeycomb('n', frozenset('aeginrtn')))" - ] - }, - { - "cell_type": "code", - "execution_count": 112, - "id": "ab6bc64e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "3769" - ] - }, - "execution_count": 112, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "score_honeycomb(Honeycomb('e', frozenset('aeginrte')))" - ] - }, - { - "cell_type": "code", - "execution_count": 113, + "execution_count": 38, "id": "914fece8", "metadata": {}, "outputs": [ @@ -687,20 +216,19 @@ "26" ] }, - "execution_count": 113, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "partitioned_scored_sets = {l: {s: scored_sets[s] for s in scored_sets if l in s} \n", - " for l in string.ascii_lowercase}\n", - "len(partitioned_scored_sets)" + " for l in string.ascii_lowercase}" ] }, { "cell_type": "code", - "execution_count": 114, + "execution_count": 39, "id": "f1454ccd", "metadata": {}, "outputs": [], @@ -713,7 +241,7 @@ }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 40, "id": "7380dbd6", "metadata": {}, "outputs": [ @@ -721,82 +249,127 @@ "name": "stdout", "output_type": "stream", "text": [ - "r|aegint\n" + "r|aegint 3898\n", + "r|aegint 3898\n", + "r|aegint 3898\n", + "r|aegint 3898\n", + "r|aegint 3898\n", + "r|aegint 3898\n", + "r|aegint 3898\n", + "r|aegint 3898\n", + "1min 25s ± 2.51 s per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ], "source": [ - "# %%timeit\n", - "print(max(ps_honeycombs, key=partitioned_score_honeycomb))" + "# # %%timeit\n", + "# best_honyecomb = max(ps_honeycombs, key=partitioned_score_honeycomb)\n", + "# print(best_honyecomb, partitioned_score_honeycomb(best_honyecomb))" ] }, { "cell_type": "code", - "execution_count": 74, - "id": "51d0317c", + "execution_count": 44, + "id": "0239b444-1240-4744-8547-c41367527785", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "13" - ] - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "r|aegint 3898\n", + "r|aegint 3898\n", + "r|aegint 3898\n", + "r|aegint 3898\n", + "r|aegint 3898\n", + "r|aegint 3898\n", + "r|aegint 3898\n", + "r|aegint 3898\n", + "5min 38s ± 4.58 s per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + ] } ], "source": [ - "# partitioned_score_honeycomb(Honeycomb('a', 'abc'))" + "# %%timeit\n", + "best_honyecomb = max(ps_honeycombs, key=score_honeycomb)\n", + "print(best_honyecomb, partitioned_score_honeycomb(best_honyecomb))" ] }, { "cell_type": "code", - "execution_count": 76, - "id": "77d74a74-55e9-498b-abdf-b02c1f7f01a3", - "metadata": {}, + "execution_count": 46, + "id": "7e4173b4-26a9-4198-b572-d57db321fe94", + "metadata": { + "lines_to_next_cell": 2 + }, "outputs": [ { - "data": { - "text/plain": [ - "19941" - ] - }, - "execution_count": 76, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " 1612136594 function calls in 589.284 seconds\n", + "\n", + " Ordered by: standard name\n", + "\n", + " ncalls tottime percall cumtime percall filename:lineno(function)\n", + " 55902 0.316 0.000 589.070 0.011 1101428662.py:1(score_honeycomb)\n", + " 1846420 257.034 0.000 588.167 0.000 1101428662.py:2()\n", + "1210893222 276.103 0.000 331.132 0.000 1250517269.py:1(present)\n", + " 1 0.000 0.000 589.284 589.284 :1()\n", + " 1 0.000 0.000 589.284 589.284 {built-in method builtins.exec}\n", + " 1 0.214 0.214 589.284 589.284 {built-in method builtins.max}\n", + " 55902 0.567 0.000 588.733 0.011 {built-in method builtins.sum}\n", + " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", + "399229242 55.030 0.000 55.030 0.000 {method 'issubset' of 'frozenset' objects}\n", + " 55902 0.021 0.000 0.021 0.000 {method 'items' of 'dict' objects}\n", + "\n", + "\n" + ] } ], "source": [ - "# max(len(partitioned_scored_sets[k]) for k in partitioned_scored_sets)" + "# cProfile.run('max(ps_honeycombs, key=score_honeycomb)')" ] }, { "cell_type": "code", - "execution_count": 77, - "id": "35b58dec-7138-4fdb-86cc-80f3eb6a555a", - "metadata": {}, + "execution_count": 47, + "id": "8f2de1b8-6a09-44eb-af7b-3e16c902859f", + "metadata": { + "lines_to_next_cell": 2 + }, "outputs": [ { - "data": { - "text/plain": [ - "37313" - ] - }, - "execution_count": 77, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " 401243372 function calls in 150.954 seconds\n", + "\n", + " Ordered by: standard name\n", + "\n", + " ncalls tottime percall cumtime percall filename:lineno(function)\n", + " 55902 0.226 0.000 150.835 0.003 346452243.py:1(partitioned_score_honeycomb)\n", + " 1846420 86.829 0.000 150.189 0.000 346452243.py:2()\n", + " 1 0.000 0.000 150.954 150.954 :1()\n", + " 1 0.000 0.000 150.954 150.954 {built-in method builtins.exec}\n", + " 1 0.119 0.119 150.954 150.954 {built-in method builtins.max}\n", + " 55902 0.407 0.000 150.596 0.003 {built-in method builtins.sum}\n", + " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", + "399229242 63.360 0.000 63.360 0.000 {method 'issubset' of 'frozenset' objects}\n", + " 55902 0.013 0.000 0.013 0.000 {method 'items' of 'dict' objects}\n", + "\n", + "\n" + ] } ], "source": [ - "# len(scored_sets)" + "# cProfile.run('max(ps_honeycombs, key=partitioned_score_honeycomb)')" ] }, { "cell_type": "code", "execution_count": null, - "id": "7e4173b4-26a9-4198-b572-d57db321fe94", + "id": "4c3a5684-346b-4cf7-b238-391f9a6ba2dd", "metadata": {}, "outputs": [], "source": [] @@ -804,7 +377,7 @@ ], "metadata": { "jupytext": { - "formats": "ipynb,auto:light" + "formats": "ipynb,py:light" }, "kernelspec": { "display_name": "Python 3 (ipykernel)",