Optimised riddle creation
authorNeil Smith <NeilNjae@users.noreply.github.com>
Fri, 1 Sep 2023 11:35:08 +0000 (12:35 +0100)
committerNeil Smith <NeilNjae@users.noreply.github.com>
Fri, 1 Sep 2023 11:35:08 +0000 (12:35 +0100)
creation_analysis.md [new file with mode: 0644]
riddle_creator.md
riddle_creator_filtered_dict.md [new file with mode: 0644]
riddle_creator_lazy.md [new file with mode: 0644]
riddle_dict_builder.md [new file with mode: 0644]
riddle_solver.md

diff --git a/creation_analysis.md b/creation_analysis.md
new file mode 100644 (file)
index 0000000..405d559
--- /dev/null
@@ -0,0 +1,251 @@
+---
+jupyter:
+  jupytext:
+    formats: ipynb,md
+    text_representation:
+      extension: .md
+      format_name: markdown
+      format_version: '1.3'
+      jupytext_version: 1.14.5
+  kernelspec:
+    display_name: Python 3 (ipykernel)
+    language: python
+    name: python3
+---
+
+```python
+import pandas as pd
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+%matplotlib inline
+import pstats
+from pstats import SortKey
+
+from riddle_definitions import *
+import random
+```
+
+```python
+distances = [edit_distance(random.choice(dictionary), 
+                           random.choice(dictionary))
+             for _ in range(10000)]
+distances = pd.Series(distances)
+distances.describe()
+```
+
+```python
+distances[distances <= 3].count() / distances.count()
+```
+
+```python
+metrics_original = pd.read_csv('metrics_original.csv')
+metrics_original
+```
+
+```python
+metrics_related = pd.read_csv('metrics_related.csv')
+metrics_related
+```
+
+```python
+metrics_lazy = pd.read_csv('metrics_lazy.csv')
+metrics_lazy
+```
+
+```python
+metrics_original.describe()
+```
+
+```python
+metrics_related.describe()
+```
+
+```python
+metrics_lazy.describe()
+```
+
+```python
+fig, ax = plt.subplots(1, 1)
+
+metrics_original.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5)
+metrics_related.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5)
+metrics_lazy.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5);
+```
+
+```python
+ax = metrics_original.cpu_time.plot.hist(bins=20, alpha=0.5)
+metrics_related.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5)
+metrics_lazy.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5);
+```
+
+```python
+ax = metrics_original.cpu_time.plot.kde(xlim=(0, 10))
+metrics_related.cpu_time.plot.kde(ax=ax)
+metrics_lazy.cpu_time.plot.kde(ax=ax);
+```
+
+```python
+# Histogram of CPU time per riddle for the original generator only.
+fig, ax = plt.subplots()
+ax.hist([metrics_original.cpu_time], bins=20)
+# Only one series is plotted, so the legend carries exactly one label.
+ax.legend(['Original'])
+ax.set_title('Times taken to generate a riddle')
+ax.set_xlabel('Time (s)')
+plt.savefig('original_time_histogram.png')
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_original.cpu_time, 
+         metrics_related.cpu_time], bins=20)
+ax.legend(['Original', 'Related'])
+ax.set_title('Times taken to generate a riddle')
+ax.set_xlabel('Time (s)')
+plt.savefig('original_related_time_histogram.png')
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_original.cpu_time, 
+         metrics_related.cpu_time, 
+         metrics_lazy.cpu_time], bins=20)
+ax.legend(['Original', 'Related', 'Lazy'])
+ax.set_title('Times taken to generate a riddle')
+ax.set_xlabel('Time (s)')
+plt.savefig('original_related_lazy_time_histogram.png')
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_related.cpu_time, 
+         metrics_lazy.cpu_time], bins=20,
+        color=['#ff7f0e', '#2ca02c'])
+ax.legend(['Related', 'Lazy'])
+ax.set_title('Times taken to generate a riddle')
+ax.set_xlabel('Time (s)')
+plt.savefig('related_lazy_time_histogram.png')
+```
+
+```python
+# Not every metrics CSV carries a `generated_lines` column, so only derive
+# the per-line generation count when that column is actually present.
+if 'generated_lines' in metrics_original.columns:
+  metrics_original['generated_per_line'] = metrics_original.generated_lines / metrics_original.riddle_lines
+metrics_original['time_per_line'] = metrics_original.cpu_time / metrics_original.riddle_lines
+metrics_original
+```
+
+```python
+metrics_related['generated_per_line'] = metrics_related.generated_lines / metrics_related.riddle_lines
+metrics_related['time_per_line'] = metrics_related.cpu_time / metrics_related.riddle_lines
+metrics_related
+```
+
+```python
+# Not every metrics CSV carries a `generated_lines` column, so only derive
+# the per-line generation count when that column is actually present.
+if 'generated_lines' in metrics_lazy.columns:
+  metrics_lazy['generated_per_line'] = metrics_lazy.generated_lines / metrics_lazy.riddle_lines
+metrics_lazy['time_per_line'] = metrics_lazy.cpu_time / metrics_lazy.riddle_lines
+metrics_lazy
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_original.time_per_line], bins=20)
+ax.legend(['Original']);
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_original.time_per_line, 
+         metrics_related.time_per_line], bins=20)
+ax.legend(['Original', 'Related']);
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_original.time_per_line, 
+         metrics_related.time_per_line, 
+         metrics_lazy.time_per_line], bins=20)
+ax.legend(['Original', 'Related', 'Lazy'])
+```
+
+```python
+plt.rcParams['axes.prop_cycle'].by_key()['color']
+```
+
+```python
+for bars, column in zip(*ax.get_legend_handles_labels()):
+  color = bars[0].get_facecolor()
+  print(column, color)
+```
+
+```python
+fig, ax = plt.subplots()
+ax.hist([metrics_related.time_per_line, 
+         metrics_lazy.time_per_line], bins=20,
+       color=['#ff7f0e', '#2ca02c'])
+ax.legend(['Related', 'Lazy']);
+```
+
+```python
+ax = metrics_original.time_per_line.plot.kde(xlim=(0, 2))
+metrics_related.time_per_line.plot.kde(ax=ax)
+metrics_lazy.time_per_line.plot.kde(ax=ax);
+```
+
+```python
+metrics_original.describe()
+```
+
+```python
+metrics_related.describe()
+```
+
+```python
+metrics_lazy.describe()
+```
+
+```python
+metrics_original.time_per_line.mean() / metrics_related.time_per_line.mean()
+```
+
+```python
+metrics_original.time_per_line.median() / metrics_related.time_per_line.median()
+```
+
+```python
+metrics_related.time_per_line.mean() / metrics_lazy.time_per_line.mean()
+```
+
+```python
+metrics_related.time_per_line.median() / metrics_lazy.time_per_line.median()
+```
+
+```python
+metrics_original.time_per_line.mean() / metrics_lazy.time_per_line.mean()
+```
+
+```python
+metrics_original.time_per_line.median() / metrics_lazy.time_per_line.median()
+```
+
+```python
+metrics_original.wall_time.mean() / metrics_related.wall_time.mean()
+```
+
+```python
+metrics_related.wall_time.mean() / metrics_lazy.wall_time.mean()
+```
+
+```python
+metrics_original.wall_time.mean() / metrics_lazy.wall_time.mean()
+```
+
+```python
+stats = pstats.Stats('filtered.stats')
+stats.strip_dirs().sort_stats(SortKey.TIME).print_stats(10)
+```
+
+```python
+stats2 = pstats.Stats('lazy.stats')
+stats2.strip_dirs().sort_stats(SortKey.TIME).print_stats(10)
+```
+
+```python
+
+```
index 5484c03212afd2ab31a1b4bc48182a9a010369f3..5b4b43e4071fa125cd0a6424765d1d05f08663e2 100644 (file)
@@ -1,12 +1,12 @@
 ---
 jupyter:
   jupytext:
-    formats: ipynb,md,py:percent
+    formats: ipynb,md
     text_representation:
       extension: .md
       format_name: markdown
       format_version: '1.3'
-      jupytext_version: 1.15.0
+      jupytext_version: 1.14.5
   kernelspec:
     display_name: Python 3 (ipykernel)
     language: python
@@ -21,45 +21,6 @@ from enum import Enum, auto
 import random
 ```
 
-```python
-gencount = 0
-```
-
-```python
-len([w for w in dictionary if 's' in w])
-```
-
-```python
-len([w for w in dictionary if 's' not in w])
-```
-
-```python
-dset = set(frozenset(w) for w in dictionary)
-len(dset), len(dictionary)
-```
-
-```python
-len([w for w in dset if 's' in w])
-```
-
-```python
-len([w for w in dset if 's' not in w])
-```
-
-```python
-sw = random.choice([w for w in dictionary if 's' in w])
-sw
-```
-
-```python
-swrel = [w for w in dictionary if 's' not in w if edit_distance(w, sw) <=3]
-len(swrel)
-```
-
-```python
-swrel
-```
-
 ```python
 def include_exclude_clue(letter: str, limit: int = 3) -> (RiddleClue, RiddleClue):
   with_letter = [w for w in dictionary if letter in w]
@@ -67,8 +28,6 @@ def include_exclude_clue(letter: str, limit: int = 3) -> (RiddleClue, RiddleClue
   
   finished = False
   while not finished:
-    global gencount
-    gencount +=1
     a = random.choice(with_letter)
     b = random.choice(without_letter)
     finished = ((edit_distance(a, b) <= limit) and
@@ -87,8 +46,6 @@ def include_include_clue(letter: str, limit: int = 3) -> (RiddleClue, RiddleClue
   
   finished = False
   while not finished:
-    global gencount
-    gencount +=1
     a = random.choice(with_letter)
     b = random.choice(with_letter)
     finished = ((a != b) and 
@@ -108,8 +65,6 @@ def exclude_exclude_clue(letter: str, limit: int = 3) -> (RiddleClue, RiddleClue
   
   finished = False
   while not finished:
-    global gencount
-    gencount +=1
     a = random.choice(without_letter)
     b = random.choice(without_letter)
     finished = ((a != b) and 
@@ -160,27 +115,6 @@ sample_riddle
 solve_riddle(collapse_riddle_clues(sample_riddle))
 ```
 
-```python
-# write_riddle(sample_riddle)
-```
-
-```python
-# sample_riddle = random_riddle('sonnet', limit=4)
-# sample_riddle
-```
-
-```python
-sample_riddle
-```
-
-```python
-collapse_riddle_clues(sample_riddle)
-```
-
-```python
-solve_riddle(collapse_riddle_clues(sample_riddle))
-```
-
 ```python
 def valid_random_riddle(word: str) -> Riddle:
   finished = False
@@ -192,34 +126,27 @@ def valid_random_riddle(word: str) -> Riddle:
 ```
 
 ```python
-# import time
-# w_times = []
-# c_times = []
-# for _ in range(1000):
-#   w1, c1 = time.perf_counter(), time.process_time()
-#   valid_random_riddle(random.choice(dictionary))
-#   w2, c2 = time.perf_counter(), time.process_time()
-#   w_times.append(w2 - w1)
-#   c_times.append(c2 - c1)
-
-# with open('cpu_times.txt', 'w') as f:
-#   f.writelines(f'{t}\n' for t in c_times)
-# with open('wall_times.txt', 'w') as f:
-#   f.writelines(f'{t}\n' for t in w_times)
-
-```
-
-```python
-glcounts = []
+import time
+import csv
+reports = []
 for _ in range(1000):
-  gencount = 0
+  w1, c1 = time.perf_counter(), time.process_time()
   r = valid_random_riddle(random.choice(dictionary))
+  w2, c2 = time.perf_counter(), time.process_time()
   linecount = len(r)
-  glcounts.append((gencount, linecount))
+  # Each run is stored as one dict so the CSV writer below can emit it as a
+  # row; `reports` is the only accumulator this cell creates.
+  reports.append({'wall_time': w2 - w1,
+                  'cpu_time': c2 - c1,
+                  'riddle_lines': linecount})
+
+with open('metrics_original.csv', 'w', newline='') as csvfile:
+  fieldnames = list(reports[0].keys())
+  writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
 
-with open('linecounts.txt', 'w') as f:
-  f.write('"Lines generated","Lines in riddle"\n')
-  f.writelines(f'{g},{l}\n' for g, l in glcounts)
+  writer.writeheader()
+  for r in reports:
+    writer.writerow(r)
 ```
 
 ```python
@@ -230,7 +157,10 @@ def write_include_exclude_line(clue_a: RiddleClue, clue_b: RiddleClue) -> str:
 
 ```python
 def write_include_include_line(clue_a: RiddleClue, clue_b: RiddleClue) -> str:
-  line = f"is in {clue_a.word} and also in {clue_b.word}"
+  if random.randrange(2) == 0:
+    line = f"is in {clue_a.word} and also in {clue_b.word}"
+  else:
+    line = f"is in both {clue_a.word} and {clue_b.word}"
   return line
 ```
 
diff --git a/riddle_creator_filtered_dict.md b/riddle_creator_filtered_dict.md
new file mode 100644 (file)
index 0000000..9af3c93
--- /dev/null
@@ -0,0 +1,269 @@
+---
+jupyter:
+  jupytext:
+    formats: ipynb,md
+    text_representation:
+      extension: .md
+      format_name: markdown
+      format_version: '1.3'
+      jupytext_version: 1.14.5
+  kernelspec:
+    display_name: Python 3 (ipykernel)
+    language: python
+    name: python3
+---
+
+```python
+from riddle_definitions import *
+
+from typing import Dict, Tuple, List, Set
+from enum import Enum, auto
+import random
+import gzip
+```
+
+```python
+# Map each word to the related words listed after it on its line of the
+# gzipped, comma-separated neighbours file (the first field is the word).
+dictionary_neighbours = {}
+
+# `with` guarantees the gzip handle is closed, and iterating the handle
+# streams the file instead of loading every line with readlines().
+with gzip.open('dictionary_neighbours.txt.gz', 'rt') as neighbours_file:
+  for line in neighbours_file:
+    words = line.strip().split(',')
+    # A line of the form "word," splits into ['word', '']; drop the empty
+    # field so it is not treated as a neighbour.
+    dictionary_neighbours[words[0]] = [w for w in words[1:] if w]
+
+possible_riddle_solutions = list(dictionary_neighbours.keys())
+```
+
+```python
+len(dictionary_neighbours['sonnet'])
+```
+
+```python
+def include_exclude_clue(letter: str, limit: int = 3) -> Tuple[RiddleClue, RiddleClue]:
+  """Pick a word containing `letter` and a related word that lacks it.
+
+  The second word is drawn from the first word's entry in
+  `dictionary_neighbours`, keeping only words without `letter` whose edit
+  distance from the first word is at most `limit`. Words with no such
+  neighbour are discarded and another first word is drawn.
+
+  Returns an (Include, Exclude) pair of clues. `random.choice` raises
+  IndexError if no key of `dictionary_neighbours` contains `letter`.
+  """
+  # The candidate list does not change between retries, so build it once
+  # rather than rescanning the whole dictionary on every iteration.
+  candidates = [w for w in dictionary_neighbours if letter in w]
+  while True:
+    with_letter = random.choice(candidates)
+    without_letter = [w for w in dictionary_neighbours[with_letter]
+                      if letter not in w
+                      if edit_distance(with_letter, w) <= limit]
+    if without_letter:
+      other = random.choice(without_letter)
+      break
+
+  return (RiddleClue(word=with_letter, valence=RiddleValence.Include),
+          RiddleClue(word=other, valence=RiddleValence.Exclude))
+
+a, b = include_exclude_clue('s')
+a, b, set(a.word) - set(b.word), edit_distance(a.word, b.word)
+```
+
+```python
+def include_include_clue(letter: str, limit: int = 3) -> Tuple[RiddleClue, RiddleClue]:
+  """Pick two related words that both contain `letter`.
+
+  The second word is drawn from the first word's entry in
+  `dictionary_neighbours`, keeping only words containing `letter` whose edit
+  distance from the first word is at most `limit`. Words with no such
+  neighbour are discarded and another first word is drawn.
+
+  Returns an (Include, Include) pair of clues. `random.choice` raises
+  IndexError if no key of `dictionary_neighbours` contains `letter`.
+  """
+  # The candidate list does not change between retries, so build it once
+  # rather than rescanning the whole dictionary on every iteration.
+  candidates = [w for w in dictionary_neighbours if letter in w]
+  while True:
+    with_letter = random.choice(candidates)
+    others = [w for w in dictionary_neighbours[with_letter]
+              if letter in w
+              if edit_distance(with_letter, w) <= limit]
+    if others:
+      other = random.choice(others)
+      break
+  return (RiddleClue(word=with_letter, valence=RiddleValence.Include),
+          RiddleClue(word=other, valence=RiddleValence.Include))
+
+a, b = include_include_clue('s')
+a, b, set(a.word) | set(b.word), edit_distance(a.word, b.word)
+```
+
+```python
+def exclude_exclude_clue(letter: str, limit: int = 3) -> Tuple[RiddleClue, RiddleClue]:
+  """Pick two related words, neither of which contains `letter`.
+
+  The second word is drawn from the first word's entry in
+  `dictionary_neighbours`, keeping only words without `letter` whose edit
+  distance from the first word is at most `limit`. Words with no such
+  neighbour are discarded and another first word is drawn.
+
+  Returns an (Exclude, Exclude) pair of clues. `random.choice` raises
+  IndexError if every key of `dictionary_neighbours` contains `letter`.
+  """
+  # The candidate list does not change between retries, so build it once
+  # rather than rescanning the whole dictionary on every iteration.
+  candidates = [w for w in dictionary_neighbours if letter not in w]
+  while True:
+    without_letter = random.choice(candidates)
+    others = [w for w in dictionary_neighbours[without_letter]
+              if letter not in w
+              if edit_distance(without_letter, w) <= limit]
+    if others:
+      other = random.choice(others)
+      break
+
+  return (RiddleClue(word=without_letter, valence=RiddleValence.Exclude),
+          RiddleClue(word=other, valence=RiddleValence.Exclude))
+
+a, b = exclude_exclude_clue('s')
+a, b, set(a.word) | set(b.word), edit_distance(a.word, b.word)
+```
+
+```python
+def random_clue( letter: str
+               , ie_limit: int = 3
+               , ii_limit: int = 2
+               , ee_limit: int = 2) -> (RiddleClue, RiddleClue):
+  """Build one randomly chosen kind of clue pair for `letter`.
+
+  The kind is drawn with weights 7:2:1 for include/exclude, include/include
+  and exclude/exclude; the matching `*_limit` is passed on as the edit
+  distance limit.
+  """
+  makers = {
+    'include_exclude': (include_exclude_clue, ie_limit),
+    'include_include': (include_include_clue, ii_limit),
+    'exclude_exclude': (exclude_exclude_clue, ee_limit),
+  }
+  (chosen,) = random.choices(list(makers), weights=[7, 2, 1], k=1)
+  make_clue, chosen_limit = makers[chosen]
+  return make_clue(letter, limit=chosen_limit)
+```
+
+```python
+def random_riddle( word: str
+                 , ie_limit: int = 3
+                 , ii_limit: int = 2
+                 , ee_limit: int = 2
+                 ) -> Riddle:
+  """Build a riddle for `word`: one random clue pair per letter.
+
+  The result maps 1-based letter positions to the clue pair generated for
+  the letter at that position.
+  """
+  riddle = {}
+  for position, l in enumerate(word, start=1):
+    riddle[position] = random_clue(l,
+                                   ie_limit=ie_limit,
+                                   ii_limit=ii_limit,
+                                   ee_limit=ee_limit)
+  return riddle
+```
+
+```python
+sample_riddle = random_riddle('teacup')
+sample_riddle
+```
+
+```python
+solve_riddle(collapse_riddle_clues(sample_riddle))
+```
+
+```python
+# write_riddle(sample_riddle)
+```
+
+```python
+# sample_riddle = random_riddle('sonnet', limit=4)
+# sample_riddle
+```
+
+```python
+sample_riddle
+```
+
+```python
+collapse_riddle_clues(sample_riddle)
+```
+
+```python
+solve_riddle(collapse_riddle_clues(sample_riddle))
+```
+
+```python
+def valid_random_riddle(word: str) -> Riddle:
+  """Generate random riddles for `word` until one has exactly one solution."""
+  while True:
+    riddle = random_riddle(word)
+    solns = solve_riddle(collapse_riddle_clues(riddle))
+    if len(solns) == 1:
+      return riddle
+```
+
+```python
+import time
+import csv
+
+# Time 1000 riddle generations and save one CSV row per riddle.
+reports = []
+for _ in range(1000):
+  # Nothing in this notebook increments a generation counter, so this stays
+  # at 0; it is kept so the CSV retains its `generated_lines` column.
+  gencount = 0
+  # perf_counter measures wall-clock time, process_time measures CPU time.
+  w1, c1 = time.perf_counter(), time.process_time()
+  r = valid_random_riddle(random.choice(dictionary))
+  w2, c2 = time.perf_counter(), time.process_time()
+  linecount = len(r)
+  # `reports` is the only accumulator defined here; appending to any other
+  # list would raise NameError.
+  reports.append({'wall_time': w2 - w1,
+                  'cpu_time': c2 - c1,
+                  'generated_lines': gencount,
+                  'riddle_lines': linecount})
+
+with open('metrics_related.csv', 'w', newline='') as csvfile:
+  fieldnames = list(reports[0].keys())
+  writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+
+  writer.writeheader()
+  writer.writerows(reports)
+```
+
+```python
+def write_include_exclude_line(clue_a: RiddleClue, clue_b: RiddleClue) -> str:
+  """Phrase a clue whose letter is in `clue_a`'s word but not `clue_b`'s."""
+  present, absent = clue_a.word, clue_b.word
+  return f"is in {present} but not in {absent}"
+```
+
+```python
+def write_include_include_line(clue_a: RiddleClue, clue_b: RiddleClue) -> str:
+  """Phrase a clue whose letter is in both words, in one of two wordings.
+
+  The wording is picked at random with equal probability.
+  """
+  first, second = clue_a.word, clue_b.word
+  if random.randrange(2) != 0:
+    return f"is in both {first} and {second}"
+  return f"is in {first} and also in {second}"
+```
+
+```python
+def write_exclude_exclude_line(clue_a: RiddleClue, clue_b: RiddleClue) -> str:
+  """Phrase a clue whose letter is in neither of the two words."""
+  first, second = clue_a.word, clue_b.word
+  return f"is neither in {first} nor in {second}"
+```
+
+```python
+def write_line(a: RiddleClue, b: RiddleClue) -> str:
+  """Phrase a clue pair according to the valences of its two clues.
+
+  Any combination other than include/include, include/exclude or
+  exclude/exclude yields the text "illegal line".
+  """
+  include, exclude = RiddleValence.Include, RiddleValence.Exclude
+  if a.valence == include and b.valence == include:
+    return write_include_include_line(a, b)
+  if a.valence == include and b.valence == exclude:
+    return write_include_exclude_line(a, b)
+  if a.valence == exclude and b.valence == exclude:
+    return write_exclude_exclude_line(a, b)
+  return "illegal line"
+```
+
+```python
+def write_riddle(riddle: Riddle) -> List[str]:
+  """Render a riddle as text lines, one per position, in position order.
+
+  Each line starts with "My <ordinal>". For the final position, roughly
+  three times in ten, the ordinal stored under key -1 of `reverse_ordinals`
+  (presumably "last") is used instead of the numeric one.
+  """
+  final_position = len(riddle)
+  rendered = []
+  for position, clues in sorted(riddle.items()):
+    first_clue, second_clue = clues
+    ordinal = reverse_ordinals[position]
+    if position == final_position and random.random() <= 0.3:
+      ordinal = reverse_ordinals[-1]
+    rendered.append(f"My {ordinal} {write_line(first_clue, second_clue)}")
+  return rendered
+```
+
+```python
+
+```
+
+```python
+sample_riddle = valid_random_riddle("elephant")
+sample_riddle
+```
+
+```python
+write_riddle(sample_riddle)
+```
+
+```python
+solve_riddle(collapse_riddle_clues(sample_riddle))
+```
+
+```python
+# Write ten generated riddles to a text file, each followed by the word it
+# was generated for.
+with open("generated-riddles.txt", 'w') as file:
+  between = False
+  for _ in range(10):
+    # Emit a blank separator line before every riddle except the first.
+    if between:
+      file.write('\n')
+    between = True
+    target = random.choice(dictionary)
+    riddle = valid_random_riddle(target)
+    lines = write_riddle(riddle)
+    file.writelines(l + '\n' for l in lines)
+    file.write(f'Target: {target}\n')
+```
+
+```python
+
+```
diff --git a/riddle_creator_lazy.md b/riddle_creator_lazy.md
new file mode 100644 (file)
index 0000000..4af0f00
--- /dev/null
@@ -0,0 +1,341 @@
+---
+jupyter:
+  jupytext:
+    formats: ipynb,md
+    text_representation:
+      extension: .md
+      format_name: markdown
+      format_version: '1.3'
+      jupytext_version: 1.14.5
+  kernelspec:
+    display_name: Python 3 (ipykernel)
+    language: python
+    name: python3
+---
+
+```python
+from riddle_definitions import *
+
+from typing import Dict, Tuple, List, Set
+from enum import Enum, auto
+import random
+import gzip
+```
+
+```python
+# Map each word to the related words listed after it on its line of the
+# gzipped, comma-separated neighbours file (the first field is the word).
+dictionary_neighbours = {}
+
+# `with` guarantees the gzip handle is closed, and iterating the handle
+# streams the file instead of loading every line with readlines().
+with gzip.open('dictionary_neighbours.txt.gz', 'rt') as neighbours_file:
+  for line in neighbours_file:
+    words = line.strip().split(',')
+    # A line of the form "word," splits into ['word', '']; drop the empty
+    # field so it is not treated as a neighbour.
+    dictionary_neighbours[words[0]] = [w for w in words[1:] if w]
+
+possible_riddle_clues = list(dictionary_neighbours.keys())
+```
+
+```python
+len(dictionary_neighbours['sonnet'])
+```
+
+```python
+def include_exclude_clue(letter: str, limit: int = 3) -> Tuple[RiddleClue, RiddleClue]:
+  """Lazily pick a word containing `letter` and a related word lacking it.
+
+  A first word is rejection-sampled from `possible_riddle_clues`; its
+  neighbours are then tried in random order and the first one without
+  `letter` and within edit distance `limit` is used. If none qualifies, a
+  new first word is drawn.
+
+  Returns an (Include, Exclude) pair of clues. Never returns if no
+  qualifying pair exists.
+  """
+  while True:
+    # Keep drawing until the first word contains the letter.
+    with_letter = random.choice(possible_riddle_clues)
+    while letter not in with_letter:
+      with_letter = random.choice(possible_riddle_clues)
+
+    # Shuffle a copy so the shared neighbour list keeps its order.
+    others = dictionary_neighbours[with_letter][:]
+    random.shuffle(others)
+
+    # Walking the shuffled list stops at the first match without copying
+    # the remaining candidates after every miss.
+    for other in others:
+      if letter not in other and edit_distance(with_letter, other) <= limit:
+        return (RiddleClue(word=with_letter, valence=RiddleValence.Include),
+                RiddleClue(word=other, valence=RiddleValence.Exclude))
+
+a, b = include_exclude_clue('s')
+a, b, set(a.word) - set(b.word), edit_distance(a.word, b.word)
+```
+
+```python
+def include_include_clue(letter: str, limit: int = 3) -> Tuple[RiddleClue, RiddleClue]:
+  """Lazily pick two related words that both contain `letter`.
+
+  A first word is rejection-sampled from `possible_riddle_clues`; its
+  neighbours are then tried in random order and the first one containing
+  `letter` and within edit distance `limit` is used. If none qualifies, a
+  new first word is drawn.
+
+  Returns an (Include, Include) pair of clues. Never returns if no
+  qualifying pair exists.
+  """
+  while True:
+    # Keep drawing until the first word contains the letter.
+    with_letter = random.choice(possible_riddle_clues)
+    while letter not in with_letter:
+      with_letter = random.choice(possible_riddle_clues)
+
+    # Shuffle a copy so the shared neighbour list keeps its order.
+    others = dictionary_neighbours[with_letter][:]
+    random.shuffle(others)
+
+    # Walking the shuffled list stops at the first match without copying
+    # the remaining candidates after every miss.
+    for other in others:
+      if letter in other and edit_distance(with_letter, other) <= limit:
+        return (RiddleClue(word=with_letter, valence=RiddleValence.Include),
+                RiddleClue(word=other, valence=RiddleValence.Include))
+
+a, b = include_include_clue('s')
+a, b, set(a.word) | set(b.word), edit_distance(a.word, b.word)
+```
+
+```python
+def exclude_exclude_clue(letter: str, limit: int = 3) -> Tuple[RiddleClue, RiddleClue]:
+  """Lazily pick two related words, neither of which contains `letter`.
+
+  A first word is rejection-sampled from `possible_riddle_clues`; its
+  neighbours are then tried in random order and the first one without
+  `letter` and within edit distance `limit` is used. If none qualifies, a
+  new first word is drawn.
+
+  Returns an (Exclude, Exclude) pair of clues. Never returns if no
+  qualifying pair exists.
+  """
+  while True:
+    # Keep drawing until the first word lacks the letter.
+    without_letter = random.choice(possible_riddle_clues)
+    while letter in without_letter:
+      without_letter = random.choice(possible_riddle_clues)
+
+    # Shuffle a copy so the shared neighbour list keeps its order.
+    others = dictionary_neighbours[without_letter][:]
+    random.shuffle(others)
+
+    # Walking the shuffled list stops at the first match without copying
+    # the remaining candidates after every miss.
+    for other in others:
+      if letter not in other and edit_distance(without_letter, other) <= limit:
+        return (RiddleClue(word=without_letter, valence=RiddleValence.Exclude),
+                RiddleClue(word=other, valence=RiddleValence.Exclude))
+
+a, b = exclude_exclude_clue('s')
+a, b, set(a.word) | set(b.word), edit_distance(a.word, b.word)
+```
+
+```python
+def random_clue( letter: str
+               , ie_limit: int = 3
+               , ii_limit: int = 2
+               , ee_limit: int = 2) -> (RiddleClue, RiddleClue):
+  """Build one randomly chosen kind of clue pair for `letter`.
+
+  The kind is drawn with weights 7:2:1 for include/exclude, include/include
+  and exclude/exclude; the matching `*_limit` is passed on as the edit
+  distance limit.
+  """
+  makers = {
+    'include_exclude': (include_exclude_clue, ie_limit),
+    'include_include': (include_include_clue, ii_limit),
+    'exclude_exclude': (exclude_exclude_clue, ee_limit),
+  }
+  (chosen,) = random.choices(list(makers), weights=[7, 2, 1], k=1)
+  make_clue, chosen_limit = makers[chosen]
+  return make_clue(letter, limit=chosen_limit)
+```
+
+```python
+def random_riddle( word: str
+                 , ie_limit: int = 3
+                 , ii_limit: int = 2
+                 , ee_limit: int = 2
+                 ) -> Riddle:
+  """Build a riddle for `word`: one random clue pair per letter.
+
+  The result maps 1-based letter positions to the clue pair generated for
+  the letter at that position.
+  """
+  riddle = {}
+  for position, l in enumerate(word, start=1):
+    riddle[position] = random_clue(l,
+                                   ie_limit=ie_limit,
+                                   ii_limit=ii_limit,
+                                   ee_limit=ee_limit)
+  return riddle
+```
+
+```python
+sample_riddle = random_riddle('teacup')
+sample_riddle
+```
+
+```python
+collapse_riddle_clues(sample_riddle)
+```
+
+```python
+solve_riddle(collapse_riddle_clues(sample_riddle))
+```
+
+```python
+# write_riddle(sample_riddle)
+```
+
+```python
+# sample_riddle = random_riddle('sonnet', limit=4)
+# sample_riddle
+```
+
+```python
+sample_riddle
+```
+
+```python
+collapse_riddle_clues(sample_riddle)
+```
+
+```python
+solve_riddle(collapse_riddle_clues(sample_riddle))
+```
+
+```python
+def valid_random_riddle(word: str) -> Riddle:
+  """Generate random riddles for `word` until one has exactly one solution."""
+  while True:
+    riddle = random_riddle(word)
+    solns = solve_riddle(collapse_riddle_clues(riddle))
+    if len(solns) == 1:
+      return riddle
+```
+
+```python
+import time
+import csv
+# Time 1000 riddle generations; each run becomes one dict in `reports`
+# and, further down, one row of metrics_lazy.csv.
+reports = []
+for _ in range(1000):
+  # perf_counter measures wall-clock time, process_time measures CPU time.
+  w1, c1 = time.perf_counter(), time.process_time()
+  r = valid_random_riddle(random.choice(possible_riddle_clues))
+  w2, c2 = time.perf_counter(), time.process_time()
+  linecount = len(r)
+  reports.append({'wall_time': w2 - w1,
+                  'cpu_time': c2 - c1,
+                  'riddle_lines': linecount})
+
+with open('metrics_lazy.csv', 'w', newline='') as csvfile:
+  # Column names are taken from the keys of the first report.
+  fieldnames = list(reports[0].keys())
+  writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+
+  writer.writeheader()
+  for r in reports:
+    writer.writerow(r)
+```
+
+```python
+def write_include_exclude_line(clue_a: RiddleClue, clue_b: RiddleClue) -> str:
+  """Phrase a clue whose letter is in `clue_a`'s word but not `clue_b`'s."""
+  present, absent = clue_a.word, clue_b.word
+  return f"is in {present} but not in {absent}"
+```
+
+```python
+def write_include_include_line(clue_a: RiddleClue, clue_b: RiddleClue) -> str:
+  """Phrase a clue whose letter is in both words, in one of two wordings.
+
+  The wording is picked at random with equal probability.
+  """
+  first, second = clue_a.word, clue_b.word
+  if random.randrange(2) != 0:
+    return f"is in both {first} and {second}"
+  return f"is in {first} and also in {second}"
+```
+
+```python
+def write_exclude_exclude_line(clue_a: RiddleClue, clue_b: RiddleClue) -> str:
+  """Phrase a clue whose letter is in neither of the two words."""
+  first, second = clue_a.word, clue_b.word
+  return f"is neither in {first} nor in {second}"
+```
+
+```python
+def write_line(a: RiddleClue, b: RiddleClue) -> str:
+  """Phrase a clue pair according to the valences of its two clues.
+
+  Any combination other than include/include, include/exclude or
+  exclude/exclude yields the text "illegal line".
+  """
+  include, exclude = RiddleValence.Include, RiddleValence.Exclude
+  if a.valence == include and b.valence == include:
+    return write_include_include_line(a, b)
+  if a.valence == include and b.valence == exclude:
+    return write_include_exclude_line(a, b)
+  if a.valence == exclude and b.valence == exclude:
+    return write_exclude_exclude_line(a, b)
+  return "illegal line"
+```
+
+```python
+def write_riddle(riddle: Riddle) -> List[str]:
+  """Render a riddle as text lines, one per position, in position order.
+
+  Each line starts with "My <ordinal>". For the final position, roughly
+  three times in ten, the ordinal stored under key -1 of `reverse_ordinals`
+  (presumably "last") is used instead of the numeric one.
+  """
+  final_position = len(riddle)
+  rendered = []
+  for position, clues in sorted(riddle.items()):
+    first_clue, second_clue = clues
+    ordinal = reverse_ordinals[position]
+    if position == final_position and random.random() <= 0.3:
+      ordinal = reverse_ordinals[-1]
+    rendered.append(f"My {ordinal} {write_line(first_clue, second_clue)}")
+  return rendered
+```
+
+```python
+
+```
+
+```python
+sample_riddle = valid_random_riddle("elephant")
+sample_riddle
+```
+
+```python
+write_riddle(sample_riddle)
+```
+
+```python
+solve_riddle(collapse_riddle_clues(sample_riddle))
+```
+
+```python
+# Write ten generated riddles to a text file, each followed by the word it
+# was generated for.
+with open("generated-riddles-lazy.txt", 'w') as file:
+  between = False
+  for _ in range(10):
+    # Emit a blank separator line before every riddle except the first.
+    if between:
+      file.write('\n')
+    between = True
+    target = random.choice(possible_riddle_clues)
+    riddle = valid_random_riddle(target)
+    lines = write_riddle(riddle)
+    file.writelines(l + '\n' for l in lines)
+    file.write(f'Target: {target}\n')
+                  
+```
+
+```python
+print('\n'.join(write_riddle(valid_random_riddle("faster"))))
+```
+
+```python
+len(dictionary_neighbours['sonnet'])
+```
+
+```python
+ndls = sum(len(ws) for ws in dictionary_neighbours.values())
+ndls
+```
+
+```python
+ndls / len(dictionary_neighbours)
+```
+
+```python
+dn_trimmed = {w : [o for o in dictionary_neighbours[w] if edit_distance(w, o) <= 3]
+              for w in dictionary_neighbours}
+```
+
+```python
+ndlts = sum(len(ws) for ws in dn_trimmed.values())
+ndlts
+```
+
+```python
+ndlts / len(dn_trimmed)
+```
+
+```python
+148 / 940
+```
+
+```python
+1/7
+```
+
+```python
+1/6
+```
+
+```python
+
+```
diff --git a/riddle_dict_builder.md b/riddle_dict_builder.md
new file mode 100644 (file)
index 0000000..229be10
--- /dev/null
@@ -0,0 +1,122 @@
+---
+jupyter:
+  jupytext:
+    formats: ipynb,md,py:percent
+    text_representation:
+      extension: .md
+      format_name: markdown
+      format_version: '1.3'
+      jupytext_version: 1.14.5
+  kernelspec:
+    display_name: Python 3 (ipykernel)
+    language: python
+    name: python3
+---
+
+# Generate a dictionary of related words
+
+```python
+import unicodedata
+import re
+from dataclasses import dataclass
+from typing import Dict, Tuple, List, Set
+from enum import Enum, auto
+import functools
+import random
+import multiprocessing
+import gzip
+# import csv
+```
+
+```python
+stop_words = set('my is in within lies and also always you will find the found but'.split())
+negative_words = set('not never neither nor'.split())
+```
+
+```python
+ordinals : Dict[str, int] =  { 'last': -1
+            , 'first': 1
+            , 'second': 2
+            , 'third': 3
+            , 'fourth': 4
+            , 'fifth': 5
+            , 'sixth': 6
+            , 'seventh': 7
+            , 'eighth': 8
+            , 'ninth': 9
+            , 'tenth': 10
+            , 'eleventh': 11
+            , 'twelfth': 12
+            }
+
+reverse_ordinals : Dict[int, str] = {n: w for w, n in ordinals.items()}
+```
+
+```python
+# Read the system word list once, closing the file afterwards, and strip
+# each line up front so every filter below sees the bare word. Comparing
+# the raw line (which still ends in a newline) against the exclusion sets
+# would never match.
+with open('/usr/share/dict/british-english') as word_file:
+  stripped_words = [line.strip() for line in word_file]
+
+# Keep lowercase alphabetic words of 5 to 12 letters that are not riddle
+# vocabulary (stop words, negations, ordinals), folded to plain ASCII.
+dictionary : List[str] = [unicodedata.normalize('NFKD', w).\
+                 encode('ascii', 'ignore').\
+                 decode('utf-8')
+              for w in stripped_words
+              if w.islower()
+              if w.isalpha()
+              if len(w) >= 5
+              if len(w) <= 12
+              if w not in stop_words
+              if w not in negative_words
+              if w not in ordinals
+             ]
+```
+
+An edit-distance helper that will be used throughout the library
+
+```python
+@functools.lru_cache
+def edit_distance(s: str, t: str) -> int:
+  """Return the Levenshtein distance between `s` and `t`.
+
+  That is the smallest number of single-character insertions, deletions
+  and substitutions that turn `s` into `t`.
+
+  The table is filled iteratively, one row per character of `s`, so the
+  running time is proportional to len(s) * len(t) whatever the state of
+  the cache, and no string slices or recursive calls are made.
+  """
+  # previous[j] is the distance between the first i-1 characters of s and
+  # the first j characters of t; row 0 is the cost of inserting j characters.
+  previous = list(range(len(t) + 1))
+  for i, s_char in enumerate(s, start=1):
+    # Column 0: deleting the first i characters of s.
+    current = [i]
+    for j, t_char in enumerate(t, start=1):
+      cost = 0 if s_char == t_char else 1
+      current.append(min(previous[j] + 1,          # delete s_char
+                         current[j - 1] + 1,       # insert t_char
+                         previous[j - 1] + cost))  # match or substitute
+    previous = current
+
+  return previous[-1]
+```
+
+```python
+# def find_neighbours(word: str, limit: int = 4) -> Tuple[str, List[str]]:
+def find_neighbours(word, limit=4):
+  """Return `word` together with the dictionary words related to it.
+
+  A word is related when it differs from `word`, neither word's letter set
+  contains the other's, and their edit distance is at most `limit`.
+  """
+  word_letters = set(word)
+
+  def is_neighbour(candidate):
+    if candidate == word:
+      return False
+    candidate_letters = set(candidate)
+    # Reject pairs where one letter set is a subset of the other.
+    if word_letters <= candidate_letters or candidate_letters <= word_letters:
+      return False
+    return edit_distance(word, candidate) <= limit
+
+  return word, [candidate for candidate in dictionary if is_neighbour(candidate)]
+```
+
+```python
+# Compute every word's neighbour list in a pool of worker processes.
+# pool.map returns the (word, neighbours) results in the same order as
+# `dictionary`.
+with multiprocessing.Pool() as pool:
+  # word_other_pairs = pool.imap_unordered(find_neighbours, dictionary, chunksize=5000)
+  word_other_pairs = pool.map(find_neighbours, dictionary)
+```
+
+```python
+# One line per word: the word first, then its neighbours, comma-separated.
+# Joining the word together with its neighbours avoids a dangling comma for
+# words without neighbours, which would read back as an empty neighbour
+# when the line is split on commas.
+with gzip.open('dictionary_neighbours.txt.gz', 'wt') as f:
+  for word, related in word_other_pairs:
+    f.write(",".join([word, *related]) + '\n')
+```
+
+```python
+
+```
index 54c060b24021ea2bf581ac8a661829f28df409db..e58deab56d81b84357e16e09984b06585b278ab0 100644 (file)
@@ -6,7 +6,7 @@ jupyter:
       extension: .md
       format_name: markdown
       format_version: '1.3'
-      jupytext_version: 1.15.0
+      jupytext_version: 1.14.5
   kernelspec:
     display_name: Python 3 (ipykernel)
     language: python
@@ -81,6 +81,10 @@ e2 = parse_line(tokenise("My second is in apple and also in banana."))
 e2
 ```
 
+```python
+parse_line(tokenise("My fourth is in both apple and banana."))
+```
+
 ```python
 e3 = parse_line(tokenise('My seventh is neither in callus nor in calves'))
 e3