From 95c4c545a4abe7ef5f222b674da8535739ef1fcb Mon Sep 17 00:00:00 2001 From: Neil Smith Date: Fri, 1 Sep 2023 12:35:08 +0100 Subject: [PATCH] Optimised riddle creation --- creation_analysis.md | 251 +++++++++++++++++++++++ riddle_creator.md | 116 +++-------- riddle_creator_filtered_dict.md | 269 +++++++++++++++++++++++++ riddle_creator_lazy.md | 341 ++++++++++++++++++++++++++++++++ riddle_dict_builder.md | 122 ++++++++++++ riddle_solver.md | 6 +- 6 files changed, 1011 insertions(+), 94 deletions(-) create mode 100644 creation_analysis.md create mode 100644 riddle_creator_filtered_dict.md create mode 100644 riddle_creator_lazy.md create mode 100644 riddle_dict_builder.md diff --git a/creation_analysis.md b/creation_analysis.md new file mode 100644 index 0000000..405d559 --- /dev/null +++ b/creation_analysis.md @@ -0,0 +1,251 @@ +--- +jupyter: + jupytext: + formats: ipynb,md + text_representation: + extension: .md + format_name: markdown + format_version: '1.3' + jupytext_version: 1.14.5 + kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +```python +import pandas as pd +import matplotlib as mpl +import matplotlib.pyplot as plt +%matplotlib inline +import pstats +from pstats import SortKey + +from riddle_definitions import * +import random +``` + +```python +distances = [edit_distance(random.choice(dictionary), + random.choice(dictionary)) + for _ in range(10000)] +distances = pd.Series(distances) +distances.describe() +``` + +```python +distances[distances <= 3].count() / distances.count() +``` + +```python +metrics_original = pd.read_csv('metrics_original.csv') +metrics_original +``` + +```python +metrics_related = pd.read_csv('metrics_related.csv') +metrics_related +``` + +```python +metrics_lazy = pd.read_csv('metrics_lazy.csv') +metrics_lazy +``` + +```python +metrics_original.describe() +``` + +```python +metrics_related.describe() +``` + +```python +metrics_lazy.describe() +``` + +```python +fig, ax = plt.subplots(1, 1) 
+ +metrics_original.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5) +metrics_related.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5) +metrics_lazy.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5); +``` + +```python +ax = metrics_original.cpu_time.plot.hist(bins=20, alpha=0.5) +metrics_related.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5) +metrics_lazy.cpu_time.plot.hist(bins=20, ax=ax, alpha=0.5); +``` + +```python +ax = metrics_original.cpu_time.plot.kde(xlim=(0, 10)) +metrics_related.cpu_time.plot.kde(ax=ax) +metrics_lazy.cpu_time.plot.kde(ax=ax); +``` + +```python +fig, ax = plt.subplots() +ax.hist([metrics_original.cpu_time], bins=20) +ax.legend(['Original', 'Related', 'Related 2']) +ax.set_title('Times taken to generate a riddle') +ax.set_xlabel('Time (s)') +plt.savefig('original_time_histogram.png') +``` + +```python +fig, ax = plt.subplots() +ax.hist([metrics_original.cpu_time, + metrics_related.cpu_time], bins=20) +ax.legend(['Original', 'Related']) +ax.set_title('Times taken to generate a riddle') +ax.set_xlabel('Time (s)') +plt.savefig('original_related_time_histogram.png') +``` + +```python +fig, ax = plt.subplots() +ax.hist([metrics_original.cpu_time, + metrics_related.cpu_time, + metrics_lazy.cpu_time], bins=20) +ax.legend(['Original', 'Related', 'Lazy']) +ax.set_title('Times taken to generate a riddle') +ax.set_xlabel('Time (s)') +plt.savefig('original_related_lazy_time_histogram.png') +``` + +```python +fig, ax = plt.subplots() +ax.hist([metrics_related.cpu_time, + metrics_lazy.cpu_time], bins=20, + color=['#ff7f0e', '#2ca02c']) +ax.legend(['Related', 'Lazy']) +ax.set_title('Times taken to generate a riddle') +ax.set_xlabel('Time (s)') +plt.savefig('related_lazy_time_histogram.png') +``` + +```python +metrics_original['generated_per_line'] = metrics_original.generated_lines / metrics_original.riddle_lines +metrics_original['time_per_line'] = metrics_original.cpu_time / metrics_original.riddle_lines +metrics_original +``` + +```python 
+metrics_related['generated_per_line'] = metrics_related.generated_lines / metrics_related.riddle_lines +metrics_related['time_per_line'] = metrics_related.cpu_time / metrics_related.riddle_lines +metrics_related +``` + +```python +metrics_lazy['generated_per_line'] = metrics_lazy.generated_lines / metrics_lazy.riddle_lines +metrics_lazy['time_per_line'] = metrics_lazy.cpu_time / metrics_lazy.riddle_lines +metrics_lazy +``` + +```python +fig, ax = plt.subplots() +ax.hist([metrics_original.time_per_line], bins=20) +ax.legend(['Original']); +``` + +```python +fig, ax = plt.subplots() +ax.hist([metrics_original.time_per_line, + metrics_related.time_per_line], bins=20) +ax.legend(['Original', 'Related']); +``` + +```python +fig, ax = plt.subplots() +ax.hist([metrics_original.time_per_line, + metrics_related.time_per_line, + metrics_lazy.time_per_line], bins=20) +ax.legend(['Original', 'Related', 'Lazy']) +``` + +```python +plt.rcParams['axes.prop_cycle'].by_key()['color'] +``` + +```python +for bars, column in zip(*ax.get_legend_handles_labels()): + color = bars[0].get_facecolor() + print(column, color) +``` + +```python +fig, ax = plt.subplots() +ax.hist([metrics_related.time_per_line, + metrics_lazy.time_per_line], bins=20, + color=['#ff7f0e', '#2ca02c']) +ax.legend(['Related', 'Lazy']); +``` + +```python +ax = metrics_original.time_per_line.plot.kde(xlim=(0, 2)) +metrics_related.time_per_line.plot.kde(ax=ax) +metrics_lazy.time_per_line.plot.kde(ax=ax); +``` + +```python +metrics_original.describe() +``` + +```python +metrics_related.describe() +``` + +```python +metrics_lazy.describe() +``` + +```python +metrics_original.time_per_line.mean() / metrics_related.time_per_line.mean() +``` + +```python +metrics_original.time_per_line.median() / metrics_related.time_per_line.median() +``` + +```python +metrics_related.time_per_line.mean() / metrics_lazy.time_per_line.mean() +``` + +```python +metrics_related.time_per_line.median() / metrics_lazy.time_per_line.median() 
+``` + +```python +metrics_original.time_per_line.mean() / metrics_lazy.time_per_line.mean() +``` + +```python +metrics_original.time_per_line.median() / metrics_lazy.time_per_line.median() +``` + +```python +metrics_original.wall_time.mean() / metrics_related.wall_time.mean() +``` + +```python +metrics_related.wall_time.mean() / metrics_lazy.wall_time.mean() +``` + +```python +metrics_original.wall_time.mean() / metrics_lazy.wall_time.mean() +``` + +```python +stats = pstats.Stats('filtered.stats') +stats.strip_dirs().sort_stats(SortKey.TIME).print_stats(10) +``` + +```python +stats2 = pstats.Stats('lazy.stats') +stats2.strip_dirs().sort_stats(SortKey.TIME).print_stats(10) +``` + +```python + +``` diff --git a/riddle_creator.md b/riddle_creator.md index 5484c03..5b4b43e 100644 --- a/riddle_creator.md +++ b/riddle_creator.md @@ -1,12 +1,12 @@ --- jupyter: jupytext: - formats: ipynb,md,py:percent + formats: ipynb,md text_representation: extension: .md format_name: markdown format_version: '1.3' - jupytext_version: 1.15.0 + jupytext_version: 1.14.5 kernelspec: display_name: Python 3 (ipykernel) language: python @@ -21,45 +21,6 @@ from enum import Enum, auto import random ``` -```python -gencount = 0 -``` - -```python -len([w for w in dictionary if 's' in w]) -``` - -```python -len([w for w in dictionary if 's' not in w]) -``` - -```python -dset = set(frozenset(w) for w in dictionary) -len(dset), len(dictionary) -``` - -```python -len([w for w in dset if 's' in w]) -``` - -```python -len([w for w in dset if 's' not in w]) -``` - -```python -sw = random.choice([w for w in dictionary if 's' in w]) -sw -``` - -```python -swrel = [w for w in dictionary if 's' not in w if edit_distance(w, sw) <=3] -len(swrel) -``` - -```python -swrel -``` - ```python def include_exclude_clue(letter: str, limit: int = 3) -> (RiddleClue, RiddleClue): with_letter = [w for w in dictionary if letter in w] @@ -67,8 +28,6 @@ def include_exclude_clue(letter: str, limit: int = 3) -> (RiddleClue, 
RiddleClue finished = False while not finished: - global gencount - gencount +=1 a = random.choice(with_letter) b = random.choice(without_letter) finished = ((edit_distance(a, b) <= limit) and @@ -87,8 +46,6 @@ def include_include_clue(letter: str, limit: int = 3) -> (RiddleClue, RiddleClue finished = False while not finished: - global gencount - gencount +=1 a = random.choice(with_letter) b = random.choice(with_letter) finished = ((a != b) and @@ -108,8 +65,6 @@ def exclude_exclude_clue(letter: str, limit: int = 3) -> (RiddleClue, RiddleClue finished = False while not finished: - global gencount - gencount +=1 a = random.choice(without_letter) b = random.choice(without_letter) finished = ((a != b) and @@ -160,27 +115,6 @@ sample_riddle solve_riddle(collapse_riddle_clues(sample_riddle)) ``` -```python -# write_riddle(sample_riddle) -``` - -```python -# sample_riddle = random_riddle('sonnet', limit=4) -# sample_riddle -``` - -```python -sample_riddle -``` - -```python -collapse_riddle_clues(sample_riddle) -``` - -```python -solve_riddle(collapse_riddle_clues(sample_riddle)) -``` - ```python def valid_random_riddle(word: str) -> Riddle: finished = False @@ -192,34 +126,27 @@ def valid_random_riddle(word: str) -> Riddle: ``` ```python -# import time -# w_times = [] -# c_times = [] -# for _ in range(1000): -# w1, c1 = time.perf_counter(), time.process_time() -# valid_random_riddle(random.choice(dictionary)) -# w2, c2 = time.perf_counter(), time.process_time() -# w_times.append(w2 - w1) -# c_times.append(c2 - c1) - -# with open('cpu_times.txt', 'w') as f: -# f.writelines(f'{t}\n' for t in c_times) -# with open('wall_times.txt', 'w') as f: -# f.writelines(f'{t}\n' for t in w_times) - -``` - -```python -glcounts = [] +import time +import csv +reports = [] for _ in range(1000): - gencount = 0 + w1, c1 = time.perf_counter(), time.process_time() r = valid_random_riddle(random.choice(dictionary)) + w2, c2 = time.perf_counter(), time.process_time() linecount = len(r) - 
glcounts.append((gencount, linecount)) + reports.append({'wall_time': w2 - w1, + 'cpu_time': c2 - c1, + 'riddle_lines': linecount}) + w_times.append(w2 - w1) + c_times.append(c2 - c1) + +with open('metrics_original.csv', 'w', newline='') as csvfile: + fieldnames = list(reports[0].keys()) + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) -with open('linecounts.txt', 'w') as f: - f.write('"Lines generated","Lines in riddle"\n') - f.writelines(f'{g},{l}\n' for g, l in glcounts) + writer.writeheader() + for r in reports: + writer.writerow(r) ``` ```python @@ -230,7 +157,10 @@ def write_include_exclude_line(clue_a: RiddleClue, clue_b: RiddleClue) -> str: ```python def write_include_include_line(clue_a: RiddleClue, clue_b: RiddleClue) -> str: - line = f"is in {clue_a.word} and also in {clue_b.word}" + if random.randrange(2) == 0: + line = f"is in {clue_a.word} and also in {clue_b.word}" + else: + line = f"is in both {clue_a.word} and {clue_b.word}" return line ``` diff --git a/riddle_creator_filtered_dict.md b/riddle_creator_filtered_dict.md new file mode 100644 index 0000000..9af3c93 --- /dev/null +++ b/riddle_creator_filtered_dict.md @@ -0,0 +1,269 @@ +--- +jupyter: + jupytext: + formats: ipynb,md + text_representation: + extension: .md + format_name: markdown + format_version: '1.3' + jupytext_version: 1.14.5 + kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +```python +from riddle_definitions import * + +from typing import Dict, Tuple, List, Set +from enum import Enum, auto +import random +import gzip +``` + +```python +dictionary_neighbours = {} + +for line in gzip.open('dictionary_neighbours.txt.gz', 'rt').readlines(): + words = line.strip().split(',') + dictionary_neighbours[words[0]] = words[1:] + +possible_riddle_solutions = list(dictionary_neighbours.keys()) +``` + +```python +len(dictionary_neighbours['sonnet']) +``` + +```python +def include_exclude_clue(letter: str, limit: int = 3) -> (RiddleClue, 
RiddleClue): + finished = False + while not finished: + with_letter = random.choice([w for w in dictionary_neighbours if letter in w]) + without_letter = [w for w in dictionary_neighbours[with_letter] + if letter not in w + if edit_distance(with_letter, w) <= limit] + if without_letter: + other = random.choice(without_letter) + finished = True + + return (RiddleClue(word=with_letter, valence=RiddleValence.Include), + RiddleClue(word=other, valence=RiddleValence.Exclude)) + +a, b = include_exclude_clue('s') +a, b, set(a.word) - set(b.word), edit_distance(a.word, b.word) +``` + +```python +def include_include_clue(letter: str, limit: int = 3) -> (RiddleClue, RiddleClue): + finished = False + while not finished: + with_letter = random.choice([w for w in dictionary_neighbours if letter in w]) + others = [w for w in dictionary_neighbours[with_letter] + if letter in w + if edit_distance(with_letter, w) <= limit] + if others: + other = random.choice(others) + finished = True + return (RiddleClue(word=with_letter, valence=RiddleValence.Include), + RiddleClue(word=other, valence=RiddleValence.Include)) + +a, b = include_include_clue('s') +a, b, set(a.word) | set(b.word), edit_distance(a.word, b.word) +``` + +```python +def exclude_exclude_clue(letter: str, limit: int = 3) -> (RiddleClue, RiddleClue): + finished = False + while not finished: + without_letter = random.choice([w for w in dictionary_neighbours if letter not in w]) + others = [w for w in dictionary_neighbours[without_letter] + if letter not in w + if edit_distance(without_letter, w) <= limit] + if others: + other = random.choice(others) + finished = True + + return (RiddleClue(word=without_letter, valence=RiddleValence.Exclude), + RiddleClue(word=other, valence=RiddleValence.Exclude)) + +a, b = exclude_exclude_clue('s') +a, b, set(a.word) | set(b.word), edit_distance(a.word, b.word) +``` + +```python +def random_clue( letter: str + , ie_limit: int = 3 + , ii_limit: int = 2 + , ee_limit: int = 2) -> (RiddleClue, 
RiddleClue): + clue_type = random.choices(['include_exclude', 'include_include', 'exclude_exclude'], + weights=[7, 2, 1], + k=1)[0] + if clue_type == 'include_exclude': + return include_exclude_clue(letter, limit=ie_limit) + elif clue_type =='include_include': + return include_include_clue(letter, limit=ii_limit) + else: + return exclude_exclude_clue(letter, limit=ee_limit) +``` + +```python +def random_riddle( word: str + , ie_limit: int = 3 + , ii_limit: int = 2 + , ee_limit: int = 2 + ) -> Riddle: + return {i+1 : + random_clue(l, + ie_limit=ie_limit, ii_limit=ii_limit, ee_limit=ee_limit) + for i, l in enumerate(word)} +``` + +```python +sample_riddle = random_riddle('teacup') +sample_riddle +``` + +```python +solve_riddle(collapse_riddle_clues(sample_riddle)) +``` + +```python +# write_riddle(sample_riddle) +``` + +```python +# sample_riddle = random_riddle('sonnet', limit=4) +# sample_riddle +``` + +```python +sample_riddle +``` + +```python +collapse_riddle_clues(sample_riddle) +``` + +```python +solve_riddle(collapse_riddle_clues(sample_riddle)) +``` + +```python +def valid_random_riddle(word: str) -> Riddle: + finished = False + while not finished: + riddle = random_riddle(word) + solns = solve_riddle(collapse_riddle_clues(riddle)) + finished = (len(solns) == 1) + return riddle +``` + +```python +import time +import csv +reports = [] +for _ in range(1000): + gencount = 0 + w1, c1 = time.perf_counter(), time.process_time() + r = valid_random_riddle(random.choice(dictionary)) + w2, c2 = time.perf_counter(), time.process_time() + linecount = len(r) + reports.append({'wall_time': w2 - w1, + 'cpu_time': c2 - c1, + 'generated_lines': gencount, + 'riddle_lines': linecount}) + glcounts.append((gencount, linecount)) + w_times.append(w2 - w1) + c_times.append(c2 - c1) + +with open('metrics_related.csv', 'w', newline='') as csvfile: + fieldnames = list(reports[0].keys()) + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + + writer.writeheader() + for r in 
reports: + writer.writerow(r) +``` + +```python +def write_include_exclude_line(clue_a: RiddleClue, clue_b: RiddleClue) -> str: + line = f"is in {clue_a.word} but not in {clue_b.word}" + return line +``` + +```python +def write_include_include_line(clue_a: RiddleClue, clue_b: RiddleClue) -> str: + if random.randrange(2) == 0: + line = f"is in {clue_a.word} and also in {clue_b.word}" + else: + line = f"is in both {clue_a.word} and {clue_b.word}" + return line +``` + +```python +def write_exclude_exclude_line(clue_a: RiddleClue, clue_b: RiddleClue) -> str: + line = f"is neither in {clue_a.word} nor in {clue_b.word}" + return line +``` + +```python +def write_line(a: RiddleClue, b: RiddleClue) -> str: + if a.valence == RiddleValence.Include and b.valence == RiddleValence.Include: + return write_include_include_line(a, b) + elif a.valence == RiddleValence.Include and b.valence == RiddleValence.Exclude: + return write_include_exclude_line(a, b) + elif a.valence == RiddleValence.Exclude and b.valence == RiddleValence.Exclude: + return write_exclude_exclude_line(a, b) + else: + return "illegal line" +``` + +```python +def write_riddle(riddle: Riddle) -> List[str]: + output = [] + for i, (clue_a, clue_b) in sorted(riddle.items()): + pos = reverse_ordinals[i] + if i == len(riddle) and random.random() <= 0.3: + pos = reverse_ordinals[-1] + line = write_line(clue_a, clue_b) + full_line = f"My {pos} {line}" + output.append(full_line) + return output +``` + +```python + +``` + +```python +sample_riddle = valid_random_riddle("elephant") +sample_riddle +``` + +```python +write_riddle(sample_riddle) +``` + +```python +solve_riddle(collapse_riddle_clues(sample_riddle)) +``` + +```python +with open("generated-riddles.txt", 'w') as file: + between = False + for _ in range(10): + if between: + file.write('\n') + between = True + target = random.choice(dictionary) + riddle = valid_random_riddle(target) + lines = write_riddle(riddle) + file.writelines(l + '\n' for l in lines) + 
file.write(f'Target: {target}\n') +``` + +```python + +``` diff --git a/riddle_creator_lazy.md b/riddle_creator_lazy.md new file mode 100644 index 0000000..4af0f00 --- /dev/null +++ b/riddle_creator_lazy.md @@ -0,0 +1,341 @@ +--- +jupyter: + jupytext: + formats: ipynb,md + text_representation: + extension: .md + format_name: markdown + format_version: '1.3' + jupytext_version: 1.14.5 + kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +```python +from riddle_definitions import * + +from typing import Dict, Tuple, List, Set +from enum import Enum, auto +import random +import gzip +``` + +```python +dictionary_neighbours = {} + +for line in gzip.open('dictionary_neighbours.txt.gz', 'rt').readlines(): + words = line.strip().split(',') + dictionary_neighbours[words[0]] = words[1:] + +possible_riddle_clues = list(dictionary_neighbours.keys()) +``` + +```python +len(dictionary_neighbours['sonnet']) +``` + +```python +def include_exclude_clue(letter: str, limit: int = 3) -> (RiddleClue, RiddleClue): + finished = False + while not finished: + + has_first = False + while not has_first: + with_letter = random.choice(possible_riddle_clues) + has_first = letter in with_letter + + others = dictionary_neighbours[with_letter][:] + random.shuffle(others) + + while not finished and others: + other = others[0] + + if letter not in other and edit_distance(with_letter, other) <= limit: + finished = True + else: + others = others[1:] + + return (RiddleClue(word=with_letter, valence=RiddleValence.Include), + RiddleClue(word=other, valence=RiddleValence.Exclude)) + +a, b = include_exclude_clue('s') +a, b, set(a.word) - set(b.word), edit_distance(a.word, b.word) +``` + +```python +def include_include_clue(letter: str, limit: int = 3) -> (RiddleClue, RiddleClue): + finished = False + while not finished: + + has_first = False + while not has_first: + with_letter = random.choice(possible_riddle_clues) + has_first = letter in with_letter + + others = 
dictionary_neighbours[with_letter][:] + random.shuffle(others) + + while not finished and others: + other = others[0] + + if letter in other and edit_distance(with_letter, other) <= limit: + finished = True + else: + others = others[1:] + + return (RiddleClue(word=with_letter, valence=RiddleValence.Include), + RiddleClue(word=other, valence=RiddleValence.Include)) + +a, b = include_include_clue('s') +a, b, set(a.word) | set(b.word), edit_distance(a.word, b.word) +``` + +```python +def exclude_exclude_clue(letter: str, limit: int = 3) -> (RiddleClue, RiddleClue): + finished = False + while not finished: + + has_first = False + while not has_first: + without_letter = random.choice(possible_riddle_clues) + has_first = letter not in without_letter + + others = dictionary_neighbours[without_letter][:] + random.shuffle(others) + + while not finished and others: + other = others[0] + + if letter not in other and edit_distance(without_letter, other) <= limit: + finished = True + else: + others = others[1:] + + + return (RiddleClue(word=without_letter, valence=RiddleValence.Exclude), + RiddleClue(word=other, valence=RiddleValence.Exclude)) + +a, b = exclude_exclude_clue('s') +a, b, set(a.word) | set(b.word), edit_distance(a.word, b.word) +``` + +```python +def random_clue( letter: str + , ie_limit: int = 3 + , ii_limit: int = 2 + , ee_limit: int = 2) -> (RiddleClue, RiddleClue): + clue_type = random.choices(['include_exclude', 'include_include', 'exclude_exclude'], + weights=[7, 2, 1], + k=1)[0] + if clue_type == 'include_exclude': + return include_exclude_clue(letter, limit=ie_limit) + elif clue_type =='include_include': + return include_include_clue(letter, limit=ii_limit) + else: + return exclude_exclude_clue(letter, limit=ee_limit) +``` + +```python +def random_riddle( word: str + , ie_limit: int = 3 + , ii_limit: int = 2 + , ee_limit: int = 2 + ) -> Riddle: + return {i+1 : + random_clue(l, + ie_limit=ie_limit, ii_limit=ii_limit, ee_limit=ee_limit) + for i, l in 
enumerate(word)} +``` + +```python +sample_riddle = random_riddle('teacup') +sample_riddle +``` + +```python +collapse_riddle_clues(sample_riddle) +``` + +```python +solve_riddle(collapse_riddle_clues(sample_riddle)) +``` + +```python +# write_riddle(sample_riddle) +``` + +```python +# sample_riddle = random_riddle('sonnet', limit=4) +# sample_riddle +``` + +```python +sample_riddle +``` + +```python +collapse_riddle_clues(sample_riddle) +``` + +```python +solve_riddle(collapse_riddle_clues(sample_riddle)) +``` + +```python +def valid_random_riddle(word: str) -> Riddle: + finished = False + while not finished: + riddle = random_riddle(word) + solns = solve_riddle(collapse_riddle_clues(riddle)) + finished = (len(solns) == 1) + return riddle +``` + +```python +import time +import csv +reports = [] +for _ in range(1000): + w1, c1 = time.perf_counter(), time.process_time() + r = valid_random_riddle(random.choice(possible_riddle_clues)) + w2, c2 = time.perf_counter(), time.process_time() + linecount = len(r) + reports.append({'wall_time': w2 - w1, + 'cpu_time': c2 - c1, + 'riddle_lines': linecount}) + +with open('metrics_lazy.csv', 'w', newline='') as csvfile: + fieldnames = list(reports[0].keys()) + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + + writer.writeheader() + for r in reports: + writer.writerow(r) +``` + +```python +def write_include_exclude_line(clue_a: RiddleClue, clue_b: RiddleClue) -> str: + line = f"is in {clue_a.word} but not in {clue_b.word}" + return line +``` + +```python +def write_include_include_line(clue_a: RiddleClue, clue_b: RiddleClue) -> str: + if random.randrange(2) == 0: + line = f"is in {clue_a.word} and also in {clue_b.word}" + else: + line = f"is in both {clue_a.word} and {clue_b.word}" + return line +``` + +```python +def write_exclude_exclude_line(clue_a: RiddleClue, clue_b: RiddleClue) -> str: + line = f"is neither in {clue_a.word} nor in {clue_b.word}" + return line +``` + +```python +def write_line(a: RiddleClue, b: 
RiddleClue) -> str: + if a.valence == RiddleValence.Include and b.valence == RiddleValence.Include: + return write_include_include_line(a, b) + elif a.valence == RiddleValence.Include and b.valence == RiddleValence.Exclude: + return write_include_exclude_line(a, b) + elif a.valence == RiddleValence.Exclude and b.valence == RiddleValence.Exclude: + return write_exclude_exclude_line(a, b) + else: + return "illegal line" +``` + +```python +def write_riddle(riddle: Riddle) -> List[str]: + output = [] + for i, (clue_a, clue_b) in sorted(riddle.items()): + pos = reverse_ordinals[i] + if i == len(riddle) and random.random() <= 0.3: + pos = reverse_ordinals[-1] + line = write_line(clue_a, clue_b) + full_line = f"My {pos} {line}" + output.append(full_line) + return output +``` + +```python + +``` + +```python +sample_riddle = valid_random_riddle("elephant") +sample_riddle +``` + +```python +write_riddle(sample_riddle) +``` + +```python +solve_riddle(collapse_riddle_clues(sample_riddle)) +``` + +```python +with open("generated-riddles-lazy.txt", 'w') as file: + between = False + for _ in range(10): + if between: + file.write('\n') + between = True + target = random.choice(possible_riddle_clues) + riddle = valid_random_riddle(target) + lines = write_riddle(riddle) + file.writelines(l + '\n' for l in lines) + file.write(f'Target: {target}\n') + +``` + +```python +print('\n'.join(write_riddle(valid_random_riddle("faster")))) +``` + +```python +len(dictionary_neighbours['sonnet']) +``` + +```python +ndls = sum(len(ws) for ws in dictionary_neighbours.values()) +ndls +``` + +```python +ndls / len(dictionary_neighbours) +``` + +```python +dn_trimmed = {w : [o for o in dictionary_neighbours[w] if edit_distance(w, o) <= 3] + for w in dictionary_neighbours} +``` + +```python +ndlts = sum(len(ws) for ws in dn_trimmed.values()) +ndlts +``` + +```python +ndlts / len(dn_trimmed) +``` + +```python +148 / 940 +``` + +```python +1/7 +``` + +```python +1/6 +``` + +```python + +``` diff --git 
a/riddle_dict_builder.md b/riddle_dict_builder.md new file mode 100644 index 0000000..229be10 --- /dev/null +++ b/riddle_dict_builder.md @@ -0,0 +1,122 @@ +--- +jupyter: + jupytext: + formats: ipynb,md,py:percent + text_representation: + extension: .md + format_name: markdown + format_version: '1.3' + jupytext_version: 1.14.5 + kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Generate a dictionary of related words + +```python +import unicodedata +import re +from dataclasses import dataclass +from typing import Dict, Tuple, List, Set +from enum import Enum, auto +import functools +import random +import multiprocessing +import gzip +# import csv +``` + +```python +stop_words = set('my is in within lies and also always you will find the found but'.split()) +negative_words = set('not never neither nor'.split()) +``` + +```python +ordinals : Dict[str, int] = { 'last': -1 + , 'first': 1 + , 'second': 2 + , 'third': 3 + , 'fourth': 4 + , 'fifth': 5 + , 'sixth': 6 + , 'seventh': 7 + , 'eighth': 8 + , 'ninth': 9 + , 'tenth': 10 + , 'eleventh': 11 + , 'twelfth': 12 + } + +reverse_ordinals : Dict[int, str] = {n: w for w, n in ordinals.items()} +``` + +```python +dictionary : List[str] = [unicodedata.normalize('NFKD', w.strip()).\ + encode('ascii', 'ignore').\ + decode('utf-8') + for w in open('/usr/share/dict/british-english').readlines() + if w.strip().islower() + if w.strip().isalpha() + if len(w.strip()) >= 5 + if len(w.strip()) <= 12 + if w.strip() not in stop_words + if w.strip() not in negative_words + if w.strip() not in ordinals + ] +``` + +Edit (Levenshtein) distance between two words, used below to decide which words count as neighbours + +```python +@functools.lru_cache +def edit_distance(s: str, t: str) -> int: + if s == "": + return len(t) + if t == "": + return len(s) + if s[0] == t[0]: + cost = 0 + else: + cost = 1 + + res = min( + [ edit_distance(s[1:], t) + 1 + , edit_distance(s, t[1:]) + 1 + , edit_distance(s[1:], t[1:]) + cost + ]) + + return res +``` + +```python +# def
find_neighbours(word: str, limit: int = 4) -> Tuple[str, List[str]]: +def find_neighbours(word, limit=4): + sword = set(word) + others = [] + for other in dictionary: + if other != word: + soth = set(other) + if (not sword <= soth and + not soth <= sword and + edit_distance(word, other) <= limit): + others.append(other) + return word, others +``` + +```python +with multiprocessing.Pool() as pool: + # word_other_pairs = pool.imap_unordered(find_neighbours, dictionary, chunksize=5000) + word_other_pairs = pool.map(find_neighbours, dictionary) +``` + +```python +with gzip.open('dictionary_neighbours.txt.gz', 'wt') as f: + for word, related in word_other_pairs: + f.write(f'{word},{",".join(related)}\n') +``` + +```python + +``` diff --git a/riddle_solver.md b/riddle_solver.md index 54c060b..e58deab 100644 --- a/riddle_solver.md +++ b/riddle_solver.md @@ -6,7 +6,7 @@ jupyter: extension: .md format_name: markdown format_version: '1.3' - jupytext_version: 1.15.0 + jupytext_version: 1.14.5 kernelspec: display_name: Python 3 (ipykernel) language: python @@ -81,6 +81,10 @@ e2 = parse_line(tokenise("My second is in apple and also in banana.")) e2 ``` +```python +parse_line(tokenise("My fourth is in both apple and banana.")) +``` + ```python e3 = parse_line(tokenise('My seventh is neither in callus nor in calves')) e3 -- 2.34.1