From: Neil Smith Date: Fri, 4 Jan 2019 22:21:43 +0000 (+0000) Subject: Written some more examples X-Git-Url: https://git.njae.me.uk/?a=commitdiff_plain;h=c78432a60064579208e363a1a08f930d7c1e326d;p=cipher-tools.git Written some more examples --- diff --git a/blog-images/chris-flexen-726558-unsplash.jpg b/blog-images/chris-flexen-726558-unsplash.jpg new file mode 100644 index 0000000..6fdfee2 Binary files /dev/null and b/blog-images/chris-flexen-726558-unsplash.jpg differ diff --git a/hillclimbing-results/hillclimbing-examples.ipynb b/hillclimbing-results/hillclimbing-examples.ipynb new file mode 100644 index 0000000..fadec1b --- /dev/null +++ b/hillclimbing-results/hillclimbing-examples.ipynb @@ -0,0 +1,756 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 125, + "metadata": {}, + "outputs": [], + "source": [ + "import string\n", + "import random\n", + "import itertools\n", + "from cipher.keyword_cipher import *\n", + "from support.utilities import *" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'gpavtdyzocqnrsujmxikwbehlf'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pt = \"catch the cat\"\n", + "\n", + "ca = list(string.ascii_lowercase)\n", + "random.shuffle(ca)\n", + "ca = cat(ca)\n", + "ca" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'agkaz kzt agk'" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ct = keyword_encipher(pt, ca)\n", + "ct" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [], + "source": [ + "def show_mapping_alpha(c_a, p_a=string.ascii_lowercase, letters=string.ascii_lowercase):\n", + " mapping = {p: c for (p, c) in zip(p_a, c_a) if p in letters}\n", + " return show_mapping(mapping)\n", + "\n", + "def show_mapping(mapping):\n", + " retval = '| plaintext letter | ' + ' | '.join(l for l in sorted(mapping)) + ' |\\n'\n", + " retval += '|-------------------|---|---|---|---|---|\\n'\n", + " retval += '| ciphertext letter | ' + ' | '.join(mapping[l] for l in sorted(mapping)) + ' |\\n'\n", + " return retval" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "| plaintext letter | a | c | e | h | t |\n", + "|-------------------|---|---|---|---|---|\n", + "| ciphertext letter | g | a | t | z | k |\n", + "\n" + ] + } + ], + "source": [ + "print(show_mapping_alpha(ca, letters=sanitise(pt)))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'a': 'g', 'c': 'a', 'e': 't', 'h': 'z', 't': 'k'},\n", + " {'g': 'a', 'a': 'c', 't': 'e', 'z': 'h', 'k': 't'})" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m0 = {p: c for (p, c) in zip(string.ascii_letters, ca) if p in pt}\n", + "im0 = {c: p for (p, c) in zip(string.ascii_letters, ca) if p in pt}\n", + "m0, im0" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "| a | c | e | h | t |\n", + "| g | a | t | z | k |\n", + "\n" + ] + } + ], + "source": [ + "print(show_mapping(m0))" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [], + "source": [ + "def apply_inverse_map(ciphertext, mapping):\n", + " plaintext = cat(mapping[l] if l in mapping else l for l in ciphertext)\n", + " return plaintext, Pbigrams(sanitise(plaintext))" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "def swap(letters, i, j):\n", + " if i > j:\n", + " i, j = j, i\n", + " if i == j:\n", + " return letters\n", + " else:\n", + " return (letters[:i] + letters[j] + letters[i+1:j] + letters[i] +\n", + " letters[j+1:])" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "def map_swap(mapping):\n", + " keys = sorted(mapping)\n", + " values = cat(mapping[l] for l in keys)\n", + " n = len(keys)\n", + " swapped_values = swap(values, random.randrange(n), random.randrange(n))\n", + " return {k: sv for (k, sv) in zip(keys, swapped_values)}" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [], + "source": [ + "im1 = map_swap(im0)\n", + "im2 = map_swap(im1)" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('aceah eht ace', -24.470656262279007)" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "apply_inverse_map(ct, im2)" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('actah the act', -23.337953804339712)" + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "apply_inverse_map(ct, im1)" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('catch the cat', -22.142275954584633)" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "apply_inverse_map(ct, im0)" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'g': 'a', 'a': 'c', 't': 'e', 'z': 'h', 'k': 't'},\n", + " {'a': 'a', 'g': 'c', 'k': 't', 't': 'e', 'z': 'h'},\n", + " {'a': 'a', 'g': 'c', 'k': 'e', 't': 't', 'z': 'h'})" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "im0, im1, im2" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [], + "source": [ + "m1 = {im1[l]: l for l in im1}\n", + "m2 = {im2[l]: l for l in im2}" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "| plaintext letter | a | c | e | h | t |\n", + "|-------------------|---|---|---|---|---|\n", + "| ciphertext letter | g | a | t | z | k |\n", + "\n" + ] + } + ], + "source": [ + "print(show_mapping(m0))" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "| plaintext letter | a | c | e | h | t |\n", + "|-------------------|---|---|---|---|---|\n", + "| ciphertext letter | a | g | t | z | k |\n", + "\n" + ] + } + ], + "source": [ + "print(show_mapping(m1))" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "| plaintext letter | a | c | e | h | t |\n", + "|-------------------|---|---|---|---|---|\n", + "| ciphertext letter | a | g | k | z | t |\n", + "\n" + ] + } + ], + "source": [ + "print(show_mapping(m2))" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('hceha eat hce', -26.41716766077668)" + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "im3 = map_swap(im2)\n", + "apply_inverse_map(ct, im3)" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [], + "source": [ + "m3 = {im3[l]: l for l in im3}" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "| plaintext letter | a | c | e | h | t |\n", + "|-------------------|---|---|---|---|---|\n", + "| ciphertext letter | z | g | k | a | t |\n", + "\n" + ] + } + ], + "source": [ + "print(show_mapping(m3))" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [], + "source": [ + "def all_swaps(mapping):\n", + " keys = sorted(mapping)\n", + " values = cat(mapping[l] for l in keys)\n", + " n = len(keys)\n", + " swapped_values = [swap(values, i, j) for i in range(n) for j in range(n) if i < j]\n", + " return [{k: sv for (k, sv) in zip(keys, svs)} for svs in swapped_values]" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'g': 'a', 'a': 'c', 't': 'e', 'z': 'h', 'k': 't'},\n", + " [('actah the act', -23.337953804339712),\n", + " ('tacth che tac', -22.992889593694795),\n", + " ('eateh thc eat', -23.337174988961543),\n", + " ('hathc tce hat', -24.20565798548872),\n", + " ('ctach ahe cta', -23.361982341471602),\n", + " ('cetch tha cet', -23.152196785128968),\n", + " ('chtca tae cht', -25.47053856384374),\n", + " ('caech eht cae', -27.119008761052356),\n", + " ('cahct hte cah', -25.96020844569102),\n", + " ('catce teh cat', -24.369461369323975)])" + ] + }, + "execution_count": 117, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "im0, [apply_inverse_map(ct, tim) for tim in all_swaps(im0)]" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'a': 'a', 'g': 'c', 'k': 'e', 't': 't', 'z': 'h'},\n", + " [('caech eht cae', -27.119008761052356),\n", + " ('ecaeh aht eca', -26.10317913928645),\n", + " ('tceth eha tce', -23.289877585658743),\n", + " ('hceha eat hce', -26.41716766077668),\n", + " ('aecah cht aec', -28.466074945814817),\n", + " ('ateah ehc ate', -23.89678491033435),\n", + " ('aheac ect ahe', -23.82052347276842),\n", + " ('actah the act', -23.337953804339712),\n", + " ('achae het ach', -24.4061387567535),\n", + " ('aceat eth ace', -21.139211036323402)])" + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "im2, [apply_inverse_map(ct, tim) for tim in all_swaps(im2)]" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [], + "source": [ + "def all_swaps_worse(mapping):\n", + " _, score0 = apply_inverse_map(ct, mapping)\n", + " swapped_mappings = all_swaps(mapping)\n", + " scores = [apply_inverse_map(ct, m)[1] for m in swapped_mappings]\n", + " better_scores = [s for s in scores if s > score0]\n", + " return better_scores == []" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 123, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_swaps_worse(im3)" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [], + "source": [ + "def make_map(als, bls):\n", + " return {a: b for (a, b) in zip(als, bls)}" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('aceht', 'agktz')" + ] + }, + "execution_count": 129, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ptls = cat(sorted(deduplicate(sanitise(pt))))\n", + "ctls = cat(sorted(deduplicate(sanitise(ct))))\n", + "ptls, ctls" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(120,\n", + " [{'a': 'a', 'g': 'c', 'k': 'e', 't': 'h', 'z': 't'},\n", + " {'a': 'a', 'g': 'c', 'k': 'e', 'z': 'h', 't': 't'},\n", + " {'a': 'a', 'g': 'c', 't': 'e', 'k': 'h', 'z': 't'},\n", + " {'a': 'a', 'g': 'c', 't': 'e', 'z': 'h', 'k': 't'},\n", + " {'a': 'a', 'g': 'c', 'z': 'e', 'k': 'h', 't': 't'}])" + ] + }, + "execution_count": 132, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_maps = [make_map(c, ptls) for c in itertools.permutations(ctls)]\n", + "len(all_maps), all_maps[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'a': 'a', 'z': 'c', 'k': 'e', 't': 'h', 'g': 't'},\n", + " {'g': 'a', 't': 'c', 'z': 'e', 'a': 'h', 'k': 't'},\n", + " {'g': 'a', 'z': 'c', 'a': 'e', 't': 'h', 'k': 't'},\n", + " {'t': 'a', 'k': 'c', 'g': 'e', 'z': 'h', 'a': 't'},\n", + " {'t': 'a', 'z': 'c', 'k': 'e', 'g': 'h', 'a': 't'},\n", + " {'z': 'a', 't': 'c', 'a': 'e', 'k': 'h', 'g': 't'}]" + ] + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "local_optima = [m for m in all_maps if all_swaps_worse(m) if m != im0]\n", + "local_optima" + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(('tecth cha tec', -22.37718617528681),\n", + " [('etceh cha etc', -27.45222919076422),\n", + " ('cetch tha cet', -23.152196785128968),\n", + " ('aecah cht aec', -28.466074945814817),\n", + " ('hecht cta hec', -24.0528877258752),\n", + " ('tceth eha tce', -23.289877585658743),\n", + " ('tacth che tac', -22.992889593694795),\n", + " ('thcte cea thc', -23.37530629522044),\n", + " ('teath ahc tea', -23.192822966291835),\n", + " ('tehtc hca teh', -25.824045558109102),\n", + " ('tecta cah tec', -23.630623398955464)])" + ] + }, + "execution_count": 143, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "apply_inverse_map(ct, local_optima[3]), [apply_inverse_map(ct, tim) for tim in all_swaps(local_optima[3])]" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(('ethea hac eth', -21.831799648932474),\n", + " [('tehta hac teh', -25.2630658238322),\n", + " ('hteha eac hte', -25.12161519433393),\n", + " ('cthca hae cth', -25.56645047924706),\n", + " ('athae hec ath', -22.523920547555058),\n", + " ('ehtea tac eht', -24.414224893001006),\n", + " ('echea hat ech', -22.34614937355321),\n", + " ('eahet htc eah', -24.64789885786501),\n", + " ('etcea cah etc', -24.40643936994998),\n", + " ('etaeh ahc eta', -27.042650227267693),\n", + " ('ethec hca eth', -23.70218668022281)])" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "apply_inverse_map(ct, local_optima[5]), [apply_inverse_map(ct, tim) for tim in all_swaps(local_optima[5])]" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('catch the cat', -22.142275954584633)" + ] + }, + "execution_count": 140, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "apply_inverse_map(ct, im0)" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'t': 'a', 'k': 'c', 'g': 'e', 'z': 'h', 'a': 't'}" + ] + }, + "execution_count": 146, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "local_optima[3]" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "| plaintext letter | a | c | e | h | t |\n", + "|-------------------|---|---|---|---|---|\n", + "| ciphertext letter | t | k | g | z | a |\n", + "\n" + ] + } + ], + "source": [ + "l3 = {local_optima[3][l]: l for l in local_optima[3]}\n", + "print(show_mapping(l3))" + ] + }, + { + "cell_type": "code", + "execution_count": 150, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'etoainhsrdlumwycfgpbvkxjqz'" + ] + }, + "execution_count": 150, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cat(p[0] for p in english_counts.most_common())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/hillclimbing-results/hillclimbing-random-unigram-uniform.csv.png b/hillclimbing-results/hillclimbing-random-unigram-uniform.csv.png new file mode 100644 index 0000000..68076c1 Binary files /dev/null and b/hillclimbing-results/hillclimbing-random-unigram-uniform.csv.png differ diff --git a/hillclimbing-results/sa-random-unigram-uniform.csv.png b/hillclimbing-results/sa-random-unigram-uniform.csv.png new file mode 100644 index 0000000..29e2d35 Binary files /dev/null and b/hillclimbing-results/sa-random-unigram-uniform.csv.png differ