Written some more examples
authorNeil Smith <neil.git@njae.me.uk>
Fri, 4 Jan 2019 22:21:43 +0000 (22:21 +0000)
committerNeil Smith <neil.git@njae.me.uk>
Fri, 4 Jan 2019 22:21:43 +0000 (22:21 +0000)
blog-images/chris-flexen-726558-unsplash.jpg [new file with mode: 0644]
hillclimbing-results/hillclimbing-examples.ipynb [new file with mode: 0644]
hillclimbing-results/hillclimbing-random-unigram-uniform.csv.png [new file with mode: 0644]
hillclimbing-results/sa-random-unigram-uniform.csv.png [new file with mode: 0644]

diff --git a/blog-images/chris-flexen-726558-unsplash.jpg b/blog-images/chris-flexen-726558-unsplash.jpg
new file mode 100644 (file)
index 0000000..6fdfee2
Binary files /dev/null and b/blog-images/chris-flexen-726558-unsplash.jpg differ
diff --git a/hillclimbing-results/hillclimbing-examples.ipynb b/hillclimbing-results/hillclimbing-examples.ipynb
new file mode 100644 (file)
index 0000000..fadec1b
--- /dev/null
@@ -0,0 +1,756 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 125,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import string\n",
+    "import random\n",
+    "import itertools\n",
+    "from cipher.keyword_cipher import *\n",
+    "from support.utilities import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'gpavtdyzocqnrsujmxikwbehlf'"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pt = \"catch the cat\"\n",
+    "\n",
+    "ca = list(string.ascii_lowercase)\n",
+    "random.shuffle(ca)\n",
+    "ca = cat(ca)\n",
+    "ca"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'agkaz kzt agk'"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ct = keyword_encipher(pt, ca)\n",
+    "ct"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 104,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def show_mapping_alpha(c_a, p_a=string.ascii_lowercase, letters=string.ascii_lowercase):\n",
+    "    mapping = {p: c for (p, c) in zip(p_a, c_a) if p in letters}\n",
+    "    return show_mapping(mapping)\n",
+    "\n",
+    "def show_mapping(mapping):\n",
+    "    retval  = '| plaintext letter  | ' + ' | '.join(l for l in sorted(mapping)) + ' |\\n'\n",
+    "    retval += '|-------------------|---|---|---|---|---|\\n'\n",
+    "    retval += '| ciphertext letter | ' + ' | '.join(mapping[l] for l in sorted(mapping)) + ' |\\n'\n",
+    "    return retval"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 105,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "| plaintext letter  | a | c | e | h | t |\n",
+      "|-------------------|---|---|---|---|---|\n",
+      "| ciphertext letter | g | a | t | z | k |\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(show_mapping_alpha(ca, letters=sanitise(pt)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "({'a': 'g', 'c': 'a', 'e': 't', 'h': 'z', 't': 'k'},\n",
+       " {'g': 'a', 'a': 'c', 't': 'e', 'z': 'h', 'k': 't'})"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "m0 = {p: c for (p, c) in zip(string.ascii_letters, ca) if p in pt}\n",
+    "im0 = {c: p for (p, c) in zip(string.ascii_letters, ca) if p in pt}\n",
+    "m0, im0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "| a | c | e | h | t |\n",
+      "| g | a | t | z | k |\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(show_mapping(m0))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def apply_inverse_map(ciphertext, mapping):\n",
+    "    plaintext = cat(mapping[l] if l in mapping else l for l in ciphertext)\n",
+    "    return plaintext, Pbigrams(sanitise(plaintext))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def swap(letters, i, j):\n",
+    "    if i > j:\n",
+    "        i, j = j, i\n",
+    "    if i == j:\n",
+    "        return letters\n",
+    "    else:\n",
+    "        return (letters[:i] + letters[j] + letters[i+1:j] + letters[i] +\n",
+    "                letters[j+1:])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def map_swap(mapping):\n",
+    "    keys = sorted(mapping)\n",
+    "    values = cat(mapping[l] for l in keys)\n",
+    "    n = len(keys)\n",
+    "    swapped_values = swap(values, random.randrange(n), random.randrange(n))\n",
+    "    return {k: sv for (k, sv) in zip(keys, swapped_values)}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 89,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "im1 = map_swap(im0)\n",
+    "im2 = map_swap(im1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 90,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "('aceah eht ace', -24.470656262279007)"
+      ]
+     },
+     "execution_count": 90,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "apply_inverse_map(ct, im2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 91,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "('actah the act', -23.337953804339712)"
+      ]
+     },
+     "execution_count": 91,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "apply_inverse_map(ct, im1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 88,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "('catch the cat', -22.142275954584633)"
+      ]
+     },
+     "execution_count": 88,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "apply_inverse_map(ct, im0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 92,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "({'g': 'a', 'a': 'c', 't': 'e', 'z': 'h', 'k': 't'},\n",
+       " {'a': 'a', 'g': 'c', 'k': 't', 't': 'e', 'z': 'h'},\n",
+       " {'a': 'a', 'g': 'c', 'k': 'e', 't': 't', 'z': 'h'})"
+      ]
+     },
+     "execution_count": 92,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "im0, im1, im2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 93,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "m1 = {im1[l]: l for l in im1}\n",
+    "m2 = {im2[l]: l for l in im2}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 106,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "| plaintext letter  | a | c | e | h | t |\n",
+      "|-------------------|---|---|---|---|---|\n",
+      "| ciphertext letter | g | a | t | z | k |\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(show_mapping(m0))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 107,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "| plaintext letter  | a | c | e | h | t |\n",
+      "|-------------------|---|---|---|---|---|\n",
+      "| ciphertext letter | a | g | t | z | k |\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(show_mapping(m1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 108,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "| plaintext letter  | a | c | e | h | t |\n",
+      "|-------------------|---|---|---|---|---|\n",
+      "| ciphertext letter | a | g | k | z | t |\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(show_mapping(m2))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 110,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "('hceha eat hce', -26.41716766077668)"
+      ]
+     },
+     "execution_count": 110,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "im3 = map_swap(im2)\n",
+    "apply_inverse_map(ct, im3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 111,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "m3 = {im3[l]: l for l in im3}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 112,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "| plaintext letter  | a | c | e | h | t |\n",
+      "|-------------------|---|---|---|---|---|\n",
+      "| ciphertext letter | z | g | k | a | t |\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(show_mapping(m3))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 113,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def all_swaps(mapping):\n",
+    "    keys = sorted(mapping)\n",
+    "    values = cat(mapping[l] for l in keys)\n",
+    "    n = len(keys)\n",
+    "    swapped_values = [swap(values, i, j) for i in range(n) for j in range(n) if i < j]\n",
+    "    return [{k: sv for (k, sv) in zip(keys, svs)} for svs in swapped_values]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 117,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "({'g': 'a', 'a': 'c', 't': 'e', 'z': 'h', 'k': 't'},\n",
+       " [('actah the act', -23.337953804339712),\n",
+       "  ('tacth che tac', -22.992889593694795),\n",
+       "  ('eateh thc eat', -23.337174988961543),\n",
+       "  ('hathc tce hat', -24.20565798548872),\n",
+       "  ('ctach ahe cta', -23.361982341471602),\n",
+       "  ('cetch tha cet', -23.152196785128968),\n",
+       "  ('chtca tae cht', -25.47053856384374),\n",
+       "  ('caech eht cae', -27.119008761052356),\n",
+       "  ('cahct hte cah', -25.96020844569102),\n",
+       "  ('catce teh cat', -24.369461369323975)])"
+      ]
+     },
+     "execution_count": 117,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "im0, [apply_inverse_map(ct, tim) for tim in all_swaps(im0)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 118,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "({'a': 'a', 'g': 'c', 'k': 'e', 't': 't', 'z': 'h'},\n",
+       " [('caech eht cae', -27.119008761052356),\n",
+       "  ('ecaeh aht eca', -26.10317913928645),\n",
+       "  ('tceth eha tce', -23.289877585658743),\n",
+       "  ('hceha eat hce', -26.41716766077668),\n",
+       "  ('aecah cht aec', -28.466074945814817),\n",
+       "  ('ateah ehc ate', -23.89678491033435),\n",
+       "  ('aheac ect ahe', -23.82052347276842),\n",
+       "  ('actah the act', -23.337953804339712),\n",
+       "  ('achae het ach', -24.4061387567535),\n",
+       "  ('aceat eth ace', -21.139211036323402)])"
+      ]
+     },
+     "execution_count": 118,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "im2, [apply_inverse_map(ct, tim) for tim in all_swaps(im2)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 119,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def all_swaps_worse(mapping):\n",
+    "    _, score0 = apply_inverse_map(ct, mapping)\n",
+    "    swapped_mappings = all_swaps(mapping)\n",
+    "    scores = [apply_inverse_map(ct, m)[1] for m in swapped_mappings]\n",
+    "    better_scores = [s for s in scores if s > score0]\n",
+    "    return better_scores == []"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 123,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "False"
+      ]
+     },
+     "execution_count": 123,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "all_swaps_worse(im3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 124,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def make_map(als, bls):\n",
+    "    return {a: b for (a, b) in zip(als, bls)}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 129,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "('aceht', 'agktz')"
+      ]
+     },
+     "execution_count": 129,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ptls = cat(sorted(deduplicate(sanitise(pt))))\n",
+    "ctls = cat(sorted(deduplicate(sanitise(ct))))\n",
+    "ptls, ctls"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 132,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(120,\n",
+       " [{'a': 'a', 'g': 'c', 'k': 'e', 't': 'h', 'z': 't'},\n",
+       "  {'a': 'a', 'g': 'c', 'k': 'e', 'z': 'h', 't': 't'},\n",
+       "  {'a': 'a', 'g': 'c', 't': 'e', 'k': 'h', 'z': 't'},\n",
+       "  {'a': 'a', 'g': 'c', 't': 'e', 'z': 'h', 'k': 't'},\n",
+       "  {'a': 'a', 'g': 'c', 'z': 'e', 'k': 'h', 't': 't'}])"
+      ]
+     },
+     "execution_count": 132,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "all_maps = [make_map(c, ptls) for c in itertools.permutations(ctls)]\n",
+    "len(all_maps), all_maps[:5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 138,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[{'a': 'a', 'z': 'c', 'k': 'e', 't': 'h', 'g': 't'},\n",
+       " {'g': 'a', 't': 'c', 'z': 'e', 'a': 'h', 'k': 't'},\n",
+       " {'g': 'a', 'z': 'c', 'a': 'e', 't': 'h', 'k': 't'},\n",
+       " {'t': 'a', 'k': 'c', 'g': 'e', 'z': 'h', 'a': 't'},\n",
+       " {'t': 'a', 'z': 'c', 'k': 'e', 'g': 'h', 'a': 't'},\n",
+       " {'z': 'a', 't': 'c', 'a': 'e', 'k': 'h', 'g': 't'}]"
+      ]
+     },
+     "execution_count": 138,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "local_optima = [m for m in all_maps if all_swaps_worse(m) if m != im0]\n",
+    "local_optima"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 143,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(('tecth cha tec', -22.37718617528681),\n",
+       " [('etceh cha etc', -27.45222919076422),\n",
+       "  ('cetch tha cet', -23.152196785128968),\n",
+       "  ('aecah cht aec', -28.466074945814817),\n",
+       "  ('hecht cta hec', -24.0528877258752),\n",
+       "  ('tceth eha tce', -23.289877585658743),\n",
+       "  ('tacth che tac', -22.992889593694795),\n",
+       "  ('thcte cea thc', -23.37530629522044),\n",
+       "  ('teath ahc tea', -23.192822966291835),\n",
+       "  ('tehtc hca teh', -25.824045558109102),\n",
+       "  ('tecta cah tec', -23.630623398955464)])"
+      ]
+     },
+     "execution_count": 143,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "apply_inverse_map(ct, local_optima[3]), [apply_inverse_map(ct, tim) for tim in all_swaps(local_optima[3])]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 145,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(('ethea hac eth', -21.831799648932474),\n",
+       " [('tehta hac teh', -25.2630658238322),\n",
+       "  ('hteha eac hte', -25.12161519433393),\n",
+       "  ('cthca hae cth', -25.56645047924706),\n",
+       "  ('athae hec ath', -22.523920547555058),\n",
+       "  ('ehtea tac eht', -24.414224893001006),\n",
+       "  ('echea hat ech', -22.34614937355321),\n",
+       "  ('eahet htc eah', -24.64789885786501),\n",
+       "  ('etcea cah etc', -24.40643936994998),\n",
+       "  ('etaeh ahc eta', -27.042650227267693),\n",
+       "  ('ethec hca eth', -23.70218668022281)])"
+      ]
+     },
+     "execution_count": 145,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "apply_inverse_map(ct, local_optima[5]), [apply_inverse_map(ct, tim) for tim in all_swaps(local_optima[5])]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 140,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "('catch the cat', -22.142275954584633)"
+      ]
+     },
+     "execution_count": 140,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "apply_inverse_map(ct, im0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 146,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'t': 'a', 'k': 'c', 'g': 'e', 'z': 'h', 'a': 't'}"
+      ]
+     },
+     "execution_count": 146,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "local_optima[3]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 147,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "| plaintext letter  | a | c | e | h | t |\n",
+      "|-------------------|---|---|---|---|---|\n",
+      "| ciphertext letter | t | k | g | z | a |\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "l3 = {local_optima[3][l]: l for l in local_optima[3]}\n",
+    "print(show_mapping(l3))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 150,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'etoainhsrdlumwycfgpbvkxjqz'"
+      ]
+     },
+     "execution_count": 150,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cat(p[0] for p in english_counts.most_common())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/hillclimbing-results/hillclimbing-random-unigram-uniform.csv.png b/hillclimbing-results/hillclimbing-random-unigram-uniform.csv.png
new file mode 100644 (file)
index 0000000..68076c1
Binary files /dev/null and b/hillclimbing-results/hillclimbing-random-unigram-uniform.csv.png differ
diff --git a/hillclimbing-results/sa-random-unigram-uniform.csv.png b/hillclimbing-results/sa-random-unigram-uniform.csv.png
new file mode 100644 (file)
index 0000000..29e2d35
Binary files /dev/null and b/hillclimbing-results/sa-random-unigram-uniform.csv.png differ