{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [],
   "source": [
    "import string\n",
    "import random\n",
    "import itertools\n",
    "from cipher.keyword_cipher import *\n",
    "from support.utilities import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'gpavtdyzocqnrsujmxikwbehlf'"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pt = \"catch the cat\"\n",
    "\n",
    "ca = list(string.ascii_lowercase)\n",
    "random.shuffle(ca)\n",
    "ca = cat(ca)\n",
    "ca"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'agkaz kzt agk'"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ct = keyword_encipher(pt, ca)\n",
    "ct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [],
   "source": [
    "def show_mapping_alpha(c_a, p_a=string.ascii_lowercase, letters=string.ascii_lowercase):\n",
    "    mapping = {p: c for (p, c) in zip(p_a, c_a) if p in letters}\n",
    "    return show_mapping(mapping)\n",
    "\n",
    "def show_mapping(mapping):\n",
    "    retval  = '| plaintext letter  | ' + ' | '.join(l for l in sorted(mapping)) + ' |\\n'\n",
    "    retval += '|-------------------|---|---|---|---|---|\\n'\n",
    "    retval += '| ciphertext letter | ' + ' | '.join(mapping[l] for l in sorted(mapping)) + ' |\\n'\n",
    "    return retval"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "| plaintext letter  | a | c | e | h | t |\n",
      "|-------------------|---|---|---|---|---|\n",
      "| ciphertext letter | g | a | t | z | k |\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(show_mapping_alpha(ca, letters=sanitise(pt)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "({'a': 'g', 'c': 'a', 'e': 't', 'h': 'z', 't': 'k'},\n",
       " {'g': 'a', 'a': 'c', 't': 'e', 'z': 'h', 'k': 't'})"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "m0 = {p: c for (p, c) in zip(string.ascii_letters, ca) if p in pt}\n",
    "im0 = {c: p for (p, c) in zip(string.ascii_letters, ca) if p in pt}\n",
    "m0, im0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "| a | c | e | h | t |\n",
      "| g | a | t | z | k |\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(show_mapping(m0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "def apply_inverse_map(ciphertext, mapping):\n",
    "    plaintext = cat(mapping[l] if l in mapping else l for l in ciphertext)\n",
    "    return plaintext, Pbigrams(sanitise(plaintext))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "def swap(letters, i, j):\n",
    "    if i > j:\n",
    "        i, j = j, i\n",
    "    if i == j:\n",
    "        return letters\n",
    "    else:\n",
    "        return (letters[:i] + letters[j] + letters[i+1:j] + letters[i] +\n",
    "                letters[j+1:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "def map_swap(mapping):\n",
    "    keys = sorted(mapping)\n",
    "    values = cat(mapping[l] for l in keys)\n",
    "    n = len(keys)\n",
    "    swapped_values = swap(values, random.randrange(n), random.randrange(n))\n",
    "    return {k: sv for (k, sv) in zip(keys, swapped_values)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "im1 = map_swap(im0)\n",
    "im2 = map_swap(im1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('aceah eht ace', -24.470656262279007)"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "apply_inverse_map(ct, im2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('actah the act', -23.337953804339712)"
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "apply_inverse_map(ct, im1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('catch the cat', -22.142275954584633)"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "apply_inverse_map(ct, im0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "({'g': 'a', 'a': 'c', 't': 'e', 'z': 'h', 'k': 't'},\n",
       " {'a': 'a', 'g': 'c', 'k': 't', 't': 'e', 'z': 'h'},\n",
       " {'a': 'a', 'g': 'c', 'k': 'e', 't': 't', 'z': 'h'})"
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "im0, im1, im2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [],
   "source": [
    "m1 = {im1[l]: l for l in im1}\n",
    "m2 = {im2[l]: l for l in im2}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "| plaintext letter  | a | c | e | h | t |\n",
      "|-------------------|---|---|---|---|---|\n",
      "| ciphertext letter | g | a | t | z | k |\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(show_mapping(m0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "| plaintext letter  | a | c | e | h | t |\n",
      "|-------------------|---|---|---|---|---|\n",
      "| ciphertext letter | a | g | t | z | k |\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(show_mapping(m1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "| plaintext letter  | a | c | e | h | t |\n",
      "|-------------------|---|---|---|---|---|\n",
      "| ciphertext letter | a | g | k | z | t |\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(show_mapping(m2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('hceha eat hce', -26.41716766077668)"
      ]
     },
     "execution_count": 110,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "im3 = map_swap(im2)\n",
    "apply_inverse_map(ct, im3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [],
   "source": [
    "m3 = {im3[l]: l for l in im3}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "| plaintext letter  | a | c | e | h | t |\n",
      "|-------------------|---|---|---|---|---|\n",
      "| ciphertext letter | z | g | k | a | t |\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(show_mapping(m3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [],
   "source": [
    "def all_swaps(mapping):\n",
    "    keys = sorted(mapping)\n",
    "    values = cat(mapping[l] for l in keys)\n",
    "    n = len(keys)\n",
    "    swapped_values = [swap(values, i, j) for i in range(n) for j in range(n) if i < j]\n",
    "    return [{k: sv for (k, sv) in zip(keys, svs)} for svs in swapped_values]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "({'g': 'a', 'a': 'c', 't': 'e', 'z': 'h', 'k': 't'},\n",
       " [('actah the act', -23.337953804339712),\n",
       "  ('tacth che tac', -22.992889593694795),\n",
       "  ('eateh thc eat', -23.337174988961543),\n",
       "  ('hathc tce hat', -24.20565798548872),\n",
       "  ('ctach ahe cta', -23.361982341471602),\n",
       "  ('cetch tha cet', -23.152196785128968),\n",
       "  ('chtca tae cht', -25.47053856384374),\n",
       "  ('caech eht cae', -27.119008761052356),\n",
       "  ('cahct hte cah', -25.96020844569102),\n",
       "  ('catce teh cat', -24.369461369323975)])"
      ]
     },
     "execution_count": 117,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "im0, [apply_inverse_map(ct, tim) for tim in all_swaps(im0)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "({'a': 'a', 'g': 'c', 'k': 'e', 't': 't', 'z': 'h'},\n",
       " [('caech eht cae', -27.119008761052356),\n",
       "  ('ecaeh aht eca', -26.10317913928645),\n",
       "  ('tceth eha tce', -23.289877585658743),\n",
       "  ('hceha eat hce', -26.41716766077668),\n",
       "  ('aecah cht aec', -28.466074945814817),\n",
       "  ('ateah ehc ate', -23.89678491033435),\n",
       "  ('aheac ect ahe', -23.82052347276842),\n",
       "  ('actah the act', -23.337953804339712),\n",
       "  ('achae het ach', -24.4061387567535),\n",
       "  ('aceat eth ace', -21.139211036323402)])"
      ]
     },
     "execution_count": 118,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "im2, [apply_inverse_map(ct, tim) for tim in all_swaps(im2)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [],
   "source": [
    "def all_swaps_worse(mapping):\n",
    "    _, score0 = apply_inverse_map(ct, mapping)\n",
    "    swapped_mappings = all_swaps(mapping)\n",
    "    scores = [apply_inverse_map(ct, m)[1] for m in swapped_mappings]\n",
    "    better_scores = [s for s in scores if s > score0]\n",
    "    return better_scores == []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 123,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_swaps_worse(im3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_map(als, bls):\n",
    "    return {a: b for (a, b) in zip(als, bls)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('aceht', 'agktz')"
      ]
     },
     "execution_count": 129,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ptls = cat(sorted(deduplicate(sanitise(pt))))\n",
    "ctls = cat(sorted(deduplicate(sanitise(ct))))\n",
    "ptls, ctls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(120,\n",
       " [{'a': 'a', 'g': 'c', 'k': 'e', 't': 'h', 'z': 't'},\n",
       "  {'a': 'a', 'g': 'c', 'k': 'e', 'z': 'h', 't': 't'},\n",
       "  {'a': 'a', 'g': 'c', 't': 'e', 'k': 'h', 'z': 't'},\n",
       "  {'a': 'a', 'g': 'c', 't': 'e', 'z': 'h', 'k': 't'},\n",
       "  {'a': 'a', 'g': 'c', 'z': 'e', 'k': 'h', 't': 't'}])"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_maps = [make_map(c, ptls) for c in itertools.permutations(ctls)]\n",
    "len(all_maps), all_maps[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'a': 'a', 'z': 'c', 'k': 'e', 't': 'h', 'g': 't'},\n",
       " {'g': 'a', 't': 'c', 'z': 'e', 'a': 'h', 'k': 't'},\n",
       " {'g': 'a', 'z': 'c', 'a': 'e', 't': 'h', 'k': 't'},\n",
       " {'t': 'a', 'k': 'c', 'g': 'e', 'z': 'h', 'a': 't'},\n",
       " {'t': 'a', 'z': 'c', 'k': 'e', 'g': 'h', 'a': 't'},\n",
       " {'z': 'a', 't': 'c', 'a': 'e', 'k': 'h', 'g': 't'}]"
      ]
     },
     "execution_count": 138,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "local_optima = [m for m in all_maps if all_swaps_worse(m) if m != im0]\n",
    "local_optima"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(('tecth cha tec', -22.37718617528681),\n",
       " [('etceh cha etc', -27.45222919076422),\n",
       "  ('cetch tha cet', -23.152196785128968),\n",
       "  ('aecah cht aec', -28.466074945814817),\n",
       "  ('hecht cta hec', -24.0528877258752),\n",
       "  ('tceth eha tce', -23.289877585658743),\n",
       "  ('tacth che tac', -22.992889593694795),\n",
       "  ('thcte cea thc', -23.37530629522044),\n",
       "  ('teath ahc tea', -23.192822966291835),\n",
       "  ('tehtc hca teh', -25.824045558109102),\n",
       "  ('tecta cah tec', -23.630623398955464)])"
      ]
     },
     "execution_count": 143,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "apply_inverse_map(ct, local_optima[3]), [apply_inverse_map(ct, tim) for tim in all_swaps(local_optima[3])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(('ethea hac eth', -21.831799648932474),\n",
       " [('tehta hac teh', -25.2630658238322),\n",
       "  ('hteha eac hte', -25.12161519433393),\n",
       "  ('cthca hae cth', -25.56645047924706),\n",
       "  ('athae hec ath', -22.523920547555058),\n",
       "  ('ehtea tac eht', -24.414224893001006),\n",
       "  ('echea hat ech', -22.34614937355321),\n",
       "  ('eahet htc eah', -24.64789885786501),\n",
       "  ('etcea cah etc', -24.40643936994998),\n",
       "  ('etaeh ahc eta', -27.042650227267693),\n",
       "  ('ethec hca eth', -23.70218668022281)])"
      ]
     },
     "execution_count": 145,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "apply_inverse_map(ct, local_optima[5]), [apply_inverse_map(ct, tim) for tim in all_swaps(local_optima[5])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('catch the cat', -22.142275954584633)"
      ]
     },
     "execution_count": 140,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "apply_inverse_map(ct, im0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'t': 'a', 'k': 'c', 'g': 'e', 'z': 'h', 'a': 't'}"
      ]
     },
     "execution_count": 146,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "local_optima[3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "| plaintext letter  | a | c | e | h | t |\n",
      "|-------------------|---|---|---|---|---|\n",
      "| ciphertext letter | t | k | g | z | a |\n",
      "\n"
     ]
    }
   ],
   "source": [
    "l3 = {local_optima[3][l]: l for l in local_optima[3]}\n",
    "print(show_mapping(l3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'etoainhsrdlumwycfgpbvkxjqz'"
      ]
     },
     "execution_count": 150,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cat(p[0] for p in english_counts.most_common())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}