--- /dev/null
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 125,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import string\n",
+ "import random\n",
+ "import itertools\n",
+ "from cipher.keyword_cipher import *\n",
+ "from support.utilities import *"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'gpavtdyzocqnrsujmxikwbehlf'"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pt = \"catch the cat\"\n",
+ "\n",
+ "ca = list(string.ascii_lowercase)\n",
+ "random.shuffle(ca)\n",
+ "ca = cat(ca)\n",
+ "ca"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'agkaz kzt agk'"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ct = keyword_encipher(pt, ca)\n",
+ "ct"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 104,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def show_mapping_alpha(c_a, p_a=string.ascii_lowercase, letters=string.ascii_lowercase):\n",
+ "    \"\"\"Render the plaintext-to-ciphertext table for two aligned alphabets.\n",
+ "\n",
+ "    `p_a` and `c_a` are paired position by position; only pairs whose\n",
+ "    plaintext letter occurs in `letters` are kept. The filtered pairs are\n",
+ "    passed to `show_mapping` as a dict and its result is returned.\n",
+ "    \"\"\"\n",
+ "    wanted_pairs = ((plain, cipher) for plain, cipher in zip(p_a, c_a)\n",
+ "                    if plain in letters)\n",
+ "    return show_mapping(dict(wanted_pairs))\n",
+ "\n",
+ "def show_mapping(mapping):\n",
+ "    \"\"\"Format a plaintext-to-ciphertext mapping as a Markdown table.\n",
+ "\n",
+ "    Columns are the keys of `mapping` in sorted order. The first row lists\n",
+ "    the keys and the last row their values. The rule between them gets one\n",
+ "    `---` cell per key, so the table is well formed for a mapping of any\n",
+ "    size and not only for exactly five letters.\n",
+ "\n",
+ "    Returns the table as one string made of three newline-terminated rows.\n",
+ "    \"\"\"\n",
+ "    plain_letters = sorted(mapping)\n",
+ "    header = '| plaintext letter | ' + ' | '.join(plain_letters) + ' |\\n'\n",
+ "    # One '---|' cell per column keeps the rule aligned with the other rows.\n",
+ "    rule = '|-------------------|' + '---|' * len(plain_letters) + '\\n'\n",
+ "    body = '| ciphertext letter | ' + ' | '.join(mapping[l] for l in plain_letters) + ' |\\n'\n",
+ "    return header + rule + body"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 105,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "| plaintext letter | a | c | e | h | t |\n",
+ "|-------------------|---|---|---|---|---|\n",
+ "| ciphertext letter | g | a | t | z | k |\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(show_mapping_alpha(ca, letters=sanitise(pt)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "({'a': 'g', 'c': 'a', 'e': 't', 'h': 'z', 't': 'k'},\n",
+ " {'g': 'a', 'a': 'c', 't': 'e', 'z': 'h', 'k': 't'})"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Restrict the cipher alphabet to the letters that occur in the plaintext.\n",
+ "# string.ascii_lowercase is paired with ca explicitly: ascii_letters only\n",
+ "# produced the same pairs because zip() stops at the end of the shorter input.\n",
+ "m0 = {p: c for (p, c) in zip(string.ascii_lowercase, ca) if p in pt}\n",
+ "# The decryption map is m0 with keys and values exchanged.\n",
+ "im0 = {c: p for (p, c) in m0.items()}\n",
+ "m0, im0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "| a | c | e | h | t |\n",
+ "| g | a | t | z | k |\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(show_mapping(m0))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def apply_inverse_map(ciphertext, mapping):\n",
+ "    \"\"\"Decipher `ciphertext` with `mapping` and score the result.\n",
+ "\n",
+ "    Each character that is a key of `mapping` is replaced by its mapped\n",
+ "    value; every other character is passed through unchanged.\n",
+ "\n",
+ "    Returns a `(plaintext, score)` pair, where the score is\n",
+ "    `Pbigrams(sanitise(plaintext))`.\n",
+ "    \"\"\"\n",
+ "    decoded = cat(mapping.get(symbol, symbol) for symbol in ciphertext)\n",
+ "    score = Pbigrams(sanitise(decoded))\n",
+ "    return decoded, score"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def swap(letters, i, j):\n",
+ "    \"\"\"Return `letters` with the characters at positions `i` and `j` exchanged.\n",
+ "\n",
+ "    The positions may be given in either order, and negative positions\n",
+ "    count from the end as in ordinary Python indexing. When `i == j` the\n",
+ "    input is returned unchanged. A position outside the string raises\n",
+ "    IndexError.\n",
+ "    \"\"\"\n",
+ "    if i == j:\n",
+ "        return letters\n",
+ "    # Exchange on a list copy: splicing slices around the two positions\n",
+ "    # duplicates characters as soon as one of the positions is negative.\n",
+ "    chars = list(letters)\n",
+ "    chars[i], chars[j] = chars[j], chars[i]\n",
+ "    return ''.join(chars)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def map_swap(mapping):\n",
+ "    \"\"\"Return a new mapping with the values at two random positions exchanged.\n",
+ "\n",
+ "    Keys are taken in sorted order and their values combined with `cat`.\n",
+ "    Two positions are drawn independently with `random.randrange`, so they\n",
+ "    can coincide; `swap` then leaves the values as they were.\n",
+ "    \"\"\"\n",
+ "    ordered_keys = sorted(mapping)\n",
+ "    joined_values = cat(mapping[key] for key in ordered_keys)\n",
+ "    size = len(ordered_keys)\n",
+ "    # Draw the positions in this order so the random stream is consumed\n",
+ "    # exactly as two consecutive randrange calls.\n",
+ "    first = random.randrange(size)\n",
+ "    second = random.randrange(size)\n",
+ "    return dict(zip(ordered_keys, swap(joined_values, first, second)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 89,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "im1 = map_swap(im0)\n",
+ "im2 = map_swap(im1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 90,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "('aceah eht ace', -24.470656262279007)"
+ ]
+ },
+ "execution_count": 90,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "apply_inverse_map(ct, im2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 91,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "('actah the act', -23.337953804339712)"
+ ]
+ },
+ "execution_count": 91,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "apply_inverse_map(ct, im1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 88,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "('catch the cat', -22.142275954584633)"
+ ]
+ },
+ "execution_count": 88,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "apply_inverse_map(ct, im0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 92,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "({'g': 'a', 'a': 'c', 't': 'e', 'z': 'h', 'k': 't'},\n",
+ " {'a': 'a', 'g': 'c', 'k': 't', 't': 'e', 'z': 'h'},\n",
+ " {'a': 'a', 'g': 'c', 'k': 'e', 't': 't', 'z': 'h'})"
+ ]
+ },
+ "execution_count": 92,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "im0, im1, im2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 93,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "m1 = {im1[l]: l for l in im1}\n",
+ "m2 = {im2[l]: l for l in im2}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 106,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "| plaintext letter | a | c | e | h | t |\n",
+ "|-------------------|---|---|---|---|---|\n",
+ "| ciphertext letter | g | a | t | z | k |\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(show_mapping(m0))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 107,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "| plaintext letter | a | c | e | h | t |\n",
+ "|-------------------|---|---|---|---|---|\n",
+ "| ciphertext letter | a | g | t | z | k |\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(show_mapping(m1))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 108,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "| plaintext letter | a | c | e | h | t |\n",
+ "|-------------------|---|---|---|---|---|\n",
+ "| ciphertext letter | a | g | k | z | t |\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(show_mapping(m2))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 110,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "('hceha eat hce', -26.41716766077668)"
+ ]
+ },
+ "execution_count": 110,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "im3 = map_swap(im2)\n",
+ "apply_inverse_map(ct, im3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 111,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "m3 = {im3[l]: l for l in im3}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 112,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "| plaintext letter | a | c | e | h | t |\n",
+ "|-------------------|---|---|---|---|---|\n",
+ "| ciphertext letter | z | g | k | a | t |\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(show_mapping(m3))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 113,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def all_swaps(mapping):\n",
+ "    \"\"\"List every mapping obtained from `mapping` by exchanging two values.\n",
+ "\n",
+ "    Keys are taken in sorted order. One new dict is produced for each\n",
+ "    pair of positions `i < j`, ordered by increasing `i` and then `j`.\n",
+ "    \"\"\"\n",
+ "    ordered_keys = sorted(mapping)\n",
+ "    joined_values = cat(mapping[key] for key in ordered_keys)\n",
+ "    size = len(ordered_keys)\n",
+ "    neighbours = []\n",
+ "    for low in range(size):\n",
+ "        for high in range(low + 1, size):\n",
+ "            exchanged = swap(joined_values, low, high)\n",
+ "            neighbours.append(dict(zip(ordered_keys, exchanged)))\n",
+ "    return neighbours"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 117,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "({'g': 'a', 'a': 'c', 't': 'e', 'z': 'h', 'k': 't'},\n",
+ " [('actah the act', -23.337953804339712),\n",
+ " ('tacth che tac', -22.992889593694795),\n",
+ " ('eateh thc eat', -23.337174988961543),\n",
+ " ('hathc tce hat', -24.20565798548872),\n",
+ " ('ctach ahe cta', -23.361982341471602),\n",
+ " ('cetch tha cet', -23.152196785128968),\n",
+ " ('chtca tae cht', -25.47053856384374),\n",
+ " ('caech eht cae', -27.119008761052356),\n",
+ " ('cahct hte cah', -25.96020844569102),\n",
+ " ('catce teh cat', -24.369461369323975)])"
+ ]
+ },
+ "execution_count": 117,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "im0, [apply_inverse_map(ct, tim) for tim in all_swaps(im0)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 118,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "({'a': 'a', 'g': 'c', 'k': 'e', 't': 't', 'z': 'h'},\n",
+ " [('caech eht cae', -27.119008761052356),\n",
+ " ('ecaeh aht eca', -26.10317913928645),\n",
+ " ('tceth eha tce', -23.289877585658743),\n",
+ " ('hceha eat hce', -26.41716766077668),\n",
+ " ('aecah cht aec', -28.466074945814817),\n",
+ " ('ateah ehc ate', -23.89678491033435),\n",
+ " ('aheac ect ahe', -23.82052347276842),\n",
+ " ('actah the act', -23.337953804339712),\n",
+ " ('achae het ach', -24.4061387567535),\n",
+ " ('aceat eth ace', -21.139211036323402)])"
+ ]
+ },
+ "execution_count": 118,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "im2, [apply_inverse_map(ct, tim) for tim in all_swaps(im2)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 119,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def all_swaps_worse(mapping, ciphertext=None):\n",
+ "    \"\"\"Report whether no single swap of `mapping` improves its score.\n",
+ "\n",
+ "    The score of `mapping` (second element returned by\n",
+ "    `apply_inverse_map`) is compared with the score of every mapping from\n",
+ "    `all_swaps(mapping)`. Returns True when none of them scores strictly\n",
+ "    higher, False otherwise.\n",
+ "\n",
+ "    `ciphertext` is the text the mappings are scored on. When it is\n",
+ "    omitted the notebook-level `ct` is used, so one-argument calls keep\n",
+ "    working.\n",
+ "    \"\"\"\n",
+ "    if ciphertext is None:\n",
+ "        ciphertext = ct\n",
+ "    _, baseline = apply_inverse_map(ciphertext, mapping)\n",
+ "    # any() stops at the first improving neighbour; the answer is the same\n",
+ "    # as collecting all better scores and checking the list is empty.\n",
+ "    return not any(apply_inverse_map(ciphertext, neighbour)[1] > baseline\n",
+ "                   for neighbour in all_swaps(mapping))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 123,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "False"
+ ]
+ },
+ "execution_count": 123,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "all_swaps_worse(im3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 124,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def make_map(als, bls):\n",
+ "    \"\"\"Pair the items of `als` with those of `bls` and return them as a dict.\n",
+ "\n",
+ "    Items of `als` become keys and items of `bls` their values; pairing\n",
+ "    stops at the end of the shorter input.\n",
+ "    \"\"\"\n",
+ "    return dict(zip(als, bls))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 129,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "('aceht', 'agktz')"
+ ]
+ },
+ "execution_count": 129,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ptls = cat(sorted(deduplicate(sanitise(pt))))\n",
+ "ctls = cat(sorted(deduplicate(sanitise(ct))))\n",
+ "ptls, ctls"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 132,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(120,\n",
+ " [{'a': 'a', 'g': 'c', 'k': 'e', 't': 'h', 'z': 't'},\n",
+ " {'a': 'a', 'g': 'c', 'k': 'e', 'z': 'h', 't': 't'},\n",
+ " {'a': 'a', 'g': 'c', 't': 'e', 'k': 'h', 'z': 't'},\n",
+ " {'a': 'a', 'g': 'c', 't': 'e', 'z': 'h', 'k': 't'},\n",
+ " {'a': 'a', 'g': 'c', 'z': 'e', 'k': 'h', 't': 't'}])"
+ ]
+ },
+ "execution_count": 132,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "all_maps = [make_map(c, ptls) for c in itertools.permutations(ctls)]\n",
+ "len(all_maps), all_maps[:5]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 138,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[{'a': 'a', 'z': 'c', 'k': 'e', 't': 'h', 'g': 't'},\n",
+ " {'g': 'a', 't': 'c', 'z': 'e', 'a': 'h', 'k': 't'},\n",
+ " {'g': 'a', 'z': 'c', 'a': 'e', 't': 'h', 'k': 't'},\n",
+ " {'t': 'a', 'k': 'c', 'g': 'e', 'z': 'h', 'a': 't'},\n",
+ " {'t': 'a', 'z': 'c', 'k': 'e', 'g': 'h', 'a': 't'},\n",
+ " {'z': 'a', 't': 'c', 'a': 'e', 'k': 'h', 'g': 't'}]"
+ ]
+ },
+ "execution_count": 138,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "local_optima = [m for m in all_maps if all_swaps_worse(m) if m != im0]\n",
+ "local_optima"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 143,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(('tecth cha tec', -22.37718617528681),\n",
+ " [('etceh cha etc', -27.45222919076422),\n",
+ " ('cetch tha cet', -23.152196785128968),\n",
+ " ('aecah cht aec', -28.466074945814817),\n",
+ " ('hecht cta hec', -24.0528877258752),\n",
+ " ('tceth eha tce', -23.289877585658743),\n",
+ " ('tacth che tac', -22.992889593694795),\n",
+ " ('thcte cea thc', -23.37530629522044),\n",
+ " ('teath ahc tea', -23.192822966291835),\n",
+ " ('tehtc hca teh', -25.824045558109102),\n",
+ " ('tecta cah tec', -23.630623398955464)])"
+ ]
+ },
+ "execution_count": 143,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "apply_inverse_map(ct, local_optima[3]), [apply_inverse_map(ct, tim) for tim in all_swaps(local_optima[3])]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 145,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(('ethea hac eth', -21.831799648932474),\n",
+ " [('tehta hac teh', -25.2630658238322),\n",
+ " ('hteha eac hte', -25.12161519433393),\n",
+ " ('cthca hae cth', -25.56645047924706),\n",
+ " ('athae hec ath', -22.523920547555058),\n",
+ " ('ehtea tac eht', -24.414224893001006),\n",
+ " ('echea hat ech', -22.34614937355321),\n",
+ " ('eahet htc eah', -24.64789885786501),\n",
+ " ('etcea cah etc', -24.40643936994998),\n",
+ " ('etaeh ahc eta', -27.042650227267693),\n",
+ " ('ethec hca eth', -23.70218668022281)])"
+ ]
+ },
+ "execution_count": 145,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "apply_inverse_map(ct, local_optima[5]), [apply_inverse_map(ct, tim) for tim in all_swaps(local_optima[5])]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 140,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "('catch the cat', -22.142275954584633)"
+ ]
+ },
+ "execution_count": 140,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "apply_inverse_map(ct, im0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 146,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'t': 'a', 'k': 'c', 'g': 'e', 'z': 'h', 'a': 't'}"
+ ]
+ },
+ "execution_count": 146,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "local_optima[3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 147,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "| plaintext letter | a | c | e | h | t |\n",
+ "|-------------------|---|---|---|---|---|\n",
+ "| ciphertext letter | t | k | g | z | a |\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "l3 = {local_optima[3][l]: l for l in local_optima[3]}\n",
+ "print(show_mapping(l3))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 150,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'etoainhsrdlumwycfgpbvkxjqz'"
+ ]
+ },
+ "execution_count": 150,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cat(p[0] for p in english_counts.most_common())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}