{ "cells": [ { "cell_type": "code", "execution_count": 125, "metadata": {}, "outputs": [], "source": [ "import string\n", "import random\n", "import itertools\n", "from cipher.keyword_cipher import *\n", "from support.utilities import *" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'gpavtdyzocqnrsujmxikwbehlf'" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pt = \"catch the cat\"\n", "\n", "ca = list(string.ascii_lowercase)\n", "random.shuffle(ca)\n", "ca = cat(ca)\n", "ca" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'agkaz kzt agk'" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ct = keyword_encipher(pt, ca)\n", "ct" ] }, { "cell_type": "code", "execution_count": 104, "metadata": {}, "outputs": [], "source": [ "def show_mapping_alpha(c_a, p_a=string.ascii_lowercase, letters=string.ascii_lowercase):\n", " mapping = {p: c for (p, c) in zip(p_a, c_a) if p in letters}\n", " return show_mapping(mapping)\n", "\n", "def show_mapping(mapping):\n", " retval = '| plaintext letter | ' + ' | '.join(l for l in sorted(mapping)) + ' |\\n'\n", " retval += '|-------------------|---|---|---|---|---|\\n'\n", " retval += '| ciphertext letter | ' + ' | '.join(mapping[l] for l in sorted(mapping)) + ' |\\n'\n", " return retval" ] }, { "cell_type": "code", "execution_count": 105, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "| plaintext letter | a | c | e | h | t |\n", "|-------------------|---|---|---|---|---|\n", "| ciphertext letter | g | a | t | z | k |\n", "\n" ] } ], "source": [ "print(show_mapping_alpha(ca, letters=sanitise(pt)))" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "({'a': 'g', 'c': 'a', 'e': 't', 'h': 'z', 't': 'k'},\n", " {'g': 'a', 'a': 'c', 't': 'e', 'z': 'h', 'k': 't'})" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m0 = {p: c for (p, c) in zip(string.ascii_letters, ca) if p in pt}\n", "im0 = {c: p for (p, c) in zip(string.ascii_letters, ca) if p in pt}\n", "m0, im0" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "| a | c | e | h | t |\n", "| g | a | t | z | k |\n", "\n" ] } ], "source": [ "print(show_mapping(m0))" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [], "source": [ "def apply_inverse_map(ciphertext, mapping):\n", " plaintext = cat(mapping[l] if l in mapping else l for l in ciphertext)\n", " return plaintext, Pbigrams(sanitise(plaintext))" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "def swap(letters, i, j):\n", " if i > j:\n", " i, j = j, i\n", " if i == j:\n", " return letters\n", " else:\n", " return (letters[:i] + letters[j] + letters[i+1:j] + letters[i] +\n", " letters[j+1:])" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "def map_swap(mapping):\n", " keys = sorted(mapping)\n", " values = cat(mapping[l] for l in keys)\n", " n = len(keys)\n", " swapped_values = swap(values, random.randrange(n), random.randrange(n))\n", " return {k: sv for (k, sv) in zip(keys, swapped_values)}" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [], "source": [ "im1 = map_swap(im0)\n", "im2 = map_swap(im1)" ] }, { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('aceah eht ace', -24.470656262279007)" ] }, "execution_count": 90, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apply_inverse_map(ct, im2)" ] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('actah the act', -23.337953804339712)" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apply_inverse_map(ct, im1)" ] }, { "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('catch the cat', -22.142275954584633)" ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apply_inverse_map(ct, im0)" ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "({'g': 'a', 'a': 'c', 't': 'e', 'z': 'h', 'k': 't'},\n", " {'a': 'a', 'g': 'c', 'k': 't', 't': 'e', 'z': 'h'},\n", " {'a': 'a', 'g': 'c', 'k': 'e', 't': 't', 'z': 'h'})" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "im0, im1, im2" ] }, { "cell_type": "code", "execution_count": 93, "metadata": {}, "outputs": [], "source": [ "m1 = {im1[l]: l for l in im1}\n", "m2 = {im2[l]: l for l in im2}" ] }, { "cell_type": "code", "execution_count": 106, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "| plaintext letter | a | c | e | h | t |\n", "|-------------------|---|---|---|---|---|\n", "| ciphertext letter | g | a | t | z | k |\n", "\n" ] } ], "source": [ "print(show_mapping(m0))" ] }, { "cell_type": "code", "execution_count": 107, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "| plaintext letter | a | c | e | h | t |\n", "|-------------------|---|---|---|---|---|\n", "| ciphertext letter | a | g | t | z | k |\n", "\n" ] } ], "source": [ "print(show_mapping(m1))" ] }, { "cell_type": "code", "execution_count": 108, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "| plaintext letter | a | c | e | h | t |\n", "|-------------------|---|---|---|---|---|\n", "| ciphertext letter | a | g | k | z | t |\n", "\n" ] } ], "source": [ "print(show_mapping(m2))" ] }, { "cell_type": "code", "execution_count": 110, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('hceha eat hce', -26.41716766077668)" ] }, "execution_count": 110, "metadata": {}, "output_type": "execute_result" } ], "source": [ "im3 = map_swap(im2)\n", "apply_inverse_map(ct, im3)" ] }, { "cell_type": "code", "execution_count": 111, "metadata": {}, "outputs": [], "source": [ "m3 = {im3[l]: l for l in im3}" ] }, { "cell_type": "code", "execution_count": 112, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "| plaintext letter | a | c | e | h | t |\n", "|-------------------|---|---|---|---|---|\n", "| ciphertext letter | z | g | k | a | t |\n", "\n" ] } ], "source": [ "print(show_mapping(m3))" ] }, { "cell_type": "code", "execution_count": 113, "metadata": {}, "outputs": [], "source": [ "def all_swaps(mapping):\n", " keys = sorted(mapping)\n", " values = cat(mapping[l] for l in keys)\n", " n = len(keys)\n", " swapped_values = [swap(values, i, j) for i in range(n) for j in range(n) if i < j]\n", " return [{k: sv for (k, sv) in zip(keys, svs)} for svs in swapped_values]" ] }, { "cell_type": "code", "execution_count": 117, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "({'g': 'a', 'a': 'c', 't': 'e', 'z': 'h', 'k': 't'},\n", " [('actah the act', -23.337953804339712),\n", " ('tacth che tac', -22.992889593694795),\n", " ('eateh thc eat', -23.337174988961543),\n", " ('hathc tce hat', -24.20565798548872),\n", " ('ctach ahe cta', -23.361982341471602),\n", " ('cetch tha cet', -23.152196785128968),\n", " ('chtca tae cht', -25.47053856384374),\n", " ('caech eht cae', -27.119008761052356),\n", " ('cahct hte cah', -25.96020844569102),\n", " ('catce teh cat', -24.369461369323975)])" ] }, "execution_count": 117, "metadata": {}, "output_type": "execute_result" } ], "source": [ "im0, [apply_inverse_map(ct, tim) for tim in all_swaps(im0)]" ] }, { "cell_type": "code", "execution_count": 118, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "({'a': 'a', 'g': 'c', 'k': 'e', 't': 't', 'z': 'h'},\n", " [('caech eht cae', -27.119008761052356),\n", " ('ecaeh aht eca', -26.10317913928645),\n", " ('tceth eha tce', -23.289877585658743),\n", " ('hceha eat hce', -26.41716766077668),\n", " ('aecah cht aec', -28.466074945814817),\n", " ('ateah ehc ate', -23.89678491033435),\n", " ('aheac ect ahe', -23.82052347276842),\n", " ('actah the act', -23.337953804339712),\n", " ('achae het ach', -24.4061387567535),\n", " ('aceat eth ace', -21.139211036323402)])" ] }, "execution_count": 118, "metadata": {}, "output_type": "execute_result" } ], "source": [ "im2, [apply_inverse_map(ct, tim) for tim in all_swaps(im2)]" ] }, { "cell_type": "code", "execution_count": 119, "metadata": {}, "outputs": [], "source": [ "def all_swaps_worse(mapping):\n", " _, score0 = apply_inverse_map(ct, mapping)\n", " swapped_mappings = all_swaps(mapping)\n", " scores = [apply_inverse_map(ct, m)[1] for m in swapped_mappings]\n", " better_scores = [s for s in scores if s > score0]\n", " return better_scores == []" ] }, { "cell_type": "code", "execution_count": 123, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 123, "metadata": {}, "output_type": "execute_result" } ], "source": [ "all_swaps_worse(im3)" ] }, { "cell_type": "code", "execution_count": 124, "metadata": {}, "outputs": [], "source": [ "def make_map(als, bls):\n", " return {a: b for (a, b) in zip(als, bls)}" ] }, { "cell_type": "code", "execution_count": 129, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('aceht', 'agktz')" ] }, "execution_count": 129, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ptls = cat(sorted(deduplicate(sanitise(pt))))\n", "ctls = cat(sorted(deduplicate(sanitise(ct))))\n", "ptls, ctls" ] }, { "cell_type": "code", "execution_count": 132, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(120,\n", " [{'a': 'a', 'g': 'c', 'k': 'e', 't': 'h', 'z': 't'},\n", " {'a': 'a', 'g': 'c', 'k': 'e', 'z': 'h', 't': 't'},\n", " {'a': 'a', 'g': 'c', 't': 'e', 'k': 'h', 'z': 't'},\n", " {'a': 'a', 'g': 'c', 't': 'e', 'z': 'h', 'k': 't'},\n", " {'a': 'a', 'g': 'c', 'z': 'e', 'k': 'h', 't': 't'}])" ] }, "execution_count": 132, "metadata": {}, "output_type": "execute_result" } ], "source": [ "all_maps = [make_map(c, ptls) for c in itertools.permutations(ctls)]\n", "len(all_maps), all_maps[:5]" ] }, { "cell_type": "code", "execution_count": 138, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'a': 'a', 'z': 'c', 'k': 'e', 't': 'h', 'g': 't'},\n", " {'g': 'a', 't': 'c', 'z': 'e', 'a': 'h', 'k': 't'},\n", " {'g': 'a', 'z': 'c', 'a': 'e', 't': 'h', 'k': 't'},\n", " {'t': 'a', 'k': 'c', 'g': 'e', 'z': 'h', 'a': 't'},\n", " {'t': 'a', 'z': 'c', 'k': 'e', 'g': 'h', 'a': 't'},\n", " {'z': 'a', 't': 'c', 'a': 'e', 'k': 'h', 'g': 't'}]" ] }, "execution_count": 138, "metadata": {}, "output_type": "execute_result" } ], "source": [ "local_optima = [m for m in all_maps if all_swaps_worse(m) if m != im0]\n", "local_optima" ] }, { "cell_type": "code", "execution_count": 143, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(('tecth cha tec', -22.37718617528681),\n", " [('etceh cha etc', -27.45222919076422),\n", " ('cetch tha cet', -23.152196785128968),\n", " ('aecah cht aec', -28.466074945814817),\n", " ('hecht cta hec', -24.0528877258752),\n", " ('tceth eha tce', -23.289877585658743),\n", " ('tacth che tac', -22.992889593694795),\n", " ('thcte cea thc', -23.37530629522044),\n", " ('teath ahc tea', -23.192822966291835),\n", " ('tehtc hca teh', -25.824045558109102),\n", " ('tecta cah tec', -23.630623398955464)])" ] }, "execution_count": 143, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apply_inverse_map(ct, local_optima[3]), [apply_inverse_map(ct, tim) for tim in all_swaps(local_optima[3])]" ] }, { "cell_type": "code", "execution_count": 145, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(('ethea hac eth', -21.831799648932474),\n", " [('tehta hac teh', -25.2630658238322),\n", " ('hteha eac hte', -25.12161519433393),\n", " ('cthca hae cth', -25.56645047924706),\n", " ('athae hec ath', -22.523920547555058),\n", " ('ehtea tac eht', -24.414224893001006),\n", " ('echea hat ech', -22.34614937355321),\n", " ('eahet htc eah', -24.64789885786501),\n", " ('etcea cah etc', -24.40643936994998),\n", " ('etaeh ahc eta', -27.042650227267693),\n", " ('ethec hca eth', -23.70218668022281)])" ] }, "execution_count": 145, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apply_inverse_map(ct, local_optima[5]), [apply_inverse_map(ct, tim) for tim in all_swaps(local_optima[5])]" ] }, { "cell_type": "code", "execution_count": 140, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('catch the cat', -22.142275954584633)" ] }, "execution_count": 140, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apply_inverse_map(ct, im0)" ] }, { "cell_type": "code", "execution_count": 146, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'t': 'a', 'k': 'c', 'g': 'e', 'z': 'h', 'a': 't'}" ] }, "execution_count": 146, "metadata": {}, "output_type": "execute_result" } ], "source": [ "local_optima[3]" ] }, { "cell_type": "code", "execution_count": 147, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "| plaintext letter | a | c | e | h | t |\n", "|-------------------|---|---|---|---|---|\n", "| ciphertext letter | t | k | g | z | a |\n", "\n" ] } ], "source": [ "l3 = {local_optima[3][l]: l for l in local_optima[3]}\n", "print(show_mapping(l3))" ] }, { "cell_type": "code", "execution_count": 150, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'etoainhsrdlumwycfgpbvkxjqz'" ] }, "execution_count": 150, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cat(p[0] for p in english_counts.most_common())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" } }, "nbformat": 4, "nbformat_minor": 2 }