{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os,sys,inspect\n", "currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))\n", "parentdir = os.path.dirname(currentdir)\n", "sys.path.insert(0,parentdir) " ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from cipher.caesar import *\n", "from cipher.affine import *\n", "from cipher.keyword_cipher import *\n", "from cipher.vigenere import *\n", "from cipher.playfair import *\n", "from cipher.column_transposition import *\n", "from support.text_prettify import *\n", "from support.plot_frequency_histogram import *" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# import logger as myl\n", "# import logging\n", "# myl.logger.setLevel(logging.DEBUG)\n", "# mylg = logging.getLogger('cipherbreak')\n", "import logging\n", "from logger import logger\n", "\n", "import re\n", "from datetime import datetime\n", "import pandas as pd\n", "import csv\n", "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "\n", "from scipy.stats import kendalltau" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "logger.setLevel(logging.DEBUG)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "def commonest_alphabet(text):\n", " counts = collections.Counter(sanitise(text))\n", " letters = cat(p[0] for p in counts.most_common())\n", " return cat(deduplicate(letters + string.ascii_lowercase))" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "def random_ciphertext(message_length):\n", " sample_start = random.randint(0, corpus_length - message_length)\n", " sample = corpus[sample_start:(sample_start + message_length)]\n", " key = list(string.ascii_lowercase)\n", " random.shuffle(key)\n", " key = cat(key)\n", " ciphertext = keyword_encipher(sample, key)\n", " return key, ciphertext" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def log_parse(text, verbose=False):\n", " parts = text.split(' - ')\n", " dt = datetime.strptime(parts[0], \"%Y-%m-%d %H:%M:%S,%f\")\n", " blurb = parts[-1]\n", " worker = int(re.search('worker (\\d+)', blurb).group(1))\n", " iteration = int(re.search('iteration (\\d+)', blurb).group(1))\n", " fitness = float(re.search('fitness (-?\\d+\\.\\d+)', blurb).group(1))\n", " if verbose:\n", " ca = re.search('current alphabet (\\w+)', blurb).group(1)\n", " pa = re.search('plain alphabet (\\w+)', blurb).group(1)\n", " mapped_ca = cat(p[1] for p in sorted(zip(pa, ca)))\n", " return {'time': dt, 'worker': worker, 'iteration': iteration, 'fitness': fitness, \n", " 'cipher_alphabet': ca, 'plain_alphabet': pa, 'mapped_cipher_alphabet': mapped_ca}\n", " else:\n", " return {'time': dt, 'worker': worker, 'iteration': iteration, 'fitness': fitness}" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# ps = [log_parse(line, verbose=True) for line in open('cipher.log').readlines()[:10]]\n", "# df = pd.DataFrame(ps)\n", "# df = df.set_index(['worker', 'iteration']).sort_index()\n", "# df[['fitness', 'plain_alphabet', 'cipher_alphabet']].to_csv('test.csv', header=True)\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "def dump_result(starttime, filename, verbose=False, target_cipher_alphabet=''):\n", " parsed = [log_parse(line, verbose=verbose) for line in open('cipher.log')]\n", " trace = pd.DataFrame([p for p in parsed if p['time'] > starttime])\n", " trace = trace.set_index(['worker', 'iteration']).sort_index()\n", " trace['target_cipher_alphabet'] = target_cipher_alphabet\n", " workers = list(sorted(set(trace.index.get_level_values(0))))\n", " if verbose:\n", " trace[['fitness', 'plain_alphabet', 'cipher_alphabet', 'mapped_cipher_alphabet', 'target_cipher_alphabet']].to_csv(filename, header=True)\n", " else:\n", " trace.fitness.to_csv(filename, header=True)\n", " return workers, trace" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'etoainhsrdlumwycfgpbvkxjqz'" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plain_alpha = cat(p[0] for p in english_counts.most_common())\n", "plain_alpha" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "def unscramble_alphabet(cipher_alphabet, plain_alphabet):\n", " mapping = {p: c for p, c in zip(plain_alphabet, cipher_alphabet)}\n", " unscrambled = cat(mapping[p] for p in sorted(mapping))\n", " return unscrambled" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'theadventuresofsherl'" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# pt = sanitise(open('../2017/8b.plaintext').read())\n", "corpus = sanitise(open('../support/sherlock-holmes.txt').read())\n", "corpus_length = len(corpus)\n", "pt = corpus\n", "pt[:20]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Development" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-542391.5369482826" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Pletters(pt)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-1471429.4753165497" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Ptrigrams(pt)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'etaoihnsrdlumwcyfgpbvkxjqz'" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "commonest_alphabet(pt)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('sbyopakxntlewgimvfcduqrzhj',\n", " 'seirqinyprxncmpfporselmscdwpsg',\n", " 'alowvoicewhisperedwalkpastmean')" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "k, c = random_ciphertext(30)\n", "k, c, keyword_decipher(c, k)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'yearningforrespiteth'" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pt = sanitise(open('../2017/8b.plaintext').read())\n", "pt[:20]" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'qviaysynjpaaverswvwxvapciyeetjjvavzieziqewtayvzsywpfvvmeiyzfvvmesywpcpywxefswxgihnigteiyzwxvgihvzpys'" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ct_key = list(string.ascii_lowercase)\n", "random.shuffle(ct_key)\n", "ct_key = cat(ct_key)\n", "# ct = keyword_encipher(pt, 'arcanaimperii')\n", "ct = keyword_encipher(pt, ct_key)\n", "ct_alpha = commonest_alphabet(ct)\n", "ct[:100]" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('vwpisyxeazhtcfqgjnrokmldbu', -14681.308607565503)" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "start_time = datetime.now()\n", "sa_cipher_alphabet, score = simulated_annealing_break(ct, plain_alphabet=plain_alpha, cipher_alphabet=ct_alpha)\n", "sa_cipher_alphabet, score" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'iogzvjnxsdmhcyprbaewtkflqu'" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ct_key" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'iogzvjnxsdmhcyprbaewtkflqu'" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cat(p[1] for p in sorted(zip(plain_alpha, sa_cipher_alphabet)))" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'arcnimpebdfghjkloqstuvwxyz'" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "keyword_cipher_alphabet_of('arcanaimperii')" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "cipher.log old.cipher.log\n" ] } ], "source": [ "!ls *log" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['2019-10-28 09:59:14,978 - cipherbreak - DEBUG - Simulated annealing worker 2: iteration 0, temperature 200, current alphabet rjgzieptunyxodfsmbacqhkwvl, plain alphabet etoainhsrdlumwycfgpbvkxjqz, current_fitness -17464.568516864027, best_plaintext getsninycassehpioeoresamtnhhuccesedthdtghousnedino',\n", " '2019-10-28 09:59:14,978 - cipherbreak - DEBUG - Simulated annealing worker 8: iteration 0, temperature 200, current alphabet rjgzieptunyxocfsmbadqhkwvl, plain alphabet etoainhsrdlumwycfgpbvkxjqz, current_fitness -17394.10216261065, best_plaintext geosninycassehpitetresamonhhuccesedohdoghtusnedint',\n", " '2019-10-28 09:59:14,980 - cipherbreak - DEBUG - Simulated annealing worker 0: iteration 0, temperature 200, current alphabet rjgzieptunyxodcsmbafqhkwvl, plain alphabet etoainhsrdlumwycfgpbvkxjqz, current_fitness -17476.383874573305, best_plaintext geosninbcassehpitetresamonhhuccesedohdoghtusnedint',\n", " '2019-10-28 09:59:14,980 - cipherbreak - DEBUG - Simulated annealing worker 1: iteration 0, temperature 200, current alphabet rjgzieptunyxodfsmbacqhkwvl, plain alphabet etoainhsrdlumwycfgpbvkxjqz, current_fitness -17464.568516864027, best_plaintext geosninychsseapitetreshmonaauccesedoadogatusnedint',\n", " '2019-10-28 09:59:14,980 - cipherbreak - DEBUG - Simulated annealing worker 4: iteration 0, temperature 200, current alphabet rjgzieptunyxodfsmbacqhkwvl, plain alphabet etoainhsrdlumwycfgpbvkxjqz, current_fitness -17464.568516864027, best_plaintext geosninycassezpitetresamonzzuccesedozdogztusnedint']" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "recs = open('cipher.log').read().splitlines()\n", "recs[:5]" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'time': datetime.datetime(2019, 10, 28, 9, 59, 14, 978000),\n", " 'worker': 2,\n", " 'iteration': 0,\n", " 'fitness': -17464.568516864027}" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "log_parse(recs[0])" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'time': datetime.datetime(2019, 10, 28, 9, 59, 14, 978000),\n", " 'worker': 2,\n", " 'iteration': 0,\n", " 'fitness': -17464.568516864027},\n", " {'time': datetime.datetime(2019, 10, 28, 9, 59, 14, 978000),\n", " 'worker': 8,\n", " 'iteration': 0,\n", " 'fitness': -17394.10216261065},\n", " {'time': datetime.datetime(2019, 10, 28, 9, 59, 14, 980000),\n", " 'worker': 0,\n", " 'iteration': 0,\n", " 'fitness': -17476.383874573305},\n", " {'time': datetime.datetime(2019, 10, 28, 9, 59, 14, 980000),\n", " 'worker': 1,\n", " 'iteration': 0,\n", " 'fitness': -17464.568516864027},\n", " {'time': datetime.datetime(2019, 10, 28, 9, 59, 14, 980000),\n", " 'worker': 4,\n", " 'iteration': 0,\n", " 'fitness': -17464.568516864027}]" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "parsed = [log_parse(line) for line in open('cipher.log')]\n", "parsed[:5]" ] }, { "cell_type": "code", "execution_count": 58, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", " | \n", " | fitness | \n", "time | \n", "
---|---|---|---|
worker | \n", "iteration | \n", "\n", " | \n", " |
0 | \n", "0 | \n", "-17464.568517 | \n", "2019-10-28 10:14:21.136 | \n", "
500 | \n", "-18531.679762 | \n", "2019-10-28 10:14:22.493 | \n", "|
1000 | \n", "-20903.487109 | \n", "2019-10-28 10:14:23.787 | \n", "|
1500 | \n", "-19941.571807 | \n", "2019-10-28 10:14:25.084 | \n", "|
2000 | \n", "-18871.699801 | \n", "2019-10-28 10:14:26.133 | \n", "|
2500 | \n", "-18847.246876 | \n", "2019-10-28 10:14:27.408 | \n", "|
3000 | \n", "-19111.386196 | \n", "2019-10-28 10:14:28.707 | \n", "|
3500 | \n", "-19693.452817 | \n", "2019-10-28 10:14:29.835 | \n", "|
4000 | \n", "-18959.289175 | \n", "2019-10-28 10:14:31.228 | \n", "|
4500 | \n", "-19040.556583 | \n", "2019-10-28 10:14:32.569 | \n", "|
5000 | \n", "-18169.812374 | \n", "2019-10-28 10:14:33.891 | \n", "|
5500 | \n", "-16657.860123 | \n", "2019-10-28 10:14:35.200 | \n", "|
6000 | \n", "-16220.268468 | \n", "2019-10-28 10:14:36.485 | \n", "|
6500 | \n", "-16472.952274 | \n", "2019-10-28 10:14:37.784 | \n", "|
7000 | \n", "-17046.418912 | \n", "2019-10-28 10:14:39.054 | \n", "|
7500 | \n", "-17320.865489 | \n", "2019-10-28 10:14:40.343 | \n", "|
8000 | \n", "-16438.413488 | \n", "2019-10-28 10:14:41.622 | \n", "|
8500 | \n", "-16116.768242 | \n", "2019-10-28 10:14:42.893 | \n", "|
9000 | \n", "-16085.659977 | \n", "2019-10-28 10:14:44.167 | \n", "|
9500 | \n", "-15679.574171 | \n", "2019-10-28 10:14:45.405 | \n", "|
10000 | \n", "-16473.325447 | \n", "2019-10-28 10:14:46.650 | \n", "|
10500 | \n", "-16421.027387 | \n", "2019-10-28 10:14:48.070 | \n", "|
11000 | \n", "-16167.752107 | \n", "2019-10-28 10:14:49.371 | \n", "|
11500 | \n", "-15213.074262 | \n", "2019-10-28 10:14:50.686 | \n", "|
12000 | \n", "-15634.979337 | \n", "2019-10-28 10:14:51.967 | \n", "|
12500 | \n", "-15296.397297 | \n", "2019-10-28 10:14:52.824 | \n", "|
13000 | \n", "-15025.983510 | \n", "2019-10-28 10:14:53.881 | \n", "|
13500 | \n", "-15175.912750 | \n", "2019-10-28 10:14:54.871 | \n", "|
14000 | \n", "-15235.513700 | \n", "2019-10-28 10:14:56.119 | \n", "|
14500 | \n", "-14923.520462 | \n", "2019-10-28 10:14:57.339 | \n", "|
... | \n", "... | \n", "... | \n", "... | \n", "
9 | \n", "5000 | \n", "-19218.644968 | \n", "2019-10-28 10:14:34.337 | \n", "
5500 | \n", "-18770.828622 | \n", "2019-10-28 10:14:35.625 | \n", "|
6000 | \n", "-18390.233128 | \n", "2019-10-28 10:14:36.920 | \n", "|
6500 | \n", "-17361.547211 | \n", "2019-10-28 10:14:38.208 | \n", "|
7000 | \n", "-16846.113490 | \n", "2019-10-28 10:14:39.501 | \n", "|
7500 | \n", "-15002.318165 | \n", "2019-10-28 10:14:40.762 | \n", "|
8000 | \n", "-16992.780932 | \n", "2019-10-28 10:14:41.757 | \n", "|
8500 | \n", "-17115.242295 | \n", "2019-10-28 10:14:43.037 | \n", "|
9000 | \n", "-17297.012437 | \n", "2019-10-28 10:14:44.327 | \n", "|
9500 | \n", "-16511.948405 | \n", "2019-10-28 10:14:45.552 | \n", "|
10000 | \n", "-16175.059178 | \n", "2019-10-28 10:14:46.828 | \n", "|
10500 | \n", "-15482.711195 | \n", "2019-10-28 10:14:48.096 | \n", "|
11000 | \n", "-15190.359782 | \n", "2019-10-28 10:14:49.376 | \n", "|
11500 | \n", "-15776.667896 | \n", "2019-10-28 10:14:50.523 | \n", "|
12000 | \n", "-15112.798387 | \n", "2019-10-28 10:14:51.327 | \n", "|
12500 | \n", "-15539.267169 | \n", "2019-10-28 10:14:52.194 | \n", "|
13000 | \n", "-15209.937930 | \n", "2019-10-28 10:14:52.907 | \n", "|
13500 | \n", "-14917.833732 | \n", "2019-10-28 10:14:53.804 | \n", "|
14000 | \n", "-14822.068093 | \n", "2019-10-28 10:14:54.706 | \n", "|
14500 | \n", "-14996.772583 | \n", "2019-10-28 10:14:55.742 | \n", "|
15000 | \n", "-14818.384023 | \n", "2019-10-28 10:14:57.001 | \n", "|
15500 | \n", "-14698.864982 | \n", "2019-10-28 10:14:58.250 | \n", "|
16000 | \n", "-14689.841559 | \n", "2019-10-28 10:14:59.546 | \n", "|
16500 | \n", "-14698.864982 | \n", "2019-10-28 10:15:00.651 | \n", "|
17000 | \n", "-14681.308608 | \n", "2019-10-28 10:15:01.702 | \n", "|
17500 | \n", "-14681.308608 | \n", "2019-10-28 10:15:02.920 | \n", "|
18000 | \n", "-14681.308608 | \n", "2019-10-28 10:15:04.255 | \n", "|
18500 | \n", "-14681.308608 | \n", "2019-10-28 10:15:05.523 | \n", "|
19000 | \n", "-14681.308608 | \n", "2019-10-28 10:15:06.791 | \n", "|
19500 | \n", "-14681.308608 | \n", "2019-10-28 10:15:08.055 | \n", "
400 rows × 2 columns
\n", "