+ "outputs": [],
+ "source": [
+ "logger.setLevel(logging.DEBUG)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def commonest_alphabet(text):\n",
+ "    \"\"\"Order an alphabet by how often each character occurs in `text`.\n",
+ "\n",
+ "    The characters of `sanitise(text)` are counted and listed most common\n",
+ "    first. `string.ascii_lowercase` is then appended and the whole string is\n",
+ "    passed through `deduplicate`, presumably so that letters absent from the\n",
+ "    text follow in alphabetical order (assumes `deduplicate` keeps first\n",
+ "    occurrences -- confirm against its definition).\n",
+ "    \"\"\"\n",
+ "    frequencies = collections.Counter(sanitise(text))\n",
+ "    by_frequency = cat(letter for letter, _ in frequencies.most_common())\n",
+ "    return cat(deduplicate(by_frequency + string.ascii_lowercase))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def random_ciphertext(message_length):\n",
+ "    \"\"\"Encipher a random slice of the corpus under a random key.\n",
+ "\n",
+ "    A window of `message_length` characters is taken from the module-level\n",
+ "    `corpus` (its size is read from `corpus_length`) at a random offset. The\n",
+ "    key is a shuffled copy of `string.ascii_lowercase`, and the window is\n",
+ "    enciphered with `keyword_encipher` using that key.\n",
+ "\n",
+ "    Returns a `(key, ciphertext)` tuple.\n",
+ "    \"\"\"\n",
+ "    # Draw the offset first, then shuffle, so the random stream is consumed in a fixed order.\n",
+ "    start = random.randint(0, corpus_length - message_length)\n",
+ "    plaintext = corpus[start:start + message_length]\n",
+ "    letters = list(string.ascii_lowercase)\n",
+ "    random.shuffle(letters)\n",
+ "    key = cat(letters)\n",
+ "    return key, keyword_encipher(plaintext, key)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def log_parse(text, verbose=False):\n",
+ " parts = text.split(' - ')\n",
+ " dt = datetime.strptime(parts[0], \"%Y-%m-%d %H:%M:%S,%f\")\n",
+ " blurb = parts[-1]\n",
+ " worker = int(re.search('worker (\\d+)', blurb).group(1))\n",
+ " iteration = int(re.search('iteration (\\d+)', blurb).group(1))\n",
+ " fitness = float(re.search('fitness (-?\\d+\\.\\d+)', blurb).group(1))\n",
+ " if verbose:\n",
+ " ca = re.search('current alphabet (\\w+)', blurb).group(1)\n",
+ " pa = re.search('plain alphabet (\\w+)', blurb).group(1)\n",
+ " return {'time': dt, 'worker': worker, 'iteration': iteration, 'fitness': fitness, \n",
+ " 'cipher_alphabet': ca, 'plain_alphabet': pa}\n",
+ " else:\n",
+ " return {'time': dt, 'worker': worker, 'iteration': iteration, 'fitness': fitness}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ps = [log_parse(line, verbose=True) for line in open('cipher.log').readlines()[:10]]\n",
+ "# df = pd.DataFrame(ps)\n",
+ "# df = df.set_index(['worker', 'iteration']).sort_index()\n",
+ "# df[['fitness', 'plain_alphabet', 'cipher_alphabet']].to_csv('test.csv', header=True)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def dump_result(starttime, filename, verbose=False, log_filename='cipher.log'):\n",
+ "    \"\"\"Write the log entries newer than `starttime` to a CSV file.\n",
+ "\n",
+ "    Every line of `log_filename` is parsed with `log_parse`; entries whose\n",
+ "    'time' is strictly greater than `starttime` are collected into a DataFrame\n",
+ "    indexed and sorted by ('worker', 'iteration'). With `verbose=True` the\n",
+ "    'fitness', 'plain_alphabet' and 'cipher_alphabet' columns are written to\n",
+ "    `filename`; otherwise only 'fitness' is written.\n",
+ "\n",
+ "    Returns `(workers, trace)`: the sorted distinct values of the first index\n",
+ "    level, and the indexed DataFrame.\n",
+ "    \"\"\"\n",
+ "    # The context manager closes the log file as soon as parsing finishes.\n",
+ "    with open(log_filename) as log_file:\n",
+ "        parsed = [log_parse(line, verbose=verbose) for line in log_file]\n",
+ "    trace = pd.DataFrame([p for p in parsed if p['time'] > starttime])\n",
+ "    trace = trace.set_index(['worker', 'iteration']).sort_index()\n",
+ "    workers = sorted(set(trace.index.get_level_values(0)))\n",
+ "    if verbose:\n",
+ "        trace[['fitness', 'plain_alphabet', 'cipher_alphabet']].to_csv(filename, header=True)\n",
+ "    else:\n",
+ "        trace.fitness.to_csv(filename, header=True)\n",
+ "    return workers, trace"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},