Updated for challenge 9
[cipher-tools.git] / hillclimbing-results / hillclimbing-examples.ipynb
1 {
2 "cells": [
3 {
4 "cell_type": "code",
5 "execution_count": 125,
6 "metadata": {},
7 "outputs": [],
8 "source": [
9 "import string\n",
10 "import random\n",
11 "import itertools\n",
12 "from cipher.keyword_cipher import *\n",
13 "from support.utilities import *"
14 ]
15 },
16 {
17 "cell_type": "code",
18 "execution_count": 11,
19 "metadata": {},
20 "outputs": [
21 {
22 "data": {
23 "text/plain": [
24 "'gpavtdyzocqnrsujmxikwbehlf'"
25 ]
26 },
27 "execution_count": 11,
28 "metadata": {},
29 "output_type": "execute_result"
30 }
31 ],
32 "source": [
33 "pt = \"catch the cat\"\n",
34 "\n",
35 "ca = list(string.ascii_lowercase)\n",
36 "random.shuffle(ca)\n",
37 "ca = cat(ca)\n",
38 "ca"
39 ]
40 },
41 {
42 "cell_type": "code",
43 "execution_count": 18,
44 "metadata": {},
45 "outputs": [
46 {
47 "data": {
48 "text/plain": [
49 "'agkaz kzt agk'"
50 ]
51 },
52 "execution_count": 18,
53 "metadata": {},
54 "output_type": "execute_result"
55 }
56 ],
57 "source": [
58 "ct = keyword_encipher(pt, ca)\n",
59 "ct"
60 ]
61 },
62 {
63 "cell_type": "code",
64 "execution_count": 104,
65 "metadata": {},
66 "outputs": [],
67 "source": [
68 "def show_mapping_alpha(c_a, p_a=string.ascii_lowercase, letters=string.ascii_lowercase):\n",
69 " mapping = {p: c for (p, c) in zip(p_a, c_a) if p in letters}\n",
70 " return show_mapping(mapping)\n",
71 "\n",
72 "def show_mapping(mapping):\n",
73 " retval = '| plaintext letter | ' + ' | '.join(l for l in sorted(mapping)) + ' |\\n'\n",
74 " retval += '|-------------------|---|---|---|---|---|\\n'\n",
75 " retval += '| ciphertext letter | ' + ' | '.join(mapping[l] for l in sorted(mapping)) + ' |\\n'\n",
76 " return retval"
77 ]
78 },
79 {
80 "cell_type": "code",
81 "execution_count": 105,
82 "metadata": {},
83 "outputs": [
84 {
85 "name": "stdout",
86 "output_type": "stream",
87 "text": [
88 "| plaintext letter | a | c | e | h | t |\n",
89 "|-------------------|---|---|---|---|---|\n",
90 "| ciphertext letter | g | a | t | z | k |\n",
91 "\n"
92 ]
93 }
94 ],
95 "source": [
96 "print(show_mapping_alpha(ca, letters=sanitise(pt)))"
97 ]
98 },
99 {
100 "cell_type": "code",
101 "execution_count": 17,
102 "metadata": {},
103 "outputs": [
104 {
105 "data": {
106 "text/plain": [
107 "({'a': 'g', 'c': 'a', 'e': 't', 'h': 'z', 't': 'k'},\n",
108 " {'g': 'a', 'a': 'c', 't': 'e', 'z': 'h', 'k': 't'})"
109 ]
110 },
111 "execution_count": 17,
112 "metadata": {},
113 "output_type": "execute_result"
114 }
115 ],
116 "source": [
117 "m0 = {p: c for (p, c) in zip(string.ascii_letters, ca) if p in pt}\n",
118 "im0 = {c: p for (p, c) in zip(string.ascii_letters, ca) if p in pt}\n",
119 "m0, im0"
120 ]
121 },
122 {
123 "cell_type": "code",
124 "execution_count": 55,
125 "metadata": {},
126 "outputs": [
127 {
128 "name": "stdout",
129 "output_type": "stream",
130 "text": [
131 "| a | c | e | h | t |\n",
132 "| g | a | t | z | k |\n",
133 "\n"
134 ]
135 }
136 ],
137 "source": [
138 "print(show_mapping(m0))"
139 ]
140 },
141 {
142 "cell_type": "code",
143 "execution_count": 61,
144 "metadata": {},
145 "outputs": [],
146 "source": [
147 "def apply_inverse_map(ciphertext, mapping):\n",
148 " plaintext = cat(mapping[l] if l in mapping else l for l in ciphertext)\n",
149 " return plaintext, Pbigrams(sanitise(plaintext))"
150 ]
151 },
152 {
153 "cell_type": "code",
154 "execution_count": 44,
155 "metadata": {},
156 "outputs": [],
157 "source": [
158 "def swap(letters, i, j):\n",
159 " if i > j:\n",
160 " i, j = j, i\n",
161 " if i == j:\n",
162 " return letters\n",
163 " else:\n",
164 " return (letters[:i] + letters[j] + letters[i+1:j] + letters[i] +\n",
165 " letters[j+1:])"
166 ]
167 },
168 {
169 "cell_type": "code",
170 "execution_count": 50,
171 "metadata": {},
172 "outputs": [],
173 "source": [
174 "def map_swap(mapping):\n",
175 " keys = sorted(mapping)\n",
176 " values = cat(mapping[l] for l in keys)\n",
177 " n = len(keys)\n",
178 " swapped_values = swap(values, random.randrange(n), random.randrange(n))\n",
179 " return {k: sv for (k, sv) in zip(keys, swapped_values)}"
180 ]
181 },
182 {
183 "cell_type": "code",
184 "execution_count": 89,
185 "metadata": {},
186 "outputs": [],
187 "source": [
188 "im1 = map_swap(im0)\n",
189 "im2 = map_swap(im1)"
190 ]
191 },
192 {
193 "cell_type": "code",
194 "execution_count": 90,
195 "metadata": {},
196 "outputs": [
197 {
198 "data": {
199 "text/plain": [
200 "('aceah eht ace', -24.470656262279007)"
201 ]
202 },
203 "execution_count": 90,
204 "metadata": {},
205 "output_type": "execute_result"
206 }
207 ],
208 "source": [
209 "apply_inverse_map(ct, im2)"
210 ]
211 },
212 {
213 "cell_type": "code",
214 "execution_count": 91,
215 "metadata": {},
216 "outputs": [
217 {
218 "data": {
219 "text/plain": [
220 "('actah the act', -23.337953804339712)"
221 ]
222 },
223 "execution_count": 91,
224 "metadata": {},
225 "output_type": "execute_result"
226 }
227 ],
228 "source": [
229 "apply_inverse_map(ct, im1)"
230 ]
231 },
232 {
233 "cell_type": "code",
234 "execution_count": 88,
235 "metadata": {},
236 "outputs": [
237 {
238 "data": {
239 "text/plain": [
240 "('catch the cat', -22.142275954584633)"
241 ]
242 },
243 "execution_count": 88,
244 "metadata": {},
245 "output_type": "execute_result"
246 }
247 ],
248 "source": [
249 "apply_inverse_map(ct, im0)"
250 ]
251 },
252 {
253 "cell_type": "code",
254 "execution_count": 92,
255 "metadata": {},
256 "outputs": [
257 {
258 "data": {
259 "text/plain": [
260 "({'g': 'a', 'a': 'c', 't': 'e', 'z': 'h', 'k': 't'},\n",
261 " {'a': 'a', 'g': 'c', 'k': 't', 't': 'e', 'z': 'h'},\n",
262 " {'a': 'a', 'g': 'c', 'k': 'e', 't': 't', 'z': 'h'})"
263 ]
264 },
265 "execution_count": 92,
266 "metadata": {},
267 "output_type": "execute_result"
268 }
269 ],
270 "source": [
271 "im0, im1, im2"
272 ]
273 },
274 {
275 "cell_type": "code",
276 "execution_count": 93,
277 "metadata": {},
278 "outputs": [],
279 "source": [
280 "m1 = {im1[l]: l for l in im1}\n",
281 "m2 = {im2[l]: l for l in im2}"
282 ]
283 },
284 {
285 "cell_type": "code",
286 "execution_count": 106,
287 "metadata": {},
288 "outputs": [
289 {
290 "name": "stdout",
291 "output_type": "stream",
292 "text": [
293 "| plaintext letter | a | c | e | h | t |\n",
294 "|-------------------|---|---|---|---|---|\n",
295 "| ciphertext letter | g | a | t | z | k |\n",
296 "\n"
297 ]
298 }
299 ],
300 "source": [
301 "print(show_mapping(m0))"
302 ]
303 },
304 {
305 "cell_type": "code",
306 "execution_count": 107,
307 "metadata": {},
308 "outputs": [
309 {
310 "name": "stdout",
311 "output_type": "stream",
312 "text": [
313 "| plaintext letter | a | c | e | h | t |\n",
314 "|-------------------|---|---|---|---|---|\n",
315 "| ciphertext letter | a | g | t | z | k |\n",
316 "\n"
317 ]
318 }
319 ],
320 "source": [
321 "print(show_mapping(m1))"
322 ]
323 },
324 {
325 "cell_type": "code",
326 "execution_count": 108,
327 "metadata": {},
328 "outputs": [
329 {
330 "name": "stdout",
331 "output_type": "stream",
332 "text": [
333 "| plaintext letter | a | c | e | h | t |\n",
334 "|-------------------|---|---|---|---|---|\n",
335 "| ciphertext letter | a | g | k | z | t |\n",
336 "\n"
337 ]
338 }
339 ],
340 "source": [
341 "print(show_mapping(m2))"
342 ]
343 },
344 {
345 "cell_type": "code",
346 "execution_count": 110,
347 "metadata": {},
348 "outputs": [
349 {
350 "data": {
351 "text/plain": [
352 "('hceha eat hce', -26.41716766077668)"
353 ]
354 },
355 "execution_count": 110,
356 "metadata": {},
357 "output_type": "execute_result"
358 }
359 ],
360 "source": [
361 "im3 = map_swap(im2)\n",
362 "apply_inverse_map(ct, im3)"
363 ]
364 },
365 {
366 "cell_type": "code",
367 "execution_count": 111,
368 "metadata": {},
369 "outputs": [],
370 "source": [
371 "m3 = {im3[l]: l for l in im3}"
372 ]
373 },
374 {
375 "cell_type": "code",
376 "execution_count": 112,
377 "metadata": {},
378 "outputs": [
379 {
380 "name": "stdout",
381 "output_type": "stream",
382 "text": [
383 "| plaintext letter | a | c | e | h | t |\n",
384 "|-------------------|---|---|---|---|---|\n",
385 "| ciphertext letter | z | g | k | a | t |\n",
386 "\n"
387 ]
388 }
389 ],
390 "source": [
391 "print(show_mapping(m3))"
392 ]
393 },
394 {
395 "cell_type": "code",
396 "execution_count": 113,
397 "metadata": {},
398 "outputs": [],
399 "source": [
400 "def all_swaps(mapping):\n",
401 " keys = sorted(mapping)\n",
402 " values = cat(mapping[l] for l in keys)\n",
403 " n = len(keys)\n",
404 " swapped_values = [swap(values, i, j) for i in range(n) for j in range(n) if i < j]\n",
405 " return [{k: sv for (k, sv) in zip(keys, svs)} for svs in swapped_values]"
406 ]
407 },
408 {
409 "cell_type": "code",
410 "execution_count": 117,
411 "metadata": {},
412 "outputs": [
413 {
414 "data": {
415 "text/plain": [
416 "({'g': 'a', 'a': 'c', 't': 'e', 'z': 'h', 'k': 't'},\n",
417 " [('actah the act', -23.337953804339712),\n",
418 " ('tacth che tac', -22.992889593694795),\n",
419 " ('eateh thc eat', -23.337174988961543),\n",
420 " ('hathc tce hat', -24.20565798548872),\n",
421 " ('ctach ahe cta', -23.361982341471602),\n",
422 " ('cetch tha cet', -23.152196785128968),\n",
423 " ('chtca tae cht', -25.47053856384374),\n",
424 " ('caech eht cae', -27.119008761052356),\n",
425 " ('cahct hte cah', -25.96020844569102),\n",
426 " ('catce teh cat', -24.369461369323975)])"
427 ]
428 },
429 "execution_count": 117,
430 "metadata": {},
431 "output_type": "execute_result"
432 }
433 ],
434 "source": [
435 "im0, [apply_inverse_map(ct, tim) for tim in all_swaps(im0)]"
436 ]
437 },
438 {
439 "cell_type": "code",
440 "execution_count": 118,
441 "metadata": {},
442 "outputs": [
443 {
444 "data": {
445 "text/plain": [
446 "({'a': 'a', 'g': 'c', 'k': 'e', 't': 't', 'z': 'h'},\n",
447 " [('caech eht cae', -27.119008761052356),\n",
448 " ('ecaeh aht eca', -26.10317913928645),\n",
449 " ('tceth eha tce', -23.289877585658743),\n",
450 " ('hceha eat hce', -26.41716766077668),\n",
451 " ('aecah cht aec', -28.466074945814817),\n",
452 " ('ateah ehc ate', -23.89678491033435),\n",
453 " ('aheac ect ahe', -23.82052347276842),\n",
454 " ('actah the act', -23.337953804339712),\n",
455 " ('achae het ach', -24.4061387567535),\n",
456 " ('aceat eth ace', -21.139211036323402)])"
457 ]
458 },
459 "execution_count": 118,
460 "metadata": {},
461 "output_type": "execute_result"
462 }
463 ],
464 "source": [
465 "im2, [apply_inverse_map(ct, tim) for tim in all_swaps(im2)]"
466 ]
467 },
468 {
469 "cell_type": "code",
470 "execution_count": 119,
471 "metadata": {},
472 "outputs": [],
473 "source": [
474 "def all_swaps_worse(mapping):\n",
475 " _, score0 = apply_inverse_map(ct, mapping)\n",
476 " swapped_mappings = all_swaps(mapping)\n",
477 " scores = [apply_inverse_map(ct, m)[1] for m in swapped_mappings]\n",
478 " better_scores = [s for s in scores if s > score0]\n",
479 " return better_scores == []"
480 ]
481 },
482 {
483 "cell_type": "code",
484 "execution_count": 123,
485 "metadata": {},
486 "outputs": [
487 {
488 "data": {
489 "text/plain": [
490 "False"
491 ]
492 },
493 "execution_count": 123,
494 "metadata": {},
495 "output_type": "execute_result"
496 }
497 ],
498 "source": [
499 "all_swaps_worse(im3)"
500 ]
501 },
502 {
503 "cell_type": "code",
504 "execution_count": 124,
505 "metadata": {},
506 "outputs": [],
507 "source": [
508 "def make_map(als, bls):\n",
509 " return {a: b for (a, b) in zip(als, bls)}"
510 ]
511 },
512 {
513 "cell_type": "code",
514 "execution_count": 129,
515 "metadata": {},
516 "outputs": [
517 {
518 "data": {
519 "text/plain": [
520 "('aceht', 'agktz')"
521 ]
522 },
523 "execution_count": 129,
524 "metadata": {},
525 "output_type": "execute_result"
526 }
527 ],
528 "source": [
529 "ptls = cat(sorted(deduplicate(sanitise(pt))))\n",
530 "ctls = cat(sorted(deduplicate(sanitise(ct))))\n",
531 "ptls, ctls"
532 ]
533 },
534 {
535 "cell_type": "code",
536 "execution_count": 132,
537 "metadata": {},
538 "outputs": [
539 {
540 "data": {
541 "text/plain": [
542 "(120,\n",
543 " [{'a': 'a', 'g': 'c', 'k': 'e', 't': 'h', 'z': 't'},\n",
544 " {'a': 'a', 'g': 'c', 'k': 'e', 'z': 'h', 't': 't'},\n",
545 " {'a': 'a', 'g': 'c', 't': 'e', 'k': 'h', 'z': 't'},\n",
546 " {'a': 'a', 'g': 'c', 't': 'e', 'z': 'h', 'k': 't'},\n",
547 " {'a': 'a', 'g': 'c', 'z': 'e', 'k': 'h', 't': 't'}])"
548 ]
549 },
550 "execution_count": 132,
551 "metadata": {},
552 "output_type": "execute_result"
553 }
554 ],
555 "source": [
556 "all_maps = [make_map(c, ptls) for c in itertools.permutations(ctls)]\n",
557 "len(all_maps), all_maps[:5]"
558 ]
559 },
560 {
561 "cell_type": "code",
562 "execution_count": 138,
563 "metadata": {},
564 "outputs": [
565 {
566 "data": {
567 "text/plain": [
568 "[{'a': 'a', 'z': 'c', 'k': 'e', 't': 'h', 'g': 't'},\n",
569 " {'g': 'a', 't': 'c', 'z': 'e', 'a': 'h', 'k': 't'},\n",
570 " {'g': 'a', 'z': 'c', 'a': 'e', 't': 'h', 'k': 't'},\n",
571 " {'t': 'a', 'k': 'c', 'g': 'e', 'z': 'h', 'a': 't'},\n",
572 " {'t': 'a', 'z': 'c', 'k': 'e', 'g': 'h', 'a': 't'},\n",
573 " {'z': 'a', 't': 'c', 'a': 'e', 'k': 'h', 'g': 't'}]"
574 ]
575 },
576 "execution_count": 138,
577 "metadata": {},
578 "output_type": "execute_result"
579 }
580 ],
581 "source": [
582 "local_optima = [m for m in all_maps if all_swaps_worse(m) if m != im0]\n",
583 "local_optima"
584 ]
585 },
586 {
587 "cell_type": "code",
588 "execution_count": 143,
589 "metadata": {},
590 "outputs": [
591 {
592 "data": {
593 "text/plain": [
594 "(('tecth cha tec', -22.37718617528681),\n",
595 " [('etceh cha etc', -27.45222919076422),\n",
596 " ('cetch tha cet', -23.152196785128968),\n",
597 " ('aecah cht aec', -28.466074945814817),\n",
598 " ('hecht cta hec', -24.0528877258752),\n",
599 " ('tceth eha tce', -23.289877585658743),\n",
600 " ('tacth che tac', -22.992889593694795),\n",
601 " ('thcte cea thc', -23.37530629522044),\n",
602 " ('teath ahc tea', -23.192822966291835),\n",
603 " ('tehtc hca teh', -25.824045558109102),\n",
604 " ('tecta cah tec', -23.630623398955464)])"
605 ]
606 },
607 "execution_count": 143,
608 "metadata": {},
609 "output_type": "execute_result"
610 }
611 ],
612 "source": [
613 "apply_inverse_map(ct, local_optima[3]), [apply_inverse_map(ct, tim) for tim in all_swaps(local_optima[3])]"
614 ]
615 },
616 {
617 "cell_type": "code",
618 "execution_count": 145,
619 "metadata": {},
620 "outputs": [
621 {
622 "data": {
623 "text/plain": [
624 "(('ethea hac eth', -21.831799648932474),\n",
625 " [('tehta hac teh', -25.2630658238322),\n",
626 " ('hteha eac hte', -25.12161519433393),\n",
627 " ('cthca hae cth', -25.56645047924706),\n",
628 " ('athae hec ath', -22.523920547555058),\n",
629 " ('ehtea tac eht', -24.414224893001006),\n",
630 " ('echea hat ech', -22.34614937355321),\n",
631 " ('eahet htc eah', -24.64789885786501),\n",
632 " ('etcea cah etc', -24.40643936994998),\n",
633 " ('etaeh ahc eta', -27.042650227267693),\n",
634 " ('ethec hca eth', -23.70218668022281)])"
635 ]
636 },
637 "execution_count": 145,
638 "metadata": {},
639 "output_type": "execute_result"
640 }
641 ],
642 "source": [
643 "apply_inverse_map(ct, local_optima[5]), [apply_inverse_map(ct, tim) for tim in all_swaps(local_optima[5])]"
644 ]
645 },
646 {
647 "cell_type": "code",
648 "execution_count": 140,
649 "metadata": {},
650 "outputs": [
651 {
652 "data": {
653 "text/plain": [
654 "('catch the cat', -22.142275954584633)"
655 ]
656 },
657 "execution_count": 140,
658 "metadata": {},
659 "output_type": "execute_result"
660 }
661 ],
662 "source": [
663 "apply_inverse_map(ct, im0)"
664 ]
665 },
666 {
667 "cell_type": "code",
668 "execution_count": 146,
669 "metadata": {},
670 "outputs": [
671 {
672 "data": {
673 "text/plain": [
674 "{'t': 'a', 'k': 'c', 'g': 'e', 'z': 'h', 'a': 't'}"
675 ]
676 },
677 "execution_count": 146,
678 "metadata": {},
679 "output_type": "execute_result"
680 }
681 ],
682 "source": [
683 "local_optima[3]"
684 ]
685 },
686 {
687 "cell_type": "code",
688 "execution_count": 147,
689 "metadata": {},
690 "outputs": [
691 {
692 "name": "stdout",
693 "output_type": "stream",
694 "text": [
695 "| plaintext letter | a | c | e | h | t |\n",
696 "|-------------------|---|---|---|---|---|\n",
697 "| ciphertext letter | t | k | g | z | a |\n",
698 "\n"
699 ]
700 }
701 ],
702 "source": [
703 "l3 = {local_optima[3][l]: l for l in local_optima[3]}\n",
704 "print(show_mapping(l3))"
705 ]
706 },
707 {
708 "cell_type": "code",
709 "execution_count": 150,
710 "metadata": {},
711 "outputs": [
712 {
713 "data": {
714 "text/plain": [
715 "'etoainhsrdlumwycfgpbvkxjqz'"
716 ]
717 },
718 "execution_count": 150,
719 "metadata": {},
720 "output_type": "execute_result"
721 }
722 ],
723 "source": [
724 "cat(p[0] for p in english_counts.most_common())"
725 ]
726 },
727 {
728 "cell_type": "code",
729 "execution_count": null,
730 "metadata": {},
731 "outputs": [],
732 "source": []
733 }
734 ],
735 "metadata": {
736 "kernelspec": {
737 "display_name": "Python 3",
738 "language": "python",
739 "name": "python3"
740 },
741 "language_info": {
742 "codemirror_mode": {
743 "name": "ipython",
744 "version": 3
745 },
746 "file_extension": ".py",
747 "mimetype": "text/x-python",
748 "name": "python",
749 "nbconvert_exporter": "python",
750 "pygments_lexer": "ipython3",
751 "version": "3.6.8"
752 }
753 },
754 "nbformat": 4,
755 "nbformat_minor": 2
756 }