Updated for challenge 9
[cipher-tools.git] / make-letter-frequency-treemap.ipynb
1 {
2 "cells": [
3 {
4 "cell_type": "code",
5 "execution_count": 32,
6 "metadata": {},
7 "outputs": [],
8 "source": [
9 "# libraries: standard library, then third-party, then project-local\n",
10 "import random\n",
11 "import string\n",
12 "import matplotlib.pyplot as plt\n",
13 "%matplotlib inline\n",
14 "import squarify # pip install squarify (algorithm for treemap)\n",
15 "from support.language_models import *\n",
16 "from support.utilities import *"
17 ]
18 },
19 {
20 "cell_type": "code",
21 "execution_count": 3,
22 "metadata": {},
23 "outputs": [
24 {
25 "data": {
26 "text/plain": [
27 "defaultdict(int,\n",
28 " {'a': 0.07822525209432887,\n",
29 " 'b': 0.014829998223636929,\n",
30 " 'c': 0.02251879345845122,\n",
31 " 'd': 0.042759915992231244,\n",
32 " 'e': 0.12099426536374505,\n",
33 " 'f': 0.02159693603704411,\n",
34 " 'g': 0.018815084434702378,\n",
35 " 'h': 0.06645305621431015,\n",
36 " 'i': 0.06723047441023709,\n",
37 " 'j': 0.0010659774441790274,\n",
38 " 'k': 0.00865805425839555,\n",
39 " 'l': 0.04134042154867259,\n",
40 " 'm': 0.027483193578407596,\n",
41 " 'n': 0.06693265828344594,\n",
42 " 'o': 0.08052207518149467,\n",
43 " 'p': 0.016070260346516884,\n",
44 " 'q': 0.0008776478463153873,\n",
45 " 'r': 0.059626906298523796,\n",
46 " 's': 0.06455443850567806,\n",
47 " 't': 0.08946868868814231,\n",
48 " 'u': 0.03036719004738724,\n",
49 " 'v': 0.010421489620086533,\n",
50 " 'w': 0.024603665947343364,\n",
51 " 'x': 0.0011832844394584982,\n",
52 " 'y': 0.022829377693572104,\n",
53 " 'z': 0.0005708940436934243})"
54 ]
55 },
56 "execution_count": 3,
57 "metadata": {},
58 "output_type": "execute_result"
59 }
60 ],
61 "source": [
62 "normalised_english_counts"
63 ]
64 },
65 {
66 "cell_type": "code",
67 "execution_count": 11,
68 "metadata": {
69 "scrolled": true
70 },
71 "outputs": [
72 {
73 "data": {
74 "text/plain": [
75 "(['e',\n",
76 " 't',\n",
77 " 'o',\n",
78 " 'a',\n",
79 " 'i',\n",
80 " 'n',\n",
81 " 'h',\n",
82 " 's',\n",
83 " 'r',\n",
84 " 'd',\n",
85 " 'l',\n",
86 " 'u',\n",
87 " 'm',\n",
88 " 'w',\n",
89 " 'y',\n",
90 " 'c',\n",
91 " 'f',\n",
92 " 'g',\n",
93 " 'p',\n",
94 " 'b',\n",
95 " 'v',\n",
96 " 'k',\n",
97 " 'x',\n",
98 " 'j',\n",
99 " 'q',\n",
100 " 'z'],\n",
101 " [0.12099426536374505,\n",
102 " 0.08946868868814231,\n",
103 " 0.08052207518149467,\n",
104 " 0.07822525209432887,\n",
105 " 0.06723047441023709,\n",
106 " 0.06693265828344594,\n",
107 " 0.06645305621431015,\n",
108 " 0.06455443850567806,\n",
109 " 0.059626906298523796,\n",
110 " 0.042759915992231244,\n",
111 " 0.04134042154867259,\n",
112 " 0.03036719004738724,\n",
113 " 0.027483193578407596,\n",
114 " 0.024603665947343364,\n",
115 " 0.022829377693572104,\n",
116 " 0.02251879345845122,\n",
117 " 0.02159693603704411,\n",
118 " 0.018815084434702378,\n",
119 " 0.016070260346516884,\n",
120 " 0.014829998223636929,\n",
121 " 0.010421489620086533,\n",
122 " 0.00865805425839555,\n",
123 " 0.0011832844394584982,\n",
124 " 0.0010659774441790274,\n",
125 " 0.0008776478463153873,\n",
126 " 0.0005708940436934243])"
127 ]
128 },
129 "execution_count": 11,
130 "metadata": {},
131 "output_type": "execute_result"
132 }
133 ],
134 "source": [
135 "ls = sorted(normalised_english_counts, key=normalised_english_counts.get, reverse=True)\n",
136 "cs = [normalised_english_counts[l] for l in ls]\n",
137 "ls, cs"
138 ]
139 },
140 {
141 "cell_type": "code",
142 "execution_count": 29,
143 "metadata": {},
144 "outputs": [
145 {
146 "data": {
147 "image/png": "iVBORw0KGgoAAAANSUhEUgAAARAAAAD8CAYAAAC/+/tYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAEmtJREFUeJzt3Xl0XNV9wPH7ZkazaGa0zMhaPFoteRV4N8aybDkhtDZgk5AEmrQhiYGEkMY5CeEUSGh7yglJ6xA4lCb0QJw2UEhJgQDxUnJivMgYvOANbEu2ZGuxdmk00kgzmu31j9SBQHDgjjRXo/f9/OXRkd79/WF9de9o9EbTdV0AgAyT6gEApC8CAkAaAQEgjYAAkEZAAEgjIACkERAA0ggIAGkEBIA0i+oBhBDizdbSl1XPoNoNO7+ueoSU27zqWWVr1w/NUrb2pZTb+8f1eq+sXyCEEOLswGtlHcMny4QQIqZHMmxm1+jKkpv3bz+7eX0y158UAQEwsao8NS1VnpqWhB7TXmt/akVZ9qLm8bguRxjAQI51b7ss1+7rK866vHs8rkdAAINo9h8oHosFHfPyrmocr2tyhAGmsNoXTwkhhHjm24cvO/u77lJvmTNwtuAXi4vmZA2uvXNu0scYdiDAFNf0el/2mb29PpvTEo2NxS3nDvQXnt7VPX08rs0OBJDU3xRwvPTNPVd8+Tfrd6ue5VKaXu/zLLjOd/76f7i8QQghfnX3kXnuafbweFybHQiQRjrPBB33X723TvUcFxEQIAl6XNd+/fVdi7Zc89Ka57+yc8lYMGpWPdN7Va7IGzi7v7dwbCRmGh2MmE/s6Cwfr2sTECAJwe5R14KbZp7fuG3DroxMS+zgE2+XpWrtjobhzAfW1q9u2Nef/UGf8/iB1WKndkMgNmfRhQfW7q/bfNOJ5dbZVZ3HO4vF4wdWJz0Dz4EASXDk2kIVq31+IYSYc215+7FfnqkQQozLi7QupfWtgPOp75xYctP91Ucrl+UO/bnP93z++rOez19/VgghWm69+1prSVFgPOYgIEAytD/zeAKEh2K2/9h0bNnND80/VL4gJzjxK34wjjBAEkIDY47z9R25QgjRsK3FV3i5d2Ci17RmmqPuPFuo8bUBj8zXa2ZzIvfGa3kpO6CaqyAzePSXjeVbrnlpzVgwal12S3XLRK9psmiJ23+2+NCxHd3Fu3/R4pvo9S6FIwwgyVuZHdq4fcMuFWs73Bnx27csPvDozYdW2J2W2PJP+8blb1s+KgKCtPPSE93Fr/xXb6UmhF5UYR++d0vVEdUzpUrRTFfovt+u2i2EEG6vLXbP1pV7L/X5ngMZ7/tYa0J75+NfTm4eAoK00vBm0LXt5z0zv//c7H3eQmvE3xN9/3cI/shQW0N2255fLZxz4117o8FBq56ImYJd59yuworhZK9NQJBWjuwayltYl9XpLbRGhBAiNz8jqnqmyS6rZHbAXTyrq2Xn09XDbQ0+Z35Zz3jEQwieRAUMoXjlpxpD/R25Ga6c4Mzrv35wvK7LDgRpZdGarL6HvnFu2UB3pMlTYI36e6IZMruQl3ctm4jxkvaNtTsm5LrR0YA1EYtaNFMikYhGzGabIz4e1yUgSCuzF7uC13wp/8x9NzbWmExC91XZh+5+vOqo6rkmu5adz8wvWPixhsjQgKN1z7NzK67+4lvjcV0CAiGEELquC6HrQjNN/lPthtsK2jfcVtCueo500X301WLNZNLz59dd0BNxcerZzbX+pqPe3MqFSd/BmYAYWORCl6PnwZ9fmVFc6I92dOfkf+vLb1h9BSHVcxnZC3dfPa7Xc4mYcOWvahf5q9pFR0wIIcQVq79TL4QQFx8ng4AYXNwfcHq/dMMRx/zZg6pnQfqZ/PtVTChTlitEPCCLgBicZs1Ifh8LwyIgAKTxHAggKdLZ5eh6/PHl1oKCQKS7OzsjzztcsHHjUZPdPi
6vsUgHht+BPPKDwRnXreyou25lR92j/zxYoXqeVLL6CkPFm/9uUt9RfLKLDw66smpXni/9+/t2aVZbbPCV36bsloaTgaEDcqA+nP3braMlz2wvrH96W2H9jhdHyw7tD2epngvpw+RyhjKrq/1CCOFeurQ93NoqdZMfWY1Hnp3TcvqV8ouPzx5/flbz21tnpGp9YwfktbDnylX2LneWKZ6VbYqvqLN3vlEf9qqeC+lEu8SjiZdfsqSjv+vtP7xJ1ED36emFpcs6UrW+oQMCJCsRDDpGT57KFUKI4OHDPltZ2YTf0vDdcvIqh2LRkDU00mcL9DdnmS22aKY7f1zeNOrDMHRAltfaB97YGy4MDifMw0MJ8/494cLltfakX94L4zDn5ASH6veWt/7T/WsS4bA15+pPTPgtDd8rN392Z3froendbYenewurU7b7EMLgv4VZVmMPfOLazLa/WttVK4QQazdkti5dYf+zt8gHLtI0TS/8yleU3hGtoGTphTPH/mdBLBqyzl/5tddSubahAyKEEJvuyWnedE/OhL+PBzBRsjxlwXg8YsmwusIOp3cslWsbPiBQ5yd33Khu8bXJX8JaVBgque97k+LX4Fd84h4lcxj6ORAAySEgAKQREADSCAgAaTyJCkP65rptqkf4I4++cK0QQojOK9PrW5IdCABpBASANAICQBoBASCNgACQRkAASCMgAKSl1y+dpzCrK6J6BAWsqgdAktiBAJBGQJBWGk4+P7v5zI4/3D2/4e3n5rz7MVKLgCCtFJfWtPZ2nygWQghdT4j+3tPTfSU17arm2fpwU8U/frx+zWO3HlmkagaVeA4EacXpKgxZLPaof6ApKxIesmU6pwVs9qyoqnkOvthZdseWRa/nVzhTdiPjyYSAIO0UTF/S2tl+oCQSCdoKfUvaVM2xZdPxy4d6I87Hbju6fNG6gtb1d1adS+X6vS89P3Pk5IlikyNzzOx2h21F0wfz1m1I6e05OcIg7RT5lnUG/OfzR4M9OfmFC3tUzbHxkfknnDkZ4U1PLdmf6niMnmnIHm08VVT67bt3+267441IT3d2Kte/iB0I0o7ZnKG7s0v6LBZb1GQyqx5HiVDzWY+jalaXyWpLCKtNOGZUdauYgx0I0o6uJ0RwuCPXV1qj7PiC3yMgSCuBwVbX/t0/+Hh2TlmfO8s3onoeVRwzqgZCTWcKEpExUzw0ag41ny1QMQdHGKSV7JzSYM2a7+5UPYdqmTNnBxxVs7pbfvzDOnNm5pjVmzdssjtiqZ6DgABJuL9+1e9Ure39i2ua8q//TGMiHDa3/eShGntpeSDVMxAQIE11PfPk/NhAn1uPx03O6vntmZUzjRmQLz78LdUjqFcTVD0B0oxv41eVvievEDyJCiAJBASANAICQBoBASCNgACQRkAASCMgAKQREADSJsULyWBMtsNNytbeXjvjfR/TS4oUTPJ7M8SgGPtR+v1tIDsQANLYgRhUfCRs7njgySXxQNCuJ3Qtd33NmZx1V3aongvphYAY1PC+E9PM2c5wyfdvOyCEELGhEf4v4CPjCGNQ9krfcLixbVrXv70wN3jwlMeS5Uz5vSSQ/giIQdkrp4+Ubr5jj620YKj/v1+d0/PEb2aqngnph22rQUW6+m2WHHc0d33NBbPLEQ28+map6pmQfgiIQYXPtGf1/3LnPE3TdGE26fm3XHtc9UxIPwTEoLJWLejNWrVgt+o5kN54DgSANAICQBoBASCNgACQRkAASCMgAKQREADSCAgAaQQEgDReiYq0NBwbcLw5/L9X1OV+brcQQjSMvDEjrkct81y1jamaIRjqcbzZ/PRyl6NgcDjUleuy5w/6PAvbmrv3zI7Gw9bqkg1H8rIqBydi7aZ/311pyjAnKjbWnjv1wNbq0daBrCWPfWF/7+4Gb+f2t0rn//DTKXnbS3YgQBLC0WFnRf7KptXzvvlqKOJ3dfpP+FbMvn1fZWHdyebuvVUTtW7OotL+oZMdHi
GEGDnXlx0fi5kTkZg2eKzNm109vX+i1n0vwwckNjps6d6/o0z1HEhPtgzXaK6rdFjTTCLT6hn2uMr7NE0T2Zm+4bHocOZErZu7sCQw2jqQHQ2ELJrFnHBV5fv9R1pzhk51eXKXlA1M1LrvRUBGgxmDJw+Wq54DH41JM+lCCO3i44SIm1XMoWmmxLseCJPJkhBCCE1oui4S2gd+YZJMVotu9TpH2587XOKeVeDPudw34D903jvWN+x0zykKTtS675sjVQtNVp27X5wbCw45G7d8f3X7jqfnqp4HH47D5B6L6mPWcGIkI67HTP3RCwWqZ0o195yigc6tx2fkLCzp91xR0d+z83RZZrEnoJkmrFvvY/gnUYvqrj/V8uIT7lkbv7tH9Sz48EyaWS+zV595ffDXqzJM9rDD5E7ZT93JImdByUDn1uMzPcsq/BaXLa5lmBNZ84pSdnwRQghN1/VUrvcnXf7th15WtXa4r9PR8uITV8y+5T6l98YYqzHc/38x4/Y21SP8EZXvCyOEUPK+MLuu+tH6ZL7e8EcYAPIMHxCzPTOWiEYNf5QDZBg+IBmu7Kgj3zfQ8LP763gSFfho+MkrhCi/4aspedUeMNUYfgcCQB47kEkiHlPyOiggKexAAEgjIACkERAA0ggIAGkEBIA0AgJAGgEBII2AAJBGQABIIyAApPFSdiijPzdh9xyWFPjIXxH+kW/8ln/YNX7X+rCuSu7L2YEAkEZAAEgjIACkERAA0ggIAGkEBIA0AgJAGgEBII2AAJBGQABIIyAGFWnrdrRu2lyneg6kNwICQBp/TGdkCV3revCp+ZHznR5zljNU9L1bDpoctoTqsZA+2IEYWGxgyJm9ruZ86b/etUtz2GJDOw8WqZ4J6YWAGJg5xz3qmDdjSAghbGVFg7Fe/2T7+3pMcpPiCGML6KpHUG5UwZqaxfTOccWk6XokoSkYA2mMHQimvJMP7ZzV+NO9M1TPIaN+273rVM9wKQQEgLRJcYRB6llLCkKlj9y1++Jj71+va1Y5z3g7/ciuqu49TSUZbtuYzesMuyvzBlXPNBWxA8GU03+4Nbv3tXO+lf/5N3uWPvzpA8Fz/TmqZ5qq2IFgyhk43ObxLivtsjhtcSGE8Cwu7lI901TFDgSANAKCKceztHSg/1BrYWw0YooOhc0DR9oLVM80VXGEwZTjXVwSmLai4sK+m5+sy3DbxlzlXp5AnSAEBFPSnE1rzs7ZtOas6jmSVXvNA9tVz3ApHGEASCMgAKQREADSCAgAaQQEgDQCAkAaAQEgjYAAkEZAAEgjIACkERAA0ggIAGkEBIA0AgJAGgEBII2AAJDGDYUmiZzsEdUjpITzsXdukB4SPoWTJMdf9f/fOrPUzqEaOxAA0ggIAGkEBIA0AgJAGgEBII2AAJBGQABIIyAApBEQANIICABpBASANAICQBoBASCNgACQRkAASCMgAKQZ/oZC3W/t9vU1vF6hJ+Imh2f64IyPfeG4ZjKrHgtIC4begYz0tLj8549Pn/vJO/dd9tl79whN07vf3lusei4gXRh6BzLYdjIvHOjJOfn8v6wSQohEPGa22F1jqucC0oWhAyKEEDmll7WVr7rptOo5gHRk6CNMdsm8vqELp4vGgn6rEEJER4cyQv4uh+q5gHRh6B2IK78sWDj/qoYz2396pS50TdNMiZLlnzzhyC0MqZ4NSAeGDogQQuTPq+3In1fboXoOIB0Z+ggDIDkEBIA0AgJAGgEBII2AYMo5/OqDK1XPYBQEBFPOko/duU/1DEZh+F/jGlXjXU8ujfpHHHosbvb+5YLmos/Vtqqeabzs+82961Ze98B21XPICvd1OlpeeHy5Pb94INx7wWNxuELln/3aQbPVnlA923uxAzGoins+daz6idv3zv3JrXv6XzlWEekfzlA9E94RDQaceYtXn5996327TFZ7zH/i9SLVM/0p7EAMqvPp+oqhN5sLhRAiNjjqCDX3OK1e96DqufB7Fm
fWqLOkakgIIezTpg9GAgOZqmf6UwiIAfn3nfYGT7blzX30ln3mTFv81N/+bEUiEuUmKJOIZjIl3vm3puuxhKZyng/CEcaA4sGwxZxpi5ozbfGRhg5XuLU3V/VMSE/sQAzIU1fd27fjaPmJLz66xpafHbSXTvOrngnpSdN1XfUMYuktP35Z9Qyq6Z/pVz1CSjgfy1E9wrjwV02Nn70nHvzW+mS+niMMAGkEBIA0AgJAGgEBII2AAJBGQABIIyAApBEQANIICABpBASANAICQBoBASCNgACQRkAASCMgAKQREADSCAgAaQQEgDQCAkAaAQEgjYAAkEZAAEgjIACkERAA0ggIAGkEBIA0AgJAGgEBIG1SvLk2gPTEDgSANAICQBoBASCNgACQRkAASCMgAKQREADSCAgAaQQEgDQCAkAaAQEgjYAAkEZAAEgjIACkERAA0ggIAGkEBIA0AgJAGgEBII2AAJBGQABIIyAApBEQANL+D7vRSmjps6oAAAAAAElFTkSuQmCC\n",
148 "text/plain": [
149 "<matplotlib.figure.Figure at 0x7f1224d9be10>"
150 ]
151 },
152 "metadata": {},
153 "output_type": "display_data"
154 }
155 ],
156 "source": [
157 "# If you have 2 lists\n",
158 "plt.rcParams[\"figure.figsize\"] = (4,4)\n",
159 "squarify.plot(sizes=cs, label=ls, alpha=.7 )\n",
160 "plt.axis('off')\n",
161 "plt.savefig('letter-treemap.png', bbox_inches='tight')\n",
162 "plt.show()"
163 ]
164 },
165 {
166 "cell_type": "code",
167 "execution_count": 42,
168 "metadata": {},
169 "outputs": [
170 {
171 "data": {
172 "text/plain": [
173 "'treattlpis'"
174 ]
175 },
176 "execution_count": 42,
177 "metadata": {},
178 "output_type": "execute_result"
179 }
180 ],
181 "source": [
182 "cat(random_english_letter() for _ in range(10))"
183 ]
184 },
185 {
186 "cell_type": "code",
187 "execution_count": 46,
188 "metadata": {},
189 "outputs": [
190 {
191 "data": {
192 "text/plain": [
193 "'lbycjleuqz'"
194 ]
195 },
196 "execution_count": 46,
197 "metadata": {},
198 "output_type": "execute_result"
199 }
200 ],
201 "source": [
202 "import random\n",
203 "import string\n",
204 "\n",
205 "cat(random.choices(string.ascii_lowercase, k=10))"
206 ]
207 },
208 {
209 "cell_type": "code",
210 "execution_count": null,
211 "metadata": {},
212 "outputs": [],
213 "source": []
214 }
215 ],
216 "metadata": {
217 "kernelspec": {
218 "display_name": "Python 3",
219 "language": "python",
220 "name": "python3"
221 },
222 "language_info": {
223 "codemirror_mode": {
224 "name": "ipython",
225 "version": 3
226 },
227 "file_extension": ".py",
228 "mimetype": "text/x-python",
229 "name": "python",
230 "nbconvert_exporter": "python",
231 "pygments_lexer": "ipython3",
232 "version": "3.6.3"
233 }
234 },
235 "nbformat": 4,
236 "nbformat_minor": 2
237 }