Updated for imported data format
[covid19.git] / data_import.ipynb
1 {
2 "cells": [
3 {
4 "cell_type": "markdown",
5 "metadata": {
6 "Collapsed": "false"
7 },
8 "source": [
9 "Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)"
10 ]
11 },
12 {
13 "cell_type": "code",
14 "execution_count": 314,
15 "metadata": {
16 "Collapsed": "false"
17 },
18 "outputs": [],
19 "source": [
20 "from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float\n",
21 "from sqlalchemy import create_engine"
22 ]
23 },
24 {
25 "cell_type": "code",
26 "execution_count": 315,
27 "metadata": {
28 "Collapsed": "false"
29 },
30 "outputs": [],
31 "source": [
32 "import itertools\n",
33 "import collections\n",
34 "import json\n",
35 "import pandas as pd\n",
36 "import numpy as np\n",
37 "from scipy.stats import gmean\n",
38 "import datetime\n",
39 "\n",
40 "import matplotlib as mpl\n",
41 "import matplotlib.pyplot as plt\n",
42 "%matplotlib inline"
43 ]
44 },
45 {
46 "cell_type": "code",
47 "execution_count": 316,
48 "metadata": {
49 "Collapsed": "false"
50 },
51 "outputs": [
52 {
53 "name": "stdout",
54 "output_type": "stream",
55 "text": [
56 "The sql extension is already loaded. To reload it, use:\n",
57 " %reload_ext sql\n"
58 ]
59 }
60 ],
61 "source": [
62 "%load_ext sql"
63 ]
64 },
65 {
66 "cell_type": "code",
67 "execution_count": 317,
68 "metadata": {
69 "Collapsed": "false"
70 },
71 "outputs": [],
72 "source": [
73 "connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'"
74 ]
75 },
76 {
77 "cell_type": "code",
78 "execution_count": 318,
79 "metadata": {
80 "Collapsed": "false"
81 },
82 "outputs": [
83 {
84 "data": {
85 "text/plain": [
86 "'Connected: covid@covid'"
87 ]
88 },
89 "execution_count": 318,
90 "metadata": {},
91 "output_type": "execute_result"
92 }
93 ],
94 "source": [
95 "%sql $connection_string"
96 ]
97 },
98 {
99 "cell_type": "code",
100 "execution_count": 319,
101 "metadata": {
102 "Collapsed": "false"
103 },
104 "outputs": [],
105 "source": [
106 "eng = create_engine(connection_string)\n",
107 "engine = eng.execution_options(isolation_level=\"AUTOCOMMIT\")"
108 ]
109 },
110 {
111 "cell_type": "code",
112 "execution_count": 320,
113 "metadata": {
114 "Collapsed": "false"
115 },
116 "outputs": [],
117 "source": [
118 "DEATH_COUNT_THRESHOLD = 10\n",
119 "COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()\n",
120 "COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()\n",
121 "COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()\n",
122 "# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()\n",
123 "\n",
124 "COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA\n",
125 " 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',\n",
126 " 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',\n",
127 " 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']\n",
128 "COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))\n",
129 "COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))"
130 ]
131 },
132 {
133 "cell_type": "code",
134 "execution_count": 321,
135 "metadata": {
136 "Collapsed": "false"
137 },
138 "outputs": [
139 {
140 "name": "stdout",
141 "output_type": "stream",
142 "text": [
143 " % Total % Received % Xferd Average Speed Time Time Time Current\n",
144 " Dload Upload Total Spent Left Speed\n",
145 "100 623k 100 623k 0 0 927k 0 --:--:-- --:--:-- --:--:-- 925k\n"
146 ]
147 }
148 ],
149 "source": [
150 "!curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv"
151 ]
152 },
153 {
154 "cell_type": "code",
155 "execution_count": 322,
156 "metadata": {
157 "Collapsed": "false"
158 },
159 "outputs": [],
160 "source": [
161 "# First col is a date, treat geoId of NA as 'Namibia', not \"NA\" value\n",
162 "raw_data = pd.read_csv('covid.csv', \n",
163 " parse_dates=[0], dayfirst=True,\n",
164 " keep_default_na=False, na_values = [''],\n",
165 "# dtype = {'day': np.int64, \n",
166 "# 'month': np.int64, \n",
167 "# 'year': np.int64, \n",
168 "# 'cases': np.int64, \n",
169 "# 'deaths': np.int64, \n",
170 "# 'countriesAndTerritories': str, \n",
171 "# 'geoId': str, \n",
172 "# 'countryterritoryCode': str, \n",
173 "# 'popData2019': np.int64, \n",
174 "# 'continentExp': str, \n",
175 "# }\n",
176 " )"
177 ]
178 },
179 {
180 "cell_type": "code",
181 "execution_count": 323,
182 "metadata": {
183 "Collapsed": "false"
184 },
185 "outputs": [
186 {
187 "data": {
188 "text/plain": [
189 "100050"
190 ]
191 },
192 "execution_count": 323,
193 "metadata": {},
194 "output_type": "execute_result"
195 }
196 ],
197 "source": [
198 "raw_data.size"
199 ]
200 },
201 {
202 "cell_type": "code",
203 "execution_count": 324,
204 "metadata": {
205 "Collapsed": "false"
206 },
207 "outputs": [],
208 "source": [
209 "raw_data.fillna(0, inplace=True)"
210 ]
211 },
212 {
213 "cell_type": "code",
214 "execution_count": 325,
215 "metadata": {
216 "Collapsed": "false"
217 },
218 "outputs": [
219 {
220 "data": {
221 "text/html": [
222 "<div>\n",
223 "<style scoped>\n",
224 " .dataframe tbody tr th:only-of-type {\n",
225 " vertical-align: middle;\n",
226 " }\n",
227 "\n",
228 " .dataframe tbody tr th {\n",
229 " vertical-align: top;\n",
230 " }\n",
231 "\n",
232 " .dataframe thead th {\n",
233 " text-align: right;\n",
234 " }\n",
235 "</style>\n",
236 "<table border=\"1\" class=\"dataframe\">\n",
237 " <thead>\n",
238 " <tr style=\"text-align: right;\">\n",
239 " <th></th>\n",
240 " <th>dateRep</th>\n",
241 " <th>year_week</th>\n",
242 " <th>cases_weekly</th>\n",
243 " <th>deaths_weekly</th>\n",
244 " <th>countriesAndTerritories</th>\n",
245 " <th>geoId</th>\n",
246 " <th>countryterritoryCode</th>\n",
247 " <th>popData2019</th>\n",
248 " <th>continentExp</th>\n",
249 " <th>notification_rate_per_100000_population_14-days</th>\n",
250 " </tr>\n",
251 " </thead>\n",
252 " <tbody>\n",
253 " <tr>\n",
254 " <th>0</th>\n",
255 " <td>2021-01-18</td>\n",
256 " <td>2021-02</td>\n",
257 " <td>557</td>\n",
258 " <td>45</td>\n",
259 " <td>Afghanistan</td>\n",
260 " <td>AF</td>\n",
261 " <td>AFG</td>\n",
262 " <td>38041757.0</td>\n",
263 " <td>Asia</td>\n",
264 " <td>3.24</td>\n",
265 " </tr>\n",
266 " <tr>\n",
267 " <th>1</th>\n",
268 " <td>2021-01-11</td>\n",
269 " <td>2021-01</td>\n",
270 " <td>675</td>\n",
271 " <td>71</td>\n",
272 " <td>Afghanistan</td>\n",
273 " <td>AF</td>\n",
274 " <td>AFG</td>\n",
275 " <td>38041757.0</td>\n",
276 " <td>Asia</td>\n",
277 " <td>4.15</td>\n",
278 " </tr>\n",
279 " <tr>\n",
280 " <th>2</th>\n",
281 " <td>2021-01-04</td>\n",
282 " <td>2020-53</td>\n",
283 " <td>902</td>\n",
284 " <td>60</td>\n",
285 " <td>Afghanistan</td>\n",
286 " <td>AF</td>\n",
287 " <td>AFG</td>\n",
288 " <td>38041757.0</td>\n",
289 " <td>Asia</td>\n",
290 " <td>7.61</td>\n",
291 " </tr>\n",
292 " <tr>\n",
293 " <th>3</th>\n",
294 " <td>2020-12-28</td>\n",
295 " <td>2020-52</td>\n",
296 " <td>1994</td>\n",
297 " <td>88</td>\n",
298 " <td>Afghanistan</td>\n",
299 " <td>AF</td>\n",
300 " <td>AFG</td>\n",
301 " <td>38041757.0</td>\n",
302 " <td>Asia</td>\n",
303 " <td>7.19</td>\n",
304 " </tr>\n",
305 " <tr>\n",
306 " <th>4</th>\n",
307 " <td>2020-12-21</td>\n",
308 " <td>2020-51</td>\n",
309 " <td>740</td>\n",
310 " <td>111</td>\n",
311 " <td>Afghanistan</td>\n",
312 " <td>AF</td>\n",
313 " <td>AFG</td>\n",
314 " <td>38041757.0</td>\n",
315 " <td>Asia</td>\n",
316 " <td>6.56</td>\n",
317 " </tr>\n",
318 " </tbody>\n",
319 "</table>\n",
320 "</div>"
321 ],
322 "text/plain": [
323 " dateRep year_week cases_weekly deaths_weekly countriesAndTerritories \\\n",
324 "0 2021-01-18 2021-02 557 45 Afghanistan \n",
325 "1 2021-01-11 2021-01 675 71 Afghanistan \n",
326 "2 2021-01-04 2020-53 902 60 Afghanistan \n",
327 "3 2020-12-28 2020-52 1994 88 Afghanistan \n",
328 "4 2020-12-21 2020-51 740 111 Afghanistan \n",
329 "\n",
330 " geoId countryterritoryCode popData2019 continentExp \\\n",
331 "0 AF AFG 38041757.0 Asia \n",
332 "1 AF AFG 38041757.0 Asia \n",
333 "2 AF AFG 38041757.0 Asia \n",
334 "3 AF AFG 38041757.0 Asia \n",
335 "4 AF AFG 38041757.0 Asia \n",
336 "\n",
337 " notification_rate_per_100000_population_14-days \n",
338 "0 3.24 \n",
339 "1 4.15 \n",
340 "2 7.61 \n",
341 "3 7.19 \n",
342 "4 6.56 "
343 ]
344 },
345 "execution_count": 325,
346 "metadata": {},
347 "output_type": "execute_result"
348 }
349 ],
350 "source": [
351 "raw_data.head()"
352 ]
353 },
354 {
355 "cell_type": "code",
356 "execution_count": 326,
357 "metadata": {
358 "Collapsed": "false"
359 },
360 "outputs": [],
361 "source": [
362 "raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',\n",
363 " 'countriesAndTerritories': 'country_name',\n",
364 " 'countryterritoryCode': 'country_territory_code',\n",
365 " 'popData2019': 'population_2019',\n",
366 " 'continentExp': 'continent'}, inplace=True)"
367 ]
368 },
369 {
370 "cell_type": "code",
371 "execution_count": 327,
372 "metadata": {
373 "Collapsed": "false"
374 },
375 "outputs": [
376 {
377 "data": {
378 "text/plain": [
379 "Index(['report_date', 'year_week', 'cases_weekly', 'deaths_weekly',\n",
380 " 'country_name', 'geo_id', 'country_territory_code', 'population_2019',\n",
381 " 'continent', 'notification_rate_per_100000_population_14-days'],\n",
382 " dtype='object')"
383 ]
384 },
385 "execution_count": 327,
386 "metadata": {},
387 "output_type": "execute_result"
388 }
389 ],
390 "source": [
391 "raw_data.columns"
392 ]
393 },
394 {
395 "cell_type": "code",
396 "execution_count": 328,
397 "metadata": {
398 "Collapsed": "false"
399 },
400 "outputs": [
401 {
402 "data": {
403 "text/plain": [
404 "report_date datetime64[ns]\n",
405 "year_week object\n",
406 "cases_weekly int64\n",
407 "deaths_weekly int64\n",
408 "country_name object\n",
409 "geo_id object\n",
410 "country_territory_code object\n",
411 "population_2019 float64\n",
412 "continent object\n",
413 "notification_rate_per_100000_population_14-days float64\n",
414 "dtype: object"
415 ]
416 },
417 "execution_count": 328,
418 "metadata": {},
419 "output_type": "execute_result"
420 }
421 ],
422 "source": [
423 "raw_data.dtypes"
424 ]
425 },
426 {
427 "cell_type": "code",
428 "execution_count": 329,
429 "metadata": {
430 "Collapsed": "false"
431 },
432 "outputs": [
433 {
434 "data": {
435 "text/html": [
436 "<div>\n",
437 "<style scoped>\n",
438 " .dataframe tbody tr th:only-of-type {\n",
439 " vertical-align: middle;\n",
440 " }\n",
441 "\n",
442 " .dataframe tbody tr th {\n",
443 " vertical-align: top;\n",
444 " }\n",
445 "\n",
446 " .dataframe thead th {\n",
447 " text-align: right;\n",
448 " }\n",
449 "</style>\n",
450 "<table border=\"1\" class=\"dataframe\">\n",
451 " <thead>\n",
452 " <tr style=\"text-align: right;\">\n",
453 " <th></th>\n",
454 " <th>report_date</th>\n",
455 " <th>cases_weekly</th>\n",
456 " <th>deaths_weekly</th>\n",
457 " <th>geo_id</th>\n",
458 " <th>notification_rate_per_100000_population_14-days</th>\n",
459 " </tr>\n",
460 " </thead>\n",
461 " <tbody>\n",
462 " <tr>\n",
463 " <th>0</th>\n",
464 " <td>2021-01-18</td>\n",
465 " <td>557</td>\n",
466 " <td>45</td>\n",
467 " <td>AF</td>\n",
468 " <td>3.24</td>\n",
469 " </tr>\n",
470 " <tr>\n",
471 " <th>1</th>\n",
472 " <td>2021-01-11</td>\n",
473 " <td>675</td>\n",
474 " <td>71</td>\n",
475 " <td>AF</td>\n",
476 " <td>4.15</td>\n",
477 " </tr>\n",
478 " <tr>\n",
479 " <th>2</th>\n",
480 " <td>2021-01-04</td>\n",
481 " <td>902</td>\n",
482 " <td>60</td>\n",
483 " <td>AF</td>\n",
484 " <td>7.61</td>\n",
485 " </tr>\n",
486 " <tr>\n",
487 " <th>3</th>\n",
488 " <td>2020-12-28</td>\n",
489 " <td>1994</td>\n",
490 " <td>88</td>\n",
491 " <td>AF</td>\n",
492 " <td>7.19</td>\n",
493 " </tr>\n",
494 " <tr>\n",
495 " <th>4</th>\n",
496 " <td>2020-12-21</td>\n",
497 " <td>740</td>\n",
498 " <td>111</td>\n",
499 " <td>AF</td>\n",
500 " <td>6.56</td>\n",
501 " </tr>\n",
502 " <tr>\n",
503 " <th>...</th>\n",
504 " <td>...</td>\n",
505 " <td>...</td>\n",
506 " <td>...</td>\n",
507 " <td>...</td>\n",
508 " <td>...</td>\n",
509 " </tr>\n",
510 " <tr>\n",
511 " <th>10000</th>\n",
512 " <td>2020-04-20</td>\n",
513 " <td>11</td>\n",
514 " <td>0</td>\n",
515 " <td>ZW</td>\n",
516 " <td>0.11</td>\n",
517 " </tr>\n",
518 " <tr>\n",
519 " <th>10001</th>\n",
520 " <td>2020-04-13</td>\n",
521 " <td>5</td>\n",
522 " <td>2</td>\n",
523 " <td>ZW</td>\n",
524 " <td>0.05</td>\n",
525 " </tr>\n",
526 " <tr>\n",
527 " <th>10002</th>\n",
528 " <td>2020-04-06</td>\n",
529 " <td>2</td>\n",
530 " <td>0</td>\n",
531 " <td>ZW</td>\n",
532 " <td>0.05</td>\n",
533 " </tr>\n",
534 " <tr>\n",
535 " <th>10003</th>\n",
536 " <td>2020-03-30</td>\n",
537 " <td>5</td>\n",
538 " <td>1</td>\n",
539 " <td>ZW</td>\n",
540 " <td>0.05</td>\n",
541 " </tr>\n",
542 " <tr>\n",
543 " <th>10004</th>\n",
544 " <td>2020-03-23</td>\n",
545 " <td>2</td>\n",
546 " <td>0</td>\n",
547 " <td>ZW</td>\n",
548 " <td>0.00</td>\n",
549 " </tr>\n",
550 " </tbody>\n",
551 "</table>\n",
552 "<p>10005 rows × 5 columns</p>\n",
553 "</div>"
554 ],
555 "text/plain": [
556 " report_date cases_weekly deaths_weekly geo_id \\\n",
557 "0 2021-01-18 557 45 AF \n",
558 "1 2021-01-11 675 71 AF \n",
559 "2 2021-01-04 902 60 AF \n",
560 "3 2020-12-28 1994 88 AF \n",
561 "4 2020-12-21 740 111 AF \n",
562 "... ... ... ... ... \n",
563 "10000 2020-04-20 11 0 ZW \n",
564 "10001 2020-04-13 5 2 ZW \n",
565 "10002 2020-04-06 2 0 ZW \n",
566 "10003 2020-03-30 5 1 ZW \n",
567 "10004 2020-03-23 2 0 ZW \n",
568 "\n",
569 " notification_rate_per_100000_population_14-days \n",
570 "0 3.24 \n",
571 "1 4.15 \n",
572 "2 7.61 \n",
573 "3 7.19 \n",
574 "4 6.56 \n",
575 "... ... \n",
576 "10000 0.11 \n",
577 "10001 0.05 \n",
578 "10002 0.05 \n",
579 "10003 0.05 \n",
580 "10004 0.00 \n",
581 "\n",
582 "[10005 rows x 5 columns]"
583 ]
584 },
585 "execution_count": 329,
586 "metadata": {},
587 "output_type": "execute_result"
588 }
589 ],
590 "source": [
591 "raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']]"
592 ]
593 },
594 {
595 "cell_type": "code",
596 "execution_count": 330,
597 "metadata": {
598 "Collapsed": "false"
599 },
600 "outputs": [
601 {
602 "data": {
603 "text/html": [
604 "<div>\n",
605 "<style scoped>\n",
606 " .dataframe tbody tr th:only-of-type {\n",
607 " vertical-align: middle;\n",
608 " }\n",
609 "\n",
610 " .dataframe tbody tr th {\n",
611 " vertical-align: top;\n",
612 " }\n",
613 "\n",
614 " .dataframe thead th {\n",
615 " text-align: right;\n",
616 " }\n",
617 "</style>\n",
618 "<table border=\"1\" class=\"dataframe\">\n",
619 " <thead>\n",
620 " <tr style=\"text-align: right;\">\n",
621 " <th></th>\n",
622 " <th>country_name</th>\n",
623 " <th>geo_id</th>\n",
624 " <th>country_territory_code</th>\n",
625 " <th>population_2019</th>\n",
626 " <th>continent</th>\n",
627 " </tr>\n",
628 " </thead>\n",
629 " <tbody>\n",
630 " <tr>\n",
631 " <th>0</th>\n",
632 " <td>Afghanistan</td>\n",
633 " <td>AF</td>\n",
634 " <td>AFG</td>\n",
635 " <td>38041757.0</td>\n",
636 " <td>Asia</td>\n",
637 " </tr>\n",
638 " <tr>\n",
639 " <th>1</th>\n",
640 " <td>Afghanistan</td>\n",
641 " <td>AF</td>\n",
642 " <td>AFG</td>\n",
643 " <td>38041757.0</td>\n",
644 " <td>Asia</td>\n",
645 " </tr>\n",
646 " <tr>\n",
647 " <th>2</th>\n",
648 " <td>Afghanistan</td>\n",
649 " <td>AF</td>\n",
650 " <td>AFG</td>\n",
651 " <td>38041757.0</td>\n",
652 " <td>Asia</td>\n",
653 " </tr>\n",
654 " <tr>\n",
655 " <th>3</th>\n",
656 " <td>Afghanistan</td>\n",
657 " <td>AF</td>\n",
658 " <td>AFG</td>\n",
659 " <td>38041757.0</td>\n",
660 " <td>Asia</td>\n",
661 " </tr>\n",
662 " <tr>\n",
663 " <th>4</th>\n",
664 " <td>Afghanistan</td>\n",
665 " <td>AF</td>\n",
666 " <td>AFG</td>\n",
667 " <td>38041757.0</td>\n",
668 " <td>Asia</td>\n",
669 " </tr>\n",
670 " <tr>\n",
671 " <th>...</th>\n",
672 " <td>...</td>\n",
673 " <td>...</td>\n",
674 " <td>...</td>\n",
675 " <td>...</td>\n",
676 " <td>...</td>\n",
677 " </tr>\n",
678 " <tr>\n",
679 " <th>10000</th>\n",
680 " <td>Zimbabwe</td>\n",
681 " <td>ZW</td>\n",
682 " <td>ZWE</td>\n",
683 " <td>14645473.0</td>\n",
684 " <td>Africa</td>\n",
685 " </tr>\n",
686 " <tr>\n",
687 " <th>10001</th>\n",
688 " <td>Zimbabwe</td>\n",
689 " <td>ZW</td>\n",
690 " <td>ZWE</td>\n",
691 " <td>14645473.0</td>\n",
692 " <td>Africa</td>\n",
693 " </tr>\n",
694 " <tr>\n",
695 " <th>10002</th>\n",
696 " <td>Zimbabwe</td>\n",
697 " <td>ZW</td>\n",
698 " <td>ZWE</td>\n",
699 " <td>14645473.0</td>\n",
700 " <td>Africa</td>\n",
701 " </tr>\n",
702 " <tr>\n",
703 " <th>10003</th>\n",
704 " <td>Zimbabwe</td>\n",
705 " <td>ZW</td>\n",
706 " <td>ZWE</td>\n",
707 " <td>14645473.0</td>\n",
708 " <td>Africa</td>\n",
709 " </tr>\n",
710 " <tr>\n",
711 " <th>10004</th>\n",
712 " <td>Zimbabwe</td>\n",
713 " <td>ZW</td>\n",
714 " <td>ZWE</td>\n",
715 " <td>14645473.0</td>\n",
716 " <td>Africa</td>\n",
717 " </tr>\n",
718 " </tbody>\n",
719 "</table>\n",
720 "<p>10005 rows × 5 columns</p>\n",
721 "</div>"
722 ],
723 "text/plain": [
724 " country_name geo_id country_territory_code population_2019 continent\n",
725 "0 Afghanistan AF AFG 38041757.0 Asia\n",
726 "1 Afghanistan AF AFG 38041757.0 Asia\n",
727 "2 Afghanistan AF AFG 38041757.0 Asia\n",
728 "3 Afghanistan AF AFG 38041757.0 Asia\n",
729 "4 Afghanistan AF AFG 38041757.0 Asia\n",
730 "... ... ... ... ... ...\n",
731 "10000 Zimbabwe ZW ZWE 14645473.0 Africa\n",
732 "10001 Zimbabwe ZW ZWE 14645473.0 Africa\n",
733 "10002 Zimbabwe ZW ZWE 14645473.0 Africa\n",
734 "10003 Zimbabwe ZW ZWE 14645473.0 Africa\n",
735 "10004 Zimbabwe ZW ZWE 14645473.0 Africa\n",
736 "\n",
737 "[10005 rows x 5 columns]"
738 ]
739 },
740 "execution_count": 330,
741 "metadata": {},
742 "output_type": "execute_result"
743 }
744 ],
745 "source": [
746 "raw_data[['country_name', 'geo_id', 'country_territory_code',\n",
747 " 'population_2019', 'continent']]"
748 ]
749 },
750 {
751 "cell_type": "code",
752 "execution_count": 331,
753 "metadata": {
754 "Collapsed": "false"
755 },
756 "outputs": [],
757 "source": [
758 "raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']].to_sql(\n",
759 " 'weekly_cases',\n",
760 " engine,\n",
761 " if_exists='replace',\n",
762 " index=False,\n",
763 " chunksize=500,\n",
764 " dtype={\n",
765 " \"report_date\": Date,\n",
766 " \"cases_weekly\": Integer,\n",
767 " \"deaths_weekly\": Integer,\n",
768 " \"geo_id\": String,\n",
769 " \"notification_rate_per_100000_population_14-days\": Float\n",
770 " }\n",
771 ")"
772 ]
773 },
774 {
775 "cell_type": "code",
776 "execution_count": 332,
777 "metadata": {
778 "Collapsed": "false"
779 },
780 "outputs": [],
781 "source": [
782 "raw_data[['country_name', 'geo_id', 'country_territory_code',\n",
783 " 'population_2019', 'continent']].drop_duplicates().to_sql(\n",
784 " 'countries',\n",
785 " engine,\n",
786 " if_exists='replace',\n",
787 " index=False,\n",
788 " chunksize=500,\n",
789 " dtype={\n",
790 " \"country_name\": Text,\n",
791 " \"geo_id\": String,\n",
792 " \"country_territory_code\": String,\n",
793 " \"population_2019\": Integer,\n",
794 " \"continent\": Text\n",
795 " }\n",
796 ")"
797 ]
798 },
799 {
800 "cell_type": "code",
801 "execution_count": 333,
802 "metadata": {
803 "Collapsed": "false",
804 "scrolled": true
805 },
806 "outputs": [],
807 "source": [
808 "# %sql select geo_id from weekly_cases limit 10"
809 ]
810 },
811 {
812 "cell_type": "code",
813 "execution_count": 334,
814 "metadata": {
815 "Collapsed": "false"
816 },
817 "outputs": [],
818 "source": [
819 "# %%sql alter table weekly_cases add primary key (geo_id, report_date);\n",
820 "# alter table countries add primary key (geo_id);\n",
821 "# alter table weekly_cases add foreign key (geo_id) references countries(geo_id);\n",
822 "# alter table weekly_cases add culm_cases integer;\n",
823 "# alter table weekly_cases add culm_deaths integer;"
824 ]
825 },
826 {
827 "cell_type": "code",
828 "execution_count": 335,
829 "metadata": {},
830 "outputs": [],
831 "source": [
832 "with engine.connect() as connection:\n",
833 " connection.execute('alter table weekly_cases add primary key (geo_id, report_date)')\n",
834 " connection.execute('alter table countries add primary key (geo_id);')\n",
835 " connection.execute('alter table weekly_cases add foreign key (geo_id) references countries(geo_id);')\n",
836 " connection.execute('alter table weekly_cases add culm_cases integer;')\n",
837 " connection.execute('alter table weekly_cases add culm_deaths integer;')"
838 ]
839 },
840 {
841 "cell_type": "code",
842 "execution_count": 336,
843 "metadata": {
844 "Collapsed": "false"
845 },
846 "outputs": [],
847 "source": [
848 "# %sql select report_date, cases_weekly, country_name from weekly_cases join countries using (geo_id) order by report_date desc limit 10"
849 ]
850 },
851 {
852 "cell_type": "code",
853 "execution_count": 337,
854 "metadata": {
855 "Collapsed": "false"
856 },
857 "outputs": [],
858 "source": [
859 "# %sql select report_date, cases_weekly, country_name from weekly_cases join countries on weekly_cases.geo_id = countries.geo_id order by report_date desc limit 10"
860 ]
861 },
862 {
863 "cell_type": "code",
864 "execution_count": 338,
865 "metadata": {
866 "Collapsed": "false"
867 },
868 "outputs": [
869 {
870 "name": "stdout",
871 "output_type": "stream",
872 "text": [
873 " * postgresql://covid:***@localhost/covid\n",
874 "10 rows affected.\n"
875 ]
876 },
877 {
878 "data": {
879 "text/html": [
880 "<table>\n",
881 " <thead>\n",
882 " <tr>\n",
883 " <th>report_date</th>\n",
884 " <th>cases_weekly</th>\n",
885 " <th>deaths_weekly</th>\n",
886 " <th>geo_id</th>\n",
887 " <th>notification_rate_per_100000_population_14-days</th>\n",
888 " <th>culm_cases</th>\n",
889 " <th>culm_deaths</th>\n",
890 " </tr>\n",
891 " </thead>\n",
892 " <tbody>\n",
893 " <tr>\n",
894 " <td>2021-01-18</td>\n",
895 " <td>557</td>\n",
896 " <td>45</td>\n",
897 " <td>AF</td>\n",
898 " <td>3.24</td>\n",
899 " <td>None</td>\n",
900 " <td>None</td>\n",
901 " </tr>\n",
902 " <tr>\n",
903 " <td>2021-01-11</td>\n",
904 " <td>675</td>\n",
905 " <td>71</td>\n",
906 " <td>AF</td>\n",
907 " <td>4.15</td>\n",
908 " <td>None</td>\n",
909 " <td>None</td>\n",
910 " </tr>\n",
911 " <tr>\n",
912 " <td>2021-01-04</td>\n",
913 " <td>902</td>\n",
914 " <td>60</td>\n",
915 " <td>AF</td>\n",
916 " <td>7.61</td>\n",
917 " <td>None</td>\n",
918 " <td>None</td>\n",
919 " </tr>\n",
920 " <tr>\n",
921 " <td>2020-12-28</td>\n",
922 " <td>1994</td>\n",
923 " <td>88</td>\n",
924 " <td>AF</td>\n",
925 " <td>7.19</td>\n",
926 " <td>None</td>\n",
927 " <td>None</td>\n",
928 " </tr>\n",
929 " <tr>\n",
930 " <td>2020-12-21</td>\n",
931 " <td>740</td>\n",
932 " <td>111</td>\n",
933 " <td>AF</td>\n",
934 " <td>6.56</td>\n",
935 " <td>None</td>\n",
936 " <td>None</td>\n",
937 " </tr>\n",
938 " <tr>\n",
939 " <td>2020-12-14</td>\n",
940 " <td>1757</td>\n",
941 " <td>71</td>\n",
942 " <td>AF</td>\n",
943 " <td>9.01</td>\n",
944 " <td>None</td>\n",
945 " <td>None</td>\n",
946 " </tr>\n",
947 " <tr>\n",
948 " <td>2020-12-07</td>\n",
949 " <td>1672</td>\n",
950 " <td>137</td>\n",
951 " <td>AF</td>\n",
952 " <td>7.22</td>\n",
953 " <td>None</td>\n",
954 " <td>None</td>\n",
955 " </tr>\n",
956 " <tr>\n",
957 " <td>2020-11-30</td>\n",
958 " <td>1073</td>\n",
959 " <td>68</td>\n",
960 " <td>AF</td>\n",
961 " <td>6.42</td>\n",
962 " <td>None</td>\n",
963 " <td>None</td>\n",
964 " </tr>\n",
965 " <tr>\n",
966 " <td>2020-11-23</td>\n",
967 " <td>1368</td>\n",
968 " <td>69</td>\n",
969 " <td>AF</td>\n",
970 " <td>6.66</td>\n",
971 " <td>None</td>\n",
972 " <td>None</td>\n",
973 " </tr>\n",
974 " <tr>\n",
975 " <td>2020-11-16</td>\n",
976 " <td>1164</td>\n",
977 " <td>61</td>\n",
978 " <td>AF</td>\n",
979 " <td>4.65</td>\n",
980 " <td>None</td>\n",
981 " <td>None</td>\n",
982 " </tr>\n",
983 " </tbody>\n",
984 "</table>"
985 ],
986 "text/plain": [
987 "[(datetime.date(2021, 1, 18), 557, 45, 'AF', 3.24, None, None),\n",
988 " (datetime.date(2021, 1, 11), 675, 71, 'AF', 4.15, None, None),\n",
989 " (datetime.date(2021, 1, 4), 902, 60, 'AF', 7.61, None, None),\n",
990 " (datetime.date(2020, 12, 28), 1994, 88, 'AF', 7.19, None, None),\n",
991 " (datetime.date(2020, 12, 21), 740, 111, 'AF', 6.56, None, None),\n",
992 " (datetime.date(2020, 12, 14), 1757, 71, 'AF', 9.01, None, None),\n",
993 " (datetime.date(2020, 12, 7), 1672, 137, 'AF', 7.22, None, None),\n",
994 " (datetime.date(2020, 11, 30), 1073, 68, 'AF', 6.42, None, None),\n",
995 " (datetime.date(2020, 11, 23), 1368, 69, 'AF', 6.66, None, None),\n",
996 " (datetime.date(2020, 11, 16), 1164, 61, 'AF', 4.65, None, None)]"
997 ]
998 },
999 "execution_count": 338,
1000 "metadata": {},
1001 "output_type": "execute_result"
1002 }
1003 ],
1004 "source": [
1005 "%sql select * from weekly_cases limit 10"
1006 ]
1007 },
1008 {
1009 "cell_type": "code",
1010 "execution_count": 339,
1011 "metadata": {
1012 "Collapsed": "false"
1013 },
1014 "outputs": [],
1015 "source": [
1016 "# %%sql\n",
1017 "# with culm as \n",
1018 "# (select report_date, geo_id,\n",
1019 "# sum(cases_weekly) over (partition by geo_id \n",
1020 "# order by report_date) as culm_data\n",
1021 "# from weekly_cases\n",
1022 "# )\n",
1023 "# update weekly_cases\n",
1024 "# set culm_cases = culm_data\n",
1025 "# from culm\n",
1026 "# where weekly_cases.report_date = culm.report_date and\n",
1027 "# weekly_cases.geo_id = culm.geo_id"
1028 ]
1029 },
1030 {
1031 "cell_type": "code",
1032 "execution_count": 340,
1033 "metadata": {
1034 "Collapsed": "false"
1035 },
1036 "outputs": [],
1037 "source": [
1038 "query_string = '''with culm as \n",
1039 " (select report_date, geo_id,\n",
1040 " sum(cases_weekly) over (partition by geo_id \n",
1041 " order by report_date) as culm_data\n",
1042 " from weekly_cases\n",
1043 " )\n",
1044 "update weekly_cases\n",
1045 " set culm_cases = culm_data\n",
1046 " from culm\n",
1047 " where weekly_cases.report_date = culm.report_date and\n",
1048 " weekly_cases.geo_id = culm.geo_id'''\n",
1049 "with engine.connect() as connection:\n",
1050 " connection.execute(query_string)"
1051 ]
1052 },
1053 {
1054 "cell_type": "code",
1055 "execution_count": 341,
1056 "metadata": {
1057 "Collapsed": "false"
1058 },
1059 "outputs": [],
1060 "source": [
1061 "# %%sql\n",
1062 "# with culm as \n",
1063 "# (select report_date, geo_id,\n",
1064 "# sum(deaths_weekly) over (partition by geo_id \n",
1065 "# order by report_date) as culm_data\n",
1066 "# from weekly_cases\n",
1067 "# )\n",
1068 "# update weekly_cases\n",
1069 "# set culm_deaths = culm_data\n",
1070 "# from culm\n",
1071 "# where weekly_cases.report_date = culm.report_date and\n",
1072 "# weekly_cases.geo_id = culm.geo_id"
1073 ]
1074 },
1075 {
1076 "cell_type": "code",
1077 "execution_count": 342,
1078 "metadata": {
1079 "Collapsed": "false"
1080 },
1081 "outputs": [],
1082 "source": [
1083 "query_string = '''with culm as \n",
1084 " (select report_date, geo_id,\n",
1085 " sum(deaths_weekly) over (partition by geo_id \n",
1086 " order by report_date) as culm_data\n",
1087 " from weekly_cases\n",
1088 " )\n",
1089 "update weekly_cases\n",
1090 " set culm_deaths = culm_data\n",
1091 " from culm\n",
1092 " where weekly_cases.report_date = culm.report_date and\n",
1093 " weekly_cases.geo_id = culm.geo_id'''\n",
1094 "with engine.connect() as connection:\n",
1095 " connection.execute(query_string)"
1096 ]
1097 },
1098 {
1099 "cell_type": "code",
1100 "execution_count": 343,
1101 "metadata": {
1102 "Collapsed": "false"
1103 },
1104 "outputs": [
1105 {
1106 "name": "stdout",
1107 "output_type": "stream",
1108 "text": [
1109 " % Total % Received % Xferd Average Speed Time Time Time Current\n",
1110 " Dload Upload Total Spent Left Speed\n",
1111 "100 26053 100 26053 0 0 54277 0 --:--:-- --:--:-- --:--:-- 54277\n"
1112 ]
1113 }
1114 ],
1115 "source": [
1116 "uk_query_string = (\n",
1117 "\"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n",
1118 "\"metric=covidOccupiedMVBeds&\"\n",
1119 "\"metric=newAdmissions&\"\n",
1120 "\"metric=newCasesBySpecimenDate&\"\n",
1121 "\"metric=hospitalCases&\"\n",
1122 "\"metric=newDeaths28DaysByPublishDate&\"\n",
1123 "\"format=csv\"\n",
1124 ")\n",
1125 "\n",
1126 "!curl \"$uk_query_string\" > uk_data.csv"
1127 ]
1128 },
1129 {
1130 "cell_type": "code",
1131 "execution_count": 344,
1132 "metadata": {
1133 "Collapsed": "false"
1134 },
1135 "outputs": [
1136 {
1137 "name": "stdout",
1138 "output_type": "stream",
1139 "text": [
1140 " % Total % Received % Xferd Average Speed Time Time Time Current\n",
1141 " Dload Upload Total Spent Left Speed\n",
1142 "100 20223 100 20223 0 0 78688 0 --:--:-- --:--:-- --:--:-- 78383\n"
1143 ]
1144 }
1145 ],
1146 "source": [
1147 "test_query_string = (\n",
1148 "\"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n",
1149 "\"metric=newPCRTestsByPublishDate&\"\n",
1150 "\"metric=newTestsByPublishDate&\"\n",
1151 "\"metric=newPillarOneTwoTestsByPublishDate&\"\n",
1152 "\"format=csv\"\n",
1153 ")\n",
1154 "!curl \"$test_query_string\" > test_data.csv"
1155 ]
1156 },
1157 {
1158 "cell_type": "code",
1159 "execution_count": 345,
1160 "metadata": {
1161 "Collapsed": "false"
1162 },
1163 "outputs": [],
1164 "source": [
1165 "# hospital_query_string = (\n",
1166 "# \"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n",
1167 "# \"metric=newAdmissions&\"\n",
1168 "# \"format=csv\"\n",
1169 "# )\n",
1170 "# !curl \"$hospital_query_string\" > hospital_admissions.csv"
1171 ]
1172 },
1173 {
1174 "cell_type": "code",
1175 "execution_count": 346,
1176 "metadata": {
1177 "Collapsed": "false"
1178 },
1179 "outputs": [],
1180 "source": [
1181 "# hospital_query_string = (\n",
1182 "# \"https://api.coronavirus.data.gov.uk/v1/data?\"\n",
1183 "# \"filters=areaName=United%2520Kingdom;areaType=overview&\"\n",
1184 "# \"structure=%7B%22date%22:%22date%22,%22areaName%22:%22areaName%22,%22areaType%22:%22areaType%22,\"\n",
1185 "# \"%22newAdmissions%22:%22newAdmissions%22,%22cumAdmissions%22:%22cumAdmissions%22%7D&format=csv\"\n",
1186 "# )\n",
1187 " \n",
1188 "# !curl \"$hospital_query_string\" | gunzip > hospital_admissions.csv"
1189 ]
1190 },
1191 {
1192 "cell_type": "code",
1193 "execution_count": 347,
1194 "metadata": {
1195 "Collapsed": "false"
1196 },
1197 "outputs": [
1198 {
1199 "data": {
1200 "text/html": [
1201 "<div>\n",
1202 "<style scoped>\n",
1203 " .dataframe tbody tr th:only-of-type {\n",
1204 " vertical-align: middle;\n",
1205 " }\n",
1206 "\n",
1207 " .dataframe tbody tr th {\n",
1208 " vertical-align: top;\n",
1209 " }\n",
1210 "\n",
1211 " .dataframe thead th {\n",
1212 " text-align: right;\n",
1213 " }\n",
1214 "</style>\n",
1215 "<table border=\"1\" class=\"dataframe\">\n",
1216 " <thead>\n",
1217 " <tr style=\"text-align: right;\">\n",
1218 " <th></th>\n",
1219 " <th>date</th>\n",
1220 " <th>areaType</th>\n",
1221 " <th>areaCode</th>\n",
1222 " <th>areaName</th>\n",
1223 " <th>covidOccupiedMVBeds</th>\n",
1224 " <th>newAdmissions</th>\n",
1225 " <th>newCasesBySpecimenDate</th>\n",
1226 " <th>hospitalCases</th>\n",
1227 " <th>newDeaths28DaysByPublishDate</th>\n",
1228 " </tr>\n",
1229 " </thead>\n",
1230 " <tbody>\n",
1231 " <tr>\n",
1232 " <th>0</th>\n",
1233 " <td>2021-01-26</td>\n",
1234 " <td>overview</td>\n",
1235 " <td>K02000001</td>\n",
1236 " <td>United Kingdom</td>\n",
1237 " <td>NaN</td>\n",
1238 " <td>NaN</td>\n",
1239 " <td>NaN</td>\n",
1240 " <td>NaN</td>\n",
1241 " <td>1631</td>\n",
1242 " </tr>\n",
1243 " <tr>\n",
1244 " <th>1</th>\n",
1245 " <td>2021-01-25</td>\n",
1246 " <td>overview</td>\n",
1247 " <td>K02000001</td>\n",
1248 " <td>United Kingdom</td>\n",
1249 " <td>4032.0</td>\n",
1250 " <td>NaN</td>\n",
1251 " <td>4482.0</td>\n",
1252 " <td>NaN</td>\n",
1253 " <td>592</td>\n",
1254 " </tr>\n",
1255 " <tr>\n",
1256 " <th>2</th>\n",
1257 " <td>2021-01-24</td>\n",
1258 " <td>overview</td>\n",
1259 " <td>K02000001</td>\n",
1260 " <td>United Kingdom</td>\n",
1261 " <td>4077.0</td>\n",
1262 " <td>NaN</td>\n",
1263 " <td>14266.0</td>\n",
1264 " <td>37561.0</td>\n",
1265 " <td>610</td>\n",
1266 " </tr>\n",
1267 " <tr>\n",
1268 " <th>3</th>\n",
1269 " <td>2021-01-23</td>\n",
1270 " <td>overview</td>\n",
1271 " <td>K02000001</td>\n",
1272 " <td>United Kingdom</td>\n",
1273 " <td>4066.0</td>\n",
1274 " <td>NaN</td>\n",
1275 " <td>20495.0</td>\n",
1276 " <td>37266.0</td>\n",
1277 " <td>1348</td>\n",
1278 " </tr>\n",
1279 " <tr>\n",
1280 " <th>4</th>\n",
1281 " <td>2021-01-22</td>\n",
1282 " <td>overview</td>\n",
1283 " <td>K02000001</td>\n",
1284 " <td>United Kingdom</td>\n",
1285 " <td>4076.0</td>\n",
1286 " <td>3341.0</td>\n",
1287 " <td>29094.0</td>\n",
1288 " <td>38144.0</td>\n",
1289 " <td>1401</td>\n",
1290 " </tr>\n",
1291 " <tr>\n",
1292 " <th>...</th>\n",
1293 " <td>...</td>\n",
1294 " <td>...</td>\n",
1295 " <td>...</td>\n",
1296 " <td>...</td>\n",
1297 " <td>...</td>\n",
1298 " <td>...</td>\n",
1299 " <td>...</td>\n",
1300 " <td>...</td>\n",
1301 " <td>...</td>\n",
1302 " </tr>\n",
1303 " <tr>\n",
1304 " <th>385</th>\n",
1305 " <td>2020-01-06</td>\n",
1306 " <td>overview</td>\n",
1307 " <td>K02000001</td>\n",
1308 " <td>United Kingdom</td>\n",
1309 " <td>NaN</td>\n",
1310 " <td>NaN</td>\n",
1311 " <td>NaN</td>\n",
1312 " <td>NaN</td>\n",
1313 " <td>0</td>\n",
1314 " </tr>\n",
1315 " <tr>\n",
1316 " <th>386</th>\n",
1317 " <td>2020-01-05</td>\n",
1318 " <td>overview</td>\n",
1319 " <td>K02000001</td>\n",
1320 " <td>United Kingdom</td>\n",
1321 " <td>NaN</td>\n",
1322 " <td>NaN</td>\n",
1323 " <td>NaN</td>\n",
1324 " <td>NaN</td>\n",
1325 " <td>0</td>\n",
1326 " </tr>\n",
1327 " <tr>\n",
1328 " <th>387</th>\n",
1329 " <td>2020-01-04</td>\n",
1330 " <td>overview</td>\n",
1331 " <td>K02000001</td>\n",
1332 " <td>United Kingdom</td>\n",
1333 " <td>NaN</td>\n",
1334 " <td>NaN</td>\n",
1335 " <td>NaN</td>\n",
1336 " <td>NaN</td>\n",
1337 " <td>0</td>\n",
1338 " </tr>\n",
1339 " <tr>\n",
1340 " <th>388</th>\n",
1341 " <td>2020-01-03</td>\n",
1342 " <td>overview</td>\n",
1343 " <td>K02000001</td>\n",
1344 " <td>United Kingdom</td>\n",
1345 " <td>NaN</td>\n",
1346 " <td>NaN</td>\n",
1347 " <td>NaN</td>\n",
1348 " <td>NaN</td>\n",
1349 " <td>0</td>\n",
1350 " </tr>\n",
1351 " <tr>\n",
1352 " <th>389</th>\n",
1353 " <td>2020-06-14</td>\n",
1354 " <td>overview</td>\n",
1355 " <td>K02000001</td>\n",
1356 " <td>United Kingdom</td>\n",
1357 " <td>393.0</td>\n",
1358 " <td>364.0</td>\n",
1359 " <td>821.0</td>\n",
1360 " <td>4907.0</td>\n",
1361 " <td>27</td>\n",
1362 " </tr>\n",
1363 " </tbody>\n",
1364 "</table>\n",
1365 "<p>390 rows × 9 columns</p>\n",
1366 "</div>"
1367 ],
1368 "text/plain": [
1369 " date areaType areaCode areaName covidOccupiedMVBeds \\\n",
1370 "0 2021-01-26 overview K02000001 United Kingdom NaN \n",
1371 "1 2021-01-25 overview K02000001 United Kingdom 4032.0 \n",
1372 "2 2021-01-24 overview K02000001 United Kingdom 4077.0 \n",
1373 "3 2021-01-23 overview K02000001 United Kingdom 4066.0 \n",
1374 "4 2021-01-22 overview K02000001 United Kingdom 4076.0 \n",
1375 ".. ... ... ... ... ... \n",
1376 "385 2020-01-06 overview K02000001 United Kingdom NaN \n",
1377 "386 2020-01-05 overview K02000001 United Kingdom NaN \n",
1378 "387 2020-01-04 overview K02000001 United Kingdom NaN \n",
1379 "388 2020-01-03 overview K02000001 United Kingdom NaN \n",
1380 "389 2020-06-14 overview K02000001 United Kingdom 393.0 \n",
1381 "\n",
1382 " newAdmissions newCasesBySpecimenDate hospitalCases \\\n",
1383 "0 NaN NaN NaN \n",
1384 "1 NaN 4482.0 NaN \n",
1385 "2 NaN 14266.0 37561.0 \n",
1386 "3 NaN 20495.0 37266.0 \n",
1387 "4 3341.0 29094.0 38144.0 \n",
1388 ".. ... ... ... \n",
1389 "385 NaN NaN NaN \n",
1390 "386 NaN NaN NaN \n",
1391 "387 NaN NaN NaN \n",
1392 "388 NaN NaN NaN \n",
1393 "389 364.0 821.0 4907.0 \n",
1394 "\n",
1395 " newDeaths28DaysByPublishDate \n",
1396 "0 1631 \n",
1397 "1 592 \n",
1398 "2 610 \n",
1399 "3 1348 \n",
1400 "4 1401 \n",
1401 ".. ... \n",
1402 "385 0 \n",
1403 "386 0 \n",
1404 "387 0 \n",
1405 "388 0 \n",
1406 "389 27 \n",
1407 "\n",
1408 "[390 rows x 9 columns]"
1409 ]
1410 },
1411 "execution_count": 347,
1412 "metadata": {},
1413 "output_type": "execute_result"
1414 }
1415 ],
1416 "source": [
1417 "uk_data = pd.read_csv('uk_data.csv', \n",
1418 " parse_dates=[0], dayfirst=True)\n",
1419 "uk_data"
1420 ]
1421 },
1422 {
1423 "cell_type": "code",
1424 "execution_count": 348,
1425 "metadata": {
1426 "Collapsed": "false"
1427 },
1428 "outputs": [
1429 {
1430 "data": {
1431 "text/html": [
1432 "<div>\n",
1433 "<style scoped>\n",
1434 " .dataframe tbody tr th:only-of-type {\n",
1435 " vertical-align: middle;\n",
1436 " }\n",
1437 "\n",
1438 " .dataframe tbody tr th {\n",
1439 " vertical-align: top;\n",
1440 " }\n",
1441 "\n",
1442 " .dataframe thead th {\n",
1443 " text-align: right;\n",
1444 " }\n",
1445 "</style>\n",
1446 "<table border=\"1\" class=\"dataframe\">\n",
1447 " <thead>\n",
1448 " <tr style=\"text-align: right;\">\n",
1449 " <th></th>\n",
1450 " <th>date</th>\n",
1451 " <th>areaType</th>\n",
1452 " <th>areaCode</th>\n",
1453 " <th>areaName</th>\n",
1454 " <th>newPCRTestsByPublishDate</th>\n",
1455 " <th>newTestsByPublishDate</th>\n",
1456 " <th>newPillarOneTwoTestsByPublishDate</th>\n",
1457 " </tr>\n",
1458 " </thead>\n",
1459 " <tbody>\n",
1460 " <tr>\n",
1461 " <th>0</th>\n",
1462 " <td>2021-01-25</td>\n",
1463 " <td>overview</td>\n",
1464 " <td>K02000001</td>\n",
1465 " <td>United Kingdom</td>\n",
1466 " <td>196510.0</td>\n",
1467 " <td>546734</td>\n",
1468 " <td>531104</td>\n",
1469 " </tr>\n",
1470 " <tr>\n",
1471 " <th>1</th>\n",
1472 " <td>2021-01-24</td>\n",
1473 " <td>overview</td>\n",
1474 " <td>K02000001</td>\n",
1475 " <td>United Kingdom</td>\n",
1476 " <td>262111.0</td>\n",
1477 " <td>412775</td>\n",
1478 " <td>394479</td>\n",
1479 " </tr>\n",
1480 " <tr>\n",
1481 " <th>2</th>\n",
1482 " <td>2021-01-23</td>\n",
1483 " <td>overview</td>\n",
1484 " <td>K02000001</td>\n",
1485 " <td>United Kingdom</td>\n",
1486 " <td>389209.0</td>\n",
1487 " <td>486425</td>\n",
1488 " <td>465231</td>\n",
1489 " </tr>\n",
1490 " <tr>\n",
1491 " <th>3</th>\n",
1492 " <td>2021-01-22</td>\n",
1493 " <td>overview</td>\n",
1494 " <td>K02000001</td>\n",
1495 " <td>United Kingdom</td>\n",
1496 " <td>401075.0</td>\n",
1497 " <td>631901</td>\n",
1498 " <td>608829</td>\n",
1499 " </tr>\n",
1500 " <tr>\n",
1501 " <th>4</th>\n",
1502 " <td>2021-01-21</td>\n",
1503 " <td>overview</td>\n",
1504 " <td>K02000001</td>\n",
1505 " <td>United Kingdom</td>\n",
1506 " <td>439408.0</td>\n",
1507 " <td>668989</td>\n",
1508 " <td>644537</td>\n",
1509 " </tr>\n",
1510 " <tr>\n",
1511 " <th>...</th>\n",
1512 " <td>...</td>\n",
1513 " <td>...</td>\n",
1514 " <td>...</td>\n",
1515 " <td>...</td>\n",
1516 " <td>...</td>\n",
1517 " <td>...</td>\n",
1518 " <td>...</td>\n",
1519 " </tr>\n",
1520 " <tr>\n",
1521 " <th>296</th>\n",
1522 " <td>2020-04-03</td>\n",
1523 " <td>overview</td>\n",
1524 " <td>K02000001</td>\n",
1525 " <td>United Kingdom</td>\n",
1526 " <td>NaN</td>\n",
1527 " <td>14629</td>\n",
1528 " <td>14293</td>\n",
1529 " </tr>\n",
1530 " <tr>\n",
1531 " <th>297</th>\n",
1532 " <td>2020-04-02</td>\n",
1533 " <td>overview</td>\n",
1534 " <td>K02000001</td>\n",
1535 " <td>United Kingdom</td>\n",
1536 " <td>NaN</td>\n",
1537 " <td>13623</td>\n",
1538 " <td>13457</td>\n",
1539 " </tr>\n",
1540 " <tr>\n",
1541 " <th>298</th>\n",
1542 " <td>2020-04-01</td>\n",
1543 " <td>overview</td>\n",
1544 " <td>K02000001</td>\n",
1545 " <td>United Kingdom</td>\n",
1546 " <td>NaN</td>\n",
1547 " <td>11947</td>\n",
1548 " <td>11924</td>\n",
1549 " </tr>\n",
1550 " <tr>\n",
1551 " <th>299</th>\n",
1552 " <td>2020-03-31</td>\n",
1553 " <td>overview</td>\n",
1554 " <td>K02000001</td>\n",
1555 " <td>United Kingdom</td>\n",
1556 " <td>NaN</td>\n",
1557 " <td>11896</td>\n",
1558 " <td>11896</td>\n",
1559 " </tr>\n",
1560 " <tr>\n",
1561 " <th>300</th>\n",
1562 " <td>2020-06-14</td>\n",
1563 " <td>overview</td>\n",
1564 " <td>K02000001</td>\n",
1565 " <td>United Kingdom</td>\n",
1566 " <td>69529.0</td>\n",
1567 " <td>83965</td>\n",
1568 " <td>66688</td>\n",
1569 " </tr>\n",
1570 " </tbody>\n",
1571 "</table>\n",
1572 "<p>301 rows × 7 columns</p>\n",
1573 "</div>"
1574 ],
1575 "text/plain": [
1576 " date areaType areaCode areaName newPCRTestsByPublishDate \\\n",
1577 "0 2021-01-25 overview K02000001 United Kingdom 196510.0 \n",
1578 "1 2021-01-24 overview K02000001 United Kingdom 262111.0 \n",
1579 "2 2021-01-23 overview K02000001 United Kingdom 389209.0 \n",
1580 "3 2021-01-22 overview K02000001 United Kingdom 401075.0 \n",
1581 "4 2021-01-21 overview K02000001 United Kingdom 439408.0 \n",
1582 ".. ... ... ... ... ... \n",
1583 "296 2020-04-03 overview K02000001 United Kingdom NaN \n",
1584 "297 2020-04-02 overview K02000001 United Kingdom NaN \n",
1585 "298 2020-04-01 overview K02000001 United Kingdom NaN \n",
1586 "299 2020-03-31 overview K02000001 United Kingdom NaN \n",
1587 "300 2020-06-14 overview K02000001 United Kingdom 69529.0 \n",
1588 "\n",
1589 " newTestsByPublishDate newPillarOneTwoTestsByPublishDate \n",
1590 "0 546734 531104 \n",
1591 "1 412775 394479 \n",
1592 "2 486425 465231 \n",
1593 "3 631901 608829 \n",
1594 "4 668989 644537 \n",
1595 ".. ... ... \n",
1596 "296 14629 14293 \n",
1597 "297 13623 13457 \n",
1598 "298 11947 11924 \n",
1599 "299 11896 11896 \n",
1600 "300 83965 66688 \n",
1601 "\n",
1602 "[301 rows x 7 columns]"
1603 ]
1604 },
1605 "execution_count": 348,
1606 "metadata": {},
1607 "output_type": "execute_result"
1608 }
1609 ],
1610 "source": [
1611 "test_data = pd.read_csv('test_data.csv', \n",
1612 " parse_dates=[0], dayfirst=True)\n",
1613 "test_data"
1614 ]
1615 },
1616 {
1617 "cell_type": "code",
1618 "execution_count": 349,
1619 "metadata": {
1620 "Collapsed": "false"
1621 },
1622 "outputs": [
1623 {
1624 "data": {
1625 "text/plain": [
1626 "Index(['date', 'areaType', 'areaCode', 'areaName', 'newPCRTestsByPublishDate',\n",
1627 " 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate'],\n",
1628 " dtype='object')"
1629 ]
1630 },
1631 "execution_count": 349,
1632 "metadata": {},
1633 "output_type": "execute_result"
1634 }
1635 ],
1636 "source": [
1637 "test_data.columns"
1638 ]
1639 },
1640 {
1641 "cell_type": "code",
1642 "execution_count": 350,
1643 "metadata": {
1644 "Collapsed": "false"
1645 },
1646 "outputs": [],
1647 "source": [
1648 "uk_data = uk_data.merge(test_data[['date', 'newPCRTestsByPublishDate',\n",
1649 " 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate']], how='outer', on='date')"
1650 ]
1651 },
1652 {
1653 "cell_type": "code",
1654 "execution_count": 351,
1655 "metadata": {
1656 "Collapsed": "false"
1657 },
1658 "outputs": [
1659 {
1660 "data": {
1661 "text/plain": [
1662 "Index(['date', 'areaType', 'areaCode', 'areaName', 'covidOccupiedMVBeds',\n",
1663 " 'newAdmissions', 'newCasesBySpecimenDate', 'hospitalCases',\n",
1664 " 'newDeaths28DaysByPublishDate', 'newPCRTestsByPublishDate',\n",
1665 " 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate'],\n",
1666 " dtype='object')"
1667 ]
1668 },
1669 "execution_count": 351,
1670 "metadata": {},
1671 "output_type": "execute_result"
1672 }
1673 ],
1674 "source": [
1675 "uk_data.columns"
1676 ]
1677 },
1678 {
1679 "cell_type": "code",
1680 "execution_count": 352,
1681 "metadata": {
1682 "Collapsed": "false"
1683 },
1684 "outputs": [],
1685 "source": [
1686 "uk_data.rename(\n",
1687 " columns={\n",
1688 " 'covidOccupiedMVBeds': 'ventilator_beds',\n",
1689 " 'newCasesBySpecimenDate': 'new_cases',\n",
1690 " 'hospitalCases': 'hospital_cases', \n",
1691 " 'newDeaths28DaysByPublishDate': 'new_deaths',\n",
1692 " 'newAdmissions': 'new_admissions',\n",
1693 " 'newPCRTestsByPublishDate': 'new_pcr_tests',\n",
1694 " 'newTestsByPublishDate': 'new_tests',\n",
1695 " 'newPillarOneTwoTestsByPublishDate': 'new_pillar_1_2_tests'\n",
1696 " }, inplace=True)"
1697 ]
1698 },
1699 {
1700 "cell_type": "code",
1701 "execution_count": 353,
1702 "metadata": {
1703 "Collapsed": "false"
1704 },
1705 "outputs": [
1706 {
1707 "data": {
1708 "text/plain": [
1709 "date datetime64[ns]\n",
1710 "areaType object\n",
1711 "areaCode object\n",
1712 "areaName object\n",
1713 "ventilator_beds float64\n",
1714 "new_admissions float64\n",
1715 "new_cases float64\n",
1716 "hospital_cases float64\n",
1717 "new_deaths int64\n",
1718 "new_pcr_tests float64\n",
1719 "new_tests float64\n",
1720 "new_pillar_1_2_tests float64\n",
1721 "dtype: object"
1722 ]
1723 },
1724 "execution_count": 353,
1725 "metadata": {},
1726 "output_type": "execute_result"
1727 }
1728 ],
1729 "source": [
1730 "uk_data.dtypes"
1731 ]
1732 },
1733 {
1734 "cell_type": "code",
1735 "execution_count": 354,
1736 "metadata": {
1737 "Collapsed": "false"
1738 },
1739 "outputs": [
1740 {
1741 "data": {
1742 "text/plain": [
1743 "Index(['date', 'areaType', 'areaCode', 'areaName', 'ventilator_beds',\n",
1744 " 'new_admissions', 'new_cases', 'hospital_cases', 'new_deaths',\n",
1745 " 'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'],\n",
1746 " dtype='object')"
1747 ]
1748 },
1749 "execution_count": 354,
1750 "metadata": {},
1751 "output_type": "execute_result"
1752 }
1753 ],
1754 "source": [
1755 " uk_data.columns"
1756 ]
1757 },
1758 {
1759 "cell_type": "code",
1760 "execution_count": 355,
1761 "metadata": {
1762 "Collapsed": "false"
1763 },
1764 "outputs": [],
1765 "source": [
1766 "uk_data[['date', \n",
1767 " 'hospital_cases', 'ventilator_beds',\n",
1768 " 'new_cases', 'new_deaths', \n",
1769 " 'hospital_cases', 'new_admissions',\n",
1770 " 'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'\n",
1771 " ]].to_sql(\n",
1772 " 'uk_data',\n",
1773 " engine,\n",
1774 " if_exists='replace',\n",
1775 " index=False,\n",
1776 " chunksize=500,\n",
1777 " dtype={\n",
1778 " \"date\": Date,\n",
1779 " \"hospital_cases\": Integer,\n",
1780 " \"ventilator_beds\": Integer,\n",
1781 " \"new_cases\": Integer,\n",
1782 " \"hospital_cases\": Integer,\n",
1783 " \"new_deaths\": Integer,\n",
1784 " \"new_admissions\": Integer,\n",
1785 " 'new_pcr_tests': Integer, \n",
1786 " 'new_tests': Integer, \n",
1787 " 'new_pillar_1_2_tests': Integer\n",
1788 " }\n",
1789 ")"
1790 ]
1791 },
1792 {
1793 "cell_type": "code",
1794 "execution_count": 356,
1795 "metadata": {
1796 "Collapsed": "false"
1797 },
1798 "outputs": [],
1799 "source": [
1800 "# %sql select * from uk_data order by date desc limit 10"
1801 ]
1802 },
1803 {
1804 "cell_type": "code",
1805 "execution_count": 424,
1806 "metadata": {
1807 "Collapsed": "false"
1808 },
1809 "outputs": [],
1810 "source": [
1811 "query_string = '''drop table if exists uk_data_7;\n",
1812 "create table uk_data_7 \n",
1813 "(date date primary key,\n",
1814 " hospital_cases real,\n",
1815 " ventilator_beds real,\n",
1816 " new_cases real,\n",
1817 " new_deaths real,\n",
1818 " new_admissions real,\n",
1819 " new_pcr_tests real,\n",
1820 " new_tests real,\n",
1821 " new_pillar_1_2_tests real\n",
1822 ");'''\n",
1823 "\n",
1824 "with engine.connect() as connection:\n",
1825 " connection.execute(query_string)"
1826 ]
1827 },
1828 {
1829 "cell_type": "code",
1830 "execution_count": 425,
1831 "metadata": {},
1832 "outputs": [],
1833 "source": [
1834 "update_string = '''with ownd as (\n",
1835 " select date,\n",
1836 " avg(hospital_cases) over wnd as w_hospital_cases,\n",
1837 " avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
1838 " avg(new_cases) over wnd as w_new_cases,\n",
1839 " avg(new_deaths) over wnd as w_new_deaths,\n",
1840 " avg(new_admissions) over wnd as w_new_admissions,\n",
1841 " avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
1842 " avg(new_tests) over wnd as w_new_tests,\n",
1843 " avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
1844 " count(*) over wnd as w_size\n",
1845 " from uk_data\n",
1846 " window wnd as (\n",
1847 " order by uk_data.date\n",
1848 " rows between 3 preceding and 3 following\n",
1849 " )\n",
1850 ")\n",
1851 "insert into uk_data_7(date, \n",
1852 " hospital_cases, \n",
1853 " ventilator_beds, \n",
1854 " new_cases,\n",
1855 " new_deaths,\n",
1856 " new_admissions, \n",
1857 " new_pcr_tests, \n",
1858 " new_tests, \n",
1859 " new_pillar_1_2_tests\n",
1860 " )\n",
1861 "(select date,\n",
1862 " w_hospital_cases,\n",
1863 " w_ventilator_beds,\n",
1864 " w_new_cases,\n",
1865 " w_new_deaths,\n",
1866 " w_new_admissions,\n",
1867 " w_new_pcr_tests,\n",
1868 " w_new_tests,\n",
1869 " w_new_pillar_1_2_tests\n",
1870 " from ownd\n",
1871 " where w_size = 7\n",
1872 ")'''\n",
1873 "with engine.connect() as connection:\n",
1874 " connection.execute(update_string)"
1875 ]
1876 },
1877 {
1878 "cell_type": "code",
1879 "execution_count": 398,
1880 "metadata": {
1881 "Collapsed": "false"
1882 },
1883 "outputs": [],
1884 "source": [
1885 "# %%sql insert into uk_data_7(date, ventilator_beds, new_cases, hospital_cases, new_deaths, new_admissions)\n",
1886 "# values (\n",
1887 "# select date, \n",
1888 "# avg(ventilator_beds) over (order by date rows between 6 preceding and current row)\n",
1889 "# from uk_data\n",
1890 "# )"
1891 ]
1892 },
1893 {
1894 "cell_type": "code",
1895 "execution_count": 399,
1896 "metadata": {
1897 "Collapsed": "false"
1898 },
1899 "outputs": [],
1900 "source": [
1901 "# query_string = '''insert into uk_data_7(date, hospital_cases)\n",
1902 "# select uk_data.date, \n",
1903 "# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as hospital_cases\n",
1904 "# from uk_data'''\n",
1905 "# with engine.connect() as connection:\n",
1906 "# connection.execute(query_string) "
1907 ]
1908 },
1909 {
1910 "cell_type": "code",
1911 "execution_count": 360,
1912 "metadata": {
1913 "Collapsed": "false"
1914 },
1915 "outputs": [],
1916 "source": [
1917 "# %%sql \n",
1918 "# with m7 as \n",
1919 "# (select uk_data.date as date7, \n",
1920 "# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 6 preceding and current row) as nc7\n",
1921 "# from uk_data\n",
1922 "# )\n",
1923 "# update uk_data_7\n",
1924 "# set ventilator_beds = nc7\n",
1925 "# from m7\n",
1926 "# where uk_data_7.date = m7.date7"
1927 ]
1928 },
1929 {
1930 "cell_type": "code",
1931 "execution_count": 361,
1932 "metadata": {
1933 "Collapsed": "false"
1934 },
1935 "outputs": [
1936 {
1937 "name": "stdout",
1938 "output_type": "stream",
1939 "text": [
1940 " * postgresql://covid:***@localhost/covid\n",
1941 "390 rows affected.\n"
1942 ]
1943 },
1944 {
1945 "data": {
1946 "text/plain": [
1947 "[]"
1948 ]
1949 },
1950 "execution_count": 361,
1951 "metadata": {},
1952 "output_type": "execute_result"
1953 }
1954 ],
1955 "source": [
1956 "# %%sql\n",
1957 "# with m7 as \n",
1958 "# (select uk_data.date as date7, \n",
1959 "# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7\n",
1960 "# from uk_data\n",
1961 "# )\n",
1962 "# update uk_data_7\n",
1963 "# set ventilator_beds = nc7\n",
1964 "# from m7\n",
1965 "# where uk_data_7.date = m7.date7"
1966 ]
1967 },
1968 {
1969 "cell_type": "code",
1970 "execution_count": 362,
1971 "metadata": {
1972 "Collapsed": "false"
1973 },
1974 "outputs": [
1975 {
1976 "name": "stdout",
1977 "output_type": "stream",
1978 "text": [
1979 " * postgresql://covid:***@localhost/covid\n",
1980 "390 rows affected.\n"
1981 ]
1982 },
1983 {
1984 "data": {
1985 "text/plain": [
1986 "[]"
1987 ]
1988 },
1989 "execution_count": 362,
1990 "metadata": {},
1991 "output_type": "execute_result"
1992 }
1993 ],
1994 "source": [
1995 "# %%sql\n",
1996 "# with m7 as \n",
1997 "# (select uk_data.date as date7, \n",
1998 "# avg(uk_data.new_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7\n",
1999 "# from uk_data\n",
2000 "# )\n",
2001 "# update uk_data_7\n",
2002 "# set new_cases = nc7\n",
2003 "# from m7\n",
2004 "# where uk_data_7.date = m7.date7"
2005 ]
2006 },
2007 {
2008 "cell_type": "code",
2009 "execution_count": 363,
2010 "metadata": {
2011 "Collapsed": "false"
2012 },
2013 "outputs": [
2014 {
2015 "name": "stdout",
2016 "output_type": "stream",
2017 "text": [
2018 " * postgresql://covid:***@localhost/covid\n",
2019 "390 rows affected.\n"
2020 ]
2021 },
2022 {
2023 "data": {
2024 "text/plain": [
2025 "[]"
2026 ]
2027 },
2028 "execution_count": 363,
2029 "metadata": {},
2030 "output_type": "execute_result"
2031 }
2032 ],
2033 "source": [
2034 "# %%sql\n",
2035 "# with m7 as \n",
2036 "# (select uk_data.date as date7, \n",
2037 "# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
2038 "# from uk_data\n",
2039 "# )\n",
2040 "# update uk_data_7\n",
2041 "# set hospital_cases = d7\n",
2042 "# from m7\n",
2043 "# where uk_data_7.date = m7.date7"
2044 ]
2045 },
2046 {
2047 "cell_type": "code",
2048 "execution_count": 364,
2049 "metadata": {
2050 "Collapsed": "false"
2051 },
2052 "outputs": [
2053 {
2054 "name": "stdout",
2055 "output_type": "stream",
2056 "text": [
2057 " * postgresql://covid:***@localhost/covid\n",
2058 "390 rows affected.\n"
2059 ]
2060 },
2061 {
2062 "data": {
2063 "text/plain": [
2064 "[]"
2065 ]
2066 },
2067 "execution_count": 364,
2068 "metadata": {},
2069 "output_type": "execute_result"
2070 }
2071 ],
2072 "source": [
2073 "# %%sql\n",
2074 "# with m7 as \n",
2075 "# (select uk_data.date as date7, \n",
2076 "# avg(uk_data.new_deaths) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
2077 "# from uk_data\n",
2078 "# )\n",
2079 "# update uk_data_7\n",
2080 "# set new_deaths = d7\n",
2081 "# from m7\n",
2082 "# where uk_data_7.date = m7.date7"
2083 ]
2084 },
2085 {
2086 "cell_type": "code",
2087 "execution_count": 365,
2088 "metadata": {
2089 "Collapsed": "false"
2090 },
2091 "outputs": [
2092 {
2093 "name": "stdout",
2094 "output_type": "stream",
2095 "text": [
2096 " * postgresql://covid:***@localhost/covid\n",
2097 "390 rows affected.\n"
2098 ]
2099 },
2100 {
2101 "data": {
2102 "text/plain": [
2103 "[]"
2104 ]
2105 },
2106 "execution_count": 365,
2107 "metadata": {},
2108 "output_type": "execute_result"
2109 }
2110 ],
2111 "source": [
2112 "# %%sql\n",
2113 "# with m7 as \n",
2114 "# (select uk_data.date as date7, \n",
2115 "# avg(uk_data.new_admissions) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
2116 "# from uk_data\n",
2117 "# )\n",
2118 "# update uk_data_7\n",
2119 "# set new_admissions = d7\n",
2120 "# from m7\n",
2121 "# where uk_data_7.date = m7.date7"
2122 ]
2123 },
2124 {
2125 "cell_type": "code",
2126 "execution_count": 366,
2127 "metadata": {
2128 "Collapsed": "false"
2129 },
2130 "outputs": [
2131 {
2132 "name": "stdout",
2133 "output_type": "stream",
2134 "text": [
2135 " * postgresql://covid:***@localhost/covid\n",
2136 "390 rows affected.\n"
2137 ]
2138 },
2139 {
2140 "data": {
2141 "text/plain": [
2142 "[]"
2143 ]
2144 },
2145 "execution_count": 366,
2146 "metadata": {},
2147 "output_type": "execute_result"
2148 }
2149 ],
2150 "source": [
2151 "# %%sql\n",
2152 "# with m7 as \n",
2153 "# (select uk_data.date as date7, \n",
2154 "# avg(uk_data.new_pcr_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
2155 "# from uk_data\n",
2156 "# )\n",
2157 "# update uk_data_7\n",
2158 "# set new_pcr_tests = d7\n",
2159 "# from m7\n",
2160 "# where uk_data_7.date = m7.date7"
2161 ]
2162 },
2163 {
2164 "cell_type": "code",
2165 "execution_count": 367,
2166 "metadata": {
2167 "Collapsed": "false"
2168 },
2169 "outputs": [
2170 {
2171 "name": "stdout",
2172 "output_type": "stream",
2173 "text": [
2174 " * postgresql://covid:***@localhost/covid\n",
2175 "390 rows affected.\n"
2176 ]
2177 },
2178 {
2179 "data": {
2180 "text/plain": [
2181 "[]"
2182 ]
2183 },
2184 "execution_count": 367,
2185 "metadata": {},
2186 "output_type": "execute_result"
2187 }
2188 ],
2189 "source": [
2190 "# %%sql\n",
2191 "# with m7 as \n",
2192 "# (select uk_data.date as date7, \n",
2193 "# avg(uk_data.new_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
2194 "# from uk_data\n",
2195 "# )\n",
2196 "# update uk_data_7\n",
2197 "# set new_tests = d7\n",
2198 "# from m7\n",
2199 "# where uk_data_7.date = m7.date7"
2200 ]
2201 },
2202 {
2203 "cell_type": "code",
2204 "execution_count": 309,
2205 "metadata": {
2206 "Collapsed": "false"
2207 },
2208 "outputs": [],
2209 "source": [
2210 "# %%sql\n",
2211 "# with m7 as \n",
2212 "# (select uk_data.date as date7, \n",
2213 "# avg(uk_data.new_pillar_1_2_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
2214 "# from uk_data\n",
2215 "# )\n",
2216 "# update uk_data_7\n",
2217 "# set new_pillar_1_2_tests = d7\n",
2218 "# from m7\n",
2219 "# where uk_data_7.date = m7.date7"
2220 ]
2221 },
2222 {
2223 "cell_type": "code",
2224 "execution_count": 310,
2225 "metadata": {
2226 "Collapsed": "false"
2227 },
2228 "outputs": [
2229 {
2230 "name": "stdout",
2231 "output_type": "stream",
2232 "text": [
2233 " * postgresql://covid:***@localhost/covid\n",
2234 "0 rows affected.\n"
2235 ]
2236 },
2237 {
2238 "data": {
2239 "text/plain": [
2240 "[]"
2241 ]
2242 },
2243 "execution_count": 310,
2244 "metadata": {},
2245 "output_type": "execute_result"
2246 }
2247 ],
2248 "source": [
2249 "# %%sql\n",
2250 "# with wnd as\n",
2251 "# ( select date, \n",
2252 "# avg(new_pillar_1_2_tests)\n",
2253 "# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests\n",
2254 "# from uk_data\n",
2255 "# )\n",
2256 "# update uk_data_7\n",
2257 "# set new_pillar_1_2_tests = wnd.a_new_pillar_1_2_tests\n",
2258 "# from wnd\n",
2259 "# where uk_data_7.date = wnd.date\n",
2260 "# and (select count(*) from wnd) = 7"
2261 ]
2262 },
2263 {
2264 "cell_type": "code",
2265 "execution_count": 379,
2266 "metadata": {
2267 "Collapsed": "false"
2268 },
2269 "outputs": [
2270 {
2271 "name": "stdout",
2272 "output_type": "stream",
2273 "text": [
2274 " * postgresql://covid:***@localhost/covid\n",
2275 "10 rows affected.\n"
2276 ]
2277 },
2278 {
2279 "data": {
2280 "text/html": [
2281 "<table>\n",
2282 " <thead>\n",
2283 " <tr>\n",
2284 " <th>date</th>\n",
2285 " <th>new_pillar_1_2_tests</th>\n",
2286 " <th>a_new_pillar_1_2_tests</th>\n",
2287 " <th>a_count</th>\n",
2288 " </tr>\n",
2289 " </thead>\n",
2290 " <tbody>\n",
2291 " <tr>\n",
2292 " <td>2021-01-26</td>\n",
2293 " <td>None</td>\n",
2294 " <td>463604.666666666667</td>\n",
2295 " <td>3</td>\n",
2296 " </tr>\n",
2297 " <tr>\n",
2298 " <td>2021-01-25</td>\n",
2299 " <td>None</td>\n",
2300 " <td>499910.750000000000</td>\n",
2301 " <td>4</td>\n",
2302 " </tr>\n",
2303 " <tr>\n",
2304 " <td>2021-01-24</td>\n",
2305 " <td>None</td>\n",
2306 " <td>528836.000000000000</td>\n",
2307 " <td>5</td>\n",
2308 " </tr>\n",
2309 " <tr>\n",
2310 " <td>2021-01-23</td>\n",
2311 " <td>None</td>\n",
2312 " <td>543566.833333333333</td>\n",
2313 " <td>6</td>\n",
2314 " </tr>\n",
2315 " <tr>\n",
2316 " <td>2021-01-22</td>\n",
2317 " <td>None</td>\n",
2318 " <td>545780.285714285714</td>\n",
2319 " <td>7</td>\n",
2320 " </tr>\n",
2321 " <tr>\n",
2322 " <td>2021-01-21</td>\n",
2323 " <td>None</td>\n",
2324 " <td>546626.571428571429</td>\n",
2325 " <td>7</td>\n",
2326 " </tr>\n",
2327 " <tr>\n",
2328 " <td>2021-01-20</td>\n",
2329 " <td>None</td>\n",
2330 " <td>546806.285714285714</td>\n",
2331 " <td>7</td>\n",
2332 " </tr>\n",
2333 " <tr>\n",
2334 " <td>2021-01-19</td>\n",
2335 " <td>None</td>\n",
2336 " <td>547402.428571428571</td>\n",
2337 " <td>7</td>\n",
2338 " </tr>\n",
2339 " <tr>\n",
2340 " <td>2021-01-18</td>\n",
2341 " <td>None</td>\n",
2342 " <td>542617.857142857143</td>\n",
2343 " <td>7</td>\n",
2344 " </tr>\n",
2345 " <tr>\n",
2346 " <td>2021-01-17</td>\n",
2347 " <td>None</td>\n",
2348 " <td>545088.142857142857</td>\n",
2349 " <td>7</td>\n",
2350 " </tr>\n",
2351 " </tbody>\n",
2352 "</table>"
2353 ],
2354 "text/plain": [
2355 "[(datetime.date(2021, 1, 26), None, Decimal('463604.666666666667'), 3),\n",
2356 " (datetime.date(2021, 1, 25), None, Decimal('499910.750000000000'), 4),\n",
2357 " (datetime.date(2021, 1, 24), None, Decimal('528836.000000000000'), 5),\n",
2358 " (datetime.date(2021, 1, 23), None, Decimal('543566.833333333333'), 6),\n",
2359 " (datetime.date(2021, 1, 22), None, Decimal('545780.285714285714'), 7),\n",
2360 " (datetime.date(2021, 1, 21), None, Decimal('546626.571428571429'), 7),\n",
2361 " (datetime.date(2021, 1, 20), None, Decimal('546806.285714285714'), 7),\n",
2362 " (datetime.date(2021, 1, 19), None, Decimal('547402.428571428571'), 7),\n",
2363 " (datetime.date(2021, 1, 18), None, Decimal('542617.857142857143'), 7),\n",
2364 " (datetime.date(2021, 1, 17), None, Decimal('545088.142857142857'), 7)]"
2365 ]
2366 },
2367 "execution_count": 379,
2368 "metadata": {},
2369 "output_type": "execute_result"
2370 }
2371 ],
2372 "source": [
2373 "# %%sql\n",
2374 "# with wnd as\n",
2375 "# ( select date, \n",
2376 "# avg(new_pillar_1_2_tests)\n",
2377 "# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests,\n",
2378 "# count(new_pillar_1_2_tests)\n",
2379 "# over (order by uk_data.date rows between 3 preceding and 3 following) as a_count\n",
2380 "# from uk_data\n",
2381 "# )\n",
2382 "# select uk_data_7.date, new_pillar_1_2_tests, wnd.a_new_pillar_1_2_tests, wnd.a_count\n",
2383 "# from uk_data_7, wnd\n",
2384 "# where uk_data_7.date = wnd.date\n",
2385 "# order by uk_data_7.date desc limit 10\n",
2386 " \n",
2387 "# select date, \n",
2388 "# count(*) over wnd as w_size\n",
2389 "# from uk_data\n",
2390 "# window wnd as (\n",
2391 "# order by uk_data.date\n",
2392 "# rows between 3 preceding and 3 following\n",
2393 "# );"
2394 ]
2395 },
2396 {
2397 "cell_type": "code",
2398 "execution_count": 401,
2399 "metadata": {
2400 "Collapsed": "false"
2401 },
2402 "outputs": [],
2403 "source": [
2404 "# %%sql\n",
2405 "# select date, \n",
2406 "# count(*) over wnd as w_size\n",
2407 "# from uk_data\n",
2408 "# window wnd as (\n",
2409 "# order by uk_data.date\n",
2410 "# rows between 3 preceding and 3 following\n",
2411 "# )\n",
2412 "# order by date desc limit 10"
2413 ]
2414 },
2415 {
2416 "cell_type": "code",
2417 "execution_count": 407,
2418 "metadata": {},
2419 "outputs": [
2420 {
2421 "name": "stdout",
2422 "output_type": "stream",
2423 "text": [
2424 " * postgresql://covid:***@localhost/covid\n",
2425 "0 rows affected.\n"
2426 ]
2427 },
2428 {
2429 "data": {
2430 "text/plain": [
2431 "[]"
2432 ]
2433 },
2434 "execution_count": 407,
2435 "metadata": {},
2436 "output_type": "execute_result"
2437 }
2438 ],
2439 "source": [
2440 "# %%sql\n",
2441 "# with ownd as (\n",
2442 "# select date,\n",
2443 "# avg(hospital_cases) over wnd as w_hospital_cases,\n",
2444 "# avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
2445 "# avg(new_cases) over wnd as w_new_cases,\n",
2446 "# avg(new_deaths) over wnd as w_new_deaths,\n",
2447 "# avg(new_admissions) over wnd as w_new_admissions,\n",
2448 "# avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
2449 "# avg(new_tests) over wnd as w_new_tests,\n",
2450 "# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
2451 "# count(*) over wnd as w_size\n",
2452 "# from uk_data\n",
2453 "# window wnd as (\n",
2454 "# order by uk_data.date\n",
2455 "# rows between 3 preceding and 3 following\n",
2456 "# ))\n",
2457 "# insert into uk_data_7(date, \n",
2458 "# hospital_cases, \n",
2459 "# ventilator_beds, \n",
2460 "# new_cases,\n",
2461 "# new_deaths,\n",
2462 "# new_admissions, \n",
2463 "# new_pcr_tests, \n",
2464 "# new_tests, \n",
2465 "# new_pillar_1_2_tests\n",
2466 "# )\n",
2467 "# (select date,\n",
2468 "# avg(hospital_cases) over wnd as w_hospital_cases,\n",
2469 "# avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
2470 "# avg(new_cases) over wnd as w_new_cases,\n",
2471 "# avg(new_deaths) over wnd as w_new_deaths,\n",
2472 "# avg(new_admissions) over wnd as w_new_admissions,\n",
2473 "# avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
2474 "# avg(new_tests) over wnd as w_new_tests,\n",
2475 "# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
2476 "# count(*) over wnd as w_size\n",
2477 "# from uk_data\n",
2478 "# window wnd as (\n",
2479 "# order by uk_data.date\n",
2480 "# rows between 3 preceding and 3 following\n",
2481 "# )\n",
2482 "# )\n",
2483 "# set date = ownd.date,\n",
2484 "# hospital_cases = w_hospital_cases,\n",
2485 "# ventilator_beds = w_ventilator_beds,\n",
2486 "# new_cases = w_new_cases,\n",
2487 "# new_deaths = w_new_deaths,\n",
2488 "# new_admissions = w_new_admissions,\n",
2489 "# new_pcr_tests = w_new_pcr_tests,\n",
2490 "# new_tests = w_new_tests,\n",
2491 "# new_pillar_1_2_tests = w_new_pillar_1_2_tests\n",
2492 "# from ownd\n",
2493 "# where w_size = 7"
2494 ]
2495 },
2496 {
2497 "cell_type": "code",
2498 "execution_count": 417,
2499 "metadata": {},
2500 "outputs": [
2501 {
2502 "name": "stdout",
2503 "output_type": "stream",
2504 "text": [
2505 " * postgresql://covid:***@localhost/covid\n",
2506 "384 rows affected.\n"
2507 ]
2508 },
2509 {
2510 "data": {
2511 "text/plain": [
2512 "[]"
2513 ]
2514 },
2515 "execution_count": 417,
2516 "metadata": {},
2517 "output_type": "execute_result"
2518 }
2519 ],
2520 "source": [
2521 "# %%sql\n",
2522 "# with ownd as (\n",
2523 "# select date,\n",
2524 "# avg(hospital_cases) over wnd as w_hospital_cases,\n",
2525 "# avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
2526 "# avg(new_cases) over wnd as w_new_cases,\n",
2527 "# avg(new_deaths) over wnd as w_new_deaths,\n",
2528 "# avg(new_admissions) over wnd as w_new_admissions,\n",
2529 "# avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
2530 "# avg(new_tests) over wnd as w_new_tests,\n",
2531 "# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
2532 "# count(*) over wnd as w_size\n",
2533 "# from uk_data\n",
2534 "# window wnd as (\n",
2535 "# order by uk_data.date\n",
2536 "# rows between 3 preceding and 3 following\n",
2537 "# )\n",
2538 "# )\n",
2539 "# insert into uk_data_7(date, \n",
2540 "# hospital_cases, \n",
2541 "# ventilator_beds, \n",
2542 "# new_cases,\n",
2543 "# new_deaths,\n",
2544 "# new_admissions, \n",
2545 "# new_pcr_tests, \n",
2546 "# new_tests, \n",
2547 "# new_pillar_1_2_tests\n",
2548 "# )\n",
2549 "# (select date,\n",
2550 "# w_hospital_cases,\n",
2551 "# w_ventilator_beds,\n",
2552 "# w_new_cases,\n",
2553 "# w_new_deaths,\n",
2554 "# w_new_admissions,\n",
2555 "# w_new_pcr_tests,\n",
2556 "# w_new_tests,\n",
2557 "# w_new_pillar_1_2_tests\n",
2558 "# from ownd\n",
2559 "# where w_size = 7\n",
2560 "# )"
2561 ]
2562 },
2563 {
2564 "cell_type": "code",
2565 "execution_count": 427,
2566 "metadata": {
2567 "Collapsed": "false"
2568 },
2569 "outputs": [
2570 {
2571 "name": "stdout",
2572 "output_type": "stream",
2573 "text": [
2574 " * postgresql://covid:***@localhost/covid\n",
2575 "10 rows affected.\n"
2576 ]
2577 },
2578 {
2579 "data": {
2580 "text/html": [
2581 "<table>\n",
2582 " <thead>\n",
2583 " <tr>\n",
2584 " <th>date</th>\n",
2585 " <th>hospital_cases</th>\n",
2586 " <th>ventilator_beds</th>\n",
2587 " <th>new_cases</th>\n",
2588 " <th>new_deaths</th>\n",
2589 " <th>new_admissions</th>\n",
2590 " <th>new_pcr_tests</th>\n",
2591 " <th>new_tests</th>\n",
2592 " <th>new_pillar_1_2_tests</th>\n",
2593 " </tr>\n",
2594 " </thead>\n",
2595 " <tbody>\n",
2596 " <tr>\n",
2597 " <td>2021-01-23</td>\n",
2598 " <td>37915.6</td>\n",
2599 " <td>4027.3333</td>\n",
2600 " <td>22463.666</td>\n",
2601 " <td>1241.7142</td>\n",
2602 " <td>3651.6667</td>\n",
2603 " <td>353735.34</td>\n",
2604 " <td>565312.3</td>\n",
2605 " <td>543566.8</td>\n",
2606 " </tr>\n",
2607 " <tr>\n",
2608 " <td>2021-01-22</td>\n",
2609 " <td>38057.168</td>\n",
2610 " <td>4015.8572</td>\n",
2611 " <td>24870.428</td>\n",
2612 " <td>1238.7142</td>\n",
2613 " <td>3771.75</td>\n",
2614 " <td>354323.16</td>\n",
2615 " <td>567830.3</td>\n",
2616 " <td>545780.3</td>\n",
2617 " </tr>\n",
2618 " <tr>\n",
2619 " <td>2021-01-21</td>\n",
2620 " <td>38217.715</td>\n",
2621 " <td>3999.2856</td>\n",
2622 " <td>30620.428</td>\n",
2623 " <td>1239.7142</td>\n",
2624 " <td>3828.2</td>\n",
2625 " <td>363750.16</td>\n",
2626 " <td>569716.7</td>\n",
2627 " <td>546626.56</td>\n",
2628 " </tr>\n",
2629 " <tr>\n",
2630 " <td>2021-01-20</td>\n",
2631 " <td>38294.0</td>\n",
2632 " <td>3969.8572</td>\n",
2633 " <td>32707.428</td>\n",
2634 " <td>1248.4286</td>\n",
2635 " <td>3811.0</td>\n",
2636 " <td>372037.44</td>\n",
2637 " <td>570458.1</td>\n",
2638 " <td>546806.3</td>\n",
2639 " </tr>\n",
2640 " <tr>\n",
2641 " <td>2021-01-19</td>\n",
2642 " <td>38348.43</td>\n",
2643 " <td>3939.5715</td>\n",
2644 " <td>34143.285</td>\n",
2645 " <td>1240.8572</td>\n",
2646 " <td>3825.8572</td>\n",
2647 " <td>374771.16</td>\n",
2648 " <td>571336.56</td>\n",
2649 " <td>547402.44</td>\n",
2650 " </tr>\n",
2651 " <tr>\n",
2652 " <td>2021-01-18</td>\n",
2653 " <td>38338.145</td>\n",
2654 " <td>3898.5715</td>\n",
2655 " <td>35766.57</td>\n",
2656 " <td>1223.5714</td>\n",
2657 " <td>3872.8572</td>\n",
2658 " <td>377019.28</td>\n",
2659 " <td>566901.1</td>\n",
2660 " <td>542617.9</td>\n",
2661 " </tr>\n",
2662 " <tr>\n",
2663 " <td>2021-01-17</td>\n",
2664 " <td>38292.285</td>\n",
2665 " <td>3857.4285</td>\n",
2666 " <td>37337.855</td>\n",
2667 " <td>1217.5714</td>\n",
2668 " <td>3939.0</td>\n",
2669 " <td>389469.84</td>\n",
2670 " <td>571242.56</td>\n",
2671 " <td>545088.1</td>\n",
2672 " </tr>\n",
2673 " <tr>\n",
2674 " <td>2021-01-16</td>\n",
2675 " <td>38119.855</td>\n",
2676 " <td>3810.7144</td>\n",
2677 " <td>38824.0</td>\n",
2678 " <td>1181.0</td>\n",
2679 " <td>3967.2856</td>\n",
2680 " <td>395066.72</td>\n",
2681 " <td>569445.0</td>\n",
2682 " <td>541938.0</td>\n",
2683 " </tr>\n",
2684 " <tr>\n",
2685 " <td>2021-01-15</td>\n",
2686 " <td>37858.715</td>\n",
2687 " <td>3746.2856</td>\n",
2688 " <td>40268.57</td>\n",
2689 " <td>1128.5714</td>\n",
2690 " <td>4029.2856</td>\n",
2691 " <td>403526.72</td>\n",
2692 " <td>570188.7</td>\n",
2693 " <td>540199.9</td>\n",
2694 " </tr>\n",
2695 " <tr>\n",
2696 " <td>2021-01-14</td>\n",
2697 " <td>37505.57</td>\n",
2698 " <td>3667.2856</td>\n",
2699 " <td>41989.145</td>\n",
2700 " <td>1118.5714</td>\n",
2701 " <td>4065.2856</td>\n",
2702 " <td>412496.44</td>\n",
2703 " <td>567178.7</td>\n",
2704 " <td>537417.1</td>\n",
2705 " </tr>\n",
2706 " </tbody>\n",
2707 "</table>"
2708 ],
2709 "text/plain": [
2710 "[(datetime.date(2021, 1, 23), 37915.6, 4027.3333, 22463.666, 1241.7142, 3651.6667, 353735.34, 565312.3, 543566.8),\n",
2711 " (datetime.date(2021, 1, 22), 38057.168, 4015.8572, 24870.428, 1238.7142, 3771.75, 354323.16, 567830.3, 545780.3),\n",
2712 " (datetime.date(2021, 1, 21), 38217.715, 3999.2856, 30620.428, 1239.7142, 3828.2, 363750.16, 569716.7, 546626.56),\n",
2713 " (datetime.date(2021, 1, 20), 38294.0, 3969.8572, 32707.428, 1248.4286, 3811.0, 372037.44, 570458.1, 546806.3),\n",
2714 " (datetime.date(2021, 1, 19), 38348.43, 3939.5715, 34143.285, 1240.8572, 3825.8572, 374771.16, 571336.56, 547402.44),\n",
2715 " (datetime.date(2021, 1, 18), 38338.145, 3898.5715, 35766.57, 1223.5714, 3872.8572, 377019.28, 566901.1, 542617.9),\n",
2716 " (datetime.date(2021, 1, 17), 38292.285, 3857.4285, 37337.855, 1217.5714, 3939.0, 389469.84, 571242.56, 545088.1),\n",
2717 " (datetime.date(2021, 1, 16), 38119.855, 3810.7144, 38824.0, 1181.0, 3967.2856, 395066.72, 569445.0, 541938.0),\n",
2718 " (datetime.date(2021, 1, 15), 37858.715, 3746.2856, 40268.57, 1128.5714, 4029.2856, 403526.72, 570188.7, 540199.9),\n",
2719 " (datetime.date(2021, 1, 14), 37505.57, 3667.2856, 41989.145, 1118.5714, 4065.2856, 412496.44, 567178.7, 537417.1)]"
2720 ]
2721 },
2722 "execution_count": 427,
2723 "metadata": {},
2724 "output_type": "execute_result"
2725 }
2726 ],
2727 "source": [
2728 "%sql select * from uk_data_7 order by date desc limit 10"
2729 ]
2730 },
2731 {
2732 "cell_type": "code",
2733 "execution_count": 428,
2734 "metadata": {
2735 "Collapsed": "false"
2736 },
2737 "outputs": [
2738 {
2739 "name": "stdout",
2740 "output_type": "stream",
2741 "text": [
2742 " * postgresql://covid:***@localhost/covid\n",
2743 "10 rows affected.\n"
2744 ]
2745 },
2746 {
2747 "data": {
2748 "text/html": [
2749 "<table>\n",
2750 " <thead>\n",
2751 " <tr>\n",
2752 " <th>date</th>\n",
2753 " <th>hospital_cases</th>\n",
2754 " <th>ventilator_beds</th>\n",
2755 " <th>new_cases</th>\n",
2756 " <th>new_deaths</th>\n",
2757 " <th>new_admissions</th>\n",
2758 " <th>new_pcr_tests</th>\n",
2759 " <th>new_tests</th>\n",
2760 " <th>new_pillar_1_2_tests</th>\n",
2761 " </tr>\n",
2762 " </thead>\n",
2763 " <tbody>\n",
2764 " <tr>\n",
2765 " <td>2021-01-26</td>\n",
2766 " <td>None</td>\n",
2767 " <td>None</td>\n",
2768 " <td>None</td>\n",
2769 " <td>1631</td>\n",
2770 " <td>None</td>\n",
2771 " <td>None</td>\n",
2772 " <td>None</td>\n",
2773 " <td>None</td>\n",
2774 " </tr>\n",
2775 " <tr>\n",
2776 " <td>2021-01-25</td>\n",
2777 " <td>None</td>\n",
2778 " <td>4032</td>\n",
2779 " <td>4482</td>\n",
2780 " <td>592</td>\n",
2781 " <td>None</td>\n",
2782 " <td>196510</td>\n",
2783 " <td>546734</td>\n",
2784 " <td>531104</td>\n",
2785 " </tr>\n",
2786 " <tr>\n",
2787 " <td>2021-01-24</td>\n",
2788 " <td>37561</td>\n",
2789 " <td>4077</td>\n",
2790 " <td>14266</td>\n",
2791 " <td>610</td>\n",
2792 " <td>None</td>\n",
2793 " <td>262111</td>\n",
2794 " <td>412775</td>\n",
2795 " <td>394479</td>\n",
2796 " </tr>\n",
2797 " <tr>\n",
2798 " <td>2021-01-23</td>\n",
2799 " <td>37266</td>\n",
2800 " <td>4066</td>\n",
2801 " <td>20495</td>\n",
2802 " <td>1348</td>\n",
2803 " <td>None</td>\n",
2804 " <td>389209</td>\n",
2805 " <td>486425</td>\n",
2806 " <td>465231</td>\n",
2807 " </tr>\n",
2808 " <tr>\n",
2809 " <td>2021-01-22</td>\n",
2810 " <td>38144</td>\n",
2811 " <td>4076</td>\n",
2812 " <td>29094</td>\n",
2813 " <td>1401</td>\n",
2814 " <td>3341</td>\n",
2815 " <td>401075</td>\n",
2816 " <td>631901</td>\n",
2817 " <td>608829</td>\n",
2818 " </tr>\n",
2819 " <tr>\n",
2820 " <td>2021-01-21</td>\n",
2821 " <td>37957</td>\n",
2822 " <td>3960</td>\n",
2823 " <td>31430</td>\n",
2824 " <td>1290</td>\n",
2825 " <td>3598</td>\n",
2826 " <td>439408</td>\n",
2827 " <td>668989</td>\n",
2828 " <td>644537</td>\n",
2829 " </tr>\n",
2830 " <tr>\n",
2831 " <td>2021-01-20</td>\n",
2832 " <td>38650</td>\n",
2833 " <td>3953</td>\n",
2834 " <td>35015</td>\n",
2835 " <td>1820</td>\n",
2836 " <td>4016</td>\n",
2837 " <td>434099</td>\n",
2838 " <td>645050</td>\n",
2839 " <td>617221</td>\n",
2840 " </tr>\n",
2841 " <tr>\n",
2842 " <td>2021-01-19</td>\n",
2843 " <td>38765</td>\n",
2844 " <td>3947</td>\n",
2845 " <td>39311</td>\n",
2846 " <td>1610</td>\n",
2847 " <td>4132</td>\n",
2848 " <td>357850</td>\n",
2849 " <td>582938</td>\n",
2850 " <td>559061</td>\n",
2851 " </tr>\n",
2852 " <tr>\n",
2853 " <td>2021-01-18</td>\n",
2854 " <td>39181</td>\n",
2855 " <td>3916</td>\n",
2856 " <td>44732</td>\n",
2857 " <td>599</td>\n",
2858 " <td>4054</td>\n",
2859 " <td>262499</td>\n",
2860 " <td>559939</td>\n",
2861 " <td>537028</td>\n",
2862 " </tr>\n",
2863 " <tr>\n",
2864 " <td>2021-01-17</td>\n",
2865 " <td>38095</td>\n",
2866 " <td>3871</td>\n",
2867 " <td>28875</td>\n",
2868 " <td>671</td>\n",
2869 " <td>3725</td>\n",
2870 " <td>320122</td>\n",
2871 " <td>417965</td>\n",
2872 " <td>395737</td>\n",
2873 " </tr>\n",
2874 " </tbody>\n",
2875 "</table>"
2876 ],
2877 "text/plain": [
2878 "[(datetime.date(2021, 1, 26), None, None, None, 1631, None, None, None, None),\n",
2879 " (datetime.date(2021, 1, 25), None, 4032, 4482, 592, None, 196510, 546734, 531104),\n",
2880 " (datetime.date(2021, 1, 24), 37561, 4077, 14266, 610, None, 262111, 412775, 394479),\n",
2881 " (datetime.date(2021, 1, 23), 37266, 4066, 20495, 1348, None, 389209, 486425, 465231),\n",
2882 " (datetime.date(2021, 1, 22), 38144, 4076, 29094, 1401, 3341, 401075, 631901, 608829),\n",
2883 " (datetime.date(2021, 1, 21), 37957, 3960, 31430, 1290, 3598, 439408, 668989, 644537),\n",
2884 " (datetime.date(2021, 1, 20), 38650, 3953, 35015, 1820, 4016, 434099, 645050, 617221),\n",
2885 " (datetime.date(2021, 1, 19), 38765, 3947, 39311, 1610, 4132, 357850, 582938, 559061),\n",
2886 " (datetime.date(2021, 1, 18), 39181, 3916, 44732, 599, 4054, 262499, 559939, 537028),\n",
2887 " (datetime.date(2021, 1, 17), 38095, 3871, 28875, 671, 3725, 320122, 417965, 395737)]"
2888 ]
2889 },
2890 "execution_count": 428,
2891 "metadata": {},
2892 "output_type": "execute_result"
2893 }
2894 ],
2895 "source": [
2896 "%sql select * from uk_data order by date desc limit 10"
2897 ]
2898 },
2899 {
2900 "cell_type": "code",
2901 "execution_count": null,
2902 "metadata": {
2903 "Collapsed": "false"
2904 },
2905 "outputs": [],
2906 "source": []
2907 }
2908 ],
2909 "metadata": {
2910 "jupytext": {
2911 "formats": "ipynb,md"
2912 },
2913 "kernelspec": {
2914 "display_name": "Python 3",
2915 "language": "python",
2916 "name": "python3"
2917 },
2918 "language_info": {
2919 "codemirror_mode": {
2920 "name": "ipython",
2921 "version": 3
2922 },
2923 "file_extension": ".py",
2924 "mimetype": "text/x-python",
2925 "name": "python",
2926 "nbconvert_exporter": "python",
2927 "pygments_lexer": "ipython3",
2928 "version": "3.8.5"
2929 }
2930 },
2931 "nbformat": 4,
2932 "nbformat_minor": 4
2933 }