Now using py files, for automation
[covid19.git] / data_import.ipynb
diff --git a/data_import.ipynb b/data_import.ipynb
new file mode 100644 (file)
index 0000000..31d7ef9
--- /dev/null
@@ -0,0 +1,2933 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "source": [
+    "Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 314,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float\n",
+    "from sqlalchemy import create_engine"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 315,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "import itertools\n",
+    "import collections\n",
+    "import json\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from scipy.stats import gmean\n",
+    "import datetime\n",
+    "\n",
+    "import matplotlib as mpl\n",
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 316,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The sql extension is already loaded. To reload it, use:\n",
+      "  %reload_ext sql\n"
+     ]
+    }
+   ],
+   "source": [
+    "%load_ext sql"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 317,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 318,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'Connected: covid@covid'"
+      ]
+     },
+     "execution_count": 318,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%sql $connection_string"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 319,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "eng = create_engine(connection_string)\n",
+    "engine = eng.execution_options(isolation_level=\"AUTOCOMMIT\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 320,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "DEATH_COUNT_THRESHOLD = 10\n",
+    "COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()\n",
+    "COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()\n",
+    "COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()\n",
+    "# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()\n",
+    "\n",
+    "COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA\n",
+    "       'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',\n",
+    "       'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',\n",
+    "       'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']\n",
+    "COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))\n",
+    "COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 321,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
+      "                                 Dload  Upload   Total   Spent    Left  Speed\n",
+      "100  623k  100  623k    0     0   927k      0 --:--:-- --:--:-- --:--:--  925k\n"
+     ]
+    }
+   ],
+   "source": [
+    "!curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 322,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "# First col is a date, treat geoId of NA as 'Namibia', not \"NA\" value\n",
+    "raw_data = pd.read_csv('covid.csv', \n",
+    "                       parse_dates=[0], dayfirst=True,\n",
+    "                       keep_default_na=False, na_values = [''],\n",
+    "#                        dtype = {'day': np.int64, \n",
+    "#                                 'month': np.int64, \n",
+    "#                                 'year': np.int64, \n",
+    "#                                 'cases': np.int64, \n",
+    "#                                 'deaths': np.int64, \n",
+    "#                                 'countriesAndTerritories': str, \n",
+    "#                                 'geoId': str, \n",
+    "#                                 'countryterritoryCode': str, \n",
+    "#                                 'popData2019': np.int64, \n",
+    "#                                 'continentExp': str, \n",
+    "#                                 }\n",
+    "                      )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 323,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "100050"
+      ]
+     },
+     "execution_count": 323,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "raw_data.size"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 324,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "raw_data.fillna(0, inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 325,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>dateRep</th>\n",
+       "      <th>year_week</th>\n",
+       "      <th>cases_weekly</th>\n",
+       "      <th>deaths_weekly</th>\n",
+       "      <th>countriesAndTerritories</th>\n",
+       "      <th>geoId</th>\n",
+       "      <th>countryterritoryCode</th>\n",
+       "      <th>popData2019</th>\n",
+       "      <th>continentExp</th>\n",
+       "      <th>notification_rate_per_100000_population_14-days</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2021-01-18</td>\n",
+       "      <td>2021-02</td>\n",
+       "      <td>557</td>\n",
+       "      <td>45</td>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>AF</td>\n",
+       "      <td>AFG</td>\n",
+       "      <td>38041757.0</td>\n",
+       "      <td>Asia</td>\n",
+       "      <td>3.24</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2021-01-11</td>\n",
+       "      <td>2021-01</td>\n",
+       "      <td>675</td>\n",
+       "      <td>71</td>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>AF</td>\n",
+       "      <td>AFG</td>\n",
+       "      <td>38041757.0</td>\n",
+       "      <td>Asia</td>\n",
+       "      <td>4.15</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2021-01-04</td>\n",
+       "      <td>2020-53</td>\n",
+       "      <td>902</td>\n",
+       "      <td>60</td>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>AF</td>\n",
+       "      <td>AFG</td>\n",
+       "      <td>38041757.0</td>\n",
+       "      <td>Asia</td>\n",
+       "      <td>7.61</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>2020-12-28</td>\n",
+       "      <td>2020-52</td>\n",
+       "      <td>1994</td>\n",
+       "      <td>88</td>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>AF</td>\n",
+       "      <td>AFG</td>\n",
+       "      <td>38041757.0</td>\n",
+       "      <td>Asia</td>\n",
+       "      <td>7.19</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>2020-12-21</td>\n",
+       "      <td>2020-51</td>\n",
+       "      <td>740</td>\n",
+       "      <td>111</td>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>AF</td>\n",
+       "      <td>AFG</td>\n",
+       "      <td>38041757.0</td>\n",
+       "      <td>Asia</td>\n",
+       "      <td>6.56</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     dateRep year_week  cases_weekly  deaths_weekly countriesAndTerritories  \\\n",
+       "0 2021-01-18   2021-02           557             45             Afghanistan   \n",
+       "1 2021-01-11   2021-01           675             71             Afghanistan   \n",
+       "2 2021-01-04   2020-53           902             60             Afghanistan   \n",
+       "3 2020-12-28   2020-52          1994             88             Afghanistan   \n",
+       "4 2020-12-21   2020-51           740            111             Afghanistan   \n",
+       "\n",
+       "  geoId countryterritoryCode  popData2019 continentExp  \\\n",
+       "0    AF                  AFG   38041757.0         Asia   \n",
+       "1    AF                  AFG   38041757.0         Asia   \n",
+       "2    AF                  AFG   38041757.0         Asia   \n",
+       "3    AF                  AFG   38041757.0         Asia   \n",
+       "4    AF                  AFG   38041757.0         Asia   \n",
+       "\n",
+       "   notification_rate_per_100000_population_14-days  \n",
+       "0                                             3.24  \n",
+       "1                                             4.15  \n",
+       "2                                             7.61  \n",
+       "3                                             7.19  \n",
+       "4                                             6.56  "
+      ]
+     },
+     "execution_count": 325,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "raw_data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 326,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',\n",
+    "                        'countriesAndTerritories': 'country_name',\n",
+    "                        'countryterritoryCode': 'country_territory_code',\n",
+    "                        'popData2019': 'population_2019',\n",
+    "                        'continentExp': 'continent'}, inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 327,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['report_date', 'year_week', 'cases_weekly', 'deaths_weekly',\n",
+       "       'country_name', 'geo_id', 'country_territory_code', 'population_2019',\n",
+       "       'continent', 'notification_rate_per_100000_population_14-days'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 327,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "raw_data.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 328,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "report_date                                        datetime64[ns]\n",
+       "year_week                                                  object\n",
+       "cases_weekly                                                int64\n",
+       "deaths_weekly                                               int64\n",
+       "country_name                                               object\n",
+       "geo_id                                                     object\n",
+       "country_territory_code                                     object\n",
+       "population_2019                                           float64\n",
+       "continent                                                  object\n",
+       "notification_rate_per_100000_population_14-days           float64\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 328,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "raw_data.dtypes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 329,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>report_date</th>\n",
+       "      <th>cases_weekly</th>\n",
+       "      <th>deaths_weekly</th>\n",
+       "      <th>geo_id</th>\n",
+       "      <th>notification_rate_per_100000_population_14-days</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2021-01-18</td>\n",
+       "      <td>557</td>\n",
+       "      <td>45</td>\n",
+       "      <td>AF</td>\n",
+       "      <td>3.24</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2021-01-11</td>\n",
+       "      <td>675</td>\n",
+       "      <td>71</td>\n",
+       "      <td>AF</td>\n",
+       "      <td>4.15</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2021-01-04</td>\n",
+       "      <td>902</td>\n",
+       "      <td>60</td>\n",
+       "      <td>AF</td>\n",
+       "      <td>7.61</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>2020-12-28</td>\n",
+       "      <td>1994</td>\n",
+       "      <td>88</td>\n",
+       "      <td>AF</td>\n",
+       "      <td>7.19</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>2020-12-21</td>\n",
+       "      <td>740</td>\n",
+       "      <td>111</td>\n",
+       "      <td>AF</td>\n",
+       "      <td>6.56</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10000</th>\n",
+       "      <td>2020-04-20</td>\n",
+       "      <td>11</td>\n",
+       "      <td>0</td>\n",
+       "      <td>ZW</td>\n",
+       "      <td>0.11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10001</th>\n",
+       "      <td>2020-04-13</td>\n",
+       "      <td>5</td>\n",
+       "      <td>2</td>\n",
+       "      <td>ZW</td>\n",
+       "      <td>0.05</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10002</th>\n",
+       "      <td>2020-04-06</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>ZW</td>\n",
+       "      <td>0.05</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10003</th>\n",
+       "      <td>2020-03-30</td>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>ZW</td>\n",
+       "      <td>0.05</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10004</th>\n",
+       "      <td>2020-03-23</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>ZW</td>\n",
+       "      <td>0.00</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>10005 rows Ã— 5 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      report_date  cases_weekly  deaths_weekly geo_id  \\\n",
+       "0      2021-01-18           557             45     AF   \n",
+       "1      2021-01-11           675             71     AF   \n",
+       "2      2021-01-04           902             60     AF   \n",
+       "3      2020-12-28          1994             88     AF   \n",
+       "4      2020-12-21           740            111     AF   \n",
+       "...           ...           ...            ...    ...   \n",
+       "10000  2020-04-20            11              0     ZW   \n",
+       "10001  2020-04-13             5              2     ZW   \n",
+       "10002  2020-04-06             2              0     ZW   \n",
+       "10003  2020-03-30             5              1     ZW   \n",
+       "10004  2020-03-23             2              0     ZW   \n",
+       "\n",
+       "       notification_rate_per_100000_population_14-days  \n",
+       "0                                                 3.24  \n",
+       "1                                                 4.15  \n",
+       "2                                                 7.61  \n",
+       "3                                                 7.19  \n",
+       "4                                                 6.56  \n",
+       "...                                                ...  \n",
+       "10000                                             0.11  \n",
+       "10001                                             0.05  \n",
+       "10002                                             0.05  \n",
+       "10003                                             0.05  \n",
+       "10004                                             0.00  \n",
+       "\n",
+       "[10005 rows x 5 columns]"
+      ]
+     },
+     "execution_count": 329,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 330,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>country_name</th>\n",
+       "      <th>geo_id</th>\n",
+       "      <th>country_territory_code</th>\n",
+       "      <th>population_2019</th>\n",
+       "      <th>continent</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>AF</td>\n",
+       "      <td>AFG</td>\n",
+       "      <td>38041757.0</td>\n",
+       "      <td>Asia</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>AF</td>\n",
+       "      <td>AFG</td>\n",
+       "      <td>38041757.0</td>\n",
+       "      <td>Asia</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>AF</td>\n",
+       "      <td>AFG</td>\n",
+       "      <td>38041757.0</td>\n",
+       "      <td>Asia</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>AF</td>\n",
+       "      <td>AFG</td>\n",
+       "      <td>38041757.0</td>\n",
+       "      <td>Asia</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>AF</td>\n",
+       "      <td>AFG</td>\n",
+       "      <td>38041757.0</td>\n",
+       "      <td>Asia</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10000</th>\n",
+       "      <td>Zimbabwe</td>\n",
+       "      <td>ZW</td>\n",
+       "      <td>ZWE</td>\n",
+       "      <td>14645473.0</td>\n",
+       "      <td>Africa</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10001</th>\n",
+       "      <td>Zimbabwe</td>\n",
+       "      <td>ZW</td>\n",
+       "      <td>ZWE</td>\n",
+       "      <td>14645473.0</td>\n",
+       "      <td>Africa</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10002</th>\n",
+       "      <td>Zimbabwe</td>\n",
+       "      <td>ZW</td>\n",
+       "      <td>ZWE</td>\n",
+       "      <td>14645473.0</td>\n",
+       "      <td>Africa</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10003</th>\n",
+       "      <td>Zimbabwe</td>\n",
+       "      <td>ZW</td>\n",
+       "      <td>ZWE</td>\n",
+       "      <td>14645473.0</td>\n",
+       "      <td>Africa</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10004</th>\n",
+       "      <td>Zimbabwe</td>\n",
+       "      <td>ZW</td>\n",
+       "      <td>ZWE</td>\n",
+       "      <td>14645473.0</td>\n",
+       "      <td>Africa</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>10005 rows Ã— 5 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      country_name geo_id country_territory_code  population_2019 continent\n",
+       "0      Afghanistan     AF                    AFG       38041757.0      Asia\n",
+       "1      Afghanistan     AF                    AFG       38041757.0      Asia\n",
+       "2      Afghanistan     AF                    AFG       38041757.0      Asia\n",
+       "3      Afghanistan     AF                    AFG       38041757.0      Asia\n",
+       "4      Afghanistan     AF                    AFG       38041757.0      Asia\n",
+       "...            ...    ...                    ...              ...       ...\n",
+       "10000     Zimbabwe     ZW                    ZWE       14645473.0    Africa\n",
+       "10001     Zimbabwe     ZW                    ZWE       14645473.0    Africa\n",
+       "10002     Zimbabwe     ZW                    ZWE       14645473.0    Africa\n",
+       "10003     Zimbabwe     ZW                    ZWE       14645473.0    Africa\n",
+       "10004     Zimbabwe     ZW                    ZWE       14645473.0    Africa\n",
+       "\n",
+       "[10005 rows x 5 columns]"
+      ]
+     },
+     "execution_count": 330,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "raw_data[['country_name', 'geo_id', 'country_territory_code',\n",
+    "       'population_2019', 'continent']]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 331,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']].to_sql(\n",
+    "    'weekly_cases',\n",
+    "    engine,\n",
+    "    if_exists='replace',\n",
+    "    index=False,\n",
+    "    chunksize=500,\n",
+    "    dtype={\n",
+    "        \"report_date\": Date,\n",
+    "        \"cases_weekly\": Integer,\n",
+    "        \"deaths_weekly\": Integer,\n",
+    "        \"geo_id\": String,\n",
+    "        \"notification_rate_per_100000_population_14-days\": Float\n",
+    "    }\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 332,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "raw_data[['country_name', 'geo_id', 'country_territory_code',\n",
+    "       'population_2019', 'continent']].drop_duplicates().to_sql(\n",
+    "    'countries',\n",
+    "    engine,\n",
+    "    if_exists='replace',\n",
+    "    index=False,\n",
+    "    chunksize=500,\n",
+    "    dtype={\n",
+    "        \"country_name\": Text,\n",
+    "        \"geo_id\": String,\n",
+    "        \"country_territory_code\": String,\n",
+    "        \"population_2019\": Integer,\n",
+    "        \"continent\": Text\n",
+    "    }\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 333,
+   "metadata": {
+    "Collapsed": "false",
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# %sql select geo_id from weekly_cases limit 10"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 334,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "# %%sql alter table weekly_cases add primary key (geo_id, report_date);\n",
+    "# alter table countries add primary key (geo_id);\n",
+    "# alter table weekly_cases add foreign key (geo_id) references countries(geo_id);\n",
+    "# alter table weekly_cases add culm_cases integer;\n",
+    "# alter table weekly_cases add culm_deaths integer;"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 335,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with engine.connect() as connection:\n",
+    "    connection.execute('alter table weekly_cases add primary key (geo_id, report_date)')\n",
+    "    connection.execute('alter table countries add primary key (geo_id);')\n",
+    "    connection.execute('alter table weekly_cases add foreign key (geo_id) references countries(geo_id);')\n",
+    "    connection.execute('alter table weekly_cases add culm_cases integer;')\n",
+    "    connection.execute('alter table weekly_cases add culm_deaths integer;')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 336,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "# %sql select report_date, cases_weekly, country_name from weekly_cases join countries using (geo_id) order by report_date desc limit 10"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 337,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "# %sql select report_date, cases_weekly, country_name from weekly_cases join countries on weekly_cases.geo_id = countries.geo_id order by report_date desc limit 10"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 338,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " * postgresql://covid:***@localhost/covid\n",
+      "10 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <thead>\n",
+       "        <tr>\n",
+       "            <th>report_date</th>\n",
+       "            <th>cases_weekly</th>\n",
+       "            <th>deaths_weekly</th>\n",
+       "            <th>geo_id</th>\n",
+       "            <th>notification_rate_per_100000_population_14-days</th>\n",
+       "            <th>culm_cases</th>\n",
+       "            <th>culm_deaths</th>\n",
+       "        </tr>\n",
+       "    </thead>\n",
+       "    <tbody>\n",
+       "        <tr>\n",
+       "            <td>2021-01-18</td>\n",
+       "            <td>557</td>\n",
+       "            <td>45</td>\n",
+       "            <td>AF</td>\n",
+       "            <td>3.24</td>\n",
+       "            <td>None</td>\n",
+       "            <td>None</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-11</td>\n",
+       "            <td>675</td>\n",
+       "            <td>71</td>\n",
+       "            <td>AF</td>\n",
+       "            <td>4.15</td>\n",
+       "            <td>None</td>\n",
+       "            <td>None</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-04</td>\n",
+       "            <td>902</td>\n",
+       "            <td>60</td>\n",
+       "            <td>AF</td>\n",
+       "            <td>7.61</td>\n",
+       "            <td>None</td>\n",
+       "            <td>None</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2020-12-28</td>\n",
+       "            <td>1994</td>\n",
+       "            <td>88</td>\n",
+       "            <td>AF</td>\n",
+       "            <td>7.19</td>\n",
+       "            <td>None</td>\n",
+       "            <td>None</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2020-12-21</td>\n",
+       "            <td>740</td>\n",
+       "            <td>111</td>\n",
+       "            <td>AF</td>\n",
+       "            <td>6.56</td>\n",
+       "            <td>None</td>\n",
+       "            <td>None</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2020-12-14</td>\n",
+       "            <td>1757</td>\n",
+       "            <td>71</td>\n",
+       "            <td>AF</td>\n",
+       "            <td>9.01</td>\n",
+       "            <td>None</td>\n",
+       "            <td>None</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2020-12-07</td>\n",
+       "            <td>1672</td>\n",
+       "            <td>137</td>\n",
+       "            <td>AF</td>\n",
+       "            <td>7.22</td>\n",
+       "            <td>None</td>\n",
+       "            <td>None</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2020-11-30</td>\n",
+       "            <td>1073</td>\n",
+       "            <td>68</td>\n",
+       "            <td>AF</td>\n",
+       "            <td>6.42</td>\n",
+       "            <td>None</td>\n",
+       "            <td>None</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2020-11-23</td>\n",
+       "            <td>1368</td>\n",
+       "            <td>69</td>\n",
+       "            <td>AF</td>\n",
+       "            <td>6.66</td>\n",
+       "            <td>None</td>\n",
+       "            <td>None</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2020-11-16</td>\n",
+       "            <td>1164</td>\n",
+       "            <td>61</td>\n",
+       "            <td>AF</td>\n",
+       "            <td>4.65</td>\n",
+       "            <td>None</td>\n",
+       "            <td>None</td>\n",
+       "        </tr>\n",
+       "    </tbody>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(datetime.date(2021, 1, 18), 557, 45, 'AF', 3.24, None, None),\n",
+       " (datetime.date(2021, 1, 11), 675, 71, 'AF', 4.15, None, None),\n",
+       " (datetime.date(2021, 1, 4), 902, 60, 'AF', 7.61, None, None),\n",
+       " (datetime.date(2020, 12, 28), 1994, 88, 'AF', 7.19, None, None),\n",
+       " (datetime.date(2020, 12, 21), 740, 111, 'AF', 6.56, None, None),\n",
+       " (datetime.date(2020, 12, 14), 1757, 71, 'AF', 9.01, None, None),\n",
+       " (datetime.date(2020, 12, 7), 1672, 137, 'AF', 7.22, None, None),\n",
+       " (datetime.date(2020, 11, 30), 1073, 68, 'AF', 6.42, None, None),\n",
+       " (datetime.date(2020, 11, 23), 1368, 69, 'AF', 6.66, None, None),\n",
+       " (datetime.date(2020, 11, 16), 1164, 61, 'AF', 4.65, None, None)]"
+      ]
+     },
+     "execution_count": 338,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%sql select * from weekly_cases limit 10"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 339,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "# %%sql\n",
+    "# with culm as \n",
+    "#     (select report_date, geo_id,\n",
+    "#         sum(cases_weekly) over (partition by geo_id \n",
+    "#                                 order by report_date) as culm_data\n",
+    "#      from weekly_cases\n",
+    "#     )\n",
+    "# update weekly_cases\n",
+    "#     set culm_cases = culm_data\n",
+    "#     from culm\n",
+    "#     where weekly_cases.report_date = culm.report_date and\n",
+    "#           weekly_cases.geo_id = culm.geo_id"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 340,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "query_string = '''with culm as \n",
+    "    (select report_date, geo_id,\n",
+    "        sum(cases_weekly) over (partition by geo_id \n",
+    "                                order by report_date) as culm_data\n",
+    "     from weekly_cases\n",
+    "    )\n",
+    "update weekly_cases\n",
+    "    set culm_cases = culm_data\n",
+    "    from culm\n",
+    "    where weekly_cases.report_date = culm.report_date and\n",
+    "          weekly_cases.geo_id = culm.geo_id'''\n",
+    "with engine.connect() as connection:\n",
+    "    connection.execute(query_string)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 341,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "# %%sql\n",
+    "# with culm as \n",
+    "#     (select report_date, geo_id,\n",
+    "#         sum(deaths_weekly) over (partition by geo_id \n",
+    "#                                 order by report_date) as culm_data\n",
+    "#      from weekly_cases\n",
+    "#     )\n",
+    "# update weekly_cases\n",
+    "#     set culm_deaths = culm_data\n",
+    "#     from culm\n",
+    "#     where weekly_cases.report_date = culm.report_date and\n",
+    "#           weekly_cases.geo_id = culm.geo_id"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 342,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "query_string = '''with culm as \n",
+    "    (select report_date, geo_id,\n",
+    "        sum(deaths_weekly) over (partition by geo_id \n",
+    "                                order by report_date) as culm_data\n",
+    "     from weekly_cases\n",
+    "    )\n",
+    "update weekly_cases\n",
+    "    set culm_deaths = culm_data\n",
+    "    from culm\n",
+    "    where weekly_cases.report_date = culm.report_date and\n",
+    "          weekly_cases.geo_id = culm.geo_id'''\n",
+    "with engine.connect() as connection:\n",
+    "    connection.execute(query_string)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 343,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
+      "                                 Dload  Upload   Total   Spent    Left  Speed\n",
+      "100 26053  100 26053    0     0  54277      0 --:--:-- --:--:-- --:--:-- 54277\n"
+     ]
+    }
+   ],
+   "source": [
+    "uk_query_string = (\n",
+    "\"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n",
+    "\"metric=covidOccupiedMVBeds&\"\n",
+    "\"metric=newAdmissions&\"\n",
+    "\"metric=newCasesBySpecimenDate&\"\n",
+    "\"metric=hospitalCases&\"\n",
+    "\"metric=newDeaths28DaysByPublishDate&\"\n",
+    "\"format=csv\"\n",
+    ")\n",
+    "\n",
+    "!curl \"$uk_query_string\" > uk_data.csv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 344,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
+      "                                 Dload  Upload   Total   Spent    Left  Speed\n",
+      "100 20223  100 20223    0     0  78688      0 --:--:-- --:--:-- --:--:-- 78383\n"
+     ]
+    }
+   ],
+   "source": [
+    "test_query_string = (\n",
+    "\"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n",
+    "\"metric=newPCRTestsByPublishDate&\"\n",
+    "\"metric=newTestsByPublishDate&\"\n",
+    "\"metric=newPillarOneTwoTestsByPublishDate&\"\n",
+    "\"format=csv\"\n",
+    ")\n",
+    "!curl \"$test_query_string\" > test_data.csv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 345,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "# hospital_query_string = (\n",
+    "# \"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n",
+    "# \"metric=newAdmissions&\"\n",
+    "# \"format=csv\"\n",
+    "# )\n",
+    "# !curl \"$hospital_query_string\" > hospital_admissions.csv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 346,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "# hospital_query_string = (\n",
+    "# \"https://api.coronavirus.data.gov.uk/v1/data?\"\n",
+    "# \"filters=areaName=United%2520Kingdom;areaType=overview&\"\n",
+    "# \"structure=%7B%22date%22:%22date%22,%22areaName%22:%22areaName%22,%22areaType%22:%22areaType%22,\"\n",
+    "# \"%22newAdmissions%22:%22newAdmissions%22,%22cumAdmissions%22:%22cumAdmissions%22%7D&format=csv\"\n",
+    "# )\n",
+    "    \n",
+    "# !curl \"$hospital_query_string\" | gunzip > hospital_admissions.csv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 347,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>date</th>\n",
+       "      <th>areaType</th>\n",
+       "      <th>areaCode</th>\n",
+       "      <th>areaName</th>\n",
+       "      <th>covidOccupiedMVBeds</th>\n",
+       "      <th>newAdmissions</th>\n",
+       "      <th>newCasesBySpecimenDate</th>\n",
+       "      <th>hospitalCases</th>\n",
+       "      <th>newDeaths28DaysByPublishDate</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2021-01-26</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1631</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2021-01-25</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>4032.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>4482.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>592</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2021-01-24</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>4077.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>14266.0</td>\n",
+       "      <td>37561.0</td>\n",
+       "      <td>610</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>2021-01-23</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>4066.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>20495.0</td>\n",
+       "      <td>37266.0</td>\n",
+       "      <td>1348</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>2021-01-22</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>4076.0</td>\n",
+       "      <td>3341.0</td>\n",
+       "      <td>29094.0</td>\n",
+       "      <td>38144.0</td>\n",
+       "      <td>1401</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>385</th>\n",
+       "      <td>2020-01-06</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>386</th>\n",
+       "      <td>2020-01-05</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>387</th>\n",
+       "      <td>2020-01-04</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>388</th>\n",
+       "      <td>2020-01-03</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>389</th>\n",
+       "      <td>2020-06-14</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>393.0</td>\n",
+       "      <td>364.0</td>\n",
+       "      <td>821.0</td>\n",
+       "      <td>4907.0</td>\n",
+       "      <td>27</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>390 rows Ã— 9 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "          date  areaType   areaCode        areaName  covidOccupiedMVBeds  \\\n",
+       "0   2021-01-26  overview  K02000001  United Kingdom                  NaN   \n",
+       "1   2021-01-25  overview  K02000001  United Kingdom               4032.0   \n",
+       "2   2021-01-24  overview  K02000001  United Kingdom               4077.0   \n",
+       "3   2021-01-23  overview  K02000001  United Kingdom               4066.0   \n",
+       "4   2021-01-22  overview  K02000001  United Kingdom               4076.0   \n",
+       "..         ...       ...        ...             ...                  ...   \n",
+       "385 2020-01-06  overview  K02000001  United Kingdom                  NaN   \n",
+       "386 2020-01-05  overview  K02000001  United Kingdom                  NaN   \n",
+       "387 2020-01-04  overview  K02000001  United Kingdom                  NaN   \n",
+       "388 2020-01-03  overview  K02000001  United Kingdom                  NaN   \n",
+       "389 2020-06-14  overview  K02000001  United Kingdom                393.0   \n",
+       "\n",
+       "     newAdmissions  newCasesBySpecimenDate  hospitalCases  \\\n",
+       "0              NaN                     NaN            NaN   \n",
+       "1              NaN                  4482.0            NaN   \n",
+       "2              NaN                 14266.0        37561.0   \n",
+       "3              NaN                 20495.0        37266.0   \n",
+       "4           3341.0                 29094.0        38144.0   \n",
+       "..             ...                     ...            ...   \n",
+       "385            NaN                     NaN            NaN   \n",
+       "386            NaN                     NaN            NaN   \n",
+       "387            NaN                     NaN            NaN   \n",
+       "388            NaN                     NaN            NaN   \n",
+       "389          364.0                   821.0         4907.0   \n",
+       "\n",
+       "     newDeaths28DaysByPublishDate  \n",
+       "0                            1631  \n",
+       "1                             592  \n",
+       "2                             610  \n",
+       "3                            1348  \n",
+       "4                            1401  \n",
+       "..                            ...  \n",
+       "385                             0  \n",
+       "386                             0  \n",
+       "387                             0  \n",
+       "388                             0  \n",
+       "389                            27  \n",
+       "\n",
+       "[390 rows x 9 columns]"
+      ]
+     },
+     "execution_count": 347,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "uk_data = pd.read_csv('uk_data.csv', \n",
+    "                       parse_dates=[0], dayfirst=True)\n",
+    "uk_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 348,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>date</th>\n",
+       "      <th>areaType</th>\n",
+       "      <th>areaCode</th>\n",
+       "      <th>areaName</th>\n",
+       "      <th>newPCRTestsByPublishDate</th>\n",
+       "      <th>newTestsByPublishDate</th>\n",
+       "      <th>newPillarOneTwoTestsByPublishDate</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2021-01-25</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>196510.0</td>\n",
+       "      <td>546734</td>\n",
+       "      <td>531104</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2021-01-24</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>262111.0</td>\n",
+       "      <td>412775</td>\n",
+       "      <td>394479</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2021-01-23</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>389209.0</td>\n",
+       "      <td>486425</td>\n",
+       "      <td>465231</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>2021-01-22</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>401075.0</td>\n",
+       "      <td>631901</td>\n",
+       "      <td>608829</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>2021-01-21</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>439408.0</td>\n",
+       "      <td>668989</td>\n",
+       "      <td>644537</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>296</th>\n",
+       "      <td>2020-04-03</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>14629</td>\n",
+       "      <td>14293</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>297</th>\n",
+       "      <td>2020-04-02</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>13623</td>\n",
+       "      <td>13457</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>298</th>\n",
+       "      <td>2020-04-01</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>11947</td>\n",
+       "      <td>11924</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>299</th>\n",
+       "      <td>2020-03-31</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>11896</td>\n",
+       "      <td>11896</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>300</th>\n",
+       "      <td>2020-06-14</td>\n",
+       "      <td>overview</td>\n",
+       "      <td>K02000001</td>\n",
+       "      <td>United Kingdom</td>\n",
+       "      <td>69529.0</td>\n",
+       "      <td>83965</td>\n",
+       "      <td>66688</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>301 rows Ã— 7 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "          date  areaType   areaCode        areaName  newPCRTestsByPublishDate  \\\n",
+       "0   2021-01-25  overview  K02000001  United Kingdom                  196510.0   \n",
+       "1   2021-01-24  overview  K02000001  United Kingdom                  262111.0   \n",
+       "2   2021-01-23  overview  K02000001  United Kingdom                  389209.0   \n",
+       "3   2021-01-22  overview  K02000001  United Kingdom                  401075.0   \n",
+       "4   2021-01-21  overview  K02000001  United Kingdom                  439408.0   \n",
+       "..         ...       ...        ...             ...                       ...   \n",
+       "296 2020-04-03  overview  K02000001  United Kingdom                       NaN   \n",
+       "297 2020-04-02  overview  K02000001  United Kingdom                       NaN   \n",
+       "298 2020-04-01  overview  K02000001  United Kingdom                       NaN   \n",
+       "299 2020-03-31  overview  K02000001  United Kingdom                       NaN   \n",
+       "300 2020-06-14  overview  K02000001  United Kingdom                   69529.0   \n",
+       "\n",
+       "     newTestsByPublishDate  newPillarOneTwoTestsByPublishDate  \n",
+       "0                   546734                             531104  \n",
+       "1                   412775                             394479  \n",
+       "2                   486425                             465231  \n",
+       "3                   631901                             608829  \n",
+       "4                   668989                             644537  \n",
+       "..                     ...                                ...  \n",
+       "296                  14629                              14293  \n",
+       "297                  13623                              13457  \n",
+       "298                  11947                              11924  \n",
+       "299                  11896                              11896  \n",
+       "300                  83965                              66688  \n",
+       "\n",
+       "[301 rows x 7 columns]"
+      ]
+     },
+     "execution_count": 348,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "test_data = pd.read_csv('test_data.csv', \n",
+    "                       parse_dates=[0], dayfirst=True)\n",
+    "test_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 349,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['date', 'areaType', 'areaCode', 'areaName', 'newPCRTestsByPublishDate',\n",
+       "       'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 349,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "test_data.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 350,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "uk_data = uk_data.merge(test_data[['date', 'newPCRTestsByPublishDate',\n",
+    "       'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate']], how='outer', on='date')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 351,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['date', 'areaType', 'areaCode', 'areaName', 'covidOccupiedMVBeds',\n",
+       "       'newAdmissions', 'newCasesBySpecimenDate', 'hospitalCases',\n",
+       "       'newDeaths28DaysByPublishDate', 'newPCRTestsByPublishDate',\n",
+       "       'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 351,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "uk_data.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 352,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "uk_data.rename(\n",
+    "    columns={\n",
+    "        'covidOccupiedMVBeds': 'ventilator_beds',\n",
+    "        'newCasesBySpecimenDate': 'new_cases',\n",
+    "        'hospitalCases': 'hospital_cases', \n",
+    "        'newDeaths28DaysByPublishDate': 'new_deaths',\n",
+    "        'newAdmissions': 'new_admissions',\n",
+    "        'newPCRTestsByPublishDate': 'new_pcr_tests',\n",
+    "        'newTestsByPublishDate': 'new_tests',\n",
+    "        'newPillarOneTwoTestsByPublishDate': 'new_pillar_1_2_tests'\n",
+    "    }, inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 353,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "date                    datetime64[ns]\n",
+       "areaType                        object\n",
+       "areaCode                        object\n",
+       "areaName                        object\n",
+       "ventilator_beds                float64\n",
+       "new_admissions                 float64\n",
+       "new_cases                      float64\n",
+       "hospital_cases                 float64\n",
+       "new_deaths                       int64\n",
+       "new_pcr_tests                  float64\n",
+       "new_tests                      float64\n",
+       "new_pillar_1_2_tests           float64\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 353,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "uk_data.dtypes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 354,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['date', 'areaType', 'areaCode', 'areaName', 'ventilator_beds',\n",
+       "       'new_admissions', 'new_cases', 'hospital_cases', 'new_deaths',\n",
+       "       'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 354,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    " uk_data.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 355,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "uk_data[['date', \n",
+    "         'hospital_cases', 'ventilator_beds',\n",
+    "         'new_cases', 'new_deaths', \n",
+    "         'hospital_cases', 'new_admissions',\n",
+    "         'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'\n",
+    "        ]].to_sql(\n",
+    "    'uk_data',\n",
+    "    engine,\n",
+    "    if_exists='replace',\n",
+    "    index=False,\n",
+    "    chunksize=500,\n",
+    "    dtype={\n",
+    "        \"date\": Date,\n",
+    "        \"hospital_cases\": Integer,\n",
+    "        \"ventilator_beds\": Integer,\n",
+    "        \"new_cases\": Integer,\n",
+    "        \"hospital_cases\": Integer,\n",
+    "        \"new_deaths\": Integer,\n",
+    "        \"new_admissions\": Integer,\n",
+    "        'new_pcr_tests': Integer, \n",
+    "        'new_tests': Integer, \n",
+    "        'new_pillar_1_2_tests': Integer\n",
+    "    }\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 356,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "# %sql select * from uk_data order by date desc limit 10"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 424,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "query_string = '''drop table if exists uk_data_7;\n",
+    "create table uk_data_7 \n",
+    "(date date primary key,\n",
+    " hospital_cases real,\n",
+    " ventilator_beds real,\n",
+    " new_cases real,\n",
+    " new_deaths real,\n",
+    " new_admissions real,\n",
+    " new_pcr_tests real,\n",
+    " new_tests real,\n",
+    " new_pillar_1_2_tests real\n",
+    ");'''\n",
+    "\n",
+    "with engine.connect() as connection:\n",
+    "    connection.execute(query_string)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 425,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "update_string = '''with ownd as (\n",
+    "    select date,\n",
+    "        avg(hospital_cases) over wnd as w_hospital_cases,\n",
+    "        avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
+    "        avg(new_cases) over wnd as w_new_cases,\n",
+    "        avg(new_deaths) over wnd as w_new_deaths,\n",
+    "        avg(new_admissions) over wnd as w_new_admissions,\n",
+    "        avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
+    "        avg(new_tests) over wnd as w_new_tests,\n",
+    "        avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
+    "        count(*) over wnd as w_size\n",
+    "    from uk_data\n",
+    "    window wnd as (\n",
+    "        order by uk_data.date\n",
+    "        rows between 3 preceding and 3 following\n",
+    "    )\n",
+    ")\n",
+    "insert into uk_data_7(date, \n",
+    "    hospital_cases, \n",
+    "    ventilator_beds, \n",
+    "    new_cases,\n",
+    "    new_deaths,\n",
+    "    new_admissions, \n",
+    "    new_pcr_tests, \n",
+    "    new_tests, \n",
+    "    new_pillar_1_2_tests\n",
+    "    )\n",
+    "(select date,\n",
+    "        w_hospital_cases,\n",
+    "        w_ventilator_beds,\n",
+    "        w_new_cases,\n",
+    "        w_new_deaths,\n",
+    "        w_new_admissions,\n",
+    "        w_new_pcr_tests,\n",
+    "        w_new_tests,\n",
+    "        w_new_pillar_1_2_tests\n",
+    "    from ownd\n",
+    "    where w_size = 7\n",
+    ")'''\n",
+    "with engine.connect() as connection:\n",
+    "    connection.execute(update_string)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 398,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "# %%sql insert into uk_data_7(date, ventilator_beds, new_cases, hospital_cases, new_deaths, new_admissions)\n",
+    "# values (\n",
+    "#     select date, \n",
+    "#     avg(ventilator_beds) over (order by date rows between 6 preceding and current row)\n",
+    "#     from uk_data\n",
+    "# )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 399,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "# query_string = '''insert into uk_data_7(date, hospital_cases)\n",
+    "#     select uk_data.date, \n",
+    "#         avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as hospital_cases\n",
+    "#     from uk_data'''\n",
+    "# with engine.connect() as connection:\n",
+    "#     connection.execute(query_string)   "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 360,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "# %%sql \n",
+    "# with m7 as \n",
+    "#     (select uk_data.date as date7, \n",
+    "#         avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 6 preceding and current row) as nc7\n",
+    "#      from uk_data\n",
+    "#     )\n",
+    "# update uk_data_7\n",
+    "#     set ventilator_beds = nc7\n",
+    "#     from m7\n",
+    "#     where uk_data_7.date = m7.date7"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 361,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " * postgresql://covid:***@localhost/covid\n",
+      "390 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 361,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# %%sql\n",
+    "# with m7 as \n",
+    "#     (select uk_data.date as date7, \n",
+    "#         avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7\n",
+    "#      from uk_data\n",
+    "#     )\n",
+    "# update uk_data_7\n",
+    "#     set ventilator_beds = nc7\n",
+    "#     from m7\n",
+    "#     where uk_data_7.date = m7.date7"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 362,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " * postgresql://covid:***@localhost/covid\n",
+      "390 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 362,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# %%sql\n",
+    "# with m7 as \n",
+    "#     (select uk_data.date as date7, \n",
+    "#         avg(uk_data.new_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7\n",
+    "#      from uk_data\n",
+    "#     )\n",
+    "# update uk_data_7\n",
+    "#     set new_cases = nc7\n",
+    "#     from m7\n",
+    "#     where uk_data_7.date = m7.date7"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 363,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " * postgresql://covid:***@localhost/covid\n",
+      "390 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 363,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# %%sql\n",
+    "# with m7 as \n",
+    "#     (select uk_data.date as date7, \n",
+    "#         avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
+    "#      from uk_data\n",
+    "#     )\n",
+    "# update uk_data_7\n",
+    "#     set hospital_cases = d7\n",
+    "#     from m7\n",
+    "#     where uk_data_7.date = m7.date7"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 364,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " * postgresql://covid:***@localhost/covid\n",
+      "390 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 364,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# %%sql\n",
+    "# with m7 as \n",
+    "#     (select uk_data.date as date7, \n",
+    "#         avg(uk_data.new_deaths) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
+    "#      from uk_data\n",
+    "#     )\n",
+    "# update uk_data_7\n",
+    "#     set new_deaths = d7\n",
+    "#     from m7\n",
+    "#     where uk_data_7.date = m7.date7"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 365,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " * postgresql://covid:***@localhost/covid\n",
+      "390 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 365,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# %%sql\n",
+    "# with m7 as \n",
+    "#     (select uk_data.date as date7, \n",
+    "#         avg(uk_data.new_admissions) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
+    "#      from uk_data\n",
+    "#     )\n",
+    "# update uk_data_7\n",
+    "#     set new_admissions = d7\n",
+    "#     from m7\n",
+    "#     where uk_data_7.date = m7.date7"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 366,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " * postgresql://covid:***@localhost/covid\n",
+      "390 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 366,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# %%sql\n",
+    "# with m7 as \n",
+    "#     (select uk_data.date as date7, \n",
+    "#         avg(uk_data.new_pcr_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
+    "#      from uk_data\n",
+    "#     )\n",
+    "# update uk_data_7\n",
+    "#     set new_pcr_tests = d7\n",
+    "#     from m7\n",
+    "#     where uk_data_7.date = m7.date7"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 367,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " * postgresql://covid:***@localhost/covid\n",
+      "390 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 367,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# %%sql\n",
+    "# with m7 as \n",
+    "#     (select uk_data.date as date7, \n",
+    "#         avg(uk_data.new_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
+    "#      from uk_data\n",
+    "#     )\n",
+    "# update uk_data_7\n",
+    "#     set new_tests = d7\n",
+    "#     from m7\n",
+    "#     where uk_data_7.date = m7.date7"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 309,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "# %%sql\n",
+    "# with m7 as \n",
+    "#     (select uk_data.date as date7, \n",
+    "#         avg(uk_data.new_pillar_1_2_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
+    "#      from uk_data\n",
+    "#     )\n",
+    "# update uk_data_7\n",
+    "#     set new_pillar_1_2_tests = d7\n",
+    "#     from m7\n",
+    "#     where uk_data_7.date = m7.date7"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 310,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " * postgresql://covid:***@localhost/covid\n",
+      "0 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 310,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# %%sql\n",
+    "# with wnd as\n",
+    "#     (   select date, \n",
+    "#              avg(new_pillar_1_2_tests)\n",
+    "#                 over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests\n",
+    "#         from uk_data\n",
+    "#     )\n",
+    "# update uk_data_7\n",
+    "#     set new_pillar_1_2_tests = wnd.a_new_pillar_1_2_tests\n",
+    "#     from wnd\n",
+    "#     where uk_data_7.date = wnd.date\n",
+    "#     and (select count(*) from wnd) = 7"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 379,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " * postgresql://covid:***@localhost/covid\n",
+      "10 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <thead>\n",
+       "        <tr>\n",
+       "            <th>date</th>\n",
+       "            <th>new_pillar_1_2_tests</th>\n",
+       "            <th>a_new_pillar_1_2_tests</th>\n",
+       "            <th>a_count</th>\n",
+       "        </tr>\n",
+       "    </thead>\n",
+       "    <tbody>\n",
+       "        <tr>\n",
+       "            <td>2021-01-26</td>\n",
+       "            <td>None</td>\n",
+       "            <td>463604.666666666667</td>\n",
+       "            <td>3</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-25</td>\n",
+       "            <td>None</td>\n",
+       "            <td>499910.750000000000</td>\n",
+       "            <td>4</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-24</td>\n",
+       "            <td>None</td>\n",
+       "            <td>528836.000000000000</td>\n",
+       "            <td>5</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-23</td>\n",
+       "            <td>None</td>\n",
+       "            <td>543566.833333333333</td>\n",
+       "            <td>6</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-22</td>\n",
+       "            <td>None</td>\n",
+       "            <td>545780.285714285714</td>\n",
+       "            <td>7</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-21</td>\n",
+       "            <td>None</td>\n",
+       "            <td>546626.571428571429</td>\n",
+       "            <td>7</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-20</td>\n",
+       "            <td>None</td>\n",
+       "            <td>546806.285714285714</td>\n",
+       "            <td>7</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-19</td>\n",
+       "            <td>None</td>\n",
+       "            <td>547402.428571428571</td>\n",
+       "            <td>7</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-18</td>\n",
+       "            <td>None</td>\n",
+       "            <td>542617.857142857143</td>\n",
+       "            <td>7</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-17</td>\n",
+       "            <td>None</td>\n",
+       "            <td>545088.142857142857</td>\n",
+       "            <td>7</td>\n",
+       "        </tr>\n",
+       "    </tbody>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(datetime.date(2021, 1, 26), None, Decimal('463604.666666666667'), 3),\n",
+       " (datetime.date(2021, 1, 25), None, Decimal('499910.750000000000'), 4),\n",
+       " (datetime.date(2021, 1, 24), None, Decimal('528836.000000000000'), 5),\n",
+       " (datetime.date(2021, 1, 23), None, Decimal('543566.833333333333'), 6),\n",
+       " (datetime.date(2021, 1, 22), None, Decimal('545780.285714285714'), 7),\n",
+       " (datetime.date(2021, 1, 21), None, Decimal('546626.571428571429'), 7),\n",
+       " (datetime.date(2021, 1, 20), None, Decimal('546806.285714285714'), 7),\n",
+       " (datetime.date(2021, 1, 19), None, Decimal('547402.428571428571'), 7),\n",
+       " (datetime.date(2021, 1, 18), None, Decimal('542617.857142857143'), 7),\n",
+       " (datetime.date(2021, 1, 17), None, Decimal('545088.142857142857'), 7)]"
+      ]
+     },
+     "execution_count": 379,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# %%sql\n",
+    "# with wnd as\n",
+    "#     (   select date, \n",
+    "#              avg(new_pillar_1_2_tests)\n",
+    "#                 over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests,\n",
+    "#              count(new_pillar_1_2_tests)\n",
+    "#                 over (order by uk_data.date rows between 3 preceding and 3 following) as a_count\n",
+    "#         from uk_data\n",
+    "#     )\n",
+    "# select uk_data_7.date, new_pillar_1_2_tests, wnd.a_new_pillar_1_2_tests, wnd.a_count\n",
+    "#     from uk_data_7, wnd\n",
+    "#     where uk_data_7.date = wnd.date\n",
+    "#     order by uk_data_7.date desc limit 10\n",
+    "    \n",
+    "# select date, \n",
+    "#     count(*) over wnd as w_size\n",
+    "# from uk_data\n",
+    "# window wnd as (\n",
+    "#     order by uk_data.date\n",
+    "#     rows between 3 preceding and 3 following\n",
+    "# );"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 401,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "# %%sql\n",
+    "# select date, \n",
+    "#     count(*) over wnd as w_size\n",
+    "# from uk_data\n",
+    "# window wnd as (\n",
+    "#     order by uk_data.date\n",
+    "#     rows between 3 preceding and 3 following\n",
+    "# )\n",
+    "# order by date desc limit 10"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 407,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " * postgresql://covid:***@localhost/covid\n",
+      "0 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 407,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# %%sql\n",
+    "# with ownd as (\n",
+    "#     select date,\n",
+    "#         avg(hospital_cases) over wnd as w_hospital_cases,\n",
+    "#         avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
+    "#         avg(new_cases) over wnd as w_new_cases,\n",
+    "#         avg(new_deaths) over wnd as w_new_deaths,\n",
+    "#         avg(new_admissions) over wnd as w_new_admissions,\n",
+    "#         avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
+    "#         avg(new_tests) over wnd as w_new_tests,\n",
+    "#         avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
+    "#         count(*) over wnd as w_size\n",
+    "#     from uk_data\n",
+    "#     window wnd as (\n",
+    "#         order by uk_data.date\n",
+    "#         rows between 3 preceding and 3 following\n",
+    "#     ))\n",
+    "# insert into uk_data_7(date, \n",
+    "#     hospital_cases, \n",
+    "#     ventilator_beds, \n",
+    "#     new_cases,\n",
+    "#     new_deaths,\n",
+    "#     new_admissions, \n",
+    "#     new_pcr_tests, \n",
+    "#     new_tests, \n",
+    "#     new_pillar_1_2_tests\n",
+    "#     )\n",
+    "# (select date,\n",
+    "#         avg(hospital_cases) over wnd as w_hospital_cases,\n",
+    "#         avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
+    "#         avg(new_cases) over wnd as w_new_cases,\n",
+    "#         avg(new_deaths) over wnd as w_new_deaths,\n",
+    "#         avg(new_admissions) over wnd as w_new_admissions,\n",
+    "#         avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
+    "#         avg(new_tests) over wnd as w_new_tests,\n",
+    "#         avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
+    "#         count(*) over wnd as w_size\n",
+    "#     from uk_data\n",
+    "#     window wnd as (\n",
+    "#         order by uk_data.date\n",
+    "#         rows between 3 preceding and 3 following\n",
+    "#     )\n",
+    "# )\n",
+    "#     set date = ownd.date,\n",
+    "#         hospital_cases = w_hospital_cases,\n",
+    "#         ventilator_beds = w_ventilator_beds,\n",
+    "#         new_cases = w_new_cases,\n",
+    "#         new_deaths = w_new_deaths,\n",
+    "#         new_admissions = w_new_admissions,\n",
+    "#         new_pcr_tests = w_new_pcr_tests,\n",
+    "#         new_tests = w_new_tests,\n",
+    "#         new_pillar_1_2_tests = w_new_pillar_1_2_tests\n",
+    "#     from ownd\n",
+    "#     where w_size = 7"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 417,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " * postgresql://covid:***@localhost/covid\n",
+      "384 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 417,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# %%sql\n",
+    "# with ownd as (\n",
+    "#     select date,\n",
+    "#         avg(hospital_cases) over wnd as w_hospital_cases,\n",
+    "#         avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
+    "#         avg(new_cases) over wnd as w_new_cases,\n",
+    "#         avg(new_deaths) over wnd as w_new_deaths,\n",
+    "#         avg(new_admissions) over wnd as w_new_admissions,\n",
+    "#         avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
+    "#         avg(new_tests) over wnd as w_new_tests,\n",
+    "#         avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
+    "#         count(*) over wnd as w_size\n",
+    "#     from uk_data\n",
+    "#     window wnd as (\n",
+    "#         order by uk_data.date\n",
+    "#         rows between 3 preceding and 3 following\n",
+    "#     )\n",
+    "# )\n",
+    "# insert into uk_data_7(date, \n",
+    "#     hospital_cases, \n",
+    "#     ventilator_beds, \n",
+    "#     new_cases,\n",
+    "#     new_deaths,\n",
+    "#     new_admissions, \n",
+    "#     new_pcr_tests, \n",
+    "#     new_tests, \n",
+    "#     new_pillar_1_2_tests\n",
+    "#     )\n",
+    "# (select date,\n",
+    "#         w_hospital_cases,\n",
+    "#         w_ventilator_beds,\n",
+    "#         w_new_cases,\n",
+    "#         w_new_deaths,\n",
+    "#         w_new_admissions,\n",
+    "#         w_new_pcr_tests,\n",
+    "#         w_new_tests,\n",
+    "#         w_new_pillar_1_2_tests\n",
+    "#     from ownd\n",
+    "#     where w_size = 7\n",
+    "# )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 427,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " * postgresql://covid:***@localhost/covid\n",
+      "10 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <thead>\n",
+       "        <tr>\n",
+       "            <th>date</th>\n",
+       "            <th>hospital_cases</th>\n",
+       "            <th>ventilator_beds</th>\n",
+       "            <th>new_cases</th>\n",
+       "            <th>new_deaths</th>\n",
+       "            <th>new_admissions</th>\n",
+       "            <th>new_pcr_tests</th>\n",
+       "            <th>new_tests</th>\n",
+       "            <th>new_pillar_1_2_tests</th>\n",
+       "        </tr>\n",
+       "    </thead>\n",
+       "    <tbody>\n",
+       "        <tr>\n",
+       "            <td>2021-01-23</td>\n",
+       "            <td>37915.6</td>\n",
+       "            <td>4027.3333</td>\n",
+       "            <td>22463.666</td>\n",
+       "            <td>1241.7142</td>\n",
+       "            <td>3651.6667</td>\n",
+       "            <td>353735.34</td>\n",
+       "            <td>565312.3</td>\n",
+       "            <td>543566.8</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-22</td>\n",
+       "            <td>38057.168</td>\n",
+       "            <td>4015.8572</td>\n",
+       "            <td>24870.428</td>\n",
+       "            <td>1238.7142</td>\n",
+       "            <td>3771.75</td>\n",
+       "            <td>354323.16</td>\n",
+       "            <td>567830.3</td>\n",
+       "            <td>545780.3</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-21</td>\n",
+       "            <td>38217.715</td>\n",
+       "            <td>3999.2856</td>\n",
+       "            <td>30620.428</td>\n",
+       "            <td>1239.7142</td>\n",
+       "            <td>3828.2</td>\n",
+       "            <td>363750.16</td>\n",
+       "            <td>569716.7</td>\n",
+       "            <td>546626.56</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-20</td>\n",
+       "            <td>38294.0</td>\n",
+       "            <td>3969.8572</td>\n",
+       "            <td>32707.428</td>\n",
+       "            <td>1248.4286</td>\n",
+       "            <td>3811.0</td>\n",
+       "            <td>372037.44</td>\n",
+       "            <td>570458.1</td>\n",
+       "            <td>546806.3</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-19</td>\n",
+       "            <td>38348.43</td>\n",
+       "            <td>3939.5715</td>\n",
+       "            <td>34143.285</td>\n",
+       "            <td>1240.8572</td>\n",
+       "            <td>3825.8572</td>\n",
+       "            <td>374771.16</td>\n",
+       "            <td>571336.56</td>\n",
+       "            <td>547402.44</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-18</td>\n",
+       "            <td>38338.145</td>\n",
+       "            <td>3898.5715</td>\n",
+       "            <td>35766.57</td>\n",
+       "            <td>1223.5714</td>\n",
+       "            <td>3872.8572</td>\n",
+       "            <td>377019.28</td>\n",
+       "            <td>566901.1</td>\n",
+       "            <td>542617.9</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-17</td>\n",
+       "            <td>38292.285</td>\n",
+       "            <td>3857.4285</td>\n",
+       "            <td>37337.855</td>\n",
+       "            <td>1217.5714</td>\n",
+       "            <td>3939.0</td>\n",
+       "            <td>389469.84</td>\n",
+       "            <td>571242.56</td>\n",
+       "            <td>545088.1</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-16</td>\n",
+       "            <td>38119.855</td>\n",
+       "            <td>3810.7144</td>\n",
+       "            <td>38824.0</td>\n",
+       "            <td>1181.0</td>\n",
+       "            <td>3967.2856</td>\n",
+       "            <td>395066.72</td>\n",
+       "            <td>569445.0</td>\n",
+       "            <td>541938.0</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-15</td>\n",
+       "            <td>37858.715</td>\n",
+       "            <td>3746.2856</td>\n",
+       "            <td>40268.57</td>\n",
+       "            <td>1128.5714</td>\n",
+       "            <td>4029.2856</td>\n",
+       "            <td>403526.72</td>\n",
+       "            <td>570188.7</td>\n",
+       "            <td>540199.9</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-14</td>\n",
+       "            <td>37505.57</td>\n",
+       "            <td>3667.2856</td>\n",
+       "            <td>41989.145</td>\n",
+       "            <td>1118.5714</td>\n",
+       "            <td>4065.2856</td>\n",
+       "            <td>412496.44</td>\n",
+       "            <td>567178.7</td>\n",
+       "            <td>537417.1</td>\n",
+       "        </tr>\n",
+       "    </tbody>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(datetime.date(2021, 1, 23), 37915.6, 4027.3333, 22463.666, 1241.7142, 3651.6667, 353735.34, 565312.3, 543566.8),\n",
+       " (datetime.date(2021, 1, 22), 38057.168, 4015.8572, 24870.428, 1238.7142, 3771.75, 354323.16, 567830.3, 545780.3),\n",
+       " (datetime.date(2021, 1, 21), 38217.715, 3999.2856, 30620.428, 1239.7142, 3828.2, 363750.16, 569716.7, 546626.56),\n",
+       " (datetime.date(2021, 1, 20), 38294.0, 3969.8572, 32707.428, 1248.4286, 3811.0, 372037.44, 570458.1, 546806.3),\n",
+       " (datetime.date(2021, 1, 19), 38348.43, 3939.5715, 34143.285, 1240.8572, 3825.8572, 374771.16, 571336.56, 547402.44),\n",
+       " (datetime.date(2021, 1, 18), 38338.145, 3898.5715, 35766.57, 1223.5714, 3872.8572, 377019.28, 566901.1, 542617.9),\n",
+       " (datetime.date(2021, 1, 17), 38292.285, 3857.4285, 37337.855, 1217.5714, 3939.0, 389469.84, 571242.56, 545088.1),\n",
+       " (datetime.date(2021, 1, 16), 38119.855, 3810.7144, 38824.0, 1181.0, 3967.2856, 395066.72, 569445.0, 541938.0),\n",
+       " (datetime.date(2021, 1, 15), 37858.715, 3746.2856, 40268.57, 1128.5714, 4029.2856, 403526.72, 570188.7, 540199.9),\n",
+       " (datetime.date(2021, 1, 14), 37505.57, 3667.2856, 41989.145, 1118.5714, 4065.2856, 412496.44, 567178.7, 537417.1)]"
+      ]
+     },
+     "execution_count": 427,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%sql select * from uk_data_7 order by date desc limit 10"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 428,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " * postgresql://covid:***@localhost/covid\n",
+      "10 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <thead>\n",
+       "        <tr>\n",
+       "            <th>date</th>\n",
+       "            <th>hospital_cases</th>\n",
+       "            <th>ventilator_beds</th>\n",
+       "            <th>new_cases</th>\n",
+       "            <th>new_deaths</th>\n",
+       "            <th>new_admissions</th>\n",
+       "            <th>new_pcr_tests</th>\n",
+       "            <th>new_tests</th>\n",
+       "            <th>new_pillar_1_2_tests</th>\n",
+       "        </tr>\n",
+       "    </thead>\n",
+       "    <tbody>\n",
+       "        <tr>\n",
+       "            <td>2021-01-26</td>\n",
+       "            <td>None</td>\n",
+       "            <td>None</td>\n",
+       "            <td>None</td>\n",
+       "            <td>1631</td>\n",
+       "            <td>None</td>\n",
+       "            <td>None</td>\n",
+       "            <td>None</td>\n",
+       "            <td>None</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-25</td>\n",
+       "            <td>None</td>\n",
+       "            <td>4032</td>\n",
+       "            <td>4482</td>\n",
+       "            <td>592</td>\n",
+       "            <td>None</td>\n",
+       "            <td>196510</td>\n",
+       "            <td>546734</td>\n",
+       "            <td>531104</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-24</td>\n",
+       "            <td>37561</td>\n",
+       "            <td>4077</td>\n",
+       "            <td>14266</td>\n",
+       "            <td>610</td>\n",
+       "            <td>None</td>\n",
+       "            <td>262111</td>\n",
+       "            <td>412775</td>\n",
+       "            <td>394479</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-23</td>\n",
+       "            <td>37266</td>\n",
+       "            <td>4066</td>\n",
+       "            <td>20495</td>\n",
+       "            <td>1348</td>\n",
+       "            <td>None</td>\n",
+       "            <td>389209</td>\n",
+       "            <td>486425</td>\n",
+       "            <td>465231</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-22</td>\n",
+       "            <td>38144</td>\n",
+       "            <td>4076</td>\n",
+       "            <td>29094</td>\n",
+       "            <td>1401</td>\n",
+       "            <td>3341</td>\n",
+       "            <td>401075</td>\n",
+       "            <td>631901</td>\n",
+       "            <td>608829</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-21</td>\n",
+       "            <td>37957</td>\n",
+       "            <td>3960</td>\n",
+       "            <td>31430</td>\n",
+       "            <td>1290</td>\n",
+       "            <td>3598</td>\n",
+       "            <td>439408</td>\n",
+       "            <td>668989</td>\n",
+       "            <td>644537</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-20</td>\n",
+       "            <td>38650</td>\n",
+       "            <td>3953</td>\n",
+       "            <td>35015</td>\n",
+       "            <td>1820</td>\n",
+       "            <td>4016</td>\n",
+       "            <td>434099</td>\n",
+       "            <td>645050</td>\n",
+       "            <td>617221</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-19</td>\n",
+       "            <td>38765</td>\n",
+       "            <td>3947</td>\n",
+       "            <td>39311</td>\n",
+       "            <td>1610</td>\n",
+       "            <td>4132</td>\n",
+       "            <td>357850</td>\n",
+       "            <td>582938</td>\n",
+       "            <td>559061</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-18</td>\n",
+       "            <td>39181</td>\n",
+       "            <td>3916</td>\n",
+       "            <td>44732</td>\n",
+       "            <td>599</td>\n",
+       "            <td>4054</td>\n",
+       "            <td>262499</td>\n",
+       "            <td>559939</td>\n",
+       "            <td>537028</td>\n",
+       "        </tr>\n",
+       "        <tr>\n",
+       "            <td>2021-01-17</td>\n",
+       "            <td>38095</td>\n",
+       "            <td>3871</td>\n",
+       "            <td>28875</td>\n",
+       "            <td>671</td>\n",
+       "            <td>3725</td>\n",
+       "            <td>320122</td>\n",
+       "            <td>417965</td>\n",
+       "            <td>395737</td>\n",
+       "        </tr>\n",
+       "    </tbody>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(datetime.date(2021, 1, 26), None, None, None, 1631, None, None, None, None),\n",
+       " (datetime.date(2021, 1, 25), None, 4032, 4482, 592, None, 196510, 546734, 531104),\n",
+       " (datetime.date(2021, 1, 24), 37561, 4077, 14266, 610, None, 262111, 412775, 394479),\n",
+       " (datetime.date(2021, 1, 23), 37266, 4066, 20495, 1348, None, 389209, 486425, 465231),\n",
+       " (datetime.date(2021, 1, 22), 38144, 4076, 29094, 1401, 3341, 401075, 631901, 608829),\n",
+       " (datetime.date(2021, 1, 21), 37957, 3960, 31430, 1290, 3598, 439408, 668989, 644537),\n",
+       " (datetime.date(2021, 1, 20), 38650, 3953, 35015, 1820, 4016, 434099, 645050, 617221),\n",
+       " (datetime.date(2021, 1, 19), 38765, 3947, 39311, 1610, 4132, 357850, 582938, 559061),\n",
+       " (datetime.date(2021, 1, 18), 39181, 3916, 44732, 599, 4054, 262499, 559939, 537028),\n",
+       " (datetime.date(2021, 1, 17), 38095, 3871, 28875, 671, 3725, 320122, 417965, 395737)]"
+      ]
+     },
+     "execution_count": 428,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%sql select * from uk_data order by date desc limit 10"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "jupytext": {
+   "formats": "ipynb,md"
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}