--- /dev/null
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "source": [
+ "Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 314,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float\n",
+ "from sqlalchemy import create_engine"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 315,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "import itertools\n",
+ "import collections\n",
+ "import json\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from scipy.stats import gmean\n",
+ "import datetime\n",
+ "\n",
+ "import matplotlib as mpl\n",
+ "import matplotlib.pyplot as plt\n",
+ "%matplotlib inline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 316,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The sql extension is already loaded. To reload it, use:\n",
+ " %reload_ext sql\n"
+ ]
+ }
+ ],
+ "source": [
+ "%load_ext sql"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 317,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 318,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Connected: covid@covid'"
+ ]
+ },
+ "execution_count": 318,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%sql $connection_string"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 319,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "eng = create_engine(connection_string)\n",
+ "engine = eng.execution_options(isolation_level=\"AUTOCOMMIT\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 320,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "DEATH_COUNT_THRESHOLD = 10\n",
+ "COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()\n",
+ "COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()\n",
+ "COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()\n",
+ "# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()\n",
+ "\n",
+ "COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA\n",
+ " 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',\n",
+ " 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',\n",
+ " 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']\n",
+ "COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))\n",
+ "COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 321,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " % Total % Received % Xferd Average Speed Time Time Time Current\n",
+ " Dload Upload Total Spent Left Speed\n",
+ "100 623k 100 623k 0 0 927k 0 --:--:-- --:--:-- --:--:-- 925k\n"
+ ]
+ }
+ ],
+ "source": [
+ "!curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 322,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "# First col is a date, treat geoId of NA as 'Namibia', not \"NA\" value\n",
+ "raw_data = pd.read_csv('covid.csv', \n",
+ " parse_dates=[0], dayfirst=True,\n",
+ " keep_default_na=False, na_values = [''],\n",
+ "# dtype = {'day': np.int64, \n",
+ "# 'month': np.int64, \n",
+ "# 'year': np.int64, \n",
+ "# 'cases': np.int64, \n",
+ "# 'deaths': np.int64, \n",
+ "# 'countriesAndTerritories': str, \n",
+ "# 'geoId': str, \n",
+ "# 'countryterritoryCode': str, \n",
+ "# 'popData2019': np.int64, \n",
+ "# 'continentExp': str, \n",
+ "# }\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 323,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "100050"
+ ]
+ },
+ "execution_count": 323,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raw_data.size"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 324,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "raw_data.fillna(0, inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 325,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>dateRep</th>\n",
+ " <th>year_week</th>\n",
+ " <th>cases_weekly</th>\n",
+ " <th>deaths_weekly</th>\n",
+ " <th>countriesAndTerritories</th>\n",
+ " <th>geoId</th>\n",
+ " <th>countryterritoryCode</th>\n",
+ " <th>popData2019</th>\n",
+ " <th>continentExp</th>\n",
+ " <th>notification_rate_per_100000_population_14-days</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>2021-01-18</td>\n",
+ " <td>2021-02</td>\n",
+ " <td>557</td>\n",
+ " <td>45</td>\n",
+ " <td>Afghanistan</td>\n",
+ " <td>AF</td>\n",
+ " <td>AFG</td>\n",
+ " <td>38041757.0</td>\n",
+ " <td>Asia</td>\n",
+ " <td>3.24</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>2021-01-11</td>\n",
+ " <td>2021-01</td>\n",
+ " <td>675</td>\n",
+ " <td>71</td>\n",
+ " <td>Afghanistan</td>\n",
+ " <td>AF</td>\n",
+ " <td>AFG</td>\n",
+ " <td>38041757.0</td>\n",
+ " <td>Asia</td>\n",
+ " <td>4.15</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>2021-01-04</td>\n",
+ " <td>2020-53</td>\n",
+ " <td>902</td>\n",
+ " <td>60</td>\n",
+ " <td>Afghanistan</td>\n",
+ " <td>AF</td>\n",
+ " <td>AFG</td>\n",
+ " <td>38041757.0</td>\n",
+ " <td>Asia</td>\n",
+ " <td>7.61</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>2020-12-28</td>\n",
+ " <td>2020-52</td>\n",
+ " <td>1994</td>\n",
+ " <td>88</td>\n",
+ " <td>Afghanistan</td>\n",
+ " <td>AF</td>\n",
+ " <td>AFG</td>\n",
+ " <td>38041757.0</td>\n",
+ " <td>Asia</td>\n",
+ " <td>7.19</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>2020-12-21</td>\n",
+ " <td>2020-51</td>\n",
+ " <td>740</td>\n",
+ " <td>111</td>\n",
+ " <td>Afghanistan</td>\n",
+ " <td>AF</td>\n",
+ " <td>AFG</td>\n",
+ " <td>38041757.0</td>\n",
+ " <td>Asia</td>\n",
+ " <td>6.56</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " dateRep year_week cases_weekly deaths_weekly countriesAndTerritories \\\n",
+ "0 2021-01-18 2021-02 557 45 Afghanistan \n",
+ "1 2021-01-11 2021-01 675 71 Afghanistan \n",
+ "2 2021-01-04 2020-53 902 60 Afghanistan \n",
+ "3 2020-12-28 2020-52 1994 88 Afghanistan \n",
+ "4 2020-12-21 2020-51 740 111 Afghanistan \n",
+ "\n",
+ " geoId countryterritoryCode popData2019 continentExp \\\n",
+ "0 AF AFG 38041757.0 Asia \n",
+ "1 AF AFG 38041757.0 Asia \n",
+ "2 AF AFG 38041757.0 Asia \n",
+ "3 AF AFG 38041757.0 Asia \n",
+ "4 AF AFG 38041757.0 Asia \n",
+ "\n",
+ " notification_rate_per_100000_population_14-days \n",
+ "0 3.24 \n",
+ "1 4.15 \n",
+ "2 7.61 \n",
+ "3 7.19 \n",
+ "4 6.56 "
+ ]
+ },
+ "execution_count": 325,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raw_data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 326,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',\n",
+ " 'countriesAndTerritories': 'country_name',\n",
+ " 'countryterritoryCode': 'country_territory_code',\n",
+ " 'popData2019': 'population_2019',\n",
+ " 'continentExp': 'continent'}, inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 327,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['report_date', 'year_week', 'cases_weekly', 'deaths_weekly',\n",
+ " 'country_name', 'geo_id', 'country_territory_code', 'population_2019',\n",
+ " 'continent', 'notification_rate_per_100000_population_14-days'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 327,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raw_data.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 328,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "report_date datetime64[ns]\n",
+ "year_week object\n",
+ "cases_weekly int64\n",
+ "deaths_weekly int64\n",
+ "country_name object\n",
+ "geo_id object\n",
+ "country_territory_code object\n",
+ "population_2019 float64\n",
+ "continent object\n",
+ "notification_rate_per_100000_population_14-days float64\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 328,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raw_data.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 329,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>report_date</th>\n",
+ " <th>cases_weekly</th>\n",
+ " <th>deaths_weekly</th>\n",
+ " <th>geo_id</th>\n",
+ " <th>notification_rate_per_100000_population_14-days</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>2021-01-18</td>\n",
+ " <td>557</td>\n",
+ " <td>45</td>\n",
+ " <td>AF</td>\n",
+ " <td>3.24</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>2021-01-11</td>\n",
+ " <td>675</td>\n",
+ " <td>71</td>\n",
+ " <td>AF</td>\n",
+ " <td>4.15</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>2021-01-04</td>\n",
+ " <td>902</td>\n",
+ " <td>60</td>\n",
+ " <td>AF</td>\n",
+ " <td>7.61</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>2020-12-28</td>\n",
+ " <td>1994</td>\n",
+ " <td>88</td>\n",
+ " <td>AF</td>\n",
+ " <td>7.19</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>2020-12-21</td>\n",
+ " <td>740</td>\n",
+ " <td>111</td>\n",
+ " <td>AF</td>\n",
+ " <td>6.56</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>10000</th>\n",
+ " <td>2020-04-20</td>\n",
+ " <td>11</td>\n",
+ " <td>0</td>\n",
+ " <td>ZW</td>\n",
+ " <td>0.11</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>10001</th>\n",
+ " <td>2020-04-13</td>\n",
+ " <td>5</td>\n",
+ " <td>2</td>\n",
+ " <td>ZW</td>\n",
+ " <td>0.05</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>10002</th>\n",
+ " <td>2020-04-06</td>\n",
+ " <td>2</td>\n",
+ " <td>0</td>\n",
+ " <td>ZW</td>\n",
+ " <td>0.05</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>10003</th>\n",
+ " <td>2020-03-30</td>\n",
+ " <td>5</td>\n",
+ " <td>1</td>\n",
+ " <td>ZW</td>\n",
+ " <td>0.05</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>10004</th>\n",
+ " <td>2020-03-23</td>\n",
+ " <td>2</td>\n",
+ " <td>0</td>\n",
+ " <td>ZW</td>\n",
+ " <td>0.00</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>10005 rows × 5 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " report_date cases_weekly deaths_weekly geo_id \\\n",
+ "0 2021-01-18 557 45 AF \n",
+ "1 2021-01-11 675 71 AF \n",
+ "2 2021-01-04 902 60 AF \n",
+ "3 2020-12-28 1994 88 AF \n",
+ "4 2020-12-21 740 111 AF \n",
+ "... ... ... ... ... \n",
+ "10000 2020-04-20 11 0 ZW \n",
+ "10001 2020-04-13 5 2 ZW \n",
+ "10002 2020-04-06 2 0 ZW \n",
+ "10003 2020-03-30 5 1 ZW \n",
+ "10004 2020-03-23 2 0 ZW \n",
+ "\n",
+ " notification_rate_per_100000_population_14-days \n",
+ "0 3.24 \n",
+ "1 4.15 \n",
+ "2 7.61 \n",
+ "3 7.19 \n",
+ "4 6.56 \n",
+ "... ... \n",
+ "10000 0.11 \n",
+ "10001 0.05 \n",
+ "10002 0.05 \n",
+ "10003 0.05 \n",
+ "10004 0.00 \n",
+ "\n",
+ "[10005 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 329,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 330,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>country_name</th>\n",
+ " <th>geo_id</th>\n",
+ " <th>country_territory_code</th>\n",
+ " <th>population_2019</th>\n",
+ " <th>continent</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>Afghanistan</td>\n",
+ " <td>AF</td>\n",
+ " <td>AFG</td>\n",
+ " <td>38041757.0</td>\n",
+ " <td>Asia</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>Afghanistan</td>\n",
+ " <td>AF</td>\n",
+ " <td>AFG</td>\n",
+ " <td>38041757.0</td>\n",
+ " <td>Asia</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>Afghanistan</td>\n",
+ " <td>AF</td>\n",
+ " <td>AFG</td>\n",
+ " <td>38041757.0</td>\n",
+ " <td>Asia</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>Afghanistan</td>\n",
+ " <td>AF</td>\n",
+ " <td>AFG</td>\n",
+ " <td>38041757.0</td>\n",
+ " <td>Asia</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>Afghanistan</td>\n",
+ " <td>AF</td>\n",
+ " <td>AFG</td>\n",
+ " <td>38041757.0</td>\n",
+ " <td>Asia</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>10000</th>\n",
+ " <td>Zimbabwe</td>\n",
+ " <td>ZW</td>\n",
+ " <td>ZWE</td>\n",
+ " <td>14645473.0</td>\n",
+ " <td>Africa</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>10001</th>\n",
+ " <td>Zimbabwe</td>\n",
+ " <td>ZW</td>\n",
+ " <td>ZWE</td>\n",
+ " <td>14645473.0</td>\n",
+ " <td>Africa</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>10002</th>\n",
+ " <td>Zimbabwe</td>\n",
+ " <td>ZW</td>\n",
+ " <td>ZWE</td>\n",
+ " <td>14645473.0</td>\n",
+ " <td>Africa</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>10003</th>\n",
+ " <td>Zimbabwe</td>\n",
+ " <td>ZW</td>\n",
+ " <td>ZWE</td>\n",
+ " <td>14645473.0</td>\n",
+ " <td>Africa</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>10004</th>\n",
+ " <td>Zimbabwe</td>\n",
+ " <td>ZW</td>\n",
+ " <td>ZWE</td>\n",
+ " <td>14645473.0</td>\n",
+ " <td>Africa</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>10005 rows × 5 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " country_name geo_id country_territory_code population_2019 continent\n",
+ "0 Afghanistan AF AFG 38041757.0 Asia\n",
+ "1 Afghanistan AF AFG 38041757.0 Asia\n",
+ "2 Afghanistan AF AFG 38041757.0 Asia\n",
+ "3 Afghanistan AF AFG 38041757.0 Asia\n",
+ "4 Afghanistan AF AFG 38041757.0 Asia\n",
+ "... ... ... ... ... ...\n",
+ "10000 Zimbabwe ZW ZWE 14645473.0 Africa\n",
+ "10001 Zimbabwe ZW ZWE 14645473.0 Africa\n",
+ "10002 Zimbabwe ZW ZWE 14645473.0 Africa\n",
+ "10003 Zimbabwe ZW ZWE 14645473.0 Africa\n",
+ "10004 Zimbabwe ZW ZWE 14645473.0 Africa\n",
+ "\n",
+ "[10005 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 330,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raw_data[['country_name', 'geo_id', 'country_territory_code',\n",
+ " 'population_2019', 'continent']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 331,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']].to_sql(\n",
+ " 'weekly_cases',\n",
+ " engine,\n",
+ " if_exists='replace',\n",
+ " index=False,\n",
+ " chunksize=500,\n",
+ " dtype={\n",
+ " \"report_date\": Date,\n",
+ " \"cases_weekly\": Integer,\n",
+ " \"deaths_weekly\": Integer,\n",
+ " \"geo_id\": String,\n",
+ " \"notification_rate_per_100000_population_14-days\": Float\n",
+ " }\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 332,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "raw_data[['country_name', 'geo_id', 'country_territory_code',\n",
+ " 'population_2019', 'continent']].drop_duplicates().to_sql(\n",
+ " 'countries',\n",
+ " engine,\n",
+ " if_exists='replace',\n",
+ " index=False,\n",
+ " chunksize=500,\n",
+ " dtype={\n",
+ " \"country_name\": Text,\n",
+ " \"geo_id\": String,\n",
+ " \"country_territory_code\": String,\n",
+ " \"population_2019\": Integer,\n",
+ " \"continent\": Text\n",
+ " }\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 333,
+ "metadata": {
+ "Collapsed": "false",
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "# %sql select geo_id from weekly_cases limit 10"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 334,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "# %%sql alter table weekly_cases add primary key (geo_id, report_date);\n",
+ "# alter table countries add primary key (geo_id);\n",
+ "# alter table weekly_cases add foreign key (geo_id) references countries(geo_id);\n",
+ "# alter table weekly_cases add culm_cases integer;\n",
+ "# alter table weekly_cases add culm_deaths integer;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 335,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with engine.connect() as connection:\n",
+ " connection.execute('alter table weekly_cases add primary key (geo_id, report_date)')\n",
+ " connection.execute('alter table countries add primary key (geo_id);')\n",
+ " connection.execute('alter table weekly_cases add foreign key (geo_id) references countries(geo_id);')\n",
+ " connection.execute('alter table weekly_cases add culm_cases integer;')\n",
+ " connection.execute('alter table weekly_cases add culm_deaths integer;')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 336,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "# %sql select report_date, cases_weekly, country_name from weekly_cases join countries using (geo_id) order by report_date desc limit 10"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 337,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "# %sql select report_date, cases_weekly, country_name from weekly_cases join countries on weekly_cases.geo_id = countries.geo_id order by report_date desc limit 10"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 338,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " * postgresql://covid:***@localhost/covid\n",
+ "10 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <thead>\n",
+ " <tr>\n",
+ " <th>report_date</th>\n",
+ " <th>cases_weekly</th>\n",
+ " <th>deaths_weekly</th>\n",
+ " <th>geo_id</th>\n",
+ " <th>notification_rate_per_100000_population_14-days</th>\n",
+ " <th>culm_cases</th>\n",
+ " <th>culm_deaths</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <td>2021-01-18</td>\n",
+ " <td>557</td>\n",
+ " <td>45</td>\n",
+ " <td>AF</td>\n",
+ " <td>3.24</td>\n",
+ " <td>None</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-11</td>\n",
+ " <td>675</td>\n",
+ " <td>71</td>\n",
+ " <td>AF</td>\n",
+ " <td>4.15</td>\n",
+ " <td>None</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-04</td>\n",
+ " <td>902</td>\n",
+ " <td>60</td>\n",
+ " <td>AF</td>\n",
+ " <td>7.61</td>\n",
+ " <td>None</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020-12-28</td>\n",
+ " <td>1994</td>\n",
+ " <td>88</td>\n",
+ " <td>AF</td>\n",
+ " <td>7.19</td>\n",
+ " <td>None</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020-12-21</td>\n",
+ " <td>740</td>\n",
+ " <td>111</td>\n",
+ " <td>AF</td>\n",
+ " <td>6.56</td>\n",
+ " <td>None</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020-12-14</td>\n",
+ " <td>1757</td>\n",
+ " <td>71</td>\n",
+ " <td>AF</td>\n",
+ " <td>9.01</td>\n",
+ " <td>None</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020-12-07</td>\n",
+ " <td>1672</td>\n",
+ " <td>137</td>\n",
+ " <td>AF</td>\n",
+ " <td>7.22</td>\n",
+ " <td>None</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020-11-30</td>\n",
+ " <td>1073</td>\n",
+ " <td>68</td>\n",
+ " <td>AF</td>\n",
+ " <td>6.42</td>\n",
+ " <td>None</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020-11-23</td>\n",
+ " <td>1368</td>\n",
+ " <td>69</td>\n",
+ " <td>AF</td>\n",
+ " <td>6.66</td>\n",
+ " <td>None</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2020-11-16</td>\n",
+ " <td>1164</td>\n",
+ " <td>61</td>\n",
+ " <td>AF</td>\n",
+ " <td>4.65</td>\n",
+ " <td>None</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(datetime.date(2021, 1, 18), 557, 45, 'AF', 3.24, None, None),\n",
+ " (datetime.date(2021, 1, 11), 675, 71, 'AF', 4.15, None, None),\n",
+ " (datetime.date(2021, 1, 4), 902, 60, 'AF', 7.61, None, None),\n",
+ " (datetime.date(2020, 12, 28), 1994, 88, 'AF', 7.19, None, None),\n",
+ " (datetime.date(2020, 12, 21), 740, 111, 'AF', 6.56, None, None),\n",
+ " (datetime.date(2020, 12, 14), 1757, 71, 'AF', 9.01, None, None),\n",
+ " (datetime.date(2020, 12, 7), 1672, 137, 'AF', 7.22, None, None),\n",
+ " (datetime.date(2020, 11, 30), 1073, 68, 'AF', 6.42, None, None),\n",
+ " (datetime.date(2020, 11, 23), 1368, 69, 'AF', 6.66, None, None),\n",
+ " (datetime.date(2020, 11, 16), 1164, 61, 'AF', 4.65, None, None)]"
+ ]
+ },
+ "execution_count": 338,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%sql select * from weekly_cases limit 10"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 339,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "# %%sql\n",
+ "# with culm as \n",
+ "# (select report_date, geo_id,\n",
+ "# sum(cases_weekly) over (partition by geo_id \n",
+ "# order by report_date) as culm_data\n",
+ "# from weekly_cases\n",
+ "# )\n",
+ "# update weekly_cases\n",
+ "# set culm_cases = culm_data\n",
+ "# from culm\n",
+ "# where weekly_cases.report_date = culm.report_date and\n",
+ "# weekly_cases.geo_id = culm.geo_id"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 340,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "query_string = '''with culm as \n",
+ " (select report_date, geo_id,\n",
+ " sum(cases_weekly) over (partition by geo_id \n",
+ " order by report_date) as culm_data\n",
+ " from weekly_cases\n",
+ " )\n",
+ "update weekly_cases\n",
+ " set culm_cases = culm_data\n",
+ " from culm\n",
+ " where weekly_cases.report_date = culm.report_date and\n",
+ " weekly_cases.geo_id = culm.geo_id'''\n",
+ "with engine.connect() as connection:\n",
+ " connection.execute(query_string)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 341,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "# %%sql\n",
+ "# with culm as \n",
+ "# (select report_date, geo_id,\n",
+ "# sum(deaths_weekly) over (partition by geo_id \n",
+ "# order by report_date) as culm_data\n",
+ "# from weekly_cases\n",
+ "# )\n",
+ "# update weekly_cases\n",
+ "# set culm_deaths = culm_data\n",
+ "# from culm\n",
+ "# where weekly_cases.report_date = culm.report_date and\n",
+ "# weekly_cases.geo_id = culm.geo_id"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 342,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "query_string = '''with culm as \n",
+ " (select report_date, geo_id,\n",
+ " sum(deaths_weekly) over (partition by geo_id \n",
+ " order by report_date) as culm_data\n",
+ " from weekly_cases\n",
+ " )\n",
+ "update weekly_cases\n",
+ " set culm_deaths = culm_data\n",
+ " from culm\n",
+ " where weekly_cases.report_date = culm.report_date and\n",
+ " weekly_cases.geo_id = culm.geo_id'''\n",
+ "with engine.connect() as connection:\n",
+ " connection.execute(query_string)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 343,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " % Total % Received % Xferd Average Speed Time Time Time Current\n",
+ " Dload Upload Total Spent Left Speed\n",
+ "100 26053 100 26053 0 0 54277 0 --:--:-- --:--:-- --:--:-- 54277\n"
+ ]
+ }
+ ],
+ "source": [
+ "uk_query_string = (\n",
+ "\"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n",
+ "\"metric=covidOccupiedMVBeds&\"\n",
+ "\"metric=newAdmissions&\"\n",
+ "\"metric=newCasesBySpecimenDate&\"\n",
+ "\"metric=hospitalCases&\"\n",
+ "\"metric=newDeaths28DaysByPublishDate&\"\n",
+ "\"format=csv\"\n",
+ ")\n",
+ "\n",
+ "!curl \"$uk_query_string\" > uk_data.csv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 344,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " % Total % Received % Xferd Average Speed Time Time Time Current\n",
+ " Dload Upload Total Spent Left Speed\n",
+ "100 20223 100 20223 0 0 78688 0 --:--:-- --:--:-- --:--:-- 78383\n"
+ ]
+ }
+ ],
+ "source": [
+ "test_query_string = (\n",
+ "\"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n",
+ "\"metric=newPCRTestsByPublishDate&\"\n",
+ "\"metric=newTestsByPublishDate&\"\n",
+ "\"metric=newPillarOneTwoTestsByPublishDate&\"\n",
+ "\"format=csv\"\n",
+ ")\n",
+ "!curl \"$test_query_string\" > test_data.csv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 345,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "# hospital_query_string = (\n",
+ "# \"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n",
+ "# \"metric=newAdmissions&\"\n",
+ "# \"format=csv\"\n",
+ "# )\n",
+ "# !curl \"$hospital_query_string\" > hospital_admissions.csv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 346,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "# hospital_query_string = (\n",
+ "# \"https://api.coronavirus.data.gov.uk/v1/data?\"\n",
+ "# \"filters=areaName=United%2520Kingdom;areaType=overview&\"\n",
+ "# \"structure=%7B%22date%22:%22date%22,%22areaName%22:%22areaName%22,%22areaType%22:%22areaType%22,\"\n",
+ "# \"%22newAdmissions%22:%22newAdmissions%22,%22cumAdmissions%22:%22cumAdmissions%22%7D&format=csv\"\n",
+ "# )\n",
+ " \n",
+ "# !curl \"$hospital_query_string\" | gunzip > hospital_admissions.csv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 347,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>date</th>\n",
+ " <th>areaType</th>\n",
+ " <th>areaCode</th>\n",
+ " <th>areaName</th>\n",
+ " <th>covidOccupiedMVBeds</th>\n",
+ " <th>newAdmissions</th>\n",
+ " <th>newCasesBySpecimenDate</th>\n",
+ " <th>hospitalCases</th>\n",
+ " <th>newDeaths28DaysByPublishDate</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>2021-01-26</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>1631</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>2021-01-25</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>4032.0</td>\n",
+ " <td>NaN</td>\n",
+ " <td>4482.0</td>\n",
+ " <td>NaN</td>\n",
+ " <td>592</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>2021-01-24</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>4077.0</td>\n",
+ " <td>NaN</td>\n",
+ " <td>14266.0</td>\n",
+ " <td>37561.0</td>\n",
+ " <td>610</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>2021-01-23</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>4066.0</td>\n",
+ " <td>NaN</td>\n",
+ " <td>20495.0</td>\n",
+ " <td>37266.0</td>\n",
+ " <td>1348</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>2021-01-22</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>4076.0</td>\n",
+ " <td>3341.0</td>\n",
+ " <td>29094.0</td>\n",
+ " <td>38144.0</td>\n",
+ " <td>1401</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>385</th>\n",
+ " <td>2020-01-06</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>386</th>\n",
+ " <td>2020-01-05</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>387</th>\n",
+ " <td>2020-01-04</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>388</th>\n",
+ " <td>2020-01-03</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>NaN</td>\n",
+ " <td>0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>389</th>\n",
+ " <td>2020-06-14</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>393.0</td>\n",
+ " <td>364.0</td>\n",
+ " <td>821.0</td>\n",
+ " <td>4907.0</td>\n",
+ " <td>27</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>390 rows × 9 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " date areaType areaCode areaName covidOccupiedMVBeds \\\n",
+ "0 2021-01-26 overview K02000001 United Kingdom NaN \n",
+ "1 2021-01-25 overview K02000001 United Kingdom 4032.0 \n",
+ "2 2021-01-24 overview K02000001 United Kingdom 4077.0 \n",
+ "3 2021-01-23 overview K02000001 United Kingdom 4066.0 \n",
+ "4 2021-01-22 overview K02000001 United Kingdom 4076.0 \n",
+ ".. ... ... ... ... ... \n",
+ "385 2020-01-06 overview K02000001 United Kingdom NaN \n",
+ "386 2020-01-05 overview K02000001 United Kingdom NaN \n",
+ "387 2020-01-04 overview K02000001 United Kingdom NaN \n",
+ "388 2020-01-03 overview K02000001 United Kingdom NaN \n",
+ "389 2020-06-14 overview K02000001 United Kingdom 393.0 \n",
+ "\n",
+ " newAdmissions newCasesBySpecimenDate hospitalCases \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN 4482.0 NaN \n",
+ "2 NaN 14266.0 37561.0 \n",
+ "3 NaN 20495.0 37266.0 \n",
+ "4 3341.0 29094.0 38144.0 \n",
+ ".. ... ... ... \n",
+ "385 NaN NaN NaN \n",
+ "386 NaN NaN NaN \n",
+ "387 NaN NaN NaN \n",
+ "388 NaN NaN NaN \n",
+ "389 364.0 821.0 4907.0 \n",
+ "\n",
+ " newDeaths28DaysByPublishDate \n",
+ "0 1631 \n",
+ "1 592 \n",
+ "2 610 \n",
+ "3 1348 \n",
+ "4 1401 \n",
+ ".. ... \n",
+ "385 0 \n",
+ "386 0 \n",
+ "387 0 \n",
+ "388 0 \n",
+ "389 27 \n",
+ "\n",
+ "[390 rows x 9 columns]"
+ ]
+ },
+ "execution_count": 347,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "uk_data = pd.read_csv('uk_data.csv', \n",
+ " parse_dates=[0], dayfirst=True)\n",
+ "uk_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 348,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>date</th>\n",
+ " <th>areaType</th>\n",
+ " <th>areaCode</th>\n",
+ " <th>areaName</th>\n",
+ " <th>newPCRTestsByPublishDate</th>\n",
+ " <th>newTestsByPublishDate</th>\n",
+ " <th>newPillarOneTwoTestsByPublishDate</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>2021-01-25</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>196510.0</td>\n",
+ " <td>546734</td>\n",
+ " <td>531104</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>2021-01-24</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>262111.0</td>\n",
+ " <td>412775</td>\n",
+ " <td>394479</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>2021-01-23</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>389209.0</td>\n",
+ " <td>486425</td>\n",
+ " <td>465231</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>2021-01-22</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>401075.0</td>\n",
+ " <td>631901</td>\n",
+ " <td>608829</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>2021-01-21</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>439408.0</td>\n",
+ " <td>668989</td>\n",
+ " <td>644537</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>296</th>\n",
+ " <td>2020-04-03</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>NaN</td>\n",
+ " <td>14629</td>\n",
+ " <td>14293</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>297</th>\n",
+ " <td>2020-04-02</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>NaN</td>\n",
+ " <td>13623</td>\n",
+ " <td>13457</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>298</th>\n",
+ " <td>2020-04-01</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>NaN</td>\n",
+ " <td>11947</td>\n",
+ " <td>11924</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>299</th>\n",
+ " <td>2020-03-31</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>NaN</td>\n",
+ " <td>11896</td>\n",
+ " <td>11896</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>300</th>\n",
+ " <td>2020-06-14</td>\n",
+ " <td>overview</td>\n",
+ " <td>K02000001</td>\n",
+ " <td>United Kingdom</td>\n",
+ " <td>69529.0</td>\n",
+ " <td>83965</td>\n",
+ " <td>66688</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>301 rows × 7 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " date areaType areaCode areaName newPCRTestsByPublishDate \\\n",
+ "0 2021-01-25 overview K02000001 United Kingdom 196510.0 \n",
+ "1 2021-01-24 overview K02000001 United Kingdom 262111.0 \n",
+ "2 2021-01-23 overview K02000001 United Kingdom 389209.0 \n",
+ "3 2021-01-22 overview K02000001 United Kingdom 401075.0 \n",
+ "4 2021-01-21 overview K02000001 United Kingdom 439408.0 \n",
+ ".. ... ... ... ... ... \n",
+ "296 2020-04-03 overview K02000001 United Kingdom NaN \n",
+ "297 2020-04-02 overview K02000001 United Kingdom NaN \n",
+ "298 2020-04-01 overview K02000001 United Kingdom NaN \n",
+ "299 2020-03-31 overview K02000001 United Kingdom NaN \n",
+ "300 2020-06-14 overview K02000001 United Kingdom 69529.0 \n",
+ "\n",
+ " newTestsByPublishDate newPillarOneTwoTestsByPublishDate \n",
+ "0 546734 531104 \n",
+ "1 412775 394479 \n",
+ "2 486425 465231 \n",
+ "3 631901 608829 \n",
+ "4 668989 644537 \n",
+ ".. ... ... \n",
+ "296 14629 14293 \n",
+ "297 13623 13457 \n",
+ "298 11947 11924 \n",
+ "299 11896 11896 \n",
+ "300 83965 66688 \n",
+ "\n",
+ "[301 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 348,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test_data = pd.read_csv('test_data.csv', \n",
+ " parse_dates=[0], dayfirst=True)\n",
+ "test_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 349,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['date', 'areaType', 'areaCode', 'areaName', 'newPCRTestsByPublishDate',\n",
+ " 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 349,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test_data.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 350,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "uk_data = uk_data.merge(test_data[['date', 'newPCRTestsByPublishDate',\n",
+ " 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate']], how='outer', on='date')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 351,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['date', 'areaType', 'areaCode', 'areaName', 'covidOccupiedMVBeds',\n",
+ " 'newAdmissions', 'newCasesBySpecimenDate', 'hospitalCases',\n",
+ " 'newDeaths28DaysByPublishDate', 'newPCRTestsByPublishDate',\n",
+ " 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 351,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "uk_data.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 352,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "uk_data.rename(\n",
+ " columns={\n",
+ " 'covidOccupiedMVBeds': 'ventilator_beds',\n",
+ " 'newCasesBySpecimenDate': 'new_cases',\n",
+ " 'hospitalCases': 'hospital_cases', \n",
+ " 'newDeaths28DaysByPublishDate': 'new_deaths',\n",
+ " 'newAdmissions': 'new_admissions',\n",
+ " 'newPCRTestsByPublishDate': 'new_pcr_tests',\n",
+ " 'newTestsByPublishDate': 'new_tests',\n",
+ " 'newPillarOneTwoTestsByPublishDate': 'new_pillar_1_2_tests'\n",
+ " }, inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 353,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "date datetime64[ns]\n",
+ "areaType object\n",
+ "areaCode object\n",
+ "areaName object\n",
+ "ventilator_beds float64\n",
+ "new_admissions float64\n",
+ "new_cases float64\n",
+ "hospital_cases float64\n",
+ "new_deaths int64\n",
+ "new_pcr_tests float64\n",
+ "new_tests float64\n",
+ "new_pillar_1_2_tests float64\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 353,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "uk_data.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 354,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['date', 'areaType', 'areaCode', 'areaName', 'ventilator_beds',\n",
+ " 'new_admissions', 'new_cases', 'hospital_cases', 'new_deaths',\n",
+ " 'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 354,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ " uk_data.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 355,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "uk_data[['date', \n",
+ " 'hospital_cases', 'ventilator_beds',\n",
+ " 'new_cases', 'new_deaths', \n",
+ " 'hospital_cases', 'new_admissions',\n",
+ " 'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'\n",
+ " ]].to_sql(\n",
+ " 'uk_data',\n",
+ " engine,\n",
+ " if_exists='replace',\n",
+ " index=False,\n",
+ " chunksize=500,\n",
+ " dtype={\n",
+ " \"date\": Date,\n",
+ " \"hospital_cases\": Integer,\n",
+ " \"ventilator_beds\": Integer,\n",
+ " \"new_cases\": Integer,\n",
+ " \"hospital_cases\": Integer,\n",
+ " \"new_deaths\": Integer,\n",
+ " \"new_admissions\": Integer,\n",
+ " 'new_pcr_tests': Integer, \n",
+ " 'new_tests': Integer, \n",
+ " 'new_pillar_1_2_tests': Integer\n",
+ " }\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 356,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "# %sql select * from uk_data order by date desc limit 10"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 424,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "query_string = '''drop table if exists uk_data_7;\n",
+ "create table uk_data_7 \n",
+ "(date date primary key,\n",
+ " hospital_cases real,\n",
+ " ventilator_beds real,\n",
+ " new_cases real,\n",
+ " new_deaths real,\n",
+ " new_admissions real,\n",
+ " new_pcr_tests real,\n",
+ " new_tests real,\n",
+ " new_pillar_1_2_tests real\n",
+ ");'''\n",
+ "\n",
+ "with engine.connect() as connection:\n",
+ " connection.execute(query_string)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 425,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "update_string = '''with ownd as (\n",
+ " select date,\n",
+ " avg(hospital_cases) over wnd as w_hospital_cases,\n",
+ " avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
+ " avg(new_cases) over wnd as w_new_cases,\n",
+ " avg(new_deaths) over wnd as w_new_deaths,\n",
+ " avg(new_admissions) over wnd as w_new_admissions,\n",
+ " avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
+ " avg(new_tests) over wnd as w_new_tests,\n",
+ " avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
+ " count(*) over wnd as w_size\n",
+ " from uk_data\n",
+ " window wnd as (\n",
+ " order by uk_data.date\n",
+ " rows between 3 preceding and 3 following\n",
+ " )\n",
+ ")\n",
+ "insert into uk_data_7(date, \n",
+ " hospital_cases, \n",
+ " ventilator_beds, \n",
+ " new_cases,\n",
+ " new_deaths,\n",
+ " new_admissions, \n",
+ " new_pcr_tests, \n",
+ " new_tests, \n",
+ " new_pillar_1_2_tests\n",
+ " )\n",
+ "(select date,\n",
+ " w_hospital_cases,\n",
+ " w_ventilator_beds,\n",
+ " w_new_cases,\n",
+ " w_new_deaths,\n",
+ " w_new_admissions,\n",
+ " w_new_pcr_tests,\n",
+ " w_new_tests,\n",
+ " w_new_pillar_1_2_tests\n",
+ " from ownd\n",
+ " where w_size = 7\n",
+ ")'''\n",
+ "with engine.connect() as connection:\n",
+ " connection.execute(update_string)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 398,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "# %%sql insert into uk_data_7(date, ventilator_beds, new_cases, hospital_cases, new_deaths, new_admissions)\n",
+ "# values (\n",
+ "# select date, \n",
+ "# avg(ventilator_beds) over (order by date rows between 6 preceding and current row)\n",
+ "# from uk_data\n",
+ "# )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 399,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "# query_string = '''insert into uk_data_7(date, hospital_cases)\n",
+ "# select uk_data.date, \n",
+ "# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as hospital_cases\n",
+ "# from uk_data'''\n",
+ "# with engine.connect() as connection:\n",
+ "# connection.execute(query_string) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 360,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "# %%sql \n",
+ "# with m7 as \n",
+ "# (select uk_data.date as date7, \n",
+ "# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 6 preceding and current row) as nc7\n",
+ "# from uk_data\n",
+ "# )\n",
+ "# update uk_data_7\n",
+ "# set ventilator_beds = nc7\n",
+ "# from m7\n",
+ "# where uk_data_7.date = m7.date7"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 361,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " * postgresql://covid:***@localhost/covid\n",
+ "390 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "execution_count": 361,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# %%sql\n",
+ "# with m7 as \n",
+ "# (select uk_data.date as date7, \n",
+ "# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7\n",
+ "# from uk_data\n",
+ "# )\n",
+ "# update uk_data_7\n",
+ "# set ventilator_beds = nc7\n",
+ "# from m7\n",
+ "# where uk_data_7.date = m7.date7"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 362,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " * postgresql://covid:***@localhost/covid\n",
+ "390 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "execution_count": 362,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# %%sql\n",
+ "# with m7 as \n",
+ "# (select uk_data.date as date7, \n",
+ "# avg(uk_data.new_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7\n",
+ "# from uk_data\n",
+ "# )\n",
+ "# update uk_data_7\n",
+ "# set new_cases = nc7\n",
+ "# from m7\n",
+ "# where uk_data_7.date = m7.date7"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 363,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " * postgresql://covid:***@localhost/covid\n",
+ "390 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "execution_count": 363,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# %%sql\n",
+ "# with m7 as \n",
+ "# (select uk_data.date as date7, \n",
+ "# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
+ "# from uk_data\n",
+ "# )\n",
+ "# update uk_data_7\n",
+ "# set hospital_cases = d7\n",
+ "# from m7\n",
+ "# where uk_data_7.date = m7.date7"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 364,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " * postgresql://covid:***@localhost/covid\n",
+ "390 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "execution_count": 364,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# %%sql\n",
+ "# with m7 as \n",
+ "# (select uk_data.date as date7, \n",
+ "# avg(uk_data.new_deaths) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
+ "# from uk_data\n",
+ "# )\n",
+ "# update uk_data_7\n",
+ "# set new_deaths = d7\n",
+ "# from m7\n",
+ "# where uk_data_7.date = m7.date7"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 365,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " * postgresql://covid:***@localhost/covid\n",
+ "390 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "execution_count": 365,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# %%sql\n",
+ "# with m7 as \n",
+ "# (select uk_data.date as date7, \n",
+ "# avg(uk_data.new_admissions) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
+ "# from uk_data\n",
+ "# )\n",
+ "# update uk_data_7\n",
+ "# set new_admissions = d7\n",
+ "# from m7\n",
+ "# where uk_data_7.date = m7.date7"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 366,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " * postgresql://covid:***@localhost/covid\n",
+ "390 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "execution_count": 366,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# %%sql\n",
+ "# with m7 as \n",
+ "# (select uk_data.date as date7, \n",
+ "# avg(uk_data.new_pcr_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
+ "# from uk_data\n",
+ "# )\n",
+ "# update uk_data_7\n",
+ "# set new_pcr_tests = d7\n",
+ "# from m7\n",
+ "# where uk_data_7.date = m7.date7"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 367,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " * postgresql://covid:***@localhost/covid\n",
+ "390 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "execution_count": 367,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# %%sql\n",
+ "# with m7 as \n",
+ "# (select uk_data.date as date7, \n",
+ "# avg(uk_data.new_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
+ "# from uk_data\n",
+ "# )\n",
+ "# update uk_data_7\n",
+ "# set new_tests = d7\n",
+ "# from m7\n",
+ "# where uk_data_7.date = m7.date7"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 309,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "# %%sql\n",
+ "# with m7 as \n",
+ "# (select uk_data.date as date7, \n",
+ "# avg(uk_data.new_pillar_1_2_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
+ "# from uk_data\n",
+ "# )\n",
+ "# update uk_data_7\n",
+ "# set new_pillar_1_2_tests = d7\n",
+ "# from m7\n",
+ "# where uk_data_7.date = m7.date7"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 310,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " * postgresql://covid:***@localhost/covid\n",
+ "0 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "execution_count": 310,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# %%sql\n",
+ "# with wnd as\n",
+ "# ( select date, \n",
+ "# avg(new_pillar_1_2_tests)\n",
+ "# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests\n",
+ "# from uk_data\n",
+ "# )\n",
+ "# update uk_data_7\n",
+ "# set new_pillar_1_2_tests = wnd.a_new_pillar_1_2_tests\n",
+ "# from wnd\n",
+ "# where uk_data_7.date = wnd.date\n",
+ "# and (select count(*) from wnd) = 7"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 379,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " * postgresql://covid:***@localhost/covid\n",
+ "10 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <thead>\n",
+ " <tr>\n",
+ " <th>date</th>\n",
+ " <th>new_pillar_1_2_tests</th>\n",
+ " <th>a_new_pillar_1_2_tests</th>\n",
+ " <th>a_count</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <td>2021-01-26</td>\n",
+ " <td>None</td>\n",
+ " <td>463604.666666666667</td>\n",
+ " <td>3</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-25</td>\n",
+ " <td>None</td>\n",
+ " <td>499910.750000000000</td>\n",
+ " <td>4</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-24</td>\n",
+ " <td>None</td>\n",
+ " <td>528836.000000000000</td>\n",
+ " <td>5</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-23</td>\n",
+ " <td>None</td>\n",
+ " <td>543566.833333333333</td>\n",
+ " <td>6</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-22</td>\n",
+ " <td>None</td>\n",
+ " <td>545780.285714285714</td>\n",
+ " <td>7</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-21</td>\n",
+ " <td>None</td>\n",
+ " <td>546626.571428571429</td>\n",
+ " <td>7</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-20</td>\n",
+ " <td>None</td>\n",
+ " <td>546806.285714285714</td>\n",
+ " <td>7</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-19</td>\n",
+ " <td>None</td>\n",
+ " <td>547402.428571428571</td>\n",
+ " <td>7</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-18</td>\n",
+ " <td>None</td>\n",
+ " <td>542617.857142857143</td>\n",
+ " <td>7</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-17</td>\n",
+ " <td>None</td>\n",
+ " <td>545088.142857142857</td>\n",
+ " <td>7</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(datetime.date(2021, 1, 26), None, Decimal('463604.666666666667'), 3),\n",
+ " (datetime.date(2021, 1, 25), None, Decimal('499910.750000000000'), 4),\n",
+ " (datetime.date(2021, 1, 24), None, Decimal('528836.000000000000'), 5),\n",
+ " (datetime.date(2021, 1, 23), None, Decimal('543566.833333333333'), 6),\n",
+ " (datetime.date(2021, 1, 22), None, Decimal('545780.285714285714'), 7),\n",
+ " (datetime.date(2021, 1, 21), None, Decimal('546626.571428571429'), 7),\n",
+ " (datetime.date(2021, 1, 20), None, Decimal('546806.285714285714'), 7),\n",
+ " (datetime.date(2021, 1, 19), None, Decimal('547402.428571428571'), 7),\n",
+ " (datetime.date(2021, 1, 18), None, Decimal('542617.857142857143'), 7),\n",
+ " (datetime.date(2021, 1, 17), None, Decimal('545088.142857142857'), 7)]"
+ ]
+ },
+ "execution_count": 379,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# %%sql\n",
+ "# with wnd as\n",
+ "# ( select date, \n",
+ "# avg(new_pillar_1_2_tests)\n",
+ "# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests,\n",
+ "# count(new_pillar_1_2_tests)\n",
+ "# over (order by uk_data.date rows between 3 preceding and 3 following) as a_count\n",
+ "# from uk_data\n",
+ "# )\n",
+ "# select uk_data_7.date, new_pillar_1_2_tests, wnd.a_new_pillar_1_2_tests, wnd.a_count\n",
+ "# from uk_data_7, wnd\n",
+ "# where uk_data_7.date = wnd.date\n",
+ "# order by uk_data_7.date desc limit 10\n",
+ " \n",
+ "# select date, \n",
+ "# count(*) over wnd as w_size\n",
+ "# from uk_data\n",
+ "# window wnd as (\n",
+ "# order by uk_data.date\n",
+ "# rows between 3 preceding and 3 following\n",
+ "# );"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 401,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": [
+ "# %%sql\n",
+ "# select date, \n",
+ "# count(*) over wnd as w_size\n",
+ "# from uk_data\n",
+ "# window wnd as (\n",
+ "# order by uk_data.date\n",
+ "# rows between 3 preceding and 3 following\n",
+ "# )\n",
+ "# order by date desc limit 10"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 407,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " * postgresql://covid:***@localhost/covid\n",
+ "0 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "execution_count": 407,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# %%sql\n",
+ "# with ownd as (\n",
+ "# select date,\n",
+ "# avg(hospital_cases) over wnd as w_hospital_cases,\n",
+ "# avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
+ "# avg(new_cases) over wnd as w_new_cases,\n",
+ "# avg(new_deaths) over wnd as w_new_deaths,\n",
+ "# avg(new_admissions) over wnd as w_new_admissions,\n",
+ "# avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
+ "# avg(new_tests) over wnd as w_new_tests,\n",
+ "# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
+ "# count(*) over wnd as w_size\n",
+ "# from uk_data\n",
+ "# window wnd as (\n",
+ "# order by uk_data.date\n",
+ "# rows between 3 preceding and 3 following\n",
+ "# ))\n",
+ "# insert into uk_data_7(date, \n",
+ "# hospital_cases, \n",
+ "# ventilator_beds, \n",
+ "# new_cases,\n",
+ "# new_deaths,\n",
+ "# new_admissions, \n",
+ "# new_pcr_tests, \n",
+ "# new_tests, \n",
+ "# new_pillar_1_2_tests\n",
+ "# )\n",
+ "# (select date,\n",
+ "# avg(hospital_cases) over wnd as w_hospital_cases,\n",
+ "# avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
+ "# avg(new_cases) over wnd as w_new_cases,\n",
+ "# avg(new_deaths) over wnd as w_new_deaths,\n",
+ "# avg(new_admissions) over wnd as w_new_admissions,\n",
+ "# avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
+ "# avg(new_tests) over wnd as w_new_tests,\n",
+ "# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
+ "# count(*) over wnd as w_size\n",
+ "# from uk_data\n",
+ "# window wnd as (\n",
+ "# order by uk_data.date\n",
+ "# rows between 3 preceding and 3 following\n",
+ "# )\n",
+ "# )\n",
+ "# set date = ownd.date,\n",
+ "# hospital_cases = w_hospital_cases,\n",
+ "# ventilator_beds = w_ventilator_beds,\n",
+ "# new_cases = w_new_cases,\n",
+ "# new_deaths = w_new_deaths,\n",
+ "# new_admissions = w_new_admissions,\n",
+ "# new_pcr_tests = w_new_pcr_tests,\n",
+ "# new_tests = w_new_tests,\n",
+ "# new_pillar_1_2_tests = w_new_pillar_1_2_tests\n",
+ "# from ownd\n",
+ "# where w_size = 7"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 417,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " * postgresql://covid:***@localhost/covid\n",
+ "384 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "execution_count": 417,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# %%sql\n",
+ "# with ownd as (\n",
+ "# select date,\n",
+ "# avg(hospital_cases) over wnd as w_hospital_cases,\n",
+ "# avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
+ "# avg(new_cases) over wnd as w_new_cases,\n",
+ "# avg(new_deaths) over wnd as w_new_deaths,\n",
+ "# avg(new_admissions) over wnd as w_new_admissions,\n",
+ "# avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
+ "# avg(new_tests) over wnd as w_new_tests,\n",
+ "# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
+ "# count(*) over wnd as w_size\n",
+ "# from uk_data\n",
+ "# window wnd as (\n",
+ "# order by uk_data.date\n",
+ "# rows between 3 preceding and 3 following\n",
+ "# )\n",
+ "# )\n",
+ "# insert into uk_data_7(date, \n",
+ "# hospital_cases, \n",
+ "# ventilator_beds, \n",
+ "# new_cases,\n",
+ "# new_deaths,\n",
+ "# new_admissions, \n",
+ "# new_pcr_tests, \n",
+ "# new_tests, \n",
+ "# new_pillar_1_2_tests\n",
+ "# )\n",
+ "# (select date,\n",
+ "# w_hospital_cases,\n",
+ "# w_ventilator_beds,\n",
+ "# w_new_cases,\n",
+ "# w_new_deaths,\n",
+ "# w_new_admissions,\n",
+ "# w_new_pcr_tests,\n",
+ "# w_new_tests,\n",
+ "# w_new_pillar_1_2_tests\n",
+ "# from ownd\n",
+ "# where w_size = 7\n",
+ "# )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 427,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " * postgresql://covid:***@localhost/covid\n",
+ "10 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <thead>\n",
+ " <tr>\n",
+ " <th>date</th>\n",
+ " <th>hospital_cases</th>\n",
+ " <th>ventilator_beds</th>\n",
+ " <th>new_cases</th>\n",
+ " <th>new_deaths</th>\n",
+ " <th>new_admissions</th>\n",
+ " <th>new_pcr_tests</th>\n",
+ " <th>new_tests</th>\n",
+ " <th>new_pillar_1_2_tests</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <td>2021-01-23</td>\n",
+ " <td>37915.6</td>\n",
+ " <td>4027.3333</td>\n",
+ " <td>22463.666</td>\n",
+ " <td>1241.7142</td>\n",
+ " <td>3651.6667</td>\n",
+ " <td>353735.34</td>\n",
+ " <td>565312.3</td>\n",
+ " <td>543566.8</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-22</td>\n",
+ " <td>38057.168</td>\n",
+ " <td>4015.8572</td>\n",
+ " <td>24870.428</td>\n",
+ " <td>1238.7142</td>\n",
+ " <td>3771.75</td>\n",
+ " <td>354323.16</td>\n",
+ " <td>567830.3</td>\n",
+ " <td>545780.3</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-21</td>\n",
+ " <td>38217.715</td>\n",
+ " <td>3999.2856</td>\n",
+ " <td>30620.428</td>\n",
+ " <td>1239.7142</td>\n",
+ " <td>3828.2</td>\n",
+ " <td>363750.16</td>\n",
+ " <td>569716.7</td>\n",
+ " <td>546626.56</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-20</td>\n",
+ " <td>38294.0</td>\n",
+ " <td>3969.8572</td>\n",
+ " <td>32707.428</td>\n",
+ " <td>1248.4286</td>\n",
+ " <td>3811.0</td>\n",
+ " <td>372037.44</td>\n",
+ " <td>570458.1</td>\n",
+ " <td>546806.3</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-19</td>\n",
+ " <td>38348.43</td>\n",
+ " <td>3939.5715</td>\n",
+ " <td>34143.285</td>\n",
+ " <td>1240.8572</td>\n",
+ " <td>3825.8572</td>\n",
+ " <td>374771.16</td>\n",
+ " <td>571336.56</td>\n",
+ " <td>547402.44</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-18</td>\n",
+ " <td>38338.145</td>\n",
+ " <td>3898.5715</td>\n",
+ " <td>35766.57</td>\n",
+ " <td>1223.5714</td>\n",
+ " <td>3872.8572</td>\n",
+ " <td>377019.28</td>\n",
+ " <td>566901.1</td>\n",
+ " <td>542617.9</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-17</td>\n",
+ " <td>38292.285</td>\n",
+ " <td>3857.4285</td>\n",
+ " <td>37337.855</td>\n",
+ " <td>1217.5714</td>\n",
+ " <td>3939.0</td>\n",
+ " <td>389469.84</td>\n",
+ " <td>571242.56</td>\n",
+ " <td>545088.1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-16</td>\n",
+ " <td>38119.855</td>\n",
+ " <td>3810.7144</td>\n",
+ " <td>38824.0</td>\n",
+ " <td>1181.0</td>\n",
+ " <td>3967.2856</td>\n",
+ " <td>395066.72</td>\n",
+ " <td>569445.0</td>\n",
+ " <td>541938.0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-15</td>\n",
+ " <td>37858.715</td>\n",
+ " <td>3746.2856</td>\n",
+ " <td>40268.57</td>\n",
+ " <td>1128.5714</td>\n",
+ " <td>4029.2856</td>\n",
+ " <td>403526.72</td>\n",
+ " <td>570188.7</td>\n",
+ " <td>540199.9</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-14</td>\n",
+ " <td>37505.57</td>\n",
+ " <td>3667.2856</td>\n",
+ " <td>41989.145</td>\n",
+ " <td>1118.5714</td>\n",
+ " <td>4065.2856</td>\n",
+ " <td>412496.44</td>\n",
+ " <td>567178.7</td>\n",
+ " <td>537417.1</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(datetime.date(2021, 1, 23), 37915.6, 4027.3333, 22463.666, 1241.7142, 3651.6667, 353735.34, 565312.3, 543566.8),\n",
+ " (datetime.date(2021, 1, 22), 38057.168, 4015.8572, 24870.428, 1238.7142, 3771.75, 354323.16, 567830.3, 545780.3),\n",
+ " (datetime.date(2021, 1, 21), 38217.715, 3999.2856, 30620.428, 1239.7142, 3828.2, 363750.16, 569716.7, 546626.56),\n",
+ " (datetime.date(2021, 1, 20), 38294.0, 3969.8572, 32707.428, 1248.4286, 3811.0, 372037.44, 570458.1, 546806.3),\n",
+ " (datetime.date(2021, 1, 19), 38348.43, 3939.5715, 34143.285, 1240.8572, 3825.8572, 374771.16, 571336.56, 547402.44),\n",
+ " (datetime.date(2021, 1, 18), 38338.145, 3898.5715, 35766.57, 1223.5714, 3872.8572, 377019.28, 566901.1, 542617.9),\n",
+ " (datetime.date(2021, 1, 17), 38292.285, 3857.4285, 37337.855, 1217.5714, 3939.0, 389469.84, 571242.56, 545088.1),\n",
+ " (datetime.date(2021, 1, 16), 38119.855, 3810.7144, 38824.0, 1181.0, 3967.2856, 395066.72, 569445.0, 541938.0),\n",
+ " (datetime.date(2021, 1, 15), 37858.715, 3746.2856, 40268.57, 1128.5714, 4029.2856, 403526.72, 570188.7, 540199.9),\n",
+ " (datetime.date(2021, 1, 14), 37505.57, 3667.2856, 41989.145, 1118.5714, 4065.2856, 412496.44, 567178.7, 537417.1)]"
+ ]
+ },
+ "execution_count": 427,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%sql select * from uk_data_7 order by date desc limit 10"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 428,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " * postgresql://covid:***@localhost/covid\n",
+ "10 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <thead>\n",
+ " <tr>\n",
+ " <th>date</th>\n",
+ " <th>hospital_cases</th>\n",
+ " <th>ventilator_beds</th>\n",
+ " <th>new_cases</th>\n",
+ " <th>new_deaths</th>\n",
+ " <th>new_admissions</th>\n",
+ " <th>new_pcr_tests</th>\n",
+ " <th>new_tests</th>\n",
+ " <th>new_pillar_1_2_tests</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <td>2021-01-26</td>\n",
+ " <td>None</td>\n",
+ " <td>None</td>\n",
+ " <td>None</td>\n",
+ " <td>1631</td>\n",
+ " <td>None</td>\n",
+ " <td>None</td>\n",
+ " <td>None</td>\n",
+ " <td>None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-25</td>\n",
+ " <td>None</td>\n",
+ " <td>4032</td>\n",
+ " <td>4482</td>\n",
+ " <td>592</td>\n",
+ " <td>None</td>\n",
+ " <td>196510</td>\n",
+ " <td>546734</td>\n",
+ " <td>531104</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-24</td>\n",
+ " <td>37561</td>\n",
+ " <td>4077</td>\n",
+ " <td>14266</td>\n",
+ " <td>610</td>\n",
+ " <td>None</td>\n",
+ " <td>262111</td>\n",
+ " <td>412775</td>\n",
+ " <td>394479</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-23</td>\n",
+ " <td>37266</td>\n",
+ " <td>4066</td>\n",
+ " <td>20495</td>\n",
+ " <td>1348</td>\n",
+ " <td>None</td>\n",
+ " <td>389209</td>\n",
+ " <td>486425</td>\n",
+ " <td>465231</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-22</td>\n",
+ " <td>38144</td>\n",
+ " <td>4076</td>\n",
+ " <td>29094</td>\n",
+ " <td>1401</td>\n",
+ " <td>3341</td>\n",
+ " <td>401075</td>\n",
+ " <td>631901</td>\n",
+ " <td>608829</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-21</td>\n",
+ " <td>37957</td>\n",
+ " <td>3960</td>\n",
+ " <td>31430</td>\n",
+ " <td>1290</td>\n",
+ " <td>3598</td>\n",
+ " <td>439408</td>\n",
+ " <td>668989</td>\n",
+ " <td>644537</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-20</td>\n",
+ " <td>38650</td>\n",
+ " <td>3953</td>\n",
+ " <td>35015</td>\n",
+ " <td>1820</td>\n",
+ " <td>4016</td>\n",
+ " <td>434099</td>\n",
+ " <td>645050</td>\n",
+ " <td>617221</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-19</td>\n",
+ " <td>38765</td>\n",
+ " <td>3947</td>\n",
+ " <td>39311</td>\n",
+ " <td>1610</td>\n",
+ " <td>4132</td>\n",
+ " <td>357850</td>\n",
+ " <td>582938</td>\n",
+ " <td>559061</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-18</td>\n",
+ " <td>39181</td>\n",
+ " <td>3916</td>\n",
+ " <td>44732</td>\n",
+ " <td>599</td>\n",
+ " <td>4054</td>\n",
+ " <td>262499</td>\n",
+ " <td>559939</td>\n",
+ " <td>537028</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2021-01-17</td>\n",
+ " <td>38095</td>\n",
+ " <td>3871</td>\n",
+ " <td>28875</td>\n",
+ " <td>671</td>\n",
+ " <td>3725</td>\n",
+ " <td>320122</td>\n",
+ " <td>417965</td>\n",
+ " <td>395737</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(datetime.date(2021, 1, 26), None, None, None, 1631, None, None, None, None),\n",
+ " (datetime.date(2021, 1, 25), None, 4032, 4482, 592, None, 196510, 546734, 531104),\n",
+ " (datetime.date(2021, 1, 24), 37561, 4077, 14266, 610, None, 262111, 412775, 394479),\n",
+ " (datetime.date(2021, 1, 23), 37266, 4066, 20495, 1348, None, 389209, 486425, 465231),\n",
+ " (datetime.date(2021, 1, 22), 38144, 4076, 29094, 1401, 3341, 401075, 631901, 608829),\n",
+ " (datetime.date(2021, 1, 21), 37957, 3960, 31430, 1290, 3598, 439408, 668989, 644537),\n",
+ " (datetime.date(2021, 1, 20), 38650, 3953, 35015, 1820, 4016, 434099, 645050, 617221),\n",
+ " (datetime.date(2021, 1, 19), 38765, 3947, 39311, 1610, 4132, 357850, 582938, 559061),\n",
+ " (datetime.date(2021, 1, 18), 39181, 3916, 44732, 599, 4054, 262499, 559939, 537028),\n",
+ " (datetime.date(2021, 1, 17), 38095, 3871, 28875, 671, 3725, 320122, 417965, 395737)]"
+ ]
+ },
+ "execution_count": 428,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%sql select * from uk_data order by date desc limit 10"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "Collapsed": "false"
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "jupytext": {
+ "formats": "ipynb,md"
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}