+++ /dev/null
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "Collapsed": "false"
- },
- "source": [
- "Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 314,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float\n",
- "from sqlalchemy import create_engine"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 315,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "import itertools\n",
- "import collections\n",
- "import json\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "from scipy.stats import gmean\n",
- "import datetime\n",
- "\n",
- "import matplotlib as mpl\n",
- "import matplotlib.pyplot as plt\n",
- "%matplotlib inline"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 316,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The sql extension is already loaded. To reload it, use:\n",
- " %reload_ext sql\n"
- ]
- }
- ],
- "source": [
- "%load_ext sql"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 317,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 318,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'Connected: covid@covid'"
- ]
- },
- "execution_count": 318,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "%sql $connection_string"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 319,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "eng = create_engine(connection_string)\n",
- "engine = eng.execution_options(isolation_level=\"AUTOCOMMIT\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 320,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "DEATH_COUNT_THRESHOLD = 10\n",
- "COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()\n",
- "COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()\n",
- "COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()\n",
- "# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()\n",
- "\n",
- "COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA\n",
- " 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',\n",
- " 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',\n",
- " 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']\n",
- "COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))\n",
- "COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 321,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " % Total % Received % Xferd Average Speed Time Time Time Current\n",
- " Dload Upload Total Spent Left Speed\n",
- "100 623k 100 623k 0 0 927k 0 --:--:-- --:--:-- --:--:-- 925k\n"
- ]
- }
- ],
- "source": [
- "!curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 322,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "# First col is a date, treat geoId of NA as 'Namibia', not \"NA\" value\n",
- "raw_data = pd.read_csv('covid.csv', \n",
- " parse_dates=[0], dayfirst=True,\n",
- " keep_default_na=False, na_values = [''],\n",
- "# dtype = {'day': np.int64, \n",
- "# 'month': np.int64, \n",
- "# 'year': np.int64, \n",
- "# 'cases': np.int64, \n",
- "# 'deaths': np.int64, \n",
- "# 'countriesAndTerritories': str, \n",
- "# 'geoId': str, \n",
- "# 'countryterritoryCode': str, \n",
- "# 'popData2019': np.int64, \n",
- "# 'continentExp': str, \n",
- "# }\n",
- " )"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 323,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "100050"
- ]
- },
- "execution_count": 323,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "raw_data.size"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 324,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "raw_data.fillna(0, inplace=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 325,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>dateRep</th>\n",
- " <th>year_week</th>\n",
- " <th>cases_weekly</th>\n",
- " <th>deaths_weekly</th>\n",
- " <th>countriesAndTerritories</th>\n",
- " <th>geoId</th>\n",
- " <th>countryterritoryCode</th>\n",
- " <th>popData2019</th>\n",
- " <th>continentExp</th>\n",
- " <th>notification_rate_per_100000_population_14-days</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>2021-01-18</td>\n",
- " <td>2021-02</td>\n",
- " <td>557</td>\n",
- " <td>45</td>\n",
- " <td>Afghanistan</td>\n",
- " <td>AF</td>\n",
- " <td>AFG</td>\n",
- " <td>38041757.0</td>\n",
- " <td>Asia</td>\n",
- " <td>3.24</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>2021-01-11</td>\n",
- " <td>2021-01</td>\n",
- " <td>675</td>\n",
- " <td>71</td>\n",
- " <td>Afghanistan</td>\n",
- " <td>AF</td>\n",
- " <td>AFG</td>\n",
- " <td>38041757.0</td>\n",
- " <td>Asia</td>\n",
- " <td>4.15</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>2021-01-04</td>\n",
- " <td>2020-53</td>\n",
- " <td>902</td>\n",
- " <td>60</td>\n",
- " <td>Afghanistan</td>\n",
- " <td>AF</td>\n",
- " <td>AFG</td>\n",
- " <td>38041757.0</td>\n",
- " <td>Asia</td>\n",
- " <td>7.61</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>2020-12-28</td>\n",
- " <td>2020-52</td>\n",
- " <td>1994</td>\n",
- " <td>88</td>\n",
- " <td>Afghanistan</td>\n",
- " <td>AF</td>\n",
- " <td>AFG</td>\n",
- " <td>38041757.0</td>\n",
- " <td>Asia</td>\n",
- " <td>7.19</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>2020-12-21</td>\n",
- " <td>2020-51</td>\n",
- " <td>740</td>\n",
- " <td>111</td>\n",
- " <td>Afghanistan</td>\n",
- " <td>AF</td>\n",
- " <td>AFG</td>\n",
- " <td>38041757.0</td>\n",
- " <td>Asia</td>\n",
- " <td>6.56</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " dateRep year_week cases_weekly deaths_weekly countriesAndTerritories \\\n",
- "0 2021-01-18 2021-02 557 45 Afghanistan \n",
- "1 2021-01-11 2021-01 675 71 Afghanistan \n",
- "2 2021-01-04 2020-53 902 60 Afghanistan \n",
- "3 2020-12-28 2020-52 1994 88 Afghanistan \n",
- "4 2020-12-21 2020-51 740 111 Afghanistan \n",
- "\n",
- " geoId countryterritoryCode popData2019 continentExp \\\n",
- "0 AF AFG 38041757.0 Asia \n",
- "1 AF AFG 38041757.0 Asia \n",
- "2 AF AFG 38041757.0 Asia \n",
- "3 AF AFG 38041757.0 Asia \n",
- "4 AF AFG 38041757.0 Asia \n",
- "\n",
- " notification_rate_per_100000_population_14-days \n",
- "0 3.24 \n",
- "1 4.15 \n",
- "2 7.61 \n",
- "3 7.19 \n",
- "4 6.56 "
- ]
- },
- "execution_count": 325,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "raw_data.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 326,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',\n",
- " 'countriesAndTerritories': 'country_name',\n",
- " 'countryterritoryCode': 'country_territory_code',\n",
- " 'popData2019': 'population_2019',\n",
- " 'continentExp': 'continent'}, inplace=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 327,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Index(['report_date', 'year_week', 'cases_weekly', 'deaths_weekly',\n",
- " 'country_name', 'geo_id', 'country_territory_code', 'population_2019',\n",
- " 'continent', 'notification_rate_per_100000_population_14-days'],\n",
- " dtype='object')"
- ]
- },
- "execution_count": 327,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "raw_data.columns"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 328,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "report_date datetime64[ns]\n",
- "year_week object\n",
- "cases_weekly int64\n",
- "deaths_weekly int64\n",
- "country_name object\n",
- "geo_id object\n",
- "country_territory_code object\n",
- "population_2019 float64\n",
- "continent object\n",
- "notification_rate_per_100000_population_14-days float64\n",
- "dtype: object"
- ]
- },
- "execution_count": 328,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "raw_data.dtypes"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 329,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>report_date</th>\n",
- " <th>cases_weekly</th>\n",
- " <th>deaths_weekly</th>\n",
- " <th>geo_id</th>\n",
- " <th>notification_rate_per_100000_population_14-days</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>2021-01-18</td>\n",
- " <td>557</td>\n",
- " <td>45</td>\n",
- " <td>AF</td>\n",
- " <td>3.24</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>2021-01-11</td>\n",
- " <td>675</td>\n",
- " <td>71</td>\n",
- " <td>AF</td>\n",
- " <td>4.15</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>2021-01-04</td>\n",
- " <td>902</td>\n",
- " <td>60</td>\n",
- " <td>AF</td>\n",
- " <td>7.61</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>2020-12-28</td>\n",
- " <td>1994</td>\n",
- " <td>88</td>\n",
- " <td>AF</td>\n",
- " <td>7.19</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>2020-12-21</td>\n",
- " <td>740</td>\n",
- " <td>111</td>\n",
- " <td>AF</td>\n",
- " <td>6.56</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>10000</th>\n",
- " <td>2020-04-20</td>\n",
- " <td>11</td>\n",
- " <td>0</td>\n",
- " <td>ZW</td>\n",
- " <td>0.11</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>10001</th>\n",
- " <td>2020-04-13</td>\n",
- " <td>5</td>\n",
- " <td>2</td>\n",
- " <td>ZW</td>\n",
- " <td>0.05</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>10002</th>\n",
- " <td>2020-04-06</td>\n",
- " <td>2</td>\n",
- " <td>0</td>\n",
- " <td>ZW</td>\n",
- " <td>0.05</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>10003</th>\n",
- " <td>2020-03-30</td>\n",
- " <td>5</td>\n",
- " <td>1</td>\n",
- " <td>ZW</td>\n",
- " <td>0.05</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>10004</th>\n",
- " <td>2020-03-23</td>\n",
- " <td>2</td>\n",
- " <td>0</td>\n",
- " <td>ZW</td>\n",
- " <td>0.00</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>10005 rows × 5 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " report_date cases_weekly deaths_weekly geo_id \\\n",
- "0 2021-01-18 557 45 AF \n",
- "1 2021-01-11 675 71 AF \n",
- "2 2021-01-04 902 60 AF \n",
- "3 2020-12-28 1994 88 AF \n",
- "4 2020-12-21 740 111 AF \n",
- "... ... ... ... ... \n",
- "10000 2020-04-20 11 0 ZW \n",
- "10001 2020-04-13 5 2 ZW \n",
- "10002 2020-04-06 2 0 ZW \n",
- "10003 2020-03-30 5 1 ZW \n",
- "10004 2020-03-23 2 0 ZW \n",
- "\n",
- " notification_rate_per_100000_population_14-days \n",
- "0 3.24 \n",
- "1 4.15 \n",
- "2 7.61 \n",
- "3 7.19 \n",
- "4 6.56 \n",
- "... ... \n",
- "10000 0.11 \n",
- "10001 0.05 \n",
- "10002 0.05 \n",
- "10003 0.05 \n",
- "10004 0.00 \n",
- "\n",
- "[10005 rows x 5 columns]"
- ]
- },
- "execution_count": 329,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 330,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>country_name</th>\n",
- " <th>geo_id</th>\n",
- " <th>country_territory_code</th>\n",
- " <th>population_2019</th>\n",
- " <th>continent</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>Afghanistan</td>\n",
- " <td>AF</td>\n",
- " <td>AFG</td>\n",
- " <td>38041757.0</td>\n",
- " <td>Asia</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>Afghanistan</td>\n",
- " <td>AF</td>\n",
- " <td>AFG</td>\n",
- " <td>38041757.0</td>\n",
- " <td>Asia</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>Afghanistan</td>\n",
- " <td>AF</td>\n",
- " <td>AFG</td>\n",
- " <td>38041757.0</td>\n",
- " <td>Asia</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>Afghanistan</td>\n",
- " <td>AF</td>\n",
- " <td>AFG</td>\n",
- " <td>38041757.0</td>\n",
- " <td>Asia</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>Afghanistan</td>\n",
- " <td>AF</td>\n",
- " <td>AFG</td>\n",
- " <td>38041757.0</td>\n",
- " <td>Asia</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>10000</th>\n",
- " <td>Zimbabwe</td>\n",
- " <td>ZW</td>\n",
- " <td>ZWE</td>\n",
- " <td>14645473.0</td>\n",
- " <td>Africa</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>10001</th>\n",
- " <td>Zimbabwe</td>\n",
- " <td>ZW</td>\n",
- " <td>ZWE</td>\n",
- " <td>14645473.0</td>\n",
- " <td>Africa</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>10002</th>\n",
- " <td>Zimbabwe</td>\n",
- " <td>ZW</td>\n",
- " <td>ZWE</td>\n",
- " <td>14645473.0</td>\n",
- " <td>Africa</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>10003</th>\n",
- " <td>Zimbabwe</td>\n",
- " <td>ZW</td>\n",
- " <td>ZWE</td>\n",
- " <td>14645473.0</td>\n",
- " <td>Africa</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>10004</th>\n",
- " <td>Zimbabwe</td>\n",
- " <td>ZW</td>\n",
- " <td>ZWE</td>\n",
- " <td>14645473.0</td>\n",
- " <td>Africa</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>10005 rows × 5 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " country_name geo_id country_territory_code population_2019 continent\n",
- "0 Afghanistan AF AFG 38041757.0 Asia\n",
- "1 Afghanistan AF AFG 38041757.0 Asia\n",
- "2 Afghanistan AF AFG 38041757.0 Asia\n",
- "3 Afghanistan AF AFG 38041757.0 Asia\n",
- "4 Afghanistan AF AFG 38041757.0 Asia\n",
- "... ... ... ... ... ...\n",
- "10000 Zimbabwe ZW ZWE 14645473.0 Africa\n",
- "10001 Zimbabwe ZW ZWE 14645473.0 Africa\n",
- "10002 Zimbabwe ZW ZWE 14645473.0 Africa\n",
- "10003 Zimbabwe ZW ZWE 14645473.0 Africa\n",
- "10004 Zimbabwe ZW ZWE 14645473.0 Africa\n",
- "\n",
- "[10005 rows x 5 columns]"
- ]
- },
- "execution_count": 330,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "raw_data[['country_name', 'geo_id', 'country_territory_code',\n",
- " 'population_2019', 'continent']]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 331,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']].to_sql(\n",
- " 'weekly_cases',\n",
- " engine,\n",
- " if_exists='replace',\n",
- " index=False,\n",
- " chunksize=500,\n",
- " dtype={\n",
- " \"report_date\": Date,\n",
- " \"cases_weekly\": Integer,\n",
- " \"deaths_weekly\": Integer,\n",
- " \"geo_id\": String,\n",
- " \"notification_rate_per_100000_population_14-days\": Float\n",
- " }\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 332,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "raw_data[['country_name', 'geo_id', 'country_territory_code',\n",
- " 'population_2019', 'continent']].drop_duplicates().to_sql(\n",
- " 'countries',\n",
- " engine,\n",
- " if_exists='replace',\n",
- " index=False,\n",
- " chunksize=500,\n",
- " dtype={\n",
- " \"country_name\": Text,\n",
- " \"geo_id\": String,\n",
- " \"country_territory_code\": String,\n",
- " \"population_2019\": Integer,\n",
- " \"continent\": Text\n",
- " }\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 333,
- "metadata": {
- "Collapsed": "false",
- "scrolled": true
- },
- "outputs": [],
- "source": [
- "# %sql select geo_id from weekly_cases limit 10"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 334,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "# %%sql alter table weekly_cases add primary key (geo_id, report_date);\n",
- "# alter table countries add primary key (geo_id);\n",
- "# alter table weekly_cases add foreign key (geo_id) references countries(geo_id);\n",
- "# alter table weekly_cases add culm_cases integer;\n",
- "# alter table weekly_cases add culm_deaths integer;"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 335,
- "metadata": {},
- "outputs": [],
- "source": [
- "with engine.connect() as connection:\n",
- " connection.execute('alter table weekly_cases add primary key (geo_id, report_date)')\n",
- " connection.execute('alter table countries add primary key (geo_id);')\n",
- " connection.execute('alter table weekly_cases add foreign key (geo_id) references countries(geo_id);')\n",
- " connection.execute('alter table weekly_cases add culm_cases integer;')\n",
- " connection.execute('alter table weekly_cases add culm_deaths integer;')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 336,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "# %sql select report_date, cases_weekly, country_name from weekly_cases join countries using (geo_id) order by report_date desc limit 10"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 337,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "# %sql select report_date, cases_weekly, country_name from weekly_cases join countries on weekly_cases.geo_id = countries.geo_id order by report_date desc limit 10"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 338,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " * postgresql://covid:***@localhost/covid\n",
- "10 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "<table>\n",
- " <thead>\n",
- " <tr>\n",
- " <th>report_date</th>\n",
- " <th>cases_weekly</th>\n",
- " <th>deaths_weekly</th>\n",
- " <th>geo_id</th>\n",
- " <th>notification_rate_per_100000_population_14-days</th>\n",
- " <th>culm_cases</th>\n",
- " <th>culm_deaths</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <td>2021-01-18</td>\n",
- " <td>557</td>\n",
- " <td>45</td>\n",
- " <td>AF</td>\n",
- " <td>3.24</td>\n",
- " <td>None</td>\n",
- " <td>None</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-11</td>\n",
- " <td>675</td>\n",
- " <td>71</td>\n",
- " <td>AF</td>\n",
- " <td>4.15</td>\n",
- " <td>None</td>\n",
- " <td>None</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-04</td>\n",
- " <td>902</td>\n",
- " <td>60</td>\n",
- " <td>AF</td>\n",
- " <td>7.61</td>\n",
- " <td>None</td>\n",
- " <td>None</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2020-12-28</td>\n",
- " <td>1994</td>\n",
- " <td>88</td>\n",
- " <td>AF</td>\n",
- " <td>7.19</td>\n",
- " <td>None</td>\n",
- " <td>None</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2020-12-21</td>\n",
- " <td>740</td>\n",
- " <td>111</td>\n",
- " <td>AF</td>\n",
- " <td>6.56</td>\n",
- " <td>None</td>\n",
- " <td>None</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2020-12-14</td>\n",
- " <td>1757</td>\n",
- " <td>71</td>\n",
- " <td>AF</td>\n",
- " <td>9.01</td>\n",
- " <td>None</td>\n",
- " <td>None</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2020-12-07</td>\n",
- " <td>1672</td>\n",
- " <td>137</td>\n",
- " <td>AF</td>\n",
- " <td>7.22</td>\n",
- " <td>None</td>\n",
- " <td>None</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2020-11-30</td>\n",
- " <td>1073</td>\n",
- " <td>68</td>\n",
- " <td>AF</td>\n",
- " <td>6.42</td>\n",
- " <td>None</td>\n",
- " <td>None</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2020-11-23</td>\n",
- " <td>1368</td>\n",
- " <td>69</td>\n",
- " <td>AF</td>\n",
- " <td>6.66</td>\n",
- " <td>None</td>\n",
- " <td>None</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2020-11-16</td>\n",
- " <td>1164</td>\n",
- " <td>61</td>\n",
- " <td>AF</td>\n",
- " <td>4.65</td>\n",
- " <td>None</td>\n",
- " <td>None</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>"
- ],
- "text/plain": [
- "[(datetime.date(2021, 1, 18), 557, 45, 'AF', 3.24, None, None),\n",
- " (datetime.date(2021, 1, 11), 675, 71, 'AF', 4.15, None, None),\n",
- " (datetime.date(2021, 1, 4), 902, 60, 'AF', 7.61, None, None),\n",
- " (datetime.date(2020, 12, 28), 1994, 88, 'AF', 7.19, None, None),\n",
- " (datetime.date(2020, 12, 21), 740, 111, 'AF', 6.56, None, None),\n",
- " (datetime.date(2020, 12, 14), 1757, 71, 'AF', 9.01, None, None),\n",
- " (datetime.date(2020, 12, 7), 1672, 137, 'AF', 7.22, None, None),\n",
- " (datetime.date(2020, 11, 30), 1073, 68, 'AF', 6.42, None, None),\n",
- " (datetime.date(2020, 11, 23), 1368, 69, 'AF', 6.66, None, None),\n",
- " (datetime.date(2020, 11, 16), 1164, 61, 'AF', 4.65, None, None)]"
- ]
- },
- "execution_count": 338,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "%sql select * from weekly_cases limit 10"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 339,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "# %%sql\n",
- "# with culm as \n",
- "# (select report_date, geo_id,\n",
- "# sum(cases_weekly) over (partition by geo_id \n",
- "# order by report_date) as culm_data\n",
- "# from weekly_cases\n",
- "# )\n",
- "# update weekly_cases\n",
- "# set culm_cases = culm_data\n",
- "# from culm\n",
- "# where weekly_cases.report_date = culm.report_date and\n",
- "# weekly_cases.geo_id = culm.geo_id"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 340,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "query_string = '''with culm as \n",
- " (select report_date, geo_id,\n",
- " sum(cases_weekly) over (partition by geo_id \n",
- " order by report_date) as culm_data\n",
- " from weekly_cases\n",
- " )\n",
- "update weekly_cases\n",
- " set culm_cases = culm_data\n",
- " from culm\n",
- " where weekly_cases.report_date = culm.report_date and\n",
- " weekly_cases.geo_id = culm.geo_id'''\n",
- "with engine.connect() as connection:\n",
- " connection.execute(query_string)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 341,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "# %%sql\n",
- "# with culm as \n",
- "# (select report_date, geo_id,\n",
- "# sum(deaths_weekly) over (partition by geo_id \n",
- "# order by report_date) as culm_data\n",
- "# from weekly_cases\n",
- "# )\n",
- "# update weekly_cases\n",
- "# set culm_deaths = culm_data\n",
- "# from culm\n",
- "# where weekly_cases.report_date = culm.report_date and\n",
- "# weekly_cases.geo_id = culm.geo_id"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 342,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "query_string = '''with culm as \n",
- " (select report_date, geo_id,\n",
- " sum(deaths_weekly) over (partition by geo_id \n",
- " order by report_date) as culm_data\n",
- " from weekly_cases\n",
- " )\n",
- "update weekly_cases\n",
- " set culm_deaths = culm_data\n",
- " from culm\n",
- " where weekly_cases.report_date = culm.report_date and\n",
- " weekly_cases.geo_id = culm.geo_id'''\n",
- "with engine.connect() as connection:\n",
- " connection.execute(query_string)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 343,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " % Total % Received % Xferd Average Speed Time Time Time Current\n",
- " Dload Upload Total Spent Left Speed\n",
- "100 26053 100 26053 0 0 54277 0 --:--:-- --:--:-- --:--:-- 54277\n"
- ]
- }
- ],
- "source": [
- "uk_query_string = (\n",
- "\"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n",
- "\"metric=covidOccupiedMVBeds&\"\n",
- "\"metric=newAdmissions&\"\n",
- "\"metric=newCasesBySpecimenDate&\"\n",
- "\"metric=hospitalCases&\"\n",
- "\"metric=newDeaths28DaysByPublishDate&\"\n",
- "\"format=csv\"\n",
- ")\n",
- "\n",
- "!curl \"$uk_query_string\" > uk_data.csv"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 344,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " % Total % Received % Xferd Average Speed Time Time Time Current\n",
- " Dload Upload Total Spent Left Speed\n",
- "100 20223 100 20223 0 0 78688 0 --:--:-- --:--:-- --:--:-- 78383\n"
- ]
- }
- ],
- "source": [
- "test_query_string = (\n",
- "\"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n",
- "\"metric=newPCRTestsByPublishDate&\"\n",
- "\"metric=newTestsByPublishDate&\"\n",
- "\"metric=newPillarOneTwoTestsByPublishDate&\"\n",
- "\"format=csv\"\n",
- ")\n",
- "!curl \"$test_query_string\" > test_data.csv"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 345,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "# hospital_query_string = (\n",
- "# \"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n",
- "# \"metric=newAdmissions&\"\n",
- "# \"format=csv\"\n",
- "# )\n",
- "# !curl \"$hospital_query_string\" > hospital_admissions.csv"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 346,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "# hospital_query_string = (\n",
- "# \"https://api.coronavirus.data.gov.uk/v1/data?\"\n",
- "# \"filters=areaName=United%2520Kingdom;areaType=overview&\"\n",
- "# \"structure=%7B%22date%22:%22date%22,%22areaName%22:%22areaName%22,%22areaType%22:%22areaType%22,\"\n",
- "# \"%22newAdmissions%22:%22newAdmissions%22,%22cumAdmissions%22:%22cumAdmissions%22%7D&format=csv\"\n",
- "# )\n",
- " \n",
- "# !curl \"$hospital_query_string\" | gunzip > hospital_admissions.csv"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 347,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>date</th>\n",
- " <th>areaType</th>\n",
- " <th>areaCode</th>\n",
- " <th>areaName</th>\n",
- " <th>covidOccupiedMVBeds</th>\n",
- " <th>newAdmissions</th>\n",
- " <th>newCasesBySpecimenDate</th>\n",
- " <th>hospitalCases</th>\n",
- " <th>newDeaths28DaysByPublishDate</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>2021-01-26</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>1631</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>2021-01-25</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>4032.0</td>\n",
- " <td>NaN</td>\n",
- " <td>4482.0</td>\n",
- " <td>NaN</td>\n",
- " <td>592</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>2021-01-24</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>4077.0</td>\n",
- " <td>NaN</td>\n",
- " <td>14266.0</td>\n",
- " <td>37561.0</td>\n",
- " <td>610</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>2021-01-23</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>4066.0</td>\n",
- " <td>NaN</td>\n",
- " <td>20495.0</td>\n",
- " <td>37266.0</td>\n",
- " <td>1348</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>2021-01-22</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>4076.0</td>\n",
- " <td>3341.0</td>\n",
- " <td>29094.0</td>\n",
- " <td>38144.0</td>\n",
- " <td>1401</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>385</th>\n",
- " <td>2020-01-06</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>386</th>\n",
- " <td>2020-01-05</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>387</th>\n",
- " <td>2020-01-04</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>388</th>\n",
- " <td>2020-01-03</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>389</th>\n",
- " <td>2020-06-14</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>393.0</td>\n",
- " <td>364.0</td>\n",
- " <td>821.0</td>\n",
- " <td>4907.0</td>\n",
- " <td>27</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>390 rows × 9 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " date areaType areaCode areaName covidOccupiedMVBeds \\\n",
- "0 2021-01-26 overview K02000001 United Kingdom NaN \n",
- "1 2021-01-25 overview K02000001 United Kingdom 4032.0 \n",
- "2 2021-01-24 overview K02000001 United Kingdom 4077.0 \n",
- "3 2021-01-23 overview K02000001 United Kingdom 4066.0 \n",
- "4 2021-01-22 overview K02000001 United Kingdom 4076.0 \n",
- ".. ... ... ... ... ... \n",
- "385 2020-01-06 overview K02000001 United Kingdom NaN \n",
- "386 2020-01-05 overview K02000001 United Kingdom NaN \n",
- "387 2020-01-04 overview K02000001 United Kingdom NaN \n",
- "388 2020-01-03 overview K02000001 United Kingdom NaN \n",
- "389 2020-06-14 overview K02000001 United Kingdom 393.0 \n",
- "\n",
- " newAdmissions newCasesBySpecimenDate hospitalCases \\\n",
- "0 NaN NaN NaN \n",
- "1 NaN 4482.0 NaN \n",
- "2 NaN 14266.0 37561.0 \n",
- "3 NaN 20495.0 37266.0 \n",
- "4 3341.0 29094.0 38144.0 \n",
- ".. ... ... ... \n",
- "385 NaN NaN NaN \n",
- "386 NaN NaN NaN \n",
- "387 NaN NaN NaN \n",
- "388 NaN NaN NaN \n",
- "389 364.0 821.0 4907.0 \n",
- "\n",
- " newDeaths28DaysByPublishDate \n",
- "0 1631 \n",
- "1 592 \n",
- "2 610 \n",
- "3 1348 \n",
- "4 1401 \n",
- ".. ... \n",
- "385 0 \n",
- "386 0 \n",
- "387 0 \n",
- "388 0 \n",
- "389 27 \n",
- "\n",
- "[390 rows x 9 columns]"
- ]
- },
- "execution_count": 347,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "uk_data = pd.read_csv('uk_data.csv', \n",
- " parse_dates=[0], dayfirst=True)\n",
- "uk_data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 348,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>date</th>\n",
- " <th>areaType</th>\n",
- " <th>areaCode</th>\n",
- " <th>areaName</th>\n",
- " <th>newPCRTestsByPublishDate</th>\n",
- " <th>newTestsByPublishDate</th>\n",
- " <th>newPillarOneTwoTestsByPublishDate</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>2021-01-25</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>196510.0</td>\n",
- " <td>546734</td>\n",
- " <td>531104</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>2021-01-24</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>262111.0</td>\n",
- " <td>412775</td>\n",
- " <td>394479</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>2021-01-23</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>389209.0</td>\n",
- " <td>486425</td>\n",
- " <td>465231</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>2021-01-22</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>401075.0</td>\n",
- " <td>631901</td>\n",
- " <td>608829</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>2021-01-21</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>439408.0</td>\n",
- " <td>668989</td>\n",
- " <td>644537</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>296</th>\n",
- " <td>2020-04-03</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>NaN</td>\n",
- " <td>14629</td>\n",
- " <td>14293</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>297</th>\n",
- " <td>2020-04-02</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>NaN</td>\n",
- " <td>13623</td>\n",
- " <td>13457</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>298</th>\n",
- " <td>2020-04-01</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>NaN</td>\n",
- " <td>11947</td>\n",
- " <td>11924</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>299</th>\n",
- " <td>2020-03-31</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>NaN</td>\n",
- " <td>11896</td>\n",
- " <td>11896</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>300</th>\n",
- " <td>2020-06-14</td>\n",
- " <td>overview</td>\n",
- " <td>K02000001</td>\n",
- " <td>United Kingdom</td>\n",
- " <td>69529.0</td>\n",
- " <td>83965</td>\n",
- " <td>66688</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>301 rows × 7 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " date areaType areaCode areaName newPCRTestsByPublishDate \\\n",
- "0 2021-01-25 overview K02000001 United Kingdom 196510.0 \n",
- "1 2021-01-24 overview K02000001 United Kingdom 262111.0 \n",
- "2 2021-01-23 overview K02000001 United Kingdom 389209.0 \n",
- "3 2021-01-22 overview K02000001 United Kingdom 401075.0 \n",
- "4 2021-01-21 overview K02000001 United Kingdom 439408.0 \n",
- ".. ... ... ... ... ... \n",
- "296 2020-04-03 overview K02000001 United Kingdom NaN \n",
- "297 2020-04-02 overview K02000001 United Kingdom NaN \n",
- "298 2020-04-01 overview K02000001 United Kingdom NaN \n",
- "299 2020-03-31 overview K02000001 United Kingdom NaN \n",
- "300 2020-06-14 overview K02000001 United Kingdom 69529.0 \n",
- "\n",
- " newTestsByPublishDate newPillarOneTwoTestsByPublishDate \n",
- "0 546734 531104 \n",
- "1 412775 394479 \n",
- "2 486425 465231 \n",
- "3 631901 608829 \n",
- "4 668989 644537 \n",
- ".. ... ... \n",
- "296 14629 14293 \n",
- "297 13623 13457 \n",
- "298 11947 11924 \n",
- "299 11896 11896 \n",
- "300 83965 66688 \n",
- "\n",
- "[301 rows x 7 columns]"
- ]
- },
- "execution_count": 348,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "test_data = pd.read_csv('test_data.csv', \n",
- " parse_dates=[0], dayfirst=True)\n",
- "test_data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 349,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Index(['date', 'areaType', 'areaCode', 'areaName', 'newPCRTestsByPublishDate',\n",
- " 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate'],\n",
- " dtype='object')"
- ]
- },
- "execution_count": 349,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "test_data.columns"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 350,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "uk_data = uk_data.merge(test_data[['date', 'newPCRTestsByPublishDate',\n",
- " 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate']], how='outer', on='date')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 351,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Index(['date', 'areaType', 'areaCode', 'areaName', 'covidOccupiedMVBeds',\n",
- " 'newAdmissions', 'newCasesBySpecimenDate', 'hospitalCases',\n",
- " 'newDeaths28DaysByPublishDate', 'newPCRTestsByPublishDate',\n",
- " 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate'],\n",
- " dtype='object')"
- ]
- },
- "execution_count": 351,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "uk_data.columns"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 352,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "uk_data.rename(\n",
- " columns={\n",
- " 'covidOccupiedMVBeds': 'ventilator_beds',\n",
- " 'newCasesBySpecimenDate': 'new_cases',\n",
- " 'hospitalCases': 'hospital_cases', \n",
- " 'newDeaths28DaysByPublishDate': 'new_deaths',\n",
- " 'newAdmissions': 'new_admissions',\n",
- " 'newPCRTestsByPublishDate': 'new_pcr_tests',\n",
- " 'newTestsByPublishDate': 'new_tests',\n",
- " 'newPillarOneTwoTestsByPublishDate': 'new_pillar_1_2_tests'\n",
- " }, inplace=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 353,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "date datetime64[ns]\n",
- "areaType object\n",
- "areaCode object\n",
- "areaName object\n",
- "ventilator_beds float64\n",
- "new_admissions float64\n",
- "new_cases float64\n",
- "hospital_cases float64\n",
- "new_deaths int64\n",
- "new_pcr_tests float64\n",
- "new_tests float64\n",
- "new_pillar_1_2_tests float64\n",
- "dtype: object"
- ]
- },
- "execution_count": 353,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "uk_data.dtypes"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 354,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Index(['date', 'areaType', 'areaCode', 'areaName', 'ventilator_beds',\n",
- " 'new_admissions', 'new_cases', 'hospital_cases', 'new_deaths',\n",
- " 'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'],\n",
- " dtype='object')"
- ]
- },
- "execution_count": 354,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- " uk_data.columns"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 355,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "uk_data[['date', \n",
- " 'hospital_cases', 'ventilator_beds',\n",
- " 'new_cases', 'new_deaths', \n",
- " 'hospital_cases', 'new_admissions',\n",
- " 'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'\n",
- " ]].to_sql(\n",
- " 'uk_data',\n",
- " engine,\n",
- " if_exists='replace',\n",
- " index=False,\n",
- " chunksize=500,\n",
- " dtype={\n",
- " \"date\": Date,\n",
- " \"hospital_cases\": Integer,\n",
- " \"ventilator_beds\": Integer,\n",
- " \"new_cases\": Integer,\n",
- " \"hospital_cases\": Integer,\n",
- " \"new_deaths\": Integer,\n",
- " \"new_admissions\": Integer,\n",
- " 'new_pcr_tests': Integer, \n",
- " 'new_tests': Integer, \n",
- " 'new_pillar_1_2_tests': Integer\n",
- " }\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 356,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "# %sql select * from uk_data order by date desc limit 10"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 424,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "query_string = '''drop table if exists uk_data_7;\n",
- "create table uk_data_7 \n",
- "(date date primary key,\n",
- " hospital_cases real,\n",
- " ventilator_beds real,\n",
- " new_cases real,\n",
- " new_deaths real,\n",
- " new_admissions real,\n",
- " new_pcr_tests real,\n",
- " new_tests real,\n",
- " new_pillar_1_2_tests real\n",
- ");'''\n",
- "\n",
- "with engine.connect() as connection:\n",
- " connection.execute(query_string)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 425,
- "metadata": {},
- "outputs": [],
- "source": [
- "update_string = '''with ownd as (\n",
- " select date,\n",
- " avg(hospital_cases) over wnd as w_hospital_cases,\n",
- " avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
- " avg(new_cases) over wnd as w_new_cases,\n",
- " avg(new_deaths) over wnd as w_new_deaths,\n",
- " avg(new_admissions) over wnd as w_new_admissions,\n",
- " avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
- " avg(new_tests) over wnd as w_new_tests,\n",
- " avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
- " count(*) over wnd as w_size\n",
- " from uk_data\n",
- " window wnd as (\n",
- " order by uk_data.date\n",
- " rows between 3 preceding and 3 following\n",
- " )\n",
- ")\n",
- "insert into uk_data_7(date, \n",
- " hospital_cases, \n",
- " ventilator_beds, \n",
- " new_cases,\n",
- " new_deaths,\n",
- " new_admissions, \n",
- " new_pcr_tests, \n",
- " new_tests, \n",
- " new_pillar_1_2_tests\n",
- " )\n",
- "(select date,\n",
- " w_hospital_cases,\n",
- " w_ventilator_beds,\n",
- " w_new_cases,\n",
- " w_new_deaths,\n",
- " w_new_admissions,\n",
- " w_new_pcr_tests,\n",
- " w_new_tests,\n",
- " w_new_pillar_1_2_tests\n",
- " from ownd\n",
- " where w_size = 7\n",
- ")'''\n",
- "with engine.connect() as connection:\n",
- " connection.execute(update_string)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 398,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "# %%sql insert into uk_data_7(date, ventilator_beds, new_cases, hospital_cases, new_deaths, new_admissions)\n",
- "# values (\n",
- "# select date, \n",
- "# avg(ventilator_beds) over (order by date rows between 6 preceding and current row)\n",
- "# from uk_data\n",
- "# )"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 399,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "# query_string = '''insert into uk_data_7(date, hospital_cases)\n",
- "# select uk_data.date, \n",
- "# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as hospital_cases\n",
- "# from uk_data'''\n",
- "# with engine.connect() as connection:\n",
- "# connection.execute(query_string) "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 360,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "# %%sql \n",
- "# with m7 as \n",
- "# (select uk_data.date as date7, \n",
- "# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 6 preceding and current row) as nc7\n",
- "# from uk_data\n",
- "# )\n",
- "# update uk_data_7\n",
- "# set ventilator_beds = nc7\n",
- "# from m7\n",
- "# where uk_data_7.date = m7.date7"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 361,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " * postgresql://covid:***@localhost/covid\n",
- "390 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "[]"
- ]
- },
- "execution_count": 361,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# %%sql\n",
- "# with m7 as \n",
- "# (select uk_data.date as date7, \n",
- "# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7\n",
- "# from uk_data\n",
- "# )\n",
- "# update uk_data_7\n",
- "# set ventilator_beds = nc7\n",
- "# from m7\n",
- "# where uk_data_7.date = m7.date7"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 362,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " * postgresql://covid:***@localhost/covid\n",
- "390 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "[]"
- ]
- },
- "execution_count": 362,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# %%sql\n",
- "# with m7 as \n",
- "# (select uk_data.date as date7, \n",
- "# avg(uk_data.new_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7\n",
- "# from uk_data\n",
- "# )\n",
- "# update uk_data_7\n",
- "# set new_cases = nc7\n",
- "# from m7\n",
- "# where uk_data_7.date = m7.date7"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 363,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " * postgresql://covid:***@localhost/covid\n",
- "390 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "[]"
- ]
- },
- "execution_count": 363,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# %%sql\n",
- "# with m7 as \n",
- "# (select uk_data.date as date7, \n",
- "# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
- "# from uk_data\n",
- "# )\n",
- "# update uk_data_7\n",
- "# set hospital_cases = d7\n",
- "# from m7\n",
- "# where uk_data_7.date = m7.date7"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 364,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " * postgresql://covid:***@localhost/covid\n",
- "390 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "[]"
- ]
- },
- "execution_count": 364,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# %%sql\n",
- "# with m7 as \n",
- "# (select uk_data.date as date7, \n",
- "# avg(uk_data.new_deaths) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
- "# from uk_data\n",
- "# )\n",
- "# update uk_data_7\n",
- "# set new_deaths = d7\n",
- "# from m7\n",
- "# where uk_data_7.date = m7.date7"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 365,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " * postgresql://covid:***@localhost/covid\n",
- "390 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "[]"
- ]
- },
- "execution_count": 365,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# %%sql\n",
- "# with m7 as \n",
- "# (select uk_data.date as date7, \n",
- "# avg(uk_data.new_admissions) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
- "# from uk_data\n",
- "# )\n",
- "# update uk_data_7\n",
- "# set new_admissions = d7\n",
- "# from m7\n",
- "# where uk_data_7.date = m7.date7"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 366,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " * postgresql://covid:***@localhost/covid\n",
- "390 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "[]"
- ]
- },
- "execution_count": 366,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# %%sql\n",
- "# with m7 as \n",
- "# (select uk_data.date as date7, \n",
- "# avg(uk_data.new_pcr_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
- "# from uk_data\n",
- "# )\n",
- "# update uk_data_7\n",
- "# set new_pcr_tests = d7\n",
- "# from m7\n",
- "# where uk_data_7.date = m7.date7"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 367,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " * postgresql://covid:***@localhost/covid\n",
- "390 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "[]"
- ]
- },
- "execution_count": 367,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# %%sql\n",
- "# with m7 as \n",
- "# (select uk_data.date as date7, \n",
- "# avg(uk_data.new_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
- "# from uk_data\n",
- "# )\n",
- "# update uk_data_7\n",
- "# set new_tests = d7\n",
- "# from m7\n",
- "# where uk_data_7.date = m7.date7"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 309,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "# %%sql\n",
- "# with m7 as \n",
- "# (select uk_data.date as date7, \n",
- "# avg(uk_data.new_pillar_1_2_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
- "# from uk_data\n",
- "# )\n",
- "# update uk_data_7\n",
- "# set new_pillar_1_2_tests = d7\n",
- "# from m7\n",
- "# where uk_data_7.date = m7.date7"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 310,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " * postgresql://covid:***@localhost/covid\n",
- "0 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "[]"
- ]
- },
- "execution_count": 310,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# %%sql\n",
- "# with wnd as\n",
- "# ( select date, \n",
- "# avg(new_pillar_1_2_tests)\n",
- "# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests\n",
- "# from uk_data\n",
- "# )\n",
- "# update uk_data_7\n",
- "# set new_pillar_1_2_tests = wnd.a_new_pillar_1_2_tests\n",
- "# from wnd\n",
- "# where uk_data_7.date = wnd.date\n",
- "# and (select count(*) from wnd) = 7"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 379,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " * postgresql://covid:***@localhost/covid\n",
- "10 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "<table>\n",
- " <thead>\n",
- " <tr>\n",
- " <th>date</th>\n",
- " <th>new_pillar_1_2_tests</th>\n",
- " <th>a_new_pillar_1_2_tests</th>\n",
- " <th>a_count</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <td>2021-01-26</td>\n",
- " <td>None</td>\n",
- " <td>463604.666666666667</td>\n",
- " <td>3</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-25</td>\n",
- " <td>None</td>\n",
- " <td>499910.750000000000</td>\n",
- " <td>4</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-24</td>\n",
- " <td>None</td>\n",
- " <td>528836.000000000000</td>\n",
- " <td>5</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-23</td>\n",
- " <td>None</td>\n",
- " <td>543566.833333333333</td>\n",
- " <td>6</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-22</td>\n",
- " <td>None</td>\n",
- " <td>545780.285714285714</td>\n",
- " <td>7</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-21</td>\n",
- " <td>None</td>\n",
- " <td>546626.571428571429</td>\n",
- " <td>7</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-20</td>\n",
- " <td>None</td>\n",
- " <td>546806.285714285714</td>\n",
- " <td>7</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-19</td>\n",
- " <td>None</td>\n",
- " <td>547402.428571428571</td>\n",
- " <td>7</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-18</td>\n",
- " <td>None</td>\n",
- " <td>542617.857142857143</td>\n",
- " <td>7</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-17</td>\n",
- " <td>None</td>\n",
- " <td>545088.142857142857</td>\n",
- " <td>7</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>"
- ],
- "text/plain": [
- "[(datetime.date(2021, 1, 26), None, Decimal('463604.666666666667'), 3),\n",
- " (datetime.date(2021, 1, 25), None, Decimal('499910.750000000000'), 4),\n",
- " (datetime.date(2021, 1, 24), None, Decimal('528836.000000000000'), 5),\n",
- " (datetime.date(2021, 1, 23), None, Decimal('543566.833333333333'), 6),\n",
- " (datetime.date(2021, 1, 22), None, Decimal('545780.285714285714'), 7),\n",
- " (datetime.date(2021, 1, 21), None, Decimal('546626.571428571429'), 7),\n",
- " (datetime.date(2021, 1, 20), None, Decimal('546806.285714285714'), 7),\n",
- " (datetime.date(2021, 1, 19), None, Decimal('547402.428571428571'), 7),\n",
- " (datetime.date(2021, 1, 18), None, Decimal('542617.857142857143'), 7),\n",
- " (datetime.date(2021, 1, 17), None, Decimal('545088.142857142857'), 7)]"
- ]
- },
- "execution_count": 379,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# %%sql\n",
- "# with wnd as\n",
- "# ( select date, \n",
- "# avg(new_pillar_1_2_tests)\n",
- "# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests,\n",
- "# count(new_pillar_1_2_tests)\n",
- "# over (order by uk_data.date rows between 3 preceding and 3 following) as a_count\n",
- "# from uk_data\n",
- "# )\n",
- "# select uk_data_7.date, new_pillar_1_2_tests, wnd.a_new_pillar_1_2_tests, wnd.a_count\n",
- "# from uk_data_7, wnd\n",
- "# where uk_data_7.date = wnd.date\n",
- "# order by uk_data_7.date desc limit 10\n",
- " \n",
- "# select date, \n",
- "# count(*) over wnd as w_size\n",
- "# from uk_data\n",
- "# window wnd as (\n",
- "# order by uk_data.date\n",
- "# rows between 3 preceding and 3 following\n",
- "# );"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 401,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": [
- "# %%sql\n",
- "# select date, \n",
- "# count(*) over wnd as w_size\n",
- "# from uk_data\n",
- "# window wnd as (\n",
- "# order by uk_data.date\n",
- "# rows between 3 preceding and 3 following\n",
- "# )\n",
- "# order by date desc limit 10"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 407,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " * postgresql://covid:***@localhost/covid\n",
- "0 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "[]"
- ]
- },
- "execution_count": 407,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# %%sql\n",
- "# with ownd as (\n",
- "# select date,\n",
- "# avg(hospital_cases) over wnd as w_hospital_cases,\n",
- "# avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
- "# avg(new_cases) over wnd as w_new_cases,\n",
- "# avg(new_deaths) over wnd as w_new_deaths,\n",
- "# avg(new_admissions) over wnd as w_new_admissions,\n",
- "# avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
- "# avg(new_tests) over wnd as w_new_tests,\n",
- "# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
- "# count(*) over wnd as w_size\n",
- "# from uk_data\n",
- "# window wnd as (\n",
- "# order by uk_data.date\n",
- "# rows between 3 preceding and 3 following\n",
- "# ))\n",
- "# insert into uk_data_7(date, \n",
- "# hospital_cases, \n",
- "# ventilator_beds, \n",
- "# new_cases,\n",
- "# new_deaths,\n",
- "# new_admissions, \n",
- "# new_pcr_tests, \n",
- "# new_tests, \n",
- "# new_pillar_1_2_tests\n",
- "# )\n",
- "# (select date,\n",
- "# avg(hospital_cases) over wnd as w_hospital_cases,\n",
- "# avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
- "# avg(new_cases) over wnd as w_new_cases,\n",
- "# avg(new_deaths) over wnd as w_new_deaths,\n",
- "# avg(new_admissions) over wnd as w_new_admissions,\n",
- "# avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
- "# avg(new_tests) over wnd as w_new_tests,\n",
- "# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
- "# count(*) over wnd as w_size\n",
- "# from uk_data\n",
- "# window wnd as (\n",
- "# order by uk_data.date\n",
- "# rows between 3 preceding and 3 following\n",
- "# )\n",
- "# )\n",
- "# set date = ownd.date,\n",
- "# hospital_cases = w_hospital_cases,\n",
- "# ventilator_beds = w_ventilator_beds,\n",
- "# new_cases = w_new_cases,\n",
- "# new_deaths = w_new_deaths,\n",
- "# new_admissions = w_new_admissions,\n",
- "# new_pcr_tests = w_new_pcr_tests,\n",
- "# new_tests = w_new_tests,\n",
- "# new_pillar_1_2_tests = w_new_pillar_1_2_tests\n",
- "# from ownd\n",
- "# where w_size = 7"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 417,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " * postgresql://covid:***@localhost/covid\n",
- "384 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "[]"
- ]
- },
- "execution_count": 417,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# %%sql\n",
- "# with ownd as (\n",
- "# select date,\n",
- "# avg(hospital_cases) over wnd as w_hospital_cases,\n",
- "# avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
- "# avg(new_cases) over wnd as w_new_cases,\n",
- "# avg(new_deaths) over wnd as w_new_deaths,\n",
- "# avg(new_admissions) over wnd as w_new_admissions,\n",
- "# avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
- "# avg(new_tests) over wnd as w_new_tests,\n",
- "# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
- "# count(*) over wnd as w_size\n",
- "# from uk_data\n",
- "# window wnd as (\n",
- "# order by uk_data.date\n",
- "# rows between 3 preceding and 3 following\n",
- "# )\n",
- "# )\n",
- "# insert into uk_data_7(date, \n",
- "# hospital_cases, \n",
- "# ventilator_beds, \n",
- "# new_cases,\n",
- "# new_deaths,\n",
- "# new_admissions, \n",
- "# new_pcr_tests, \n",
- "# new_tests, \n",
- "# new_pillar_1_2_tests\n",
- "# )\n",
- "# (select date,\n",
- "# w_hospital_cases,\n",
- "# w_ventilator_beds,\n",
- "# w_new_cases,\n",
- "# w_new_deaths,\n",
- "# w_new_admissions,\n",
- "# w_new_pcr_tests,\n",
- "# w_new_tests,\n",
- "# w_new_pillar_1_2_tests\n",
- "# from ownd\n",
- "# where w_size = 7\n",
- "# )"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 427,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " * postgresql://covid:***@localhost/covid\n",
- "10 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "<table>\n",
- " <thead>\n",
- " <tr>\n",
- " <th>date</th>\n",
- " <th>hospital_cases</th>\n",
- " <th>ventilator_beds</th>\n",
- " <th>new_cases</th>\n",
- " <th>new_deaths</th>\n",
- " <th>new_admissions</th>\n",
- " <th>new_pcr_tests</th>\n",
- " <th>new_tests</th>\n",
- " <th>new_pillar_1_2_tests</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <td>2021-01-23</td>\n",
- " <td>37915.6</td>\n",
- " <td>4027.3333</td>\n",
- " <td>22463.666</td>\n",
- " <td>1241.7142</td>\n",
- " <td>3651.6667</td>\n",
- " <td>353735.34</td>\n",
- " <td>565312.3</td>\n",
- " <td>543566.8</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-22</td>\n",
- " <td>38057.168</td>\n",
- " <td>4015.8572</td>\n",
- " <td>24870.428</td>\n",
- " <td>1238.7142</td>\n",
- " <td>3771.75</td>\n",
- " <td>354323.16</td>\n",
- " <td>567830.3</td>\n",
- " <td>545780.3</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-21</td>\n",
- " <td>38217.715</td>\n",
- " <td>3999.2856</td>\n",
- " <td>30620.428</td>\n",
- " <td>1239.7142</td>\n",
- " <td>3828.2</td>\n",
- " <td>363750.16</td>\n",
- " <td>569716.7</td>\n",
- " <td>546626.56</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-20</td>\n",
- " <td>38294.0</td>\n",
- " <td>3969.8572</td>\n",
- " <td>32707.428</td>\n",
- " <td>1248.4286</td>\n",
- " <td>3811.0</td>\n",
- " <td>372037.44</td>\n",
- " <td>570458.1</td>\n",
- " <td>546806.3</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-19</td>\n",
- " <td>38348.43</td>\n",
- " <td>3939.5715</td>\n",
- " <td>34143.285</td>\n",
- " <td>1240.8572</td>\n",
- " <td>3825.8572</td>\n",
- " <td>374771.16</td>\n",
- " <td>571336.56</td>\n",
- " <td>547402.44</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-18</td>\n",
- " <td>38338.145</td>\n",
- " <td>3898.5715</td>\n",
- " <td>35766.57</td>\n",
- " <td>1223.5714</td>\n",
- " <td>3872.8572</td>\n",
- " <td>377019.28</td>\n",
- " <td>566901.1</td>\n",
- " <td>542617.9</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-17</td>\n",
- " <td>38292.285</td>\n",
- " <td>3857.4285</td>\n",
- " <td>37337.855</td>\n",
- " <td>1217.5714</td>\n",
- " <td>3939.0</td>\n",
- " <td>389469.84</td>\n",
- " <td>571242.56</td>\n",
- " <td>545088.1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-16</td>\n",
- " <td>38119.855</td>\n",
- " <td>3810.7144</td>\n",
- " <td>38824.0</td>\n",
- " <td>1181.0</td>\n",
- " <td>3967.2856</td>\n",
- " <td>395066.72</td>\n",
- " <td>569445.0</td>\n",
- " <td>541938.0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-15</td>\n",
- " <td>37858.715</td>\n",
- " <td>3746.2856</td>\n",
- " <td>40268.57</td>\n",
- " <td>1128.5714</td>\n",
- " <td>4029.2856</td>\n",
- " <td>403526.72</td>\n",
- " <td>570188.7</td>\n",
- " <td>540199.9</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-14</td>\n",
- " <td>37505.57</td>\n",
- " <td>3667.2856</td>\n",
- " <td>41989.145</td>\n",
- " <td>1118.5714</td>\n",
- " <td>4065.2856</td>\n",
- " <td>412496.44</td>\n",
- " <td>567178.7</td>\n",
- " <td>537417.1</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>"
- ],
- "text/plain": [
- "[(datetime.date(2021, 1, 23), 37915.6, 4027.3333, 22463.666, 1241.7142, 3651.6667, 353735.34, 565312.3, 543566.8),\n",
- " (datetime.date(2021, 1, 22), 38057.168, 4015.8572, 24870.428, 1238.7142, 3771.75, 354323.16, 567830.3, 545780.3),\n",
- " (datetime.date(2021, 1, 21), 38217.715, 3999.2856, 30620.428, 1239.7142, 3828.2, 363750.16, 569716.7, 546626.56),\n",
- " (datetime.date(2021, 1, 20), 38294.0, 3969.8572, 32707.428, 1248.4286, 3811.0, 372037.44, 570458.1, 546806.3),\n",
- " (datetime.date(2021, 1, 19), 38348.43, 3939.5715, 34143.285, 1240.8572, 3825.8572, 374771.16, 571336.56, 547402.44),\n",
- " (datetime.date(2021, 1, 18), 38338.145, 3898.5715, 35766.57, 1223.5714, 3872.8572, 377019.28, 566901.1, 542617.9),\n",
- " (datetime.date(2021, 1, 17), 38292.285, 3857.4285, 37337.855, 1217.5714, 3939.0, 389469.84, 571242.56, 545088.1),\n",
- " (datetime.date(2021, 1, 16), 38119.855, 3810.7144, 38824.0, 1181.0, 3967.2856, 395066.72, 569445.0, 541938.0),\n",
- " (datetime.date(2021, 1, 15), 37858.715, 3746.2856, 40268.57, 1128.5714, 4029.2856, 403526.72, 570188.7, 540199.9),\n",
- " (datetime.date(2021, 1, 14), 37505.57, 3667.2856, 41989.145, 1118.5714, 4065.2856, 412496.44, 567178.7, 537417.1)]"
- ]
- },
- "execution_count": 427,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "%sql select * from uk_data_7 order by date desc limit 10"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 428,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " * postgresql://covid:***@localhost/covid\n",
- "10 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "<table>\n",
- " <thead>\n",
- " <tr>\n",
- " <th>date</th>\n",
- " <th>hospital_cases</th>\n",
- " <th>ventilator_beds</th>\n",
- " <th>new_cases</th>\n",
- " <th>new_deaths</th>\n",
- " <th>new_admissions</th>\n",
- " <th>new_pcr_tests</th>\n",
- " <th>new_tests</th>\n",
- " <th>new_pillar_1_2_tests</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <td>2021-01-26</td>\n",
- " <td>None</td>\n",
- " <td>None</td>\n",
- " <td>None</td>\n",
- " <td>1631</td>\n",
- " <td>None</td>\n",
- " <td>None</td>\n",
- " <td>None</td>\n",
- " <td>None</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-25</td>\n",
- " <td>None</td>\n",
- " <td>4032</td>\n",
- " <td>4482</td>\n",
- " <td>592</td>\n",
- " <td>None</td>\n",
- " <td>196510</td>\n",
- " <td>546734</td>\n",
- " <td>531104</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-24</td>\n",
- " <td>37561</td>\n",
- " <td>4077</td>\n",
- " <td>14266</td>\n",
- " <td>610</td>\n",
- " <td>None</td>\n",
- " <td>262111</td>\n",
- " <td>412775</td>\n",
- " <td>394479</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-23</td>\n",
- " <td>37266</td>\n",
- " <td>4066</td>\n",
- " <td>20495</td>\n",
- " <td>1348</td>\n",
- " <td>None</td>\n",
- " <td>389209</td>\n",
- " <td>486425</td>\n",
- " <td>465231</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-22</td>\n",
- " <td>38144</td>\n",
- " <td>4076</td>\n",
- " <td>29094</td>\n",
- " <td>1401</td>\n",
- " <td>3341</td>\n",
- " <td>401075</td>\n",
- " <td>631901</td>\n",
- " <td>608829</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-21</td>\n",
- " <td>37957</td>\n",
- " <td>3960</td>\n",
- " <td>31430</td>\n",
- " <td>1290</td>\n",
- " <td>3598</td>\n",
- " <td>439408</td>\n",
- " <td>668989</td>\n",
- " <td>644537</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-20</td>\n",
- " <td>38650</td>\n",
- " <td>3953</td>\n",
- " <td>35015</td>\n",
- " <td>1820</td>\n",
- " <td>4016</td>\n",
- " <td>434099</td>\n",
- " <td>645050</td>\n",
- " <td>617221</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-19</td>\n",
- " <td>38765</td>\n",
- " <td>3947</td>\n",
- " <td>39311</td>\n",
- " <td>1610</td>\n",
- " <td>4132</td>\n",
- " <td>357850</td>\n",
- " <td>582938</td>\n",
- " <td>559061</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-18</td>\n",
- " <td>39181</td>\n",
- " <td>3916</td>\n",
- " <td>44732</td>\n",
- " <td>599</td>\n",
- " <td>4054</td>\n",
- " <td>262499</td>\n",
- " <td>559939</td>\n",
- " <td>537028</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2021-01-17</td>\n",
- " <td>38095</td>\n",
- " <td>3871</td>\n",
- " <td>28875</td>\n",
- " <td>671</td>\n",
- " <td>3725</td>\n",
- " <td>320122</td>\n",
- " <td>417965</td>\n",
- " <td>395737</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>"
- ],
- "text/plain": [
- "[(datetime.date(2021, 1, 26), None, None, None, 1631, None, None, None, None),\n",
- " (datetime.date(2021, 1, 25), None, 4032, 4482, 592, None, 196510, 546734, 531104),\n",
- " (datetime.date(2021, 1, 24), 37561, 4077, 14266, 610, None, 262111, 412775, 394479),\n",
- " (datetime.date(2021, 1, 23), 37266, 4066, 20495, 1348, None, 389209, 486425, 465231),\n",
- " (datetime.date(2021, 1, 22), 38144, 4076, 29094, 1401, 3341, 401075, 631901, 608829),\n",
- " (datetime.date(2021, 1, 21), 37957, 3960, 31430, 1290, 3598, 439408, 668989, 644537),\n",
- " (datetime.date(2021, 1, 20), 38650, 3953, 35015, 1820, 4016, 434099, 645050, 617221),\n",
- " (datetime.date(2021, 1, 19), 38765, 3947, 39311, 1610, 4132, 357850, 582938, 559061),\n",
- " (datetime.date(2021, 1, 18), 39181, 3916, 44732, 599, 4054, 262499, 559939, 537028),\n",
- " (datetime.date(2021, 1, 17), 38095, 3871, 28875, 671, 3725, 320122, 417965, 395737)]"
- ]
- },
- "execution_count": 428,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "%sql select * from uk_data order by date desc limit 10"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "Collapsed": "false"
- },
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "jupytext": {
- "formats": "ipynb,md"
- },
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.5"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}