{ "cells": [ { "cell_type": "markdown", "metadata": { "Collapsed": "false" }, "source": [ "Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)" ] }, { "cell_type": "code", "execution_count": 314, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float\n", "from sqlalchemy import create_engine" ] }, { "cell_type": "code", "execution_count": 315, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "import itertools\n", "import collections\n", "import json\n", "import pandas as pd\n", "import numpy as np\n", "from scipy.stats import gmean\n", "import datetime\n", "\n", "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 316, "metadata": { "Collapsed": "false" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The sql extension is already loaded. To reload it, use:\n", " %reload_ext sql\n" ] } ], "source": [ "%load_ext sql" ] }, { "cell_type": "code", "execution_count": 317, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'" ] }, { "cell_type": "code", "execution_count": 318, "metadata": { "Collapsed": "false" }, "outputs": [ { "data": { "text/plain": [ "'Connected: covid@covid'" ] }, "execution_count": 318, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%sql $connection_string" ] }, { "cell_type": "code", "execution_count": 319, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "eng = create_engine(connection_string)\n", "engine = eng.execution_options(isolation_level=\"AUTOCOMMIT\")" ] }, { "cell_type": "code", "execution_count": 320, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "DEATH_COUNT_THRESHOLD = 10\n", "COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()\n", "COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()\n", "COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()\n", "# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()\n", "\n", "COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA\n", " 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',\n", " 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',\n", " 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']\n", "COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))\n", "COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))" ] }, { "cell_type": "code", "execution_count": 321, "metadata": { "Collapsed": "false" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "100 623k 100 623k 0 0 927k 0 --:--:-- --:--:-- --:--:-- 925k\n" ] } ], "source": [ "!curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv" ] }, { "cell_type": "code", "execution_count": 322, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "# First col is a date, treat geoId of NA as 'Namibia', not \"NA\" value\n", "raw_data = pd.read_csv('covid.csv', \n", " parse_dates=[0], dayfirst=True,\n", " keep_default_na=False, na_values = [''],\n", "# dtype = {'day': np.int64, \n", "# 'month': np.int64, \n", "# 'year': np.int64, \n", "# 'cases': np.int64, \n", "# 'deaths': np.int64, \n", "# 'countriesAndTerritories': str, \n", "# 'geoId': str, \n", "# 'countryterritoryCode': str, \n", "# 'popData2019': np.int64, \n", "# 'continentExp': str, \n", "# }\n", " )" ] }, { "cell_type": "code", "execution_count": 323, "metadata": { "Collapsed": "false" }, "outputs": [ { "data": { "text/plain": [ "100050" ] }, "execution_count": 323, "metadata": {}, "output_type": "execute_result" } ], "source": [ "raw_data.size" ] }, { "cell_type": "code", "execution_count": 324, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "raw_data.fillna(0, inplace=True)" ] }, { "cell_type": "code", "execution_count": 325, "metadata": { "Collapsed": "false" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateRepyear_weekcases_weeklydeaths_weeklycountriesAndTerritoriesgeoIdcountryterritoryCodepopData2019continentExpnotification_rate_per_100000_population_14-days
02021-01-182021-0255745AfghanistanAFAFG38041757.0Asia3.24
12021-01-112021-0167571AfghanistanAFAFG38041757.0Asia4.15
22021-01-042020-5390260AfghanistanAFAFG38041757.0Asia7.61
32020-12-282020-52199488AfghanistanAFAFG38041757.0Asia7.19
42020-12-212020-51740111AfghanistanAFAFG38041757.0Asia6.56
\n", "
" ], "text/plain": [ " dateRep year_week cases_weekly deaths_weekly countriesAndTerritories \\\n", "0 2021-01-18 2021-02 557 45 Afghanistan \n", "1 2021-01-11 2021-01 675 71 Afghanistan \n", "2 2021-01-04 2020-53 902 60 Afghanistan \n", "3 2020-12-28 2020-52 1994 88 Afghanistan \n", "4 2020-12-21 2020-51 740 111 Afghanistan \n", "\n", " geoId countryterritoryCode popData2019 continentExp \\\n", "0 AF AFG 38041757.0 Asia \n", "1 AF AFG 38041757.0 Asia \n", "2 AF AFG 38041757.0 Asia \n", "3 AF AFG 38041757.0 Asia \n", "4 AF AFG 38041757.0 Asia \n", "\n", " notification_rate_per_100000_population_14-days \n", "0 3.24 \n", "1 4.15 \n", "2 7.61 \n", "3 7.19 \n", "4 6.56 " ] }, "execution_count": 325, "metadata": {}, "output_type": "execute_result" } ], "source": [ "raw_data.head()" ] }, { "cell_type": "code", "execution_count": 326, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',\n", " 'countriesAndTerritories': 'country_name',\n", " 'countryterritoryCode': 'country_territory_code',\n", " 'popData2019': 'population_2019',\n", " 'continentExp': 'continent'}, inplace=True)" ] }, { "cell_type": "code", "execution_count": 327, "metadata": { "Collapsed": "false" }, "outputs": [ { "data": { "text/plain": [ "Index(['report_date', 'year_week', 'cases_weekly', 'deaths_weekly',\n", " 'country_name', 'geo_id', 'country_territory_code', 'population_2019',\n", " 'continent', 'notification_rate_per_100000_population_14-days'],\n", " dtype='object')" ] }, "execution_count": 327, "metadata": {}, "output_type": "execute_result" } ], "source": [ "raw_data.columns" ] }, { "cell_type": "code", "execution_count": 328, "metadata": { "Collapsed": "false" }, "outputs": [ { "data": { "text/plain": [ "report_date datetime64[ns]\n", "year_week object\n", "cases_weekly int64\n", "deaths_weekly int64\n", "country_name object\n", "geo_id object\n", "country_territory_code object\n", "population_2019 float64\n", "continent object\n", "notification_rate_per_100000_population_14-days float64\n", "dtype: object" ] }, "execution_count": 328, "metadata": {}, "output_type": "execute_result" } ], "source": [ "raw_data.dtypes" ] }, { "cell_type": "code", "execution_count": 329, "metadata": { "Collapsed": "false" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
report_datecases_weeklydeaths_weeklygeo_idnotification_rate_per_100000_population_14-days
02021-01-1855745AF3.24
12021-01-1167571AF4.15
22021-01-0490260AF7.61
32020-12-28199488AF7.19
42020-12-21740111AF6.56
..................
100002020-04-20110ZW0.11
100012020-04-1352ZW0.05
100022020-04-0620ZW0.05
100032020-03-3051ZW0.05
100042020-03-2320ZW0.00
\n", "

10005 rows × 5 columns

\n", "
" ], "text/plain": [ " report_date cases_weekly deaths_weekly geo_id \\\n", "0 2021-01-18 557 45 AF \n", "1 2021-01-11 675 71 AF \n", "2 2021-01-04 902 60 AF \n", "3 2020-12-28 1994 88 AF \n", "4 2020-12-21 740 111 AF \n", "... ... ... ... ... \n", "10000 2020-04-20 11 0 ZW \n", "10001 2020-04-13 5 2 ZW \n", "10002 2020-04-06 2 0 ZW \n", "10003 2020-03-30 5 1 ZW \n", "10004 2020-03-23 2 0 ZW \n", "\n", " notification_rate_per_100000_population_14-days \n", "0 3.24 \n", "1 4.15 \n", "2 7.61 \n", "3 7.19 \n", "4 6.56 \n", "... ... \n", "10000 0.11 \n", "10001 0.05 \n", "10002 0.05 \n", "10003 0.05 \n", "10004 0.00 \n", "\n", "[10005 rows x 5 columns]" ] }, "execution_count": 329, "metadata": {}, "output_type": "execute_result" } ], "source": [ "raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']]" ] }, { "cell_type": "code", "execution_count": 330, "metadata": { "Collapsed": "false" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
country_namegeo_idcountry_territory_codepopulation_2019continent
0AfghanistanAFAFG38041757.0Asia
1AfghanistanAFAFG38041757.0Asia
2AfghanistanAFAFG38041757.0Asia
3AfghanistanAFAFG38041757.0Asia
4AfghanistanAFAFG38041757.0Asia
..................
10000ZimbabweZWZWE14645473.0Africa
10001ZimbabweZWZWE14645473.0Africa
10002ZimbabweZWZWE14645473.0Africa
10003ZimbabweZWZWE14645473.0Africa
10004ZimbabweZWZWE14645473.0Africa
\n", "

10005 rows × 5 columns

\n", "
" ], "text/plain": [ " country_name geo_id country_territory_code population_2019 continent\n", "0 Afghanistan AF AFG 38041757.0 Asia\n", "1 Afghanistan AF AFG 38041757.0 Asia\n", "2 Afghanistan AF AFG 38041757.0 Asia\n", "3 Afghanistan AF AFG 38041757.0 Asia\n", "4 Afghanistan AF AFG 38041757.0 Asia\n", "... ... ... ... ... ...\n", "10000 Zimbabwe ZW ZWE 14645473.0 Africa\n", "10001 Zimbabwe ZW ZWE 14645473.0 Africa\n", "10002 Zimbabwe ZW ZWE 14645473.0 Africa\n", "10003 Zimbabwe ZW ZWE 14645473.0 Africa\n", "10004 Zimbabwe ZW ZWE 14645473.0 Africa\n", "\n", "[10005 rows x 5 columns]" ] }, "execution_count": 330, "metadata": {}, "output_type": "execute_result" } ], "source": [ "raw_data[['country_name', 'geo_id', 'country_territory_code',\n", " 'population_2019', 'continent']]" ] }, { "cell_type": "code", "execution_count": 331, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']].to_sql(\n", " 'weekly_cases',\n", " engine,\n", " if_exists='replace',\n", " index=False,\n", " chunksize=500,\n", " dtype={\n", " \"report_date\": Date,\n", " \"cases_weekly\": Integer,\n", " \"deaths_weekly\": Integer,\n", " \"geo_id\": String,\n", " \"notification_rate_per_100000_population_14-days\": Float\n", " }\n", ")" ] }, { "cell_type": "code", "execution_count": 332, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "raw_data[['country_name', 'geo_id', 'country_territory_code',\n", " 'population_2019', 'continent']].drop_duplicates().to_sql(\n", " 'countries',\n", " engine,\n", " if_exists='replace',\n", " index=False,\n", " chunksize=500,\n", " dtype={\n", " \"country_name\": Text,\n", " \"geo_id\": String,\n", " \"country_territory_code\": String,\n", " \"population_2019\": Integer,\n", " \"continent\": Text\n", " }\n", ")" ] }, { "cell_type": "code", "execution_count": 333, "metadata": { "Collapsed": "false", "scrolled": true }, "outputs": [], "source": [ "# %sql select geo_id from weekly_cases limit 10" ] }, { "cell_type": "code", "execution_count": 334, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "# %%sql alter table weekly_cases add primary key (geo_id, report_date);\n", "# alter table countries add primary key (geo_id);\n", "# alter table weekly_cases add foreign key (geo_id) references countries(geo_id);\n", "# alter table weekly_cases add culm_cases integer;\n", "# alter table weekly_cases add culm_deaths integer;" ] }, { "cell_type": "code", "execution_count": 335, "metadata": {}, "outputs": [], "source": [ "with engine.connect() as connection:\n", " connection.execute('alter table weekly_cases add primary key (geo_id, report_date)')\n", " connection.execute('alter table countries add primary key (geo_id);')\n", " connection.execute('alter table weekly_cases add foreign key (geo_id) references countries(geo_id);')\n", " connection.execute('alter table weekly_cases add culm_cases integer;')\n", " connection.execute('alter table weekly_cases add culm_deaths integer;')" ] }, { "cell_type": "code", "execution_count": 336, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "# %sql select report_date, cases_weekly, country_name from weekly_cases join countries using (geo_id) order by report_date desc limit 10" ] }, { "cell_type": "code", "execution_count": 337, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "# %sql select report_date, cases_weekly, country_name from weekly_cases join countries on weekly_cases.geo_id = countries.geo_id order by report_date desc limit 10" ] }, { "cell_type": "code", "execution_count": 338, "metadata": { "Collapsed": "false" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " * postgresql://covid:***@localhost/covid\n", "10 rows affected.\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
report_datecases_weeklydeaths_weeklygeo_idnotification_rate_per_100000_population_14-daysculm_casesculm_deaths
2021-01-1855745AF3.24NoneNone
2021-01-1167571AF4.15NoneNone
2021-01-0490260AF7.61NoneNone
2020-12-28199488AF7.19NoneNone
2020-12-21740111AF6.56NoneNone
2020-12-14175771AF9.01NoneNone
2020-12-071672137AF7.22NoneNone
2020-11-30107368AF6.42NoneNone
2020-11-23136869AF6.66NoneNone
2020-11-16116461AF4.65NoneNone
" ], "text/plain": [ "[(datetime.date(2021, 1, 18), 557, 45, 'AF', 3.24, None, None),\n", " (datetime.date(2021, 1, 11), 675, 71, 'AF', 4.15, None, None),\n", " (datetime.date(2021, 1, 4), 902, 60, 'AF', 7.61, None, None),\n", " (datetime.date(2020, 12, 28), 1994, 88, 'AF', 7.19, None, None),\n", " (datetime.date(2020, 12, 21), 740, 111, 'AF', 6.56, None, None),\n", " (datetime.date(2020, 12, 14), 1757, 71, 'AF', 9.01, None, None),\n", " (datetime.date(2020, 12, 7), 1672, 137, 'AF', 7.22, None, None),\n", " (datetime.date(2020, 11, 30), 1073, 68, 'AF', 6.42, None, None),\n", " (datetime.date(2020, 11, 23), 1368, 69, 'AF', 6.66, None, None),\n", " (datetime.date(2020, 11, 16), 1164, 61, 'AF', 4.65, None, None)]" ] }, "execution_count": 338, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%sql select * from weekly_cases limit 10" ] }, { "cell_type": "code", "execution_count": 339, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "# %%sql\n", "# with culm as \n", "# (select report_date, geo_id,\n", "# sum(cases_weekly) over (partition by geo_id \n", "# order by report_date) as culm_data\n", "# from weekly_cases\n", "# )\n", "# update weekly_cases\n", "# set culm_cases = culm_data\n", "# from culm\n", "# where weekly_cases.report_date = culm.report_date and\n", "# weekly_cases.geo_id = culm.geo_id" ] }, { "cell_type": "code", "execution_count": 340, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "query_string = '''with culm as \n", " (select report_date, geo_id,\n", " sum(cases_weekly) over (partition by geo_id \n", " order by report_date) as culm_data\n", " from weekly_cases\n", " )\n", "update weekly_cases\n", " set culm_cases = culm_data\n", " from culm\n", " where weekly_cases.report_date = culm.report_date and\n", " weekly_cases.geo_id = culm.geo_id'''\n", "with engine.connect() as connection:\n", " connection.execute(query_string)" ] }, { "cell_type": "code", "execution_count": 341, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "# %%sql\n", "# with culm as \n", "# (select report_date, geo_id,\n", "# sum(deaths_weekly) over (partition by geo_id \n", "# order by report_date) as culm_data\n", "# from weekly_cases\n", "# )\n", "# update weekly_cases\n", "# set culm_deaths = culm_data\n", "# from culm\n", "# where weekly_cases.report_date = culm.report_date and\n", "# weekly_cases.geo_id = culm.geo_id" ] }, { "cell_type": "code", "execution_count": 342, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "query_string = '''with culm as \n", " (select report_date, geo_id,\n", " sum(deaths_weekly) over (partition by geo_id \n", " order by report_date) as culm_data\n", " from weekly_cases\n", " )\n", "update weekly_cases\n", " set culm_deaths = culm_data\n", " from culm\n", " where weekly_cases.report_date = culm.report_date and\n", " weekly_cases.geo_id = culm.geo_id'''\n", "with engine.connect() as connection:\n", " connection.execute(query_string)" ] }, { "cell_type": "code", "execution_count": 343, "metadata": { "Collapsed": "false" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "100 26053 100 26053 0 0 54277 0 --:--:-- --:--:-- --:--:-- 54277\n" ] } ], "source": [ "uk_query_string = (\n", "\"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n", "\"metric=covidOccupiedMVBeds&\"\n", "\"metric=newAdmissions&\"\n", "\"metric=newCasesBySpecimenDate&\"\n", "\"metric=hospitalCases&\"\n", "\"metric=newDeaths28DaysByPublishDate&\"\n", "\"format=csv\"\n", ")\n", "\n", "!curl \"$uk_query_string\" > uk_data.csv" ] }, { "cell_type": "code", "execution_count": 344, "metadata": { "Collapsed": "false" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "100 20223 100 20223 0 0 78688 0 --:--:-- --:--:-- --:--:-- 78383\n" ] } ], "source": [ "test_query_string = (\n", "\"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n", "\"metric=newPCRTestsByPublishDate&\"\n", "\"metric=newTestsByPublishDate&\"\n", "\"metric=newPillarOneTwoTestsByPublishDate&\"\n", "\"format=csv\"\n", ")\n", "!curl \"$test_query_string\" > test_data.csv" ] }, { "cell_type": "code", "execution_count": 345, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "# hospital_query_string = (\n", "# \"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n", "# \"metric=newAdmissions&\"\n", "# \"format=csv\"\n", "# )\n", "# !curl \"$hospital_query_string\" > hospital_admissions.csv" ] }, { "cell_type": "code", "execution_count": 346, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "# hospital_query_string = (\n", "# \"https://api.coronavirus.data.gov.uk/v1/data?\"\n", "# \"filters=areaName=United%2520Kingdom;areaType=overview&\"\n", "# \"structure=%7B%22date%22:%22date%22,%22areaName%22:%22areaName%22,%22areaType%22:%22areaType%22,\"\n", "# \"%22newAdmissions%22:%22newAdmissions%22,%22cumAdmissions%22:%22cumAdmissions%22%7D&format=csv\"\n", "# )\n", " \n", "# !curl \"$hospital_query_string\" | gunzip > hospital_admissions.csv" ] }, { "cell_type": "code", "execution_count": 347, "metadata": { "Collapsed": "false" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateareaTypeareaCodeareaNamecovidOccupiedMVBedsnewAdmissionsnewCasesBySpecimenDatehospitalCasesnewDeaths28DaysByPublishDate
02021-01-26overviewK02000001United KingdomNaNNaNNaNNaN1631
12021-01-25overviewK02000001United Kingdom4032.0NaN4482.0NaN592
22021-01-24overviewK02000001United Kingdom4077.0NaN14266.037561.0610
32021-01-23overviewK02000001United Kingdom4066.0NaN20495.037266.01348
42021-01-22overviewK02000001United Kingdom4076.03341.029094.038144.01401
..............................
3852020-01-06overviewK02000001United KingdomNaNNaNNaNNaN0
3862020-01-05overviewK02000001United KingdomNaNNaNNaNNaN0
3872020-01-04overviewK02000001United KingdomNaNNaNNaNNaN0
3882020-01-03overviewK02000001United KingdomNaNNaNNaNNaN0
3892020-06-14overviewK02000001United Kingdom393.0364.0821.04907.027
\n", "

390 rows × 9 columns

\n", "
" ], "text/plain": [ " date areaType areaCode areaName covidOccupiedMVBeds \\\n", "0 2021-01-26 overview K02000001 United Kingdom NaN \n", "1 2021-01-25 overview K02000001 United Kingdom 4032.0 \n", "2 2021-01-24 overview K02000001 United Kingdom 4077.0 \n", "3 2021-01-23 overview K02000001 United Kingdom 4066.0 \n", "4 2021-01-22 overview K02000001 United Kingdom 4076.0 \n", ".. ... ... ... ... ... \n", "385 2020-01-06 overview K02000001 United Kingdom NaN \n", "386 2020-01-05 overview K02000001 United Kingdom NaN \n", "387 2020-01-04 overview K02000001 United Kingdom NaN \n", "388 2020-01-03 overview K02000001 United Kingdom NaN \n", "389 2020-06-14 overview K02000001 United Kingdom 393.0 \n", "\n", " newAdmissions newCasesBySpecimenDate hospitalCases \\\n", "0 NaN NaN NaN \n", "1 NaN 4482.0 NaN \n", "2 NaN 14266.0 37561.0 \n", "3 NaN 20495.0 37266.0 \n", "4 3341.0 29094.0 38144.0 \n", ".. ... ... ... \n", "385 NaN NaN NaN \n", "386 NaN NaN NaN \n", "387 NaN NaN NaN \n", "388 NaN NaN NaN \n", "389 364.0 821.0 4907.0 \n", "\n", " newDeaths28DaysByPublishDate \n", "0 1631 \n", "1 592 \n", "2 610 \n", "3 1348 \n", "4 1401 \n", ".. ... \n", "385 0 \n", "386 0 \n", "387 0 \n", "388 0 \n", "389 27 \n", "\n", "[390 rows x 9 columns]" ] }, "execution_count": 347, "metadata": {}, "output_type": "execute_result" } ], "source": [ "uk_data = pd.read_csv('uk_data.csv', \n", " parse_dates=[0], dayfirst=True)\n", "uk_data" ] }, { "cell_type": "code", "execution_count": 348, "metadata": { "Collapsed": "false" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateareaTypeareaCodeareaNamenewPCRTestsByPublishDatenewTestsByPublishDatenewPillarOneTwoTestsByPublishDate
02021-01-25overviewK02000001United Kingdom196510.0546734531104
12021-01-24overviewK02000001United Kingdom262111.0412775394479
22021-01-23overviewK02000001United Kingdom389209.0486425465231
32021-01-22overviewK02000001United Kingdom401075.0631901608829
42021-01-21overviewK02000001United Kingdom439408.0668989644537
........................
2962020-04-03overviewK02000001United KingdomNaN1462914293
2972020-04-02overviewK02000001United KingdomNaN1362313457
2982020-04-01overviewK02000001United KingdomNaN1194711924
2992020-03-31overviewK02000001United KingdomNaN1189611896
3002020-06-14overviewK02000001United Kingdom69529.08396566688
\n", "

301 rows × 7 columns

\n", "
" ], "text/plain": [ " date areaType areaCode areaName newPCRTestsByPublishDate \\\n", "0 2021-01-25 overview K02000001 United Kingdom 196510.0 \n", "1 2021-01-24 overview K02000001 United Kingdom 262111.0 \n", "2 2021-01-23 overview K02000001 United Kingdom 389209.0 \n", "3 2021-01-22 overview K02000001 United Kingdom 401075.0 \n", "4 2021-01-21 overview K02000001 United Kingdom 439408.0 \n", ".. ... ... ... ... ... \n", "296 2020-04-03 overview K02000001 United Kingdom NaN \n", "297 2020-04-02 overview K02000001 United Kingdom NaN \n", "298 2020-04-01 overview K02000001 United Kingdom NaN \n", "299 2020-03-31 overview K02000001 United Kingdom NaN \n", "300 2020-06-14 overview K02000001 United Kingdom 69529.0 \n", "\n", " newTestsByPublishDate newPillarOneTwoTestsByPublishDate \n", "0 546734 531104 \n", "1 412775 394479 \n", "2 486425 465231 \n", "3 631901 608829 \n", "4 668989 644537 \n", ".. ... ... \n", "296 14629 14293 \n", "297 13623 13457 \n", "298 11947 11924 \n", "299 11896 11896 \n", "300 83965 66688 \n", "\n", "[301 rows x 7 columns]" ] }, "execution_count": 348, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_data = pd.read_csv('test_data.csv', \n", " parse_dates=[0], dayfirst=True)\n", "test_data" ] }, { "cell_type": "code", "execution_count": 349, "metadata": { "Collapsed": "false" }, "outputs": [ { "data": { "text/plain": [ "Index(['date', 'areaType', 'areaCode', 'areaName', 'newPCRTestsByPublishDate',\n", " 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate'],\n", " dtype='object')" ] }, "execution_count": 349, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_data.columns" ] }, { "cell_type": "code", "execution_count": 350, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "uk_data = uk_data.merge(test_data[['date', 'newPCRTestsByPublishDate',\n", " 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate']], how='outer', on='date')" ] }, { "cell_type": "code", "execution_count": 351, "metadata": { "Collapsed": "false" }, "outputs": [ { "data": { "text/plain": [ "Index(['date', 'areaType', 'areaCode', 'areaName', 'covidOccupiedMVBeds',\n", " 'newAdmissions', 'newCasesBySpecimenDate', 'hospitalCases',\n", " 'newDeaths28DaysByPublishDate', 'newPCRTestsByPublishDate',\n", " 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate'],\n", " dtype='object')" ] }, "execution_count": 351, "metadata": {}, "output_type": "execute_result" } ], "source": [ "uk_data.columns" ] }, { "cell_type": "code", "execution_count": 352, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "uk_data.rename(\n", " columns={\n", " 'covidOccupiedMVBeds': 'ventilator_beds',\n", " 'newCasesBySpecimenDate': 'new_cases',\n", " 'hospitalCases': 'hospital_cases', \n", " 'newDeaths28DaysByPublishDate': 'new_deaths',\n", " 'newAdmissions': 'new_admissions',\n", " 'newPCRTestsByPublishDate': 'new_pcr_tests',\n", " 'newTestsByPublishDate': 'new_tests',\n", " 'newPillarOneTwoTestsByPublishDate': 'new_pillar_1_2_tests'\n", " }, inplace=True)" ] }, { "cell_type": "code", "execution_count": 353, "metadata": { "Collapsed": "false" }, "outputs": [ { "data": { "text/plain": [ "date datetime64[ns]\n", "areaType object\n", "areaCode object\n", "areaName object\n", "ventilator_beds float64\n", "new_admissions float64\n", "new_cases float64\n", "hospital_cases float64\n", "new_deaths int64\n", "new_pcr_tests float64\n", "new_tests float64\n", "new_pillar_1_2_tests float64\n", "dtype: object" ] }, "execution_count": 353, "metadata": {}, "output_type": "execute_result" } ], "source": [ "uk_data.dtypes" ] }, { "cell_type": "code", "execution_count": 354, "metadata": { "Collapsed": "false" }, "outputs": [ { "data": { "text/plain": [ "Index(['date', 'areaType', 'areaCode', 'areaName', 'ventilator_beds',\n", " 'new_admissions', 'new_cases', 'hospital_cases', 'new_deaths',\n", " 'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'],\n", " dtype='object')" ] }, "execution_count": 354, "metadata": {}, "output_type": "execute_result" } ], "source": [ " uk_data.columns" ] }, { "cell_type": "code", "execution_count": 355, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "uk_data[['date', \n", " 'hospital_cases', 'ventilator_beds',\n", " 'new_cases', 'new_deaths', \n", " 'hospital_cases', 'new_admissions',\n", " 'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'\n", " ]].to_sql(\n", " 'uk_data',\n", " engine,\n", " if_exists='replace',\n", " index=False,\n", " chunksize=500,\n", " dtype={\n", " \"date\": Date,\n", " \"hospital_cases\": Integer,\n", " \"ventilator_beds\": Integer,\n", " \"new_cases\": Integer,\n", " \"hospital_cases\": Integer,\n", " \"new_deaths\": Integer,\n", " \"new_admissions\": Integer,\n", " 'new_pcr_tests': Integer, \n", " 'new_tests': Integer, \n", " 'new_pillar_1_2_tests': Integer\n", " }\n", ")" ] }, { "cell_type": "code", "execution_count": 356, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "# %sql select * from uk_data order by date desc limit 10" ] }, { "cell_type": "code", "execution_count": 424, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "query_string = '''drop table if exists uk_data_7;\n", "create table uk_data_7 \n", "(date date primary key,\n", " hospital_cases real,\n", " ventilator_beds real,\n", " new_cases real,\n", " new_deaths real,\n", " new_admissions real,\n", " new_pcr_tests real,\n", " new_tests real,\n", " new_pillar_1_2_tests real\n", ");'''\n", "\n", "with engine.connect() as connection:\n", " connection.execute(query_string)" ] }, { "cell_type": "code", "execution_count": 425, "metadata": {}, "outputs": [], "source": [ "update_string = '''with ownd as (\n", " select date,\n", " avg(hospital_cases) over wnd as w_hospital_cases,\n", " avg(ventilator_beds) over wnd as w_ventilator_beds,\n", " avg(new_cases) over wnd as w_new_cases,\n", " avg(new_deaths) over wnd as w_new_deaths,\n", " avg(new_admissions) over wnd as w_new_admissions,\n", " avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n", " avg(new_tests) over wnd as w_new_tests,\n", " avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n", " count(*) over wnd as w_size\n", " from uk_data\n", " window wnd as (\n", " order by uk_data.date\n", " rows between 3 preceding and 3 following\n", " )\n", ")\n", "insert into uk_data_7(date, \n", " hospital_cases, \n", " ventilator_beds, \n", " new_cases,\n", " new_deaths,\n", " new_admissions, \n", " new_pcr_tests, \n", " new_tests, \n", " new_pillar_1_2_tests\n", " )\n", "(select date,\n", " w_hospital_cases,\n", " w_ventilator_beds,\n", " w_new_cases,\n", " w_new_deaths,\n", " w_new_admissions,\n", " w_new_pcr_tests,\n", " w_new_tests,\n", " w_new_pillar_1_2_tests\n", " from ownd\n", " where w_size = 7\n", ")'''\n", "with engine.connect() as connection:\n", " connection.execute(update_string)" ] }, { "cell_type": "code", "execution_count": 398, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "# %%sql insert into uk_data_7(date, ventilator_beds, new_cases, hospital_cases, new_deaths, new_admissions)\n", "# values (\n", "# select date, \n", "# avg(ventilator_beds) over (order by date rows between 6 preceding and current row)\n", "# from uk_data\n", "# )" ] }, { "cell_type": "code", "execution_count": 399, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "# query_string = '''insert into uk_data_7(date, hospital_cases)\n", "# select uk_data.date, \n", "# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as hospital_cases\n", "# from uk_data'''\n", "# with engine.connect() as connection:\n", "# connection.execute(query_string) " ] }, { "cell_type": "code", "execution_count": 360, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "# %%sql \n", "# with m7 as \n", "# (select uk_data.date as date7, \n", "# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 6 preceding and current row) as nc7\n", "# from uk_data\n", "# )\n", "# update uk_data_7\n", "# set ventilator_beds = nc7\n", "# from m7\n", "# where uk_data_7.date = m7.date7" ] }, { "cell_type": "code", "execution_count": 361, "metadata": { "Collapsed": "false" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " * postgresql://covid:***@localhost/covid\n", "390 rows affected.\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 361, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# %%sql\n", "# with m7 as \n", "# (select uk_data.date as date7, \n", "# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7\n", "# from uk_data\n", "# )\n", "# update uk_data_7\n", "# set ventilator_beds = nc7\n", "# from m7\n", "# where uk_data_7.date = m7.date7" ] }, { "cell_type": "code", "execution_count": 362, "metadata": { "Collapsed": "false" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " * postgresql://covid:***@localhost/covid\n", "390 rows affected.\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 362, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# %%sql\n", "# with m7 as \n", "# (select uk_data.date as date7, \n", "# avg(uk_data.new_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7\n", "# from uk_data\n", "# )\n", "# update uk_data_7\n", "# set new_cases = nc7\n", "# from m7\n", "# where uk_data_7.date = m7.date7" ] }, { "cell_type": "code", "execution_count": 363, "metadata": { "Collapsed": "false" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " * postgresql://covid:***@localhost/covid\n", "390 rows affected.\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 363, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# %%sql\n", "# with m7 as \n", "# (select uk_data.date as date7, \n", "# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n", "# from uk_data\n", "# )\n", "# update uk_data_7\n", "# set hospital_cases = d7\n", "# from m7\n", "# where uk_data_7.date = m7.date7" ] }, { "cell_type": "code", "execution_count": 364, "metadata": { "Collapsed": "false" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " * postgresql://covid:***@localhost/covid\n", "390 rows affected.\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 364, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# %%sql\n", "# with m7 as \n", "# (select uk_data.date as date7, \n", "# avg(uk_data.new_deaths) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n", "# from uk_data\n", "# )\n", "# update uk_data_7\n", "# set new_deaths = d7\n", "# from m7\n", "# where uk_data_7.date = m7.date7" ] }, { "cell_type": "code", "execution_count": 365, "metadata": { "Collapsed": "false" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " * postgresql://covid:***@localhost/covid\n", "390 rows affected.\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 365, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# %%sql\n", "# with m7 as \n", "# (select uk_data.date as date7, \n", "# avg(uk_data.new_admissions) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n", "# from uk_data\n", "# )\n", "# update uk_data_7\n", "# set new_admissions = d7\n", "# from m7\n", "# where uk_data_7.date = m7.date7" ] }, { "cell_type": "code", "execution_count": 366, "metadata": { "Collapsed": "false" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " * postgresql://covid:***@localhost/covid\n", "390 rows affected.\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 366, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# %%sql\n", "# with m7 as \n", "# (select uk_data.date as date7, \n", "# avg(uk_data.new_pcr_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n", "# from uk_data\n", "# )\n", "# update uk_data_7\n", "# set new_pcr_tests = d7\n", "# from m7\n", "# where uk_data_7.date = m7.date7" ] }, { "cell_type": "code", "execution_count": 367, "metadata": { "Collapsed": "false" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " * postgresql://covid:***@localhost/covid\n", "390 rows affected.\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 367, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# %%sql\n", "# with m7 as \n", "# (select uk_data.date as date7, \n", "# avg(uk_data.new_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n", "# from uk_data\n", "# )\n", "# update uk_data_7\n", "# set new_tests = d7\n", "# from m7\n", "# where uk_data_7.date = m7.date7" ] }, { "cell_type": "code", "execution_count": 309, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "# %%sql\n", "# with m7 as \n", "# (select uk_data.date as date7, \n", "# avg(uk_data.new_pillar_1_2_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n", "# from uk_data\n", "# )\n", "# update uk_data_7\n", "# set new_pillar_1_2_tests = d7\n", "# from m7\n", "# where uk_data_7.date = m7.date7" ] }, { "cell_type": "code", "execution_count": 310, "metadata": { "Collapsed": "false" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " * postgresql://covid:***@localhost/covid\n", "0 rows affected.\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 310, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# %%sql\n", "# with wnd as\n", "# ( select date, \n", "# avg(new_pillar_1_2_tests)\n", "# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests\n", "# from uk_data\n", "# )\n", "# update uk_data_7\n", "# set new_pillar_1_2_tests = wnd.a_new_pillar_1_2_tests\n", "# from wnd\n", "# where uk_data_7.date = wnd.date\n", "# and (select count(*) from wnd) = 7" ] }, { "cell_type": "code", "execution_count": 379, "metadata": { "Collapsed": "false" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " * postgresql://covid:***@localhost/covid\n", "10 rows affected.\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datenew_pillar_1_2_testsa_new_pillar_1_2_testsa_count
2021-01-26None463604.6666666666673
2021-01-25None499910.7500000000004
2021-01-24None528836.0000000000005
2021-01-23None543566.8333333333336
2021-01-22None545780.2857142857147
2021-01-21None546626.5714285714297
2021-01-20None546806.2857142857147
2021-01-19None547402.4285714285717
2021-01-18None542617.8571428571437
2021-01-17None545088.1428571428577
" ], "text/plain": [ "[(datetime.date(2021, 1, 26), None, Decimal('463604.666666666667'), 3),\n", " (datetime.date(2021, 1, 25), None, Decimal('499910.750000000000'), 4),\n", " (datetime.date(2021, 1, 24), None, Decimal('528836.000000000000'), 5),\n", " (datetime.date(2021, 1, 23), None, Decimal('543566.833333333333'), 6),\n", " (datetime.date(2021, 1, 22), None, Decimal('545780.285714285714'), 7),\n", " (datetime.date(2021, 1, 21), None, Decimal('546626.571428571429'), 7),\n", " (datetime.date(2021, 1, 20), None, Decimal('546806.285714285714'), 7),\n", " (datetime.date(2021, 1, 19), None, Decimal('547402.428571428571'), 7),\n", " (datetime.date(2021, 1, 18), None, Decimal('542617.857142857143'), 7),\n", " (datetime.date(2021, 1, 17), None, Decimal('545088.142857142857'), 7)]" ] }, "execution_count": 379, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# %%sql\n", "# with wnd as\n", "# ( select date, \n", "# avg(new_pillar_1_2_tests)\n", "# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests,\n", "# count(new_pillar_1_2_tests)\n", "# over (order by uk_data.date rows between 3 preceding and 3 following) as a_count\n", "# from uk_data\n", "# )\n", "# select uk_data_7.date, new_pillar_1_2_tests, wnd.a_new_pillar_1_2_tests, wnd.a_count\n", "# from uk_data_7, wnd\n", "# where uk_data_7.date = wnd.date\n", "# order by uk_data_7.date desc limit 10\n", " \n", "# select date, \n", "# count(*) over wnd as w_size\n", "# from uk_data\n", "# window wnd as (\n", "# order by uk_data.date\n", "# rows between 3 preceding and 3 following\n", "# );" ] }, { "cell_type": "code", "execution_count": 401, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "# %%sql\n", "# select date, \n", "# count(*) over wnd as w_size\n", "# from uk_data\n", "# window wnd as (\n", "# order by uk_data.date\n", "# rows between 3 preceding and 3 following\n", "# )\n", "# order by date desc limit 10" ] }, { "cell_type": "code", "execution_count": 407, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " * postgresql://covid:***@localhost/covid\n", "0 rows affected.\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 407, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# %%sql\n", "# with ownd as (\n", "# select date,\n", "# avg(hospital_cases) over wnd as w_hospital_cases,\n", "# avg(ventilator_beds) over wnd as w_ventilator_beds,\n", "# avg(new_cases) over wnd as w_new_cases,\n", "# avg(new_deaths) over wnd as w_new_deaths,\n", "# avg(new_admissions) over wnd as w_new_admissions,\n", "# avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n", "# avg(new_tests) over wnd as w_new_tests,\n", "# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n", "# count(*) over wnd as w_size\n", "# from uk_data\n", "# window wnd as (\n", "# order by uk_data.date\n", "# rows between 3 preceding and 3 following\n", "# ))\n", "# insert into uk_data_7(date, \n", "# hospital_cases, \n", "# ventilator_beds, \n", "# new_cases,\n", "# new_deaths,\n", "# new_admissions, \n", "# new_pcr_tests, \n", "# new_tests, \n", "# new_pillar_1_2_tests\n", "# )\n", "# (select date,\n", "# avg(hospital_cases) over wnd as w_hospital_cases,\n", "# avg(ventilator_beds) over wnd as w_ventilator_beds,\n", "# avg(new_cases) over wnd as w_new_cases,\n", "# avg(new_deaths) over wnd as w_new_deaths,\n", "# avg(new_admissions) over wnd as w_new_admissions,\n", "# avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n", "# avg(new_tests) over wnd as w_new_tests,\n", "# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n", "# count(*) over wnd as w_size\n", "# from uk_data\n", "# window wnd as (\n", "# order by uk_data.date\n", "# rows between 3 preceding and 3 following\n", "# )\n", "# )\n", "# set date = ownd.date,\n", "# hospital_cases = w_hospital_cases,\n", "# ventilator_beds = w_ventilator_beds,\n", "# new_cases = w_new_cases,\n", "# new_deaths = w_new_deaths,\n", "# new_admissions = w_new_admissions,\n", "# new_pcr_tests = w_new_pcr_tests,\n", "# new_tests = w_new_tests,\n", "# new_pillar_1_2_tests = w_new_pillar_1_2_tests\n", "# from ownd\n", "# where w_size = 7" ] }, { "cell_type": "code", "execution_count": 417, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " * postgresql://covid:***@localhost/covid\n", "384 rows affected.\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 417, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# %%sql\n", "# with ownd as (\n", "# select date,\n", "# avg(hospital_cases) over wnd as w_hospital_cases,\n", "# avg(ventilator_beds) over wnd as w_ventilator_beds,\n", "# avg(new_cases) over wnd as w_new_cases,\n", "# avg(new_deaths) over wnd as w_new_deaths,\n", "# avg(new_admissions) over wnd as w_new_admissions,\n", "# avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n", "# avg(new_tests) over wnd as w_new_tests,\n", "# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n", "# count(*) over wnd as w_size\n", "# from uk_data\n", "# window wnd as (\n", "# order by uk_data.date\n", "# rows between 3 preceding and 3 following\n", "# )\n", "# )\n", "# insert into uk_data_7(date, \n", "# hospital_cases, \n", "# ventilator_beds, \n", "# new_cases,\n", "# new_deaths,\n", "# new_admissions, \n", "# new_pcr_tests, \n", "# new_tests, \n", "# new_pillar_1_2_tests\n", "# )\n", "# (select date,\n", "# w_hospital_cases,\n", "# w_ventilator_beds,\n", "# w_new_cases,\n", "# w_new_deaths,\n", "# w_new_admissions,\n", "# w_new_pcr_tests,\n", "# w_new_tests,\n", "# w_new_pillar_1_2_tests\n", "# from ownd\n", "# where w_size = 7\n", "# )" ] }, { "cell_type": "code", "execution_count": 427, "metadata": { "Collapsed": "false" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " * postgresql://covid:***@localhost/covid\n", "10 rows affected.\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datehospital_casesventilator_bedsnew_casesnew_deathsnew_admissionsnew_pcr_testsnew_testsnew_pillar_1_2_tests
2021-01-2337915.64027.333322463.6661241.71423651.6667353735.34565312.3543566.8
2021-01-2238057.1684015.857224870.4281238.71423771.75354323.16567830.3545780.3
2021-01-2138217.7153999.285630620.4281239.71423828.2363750.16569716.7546626.56
2021-01-2038294.03969.857232707.4281248.42863811.0372037.44570458.1546806.3
2021-01-1938348.433939.571534143.2851240.85723825.8572374771.16571336.56547402.44
2021-01-1838338.1453898.571535766.571223.57143872.8572377019.28566901.1542617.9
2021-01-1738292.2853857.428537337.8551217.57143939.0389469.84571242.56545088.1
2021-01-1638119.8553810.714438824.01181.03967.2856395066.72569445.0541938.0
2021-01-1537858.7153746.285640268.571128.57144029.2856403526.72570188.7540199.9
2021-01-1437505.573667.285641989.1451118.57144065.2856412496.44567178.7537417.1
" ], "text/plain": [ "[(datetime.date(2021, 1, 23), 37915.6, 4027.3333, 22463.666, 1241.7142, 3651.6667, 353735.34, 565312.3, 543566.8),\n", " (datetime.date(2021, 1, 22), 38057.168, 4015.8572, 24870.428, 1238.7142, 3771.75, 354323.16, 567830.3, 545780.3),\n", " (datetime.date(2021, 1, 21), 38217.715, 3999.2856, 30620.428, 1239.7142, 3828.2, 363750.16, 569716.7, 546626.56),\n", " (datetime.date(2021, 1, 20), 38294.0, 3969.8572, 32707.428, 1248.4286, 3811.0, 372037.44, 570458.1, 546806.3),\n", " (datetime.date(2021, 1, 19), 38348.43, 3939.5715, 34143.285, 1240.8572, 3825.8572, 374771.16, 571336.56, 547402.44),\n", " (datetime.date(2021, 1, 18), 38338.145, 3898.5715, 35766.57, 1223.5714, 3872.8572, 377019.28, 566901.1, 542617.9),\n", " (datetime.date(2021, 1, 17), 38292.285, 3857.4285, 37337.855, 1217.5714, 3939.0, 389469.84, 571242.56, 545088.1),\n", " (datetime.date(2021, 1, 16), 38119.855, 3810.7144, 38824.0, 1181.0, 3967.2856, 395066.72, 569445.0, 541938.0),\n", " (datetime.date(2021, 1, 15), 37858.715, 3746.2856, 40268.57, 1128.5714, 4029.2856, 403526.72, 570188.7, 540199.9),\n", " (datetime.date(2021, 1, 14), 37505.57, 3667.2856, 41989.145, 1118.5714, 4065.2856, 412496.44, 567178.7, 537417.1)]" ] }, "execution_count": 427, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%sql select * from uk_data_7 order by date desc limit 10" ] }, { "cell_type": "code", "execution_count": 428, "metadata": { "Collapsed": "false" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " * postgresql://covid:***@localhost/covid\n", "10 rows affected.\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datehospital_casesventilator_bedsnew_casesnew_deathsnew_admissionsnew_pcr_testsnew_testsnew_pillar_1_2_tests
2021-01-26NoneNoneNone1631NoneNoneNoneNone
2021-01-25None40324482592None196510546734531104
2021-01-2437561407714266610None262111412775394479
2021-01-23372664066204951348None389209486425465231
2021-01-223814440762909414013341401075631901608829
2021-01-213795739603143012903598439408668989644537
2021-01-203865039533501518204016434099645050617221
2021-01-193876539473931116104132357850582938559061
2021-01-18391813916447325994054262499559939537028
2021-01-17380953871288756713725320122417965395737
" ], "text/plain": [ "[(datetime.date(2021, 1, 26), None, None, None, 1631, None, None, None, None),\n", " (datetime.date(2021, 1, 25), None, 4032, 4482, 592, None, 196510, 546734, 531104),\n", " (datetime.date(2021, 1, 24), 37561, 4077, 14266, 610, None, 262111, 412775, 394479),\n", " (datetime.date(2021, 1, 23), 37266, 4066, 20495, 1348, None, 389209, 486425, 465231),\n", " (datetime.date(2021, 1, 22), 38144, 4076, 29094, 1401, 3341, 401075, 631901, 608829),\n", " (datetime.date(2021, 1, 21), 37957, 3960, 31430, 1290, 3598, 439408, 668989, 644537),\n", " (datetime.date(2021, 1, 20), 38650, 3953, 35015, 1820, 4016, 434099, 645050, 617221),\n", " (datetime.date(2021, 1, 19), 38765, 3947, 39311, 1610, 4132, 357850, 582938, 559061),\n", " (datetime.date(2021, 1, 18), 39181, 3916, 44732, 599, 4054, 262499, 559939, 537028),\n", " (datetime.date(2021, 1, 17), 38095, 3871, 28875, 671, 3725, 320122, 417965, 395737)]" ] }, "execution_count": 428, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%sql select * from uk_data order by date desc limit 10" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [] } ], "metadata": { "jupytext": { "formats": "ipynb,md" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }