X-Git-Url: https://git.njae.me.uk/?p=covid19.git;a=blobdiff_plain;f=data_import.ipynb;fp=data_import.ipynb;h=0000000000000000000000000000000000000000;hp=31d7ef97604b9a192a59c8d9656f3e4e7c52dfb0;hb=6fc69eb203d25465f8ccef719bc4d662dc66863a;hpb=4feace1e2a5076f3efa0fd65b31411ac511efbb1 diff --git a/data_import.ipynb b/data_import.ipynb deleted file mode 100644 index 31d7ef9..0000000 --- a/data_import.ipynb +++ /dev/null @@ -1,2933 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "Collapsed": "false" - }, - "source": [ - "Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)" - ] - }, - { - "cell_type": "code", - "execution_count": 314, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float\n", - "from sqlalchemy import create_engine" - ] - }, - { - "cell_type": "code", - "execution_count": 315, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "import itertools\n", - "import collections\n", - "import json\n", - "import pandas as pd\n", - "import numpy as np\n", - "from scipy.stats import gmean\n", - "import datetime\n", - "\n", - "import matplotlib as mpl\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "code", - "execution_count": 316, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The sql extension is already loaded. To reload it, use:\n", - " %reload_ext sql\n" - ] - } - ], - "source": [ - "%load_ext sql" - ] - }, - { - "cell_type": "code", - "execution_count": 317, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'" - ] - }, - { - "cell_type": "code", - "execution_count": 318, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "'Connected: covid@covid'" - ] - }, - "execution_count": 318, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%sql $connection_string" - ] - }, - { - "cell_type": "code", - "execution_count": 319, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "eng = create_engine(connection_string)\n", - "engine = eng.execution_options(isolation_level=\"AUTOCOMMIT\")" - ] - }, - { - "cell_type": "code", - "execution_count": 320, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "DEATH_COUNT_THRESHOLD = 10\n", - "COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()\n", - "COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()\n", - "COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()\n", - "# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()\n", - "\n", - "COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA\n", - " 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',\n", - " 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',\n", - " 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']\n", - "COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))\n", - "COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))" - ] - }, - { - "cell_type": "code", - "execution_count": 321, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " % Total % Received % Xferd Average Speed Time Time Time Current\n", - " Dload Upload Total Spent Left Speed\n", - "100 623k 100 623k 0 0 927k 0 --:--:-- --:--:-- --:--:-- 925k\n" - ] - } - ], - "source": [ - "!curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv" - ] - }, - { - "cell_type": "code", - "execution_count": 322, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "# First col is a date, treat geoId of NA as 'Namibia', not \"NA\" value\n", - "raw_data = pd.read_csv('covid.csv', \n", - " parse_dates=[0], dayfirst=True,\n", - " keep_default_na=False, na_values = [''],\n", - "# dtype = {'day': np.int64, \n", - "# 'month': np.int64, \n", - "# 'year': np.int64, \n", - "# 'cases': np.int64, \n", - "# 'deaths': np.int64, \n", - "# 'countriesAndTerritories': str, \n", - "# 'geoId': str, \n", - "# 'countryterritoryCode': str, \n", - "# 'popData2019': np.int64, \n", - "# 'continentExp': str, \n", - "# }\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 323, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "100050" - ] - }, - "execution_count": 323, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "raw_data.size" - ] - }, - { - "cell_type": "code", - "execution_count": 324, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "raw_data.fillna(0, inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 325, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
dateRepyear_weekcases_weeklydeaths_weeklycountriesAndTerritoriesgeoIdcountryterritoryCodepopData2019continentExpnotification_rate_per_100000_population_14-days
02021-01-182021-0255745AfghanistanAFAFG38041757.0Asia3.24
12021-01-112021-0167571AfghanistanAFAFG38041757.0Asia4.15
22021-01-042020-5390260AfghanistanAFAFG38041757.0Asia7.61
32020-12-282020-52199488AfghanistanAFAFG38041757.0Asia7.19
42020-12-212020-51740111AfghanistanAFAFG38041757.0Asia6.56
\n", - "
" - ], - "text/plain": [ - " dateRep year_week cases_weekly deaths_weekly countriesAndTerritories \\\n", - "0 2021-01-18 2021-02 557 45 Afghanistan \n", - "1 2021-01-11 2021-01 675 71 Afghanistan \n", - "2 2021-01-04 2020-53 902 60 Afghanistan \n", - "3 2020-12-28 2020-52 1994 88 Afghanistan \n", - "4 2020-12-21 2020-51 740 111 Afghanistan \n", - "\n", - " geoId countryterritoryCode popData2019 continentExp \\\n", - "0 AF AFG 38041757.0 Asia \n", - "1 AF AFG 38041757.0 Asia \n", - "2 AF AFG 38041757.0 Asia \n", - "3 AF AFG 38041757.0 Asia \n", - "4 AF AFG 38041757.0 Asia \n", - "\n", - " notification_rate_per_100000_population_14-days \n", - "0 3.24 \n", - "1 4.15 \n", - "2 7.61 \n", - "3 7.19 \n", - "4 6.56 " - ] - }, - "execution_count": 325, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "raw_data.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 326, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',\n", - " 'countriesAndTerritories': 'country_name',\n", - " 'countryterritoryCode': 'country_territory_code',\n", - " 'popData2019': 'population_2019',\n", - " 'continentExp': 'continent'}, inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 327, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['report_date', 'year_week', 'cases_weekly', 'deaths_weekly',\n", - " 'country_name', 'geo_id', 'country_territory_code', 'population_2019',\n", - " 'continent', 'notification_rate_per_100000_population_14-days'],\n", - " dtype='object')" - ] - }, - "execution_count": 327, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "raw_data.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 328, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "report_date datetime64[ns]\n", - "year_week object\n", - "cases_weekly int64\n", - "deaths_weekly int64\n", - "country_name object\n", - "geo_id object\n", - "country_territory_code object\n", - "population_2019 float64\n", - "continent object\n", - "notification_rate_per_100000_population_14-days float64\n", - "dtype: object" - ] - }, - "execution_count": 328, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "raw_data.dtypes" - ] - }, - { - "cell_type": "code", - "execution_count": 329, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
report_datecases_weeklydeaths_weeklygeo_idnotification_rate_per_100000_population_14-days
02021-01-1855745AF3.24
12021-01-1167571AF4.15
22021-01-0490260AF7.61
32020-12-28199488AF7.19
42020-12-21740111AF6.56
..................
100002020-04-20110ZW0.11
100012020-04-1352ZW0.05
100022020-04-0620ZW0.05
100032020-03-3051ZW0.05
100042020-03-2320ZW0.00
\n", - "

10005 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " report_date cases_weekly deaths_weekly geo_id \\\n", - "0 2021-01-18 557 45 AF \n", - "1 2021-01-11 675 71 AF \n", - "2 2021-01-04 902 60 AF \n", - "3 2020-12-28 1994 88 AF \n", - "4 2020-12-21 740 111 AF \n", - "... ... ... ... ... \n", - "10000 2020-04-20 11 0 ZW \n", - "10001 2020-04-13 5 2 ZW \n", - "10002 2020-04-06 2 0 ZW \n", - "10003 2020-03-30 5 1 ZW \n", - "10004 2020-03-23 2 0 ZW \n", - "\n", - " notification_rate_per_100000_population_14-days \n", - "0 3.24 \n", - "1 4.15 \n", - "2 7.61 \n", - "3 7.19 \n", - "4 6.56 \n", - "... ... \n", - "10000 0.11 \n", - "10001 0.05 \n", - "10002 0.05 \n", - "10003 0.05 \n", - "10004 0.00 \n", - "\n", - "[10005 rows x 5 columns]" - ] - }, - "execution_count": 329, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']]" - ] - }, - { - "cell_type": "code", - "execution_count": 330, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
country_namegeo_idcountry_territory_codepopulation_2019continent
0AfghanistanAFAFG38041757.0Asia
1AfghanistanAFAFG38041757.0Asia
2AfghanistanAFAFG38041757.0Asia
3AfghanistanAFAFG38041757.0Asia
4AfghanistanAFAFG38041757.0Asia
..................
10000ZimbabweZWZWE14645473.0Africa
10001ZimbabweZWZWE14645473.0Africa
10002ZimbabweZWZWE14645473.0Africa
10003ZimbabweZWZWE14645473.0Africa
10004ZimbabweZWZWE14645473.0Africa
\n", - "

10005 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " country_name geo_id country_territory_code population_2019 continent\n", - "0 Afghanistan AF AFG 38041757.0 Asia\n", - "1 Afghanistan AF AFG 38041757.0 Asia\n", - "2 Afghanistan AF AFG 38041757.0 Asia\n", - "3 Afghanistan AF AFG 38041757.0 Asia\n", - "4 Afghanistan AF AFG 38041757.0 Asia\n", - "... ... ... ... ... ...\n", - "10000 Zimbabwe ZW ZWE 14645473.0 Africa\n", - "10001 Zimbabwe ZW ZWE 14645473.0 Africa\n", - "10002 Zimbabwe ZW ZWE 14645473.0 Africa\n", - "10003 Zimbabwe ZW ZWE 14645473.0 Africa\n", - "10004 Zimbabwe ZW ZWE 14645473.0 Africa\n", - "\n", - "[10005 rows x 5 columns]" - ] - }, - "execution_count": 330, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "raw_data[['country_name', 'geo_id', 'country_territory_code',\n", - " 'population_2019', 'continent']]" - ] - }, - { - "cell_type": "code", - "execution_count": 331, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']].to_sql(\n", - " 'weekly_cases',\n", - " engine,\n", - " if_exists='replace',\n", - " index=False,\n", - " chunksize=500,\n", - " dtype={\n", - " \"report_date\": Date,\n", - " \"cases_weekly\": Integer,\n", - " \"deaths_weekly\": Integer,\n", - " \"geo_id\": String,\n", - " \"notification_rate_per_100000_population_14-days\": Float\n", - " }\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 332, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "raw_data[['country_name', 'geo_id', 'country_territory_code',\n", - " 'population_2019', 'continent']].drop_duplicates().to_sql(\n", - " 'countries',\n", - " engine,\n", - " if_exists='replace',\n", - " index=False,\n", - " chunksize=500,\n", - " dtype={\n", - " \"country_name\": Text,\n", - " \"geo_id\": String,\n", - " \"country_territory_code\": String,\n", - " \"population_2019\": Integer,\n", - " \"continent\": Text\n", - " }\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 333, - "metadata": { - "Collapsed": "false", - "scrolled": true - }, - "outputs": [], - "source": [ - "# %sql select geo_id from weekly_cases limit 10" - ] - }, - { - "cell_type": "code", - "execution_count": 334, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "# %%sql alter table weekly_cases add primary key (geo_id, report_date);\n", - "# alter table countries add primary key (geo_id);\n", - "# alter table weekly_cases add foreign key (geo_id) references countries(geo_id);\n", - "# alter table weekly_cases add culm_cases integer;\n", - "# alter table weekly_cases add culm_deaths integer;" - ] - }, - { - "cell_type": "code", - "execution_count": 335, - "metadata": {}, - "outputs": [], - "source": [ - "with engine.connect() as connection:\n", - " connection.execute('alter table weekly_cases add primary key (geo_id, report_date)')\n", - " connection.execute('alter table countries add primary key (geo_id);')\n", - " connection.execute('alter table weekly_cases add foreign key (geo_id) references countries(geo_id);')\n", - " connection.execute('alter table weekly_cases add culm_cases integer;')\n", - " connection.execute('alter table weekly_cases add culm_deaths integer;')" - ] - }, - { - "cell_type": "code", - "execution_count": 336, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "# %sql select report_date, cases_weekly, country_name from weekly_cases join countries using (geo_id) order by report_date desc limit 10" - ] - }, - { - "cell_type": "code", - "execution_count": 337, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "# %sql select report_date, cases_weekly, country_name from weekly_cases join countries on weekly_cases.geo_id = countries.geo_id order by report_date desc limit 10" - ] - }, - { - "cell_type": "code", - "execution_count": 338, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " * postgresql://covid:***@localhost/covid\n", - "10 rows affected.\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
report_datecases_weeklydeaths_weeklygeo_idnotification_rate_per_100000_population_14-daysculm_casesculm_deaths
2021-01-1855745AF3.24NoneNone
2021-01-1167571AF4.15NoneNone
2021-01-0490260AF7.61NoneNone
2020-12-28199488AF7.19NoneNone
2020-12-21740111AF6.56NoneNone
2020-12-14175771AF9.01NoneNone
2020-12-071672137AF7.22NoneNone
2020-11-30107368AF6.42NoneNone
2020-11-23136869AF6.66NoneNone
2020-11-16116461AF4.65NoneNone
" - ], - "text/plain": [ - "[(datetime.date(2021, 1, 18), 557, 45, 'AF', 3.24, None, None),\n", - " (datetime.date(2021, 1, 11), 675, 71, 'AF', 4.15, None, None),\n", - " (datetime.date(2021, 1, 4), 902, 60, 'AF', 7.61, None, None),\n", - " (datetime.date(2020, 12, 28), 1994, 88, 'AF', 7.19, None, None),\n", - " (datetime.date(2020, 12, 21), 740, 111, 'AF', 6.56, None, None),\n", - " (datetime.date(2020, 12, 14), 1757, 71, 'AF', 9.01, None, None),\n", - " (datetime.date(2020, 12, 7), 1672, 137, 'AF', 7.22, None, None),\n", - " (datetime.date(2020, 11, 30), 1073, 68, 'AF', 6.42, None, None),\n", - " (datetime.date(2020, 11, 23), 1368, 69, 'AF', 6.66, None, None),\n", - " (datetime.date(2020, 11, 16), 1164, 61, 'AF', 4.65, None, None)]" - ] - }, - "execution_count": 338, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%sql select * from weekly_cases limit 10" - ] - }, - { - "cell_type": "code", - "execution_count": 339, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "# %%sql\n", - "# with culm as \n", - "# (select report_date, geo_id,\n", - "# sum(cases_weekly) over (partition by geo_id \n", - "# order by report_date) as culm_data\n", - "# from weekly_cases\n", - "# )\n", - "# update weekly_cases\n", - "# set culm_cases = culm_data\n", - "# from culm\n", - "# where weekly_cases.report_date = culm.report_date and\n", - "# weekly_cases.geo_id = culm.geo_id" - ] - }, - { - "cell_type": "code", - "execution_count": 340, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "query_string = '''with culm as \n", - " (select report_date, geo_id,\n", - " sum(cases_weekly) over (partition by geo_id \n", - " order by report_date) as culm_data\n", - " from weekly_cases\n", - " )\n", - "update weekly_cases\n", - " set culm_cases = culm_data\n", - " from culm\n", - " where weekly_cases.report_date = culm.report_date and\n", - " weekly_cases.geo_id = culm.geo_id'''\n", - "with engine.connect() as connection:\n", - " connection.execute(query_string)" - ] - }, - { - "cell_type": "code", - "execution_count": 341, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "# %%sql\n", - "# with culm as \n", - "# (select report_date, geo_id,\n", - "# sum(deaths_weekly) over (partition by geo_id \n", - "# order by report_date) as culm_data\n", - "# from weekly_cases\n", - "# )\n", - "# update weekly_cases\n", - "# set culm_deaths = culm_data\n", - "# from culm\n", - "# where weekly_cases.report_date = culm.report_date and\n", - "# weekly_cases.geo_id = culm.geo_id" - ] - }, - { - "cell_type": "code", - "execution_count": 342, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "query_string = '''with culm as \n", - " (select report_date, geo_id,\n", - " sum(deaths_weekly) over (partition by geo_id \n", - " order by report_date) as culm_data\n", - " from weekly_cases\n", - " )\n", - "update weekly_cases\n", - " set culm_deaths = culm_data\n", - " from culm\n", - " where weekly_cases.report_date = culm.report_date and\n", - " weekly_cases.geo_id = culm.geo_id'''\n", - "with engine.connect() as connection:\n", - " connection.execute(query_string)" - ] - }, - { - "cell_type": "code", - "execution_count": 343, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " % Total % Received % Xferd Average Speed Time Time Time Current\n", - " Dload Upload Total Spent Left Speed\n", - "100 26053 100 26053 0 0 54277 0 --:--:-- --:--:-- --:--:-- 54277\n" - ] - } - ], - "source": [ - "uk_query_string = (\n", - "\"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n", - "\"metric=covidOccupiedMVBeds&\"\n", - "\"metric=newAdmissions&\"\n", - "\"metric=newCasesBySpecimenDate&\"\n", - "\"metric=hospitalCases&\"\n", - "\"metric=newDeaths28DaysByPublishDate&\"\n", - "\"format=csv\"\n", - ")\n", - "\n", - "!curl \"$uk_query_string\" > uk_data.csv" - ] - }, - { - "cell_type": "code", - "execution_count": 344, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " % Total % Received % Xferd Average Speed Time Time Time Current\n", - " Dload Upload Total Spent Left Speed\n", - "100 20223 100 20223 0 0 78688 0 --:--:-- --:--:-- --:--:-- 78383\n" - ] - } - ], - "source": [ - "test_query_string = (\n", - "\"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n", - "\"metric=newPCRTestsByPublishDate&\"\n", - "\"metric=newTestsByPublishDate&\"\n", - "\"metric=newPillarOneTwoTestsByPublishDate&\"\n", - "\"format=csv\"\n", - ")\n", - "!curl \"$test_query_string\" > test_data.csv" - ] - }, - { - "cell_type": "code", - "execution_count": 345, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "# hospital_query_string = (\n", - "# \"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n", - "# \"metric=newAdmissions&\"\n", - "# \"format=csv\"\n", - "# )\n", - "# !curl \"$hospital_query_string\" > hospital_admissions.csv" - ] - }, - { - "cell_type": "code", - "execution_count": 346, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "# hospital_query_string = (\n", - "# \"https://api.coronavirus.data.gov.uk/v1/data?\"\n", - "# \"filters=areaName=United%2520Kingdom;areaType=overview&\"\n", - "# \"structure=%7B%22date%22:%22date%22,%22areaName%22:%22areaName%22,%22areaType%22:%22areaType%22,\"\n", - "# \"%22newAdmissions%22:%22newAdmissions%22,%22cumAdmissions%22:%22cumAdmissions%22%7D&format=csv\"\n", - "# )\n", - " \n", - "# !curl \"$hospital_query_string\" | gunzip > hospital_admissions.csv" - ] - }, - { - "cell_type": "code", - "execution_count": 347, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
dateareaTypeareaCodeareaNamecovidOccupiedMVBedsnewAdmissionsnewCasesBySpecimenDatehospitalCasesnewDeaths28DaysByPublishDate
02021-01-26overviewK02000001United KingdomNaNNaNNaNNaN1631
12021-01-25overviewK02000001United Kingdom4032.0NaN4482.0NaN592
22021-01-24overviewK02000001United Kingdom4077.0NaN14266.037561.0610
32021-01-23overviewK02000001United Kingdom4066.0NaN20495.037266.01348
42021-01-22overviewK02000001United Kingdom4076.03341.029094.038144.01401
..............................
3852020-01-06overviewK02000001United KingdomNaNNaNNaNNaN0
3862020-01-05overviewK02000001United KingdomNaNNaNNaNNaN0
3872020-01-04overviewK02000001United KingdomNaNNaNNaNNaN0
3882020-01-03overviewK02000001United KingdomNaNNaNNaNNaN0
3892020-06-14overviewK02000001United Kingdom393.0364.0821.04907.027
\n", - "

390 rows × 9 columns

\n", - "
" - ], - "text/plain": [ - " date areaType areaCode areaName covidOccupiedMVBeds \\\n", - "0 2021-01-26 overview K02000001 United Kingdom NaN \n", - "1 2021-01-25 overview K02000001 United Kingdom 4032.0 \n", - "2 2021-01-24 overview K02000001 United Kingdom 4077.0 \n", - "3 2021-01-23 overview K02000001 United Kingdom 4066.0 \n", - "4 2021-01-22 overview K02000001 United Kingdom 4076.0 \n", - ".. ... ... ... ... ... \n", - "385 2020-01-06 overview K02000001 United Kingdom NaN \n", - "386 2020-01-05 overview K02000001 United Kingdom NaN \n", - "387 2020-01-04 overview K02000001 United Kingdom NaN \n", - "388 2020-01-03 overview K02000001 United Kingdom NaN \n", - "389 2020-06-14 overview K02000001 United Kingdom 393.0 \n", - "\n", - " newAdmissions newCasesBySpecimenDate hospitalCases \\\n", - "0 NaN NaN NaN \n", - "1 NaN 4482.0 NaN \n", - "2 NaN 14266.0 37561.0 \n", - "3 NaN 20495.0 37266.0 \n", - "4 3341.0 29094.0 38144.0 \n", - ".. ... ... ... \n", - "385 NaN NaN NaN \n", - "386 NaN NaN NaN \n", - "387 NaN NaN NaN \n", - "388 NaN NaN NaN \n", - "389 364.0 821.0 4907.0 \n", - "\n", - " newDeaths28DaysByPublishDate \n", - "0 1631 \n", - "1 592 \n", - "2 610 \n", - "3 1348 \n", - "4 1401 \n", - ".. ... \n", - "385 0 \n", - "386 0 \n", - "387 0 \n", - "388 0 \n", - "389 27 \n", - "\n", - "[390 rows x 9 columns]" - ] - }, - "execution_count": 347, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "uk_data = pd.read_csv('uk_data.csv', \n", - " parse_dates=[0], dayfirst=True)\n", - "uk_data" - ] - }, - { - "cell_type": "code", - "execution_count": 348, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
dateareaTypeareaCodeareaNamenewPCRTestsByPublishDatenewTestsByPublishDatenewPillarOneTwoTestsByPublishDate
02021-01-25overviewK02000001United Kingdom196510.0546734531104
12021-01-24overviewK02000001United Kingdom262111.0412775394479
22021-01-23overviewK02000001United Kingdom389209.0486425465231
32021-01-22overviewK02000001United Kingdom401075.0631901608829
42021-01-21overviewK02000001United Kingdom439408.0668989644537
........................
2962020-04-03overviewK02000001United KingdomNaN1462914293
2972020-04-02overviewK02000001United KingdomNaN1362313457
2982020-04-01overviewK02000001United KingdomNaN1194711924
2992020-03-31overviewK02000001United KingdomNaN1189611896
3002020-06-14overviewK02000001United Kingdom69529.08396566688
\n", - "

301 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " date areaType areaCode areaName newPCRTestsByPublishDate \\\n", - "0 2021-01-25 overview K02000001 United Kingdom 196510.0 \n", - "1 2021-01-24 overview K02000001 United Kingdom 262111.0 \n", - "2 2021-01-23 overview K02000001 United Kingdom 389209.0 \n", - "3 2021-01-22 overview K02000001 United Kingdom 401075.0 \n", - "4 2021-01-21 overview K02000001 United Kingdom 439408.0 \n", - ".. ... ... ... ... ... \n", - "296 2020-04-03 overview K02000001 United Kingdom NaN \n", - "297 2020-04-02 overview K02000001 United Kingdom NaN \n", - "298 2020-04-01 overview K02000001 United Kingdom NaN \n", - "299 2020-03-31 overview K02000001 United Kingdom NaN \n", - "300 2020-06-14 overview K02000001 United Kingdom 69529.0 \n", - "\n", - " newTestsByPublishDate newPillarOneTwoTestsByPublishDate \n", - "0 546734 531104 \n", - "1 412775 394479 \n", - "2 486425 465231 \n", - "3 631901 608829 \n", - "4 668989 644537 \n", - ".. ... ... \n", - "296 14629 14293 \n", - "297 13623 13457 \n", - "298 11947 11924 \n", - "299 11896 11896 \n", - "300 83965 66688 \n", - "\n", - "[301 rows x 7 columns]" - ] - }, - "execution_count": 348, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test_data = pd.read_csv('test_data.csv', \n", - " parse_dates=[0], dayfirst=True)\n", - "test_data" - ] - }, - { - "cell_type": "code", - "execution_count": 349, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['date', 'areaType', 'areaCode', 'areaName', 'newPCRTestsByPublishDate',\n", - " 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate'],\n", - " dtype='object')" - ] - }, - "execution_count": 349, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test_data.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 350, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "uk_data = uk_data.merge(test_data[['date', 'newPCRTestsByPublishDate',\n", - " 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate']], how='outer', on='date')" - ] - }, - { - "cell_type": "code", - "execution_count": 351, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['date', 'areaType', 'areaCode', 'areaName', 'covidOccupiedMVBeds',\n", - " 'newAdmissions', 'newCasesBySpecimenDate', 'hospitalCases',\n", - " 'newDeaths28DaysByPublishDate', 'newPCRTestsByPublishDate',\n", - " 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate'],\n", - " dtype='object')" - ] - }, - "execution_count": 351, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "uk_data.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 352, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "uk_data.rename(\n", - " columns={\n", - " 'covidOccupiedMVBeds': 'ventilator_beds',\n", - " 'newCasesBySpecimenDate': 'new_cases',\n", - " 'hospitalCases': 'hospital_cases', \n", - " 'newDeaths28DaysByPublishDate': 'new_deaths',\n", - " 'newAdmissions': 'new_admissions',\n", - " 'newPCRTestsByPublishDate': 'new_pcr_tests',\n", - " 'newTestsByPublishDate': 'new_tests',\n", - " 'newPillarOneTwoTestsByPublishDate': 'new_pillar_1_2_tests'\n", - " }, inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 353, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "date datetime64[ns]\n", - "areaType object\n", - "areaCode object\n", - "areaName object\n", - "ventilator_beds float64\n", - "new_admissions float64\n", - "new_cases float64\n", - "hospital_cases float64\n", - "new_deaths int64\n", - "new_pcr_tests float64\n", - "new_tests float64\n", - "new_pillar_1_2_tests float64\n", - "dtype: object" - ] - }, - "execution_count": 353, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "uk_data.dtypes" - ] - }, - { - "cell_type": "code", - "execution_count": 354, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['date', 'areaType', 'areaCode', 'areaName', 'ventilator_beds',\n", - " 'new_admissions', 'new_cases', 'hospital_cases', 'new_deaths',\n", - " 'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'],\n", - " dtype='object')" - ] - }, - "execution_count": 354, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - " uk_data.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 355, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "uk_data[['date', \n", - " 'hospital_cases', 'ventilator_beds',\n", - " 'new_cases', 'new_deaths', \n", - " 'hospital_cases', 'new_admissions',\n", - " 'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'\n", - " ]].to_sql(\n", - " 'uk_data',\n", - " engine,\n", - " if_exists='replace',\n", - " index=False,\n", - " chunksize=500,\n", - " dtype={\n", - " \"date\": Date,\n", - " \"hospital_cases\": Integer,\n", - " \"ventilator_beds\": Integer,\n", - " \"new_cases\": Integer,\n", - " \"hospital_cases\": Integer,\n", - " \"new_deaths\": Integer,\n", - " \"new_admissions\": Integer,\n", - " 'new_pcr_tests': Integer, \n", - " 'new_tests': Integer, \n", - " 'new_pillar_1_2_tests': Integer\n", - " }\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 356, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "# %sql select * from uk_data order by date desc limit 10" - ] - }, - { - "cell_type": "code", - "execution_count": 424, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "query_string = '''drop table if exists uk_data_7;\n", - "create table uk_data_7 \n", - "(date date primary key,\n", - " hospital_cases real,\n", - " ventilator_beds real,\n", - " new_cases real,\n", - " new_deaths real,\n", - " new_admissions real,\n", - " new_pcr_tests real,\n", - " new_tests real,\n", - " new_pillar_1_2_tests real\n", - ");'''\n", - "\n", - "with engine.connect() as connection:\n", - " connection.execute(query_string)" - ] - }, - { - "cell_type": "code", - "execution_count": 425, - "metadata": {}, - "outputs": [], - "source": [ - "update_string = '''with ownd as (\n", - " select date,\n", - " avg(hospital_cases) over wnd as w_hospital_cases,\n", - " avg(ventilator_beds) over wnd as w_ventilator_beds,\n", - " avg(new_cases) over wnd as w_new_cases,\n", - " avg(new_deaths) over wnd as w_new_deaths,\n", - " avg(new_admissions) over wnd as w_new_admissions,\n", - " avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n", - " avg(new_tests) over wnd as w_new_tests,\n", - " avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n", - " count(*) over wnd as w_size\n", - " from uk_data\n", - " window wnd as (\n", - " order by uk_data.date\n", - " rows between 3 preceding and 3 following\n", - " )\n", - ")\n", - "insert into uk_data_7(date, \n", - " hospital_cases, \n", - " ventilator_beds, \n", - " new_cases,\n", - " new_deaths,\n", - " new_admissions, \n", - " new_pcr_tests, \n", - " new_tests, \n", - " new_pillar_1_2_tests\n", - " )\n", - "(select date,\n", - " w_hospital_cases,\n", - " w_ventilator_beds,\n", - " w_new_cases,\n", - " w_new_deaths,\n", - " w_new_admissions,\n", - " w_new_pcr_tests,\n", - " w_new_tests,\n", - " w_new_pillar_1_2_tests\n", - " from ownd\n", - " where w_size = 7\n", - ")'''\n", - "with engine.connect() as connection:\n", - " connection.execute(update_string)" - ] - }, - { - "cell_type": "code", - "execution_count": 398, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "# %%sql insert into uk_data_7(date, ventilator_beds, new_cases, hospital_cases, new_deaths, new_admissions)\n", - "# values (\n", - "# select date, \n", - "# avg(ventilator_beds) over (order by date rows between 6 preceding and current row)\n", - "# from uk_data\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": 399, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "# query_string = '''insert into uk_data_7(date, hospital_cases)\n", - "# select uk_data.date, \n", - "# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as hospital_cases\n", - "# from uk_data'''\n", - "# with engine.connect() as connection:\n", - "# connection.execute(query_string) " - ] - }, - { - "cell_type": "code", - "execution_count": 360, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "# %%sql \n", - "# with m7 as \n", - "# (select uk_data.date as date7, \n", - "# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 6 preceding and current row) as nc7\n", - "# from uk_data\n", - "# )\n", - "# update uk_data_7\n", - "# set ventilator_beds = nc7\n", - "# from m7\n", - "# where uk_data_7.date = m7.date7" - ] - }, - { - "cell_type": "code", - "execution_count": 361, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " * postgresql://covid:***@localhost/covid\n", - "390 rows affected.\n" - ] - }, - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 361, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# %%sql\n", - "# with m7 as \n", - "# (select uk_data.date as date7, \n", - "# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7\n", - "# from uk_data\n", - "# )\n", - "# update uk_data_7\n", - "# set ventilator_beds = nc7\n", - "# from m7\n", - "# where uk_data_7.date = m7.date7" - ] - }, - { - "cell_type": "code", - "execution_count": 362, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " * postgresql://covid:***@localhost/covid\n", - "390 rows affected.\n" - ] - }, - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 362, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# %%sql\n", - "# with m7 as \n", - "# (select uk_data.date as date7, \n", - "# avg(uk_data.new_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7\n", - "# from uk_data\n", - "# )\n", - "# update uk_data_7\n", - "# set new_cases = nc7\n", - "# from m7\n", - "# where uk_data_7.date = m7.date7" - ] - }, - { - "cell_type": "code", - "execution_count": 363, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " * postgresql://covid:***@localhost/covid\n", - "390 rows affected.\n" - ] - }, - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 363, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# %%sql\n", - "# with m7 as \n", - "# (select uk_data.date as date7, \n", - "# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n", - "# from uk_data\n", - "# )\n", - "# update uk_data_7\n", - "# set hospital_cases = d7\n", - "# from m7\n", - "# where uk_data_7.date = m7.date7" - ] - }, - { - "cell_type": "code", - "execution_count": 364, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " * postgresql://covid:***@localhost/covid\n", - "390 rows affected.\n" - ] - }, - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 364, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# %%sql\n", - "# with m7 as \n", - "# (select uk_data.date as date7, \n", - "# avg(uk_data.new_deaths) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n", - "# from uk_data\n", - "# )\n", - "# update uk_data_7\n", - "# set new_deaths = d7\n", - "# from m7\n", - "# where uk_data_7.date = m7.date7" - ] - }, - { - "cell_type": "code", - "execution_count": 365, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " * postgresql://covid:***@localhost/covid\n", - "390 rows affected.\n" - ] - }, - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 365, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# %%sql\n", - "# with m7 as \n", - "# (select uk_data.date as date7, \n", - "# avg(uk_data.new_admissions) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n", - "# from uk_data\n", - "# )\n", - "# update uk_data_7\n", - "# set new_admissions = d7\n", - "# from m7\n", - "# where uk_data_7.date = m7.date7" - ] - }, - { - "cell_type": "code", - "execution_count": 366, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " * postgresql://covid:***@localhost/covid\n", - "390 rows affected.\n" - ] - }, - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 366, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# %%sql\n", - "# with m7 as \n", - "# (select uk_data.date as date7, \n", - "# avg(uk_data.new_pcr_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n", - "# from uk_data\n", - "# )\n", - "# update uk_data_7\n", - "# set new_pcr_tests = d7\n", - "# from m7\n", - "# where uk_data_7.date = m7.date7" - ] - }, - { - "cell_type": "code", - "execution_count": 367, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " * postgresql://covid:***@localhost/covid\n", - "390 rows affected.\n" - ] - }, - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 367, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# %%sql\n", - "# with m7 as \n", - "# (select uk_data.date as date7, \n", - "# avg(uk_data.new_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n", - "# from uk_data\n", - "# )\n", - "# update uk_data_7\n", - "# set new_tests = d7\n", - "# from m7\n", - "# where uk_data_7.date = m7.date7" - ] - }, - { - "cell_type": "code", - "execution_count": 309, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "# %%sql\n", - "# with m7 as \n", - "# (select uk_data.date as date7, \n", - "# avg(uk_data.new_pillar_1_2_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n", - "# from uk_data\n", - "# )\n", - "# update uk_data_7\n", - "# set new_pillar_1_2_tests = d7\n", - "# from m7\n", - "# where uk_data_7.date = m7.date7" - ] - }, - { - "cell_type": "code", - "execution_count": 310, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " * postgresql://covid:***@localhost/covid\n", - "0 rows affected.\n" - ] - }, - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 310, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# %%sql\n", - "# with wnd as\n", - "# ( select date, \n", - "# avg(new_pillar_1_2_tests)\n", - "# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests\n", - "# from uk_data\n", - "# )\n", - "# update uk_data_7\n", - "# set new_pillar_1_2_tests = wnd.a_new_pillar_1_2_tests\n", - "# from wnd\n", - "# where uk_data_7.date = wnd.date\n", - "# and (select count(*) from wnd) = 7" - ] - }, - { - "cell_type": "code", - "execution_count": 379, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " * postgresql://covid:***@localhost/covid\n", - "10 rows affected.\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datenew_pillar_1_2_testsa_new_pillar_1_2_testsa_count
2021-01-26None463604.6666666666673
2021-01-25None499910.7500000000004
2021-01-24None528836.0000000000005
2021-01-23None543566.8333333333336
2021-01-22None545780.2857142857147
2021-01-21None546626.5714285714297
2021-01-20None546806.2857142857147
2021-01-19None547402.4285714285717
2021-01-18None542617.8571428571437
2021-01-17None545088.1428571428577
" - ], - "text/plain": [ - "[(datetime.date(2021, 1, 26), None, Decimal('463604.666666666667'), 3),\n", - " (datetime.date(2021, 1, 25), None, Decimal('499910.750000000000'), 4),\n", - " (datetime.date(2021, 1, 24), None, Decimal('528836.000000000000'), 5),\n", - " (datetime.date(2021, 1, 23), None, Decimal('543566.833333333333'), 6),\n", - " (datetime.date(2021, 1, 22), None, Decimal('545780.285714285714'), 7),\n", - " (datetime.date(2021, 1, 21), None, Decimal('546626.571428571429'), 7),\n", - " (datetime.date(2021, 1, 20), None, Decimal('546806.285714285714'), 7),\n", - " (datetime.date(2021, 1, 19), None, Decimal('547402.428571428571'), 7),\n", - " (datetime.date(2021, 1, 18), None, Decimal('542617.857142857143'), 7),\n", - " (datetime.date(2021, 1, 17), None, Decimal('545088.142857142857'), 7)]" - ] - }, - "execution_count": 379, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# %%sql\n", - "# with wnd as\n", - "# ( select date, \n", - "# avg(new_pillar_1_2_tests)\n", - "# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests,\n", - "# count(new_pillar_1_2_tests)\n", - "# over (order by uk_data.date rows between 3 preceding and 3 following) as a_count\n", - "# from uk_data\n", - "# )\n", - "# select uk_data_7.date, new_pillar_1_2_tests, wnd.a_new_pillar_1_2_tests, wnd.a_count\n", - "# from uk_data_7, wnd\n", - "# where uk_data_7.date = wnd.date\n", - "# order by uk_data_7.date desc limit 10\n", - " \n", - "# select date, \n", - "# count(*) over wnd as w_size\n", - "# from uk_data\n", - "# window wnd as (\n", - "# order by uk_data.date\n", - "# rows between 3 preceding and 3 following\n", - "# );" - ] - }, - { - "cell_type": "code", - "execution_count": 401, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [ - "# %%sql\n", - "# select date, \n", - "# count(*) over wnd as w_size\n", - "# from uk_data\n", - "# window wnd as (\n", - "# order by uk_data.date\n", - "# rows between 3 preceding and 3 following\n", - "# )\n", - "# order by date desc limit 10" - ] - }, - { - "cell_type": "code", - "execution_count": 407, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " * postgresql://covid:***@localhost/covid\n", - "0 rows affected.\n" - ] - }, - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 407, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# %%sql\n", - "# with ownd as (\n", - "# select date,\n", - "# avg(hospital_cases) over wnd as w_hospital_cases,\n", - "# avg(ventilator_beds) over wnd as w_ventilator_beds,\n", - "# avg(new_cases) over wnd as w_new_cases,\n", - "# avg(new_deaths) over wnd as w_new_deaths,\n", - "# avg(new_admissions) over wnd as w_new_admissions,\n", - "# avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n", - "# avg(new_tests) over wnd as w_new_tests,\n", - "# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n", - "# count(*) over wnd as w_size\n", - "# from uk_data\n", - "# window wnd as (\n", - "# order by uk_data.date\n", - "# rows between 3 preceding and 3 following\n", - "# ))\n", - "# insert into uk_data_7(date, \n", - "# hospital_cases, \n", - "# ventilator_beds, \n", - "# new_cases,\n", - "# new_deaths,\n", - "# new_admissions, \n", - "# new_pcr_tests, \n", - "# new_tests, \n", - "# new_pillar_1_2_tests\n", - "# )\n", - "# (select date,\n", - "# avg(hospital_cases) over wnd as w_hospital_cases,\n", - "# avg(ventilator_beds) over wnd as w_ventilator_beds,\n", - "# avg(new_cases) over wnd as w_new_cases,\n", - "# avg(new_deaths) over wnd as w_new_deaths,\n", - "# avg(new_admissions) over wnd as w_new_admissions,\n", - "# avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n", - "# avg(new_tests) over wnd as w_new_tests,\n", - "# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n", - "# count(*) over wnd as w_size\n", - "# from uk_data\n", - "# window wnd as (\n", - "# order by uk_data.date\n", - "# rows between 3 preceding and 3 following\n", - "# )\n", - "# )\n", - "# set date = ownd.date,\n", - "# hospital_cases = w_hospital_cases,\n", - "# ventilator_beds = w_ventilator_beds,\n", - "# new_cases = w_new_cases,\n", - "# new_deaths = w_new_deaths,\n", - "# new_admissions = w_new_admissions,\n", - "# new_pcr_tests = w_new_pcr_tests,\n", - "# new_tests = w_new_tests,\n", - "# new_pillar_1_2_tests = w_new_pillar_1_2_tests\n", - "# from ownd\n", - "# where w_size = 7" - ] - }, - { - "cell_type": "code", - "execution_count": 417, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " * postgresql://covid:***@localhost/covid\n", - "384 rows affected.\n" - ] - }, - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 417, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# %%sql\n", - "# with ownd as (\n", - "# select date,\n", - "# avg(hospital_cases) over wnd as w_hospital_cases,\n", - "# avg(ventilator_beds) over wnd as w_ventilator_beds,\n", - "# avg(new_cases) over wnd as w_new_cases,\n", - "# avg(new_deaths) over wnd as w_new_deaths,\n", - "# avg(new_admissions) over wnd as w_new_admissions,\n", - "# avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n", - "# avg(new_tests) over wnd as w_new_tests,\n", - "# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n", - "# count(*) over wnd as w_size\n", - "# from uk_data\n", - "# window wnd as (\n", - "# order by uk_data.date\n", - "# rows between 3 preceding and 3 following\n", - "# )\n", - "# )\n", - "# insert into uk_data_7(date, \n", - "# hospital_cases, \n", - "# ventilator_beds, \n", - "# new_cases,\n", - "# new_deaths,\n", - "# new_admissions, \n", - "# new_pcr_tests, \n", - "# new_tests, \n", - "# new_pillar_1_2_tests\n", - "# )\n", - "# (select date,\n", - "# w_hospital_cases,\n", - "# w_ventilator_beds,\n", - "# w_new_cases,\n", - "# w_new_deaths,\n", - "# w_new_admissions,\n", - "# w_new_pcr_tests,\n", - "# w_new_tests,\n", - "# w_new_pillar_1_2_tests\n", - "# from ownd\n", - "# where w_size = 7\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": 427, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " * postgresql://covid:***@localhost/covid\n", - "10 rows affected.\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datehospital_casesventilator_bedsnew_casesnew_deathsnew_admissionsnew_pcr_testsnew_testsnew_pillar_1_2_tests
2021-01-2337915.64027.333322463.6661241.71423651.6667353735.34565312.3543566.8
2021-01-2238057.1684015.857224870.4281238.71423771.75354323.16567830.3545780.3
2021-01-2138217.7153999.285630620.4281239.71423828.2363750.16569716.7546626.56
2021-01-2038294.03969.857232707.4281248.42863811.0372037.44570458.1546806.3
2021-01-1938348.433939.571534143.2851240.85723825.8572374771.16571336.56547402.44
2021-01-1838338.1453898.571535766.571223.57143872.8572377019.28566901.1542617.9
2021-01-1738292.2853857.428537337.8551217.57143939.0389469.84571242.56545088.1
2021-01-1638119.8553810.714438824.01181.03967.2856395066.72569445.0541938.0
2021-01-1537858.7153746.285640268.571128.57144029.2856403526.72570188.7540199.9
2021-01-1437505.573667.285641989.1451118.57144065.2856412496.44567178.7537417.1
" - ], - "text/plain": [ - "[(datetime.date(2021, 1, 23), 37915.6, 4027.3333, 22463.666, 1241.7142, 3651.6667, 353735.34, 565312.3, 543566.8),\n", - " (datetime.date(2021, 1, 22), 38057.168, 4015.8572, 24870.428, 1238.7142, 3771.75, 354323.16, 567830.3, 545780.3),\n", - " (datetime.date(2021, 1, 21), 38217.715, 3999.2856, 30620.428, 1239.7142, 3828.2, 363750.16, 569716.7, 546626.56),\n", - " (datetime.date(2021, 1, 20), 38294.0, 3969.8572, 32707.428, 1248.4286, 3811.0, 372037.44, 570458.1, 546806.3),\n", - " (datetime.date(2021, 1, 19), 38348.43, 3939.5715, 34143.285, 1240.8572, 3825.8572, 374771.16, 571336.56, 547402.44),\n", - " (datetime.date(2021, 1, 18), 38338.145, 3898.5715, 35766.57, 1223.5714, 3872.8572, 377019.28, 566901.1, 542617.9),\n", - " (datetime.date(2021, 1, 17), 38292.285, 3857.4285, 37337.855, 1217.5714, 3939.0, 389469.84, 571242.56, 545088.1),\n", - " (datetime.date(2021, 1, 16), 38119.855, 3810.7144, 38824.0, 1181.0, 3967.2856, 395066.72, 569445.0, 541938.0),\n", - " (datetime.date(2021, 1, 15), 37858.715, 3746.2856, 40268.57, 1128.5714, 4029.2856, 403526.72, 570188.7, 540199.9),\n", - " (datetime.date(2021, 1, 14), 37505.57, 3667.2856, 41989.145, 1118.5714, 4065.2856, 412496.44, 567178.7, 537417.1)]" - ] - }, - "execution_count": 427, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%sql select * from uk_data_7 order by date desc limit 10" - ] - }, - { - "cell_type": "code", - "execution_count": 428, - "metadata": { - "Collapsed": "false" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " * postgresql://covid:***@localhost/covid\n", - "10 rows affected.\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datehospital_casesventilator_bedsnew_casesnew_deathsnew_admissionsnew_pcr_testsnew_testsnew_pillar_1_2_tests
2021-01-26NoneNoneNone1631NoneNoneNoneNone
2021-01-25None40324482592None196510546734531104
2021-01-2437561407714266610None262111412775394479
2021-01-23372664066204951348None389209486425465231
2021-01-223814440762909414013341401075631901608829
2021-01-213795739603143012903598439408668989644537
2021-01-203865039533501518204016434099645050617221
2021-01-193876539473931116104132357850582938559061
2021-01-18391813916447325994054262499559939537028
2021-01-17380953871288756713725320122417965395737
" - ], - "text/plain": [ - "[(datetime.date(2021, 1, 26), None, None, None, 1631, None, None, None, None),\n", - " (datetime.date(2021, 1, 25), None, 4032, 4482, 592, None, 196510, 546734, 531104),\n", - " (datetime.date(2021, 1, 24), 37561, 4077, 14266, 610, None, 262111, 412775, 394479),\n", - " (datetime.date(2021, 1, 23), 37266, 4066, 20495, 1348, None, 389209, 486425, 465231),\n", - " (datetime.date(2021, 1, 22), 38144, 4076, 29094, 1401, 3341, 401075, 631901, 608829),\n", - " (datetime.date(2021, 1, 21), 37957, 3960, 31430, 1290, 3598, 439408, 668989, 644537),\n", - " (datetime.date(2021, 1, 20), 38650, 3953, 35015, 1820, 4016, 434099, 645050, 617221),\n", - " (datetime.date(2021, 1, 19), 38765, 3947, 39311, 1610, 4132, 357850, 582938, 559061),\n", - " (datetime.date(2021, 1, 18), 39181, 3916, 44732, 599, 4054, 262499, 559939, 537028),\n", - " (datetime.date(2021, 1, 17), 38095, 3871, 28875, 671, 3725, 320122, 417965, 395737)]" - ] - }, - "execution_count": 428, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%sql select * from uk_data order by date desc limit 10" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "jupytext": { - "formats": "ipynb,md" - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}