--- /dev/null
+---
+jupyter:
+ jupytext:
+ formats: ipynb,md
+ text_representation:
+ extension: .md
+ format_name: markdown
+ format_version: '1.2'
+ jupytext_version: 1.9.1
+ kernelspec:
+ display_name: Python 3
+ language: python
+ name: python3
+---
+
+<!-- #region Collapsed="false" -->
+Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
+<!-- #endregion -->
+
+```python Collapsed="false"
+from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float
+from sqlalchemy import create_engine
+```
+
+```python Collapsed="false"
+import itertools
+import collections
+import json
+import pandas as pd
+import numpy as np
+from scipy.stats import gmean
+import datetime
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+%matplotlib inline
+```
+
+```python Collapsed="false"
+%load_ext sql
+```
+
+```python Collapsed="false"
+connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
+```
+
+```python Collapsed="false"
+%sql $connection_string
+```
+
+```python Collapsed="false"
+eng = create_engine(connection_string)
+engine = eng.execution_options(isolation_level="AUTOCOMMIT")
+```
+
+```python Collapsed="false"
+DEATH_COUNT_THRESHOLD = 10
+COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()
+COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
+COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()
+# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
+
+COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
+ 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
+ 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
+ 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']
+COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
+COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
+```
+
+```python Collapsed="false"
+!curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv
+```
+
+```python Collapsed="false"
+# First col is a date, treat geoId of NA as 'Namibia', not "NA" value
+raw_data = pd.read_csv('covid.csv',
+ parse_dates=[0], dayfirst=True,
+ keep_default_na=False, na_values = [''],
+# dtype = {'day': np.int64,
+# 'month': np.int64,
+# 'year': np.int64,
+# 'cases': np.int64,
+# 'deaths': np.int64,
+# 'countriesAndTerritories': str,
+# 'geoId': str,
+# 'countryterritoryCode': str,
+# 'popData2019': np.int64,
+# 'continentExp': str,
+# }
+ )
+```
+
+```python Collapsed="false"
+raw_data.size
+```
+
+```python Collapsed="false"
+raw_data.fillna(0, inplace=True)
+```
+
+```python Collapsed="false"
+raw_data.head()
+```
+
+```python Collapsed="false"
+raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',
+ 'countriesAndTerritories': 'country_name',
+ 'countryterritoryCode': 'country_territory_code',
+ 'popData2019': 'population_2019',
+ 'continentExp': 'continent'}, inplace=True)
+```
+
+```python Collapsed="false"
+raw_data.columns
+```
+
+```python Collapsed="false"
+raw_data.dtypes
+```
+
+```python Collapsed="false"
+raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']]
+```
+
+```python Collapsed="false"
+raw_data[['country_name', 'geo_id', 'country_territory_code',
+ 'population_2019', 'continent']]
+```
+
+```python Collapsed="false"
+raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']].to_sql(
+ 'weekly_cases',
+ engine,
+ if_exists='replace',
+ index=False,
+ chunksize=500,
+ dtype={
+ "report_date": Date,
+ "cases_weekly": Integer,
+ "deaths_weekly": Integer,
+ "geo_id": String,
+ "notification_rate_per_100000_population_14-days": Float
+ }
+)
+```
+
+```python Collapsed="false"
+raw_data[['country_name', 'geo_id', 'country_territory_code',
+ 'population_2019', 'continent']].drop_duplicates().to_sql(
+ 'countries',
+ engine,
+ if_exists='replace',
+ index=False,
+ chunksize=500,
+ dtype={
+ "country_name": Text,
+ "geo_id": String,
+ "country_territory_code": String,
+ "population_2019": Integer,
+ "continent": Text
+ }
+)
+```
+
+```python Collapsed="false"
+# %sql select geo_id from weekly_cases limit 10
+```
+
+```python Collapsed="false"
+# %%sql alter table weekly_cases add primary key (geo_id, report_date);
+# alter table countries add primary key (geo_id);
+# alter table weekly_cases add foreign key (geo_id) references countries(geo_id);
+# alter table weekly_cases add culm_cases integer;
+# alter table weekly_cases add culm_deaths integer;
+```
+
+```python
+with engine.connect() as connection:
+ connection.execute('alter table weekly_cases add primary key (geo_id, report_date)')
+ connection.execute('alter table countries add primary key (geo_id);')
+ connection.execute('alter table weekly_cases add foreign key (geo_id) references countries(geo_id);')
+ connection.execute('alter table weekly_cases add culm_cases integer;')
+ connection.execute('alter table weekly_cases add culm_deaths integer;')
+```
+
+```python Collapsed="false"
+# %sql select report_date, cases_weekly, country_name from weekly_cases join countries using (geo_id) order by report_date desc limit 10
+```
+
+```python Collapsed="false"
+# %sql select report_date, cases_weekly, country_name from weekly_cases join countries on weekly_cases.geo_id = countries.geo_id order by report_date desc limit 10
+```
+
+```python Collapsed="false"
+%sql select * from weekly_cases limit 10
+```
+
+```python Collapsed="false"
+# %%sql
+# with culm as
+# (select report_date, geo_id,
+# sum(cases_weekly) over (partition by geo_id
+# order by report_date) as culm_data
+# from weekly_cases
+# )
+# update weekly_cases
+# set culm_cases = culm_data
+# from culm
+# where weekly_cases.report_date = culm.report_date and
+# weekly_cases.geo_id = culm.geo_id
+```
+
+```python Collapsed="false"
+query_string = '''with culm as
+ (select report_date, geo_id,
+ sum(cases_weekly) over (partition by geo_id
+ order by report_date) as culm_data
+ from weekly_cases
+ )
+update weekly_cases
+ set culm_cases = culm_data
+ from culm
+ where weekly_cases.report_date = culm.report_date and
+ weekly_cases.geo_id = culm.geo_id'''
+with engine.connect() as connection:
+ connection.execute(query_string)
+```
+
+```python Collapsed="false"
+# %%sql
+# with culm as
+# (select report_date, geo_id,
+# sum(deaths_weekly) over (partition by geo_id
+# order by report_date) as culm_data
+# from weekly_cases
+# )
+# update weekly_cases
+# set culm_deaths = culm_data
+# from culm
+# where weekly_cases.report_date = culm.report_date and
+# weekly_cases.geo_id = culm.geo_id
+```
+
+```python Collapsed="false"
+query_string = '''with culm as
+ (select report_date, geo_id,
+ sum(deaths_weekly) over (partition by geo_id
+ order by report_date) as culm_data
+ from weekly_cases
+ )
+update weekly_cases
+ set culm_deaths = culm_data
+ from culm
+ where weekly_cases.report_date = culm.report_date and
+ weekly_cases.geo_id = culm.geo_id'''
+with engine.connect() as connection:
+ connection.execute(query_string)
+```
+
+```python Collapsed="false"
+uk_query_string = (
+"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
+"metric=covidOccupiedMVBeds&"
+"metric=newAdmissions&"
+"metric=newCasesBySpecimenDate&"
+"metric=hospitalCases&"
+"metric=newDeaths28DaysByPublishDate&"
+"format=csv"
+)
+
+!curl "$uk_query_string" > uk_data.csv
+```
+
+```python Collapsed="false"
+test_query_string = (
+"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
+"metric=newPCRTestsByPublishDate&"
+"metric=newTestsByPublishDate&"
+"metric=newPillarOneTwoTestsByPublishDate&"
+"format=csv"
+)
+!curl "$test_query_string" > test_data.csv
+```
+
+```python Collapsed="false"
+# hospital_query_string = (
+# "https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
+# "metric=newAdmissions&"
+# "format=csv"
+# )
+# !curl "$hospital_query_string" > hospital_admissions.csv
+```
+
+```python Collapsed="false"
+# hospital_query_string = (
+# "https://api.coronavirus.data.gov.uk/v1/data?"
+# "filters=areaName=United%2520Kingdom;areaType=overview&"
+# "structure=%7B%22date%22:%22date%22,%22areaName%22:%22areaName%22,%22areaType%22:%22areaType%22,"
+# "%22newAdmissions%22:%22newAdmissions%22,%22cumAdmissions%22:%22cumAdmissions%22%7D&format=csv"
+# )
+
+# !curl "$hospital_query_string" | gunzip > hospital_admissions.csv
+```
+
+```python Collapsed="false"
+uk_data = pd.read_csv('uk_data.csv',
+ parse_dates=[0], dayfirst=True)
+uk_data
+```
+
+```python Collapsed="false"
+test_data = pd.read_csv('test_data.csv',
+ parse_dates=[0], dayfirst=True)
+test_data
+```
+
+```python Collapsed="false"
+test_data.columns
+```
+
+```python Collapsed="false"
+uk_data = uk_data.merge(test_data[['date', 'newPCRTestsByPublishDate',
+ 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate']], how='outer', on='date')
+```
+
+```python Collapsed="false"
+uk_data.columns
+```
+
+```python Collapsed="false"
+uk_data.rename(
+ columns={
+ 'covidOccupiedMVBeds': 'ventilator_beds',
+ 'newCasesBySpecimenDate': 'new_cases',
+ 'hospitalCases': 'hospital_cases',
+ 'newDeaths28DaysByPublishDate': 'new_deaths',
+ 'newAdmissions': 'new_admissions',
+ 'newPCRTestsByPublishDate': 'new_pcr_tests',
+ 'newTestsByPublishDate': 'new_tests',
+ 'newPillarOneTwoTestsByPublishDate': 'new_pillar_1_2_tests'
+ }, inplace=True)
+```
+
+```python Collapsed="false"
+uk_data.dtypes
+```
+
+```python Collapsed="false"
+ uk_data.columns
+```
+
+```python Collapsed="false"
+uk_data[['date',
+ 'hospital_cases', 'ventilator_beds',
+ 'new_cases', 'new_deaths',
+ 'hospital_cases', 'new_admissions',
+ 'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'
+ ]].to_sql(
+ 'uk_data',
+ engine,
+ if_exists='replace',
+ index=False,
+ chunksize=500,
+ dtype={
+ "date": Date,
+ "hospital_cases": Integer,
+ "ventilator_beds": Integer,
+ "new_cases": Integer,
+ "hospital_cases": Integer,
+ "new_deaths": Integer,
+ "new_admissions": Integer,
+ 'new_pcr_tests': Integer,
+ 'new_tests': Integer,
+ 'new_pillar_1_2_tests': Integer
+ }
+)
+```
+
+```python Collapsed="false"
+# %sql select * from uk_data order by date desc limit 10
+```
+
+```python Collapsed="false"
+query_string = '''drop table if exists uk_data_7;
+create table uk_data_7
+(date date primary key,
+ hospital_cases real,
+ ventilator_beds real,
+ new_cases real,
+ new_deaths real,
+ new_admissions real,
+ new_pcr_tests real,
+ new_tests real,
+ new_pillar_1_2_tests real
+);'''
+
+with engine.connect() as connection:
+ connection.execute(query_string)
+```
+
+```python
+update_string = '''with ownd as (
+ select date,
+ avg(hospital_cases) over wnd as w_hospital_cases,
+ avg(ventilator_beds) over wnd as w_ventilator_beds,
+ avg(new_cases) over wnd as w_new_cases,
+ avg(new_deaths) over wnd as w_new_deaths,
+ avg(new_admissions) over wnd as w_new_admissions,
+ avg(new_pcr_tests) over wnd as w_new_pcr_tests,
+ avg(new_tests) over wnd as w_new_tests,
+ avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
+ count(*) over wnd as w_size
+ from uk_data
+ window wnd as (
+ order by uk_data.date
+ rows between 3 preceding and 3 following
+ )
+)
+insert into uk_data_7(date,
+ hospital_cases,
+ ventilator_beds,
+ new_cases,
+ new_deaths,
+ new_admissions,
+ new_pcr_tests,
+ new_tests,
+ new_pillar_1_2_tests
+ )
+(select date,
+ w_hospital_cases,
+ w_ventilator_beds,
+ w_new_cases,
+ w_new_deaths,
+ w_new_admissions,
+ w_new_pcr_tests,
+ w_new_tests,
+ w_new_pillar_1_2_tests
+ from ownd
+ where w_size = 7
+)'''
+with engine.connect() as connection:
+ connection.execute(update_string)
+```
+
+```python Collapsed="false"
+# %%sql insert into uk_data_7(date, ventilator_beds, new_cases, hospital_cases, new_deaths, new_admissions)
+# values (
+# select date,
+# avg(ventilator_beds) over (order by date rows between 6 preceding and current row)
+# from uk_data
+# )
+```
+
+```python Collapsed="false"
+# query_string = '''insert into uk_data_7(date, hospital_cases)
+# select uk_data.date,
+# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as hospital_cases
+# from uk_data'''
+# with engine.connect() as connection:
+# connection.execute(query_string)
+```
+
+```python Collapsed="false"
+# %%sql
+# with m7 as
+# (select uk_data.date as date7,
+# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 6 preceding and current row) as nc7
+# from uk_data
+# )
+# update uk_data_7
+# set ventilator_beds = nc7
+# from m7
+# where uk_data_7.date = m7.date7
+```
+
+```python Collapsed="false"
+# %%sql
+# with m7 as
+# (select uk_data.date as date7,
+# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7
+# from uk_data
+# )
+# update uk_data_7
+# set ventilator_beds = nc7
+# from m7
+# where uk_data_7.date = m7.date7
+```
+
+```python Collapsed="false"
+# %%sql
+# with m7 as
+# (select uk_data.date as date7,
+# avg(uk_data.new_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7
+# from uk_data
+# )
+# update uk_data_7
+# set new_cases = nc7
+# from m7
+# where uk_data_7.date = m7.date7
+```
+
+```python Collapsed="false"
+# %%sql
+# with m7 as
+# (select uk_data.date as date7,
+# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
+# from uk_data
+# )
+# update uk_data_7
+# set hospital_cases = d7
+# from m7
+# where uk_data_7.date = m7.date7
+```
+
+```python Collapsed="false"
+# %%sql
+# with m7 as
+# (select uk_data.date as date7,
+# avg(uk_data.new_deaths) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
+# from uk_data
+# )
+# update uk_data_7
+# set new_deaths = d7
+# from m7
+# where uk_data_7.date = m7.date7
+```
+
+```python Collapsed="false"
+# %%sql
+# with m7 as
+# (select uk_data.date as date7,
+# avg(uk_data.new_admissions) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
+# from uk_data
+# )
+# update uk_data_7
+# set new_admissions = d7
+# from m7
+# where uk_data_7.date = m7.date7
+```
+
+```python Collapsed="false"
+# %%sql
+# with m7 as
+# (select uk_data.date as date7,
+# avg(uk_data.new_pcr_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
+# from uk_data
+# )
+# update uk_data_7
+# set new_pcr_tests = d7
+# from m7
+# where uk_data_7.date = m7.date7
+```
+
+```python Collapsed="false"
+# %%sql
+# with m7 as
+# (select uk_data.date as date7,
+# avg(uk_data.new_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
+# from uk_data
+# )
+# update uk_data_7
+# set new_tests = d7
+# from m7
+# where uk_data_7.date = m7.date7
+```
+
+```python Collapsed="false"
+# %%sql
+# with m7 as
+# (select uk_data.date as date7,
+# avg(uk_data.new_pillar_1_2_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
+# from uk_data
+# )
+# update uk_data_7
+# set new_pillar_1_2_tests = d7
+# from m7
+# where uk_data_7.date = m7.date7
+```
+
+```python Collapsed="false"
+# %%sql
+# with wnd as
+# ( select date,
+# avg(new_pillar_1_2_tests)
+# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests
+# from uk_data
+# )
+# update uk_data_7
+# set new_pillar_1_2_tests = wnd.a_new_pillar_1_2_tests
+# from wnd
+# where uk_data_7.date = wnd.date
+# and (select count(*) from wnd) = 7
+```
+
+```python Collapsed="false"
+# %%sql
+# with wnd as
+# ( select date,
+# avg(new_pillar_1_2_tests)
+# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests,
+# count(new_pillar_1_2_tests)
+# over (order by uk_data.date rows between 3 preceding and 3 following) as a_count
+# from uk_data
+# )
+# select uk_data_7.date, new_pillar_1_2_tests, wnd.a_new_pillar_1_2_tests, wnd.a_count
+# from uk_data_7, wnd
+# where uk_data_7.date = wnd.date
+# order by uk_data_7.date desc limit 10
+
+# select date,
+# count(*) over wnd as w_size
+# from uk_data
+# window wnd as (
+# order by uk_data.date
+# rows between 3 preceding and 3 following
+# );
+```
+
+```python Collapsed="false"
+# %%sql
+# select date,
+# count(*) over wnd as w_size
+# from uk_data
+# window wnd as (
+# order by uk_data.date
+# rows between 3 preceding and 3 following
+# )
+# order by date desc limit 10
+```
+
+```python
+# %%sql
+# with ownd as (
+# select date,
+# avg(hospital_cases) over wnd as w_hospital_cases,
+# avg(ventilator_beds) over wnd as w_ventilator_beds,
+# avg(new_cases) over wnd as w_new_cases,
+# avg(new_deaths) over wnd as w_new_deaths,
+# avg(new_admissions) over wnd as w_new_admissions,
+# avg(new_pcr_tests) over wnd as w_new_pcr_tests,
+# avg(new_tests) over wnd as w_new_tests,
+# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
+# count(*) over wnd as w_size
+# from uk_data
+# window wnd as (
+# order by uk_data.date
+# rows between 3 preceding and 3 following
+# ))
+# insert into uk_data_7(date,
+# hospital_cases,
+# ventilator_beds,
+# new_cases,
+# new_deaths,
+# new_admissions,
+# new_pcr_tests,
+# new_tests,
+# new_pillar_1_2_tests
+# )
+# (select date,
+# avg(hospital_cases) over wnd as w_hospital_cases,
+# avg(ventilator_beds) over wnd as w_ventilator_beds,
+# avg(new_cases) over wnd as w_new_cases,
+# avg(new_deaths) over wnd as w_new_deaths,
+# avg(new_admissions) over wnd as w_new_admissions,
+# avg(new_pcr_tests) over wnd as w_new_pcr_tests,
+# avg(new_tests) over wnd as w_new_tests,
+# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
+# count(*) over wnd as w_size
+# from uk_data
+# window wnd as (
+# order by uk_data.date
+# rows between 3 preceding and 3 following
+# )
+# )
+# set date = ownd.date,
+# hospital_cases = w_hospital_cases,
+# ventilator_beds = w_ventilator_beds,
+# new_cases = w_new_cases,
+# new_deaths = w_new_deaths,
+# new_admissions = w_new_admissions,
+# new_pcr_tests = w_new_pcr_tests,
+# new_tests = w_new_tests,
+# new_pillar_1_2_tests = w_new_pillar_1_2_tests
+# from ownd
+# where w_size = 7
+```
+
+```python
+# %%sql
+# with ownd as (
+# select date,
+# avg(hospital_cases) over wnd as w_hospital_cases,
+# avg(ventilator_beds) over wnd as w_ventilator_beds,
+# avg(new_cases) over wnd as w_new_cases,
+# avg(new_deaths) over wnd as w_new_deaths,
+# avg(new_admissions) over wnd as w_new_admissions,
+# avg(new_pcr_tests) over wnd as w_new_pcr_tests,
+# avg(new_tests) over wnd as w_new_tests,
+# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
+# count(*) over wnd as w_size
+# from uk_data
+# window wnd as (
+# order by uk_data.date
+# rows between 3 preceding and 3 following
+# )
+# )
+# insert into uk_data_7(date,
+# hospital_cases,
+# ventilator_beds,
+# new_cases,
+# new_deaths,
+# new_admissions,
+# new_pcr_tests,
+# new_tests,
+# new_pillar_1_2_tests
+# )
+# (select date,
+# w_hospital_cases,
+# w_ventilator_beds,
+# w_new_cases,
+# w_new_deaths,
+# w_new_admissions,
+# w_new_pcr_tests,
+# w_new_tests,
+# w_new_pillar_1_2_tests
+# from ownd
+# where w_size = 7
+# )
+```
+
+```python Collapsed="false"
+%sql select * from uk_data_7 order by date desc limit 10
+```
+
+```python Collapsed="false"
+%sql select * from uk_data order by date desc limit 10
+```
+
+```python Collapsed="false"
+
+```