+++ /dev/null
----
-jupyter:
- jupytext:
- formats: ipynb,md
- text_representation:
- extension: .md
- format_name: markdown
- format_version: '1.2'
- jupytext_version: 1.9.1
- kernelspec:
- display_name: Python 3
- language: python
- name: python3
----
-
-<!-- #region Collapsed="false" -->
-Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
-<!-- #endregion -->
-
-```python Collapsed="false"
-from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float
-from sqlalchemy import create_engine
-```
-
-```python Collapsed="false"
-import itertools
-import collections
-import json
-import pandas as pd
-import numpy as np
-from scipy.stats import gmean
-import datetime
-
-import matplotlib as mpl
-import matplotlib.pyplot as plt
-%matplotlib inline
-```
-
-```python Collapsed="false"
-%load_ext sql
-```
-
-```python Collapsed="false"
-connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
-```
-
-```python Collapsed="false"
-%sql $connection_string
-```
-
-```python Collapsed="false"
-eng = create_engine(connection_string)
-engine = eng.execution_options(isolation_level="AUTOCOMMIT")
-```
-
-```python Collapsed="false"
-DEATH_COUNT_THRESHOLD = 10
-COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()
-COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
-COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()
-# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
-
-COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
- 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
- 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
- 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']
-COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
-COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
-```
-
-```python Collapsed="false"
-!curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv
-```
-
-```python Collapsed="false"
-# First col is a date, treat geoId of NA as 'Namibia', not "NA" value
-raw_data = pd.read_csv('covid.csv',
- parse_dates=[0], dayfirst=True,
- keep_default_na=False, na_values = [''],
-# dtype = {'day': np.int64,
-# 'month': np.int64,
-# 'year': np.int64,
-# 'cases': np.int64,
-# 'deaths': np.int64,
-# 'countriesAndTerritories': str,
-# 'geoId': str,
-# 'countryterritoryCode': str,
-# 'popData2019': np.int64,
-# 'continentExp': str,
-# }
- )
-```
-
-```python Collapsed="false"
-raw_data.size
-```
-
-```python Collapsed="false"
-raw_data.fillna(0, inplace=True)
-```
-
-```python Collapsed="false"
-raw_data.head()
-```
-
-```python Collapsed="false"
-raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',
- 'countriesAndTerritories': 'country_name',
- 'countryterritoryCode': 'country_territory_code',
- 'popData2019': 'population_2019',
- 'continentExp': 'continent'}, inplace=True)
-```
-
-```python Collapsed="false"
-raw_data.columns
-```
-
-```python Collapsed="false"
-raw_data.dtypes
-```
-
-```python Collapsed="false"
-raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']]
-```
-
-```python Collapsed="false"
-raw_data[['country_name', 'geo_id', 'country_territory_code',
- 'population_2019', 'continent']]
-```
-
-```python Collapsed="false"
-raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']].to_sql(
- 'weekly_cases',
- engine,
- if_exists='replace',
- index=False,
- chunksize=500,
- dtype={
- "report_date": Date,
- "cases_weekly": Integer,
- "deaths_weekly": Integer,
- "geo_id": String,
- "notification_rate_per_100000_population_14-days": Float
- }
-)
-```
-
-```python Collapsed="false"
-raw_data[['country_name', 'geo_id', 'country_territory_code',
- 'population_2019', 'continent']].drop_duplicates().to_sql(
- 'countries',
- engine,
- if_exists='replace',
- index=False,
- chunksize=500,
- dtype={
- "country_name": Text,
- "geo_id": String,
- "country_territory_code": String,
- "population_2019": Integer,
- "continent": Text
- }
-)
-```
-
-```python Collapsed="false"
-# %sql select geo_id from weekly_cases limit 10
-```
-
-```python Collapsed="false"
-# %%sql alter table weekly_cases add primary key (geo_id, report_date);
-# alter table countries add primary key (geo_id);
-# alter table weekly_cases add foreign key (geo_id) references countries(geo_id);
-# alter table weekly_cases add culm_cases integer;
-# alter table weekly_cases add culm_deaths integer;
-```
-
-```python
-with engine.connect() as connection:
- connection.execute('alter table weekly_cases add primary key (geo_id, report_date)')
- connection.execute('alter table countries add primary key (geo_id);')
- connection.execute('alter table weekly_cases add foreign key (geo_id) references countries(geo_id);')
- connection.execute('alter table weekly_cases add culm_cases integer;')
- connection.execute('alter table weekly_cases add culm_deaths integer;')
-```
-
-```python Collapsed="false"
-# %sql select report_date, cases_weekly, country_name from weekly_cases join countries using (geo_id) order by report_date desc limit 10
-```
-
-```python Collapsed="false"
-# %sql select report_date, cases_weekly, country_name from weekly_cases join countries on weekly_cases.geo_id = countries.geo_id order by report_date desc limit 10
-```
-
-```python Collapsed="false"
-%sql select * from weekly_cases limit 10
-```
-
-```python Collapsed="false"
-# %%sql
-# with culm as
-# (select report_date, geo_id,
-# sum(cases_weekly) over (partition by geo_id
-# order by report_date) as culm_data
-# from weekly_cases
-# )
-# update weekly_cases
-# set culm_cases = culm_data
-# from culm
-# where weekly_cases.report_date = culm.report_date and
-# weekly_cases.geo_id = culm.geo_id
-```
-
-```python Collapsed="false"
-query_string = '''with culm as
- (select report_date, geo_id,
- sum(cases_weekly) over (partition by geo_id
- order by report_date) as culm_data
- from weekly_cases
- )
-update weekly_cases
- set culm_cases = culm_data
- from culm
- where weekly_cases.report_date = culm.report_date and
- weekly_cases.geo_id = culm.geo_id'''
-with engine.connect() as connection:
- connection.execute(query_string)
-```
-
-```python Collapsed="false"
-# %%sql
-# with culm as
-# (select report_date, geo_id,
-# sum(deaths_weekly) over (partition by geo_id
-# order by report_date) as culm_data
-# from weekly_cases
-# )
-# update weekly_cases
-# set culm_deaths = culm_data
-# from culm
-# where weekly_cases.report_date = culm.report_date and
-# weekly_cases.geo_id = culm.geo_id
-```
-
-```python Collapsed="false"
-query_string = '''with culm as
- (select report_date, geo_id,
- sum(deaths_weekly) over (partition by geo_id
- order by report_date) as culm_data
- from weekly_cases
- )
-update weekly_cases
- set culm_deaths = culm_data
- from culm
- where weekly_cases.report_date = culm.report_date and
- weekly_cases.geo_id = culm.geo_id'''
-with engine.connect() as connection:
- connection.execute(query_string)
-```
-
-```python Collapsed="false"
-uk_query_string = (
-"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
-"metric=covidOccupiedMVBeds&"
-"metric=newAdmissions&"
-"metric=newCasesBySpecimenDate&"
-"metric=hospitalCases&"
-"metric=newDeaths28DaysByPublishDate&"
-"format=csv"
-)
-
-!curl "$uk_query_string" > uk_data.csv
-```
-
-```python Collapsed="false"
-test_query_string = (
-"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
-"metric=newPCRTestsByPublishDate&"
-"metric=newTestsByPublishDate&"
-"metric=newPillarOneTwoTestsByPublishDate&"
-"format=csv"
-)
-!curl "$test_query_string" > test_data.csv
-```
-
-```python Collapsed="false"
-# hospital_query_string = (
-# "https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
-# "metric=newAdmissions&"
-# "format=csv"
-# )
-# !curl "$hospital_query_string" > hospital_admissions.csv
-```
-
-```python Collapsed="false"
-# hospital_query_string = (
-# "https://api.coronavirus.data.gov.uk/v1/data?"
-# "filters=areaName=United%2520Kingdom;areaType=overview&"
-# "structure=%7B%22date%22:%22date%22,%22areaName%22:%22areaName%22,%22areaType%22:%22areaType%22,"
-# "%22newAdmissions%22:%22newAdmissions%22,%22cumAdmissions%22:%22cumAdmissions%22%7D&format=csv"
-# )
-
-# !curl "$hospital_query_string" | gunzip > hospital_admissions.csv
-```
-
-```python Collapsed="false"
-uk_data = pd.read_csv('uk_data.csv',
- parse_dates=[0], dayfirst=True)
-uk_data
-```
-
-```python Collapsed="false"
-test_data = pd.read_csv('test_data.csv',
- parse_dates=[0], dayfirst=True)
-test_data
-```
-
-```python Collapsed="false"
-test_data.columns
-```
-
-```python Collapsed="false"
-uk_data = uk_data.merge(test_data[['date', 'newPCRTestsByPublishDate',
- 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate']], how='outer', on='date')
-```
-
-```python Collapsed="false"
-uk_data.columns
-```
-
-```python Collapsed="false"
-uk_data.rename(
- columns={
- 'covidOccupiedMVBeds': 'ventilator_beds',
- 'newCasesBySpecimenDate': 'new_cases',
- 'hospitalCases': 'hospital_cases',
- 'newDeaths28DaysByPublishDate': 'new_deaths',
- 'newAdmissions': 'new_admissions',
- 'newPCRTestsByPublishDate': 'new_pcr_tests',
- 'newTestsByPublishDate': 'new_tests',
- 'newPillarOneTwoTestsByPublishDate': 'new_pillar_1_2_tests'
- }, inplace=True)
-```
-
-```python Collapsed="false"
-uk_data.dtypes
-```
-
-```python Collapsed="false"
- uk_data.columns
-```
-
-```python Collapsed="false"
-uk_data[['date',
- 'hospital_cases', 'ventilator_beds',
- 'new_cases', 'new_deaths',
- 'hospital_cases', 'new_admissions',
- 'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'
- ]].to_sql(
- 'uk_data',
- engine,
- if_exists='replace',
- index=False,
- chunksize=500,
- dtype={
- "date": Date,
- "hospital_cases": Integer,
- "ventilator_beds": Integer,
- "new_cases": Integer,
- "hospital_cases": Integer,
- "new_deaths": Integer,
- "new_admissions": Integer,
- 'new_pcr_tests': Integer,
- 'new_tests': Integer,
- 'new_pillar_1_2_tests': Integer
- }
-)
-```
-
-```python Collapsed="false"
-# %sql select * from uk_data order by date desc limit 10
-```
-
-```python Collapsed="false"
-query_string = '''drop table if exists uk_data_7;
-create table uk_data_7
-(date date primary key,
- hospital_cases real,
- ventilator_beds real,
- new_cases real,
- new_deaths real,
- new_admissions real,
- new_pcr_tests real,
- new_tests real,
- new_pillar_1_2_tests real
-);'''
-
-with engine.connect() as connection:
- connection.execute(query_string)
-```
-
-```python
-update_string = '''with ownd as (
- select date,
- avg(hospital_cases) over wnd as w_hospital_cases,
- avg(ventilator_beds) over wnd as w_ventilator_beds,
- avg(new_cases) over wnd as w_new_cases,
- avg(new_deaths) over wnd as w_new_deaths,
- avg(new_admissions) over wnd as w_new_admissions,
- avg(new_pcr_tests) over wnd as w_new_pcr_tests,
- avg(new_tests) over wnd as w_new_tests,
- avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
- count(*) over wnd as w_size
- from uk_data
- window wnd as (
- order by uk_data.date
- rows between 3 preceding and 3 following
- )
-)
-insert into uk_data_7(date,
- hospital_cases,
- ventilator_beds,
- new_cases,
- new_deaths,
- new_admissions,
- new_pcr_tests,
- new_tests,
- new_pillar_1_2_tests
- )
-(select date,
- w_hospital_cases,
- w_ventilator_beds,
- w_new_cases,
- w_new_deaths,
- w_new_admissions,
- w_new_pcr_tests,
- w_new_tests,
- w_new_pillar_1_2_tests
- from ownd
- where w_size = 7
-)'''
-with engine.connect() as connection:
- connection.execute(update_string)
-```
-
-```python Collapsed="false"
-# %%sql insert into uk_data_7(date, ventilator_beds, new_cases, hospital_cases, new_deaths, new_admissions)
-# values (
-# select date,
-# avg(ventilator_beds) over (order by date rows between 6 preceding and current row)
-# from uk_data
-# )
-```
-
-```python Collapsed="false"
-# query_string = '''insert into uk_data_7(date, hospital_cases)
-# select uk_data.date,
-# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as hospital_cases
-# from uk_data'''
-# with engine.connect() as connection:
-# connection.execute(query_string)
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 6 preceding and current row) as nc7
-# from uk_data
-# )
-# update uk_data_7
-# set ventilator_beds = nc7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7
-# from uk_data
-# )
-# update uk_data_7
-# set ventilator_beds = nc7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.new_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7
-# from uk_data
-# )
-# update uk_data_7
-# set new_cases = nc7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
-# from uk_data
-# )
-# update uk_data_7
-# set hospital_cases = d7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.new_deaths) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
-# from uk_data
-# )
-# update uk_data_7
-# set new_deaths = d7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.new_admissions) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
-# from uk_data
-# )
-# update uk_data_7
-# set new_admissions = d7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.new_pcr_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
-# from uk_data
-# )
-# update uk_data_7
-# set new_pcr_tests = d7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.new_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
-# from uk_data
-# )
-# update uk_data_7
-# set new_tests = d7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.new_pillar_1_2_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
-# from uk_data
-# )
-# update uk_data_7
-# set new_pillar_1_2_tests = d7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with wnd as
-# ( select date,
-# avg(new_pillar_1_2_tests)
-# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests
-# from uk_data
-# )
-# update uk_data_7
-# set new_pillar_1_2_tests = wnd.a_new_pillar_1_2_tests
-# from wnd
-# where uk_data_7.date = wnd.date
-# and (select count(*) from wnd) = 7
-```
-
-```python Collapsed="false"
-# %%sql
-# with wnd as
-# ( select date,
-# avg(new_pillar_1_2_tests)
-# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests,
-# count(new_pillar_1_2_tests)
-# over (order by uk_data.date rows between 3 preceding and 3 following) as a_count
-# from uk_data
-# )
-# select uk_data_7.date, new_pillar_1_2_tests, wnd.a_new_pillar_1_2_tests, wnd.a_count
-# from uk_data_7, wnd
-# where uk_data_7.date = wnd.date
-# order by uk_data_7.date desc limit 10
-
-# select date,
-# count(*) over wnd as w_size
-# from uk_data
-# window wnd as (
-# order by uk_data.date
-# rows between 3 preceding and 3 following
-# );
-```
-
-```python Collapsed="false"
-# %%sql
-# select date,
-# count(*) over wnd as w_size
-# from uk_data
-# window wnd as (
-# order by uk_data.date
-# rows between 3 preceding and 3 following
-# )
-# order by date desc limit 10
-```
-
-```python
-# %%sql
-# with ownd as (
-# select date,
-# avg(hospital_cases) over wnd as w_hospital_cases,
-# avg(ventilator_beds) over wnd as w_ventilator_beds,
-# avg(new_cases) over wnd as w_new_cases,
-# avg(new_deaths) over wnd as w_new_deaths,
-# avg(new_admissions) over wnd as w_new_admissions,
-# avg(new_pcr_tests) over wnd as w_new_pcr_tests,
-# avg(new_tests) over wnd as w_new_tests,
-# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
-# count(*) over wnd as w_size
-# from uk_data
-# window wnd as (
-# order by uk_data.date
-# rows between 3 preceding and 3 following
-# ))
-# insert into uk_data_7(date,
-# hospital_cases,
-# ventilator_beds,
-# new_cases,
-# new_deaths,
-# new_admissions,
-# new_pcr_tests,
-# new_tests,
-# new_pillar_1_2_tests
-# )
-# (select date,
-# avg(hospital_cases) over wnd as w_hospital_cases,
-# avg(ventilator_beds) over wnd as w_ventilator_beds,
-# avg(new_cases) over wnd as w_new_cases,
-# avg(new_deaths) over wnd as w_new_deaths,
-# avg(new_admissions) over wnd as w_new_admissions,
-# avg(new_pcr_tests) over wnd as w_new_pcr_tests,
-# avg(new_tests) over wnd as w_new_tests,
-# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
-# count(*) over wnd as w_size
-# from uk_data
-# window wnd as (
-# order by uk_data.date
-# rows between 3 preceding and 3 following
-# )
-# )
-# set date = ownd.date,
-# hospital_cases = w_hospital_cases,
-# ventilator_beds = w_ventilator_beds,
-# new_cases = w_new_cases,
-# new_deaths = w_new_deaths,
-# new_admissions = w_new_admissions,
-# new_pcr_tests = w_new_pcr_tests,
-# new_tests = w_new_tests,
-# new_pillar_1_2_tests = w_new_pillar_1_2_tests
-# from ownd
-# where w_size = 7
-```
-
-```python
-# %%sql
-# with ownd as (
-# select date,
-# avg(hospital_cases) over wnd as w_hospital_cases,
-# avg(ventilator_beds) over wnd as w_ventilator_beds,
-# avg(new_cases) over wnd as w_new_cases,
-# avg(new_deaths) over wnd as w_new_deaths,
-# avg(new_admissions) over wnd as w_new_admissions,
-# avg(new_pcr_tests) over wnd as w_new_pcr_tests,
-# avg(new_tests) over wnd as w_new_tests,
-# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
-# count(*) over wnd as w_size
-# from uk_data
-# window wnd as (
-# order by uk_data.date
-# rows between 3 preceding and 3 following
-# )
-# )
-# insert into uk_data_7(date,
-# hospital_cases,
-# ventilator_beds,
-# new_cases,
-# new_deaths,
-# new_admissions,
-# new_pcr_tests,
-# new_tests,
-# new_pillar_1_2_tests
-# )
-# (select date,
-# w_hospital_cases,
-# w_ventilator_beds,
-# w_new_cases,
-# w_new_deaths,
-# w_new_admissions,
-# w_new_pcr_tests,
-# w_new_tests,
-# w_new_pillar_1_2_tests
-# from ownd
-# where w_size = 7
-# )
-```
-
-```python Collapsed="false"
-%sql select * from uk_data_7 order by date desc limit 10
-```
-
-```python Collapsed="false"
-%sql select * from uk_data order by date desc limit 10
-```
-
-```python Collapsed="false"
-
-```