--- /dev/null
+{
+ "folders":
+ [
+ {
+ "path": "."
+ }
+ ]
+}
+++ /dev/null
----
-jupyter:
- jupytext:
- formats: ipynb,md
- text_representation:
- extension: .md
- format_name: markdown
- format_version: '1.2'
- jupytext_version: 1.9.1
- kernelspec:
- display_name: Python 3
- language: python
- name: python3
----
-
-<!-- #region Collapsed="false" -->
-Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
-<!-- #endregion -->
-
-```python Collapsed="false"
-from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float
-from sqlalchemy import create_engine
-```
-
-```python Collapsed="false"
-import itertools
-import collections
-import json
-import pandas as pd
-import numpy as np
-from scipy.stats import gmean
-import datetime
-
-import matplotlib as mpl
-import matplotlib.pyplot as plt
-%matplotlib inline
-```
-
-```python Collapsed="false"
-%load_ext sql
-```
-
-```python Collapsed="false"
-connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
-```
-
-```python Collapsed="false"
-%sql $connection_string
-```
-
-```python Collapsed="false"
-eng = create_engine(connection_string)
-engine = eng.execution_options(isolation_level="AUTOCOMMIT")
-```
-
-```python Collapsed="false"
-DEATH_COUNT_THRESHOLD = 10
-COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()
-COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
-COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()
-# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
-
-COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
- 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
- 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
- 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']
-COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
-COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
-```
-
-```python Collapsed="false"
-!curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv
-```
-
-```python Collapsed="false"
-# First col is a date, treat geoId of NA as 'Namibia', not "NA" value
-raw_data = pd.read_csv('covid.csv',
- parse_dates=[0], dayfirst=True,
- keep_default_na=False, na_values = [''],
-# dtype = {'day': np.int64,
-# 'month': np.int64,
-# 'year': np.int64,
-# 'cases': np.int64,
-# 'deaths': np.int64,
-# 'countriesAndTerritories': str,
-# 'geoId': str,
-# 'countryterritoryCode': str,
-# 'popData2019': np.int64,
-# 'continentExp': str,
-# }
- )
-```
-
-```python Collapsed="false"
-raw_data.size
-```
-
-```python Collapsed="false"
-raw_data.fillna(0, inplace=True)
-```
-
-```python Collapsed="false"
-raw_data.head()
-```
-
-```python Collapsed="false"
-raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',
- 'countriesAndTerritories': 'country_name',
- 'countryterritoryCode': 'country_territory_code',
- 'popData2019': 'population_2019',
- 'continentExp': 'continent'}, inplace=True)
-```
-
-```python Collapsed="false"
-raw_data.columns
-```
-
-```python Collapsed="false"
-raw_data.dtypes
-```
-
-```python Collapsed="false"
-raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']]
-```
-
-```python Collapsed="false"
-raw_data[['country_name', 'geo_id', 'country_territory_code',
- 'population_2019', 'continent']]
-```
-
-```python Collapsed="false"
-raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']].to_sql(
- 'weekly_cases',
- engine,
- if_exists='replace',
- index=False,
- chunksize=500,
- dtype={
- "report_date": Date,
- "cases_weekly": Integer,
- "deaths_weekly": Integer,
- "geo_id": String,
- "notification_rate_per_100000_population_14-days": Float
- }
-)
-```
-
-```python Collapsed="false"
-raw_data[['country_name', 'geo_id', 'country_territory_code',
- 'population_2019', 'continent']].drop_duplicates().to_sql(
- 'countries',
- engine,
- if_exists='replace',
- index=False,
- chunksize=500,
- dtype={
- "country_name": Text,
- "geo_id": String,
- "country_territory_code": String,
- "population_2019": Integer,
- "continent": Text
- }
-)
-```
-
-```python Collapsed="false"
-# %sql select geo_id from weekly_cases limit 10
-```
-
-```python Collapsed="false"
-# %%sql alter table weekly_cases add primary key (geo_id, report_date);
-# alter table countries add primary key (geo_id);
-# alter table weekly_cases add foreign key (geo_id) references countries(geo_id);
-# alter table weekly_cases add culm_cases integer;
-# alter table weekly_cases add culm_deaths integer;
-```
-
-```python
-with engine.connect() as connection:
- connection.execute('alter table weekly_cases add primary key (geo_id, report_date)')
- connection.execute('alter table countries add primary key (geo_id);')
- connection.execute('alter table weekly_cases add foreign key (geo_id) references countries(geo_id);')
- connection.execute('alter table weekly_cases add culm_cases integer;')
- connection.execute('alter table weekly_cases add culm_deaths integer;')
-```
-
-```python Collapsed="false"
-# %sql select report_date, cases_weekly, country_name from weekly_cases join countries using (geo_id) order by report_date desc limit 10
-```
-
-```python Collapsed="false"
-# %sql select report_date, cases_weekly, country_name from weekly_cases join countries on weekly_cases.geo_id = countries.geo_id order by report_date desc limit 10
-```
-
-```python Collapsed="false"
-%sql select * from weekly_cases limit 10
-```
-
-```python Collapsed="false"
-# %%sql
-# with culm as
-# (select report_date, geo_id,
-# sum(cases_weekly) over (partition by geo_id
-# order by report_date) as culm_data
-# from weekly_cases
-# )
-# update weekly_cases
-# set culm_cases = culm_data
-# from culm
-# where weekly_cases.report_date = culm.report_date and
-# weekly_cases.geo_id = culm.geo_id
-```
-
-```python Collapsed="false"
-query_string = '''with culm as
- (select report_date, geo_id,
- sum(cases_weekly) over (partition by geo_id
- order by report_date) as culm_data
- from weekly_cases
- )
-update weekly_cases
- set culm_cases = culm_data
- from culm
- where weekly_cases.report_date = culm.report_date and
- weekly_cases.geo_id = culm.geo_id'''
-with engine.connect() as connection:
- connection.execute(query_string)
-```
-
-```python Collapsed="false"
-# %%sql
-# with culm as
-# (select report_date, geo_id,
-# sum(deaths_weekly) over (partition by geo_id
-# order by report_date) as culm_data
-# from weekly_cases
-# )
-# update weekly_cases
-# set culm_deaths = culm_data
-# from culm
-# where weekly_cases.report_date = culm.report_date and
-# weekly_cases.geo_id = culm.geo_id
-```
-
-```python Collapsed="false"
-query_string = '''with culm as
- (select report_date, geo_id,
- sum(deaths_weekly) over (partition by geo_id
- order by report_date) as culm_data
- from weekly_cases
- )
-update weekly_cases
- set culm_deaths = culm_data
- from culm
- where weekly_cases.report_date = culm.report_date and
- weekly_cases.geo_id = culm.geo_id'''
-with engine.connect() as connection:
- connection.execute(query_string)
-```
-
-```python Collapsed="false"
-uk_query_string = (
-"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
-"metric=covidOccupiedMVBeds&"
-"metric=newAdmissions&"
-"metric=newCasesBySpecimenDate&"
-"metric=hospitalCases&"
-"metric=newDeaths28DaysByPublishDate&"
-"format=csv"
-)
-
-!curl "$uk_query_string" > uk_data.csv
-```
-
-```python Collapsed="false"
-test_query_string = (
-"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
-"metric=newPCRTestsByPublishDate&"
-"metric=newTestsByPublishDate&"
-"metric=newPillarOneTwoTestsByPublishDate&"
-"format=csv"
-)
-!curl "$test_query_string" > test_data.csv
-```
-
-```python Collapsed="false"
-# hospital_query_string = (
-# "https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
-# "metric=newAdmissions&"
-# "format=csv"
-# )
-# !curl "$hospital_query_string" > hospital_admissions.csv
-```
-
-```python Collapsed="false"
-# hospital_query_string = (
-# "https://api.coronavirus.data.gov.uk/v1/data?"
-# "filters=areaName=United%2520Kingdom;areaType=overview&"
-# "structure=%7B%22date%22:%22date%22,%22areaName%22:%22areaName%22,%22areaType%22:%22areaType%22,"
-# "%22newAdmissions%22:%22newAdmissions%22,%22cumAdmissions%22:%22cumAdmissions%22%7D&format=csv"
-# )
-
-# !curl "$hospital_query_string" | gunzip > hospital_admissions.csv
-```
-
-```python Collapsed="false"
-uk_data = pd.read_csv('uk_data.csv',
- parse_dates=[0], dayfirst=True)
-uk_data
-```
-
-```python Collapsed="false"
-test_data = pd.read_csv('test_data.csv',
- parse_dates=[0], dayfirst=True)
-test_data
-```
-
-```python Collapsed="false"
-test_data.columns
-```
-
-```python Collapsed="false"
-uk_data = uk_data.merge(test_data[['date', 'newPCRTestsByPublishDate',
- 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate']], how='outer', on='date')
-```
-
-```python Collapsed="false"
-uk_data.columns
-```
-
-```python Collapsed="false"
-uk_data.rename(
- columns={
- 'covidOccupiedMVBeds': 'ventilator_beds',
- 'newCasesBySpecimenDate': 'new_cases',
- 'hospitalCases': 'hospital_cases',
- 'newDeaths28DaysByPublishDate': 'new_deaths',
- 'newAdmissions': 'new_admissions',
- 'newPCRTestsByPublishDate': 'new_pcr_tests',
- 'newTestsByPublishDate': 'new_tests',
- 'newPillarOneTwoTestsByPublishDate': 'new_pillar_1_2_tests'
- }, inplace=True)
-```
-
-```python Collapsed="false"
-uk_data.dtypes
-```
-
-```python Collapsed="false"
- uk_data.columns
-```
-
-```python Collapsed="false"
-uk_data[['date',
- 'hospital_cases', 'ventilator_beds',
- 'new_cases', 'new_deaths',
- 'hospital_cases', 'new_admissions',
- 'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'
- ]].to_sql(
- 'uk_data',
- engine,
- if_exists='replace',
- index=False,
- chunksize=500,
- dtype={
- "date": Date,
- "hospital_cases": Integer,
- "ventilator_beds": Integer,
- "new_cases": Integer,
- "hospital_cases": Integer,
- "new_deaths": Integer,
- "new_admissions": Integer,
- 'new_pcr_tests': Integer,
- 'new_tests': Integer,
- 'new_pillar_1_2_tests': Integer
- }
-)
-```
-
-```python Collapsed="false"
-# %sql select * from uk_data order by date desc limit 10
-```
-
-```python Collapsed="false"
-query_string = '''drop table if exists uk_data_7;
-create table uk_data_7
-(date date primary key,
- hospital_cases real,
- ventilator_beds real,
- new_cases real,
- new_deaths real,
- new_admissions real,
- new_pcr_tests real,
- new_tests real,
- new_pillar_1_2_tests real
-);'''
-
-with engine.connect() as connection:
- connection.execute(query_string)
-```
-
-```python
-update_string = '''with ownd as (
- select date,
- avg(hospital_cases) over wnd as w_hospital_cases,
- avg(ventilator_beds) over wnd as w_ventilator_beds,
- avg(new_cases) over wnd as w_new_cases,
- avg(new_deaths) over wnd as w_new_deaths,
- avg(new_admissions) over wnd as w_new_admissions,
- avg(new_pcr_tests) over wnd as w_new_pcr_tests,
- avg(new_tests) over wnd as w_new_tests,
- avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
- count(*) over wnd as w_size
- from uk_data
- window wnd as (
- order by uk_data.date
- rows between 3 preceding and 3 following
- )
-)
-insert into uk_data_7(date,
- hospital_cases,
- ventilator_beds,
- new_cases,
- new_deaths,
- new_admissions,
- new_pcr_tests,
- new_tests,
- new_pillar_1_2_tests
- )
-(select date,
- w_hospital_cases,
- w_ventilator_beds,
- w_new_cases,
- w_new_deaths,
- w_new_admissions,
- w_new_pcr_tests,
- w_new_tests,
- w_new_pillar_1_2_tests
- from ownd
- where w_size = 7
-)'''
-with engine.connect() as connection:
- connection.execute(update_string)
-```
-
-```python Collapsed="false"
-# %%sql insert into uk_data_7(date, ventilator_beds, new_cases, hospital_cases, new_deaths, new_admissions)
-# values (
-# select date,
-# avg(ventilator_beds) over (order by date rows between 6 preceding and current row)
-# from uk_data
-# )
-```
-
-```python Collapsed="false"
-# query_string = '''insert into uk_data_7(date, hospital_cases)
-# select uk_data.date,
-# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as hospital_cases
-# from uk_data'''
-# with engine.connect() as connection:
-# connection.execute(query_string)
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 6 preceding and current row) as nc7
-# from uk_data
-# )
-# update uk_data_7
-# set ventilator_beds = nc7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7
-# from uk_data
-# )
-# update uk_data_7
-# set ventilator_beds = nc7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.new_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7
-# from uk_data
-# )
-# update uk_data_7
-# set new_cases = nc7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
-# from uk_data
-# )
-# update uk_data_7
-# set hospital_cases = d7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.new_deaths) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
-# from uk_data
-# )
-# update uk_data_7
-# set new_deaths = d7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.new_admissions) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
-# from uk_data
-# )
-# update uk_data_7
-# set new_admissions = d7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.new_pcr_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
-# from uk_data
-# )
-# update uk_data_7
-# set new_pcr_tests = d7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.new_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
-# from uk_data
-# )
-# update uk_data_7
-# set new_tests = d7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.new_pillar_1_2_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
-# from uk_data
-# )
-# update uk_data_7
-# set new_pillar_1_2_tests = d7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with wnd as
-# ( select date,
-# avg(new_pillar_1_2_tests)
-# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests
-# from uk_data
-# )
-# update uk_data_7
-# set new_pillar_1_2_tests = wnd.a_new_pillar_1_2_tests
-# from wnd
-# where uk_data_7.date = wnd.date
-# and (select count(*) from wnd) = 7
-```
-
-```python Collapsed="false"
-# %%sql
-# with wnd as
-# ( select date,
-# avg(new_pillar_1_2_tests)
-# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests,
-# count(new_pillar_1_2_tests)
-# over (order by uk_data.date rows between 3 preceding and 3 following) as a_count
-# from uk_data
-# )
-# select uk_data_7.date, new_pillar_1_2_tests, wnd.a_new_pillar_1_2_tests, wnd.a_count
-# from uk_data_7, wnd
-# where uk_data_7.date = wnd.date
-# order by uk_data_7.date desc limit 10
-
-# select date,
-# count(*) over wnd as w_size
-# from uk_data
-# window wnd as (
-# order by uk_data.date
-# rows between 3 preceding and 3 following
-# );
-```
-
-```python Collapsed="false"
-# %%sql
-# select date,
-# count(*) over wnd as w_size
-# from uk_data
-# window wnd as (
-# order by uk_data.date
-# rows between 3 preceding and 3 following
-# )
-# order by date desc limit 10
-```
-
-```python
-# %%sql
-# with ownd as (
-# select date,
-# avg(hospital_cases) over wnd as w_hospital_cases,
-# avg(ventilator_beds) over wnd as w_ventilator_beds,
-# avg(new_cases) over wnd as w_new_cases,
-# avg(new_deaths) over wnd as w_new_deaths,
-# avg(new_admissions) over wnd as w_new_admissions,
-# avg(new_pcr_tests) over wnd as w_new_pcr_tests,
-# avg(new_tests) over wnd as w_new_tests,
-# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
-# count(*) over wnd as w_size
-# from uk_data
-# window wnd as (
-# order by uk_data.date
-# rows between 3 preceding and 3 following
-# ))
-# insert into uk_data_7(date,
-# hospital_cases,
-# ventilator_beds,
-# new_cases,
-# new_deaths,
-# new_admissions,
-# new_pcr_tests,
-# new_tests,
-# new_pillar_1_2_tests
-# )
-# (select date,
-# avg(hospital_cases) over wnd as w_hospital_cases,
-# avg(ventilator_beds) over wnd as w_ventilator_beds,
-# avg(new_cases) over wnd as w_new_cases,
-# avg(new_deaths) over wnd as w_new_deaths,
-# avg(new_admissions) over wnd as w_new_admissions,
-# avg(new_pcr_tests) over wnd as w_new_pcr_tests,
-# avg(new_tests) over wnd as w_new_tests,
-# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
-# count(*) over wnd as w_size
-# from uk_data
-# window wnd as (
-# order by uk_data.date
-# rows between 3 preceding and 3 following
-# )
-# )
-# set date = ownd.date,
-# hospital_cases = w_hospital_cases,
-# ventilator_beds = w_ventilator_beds,
-# new_cases = w_new_cases,
-# new_deaths = w_new_deaths,
-# new_admissions = w_new_admissions,
-# new_pcr_tests = w_new_pcr_tests,
-# new_tests = w_new_tests,
-# new_pillar_1_2_tests = w_new_pillar_1_2_tests
-# from ownd
-# where w_size = 7
-```
-
-```python
-# %%sql
-# with ownd as (
-# select date,
-# avg(hospital_cases) over wnd as w_hospital_cases,
-# avg(ventilator_beds) over wnd as w_ventilator_beds,
-# avg(new_cases) over wnd as w_new_cases,
-# avg(new_deaths) over wnd as w_new_deaths,
-# avg(new_admissions) over wnd as w_new_admissions,
-# avg(new_pcr_tests) over wnd as w_new_pcr_tests,
-# avg(new_tests) over wnd as w_new_tests,
-# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
-# count(*) over wnd as w_size
-# from uk_data
-# window wnd as (
-# order by uk_data.date
-# rows between 3 preceding and 3 following
-# )
-# )
-# insert into uk_data_7(date,
-# hospital_cases,
-# ventilator_beds,
-# new_cases,
-# new_deaths,
-# new_admissions,
-# new_pcr_tests,
-# new_tests,
-# new_pillar_1_2_tests
-# )
-# (select date,
-# w_hospital_cases,
-# w_ventilator_beds,
-# w_new_cases,
-# w_new_deaths,
-# w_new_admissions,
-# w_new_pcr_tests,
-# w_new_tests,
-# w_new_pillar_1_2_tests
-# from ownd
-# where w_size = 7
-# )
-```
-
-```python Collapsed="false"
-%sql select * from uk_data_7 order by date desc limit 10
-```
-
-```python Collapsed="false"
-%sql select * from uk_data order by date desc limit 10
-```
-
-```python Collapsed="false"
-
-```
# %%
+COUNTRIES_ALL
+
+# %%
+# raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',
+# 'countriesAndTerritories': 'country_name',
+# 'countryterritoryCode': 'country_territory_code',
+# 'popData2019': 'population_2019',
+# 'continentExp': 'continent'}, inplace=True)
raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',
'countriesAndTerritories': 'country_name',
'countryterritoryCode': 'country_territory_code',
+ 'cases': 'cases_weekly',
+ 'deaths': 'deaths_weekly',
'popData2019': 'population_2019',
- 'continentExp': 'continent'}, inplace=True)
+ 'continentExp': 'continent',
+ 'Cumulative_number_for_14_days_of_COVID-19_cases_per_100000': 'notification_rate_per_100000_population_14-days'
+ },
+ inplace=True)
# %%
+++ /dev/null
----
-jupyter:
- jupytext:
- formats: ipynb,md
- text_representation:
- extension: .md
- format_name: markdown
- format_version: '1.2'
- jupytext_version: 1.9.1
- kernelspec:
- display_name: Python 3
- language: python
- name: python3
----
-
-<!-- #region Collapsed="false" -->
-Data from:
-
-* [Office of National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (Endland and Wales) Weeks start on a Saturday.
-* [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
-* [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.
-
-<!-- #endregion -->
-
-```python Collapsed="false"
-import itertools
-import collections
-import json
-import pandas as pd
-import numpy as np
-from scipy.stats import gmean
-import datetime
-
-import matplotlib as mpl
-import matplotlib.pyplot as plt
-%matplotlib inline
-
-from sqlalchemy.types import Integer, Text, String, DateTime, Float
-from sqlalchemy import create_engine
-%load_ext sql
-```
-
-```python Collapsed="false"
-connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
-```
-
-```python Collapsed="false"
-%sql $connection_string
-```
-
-```python Collapsed="false"
-conn = create_engine(connection_string)
-```
-
-```python Collapsed="false"
-england_wales_filename = 'uk-deaths-data/publishedweek532020.xlsx'
-```
-
-```sql Collapsed="false"
-drop table if exists all_causes_deaths;
-create table all_causes_deaths (
- week integer,
- year integer,
- date_up_to date,
- nation varchar(20),
- deaths integer,
- CONSTRAINT week_nation PRIMARY KEY(year, week, nation)
-);
-```
-
-```python Collapsed="false"
-raw_data_2015 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2015.csv',
- parse_dates=[1, 2], dayfirst=True,
-# index_col=0,
- header=[0, 1]
- )
-dh15i = raw_data_2015.iloc[:, [0, 3]]
-dh15i.set_index(dh15i.columns[0], inplace=True)
-dh15i.columns = ['total_2015']
-dh15i.tail()
-```
-
-```python Collapsed="false"
-raw_data_2015.head()
-```
-
-```python Collapsed="false"
-rd = raw_data_2015.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
- columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2015P)': 'deaths',
- 'Registration Week': 'week'}
- )
-rd['year'] = 2015
-rd['nation'] = 'Northern Ireland'
-rd.head()
-```
-
-```python Collapsed="false"
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python Collapsed="false"
-%sql select * from all_causes_deaths limit 10
-```
-
-```python Collapsed="false"
-raw_data_2016 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2016.csv',
- parse_dates=[1, 2], dayfirst=True,
-# index_col=0,
- header=[0, 1]
- )
-raw_data_2016.head()
-# dh16i = raw_data_2016.iloc[:, [2]]
-# dh16i.columns = ['total_2016']
-# # dh16i.head()
-dh16i = raw_data_2016.iloc[:, [0, 3]]
-dh16i.set_index(dh16i.columns[0], inplace=True)
-dh16i.columns = ['total_2016']
-dh16i.tail()
-```
-
-```python Collapsed="false"
-rd = raw_data_2016.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
- columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2016P)': 'deaths',
- 'Registration Week': 'week'}
- )
-rd['year'] = 2016
-rd['nation'] = 'Northern Ireland'
-rd.head()
-```
-
-```python Collapsed="false"
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python Collapsed="false"
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
-```
-
-```python Collapsed="false"
-raw_data_2017 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2017.csv',
- parse_dates=[1, 2], dayfirst=True,
-# index_col=0,
- header=[0, 1]
- )
-raw_data_2017.head()
-dh17i = raw_data_2017.iloc[:, [0, 3]]
-dh17i.set_index(dh17i.columns[0], inplace=True)
-dh17i.columns = ['total_2017']
-dh17i.tail()
-```
-
-```python Collapsed="false"
-rd = raw_data_2017.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
- columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2017P)': 'deaths',
- 'Registration Week': 'week'}
- )
-rd['year'] = 2017
-rd['nation'] = 'Northern Ireland'
-rd.head()
-```
-
-```python Collapsed="false"
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python Collapsed="false"
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
-```
-
-```python Collapsed="false"
-raw_data_2018 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2018.csv',
- parse_dates=[1, 2], dayfirst=True,
-# index_col=0,
- header=[0, 1]
- )
-raw_data_2018.head()
-dh18i = raw_data_2018.iloc[:, [0, 3]]
-dh18i.set_index(dh18i.columns[0], inplace=True)
-dh18i.columns = ['total_2018']
-dh18i.tail()
-```
-
-```python Collapsed="false"
-rd = raw_data_2018.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
- columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2018P)': 'deaths',
- 'Registration Week': 'week'}
- )
-rd['year'] = 2018
-rd['nation'] = 'Northern Ireland'
-rd.head()
-```
-
-```python Collapsed="false"
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python Collapsed="false"
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
-```
-
-```python Collapsed="false"
-raw_data_2019 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2019.csv',
- parse_dates=[1, 2], dayfirst=True,
-# index_col=0,
- header=[0, 1]
- )
-raw_data_2019.head()
-dh19i = raw_data_2019.iloc[:, [0, 3]]
-dh19i.set_index(dh19i.columns[0], inplace=True)
-dh19i.columns = ['total_2019']
-dh19i.tail()
-```
-
-```python Collapsed="false"
-rd = raw_data_2019.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
- columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2019P)': 'deaths',
- 'Registration Week': 'week'}
- )
-rd['year'] = 2019
-rd['nation'] = 'Northern Ireland'
-rd.head()
-```
-
-```python Collapsed="false"
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python Collapsed="false"
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
-```
-
-```python Collapsed="false"
-raw_data_2020_i = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2020.csv',
- parse_dates=[1], dayfirst=True,
- header=[0, 1]
- )
-raw_data_2020_i.head()
-```
-
-```python Collapsed="false"
-rd = raw_data_2020_i.iloc[:, [0, 1, 2]].droplevel(1, axis=1).rename(
- columns={'Week Ending (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2020P)': 'deaths',
- 'Registration Week': 'week'}
- )
-rd['year'] = 2020
-rd['nation'] = 'Northern Ireland'
-rd.head()
-```
-
-```python Collapsed="false"
-rd.tail()
-```
-
-```python Collapsed="false"
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python Collapsed="false"
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python
-raw_data_2020_i.set_index(raw_data_2020_i.columns[0], inplace=True)
-raw_data_2020_i.tail()
-```
-
-```python Collapsed="false"
-datetime.datetime.now().isocalendar()
-```
-
-```python Collapsed="false"
-datetime.datetime.fromisocalendar(2021, 3, 1)
-```
-
-```python Collapsed="false"
-
-```
-
-```python Collapsed="false"
-raw_data_s = pd.read_csv('uk-deaths-data/weekly-deaths-scotland.csv',
- index_col=0,
- header=0,
- skiprows=2
- )
-# raw_data_s
-```
-
-```python Collapsed="false"
-deaths_headlines_s = raw_data_s[reversed('2015 2016 2017 2018 2019 2020'.split())]
-deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns]
-deaths_headlines_s.reset_index(drop=True, inplace=True)
-deaths_headlines_s.index = deaths_headlines_s.index + 1
-deaths_headlines_s
-```
-
-```python
-%sql select * from all_causes_deaths limit 5
-```
-
-```python Collapsed="false"
-for year, ser in deaths_headlines_s.items():
- year_i = int(year[-4:])
-# print(year_i)
- for week, deaths in ser.dropna().iteritems():
-# print(datetime.date.fromisocalendar(year_i, week, 7), deaths)
- dut = datetime.date.fromisocalendar(year_i, week, 7)
- %sql insert into all_causes_deaths(week, year, date_up_to, nation, deaths) values ({week}, {year_i}, :dut, 'Scotland', {deaths})
-```
-
-```python
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python
-%sql select year, nation, date_up_to from all_causes_deaths where week=3 order by year, nation
-```
-
-```python Collapsed="false"
-eng_xls = pd.read_excel(england_wales_filename,
- sheet_name="Weekly figures 2020",
- skiprows=[0, 1, 2, 3],
- header=0,
- index_col=[1]
- ).iloc[:91].T
-eng_xls
-```
-
-```python Collapsed="false"
-# eng_xls_columns
-```
-
-```python Collapsed="false"
-eng_xls_columns = list(eng_xls.columns)
-
-for i, c in enumerate(eng_xls_columns):
-# print(i, c, type(c), isinstance(c, float))
- if isinstance(c, float) and np.isnan(c):
- if eng_xls.iloc[0].iloc[i] is not pd.NaT:
- eng_xls_columns[i] = eng_xls.iloc[0].iloc[i]
-
-# np.isnan(eng_xls_columns[0])
-# eng_xls_columns
-
-eng_xls.columns = eng_xls_columns
-# eng_xls.columns
-```
-
-```python
-eng_xls
-```
-
-```python
-rd = eng_xls.iloc[1:][['Week ended', 'Wales']].reset_index(level=0).rename(
- columns={'Week ended': 'date_up_to', 'Wales': 'deaths',
- 'index': 'week'}
- )
-rd['year'] = 2020
-rd['nation'] = 'Wales'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python Collapsed="false"
-eng_xls = eng_xls.iloc[1:]
-eng_xls['England deaths'] = eng_xls.loc[:, 'Total deaths, all ages'] - eng_xls.loc[:, 'Wales']
-```
-
-```python
-eng_xls.head()
-```
-
-```python
-rd = eng_xls[['Week ended', 'England deaths']].reset_index(level=0).rename(
- columns={'Week ended': 'date_up_to', 'England deaths': 'deaths',
- 'index': 'week'}
- )
-rd['year'] = 2020
-rd['nation'] = 'England'
-rd.head()
-```
-
-```python
-%sql delete from all_causes_deaths where nation = 'England'
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-rd.tail()
-```
-
-```python
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python Collapsed="false"
-# raw_data_2020 = pd.read_csv('uk-deaths-data/publishedweek272020.csv',
-# parse_dates=[1], dayfirst=True,
-# index_col=0,
-# header=[0, 1])
-```
-
-```python Collapsed="false"
-
-```
-
-```python Collapsed="false"
-# raw_data_2020.head()
-```
-
-```python Collapsed="false"
-# raw_data_2020['W92000004', 'Wales']
-```
-
-```python Collapsed="false"
-raw_data_2019 = pd.read_csv('uk-deaths-data/publishedweek522019.csv',
- parse_dates=[1], dayfirst=True,
-# index_col=0,
- header=[0, 1])
-# raw_data_2019.head()
-```
-
-```python
-rdew = raw_data_2019.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
-rdew.head()
-```
-
-```python
-rd = rdew.drop(columns=['Total deaths, all ages']).rename(
- columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
- 'Week number': 'week'}
- )
-rd['year'] = 2019
-rd['nation'] = 'Wales'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-rd = rdew.loc[:, ['Week ended','Week number']]
-rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
-rd = rd.rename(
- columns={'Week ended': 'date_up_to', 'Week number': 'week'}
- )
-rd['year'] = 2019
-rd['nation'] = 'England'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python Collapsed="false"
-raw_data_2018 = pd.read_csv('uk-deaths-data/publishedweek522018.csv',
- parse_dates=[1], dayfirst=True,
-# index_col=0,
- header=[0, 1])
-# raw_data_2018.head()
-```
-
-```python
-rdew = raw_data_2018.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
-rdew.head()
-```
-
-```python
-rd = rdew.drop(columns=['Total deaths, all ages']).rename(
- columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
- 'Week number': 'week'}
- )
-rd['year'] = 2018
-rd['nation'] = 'Wales'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-rd = rdew.loc[:, ['Week ended','Week number']]
-rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
-rd = rd.rename(
- columns={'Week ended': 'date_up_to', 'Week number': 'week'}
- )
-rd['year'] = 2018
-rd['nation'] = 'England'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python Collapsed="false"
-raw_data_2017 = pd.read_csv('uk-deaths-data/publishedweek522017.csv',
- parse_dates=[1], dayfirst=True,
-# index_col=0,
- header=[0, 1])
-# raw_data_2017.head()
-```
-
-```python
-rdew = raw_data_2017.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
-rdew.head()
-```
-
-```python
-rd = rdew.drop(columns=['Total deaths, all ages']).rename(
- columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
- 'Week number': 'week'}
- )
-rd['year'] = 2017
-rd['nation'] = 'Wales'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-rd = rdew.loc[:, ['Week ended','Week number']]
-rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
-rd = rd.rename(
- columns={'Week ended': 'date_up_to', 'Week number': 'week'}
- )
-rd['year'] = 2017
-rd['nation'] = 'England'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python
-
-```
-
-```python Collapsed="false"
-raw_data_2016 = pd.read_csv('uk-deaths-data/publishedweek522016.csv',
- parse_dates=[1], dayfirst=True,
-# index_col=0,
- header=[0, 1])
-# raw_data_2016.head()
-```
-
-```python
-raw_data_2016.head()
-```
-
-```python
-rdew = raw_data_2016.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
-rdew.head()
-```
-
-```python
-rd = rdew.drop(columns=['Total deaths, all ages']).rename(
- columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
- 'Week number': 'week'}
- )
-rd['year'] = 2016
-rd['nation'] = 'Wales'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-rd = rdew.loc[:, ['Week ended','Week number']]
-rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
-rd = rd.rename(
- columns={'Week ended': 'date_up_to', 'Week number': 'week'}
- )
-rd['year'] = 2016
-rd['nation'] = 'England'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
- %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python Collapsed="false"
-raw_data_2015 = pd.read_csv('uk-deaths-data/publishedweek2015.csv',
- parse_dates=[1], dayfirst=True,
-# index_col=0,
- header=[0, 1])
-# raw_data_2015.head()
-```
-
-```python
-rdew = raw_data_2015.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
-rdew.head()
-```
-
-```python
-rd = rdew.drop(columns=['Total deaths, all ages']).rename(
- columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
- 'Week number': 'week'}
- )
-rd['year'] = 2015
-rd['nation'] = 'Wales'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-rd = rdew.loc[:, ['Week ended','Week number']]
-rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
-rd = rd.rename(
- columns={'Week ended': 'date_up_to', 'Week number': 'week'}
- )
-rd['year'] = 2015
-rd['nation'] = 'England'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by year, nation
-```
-
-```sql magic_args="res << select week, year, deaths"
-from all_causes_deaths
-where nation = 'England'
-```
-
-```python
-deaths_headlines_e = res.DataFrame().pivot(index='week', columns='year', values='deaths')
-deaths_headlines_e
-```
-
-```python
-
-```
-
-```sql magic_args="res << select week, year, deaths"
-from all_causes_deaths
-where nation = 'Scotland'
-```
-
-```python
-deaths_headlines_s = res.DataFrame().pivot(index='week', columns='year', values='deaths')
-deaths_headlines_s
-```
-
-```sql magic_args="res << select week, year, deaths"
-from all_causes_deaths
-where nation = 'Wales'
-```
-
-```python
-deaths_headlines_w = res.DataFrame().pivot(index='week', columns='year', values='deaths')
-deaths_headlines_w
-```
-
-```sql magic_args="res << select week, year, deaths"
-from all_causes_deaths
-where nation = 'Northern Ireland'
-```
-
-```python
-deaths_headlines_i = res.DataFrame().pivot(index='week', columns='year', values='deaths')
-deaths_headlines_i
-```
-
-```python Collapsed="false"
-deaths_headlines = deaths_headlines_e + deaths_headlines_w + deaths_headlines_i + deaths_headlines_s
-deaths_headlines
-```
-
-```python
-deaths_headlines_e.columns
-```
-
-```python
-deaths_headlines_e['previous_mean'] = deaths_headlines_e[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
-deaths_headlines_w['previous_mean'] = deaths_headlines_w[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
-deaths_headlines_s['previous_mean'] = deaths_headlines_s[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
-deaths_headlines_i['previous_mean'] = deaths_headlines_i[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
-deaths_headlines['previous_mean'] = deaths_headlines[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
-deaths_headlines
-```
-
-```python Collapsed="false"
-deaths_headlines[[2020, 2019, 2018, 2017, 2016, 2015]].plot(figsize=(14, 8))
-```
-
-```python Collapsed="false"
-deaths_headlines[[2020, 'previous_mean']].plot(figsize=(10, 8))
-```
-
-```python Collapsed="false"
-deaths_headlines_i.plot()
-```
-
-```python
-deaths_headlines[2020].sum() - deaths_headlines.previous_mean.sum()
-```
-
-```python Collapsed="false"
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-dhna = deaths_headlines.dropna()
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
- np.flip(
- np.arange(len(dhna))/float(len(dhna))*2.*np.pi),
- 14)
-# l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#b56363", label="2015") # 0
-# l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#a4b563", label="2016") # 72
-# l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#63b584", label="2017") # 144
-# l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#6384b5", label="2018") # 216
-# l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#a4635b", label="2019") # 288
-l15, = ax.plot(theta, dhna[2015], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, dhna[2016], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, dhna[2017], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, dhna[2018], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, dhna[2019], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, dhna['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, dhna[2020], color="red", label="2020")
-
-# deaths_headlines.total_2019.plot(ax=ax)
-
-def _closeline(line):
- x, y = line.get_data()
- x = np.concatenate((x, [x[0]]))
- y = np.concatenate((y, [y[0]]))
- line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(dhna.index)
-plt.legend()
-plt.title("Deaths by week over years, all UK")
-plt.savefig('deaths-radar.png')
-plt.show()
-```
-
-<!-- #region Collapsed="false" -->
-# Plots for UK nations
-<!-- #endregion -->
-
-```python Collapsed="false"
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
- np.flip(
- np.arange(len(deaths_headlines_e))/float(len(deaths_headlines_e))*2.*np.pi),
- 14)
-l15, = ax.plot(theta, deaths_headlines_e[2015], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, deaths_headlines_e[2016], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, deaths_headlines_e[2017], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, deaths_headlines_e[2018], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, deaths_headlines_e[2019], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, deaths_headlines_e['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, deaths_headlines_e[2020], color="red", label="2020")
-
-# deaths_headlines.total_2019.plot(ax=ax)
-
-def _closeline(line):
- x, y = line.get_data()
- x = np.concatenate((x, [x[0]]))
- y = np.concatenate((y, [y[0]]))
- line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(deaths_headlines_e.index)
-plt.legend()
-plt.title("Deaths by week over years, England")
-plt.savefig('deaths-radar_england.png')
-plt.show()
-```
-
-```python Collapsed="false"
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
- np.flip(
- np.arange(len(deaths_headlines_w))/float(len(deaths_headlines_w))*2.*np.pi),
- 14)
-l15, = ax.plot(theta, deaths_headlines_w[2015], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, deaths_headlines_w[2016], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, deaths_headlines_w[2017], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, deaths_headlines_w[2018], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, deaths_headlines_w[2019], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, deaths_headlines_w['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, deaths_headlines_w[2020], color="red", label="2020")
-
-
-def _closeline(line):
- x, y = line.get_data()
- x = np.concatenate((x, [x[0]]))
- y = np.concatenate((y, [y[0]]))
- line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(deaths_headlines_w.index)
-plt.legend()
-plt.title("Deaths by week over years, Wales")
-plt.savefig('deaths-radar_wales.png')
-plt.show()
-```
-
-```python Collapsed="false"
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
- np.flip(
- np.arange(len(deaths_headlines_s))/float(len(deaths_headlines_s))*2.*np.pi),
- 14)
-l15, = ax.plot(theta, deaths_headlines_s[2015], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, deaths_headlines_s[2016], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, deaths_headlines_s[2017], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, deaths_headlines_s[2018], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, deaths_headlines_s[2019], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, deaths_headlines_s['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, deaths_headlines_s[2020], color="red", label="2020")
-
-
-def _closeline(line):
- x, y = line.get_data()
- x = np.concatenate((x, [x[0]]))
- y = np.concatenate((y, [y[0]]))
- line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(deaths_headlines_s.index)
-plt.legend()
-plt.title("Deaths by week over years, Scotland")
-plt.savefig('deaths-radar_scotland.png')
-plt.show()
-```
-
-```python Collapsed="false"
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
- np.flip(
- np.arange(len(deaths_headlines_i))/float(len(deaths_headlines_i))*2.*np.pi),
- 14)
-l15, = ax.plot(theta, deaths_headlines_i[2015], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, deaths_headlines_i[2016], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, deaths_headlines_i[2017], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, deaths_headlines_i[2018], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, deaths_headlines_i[2019], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, deaths_headlines_i['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, deaths_headlines_i[2020], color="red", label="2020")
-
-
-def _closeline(line):
- x, y = line.get_data()
- x = np.concatenate((x, [x[0]]))
- y = np.concatenate((y, [y[0]]))
- line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(deaths_headlines_i.index)
-plt.legend()
-plt.title("Deaths by week over years, Northern Ireland")
-plt.savefig('deaths-radar_northern_ireland.png')
-plt.show()
-```
-
-```python Collapsed="false"
-
-```
-
-```python Collapsed="false"
-
-```
import matplotlib as mpl
import matplotlib.pyplot as plt
plt.ioff()
+# # %matplotlib inline
# %%
+
+# %% [markdown]
+#
--- /dev/null
+---
+jupyter:
+ jupytext:
+ formats: ipynb,md
+ text_representation:
+ extension: .md
+ format_name: markdown
+ format_version: '1.2'
+ jupytext_version: 1.9.1
+ kernelspec:
+ display_name: Python 3
+ language: python
+ name: python3
+---
+
+<!-- #region Collapsed="false" -->
+Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
+<!-- #endregion -->
+
+```python Collapsed="false"
+import itertools
+import collections
+import json
+import pandas as pd
+import numpy as np
+from scipy.stats import gmean
+import datetime
+
+from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float
+from sqlalchemy import create_engine
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+%matplotlib inline
+%load_ext sql
+```
+
+```python Collapsed="false"
+connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
+```
+
+```python Collapsed="false"
+%sql $connection_string
+```
+
+```python
+engine = create_engine(connection_string)
+```
+
+```python Collapsed="false"
+DEATH_COUNT_THRESHOLD = 10
+COUNTRIES_CORE = tuple(sorted('IT DE UK ES IE FR BE'.split()))
+COUNTRIES_NORDIC = tuple('SE NO DK FI UK'.split())
+COUNTRIES_FRIENDS = tuple('IT UK ES BE SI MX'.split())
+# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
+
+COUNTRIES_AMERICAS = ('AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
+ 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
+ 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
+ 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE')
+COUNTRIES_OF_INTEREST = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
+COUNTRIES_ALL = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
+```
+
+```python Collapsed="false"
+# res = %sql select report_date, geo_id, deaths_weekly, culm_deaths from weekly_cases where geo_id in $COUNTRIES_CORE order by report_date, geo_id
+```
+
+```python Collapsed="false"
+# country_data = res.DataFrame()
+# country_data['report_date'] = country_data.report_date.astype('datetime64[ns]')
+# country_data.set_index('report_date', inplace=True)
+# country_data.tail(10)
+```
+
+```python
+%sql select geo_id, country_name, population_2019 from countries where population_2019 > 30000000
+```
+
+```python
+query_string = f'''select report_date, weekly_cases.geo_id, country_name,
+ 10000000.0 * deaths_weekly / population_2019 as deaths_weekly_pc,
+ 10000000.0 * culm_deaths / population_2019 as culm_deaths_pc
+from weekly_cases, countries
+where weekly_cases.geo_id = countries.geo_id
+and countries.population_2019 > 20000000
+order by report_date, geo_id'''
+
+country_data = pd.read_sql_query(query_string,
+ engine,
+ index_col = 'report_date',
+ parse_dates = ['report_date']
+ )
+```
+
+```python
+country_data.tail()
+```
+
+```python
+latest_date = country_data.index.max()
+latest_date
+```
+
+```python
+# highest_deaths_geo_ids = country_data.loc[latest_date, :].nlargest(10, 'culm_deaths_pc')['geo_id'].values
+highest_deaths_geo_ids = (
+ country_data.loc[latest_date, :]
+ .nlargest(10, 'culm_deaths_pc')
+ ['country_name']
+ .values
+)
+
+highest_deaths_geo_ids
+```
+
+```python
+high_deaths_country_data = (
+ country_data[
+ country_data.country_name.isin(highest_deaths_geo_ids)]
+)
+high_deaths_country_data.tail()
+```
+
+```python Collapsed="false"
+deaths_culm = high_deaths_country_data.pivot(columns='country_name',
+ values='culm_deaths_pc')
+deaths_culm.tail()
+```
+
+```python Collapsed="false"
+deaths_culm.plot()
+```
+
+```python Collapsed="false"
+ax = deaths_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Total deaths, linear")
+ax.set_xlabel(f"Date")
+for c in highest_deaths_geo_ids:
+ lvi = deaths_culm[c].last_valid_index()
+ ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_culm[c][lvi], s = f"{c}: {deaths_culm[c][lvi]:.0f}")
+# plt.savefig('covid_deaths_total_linear.png')
+```
+
+```python
+deaths_culm.loc[latest_date].sort_values()
+```
+
+```python Collapsed="false"
+deaths_weekly = high_deaths_country_data.pivot(columns='country_name', values='deaths_weekly_pc')
+deaths_weekly.tail()
+```
+
+```python Collapsed="false"
+deaths_weekly.plot()
+```
+
+```python Collapsed="false"
+ax = deaths_weekly.loc['2020-03-01':].plot(figsize=(10, 6), title="Deaths per week")
+ax.set_xlabel('Date')
+for c in highest_deaths_geo_ids:
+ lvi = deaths_weekly[c].last_valid_index()
+ ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_weekly[c][lvi], s = c)
+# plt.savefig('covid_deaths_per_week.png')
+```
+
+```python Collapsed="false"
+ax = deaths_weekly.iloc[-6:].plot(figsize=(10, 6), title="Deaths per week")#, ylim=(-10, 100))
+ax.set_xlabel("Date")
+
+text_x_pos = deaths_weekly.last_valid_index() + pd.Timedelta(days=0.5)
+
+for c in highest_deaths_geo_ids:
+ lvi = deaths_weekly[c].last_valid_index()
+# if c != 'ES':
+ ax.text(x = text_x_pos, y = deaths_weekly[c][lvi], s = f"{c}: {deaths_weekly[c][lvi]:.0f}")
+plt.savefig('deaths_by_date_last_6_weeks.png')
+```
+
+```python
+query_string = f'''select report_date, geo_id,
+(cast(deaths_weekly as float) / population_2019) deaths_weekly_pc,
+(cast(culm_deaths as float) / population_2019) as culm_deaths_pc
+from weekly_cases join countries using (geo_id)
+where geo_id in {COUNTRIES_CORE}
+order by report_date, geo_id'''
+
+deaths_pc_data = pd.read_sql_query(query_string,
+ engine,
+ index_col = 'report_date',
+ parse_dates = ['report_date']
+ )
+deaths_pc_data.tail()
+```
+
+```python Collapsed="false"
+deaths_pc_culm = deaths_pc_data.pivot(columns='geo_id', values='culm_deaths_pc')
+deaths_pc_culm.tail()
+```
+
+```python
+ax = deaths_pc_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear")
+ax.set_xlabel(f"Date")
+for c in COUNTRIES_CORE:
+ lvi = deaths_pc_culm[c].last_valid_index()
+ ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc_culm[c][lvi], s = f"{c}: {deaths_pc_culm[c][lvi]:.0f}")
+# plt.savefig('covid_deaths_total_linear.png')
+```
+
+```python Collapsed="false"
+deaths_pc = deaths_pc_data.pivot(columns='geo_id', values='deaths_weekly_pc')
+deaths_pc.tail()
+```
+
+```python
+ax = deaths_pc.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear")
+ax.set_xlabel(f"Date")
+for c in COUNTRIES_CORE:
+ lvi = deaths_pc[c].last_valid_index()
+ ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]:.0f}")
+# plt.savefig('covid_deaths_total_linear.png')
+```
+
+```python
+ax = deaths_pc.iloc[-6:].plot(figsize=(10, 6), title="Deaths per million, linear")
+ax.set_xlabel(f"Date")
+for c in COUNTRIES_CORE:
+ lvi = deaths_pc[c].last_valid_index()
+ ax.text(x = lvi, y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]*10**6:.0f}")
+# plt.savefig('covid_deaths_total_linear.png')
+```
+
+```python
+
+```
+++ /dev/null
----
-jupyter:
- jupytext:
- formats: ipynb,md
- text_representation:
- extension: .md
- format_name: markdown
- format_version: '1.2'
- jupytext_version: 1.9.1
- kernelspec:
- display_name: Python 3
- language: python
- name: python3
----
-
-<!-- #region Collapsed="false" -->
-Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
-<!-- #endregion -->
-
-```python Collapsed="false"
-import itertools
-import collections
-import json
-import pandas as pd
-import numpy as np
-from scipy.stats import gmean
-import datetime
-
-from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float
-from sqlalchemy import create_engine
-
-import matplotlib as mpl
-import matplotlib.pyplot as plt
-%matplotlib inline
-%load_ext sql
-```
-
-```python Collapsed="false"
-connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
-```
-
-```python Collapsed="false"
-%sql $connection_string
-```
-
-```python
-engine = create_engine(connection_string)
-```
-
-```python Collapsed="false"
-DEATH_COUNT_THRESHOLD = 10
-COUNTRIES_CORE = tuple(sorted('IT DE UK ES IE FR BE'.split()))
-COUNTRIES_NORDIC = tuple('SE NO DK FI UK'.split())
-COUNTRIES_FRIENDS = tuple('IT UK ES BE SI MX'.split())
-# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
-
-COUNTRIES_AMERICAS = ('AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
- 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
- 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
- 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE')
-COUNTRIES_OF_INTEREST = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
-COUNTRIES_ALL = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
-```
-
-```python Collapsed="false"
-# res = %sql select report_date, geo_id, deaths_weekly, culm_deaths from weekly_cases where geo_id in $COUNTRIES_CORE order by report_date, geo_id
-```
-
-```python Collapsed="false"
-# country_data = res.DataFrame()
-# country_data['report_date'] = country_data.report_date.astype('datetime64[ns]')
-# country_data.set_index('report_date', inplace=True)
-# country_data.tail(10)
-```
-
-```python
-query_string = f'''select report_date, geo_id, deaths_weekly, culm_deaths
-from weekly_cases
-where geo_id in {COUNTRIES_CORE}
-order by report_date, geo_id'''
-
-country_data = pd.read_sql_query(query_string,
- engine,
- index_col = 'report_date',
- parse_dates = ['report_date']
- )
-```
-
-```python Collapsed="false"
-deaths_culm = country_data.pivot(columns='geo_id', values='culm_deaths')
-deaths_culm.tail()
-```
-
-```python Collapsed="false"
-deaths_culm.plot()
-```
-
-```python Collapsed="false"
-ax = deaths_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Total deaths, linear")
-ax.set_xlabel(f"Date")
-for c in COUNTRIES_CORE:
- lvi = deaths_culm[c].last_valid_index()
- ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_culm[c][lvi], s = f"{c}: {deaths_culm[c][lvi]:.0f}")
-plt.savefig('covid_deaths_total_linear.png')
-```
-
-```python Collapsed="false"
-deaths_weekly = country_data.pivot(columns='geo_id', values='deaths_weekly')
-deaths_weekly.tail()
-```
-
-```python Collapsed="false"
-deaths_weekly.plot()
-```
-
-```python Collapsed="false"
-ax = deaths_weekly.loc['2020-03-01':, COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per week")
-ax.set_xlabel('Date')
-for c in COUNTRIES_CORE:
- lvi = deaths_weekly[c].last_valid_index()
- ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_weekly[c][lvi], s = c)
-plt.savefig('covid_deaths_per_week.png')
-```
-
-```python Collapsed="false"
-ax = deaths_weekly.iloc[-6:].plot(figsize=(10, 6), title="Deaths per week")#, ylim=(-10, 100))
-ax.set_xlabel("Date")
-
-text_x_pos = deaths_weekly.last_valid_index() + pd.Timedelta(days=0.5)
-
-for c in COUNTRIES_CORE:
- lvi = deaths_weekly[c].last_valid_index()
-# if c != 'ES':
- ax.text(x = text_x_pos, y = deaths_weekly[c][lvi], s = f"{c}: {deaths_weekly[c][lvi]:.0f}")
-plt.savefig('deaths_by_date_last_6_weeks.png')
-```
-
-```python
-query_string = f'''select report_date, geo_id,
-(cast(deaths_weekly as float) / population_2019) deaths_weekly_pc,
-(cast(culm_deaths as float) / population_2019) as culm_deaths_pc
-from weekly_cases join countries using (geo_id)
-where geo_id in {COUNTRIES_CORE}
-order by report_date, geo_id'''
-
-deaths_pc_data = pd.read_sql_query(query_string,
- engine,
- index_col = 'report_date',
- parse_dates = ['report_date']
- )
-deaths_pc_data.tail()
-```
-
-```python Collapsed="false"
-deaths_pc_culm = deaths_pc_data.pivot(columns='geo_id', values='culm_deaths_pc')
-deaths_pc_culm.tail()
-```
-
-```python
-ax = deaths_pc_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear")
-ax.set_xlabel(f"Date")
-for c in COUNTRIES_CORE:
- lvi = deaths_pc_culm[c].last_valid_index()
- ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc_culm[c][lvi], s = f"{c}: {deaths_pc_culm[c][lvi]*10**6:.0f}")
-# plt.savefig('covid_deaths_total_linear.png')
-```
-
-```python Collapsed="false"
-deaths_pc = deaths_pc_data.pivot(columns='geo_id', values='deaths_weekly_pc')
-deaths_pc.tail()
-```
-
-```python
-ax = deaths_pc.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear")
-ax.set_xlabel(f"Date")
-for c in COUNTRIES_CORE:
- lvi = deaths_pc[c].last_valid_index()
- ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]*10**6:.0f}")
-# plt.savefig('covid_deaths_total_linear.png')
-```
-
-```python
-ax = deaths_pc.iloc[-6:].plot(figsize=(10, 6), title="Deaths per million, linear")
-ax.set_xlabel(f"Date")
-for c in COUNTRIES_CORE:
- lvi = deaths_pc[c].last_valid_index()
- ax.text(x = lvi, y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]*10**6:.0f}")
-# plt.savefig('covid_deaths_total_linear.png')
-```
-
-```python
-
-```
import matplotlib as mpl
import matplotlib.pyplot as plt
plt.ioff()
+# # %matplotlib inline
# %%
+++ /dev/null
----
-jupyter:
- jupytext:
- formats: ipynb,md
- text_representation:
- extension: .md
- format_name: markdown
- format_version: '1.2'
- jupytext_version: 1.9.1
- kernelspec:
- display_name: Python 3
- language: python
- name: python3
----
-
-<!-- #region Collapsed="false" -->
-Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
-<!-- #endregion -->
-
-```python Collapsed="false"
-import itertools
-import collections
-import json
-import pandas as pd
-import numpy as np
-from scipy.stats import gmean
-import datetime
-
-import matplotlib as mpl
-import matplotlib.pyplot as plt
-%matplotlib inline
-%load_ext sql
-```
-
-```python Collapsed="false"
-connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
-```
-
-```python Collapsed="false"
-%sql $connection_string
-```
-
-```python Collapsed="false"
-# DEATH_COUNT_THRESHOLD = 10
-COUNTRIES_CORE = tuple('IT DE UK ES IE FR BE'.split())
-# COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
-# COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()
-# # COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
-
-# COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
-# 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
-# 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
-# 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']
-# COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
-# COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
-```
-
-<!-- #region Collapsed="false" -->
-# Write results to summary file
-<!-- #endregion -->
-
-```python Collapsed="false"
-last_uk_date = %sql select date from uk_data order by date desc limit 1
-last_uk_date = last_uk_date[0][0]
-last_uk_date
-```
-
-```python Collapsed="false"
-last_intl_date = %sql select report_date from weekly_cases order by report_date desc limit 1
-last_intl_date = last_intl_date[0][0]
-last_intl_date
-```
-
-```sql magic_args="results << select date, new_cases, new_deaths "
-from uk_data
-order by date
-```
-
-```python
-uk_data = results.DataFrame()
-uk_data['date'] = uk_data.date.astype('datetime64[ns]')
-uk_data.set_index('date', inplace=True)
-uk_data.tail(10)
-```
-
-```python
-most_recent_uk_date = %sql select max(date) from uk_data
-most_recent_uk_date = most_recent_uk_date[0][0]
-most_recent_uk_date
-```
-
-```sql Collapsed="false" magic_args="results << select geo_id, country_name, culm_deaths "
-from weekly_cases join countries using (geo_id)
-where geo_id in :COUNTRIES_CORE
- and (geo_id, report_date) in (select geo_id, max(report_date) from weekly_cases group by geo_id)
-order by geo_id
-```
-
-```python
-thirty_days_ago = most_recent_uk_date - datetime.timedelta(days=30)
-thirty_days_ago
-```
-
-```python
-# thirty_days_ago = most_recent_uk_date - datetime.interval(days=30)
-total_uk_deaths = %sql select sum(new_deaths) from uk_data
-total_uk_deaths = total_uk_deaths[0][0]
-deaths_in_past_month = %sql select sum(new_deaths) from uk_data where date > :thirty_days_ago
-deaths_in_past_month = deaths_in_past_month[0][0]
-cases_in_past_month = %sql select sum(new_cases) from uk_data where date > :thirty_days_ago
-cases_in_past_month = cases_in_past_month[0][0]
-total_uk_deaths, deaths_in_past_month, cases_in_past_month
-```
-
-```python Collapsed="false"
-with open('covid_summary.md', 'w') as f:
- f.write('% Covid death data summary\n')
- f.write('% Neil Smith\n')
- f.write(f'% Created on {datetime.datetime.now().strftime("%Y-%m-%d")}\n')
- f.write('\n')
- f.write(f'> Last UK data from {last_uk_date.strftime("%d %b %Y")}. ')
- f.write(f' Last international data from {last_intl_date.strftime("%d %b %Y")}.\n')
- f.write('\n')
-```
-
-```python Collapsed="false"
-with open('covid_summary.md', 'a') as f:
- f.write('## Headlines (UK data)\n')
- f.write('\n')
- f.write('| []() | |\n')
- f.write('|:---|---:|\n')
- f.write(f'| Deaths reported so far | {total_uk_deaths} | \n')
- f.write(f'| Deaths in last 30 days | {deaths_in_past_month} | \n')
- f.write(f'| Cases in last 30 days | {cases_in_past_month} | \n')
-# f.write(f'| Total Covid deaths to date (estimated) | {uk_deaths_to_date:.0f} |\n')
- f.write('\n')
-```
-
-```python Collapsed="false"
-with open('covid_summary.md', 'a') as f:
- f.write('## International comparison\n')
- f.write('\n')
- f.write(f'Based on weekly data. Last data from {last_intl_date.strftime("%d %b %Y")}\n')
- f.write('\n')
- f.write('### Total deaths\n')
- f.write('\n')
- f.write('![Total deaths](covid_deaths_total_linear.png)\n')
- f.write('\n')
- f.write('| Country ID | Country name | Total deaths |\n')
- f.write('|:-----------|:-------------|-------------:|\n')
- for c_id, c_name, t_deaths in results:
- f.write(f'| {c_id} | {c_name} | {t_deaths} |\n')
- f.write('\n')
-```
-
-```python Collapsed="false"
-# with open('covid_summary.md', 'a') as f:
-# f.write('## All-causes deaths, UK\n')
-# f.write('\n')
-# f.write('![All-causes deaths](deaths-radar.png)\n')
-# f.write('\n')
-# f.write('### True deaths\n')
-# f.write('\n')
-# f.write(f'The number of deaths reported in official statistics, {uk_covid_deaths}, is an underestimate '
-# 'of the true number of Covid deaths.\n'
-# 'This is especially true early in the pandemic, approximately March to May 2020.\n')
-# f.write('We can get a better understanding of the impact of Covid by looking at the number of deaths, '
-# 'over and above what would be expected at each week of the year.\n')
-# f.write(f'The ONS (and other bodies in Scotland and Northern Ireland) have released data on the number of deaths '
-# f'up to {pd.to_datetime(excess_deaths_data["end_date"]).strftime("%d %B %Y")}.\n\n')
-# f.write('If, for each of those weeks, I take the largest of the excess deaths or the reported Covid deaths, ')
-# f.write(f'I estimate there have been **{uk_deaths_to_date}** total deaths so far.\n')
-# f.write('\n')
-```
-
-```python Collapsed="false"
-with open('covid_summary.md', 'a') as f:
- f.write('### Deaths per week\n')
- f.write('\n')
- f.write('![Deaths per week](covid_deaths_per_week.png)\n')
- f.write('\n')
- f.write('![Deaths per week, last 6 weeks](deaths_by_date_last_6_weeks.png)\n')
- f.write('\n')
-```
-
-```python
-with open('covid_summary.md', 'a') as f:
- f.write('## UK data\n')
- f.write('\n')
- f.write('### Total deaths\n')
- f.write('\n')
- f.write(f'Deaths reported up to {last_uk_date.strftime("%d %b %Y")}: {total_uk_deaths}\n')
- f.write('\n')
- f.write('![Total deaths](cases_and_deaths.png)\n')
- f.write('\n')
- f.write('![Cases and deaths in last 60 days](cases_and_deaths_last_60_days.png)\n')
- f.write('\n')
- f.write('![Deaths compared to past five years](deaths-radar-2020.png)\n')
- f.write('\n')
-
-```
-
-```python Collapsed="false"
-with open('hospital_normalisation_date.json') as f:
- hospital_normalisation_date_data = json.load(f)
-```
-
-```python Collapsed="false"
-with open('covid_summary.md', 'a') as f:
- f.write('### Hospital care\n')
- f.write(f'Based on a 7-day moving average\n')
- f.write('\n')
- f.write('![Cases, admissions, deaths](cases_admissions_deaths.png)\n')
- f.write('\n')
- f.write('Due to the large scale differences between the three '
- 'measures, they are all normalised to show changes ')
- f.write(f'since {pd.to_datetime(hospital_normalisation_date_data["hospital_normalisation_date"]).strftime("%d %B %Y")}.\n')
- f.write('\n')
- f.write('People in hospital, and on mechanical ventilators\n')
- f.write('\n')
- f.write('![People in hospital and on mechancial ventilators](people_in_hospital.png)\n')
- f.write('\n')
-```
-
-```python Collapsed="false"
-with open('covid_summary.md', 'a') as f:
- f.write('### Testing effectiveness\n')
- f.write('\n')
- f.write('A question about testing is whether more detected cases is a result of more tests being '
- 'done or is because the number of cases is increasing. One way of telling the differeence '
- 'is by looking at the fraction of tests that are positive.\n')
- f.write('\n')
- f.write('![Positive tests and cases](tests_and_cases.png)\n')
- f.write('\n')
- f.write('Numbers of positive tests and cases, '
- '7-day moving average.\n'
- 'Note the different y-axes\n')
- f.write('\n')
- f.write('![Fraction of tests with positive result](fraction_positive_tests.png)\n')
- f.write('\n')
- f.write('Fraction of tests with a positive result, both daily figures and '
- '7-day moving average.\n')
- f.write('\n')
- f.write('\n')
- f.write('![Tests against fraction positive, trajectory](fraction_positive_tests_vs_tests.png)\n')
- f.write('\n')
- f.write('The trajectory of tests done vs fraction positive tests.\n')
- f.write('\n')
- f.write('Points higher indicate more tests; points to the right indicate more positive tests.'
- 'More tests being done with the same infection prevelance will move the point up '
- 'and to the left.\n')
- f.write('\n')
- f.write('\n')
- f.write('![Tests against fraction positive, trajectory](tests_vs_fraction_positive_animation.png)\n')
- f.write('\n')
-```
-
-```python Collapsed="false"
-
-```
-
-```python Collapsed="false"
-with open('covid_summary.md', 'a') as f:
- f.write('# Data sources\n')
- f.write('\n')
- f.write('> Covid data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)\n')
- f.write('\n')
- f.write("""> Population data from:
-
-* [Office of National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (Endland and Wales) Weeks start on a Saturday.
-* [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
-* [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.""")
-
- f.write('\n\n')
- f.write('> [Source code available](https://git.njae.me.uk/?p=covid19.git;a=tree)\n')
- f.write('\n')
-
-```
-
-```python Collapsed="false"
-!pandoc --toc -s covid_summary.md > covid_summary.html
-```
-
-```python Collapsed="false"
-!scp covid_summary.html neil@ogedei:/var/www/scripts.njae.me.uk/covid/index.html
-!scp covid_deaths_total_linear.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp cases_and_deaths.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp cases_and_deaths_last_60_days.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-# !scp deaths-radar.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp deaths-radar-2020.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp covid_deaths_per_week.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp fraction_positive_tests.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp tests_and_cases.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp deaths_by_date_last_6_weeks.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp fraction_positive_tests_vs_tests.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp tests_vs_fraction_positive_animation.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp people_in_hospital.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp cases_admissions_deaths.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-```
-
-```python Collapsed="false"
-with open('uk_covid_deaths.js', 'w') as f:
- f.write(f"document.write('{total_uk_deaths}');")
-
-with open('uk_deaths_30_days.js', 'w') as f:
- f.write(f"document.write('{deaths_in_past_month}');")
-
-with open('uk_cases_30_days.js', 'w') as f:
- f.write(f"document.write('{cases_in_past_month}');")
-
-# with open('estimated_total_deaths.js', 'w') as f:
-# f.write(f"document.write('{uk_deaths_to_date:.0f}');")
-
-# edut = pd.to_datetime(excess_deaths_data["end_date"]).strftime('%d %B %Y')
-# with open('excess_deaths_upto.js', 'w') as f:
-# f.write(f"document.write('{edut}');")
-
-with open('last_uk_date.js', 'w') as f:
- f.write(f"document.write('{pd.to_datetime(last_uk_date).strftime('%d %B %Y')}');")
-
-with open('last_intl_date.js', 'w') as f:
- f.write(f"document.write('{pd.to_datetime(last_intl_date).strftime('%d %B %Y')}');")
-
-```
-
-```python Collapsed="false"
-# pd.to_datetime(excess_deaths_upto).strftime('%d %B %Y')
-```
-
-```python Collapsed="false"
-!scp uk_covid_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp uk_deaths_30_days.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp uk_cases_30_days.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-# !scp estimated_total_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-# !scp excess_deaths_upto.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp last_uk_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp last_intl_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp hospital_normalisation_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-```
-
-```python Collapsed="false"
-
-```
import matplotlib as mpl
import matplotlib.pyplot as plt
+# # %matplotlib inline
# %%
# %%
os.system('pandoc --toc -s covid_summary.md > covid_summary.html')
-
-
-# %%
os.system('scp covid_summary.html neil@ogedei:/var/www/scripts.njae.me.uk/covid/index.html')
-os.system('scp covid_deaths_total_linear.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp cases_and_deaths.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp cases_and_deaths_last_60_days.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp deaths-radar-2021.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp covid_deaths_per_week.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp fraction_positive_tests.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ ')
-os.system('scp tests_and_cases.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp deaths_by_date_last_6_weeks.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp fraction_positive_tests_vs_tests.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp tests_vs_fraction_positive_animation.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ ')
-os.system('scp people_in_hospital.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp cases_admissions_deaths.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-
# %%
with open('uk_covid_deaths.js', 'w') as f:
with open('last_intl_date.js', 'w') as f:
f.write(f"document.write('{pd.to_datetime(last_intl_date).strftime('%d %B %Y')}');")
-
# %%
-os.system('scp uk_covid_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp uk_deaths_30_days.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp uk_cases_30_days.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-# # !scp estimated_total_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-# # !scp excess_deaths_upto.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-os.system('scp last_uk_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp last_intl_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp hospital_normalisation_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-
+transfer_files = [
+ 'covid_deaths_total_linear.png',
+ 'cases_and_deaths.png',
+ 'cases_and_deaths_last_60_days.png',
+ 'deaths-radar-2021.png',
+ 'covid_deaths_per_week.png',
+ 'fraction_positive_tests.png',
+ 'tests_and_cases.png',
+ 'deaths_by_date_last_6_weeks.png',
+ 'fraction_positive_tests_vs_tests.png',
+ 'tests_vs_fraction_positive_animation.png',
+ 'people_in_hospital.png',
+ 'cases_admissions_deaths.png',
+ 'uk_covid_deaths.js',
+ 'uk_deaths_30_days.js',
+ 'uk_cases_30_days.js',
+ 'last_uk_date.js',
+ 'last_intl_date.js',
+ 'hospital_normalisation_date.js'
+]
# %%
+for f in transfer_files:
+ if os.path.isfile(f):
+ os.system(f'scp {f} neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
+ print(f'Transferred {f}')
+ else:
+        print(f'Cannot transfer {f}: file does not exist')
-
-
+# %%
import matplotlib.pyplot as plt
import matplotlib.animation as ani
plt.ioff()
+# # %matplotlib inline
# %%
import matplotlib as mpl
import matplotlib.pyplot as plt
plt.ioff()
+# # %matplotlib inline
# %%
```
```python Collapsed="false"
-england_wales_filename = 'uk-deaths-data/copyofpublishedweek042021.xlsx'
+england_wales_filename = 'uk-deaths-data/publishedweek052021.xlsx'
```
```python Collapsed="false"
-scotland_filename = 'uk-deaths-data/Scottish Government COVID-19 data (10 February 2021).xlsx'
+scotland_filename = 'uk-deaths-data/Scottish Government COVID-19 data (15 February 2021).xlsx'
```
```python