From: Neil Smith Date: Fri, 19 Feb 2021 14:27:52 +0000 (+0000) Subject: Updated for imported data format X-Git-Url: https://git.njae.me.uk/?a=commitdiff_plain;h=4feace1e2a5076f3efa0fd65b31411ac511efbb1;p=covid19.git Updated for imported data format --- diff --git a/covid19.sublime-project b/covid19.sublime-project new file mode 100644 index 0000000..24db303 --- /dev/null +++ b/covid19.sublime-project @@ -0,0 +1,8 @@ +{ + "folders": + [ + { + "path": "." + } + ] +} diff --git a/data_import.md b/data_import.md deleted file mode 100644 index 0a680eb..0000000 --- a/data_import.md +++ /dev/null @@ -1,742 +0,0 @@ ---- -jupyter: - jupytext: - formats: ipynb,md - text_representation: - extension: .md - format_name: markdown - format_version: '1.2' - jupytext_version: 1.9.1 - kernelspec: - display_name: Python 3 - language: python - name: python3 ---- - - -Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide) - - -```python Collapsed="false" -from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float -from sqlalchemy import create_engine -``` - -```python Collapsed="false" -import itertools -import collections -import json -import pandas as pd -import numpy as np -from scipy.stats import gmean -import datetime - -import matplotlib as mpl -import matplotlib.pyplot as plt -%matplotlib inline -``` - -```python Collapsed="false" -%load_ext sql -``` - -```python Collapsed="false" -connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid' -``` - -```python Collapsed="false" -%sql $connection_string -``` - -```python Collapsed="false" -eng = create_engine(connection_string) -engine = eng.execution_options(isolation_level="AUTOCOMMIT") -``` - -```python Collapsed="false" -DEATH_COUNT_THRESHOLD = 10 -COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split() -COUNTRIES_NORDIC = 'SE NO DK FI UK'.split() -COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split() -# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split() - -COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA - 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT', - 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN', - 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE'] -COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS)) -COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS)) -``` - -```python Collapsed="false" -!curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv -``` - -```python Collapsed="false" -# First col is a date, treat geoId of NA as 'Namibia', not "NA" value -raw_data = pd.read_csv('covid.csv', - parse_dates=[0], dayfirst=True, - keep_default_na=False, na_values = [''], -# dtype = {'day': np.int64, -# 'month': np.int64, -# 'year': np.int64, -# 'cases': np.int64, -# 'deaths': np.int64, -# 'countriesAndTerritories': str, -# 'geoId': str, -# 'countryterritoryCode': str, -# 'popData2019': np.int64, -# 'continentExp': str, -# } - ) -``` - -```python Collapsed="false" -raw_data.size -``` - -```python Collapsed="false" -raw_data.fillna(0, inplace=True) -``` - -```python Collapsed="false" -raw_data.head() -``` - -```python Collapsed="false" -raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id', - 'countriesAndTerritories': 'country_name', - 'countryterritoryCode': 'country_territory_code', - 'popData2019': 'population_2019', - 'continentExp': 'continent'}, inplace=True) -``` - -```python Collapsed="false" -raw_data.columns -``` - -```python Collapsed="false" -raw_data.dtypes -``` - -```python Collapsed="false" -raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']] -``` - -```python Collapsed="false" -raw_data[['country_name', 'geo_id', 'country_territory_code', - 'population_2019', 'continent']] -``` - -```python Collapsed="false" -raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']].to_sql( - 'weekly_cases', - engine, - if_exists='replace', - index=False, - chunksize=500, - dtype={ - "report_date": Date, - "cases_weekly": Integer, - "deaths_weekly": Integer, - "geo_id": String, - "notification_rate_per_100000_population_14-days": Float - } -) -``` - -```python Collapsed="false" -raw_data[['country_name', 'geo_id', 'country_territory_code', - 'population_2019', 'continent']].drop_duplicates().to_sql( - 'countries', - engine, - if_exists='replace', - index=False, - chunksize=500, - dtype={ - "country_name": Text, - "geo_id": String, - "country_territory_code": String, - "population_2019": Integer, - "continent": Text - } -) -``` - -```python Collapsed="false" -# %sql select geo_id from weekly_cases limit 10 -``` - -```python Collapsed="false" -# %%sql alter table weekly_cases add primary key (geo_id, report_date); -# alter table countries add primary key (geo_id); -# alter table weekly_cases add foreign key (geo_id) references countries(geo_id); -# alter table weekly_cases add culm_cases integer; -# alter table weekly_cases add culm_deaths integer; -``` - -```python -with engine.connect() as connection: - connection.execute('alter table weekly_cases add primary key (geo_id, report_date)') - connection.execute('alter table countries add primary key (geo_id);') - connection.execute('alter table weekly_cases add foreign key (geo_id) references countries(geo_id);') - connection.execute('alter table weekly_cases add culm_cases integer;') - connection.execute('alter table weekly_cases add culm_deaths integer;') -``` - -```python Collapsed="false" -# %sql select report_date, cases_weekly, country_name from weekly_cases join countries using (geo_id) order by report_date desc limit 10 -``` - -```python Collapsed="false" -# %sql select report_date, cases_weekly, country_name from weekly_cases join countries on weekly_cases.geo_id = countries.geo_id order by report_date desc limit 10 -``` - -```python Collapsed="false" -%sql select * from weekly_cases limit 10 -``` - -```python Collapsed="false" -# %%sql -# with culm as -# (select report_date, geo_id, -# sum(cases_weekly) over (partition by geo_id -# order by report_date) as culm_data -# from weekly_cases -# ) -# update weekly_cases -# set culm_cases = culm_data -# from culm -# where weekly_cases.report_date = culm.report_date and -# weekly_cases.geo_id = culm.geo_id -``` - -```python Collapsed="false" -query_string = '''with culm as - (select report_date, geo_id, - sum(cases_weekly) over (partition by geo_id - order by report_date) as culm_data - from weekly_cases - ) -update weekly_cases - set culm_cases = culm_data - from culm - where weekly_cases.report_date = culm.report_date and - weekly_cases.geo_id = culm.geo_id''' -with engine.connect() as connection: - connection.execute(query_string) -``` - -```python Collapsed="false" -# %%sql -# with culm as -# (select report_date, geo_id, -# sum(deaths_weekly) over (partition by geo_id -# order by report_date) as culm_data -# from weekly_cases -# ) -# update weekly_cases -# set culm_deaths = culm_data -# from culm -# where weekly_cases.report_date = culm.report_date and -# weekly_cases.geo_id = culm.geo_id -``` - -```python Collapsed="false" -query_string = '''with culm as - (select report_date, geo_id, - sum(deaths_weekly) over (partition by geo_id - order by report_date) as culm_data - from weekly_cases - ) -update weekly_cases - set culm_deaths = culm_data - from culm - where weekly_cases.report_date = culm.report_date and - weekly_cases.geo_id = culm.geo_id''' -with engine.connect() as connection: - connection.execute(query_string) -``` - -```python Collapsed="false" -uk_query_string = ( -"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&" -"metric=covidOccupiedMVBeds&" -"metric=newAdmissions&" -"metric=newCasesBySpecimenDate&" -"metric=hospitalCases&" -"metric=newDeaths28DaysByPublishDate&" -"format=csv" -) - -!curl "$uk_query_string" > uk_data.csv -``` - -```python Collapsed="false" -test_query_string = ( -"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&" -"metric=newPCRTestsByPublishDate&" -"metric=newTestsByPublishDate&" -"metric=newPillarOneTwoTestsByPublishDate&" -"format=csv" -) -!curl "$test_query_string" > test_data.csv -``` - -```python Collapsed="false" -# hospital_query_string = ( -# "https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&" -# "metric=newAdmissions&" -# "format=csv" -# ) -# !curl "$hospital_query_string" > hospital_admissions.csv -``` - -```python Collapsed="false" -# hospital_query_string = ( -# "https://api.coronavirus.data.gov.uk/v1/data?" -# "filters=areaName=United%2520Kingdom;areaType=overview&" -# "structure=%7B%22date%22:%22date%22,%22areaName%22:%22areaName%22,%22areaType%22:%22areaType%22," -# "%22newAdmissions%22:%22newAdmissions%22,%22cumAdmissions%22:%22cumAdmissions%22%7D&format=csv" -# ) - -# !curl "$hospital_query_string" | gunzip > hospital_admissions.csv -``` - -```python Collapsed="false" -uk_data = pd.read_csv('uk_data.csv', - parse_dates=[0], dayfirst=True) -uk_data -``` - -```python Collapsed="false" -test_data = pd.read_csv('test_data.csv', - parse_dates=[0], dayfirst=True) -test_data -``` - -```python Collapsed="false" -test_data.columns -``` - -```python Collapsed="false" -uk_data = uk_data.merge(test_data[['date', 'newPCRTestsByPublishDate', - 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate']], how='outer', on='date') -``` - -```python Collapsed="false" -uk_data.columns -``` - -```python Collapsed="false" -uk_data.rename( - columns={ - 'covidOccupiedMVBeds': 'ventilator_beds', - 'newCasesBySpecimenDate': 'new_cases', - 'hospitalCases': 'hospital_cases', - 'newDeaths28DaysByPublishDate': 'new_deaths', - 'newAdmissions': 'new_admissions', - 'newPCRTestsByPublishDate': 'new_pcr_tests', - 'newTestsByPublishDate': 'new_tests', - 'newPillarOneTwoTestsByPublishDate': 'new_pillar_1_2_tests' - }, inplace=True) -``` - -```python Collapsed="false" -uk_data.dtypes -``` - -```python Collapsed="false" - uk_data.columns -``` - -```python Collapsed="false" -uk_data[['date', - 'hospital_cases', 'ventilator_beds', - 'new_cases', 'new_deaths', - 'hospital_cases', 'new_admissions', - 'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests' - ]].to_sql( - 'uk_data', - engine, - if_exists='replace', - index=False, - chunksize=500, - dtype={ - "date": Date, - "hospital_cases": Integer, - "ventilator_beds": Integer, - "new_cases": Integer, - "hospital_cases": Integer, - "new_deaths": Integer, - "new_admissions": Integer, - 'new_pcr_tests': Integer, - 'new_tests': Integer, - 'new_pillar_1_2_tests': Integer - } -) -``` - -```python Collapsed="false" -# %sql select * from uk_data order by date desc limit 10 -``` - -```python Collapsed="false" -query_string = '''drop table if exists uk_data_7; -create table uk_data_7 -(date date primary key, - hospital_cases real, - ventilator_beds real, - new_cases real, - new_deaths real, - new_admissions real, - new_pcr_tests real, - new_tests real, - new_pillar_1_2_tests real -);''' - -with engine.connect() as connection: - connection.execute(query_string) -``` - -```python -update_string = '''with ownd as ( - select date, - avg(hospital_cases) over wnd as w_hospital_cases, - avg(ventilator_beds) over wnd as w_ventilator_beds, - avg(new_cases) over wnd as w_new_cases, - avg(new_deaths) over wnd as w_new_deaths, - avg(new_admissions) over wnd as w_new_admissions, - avg(new_pcr_tests) over wnd as w_new_pcr_tests, - avg(new_tests) over wnd as w_new_tests, - avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests, - count(*) over wnd as w_size - from uk_data - window wnd as ( - order by uk_data.date - rows between 3 preceding and 3 following - ) -) -insert into uk_data_7(date, - hospital_cases, - ventilator_beds, - new_cases, - new_deaths, - new_admissions, - new_pcr_tests, - new_tests, - new_pillar_1_2_tests - ) -(select date, - w_hospital_cases, - w_ventilator_beds, - w_new_cases, - w_new_deaths, - w_new_admissions, - w_new_pcr_tests, - w_new_tests, - w_new_pillar_1_2_tests - from ownd - where w_size = 7 -)''' -with engine.connect() as connection: - connection.execute(update_string) -``` - -```python Collapsed="false" -# %%sql insert into uk_data_7(date, ventilator_beds, new_cases, hospital_cases, new_deaths, new_admissions) -# values ( -# select date, -# avg(ventilator_beds) over (order by date rows between 6 preceding and current row) -# from uk_data -# ) -``` - -```python Collapsed="false" -# query_string = '''insert into uk_data_7(date, hospital_cases) -# select uk_data.date, -# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as hospital_cases -# from uk_data''' -# with engine.connect() as connection: -# connection.execute(query_string) -``` - -```python Collapsed="false" -# %%sql -# with m7 as -# (select uk_data.date as date7, -# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 6 preceding and current row) as nc7 -# from uk_data -# ) -# update uk_data_7 -# set ventilator_beds = nc7 -# from m7 -# where uk_data_7.date = m7.date7 -``` - -```python Collapsed="false" -# %%sql -# with m7 as -# (select uk_data.date as date7, -# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7 -# from uk_data -# ) -# update uk_data_7 -# set ventilator_beds = nc7 -# from m7 -# where uk_data_7.date = m7.date7 -``` - -```python Collapsed="false" -# %%sql -# with m7 as -# (select uk_data.date as date7, -# avg(uk_data.new_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7 -# from uk_data -# ) -# update uk_data_7 -# set new_cases = nc7 -# from m7 -# where uk_data_7.date = m7.date7 -``` - -```python Collapsed="false" -# %%sql -# with m7 as -# (select uk_data.date as date7, -# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as d7 -# from uk_data -# ) -# update uk_data_7 -# set hospital_cases = d7 -# from m7 -# where uk_data_7.date = m7.date7 -``` - -```python Collapsed="false" -# %%sql -# with m7 as -# (select uk_data.date as date7, -# avg(uk_data.new_deaths) over (order by uk_data.date rows between 3 preceding and 3 following) as d7 -# from uk_data -# ) -# update uk_data_7 -# set new_deaths = d7 -# from m7 -# where uk_data_7.date = m7.date7 -``` - -```python Collapsed="false" -# %%sql -# with m7 as -# (select uk_data.date as date7, -# avg(uk_data.new_admissions) over (order by uk_data.date rows between 3 preceding and 3 following) as d7 -# from uk_data -# ) -# update uk_data_7 -# set new_admissions = d7 -# from m7 -# where uk_data_7.date = m7.date7 -``` - -```python Collapsed="false" -# %%sql -# with m7 as -# (select uk_data.date as date7, -# avg(uk_data.new_pcr_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7 -# from uk_data -# ) -# update uk_data_7 -# set new_pcr_tests = d7 -# from m7 -# where uk_data_7.date = m7.date7 -``` - -```python Collapsed="false" -# %%sql -# with m7 as -# (select uk_data.date as date7, -# avg(uk_data.new_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7 -# from uk_data -# ) -# update uk_data_7 -# set new_tests = d7 -# from m7 -# where uk_data_7.date = m7.date7 -``` - -```python Collapsed="false" -# %%sql -# with m7 as -# (select uk_data.date as date7, -# avg(uk_data.new_pillar_1_2_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7 -# from uk_data -# ) -# update uk_data_7 -# set new_pillar_1_2_tests = d7 -# from m7 -# where uk_data_7.date = m7.date7 -``` - -```python Collapsed="false" -# %%sql -# with wnd as -# ( select date, -# avg(new_pillar_1_2_tests) -# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests -# from uk_data -# ) -# update uk_data_7 -# set new_pillar_1_2_tests = wnd.a_new_pillar_1_2_tests -# from wnd -# where uk_data_7.date = wnd.date -# and (select count(*) from wnd) = 7 -``` - -```python Collapsed="false" -# %%sql -# with wnd as -# ( select date, -# avg(new_pillar_1_2_tests) -# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests, -# count(new_pillar_1_2_tests) -# over (order by uk_data.date rows between 3 preceding and 3 following) as a_count -# from uk_data -# ) -# select uk_data_7.date, new_pillar_1_2_tests, wnd.a_new_pillar_1_2_tests, wnd.a_count -# from uk_data_7, wnd -# where uk_data_7.date = wnd.date -# order by uk_data_7.date desc limit 10 - -# select date, -# count(*) over wnd as w_size -# from uk_data -# window wnd as ( -# order by uk_data.date -# rows between 3 preceding and 3 following -# ); -``` - -```python Collapsed="false" -# %%sql -# select date, -# count(*) over wnd as w_size -# from uk_data -# window wnd as ( -# order by uk_data.date -# rows between 3 preceding and 3 following -# ) -# order by date desc limit 10 -``` - -```python -# %%sql -# with ownd as ( -# select date, -# avg(hospital_cases) over wnd as w_hospital_cases, -# avg(ventilator_beds) over wnd as w_ventilator_beds, -# avg(new_cases) over wnd as w_new_cases, -# avg(new_deaths) over wnd as w_new_deaths, -# avg(new_admissions) over wnd as w_new_admissions, -# avg(new_pcr_tests) over wnd as w_new_pcr_tests, -# avg(new_tests) over wnd as w_new_tests, -# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests, -# count(*) over wnd as w_size -# from uk_data -# window wnd as ( -# order by uk_data.date -# rows between 3 preceding and 3 following -# )) -# insert into uk_data_7(date, -# hospital_cases, -# ventilator_beds, -# new_cases, -# new_deaths, -# new_admissions, -# new_pcr_tests, -# new_tests, -# new_pillar_1_2_tests -# ) -# (select date, -# avg(hospital_cases) over wnd as w_hospital_cases, -# avg(ventilator_beds) over wnd as w_ventilator_beds, -# avg(new_cases) over wnd as w_new_cases, -# avg(new_deaths) over wnd as w_new_deaths, -# avg(new_admissions) over wnd as w_new_admissions, -# avg(new_pcr_tests) over wnd as w_new_pcr_tests, -# avg(new_tests) over wnd as w_new_tests, -# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests, -# count(*) over wnd as w_size -# from uk_data -# window wnd as ( -# order by uk_data.date -# rows between 3 preceding and 3 following -# ) -# ) -# set date = ownd.date, -# hospital_cases = w_hospital_cases, -# ventilator_beds = w_ventilator_beds, -# new_cases = w_new_cases, -# new_deaths = w_new_deaths, -# new_admissions = w_new_admissions, -# new_pcr_tests = w_new_pcr_tests, -# new_tests = w_new_tests, -# new_pillar_1_2_tests = w_new_pillar_1_2_tests -# from ownd -# where w_size = 7 -``` - -```python -# %%sql -# with ownd as ( -# select date, -# avg(hospital_cases) over wnd as w_hospital_cases, -# avg(ventilator_beds) over wnd as w_ventilator_beds, -# avg(new_cases) over wnd as w_new_cases, -# avg(new_deaths) over wnd as w_new_deaths, -# avg(new_admissions) over wnd as w_new_admissions, -# avg(new_pcr_tests) over wnd as w_new_pcr_tests, -# avg(new_tests) over wnd as w_new_tests, -# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests, -# count(*) over wnd as w_size -# from uk_data -# window wnd as ( -# order by uk_data.date -# rows between 3 preceding and 3 following -# ) -# ) -# insert into uk_data_7(date, -# hospital_cases, -# ventilator_beds, -# new_cases, -# new_deaths, -# new_admissions, -# new_pcr_tests, -# new_tests, -# new_pillar_1_2_tests -# ) -# (select date, -# w_hospital_cases, -# w_ventilator_beds, -# w_new_cases, -# w_new_deaths, -# w_new_admissions, -# w_new_pcr_tests, -# w_new_tests, -# w_new_pillar_1_2_tests -# from ownd -# where w_size = 7 -# ) -``` - -```python Collapsed="false" -%sql select * from uk_data_7 order by date desc limit 10 -``` - -```python Collapsed="false" -%sql select * from uk_data order by date desc limit 10 -``` - -```python Collapsed="false" - -``` diff --git a/data_import.py b/data_import.py index 20593c7..df35ce6 100644 --- a/data_import.py +++ b/data_import.py @@ -64,11 +64,24 @@ raw_data.fillna(0, inplace=True) # %% +COUNTRIES_ALL + +# %% +# raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id', +# 'countriesAndTerritories': 'country_name', +# 'countryterritoryCode': 'country_territory_code', +# 'popData2019': 'population_2019', +# 'continentExp': 'continent'}, inplace=True) raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id', 'countriesAndTerritories': 'country_name', 'countryterritoryCode': 'country_territory_code', + 'cases': 'cases_weekly', + 'deaths': 'deaths_weekly', 'popData2019': 'population_2019', - 'continentExp': 'continent'}, inplace=True) + 'continentExp': 'continent', + 'Cumulative_number_for_14_days_of_COVID-19_cases_per_100000': 'notification_rate_per_100000_population_14-days' + }, + inplace=True) # %% diff --git a/deaths_import.md b/deaths_import.md deleted file mode 100644 index f704518..0000000 --- a/deaths_import.md +++ /dev/null @@ -1,1017 +0,0 @@ ---- -jupyter: - jupytext: - formats: ipynb,md - text_representation: - extension: .md - format_name: markdown - format_version: '1.2' - jupytext_version: 1.9.1 - kernelspec: - display_name: Python 3 - language: python - name: python3 ---- - - -Data from: - -* [Office of National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (Endland and Wales) Weeks start on a Saturday. -* [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data. -* [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday. - - - -```python Collapsed="false" -import itertools -import collections -import json -import pandas as pd -import numpy as np -from scipy.stats import gmean -import datetime - -import matplotlib as mpl -import matplotlib.pyplot as plt -%matplotlib inline - -from sqlalchemy.types import Integer, Text, String, DateTime, Float -from sqlalchemy import create_engine -%load_ext sql -``` - -```python Collapsed="false" -connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid' -``` - -```python Collapsed="false" -%sql $connection_string -``` - -```python Collapsed="false" -conn = create_engine(connection_string) -``` - -```python Collapsed="false" -england_wales_filename = 'uk-deaths-data/publishedweek532020.xlsx' -``` - -```sql Collapsed="false" -drop table if exists all_causes_deaths; -create table all_causes_deaths ( - week integer, - year integer, - date_up_to date, - nation varchar(20), - deaths integer, - CONSTRAINT week_nation PRIMARY KEY(year, week, nation) -); -``` - -```python Collapsed="false" -raw_data_2015 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2015.csv', - parse_dates=[1, 2], dayfirst=True, -# index_col=0, - header=[0, 1] - ) -dh15i = raw_data_2015.iloc[:, [0, 3]] -dh15i.set_index(dh15i.columns[0], inplace=True) -dh15i.columns = ['total_2015'] -dh15i.tail() -``` - -```python Collapsed="false" -raw_data_2015.head() -``` - -```python Collapsed="false" -rd = raw_data_2015.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename( - columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2015P)': 'deaths', - 'Registration Week': 'week'} - ) -rd['year'] = 2015 -rd['nation'] = 'Northern Ireland' -rd.head() -``` - -```python Collapsed="false" -rd.to_sql( - 'all_causes_deaths', - conn, - if_exists='append', - index=False) -``` - -```python Collapsed="false" -%sql select * from all_causes_deaths limit 10 -``` - -```python Collapsed="false" -raw_data_2016 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2016.csv', - parse_dates=[1, 2], dayfirst=True, -# index_col=0, - header=[0, 1] - ) -raw_data_2016.head() -# dh16i = raw_data_2016.iloc[:, [2]] -# dh16i.columns = ['total_2016'] -# # dh16i.head() -dh16i = raw_data_2016.iloc[:, [0, 3]] -dh16i.set_index(dh16i.columns[0], inplace=True) -dh16i.columns = ['total_2016'] -dh16i.tail() -``` - -```python Collapsed="false" -rd = raw_data_2016.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename( - columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2016P)': 'deaths', - 'Registration Week': 'week'} - ) -rd['year'] = 2016 -rd['nation'] = 'Northern Ireland' -rd.head() -``` - -```python Collapsed="false" -rd.to_sql( - 'all_causes_deaths', - conn, - if_exists='append', - index=False) -``` - -```python Collapsed="false" -%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) -``` - -```python Collapsed="false" -raw_data_2017 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2017.csv', - parse_dates=[1, 2], dayfirst=True, -# index_col=0, - header=[0, 1] - ) -raw_data_2017.head() -dh17i = raw_data_2017.iloc[:, [0, 3]] -dh17i.set_index(dh17i.columns[0], inplace=True) -dh17i.columns = ['total_2017'] -dh17i.tail() -``` - -```python Collapsed="false" -rd = raw_data_2017.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename( - columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2017P)': 'deaths', - 'Registration Week': 'week'} - ) -rd['year'] = 2017 -rd['nation'] = 'Northern Ireland' -rd.head() -``` - -```python Collapsed="false" -rd.to_sql( - 'all_causes_deaths', - conn, - if_exists='append', - index=False) -``` - -```python Collapsed="false" -%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) -``` - -```python Collapsed="false" -raw_data_2018 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2018.csv', - parse_dates=[1, 2], dayfirst=True, -# index_col=0, - header=[0, 1] - ) -raw_data_2018.head() -dh18i = raw_data_2018.iloc[:, [0, 3]] -dh18i.set_index(dh18i.columns[0], inplace=True) -dh18i.columns = ['total_2018'] -dh18i.tail() -``` - -```python Collapsed="false" -rd = raw_data_2018.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename( - columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2018P)': 'deaths', - 'Registration Week': 'week'} - ) -rd['year'] = 2018 -rd['nation'] = 'Northern Ireland' -rd.head() -``` - -```python Collapsed="false" -rd.to_sql( - 'all_causes_deaths', - conn, - if_exists='append', - index=False) -``` - -```python Collapsed="false" -%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) -``` - -```python Collapsed="false" -raw_data_2019 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2019.csv', - parse_dates=[1, 2], dayfirst=True, -# index_col=0, - header=[0, 1] - ) -raw_data_2019.head() -dh19i = raw_data_2019.iloc[:, [0, 3]] -dh19i.set_index(dh19i.columns[0], inplace=True) -dh19i.columns = ['total_2019'] -dh19i.tail() -``` - -```python Collapsed="false" -rd = raw_data_2019.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename( - columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2019P)': 'deaths', - 'Registration Week': 'week'} - ) -rd['year'] = 2019 -rd['nation'] = 'Northern Ireland' -rd.head() -``` - -```python Collapsed="false" -rd.to_sql( - 'all_causes_deaths', - conn, - if_exists='append', - index=False) -``` - -```python Collapsed="false" -%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) -``` - -```python Collapsed="false" -raw_data_2020_i = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2020.csv', - parse_dates=[1], dayfirst=True, - header=[0, 1] - ) -raw_data_2020_i.head() -``` - -```python Collapsed="false" -rd = raw_data_2020_i.iloc[:, [0, 1, 2]].droplevel(1, axis=1).rename( - columns={'Week Ending (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2020P)': 'deaths', - 'Registration Week': 'week'} - ) -rd['year'] = 2020 -rd['nation'] = 'Northern Ireland' -rd.head() -``` - -```python Collapsed="false" -rd.tail() -``` - -```python Collapsed="false" -rd.to_sql( - 'all_causes_deaths', - conn, - if_exists='append', - index=False) -``` - -```python Collapsed="false" -%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year -``` - -```python -raw_data_2020_i.set_index(raw_data_2020_i.columns[0], inplace=True) -raw_data_2020_i.tail() -``` - -```python Collapsed="false" -datetime.datetime.now().isocalendar() -``` - -```python Collapsed="false" -datetime.datetime.fromisocalendar(2021, 3, 1) -``` - -```python Collapsed="false" - -``` - -```python Collapsed="false" -raw_data_s = pd.read_csv('uk-deaths-data/weekly-deaths-scotland.csv', - index_col=0, - header=0, - skiprows=2 - ) -# raw_data_s -``` - -```python Collapsed="false" -deaths_headlines_s = raw_data_s[reversed('2015 2016 2017 2018 2019 2020'.split())] -deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns] -deaths_headlines_s.reset_index(drop=True, inplace=True) -deaths_headlines_s.index = deaths_headlines_s.index + 1 -deaths_headlines_s -``` - -```python -%sql select * from all_causes_deaths limit 5 -``` - -```python Collapsed="false" -for year, ser in deaths_headlines_s.items(): - year_i = int(year[-4:]) -# print(year_i) - for week, deaths in ser.dropna().iteritems(): -# print(datetime.date.fromisocalendar(year_i, week, 7), deaths) - dut = datetime.date.fromisocalendar(year_i, week, 7) - %sql insert into all_causes_deaths(week, year, date_up_to, nation, deaths) values ({week}, {year_i}, :dut, 'Scotland', {deaths}) -``` - -```python -%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year -``` - -```python -%sql select year, nation, date_up_to from all_causes_deaths where week=3 order by year, nation -``` - -```python Collapsed="false" -eng_xls = pd.read_excel(england_wales_filename, - sheet_name="Weekly figures 2020", - skiprows=[0, 1, 2, 3], - header=0, - index_col=[1] - ).iloc[:91].T -eng_xls -``` - -```python Collapsed="false" -# eng_xls_columns -``` - -```python Collapsed="false" -eng_xls_columns = list(eng_xls.columns) - -for i, c in enumerate(eng_xls_columns): -# print(i, c, type(c), isinstance(c, float)) - if isinstance(c, float) and np.isnan(c): - if eng_xls.iloc[0].iloc[i] is not pd.NaT: - eng_xls_columns[i] = eng_xls.iloc[0].iloc[i] - -# np.isnan(eng_xls_columns[0]) -# eng_xls_columns - -eng_xls.columns = eng_xls_columns -# eng_xls.columns -``` - -```python -eng_xls -``` - -```python -rd = eng_xls.iloc[1:][['Week ended', 'Wales']].reset_index(level=0).rename( - columns={'Week ended': 'date_up_to', 'Wales': 'deaths', - 'index': 'week'} - ) -rd['year'] = 2020 -rd['nation'] = 'Wales' -rd.head() -``` - -```python -rd.to_sql( - 'all_causes_deaths', - conn, - if_exists='append', - index=False) -``` - -```python Collapsed="false" -eng_xls = eng_xls.iloc[1:] -eng_xls['England deaths'] = eng_xls.loc[:, 'Total deaths, all ages'] - eng_xls.loc[:, 'Wales'] -``` - -```python -eng_xls.head() -``` - -```python -rd = eng_xls[['Week ended', 'England deaths']].reset_index(level=0).rename( - columns={'Week ended': 'date_up_to', 'England deaths': 'deaths', - 'index': 'week'} - ) -rd['year'] = 2020 -rd['nation'] = 'England' -rd.head() -``` - -```python -%sql delete from all_causes_deaths where nation = 'England' -``` - -```python -rd.to_sql( - 'all_causes_deaths', - conn, - if_exists='append', - index=False) -``` - -```python -rd.tail() -``` - -```python -%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year -``` - -```python Collapsed="false" -# raw_data_2020 = pd.read_csv('uk-deaths-data/publishedweek272020.csv', -# parse_dates=[1], dayfirst=True, -# index_col=0, -# header=[0, 1]) -``` - -```python Collapsed="false" - -``` - -```python Collapsed="false" -# raw_data_2020.head() -``` - -```python Collapsed="false" -# raw_data_2020['W92000004', 'Wales'] -``` - -```python Collapsed="false" -raw_data_2019 = pd.read_csv('uk-deaths-data/publishedweek522019.csv', - parse_dates=[1], dayfirst=True, -# index_col=0, - header=[0, 1]) -# raw_data_2019.head() -``` - -```python -rdew = raw_data_2019.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1) -rdew.head() -``` - -```python -rd = rdew.drop(columns=['Total deaths, all ages']).rename( - columns={'Week ended': 'date_up_to', 'W92000004': 'deaths', - 'Week number': 'week'} - ) -rd['year'] = 2019 -rd['nation'] = 'Wales' -rd.head() -``` - -```python -rd.to_sql( - 'all_causes_deaths', - conn, - if_exists='append', - index=False) -``` - -```python -rd = rdew.loc[:, ['Week ended','Week number']] -rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004'] -rd = rd.rename( - columns={'Week ended': 'date_up_to', 'Week number': 'week'} - ) -rd['year'] = 2019 -rd['nation'] = 'England' -rd.head() -``` - -```python -rd.to_sql( - 'all_causes_deaths', - conn, - if_exists='append', - index=False) -``` - -```python -%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year -``` - -```python Collapsed="false" -raw_data_2018 = pd.read_csv('uk-deaths-data/publishedweek522018.csv', - parse_dates=[1], dayfirst=True, -# index_col=0, - header=[0, 1]) -# raw_data_2018.head() -``` - -```python -rdew = raw_data_2018.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1) -rdew.head() -``` - -```python -rd = rdew.drop(columns=['Total deaths, all ages']).rename( - columns={'Week ended': 'date_up_to', 'W92000004': 'deaths', - 'Week number': 'week'} - ) -rd['year'] = 2018 -rd['nation'] = 'Wales' -rd.head() -``` - -```python -rd.to_sql( - 'all_causes_deaths', - conn, - if_exists='append', - index=False) -``` - -```python -rd = rdew.loc[:, ['Week ended','Week number']] -rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004'] -rd = rd.rename( - columns={'Week ended': 'date_up_to', 'Week number': 'week'} - ) -rd['year'] = 2018 -rd['nation'] = 'England' -rd.head() -``` - -```python -rd.to_sql( - 'all_causes_deaths', - conn, - if_exists='append', - index=False) -``` - -```python -%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year -``` - -```python Collapsed="false" -raw_data_2017 = pd.read_csv('uk-deaths-data/publishedweek522017.csv', - parse_dates=[1], dayfirst=True, -# index_col=0, - header=[0, 1]) -# raw_data_2017.head() -``` - -```python -rdew = raw_data_2017.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1) -rdew.head() -``` - -```python -rd = rdew.drop(columns=['Total deaths, all ages']).rename( - columns={'Week ended': 'date_up_to', 'W92000004': 'deaths', - 'Week number': 'week'} - ) -rd['year'] = 2017 -rd['nation'] = 'Wales' -rd.head() -``` - -```python -rd.to_sql( - 'all_causes_deaths', - conn, - if_exists='append', - index=False) -``` - -```python -rd = rdew.loc[:, ['Week ended','Week number']] -rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004'] -rd = rd.rename( - columns={'Week ended': 'date_up_to', 'Week number': 'week'} - ) -rd['year'] = 2017 -rd['nation'] = 'England' -rd.head() -``` - -```python -rd.to_sql( - 'all_causes_deaths', - conn, - if_exists='append', - index=False) -``` - -```python -%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year -``` - -```python - -``` - -```python Collapsed="false" -raw_data_2016 = pd.read_csv('uk-deaths-data/publishedweek522016.csv', - parse_dates=[1], dayfirst=True, -# index_col=0, - header=[0, 1]) -# raw_data_2016.head() -``` - -```python -raw_data_2016.head() -``` - -```python -rdew = raw_data_2016.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1) -rdew.head() -``` - -```python -rd = rdew.drop(columns=['Total deaths, all ages']).rename( - columns={'Week ended': 'date_up_to', 'W92000004': 'deaths', - 'Week number': 'week'} - ) -rd['year'] = 2016 -rd['nation'] = 'Wales' -rd.head() -``` - -```python -rd.to_sql( - 'all_causes_deaths', - conn, - if_exists='append', - index=False) -``` - -```python -rd = rdew.loc[:, ['Week ended','Week number']] -rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004'] -rd = rd.rename( - columns={'Week ended': 'date_up_to', 'Week number': 'week'} - ) -rd['year'] = 2016 -rd['nation'] = 'England' -rd.head() -``` - -```python -rd.to_sql( - 'all_causes_deaths', - conn, - if_exists='append', - index=False) -``` - -```python - %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year -``` - -```python Collapsed="false" -raw_data_2015 = pd.read_csv('uk-deaths-data/publishedweek2015.csv', - parse_dates=[1], dayfirst=True, -# index_col=0, - header=[0, 1]) -# raw_data_2015.head() -``` - -```python -rdew = raw_data_2015.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1) -rdew.head() -``` - -```python -rd = rdew.drop(columns=['Total deaths, all ages']).rename( - columns={'Week ended': 'date_up_to', 'W92000004': 'deaths', - 'Week number': 'week'} - ) -rd['year'] = 2015 -rd['nation'] = 'Wales' -rd.head() -``` - -```python -rd.to_sql( - 'all_causes_deaths', - conn, - if_exists='append', - index=False) -``` - -```python -rd = rdew.loc[:, ['Week ended','Week number']] -rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004'] -rd = rd.rename( - columns={'Week ended': 'date_up_to', 'Week number': 'week'} - ) -rd['year'] = 2015 -rd['nation'] = 'England' -rd.head() -``` - -```python -rd.to_sql( - 'all_causes_deaths', - conn, - if_exists='append', - index=False) -``` - -```python -%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by year, nation -``` - -```sql magic_args="res << select week, year, deaths" -from all_causes_deaths -where nation = 'England' -``` - -```python -deaths_headlines_e = res.DataFrame().pivot(index='week', columns='year', values='deaths') -deaths_headlines_e -``` - -```python - -``` - -```sql magic_args="res << select week, year, deaths" -from all_causes_deaths -where nation = 'Scotland' -``` - -```python -deaths_headlines_s = res.DataFrame().pivot(index='week', columns='year', values='deaths') -deaths_headlines_s -``` - -```sql magic_args="res << select week, year, deaths" -from all_causes_deaths -where nation = 'Wales' -``` - -```python -deaths_headlines_w = res.DataFrame().pivot(index='week', columns='year', values='deaths') -deaths_headlines_w -``` - -```sql magic_args="res << select week, year, deaths" -from all_causes_deaths -where nation = 'Northern Ireland' -``` - -```python -deaths_headlines_i = res.DataFrame().pivot(index='week', columns='year', values='deaths') -deaths_headlines_i -``` - -```python Collapsed="false" -deaths_headlines = deaths_headlines_e + deaths_headlines_w + deaths_headlines_i + deaths_headlines_s -deaths_headlines -``` - -```python -deaths_headlines_e.columns -``` - -```python -deaths_headlines_e['previous_mean'] = deaths_headlines_e[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1) -deaths_headlines_w['previous_mean'] = deaths_headlines_w[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1) -deaths_headlines_s['previous_mean'] = deaths_headlines_s[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1) -deaths_headlines_i['previous_mean'] = deaths_headlines_i[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1) -deaths_headlines['previous_mean'] = deaths_headlines[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1) -deaths_headlines -``` - -```python Collapsed="false" -deaths_headlines[[2020, 2019, 2018, 2017, 2016, 2015]].plot(figsize=(14, 8)) -``` - -```python Collapsed="false" -deaths_headlines[[2020, 'previous_mean']].plot(figsize=(10, 8)) -``` - -```python Collapsed="false" -deaths_headlines_i.plot() -``` - -```python -deaths_headlines[2020].sum() - deaths_headlines.previous_mean.sum() -``` - -```python Collapsed="false" -# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas# - -dhna = deaths_headlines.dropna() - -fig = plt.figure(figsize=(10, 10)) -ax = fig.add_subplot(111, projection="polar") - -theta = np.roll( - np.flip( - np.arange(len(dhna))/float(len(dhna))*2.*np.pi), - 14) -# l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#b56363", label="2015") # 0 -# l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#a4b563", label="2016") # 72 -# l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#63b584", label="2017") # 144 -# l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#6384b5", label="2018") # 216 -# l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#a4635b", label="2019") # 288 -l15, = ax.plot(theta, dhna[2015], color="#e47d7d", label="2015") # 0 -l16, = ax.plot(theta, dhna[2016], color="#afc169", label="2016") # 72 , d0e47d -l17, = ax.plot(theta, dhna[2017], color="#7de4a6", label="2017") # 144 -l18, = ax.plot(theta, dhna[2018], color="#7da6e4", label="2018") # 216 -l19, = ax.plot(theta, dhna[2019], color="#d07de4", label="2019") # 288 - -lmean, = ax.plot(theta, dhna['previous_mean'], color="black", linestyle='dashed', label="mean") - -l20, = ax.plot(theta, dhna[2020], color="red", label="2020") - -# deaths_headlines.total_2019.plot(ax=ax) - -def _closeline(line): - x, y = line.get_data() - x = np.concatenate((x, [x[0]])) - y = np.concatenate((y, [y[0]])) - line.set_data(x, y) - -[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]] - - -ax.set_xticks(theta) -ax.set_xticklabels(dhna.index) -plt.legend() -plt.title("Deaths by week over years, all UK") -plt.savefig('deaths-radar.png') -plt.show() -``` - - -# Plots for UK nations - - -```python Collapsed="false" -# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas# - -fig = plt.figure(figsize=(10, 10)) -ax = fig.add_subplot(111, projection="polar") - -theta = np.roll( - np.flip( - np.arange(len(deaths_headlines_e))/float(len(deaths_headlines_e))*2.*np.pi), - 14) -l15, = ax.plot(theta, deaths_headlines_e[2015], color="#e47d7d", label="2015") # 0 -l16, = ax.plot(theta, deaths_headlines_e[2016], color="#afc169", label="2016") # 72 , d0e47d -l17, = ax.plot(theta, deaths_headlines_e[2017], color="#7de4a6", label="2017") # 144 -l18, = ax.plot(theta, deaths_headlines_e[2018], color="#7da6e4", label="2018") # 216 -l19, = ax.plot(theta, deaths_headlines_e[2019], color="#d07de4", label="2019") # 288 - -lmean, = ax.plot(theta, deaths_headlines_e['previous_mean'], color="black", linestyle='dashed', label="mean") - -l20, = ax.plot(theta, deaths_headlines_e[2020], color="red", label="2020") - -# deaths_headlines.total_2019.plot(ax=ax) - -def _closeline(line): - x, y = line.get_data() - x = np.concatenate((x, [x[0]])) - y = np.concatenate((y, [y[0]])) - line.set_data(x, y) - -[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]] - - -ax.set_xticks(theta) -ax.set_xticklabels(deaths_headlines_e.index) -plt.legend() -plt.title("Deaths by week over years, England") -plt.savefig('deaths-radar_england.png') -plt.show() -``` - -```python Collapsed="false" -# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas# - -fig = plt.figure(figsize=(10, 10)) -ax = fig.add_subplot(111, projection="polar") - -theta = np.roll( - np.flip( - np.arange(len(deaths_headlines_w))/float(len(deaths_headlines_w))*2.*np.pi), - 14) -l15, = ax.plot(theta, deaths_headlines_w[2015], color="#e47d7d", label="2015") # 0 -l16, = ax.plot(theta, deaths_headlines_w[2016], color="#afc169", label="2016") # 72 , d0e47d -l17, = ax.plot(theta, deaths_headlines_w[2017], color="#7de4a6", label="2017") # 144 -l18, = ax.plot(theta, deaths_headlines_w[2018], color="#7da6e4", label="2018") # 216 -l19, = ax.plot(theta, deaths_headlines_w[2019], color="#d07de4", label="2019") # 288 - -lmean, = ax.plot(theta, deaths_headlines_w['previous_mean'], color="black", linestyle='dashed', label="mean") - -l20, = ax.plot(theta, deaths_headlines_w[2020], color="red", label="2020") - - -def _closeline(line): - x, y = line.get_data() - x = np.concatenate((x, [x[0]])) - y = np.concatenate((y, [y[0]])) - line.set_data(x, y) - -[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]] - - -ax.set_xticks(theta) -ax.set_xticklabels(deaths_headlines_w.index) -plt.legend() -plt.title("Deaths by week over years, Wales") -plt.savefig('deaths-radar_wales.png') -plt.show() -``` - -```python Collapsed="false" -# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas# - -fig = plt.figure(figsize=(10, 10)) -ax = fig.add_subplot(111, projection="polar") - -theta = np.roll( - np.flip( - np.arange(len(deaths_headlines_s))/float(len(deaths_headlines_s))*2.*np.pi), - 14) -l15, = ax.plot(theta, deaths_headlines_s[2015], color="#e47d7d", label="2015") # 0 -l16, = ax.plot(theta, deaths_headlines_s[2016], color="#afc169", label="2016") # 72 , d0e47d -l17, = ax.plot(theta, deaths_headlines_s[2017], color="#7de4a6", label="2017") # 144 -l18, = ax.plot(theta, deaths_headlines_s[2018], color="#7da6e4", label="2018") # 216 -l19, = ax.plot(theta, deaths_headlines_s[2019], color="#d07de4", label="2019") # 288 - -lmean, = ax.plot(theta, deaths_headlines_s['previous_mean'], color="black", linestyle='dashed', label="mean") - -l20, = ax.plot(theta, deaths_headlines_s[2020], color="red", label="2020") - - -def _closeline(line): - x, y = line.get_data() - x = np.concatenate((x, [x[0]])) - y = np.concatenate((y, [y[0]])) - line.set_data(x, y) - -[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]] - - -ax.set_xticks(theta) -ax.set_xticklabels(deaths_headlines_s.index) -plt.legend() -plt.title("Deaths by week over years, Scotland") -plt.savefig('deaths-radar_scotland.png') -plt.show() -``` - -```python Collapsed="false" -# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas# - -fig = plt.figure(figsize=(10, 10)) -ax = fig.add_subplot(111, projection="polar") - -theta = np.roll( - np.flip( - np.arange(len(deaths_headlines_i))/float(len(deaths_headlines_i))*2.*np.pi), - 14) -l15, = ax.plot(theta, deaths_headlines_i[2015], color="#e47d7d", label="2015") # 0 -l16, = ax.plot(theta, deaths_headlines_i[2016], color="#afc169", label="2016") # 72 , d0e47d -l17, = ax.plot(theta, deaths_headlines_i[2017], color="#7de4a6", label="2017") # 144 -l18, = ax.plot(theta, deaths_headlines_i[2018], color="#7da6e4", label="2018") # 216 -l19, = ax.plot(theta, deaths_headlines_i[2019], color="#d07de4", label="2019") # 288 - -lmean, = ax.plot(theta, deaths_headlines_i['previous_mean'], color="black", linestyle='dashed', label="mean") - -l20, = ax.plot(theta, deaths_headlines_i[2020], color="red", label="2020") - - -def _closeline(line): - x, y = line.get_data() - x = np.concatenate((x, [x[0]])) - y = np.concatenate((y, [y[0]])) - line.set_data(x, y) - -[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]] - - -ax.set_xticks(theta) -ax.set_xticklabels(deaths_headlines_i.index) -plt.legend() -plt.title("Deaths by week over years, Northern Ireland") -plt.savefig('deaths-radar_northern_ireland.png') -plt.show() -``` - -```python Collapsed="false" - -``` - -```python Collapsed="false" - -``` diff --git a/hospital_data.py b/hospital_data.py index 3771cfa..3354b78 100644 --- a/hospital_data.py +++ b/hospital_data.py @@ -14,6 +14,7 @@ import sqlalchemy import matplotlib as mpl import matplotlib.pyplot as plt plt.ioff() +# # %matplotlib inline # %% @@ -98,3 +99,6 @@ plt.savefig('cases_admissions_deaths.png') + +# %% [markdown] +# diff --git a/international_comparison-large-countries.md b/international_comparison-large-countries.md new file mode 100644 index 0000000..9a750df --- /dev/null +++ b/international_comparison-large-countries.md @@ -0,0 +1,234 @@ +--- +jupyter: + jupytext: + formats: ipynb,md + text_representation: + extension: .md + format_name: markdown + format_version: '1.2' + jupytext_version: 1.9.1 + kernelspec: + display_name: Python 3 + language: python + name: python3 +--- + + +Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide) + + +```python Collapsed="false" +import itertools +import collections +import json +import pandas as pd +import numpy as np +from scipy.stats import gmean +import datetime + +from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float +from sqlalchemy import create_engine + +import matplotlib as mpl +import matplotlib.pyplot as plt +%matplotlib inline +%load_ext sql +``` + +```python Collapsed="false" +connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid' +``` + +```python Collapsed="false" +%sql $connection_string +``` + +```python +engine = create_engine(connection_string) +``` + +```python Collapsed="false" +DEATH_COUNT_THRESHOLD = 10 +COUNTRIES_CORE = tuple(sorted('IT DE UK ES IE FR BE'.split())) +COUNTRIES_NORDIC = tuple('SE NO DK FI UK'.split()) +COUNTRIES_FRIENDS = tuple('IT UK ES BE SI MX'.split()) +# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split() + +COUNTRIES_AMERICAS = ('AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA + 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT', + 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN', + 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE') +COUNTRIES_OF_INTEREST = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS)) +COUNTRIES_ALL = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS)) +``` + +```python Collapsed="false" +# res = %sql select report_date, geo_id, deaths_weekly, culm_deaths from weekly_cases where geo_id in $COUNTRIES_CORE order by report_date, geo_id +``` + +```python Collapsed="false" +# country_data = res.DataFrame() +# country_data['report_date'] = country_data.report_date.astype('datetime64[ns]') +# country_data.set_index('report_date', inplace=True) +# country_data.tail(10) +``` + +```python +%sql select geo_id, country_name, population_2019 from countries where population_2019 > 30000000 +``` + +```python +query_string = f'''select report_date, weekly_cases.geo_id, country_name, + 10000000.0 * deaths_weekly / population_2019 as deaths_weekly_pc, + 10000000.0 * culm_deaths / population_2019 as culm_deaths_pc +from weekly_cases, countries +where weekly_cases.geo_id = countries.geo_id +and countries.population_2019 > 20000000 +order by report_date, geo_id''' + +country_data = pd.read_sql_query(query_string, + engine, + index_col = 'report_date', + parse_dates = ['report_date'] + ) +``` + +```python +country_data.tail() +``` + +```python +latest_date = country_data.index.max() +latest_date +``` + +```python +# highest_deaths_geo_ids = country_data.loc[latest_date, :].nlargest(10, 'culm_deaths_pc')['geo_id'].values +highest_deaths_geo_ids = ( + country_data.loc[latest_date, :] + .nlargest(10, 'culm_deaths_pc') + ['country_name'] + .values +) + +highest_deaths_geo_ids +``` + +```python +high_deaths_country_data = ( + country_data[ + country_data.country_name.isin(highest_deaths_geo_ids)] +) +high_deaths_country_data.tail() +``` + +```python Collapsed="false" +deaths_culm = high_deaths_country_data.pivot(columns='country_name', + values='culm_deaths_pc') +deaths_culm.tail() +``` + +```python Collapsed="false" +deaths_culm.plot() +``` + +```python Collapsed="false" +ax = deaths_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Total deaths, linear") +ax.set_xlabel(f"Date") +for c in highest_deaths_geo_ids: + lvi = deaths_culm[c].last_valid_index() + ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_culm[c][lvi], s = f"{c}: {deaths_culm[c][lvi]:.0f}") +# plt.savefig('covid_deaths_total_linear.png') +``` + +```python +deaths_culm.loc[latest_date].sort_values() +``` + +```python Collapsed="false" +deaths_weekly = high_deaths_country_data.pivot(columns='country_name', values='deaths_weekly_pc') +deaths_weekly.tail() +``` + +```python Collapsed="false" +deaths_weekly.plot() +``` + +```python Collapsed="false" +ax = deaths_weekly.loc['2020-03-01':].plot(figsize=(10, 6), title="Deaths per week") +ax.set_xlabel('Date') +for c in highest_deaths_geo_ids: + lvi = deaths_weekly[c].last_valid_index() + ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_weekly[c][lvi], s = c) +# plt.savefig('covid_deaths_per_week.png') +``` + +```python Collapsed="false" +ax = deaths_weekly.iloc[-6:].plot(figsize=(10, 6), title="Deaths per week")#, ylim=(-10, 100)) +ax.set_xlabel("Date") + +text_x_pos = deaths_weekly.last_valid_index() + pd.Timedelta(days=0.5) + +for c in highest_deaths_geo_ids: + lvi = deaths_weekly[c].last_valid_index() +# if c != 'ES': + ax.text(x = text_x_pos, y = deaths_weekly[c][lvi], s = f"{c}: {deaths_weekly[c][lvi]:.0f}") +plt.savefig('deaths_by_date_last_6_weeks.png') +``` + +```python +query_string = f'''select report_date, geo_id, +(cast(deaths_weekly as float) / population_2019) deaths_weekly_pc, +(cast(culm_deaths as float) / population_2019) as culm_deaths_pc +from weekly_cases join countries using (geo_id) +where geo_id in {COUNTRIES_CORE} +order by report_date, geo_id''' + +deaths_pc_data = pd.read_sql_query(query_string, + engine, + index_col = 'report_date', + parse_dates = ['report_date'] + ) +deaths_pc_data.tail() +``` + +```python Collapsed="false" +deaths_pc_culm = deaths_pc_data.pivot(columns='geo_id', values='culm_deaths_pc') +deaths_pc_culm.tail() +``` + +```python +ax = deaths_pc_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear") +ax.set_xlabel(f"Date") +for c in COUNTRIES_CORE: + lvi = deaths_pc_culm[c].last_valid_index() + ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc_culm[c][lvi], s = f"{c}: {deaths_pc_culm[c][lvi]:.0f}") +# plt.savefig('covid_deaths_total_linear.png') +``` + +```python Collapsed="false" +deaths_pc = deaths_pc_data.pivot(columns='geo_id', values='deaths_weekly_pc') +deaths_pc.tail() +``` + +```python +ax = deaths_pc.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear") +ax.set_xlabel(f"Date") +for c in COUNTRIES_CORE: + lvi = deaths_pc[c].last_valid_index() + ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]:.0f}") +# plt.savefig('covid_deaths_total_linear.png') +``` + +```python +ax = deaths_pc.iloc[-6:].plot(figsize=(10, 6), title="Deaths per million, linear") +ax.set_xlabel(f"Date") +for c in COUNTRIES_CORE: + lvi = deaths_pc[c].last_valid_index() + ax.text(x = lvi, y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]*10**6:.0f}") +# plt.savefig('covid_deaths_total_linear.png') +``` + +```python + +``` diff --git a/international_comparison.md b/international_comparison.md deleted file mode 100644 index 801ee4c..0000000 --- a/international_comparison.md +++ /dev/null @@ -1,193 +0,0 @@ ---- -jupyter: - jupytext: - formats: ipynb,md - text_representation: - extension: .md - format_name: markdown - format_version: '1.2' - jupytext_version: 1.9.1 - kernelspec: - display_name: Python 3 - language: python - name: python3 ---- - - -Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide) - - -```python Collapsed="false" -import itertools -import collections -import json -import pandas as pd -import numpy as np -from scipy.stats import gmean -import datetime - -from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float -from sqlalchemy import create_engine - -import matplotlib as mpl -import matplotlib.pyplot as plt -%matplotlib inline -%load_ext sql -``` - -```python Collapsed="false" -connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid' -``` - -```python Collapsed="false" -%sql $connection_string -``` - -```python -engine = create_engine(connection_string) -``` - -```python Collapsed="false" -DEATH_COUNT_THRESHOLD = 10 -COUNTRIES_CORE = tuple(sorted('IT DE UK ES IE FR BE'.split())) -COUNTRIES_NORDIC = tuple('SE NO DK FI UK'.split()) -COUNTRIES_FRIENDS = tuple('IT UK ES BE SI MX'.split()) -# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split() - -COUNTRIES_AMERICAS = ('AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA - 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT', - 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN', - 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE') -COUNTRIES_OF_INTEREST = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS)) -COUNTRIES_ALL = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS)) -``` - -```python Collapsed="false" -# res = %sql select report_date, geo_id, deaths_weekly, culm_deaths from weekly_cases where geo_id in $COUNTRIES_CORE order by report_date, geo_id -``` - -```python Collapsed="false" -# country_data = res.DataFrame() -# country_data['report_date'] = country_data.report_date.astype('datetime64[ns]') -# country_data.set_index('report_date', inplace=True) -# country_data.tail(10) -``` - -```python -query_string = f'''select report_date, geo_id, deaths_weekly, culm_deaths -from weekly_cases -where geo_id in {COUNTRIES_CORE} -order by report_date, geo_id''' - -country_data = pd.read_sql_query(query_string, - engine, - index_col = 'report_date', - parse_dates = ['report_date'] - ) -``` - -```python Collapsed="false" -deaths_culm = country_data.pivot(columns='geo_id', values='culm_deaths') -deaths_culm.tail() -``` - -```python Collapsed="false" -deaths_culm.plot() -``` - -```python Collapsed="false" -ax = deaths_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Total deaths, linear") -ax.set_xlabel(f"Date") -for c in COUNTRIES_CORE: - lvi = deaths_culm[c].last_valid_index() - ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_culm[c][lvi], s = f"{c}: {deaths_culm[c][lvi]:.0f}") -plt.savefig('covid_deaths_total_linear.png') -``` - -```python Collapsed="false" -deaths_weekly = country_data.pivot(columns='geo_id', values='deaths_weekly') -deaths_weekly.tail() -``` - -```python Collapsed="false" -deaths_weekly.plot() -``` - -```python Collapsed="false" -ax = deaths_weekly.loc['2020-03-01':, COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per week") -ax.set_xlabel('Date') -for c in COUNTRIES_CORE: - lvi = deaths_weekly[c].last_valid_index() - ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_weekly[c][lvi], s = c) -plt.savefig('covid_deaths_per_week.png') -``` - -```python Collapsed="false" -ax = deaths_weekly.iloc[-6:].plot(figsize=(10, 6), title="Deaths per week")#, ylim=(-10, 100)) -ax.set_xlabel("Date") - -text_x_pos = deaths_weekly.last_valid_index() + pd.Timedelta(days=0.5) - -for c in COUNTRIES_CORE: - lvi = deaths_weekly[c].last_valid_index() -# if c != 'ES': - ax.text(x = text_x_pos, y = deaths_weekly[c][lvi], s = f"{c}: {deaths_weekly[c][lvi]:.0f}") -plt.savefig('deaths_by_date_last_6_weeks.png') -``` - -```python -query_string = f'''select report_date, geo_id, -(cast(deaths_weekly as float) / population_2019) deaths_weekly_pc, -(cast(culm_deaths as float) / population_2019) as culm_deaths_pc -from weekly_cases join countries using (geo_id) -where geo_id in {COUNTRIES_CORE} -order by report_date, geo_id''' - -deaths_pc_data = pd.read_sql_query(query_string, - engine, - index_col = 'report_date', - parse_dates = ['report_date'] - ) -deaths_pc_data.tail() -``` - -```python Collapsed="false" -deaths_pc_culm = deaths_pc_data.pivot(columns='geo_id', values='culm_deaths_pc') -deaths_pc_culm.tail() -``` - -```python -ax = deaths_pc_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear") -ax.set_xlabel(f"Date") -for c in COUNTRIES_CORE: - lvi = deaths_pc_culm[c].last_valid_index() - ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc_culm[c][lvi], s = f"{c}: {deaths_pc_culm[c][lvi]*10**6:.0f}") -# plt.savefig('covid_deaths_total_linear.png') -``` - -```python Collapsed="false" -deaths_pc = deaths_pc_data.pivot(columns='geo_id', values='deaths_weekly_pc') -deaths_pc.tail() -``` - -```python -ax = deaths_pc.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear") -ax.set_xlabel(f"Date") -for c in COUNTRIES_CORE: - lvi = deaths_pc[c].last_valid_index() - ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]*10**6:.0f}") -# plt.savefig('covid_deaths_total_linear.png') -``` - -```python -ax = deaths_pc.iloc[-6:].plot(figsize=(10, 6), title="Deaths per million, linear") -ax.set_xlabel(f"Date") -for c in COUNTRIES_CORE: - lvi = deaths_pc[c].last_valid_index() - ax.text(x = lvi, y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]*10**6:.0f}") -# plt.savefig('covid_deaths_total_linear.png') -``` - -```python - -``` diff --git a/international_comparison.py b/international_comparison.py index 2135609..18880f5 100644 --- a/international_comparison.py +++ b/international_comparison.py @@ -18,6 +18,7 @@ from sqlalchemy import create_engine import matplotlib as mpl import matplotlib.pyplot as plt plt.ioff() +# # %matplotlib inline # %% diff --git a/publish.md b/publish.md deleted file mode 100644 index 6b4ed59..0000000 --- a/publish.md +++ /dev/null @@ -1,343 +0,0 @@ ---- -jupyter: - jupytext: - formats: ipynb,md - text_representation: - extension: .md - format_name: markdown - format_version: '1.2' - jupytext_version: 1.9.1 - kernelspec: - display_name: Python 3 - language: python - name: python3 ---- - - -Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide) - - -```python Collapsed="false" -import itertools -import collections -import json -import pandas as pd -import numpy as np -from scipy.stats import gmean -import datetime - -import matplotlib as mpl -import matplotlib.pyplot as plt -%matplotlib inline -%load_ext sql -``` - -```python Collapsed="false" -connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid' -``` - -```python Collapsed="false" -%sql $connection_string -``` - -```python Collapsed="false" -# DEATH_COUNT_THRESHOLD = 10 -COUNTRIES_CORE = tuple('IT DE UK ES IE FR BE'.split()) -# COUNTRIES_NORDIC = 'SE NO DK FI UK'.split() -# COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split() -# # COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split() - -# COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA -# 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT', -# 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN', -# 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE'] -# COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS)) -# COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS)) -``` - - -# Write results to summary file - - -```python Collapsed="false" -last_uk_date = %sql select date from uk_data order by date desc limit 1 -last_uk_date = last_uk_date[0][0] -last_uk_date -``` - -```python Collapsed="false" -last_intl_date = %sql select report_date from weekly_cases order by report_date desc limit 1 -last_intl_date = last_intl_date[0][0] -last_intl_date -``` - -```sql magic_args="results << select date, new_cases, new_deaths " -from uk_data -order by date -``` - -```python -uk_data = results.DataFrame() -uk_data['date'] = uk_data.date.astype('datetime64[ns]') -uk_data.set_index('date', inplace=True) -uk_data.tail(10) -``` - -```python -most_recent_uk_date = %sql select max(date) from uk_data -most_recent_uk_date = most_recent_uk_date[0][0] -most_recent_uk_date -``` - -```sql Collapsed="false" magic_args="results << select geo_id, country_name, culm_deaths " -from weekly_cases join countries using (geo_id) -where geo_id in :COUNTRIES_CORE - and (geo_id, report_date) in (select geo_id, max(report_date) from weekly_cases group by geo_id) -order by geo_id -``` - -```python -thirty_days_ago = most_recent_uk_date - datetime.timedelta(days=30) -thirty_days_ago -``` - -```python -# thirty_days_ago = most_recent_uk_date - datetime.interval(days=30) -total_uk_deaths = %sql select sum(new_deaths) from uk_data -total_uk_deaths = total_uk_deaths[0][0] -deaths_in_past_month = %sql select sum(new_deaths) from uk_data where date > :thirty_days_ago -deaths_in_past_month = deaths_in_past_month[0][0] -cases_in_past_month = %sql select sum(new_cases) from uk_data where date > :thirty_days_ago -cases_in_past_month = cases_in_past_month[0][0] -total_uk_deaths, deaths_in_past_month, cases_in_past_month -``` - -```python Collapsed="false" -with open('covid_summary.md', 'w') as f: - f.write('% Covid death data summary\n') - f.write('% Neil Smith\n') - f.write(f'% Created on {datetime.datetime.now().strftime("%Y-%m-%d")}\n') - f.write('\n') - f.write(f'> Last UK data from {last_uk_date.strftime("%d %b %Y")}. ') - f.write(f' Last international data from {last_intl_date.strftime("%d %b %Y")}.\n') - f.write('\n') -``` - -```python Collapsed="false" -with open('covid_summary.md', 'a') as f: - f.write('## Headlines (UK data)\n') - f.write('\n') - f.write('| []() | |\n') - f.write('|:---|---:|\n') - f.write(f'| Deaths reported so far | {total_uk_deaths} | \n') - f.write(f'| Deaths in last 30 days | {deaths_in_past_month} | \n') - f.write(f'| Cases in last 30 days | {cases_in_past_month} | \n') -# f.write(f'| Total Covid deaths to date (estimated) | {uk_deaths_to_date:.0f} |\n') - f.write('\n') -``` - -```python Collapsed="false" -with open('covid_summary.md', 'a') as f: - f.write('## International comparison\n') - f.write('\n') - f.write(f'Based on weekly data. Last data from {last_intl_date.strftime("%d %b %Y")}\n') - f.write('\n') - f.write('### Total deaths\n') - f.write('\n') - f.write('![Total deaths](covid_deaths_total_linear.png)\n') - f.write('\n') - f.write('| Country ID | Country name | Total deaths |\n') - f.write('|:-----------|:-------------|-------------:|\n') - for c_id, c_name, t_deaths in results: - f.write(f'| {c_id} | {c_name} | {t_deaths} |\n') - f.write('\n') -``` - -```python Collapsed="false" -# with open('covid_summary.md', 'a') as f: -# f.write('## All-causes deaths, UK\n') -# f.write('\n') -# f.write('![All-causes deaths](deaths-radar.png)\n') -# f.write('\n') -# f.write('### True deaths\n') -# f.write('\n') -# f.write(f'The number of deaths reported in official statistics, {uk_covid_deaths}, is an underestimate ' -# 'of the true number of Covid deaths.\n' -# 'This is especially true early in the pandemic, approximately March to May 2020.\n') -# f.write('We can get a better understanding of the impact of Covid by looking at the number of deaths, ' -# 'over and above what would be expected at each week of the year.\n') -# f.write(f'The ONS (and other bodies in Scotland and Northern Ireland) have released data on the number of deaths ' -# f'up to {pd.to_datetime(excess_deaths_data["end_date"]).strftime("%d %B %Y")}.\n\n') -# f.write('If, for each of those weeks, I take the largest of the excess deaths or the reported Covid deaths, ') -# f.write(f'I estimate there have been **{uk_deaths_to_date}** total deaths so far.\n') -# f.write('\n') -``` - -```python Collapsed="false" -with open('covid_summary.md', 'a') as f: - f.write('### Deaths per week\n') - f.write('\n') - f.write('![Deaths per week](covid_deaths_per_week.png)\n') - f.write('\n') - f.write('![Deaths per week, last 6 weeks](deaths_by_date_last_6_weeks.png)\n') - f.write('\n') -``` - -```python -with open('covid_summary.md', 'a') as f: - f.write('## UK data\n') - f.write('\n') - f.write('### Total deaths\n') - f.write('\n') - f.write(f'Deaths reported up to {last_uk_date.strftime("%d %b %Y")}: {total_uk_deaths}\n') - f.write('\n') - f.write('![Total deaths](cases_and_deaths.png)\n') - f.write('\n') - f.write('![Cases and deaths in last 60 days](cases_and_deaths_last_60_days.png)\n') - f.write('\n') - f.write('![Deaths compared to past five years](deaths-radar-2020.png)\n') - f.write('\n') - -``` - -```python Collapsed="false" -with open('hospital_normalisation_date.json') as f: - hospital_normalisation_date_data = json.load(f) -``` - -```python Collapsed="false" -with open('covid_summary.md', 'a') as f: - f.write('### Hospital care\n') - f.write(f'Based on a 7-day moving average\n') - f.write('\n') - f.write('![Cases, admissions, deaths](cases_admissions_deaths.png)\n') - f.write('\n') - f.write('Due to the large scale differences between the three ' - 'measures, they are all normalised to show changes ') - f.write(f'since {pd.to_datetime(hospital_normalisation_date_data["hospital_normalisation_date"]).strftime("%d %B %Y")}.\n') - f.write('\n') - f.write('People in hospital, and on mechanical ventilators\n') - f.write('\n') - f.write('![People in hospital and on mechancial ventilators](people_in_hospital.png)\n') - f.write('\n') -``` - -```python Collapsed="false" -with open('covid_summary.md', 'a') as f: - f.write('### Testing effectiveness\n') - f.write('\n') - f.write('A question about testing is whether more detected cases is a result of more tests being ' - 'done or is because the number of cases is increasing. One way of telling the differeence ' - 'is by looking at the fraction of tests that are positive.\n') - f.write('\n') - f.write('![Positive tests and cases](tests_and_cases.png)\n') - f.write('\n') - f.write('Numbers of positive tests and cases, ' - '7-day moving average.\n' - 'Note the different y-axes\n') - f.write('\n') - f.write('![Fraction of tests with positive result](fraction_positive_tests.png)\n') - f.write('\n') - f.write('Fraction of tests with a positive result, both daily figures and ' - '7-day moving average.\n') - f.write('\n') - f.write('\n') - f.write('![Tests against fraction positive, trajectory](fraction_positive_tests_vs_tests.png)\n') - f.write('\n') - f.write('The trajectory of tests done vs fraction positive tests.\n') - f.write('\n') - f.write('Points higher indicate more tests; points to the right indicate more positive tests.' - 'More tests being done with the same infection prevelance will move the point up ' - 'and to the left.\n') - f.write('\n') - f.write('\n') - f.write('![Tests against fraction positive, trajectory](tests_vs_fraction_positive_animation.png)\n') - f.write('\n') -``` - -```python Collapsed="false" - -``` - -```python Collapsed="false" -with open('covid_summary.md', 'a') as f: - f.write('# Data sources\n') - f.write('\n') - f.write('> Covid data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)\n') - f.write('\n') - f.write("""> Population data from: - -* [Office of National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (Endland and Wales) Weeks start on a Saturday. -* [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data. -* [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.""") - - f.write('\n\n') - f.write('> [Source code available](https://git.njae.me.uk/?p=covid19.git;a=tree)\n') - f.write('\n') - -``` - -```python Collapsed="false" -!pandoc --toc -s covid_summary.md > covid_summary.html -``` - -```python Collapsed="false" -!scp covid_summary.html neil@ogedei:/var/www/scripts.njae.me.uk/covid/index.html -!scp covid_deaths_total_linear.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -!scp cases_and_deaths.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -!scp cases_and_deaths_last_60_days.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -# !scp deaths-radar.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -!scp deaths-radar-2020.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -!scp covid_deaths_per_week.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -!scp fraction_positive_tests.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -!scp tests_and_cases.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -!scp deaths_by_date_last_6_weeks.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -!scp fraction_positive_tests_vs_tests.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -!scp tests_vs_fraction_positive_animation.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -!scp people_in_hospital.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -!scp cases_admissions_deaths.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -``` - -```python Collapsed="false" -with open('uk_covid_deaths.js', 'w') as f: - f.write(f"document.write('{total_uk_deaths}');") - -with open('uk_deaths_30_days.js', 'w') as f: - f.write(f"document.write('{deaths_in_past_month}');") - -with open('uk_cases_30_days.js', 'w') as f: - f.write(f"document.write('{cases_in_past_month}');") - -# with open('estimated_total_deaths.js', 'w') as f: -# f.write(f"document.write('{uk_deaths_to_date:.0f}');") - -# edut = pd.to_datetime(excess_deaths_data["end_date"]).strftime('%d %B %Y') -# with open('excess_deaths_upto.js', 'w') as f: -# f.write(f"document.write('{edut}');") - -with open('last_uk_date.js', 'w') as f: - f.write(f"document.write('{pd.to_datetime(last_uk_date).strftime('%d %B %Y')}');") - -with open('last_intl_date.js', 'w') as f: - f.write(f"document.write('{pd.to_datetime(last_intl_date).strftime('%d %B %Y')}');") - -``` - -```python Collapsed="false" -# pd.to_datetime(excess_deaths_upto).strftime('%d %B %Y') -``` - -```python Collapsed="false" -!scp uk_covid_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -!scp uk_deaths_30_days.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -!scp uk_cases_30_days.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -# !scp estimated_total_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -# !scp excess_deaths_upto.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -!scp last_uk_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -!scp last_intl_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -!scp hospital_normalisation_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -``` - -```python Collapsed="false" - -``` diff --git a/publish.py b/publish.py index 8ed4adb..8c97056 100644 --- a/publish.py +++ b/publish.py @@ -16,6 +16,7 @@ import os import matplotlib as mpl import matplotlib.pyplot as plt +# # %matplotlib inline # %% @@ -220,23 +221,7 @@ with open('covid_summary.md', 'a') as f: # %% os.system('pandoc --toc -s covid_summary.md > covid_summary.html') - - -# %% os.system('scp covid_summary.html neil@ogedei:/var/www/scripts.njae.me.uk/covid/index.html') -os.system('scp covid_deaths_total_linear.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/') -os.system('scp cases_and_deaths.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/') -os.system('scp cases_and_deaths_last_60_days.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/') -os.system('scp deaths-radar-2021.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/') -os.system('scp covid_deaths_per_week.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/') -os.system('scp fraction_positive_tests.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ ') -os.system('scp tests_and_cases.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/') -os.system('scp deaths_by_date_last_6_weeks.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/') -os.system('scp fraction_positive_tests_vs_tests.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/') -os.system('scp tests_vs_fraction_positive_animation.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ ') -os.system('scp people_in_hospital.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/') -os.system('scp cases_admissions_deaths.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/') - # %% with open('uk_covid_deaths.js', 'w') as f: @@ -254,20 +239,35 @@ with open('last_uk_date.js', 'w') as f: with open('last_intl_date.js', 'w') as f: f.write(f"document.write('{pd.to_datetime(last_intl_date).strftime('%d %B %Y')}');") - # %% -os.system('scp uk_covid_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/') -os.system('scp uk_deaths_30_days.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/') -os.system('scp uk_cases_30_days.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/') -# # !scp estimated_total_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -# # !scp excess_deaths_upto.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/ -os.system('scp last_uk_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/') -os.system('scp last_intl_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/') -os.system('scp hospital_normalisation_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/') - +transfer_files = [ + 'covid_deaths_total_linear.png', + 'cases_and_deaths.png', + 'cases_and_deaths_last_60_days.png', + 'deaths-radar-2021.png', + 'covid_deaths_per_week.png', + 'fraction_positive_tests.png', + 'tests_and_cases.png', + 'deaths_by_date_last_6_weeks.png', + 'fraction_positive_tests_vs_tests.png', + 'tests_vs_fraction_positive_animation.png', + 'people_in_hospital.png', + 'cases_admissions_deaths.png', + 'uk_covid_deaths.js', + 'uk_deaths_30_days.js', + 'uk_cases_30_days.js', + 'last_uk_date.js', + 'last_intl_date.js', + 'hospital_normalisation_date.js' +] # %% +for f in transfer_files: + if os.path.isfile(f): + os.system(f'scp {f} neil@ogedei:/var/www/scripts.njae.me.uk/covid/') + print(f'Transferred {f}') + else: + print('Cannot transfer {f}: file does not exist') - - +# %% diff --git a/test_and_case_data.py b/test_and_case_data.py index 7de6bf0..d2ef00a 100644 --- a/test_and_case_data.py +++ b/test_and_case_data.py @@ -15,6 +15,7 @@ import matplotlib as mpl import matplotlib.pyplot as plt import matplotlib.animation as ani plt.ioff() +# # %matplotlib inline # %% diff --git a/uk_cases_and_deaths.py b/uk_cases_and_deaths.py index bb57ed6..08afb4b 100644 --- a/uk_cases_and_deaths.py +++ b/uk_cases_and_deaths.py @@ -14,6 +14,7 @@ import sqlalchemy import matplotlib as mpl import matplotlib.pyplot as plt plt.ioff() +# # %matplotlib inline # %% diff --git a/uk_deaths_import.md b/uk_deaths_import.md index e775661..61bc1e1 100644 --- a/uk_deaths_import.md +++ b/uk_deaths_import.md @@ -53,11 +53,11 @@ engine = create_engine(connection_string) ``` ```python Collapsed="false" -england_wales_filename = 'uk-deaths-data/copyofpublishedweek042021.xlsx' +england_wales_filename = 'uk-deaths-data/publishedweek052021.xlsx' ``` ```python Collapsed="false" -scotland_filename = 'uk-deaths-data/Scottish Government COVID-19 data (10 February 2021).xlsx' +scotland_filename = 'uk-deaths-data/Scottish Government COVID-19 data (15 February 2021).xlsx' ``` ```python