--- /dev/null
+{
+ "folders":
+ [
+ {
+ "path": "."
+ }
+ ]
+}
+++ /dev/null
----
-jupyter:
- jupytext:
- formats: ipynb,md
- text_representation:
- extension: .md
- format_name: markdown
- format_version: '1.2'
- jupytext_version: 1.9.1
- kernelspec:
- display_name: Python 3
- language: python
- name: python3
----
-
-<!-- #region Collapsed="false" -->
-Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
-<!-- #endregion -->
-
-```python Collapsed="false"
-from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float
-from sqlalchemy import create_engine
-```
-
-```python Collapsed="false"
-import itertools
-import collections
-import json
-import pandas as pd
-import numpy as np
-from scipy.stats import gmean
-import datetime
-
-import matplotlib as mpl
-import matplotlib.pyplot as plt
-%matplotlib inline
-```
-
-```python Collapsed="false"
-%load_ext sql
-```
-
-```python Collapsed="false"
-connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
-```
-
-```python Collapsed="false"
-%sql $connection_string
-```
-
-```python Collapsed="false"
-eng = create_engine(connection_string)
-engine = eng.execution_options(isolation_level="AUTOCOMMIT")
-```
-
-```python Collapsed="false"
-DEATH_COUNT_THRESHOLD = 10
-COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()
-COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
-COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()
-# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
-
-COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
- 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
- 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
- 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']
-COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
-COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
-```
-
-```python Collapsed="false"
-!curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv
-```
-
-```python Collapsed="false"
-# First col is a date, treat geoId of NA as 'Namibia', not "NA" value
-raw_data = pd.read_csv('covid.csv',
- parse_dates=[0], dayfirst=True,
- keep_default_na=False, na_values = [''],
-# dtype = {'day': np.int64,
-# 'month': np.int64,
-# 'year': np.int64,
-# 'cases': np.int64,
-# 'deaths': np.int64,
-# 'countriesAndTerritories': str,
-# 'geoId': str,
-# 'countryterritoryCode': str,
-# 'popData2019': np.int64,
-# 'continentExp': str,
-# }
- )
-```
-
-```python Collapsed="false"
-raw_data.size
-```
-
-```python Collapsed="false"
-raw_data.fillna(0, inplace=True)
-```
-
-```python Collapsed="false"
-raw_data.head()
-```
-
-```python Collapsed="false"
-raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',
- 'countriesAndTerritories': 'country_name',
- 'countryterritoryCode': 'country_territory_code',
- 'popData2019': 'population_2019',
- 'continentExp': 'continent'}, inplace=True)
-```
-
-```python Collapsed="false"
-raw_data.columns
-```
-
-```python Collapsed="false"
-raw_data.dtypes
-```
-
-```python Collapsed="false"
-raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']]
-```
-
-```python Collapsed="false"
-raw_data[['country_name', 'geo_id', 'country_territory_code',
- 'population_2019', 'continent']]
-```
-
-```python Collapsed="false"
-raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']].to_sql(
- 'weekly_cases',
- engine,
- if_exists='replace',
- index=False,
- chunksize=500,
- dtype={
- "report_date": Date,
- "cases_weekly": Integer,
- "deaths_weekly": Integer,
- "geo_id": String,
- "notification_rate_per_100000_population_14-days": Float
- }
-)
-```
-
-```python Collapsed="false"
-raw_data[['country_name', 'geo_id', 'country_territory_code',
- 'population_2019', 'continent']].drop_duplicates().to_sql(
- 'countries',
- engine,
- if_exists='replace',
- index=False,
- chunksize=500,
- dtype={
- "country_name": Text,
- "geo_id": String,
- "country_territory_code": String,
- "population_2019": Integer,
- "continent": Text
- }
-)
-```
-
-```python Collapsed="false"
-# %sql select geo_id from weekly_cases limit 10
-```
-
-```python Collapsed="false"
-# %%sql alter table weekly_cases add primary key (geo_id, report_date);
-# alter table countries add primary key (geo_id);
-# alter table weekly_cases add foreign key (geo_id) references countries(geo_id);
-# alter table weekly_cases add culm_cases integer;
-# alter table weekly_cases add culm_deaths integer;
-```
-
-```python
-with engine.connect() as connection:
- connection.execute('alter table weekly_cases add primary key (geo_id, report_date)')
- connection.execute('alter table countries add primary key (geo_id);')
- connection.execute('alter table weekly_cases add foreign key (geo_id) references countries(geo_id);')
- connection.execute('alter table weekly_cases add culm_cases integer;')
- connection.execute('alter table weekly_cases add culm_deaths integer;')
-```
-
-```python Collapsed="false"
-# %sql select report_date, cases_weekly, country_name from weekly_cases join countries using (geo_id) order by report_date desc limit 10
-```
-
-```python Collapsed="false"
-# %sql select report_date, cases_weekly, country_name from weekly_cases join countries on weekly_cases.geo_id = countries.geo_id order by report_date desc limit 10
-```
-
-```python Collapsed="false"
-%sql select * from weekly_cases limit 10
-```
-
-```python Collapsed="false"
-# %%sql
-# with culm as
-# (select report_date, geo_id,
-# sum(cases_weekly) over (partition by geo_id
-# order by report_date) as culm_data
-# from weekly_cases
-# )
-# update weekly_cases
-# set culm_cases = culm_data
-# from culm
-# where weekly_cases.report_date = culm.report_date and
-# weekly_cases.geo_id = culm.geo_id
-```
-
-```python Collapsed="false"
-query_string = '''with culm as
- (select report_date, geo_id,
- sum(cases_weekly) over (partition by geo_id
- order by report_date) as culm_data
- from weekly_cases
- )
-update weekly_cases
- set culm_cases = culm_data
- from culm
- where weekly_cases.report_date = culm.report_date and
- weekly_cases.geo_id = culm.geo_id'''
-with engine.connect() as connection:
- connection.execute(query_string)
-```
-
-```python Collapsed="false"
-# %%sql
-# with culm as
-# (select report_date, geo_id,
-# sum(deaths_weekly) over (partition by geo_id
-# order by report_date) as culm_data
-# from weekly_cases
-# )
-# update weekly_cases
-# set culm_deaths = culm_data
-# from culm
-# where weekly_cases.report_date = culm.report_date and
-# weekly_cases.geo_id = culm.geo_id
-```
-
-```python Collapsed="false"
-query_string = '''with culm as
- (select report_date, geo_id,
- sum(deaths_weekly) over (partition by geo_id
- order by report_date) as culm_data
- from weekly_cases
- )
-update weekly_cases
- set culm_deaths = culm_data
- from culm
- where weekly_cases.report_date = culm.report_date and
- weekly_cases.geo_id = culm.geo_id'''
-with engine.connect() as connection:
- connection.execute(query_string)
-```
-
-```python Collapsed="false"
-uk_query_string = (
-"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
-"metric=covidOccupiedMVBeds&"
-"metric=newAdmissions&"
-"metric=newCasesBySpecimenDate&"
-"metric=hospitalCases&"
-"metric=newDeaths28DaysByPublishDate&"
-"format=csv"
-)
-
-!curl "$uk_query_string" > uk_data.csv
-```
-
-```python Collapsed="false"
-test_query_string = (
-"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
-"metric=newPCRTestsByPublishDate&"
-"metric=newTestsByPublishDate&"
-"metric=newPillarOneTwoTestsByPublishDate&"
-"format=csv"
-)
-!curl "$test_query_string" > test_data.csv
-```
-
-```python Collapsed="false"
-# hospital_query_string = (
-# "https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
-# "metric=newAdmissions&"
-# "format=csv"
-# )
-# !curl "$hospital_query_string" > hospital_admissions.csv
-```
-
-```python Collapsed="false"
-# hospital_query_string = (
-# "https://api.coronavirus.data.gov.uk/v1/data?"
-# "filters=areaName=United%2520Kingdom;areaType=overview&"
-# "structure=%7B%22date%22:%22date%22,%22areaName%22:%22areaName%22,%22areaType%22:%22areaType%22,"
-# "%22newAdmissions%22:%22newAdmissions%22,%22cumAdmissions%22:%22cumAdmissions%22%7D&format=csv"
-# )
-
-# !curl "$hospital_query_string" | gunzip > hospital_admissions.csv
-```
-
-```python Collapsed="false"
-uk_data = pd.read_csv('uk_data.csv',
- parse_dates=[0], dayfirst=True)
-uk_data
-```
-
-```python Collapsed="false"
-test_data = pd.read_csv('test_data.csv',
- parse_dates=[0], dayfirst=True)
-test_data
-```
-
-```python Collapsed="false"
-test_data.columns
-```
-
-```python Collapsed="false"
-uk_data = uk_data.merge(test_data[['date', 'newPCRTestsByPublishDate',
- 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate']], how='outer', on='date')
-```
-
-```python Collapsed="false"
-uk_data.columns
-```
-
-```python Collapsed="false"
-uk_data.rename(
- columns={
- 'covidOccupiedMVBeds': 'ventilator_beds',
- 'newCasesBySpecimenDate': 'new_cases',
- 'hospitalCases': 'hospital_cases',
- 'newDeaths28DaysByPublishDate': 'new_deaths',
- 'newAdmissions': 'new_admissions',
- 'newPCRTestsByPublishDate': 'new_pcr_tests',
- 'newTestsByPublishDate': 'new_tests',
- 'newPillarOneTwoTestsByPublishDate': 'new_pillar_1_2_tests'
- }, inplace=True)
-```
-
-```python Collapsed="false"
-uk_data.dtypes
-```
-
-```python Collapsed="false"
- uk_data.columns
-```
-
-```python Collapsed="false"
-uk_data[['date',
- 'hospital_cases', 'ventilator_beds',
- 'new_cases', 'new_deaths',
- 'hospital_cases', 'new_admissions',
- 'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'
- ]].to_sql(
- 'uk_data',
- engine,
- if_exists='replace',
- index=False,
- chunksize=500,
- dtype={
- "date": Date,
- "hospital_cases": Integer,
- "ventilator_beds": Integer,
- "new_cases": Integer,
- "hospital_cases": Integer,
- "new_deaths": Integer,
- "new_admissions": Integer,
- 'new_pcr_tests': Integer,
- 'new_tests': Integer,
- 'new_pillar_1_2_tests': Integer
- }
-)
-```
-
-```python Collapsed="false"
-# %sql select * from uk_data order by date desc limit 10
-```
-
-```python Collapsed="false"
-query_string = '''drop table if exists uk_data_7;
-create table uk_data_7
-(date date primary key,
- hospital_cases real,
- ventilator_beds real,
- new_cases real,
- new_deaths real,
- new_admissions real,
- new_pcr_tests real,
- new_tests real,
- new_pillar_1_2_tests real
-);'''
-
-with engine.connect() as connection:
- connection.execute(query_string)
-```
-
-```python
-update_string = '''with ownd as (
- select date,
- avg(hospital_cases) over wnd as w_hospital_cases,
- avg(ventilator_beds) over wnd as w_ventilator_beds,
- avg(new_cases) over wnd as w_new_cases,
- avg(new_deaths) over wnd as w_new_deaths,
- avg(new_admissions) over wnd as w_new_admissions,
- avg(new_pcr_tests) over wnd as w_new_pcr_tests,
- avg(new_tests) over wnd as w_new_tests,
- avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
- count(*) over wnd as w_size
- from uk_data
- window wnd as (
- order by uk_data.date
- rows between 3 preceding and 3 following
- )
-)
-insert into uk_data_7(date,
- hospital_cases,
- ventilator_beds,
- new_cases,
- new_deaths,
- new_admissions,
- new_pcr_tests,
- new_tests,
- new_pillar_1_2_tests
- )
-(select date,
- w_hospital_cases,
- w_ventilator_beds,
- w_new_cases,
- w_new_deaths,
- w_new_admissions,
- w_new_pcr_tests,
- w_new_tests,
- w_new_pillar_1_2_tests
- from ownd
- where w_size = 7
-)'''
-with engine.connect() as connection:
- connection.execute(update_string)
-```
-
-```python Collapsed="false"
-# %%sql insert into uk_data_7(date, ventilator_beds, new_cases, hospital_cases, new_deaths, new_admissions)
-# values (
-# select date,
-# avg(ventilator_beds) over (order by date rows between 6 preceding and current row)
-# from uk_data
-# )
-```
-
-```python Collapsed="false"
-# query_string = '''insert into uk_data_7(date, hospital_cases)
-# select uk_data.date,
-# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as hospital_cases
-# from uk_data'''
-# with engine.connect() as connection:
-# connection.execute(query_string)
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 6 preceding and current row) as nc7
-# from uk_data
-# )
-# update uk_data_7
-# set ventilator_beds = nc7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7
-# from uk_data
-# )
-# update uk_data_7
-# set ventilator_beds = nc7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.new_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7
-# from uk_data
-# )
-# update uk_data_7
-# set new_cases = nc7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
-# from uk_data
-# )
-# update uk_data_7
-# set hospital_cases = d7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.new_deaths) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
-# from uk_data
-# )
-# update uk_data_7
-# set new_deaths = d7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.new_admissions) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
-# from uk_data
-# )
-# update uk_data_7
-# set new_admissions = d7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.new_pcr_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
-# from uk_data
-# )
-# update uk_data_7
-# set new_pcr_tests = d7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.new_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
-# from uk_data
-# )
-# update uk_data_7
-# set new_tests = d7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with m7 as
-# (select uk_data.date as date7,
-# avg(uk_data.new_pillar_1_2_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
-# from uk_data
-# )
-# update uk_data_7
-# set new_pillar_1_2_tests = d7
-# from m7
-# where uk_data_7.date = m7.date7
-```
-
-```python Collapsed="false"
-# %%sql
-# with wnd as
-# ( select date,
-# avg(new_pillar_1_2_tests)
-# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests
-# from uk_data
-# )
-# update uk_data_7
-# set new_pillar_1_2_tests = wnd.a_new_pillar_1_2_tests
-# from wnd
-# where uk_data_7.date = wnd.date
-# and (select count(*) from wnd) = 7
-```
-
-```python Collapsed="false"
-# %%sql
-# with wnd as
-# ( select date,
-# avg(new_pillar_1_2_tests)
-# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests,
-# count(new_pillar_1_2_tests)
-# over (order by uk_data.date rows between 3 preceding and 3 following) as a_count
-# from uk_data
-# )
-# select uk_data_7.date, new_pillar_1_2_tests, wnd.a_new_pillar_1_2_tests, wnd.a_count
-# from uk_data_7, wnd
-# where uk_data_7.date = wnd.date
-# order by uk_data_7.date desc limit 10
-
-# select date,
-# count(*) over wnd as w_size
-# from uk_data
-# window wnd as (
-# order by uk_data.date
-# rows between 3 preceding and 3 following
-# );
-```
-
-```python Collapsed="false"
-# %%sql
-# select date,
-# count(*) over wnd as w_size
-# from uk_data
-# window wnd as (
-# order by uk_data.date
-# rows between 3 preceding and 3 following
-# )
-# order by date desc limit 10
-```
-
-```python
-# %%sql
-# with ownd as (
-# select date,
-# avg(hospital_cases) over wnd as w_hospital_cases,
-# avg(ventilator_beds) over wnd as w_ventilator_beds,
-# avg(new_cases) over wnd as w_new_cases,
-# avg(new_deaths) over wnd as w_new_deaths,
-# avg(new_admissions) over wnd as w_new_admissions,
-# avg(new_pcr_tests) over wnd as w_new_pcr_tests,
-# avg(new_tests) over wnd as w_new_tests,
-# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
-# count(*) over wnd as w_size
-# from uk_data
-# window wnd as (
-# order by uk_data.date
-# rows between 3 preceding and 3 following
-# ))
-# insert into uk_data_7(date,
-# hospital_cases,
-# ventilator_beds,
-# new_cases,
-# new_deaths,
-# new_admissions,
-# new_pcr_tests,
-# new_tests,
-# new_pillar_1_2_tests
-# )
-# (select date,
-# avg(hospital_cases) over wnd as w_hospital_cases,
-# avg(ventilator_beds) over wnd as w_ventilator_beds,
-# avg(new_cases) over wnd as w_new_cases,
-# avg(new_deaths) over wnd as w_new_deaths,
-# avg(new_admissions) over wnd as w_new_admissions,
-# avg(new_pcr_tests) over wnd as w_new_pcr_tests,
-# avg(new_tests) over wnd as w_new_tests,
-# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
-# count(*) over wnd as w_size
-# from uk_data
-# window wnd as (
-# order by uk_data.date
-# rows between 3 preceding and 3 following
-# )
-# )
-# set date = ownd.date,
-# hospital_cases = w_hospital_cases,
-# ventilator_beds = w_ventilator_beds,
-# new_cases = w_new_cases,
-# new_deaths = w_new_deaths,
-# new_admissions = w_new_admissions,
-# new_pcr_tests = w_new_pcr_tests,
-# new_tests = w_new_tests,
-# new_pillar_1_2_tests = w_new_pillar_1_2_tests
-# from ownd
-# where w_size = 7
-```
-
-```python
-# %%sql
-# with ownd as (
-# select date,
-# avg(hospital_cases) over wnd as w_hospital_cases,
-# avg(ventilator_beds) over wnd as w_ventilator_beds,
-# avg(new_cases) over wnd as w_new_cases,
-# avg(new_deaths) over wnd as w_new_deaths,
-# avg(new_admissions) over wnd as w_new_admissions,
-# avg(new_pcr_tests) over wnd as w_new_pcr_tests,
-# avg(new_tests) over wnd as w_new_tests,
-# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
-# count(*) over wnd as w_size
-# from uk_data
-# window wnd as (
-# order by uk_data.date
-# rows between 3 preceding and 3 following
-# )
-# )
-# insert into uk_data_7(date,
-# hospital_cases,
-# ventilator_beds,
-# new_cases,
-# new_deaths,
-# new_admissions,
-# new_pcr_tests,
-# new_tests,
-# new_pillar_1_2_tests
-# )
-# (select date,
-# w_hospital_cases,
-# w_ventilator_beds,
-# w_new_cases,
-# w_new_deaths,
-# w_new_admissions,
-# w_new_pcr_tests,
-# w_new_tests,
-# w_new_pillar_1_2_tests
-# from ownd
-# where w_size = 7
-# )
-```
-
-```python Collapsed="false"
-%sql select * from uk_data_7 order by date desc limit 10
-```
-
-```python Collapsed="false"
-%sql select * from uk_data order by date desc limit 10
-```
-
-```python Collapsed="false"
-
-```
# %%
+COUNTRIES_ALL
+
+# %%
+# raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',
+# 'countriesAndTerritories': 'country_name',
+# 'countryterritoryCode': 'country_territory_code',
+# 'popData2019': 'population_2019',
+# 'continentExp': 'continent'}, inplace=True)
raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',
'countriesAndTerritories': 'country_name',
'countryterritoryCode': 'country_territory_code',
+ 'cases': 'cases_weekly',
+ 'deaths': 'deaths_weekly',
'popData2019': 'population_2019',
- 'continentExp': 'continent'}, inplace=True)
+ 'continentExp': 'continent',
+ 'Cumulative_number_for_14_days_of_COVID-19_cases_per_100000': 'notification_rate_per_100000_population_14-days'
+ },
+ inplace=True)
# %%
+++ /dev/null
----
-jupyter:
- jupytext:
- formats: ipynb,md
- text_representation:
- extension: .md
- format_name: markdown
- format_version: '1.2'
- jupytext_version: 1.9.1
- kernelspec:
- display_name: Python 3
- language: python
- name: python3
----
-
-<!-- #region Collapsed="false" -->
-Data from:
-
-* [Office of National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (Endland and Wales) Weeks start on a Saturday.
-* [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
-* [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.
-
-<!-- #endregion -->
-
-```python Collapsed="false"
-import itertools
-import collections
-import json
-import pandas as pd
-import numpy as np
-from scipy.stats import gmean
-import datetime
-
-import matplotlib as mpl
-import matplotlib.pyplot as plt
-%matplotlib inline
-
-from sqlalchemy.types import Integer, Text, String, DateTime, Float
-from sqlalchemy import create_engine
-%load_ext sql
-```
-
-```python Collapsed="false"
-connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
-```
-
-```python Collapsed="false"
-%sql $connection_string
-```
-
-```python Collapsed="false"
-conn = create_engine(connection_string)
-```
-
-```python Collapsed="false"
-england_wales_filename = 'uk-deaths-data/publishedweek532020.xlsx'
-```
-
-```sql Collapsed="false"
-drop table if exists all_causes_deaths;
-create table all_causes_deaths (
- week integer,
- year integer,
- date_up_to date,
- nation varchar(20),
- deaths integer,
- CONSTRAINT week_nation PRIMARY KEY(year, week, nation)
-);
-```
-
-```python Collapsed="false"
-raw_data_2015 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2015.csv',
- parse_dates=[1, 2], dayfirst=True,
-# index_col=0,
- header=[0, 1]
- )
-dh15i = raw_data_2015.iloc[:, [0, 3]]
-dh15i.set_index(dh15i.columns[0], inplace=True)
-dh15i.columns = ['total_2015']
-dh15i.tail()
-```
-
-```python Collapsed="false"
-raw_data_2015.head()
-```
-
-```python Collapsed="false"
-rd = raw_data_2015.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
- columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2015P)': 'deaths',
- 'Registration Week': 'week'}
- )
-rd['year'] = 2015
-rd['nation'] = 'Northern Ireland'
-rd.head()
-```
-
-```python Collapsed="false"
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python Collapsed="false"
-%sql select * from all_causes_deaths limit 10
-```
-
-```python Collapsed="false"
-raw_data_2016 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2016.csv',
- parse_dates=[1, 2], dayfirst=True,
-# index_col=0,
- header=[0, 1]
- )
-raw_data_2016.head()
-# dh16i = raw_data_2016.iloc[:, [2]]
-# dh16i.columns = ['total_2016']
-# # dh16i.head()
-dh16i = raw_data_2016.iloc[:, [0, 3]]
-dh16i.set_index(dh16i.columns[0], inplace=True)
-dh16i.columns = ['total_2016']
-dh16i.tail()
-```
-
-```python Collapsed="false"
-rd = raw_data_2016.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
- columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2016P)': 'deaths',
- 'Registration Week': 'week'}
- )
-rd['year'] = 2016
-rd['nation'] = 'Northern Ireland'
-rd.head()
-```
-
-```python Collapsed="false"
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python Collapsed="false"
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
-```
-
-```python Collapsed="false"
-raw_data_2017 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2017.csv',
- parse_dates=[1, 2], dayfirst=True,
-# index_col=0,
- header=[0, 1]
- )
-raw_data_2017.head()
-dh17i = raw_data_2017.iloc[:, [0, 3]]
-dh17i.set_index(dh17i.columns[0], inplace=True)
-dh17i.columns = ['total_2017']
-dh17i.tail()
-```
-
-```python Collapsed="false"
-rd = raw_data_2017.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
- columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2017P)': 'deaths',
- 'Registration Week': 'week'}
- )
-rd['year'] = 2017
-rd['nation'] = 'Northern Ireland'
-rd.head()
-```
-
-```python Collapsed="false"
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python Collapsed="false"
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
-```
-
-```python Collapsed="false"
-raw_data_2018 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2018.csv',
- parse_dates=[1, 2], dayfirst=True,
-# index_col=0,
- header=[0, 1]
- )
-raw_data_2018.head()
-dh18i = raw_data_2018.iloc[:, [0, 3]]
-dh18i.set_index(dh18i.columns[0], inplace=True)
-dh18i.columns = ['total_2018']
-dh18i.tail()
-```
-
-```python Collapsed="false"
-rd = raw_data_2018.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
- columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2018P)': 'deaths',
- 'Registration Week': 'week'}
- )
-rd['year'] = 2018
-rd['nation'] = 'Northern Ireland'
-rd.head()
-```
-
-```python Collapsed="false"
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python Collapsed="false"
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
-```
-
-```python Collapsed="false"
-raw_data_2019 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2019.csv',
- parse_dates=[1, 2], dayfirst=True,
-# index_col=0,
- header=[0, 1]
- )
-raw_data_2019.head()
-dh19i = raw_data_2019.iloc[:, [0, 3]]
-dh19i.set_index(dh19i.columns[0], inplace=True)
-dh19i.columns = ['total_2019']
-dh19i.tail()
-```
-
-```python Collapsed="false"
-rd = raw_data_2019.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
- columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2019P)': 'deaths',
- 'Registration Week': 'week'}
- )
-rd['year'] = 2019
-rd['nation'] = 'Northern Ireland'
-rd.head()
-```
-
-```python Collapsed="false"
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python Collapsed="false"
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
-```
-
-```python Collapsed="false"
-raw_data_2020_i = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2020.csv',
- parse_dates=[1], dayfirst=True,
- header=[0, 1]
- )
-raw_data_2020_i.head()
-```
-
-```python Collapsed="false"
-rd = raw_data_2020_i.iloc[:, [0, 1, 2]].droplevel(1, axis=1).rename(
- columns={'Week Ending (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2020P)': 'deaths',
- 'Registration Week': 'week'}
- )
-rd['year'] = 2020
-rd['nation'] = 'Northern Ireland'
-rd.head()
-```
-
-```python Collapsed="false"
-rd.tail()
-```
-
-```python Collapsed="false"
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python Collapsed="false"
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python
-raw_data_2020_i.set_index(raw_data_2020_i.columns[0], inplace=True)
-raw_data_2020_i.tail()
-```
-
-```python Collapsed="false"
-datetime.datetime.now().isocalendar()
-```
-
-```python Collapsed="false"
-datetime.datetime.fromisocalendar(2021, 3, 1)
-```
-
-```python Collapsed="false"
-
-```
-
-```python Collapsed="false"
-raw_data_s = pd.read_csv('uk-deaths-data/weekly-deaths-scotland.csv',
- index_col=0,
- header=0,
- skiprows=2
- )
-# raw_data_s
-```
-
-```python Collapsed="false"
-deaths_headlines_s = raw_data_s[reversed('2015 2016 2017 2018 2019 2020'.split())]
-deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns]
-deaths_headlines_s.reset_index(drop=True, inplace=True)
-deaths_headlines_s.index = deaths_headlines_s.index + 1
-deaths_headlines_s
-```
-
-```python
-%sql select * from all_causes_deaths limit 5
-```
-
-```python Collapsed="false"
-for year, ser in deaths_headlines_s.items():
- year_i = int(year[-4:])
-# print(year_i)
- for week, deaths in ser.dropna().iteritems():
-# print(datetime.date.fromisocalendar(year_i, week, 7), deaths)
- dut = datetime.date.fromisocalendar(year_i, week, 7)
- %sql insert into all_causes_deaths(week, year, date_up_to, nation, deaths) values ({week}, {year_i}, :dut, 'Scotland', {deaths})
-```
-
-```python
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python
-%sql select year, nation, date_up_to from all_causes_deaths where week=3 order by year, nation
-```
-
-```python Collapsed="false"
-eng_xls = pd.read_excel(england_wales_filename,
- sheet_name="Weekly figures 2020",
- skiprows=[0, 1, 2, 3],
- header=0,
- index_col=[1]
- ).iloc[:91].T
-eng_xls
-```
-
-```python Collapsed="false"
-# eng_xls_columns
-```
-
-```python Collapsed="false"
-eng_xls_columns = list(eng_xls.columns)
-
-for i, c in enumerate(eng_xls_columns):
-# print(i, c, type(c), isinstance(c, float))
- if isinstance(c, float) and np.isnan(c):
- if eng_xls.iloc[0].iloc[i] is not pd.NaT:
- eng_xls_columns[i] = eng_xls.iloc[0].iloc[i]
-
-# np.isnan(eng_xls_columns[0])
-# eng_xls_columns
-
-eng_xls.columns = eng_xls_columns
-# eng_xls.columns
-```
-
-```python
-eng_xls
-```
-
-```python
-rd = eng_xls.iloc[1:][['Week ended', 'Wales']].reset_index(level=0).rename(
- columns={'Week ended': 'date_up_to', 'Wales': 'deaths',
- 'index': 'week'}
- )
-rd['year'] = 2020
-rd['nation'] = 'Wales'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python Collapsed="false"
-eng_xls = eng_xls.iloc[1:]
-eng_xls['England deaths'] = eng_xls.loc[:, 'Total deaths, all ages'] - eng_xls.loc[:, 'Wales']
-```
-
-```python
-eng_xls.head()
-```
-
-```python
-rd = eng_xls[['Week ended', 'England deaths']].reset_index(level=0).rename(
- columns={'Week ended': 'date_up_to', 'England deaths': 'deaths',
- 'index': 'week'}
- )
-rd['year'] = 2020
-rd['nation'] = 'England'
-rd.head()
-```
-
-```python
-%sql delete from all_causes_deaths where nation = 'England'
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-rd.tail()
-```
-
-```python
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python Collapsed="false"
-# raw_data_2020 = pd.read_csv('uk-deaths-data/publishedweek272020.csv',
-# parse_dates=[1], dayfirst=True,
-# index_col=0,
-# header=[0, 1])
-```
-
-```python Collapsed="false"
-
-```
-
-```python Collapsed="false"
-# raw_data_2020.head()
-```
-
-```python Collapsed="false"
-# raw_data_2020['W92000004', 'Wales']
-```
-
-```python Collapsed="false"
-raw_data_2019 = pd.read_csv('uk-deaths-data/publishedweek522019.csv',
- parse_dates=[1], dayfirst=True,
-# index_col=0,
- header=[0, 1])
-# raw_data_2019.head()
-```
-
-```python
-rdew = raw_data_2019.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
-rdew.head()
-```
-
-```python
-rd = rdew.drop(columns=['Total deaths, all ages']).rename(
- columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
- 'Week number': 'week'}
- )
-rd['year'] = 2019
-rd['nation'] = 'Wales'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-rd = rdew.loc[:, ['Week ended','Week number']]
-rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
-rd = rd.rename(
- columns={'Week ended': 'date_up_to', 'Week number': 'week'}
- )
-rd['year'] = 2019
-rd['nation'] = 'England'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python Collapsed="false"
-raw_data_2018 = pd.read_csv('uk-deaths-data/publishedweek522018.csv',
- parse_dates=[1], dayfirst=True,
-# index_col=0,
- header=[0, 1])
-# raw_data_2018.head()
-```
-
-```python
-rdew = raw_data_2018.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
-rdew.head()
-```
-
-```python
-rd = rdew.drop(columns=['Total deaths, all ages']).rename(
- columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
- 'Week number': 'week'}
- )
-rd['year'] = 2018
-rd['nation'] = 'Wales'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-rd = rdew.loc[:, ['Week ended','Week number']]
-rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
-rd = rd.rename(
- columns={'Week ended': 'date_up_to', 'Week number': 'week'}
- )
-rd['year'] = 2018
-rd['nation'] = 'England'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python Collapsed="false"
-raw_data_2017 = pd.read_csv('uk-deaths-data/publishedweek522017.csv',
- parse_dates=[1], dayfirst=True,
-# index_col=0,
- header=[0, 1])
-# raw_data_2017.head()
-```
-
-```python
-rdew = raw_data_2017.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
-rdew.head()
-```
-
-```python
-rd = rdew.drop(columns=['Total deaths, all ages']).rename(
- columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
- 'Week number': 'week'}
- )
-rd['year'] = 2017
-rd['nation'] = 'Wales'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-rd = rdew.loc[:, ['Week ended','Week number']]
-rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
-rd = rd.rename(
- columns={'Week ended': 'date_up_to', 'Week number': 'week'}
- )
-rd['year'] = 2017
-rd['nation'] = 'England'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python
-
-```
-
-```python Collapsed="false"
-raw_data_2016 = pd.read_csv('uk-deaths-data/publishedweek522016.csv',
- parse_dates=[1], dayfirst=True,
-# index_col=0,
- header=[0, 1])
-# raw_data_2016.head()
-```
-
-```python
-raw_data_2016.head()
-```
-
-```python
-rdew = raw_data_2016.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
-rdew.head()
-```
-
-```python
-rd = rdew.drop(columns=['Total deaths, all ages']).rename(
- columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
- 'Week number': 'week'}
- )
-rd['year'] = 2016
-rd['nation'] = 'Wales'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-rd = rdew.loc[:, ['Week ended','Week number']]
-rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
-rd = rd.rename(
- columns={'Week ended': 'date_up_to', 'Week number': 'week'}
- )
-rd['year'] = 2016
-rd['nation'] = 'England'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
- %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python Collapsed="false"
-raw_data_2015 = pd.read_csv('uk-deaths-data/publishedweek2015.csv',
- parse_dates=[1], dayfirst=True,
-# index_col=0,
- header=[0, 1])
-# raw_data_2015.head()
-```
-
-```python
-rdew = raw_data_2015.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
-rdew.head()
-```
-
-```python
-rd = rdew.drop(columns=['Total deaths, all ages']).rename(
- columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
- 'Week number': 'week'}
- )
-rd['year'] = 2015
-rd['nation'] = 'Wales'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-rd = rdew.loc[:, ['Week ended','Week number']]
-rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
-rd = rd.rename(
- columns={'Week ended': 'date_up_to', 'Week number': 'week'}
- )
-rd['year'] = 2015
-rd['nation'] = 'England'
-rd.head()
-```
-
-```python
-rd.to_sql(
- 'all_causes_deaths',
- conn,
- if_exists='append',
- index=False)
-```
-
-```python
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by year, nation
-```
-
-```sql magic_args="res << select week, year, deaths"
-from all_causes_deaths
-where nation = 'England'
-```
-
-```python
-deaths_headlines_e = res.DataFrame().pivot(index='week', columns='year', values='deaths')
-deaths_headlines_e
-```
-
-```python
-
-```
-
-```sql magic_args="res << select week, year, deaths"
-from all_causes_deaths
-where nation = 'Scotland'
-```
-
-```python
-deaths_headlines_s = res.DataFrame().pivot(index='week', columns='year', values='deaths')
-deaths_headlines_s
-```
-
-```sql magic_args="res << select week, year, deaths"
-from all_causes_deaths
-where nation = 'Wales'
-```
-
-```python
-deaths_headlines_w = res.DataFrame().pivot(index='week', columns='year', values='deaths')
-deaths_headlines_w
-```
-
-```sql magic_args="res << select week, year, deaths"
-from all_causes_deaths
-where nation = 'Northern Ireland'
-```
-
-```python
-deaths_headlines_i = res.DataFrame().pivot(index='week', columns='year', values='deaths')
-deaths_headlines_i
-```
-
-```python Collapsed="false"
-deaths_headlines = deaths_headlines_e + deaths_headlines_w + deaths_headlines_i + deaths_headlines_s
-deaths_headlines
-```
-
-```python
-deaths_headlines_e.columns
-```
-
-```python
-deaths_headlines_e['previous_mean'] = deaths_headlines_e[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
-deaths_headlines_w['previous_mean'] = deaths_headlines_w[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
-deaths_headlines_s['previous_mean'] = deaths_headlines_s[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
-deaths_headlines_i['previous_mean'] = deaths_headlines_i[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
-deaths_headlines['previous_mean'] = deaths_headlines[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
-deaths_headlines
-```
-
-```python Collapsed="false"
-deaths_headlines[[2020, 2019, 2018, 2017, 2016, 2015]].plot(figsize=(14, 8))
-```
-
-```python Collapsed="false"
-deaths_headlines[[2020, 'previous_mean']].plot(figsize=(10, 8))
-```
-
-```python Collapsed="false"
-deaths_headlines_i.plot()
-```
-
-```python
-deaths_headlines[2020].sum() - deaths_headlines.previous_mean.sum()
-```
-
-```python Collapsed="false"
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-dhna = deaths_headlines.dropna()
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
- np.flip(
- np.arange(len(dhna))/float(len(dhna))*2.*np.pi),
- 14)
-# l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#b56363", label="2015") # 0
-# l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#a4b563", label="2016") # 72
-# l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#63b584", label="2017") # 144
-# l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#6384b5", label="2018") # 216
-# l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#a4635b", label="2019") # 288
-l15, = ax.plot(theta, dhna[2015], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, dhna[2016], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, dhna[2017], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, dhna[2018], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, dhna[2019], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, dhna['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, dhna[2020], color="red", label="2020")
-
-# deaths_headlines.total_2019.plot(ax=ax)
-
-def _closeline(line):
- x, y = line.get_data()
- x = np.concatenate((x, [x[0]]))
- y = np.concatenate((y, [y[0]]))
- line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(dhna.index)
-plt.legend()
-plt.title("Deaths by week over years, all UK")
-plt.savefig('deaths-radar.png')
-plt.show()
-```
-
-<!-- #region Collapsed="false" -->
-# Plots for UK nations
-<!-- #endregion -->
-
-```python Collapsed="false"
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
- np.flip(
- np.arange(len(deaths_headlines_e))/float(len(deaths_headlines_e))*2.*np.pi),
- 14)
-l15, = ax.plot(theta, deaths_headlines_e[2015], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, deaths_headlines_e[2016], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, deaths_headlines_e[2017], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, deaths_headlines_e[2018], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, deaths_headlines_e[2019], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, deaths_headlines_e['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, deaths_headlines_e[2020], color="red", label="2020")
-
-# deaths_headlines.total_2019.plot(ax=ax)
-
-def _closeline(line):
- x, y = line.get_data()
- x = np.concatenate((x, [x[0]]))
- y = np.concatenate((y, [y[0]]))
- line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(deaths_headlines_e.index)
-plt.legend()
-plt.title("Deaths by week over years, England")
-plt.savefig('deaths-radar_england.png')
-plt.show()
-```
-
-```python Collapsed="false"
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
- np.flip(
- np.arange(len(deaths_headlines_w))/float(len(deaths_headlines_w))*2.*np.pi),
- 14)
-l15, = ax.plot(theta, deaths_headlines_w[2015], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, deaths_headlines_w[2016], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, deaths_headlines_w[2017], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, deaths_headlines_w[2018], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, deaths_headlines_w[2019], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, deaths_headlines_w['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, deaths_headlines_w[2020], color="red", label="2020")
-
-
-def _closeline(line):
- x, y = line.get_data()
- x = np.concatenate((x, [x[0]]))
- y = np.concatenate((y, [y[0]]))
- line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(deaths_headlines_w.index)
-plt.legend()
-plt.title("Deaths by week over years, Wales")
-plt.savefig('deaths-radar_wales.png')
-plt.show()
-```
-
-```python Collapsed="false"
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
- np.flip(
- np.arange(len(deaths_headlines_s))/float(len(deaths_headlines_s))*2.*np.pi),
- 14)
-l15, = ax.plot(theta, deaths_headlines_s[2015], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, deaths_headlines_s[2016], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, deaths_headlines_s[2017], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, deaths_headlines_s[2018], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, deaths_headlines_s[2019], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, deaths_headlines_s['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, deaths_headlines_s[2020], color="red", label="2020")
-
-
-def _closeline(line):
- x, y = line.get_data()
- x = np.concatenate((x, [x[0]]))
- y = np.concatenate((y, [y[0]]))
- line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(deaths_headlines_s.index)
-plt.legend()
-plt.title("Deaths by week over years, Scotland")
-plt.savefig('deaths-radar_scotland.png')
-plt.show()
-```
-
-```python Collapsed="false"
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
- np.flip(
- np.arange(len(deaths_headlines_i))/float(len(deaths_headlines_i))*2.*np.pi),
- 14)
-l15, = ax.plot(theta, deaths_headlines_i[2015], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, deaths_headlines_i[2016], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, deaths_headlines_i[2017], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, deaths_headlines_i[2018], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, deaths_headlines_i[2019], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, deaths_headlines_i['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, deaths_headlines_i[2020], color="red", label="2020")
-
-
-def _closeline(line):
- x, y = line.get_data()
- x = np.concatenate((x, [x[0]]))
- y = np.concatenate((y, [y[0]]))
- line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(deaths_headlines_i.index)
-plt.legend()
-plt.title("Deaths by week over years, Northern Ireland")
-plt.savefig('deaths-radar_northern_ireland.png')
-plt.show()
-```
-
-```python Collapsed="false"
-
-```
-
-```python Collapsed="false"
-
-```
import matplotlib as mpl
import matplotlib.pyplot as plt
plt.ioff()
+# # %matplotlib inline
# %%
+
+# %% [markdown]
+#
--- /dev/null
+---
+jupyter:
+ jupytext:
+ formats: ipynb,md
+ text_representation:
+ extension: .md
+ format_name: markdown
+ format_version: '1.2'
+ jupytext_version: 1.9.1
+ kernelspec:
+ display_name: Python 3
+ language: python
+ name: python3
+---
+
+<!-- #region Collapsed="false" -->
+Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
+<!-- #endregion -->
+
+```python Collapsed="false"
+import itertools
+import collections
+import json
+import pandas as pd
+import numpy as np
+from scipy.stats import gmean
+import datetime
+
+from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float
+from sqlalchemy import create_engine
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+%matplotlib inline
+%load_ext sql
+```
+
+```python Collapsed="false"
+connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
+```
+
+```python Collapsed="false"
+%sql $connection_string
+```
+
+```python
+engine = create_engine(connection_string)
+```
+
+```python Collapsed="false"
+DEATH_COUNT_THRESHOLD = 10
+COUNTRIES_CORE = tuple(sorted('IT DE UK ES IE FR BE'.split()))
+COUNTRIES_NORDIC = tuple('SE NO DK FI UK'.split())
+COUNTRIES_FRIENDS = tuple('IT UK ES BE SI MX'.split())
+# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
+
+COUNTRIES_AMERICAS = ('AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
+ 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
+ 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
+ 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE')
+COUNTRIES_OF_INTEREST = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
+COUNTRIES_ALL = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
+```
+
+```python Collapsed="false"
+# res = %sql select report_date, geo_id, deaths_weekly, culm_deaths from weekly_cases where geo_id in $COUNTRIES_CORE order by report_date, geo_id
+```
+
+```python Collapsed="false"
+# country_data = res.DataFrame()
+# country_data['report_date'] = country_data.report_date.astype('datetime64[ns]')
+# country_data.set_index('report_date', inplace=True)
+# country_data.tail(10)
+```
+
+```python
+%sql select geo_id, country_name, population_2019 from countries where population_2019 > 30000000
+```
+
+```python
+query_string = f'''select report_date, weekly_cases.geo_id, country_name,
+ 10000000.0 * deaths_weekly / population_2019 as deaths_weekly_pc,
+ 10000000.0 * culm_deaths / population_2019 as culm_deaths_pc
+from weekly_cases, countries
+where weekly_cases.geo_id = countries.geo_id
+and countries.population_2019 > 20000000
+order by report_date, geo_id'''
+
+country_data = pd.read_sql_query(query_string,
+ engine,
+ index_col = 'report_date',
+ parse_dates = ['report_date']
+ )
+```
+
+```python
+country_data.tail()
+```
+
+```python
+latest_date = country_data.index.max()
+latest_date
+```
+
+```python
+# highest_deaths_geo_ids = country_data.loc[latest_date, :].nlargest(10, 'culm_deaths_pc')['geo_id'].values
+highest_deaths_geo_ids = (
+ country_data.loc[latest_date, :]
+ .nlargest(10, 'culm_deaths_pc')
+ ['country_name']
+ .values
+)
+
+highest_deaths_geo_ids
+```
+
+```python
+high_deaths_country_data = (
+ country_data[
+ country_data.country_name.isin(highest_deaths_geo_ids)]
+)
+high_deaths_country_data.tail()
+```
+
+```python Collapsed="false"
+deaths_culm = high_deaths_country_data.pivot(columns='country_name',
+ values='culm_deaths_pc')
+deaths_culm.tail()
+```
+
+```python Collapsed="false"
+deaths_culm.plot()
+```
+
+```python Collapsed="false"
+ax = deaths_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Total deaths, linear")
+ax.set_xlabel(f"Date")
+for c in highest_deaths_geo_ids:
+ lvi = deaths_culm[c].last_valid_index()
+ ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_culm[c][lvi], s = f"{c}: {deaths_culm[c][lvi]:.0f}")
+# plt.savefig('covid_deaths_total_linear.png')
+```
+
+```python
+deaths_culm.loc[latest_date].sort_values()
+```
+
+```python Collapsed="false"
+deaths_weekly = high_deaths_country_data.pivot(columns='country_name', values='deaths_weekly_pc')
+deaths_weekly.tail()
+```
+
+```python Collapsed="false"
+deaths_weekly.plot()
+```
+
+```python Collapsed="false"
+ax = deaths_weekly.loc['2020-03-01':].plot(figsize=(10, 6), title="Deaths per week")
+ax.set_xlabel('Date')
+for c in highest_deaths_geo_ids:
+ lvi = deaths_weekly[c].last_valid_index()
+ ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_weekly[c][lvi], s = c)
+# plt.savefig('covid_deaths_per_week.png')
+```
+
+```python Collapsed="false"
+ax = deaths_weekly.iloc[-6:].plot(figsize=(10, 6), title="Deaths per week")#, ylim=(-10, 100))
+ax.set_xlabel("Date")
+
+text_x_pos = deaths_weekly.last_valid_index() + pd.Timedelta(days=0.5)
+
+for c in highest_deaths_geo_ids:
+ lvi = deaths_weekly[c].last_valid_index()
+# if c != 'ES':
+ ax.text(x = text_x_pos, y = deaths_weekly[c][lvi], s = f"{c}: {deaths_weekly[c][lvi]:.0f}")
+plt.savefig('deaths_by_date_last_6_weeks.png')
+```
+
+```python
+query_string = f'''select report_date, geo_id,
+(cast(deaths_weekly as float) / population_2019) deaths_weekly_pc,
+(cast(culm_deaths as float) / population_2019) as culm_deaths_pc
+from weekly_cases join countries using (geo_id)
+where geo_id in {COUNTRIES_CORE}
+order by report_date, geo_id'''
+
+deaths_pc_data = pd.read_sql_query(query_string,
+ engine,
+ index_col = 'report_date',
+ parse_dates = ['report_date']
+ )
+deaths_pc_data.tail()
+```
+
+```python Collapsed="false"
+deaths_pc_culm = deaths_pc_data.pivot(columns='geo_id', values='culm_deaths_pc')
+deaths_pc_culm.tail()
+```
+
+```python
+ax = deaths_pc_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear")
+ax.set_xlabel(f"Date")
+for c in COUNTRIES_CORE:
+ lvi = deaths_pc_culm[c].last_valid_index()
+ ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc_culm[c][lvi], s = f"{c}: {deaths_pc_culm[c][lvi]:.0f}")
+# plt.savefig('covid_deaths_total_linear.png')
+```
+
+```python Collapsed="false"
+deaths_pc = deaths_pc_data.pivot(columns='geo_id', values='deaths_weekly_pc')
+deaths_pc.tail()
+```
+
+```python
+ax = deaths_pc.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear")
+ax.set_xlabel(f"Date")
+for c in COUNTRIES_CORE:
+ lvi = deaths_pc[c].last_valid_index()
+ ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]:.0f}")
+# plt.savefig('covid_deaths_total_linear.png')
+```
+
+```python
+ax = deaths_pc.iloc[-6:].plot(figsize=(10, 6), title="Deaths per million, linear")
+ax.set_xlabel(f"Date")
+for c in COUNTRIES_CORE:
+ lvi = deaths_pc[c].last_valid_index()
+ ax.text(x = lvi, y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]*10**6:.0f}")
+# plt.savefig('covid_deaths_total_linear.png')
+```
+
+```python
+
+```
+++ /dev/null
----
-jupyter:
- jupytext:
- formats: ipynb,md
- text_representation:
- extension: .md
- format_name: markdown
- format_version: '1.2'
- jupytext_version: 1.9.1
- kernelspec:
- display_name: Python 3
- language: python
- name: python3
----
-
-<!-- #region Collapsed="false" -->
-Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
-<!-- #endregion -->
-
-```python Collapsed="false"
-import itertools
-import collections
-import json
-import pandas as pd
-import numpy as np
-from scipy.stats import gmean
-import datetime
-
-from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float
-from sqlalchemy import create_engine
-
-import matplotlib as mpl
-import matplotlib.pyplot as plt
-%matplotlib inline
-%load_ext sql
-```
-
-```python Collapsed="false"
-connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
-```
-
-```python Collapsed="false"
-%sql $connection_string
-```
-
-```python
-engine = create_engine(connection_string)
-```
-
-```python Collapsed="false"
-DEATH_COUNT_THRESHOLD = 10
-COUNTRIES_CORE = tuple(sorted('IT DE UK ES IE FR BE'.split()))
-COUNTRIES_NORDIC = tuple('SE NO DK FI UK'.split())
-COUNTRIES_FRIENDS = tuple('IT UK ES BE SI MX'.split())
-# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
-
-COUNTRIES_AMERICAS = ('AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
- 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
- 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
- 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE')
-COUNTRIES_OF_INTEREST = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
-COUNTRIES_ALL = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
-```
-
-```python Collapsed="false"
-# res = %sql select report_date, geo_id, deaths_weekly, culm_deaths from weekly_cases where geo_id in $COUNTRIES_CORE order by report_date, geo_id
-```
-
-```python Collapsed="false"
-# country_data = res.DataFrame()
-# country_data['report_date'] = country_data.report_date.astype('datetime64[ns]')
-# country_data.set_index('report_date', inplace=True)
-# country_data.tail(10)
-```
-
-```python
-query_string = f'''select report_date, geo_id, deaths_weekly, culm_deaths
-from weekly_cases
-where geo_id in {COUNTRIES_CORE}
-order by report_date, geo_id'''
-
-country_data = pd.read_sql_query(query_string,
- engine,
- index_col = 'report_date',
- parse_dates = ['report_date']
- )
-```
-
-```python Collapsed="false"
-deaths_culm = country_data.pivot(columns='geo_id', values='culm_deaths')
-deaths_culm.tail()
-```
-
-```python Collapsed="false"
-deaths_culm.plot()
-```
-
-```python Collapsed="false"
-ax = deaths_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Total deaths, linear")
-ax.set_xlabel(f"Date")
-for c in COUNTRIES_CORE:
- lvi = deaths_culm[c].last_valid_index()
- ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_culm[c][lvi], s = f"{c}: {deaths_culm[c][lvi]:.0f}")
-plt.savefig('covid_deaths_total_linear.png')
-```
-
-```python Collapsed="false"
-deaths_weekly = country_data.pivot(columns='geo_id', values='deaths_weekly')
-deaths_weekly.tail()
-```
-
-```python Collapsed="false"
-deaths_weekly.plot()
-```
-
-```python Collapsed="false"
-ax = deaths_weekly.loc['2020-03-01':, COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per week")
-ax.set_xlabel('Date')
-for c in COUNTRIES_CORE:
- lvi = deaths_weekly[c].last_valid_index()
- ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_weekly[c][lvi], s = c)
-plt.savefig('covid_deaths_per_week.png')
-```
-
-```python Collapsed="false"
-ax = deaths_weekly.iloc[-6:].plot(figsize=(10, 6), title="Deaths per week")#, ylim=(-10, 100))
-ax.set_xlabel("Date")
-
-text_x_pos = deaths_weekly.last_valid_index() + pd.Timedelta(days=0.5)
-
-for c in COUNTRIES_CORE:
- lvi = deaths_weekly[c].last_valid_index()
-# if c != 'ES':
- ax.text(x = text_x_pos, y = deaths_weekly[c][lvi], s = f"{c}: {deaths_weekly[c][lvi]:.0f}")
-plt.savefig('deaths_by_date_last_6_weeks.png')
-```
-
-```python
-query_string = f'''select report_date, geo_id,
-(cast(deaths_weekly as float) / population_2019) deaths_weekly_pc,
-(cast(culm_deaths as float) / population_2019) as culm_deaths_pc
-from weekly_cases join countries using (geo_id)
-where geo_id in {COUNTRIES_CORE}
-order by report_date, geo_id'''
-
-deaths_pc_data = pd.read_sql_query(query_string,
- engine,
- index_col = 'report_date',
- parse_dates = ['report_date']
- )
-deaths_pc_data.tail()
-```
-
-```python Collapsed="false"
-deaths_pc_culm = deaths_pc_data.pivot(columns='geo_id', values='culm_deaths_pc')
-deaths_pc_culm.tail()
-```
-
-```python
-ax = deaths_pc_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear")
-ax.set_xlabel(f"Date")
-for c in COUNTRIES_CORE:
- lvi = deaths_pc_culm[c].last_valid_index()
- ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc_culm[c][lvi], s = f"{c}: {deaths_pc_culm[c][lvi]*10**6:.0f}")
-# plt.savefig('covid_deaths_total_linear.png')
-```
-
-```python Collapsed="false"
-deaths_pc = deaths_pc_data.pivot(columns='geo_id', values='deaths_weekly_pc')
-deaths_pc.tail()
-```
-
-```python
-ax = deaths_pc.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear")
-ax.set_xlabel(f"Date")
-for c in COUNTRIES_CORE:
- lvi = deaths_pc[c].last_valid_index()
- ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]*10**6:.0f}")
-# plt.savefig('covid_deaths_total_linear.png')
-```
-
-```python
-ax = deaths_pc.iloc[-6:].plot(figsize=(10, 6), title="Deaths per million, linear")
-ax.set_xlabel(f"Date")
-for c in COUNTRIES_CORE:
- lvi = deaths_pc[c].last_valid_index()
- ax.text(x = lvi, y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]*10**6:.0f}")
-# plt.savefig('covid_deaths_total_linear.png')
-```
-
-```python
-
-```
import matplotlib as mpl
import matplotlib.pyplot as plt
plt.ioff()
+# # %matplotlib inline
# %%
+++ /dev/null
----
-jupyter:
- jupytext:
- formats: ipynb,md
- text_representation:
- extension: .md
- format_name: markdown
- format_version: '1.2'
- jupytext_version: 1.9.1
- kernelspec:
- display_name: Python 3
- language: python
- name: python3
----
-
-<!-- #region Collapsed="false" -->
-Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
-<!-- #endregion -->
-
-```python Collapsed="false"
-import itertools
-import collections
-import json
-import pandas as pd
-import numpy as np
-from scipy.stats import gmean
-import datetime
-
-import matplotlib as mpl
-import matplotlib.pyplot as plt
-%matplotlib inline
-%load_ext sql
-```
-
-```python Collapsed="false"
-connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
-```
-
-```python Collapsed="false"
-%sql $connection_string
-```
-
-```python Collapsed="false"
-# DEATH_COUNT_THRESHOLD = 10
-COUNTRIES_CORE = tuple('IT DE UK ES IE FR BE'.split())
-# COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
-# COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()
-# # COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
-
-# COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
-# 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
-# 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
-# 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']
-# COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
-# COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
-```
-
-<!-- #region Collapsed="false" -->
-# Write results to summary file
-<!-- #endregion -->
-
-```python Collapsed="false"
-last_uk_date = %sql select date from uk_data order by date desc limit 1
-last_uk_date = last_uk_date[0][0]
-last_uk_date
-```
-
-```python Collapsed="false"
-last_intl_date = %sql select report_date from weekly_cases order by report_date desc limit 1
-last_intl_date = last_intl_date[0][0]
-last_intl_date
-```
-
-```sql magic_args="results << select date, new_cases, new_deaths "
-from uk_data
-order by date
-```
-
-```python
-uk_data = results.DataFrame()
-uk_data['date'] = uk_data.date.astype('datetime64[ns]')
-uk_data.set_index('date', inplace=True)
-uk_data.tail(10)
-```
-
-```python
-most_recent_uk_date = %sql select max(date) from uk_data
-most_recent_uk_date = most_recent_uk_date[0][0]
-most_recent_uk_date
-```
-
-```sql Collapsed="false" magic_args="results << select geo_id, country_name, culm_deaths "
-from weekly_cases join countries using (geo_id)
-where geo_id in :COUNTRIES_CORE
- and (geo_id, report_date) in (select geo_id, max(report_date) from weekly_cases group by geo_id)
-order by geo_id
-```
-
-```python
-thirty_days_ago = most_recent_uk_date - datetime.timedelta(days=30)
-thirty_days_ago
-```
-
-```python
-# thirty_days_ago = most_recent_uk_date - datetime.interval(days=30)
-total_uk_deaths = %sql select sum(new_deaths) from uk_data
-total_uk_deaths = total_uk_deaths[0][0]
-deaths_in_past_month = %sql select sum(new_deaths) from uk_data where date > :thirty_days_ago
-deaths_in_past_month = deaths_in_past_month[0][0]
-cases_in_past_month = %sql select sum(new_cases) from uk_data where date > :thirty_days_ago
-cases_in_past_month = cases_in_past_month[0][0]
-total_uk_deaths, deaths_in_past_month, cases_in_past_month
-```
-
-```python Collapsed="false"
-with open('covid_summary.md', 'w') as f:
- f.write('% Covid death data summary\n')
- f.write('% Neil Smith\n')
- f.write(f'% Created on {datetime.datetime.now().strftime("%Y-%m-%d")}\n')
- f.write('\n')
- f.write(f'> Last UK data from {last_uk_date.strftime("%d %b %Y")}. ')
- f.write(f' Last international data from {last_intl_date.strftime("%d %b %Y")}.\n')
- f.write('\n')
-```
-
-```python Collapsed="false"
-with open('covid_summary.md', 'a') as f:
- f.write('## Headlines (UK data)\n')
- f.write('\n')
- f.write('| []() | |\n')
- f.write('|:---|---:|\n')
- f.write(f'| Deaths reported so far | {total_uk_deaths} | \n')
- f.write(f'| Deaths in last 30 days | {deaths_in_past_month} | \n')
- f.write(f'| Cases in last 30 days | {cases_in_past_month} | \n')
-# f.write(f'| Total Covid deaths to date (estimated) | {uk_deaths_to_date:.0f} |\n')
- f.write('\n')
-```
-
-```python Collapsed="false"
-with open('covid_summary.md', 'a') as f:
- f.write('## International comparison\n')
- f.write('\n')
- f.write(f'Based on weekly data. Last data from {last_intl_date.strftime("%d %b %Y")}\n')
- f.write('\n')
- f.write('### Total deaths\n')
- f.write('\n')
- f.write('![Total deaths](covid_deaths_total_linear.png)\n')
- f.write('\n')
- f.write('| Country ID | Country name | Total deaths |\n')
- f.write('|:-----------|:-------------|-------------:|\n')
- for c_id, c_name, t_deaths in results:
- f.write(f'| {c_id} | {c_name} | {t_deaths} |\n')
- f.write('\n')
-```
-
-```python Collapsed="false"
-# with open('covid_summary.md', 'a') as f:
-# f.write('## All-causes deaths, UK\n')
-# f.write('\n')
-# f.write('![All-causes deaths](deaths-radar.png)\n')
-# f.write('\n')
-# f.write('### True deaths\n')
-# f.write('\n')
-# f.write(f'The number of deaths reported in official statistics, {uk_covid_deaths}, is an underestimate '
-# 'of the true number of Covid deaths.\n'
-# 'This is especially true early in the pandemic, approximately March to May 2020.\n')
-# f.write('We can get a better understanding of the impact of Covid by looking at the number of deaths, '
-# 'over and above what would be expected at each week of the year.\n')
-# f.write(f'The ONS (and other bodies in Scotland and Northern Ireland) have released data on the number of deaths '
-# f'up to {pd.to_datetime(excess_deaths_data["end_date"]).strftime("%d %B %Y")}.\n\n')
-# f.write('If, for each of those weeks, I take the largest of the excess deaths or the reported Covid deaths, ')
-# f.write(f'I estimate there have been **{uk_deaths_to_date}** total deaths so far.\n')
-# f.write('\n')
-```
-
-```python Collapsed="false"
-with open('covid_summary.md', 'a') as f:
- f.write('### Deaths per week\n')
- f.write('\n')
- f.write('![Deaths per week](covid_deaths_per_week.png)\n')
- f.write('\n')
- f.write('![Deaths per week, last 6 weeks](deaths_by_date_last_6_weeks.png)\n')
- f.write('\n')
-```
-
-```python
-with open('covid_summary.md', 'a') as f:
- f.write('## UK data\n')
- f.write('\n')
- f.write('### Total deaths\n')
- f.write('\n')
- f.write(f'Deaths reported up to {last_uk_date.strftime("%d %b %Y")}: {total_uk_deaths}\n')
- f.write('\n')
- f.write('![Total deaths](cases_and_deaths.png)\n')
- f.write('\n')
- f.write('![Cases and deaths in last 60 days](cases_and_deaths_last_60_days.png)\n')
- f.write('\n')
- f.write('![Deaths compared to past five years](deaths-radar-2020.png)\n')
- f.write('\n')
-
-```
-
-```python Collapsed="false"
-with open('hospital_normalisation_date.json') as f:
- hospital_normalisation_date_data = json.load(f)
-```
-
-```python Collapsed="false"
-with open('covid_summary.md', 'a') as f:
- f.write('### Hospital care\n')
- f.write(f'Based on a 7-day moving average\n')
- f.write('\n')
- f.write('![Cases, admissions, deaths](cases_admissions_deaths.png)\n')
- f.write('\n')
- f.write('Due to the large scale differences between the three '
- 'measures, they are all normalised to show changes ')
- f.write(f'since {pd.to_datetime(hospital_normalisation_date_data["hospital_normalisation_date"]).strftime("%d %B %Y")}.\n')
- f.write('\n')
- f.write('People in hospital, and on mechanical ventilators\n')
- f.write('\n')
- f.write('![People in hospital and on mechancial ventilators](people_in_hospital.png)\n')
- f.write('\n')
-```
-
-```python Collapsed="false"
-with open('covid_summary.md', 'a') as f:
- f.write('### Testing effectiveness\n')
- f.write('\n')
- f.write('A question about testing is whether more detected cases is a result of more tests being '
- 'done or is because the number of cases is increasing. One way of telling the differeence '
- 'is by looking at the fraction of tests that are positive.\n')
- f.write('\n')
- f.write('![Positive tests and cases](tests_and_cases.png)\n')
- f.write('\n')
- f.write('Numbers of positive tests and cases, '
- '7-day moving average.\n'
- 'Note the different y-axes\n')
- f.write('\n')
- f.write('![Fraction of tests with positive result](fraction_positive_tests.png)\n')
- f.write('\n')
- f.write('Fraction of tests with a positive result, both daily figures and '
- '7-day moving average.\n')
- f.write('\n')
- f.write('\n')
- f.write('![Tests against fraction positive, trajectory](fraction_positive_tests_vs_tests.png)\n')
- f.write('\n')
- f.write('The trajectory of tests done vs fraction positive tests.\n')
- f.write('\n')
- f.write('Points higher indicate more tests; points to the right indicate more positive tests.'
- 'More tests being done with the same infection prevelance will move the point up '
- 'and to the left.\n')
- f.write('\n')
- f.write('\n')
- f.write('![Tests against fraction positive, trajectory](tests_vs_fraction_positive_animation.png)\n')
- f.write('\n')
-```
-
-```python Collapsed="false"
-
-```
-
-```python Collapsed="false"
-with open('covid_summary.md', 'a') as f:
- f.write('# Data sources\n')
- f.write('\n')
- f.write('> Covid data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)\n')
- f.write('\n')
- f.write("""> Population data from:
-
-* [Office of National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (Endland and Wales) Weeks start on a Saturday.
-* [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
-* [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.""")
-
- f.write('\n\n')
- f.write('> [Source code available](https://git.njae.me.uk/?p=covid19.git;a=tree)\n')
- f.write('\n')
-
-```
-
-```python Collapsed="false"
-!pandoc --toc -s covid_summary.md > covid_summary.html
-```
-
-```python Collapsed="false"
-!scp covid_summary.html neil@ogedei:/var/www/scripts.njae.me.uk/covid/index.html
-!scp covid_deaths_total_linear.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp cases_and_deaths.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp cases_and_deaths_last_60_days.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-# !scp deaths-radar.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp deaths-radar-2020.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp covid_deaths_per_week.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp fraction_positive_tests.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp tests_and_cases.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp deaths_by_date_last_6_weeks.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp fraction_positive_tests_vs_tests.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp tests_vs_fraction_positive_animation.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp people_in_hospital.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp cases_admissions_deaths.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-```
-
-```python Collapsed="false"
-with open('uk_covid_deaths.js', 'w') as f:
- f.write(f"document.write('{total_uk_deaths}');")
-
-with open('uk_deaths_30_days.js', 'w') as f:
- f.write(f"document.write('{deaths_in_past_month}');")
-
-with open('uk_cases_30_days.js', 'w') as f:
- f.write(f"document.write('{cases_in_past_month}');")
-
-# with open('estimated_total_deaths.js', 'w') as f:
-# f.write(f"document.write('{uk_deaths_to_date:.0f}');")
-
-# edut = pd.to_datetime(excess_deaths_data["end_date"]).strftime('%d %B %Y')
-# with open('excess_deaths_upto.js', 'w') as f:
-# f.write(f"document.write('{edut}');")
-
-with open('last_uk_date.js', 'w') as f:
- f.write(f"document.write('{pd.to_datetime(last_uk_date).strftime('%d %B %Y')}');")
-
-with open('last_intl_date.js', 'w') as f:
- f.write(f"document.write('{pd.to_datetime(last_intl_date).strftime('%d %B %Y')}');")
-
-```
-
-```python Collapsed="false"
-# pd.to_datetime(excess_deaths_upto).strftime('%d %B %Y')
-```
-
-```python Collapsed="false"
-!scp uk_covid_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp uk_deaths_30_days.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp uk_cases_30_days.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-# !scp estimated_total_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-# !scp excess_deaths_upto.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp last_uk_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp last_intl_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-!scp hospital_normalisation_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-```
-
-```python Collapsed="false"
-
-```
import matplotlib as mpl
import matplotlib.pyplot as plt
+# # %matplotlib inline
# %%
# %%
os.system('pandoc --toc -s covid_summary.md > covid_summary.html')
-
-
-# %%
os.system('scp covid_summary.html neil@ogedei:/var/www/scripts.njae.me.uk/covid/index.html')
-os.system('scp covid_deaths_total_linear.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp cases_and_deaths.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp cases_and_deaths_last_60_days.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp deaths-radar-2021.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp covid_deaths_per_week.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp fraction_positive_tests.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ ')
-os.system('scp tests_and_cases.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp deaths_by_date_last_6_weeks.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp fraction_positive_tests_vs_tests.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp tests_vs_fraction_positive_animation.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ ')
-os.system('scp people_in_hospital.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp cases_admissions_deaths.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-
# %%
with open('uk_covid_deaths.js', 'w') as f:
with open('last_intl_date.js', 'w') as f:
f.write(f"document.write('{pd.to_datetime(last_intl_date).strftime('%d %B %Y')}');")
-
# %%
-os.system('scp uk_covid_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp uk_deaths_30_days.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp uk_cases_30_days.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-# # !scp estimated_total_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-# # !scp excess_deaths_upto.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
-os.system('scp last_uk_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp last_intl_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-os.system('scp hospital_normalisation_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
-
+transfer_files = [
+ 'covid_deaths_total_linear.png',
+ 'cases_and_deaths.png',
+ 'cases_and_deaths_last_60_days.png',
+ 'deaths-radar-2021.png',
+ 'covid_deaths_per_week.png',
+ 'fraction_positive_tests.png',
+ 'tests_and_cases.png',
+ 'deaths_by_date_last_6_weeks.png',
+ 'fraction_positive_tests_vs_tests.png',
+ 'tests_vs_fraction_positive_animation.png',
+ 'people_in_hospital.png',
+ 'cases_admissions_deaths.png',
+ 'uk_covid_deaths.js',
+ 'uk_deaths_30_days.js',
+ 'uk_cases_30_days.js',
+ 'last_uk_date.js',
+ 'last_intl_date.js',
+ 'hospital_normalisation_date.js'
+]
# %%
+for f in transfer_files:
+ if os.path.isfile(f):
+ os.system(f'scp {f} neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
+ print(f'Transferred {f}')
+ else:
+        print(f'Cannot transfer {f}: file does not exist')
-
-
+# %%
import matplotlib.pyplot as plt
import matplotlib.animation as ani
plt.ioff()
+# # %matplotlib inline
# %%
import matplotlib as mpl
import matplotlib.pyplot as plt
plt.ioff()
+# # %matplotlib inline
# %%
```
```python Collapsed="false"
-england_wales_filename = 'uk-deaths-data/copyofpublishedweek042021.xlsx'
+england_wales_filename = 'uk-deaths-data/publishedweek052021.xlsx'
```
```python Collapsed="false"
-scotland_filename = 'uk-deaths-data/Scottish Government COVID-19 data (10 February 2021).xlsx'
+scotland_filename = 'uk-deaths-data/Scottish Government COVID-19 data (15 February 2021).xlsx'
```
```python