From 15eaad40c32fd6ac6ce6457451f4933fb45890b0 Mon Sep 17 00:00:00 2001 From: Neil Smith Date: Sun, 21 Feb 2021 12:15:55 +0000 Subject: [PATCH] Changed European data source and format --- covid_summary.md | 28 +++---- data_import.py | 149 ++++++++++++++++++------------------ international_comparison.py | 42 +++++----- publish.py | 32 +++----- uk_deaths_import.md | 29 ++++--- 5 files changed, 144 insertions(+), 136 deletions(-) diff --git a/covid_summary.md b/covid_summary.md index 914f3d8..edc961e 100644 --- a/covid_summary.md +++ b/covid_summary.md @@ -1,20 +1,20 @@ % Covid death data summary % Neil Smith -% Created on 2021-02-10 +% Created on 2021-02-21 -> Last UK data from 09 Feb 2021. Last international data from 01 Feb 2021. +> Last UK data from 20 Feb 2021. Last international data from 14 Feb 2021. ## Headlines (UK data) | []() | | |:---|---:| -| Deaths reported so far | 113872 | -| Deaths in last 30 days | 32419 | -| Cases in last 30 days | 794103 | +| Deaths reported so far | 120387 | +| Deaths in last 30 days | 25785 | +| Cases in last 30 days | 480504 | ## International comparison -Based on weekly data. Last data from 01 Feb 2021 +Based on weekly data. Last data from 14 Feb 2021 ### Total deaths @@ -22,13 +22,13 @@ Based on weekly data. Last data from 01 Feb 2021 | Country ID | Country name | Total deaths | |:-----------|:-------------|-------------:| -| BE | Belgium | 21135 | -| DE | Germany | 57120 | -| ES | Spain | 59081 | -| FR | France | 76057 | -| IE | Ireland | 3307 | -| IT | Italy | 88516 | -| UK | United_Kingdom | 106158 | +| BEL | Belgium | 21720 | +| DEU | Germany | 65076 | +| ESP | Spain | 65449 | +| FRA | France | 81814 | +| GBR | United Kingdom | 117166 | +| IRL | Ireland | 3948 | +| ITA | Italy | 93577 | ### Deaths per week @@ -40,7 +40,7 @@ Based on weekly data. Last data from 01 Feb 2021 ### Total deaths -Deaths reported up to 09 Feb 2021: 113872 +Deaths reported up to 20 Feb 2021: 120387 ![Total deaths](cases_and_deaths.png) diff --git a/data_import.py b/data_import.py index df35ce6..8ec03d1 100644 --- a/data_import.py +++ b/data_import.py @@ -14,6 +14,11 @@ import numpy as np # from scipy.stats import gmean import datetime import os +# import matplotlib as mpl +# import matplotlib.pyplot as plt +# import matplotlib.animation as ani + +# # %matplotlib inline # %% @@ -24,28 +29,32 @@ engine = eng.execution_options(isolation_level="AUTOCOMMIT") # %% -DEATH_COUNT_THRESHOLD = 10 -COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split() -COUNTRIES_NORDIC = 'SE NO DK FI UK'.split() -COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split() -# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split() +# DEATH_COUNT_THRESHOLD = 10 +# COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split() +# COUNTRIES_NORDIC = 'SE NO DK FI UK'.split() +# COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split() +# # COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split() -COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA - 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT', - 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN', - 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE'] -COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS)) -COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS)) +# COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA +# 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT', +# 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN', +# 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE'] +# COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS)) +# COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS)) # %% -os.system('curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv') +## Old data source +# os.system('curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv') + +## In outer shell script, as it doesn't seem to work here +os.system('curl https://opendata.ecdc.europa.eu/covid19/nationalcasedeath/csv/ > covid.csv') # %% # First col is a date, treat geoId of NA as 'Namibia', not "NA" value raw_data = pd.read_csv('covid.csv', - parse_dates=[0], dayfirst=True, + parse_dates=[7], dayfirst=True, keep_default_na=False, na_values = [''], # dtype = {'day': np.int64, # 'month': np.int64, @@ -61,10 +70,45 @@ raw_data = pd.read_csv('covid.csv', ) raw_data.fillna(0, inplace=True) +raw_data = raw_data[raw_data.country_code != 0] +# raw_data + +# %% +raw_data['date'] = raw_data.year_week.apply(lambda yw: datetime.datetime.fromisocalendar(int(yw[:4]), int(yw[5:]), 7 )) +# raw_data + +# %% +# raw_data.dtypes + +# %% +# raw_data.head() + +# %% +# raw_data[raw_data.country_code == 'GBR'].sort_values(by='date').tail(20) + +# %% +case_data = raw_data[raw_data.indicator == 'cases'][['country_code', 'date', 'weekly_count', 'cumulative_count']] +case_data.rename(columns={'weekly_count': 'cases_weekly', 'cumulative_count': 'culm_cases'}, inplace=True) +# case_data.head() + +# %% +deaths_data = raw_data[raw_data.indicator == 'deaths'][['country_code', 'date', 'weekly_count', 'cumulative_count']] +deaths_data.rename(columns={'weekly_count': 'deaths_weekly', 'cumulative_count': 'culm_deaths'}, inplace=True) +# deaths_data.head() + +# %% +case_death_data = case_data.merge(deaths_data) +# case_death_data + +# %% +# raw_data.columns + +# %% +country_data = raw_data[['country', 'country_code', 'continent', 'population']].drop_duplicates() +# country_data # %% -COUNTRIES_ALL # %% # raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id', @@ -72,20 +116,21 @@ COUNTRIES_ALL # 'countryterritoryCode': 'country_territory_code', # 'popData2019': 'population_2019', # 'continentExp': 'continent'}, inplace=True) -raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id', - 'countriesAndTerritories': 'country_name', - 'countryterritoryCode': 'country_territory_code', - 'cases': 'cases_weekly', - 'deaths': 'deaths_weekly', - 'popData2019': 'population_2019', - 'continentExp': 'continent', - 'Cumulative_number_for_14_days_of_COVID-19_cases_per_100000': 'notification_rate_per_100000_population_14-days' - }, - inplace=True) + +# raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id', +# 'countriesAndTerritories': 'country_name', +# 'countryterritoryCode': 'country_territory_code', +# 'cases': 'cases_weekly', +# 'deaths': 'deaths_weekly', +# 'popData2019': 'population_2019', +# 'continentExp': 'continent', +# # 'Cumulative_number_for_14_days_of_COVID-19_cases_per_100000': 'notification_rate_per_100000_population_14-days' +# }, +# inplace=True) # %% -raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']].to_sql( +case_death_data.to_sql( 'weekly_cases', engine, if_exists='replace', @@ -94,16 +139,16 @@ raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notificatio dtype={ "report_date": Date, "cases_weekly": Integer, + "cases_culm": Integer, "deaths_weekly": Integer, - "geo_id": String, - "notification_rate_per_100000_population_14-days": Float + "deaths_culm": Integer, + "country_code": String } ) # %% -raw_data[['country_name', 'geo_id', 'country_territory_code', - 'population_2019', 'continent']].drop_duplicates().to_sql( +country_data.to_sql( 'countries', engine, if_exists='replace', @@ -111,55 +156,13 @@ raw_data[['country_name', 'geo_id', 'country_territory_code', chunksize=500, dtype={ "country_name": Text, - "geo_id": String, - "country_territory_code": String, - "population_2019": Integer, + "country_code": String, + "population": Integer, "continent": Text } ) -# %% -with engine.connect() as connection: - connection.execute('alter table weekly_cases add primary key (geo_id, report_date)') - connection.execute('alter table countries add primary key (geo_id);') - connection.execute('alter table weekly_cases add foreign key (geo_id) references countries(geo_id);') - connection.execute('alter table weekly_cases add culm_cases integer;') - connection.execute('alter table weekly_cases add culm_deaths integer;') - - -# %% -query_string = '''with culm as - (select report_date, geo_id, - sum(cases_weekly) over (partition by geo_id - order by report_date) as culm_data - from weekly_cases - ) -update weekly_cases - set culm_cases = culm_data - from culm - where weekly_cases.report_date = culm.report_date and - weekly_cases.geo_id = culm.geo_id''' -with engine.connect() as connection: - connection.execute(query_string) - - -# %% -query_string = '''with culm as - (select report_date, geo_id, - sum(deaths_weekly) over (partition by geo_id - order by report_date) as culm_data - from weekly_cases - ) -update weekly_cases - set culm_deaths = culm_data - from culm - where weekly_cases.report_date = culm.report_date and - weekly_cases.geo_id = culm.geo_id''' -with engine.connect() as connection: - connection.execute(query_string) - - # %% uk_query_string = ( "https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&" diff --git a/international_comparison.py b/international_comparison.py index 18880f5..3764683 100644 --- a/international_comparison.py +++ b/international_comparison.py @@ -18,7 +18,7 @@ from sqlalchemy import create_engine import matplotlib as mpl import matplotlib.pyplot as plt plt.ioff() -# # %matplotlib inline +# %matplotlib inline # %% @@ -31,36 +31,42 @@ engine = create_engine(connection_string) # %% DEATH_COUNT_THRESHOLD = 10 -COUNTRIES_CORE = tuple(sorted('IT DE UK ES IE FR BE'.split())) -COUNTRIES_NORDIC = tuple('SE NO DK FI UK'.split()) -COUNTRIES_FRIENDS = tuple('IT UK ES BE SI MX'.split()) +COUNTRIES_CORE = tuple(sorted('ITA DEU GBR ESP IRL FRA BEL'.split())) +# COUNTRIES_NORDIC = tuple('SE NO DK FI UK'.split()) +COUNTRIES_FRIENDS = tuple('ITA GBR ESP BEL SVN MEX'.split()) # COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split() -COUNTRIES_AMERICAS = ('AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA - 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT', - 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN', - 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE') -COUNTRIES_OF_INTEREST = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS)) -COUNTRIES_ALL = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS)) +# COUNTRIES_AMERICAS = ('AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA +# 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT', +# 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN', +# 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE') +# COUNTRIES_OF_INTEREST = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS)) +# COUNTRIES_ALL = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS)) # %% -query_string = f'''select report_date, geo_id, deaths_weekly, culm_deaths +query_string = f'''select date, country_code, deaths_weekly, culm_deaths from weekly_cases -where geo_id in {COUNTRIES_CORE} -order by report_date, geo_id''' +where country_code in {COUNTRIES_CORE} +order by date, country_code''' country_data = pd.read_sql_query(query_string, engine, - index_col = 'report_date', - parse_dates = ['report_date'] + index_col = 'date', + parse_dates = ['date'] ) # %% -deaths_culm = country_data.pivot(columns='geo_id', values='culm_deaths') +deaths_culm = country_data.pivot(columns='country_code', values='culm_deaths') +# %% +# country_data + +# %% +# deaths_culm + # %% ax = deaths_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Total deaths, linear") ax.set_xlabel(f"Date") @@ -71,7 +77,7 @@ plt.savefig('covid_deaths_total_linear.png') # %% -deaths_weekly = country_data.pivot(columns='geo_id', values='deaths_weekly') +deaths_weekly = country_data.pivot(columns='country_code', values='deaths_weekly') # %% @@ -84,7 +90,7 @@ plt.savefig('covid_deaths_per_week.png') # %% -ax = deaths_weekly.iloc[-6:].plot(figsize=(10, 6), title="Deaths per week")#, ylim=(-10, 100)) +ax = deaths_weekly.iloc[-6:].plot(figsize=(10, 6), title="Deaths per week, last 6 weeks")#, ylim=(-10, 100)) ax.set_xlabel("Date") text_x_pos = deaths_weekly.last_valid_index() + pd.Timedelta(days=0.5) diff --git a/publish.py b/publish.py index 8c97056..8dd4b0d 100644 --- a/publish.py +++ b/publish.py @@ -28,21 +28,9 @@ engine = sqlalchemy.create_engine(connection_string) # %% -# DEATH_COUNT_THRESHOLD = 10 -COUNTRIES_CORE = tuple('IT DE UK ES IE FR BE'.split()) -# COUNTRIES_NORDIC = 'SE NO DK FI UK'.split() -# COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split() -# # COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split() +COUNTRIES_CORE = tuple(sorted('ITA DEU GBR ESP IRL FRA BEL'.split())) +COUNTRIES_FRIENDS = tuple('ITA GBR ESP BEL SVN MEX'.split()) -# COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA -# 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT', -# 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN', -# 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE'] -# COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS)) -# COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS)) - - -# # Write results to summary file # %% def singleton_sql_value(engine, query_string): @@ -56,7 +44,7 @@ last_uk_date = singleton_sql_value(engine, 'select max(date) from uk_data') # %% -last_intl_date = singleton_sql_value(engine, 'select max(report_date) from weekly_cases') +last_intl_date = singleton_sql_value(engine, 'select max(date) from weekly_cases') # %% @@ -95,11 +83,11 @@ with open('covid_summary.md', 'a') as f: # %% -query_string = f'''select geo_id, country_name, culm_deaths -from weekly_cases join countries using (geo_id) -where geo_id in {COUNTRIES_CORE} - and report_date = '{last_intl_date.isoformat()}' -order by geo_id''' +query_string = f'''select country_code, country, culm_deaths +from weekly_cases join countries using (country_code) +where country_code in {COUNTRIES_CORE} + and date = '{last_intl_date.isoformat()}' +order by country_code''' with engine.connect() as conn: results = list(conn.execute(query_string)) @@ -244,7 +232,7 @@ transfer_files = [ 'covid_deaths_total_linear.png', 'cases_and_deaths.png', 'cases_and_deaths_last_60_days.png', - 'deaths-radar-2021.png', + 'deaths_radar_2021.png', 'covid_deaths_per_week.png', 'fraction_positive_tests.png', 'tests_and_cases.png', @@ -267,7 +255,7 @@ for f in transfer_files: os.system(f'scp {f} neil@ogedei:/var/www/scripts.njae.me.uk/covid/') print(f'Transferred {f}') else: - print('Cannot transfer {f}: file does not exist') + print(f'Cannot transfer {f}: file does not exist') # %% diff --git a/uk_deaths_import.md b/uk_deaths_import.md index 61bc1e1..612783c 100644 --- a/uk_deaths_import.md +++ b/uk_deaths_import.md @@ -5,8 +5,8 @@ jupyter: text_representation: extension: .md format_name: markdown - format_version: '1.2' - jupytext_version: 1.9.1 + format_version: '1.3' + jupytext_version: 1.10.2 kernelspec: display_name: Python 3 language: python @@ -64,14 +64,25 @@ scotland_filename = 'uk-deaths-data/Scottish Government COVID-19 data (15 Februa n_ireland_filename = 'uk-deaths-data/Weekly_Deaths_0.xlsx' ``` +```python Collapsed="false" +# eng_xls = pd.read_excel(england_wales_filename, +# sheet_name="Weekly figures 2021", +# skiprows=[0, 1, 2, 3], +# skipfooter=11, +# header=0, +# index_col=[1] +# ).iloc[:99].T +# eng_xls +``` + ```python Collapsed="false" eng_xls = pd.read_excel(england_wales_filename, sheet_name="Weekly figures 2021", skiprows=[0, 1, 2, 3], - skipfooter=11, + skipfooter=10, header=0, index_col=[1] - ).iloc[:91].T + ).T eng_xls ``` @@ -415,7 +426,7 @@ def create_and_save_radar_plot(dataset, title_string, filename_suffix): ax.set_xticklabels(dataset.index) plt.legend() plt.title(f"Deaths by week over years, {title_string}") - plt.savefig(f'deaths-radar-2021{filename_suffix}.png') + plt.savefig(f'deaths_radar_2021{filename_suffix}.png') plt.show() ``` @@ -469,19 +480,19 @@ create_and_save_radar_plot(deaths, 'all UK', '') ```python -create_and_save_radar_plot(deaths_e, 'England', '-england') +create_and_save_radar_plot(deaths_e, 'England', '_england') ``` ```python -create_and_save_radar_plot(deaths_w, 'Wales', '-wales') +create_and_save_radar_plot(deaths_w, 'Wales', '_wales') ``` ```python -create_and_save_radar_plot(deaths_s, 'Scotland', '-scotland') +create_and_save_radar_plot(deaths_s, 'Scotland', '_scotland') ``` ```python -create_and_save_radar_plot(deaths_i, 'Northern Ireland', '-northern-ireland') +create_and_save_radar_plot(deaths_i, 'Northern Ireland', '_northern_ireland') ``` ```python Collapsed="false" -- 2.34.1