% Covid death data summary
% Neil Smith
-% Created on 2021-02-10
+% Created on 2021-02-21
-> Last UK data from 09 Feb 2021. Last international data from 01 Feb 2021.
+> Last UK data from 20 Feb 2021. Last international data from 14 Feb 2021.
## Headlines (UK data)
| []() | |
|:---|---:|
-| Deaths reported so far | 113872 |
-| Deaths in last 30 days | 32419 |
-| Cases in last 30 days | 794103 |
+| Deaths reported so far | 120387 |
+| Deaths in last 30 days | 25785 |
+| Cases in last 30 days | 480504 |
## International comparison
-Based on weekly data. Last data from 01 Feb 2021
+Based on weekly data. Last data from 14 Feb 2021
### Total deaths
| Country ID | Country name | Total deaths |
|:-----------|:-------------|-------------:|
-| BE | Belgium | 21135 |
-| DE | Germany | 57120 |
-| ES | Spain | 59081 |
-| FR | France | 76057 |
-| IE | Ireland | 3307 |
-| IT | Italy | 88516 |
-| UK | United_Kingdom | 106158 |
+| BEL | Belgium | 21720 |
+| DEU | Germany | 65076 |
+| ESP | Spain | 65449 |
+| FRA | France | 81814 |
+| GBR | United Kingdom | 117166 |
+| IRL | Ireland | 3948 |
+| ITA | Italy | 93577 |
### Deaths per week
### Total deaths
-Deaths reported up to 09 Feb 2021: 113872
+Deaths reported up to 20 Feb 2021: 120387
![Total deaths](cases_and_deaths.png)
# from scipy.stats import gmean
import datetime
import os
+# import matplotlib as mpl
+# import matplotlib.pyplot as plt
+# import matplotlib.animation as ani
+
+# # %matplotlib inline
# %%
# %%
-DEATH_COUNT_THRESHOLD = 10
-COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()
-COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
-COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()
-# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
+# DEATH_COUNT_THRESHOLD = 10
+# COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()
+# COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
+# COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()
+# # COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
-COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
- 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
- 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
- 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']
-COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
-COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
+# COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
+# 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
+# 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
+# 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']
+# COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
+# COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
# %%
-os.system('curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv')
+## Old data source
+# os.system('curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv')
+
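+## The download is done in the outer shell script, as it doesn't seem to work here.
+## NOTE(review): the curl call below still runs from this script - confirm whether it should stay.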
+os.system('curl https://opendata.ecdc.europa.eu/covid19/nationalcasedeath/csv/ > covid.csv')
# %%
# parse_dates selects the date column by position; keep_default_na=False stops a geoId/country code of 'NA' (Namibia) being read as a missing value
raw_data = pd.read_csv('covid.csv',
- parse_dates=[0], dayfirst=True,
+ parse_dates=[7], dayfirst=True,
keep_default_na=False, na_values = [''],
# dtype = {'day': np.int64,
# 'month': np.int64,
)
raw_data.fillna(0, inplace=True)
+raw_data = raw_data[raw_data.country_code != 0]
+# raw_data
+
+# %%
+raw_data['date'] = raw_data.year_week.apply(lambda yw: datetime.datetime.fromisocalendar(int(yw[:4]), int(yw[5:]), 7 ))
+# raw_data
+
+# %%
+# raw_data.dtypes
+
+# %%
+# raw_data.head()
+
+# %%
+# raw_data[raw_data.country_code == 'GBR'].sort_values(by='date').tail(20)
+
+# %%
+case_data = raw_data[raw_data.indicator == 'cases'][['country_code', 'date', 'weekly_count', 'cumulative_count']]
+case_data.rename(columns={'weekly_count': 'cases_weekly', 'cumulative_count': 'culm_cases'}, inplace=True)
+# case_data.head()
+
+# %%
+deaths_data = raw_data[raw_data.indicator == 'deaths'][['country_code', 'date', 'weekly_count', 'cumulative_count']]
+deaths_data.rename(columns={'weekly_count': 'deaths_weekly', 'cumulative_count': 'culm_deaths'}, inplace=True)
+# deaths_data.head()
+
+# %%
+case_death_data = case_data.merge(deaths_data)
+# case_death_data
+
+# %%
+# raw_data.columns
+
+# %%
+country_data = raw_data[['country', 'country_code', 'continent', 'population']].drop_duplicates()
+# country_data
# %%
-COUNTRIES_ALL
# %%
# raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',
# 'countryterritoryCode': 'country_territory_code',
# 'popData2019': 'population_2019',
# 'continentExp': 'continent'}, inplace=True)
-raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',
- 'countriesAndTerritories': 'country_name',
- 'countryterritoryCode': 'country_territory_code',
- 'cases': 'cases_weekly',
- 'deaths': 'deaths_weekly',
- 'popData2019': 'population_2019',
- 'continentExp': 'continent',
- 'Cumulative_number_for_14_days_of_COVID-19_cases_per_100000': 'notification_rate_per_100000_population_14-days'
- },
- inplace=True)
+
+# raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',
+# 'countriesAndTerritories': 'country_name',
+# 'countryterritoryCode': 'country_territory_code',
+# 'cases': 'cases_weekly',
+# 'deaths': 'deaths_weekly',
+# 'popData2019': 'population_2019',
+# 'continentExp': 'continent',
+# # 'Cumulative_number_for_14_days_of_COVID-19_cases_per_100000': 'notification_rate_per_100000_population_14-days'
+# },
+# inplace=True)
# %%
-raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']].to_sql(
+# Write the merged weekly case/death figures to the `weekly_cases` table,
+# replacing any existing table. The `dtype` keys should be the DataFrame's
+# column names (`date`, `culm_cases`, `culm_deaths`, ...), otherwise the
+# declared SQL types are not applied to those columns.
+case_death_data.to_sql(
'weekly_cases',
engine,
if_exists='replace',
dtype={
-"report_date": Date,
+"date": Date,
"cases_weekly": Integer,
+ "culm_cases": Integer,
"deaths_weekly": Integer,
- "geo_id": String,
- "notification_rate_per_100000_population_14-days": Float
+ "culm_deaths": Integer,
+ "country_code": String
}
)
# %%
-raw_data[['country_name', 'geo_id', 'country_territory_code',
- 'population_2019', 'continent']].drop_duplicates().to_sql(
+# Write the de-duplicated per-country attributes to the `countries` table,
+# replacing any existing table. The `dtype` keys should be the DataFrame's
+# column names (`country`, `country_code`, `population`, `continent`).
+country_data.to_sql(
'countries',
engine,
if_exists='replace',
chunksize=500,
dtype={
-"country_name": Text,
- "geo_id": String,
- "country_territory_code": String,
- "population_2019": Integer,
+"country": Text,
+ "country_code": String,
+ "population": Integer,
"continent": Text
}
)
-# %%
-with engine.connect() as connection:
- connection.execute('alter table weekly_cases add primary key (geo_id, report_date)')
- connection.execute('alter table countries add primary key (geo_id);')
- connection.execute('alter table weekly_cases add foreign key (geo_id) references countries(geo_id);')
- connection.execute('alter table weekly_cases add culm_cases integer;')
- connection.execute('alter table weekly_cases add culm_deaths integer;')
-
-
-# %%
-query_string = '''with culm as
- (select report_date, geo_id,
- sum(cases_weekly) over (partition by geo_id
- order by report_date) as culm_data
- from weekly_cases
- )
-update weekly_cases
- set culm_cases = culm_data
- from culm
- where weekly_cases.report_date = culm.report_date and
- weekly_cases.geo_id = culm.geo_id'''
-with engine.connect() as connection:
- connection.execute(query_string)
-
-
-# %%
-query_string = '''with culm as
- (select report_date, geo_id,
- sum(deaths_weekly) over (partition by geo_id
- order by report_date) as culm_data
- from weekly_cases
- )
-update weekly_cases
- set culm_deaths = culm_data
- from culm
- where weekly_cases.report_date = culm.report_date and
- weekly_cases.geo_id = culm.geo_id'''
-with engine.connect() as connection:
- connection.execute(query_string)
-
-
# %%
uk_query_string = (
"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
import matplotlib as mpl
import matplotlib.pyplot as plt
plt.ioff()
-# # %matplotlib inline
+# %matplotlib inline
# %%
# %%
DEATH_COUNT_THRESHOLD = 10
-COUNTRIES_CORE = tuple(sorted('IT DE UK ES IE FR BE'.split()))
-COUNTRIES_NORDIC = tuple('SE NO DK FI UK'.split())
-COUNTRIES_FRIENDS = tuple('IT UK ES BE SI MX'.split())
+COUNTRIES_CORE = tuple(sorted('ITA DEU GBR ESP IRL FRA BEL'.split()))
+# COUNTRIES_NORDIC = tuple('SE NO DK FI UK'.split())
+COUNTRIES_FRIENDS = tuple('ITA GBR ESP BEL SVN MEX'.split())
# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
-COUNTRIES_AMERICAS = ('AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
- 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
- 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
- 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE')
-COUNTRIES_OF_INTEREST = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
-COUNTRIES_ALL = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
+# COUNTRIES_AMERICAS = ('AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
+# 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
+# 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
+# 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE')
+# COUNTRIES_OF_INTEREST = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
+# COUNTRIES_ALL = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
# %%
-query_string = f'''select report_date, geo_id, deaths_weekly, culm_deaths
+query_string = f'''select date, country_code, deaths_weekly, culm_deaths
from weekly_cases
-where geo_id in {COUNTRIES_CORE}
-order by report_date, geo_id'''
+where country_code in {COUNTRIES_CORE}
+order by date, country_code'''
country_data = pd.read_sql_query(query_string,
engine,
- index_col = 'report_date',
- parse_dates = ['report_date']
+ index_col = 'date',
+ parse_dates = ['date']
)
# %%
-deaths_culm = country_data.pivot(columns='geo_id', values='culm_deaths')
+deaths_culm = country_data.pivot(columns='country_code', values='culm_deaths')
+# %%
+# country_data
+
+# %%
+# deaths_culm
+
# %%
ax = deaths_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Total deaths, linear")
ax.set_xlabel(f"Date")
# %%
-deaths_weekly = country_data.pivot(columns='geo_id', values='deaths_weekly')
+deaths_weekly = country_data.pivot(columns='country_code', values='deaths_weekly')
# %%
# %%
-ax = deaths_weekly.iloc[-6:].plot(figsize=(10, 6), title="Deaths per week")#, ylim=(-10, 100))
+ax = deaths_weekly.iloc[-6:].plot(figsize=(10, 6), title="Deaths per week, last 6 weeks")#, ylim=(-10, 100))
ax.set_xlabel("Date")
text_x_pos = deaths_weekly.last_valid_index() + pd.Timedelta(days=0.5)
# %%
-# DEATH_COUNT_THRESHOLD = 10
-COUNTRIES_CORE = tuple('IT DE UK ES IE FR BE'.split())
-# COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
-# COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()
-# # COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
+COUNTRIES_CORE = tuple(sorted('ITA DEU GBR ESP IRL FRA BEL'.split()))
+COUNTRIES_FRIENDS = tuple('ITA GBR ESP BEL SVN MEX'.split())
-# COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
-# 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
-# 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
-# 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']
-# COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
-# COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
-
-
-# # Write results to summary file
# %%
def singleton_sql_value(engine, query_string):
# %%
-last_intl_date = singleton_sql_value(engine, 'select max(report_date) from weekly_cases')
+last_intl_date = singleton_sql_value(engine, 'select max(date) from weekly_cases')
# %%
# %%
-query_string = f'''select geo_id, country_name, culm_deaths
-from weekly_cases join countries using (geo_id)
-where geo_id in {COUNTRIES_CORE}
- and report_date = '{last_intl_date.isoformat()}'
-order by geo_id'''
+query_string = f'''select country_code, country, culm_deaths
+from weekly_cases join countries using (country_code)
+where country_code in {COUNTRIES_CORE}
+ and date = '{last_intl_date.isoformat()}'
+order by country_code'''
with engine.connect() as conn:
results = list(conn.execute(query_string))
'covid_deaths_total_linear.png',
'cases_and_deaths.png',
'cases_and_deaths_last_60_days.png',
- 'deaths-radar-2021.png',
+ 'deaths_radar_2021.png',
'covid_deaths_per_week.png',
'fraction_positive_tests.png',
'tests_and_cases.png',
os.system(f'scp {f} neil@ogedei:/var/www/scripts.njae.me.uk/covid/')
print(f'Transferred {f}')
else:
- print('Cannot transfer {f}: file does not exist')
+ print(f'Cannot transfer {f}: file does not exist')
# %%
text_representation:
extension: .md
format_name: markdown
- format_version: '1.2'
- jupytext_version: 1.9.1
+ format_version: '1.3'
+ jupytext_version: 1.10.2
kernelspec:
display_name: Python 3
language: python
n_ireland_filename = 'uk-deaths-data/Weekly_Deaths_0.xlsx'
```
+```python Collapsed="false"
+# eng_xls = pd.read_excel(england_wales_filename,
+# sheet_name="Weekly figures 2021",
+# skiprows=[0, 1, 2, 3],
+# skipfooter=11,
+# header=0,
+# index_col=[1]
+# ).iloc[:99].T
+# eng_xls
+```
+
```python Collapsed="false"
eng_xls = pd.read_excel(england_wales_filename,
sheet_name="Weekly figures 2021",
skiprows=[0, 1, 2, 3],
- skipfooter=11,
+ skipfooter=10,
header=0,
index_col=[1]
- ).iloc[:91].T
+ ).T
eng_xls
```
ax.set_xticklabels(dataset.index)
plt.legend()
plt.title(f"Deaths by week over years, {title_string}")
- plt.savefig(f'deaths-radar-2021{filename_suffix}.png')
+ plt.savefig(f'deaths_radar_2021{filename_suffix}.png')
plt.show()
```
<!-- #endregion -->
```python
-create_and_save_radar_plot(deaths_e, 'England', '-england')
+create_and_save_radar_plot(deaths_e, 'England', '_england')
```
```python
-create_and_save_radar_plot(deaths_w, 'Wales', '-wales')
+create_and_save_radar_plot(deaths_w, 'Wales', '_wales')
```
```python
-create_and_save_radar_plot(deaths_s, 'Scotland', '-scotland')
+create_and_save_radar_plot(deaths_s, 'Scotland', '_scotland')
```
```python
-create_and_save_radar_plot(deaths_i, 'Northern Ireland', '-northern-ireland')
+create_and_save_radar_plot(deaths_i, 'Northern Ireland', '_northern_ireland')
```
```python Collapsed="false"