--- /dev/null
+---
+jupyter:
+ jupytext:
+ formats: ipynb,md
+ text_representation:
+ extension: .md
+ format_name: markdown
+ format_version: '1.2'
+ jupytext_version: 1.9.1
+ kernelspec:
+ display_name: Python 3
+ language: python
+ name: python3
+---
+
+<!-- #region Collapsed="false" -->
+Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
+<!-- #endregion -->
+
+```python Collapsed="false"
+import itertools
+import collections
+import json
+import pandas as pd
+import numpy as np
+from scipy.stats import gmean
+import datetime
+
+from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float
+from sqlalchemy import create_engine
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+%matplotlib inline
+%load_ext sql
+```
+
+```python Collapsed="false"
+connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
+```
+
+```python Collapsed="false"
+%sql $connection_string
+```
+
+```python
+engine = create_engine(connection_string)
+```
+
+```python Collapsed="false"
+DEATH_COUNT_THRESHOLD = 10
+COUNTRIES_CORE = tuple(sorted('IT DE UK ES IE FR BE'.split()))
+COUNTRIES_NORDIC = tuple('SE NO DK FI UK'.split())
+COUNTRIES_FRIENDS = tuple('IT UK ES BE SI MX'.split())
+# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
+
+COUNTRIES_AMERICAS = ('AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
+ 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
+ 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
+ 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE')
+COUNTRIES_OF_INTEREST = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
+COUNTRIES_ALL = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
+```
+
+```python Collapsed="false"
+# res = %sql select report_date, geo_id, deaths_weekly, culm_deaths from weekly_cases where geo_id in $COUNTRIES_CORE order by report_date, geo_id
+```
+
+```python Collapsed="false"
+# country_data = res.DataFrame()
+# country_data['report_date'] = country_data.report_date.astype('datetime64[ns]')
+# country_data.set_index('report_date', inplace=True)
+# country_data.tail(10)
+```
+
+```python
+%sql select geo_id, country_name, population_2019 from countries where population_2019 > 30000000
+```
+
+```python
+query_string = f'''select report_date, weekly_cases.geo_id, country_name,
+ 10000000.0 * deaths_weekly / population_2019 as deaths_weekly_pc,
+ 10000000.0 * culm_deaths / population_2019 as culm_deaths_pc
+from weekly_cases, countries
+where weekly_cases.geo_id = countries.geo_id
+and countries.population_2019 > 20000000
+order by report_date, geo_id'''
+
+country_data = pd.read_sql_query(query_string,
+ engine,
+ index_col = 'report_date',
+ parse_dates = ['report_date']
+ )
+```
+
+```python
+country_data.tail()
+```
+
+```python
+latest_date = country_data.index.max()
+latest_date
+```
+
+```python
+# highest_deaths_geo_ids = country_data.loc[latest_date, :].nlargest(10, 'culm_deaths_pc')['geo_id'].values
+highest_deaths_geo_ids = (
+ country_data.loc[latest_date, :]
+ .nlargest(10, 'culm_deaths_pc')
+ ['country_name']
+ .values
+)
+
+highest_deaths_geo_ids
+```
+
+```python
+high_deaths_country_data = (
+ country_data[
+ country_data.country_name.isin(highest_deaths_geo_ids)]
+)
+high_deaths_country_data.tail()
+```
+
+```python Collapsed="false"
+deaths_culm = high_deaths_country_data.pivot(columns='country_name',
+ values='culm_deaths_pc')
+deaths_culm.tail()
+```
+
+```python Collapsed="false"
+deaths_culm.plot()
+```
+
+```python Collapsed="false"
+ax = deaths_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Total deaths, linear")
+ax.set_xlabel(f"Date")
+for c in highest_deaths_geo_ids:
+ lvi = deaths_culm[c].last_valid_index()
+ ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_culm[c][lvi], s = f"{c}: {deaths_culm[c][lvi]:.0f}")
+# plt.savefig('covid_deaths_total_linear.png')
+```
+
+```python
+deaths_culm.loc[latest_date].sort_values()
+```
+
+```python Collapsed="false"
+deaths_weekly = high_deaths_country_data.pivot(columns='country_name', values='deaths_weekly_pc')
+deaths_weekly.tail()
+```
+
+```python Collapsed="false"
+deaths_weekly.plot()
+```
+
+```python Collapsed="false"
+ax = deaths_weekly.loc['2020-03-01':].plot(figsize=(10, 6), title="Deaths per week")
+ax.set_xlabel('Date')
+for c in highest_deaths_geo_ids:
+ lvi = deaths_weekly[c].last_valid_index()
+ ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_weekly[c][lvi], s = c)
+# plt.savefig('covid_deaths_per_week.png')
+```
+
+```python Collapsed="false"
+ax = deaths_weekly.iloc[-6:].plot(figsize=(10, 6), title="Deaths per week")#, ylim=(-10, 100))
+ax.set_xlabel("Date")
+
+text_x_pos = deaths_weekly.last_valid_index() + pd.Timedelta(days=0.5)
+
+for c in highest_deaths_geo_ids:
+ lvi = deaths_weekly[c].last_valid_index()
+# if c != 'ES':
+ ax.text(x = text_x_pos, y = deaths_weekly[c][lvi], s = f"{c}: {deaths_weekly[c][lvi]:.0f}")
+plt.savefig('deaths_by_date_last_6_weeks.png')
+```
+
+```python
+query_string = f'''select report_date, geo_id,
+(cast(deaths_weekly as float) / population_2019) deaths_weekly_pc,
+(cast(culm_deaths as float) / population_2019) as culm_deaths_pc
+from weekly_cases join countries using (geo_id)
+where geo_id in {COUNTRIES_CORE}
+order by report_date, geo_id'''
+
+deaths_pc_data = pd.read_sql_query(query_string,
+ engine,
+ index_col = 'report_date',
+ parse_dates = ['report_date']
+ )
+deaths_pc_data.tail()
+```
+
+```python Collapsed="false"
+deaths_pc_culm = deaths_pc_data.pivot(columns='geo_id', values='culm_deaths_pc')
+deaths_pc_culm.tail()
+```
+
+```python
+ax = deaths_pc_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear")
+ax.set_xlabel(f"Date")
+for c in COUNTRIES_CORE:
+ lvi = deaths_pc_culm[c].last_valid_index()
+ ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc_culm[c][lvi], s = f"{c}: {deaths_pc_culm[c][lvi]:.0f}")
+# plt.savefig('covid_deaths_total_linear.png')
+```
+
+```python Collapsed="false"
+deaths_pc = deaths_pc_data.pivot(columns='geo_id', values='deaths_weekly_pc')
+deaths_pc.tail()
+```
+
+```python
+ax = deaths_pc.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear")
+ax.set_xlabel(f"Date")
+for c in COUNTRIES_CORE:
+ lvi = deaths_pc[c].last_valid_index()
+ ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]:.0f}")
+# plt.savefig('covid_deaths_total_linear.png')
+```
+
+```python
+ax = deaths_pc.iloc[-6:].plot(figsize=(10, 6), title="Deaths per million, linear")
+ax.set_xlabel(f"Date")
+for c in COUNTRIES_CORE:
+ lvi = deaths_pc[c].last_valid_index()
+ ax.text(x = lvi, y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]*10**6:.0f}")
+# plt.savefig('covid_deaths_total_linear.png')
+```
+
+```python
+
+```