General updates
[covid19.git] / covid.md
diff --git a/covid.md b/covid.md
deleted file mode 100644 (file)
index 318cd5d..0000000
--- a/covid.md
+++ /dev/null
@@ -1,533 +0,0 @@
----
-jupyter:
-  jupytext:
-    formats: ipynb,md
-    text_representation:
-      extension: .md
-      format_name: markdown
-      format_version: '1.2'
-      jupytext_version: 1.3.4
-  kernelspec:
-    display_name: Python 3
-    language: python
-    name: python3
----
-
-Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
-
-```python
-import itertools
-import collections
-import pandas as pd
-import numpy as np
-from scipy.stats import gmean
-
-import matplotlib as mpl
-import matplotlib.pyplot as plt
-%matplotlib inline
-```
-
-```python
-DEATH_COUNT_THRESHOLD = 10  # cumulative deaths a country must exceed before its rows are kept
-COUNTRIES_CORE = 'IT DE UK ES IE FR'.split()
-COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()  # NOTE(review): 'UK' is listed with the Nordics, presumably as a comparison baseline; confirm
-COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()
-COUNTRIES_OF_INTEREST = sorted(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))  # sorted, not list(set(...)): set order varies between runs
-COUNTRIES_ALL = sorted(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC))  # de-duplicated union of all three groups
-```
-
-```python
-!curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv
-```
-
-```python
-# First column is parsed as a day-first date; keep_default_na=False keeps the geoId 'NA' (Namibia) as a string instead of a missing value
-raw_data = pd.read_csv('covid.csv', parse_dates=[0], keep_default_na=False, dayfirst=True)
-```
-
-```python
-raw_data.size
-```
-
-```python
-raw_data.head()
-```
-
-```python
-raw_data.dtypes
-```
-
-```python
-base_data = raw_data.set_index(['geoId', 'dateRep'])
-base_data.sort_index(inplace=True)
-base_data
-```
-
-```python
-base_data.loc['UK']
-```
-
-```python
-base_data.loc['UK', '2020-04-17']
-```
-
-```python
-countries = raw_data[['geoId', 'countriesAndTerritories', 'popData2018']]
-countries = countries[countries['popData2018'] != '']
-countries = countries.drop_duplicates()
-countries.set_index('geoId', inplace=True)
-countries = countries.astype({'popData2018': 'int64'})
-countries.head()
-```
-
-```python
-countries.shape
-```
-
-```python
-countries[countries.countriesAndTerritories == 'Finland']
-```
-
-```python
-countries.loc[COUNTRIES_OF_INTEREST]
-```
-
-```python
-data_by_date = base_data[['cases', 'deaths']]
-data_by_date.head()
-```
-
-```python
-data_by_date.loc['UK']
-```
-
-```python
-data_by_date.groupby(level=0).cumsum()
-```
-
-```python
-data_by_date = data_by_date.merge(
-    data_by_date.groupby(level=0).cumsum(), 
-    suffixes=('', '_culm'), 
-    left_index=True, right_index=True)
-data_by_date
-```
-
-```python
-data_by_date = data_by_date.merge(
-    data_by_date[['cases', 'deaths']].groupby(level=0).diff(), 
-    suffixes=('', '_diff'), 
-    left_index=True, right_index=True)
-data_by_date
-```
-
-```python
-data_by_date.loc['UK', '2020-04-17']
-```
-
-```python
-data_by_date.loc['UK']
-```
-
-```python
-# data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD]
-```
-
-```python
-# days_since_threshold = data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD].groupby(level=0).cumcount()
-# days_since_threshold.rename('since_threshold', inplace=True)
-```
-
-```python
-dbd = data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD].reset_index(level=1)  # rows past the threshold; dateRep becomes a column
-dbd['since_threshold'] = dbd.dateRep  # copy of the date, differenced below
-dbd.set_index('dateRep', append=True, inplace=True)  # restore the (geoId, dateRep) index
-dbd.sort_index(inplace=True)
-days_since_threshold = dbd.groupby(level=0).diff().since_threshold.dt.days.fillna(0).astype(int).groupby(level=0).cumsum()
-# Above: per-geoId gaps between consecutive dates in whole days (0 for each first row), then summed cumulatively per geoId.
-
-# Result: a Series named 'since_threshold' counting days since each geoId's first row over DEATH_COUNT_THRESHOLD.
-days_since_threshold
-```
-
-```python
-# days_since_threshold = (data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD]
-#                         .reset_index(level=1).groupby(level=0)
-#                         .diff().dateRep.dt.days
-#                         .groupby(level=0).cumcount()
-#                        )
-# days_since_threshold.rename('since_threshold', inplace=True)
-# days_since_threshold
-```
-
-```python
-data_since_threshold = data_by_date.merge(days_since_threshold, 
-    left_index=True, right_index=True)
-data_since_threshold
-```
-
-```python
-data_since_threshold = data_since_threshold.set_index('since_threshold', append=True
-                              ).reorder_levels(['since_threshold', 'geoId', 'dateRep']
-                                              ).reset_index('dateRep')
-data_since_threshold
-```
-
-```python
-data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
-```
-
-```python
-data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), ['deaths_culm']].unstack().plot(logy=True)
-```
-
-```python
-# deaths = data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT', 'IE']), ['deaths_culm']].unstack().xs('deaths_culm', axis=1, drop_level=True)
-```
-
-```python
-deaths = data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_culm']].unstack().sort_index().xs('deaths_culm', axis=1, drop_level=True)
-```
-
-```python
-data_since_threshold.reset_index().merge(countries, on='geoId').set_index(['since_threshold', 'geoId'])
-```
-
-```python
-data_since_threshold.reset_index().merge(countries, on='geoId').set_index(['since_threshold', 'geoId']).sort_index()  # was sort_index(inplace=True) on a temporary: returned None and discarded the sorted frame
-```
-
-```python
-data_since_threshold_per_capita = data_since_threshold.reset_index().merge(countries, on='geoId').set_index(['since_threshold', 'geoId'])
-data_since_threshold_per_capita['cases_culm_pc'] = data_since_threshold_per_capita.cases_culm / data_since_threshold_per_capita.popData2018
-data_since_threshold_per_capita['deaths_culm_pc'] = data_since_threshold_per_capita.deaths_culm / data_since_threshold_per_capita.popData2018
-data_since_threshold_per_capita
-```
-
-```python
-deaths_pc = data_since_threshold_per_capita.loc[(slice(None), ['UK', 'DE', 'IT', 'IE']), ['deaths_culm_pc']].unstack().sort_index().xs('deaths_culm_pc', axis=1, drop_level=True)
-```
-
-```python
-deaths_pc
-```
-
-```python
-deaths_pc.index
-```
-
-```python
-deaths_pc = data_since_threshold_per_capita.loc[(slice(None), COUNTRIES_ALL), ['deaths_culm_pc']].unstack().xs('deaths_culm_pc', axis=1, drop_level=True)
-```
-
-```python
-deaths[COUNTRIES_CORE].plot()
-```
-
-```python
-deaths[COUNTRIES_FRIENDS].plot()
-```
-
-```python
-ax = deaths[COUNTRIES_CORE].plot(figsize=(10, 6), title="Total deaths, linear")
-for c in COUNTRIES_CORE:
-    lvi = deaths[c].last_valid_index()
-    ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
-plt.savefig('covid_deaths_total_linear.png')    
-```
-
-```python
-ax = deaths[COUNTRIES_NORDIC].plot(figsize=(10, 6), title="Total deaths, linear")
-for c in COUNTRIES_NORDIC:
-    lvi = deaths[c].last_valid_index()
-    ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
-# plt.savefig('covid_deaths_total_linear.png')    
-```
-
-```python
-ax = deaths[COUNTRIES_OF_INTEREST].plot(figsize=(10, 6), title="Total deaths, linear")
-for c in COUNTRIES_OF_INTEREST:
-    lvi = deaths[c].last_valid_index()
-    ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
-# plt.savefig('covid_deaths_total_linear.png') 
-```
-
-```python
-ax = deaths[COUNTRIES_CORE].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
-for c in COUNTRIES_CORE:
-    lvi = deaths[c].last_valid_index()
-    ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
-
-plt.savefig('covid_deaths_total_log.png')
-```
-
-```python
-ylim = (5*10**3, 5*10**4)
-ax = deaths[COUNTRIES_CORE].plot(logy=True, figsize=(10, 6), ylim=ylim, title="Total deaths, log")
-for c in COUNTRIES_CORE:
-    lvi = deaths[c].last_valid_index()
-    if ylim[0] < deaths[c][lvi] < ylim[1]:
-        ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
-
-# plt.savefig('covid_deaths_total_log.png')
-```
-
-```python
-ax = deaths[COUNTRIES_FRIENDS].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
-for c in COUNTRIES_FRIENDS:
-    lvi = deaths[c].last_valid_index()
-    ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
-
-# plt.savefig('covid_deaths_total_log.png')
-```
-
-```python
-ax = deaths[COUNTRIES_NORDIC].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
-for c in COUNTRIES_NORDIC:
-    lvi = deaths[c].last_valid_index()
-    ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
-
-# plt.savefig('covid_deaths_total_log.png')
-```
-
-```python
-ax = deaths[COUNTRIES_OF_INTEREST].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
-for c in COUNTRIES_OF_INTEREST:
-    lvi = deaths[c].last_valid_index()
-    ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
-
-plt.savefig('covid_deaths_total_log.png')
-```
-
-```python
-deaths_pc.plot(figsize=(10, 6), title="Deaths per capita, linear")
-plt.savefig('covid_deaths_per_capita_linear.png')
-```
-
-```python
-ax = deaths_pc.plot(logy=True, figsize=(10, 6), title="Deaths per capita, log")
-for c in deaths_pc.columns:
-    lvi = deaths_pc[c].last_valid_index()
-    ax.text(x = lvi + 1, y = deaths_pc[c][lvi], s = c)
-```
-
-```python
-deaths_pc[['UK', 'IE']].plot( figsize=(10, 6), title="Deaths per capita, linear")
-```
-
-```python
-deaths_pc[['UK', 'IE']].plot(logy=True, figsize=(10, 6), title="Deaths per capita, log")
-```
-
-```python
-deaths[['UK', 'ES', 'IT']].plot(logy=True, figsize=(10, 6), title="Deaths, log")
-plt.savefig('covid_deaths_selected_log.png')
-```
-
-```python
-deaths[['UK', 'ES', 'IT', 'MX']].plot(logy=True, figsize=(10, 6), title="Deaths, log")
-```
-
-```python
-data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
-```
-
-```python
-data_since_threshold['deaths_m4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).mean())
-data_since_threshold['deaths_m7'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(7, 1).mean())
-# data_since_threshold['deaths_diff_m4'] = data_since_threshold.groupby(level=1)['deaths_diff'].transform(lambda x: x.rolling(4, 1).mean())
-# data_since_threshold['deaths_diff_m7'] = data_since_threshold.groupby(level=1)['deaths_diff'].transform(lambda x: x.rolling(7, 1).mean())
-data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
-```
-
-```python
-deaths_m4 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_m4']]
-             .unstack().sort_index().xs('deaths_m4', axis=1, drop_level=True))
-```
-
-```python
-deaths_m7 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_m7']]
-             .unstack().sort_index().xs('deaths_m7', axis=1, drop_level=True))
-```
-
-```python
-ax = deaths_m4.plot(figsize=(10, 6), title="Deaths per day, 4 day moving average")
-for c in deaths_m4.columns:
-    lvi = deaths_m4[c].last_valid_index()
-    ax.text(x = lvi + 1, y = deaths_m4[c][lvi], s = c)
-plt.savefig('covid_deaths_per_day.png') 
-```
-
-```python
-ax = deaths_m4[COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per day, 4 day moving average")
-for c in COUNTRIES_CORE:
-    lvi = deaths_m4[c].last_valid_index()
-    ax.text(x = lvi + 1, y = deaths_m4[c][lvi], s = c)
-plt.savefig('covid_deaths_per_day-core.png') 
-```
-
-```python
-ax = deaths_m4[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Deaths per day, 4 day moving average")
-for c in COUNTRIES_FRIENDS:
-    lvi = deaths_m4[c].last_valid_index()
-    ax.text(x = lvi + 1, y = deaths_m4[c][lvi], s = c)
-plt.savefig('covid_deaths_per_day-friends.png') 
-```
-
-```python
-C7s = 'ES FR IT UK'.split()
-ax = deaths_m7[C7s].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
-for c in C7s:
-    lvi = deaths_m7[c].last_valid_index()
-    ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)
-# plt.savefig('covid_deaths_per_day-friends.png') 
-```
-
-```python
-ax = deaths_m7[COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
-for c in COUNTRIES_CORE:
-    lvi = deaths_m7[c].last_valid_index()
-    ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)
-plt.savefig('covid_deaths_per_day_7.png') 
-```
-
-```python
-ax = deaths_m7[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
-for c in COUNTRIES_FRIENDS:
-    lvi = deaths_m7[c].last_valid_index()
-    ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)
-plt.savefig('covid_deaths_per_day_friends_7.png') 
-```
-
-```python
-def gmean_scale(items):  # geometric mean of items divided by the last element of items
-    return gmean(items) / items[-1]  # NOTE(review): only referenced from a commented-out rolling(...).apply(..., raw=True) cell, so items is presumably a NumPy window; confirm
-```
-
-```python
-def doubling_time(df, growth_col='deaths_g4'):  # ln(2) / ln(growth factor), where growth factor = (deaths_culm + df[growth_col]) / deaths_culm
-    return np.log(2) / np.log((df.deaths_culm + df[growth_col]) / df.deaths_culm)  # yields inf where df[growth_col] is 0, since log(1) == 0
-
-def doubling_time_7(df):  # same formula with the deaths_g7 column as the growth term
-    return doubling_time(df, 'deaths_g7')
-```
-
-```python
-# data_since_threshold['deaths_g4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).apply(gmean_scale, raw=True))
-# data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
-```
-
-```python
-data_since_threshold['deaths_g4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).apply(gmean, raw=True))
-data_since_threshold['deaths_g7'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(7, 1).apply(gmean, raw=True))
-data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
-```
-
-```python
-data_since_threshold['doubling_time'] = data_since_threshold.groupby(level=1).apply(doubling_time).reset_index(level=0, drop=True)
-data_since_threshold['doubling_time_7'] = data_since_threshold.groupby(level=1).apply(doubling_time_7).reset_index(level=0, drop=True)
-# data_since_threshold.loc[(slice(None), 'UK'), :]
-```
-
-```python
-doubling_times = (data_since_threshold.loc[(slice(None), COUNTRIES_OF_INTEREST), ['doubling_time']]
-             .unstack().sort_index().xs('doubling_time', axis=1, drop_level=True))
-doubling_times.replace([np.inf, -np.inf], np.nan, inplace=True)
-```
-
-```python
-doubling_times_7 = (data_since_threshold.loc[(slice(None), COUNTRIES_OF_INTEREST), ['doubling_time_7']]
-             .unstack().sort_index().xs('doubling_time_7', axis=1, drop_level=True))
-doubling_times_7.replace([np.inf, -np.inf], np.nan, inplace=True)
-```
-
-```python
-ax = doubling_times.plot(figsize=(10, 6), title="Doubling times, 4 day average")
-for c in doubling_times.columns:
-    lvi = doubling_times[c].last_valid_index()
-    ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)
-# plt.savefig('covid_deaths_per_day.png') 
-```
-
-```python
-ax = doubling_times_7[COUNTRIES_CORE].plot(figsize=(10, 6), title="Doubling times, 7 day average")
-for c in COUNTRIES_CORE:
-    lvi = doubling_times_7[c].last_valid_index()
-    ax.text(x = lvi + 1, y = doubling_times_7[c][lvi], s = c)
-plt.savefig('covid_doubling_times_7.png') 
-```
-
-```python
-ax = doubling_times[COUNTRIES_CORE].plot(figsize=(10, 6), title="Doubling times, 4 day average")
-for c in COUNTRIES_CORE:
-    lvi = doubling_times[c].last_valid_index()
-    ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)
-plt.savefig('covid_doubling_times.png') 
-```
-
-```python
-ax = doubling_times[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Doubling times")
-for c in COUNTRIES_FRIENDS:
-    lvi = doubling_times[c].last_valid_index()
-    ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)
-plt.savefig('covid_doubling_times_friends.png')
-```
-
-```python
-ax = doubling_times[C7s].plot(figsize=(10, 6), title="Doubling times")
-for c in C7s:
-    lvi = doubling_times[c].last_valid_index()
-    ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)
-# plt.savefig('covid_doubling_times_friends.png')
-```
-
-```python
-# deaths_diff_m4 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_diff_m4']]
-#              .unstack().sort_index().xs('deaths_diff_m4', axis=1, drop_level=True))
-```
-
-```python
-# deaths_diff_m7 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_diff_m7']]
-#              .unstack().sort_index().xs('deaths_diff_m7', axis=1, drop_level=True))
-```
-
-```python
-# deaths_diff_m7
-```
-
-```python
-data_since_threshold.replace([np.inf, -np.inf], np.nan).groupby(level=1).last().loc[COUNTRIES_ALL]#, [doubling_time]]
-```
-
-```python
-data_since_threshold.replace([np.inf, -np.inf], np.nan).groupby(level=1).last().loc[['UK', 'DE', 'IT']]#, [doubling_time]]
-```
-
-```python
-it_since_threshold = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['IT']), :]
-it_since_threshold.index.max()[0]
-```
-
-```python
-uk_projection = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['UK']), :]
-uk_projection.index.max()[0]
-```
-
-```python
-proj = it_since_threshold.loc[(slice(60, 77), slice(None)), ['cases', 'deaths']]
-proj.index = pd.MultiIndex.from_tuples([(n, 'UK') for n, _ in proj.index], names=proj.index.names)
-proj
-```
-
-```python
-uk_projection = pd.concat([uk_projection, proj], sort=True)  # DataFrame.append was removed in pandas 2.0; concat with sort=True is the equivalent call
-uk_projection.deaths.sum()
-```
-
-```python
-it_since_threshold.deaths.sum()
-```
-
-```python
-
-```