Now using py files, for automation
[covid19.git] / covid-old.md
diff --git a/covid-old.md b/covid-old.md
new file mode 100644 (file)
index 0000000..cc80d79
--- /dev/null
@@ -0,0 +1,1736 @@
+---
+jupyter:
+  jupytext:
+    formats: ipynb,md
+    text_representation:
+      extension: .md
+      format_name: markdown
+      format_version: '1.2'
+      jupytext_version: 1.3.4
+  kernelspec:
+    display_name: Python 3
+    language: python
+    name: python3
+---
+
+<!-- #region Collapsed="false" -->
+Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
+<!-- #endregion -->
+
+```python Collapsed="false"
+import itertools
+import collections
+import json
+import pandas as pd
+import numpy as np
+from scipy.stats import gmean
+import datetime
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+%matplotlib inline
+```
+
+```python Collapsed="false"
+DEATH_COUNT_THRESHOLD = 10
+COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()
+COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
+COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()
+# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
+
+COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
+       'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
+       'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
+       'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']
+COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
+COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
+```
+
+```python Collapsed="false"
+!curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv
+```
+
+```python Collapsed="false"
+# Load the downloaded CSV. Column 0 is parsed as a day-first date.
+# keep_default_na=False together with na_values=[''] means only empty fields
+# are read as missing, so the geoId "NA" (Namibia) stays a string instead of
+# being turned into a missing value.
+raw_data = pd.read_csv('covid.csv', 
+                       parse_dates=[0], dayfirst=True,
+                       keep_default_na=False, na_values = [''],
+#                        dtype = {'day': np.int64, 
+#                                 'month': np.int64, 
+#                                 'year': np.int64, 
+#                                 'cases': np.int64, 
+#                                 'deaths': np.int64, 
+#                                 'countriesAndTerritories': str, 
+#                                 'geoId': str, 
+#                                 'countryterritoryCode': str, 
+#                                 'popData2019': np.int64, 
+#                                 'continentExp': str, 
+#                                 }
+                      )
+```
+
+```python Collapsed="false"
+raw_data.size
+```
+
+```python Collapsed="false"
+raw_data.fillna(0, inplace=True)
+```
+
+```python Collapsed="false"
+raw_data.head()
+```
+
+```python Collapsed="false"
+raw_data.dtypes
+```
+
+```python Collapsed="false"
+# raw_data = raw_data.astype({'dateRep': np.datetime64, 
+#     'day': np.int64, 
+#     'month': np.int64, 
+#     'year': np.int64, 
+#     'cases': np.int64, 
+#     'deaths': np.int64, 
+#     'countriesAndTerritories': str, 
+#     'geoId': str, 
+#     'countryterritoryCode': str, 
+#     'popData2019': np.int64, 
+#     'continentExp': str })
+raw_data = raw_data.astype({'dateRep': np.datetime64, 
+    'day': np.int64, 
+    'month': np.int64, 
+    'year': np.int64, 
+    'cases': np.int64, 
+    'deaths': np.int64, 
+    'countriesAndTerritories': str, 
+    'geoId': str, 
+    'countryterritoryCode': str, 
+    'popData2019': np.int64, 
+    'continentExp': str })
+```
+
+```python Collapsed="false"
+raw_data.dtypes
+```
+
+```python Collapsed="false"
+raw_data[((raw_data.geoId == 'UK') & (raw_data.dateRep >= '2020-07-10'))]
+```
+
+```python Collapsed="false"
+# raw_data = raw_data[~ ((raw_data.geoId == 'ES') & (raw_data.dateRep >= '2020-05-22'))]
+```
+
+```python Collapsed="false"
+base_data = raw_data.set_index(['geoId', 'dateRep'])
+base_data.sort_index(inplace=True)
+base_data
+```
+
+```python Collapsed="false"
+base_data.loc['ES'].loc['2020-05-10':]
+```
+
+```python Collapsed="false"
+# Build a per-country lookup table (name, 2019 population, continent) indexed by geoId.
+countries = raw_data[['geoId', 'countriesAndTerritories', 'popData2019', 'continentExp']]
+# NOTE(review): popData2019 has already been through fillna(0) and
+# astype(np.int64) in earlier cells, so this comparison with '' looks like it
+# can no longer exclude anything; missing populations would show up as 0.
+# Confirm whether `!= 0` is what is wanted here.
+countries = countries[countries['popData2019'] != '']
+# Collapse the per-day rows down to one row per distinct combination.
+countries = countries.drop_duplicates()
+countries.set_index('geoId', inplace=True)
+countries = countries.astype({'popData2019': 'int64'})
+countries.head()
+```
+
+```python Collapsed="false"
+countries.shape
+```
+
+```python Collapsed="false"
+countries[countries.countriesAndTerritories == 'Finland']
+```
+
+```python Collapsed="false"
+countries.loc[COUNTRIES_OF_INTEREST]
+```
+
+```python Collapsed="false"
+countries[countries.continentExp == 'America'].index
+```
+
+```python Collapsed="false"
+data_by_date = base_data[['cases', 'deaths']]
+data_by_date.head()
+```
+
+```python Collapsed="false"
+data_by_date.loc['UK']
+```
+
+```python Collapsed="false"
+# data_by_date.deaths.drop_duplicates().sort_values().to_csv('dth.csv', header=True)
+```
+
+```python Collapsed="false"
+data_by_date.groupby(level=0).cumsum()
+```
+
+```python Collapsed="false"
+data_by_date = data_by_date.merge(
+    data_by_date.groupby(level=0).cumsum(), 
+    suffixes=('', '_culm'), 
+    left_index=True, right_index=True)
+data_by_date
+```
+
+```python Collapsed="false"
+data_by_date = data_by_date.merge(
+    data_by_date[['cases', 'deaths']].groupby(level=0).diff(), 
+    suffixes=('', '_diff'), 
+    left_index=True, right_index=True)
+data_by_date
+```
+
+```python Collapsed="false"
+data_by_date.loc['UK', '2020-04-17']
+```
+
+```python Collapsed="false"
+data_by_date.loc['UK']
+```
+
+```python Collapsed="false"
+# data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD]
+```
+
+```python Collapsed="false"
+# days_since_threshold = data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD].groupby(level=0).cumcount()
+# days_since_threshold.rename('since_threshold', inplace=True)
+```
+
+```python Collapsed="false"
+# Keep only rows whose cumulative deaths exceed the threshold, and move dateRep
+# out of the index so it can be copied into an ordinary column.
+dbd = data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD].reset_index(level=1)
+dbd['since_threshold'] = dbd.dateRep
+# Restore the (geoId, dateRep) index and sort it so diff() sees dates in order.
+dbd.set_index('dateRep', append=True, inplace=True)
+dbd.sort_index(inplace=True)
+# Per geoId: gap in days between consecutive retained rows (0 for the first
+# row), then a running total of those gaps, i.e. the number of days elapsed
+# since that country's first retained row.
+days_since_threshold = dbd.groupby(level=0).diff().since_threshold.dt.days.fillna(0).astype(int).groupby(level=0).cumsum()
+# days_since_threshold.groupby(level=0).cumsum()
+
+# days_since_threshold = dbd.rename('since_threshold')
+days_since_threshold
+```
+
+```python Collapsed="false"
+# days_since_threshold = (data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD]
+#                         .reset_index(level=1).groupby(level=0)
+#                         .diff().dateRep.dt.days
+#                         .groupby(level=0).cumcount()
+#                        )
+# days_since_threshold.rename('since_threshold', inplace=True)
+# days_since_threshold
+```
+
+```python Collapsed="false"
+data_since_threshold = data_by_date.merge(days_since_threshold, 
+    left_index=True, right_index=True)
+data_since_threshold
+```
+
+```python Collapsed="false"
+data_since_threshold = data_since_threshold.set_index('since_threshold', append=True
+                              ).reorder_levels(['since_threshold', 'geoId', 'dateRep']
+                                              ).reset_index('dateRep')
+data_since_threshold.sort_index(inplace=True)
+data_since_threshold
+```
+
+```python Collapsed="false"
+data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
+```
+
+```python Collapsed="false"
+data_since_threshold.loc[(slice(None), ['ES']), :].tail(8)
+```
+
+```python Collapsed="false"
+data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), ['deaths_culm']].unstack().plot(logy=True)
+```
+
+```python Collapsed="false"
+# deaths = data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT', 'IE']), ['deaths_culm']].unstack().xs('deaths_culm', axis=1, drop_level=True)
+```
+
+```python Collapsed="false"
+deaths = data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_culm']].unstack().sort_index().xs('deaths_culm', axis=1, drop_level=True)
+deaths_by_date = data_by_date.loc[COUNTRIES_ALL, ['deaths_culm']].unstack().sort_index().xs('deaths_culm', axis=1, drop_level=True).T
+```
+
+```python Collapsed="false"
+cases = data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['cases_culm']].unstack().sort_index().xs('cases_culm', axis=1, drop_level=True)
+cases_by_date = data_by_date.loc[ COUNTRIES_ALL, ['cases_culm']].unstack().sort_index().xs('cases_culm', axis=1, drop_level=True).T
+```
+
+```python Collapsed="false"
+COUNTRIES_AMERICAS_DEAD = list(set(deaths.columns) & set(COUNTRIES_AMERICAS))
+```
+
+```python Collapsed="false"
+data_since_threshold.reset_index().merge(countries, on='geoId').set_index(['since_threshold', 'geoId'])
+```
+
+```python Collapsed="false"
+data_since_threshold.reset_index().merge(countries, on='geoId').set_index(['since_threshold', 'geoId']).sort_index(inplace=True)
+```
+
+```python Collapsed="false"
+data_since_threshold_per_capita = data_since_threshold.reset_index().merge(countries, on='geoId').set_index(['since_threshold', 'geoId'])
+data_since_threshold_per_capita['cases_culm_pc'] = data_since_threshold_per_capita.cases_culm / data_since_threshold_per_capita.popData2019
+data_since_threshold_per_capita['deaths_culm_pc'] = data_since_threshold_per_capita.deaths_culm / data_since_threshold_per_capita.popData2019
+data_since_threshold_per_capita
+```
+
+```python Collapsed="false"
+deaths_pc = data_since_threshold_per_capita.loc[(slice(None), ['UK', 'DE', 'IT', 'IE']), ['deaths_culm_pc']].unstack().sort_index().xs('deaths_culm_pc', axis=1, drop_level=True)
+```
+
+```python Collapsed="false"
+deaths_pc.index
+```
+
+```python Collapsed="false"
+deaths_pc = data_since_threshold_per_capita.loc[(slice(None), COUNTRIES_ALL), ['deaths_culm_pc']].unstack().xs('deaths_culm_pc', axis=1, drop_level=True)
+```
+
+```python Collapsed="false"
+deaths[COUNTRIES_CORE].plot()
+```
+
+```python Collapsed="false"
+deaths[COUNTRIES_FRIENDS].plot()
+```
+
+```python Collapsed="false"
+ax = deaths[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Total deaths, linear")
+ax.set_xlabel(f"Days since {DEATH_COUNT_THRESHOLD} deaths")
+for c in COUNTRIES_FRIENDS:
+    lvi = deaths[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths[c][lvi], s = f"{c}: {deaths[c][lvi]:.0f}")
+# plt.savefig('covid_deaths_total_linear.png')    
+```
+
+```python Collapsed="false"
+ax = deaths[COUNTRIES_CORE].plot(figsize=(10, 6), title="Total deaths, linear")
+ax.set_xlabel(f"Days since {DEATH_COUNT_THRESHOLD} deaths")
+for c in COUNTRIES_CORE:
+    lvi = deaths[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths[c][lvi], s = f"{c}: {deaths[c][lvi]:.0f}")
+# plt.savefig('covid_deaths_total_linear.png')    
+```
+
+```python Collapsed="false"
+ax = deaths_by_date.loc['2020-03-15':, COUNTRIES_CORE].plot(figsize=(10, 6), title="Total deaths, linear")
+# data_by_date.loc[COUNTRIES_CORE]
+# deaths_by_date = data_by_date.loc[COUNTRIES_ALL, ['deaths_culm']].unstack().sort_index().xs('deaths_culm', axis=1, drop_level=True)
+ax.set_xlabel(f"Date")
+for c in COUNTRIES_CORE:
+    lvi = deaths_by_date[c].last_valid_index()
+    ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_by_date[c][lvi], s = f"{c}: {deaths_by_date[c][lvi]:.0f}")
+plt.savefig('covid_deaths_total_linear.png')    
+```
+
+```python Collapsed="false"
+deaths_prime = deaths[COUNTRIES_CORE].copy()
+deaths_prime.loc[73:, 'ES'] = np.NaN
+# deaths_prime['ES'][70:]
+```
+
+```python Collapsed="false"
+ax = deaths_prime[COUNTRIES_CORE].plot(figsize=(10, 6), title="Total deaths, linear")
+for c in COUNTRIES_CORE:
+    lvi = deaths_prime[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths_prime[c][lvi], s = f"{c}: {deaths_prime[c][lvi]:.0f}")
+# plt.savefig('covid_deaths_total_linear.png')    
+```
+
+```python Collapsed="false"
+ax = cases[COUNTRIES_CORE].plot(figsize=(10, 6), title="Total cases, linear")
+for c in COUNTRIES_CORE:
+    lvi = cases[c].last_valid_index()
+    ax.text(x = lvi + 1, y = cases[c][lvi], s = c)
+plt.savefig('covid_cases_total_linear.png')    
+```
+
+```python Collapsed="false"
+ax = deaths[COUNTRIES_AMERICAS_DEAD].plot(figsize=(10, 6), title="Total deaths, linear")
+for c in COUNTRIES_AMERICAS_DEAD:
+    lvi = deaths[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
+# plt.savefig('covid_deaths_total_linear.png')    
+```
+
+```python Collapsed="false"
+ax = deaths[COUNTRIES_CORE + ['BR', 'MX']].plot(figsize=(10, 6), title="Total deaths, linear")
+for c in COUNTRIES_CORE + ['BR', 'MX']:
+    lvi = deaths[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
+# plt.savefig('covid_deaths_total_linear.png')    
+```
+
+```python Collapsed="false"
+ax = deaths[COUNTRIES_NORDIC].plot(figsize=(10, 6), title="Total deaths, linear")
+for c in COUNTRIES_NORDIC:
+    lvi = deaths[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
+# plt.savefig('covid_deaths_total_linear.png')    
+```
+
+```python Collapsed="false"
+ax = deaths[COUNTRIES_OF_INTEREST].plot(figsize=(10, 6), title="Total deaths, linear")
+for c in COUNTRIES_OF_INTEREST:
+    lvi = deaths[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
+plt.savefig('covid_deaths_total_linear_of_interest.png') 
+```
+
+```python Collapsed="false"
+ax = deaths[COUNTRIES_CORE].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
+for c in COUNTRIES_CORE:
+    lvi = deaths[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
+
+plt.savefig('covid_deaths_total_log.png')
+```
+
+```python Collapsed="false"
+ylim = (5*10**3, 5*10**4)
+ax = deaths[COUNTRIES_CORE].plot(logy=True, figsize=(10, 6), ylim=ylim, title="Total deaths, log")
+for c in COUNTRIES_CORE:
+    lvi = deaths[c].last_valid_index()
+    if ylim[0] < deaths[c][lvi] < ylim[1]:
+        ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
+
+# plt.savefig('covid_deaths_total_log.png')
+```
+
+```python Collapsed="false"
+ax = deaths[COUNTRIES_FRIENDS].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
+for c in COUNTRIES_FRIENDS:
+    lvi = deaths[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
+
+# plt.savefig('covid_deaths_total_log.png')
+```
+
+```python Collapsed="false"
+ax = deaths[COUNTRIES_NORDIC].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
+for c in COUNTRIES_NORDIC:
+    lvi = deaths[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
+
+# plt.savefig('covid_deaths_total_log.png')
+```
+
+```python Collapsed="false"
+# Cumulative deaths on a log y-axis for the countries of interest.
+ax = deaths[COUNTRIES_OF_INTEREST].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
+for c in COUNTRIES_OF_INTEREST:
+    # Label each line one unit to the right of its last non-missing point.
+    lvi = deaths[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
+
+# NOTE(review): this is the same filename the COUNTRIES_CORE log plot is saved
+# to earlier in the notebook, so this call overwrites that image — confirm
+# that is intended (the linear version uses a separate "_of_interest" file).
+plt.savefig('covid_deaths_total_log.png')
+```
+
+```python Collapsed="false"
+ax = deaths_pc.plot(figsize=(10, 6), title="Deaths per capita, linear")
+for c in deaths_pc.columns:
+    lvi = deaths_pc[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths_pc[c][lvi], s = c)
+plt.savefig('covid_deaths_per_capita_linear.png')
+```
+
+```python Collapsed="false"
+ax = deaths_pc.plot(logy=True, figsize=(10, 6), title="Deaths per capita, log")
+for c in deaths_pc.columns:
+    lvi = deaths_pc[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths_pc[c][lvi], s = c)
+```
+
+```python Collapsed="false"
+deaths_pc[['UK', 'IE']].plot( figsize=(10, 6), title="Deaths per capita, linear")
+```
+
+```python Collapsed="false"
+deaths_pc[['UK', 'IE']].plot(logy=True, figsize=(10, 6), title="Deaths per capita, log")
+```
+
+```python Collapsed="false"
+deaths[['UK', 'ES', 'IT']].plot(logy=True, figsize=(10, 6), title="Deaths, log")
+plt.savefig('covid_deaths_selected_log.png')
+```
+
+```python Collapsed="false"
+deaths[['UK', 'ES', 'IT', 'MX']].plot(logy=True, figsize=(10, 6), title="Deaths, log")
+```
+
+```python Collapsed="false"
+data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
+```
+
+```python Collapsed="false"
+data_since_threshold['deaths_m4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).mean())
+data_since_threshold['deaths_m7'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(7, 1).mean())
+data_since_threshold['cases_m7'] = data_since_threshold.groupby(level=1)['cases'].transform(lambda x: x.rolling(7, 1).mean())
+# data_since_threshold['deaths_diff_m4'] = data_since_threshold.groupby(level=1)['deaths_diff'].transform(lambda x: x.rolling(4, 1).mean())
+# data_since_threshold['deaths_diff_m7'] = data_since_threshold.groupby(level=1)['deaths_diff'].transform(lambda x: x.rolling(7, 1).mean())
+data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
+```
+
+```python Collapsed="false"
+deaths_m4 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_m4']]
+             .unstack().sort_index().xs('deaths_m4', axis=1, drop_level=True))
+```
+
+```python Collapsed="false"
+deaths_m7 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_m7']]
+             .unstack().sort_index().xs('deaths_m7', axis=1, drop_level=True))
+```
+
+```python Collapsed="false"
+cases_m7 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['cases_m7']]
+             .unstack().sort_index().xs('cases_m7', axis=1, drop_level=True))
+```
+
+```python Collapsed="false"
+data_by_date['cases_m7'] = data_by_date.groupby(level=0)['cases'].transform(lambda x: x.rolling(7, 1).mean())
+data_by_date['deaths_m7'] = data_by_date.groupby(level=0)['deaths'].transform(lambda x: x.rolling(7, 1).mean())
+data_by_date
+```
+
+```python Collapsed="false"
+data_by_date.loc[('UK', '2020-07-15'):'UK', 'cases'].plot()
+```
+
+```python Collapsed="false"
+cases_by_date_m7 = data_by_date.loc[COUNTRIES_ALL, 'cases_m7'].unstack(level=0).sort_index()
+cases_by_date_m7[COUNTRIES_CORE].plot()
+```
+
+```python Collapsed="false"
+deaths_by_date_m7 = data_by_date.loc[COUNTRIES_ALL, 'deaths_m7'].unstack(level=0).sort_index()
+deaths_by_date_m7[COUNTRIES_CORE].plot()
+```
+
+```python Collapsed="false"
+ax = deaths_m4.plot(figsize=(10, 6), title="Deaths per day, 4 day moving average")
+for c in deaths_m4.columns:
+    lvi = deaths_m4[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths_m4[c][lvi], s = c)
+plt.savefig('covid_deaths_per_day.png') 
+```
+
+```python Collapsed="false"
+ax = deaths_m4[COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per day, 4 day moving average")
+for c in COUNTRIES_CORE:
+    lvi = deaths_m4[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths_m4[c][lvi], s = c)
+plt.savefig('covid_deaths_per_day-core.png') 
+```
+
+```python Collapsed="false"
+ax = deaths_m4[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Deaths per day, 4 day moving average")
+for c in COUNTRIES_FRIENDS:
+    lvi = deaths_m4[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths_m4[c][lvi], s = c)
+# plt.savefig('covid_deaths_per_day-friends.png') 
+```
+
+```python Collapsed="false"
+C7s = 'ES FR IT UK'.split()
+ax = deaths_m7[C7s].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
+for c in C7s:
+    lvi = deaths_m7[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)
+# plt.savefig('covid_deaths_per_day-friends.png') 
+```
+
+```python Collapsed="false"
+ax = deaths_m7[COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
+ax.set_xlabel(f"Days since {DEATH_COUNT_THRESHOLD} deaths")
+for c in COUNTRIES_CORE:
+    lvi = deaths_m7[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)
+# plt.axhline(0, color='0.7')
+plt.savefig('covid_deaths_per_day_7.png') 
+```
+
+```python Collapsed="false"
+ax = deaths_m7[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
+ax.set_xlabel(f"Days since {DEATH_COUNT_THRESHOLD} deaths")
+for c in COUNTRIES_FRIENDS:
+    lvi = deaths_m7[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)
+# plt.axhline(0, color='0.7')
+plt.savefig('covid_deaths_per_day-friends.png') 
+```
+
+```python Collapsed="false"
+deaths_m7_prime = deaths_m7[COUNTRIES_CORE].copy()
+deaths_m7_prime.loc[73:, 'ES'] = np.NaN
+deaths_m7_prime['ES'][70:]
+```
+
+```python Collapsed="false"
+ax = deaths_m7_prime[COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
+for c in COUNTRIES_CORE:
+    lvi = deaths_m7_prime[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths_m7_prime[c][lvi], s = c)
+plt.savefig('covid_deaths_per_day_7.png') # see below for where this is written, with the projection
+```
+
+```python Collapsed="false"
+ax = deaths_by_date_m7.loc['2020-03-01':, COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
+ax.set_xlabel('Date')
+for c in COUNTRIES_CORE:
+    lvi = deaths_by_date_m7[c].last_valid_index()
+    ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_by_date_m7[c][lvi], s = c)
+plt.savefig('covid_deaths_per_day_7.png') 
+```
+
+```python Collapsed="false"
+ax = deaths_m7[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
+for c in COUNTRIES_FRIENDS:
+    lvi = deaths_m7[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)
+plt.savefig('covid_deaths_per_day_friends_7.png') 
+```
+
+```python Collapsed="false"
+ax = deaths_m7[COUNTRIES_CORE + ['BR', 'MX']].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
+for c in COUNTRIES_CORE + ['BR', 'MX']:
+    lvi = deaths_m7[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)
+# plt.savefig('covid_deaths_per_day_7.png') 
+```
+
+```python Collapsed="false"
+ax = deaths_by_date_m7.iloc[-30:][COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")#, ylim=(-10, 100))
+ax.set_xlabel("Date")
+
+text_x_pos = deaths_by_date_m7.last_valid_index() + pd.Timedelta(days=1)
+
+for c in COUNTRIES_CORE:
+    lvi = deaths_by_date_m7[c].last_valid_index()
+#     if c != 'ES':
+    ax.text(x = text_x_pos, y = deaths_by_date_m7[c][lvi], s = f"{c}: {deaths_by_date_m7[c][lvi]:.0f}")
+plt.savefig('deaths_by_date_last_30_days.png') 
+```
+
+```python Collapsed="false"
+ax = deaths_by_date_m7.iloc[-30:][COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")#, ylim=(-10, 100))
+ax.set_xlabel("Date")
+
+text_x_pos = deaths_by_date_m7.last_valid_index() + pd.Timedelta(days=1)
+
+for c in COUNTRIES_FRIENDS:
+    lvi = deaths_by_date_m7[c].last_valid_index()
+#     if c != 'ES':
+    ax.text(x = text_x_pos, y = deaths_by_date_m7[c][lvi], s = f"{c}: {deaths_by_date_m7[c][lvi]:.0f}")
+plt.savefig('deaths_by_date_last_30_days_friends.png') 
+```
+
+```python Collapsed="false"
+ax = cases_m7[COUNTRIES_CORE].plot(figsize=(10, 6), title="Cases per day, 7 day moving average")
+for c in COUNTRIES_CORE:
+    lvi = cases_m7[c].last_valid_index()
+    ax.text(x = lvi + 1, y = cases_m7[c][lvi], s = c)
+plt.savefig('covid_cases_per_day-core.png') 
+```
+
+```python Collapsed="false"
+ax = cases_m7[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Cases per day, 7 day moving average")
+for c in COUNTRIES_FRIENDS:
+    lvi = cases_m7[c].last_valid_index()
+    ax.text(x = lvi + 1, y = cases_m7[c][lvi], s = c)
+plt.savefig('covid_cases_per_day-core.png') 
+```
+
+```python Collapsed="false"
+ax = cases_by_date_m7.iloc[-30:][COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Cases per day, 7 day moving average")#, ylim=(-10, 100))
+ax.set_xlabel("Date")
+
+text_x_pos = cases_by_date_m7.last_valid_index() + pd.Timedelta(days=1)
+
+for c in COUNTRIES_FRIENDS:
+    lvi = cases_by_date_m7[c].last_valid_index()
+#     if c != 'ES':
+    ax.text(x = text_x_pos, y = cases_by_date_m7[c][lvi], s = f"{c}: {cases_by_date_m7[c][lvi]:.0f}")
+plt.savefig('cases_by_date_last_30_days_friends.png') 
+```
+
+```python Collapsed="false"
+def gmean_scale(items):
+    return gmean(items) / items[-1]
+```
+
+```python Collapsed="false"
+def doubling_time(df):
+    return np.log(2) / np.log((df.deaths_culm + df.deaths_g4) / df.deaths_culm)
+
+def doubling_time_7(df):
+    return np.log(2) / np.log((df.deaths_culm + df.deaths_g7) / df.deaths_culm)
+```
+
+```python Collapsed="false"
+# data_since_threshold['deaths_g4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).apply(gmean_scale, raw=True))
+# data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
+```
+
+```python Collapsed="false"
+data_since_threshold['deaths_g4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).apply(gmean, raw=True))
+data_since_threshold['deaths_g7'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(7, 1).apply(gmean, raw=True))
+data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
+```
+
+```python Collapsed="false"
+data_since_threshold['doubling_time'] = data_since_threshold.groupby(level=1).apply(doubling_time).reset_index(level=0, drop=True).sort_index()
+data_since_threshold['doubling_time_7'] = data_since_threshold.groupby(level=1).apply(doubling_time_7).reset_index(level=0, drop=True).sort_index()
+# data_since_threshold.loc[(slice(None), 'UK'), :]
+```
+
+```python Collapsed="false"
+data_by_date['deaths_g4'] = data_by_date.groupby(level=0)['deaths'].transform(lambda x: x.rolling(4, 1).apply(gmean, raw=True))
+data_by_date['deaths_g7'] = data_by_date.groupby(level=0)['deaths'].transform(lambda x: x.rolling(7, 1).apply(gmean, raw=True))
+data_by_date['doubling_time'] = data_by_date.groupby(level=0).apply(doubling_time).reset_index(level=0, drop=True).sort_index()
+data_by_date['doubling_time_7'] = data_by_date.groupby(level=0).apply(doubling_time_7).reset_index(level=0, drop=True).sort_index()
+data_by_date.loc['UK']
+```
+
+```python Collapsed="false"
+doubling_times = (data_since_threshold.loc[(slice(None), COUNTRIES_OF_INTEREST), ['doubling_time']]
+             .unstack().sort_index().xs('doubling_time', axis=1, drop_level=True))
+doubling_times.replace([np.inf, -np.inf], np.nan, inplace=True)
+```
+
+```python Collapsed="false"
+doubling_times_7 = (data_since_threshold.loc[(slice(None), COUNTRIES_OF_INTEREST), ['doubling_time_7']]
+             .unstack().sort_index().xs('doubling_time_7', axis=1, drop_level=True))
+doubling_times_7.replace([np.inf, -np.inf], np.nan, inplace=True)
+```
+
+```python Collapsed="false"
+ax = doubling_times.plot(figsize=(10, 6), title="Doubling times, 4 day average")
+for c in doubling_times.columns:
+    lvi = doubling_times[c].last_valid_index()
+    ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)
+# plt.savefig('covid_deaths_per_day.png') 
+```
+
+```python Collapsed="false"
+ax = doubling_times_7[COUNTRIES_CORE].plot(figsize=(10, 6), title="Doubling times, 7 day average")
+ax.legend(loc="upper left")
+for c in COUNTRIES_CORE:
+    lvi = doubling_times_7[c].last_valid_index()
+    ax.text(x = lvi + 1, y = doubling_times_7[c][lvi], s = c)
+plt.savefig('covid_doubling_times_7.png') 
+```
+
+```python Collapsed="false"
+ax = doubling_times[COUNTRIES_CORE].plot(figsize=(10, 6), title="Doubling times, 4 day average")
+for c in COUNTRIES_CORE:
+    lvi = doubling_times[c].last_valid_index()
+    ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)
+plt.savefig('covid_doubling_times.png') 
+```
+
+```python Collapsed="false"
+ax = doubling_times[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Doubling times")
+for c in COUNTRIES_FRIENDS:
+    lvi = doubling_times[c].last_valid_index()
+    ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)
+plt.savefig('covid_doubling_times_friends.png')
+```
+
+```python Collapsed="false"
+ax = doubling_times[C7s].plot(figsize=(10, 6), title="Doubling times")
+for c in C7s:
+    lvi = doubling_times[c].last_valid_index()
+    ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)
+# plt.savefig('covid_doubling_times_friends.png')
+```
+
+```python Collapsed="false"
+# deaths_diff_m4 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_diff_m4']]
+#              .unstack().sort_index().xs('deaths_diff_m4', axis=1, drop_level=True))
+```
+
+```python Collapsed="false"
+# deaths_diff_m7 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_diff_m7']]
+#              .unstack().sort_index().xs('deaths_diff_m7', axis=1, drop_level=True))
+```
+
+```python Collapsed="false"
+# deaths_diff_m7
+```
+
+```python Collapsed="false"
+# data_since_threshold.replace([np.inf, -np.inf], np.nan).groupby(level=1).last().loc[COUNTRIES_ALL]#, [doubling_time]]
+```
+
+```python Collapsed="false"
+dstl = data_since_threshold.replace([np.inf, -np.inf], np.nan).groupby(level=1).last()
+dstl.loc[dstl.index.intersection(COUNTRIES_ALL)]
+```
+
+```python Collapsed="false"
+# data_since_threshold.replace([np.inf, -np.inf], np.nan).groupby(level=1).last().loc[['UK', 'DE', 'IT']]#, [doubling_time]]
+dstl.loc[['UK', 'DE', 'IT', 'FR', 'ES']]
+```
+
+```python Collapsed="false"
+data_since_threshold.loc[(slice(None), ['UK']), :].tail(20)
+```
+
+```python Collapsed="false"
+data_since_threshold.loc[(slice(None), ['ES']), :].tail(20)
+```
+
+<!-- #region Collapsed="false" -->
+## Death projections
+<!-- #endregion -->
+
+```python Collapsed="false"
+data_since_threshold.loc[(slice(None), ['UK']), :].tail(15)
+```
+
+```python Collapsed="false"
+it_since_threshold = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['IT']), :]
+s_end = it_since_threshold.index.max()[0]
+s_end
+```
+
+```python Collapsed="false"
+uk_projection = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['UK']), :]
+uk_current_end = uk_projection.index.max()[0] + 1
+# s_start = uk_projection.index.max()[0] + 1
+uk_current_end
+```
+
+```python Collapsed="false"
+current_uk_deaths_m7 = uk_projection[uk_projection.deaths_m7 >= 0].iloc[-1].deaths_m7
+current_uk_deaths_m7
+```
+
+```python Collapsed="false"
+it_since_threshold[it_since_threshold.deaths_m7 <= current_uk_deaths_m7].loc[60:].first_valid_index()[0]
+```
+
+```python Collapsed="false"
+s_start = it_since_threshold[it_since_threshold.deaths_m7 <= current_uk_deaths_m7].loc[60:].first_valid_index()[0]
+s_start
+```
+
+```python Collapsed="false"
+s_start_date = data_since_threshold.loc[(89, 'IT'), 'dateRep']# .iloc[0]
+s_start_date
+```
+
+```python Collapsed="false"
+s_end - s_start
+```
+
+```python Collapsed="false"
+uk_end = s_end - s_start + uk_current_end
+uk_end
+```
+
+```python Collapsed="false"
+proj = it_since_threshold.loc[(slice(s_start, s_end), slice(None)), ['cases', 'deaths', 'deaths_m7']]
+ndiff = uk_current_end - s_start
+proj.index = pd.MultiIndex.from_tuples([(n + ndiff, 'UK') for n, _ in proj.index], names=proj.index.names)
+proj
+```
+
+```python Collapsed="false"
+it_since_threshold.loc[(slice(s_start - 8, s_start + 2), slice(None)), ['cases', 'deaths', 'deaths_m7']]
+```
+
+```python Collapsed="false"
+uk_projection[['cases', 'deaths', 'deaths_m7']].tail()
+```
+
+```python Collapsed="false"
+# proj['deaths_m7'] = proj['deaths_m7'] + 20
+# proj
+```
+
+<!-- #region Collapsed="false" -->
+Projected deaths, UK following IT trend from now.
+<!-- #endregion -->
+
+```python Collapsed="false"
+uk_projection = uk_projection.append(proj, sort=True)
+uk_projection.deaths.sum()
+```
+
+```python Collapsed="false"
+uk_projection = uk_projection.droplevel(1)
+uk_projection
+```
+
+```python Collapsed="false"
+uk_projection.loc[152, 'deaths']
+```
+
+<!-- #region Collapsed="false" -->
+## Adjustment for the correction to cumulative deaths on 14 August
+<!-- #endregion -->
+
+```python Collapsed="false"
+# uk_projection.loc[152, 'deaths'] = 50
+```
+
+```python Collapsed="false"
+uk_projection['deaths_m7'] = uk_projection['deaths'].transform(lambda x: x.rolling(7, 1).mean())
+uk_projection.loc[(uk_current_end - 20):(uk_current_end + 5)]
+```
+
+```python Collapsed="false"
+uk_projection.loc[(uk_current_end - 5):]
+```
+
+```python Collapsed="false"
+uk_projection.deaths_m7.plot()
+```
+
+```python Collapsed="false"
+proj.droplevel(level=1)
+```
+
+```python Collapsed="false"
+ax = deaths_m7[COUNTRIES_CORE].plot()
+# uk_projection['deaths_m7'].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average", label="Projection", style='--', ax=ax)
+proj.droplevel(level=1)['deaths_m7'].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average", label="Projection", style='--', ax=ax)
+ax.set_xlabel(f"Days since {DEATH_COUNT_THRESHOLD} deaths")
+for c in COUNTRIES_CORE:
+    lvi = deaths_m7[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)
+# plt.savefig('covid_deaths_per_day_7.png') 
+```
+
+```python Collapsed="false"
+it_since_threshold.deaths.sum()
+```
+
+<!-- #region Collapsed="false" -->
+# Excess deaths calculation
+<!-- #endregion -->
+
+```python Collapsed="false"
+with open('excess_deaths.json') as f:
+    excess_deaths_data = json.load(f)
+    
+# with open('excess_death_accuracy.json') as f:
+#     excess_death_accuracy = json.load(f)
+    
+excess_deaths_data
+```
+
+```python Collapsed="false"
+# Sum of UK reported deaths after the excess-deaths end date: slice the UK
+# rows from end_date onwards, then drop the first row of that slice (iloc[1:])
+# so it is not included in the sum.
+# NOTE(review): presumably the dropped row is end_date itself, already covered
+# by the excess-deaths figure — confirm.
+additional_deaths = data_by_date.loc[('UK', excess_deaths_data['end_date']):('UK')].iloc[1:].deaths.sum()
+additional_deaths
+```
+
+```python Collapsed="false"
+uk_covid_deaths = data_by_date.loc['UK'].deaths.sum()
+uk_covid_deaths
+```
+
+```python Collapsed="false"
+uk_deaths_to_date = int(excess_deaths_data['excess_deaths']) + additional_deaths
+uk_deaths_to_date
+```
+
+```python Collapsed="false"
+# excess_deaths_upto = '2020-05-08'
+# excess_deaths = 54500
+```
+
+```python Collapsed="false"
+# excess_deaths_upto = excess_deaths_data['end_date']
+# excess_deaths = excess_deaths_data['excess_deaths']
+```
+
+<!-- #region Collapsed="false" -->
+Recorded deaths in period where ONS has reported total deaths
+<!-- #endregion -->
+
+```python Collapsed="false"
+# ons_reported_deaths = base_data.loc['UK'][:excess_deaths_upto]['deaths'].sum()
+# ons_reported_deaths
+```
+
+```python Collapsed="false"
+# excess_deaths_upto
+```
+
+<!-- #region Collapsed="false" -->
+## Adjustment for the deaths total correction on 14 August
+<!-- #endregion -->
+
+```python Collapsed="false"
+# ons_unreported_deaths_data = base_data.loc['UK'][excess_deaths_upto:].iloc[1:]['deaths']
+# ons_unreported_deaths_data['2020-08-14'] = 50
+```
+
+```python Collapsed="false"
+# ons_unreported_deaths = ons_unreported_deaths_data.sum()
+# ons_unreported_deaths
+```
+
+```python Collapsed="false"
+# scaled_ons_unreported_deaths = ons_unreported_deaths * excess_death_accuracy
+# scaled_ons_unreported_deaths
+```
+
+```python Collapsed="false"
+# uk_deaths_to_date = excess_deaths + scaled_ons_unreported_deaths
+# uk_deaths_to_date
+```
+
+```python Collapsed="false"
+# data_since_threshold.loc[(slice(None), 'UK'), :][data_since_threshold.dateRep == excess_deaths_data['end_date']]
+```
+
+```python Collapsed="false"
+# data_since_threshold[data_since_threshold.dateRep == excess_deaths_data['end_date']].loc[(slice(None), 'UK'), :]
+```
+
+```python Collapsed="false"
+# ons_unreported_start = data_since_threshold[data_since_threshold.dateRep == excess_deaths_data['end_date']].loc[(slice(None), 'UK'), :].first_valid_index()[0] + 1
+# ons_unreported_start
+```
+
+```python Collapsed="false"
+# unreported_projected_deaths = uk_projection.loc[ons_unreported_start:].deaths.sum()
+# unreported_projected_deaths
+```
+
+```python Collapsed="false"
+# unreported_projected_deaths_scaled = unreported_projected_deaths * excess_death_accuracy
+# unreported_projected_deaths_scaled
+```
+
+```python Collapsed="false"
+# uk_projection.loc[(s_start):].deaths.sum()
+```
+
+```python Collapsed="false"
+# deaths_actual_projected_scaled = uk_deaths_to_date + uk_projection.loc[(s_start):].deaths.sum() * excess_death_accuracy
+# deaths_actual_projected_scaled
+```
+
+```python Collapsed="false"
+# excess_deaths / reported_deaths
+```
+
+<!-- #region Collapsed="false" -->
+True deaths to date, if we follow the scaling of excess deaths over reported deaths so far.
+<!-- #endregion -->
+
+```python Collapsed="false"
+# uk_covid_deaths = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['UK']), 'deaths_culm'].iloc[-1]
+# uk_covid_deaths
+```
+
+```python Collapsed="false"
+# uk_covid_deaths_scaled = excess_deaths + unreported_deaths * excess_death_accuracy
+# uk_covid_deaths_scaled
+```
+
+```python Collapsed="false"
+# data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['IT']), 'dateRep'].iloc[-1] + pd.Timedelta(s_end - s_start, unit='days')
+```
+
+```python Collapsed="false"
+# data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['UK']), 'dateRep'].iloc[-1].strftime("%Y-%m-%d")
+```
+
+```python Collapsed="false"
+# uk_covid_deaths * excess_deaths / reported_deaths
+```
+
+```python Collapsed="false"
+# uk_projection.deaths.sum() * excess_deaths / reported_deaths
+```
+
+```python Collapsed="false"
+# data_since_threshold.loc[(slice(None), 'FR'), :]
+# data_since_threshold[data_since_threshold.dateRep == '2020-05-18'].loc[(slice(None), 'FR'), :]
+```
+
+<!-- #region Collapsed="false" -->
+## School reopenings
+<!-- #endregion -->
+
+```python Collapsed="false"
+school_reopenings = {
+    'ES': {'date': '2020-05-18'},
+    'FR': {'date': '2020-05-18'}, # some areas only
+#     'IT': {'date': '2020-09-01'},
+    # 'IE': {'date': '2020-09-01'},
+    'DE': {'date': '2020-05-04'},
+    'UK': {'date': '2020-06-01'}
+}
+```
+
+```python Collapsed="false"
+data_since_threshold[data_since_threshold.dateRep == '2020-05-04'].loc[(slice(None), ['DE']), :].first_valid_index()
+```
+
+```python Collapsed="false"
+data_since_threshold[data_since_threshold.dateRep == '2020-05-04'].loc[(slice(None), ['DE']), :].iloc[0].deaths_m7
+```
+
+```python Collapsed="false"
+for cID in school_reopenings:
+    dst_in = data_since_threshold[data_since_threshold.dateRep == (school_reopenings[cID]['date'])].loc[(slice(None), [cID]), :]
+    dst_i = dst_in.first_valid_index()
+    dst_n = dst_in.iloc[0].deaths_m7
+    school_reopenings[cID]['since_threshold'] = dst_i[0]
+    school_reopenings[cID]['deaths_m7'] = dst_n
+school_reopenings
+```
+
+```python Collapsed="false"
+ax = deaths_m7[COUNTRIES_CORE].plot(figsize=(15, 9), title="Deaths per day, 7 day moving average")
+# uk_projection.deaths_m7.plot(ax=ax)
+for c in COUNTRIES_CORE:
+    lvi = deaths_m7[c].last_valid_index()
+    ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = f"{c}: {deaths_m7[c][lvi]:.0f}")
+    if c in school_reopenings:
+        marker_col = [l for l in ax.lines if l.get_label() == c][0].get_color()
+        ax.plot(school_reopenings[c]['since_threshold'], school_reopenings[c]['deaths_m7'], '*', 
+                markersize=18, markerfacecolor=marker_col, markeredgecolor=marker_col)
+        ax.text(x = school_reopenings[c]['since_threshold'] + 1, y = school_reopenings[c]['deaths_m7'], 
+                s = f"{school_reopenings[c]['date']}: {school_reopenings[c]['deaths_m7']:.0f}")
+plt.savefig('school_reopenings.png')
+```
+
+```python Collapsed="false"
+# ax = deaths_m7[COUNTRIES_CORE].plot(figsize=(15, 9), title="Deaths per day, 7 day moving average",
+#                                    xlim=(46, 91), ylim=(0, 400))
+# # uk_projection.deaths_m7.plot(ax=ax)
+# for c in COUNTRIES_CORE:
+#     lvi = deaths_m7[c].last_valid_index()
+#     ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = f"{c}: {deaths_m7[c][lvi]:.0f}", fontsize=14)
+#     if c in school_reopenings:
+#         marker_col = [l for l in ax.lines if l.get_label() == c][0].get_color()
+#         ax.plot(school_reopenings[c]['since_threshold'], school_reopenings[c]['deaths_m7'], '*', 
+#                 markersize=18, markerfacecolor=marker_col, markeredgecolor=marker_col)
+#         ax.text(x = school_reopenings[c]['since_threshold'] + 1, y = school_reopenings[c]['deaths_m7'], 
+#                 s = f"{school_reopenings[c]['date']}: {school_reopenings[c]['deaths_m7']:.0f}",
+#                 fontsize=14)
+# plt.savefig('school_reopenings_detail.png')
+```
+
+<!-- #region Collapsed="false" -->
+# Lockdown graphs
+<!-- #endregion -->
+
+```python Collapsed="false"
+lockdown_dates = {
+    'ES': { 'part_start': {'date': '2020-03-14'}
+          , 'full_start': {'date': '2020-03-15'}
+          , 'part_finish': {'date': '2020-05-18'}
+          },
+    'FR': { 'part_start': {'date': '2020-03-13'}
+          , 'full_start': {'date': '2020-03-17'}
+          , 'part_finish': {'date': '2020-05-11'}
+          },
+    'IT': { 'part_start': {'date': '2020-03-08'}
+          , 'full_start': {'date': '2020-03-10'}
+          , 'part_finish': {'date': '2020-05-04'}
+          },
+    'DE': { #'part_start': {'date': '2020-03-13'}
+          'full_start': {'date': '2020-03-22'}
+          , 'part_finish': {'date': '2020-05-06'}
+          },
+    'UK': { 'part_start': {'date': '2020-03-23'}
+          , 'full_start': {'date': '2020-03-23'}
+          , 'part_finish': {'date': '2020-05-31'}
+          },
+    'IE': { #'part_start': {'date': '2020-03-12'}
+          'full_start': {'date': '2020-03-27'}
+          , 'part_finish': {'date': '2020-05-18'}
+          },
+}
+```
+
+```python Collapsed="false"
+for cID in lockdown_dates:
+    for phase in lockdown_dates[cID]:
+        dst_in = data_since_threshold[data_since_threshold.dateRep == (lockdown_dates[cID][phase]['date'])].loc[(slice(None), [cID]), :]
+        dst_i = dst_in.first_valid_index()
+        dst_n = dst_in.iloc[0].deaths_m7
+        dst_c = dst_in.iloc[0].cases_m7
+        lockdown_dates[cID][phase]['since_threshold'] = dst_i[0]
+        lockdown_dates[cID][phase]['deaths_m7'] = dst_n
+        lockdown_dates[cID][phase]['cases_m7'] = dst_c
+
+lockdown_dates
+```
+
+```python Collapsed="false"
+ax = deaths_m7[COUNTRIES_CORE].plot(figsize=(15, 9), title="Deaths per day, 7 day moving averagee, with lockdown dates")
+ax.set_xlabel(f"Days since {DEATH_COUNT_THRESHOLD} deaths")
+# uk_projection.deaths_m7.plot(ax=ax)
+for c in COUNTRIES_CORE:
+    lvi = deaths_m7[c].last_valid_index()
+    if c != 'UK':
+        ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = f"{c}: {deaths_m7[c][lvi]:.0f}")
+    if c in lockdown_dates:
+        for phase in lockdown_dates[c]:
+            marker_col = [l for l in ax.lines if l.get_label() == c][0].get_color()
+            ax.plot(lockdown_dates[c][phase]['since_threshold'], lockdown_dates[c][phase]['deaths_m7'], '*',
+                    markersize=18, markerfacecolor=marker_col, markeredgecolor=marker_col)
+            if 'start' not in phase:
+                ax.text(x = lockdown_dates[c][phase]['since_threshold'] + 1, y = lockdown_dates[c][phase]['deaths_m7'], 
+                        s = f"{lockdown_dates[c][phase]['date']}: {lockdown_dates[c][phase]['deaths_m7']:.0f}")
+# plt.savefig('school_reopenings.png')
+```
+
+```python Collapsed="false"
+ax = cases_m7.iloc[-50:][COUNTRIES_CORE].plot(figsize=(15, 9), title="Cases per day, 7 day moving average, with lockdown dates") #, ylim=(-10, 1500))
+ax.set_xlabel(f"Days since {DEATH_COUNT_THRESHOLD} deaths")
+# uk_projection.deaths_m7.plot(ax=ax)
+for c in COUNTRIES_CORE:
+    lvi = cases_m7[c].last_valid_index()
+#     if c != 'UK':
+    ax.text(x = lvi + 1, y = cases_m7[c][lvi], s = f"{c}: {cases_m7[c][lvi]:.0f}")
+
+```
+
+```python Collapsed="false"
+ax = cases_m7[COUNTRIES_CORE].plot(figsize=(15, 9), title="Cases per day, 7 day moving average, with lockdown dates")
+ax.set_xlabel(f"Days since {DEATH_COUNT_THRESHOLD} deaths")
+# uk_projection.deaths_m7.plot(ax=ax)
+for c in COUNTRIES_CORE:
+    lvi = cases_m7[c].last_valid_index()
+#     if c != 'UK':
+    ax.text(x = lvi + 1, y = cases_m7[c][lvi], s = f"{c}: {cases_m7[c][lvi]:.0f}")
+    if c in lockdown_dates:
+        for phase in lockdown_dates[c]:
+            marker_col = [l for l in ax.lines if l.get_label() == c][0].get_color()
+            if 'start' in phase:
+                marker_shape = '^'
+            else:
+                marker_shape = 'v'
+            ax.plot(lockdown_dates[c][phase]['since_threshold'], lockdown_dates[c][phase]['cases_m7'], 
+                    marker_shape,
+                    markersize=18, markerfacecolor=marker_col, markeredgecolor=marker_col)
+            if 'start' not in phase:
+                ax.text(x = lockdown_dates[c][phase]['since_threshold'] + 1, y = lockdown_dates[c][phase]['cases_m7'], 
+                        s = f"{lockdown_dates[c][phase]['date']}: {lockdown_dates[c][phase]['cases_m7']:.0f}")
+# plt.savefig('cases_per_day_with_lockdown.png')
+```
+
+```python Collapsed="false"
+plot_start_date = '2020-03-01'
+ax = cases_by_date_m7.loc[plot_start_date:, COUNTRIES_CORE].plot(figsize=(15, 9), title="Cases per day, 7 day moving average, with lockdown dates")
+ax.set_xlabel(f"Date")
+ax.set_ylabel("Number of cases")
+# uk_projection.deaths_m7.plot(ax=ax)
+for c in COUNTRIES_CORE:
+    lvi = cases_by_date_m7[c].last_valid_index()
+#     if c != 'UK':
+    ax.text(x = lvi + pd.Timedelta(days=1), y = cases_by_date_m7[c][lvi], s = f"{c}: {cases_by_date_m7[c][lvi]:.0f}")
+    if c in lockdown_dates:
+        for phase in lockdown_dates[c]:
+            marker_col = [l for l in ax.lines if l.get_label() == c][0].get_color()
+            if 'start' in phase:
+                marker_shape = '^'
+            else:
+                marker_shape = 'v'
+            marker_x_pos = ax.get_xlim()[0] + mpl.dates.date2num(pd.to_datetime(lockdown_dates[c][phase]['date'])) - mpl.dates.date2num(pd.to_datetime(plot_start_date))
+            ax.plot(marker_x_pos, lockdown_dates[c][phase]['cases_m7'], 
+                    marker_shape,
+                    markersize=18, markerfacecolor=marker_col, markeredgecolor=marker_col)
+            if 'start' not in phase:
+                ax.text(x = marker_x_pos + 3, y = lockdown_dates[c][phase]['cases_m7'], 
+                        s = f"{lockdown_dates[c][phase]['date']}: {lockdown_dates[c][phase]['cases_m7']:.0f}")
+plt.savefig('cases_per_day_with_lockdown.png')
+```
+
+```python Collapsed="false"
+ax = cases_m7[COUNTRIES_CORE].plot(figsize=(10, 6), title="Cases per day, 7 day moving average")
+for c in COUNTRIES_CORE:
+    lvi = cases_m7[c].last_valid_index()
+    ax.text(x = lvi + 1, y = cases_m7[c][lvi], s = c)
+plt.savefig('covid_cases_per_day-core.png') 
+```
+
+```python Collapsed="false"
+ax = deaths_m7[COUNTRIES_CORE].plot(figsize=(15, 9), title="Deaths per day, 7 day moving average",
+                                   xlim=(0, 15), 
+                                    ylim=(0, 66)
+                                   )
+# uk_projection.deaths_m7.plot(ax=ax)
+for c in COUNTRIES_CORE:
+    lvi = deaths_m7[c].last_valid_index()
+    if c in lockdown_dates:
+        for phase in lockdown_dates[c]:
+            if 'start' in phase:
+                print(c, phase)
+                marker_col = [l for l in ax.lines if l.get_label() == c][0].get_color()
+                ax.plot(lockdown_dates[c][phase]['since_threshold'], lockdown_dates[c][phase]['deaths_m7'], '*', 
+                        markersize=18, markerfacecolor=marker_col, markeredgecolor=marker_col)
+                ax.text(x = lockdown_dates[c][phase]['since_threshold'] + 0.3, y = lockdown_dates[c][phase]['deaths_m7'], 
+                        s = f"{lockdown_dates[c][phase]['date']}: {lockdown_dates[c][phase]['deaths_m7']:.0f}")
+# plt.savefig('school_reopenings.png')
+```
+
+```python Collapsed="false"
+
+```
+
+```python Collapsed="false"
+
+```
+
+<!-- #region Collapsed="false" -->
+# Write results to summary file
+<!-- #endregion -->
+
+```python Collapsed="false"
+with open('covid_summary.md', 'w') as f:
+    f.write('% Covid death data summary\n')
+    f.write('% Neil Smith\n')
+    f.write(f'% Created on {datetime.datetime.now().strftime("%Y-%m-%d")}\n')
+    f.write('\n')
+        
+    last_uk_date = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['UK']), 'dateRep'].iloc[-1]
+    f.write(f'> Last UK data from {last_uk_date.strftime("%Y-%m-%d")}\n')
+    f.write('\n')    
+```
+
+```python Collapsed="false"
+# Append the "Headlines" table (reported and estimated UK deaths) to the summary.
+with open('covid_summary.md', 'a') as f:
+    f.write('## Headlines\n')
+    f.write('\n')
+    f.write('| []() | |\n')
+    f.write('|:---|---:|\n')
+    f.write(f'| Deaths reported so far | {uk_covid_deaths} | \n')
+    f.write(f'| Total Covid deaths to date (estimated) | {uk_deaths_to_date:.0f} |\n')
+    # Date of the last IT row plus the projection length (s_end - s_start days).
+    # NOTE(review): within this section projection_date is only referenced by
+    # commented-out code.
+    projection_date = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['IT']), 'dateRep'].iloc[-1] + pd.Timedelta(s_end - s_start, unit='days')
+#     f.write(f'| Projected total deaths up to {projection_date.strftime("%Y-%m-%d")} | {deaths_actual_projected_scaled:.0f} | \n')
+    f.write('\n')
+```
+
+```python Collapsed="false"
+with open('covid_summary.md', 'a') as f:
+    f.write('## Total deaths\n')
+#     f.write(f'Time based on days since {DEATH_COUNT_THRESHOLD} deaths\n')
+    f.write('\n')
+    f.write('![Total deaths](covid_deaths_total_linear.png)\n')
+    f.write('\n')
+    f.write('| Country ID | Country name | Total deaths |\n')
+    f.write('|:-----------|:-------------|-------------:|\n')
+    for c in sorted(COUNTRIES_CORE):
+        lvi = deaths_by_date[c].last_valid_index()
+        f.write(f'| {c} | {countries.loc[c].countriesAndTerritories} | {int(deaths_by_date[c][lvi])} |\n')
+    f.write('\n')
+```
+
+```python Collapsed="false"
+with open('covid_summary.md', 'a') as f:
+    f.write('## All-causes deaths, UK\n')
+    f.write('\n')
+    f.write('![All-causes deaths](deaths-radar.png)\n')
+    f.write('\n')
+    f.write('### True deaths\n')
+    f.write('\n')
+    f.write(f'The number of deaths reported in official statistics, {uk_covid_deaths}, is an underestimate '
+            'of the true number of Covid deaths.\n'
+            'This is especially true early in the pandemic, approximately March to May 2020.\n')
+    f.write('We can get a better understanding of the impact of Covid by looking at the number of deaths, '
+            'over and above what would be expected at each week of the year.\n')
+    f.write(f'The ONS (and other bodies in Scotland and Northern Ireland) have released data on the number of deaths '
+            f'up to {pd.to_datetime(excess_deaths_data["end_date"]).strftime("%d %B %Y")}.\n\n')
+    f.write('If, for each of those weeks, I take the largest of the excess deaths or the reported Covid deaths, ')
+    f.write(f'I estimate there have been **{uk_deaths_to_date}** total deaths so far.\n')
+    f.write('\n')
+```
+
+```python Collapsed="false"
+# with open('covid_summary.md', 'a') as f:
+#     f.write(f'In that period, the UK reported {ons_reported_deaths} Covid deaths.\n')
+#     f.write(f'In the last three weeks for which excess deaths have been reported, the excess deaths have been {excess_death_accuracy:.3f} higher than the Covid-reported deaths.\n')
+# #     f.write(f'That means the actual number of Covid death is about {excess_deaths / reported_deaths:.2f} times higher than the reported figures.\n')
+#     f.write('\n')
+#     f.write(f'The UK has reported {uk_covid_deaths} deaths so far.\n')
+#     f.write(f'Using the scaling factor above (for Covid-19 deaths after the ONS figures), I infer that there have been **{uk_deaths_to_date:.0f}** total deaths so far.\n')
+#     f.write('\n')
+```
+
+```python Collapsed="false"
+with open('covid_summary.md', 'a') as f:
+    f.write('## Deaths per day\n')
+    f.write(f'Based on a 7-day moving average\n')
+    f.write('\n')
+    f.write('![Deaths per day](covid_deaths_per_day_7.png)\n')
+    f.write('\n')
+    f.write('![Deaths per day, last 30 days](deaths_by_date_last_30_days.png)\n')
+    f.write('\n')
+```
+
+```python Collapsed="false"
+s_end - s_start - 1
+```
+
+```python Collapsed="false"
+with open('covid_summary.md', 'a') as f:
+    f.write('## Projected deaths\n')
+    f.write("Previously, I was using Italy's deaths data to predict the UK's deaths data. "
+            "This worked when both countries' trends of deaths were falling or constant, "
+            "as they were until September.\n")
+    f.write("\n")
+    f.write("As of mid-September, with cases rising in both countries at around the same time, "
+            "I can't use Italian data to predict the UK's future deaths.\n")
+    f.write("\n")
+#     f.write(f"The UK's daily deaths data is very similar to Italy's.\n")
+#     f.write(f'If I use the Italian data for the next {s_end - s_start - 1} days (from {s_start_date.strftime("%d %B %Y")} onwards),')
+#     f.write(f' the UK will report {uk_projection.deaths.sum()} deaths on day {uk_end} of the epidemic.\n')
+#     f.write('\n')
+#     f.write('Using the excess deaths scaling from above, that will translate into ')
+#     f.write(f'**{deaths_actual_projected_scaled:.0f}** Covid deaths total.\n')
+#     f.write('\n')
+```
+
+```python Collapsed="false"
+with open('covid_summary.md', 'a') as f:
+    f.write('## Deaths doubling times\n')
+    f.write(f'Based on a 7-day moving average\n')
+    f.write('\n')
+    f.write('![Deaths doubling times](covid_doubling_times_7.png)\n')
+    f.write('\n')
+```
+
+```python Collapsed="false"
+with open('covid_summary.md', 'a') as f:
+    f.write('\n')
+    f.write('## Cases per day and lockdown dates\n')
+    f.write(f'Based on a 7-day moving average\n')
+    f.write('\n')
+    f.write('![Cases per day](cases_per_day_with_lockdown.png)\n')
+    f.write('\n')
+```
+
+```python Collapsed="false"
+with open('covid_summary.md', 'a') as f:
+    f.write('| Country ID | Country name | Most recent daily cases | Most recent daily deaths |\n')
+    f.write('|:-----------|:-------------|------------------------:|-------------------------:|\n')
+    for c in sorted(COUNTRIES_CORE):
+        lvic = cases_m7[c].last_valid_index()
+        lvid = deaths_m7[c].last_valid_index()
+        f.write(f'| {c} | {countries.loc[c].countriesAndTerritories} | {cases_m7[c][lvic]:.0f} | {deaths_m7[c][lvid]:.0f} | \n')
+    f.write('\n')
+    f.write('(Figures are 7-day averages)\n')
+    f.write('\n')
+```
+
+```python Collapsed="false"
+with open('hospital_normalisation_date.json') as f:
+    hospital_normalisation_date_data = json.load(f)
+```
+
+```python Collapsed="false"
+with open('covid_summary.md', 'a') as f:
+    f.write('## Hospital care\n')
+    f.write(f'Based on a 7-day moving average\n')
+    f.write('\n')
+    f.write('![Cases, admissions, deaths](cases_admissions_deaths.png)\n')
+    f.write('\n')
+#     f.write('Admissions are shifted by 10 days, deaths by 25 days. '
+#             'This reflects the typical timescales of infection: '
+#             'patients are admitted 10 days after onset of symptoms, '
+#             'and die 15 days after admission.\n')
+#     f.write('\n')
+#     f.write('Plotting this data with offsets shows more clearly '
+#             'the relative changes in these three metrics.\n')
+    f.write('Due to the large scale differences between the three '
+            'measures, they are all normalised to show changes ')
+    f.write(f'since {pd.to_datetime(hospital_normalisation_date_data["hospital_normalisation_date"]).strftime("%d %B %Y")}.\n')
+    f.write('\n')
+```
+
+```python Collapsed="false"
+with open('covid_summary.md', 'a') as f:
+    f.write('## Testing effectiveness\n')
+    f.write('\n')
+    f.write('A question about testing is whether more detected cases is a result of more tests being '
+            'done or is because the number of cases is increasing. One way of telling the differeence '
+            'is by looking at the fraction of tests that are positive.\n')
+    f.write('\n')
+    f.write('![Positive tests and cases](tests_and_cases.png)\n')
+    f.write('\n')
+    f.write('Numbers of positive tests and cases, '
+            '7-day moving average.\n'
+            'Note the different y-axes\n')
+    f.write('\n')    
+    f.write('![Fraction of tests with positive result](fraction_positive_tests.png)\n')
+    f.write('\n')
+    f.write('Fraction of tests with a positive result, both daily figures and '
+            '7-day moving average.\n')
+    f.write('\n')    
+    f.write('\n')
+    f.write('![Tests against fraction positive, trajectory](fraction_positive_tests_vs_tests.png)\n')
+    f.write('\n')
+    f.write('The trajectory of tests done vs fraction positive tests.\n')
+    f.write('\n')
+    f.write('Points higher indicate more tests; points to the right indicate more positive tests.'
+            'More tests being done with the same infection prevelance will move the point up '
+            'and to the left.\n')
+    f.write('\n')
+    f.write('\n')
+    f.write('![Tests against fraction positive, trajectory](tests_vs_fraction_positive_animation.png)\n')
+    f.write('\n')
+```
+
+```python Collapsed="false"
+
+```
+
+```python Collapsed="false"
+with open('covid_summary.md', 'a') as f:
+    f.write('# Data sources\n')
+    f.write('\n')
+    f.write('> Covid data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)\n')
+    f.write('\n')    
+    f.write("""> Population data from:
+
+* [Office of National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (Endland and Wales) Weeks start on a Saturday.
+* [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
+* [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.""")
+    
+    f.write('\n\n')
+    f.write('> [Source code available](https://git.njae.me.uk/?p=covid19.git;a=tree)\n')
+    f.write('\n') 
+
+```
+
+```python Collapsed="false"
+!pandoc --toc -s covid_summary.md > covid_summary.html
+```
+
+```python Collapsed="false"
+!scp covid_summary.html neil@ogedei:/var/www/scripts.njae.me.uk/covid/index.html
+!scp covid_deaths_total_linear.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp deaths-radar.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp covid_deaths_per_day_7.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp covid_doubling_times_7.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp cases_per_day_with_lockdown.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp cases_admissions_deaths.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp fraction_positive_tests.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/    
+!scp tests_and_cases.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp deaths_by_date_last_30_days.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp fraction_positive_tests_vs_tests.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp tests_vs_fraction_positive_animation.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/   
+```
+
+```python Collapsed="false"
+# Write one-line JavaScript snippets, each a document.write() call that emits
+# a single figure.
+with open('uk_covid_deaths.js', 'w') as f:
+    f.write(f"document.write('{uk_covid_deaths}');")
+    
+# The estimated total is formatted as a whole number.
+with open('estimated_total_deaths.js', 'w') as f:
+    f.write(f"document.write('{uk_deaths_to_date:.0f}');")
+
+# with open('projection_date.js', 'w') as f:
+#     f.write(f"document.write(\'{projection_date.strftime('%d %B %Y')}\');")
+
+# with open('projected_deaths.js', 'w') as f:
+#     f.write(f"document.write('{uk_projection.deaths.sum():.0f}');")
+
+# with open('projected_excess_deaths.js', 'w') as f:
+#     f.write(f"document.write('{deaths_actual_projected_scaled:.0f}');")
+
+edut = pd.to_datetime(excess_deaths_data["end_date"]).strftime('%d %B %Y')
+with open('excess_deaths_upto.js', 'w') as f:
+    f.write(f"document.write('{edut}');")
+
+# with open('excess_deaths.js', 'w') as f:
+#     f.write(f"document.write('{excess_deaths:.0f}');")
+    
+# with open('reported_deaths.js', 'w') as f:
+#     f.write(f"document.write('{ons_reported_deaths:.0f}');")
+    
+# with open('scaling_factor.js', 'w') as f:
+#     f.write(f"document.write('{excess_death_accuracy:.2f}');")  
+
+# with open('projection_length.js', 'w') as f:
+#     f.write(f"document.write('{s_end - s_start - 1}');")
+    
+# with open('s_end.js', 'w') as f:
+#     f.write(f"document.write('{s_end}');")
+    
+# s_start_date_str = s_start_date.strftime("%d %B %Y")
+# with open('s_start_date.js', 'w') as f:
+#     f.write(f"document.write('{s_start_date_str}');")
+    
+# with open('uk_end.js', 'w') as f:
+#     f.write(f"document.write('{uk_end}');")
+    
+with open('last_uk_date.js', 'w') as f:
+    f.write(f"document.write('{pd.to_datetime(last_uk_date).strftime('%d %B %Y')}');")
+```
+
+```python Collapsed="false"
+# pd.to_datetime(excess_deaths_upto).strftime('%d %B %Y')
+```
+
+```python Collapsed="false"
+!scp uk_covid_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp estimated_total_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+# !scp projection_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+# !scp projected_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+# !scp projected_excess_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp excess_deaths_upto.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+# !scp excess_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+# !scp reported_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+# !scp scaling_factor.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+# !scp projection_length.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+# !scp s_end.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+# !scp s_start_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+# !scp uk_end.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp last_uk_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp hospital_normalisation_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+```
+
+```python Collapsed="false"
+data_by_date.loc['UK'].to_csv('data_by_day_uk.csv', header=True, index=True)
+data_by_date.loc['BE'].to_csv('data_by_day_be.csv', header=True, index=True)
+```
+
+```python Collapsed="false"
+ukd = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['UK']), ['deaths', 'deaths_m7']].droplevel(1)
+ax = ukd.deaths.plot.bar(figsize=(12, 8))
+ukd.deaths_m7.plot.line(ax=ax, color='red')
+# ax = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['UK']), 'deaths_m7'].plot.line(figsize=(12, 8), color='red')
+# ax = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['UK']), 'deaths'].plot.bar(ax=ax)
+ax.set_xticks(range(0, 120, 20))
+```
+
+```python Collapsed="false"
+ukdd = data_by_date.loc['UK'].iloc[-30:]
+ax = ukdd.deaths_m7.plot.line(figsize=(12, 8), color='red')
+# ukdd.deaths.plot.bar(ax=ax)
+ax.bar(ukdd.index, ukdd.deaths)
+```
+
+```python Collapsed="false"
+ukdd
+```
+
+```python Collapsed="false"
+np.arange(0, 130, 20)
+```
+
+```python Collapsed="false"
+data_by_date.loc['UK']
+```
+
+```python Collapsed="false"
+data_by_date.loc['UK'].plot(x='deaths_culm', y='deaths', logx=True, logy=True)
+```
+
+```python Collapsed="false"
+data_by_date.loc['UK'].plot(x='cases_culm', y='cases')
+```
+
+```python Collapsed="false"
+ukdbd = data_by_date.loc['UK'].copy()
+ukdbd['deaths_m7'] = ukdbd.deaths.transform(lambda x: x.rolling(7, 1).mean())
+ukdbd['cases_m7'] = ukdbd.cases.transform(lambda x: x.rolling(7, 1).mean())
+ukdbd
+```
+
+```python Collapsed="false"
+ukdbd.plot(x='deaths_culm', y='deaths_m7', logx=True, logy=True)  # deaths_m7 against deaths_culm, both axes logarithmic
+```
+
+```python Collapsed="false"
+fig, ax = plt.subplots(figsize=(12, 8))
+xmax = 10
+for c in COUNTRIES_CORE:
+    if data_since_threshold.loc[(slice(None), c), 'deaths_culm'].max() > xmax:
+        xmax = data_since_threshold.loc[(slice(None), c), 'deaths_culm'].max()
+    data_since_threshold.loc[(slice(None), c), :].plot(x='deaths_culm', y='deaths_m7', logx=True, logy=True, xlim=(10, xmax * 1.1), label=c, ax=ax)
+```
+
+```python Collapsed="false"
+data_since_threshold.loc[(slice(None), 'UK'), 'deaths_culm'].max()  # largest deaths_culm value among the UK rows
+```
+
+```python Collapsed="false"
+countries.continentExp.unique()  # distinct values found in the continentExp column
+```
+
+```python Collapsed="false"
+countries.loc['KW']  # look up the countries entry indexed by 'KW'
+```
+
+```python Collapsed="false"
+data_by_date.groupby(level=0)['deaths'].shift(-25)  # each row receives the deaths value from 25 rows later in its level-0 group
+```
+
+```python Collapsed="false"
+offset_data = data_by_date.loc[:, ['cases']]
+offset_data['deaths'] = data_by_date.groupby(level=0)['deaths'].shift(-25)
+offset_data['cases_m7'] = offset_data.groupby(level=0)['cases'].transform(lambda x: x.rolling(7, 1).mean())
+offset_data['deaths_m7'] = offset_data['deaths'].dropna().groupby(level=0).transform(lambda x: x.rolling(7, 1).mean())
+offset_data['deaths_per_case'] = offset_data.deaths_m7 / offset_data.cases_m7
+offset_data
+```
+
+```python Collapsed="false"
+deaths_m7  # NOTE(review): not assigned anywhere in this section; confirm an earlier cell defines it
+```
+
+```python Collapsed="false"
+offset_deaths_m7 = (offset_data.loc[COUNTRIES_ALL, ['deaths_m7']]
+             .unstack().sort_index().xs('deaths_m7', axis=1, drop_level=True)).T.sort_index()
+offset_deaths_m7
+```
+
+```python Collapsed="false"
+offset_deaths_m7['UK']  # the 'UK' column of the pivoted table
+```
+
+```python Collapsed="false"
+data_since_threshold.loc[(slice(None), 'UK'), :].tail()  # last five UK rows, all columns
+```
+
+```python Collapsed="false"
+countries.loc['PT']  # look up the countries entry indexed by 'PT'
+```
+
+```python Collapsed="false"
+ax = cases_by_date_m7.iloc[-50:][COUNTRIES_FRIENDS].plot(figsize=(15, 9), title="Cases per day, 7 day moving average")#, ylim=(-10, 1500))
+# ax.set_xlabel(f"Days since {DEATH_COUNT_THRESHOLD} deaths")
+# uk_projection.deaths_m7.plot(ax=ax)
+for c in COUNTRIES_FRIENDS:
+    lvi = cases_by_date_m7[c].last_valid_index()
+    ax.text(x = lvi + pd.Timedelta(days=1), y = cases_by_date_m7[c][lvi], s = f"{c}: {cases_by_date_m7[c][lvi]:.0f}")
+
+```
+
+```python Collapsed="false"
+ax = deaths_by_date_m7.iloc[-50:][COUNTRIES_FRIENDS].plot(figsize=(15, 9), title="Deaths per day, 7 day moving average")#, ylim=(-10, 100))
+# ax.set_xlabel(f"Days since {DEATH_COUNT_THRESHOLD} deaths")
+# uk_projection.deaths_m7.plot(ax=ax)
+for c in COUNTRIES_FRIENDS:
+    lvi = deaths_by_date_m7[c].last_valid_index()
+#     if c != 'ES':
+    ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_by_date_m7[c][lvi], s = f"{c}: {deaths_by_date_m7[c][lvi]:.0f}")
+```
+
+```python Collapsed="false"
+
+```