General updates
[covid19.git] / uk_deaths.md
diff --git a/uk_deaths.md b/uk_deaths.md
deleted file mode 100644 (file)
index da577be..0000000
+++ /dev/null
@@ -1,536 +0,0 @@
----
-jupyter:
-  jupytext:
-    formats: ipynb,md
-    text_representation:
-      extension: .md
-      format_name: markdown
-      format_version: '1.2'
-      jupytext_version: 1.3.4
-  kernelspec:
-    display_name: Python 3
-    language: python
-    name: python3
----
-
-Data from:
-
-* [Office for National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (England and Wales). Weeks start on a Saturday.
-* [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
-* [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO 8601 week numbers, which start on a Monday.
-
-
-```python
-import itertools
-import collections
-import pandas as pd
-import numpy as np
-from scipy.stats import gmean
-
-import matplotlib as mpl
-import matplotlib.pyplot as plt
-%matplotlib inline
-```
-
-```python
-!ls uk-deaths-data
-```
-
-```python
-raw_data_2015 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2015.csv', 
-                       parse_dates=[1, 2], dayfirst=True,
-                      index_col=0,
-                      header=[0, 1]
-                           )
-dh15i = raw_data_2015.iloc[:, [2]]
-dh15i.columns = ['total_2015']
-# dh15i.head()
-```
-
-```python
-raw_data_2016 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2016.csv', 
-                        parse_dates=[1, 2], dayfirst=True,
-                      index_col=0,
-                      header=[0, 1]
-                           )
-dh16i = raw_data_2016.iloc[:, [2]]
-dh16i.columns = ['total_2016']
-# dh16i.head()
-```
-
-```python
-raw_data_2017 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2017.csv', 
-                        parse_dates=[1, 2], dayfirst=True,
-                      index_col=0,
-                      header=[0, 1]
-                           )
-dh17i = raw_data_2017.iloc[:, [2]]
-dh17i.columns = ['total_2017']
-# dh17i.head()
-```
-
-```python
-raw_data_2018 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2018.csv', 
-                        parse_dates=[1, 2], dayfirst=True,
-                      index_col=0,
-                      header=[0, 1]
-                           )
-dh18i = raw_data_2018.iloc[:, [2]]
-dh18i.columns = ['total_2018']
-# dh18i.head()
-```
-
-```python
-raw_data_2019 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2019.csv', 
-                        parse_dates=[1, 2], dayfirst=True,
-                      index_col=0,
-                      header=[0, 1]
-                           )
-dh19i = raw_data_2019.iloc[:, [2]]
-dh19i.columns = ['total_2019']
-# dh19i.head()
-```
-
-```python
-raw_data_2020_i = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2020.csv', 
-                        parse_dates=[1], dayfirst=True,
-                      index_col=0,
-                      header=[0, 1]
-                           )
-deaths_headlines_i = raw_data_2020_i.iloc[:, [1]]
-deaths_headlines_i.columns = ['total_2020']
-deaths_headlines_i.head()
-```
-
-```python
-
-```
-
-```python
-
-```
-
-```python
-raw_data_s = pd.read_csv('uk-deaths-data/weekly-deaths-april-20-scotland.csv', 
-                      index_col=0,
-                      header=0,
-                        skiprows=2
-                           )
-# raw_data_s
-```
-
-```python
-deaths_headlines_s = raw_data_s[reversed('2015 2016 2017 2018 2019 2020'.split())]
-deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns]
-deaths_headlines_s.reset_index(drop=True, inplace=True)
-deaths_headlines_s.index = deaths_headlines_s.index + 1
-deaths_headlines_s
-```
-
-```python
-
-```
-
-```python
-
-```
-
-```python
-
-```
-
-```python
-
-```
-
-```python
-
-```
-
-```python
-raw_data_2020 = pd.read_csv('uk-deaths-data/publishedweek182020.csv', 
-                       parse_dates=[1], dayfirst=True,
-                      index_col=0,
-                      header=[0, 1])
-```
-
-```python
-# raw_data_2020.head()
-```
-
-```python
-raw_data_2020['W92000004', 'Wales']
-```
-
-```python
-raw_data_2019 = pd.read_csv('uk-deaths-data/publishedweek522019.csv', 
-                       parse_dates=[1], dayfirst=True,
-                      index_col=0,
-                      header=[0, 1])
-# raw_data_2019.head()
-```
-
-```python
-raw_data_2018 = pd.read_csv('uk-deaths-data/publishedweek522018.csv', 
-                       parse_dates=[1], dayfirst=True,
-                      index_col=0,
-                      header=[0, 1])
-# raw_data_2018.head()
-```
-
-```python
-raw_data_2017 = pd.read_csv('uk-deaths-data/publishedweek522017.csv', 
-                       parse_dates=[1], dayfirst=True,
-                      index_col=0,
-                      header=[0, 1])
-# raw_data_2017.head()
-```
-
-```python
-raw_data_2016 = pd.read_csv('uk-deaths-data/publishedweek522016.csv', 
-                       parse_dates=[1], dayfirst=True,
-                      index_col=0,
-                      header=[0, 1])
-# raw_data_2016.head()
-```
-
-```python
-raw_data_2015 = pd.read_csv('uk-deaths-data/publishedweek2015.csv', 
-                       parse_dates=[1], dayfirst=True,
-                      index_col=0,
-                      header=[0, 1])
-# raw_data_2015.head()
-```
-
-```python
-deaths_headlines_e = raw_data_2020.iloc[:, [1]]
-deaths_headlines_e.columns = ['total_2020']
-deaths_headlines_w = raw_data_2020['W92000004']
-deaths_headlines_e.columns = ['total_2020']
-deaths_headlines_w.columns = ['total_2020']
-deaths_headlines_e.total_2020 -= deaths_headlines_w.total_2020
-deaths_headlines_e.head()
-deaths_headlines_e
-```
-
-```python
-dh19e = raw_data_2019.iloc[:, [1]]
-dh19w = raw_data_2019['W92000004']
-dh19e.columns = ['total_2019']
-dh19w.columns = ['total_2019']
-dh19e.total_2019 -= dh19w.total_2019
-dh19e.head()
-```
-
-```python
-dh19w.head()
-```
-
-```python
-dh18e = raw_data_2018.iloc[:, [1]]
-dh18w = raw_data_2018['W92000004']
-dh18e.columns = ['total_2018']
-dh18w.columns = ['total_2018']
-dh18e.total_2018 -= dh18w.total_2018
-# dh18e.head()
-```
-
-```python
-dh17e = raw_data_2017.iloc[:, [1]]
-dh17w = raw_data_2017['W92000004']
-dh17e.columns = ['total_2017']
-dh17w.columns = ['total_2017']
-dh17e.total_2017 -= dh17w.total_2017
-# dh17e.head()
-```
-
-```python
-dh16e = raw_data_2016.iloc[:, [1]]
-dh16w = raw_data_2016['W92000004']
-dh16e.columns = ['total_2016']
-dh16w.columns = ['total_2016']
-dh16e.total_2016 -= dh16w.total_2016
-# dh16e.head()
-```
-
-```python
-dh15e = raw_data_2015.iloc[:, [1]]
-dh15w = raw_data_2015['W92000004']
-dh15e.columns = ['total_2015']
-dh15w.columns = ['total_2015']
-dh15e.total_2015 -= dh15w.total_2015
-# dh15e.head()
-```
-
-```python
-# dh18 = raw_data_2018.iloc[:, [1, 2]]
-# dh18.columns = ['total_2018', 'total_previous']
-# # dh18.head()
-```
-
-```python
-deaths_headlines_e = deaths_headlines_e.merge(dh19e['total_2019'], how='outer', left_index=True, right_index=True)
-deaths_headlines_e = deaths_headlines_e.merge(dh18e['total_2018'], how='outer', left_index=True, right_index=True)
-deaths_headlines_e = deaths_headlines_e.merge(dh17e['total_2017'], how='outer', left_index=True, right_index=True)
-deaths_headlines_e = deaths_headlines_e.merge(dh16e['total_2016'], how='outer', left_index=True, right_index=True)
-# deaths_headlines = deaths_headlines.merge(dh15['total_2015'], how='outer', left_index=True, right_index=True)
-deaths_headlines_e = deaths_headlines_e.merge(dh15e['total_2015'], how='left', left_index=True, right_index=True)
-deaths_headlines_e
-```
-
-```python
-deaths_headlines_s = raw_data_s[reversed('2015 2016 2017 2018 2019 2020'.split())]
-deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns]
-deaths_headlines_s.reset_index(drop=True, inplace=True)
-deaths_headlines_s.index = deaths_headlines_s.index + 1
-deaths_headlines_s = deaths_headlines_s.loc[1:52]
-deaths_headlines_s
-```
-
-```python
-deaths_headlines_w = deaths_headlines_w.merge(dh19w['total_2019'], how='outer', left_index=True, right_index=True)
-deaths_headlines_w = deaths_headlines_w.merge(dh18w['total_2018'], how='outer', left_index=True, right_index=True)
-deaths_headlines_w = deaths_headlines_w.merge(dh17w['total_2017'], how='outer', left_index=True, right_index=True)
-deaths_headlines_w = deaths_headlines_w.merge(dh16w['total_2016'], how='outer', left_index=True, right_index=True)
-# deaths_headlines = deaths_headlines.merge(dh15['total_2015'], how='outer', left_index=True, right_index=True)
-deaths_headlines_w = deaths_headlines_w.merge(dh15w['total_2015'], how='left', left_index=True, right_index=True)
-deaths_headlines_w
-```
-
-```python
-deaths_headlines_i = deaths_headlines_i.merge(dh19i['total_2019'], how='outer', left_index=True, right_index=True)
-deaths_headlines_i = deaths_headlines_i.merge(dh18i['total_2018'], how='outer', left_index=True, right_index=True)
-deaths_headlines_i = deaths_headlines_i.merge(dh17i['total_2017'], how='outer', left_index=True, right_index=True)
-deaths_headlines_i = deaths_headlines_i.merge(dh16i['total_2016'], how='outer', left_index=True, right_index=True)
-deaths_headlines_i = deaths_headlines_i.merge(dh15i['total_2015'], how='left', left_index=True, right_index=True)
-deaths_headlines_i
-```
-
-```python
-deaths_headlines = deaths_headlines_e + deaths_headlines_w + deaths_headlines_i + deaths_headlines_s
-deaths_headlines
-```
-
-```python
-deaths_headlines_e['previous_mean'] = deaths_headlines_e['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
-deaths_headlines_w['previous_mean'] = deaths_headlines_w['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
-deaths_headlines_s['previous_mean'] = deaths_headlines_s['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
-deaths_headlines_i['previous_mean'] = deaths_headlines_i['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
-deaths_headlines['previous_mean'] = deaths_headlines['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
-deaths_headlines
-```
-
-```python
-deaths_headlines['total_2020 total_2019 total_2018 total_2017 total_2016 total_2015'.split()].plot(figsize=(10, 8))
-```
-
-```python
-deaths_headlines_i.plot()
-```
-
-```python
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
-    np.flip(
-        np.arange(len(deaths_headlines))/float(len(deaths_headlines))*2.*np.pi),
-    14)
-# l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#b56363", label="2015") # 0
-# l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#a4b563", label="2016") # 72
-# l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#63b584", label="2017") # 144
-# l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#6384b5", label="2018") # 216
-# l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#a4635b", label="2019") # 288
-l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, deaths_headlines['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, deaths_headlines['total_2020'], color="red", label="2020")
-
-# deaths_headlines.total_2019.plot(ax=ax)
-
-def _closeline(line):
-    x, y = line.get_data()
-    x = np.concatenate((x, [x[0]]))
-    y = np.concatenate((y, [y[0]]))
-    line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(deaths_headlines.index)
-plt.legend()
-plt.title("Deaths by week over years, all UK")
-plt.savefig('deaths-radar.png')
-plt.show()
-```
-
-# Excess deaths calculation
-
-```python
-(deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum()
-```
-
-```python
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
-    np.flip(
-        np.arange(len(deaths_headlines_e))/float(len(deaths_headlines_e))*2.*np.pi),
-    14)
-l15, = ax.plot(theta, deaths_headlines_e['total_2015'], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, deaths_headlines_e['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, deaths_headlines_e['total_2017'], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, deaths_headlines_e['total_2018'], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, deaths_headlines_e['total_2019'], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, deaths_headlines_e['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, deaths_headlines_e['total_2020'], color="red", label="2020")
-
-# deaths_headlines.total_2019.plot(ax=ax)
-
-def _closeline(line):
-    x, y = line.get_data()
-    x = np.concatenate((x, [x[0]]))
-    y = np.concatenate((y, [y[0]]))
-    line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(deaths_headlines_e.index)
-plt.legend()
-plt.title("Deaths by week over years, England")
-plt.savefig('deaths-radar_england.png')
-plt.show()
-```
-
-```python
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
-    np.flip(
-        np.arange(len(deaths_headlines_w))/float(len(deaths_headlines_w))*2.*np.pi),
-    14)
-l15, = ax.plot(theta, deaths_headlines_w['total_2015'], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, deaths_headlines_w['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, deaths_headlines_w['total_2017'], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, deaths_headlines_w['total_2018'], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, deaths_headlines_w['total_2019'], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, deaths_headlines_w['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, deaths_headlines_w['total_2020'], color="red", label="2020")
-
-
-def _closeline(line):
-    x, y = line.get_data()
-    x = np.concatenate((x, [x[0]]))
-    y = np.concatenate((y, [y[0]]))
-    line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(deaths_headlines_w.index)
-plt.legend()
-plt.title("Deaths by week over years, Wales")
-plt.savefig('deaths-radar_wales.png')
-plt.show()
-```
-
-```python
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
-    np.flip(
-        np.arange(len(deaths_headlines_s))/float(len(deaths_headlines_s))*2.*np.pi),
-    14)
-l15, = ax.plot(theta, deaths_headlines_s['total_2015'], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, deaths_headlines_s['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, deaths_headlines_s['total_2017'], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, deaths_headlines_s['total_2018'], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, deaths_headlines_s['total_2019'], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, deaths_headlines_s['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, deaths_headlines_s['total_2020'], color="red", label="2020")
-
-
-def _closeline(line):
-    x, y = line.get_data()
-    x = np.concatenate((x, [x[0]]))
-    y = np.concatenate((y, [y[0]]))
-    line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(deaths_headlines_s.index)
-plt.legend()
-plt.title("Deaths by week over years, Scotland")
-plt.savefig('deaths-radar_scotland.png')
-plt.show()
-```
-
-```python
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
-    np.flip(
-        np.arange(len(deaths_headlines_i))/float(len(deaths_headlines_i))*2.*np.pi),
-    14)
-l15, = ax.plot(theta, deaths_headlines_i['total_2015'], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, deaths_headlines_i['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, deaths_headlines_i['total_2017'], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, deaths_headlines_i['total_2018'], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, deaths_headlines_i['total_2019'], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, deaths_headlines_i['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, deaths_headlines_i['total_2020'], color="red", label="2020")
-
-
-def _closeline(line):
-    x, y = line.get_data()
-    x = np.concatenate((x, [x[0]]))
-    y = np.concatenate((y, [y[0]]))
-    line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(deaths_headlines_i.index)
-plt.legend()
-plt.title("Deaths by week over years, Northern Ireland")
-plt.savefig('deaths-radar_northern_ireland.png')
-plt.show()
-```
-
-```python
-
-```