Collecting various updates

[covid19.git] / uk_deaths.md
diff --git a/uk_deaths.md b/uk_deaths.md

index fc81099b48100abe68678537b9fe45005e66aaf4..4dc06c22db6369527160e729d9b6c92ed59f6e2d 100644 (file)
--- a/uk_deaths.md
+++ b/uk_deaths.md
@@ -23,6 +23,7 @@ Data from:
  ```python
  import itertools
  import collections
+import json
  import pandas as pd
  import numpy as np
  from scipy.stats import gmean
@@ -132,26 +133,52 @@ deaths_headlines_s
  ```
  
  ```python
+eng_xls = pd.read_excel('uk-deaths-data/publishedweek312020.xlsx', 
+                        sheet_name="Weekly figures 2020",
+                        skiprows=[0, 1, 2, 3],
+                        header=0,
+                        index_col=[1]
+                       ).iloc[:91].T
+eng_xls
+```
  
+```python
+# eng_xls_columns
  ```
  
  ```python
+eng_xls_columns = list(eng_xls.columns)
+
+for i, c in enumerate(eng_xls_columns):
+#     print(i, c, type(c), isinstance(c, float))
+    if isinstance(c, float) and np.isnan(c):
+        if eng_xls.iloc[0].iloc[i] is not pd.NaT:
+            eng_xls_columns[i] = eng_xls.iloc[0].iloc[i]
+
+# np.isnan(eng_xls_columns[0])
+# eng_xls_columns
  
+eng_xls.columns = eng_xls_columns
+# eng_xls.columns
  ```
  
  ```python
-
+eng_xls['Total deaths, all ages']
  ```
  
  ```python
+eng_xls['Wales'].iloc[1:]
+```
  
+```python
+# raw_data_2020 = pd.read_csv('uk-deaths-data/publishedweek272020.csv', 
+#                        parse_dates=[1], dayfirst=True,
+#                       index_col=0,
+#                       header=[0, 1])
  ```
  
  ```python
-raw_data_2020 = pd.read_csv('uk-deaths-data/publishedweek182020.csv', 
-                       parse_dates=[1], dayfirst=True,
-                      index_col=0,
-                      header=[0, 1])
+
  ```
  
  ```python
@@ -159,7 +186,7 @@ raw_data_2020 = pd.read_csv('uk-deaths-data/publishedweek182020.csv',
  ```
  
  ```python
-raw_data_2020['W92000004', 'Wales']
+# raw_data_2020['W92000004', 'Wales']
  ```
  
  ```python
@@ -203,14 +230,21 @@ raw_data_2015 = pd.read_csv('uk-deaths-data/publishedweek2015.csv',
  ```
  
  ```python
-deaths_headlines_e = raw_data_2020.iloc[:, [1]]
-deaths_headlines_e.columns = ['total_2020']
-deaths_headlines_w = raw_data_2020['W92000004']
-deaths_headlines_e.columns = ['total_2020']
-deaths_headlines_w.columns = ['total_2020']
-deaths_headlines_e.total_2020 -= deaths_headlines_w.total_2020
-deaths_headlines_e.head()
-deaths_headlines_e
+dhw = eng_xls['Wales'].iloc[1:]
+dhe = eng_xls['Total deaths, all ages'].iloc[1:] - dhw
+deaths_headlines_e = pd.DataFrame({'total_2020': dhe.dropna()})
+deaths_headlines_w = pd.DataFrame({'total_2020': dhw.dropna()})
+```
+
+```python
+# deaths_headlines_e = raw_data_2020.iloc[:, [1]].copy()
+# deaths_headlines_e.columns = ['total_2020']
+# deaths_headlines_w = raw_data_2020['W92000004'].copy()
+# deaths_headlines_e.columns = ['total_2020']
+# deaths_headlines_w.columns = ['total_2020']
+# deaths_headlines_e.total_2020 -= deaths_headlines_w.total_2020
+# deaths_headlines_e.head()
+# deaths_headlines_e
  ```
  
  ```python
@@ -219,7 +253,7 @@ dh19w = raw_data_2019['W92000004']
  dh19e.columns = ['total_2019']
  dh19w.columns = ['total_2019']
  dh19e.total_2019 -= dh19w.total_2019
-dh19e.head()
+dh19e.tail()
  ```
  
  ```python
@@ -283,9 +317,17 @@ deaths_headlines_s = raw_data_s[reversed('2015 2016 2017 2018 2019 2020'.split()
  deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns]
  deaths_headlines_s.reset_index(drop=True, inplace=True)
  deaths_headlines_s.index = deaths_headlines_s.index + 1
+deaths_headlines_s = deaths_headlines_s.loc[1:52]
  deaths_headlines_s
  ```
  
+# Correction for missing data
+
+```python
+# deaths_headlines_s.loc[20, 'total_2020'] = 1000
+# deaths_headlines_s
+```
+
  ```python
  deaths_headlines_w = deaths_headlines_w.merge(dh19w['total_2019'], how='outer', left_index=True, right_index=True)
  deaths_headlines_w = deaths_headlines_w.merge(dh18w['total_2018'], how='outer', left_index=True, right_index=True)
@@ -311,15 +353,29 @@ deaths_headlines
  ```
  
  ```python
+deaths_headlines_e['previous_mean'] = deaths_headlines_e['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
+deaths_headlines_w['previous_mean'] = deaths_headlines_w['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
+deaths_headlines_s['previous_mean'] = deaths_headlines_s['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
+deaths_headlines_i['previous_mean'] = deaths_headlines_i['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
  deaths_headlines['previous_mean'] = deaths_headlines['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
  deaths_headlines
  ```
  
  ```python
-deaths_headlines['total_2020 total_2019 total_2018 total_2017 total_2016 total_2015'.split()].plot(figsize=(10, 8))
+deaths_headlines['total_2020 total_2019 total_2018 total_2017 total_2016 total_2015'.split()].plot(figsize=(14, 8))
+```
+
+```python
+deaths_headlines[['total_2020', 'previous_mean']].plot(figsize=(10, 8))
+```
+
+```python
+deaths_headlines_i.plot()
  ```
  
  ```python
+# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
+
  fig = plt.figure(figsize=(10, 10))
  ax = fig.add_subplot(111, projection="polar")
  
@@ -361,10 +417,279 @@ plt.savefig('deaths-radar.png')
  plt.show()
  ```
  
+# Excess deaths calculation
+
+```python
+# raw_data_2020.loc[12, 'Week ended']
+```
+
+```python
+eng_xls.loc[12, 'Week ended']
+```
+
+```python
+# raw_data_2020.iloc[-1]['Week ended']
+```
+
+```python
+deaths_headlines_e.total_2020.dropna().last_valid_index()
+```
+
+```python
+eng_xls.loc[deaths_headlines_e.total_2020.dropna().last_valid_index(), 'Week ended']
+```
+
+```python
+eng_xls.loc[27, 'Week ended']
+```
+
+```python
+# raw_data_2020.loc[12].droplevel(1)['Week ended']
+```
+
+```python
+# raw_data_2020.iloc[-1].droplevel(1)['Week ended']
+```
+
  ```python
  (deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum()
  ```
  
+```python
+(deaths_headlines.loc[12:27].total_2020 - deaths_headlines.loc[12:27].previous_mean).sum()
+```
+
+```python
+deaths_headlines.previous_mean.sum()
+```
+
+```python
+# excess_death_data = {
+#     'start_date': str(eng_xls.loc[12, 'Week ended']),
+#     'end_date': str(eng_xls.loc[deaths_headlines_e.total_2020.dropna().last_valid_index(), 'Week ended']),
+#     'excess_deaths': (deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum()
+# }
+
+# with open('excess_deaths.json', 'w') as f:
+#     json.dump(excess_death_data, f)
+```
+
+```python
+excess_death_data = {
+    'start_date': str(eng_xls.loc[12, 'Week ended']),
+    'end_date': str(eng_xls.loc[27, 'Week ended']),
+    'excess_deaths': (deaths_headlines.loc[12:27].total_2020 - deaths_headlines.loc[12:27].previous_mean).sum()
+}
+
+with open('excess_deaths.json', 'w') as f:
+    json.dump(excess_death_data, f)
+```
+
+```python
+# excess_death_data = {
+#     'start_date': str(raw_data_2020.loc[12].droplevel(1)['Week ended']),
+#     'end_date': str(raw_data_2020.iloc[-1].droplevel(1)['Week ended']),
+#     'excess_deaths': (deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum()
+# }
+
+# with open('excess_deaths.json', 'w') as f:
+#     json.dump(excess_death_data, f)
+```
+
+```python
+eng_xls['Week ended']
+```
+
+```python
+# raw_data_2020.droplevel(1, axis='columns')['Week ended']
+```
+
+```python
+deaths_by_week = deaths_headlines.merge(eng_xls['Week ended'], left_index=True, right_index=True)
+deaths_by_week.rename(columns={'Week ended': 'week_ended'}, inplace=True)
+deaths_by_week.to_csv('deaths_by_week.csv', header=True, index=False)
+```
+
+```python
+# deaths_by_week = deaths_headlines.merge(raw_data_2020.droplevel(1, axis='columns')['Week ended'], left_index=True, right_index=True)
+# deaths_by_week.rename(columns={'Week ended': 'week_ended'}, inplace=True)
+# deaths_by_week.to_csv('deaths_by_week.csv', header=True, index=False)
+```
+
+# Plots for UK nations
+
+```python
+# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
+
+fig = plt.figure(figsize=(10, 10))
+ax = fig.add_subplot(111, projection="polar")
+
+theta = np.roll(
+    np.flip(
+        np.arange(len(deaths_headlines_e))/float(len(deaths_headlines_e))*2.*np.pi),
+    14)
+l15, = ax.plot(theta, deaths_headlines_e['total_2015'], color="#e47d7d", label="2015") # 0
+l16, = ax.plot(theta, deaths_headlines_e['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
+l17, = ax.plot(theta, deaths_headlines_e['total_2017'], color="#7de4a6", label="2017") # 144
+l18, = ax.plot(theta, deaths_headlines_e['total_2018'], color="#7da6e4", label="2018") # 216
+l19, = ax.plot(theta, deaths_headlines_e['total_2019'], color="#d07de4", label="2019") # 288
+
+lmean, = ax.plot(theta, deaths_headlines_e['previous_mean'], color="black", linestyle='dashed', label="mean")
+
+l20, = ax.plot(theta, deaths_headlines_e['total_2020'], color="red", label="2020")
+
+# deaths_headlines.total_2019.plot(ax=ax)
+
+def _closeline(line):
+    x, y = line.get_data()
+    x = np.concatenate((x, [x[0]]))
+    y = np.concatenate((y, [y[0]]))
+    line.set_data(x, y)
+
+[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
+
+
+ax.set_xticks(theta)
+ax.set_xticklabels(deaths_headlines_e.index)
+plt.legend()
+plt.title("Deaths by week over years, England")
+plt.savefig('deaths-radar_england.png')
+plt.show()
+```
+
+```python
+# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
+
+fig = plt.figure(figsize=(10, 10))
+ax = fig.add_subplot(111, projection="polar")
+
+theta = np.roll(
+    np.flip(
+        np.arange(len(deaths_headlines_w))/float(len(deaths_headlines_w))*2.*np.pi),
+    14)
+l15, = ax.plot(theta, deaths_headlines_w['total_2015'], color="#e47d7d", label="2015") # 0
+l16, = ax.plot(theta, deaths_headlines_w['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
+l17, = ax.plot(theta, deaths_headlines_w['total_2017'], color="#7de4a6", label="2017") # 144
+l18, = ax.plot(theta, deaths_headlines_w['total_2018'], color="#7da6e4", label="2018") # 216
+l19, = ax.plot(theta, deaths_headlines_w['total_2019'], color="#d07de4", label="2019") # 288
+
+lmean, = ax.plot(theta, deaths_headlines_w['previous_mean'], color="black", linestyle='dashed', label="mean")
+
+l20, = ax.plot(theta, deaths_headlines_w['total_2020'], color="red", label="2020")
+
+
+def _closeline(line):
+    x, y = line.get_data()
+    x = np.concatenate((x, [x[0]]))
+    y = np.concatenate((y, [y[0]]))
+    line.set_data(x, y)
+
+[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
+
+
+ax.set_xticks(theta)
+ax.set_xticklabels(deaths_headlines_w.index)
+plt.legend()
+plt.title("Deaths by week over years, Wales")
+plt.savefig('deaths-radar_wales.png')
+plt.show()
+```
+
+```python
+# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
+
+fig = plt.figure(figsize=(10, 10))
+ax = fig.add_subplot(111, projection="polar")
+
+theta = np.roll(
+    np.flip(
+        np.arange(len(deaths_headlines_s))/float(len(deaths_headlines_s))*2.*np.pi),
+    14)
+l15, = ax.plot(theta, deaths_headlines_s['total_2015'], color="#e47d7d", label="2015") # 0
+l16, = ax.plot(theta, deaths_headlines_s['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
+l17, = ax.plot(theta, deaths_headlines_s['total_2017'], color="#7de4a6", label="2017") # 144
+l18, = ax.plot(theta, deaths_headlines_s['total_2018'], color="#7da6e4", label="2018") # 216
+l19, = ax.plot(theta, deaths_headlines_s['total_2019'], color="#d07de4", label="2019") # 288
+
+lmean, = ax.plot(theta, deaths_headlines_s['previous_mean'], color="black", linestyle='dashed', label="mean")
+
+l20, = ax.plot(theta, deaths_headlines_s['total_2020'], color="red", label="2020")
+
+
+def _closeline(line):
+    x, y = line.get_data()
+    x = np.concatenate((x, [x[0]]))
+    y = np.concatenate((y, [y[0]]))
+    line.set_data(x, y)
+
+[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
+
+
+ax.set_xticks(theta)
+ax.set_xticklabels(deaths_headlines_s.index)
+plt.legend()
+plt.title("Deaths by week over years, Scotland")
+plt.savefig('deaths-radar_scotland.png')
+plt.show()
+```
+
+```python
+# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
+
+fig = plt.figure(figsize=(10, 10))
+ax = fig.add_subplot(111, projection="polar")
+
+theta = np.roll(
+    np.flip(
+        np.arange(len(deaths_headlines_i))/float(len(deaths_headlines_i))*2.*np.pi),
+    14)
+l15, = ax.plot(theta, deaths_headlines_i['total_2015'], color="#e47d7d", label="2015") # 0
+l16, = ax.plot(theta, deaths_headlines_i['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
+l17, = ax.plot(theta, deaths_headlines_i['total_2017'], color="#7de4a6", label="2017") # 144
+l18, = ax.plot(theta, deaths_headlines_i['total_2018'], color="#7da6e4", label="2018") # 216
+l19, = ax.plot(theta, deaths_headlines_i['total_2019'], color="#d07de4", label="2019") # 288
+
+lmean, = ax.plot(theta, deaths_headlines_i['previous_mean'], color="black", linestyle='dashed', label="mean")
+
+l20, = ax.plot(theta, deaths_headlines_i['total_2020'], color="red", label="2020")
+
+
+def _closeline(line):
+    x, y = line.get_data()
+    x = np.concatenate((x, [x[0]]))
+    y = np.concatenate((y, [y[0]]))
+    line.set_data(x, y)
+
+[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
+
+
+ax.set_xticks(theta)
+ax.set_xticklabels(deaths_headlines_i.index)
+plt.legend()
+plt.title("Deaths by week over years, Northern Ireland")
+plt.savefig('deaths-radar_northern_ireland.png')
+plt.show()
+```
+
+```python
+# list(raw_data_2020.columns)
+```
+
+```python
+# deaths_headlines_e = raw_data_2020.iloc[:, [1]].copy()
+# deaths_headlines_e.columns = ['total_2020']
+# deaths_headlines_w = raw_data_2020['W92000004'].copy()
+# deaths_headlines_e.columns = ['total_2020']
+# deaths_headlines_w.columns = ['total_2020']
+# deaths_headlines_e.total_2020 -= deaths_headlines_w.total_2020
+# deaths_headlines_e.head()
+# deaths_headlines_e
+```
+
+```python
+
+```
+
  ```python
  
  ```