9 jupytext_version: 1.3.4
11 display_name: Python 3
18 * [Office for National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (England and Wales). Weeks start on a Saturday.
19 * [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
20 * [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.
29 from scipy.stats import gmean
31 import matplotlib as mpl
32 import matplotlib.pyplot as plt
41 raw_data_2015 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2015.csv',
42 parse_dates=[1, 2], dayfirst=True,
# Northern Ireland 2015: keep only column 2 of the sheet, as a one-column
# frame whose column is named total_2015.
dh15i = raw_data_2015.iloc[:, 2].to_frame(name='total_2015')
52 raw_data_2016 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2016.csv',
53 parse_dates=[1, 2], dayfirst=True,
# Northern Ireland 2016: keep only column 2 of the sheet, as a one-column
# frame whose column is named total_2016.
dh16i = raw_data_2016.iloc[:, 2].to_frame(name='total_2016')
63 raw_data_2017 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2017.csv',
64 parse_dates=[1, 2], dayfirst=True,
# Northern Ireland 2017: keep only column 2 of the sheet, as a one-column
# frame whose column is named total_2017.
dh17i = raw_data_2017.iloc[:, 2].to_frame(name='total_2017')
74 raw_data_2018 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2018.csv',
75 parse_dates=[1, 2], dayfirst=True,
# Northern Ireland 2018: keep only column 2 of the sheet, as a one-column
# frame whose column is named total_2018.
dh18i = raw_data_2018.iloc[:, 2].to_frame(name='total_2018')
85 raw_data_2019 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2019.csv',
86 parse_dates=[1, 2], dayfirst=True,
# Northern Ireland 2019: keep only column 2 of the sheet, as a one-column
# frame whose column is named total_2019.
dh19i = raw_data_2019.iloc[:, 2].to_frame(name='total_2019')
96 raw_data_2020 = pd.read_csv('uk-deaths-data/publishedweek192020.csv',
97 parse_dates=[1], dayfirst=True,
103 raw_data_2020_i = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2020.csv',
104 parse_dates=[1], dayfirst=True,
# Northern Ireland 2020: column 1 of the sheet becomes the total_2020 column
# of a new one-column frame; show its first rows.
deaths_headlines_i = raw_data_2020_i.iloc[:, 1].to_frame(name='total_2020')
deaths_headlines_i.head()
122 raw_data_s = pd.read_csv('uk-deaths-data/weekly-deaths-april-20-scotland.csv',
# Scotland: select the yearly columns in reverse order (2020 first) and prefix
# each label with 'total_'; then re-number the rows starting from 1.
deaths_headlines_s = raw_data_s['2015 2016 2017 2018 2019 2020'.split()[::-1]]
deaths_headlines_s.columns = [f'total_{label}' for label in deaths_headlines_s.columns]
deaths_headlines_s = deaths_headlines_s.reset_index(drop=True)
deaths_headlines_s.index += 1
159 # raw_data_2020.head()
163 raw_data_2020['W92000004', 'Wales']
167 raw_data_2019 = pd.read_csv('uk-deaths-data/publishedweek522019.csv',
168 parse_dates=[1], dayfirst=True,
171 # raw_data_2019.head()
175 raw_data_2018 = pd.read_csv('uk-deaths-data/publishedweek522018.csv',
176 parse_dates=[1], dayfirst=True,
179 # raw_data_2018.head()
183 raw_data_2017 = pd.read_csv('uk-deaths-data/publishedweek522017.csv',
184 parse_dates=[1], dayfirst=True,
187 # raw_data_2017.head()
191 raw_data_2016 = pd.read_csv('uk-deaths-data/publishedweek522016.csv',
192 parse_dates=[1], dayfirst=True,
195 # raw_data_2016.head()
199 raw_data_2015 = pd.read_csv('uk-deaths-data/publishedweek2015.csv',
200 parse_dates=[1], dayfirst=True,
203 # raw_data_2015.head()
# 2020 headline figures from the ONS sheet.
# NOTE(review): column 1 is presumably the England-and-Wales total - confirm.
# The Wales figures sit under the 'W92000004' column group.
deaths_headlines_e = raw_data_2020.iloc[:, [1]].copy()
deaths_headlines_w = raw_data_2020['W92000004'].copy()
# Each frame is renamed exactly once (the original renamed the _e frame twice).
deaths_headlines_e.columns = ['total_2020']
deaths_headlines_w.columns = ['total_2020']
# Remove the Wales figure from column 1 so the two frames do not overlap.
deaths_headlines_e.total_2020 -= deaths_headlines_w.total_2020
deaths_headlines_e.head()
# 2019: column 1 of the sheet and the Wales ('W92000004') column group.
# Both selections are copied, as the 2020 cell does, so that renaming and the
# in-place subtraction act on independent frames rather than on slices of
# raw_data_2019 (avoids pandas' SettingWithCopyWarning).
dh19e = raw_data_2019.iloc[:, [1]].copy()
dh19w = raw_data_2019['W92000004'].copy()
dh19e.columns = ['total_2019']
dh19w.columns = ['total_2019']
# Remove the Wales figure from column 1 so the two frames do not overlap.
dh19e.total_2019 -= dh19w.total_2019
# 2018: column 1 of the sheet and the Wales ('W92000004') column group.
# Both selections are copied, as the 2020 cell does, so that renaming and the
# in-place subtraction act on independent frames rather than on slices of
# raw_data_2018 (avoids pandas' SettingWithCopyWarning).
dh18e = raw_data_2018.iloc[:, [1]].copy()
dh18w = raw_data_2018['W92000004'].copy()
dh18e.columns = ['total_2018']
dh18w.columns = ['total_2018']
# Remove the Wales figure from column 1 so the two frames do not overlap.
dh18e.total_2018 -= dh18w.total_2018
# 2017: column 1 of the sheet and the Wales ('W92000004') column group.
# Both selections are copied, as the 2020 cell does, so that renaming and the
# in-place subtraction act on independent frames rather than on slices of
# raw_data_2017 (avoids pandas' SettingWithCopyWarning).
dh17e = raw_data_2017.iloc[:, [1]].copy()
dh17w = raw_data_2017['W92000004'].copy()
dh17e.columns = ['total_2017']
dh17w.columns = ['total_2017']
# Remove the Wales figure from column 1 so the two frames do not overlap.
dh17e.total_2017 -= dh17w.total_2017
# 2016: column 1 of the sheet and the Wales ('W92000004') column group.
# Both selections are copied, as the 2020 cell does, so that renaming and the
# in-place subtraction act on independent frames rather than on slices of
# raw_data_2016 (avoids pandas' SettingWithCopyWarning).
dh16e = raw_data_2016.iloc[:, [1]].copy()
dh16w = raw_data_2016['W92000004'].copy()
dh16e.columns = ['total_2016']
dh16w.columns = ['total_2016']
# Remove the Wales figure from column 1 so the two frames do not overlap.
dh16e.total_2016 -= dh16w.total_2016
# 2015: column 1 of the sheet and the Wales ('W92000004') column group.
# Both selections are copied, as the 2020 cell does, so that renaming and the
# in-place subtraction act on independent frames rather than on slices of
# raw_data_2015 (avoids pandas' SettingWithCopyWarning).
dh15e = raw_data_2015.iloc[:, [1]].copy()
dh15w = raw_data_2015['W92000004'].copy()
dh15e.columns = ['total_2015']
dh15w.columns = ['total_2015']
# Remove the Wales figure from column 1 so the two frames do not overlap.
dh15e.total_2015 -= dh15w.total_2015
267 # dh18 = raw_data_2018.iloc[:, [1, 2]]
268 # dh18.columns = ['total_2018', 'total_previous']
# England: line up every earlier year's total beside total_2020, matching rows
# on the index. 2019-2016 are outer joins (index values from both sides are
# kept); 2015 is a left join (only index values already present are kept).
# NOTE(review): presumably the left join drops an extra 2015 week - confirm.
deaths_headlines_e = (
    deaths_headlines_e
    .merge(dh19e['total_2019'], how='outer', left_index=True, right_index=True)
    .merge(dh18e['total_2018'], how='outer', left_index=True, right_index=True)
    .merge(dh17e['total_2017'], how='outer', left_index=True, right_index=True)
    .merge(dh16e['total_2016'], how='outer', left_index=True, right_index=True)
    .merge(dh15e['total_2015'], how='left', left_index=True, right_index=True)
)
# Scotland: take the yearly columns newest-first, label them total_<year>,
# re-number the rows from 1 and keep rows 1 to 52.
deaths_headlines_s = raw_data_s[['2020', '2019', '2018', '2017', '2016', '2015']]
deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns]
deaths_headlines_s = deaths_headlines_s.reset_index(drop=True)
deaths_headlines_s.index = deaths_headlines_s.index + 1
# .copy() detaches the result from the sliced frame so that columns can be
# added to it later without pandas' SettingWithCopyWarning.
deaths_headlines_s = deaths_headlines_s.loc[1:52].copy()
# Wales: line up every earlier year's total beside total_2020, matching rows
# on the index. 2019-2016 are outer joins (index values from both sides are
# kept); 2015 is a left join (only index values already present are kept).
# NOTE(review): presumably the left join drops an extra 2015 week - confirm.
deaths_headlines_w = (
    deaths_headlines_w
    .merge(dh19w['total_2019'], how='outer', left_index=True, right_index=True)
    .merge(dh18w['total_2018'], how='outer', left_index=True, right_index=True)
    .merge(dh17w['total_2017'], how='outer', left_index=True, right_index=True)
    .merge(dh16w['total_2016'], how='outer', left_index=True, right_index=True)
    .merge(dh15w['total_2015'], how='left', left_index=True, right_index=True)
)
# Northern Ireland: line up every earlier year's total beside total_2020,
# matching rows on the index. 2019-2016 are outer joins (index values from
# both sides are kept); 2015 is a left join (only index values already present
# are kept).
deaths_headlines_i = (
    deaths_headlines_i
    .merge(dh19i['total_2019'], how='outer', left_index=True, right_index=True)
    .merge(dh18i['total_2018'], how='outer', left_index=True, right_index=True)
    .merge(dh17i['total_2017'], how='outer', left_index=True, right_index=True)
    .merge(dh16i['total_2016'], how='outer', left_index=True, right_index=True)
    .merge(dh15i['total_2015'], how='left', left_index=True, right_index=True)
)
311 deaths_headlines = deaths_headlines_e + deaths_headlines_w + deaths_headlines_i + deaths_headlines_s
# Add a previous_mean column to every frame: the row-wise mean of the five
# 2015-2019 totals. DataFrame.mean(axis=1) is the vectorised equivalent of
# applying np.mean to each row (both skip missing values).
previous_years = 'total_2019 total_2018 total_2017 total_2016 total_2015'.split()
for frame in (deaths_headlines_e, deaths_headlines_w, deaths_headlines_s,
              deaths_headlines_i, deaths_headlines):
    frame['previous_mean'] = frame[previous_years].mean(axis=1)
325 deaths_headlines['total_2020 total_2019 total_2018 total_2017 total_2016 total_2015'.split()].plot(figsize=(10, 8))
329 deaths_headlines_i.plot()
333 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
335 fig = plt.figure(figsize=(10, 10))
336 ax = fig.add_subplot(111, projection="polar")
340 np.arange(len(deaths_headlines))/float(len(deaths_headlines))*2.*np.pi),
342 # l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#b56363", label="2015") # 0
343 # l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#a4b563", label="2016") # 72
344 # l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#63b584", label="2017") # 144
345 # l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#6384b5", label="2018") # 216
346 # l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#a4635b", label="2019") # 288
347 l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#e47d7d", label="2015") # 0
348 l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
349 l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#7de4a6", label="2017") # 144
350 l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#7da6e4", label="2018") # 216
351 l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#d07de4", label="2019") # 288
353 lmean, = ax.plot(theta, deaths_headlines['previous_mean'], color="black", linestyle='dashed', label="mean")
355 l20, = ax.plot(theta, deaths_headlines['total_2020'], color="red", label="2020")
357 # deaths_headlines.total_2019.plot(ax=ax)
359 def _closeline(line):
360 x, y = line.get_data()
361 x = np.concatenate((x, [x[0]]))
362 y = np.concatenate((y, [y[0]]))
# Run _closeline on every previous-year line and on the mean line (the 2020
# line, l20, is deliberately not in the list). A plain loop is used because
# _closeline is called only for its side effect.
for ring in (l19, l18, l17, l16, l15, lmean):
    _closeline(ring)
369 ax.set_xticklabels(deaths_headlines.index)
371 plt.title("Deaths by week over years, all UK")
372 plt.savefig('deaths-radar.png')
376 # Excess deaths calculation
379 raw_data_2020.loc[12, 'Week ended']
383 raw_data_2020.iloc[-1]['Week ended']
387 raw_data_2020.loc[12].droplevel(1)['Week ended']
391 raw_data_2020.iloc[-1].droplevel(1)['Week ended']
395 (deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum()
399 deaths_headlines.previous_mean.sum()
403 excess_death_data = {
404 'start_date': str(raw_data_2020.loc[12].droplevel(1)['Week ended']),
405 'end_date': str(raw_data_2020.iloc[-1].droplevel(1)['Week ended']),
406 'excess_deaths': (deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum()
409 with open('excess_deaths.json', 'w') as f:
410 json.dump(excess_death_data, f)
413 # Plots for UK nations
416 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
418 fig = plt.figure(figsize=(10, 10))
419 ax = fig.add_subplot(111, projection="polar")
423 np.arange(len(deaths_headlines_e))/float(len(deaths_headlines_e))*2.*np.pi),
425 l15, = ax.plot(theta, deaths_headlines_e['total_2015'], color="#e47d7d", label="2015") # 0
426 l16, = ax.plot(theta, deaths_headlines_e['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
427 l17, = ax.plot(theta, deaths_headlines_e['total_2017'], color="#7de4a6", label="2017") # 144
428 l18, = ax.plot(theta, deaths_headlines_e['total_2018'], color="#7da6e4", label="2018") # 216
429 l19, = ax.plot(theta, deaths_headlines_e['total_2019'], color="#d07de4", label="2019") # 288
431 lmean, = ax.plot(theta, deaths_headlines_e['previous_mean'], color="black", linestyle='dashed', label="mean")
433 l20, = ax.plot(theta, deaths_headlines_e['total_2020'], color="red", label="2020")
435 # deaths_headlines.total_2019.plot(ax=ax)
437 def _closeline(line):
438 x, y = line.get_data()
439 x = np.concatenate((x, [x[0]]))
440 y = np.concatenate((y, [y[0]]))
# Run _closeline on every previous-year line and on the mean line (the 2020
# line, l20, is deliberately not in the list). A plain loop is used because
# _closeline is called only for its side effect.
for ring in (l19, l18, l17, l16, l15, lmean):
    _closeline(ring)
447 ax.set_xticklabels(deaths_headlines_e.index)
449 plt.title("Deaths by week over years, England")
450 plt.savefig('deaths-radar_england.png')
455 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
457 fig = plt.figure(figsize=(10, 10))
458 ax = fig.add_subplot(111, projection="polar")
462 np.arange(len(deaths_headlines_w))/float(len(deaths_headlines_w))*2.*np.pi),
464 l15, = ax.plot(theta, deaths_headlines_w['total_2015'], color="#e47d7d", label="2015") # 0
465 l16, = ax.plot(theta, deaths_headlines_w['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
466 l17, = ax.plot(theta, deaths_headlines_w['total_2017'], color="#7de4a6", label="2017") # 144
467 l18, = ax.plot(theta, deaths_headlines_w['total_2018'], color="#7da6e4", label="2018") # 216
468 l19, = ax.plot(theta, deaths_headlines_w['total_2019'], color="#d07de4", label="2019") # 288
470 lmean, = ax.plot(theta, deaths_headlines_w['previous_mean'], color="black", linestyle='dashed', label="mean")
472 l20, = ax.plot(theta, deaths_headlines_w['total_2020'], color="red", label="2020")
475 def _closeline(line):
476 x, y = line.get_data()
477 x = np.concatenate((x, [x[0]]))
478 y = np.concatenate((y, [y[0]]))
# Run _closeline on every previous-year line and on the mean line (the 2020
# line, l20, is deliberately not in the list). A plain loop is used because
# _closeline is called only for its side effect.
for ring in (l19, l18, l17, l16, l15, lmean):
    _closeline(ring)
485 ax.set_xticklabels(deaths_headlines_w.index)
487 plt.title("Deaths by week over years, Wales")
488 plt.savefig('deaths-radar_wales.png')
493 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
495 fig = plt.figure(figsize=(10, 10))
496 ax = fig.add_subplot(111, projection="polar")
500 np.arange(len(deaths_headlines_s))/float(len(deaths_headlines_s))*2.*np.pi),
502 l15, = ax.plot(theta, deaths_headlines_s['total_2015'], color="#e47d7d", label="2015") # 0
503 l16, = ax.plot(theta, deaths_headlines_s['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
504 l17, = ax.plot(theta, deaths_headlines_s['total_2017'], color="#7de4a6", label="2017") # 144
505 l18, = ax.plot(theta, deaths_headlines_s['total_2018'], color="#7da6e4", label="2018") # 216
506 l19, = ax.plot(theta, deaths_headlines_s['total_2019'], color="#d07de4", label="2019") # 288
508 lmean, = ax.plot(theta, deaths_headlines_s['previous_mean'], color="black", linestyle='dashed', label="mean")
510 l20, = ax.plot(theta, deaths_headlines_s['total_2020'], color="red", label="2020")
513 def _closeline(line):
514 x, y = line.get_data()
515 x = np.concatenate((x, [x[0]]))
516 y = np.concatenate((y, [y[0]]))
# Run _closeline on every previous-year line and on the mean line (the 2020
# line, l20, is deliberately not in the list). A plain loop is used because
# _closeline is called only for its side effect.
for ring in (l19, l18, l17, l16, l15, lmean):
    _closeline(ring)
523 ax.set_xticklabels(deaths_headlines_s.index)
525 plt.title("Deaths by week over years, Scotland")
526 plt.savefig('deaths-radar_scotland.png')
531 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
533 fig = plt.figure(figsize=(10, 10))
534 ax = fig.add_subplot(111, projection="polar")
538 np.arange(len(deaths_headlines_i))/float(len(deaths_headlines_i))*2.*np.pi),
540 l15, = ax.plot(theta, deaths_headlines_i['total_2015'], color="#e47d7d", label="2015") # 0
541 l16, = ax.plot(theta, deaths_headlines_i['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
542 l17, = ax.plot(theta, deaths_headlines_i['total_2017'], color="#7de4a6", label="2017") # 144
543 l18, = ax.plot(theta, deaths_headlines_i['total_2018'], color="#7da6e4", label="2018") # 216
544 l19, = ax.plot(theta, deaths_headlines_i['total_2019'], color="#d07de4", label="2019") # 288
546 lmean, = ax.plot(theta, deaths_headlines_i['previous_mean'], color="black", linestyle='dashed', label="mean")
548 l20, = ax.plot(theta, deaths_headlines_i['total_2020'], color="red", label="2020")
551 def _closeline(line):
552 x, y = line.get_data()
553 x = np.concatenate((x, [x[0]]))
554 y = np.concatenate((y, [y[0]]))
# Run _closeline on every previous-year line and on the mean line (the 2020
# line, l20, is deliberately not in the list). A plain loop is used because
# _closeline is called only for its side effect.
for ring in (l19, l18, l17, l16, l15, lmean):
    _closeline(ring)
561 ax.set_xticklabels(deaths_headlines_i.index)
563 plt.title("Deaths by week over years, Northern Ireland")
564 plt.savefig('deaths-radar_northern_ireland.png')