9 jupytext_version: 1.3.4
11 display_name: Python 3
18 * [Office of National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (Endland and Wales) Weeks start on a Saturday.
19 * [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
20 * [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.
29 from scipy.stats import gmean
31 import matplotlib as mpl
32 import matplotlib.pyplot as plt
41 raw_data_2015 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2015.csv',
42 parse_dates=[1, 2], dayfirst=True,
46 dh15i = raw_data_2015.iloc[:, [2]]
47 dh15i.columns = ['total_2015']
52 raw_data_2016 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2016.csv',
53 parse_dates=[1, 2], dayfirst=True,
57 dh16i = raw_data_2016.iloc[:, [2]]
58 dh16i.columns = ['total_2016']
63 raw_data_2017 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2017.csv',
64 parse_dates=[1, 2], dayfirst=True,
68 dh17i = raw_data_2017.iloc[:, [2]]
69 dh17i.columns = ['total_2017']
74 raw_data_2018 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2018.csv',
75 parse_dates=[1, 2], dayfirst=True,
79 dh18i = raw_data_2018.iloc[:, [2]]
80 dh18i.columns = ['total_2018']
85 raw_data_2019 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2019.csv',
86 parse_dates=[1, 2], dayfirst=True,
90 dh19i = raw_data_2019.iloc[:, [2]]
91 dh19i.columns = ['total_2019']
96 raw_data_2020_i = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2020.csv',
97 parse_dates=[1], dayfirst=True,
101 deaths_headlines_i = raw_data_2020_i.iloc[:, [1]]
102 deaths_headlines_i.columns = ['total_2020']
103 deaths_headlines_i.head()
115 raw_data_s = pd.read_csv('uk-deaths-data/weekly-deaths-april-20-scotland.csv',
124 deaths_headlines_s = raw_data_s[reversed('2015 2016 2017 2018 2019 2020'.split())]
125 deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns]
126 deaths_headlines_s.reset_index(drop=True, inplace=True)
127 deaths_headlines_s.index = deaths_headlines_s.index + 1
136 eng_xls = pd.read_excel('uk-deaths-data/publishedweek312020.xlsx',
137 sheet_name="Weekly figures 2020",
138 skiprows=[0, 1, 2, 3],
150 eng_xls_columns = list(eng_xls.columns)
152 for i, c in enumerate(eng_xls_columns):
153 # print(i, c, type(c), isinstance(c, float))
154 if isinstance(c, float) and np.isnan(c):
155 if eng_xls.iloc[0].iloc[i] is not pd.NaT:
156 eng_xls_columns[i] = eng_xls.iloc[0].iloc[i]
158 # np.isnan(eng_xls_columns[0])
161 eng_xls.columns = eng_xls_columns
166 eng_xls['Total deaths, all ages']
170 eng_xls['Wales'].iloc[1:]
174 # raw_data_2020 = pd.read_csv('uk-deaths-data/publishedweek272020.csv',
175 # parse_dates=[1], dayfirst=True,
185 # raw_data_2020.head()
189 # raw_data_2020['W92000004', 'Wales']
193 raw_data_2019 = pd.read_csv('uk-deaths-data/publishedweek522019.csv',
194 parse_dates=[1], dayfirst=True,
197 # raw_data_2019.head()
201 raw_data_2018 = pd.read_csv('uk-deaths-data/publishedweek522018.csv',
202 parse_dates=[1], dayfirst=True,
205 # raw_data_2018.head()
209 raw_data_2017 = pd.read_csv('uk-deaths-data/publishedweek522017.csv',
210 parse_dates=[1], dayfirst=True,
213 # raw_data_2017.head()
217 raw_data_2016 = pd.read_csv('uk-deaths-data/publishedweek522016.csv',
218 parse_dates=[1], dayfirst=True,
221 # raw_data_2016.head()
225 raw_data_2015 = pd.read_csv('uk-deaths-data/publishedweek2015.csv',
226 parse_dates=[1], dayfirst=True,
229 # raw_data_2015.head()
233 dhw = eng_xls['Wales'].iloc[1:]
234 dhe = eng_xls['Total deaths, all ages'].iloc[1:] - dhw
235 deaths_headlines_e = pd.DataFrame({'total_2020': dhe.dropna()})
236 deaths_headlines_w = pd.DataFrame({'total_2020': dhw.dropna()})
240 # deaths_headlines_e = raw_data_2020.iloc[:, [1]].copy()
241 # deaths_headlines_e.columns = ['total_2020']
242 # deaths_headlines_w = raw_data_2020['W92000004'].copy()
243 # deaths_headlines_e.columns = ['total_2020']
244 # deaths_headlines_w.columns = ['total_2020']
245 # deaths_headlines_e.total_2020 -= deaths_headlines_w.total_2020
246 # deaths_headlines_e.head()
251 dh19e = raw_data_2019.iloc[:, [1]]
252 dh19w = raw_data_2019['W92000004']
253 dh19e.columns = ['total_2019']
254 dh19w.columns = ['total_2019']
255 dh19e.total_2019 -= dh19w.total_2019
264 dh18e = raw_data_2018.iloc[:, [1]]
265 dh18w = raw_data_2018['W92000004']
266 dh18e.columns = ['total_2018']
267 dh18w.columns = ['total_2018']
268 dh18e.total_2018 -= dh18w.total_2018
273 dh17e = raw_data_2017.iloc[:, [1]]
274 dh17w = raw_data_2017['W92000004']
275 dh17e.columns = ['total_2017']
276 dh17w.columns = ['total_2017']
277 dh17e.total_2017 -= dh17w.total_2017
282 dh16e = raw_data_2016.iloc[:, [1]]
283 dh16w = raw_data_2016['W92000004']
284 dh16e.columns = ['total_2016']
285 dh16w.columns = ['total_2016']
286 dh16e.total_2016 -= dh16w.total_2016
291 dh15e = raw_data_2015.iloc[:, [1]]
292 dh15w = raw_data_2015['W92000004']
293 dh15e.columns = ['total_2015']
294 dh15w.columns = ['total_2015']
295 dh15e.total_2015 -= dh15w.total_2015
300 # dh18 = raw_data_2018.iloc[:, [1, 2]]
301 # dh18.columns = ['total_2018', 'total_previous']
306 deaths_headlines_e = deaths_headlines_e.merge(dh19e['total_2019'], how='outer', left_index=True, right_index=True)
307 deaths_headlines_e = deaths_headlines_e.merge(dh18e['total_2018'], how='outer', left_index=True, right_index=True)
308 deaths_headlines_e = deaths_headlines_e.merge(dh17e['total_2017'], how='outer', left_index=True, right_index=True)
309 deaths_headlines_e = deaths_headlines_e.merge(dh16e['total_2016'], how='outer', left_index=True, right_index=True)
310 # deaths_headlines = deaths_headlines.merge(dh15['total_2015'], how='outer', left_index=True, right_index=True)
311 deaths_headlines_e = deaths_headlines_e.merge(dh15e['total_2015'], how='left', left_index=True, right_index=True)
316 deaths_headlines_s = raw_data_s[reversed('2015 2016 2017 2018 2019 2020'.split())]
317 deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns]
318 deaths_headlines_s.reset_index(drop=True, inplace=True)
319 deaths_headlines_s.index = deaths_headlines_s.index + 1
320 deaths_headlines_s = deaths_headlines_s.loc[1:52]
324 # Correction for missing data
327 # deaths_headlines_s.loc[20, 'total_2020'] = 1000
332 deaths_headlines_w = deaths_headlines_w.merge(dh19w['total_2019'], how='outer', left_index=True, right_index=True)
333 deaths_headlines_w = deaths_headlines_w.merge(dh18w['total_2018'], how='outer', left_index=True, right_index=True)
334 deaths_headlines_w = deaths_headlines_w.merge(dh17w['total_2017'], how='outer', left_index=True, right_index=True)
335 deaths_headlines_w = deaths_headlines_w.merge(dh16w['total_2016'], how='outer', left_index=True, right_index=True)
336 # deaths_headlines = deaths_headlines.merge(dh15['total_2015'], how='outer', left_index=True, right_index=True)
337 deaths_headlines_w = deaths_headlines_w.merge(dh15w['total_2015'], how='left', left_index=True, right_index=True)
342 deaths_headlines_i = deaths_headlines_i.merge(dh19i['total_2019'], how='outer', left_index=True, right_index=True)
343 deaths_headlines_i = deaths_headlines_i.merge(dh18i['total_2018'], how='outer', left_index=True, right_index=True)
344 deaths_headlines_i = deaths_headlines_i.merge(dh17i['total_2017'], how='outer', left_index=True, right_index=True)
345 deaths_headlines_i = deaths_headlines_i.merge(dh16i['total_2016'], how='outer', left_index=True, right_index=True)
346 deaths_headlines_i = deaths_headlines_i.merge(dh15i['total_2015'], how='left', left_index=True, right_index=True)
351 deaths_headlines = deaths_headlines_e + deaths_headlines_w + deaths_headlines_i + deaths_headlines_s
356 deaths_headlines_e['previous_mean'] = deaths_headlines_e['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
357 deaths_headlines_w['previous_mean'] = deaths_headlines_w['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
358 deaths_headlines_s['previous_mean'] = deaths_headlines_s['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
359 deaths_headlines_i['previous_mean'] = deaths_headlines_i['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
360 deaths_headlines['previous_mean'] = deaths_headlines['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
365 deaths_headlines['total_2020 total_2019 total_2018 total_2017 total_2016 total_2015'.split()].plot(figsize=(14, 8))
369 deaths_headlines[['total_2020', 'previous_mean']].plot(figsize=(10, 8))
373 deaths_headlines_i.plot()
377 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
379 fig = plt.figure(figsize=(10, 10))
380 ax = fig.add_subplot(111, projection="polar")
384 np.arange(len(deaths_headlines))/float(len(deaths_headlines))*2.*np.pi),
386 # l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#b56363", label="2015") # 0
387 # l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#a4b563", label="2016") # 72
388 # l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#63b584", label="2017") # 144
389 # l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#6384b5", label="2018") # 216
390 # l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#a4635b", label="2019") # 288
391 l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#e47d7d", label="2015") # 0
392 l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
393 l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#7de4a6", label="2017") # 144
394 l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#7da6e4", label="2018") # 216
395 l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#d07de4", label="2019") # 288
397 lmean, = ax.plot(theta, deaths_headlines['previous_mean'], color="black", linestyle='dashed', label="mean")
399 l20, = ax.plot(theta, deaths_headlines['total_2020'], color="red", label="2020")
401 # deaths_headlines.total_2019.plot(ax=ax)
403 def _closeline(line):
404 x, y = line.get_data()
405 x = np.concatenate((x, [x[0]]))
406 y = np.concatenate((y, [y[0]]))
409 [_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
413 ax.set_xticklabels(deaths_headlines.index)
415 plt.title("Deaths by week over years, all UK")
416 plt.savefig('deaths-radar.png')
420 # Excess deaths calculation
423 # raw_data_2020.loc[12, 'Week ended']
427 eng_xls.loc[12, 'Week ended']
431 # raw_data_2020.iloc[-1]['Week ended']
435 deaths_headlines_e.total_2020.dropna().last_valid_index()
439 eng_xls.loc[deaths_headlines_e.total_2020.dropna().last_valid_index(), 'Week ended']
443 eng_xls.loc[27, 'Week ended']
447 # raw_data_2020.loc[12].droplevel(1)['Week ended']
451 # raw_data_2020.iloc[-1].droplevel(1)['Week ended']
455 (deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum()
459 (deaths_headlines.loc[12:27].total_2020 - deaths_headlines.loc[12:27].previous_mean).sum()
463 deaths_headlines.previous_mean.sum()
467 # excess_death_data = {
468 # 'start_date': str(eng_xls.loc[12, 'Week ended']),
469 # 'end_date': str(eng_xls.loc[deaths_headlines_e.total_2020.dropna().last_valid_index(), 'Week ended']),
470 # 'excess_deaths': (deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum()
473 # with open('excess_deaths.json', 'w') as f:
474 # json.dump(excess_death_data, f)
478 excess_death_data = {
479 'start_date': str(eng_xls.loc[12, 'Week ended']),
480 'end_date': str(eng_xls.loc[27, 'Week ended']),
481 'excess_deaths': (deaths_headlines.loc[12:27].total_2020 - deaths_headlines.loc[12:27].previous_mean).sum()
484 with open('excess_deaths.json', 'w') as f:
485 json.dump(excess_death_data, f)
489 # excess_death_data = {
490 # 'start_date': str(raw_data_2020.loc[12].droplevel(1)['Week ended']),
491 # 'end_date': str(raw_data_2020.iloc[-1].droplevel(1)['Week ended']),
492 # 'excess_deaths': (deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum()
495 # with open('excess_deaths.json', 'w') as f:
496 # json.dump(excess_death_data, f)
500 eng_xls['Week ended']
504 # raw_data_2020.droplevel(1, axis='columns')['Week ended']
508 deaths_by_week = deaths_headlines.merge(eng_xls['Week ended'], left_index=True, right_index=True)
509 deaths_by_week.rename(columns={'Week ended': 'week_ended'}, inplace=True)
510 deaths_by_week.to_csv('deaths_by_week.csv', header=True, index=False)
514 # deaths_by_week = deaths_headlines.merge(raw_data_2020.droplevel(1, axis='columns')['Week ended'], left_index=True, right_index=True)
515 # deaths_by_week.rename(columns={'Week ended': 'week_ended'}, inplace=True)
516 # deaths_by_week.to_csv('deaths_by_week.csv', header=True, index=False)
519 # Plots for UK nations
522 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
524 fig = plt.figure(figsize=(10, 10))
525 ax = fig.add_subplot(111, projection="polar")
529 np.arange(len(deaths_headlines_e))/float(len(deaths_headlines_e))*2.*np.pi),
531 l15, = ax.plot(theta, deaths_headlines_e['total_2015'], color="#e47d7d", label="2015") # 0
532 l16, = ax.plot(theta, deaths_headlines_e['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
533 l17, = ax.plot(theta, deaths_headlines_e['total_2017'], color="#7de4a6", label="2017") # 144
534 l18, = ax.plot(theta, deaths_headlines_e['total_2018'], color="#7da6e4", label="2018") # 216
535 l19, = ax.plot(theta, deaths_headlines_e['total_2019'], color="#d07de4", label="2019") # 288
537 lmean, = ax.plot(theta, deaths_headlines_e['previous_mean'], color="black", linestyle='dashed', label="mean")
539 l20, = ax.plot(theta, deaths_headlines_e['total_2020'], color="red", label="2020")
541 # deaths_headlines.total_2019.plot(ax=ax)
543 def _closeline(line):
544 x, y = line.get_data()
545 x = np.concatenate((x, [x[0]]))
546 y = np.concatenate((y, [y[0]]))
549 [_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
553 ax.set_xticklabels(deaths_headlines_e.index)
555 plt.title("Deaths by week over years, England")
556 plt.savefig('deaths-radar_england.png')
561 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
563 fig = plt.figure(figsize=(10, 10))
564 ax = fig.add_subplot(111, projection="polar")
568 np.arange(len(deaths_headlines_w))/float(len(deaths_headlines_w))*2.*np.pi),
570 l15, = ax.plot(theta, deaths_headlines_w['total_2015'], color="#e47d7d", label="2015") # 0
571 l16, = ax.plot(theta, deaths_headlines_w['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
572 l17, = ax.plot(theta, deaths_headlines_w['total_2017'], color="#7de4a6", label="2017") # 144
573 l18, = ax.plot(theta, deaths_headlines_w['total_2018'], color="#7da6e4", label="2018") # 216
574 l19, = ax.plot(theta, deaths_headlines_w['total_2019'], color="#d07de4", label="2019") # 288
576 lmean, = ax.plot(theta, deaths_headlines_w['previous_mean'], color="black", linestyle='dashed', label="mean")
578 l20, = ax.plot(theta, deaths_headlines_w['total_2020'], color="red", label="2020")
581 def _closeline(line):
582 x, y = line.get_data()
583 x = np.concatenate((x, [x[0]]))
584 y = np.concatenate((y, [y[0]]))
587 [_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
591 ax.set_xticklabels(deaths_headlines_w.index)
593 plt.title("Deaths by week over years, Wales")
594 plt.savefig('deaths-radar_wales.png')
599 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
601 fig = plt.figure(figsize=(10, 10))
602 ax = fig.add_subplot(111, projection="polar")
606 np.arange(len(deaths_headlines_s))/float(len(deaths_headlines_s))*2.*np.pi),
608 l15, = ax.plot(theta, deaths_headlines_s['total_2015'], color="#e47d7d", label="2015") # 0
609 l16, = ax.plot(theta, deaths_headlines_s['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
610 l17, = ax.plot(theta, deaths_headlines_s['total_2017'], color="#7de4a6", label="2017") # 144
611 l18, = ax.plot(theta, deaths_headlines_s['total_2018'], color="#7da6e4", label="2018") # 216
612 l19, = ax.plot(theta, deaths_headlines_s['total_2019'], color="#d07de4", label="2019") # 288
614 lmean, = ax.plot(theta, deaths_headlines_s['previous_mean'], color="black", linestyle='dashed', label="mean")
616 l20, = ax.plot(theta, deaths_headlines_s['total_2020'], color="red", label="2020")
619 def _closeline(line):
620 x, y = line.get_data()
621 x = np.concatenate((x, [x[0]]))
622 y = np.concatenate((y, [y[0]]))
625 [_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
629 ax.set_xticklabels(deaths_headlines_s.index)
631 plt.title("Deaths by week over years, Scotland")
632 plt.savefig('deaths-radar_scotland.png')
637 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
639 fig = plt.figure(figsize=(10, 10))
640 ax = fig.add_subplot(111, projection="polar")
644 np.arange(len(deaths_headlines_i))/float(len(deaths_headlines_i))*2.*np.pi),
646 l15, = ax.plot(theta, deaths_headlines_i['total_2015'], color="#e47d7d", label="2015") # 0
647 l16, = ax.plot(theta, deaths_headlines_i['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
648 l17, = ax.plot(theta, deaths_headlines_i['total_2017'], color="#7de4a6", label="2017") # 144
649 l18, = ax.plot(theta, deaths_headlines_i['total_2018'], color="#7da6e4", label="2018") # 216
650 l19, = ax.plot(theta, deaths_headlines_i['total_2019'], color="#d07de4", label="2019") # 288
652 lmean, = ax.plot(theta, deaths_headlines_i['previous_mean'], color="black", linestyle='dashed', label="mean")
654 l20, = ax.plot(theta, deaths_headlines_i['total_2020'], color="red", label="2020")
657 def _closeline(line):
658 x, y = line.get_data()
659 x = np.concatenate((x, [x[0]]))
660 y = np.concatenate((y, [y[0]]))
663 [_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
667 ax.set_xticklabels(deaths_headlines_i.index)
669 plt.title("Deaths by week over years, Northern Ireland")
670 plt.savefig('deaths-radar_northern_ireland.png')
675 # list(raw_data_2020.columns)
679 # deaths_headlines_e = raw_data_2020.iloc[:, [1]].copy()
680 # deaths_headlines_e.columns = ['total_2020']
681 # deaths_headlines_w = raw_data_2020['W92000004'].copy()
682 # deaths_headlines_e.columns = ['total_2020']
683 # deaths_headlines_w.columns = ['total_2020']
684 # deaths_headlines_e.total_2020 -= deaths_headlines_w.total_2020
685 # deaths_headlines_e.head()