General updates
[covid19.git] / uk_deaths.md
1 ---
2 jupyter:
3 jupytext:
4 formats: ipynb,md
5 text_representation:
6 extension: .md
7 format_name: markdown
8 format_version: '1.2'
9 jupytext_version: 1.3.4
10 kernelspec:
11 display_name: Python 3
12 language: python
13 name: python3
14 ---
15
16 Data from:
17
18 * [Office for National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (England and Wales). Weeks start on a Saturday.
19 * [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
20 * [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.
21
22
23 ```python
24 import itertools
25 import collections
26 import json
27 import pandas as pd
28 import numpy as np
29 from scipy.stats import gmean
30
31 import matplotlib as mpl
32 import matplotlib.pyplot as plt
33 %matplotlib inline
34 ```
35
36 ```python
# List the files in the uk-deaths-data directory (IPython shell escape).
!ls uk-deaths-data
38 ```
39
40 ```python
# Northern Ireland weekly deaths for 2015 (Weekly_Deaths_NI_2015.csv).
# The file has a two-row header; its first column becomes the index and the
# following two columns are parsed as day-first dates.
raw_data_2015 = pd.read_csv(
    'uk-deaths-data/Weekly_Deaths_NI_2015.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1, 2],
    dayfirst=True,
)

# Keep only the column at position 2 and give it a flat, year-specific label.
# NOTE(review): presumably this is the weekly total of registered deaths --
# confirm against the CSV layout.
dh15i = raw_data_2015.iloc[:, [2]]
dh15i.columns = ['total_2015']
49 ```
50
51 ```python
# Northern Ireland weekly deaths for 2016 (Weekly_Deaths_NI_2016.csv).
# The file has a two-row header; its first column becomes the index and the
# following two columns are parsed as day-first dates.
raw_data_2016 = pd.read_csv(
    'uk-deaths-data/Weekly_Deaths_NI_2016.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1, 2],
    dayfirst=True,
)

# Keep only the column at position 2 and give it a flat, year-specific label.
# NOTE(review): presumably this is the weekly total of registered deaths --
# confirm against the CSV layout.
dh16i = raw_data_2016.iloc[:, [2]]
dh16i.columns = ['total_2016']
60 ```
61
62 ```python
# Northern Ireland weekly deaths for 2017 (Weekly_Deaths_NI_2017.csv).
# The file has a two-row header; its first column becomes the index and the
# following two columns are parsed as day-first dates.
raw_data_2017 = pd.read_csv(
    'uk-deaths-data/Weekly_Deaths_NI_2017.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1, 2],
    dayfirst=True,
)

# Keep only the column at position 2 and give it a flat, year-specific label.
# NOTE(review): presumably this is the weekly total of registered deaths --
# confirm against the CSV layout.
dh17i = raw_data_2017.iloc[:, [2]]
dh17i.columns = ['total_2017']
71 ```
72
73 ```python
# Northern Ireland weekly deaths for 2018 (Weekly_Deaths_NI_2018.csv).
# The file has a two-row header; its first column becomes the index and the
# following two columns are parsed as day-first dates.
raw_data_2018 = pd.read_csv(
    'uk-deaths-data/Weekly_Deaths_NI_2018.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1, 2],
    dayfirst=True,
)

# Keep only the column at position 2 and give it a flat, year-specific label.
# NOTE(review): presumably this is the weekly total of registered deaths --
# confirm against the CSV layout.
dh18i = raw_data_2018.iloc[:, [2]]
dh18i.columns = ['total_2018']
82 ```
83
84 ```python
# Northern Ireland weekly deaths for 2019 (Weekly_Deaths_NI_2019.csv).
# The file has a two-row header; its first column becomes the index and the
# following two columns are parsed as day-first dates.
raw_data_2019 = pd.read_csv(
    'uk-deaths-data/Weekly_Deaths_NI_2019.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1, 2],
    dayfirst=True,
)

# Keep only the column at position 2 and give it a flat, year-specific label.
# NOTE(review): presumably this is the weekly total of registered deaths --
# confirm against the CSV layout.
dh19i = raw_data_2019.iloc[:, [2]]
dh19i.columns = ['total_2019']
93 ```
94
95 ```python
# 2020 weekly release (publishedweek192020.csv): two-row header, first column
# as the index, and file column 1 parsed as a day-first date.
# NOTE(review): presumably the ONS England and Wales dataset -- confirm.
raw_data_2020 = pd.read_csv(
    'uk-deaths-data/publishedweek192020.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
100 ```
101
102 ```python
# Northern Ireland weekly deaths for 2020 (Weekly_Deaths_NI_2020.csv).
# Only file column 1 is parsed as a (day-first) date for this year.
raw_data_2020_i = pd.read_csv(
    'uk-deaths-data/Weekly_Deaths_NI_2020.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)

# The 2020 figure is taken from position 1 (earlier NI years use position 2).
# NOTE(review): presumably the weekly total -- confirm against the CSV layout.
deaths_headlines_i = raw_data_2020_i.iloc[:, [1]]
deaths_headlines_i.columns = ['total_2020']
deaths_headlines_i.head()
111 ```
112
113 ```python
114
115 ```
116
117 ```python
118
119 ```
120
121 ```python
# Scotland weekly deaths: the first two lines of the file are skipped, the
# next line supplies the column names, and the first column is the index.
raw_data_s = pd.read_csv(
    'uk-deaths-data/weekly-deaths-april-20-scotland.csv',
    skiprows=2,
    header=0,
    index_col=0,
)
128 ```
129
130 ```python
# Pick the yearly columns, newest first, and prefix each label with 'total_'
# so the names line up with the other nations' frames.
deaths_headlines_s = raw_data_s[['2020', '2019', '2018', '2017', '2016', '2015']]
deaths_headlines_s.columns = ['total_' + year for year in deaths_headlines_s.columns]

# Discard the file's own index and number the rows from 1 in file order.
deaths_headlines_s = deaths_headlines_s.reset_index(drop=True)
deaths_headlines_s.index += 1
deaths_headlines_s
136 ```
137
138 ```python
139
140 ```
141
142 ```python
143
144 ```
145
146 ```python
147
148 ```
149
150 ```python
151
152 ```
153
154 ```python
155
156 ```
157
158 ```python
159 # raw_data_2020.head()
160 ```
161
162 ```python
# Display the 2020 column whose two header levels are 'W92000004' and 'Wales'.
raw_data_2020['W92000004', 'Wales']
164 ```
165
166 ```python
# 2019 weekly release (publishedweek522019.csv): two-row header, first column
# as the index, and file column 1 parsed as a day-first date.
# NOTE: this re-binds raw_data_2019, which an earlier cell used for the
# Northern Ireland file.
raw_data_2019 = pd.read_csv(
    'uk-deaths-data/publishedweek522019.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
172 ```
173
174 ```python
# 2018 weekly release (publishedweek522018.csv): two-row header, first column
# as the index, and file column 1 parsed as a day-first date.
# NOTE: this re-binds raw_data_2018, which an earlier cell used for the
# Northern Ireland file.
raw_data_2018 = pd.read_csv(
    'uk-deaths-data/publishedweek522018.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
180 ```
181
182 ```python
# 2017 weekly release (publishedweek522017.csv): two-row header, first column
# as the index, and file column 1 parsed as a day-first date.
# NOTE: this re-binds raw_data_2017, which an earlier cell used for the
# Northern Ireland file.
raw_data_2017 = pd.read_csv(
    'uk-deaths-data/publishedweek522017.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
188 ```
189
190 ```python
# 2016 weekly release (publishedweek522016.csv): two-row header, first column
# as the index, and file column 1 parsed as a day-first date.
# NOTE: this re-binds raw_data_2016, which an earlier cell used for the
# Northern Ireland file.
raw_data_2016 = pd.read_csv(
    'uk-deaths-data/publishedweek522016.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
196 ```
197
198 ```python
# 2015 weekly release (publishedweek2015.csv): two-row header, first column
# as the index, and file column 1 parsed as a day-first date.
# NOTE: this re-binds raw_data_2015, which an earlier cell used for the
# Northern Ireland file.
raw_data_2015 = pd.read_csv(
    'uk-deaths-data/publishedweek2015.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
204 ```
205
206 ```python
# Split the 2020 release into England-only and Wales-only weekly totals.
# Both selections are copied so the renames and the subtraction below work on
# independent frames rather than on slices of raw_data_2020.
deaths_headlines_e = raw_data_2020.iloc[:, [1]].copy()
deaths_headlines_w = raw_data_2020['W92000004'].copy()  # header level 2 is 'Wales'
deaths_headlines_e.columns = ['total_2020']
deaths_headlines_w.columns = ['total_2020']

# Position 1 is treated as the combined figure; subtracting the Wales column
# leaves the England-only count.
# NOTE(review): presumably position 1 is the England-and-Wales total -- confirm.
deaths_headlines_e['total_2020'] -= deaths_headlines_w['total_2020']
deaths_headlines_e
215 ```
216
217 ```python
# Split the 2019 release into England-only and Wales-only weekly totals.
# Both selections are copied (as the 2020 cell does) so the renames and the
# subtraction below work on independent frames, not slices of raw_data_2019.
dh19e = raw_data_2019.iloc[:, [1]].copy()
dh19w = raw_data_2019['W92000004'].copy()  # 'W92000004' is the Wales column
dh19e.columns = ['total_2019']
dh19w.columns = ['total_2019']

# Position 1 is treated as the combined figure; subtracting Wales leaves England.
# NOTE(review): presumably position 1 is the England-and-Wales total -- confirm.
dh19e['total_2019'] -= dh19w['total_2019']
dh19e.head()
224 ```
225
226 ```python
# Preview the first rows of the 2019 Wales frame.
dh19w.head()
228 ```
229
230 ```python
# Split the 2018 release into England-only and Wales-only weekly totals.
# Both selections are copied (as the 2020 cell does) so the renames and the
# subtraction below work on independent frames, not slices of raw_data_2018.
dh18e = raw_data_2018.iloc[:, [1]].copy()
dh18w = raw_data_2018['W92000004'].copy()  # 'W92000004' is the Wales column
dh18e.columns = ['total_2018']
dh18w.columns = ['total_2018']

# Position 1 is treated as the combined figure; subtracting Wales leaves England.
# NOTE(review): presumably position 1 is the England-and-Wales total -- confirm.
dh18e['total_2018'] -= dh18w['total_2018']
237 ```
238
239 ```python
# Split the 2017 release into England-only and Wales-only weekly totals.
# Both selections are copied (as the 2020 cell does) so the renames and the
# subtraction below work on independent frames, not slices of raw_data_2017.
dh17e = raw_data_2017.iloc[:, [1]].copy()
dh17w = raw_data_2017['W92000004'].copy()  # 'W92000004' is the Wales column
dh17e.columns = ['total_2017']
dh17w.columns = ['total_2017']

# Position 1 is treated as the combined figure; subtracting Wales leaves England.
# NOTE(review): presumably position 1 is the England-and-Wales total -- confirm.
dh17e['total_2017'] -= dh17w['total_2017']
246 ```
247
248 ```python
# Split the 2016 release into England-only and Wales-only weekly totals.
# Both selections are copied (as the 2020 cell does) so the renames and the
# subtraction below work on independent frames, not slices of raw_data_2016.
dh16e = raw_data_2016.iloc[:, [1]].copy()
dh16w = raw_data_2016['W92000004'].copy()  # 'W92000004' is the Wales column
dh16e.columns = ['total_2016']
dh16w.columns = ['total_2016']

# Position 1 is treated as the combined figure; subtracting Wales leaves England.
# NOTE(review): presumably position 1 is the England-and-Wales total -- confirm.
dh16e['total_2016'] -= dh16w['total_2016']
255 ```
256
257 ```python
# Split the 2015 release into England-only and Wales-only weekly totals.
# Both selections are copied (as the 2020 cell does) so the renames and the
# subtraction below work on independent frames, not slices of raw_data_2015.
dh15e = raw_data_2015.iloc[:, [1]].copy()
dh15w = raw_data_2015['W92000004'].copy()  # 'W92000004' is the Wales column
dh15e.columns = ['total_2015']
dh15w.columns = ['total_2015']

# Position 1 is treated as the combined figure; subtracting Wales leaves England.
# NOTE(review): presumably position 1 is the England-and-Wales total -- confirm.
dh15e['total_2015'] -= dh15w['total_2015']
264 ```
265
266 ```python
267 # dh18 = raw_data_2018.iloc[:, [1, 2]]
268 # dh18.columns = ['total_2018', 'total_previous']
269 # # dh18.head()
270 ```
271
272 ```python
# Attach each earlier year's England totals as a new column, aligned on the
# week index.  2019-2016 use outer joins (weeks found in either side are
# kept); 2015 uses a left join, so weeks that occur only in the 2015 data are
# not added.
deaths_headlines_e = (
    deaths_headlines_e
    .merge(dh19e['total_2019'], how='outer', left_index=True, right_index=True)
    .merge(dh18e['total_2018'], how='outer', left_index=True, right_index=True)
    .merge(dh17e['total_2017'], how='outer', left_index=True, right_index=True)
    .merge(dh16e['total_2016'], how='outer', left_index=True, right_index=True)
    .merge(dh15e['total_2015'], how='left', left_index=True, right_index=True)
)
deaths_headlines_e
280 ```
281
282 ```python
# Pick the yearly columns, newest first, using an explicit list rather than a
# one-shot iterator, and prefix each label with 'total_'.
scotland_years = ['2020', '2019', '2018', '2017', '2016', '2015']
deaths_headlines_s = raw_data_s[scotland_years]
deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns]

# Discard the file's own index and number the rows from 1 in file order.
deaths_headlines_s = deaths_headlines_s.reset_index(drop=True)
deaths_headlines_s.index = deaths_headlines_s.index + 1

# Keep rows labelled 1 through 52 (``.loc`` slices are inclusive).  The copy
# makes this an independent frame, so adding columns to it later does not
# write into a slice of the wider table.
# NOTE(review): presumably 52 trims a 53rd week and any trailing non-week
# rows -- confirm against the CSV.
deaths_headlines_s = deaths_headlines_s.loc[1:52].copy()
deaths_headlines_s
289 ```
290
291 ```python
# Attach each earlier year's Wales totals as a new column, aligned on the
# week index.  2019-2016 use outer joins (weeks found in either side are
# kept); 2015 uses a left join, so weeks that occur only in the 2015 data are
# not added.
deaths_headlines_w = (
    deaths_headlines_w
    .merge(dh19w['total_2019'], how='outer', left_index=True, right_index=True)
    .merge(dh18w['total_2018'], how='outer', left_index=True, right_index=True)
    .merge(dh17w['total_2017'], how='outer', left_index=True, right_index=True)
    .merge(dh16w['total_2016'], how='outer', left_index=True, right_index=True)
    .merge(dh15w['total_2015'], how='left', left_index=True, right_index=True)
)
deaths_headlines_w
299 ```
300
301 ```python
# Attach each earlier year's Northern Ireland totals as a new column, aligned
# on the index.  2019-2016 use outer joins (rows found in either side are
# kept); 2015 uses a left join, so rows that occur only in the 2015 data are
# not added.
deaths_headlines_i = (
    deaths_headlines_i
    .merge(dh19i['total_2019'], how='outer', left_index=True, right_index=True)
    .merge(dh18i['total_2018'], how='outer', left_index=True, right_index=True)
    .merge(dh17i['total_2017'], how='outer', left_index=True, right_index=True)
    .merge(dh16i['total_2016'], how='outer', left_index=True, right_index=True)
    .merge(dh15i['total_2015'], how='left', left_index=True, right_index=True)
)
deaths_headlines_i
308 ```
309
310 ```python
# UK-wide table: element-wise sum of the four nations' frames.  pandas aligns
# on both index and column labels, so a cell missing (or NaN) in any one
# nation is NaN in the result.
deaths_headlines = (
    deaths_headlines_e
    .add(deaths_headlines_w)
    .add(deaths_headlines_i)
    .add(deaths_headlines_s)
)
deaths_headlines
313 ```
314
315 ```python
# Add a 'previous_mean' column to every table: the per-row mean of the
# 2015-2019 totals.  ``DataFrame.mean(axis=1)`` is the vectorised form of
# applying ``np.mean`` to each row; like it, NaN entries are skipped.
previous_years = ['total_2019', 'total_2018', 'total_2017', 'total_2016', 'total_2015']
for frame in (
    deaths_headlines_e,
    deaths_headlines_w,
    deaths_headlines_s,
    deaths_headlines_i,
    deaths_headlines,
):
    frame['previous_mean'] = frame[previous_years].mean(axis=1)
deaths_headlines
322 ```
323
324 ```python
# Line plot of the UK-wide yearly totals, one line per year column.
yearly_columns = [
    'total_2020',
    'total_2019',
    'total_2018',
    'total_2017',
    'total_2016',
    'total_2015',
]
deaths_headlines[yearly_columns].plot(figsize=(10, 8))
326 ```
327
328 ```python
# Quick line plot of every column in the Northern Ireland table.
deaths_headlines_i.plot()
330 ```
331
332 ```python
# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection="polar")

# One angle per row of the table, evenly spaced over a full turn.  The angles
# are reversed (so successive weeks step to smaller angles) and then rolled
# by 14 positions to choose where the first week sits on the circle.
n_weeks = len(deaths_headlines)
theta = np.roll(np.flip(np.arange(n_weeks) / float(n_weeks) * 2. * np.pi), 14)


def _closeline(line):
    """Append the line's first point to its end so the trace forms a closed loop."""
    x, y = line.get_data()
    line.set_data(np.concatenate((x, [x[0]])), np.concatenate((y, [y[0]])))


# (year, line colour) for each complete year, in plotting/legend order.
year_colours = [
    ("2015", "#e47d7d"),
    ("2016", "#afc169"),
    ("2017", "#7de4a6"),
    ("2018", "#7da6e4"),
    ("2019", "#d07de4"),
]
complete_lines = []
for year, colour in year_colours:
    year_line, = ax.plot(theta, deaths_headlines['total_' + year], color=colour, label=year)
    complete_lines.append(year_line)

lmean, = ax.plot(theta, deaths_headlines['previous_mean'], color="black", linestyle='dashed', label="mean")
complete_lines.append(lmean)

# The 2020 trace is deliberately not closed into a loop.
# NOTE(review): presumably because the 2020 series is incomplete -- confirm.
l20, = ax.plot(theta, deaths_headlines['total_2020'], color="red", label="2020")

for complete_line in complete_lines:
    _closeline(complete_line)

ax.set_xticks(theta)
ax.set_xticklabels(deaths_headlines.index)
plt.legend()
plt.title("Deaths by week over years, all UK")
plt.savefig('deaths-radar.png')
plt.show()
374 ```
375
376 # Excess deaths calculation
377
378 ```python
# Look up the 'Week ended' entry for the row labelled 12 in the 2020 data.
raw_data_2020.loc[12, 'Week ended']
380 ```
381
382 ```python
# Look up the 'Week ended' entry for the last row of the 2020 data.
raw_data_2020.iloc[-1]['Week ended']
384 ```
385
386 ```python
# Take the row labelled 12, drop the second header level from its labels,
# then read 'Week ended'.
raw_data_2020.loc[12].droplevel(1)['Week ended']
388 ```
389
390 ```python
# Take the last row, drop the second header level from its labels,
# then read 'Week ended'.
raw_data_2020.iloc[-1].droplevel(1)['Week ended']
392 ```
393
394 ```python
# Excess deaths from the row labelled 12 onwards: 2020 total minus the
# previous-years mean, summed over those rows.
from_week_12 = deaths_headlines.loc[12:]
(from_week_12.total_2020 - from_week_12.previous_mean).sum()
396 ```
397
398 ```python
# Sum of the previous-years mean over every row of the UK-wide table.
deaths_headlines.previous_mean.sum()
400 ```
401
402 ```python
# Summarise excess deaths from the row labelled 12 to the latest row and
# write the summary to excess_deaths.json.
first_row = raw_data_2020.loc[12].droplevel(1)
latest_row = raw_data_2020.iloc[-1].droplevel(1)
counted_weeks = deaths_headlines.loc[12:]

excess_death_data = {
    'start_date': str(first_row['Week ended']),
    'end_date': str(latest_row['Week ended']),
    # 2020 total minus the previous-years mean, summed over the counted rows.
    'excess_deaths': (counted_weeks.total_2020 - counted_weeks.previous_mean).sum(),
}

with open('excess_deaths.json', 'w') as f:
    json.dump(excess_death_data, f)
411 ```
412
413 # Plots for UK nations
414
415 ```python
# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection="polar")

# One angle per row of the table, evenly spaced over a full turn.  The angles
# are reversed (so successive weeks step to smaller angles) and then rolled
# by 14 positions to choose where the first week sits on the circle.
n_weeks = len(deaths_headlines_e)
theta = np.roll(np.flip(np.arange(n_weeks) / float(n_weeks) * 2. * np.pi), 14)


def _closeline(line):
    """Append the line's first point to its end so the trace forms a closed loop."""
    x, y = line.get_data()
    line.set_data(np.concatenate((x, [x[0]])), np.concatenate((y, [y[0]])))


# (year, line colour) for each complete year, in plotting/legend order.
year_colours = [
    ("2015", "#e47d7d"),
    ("2016", "#afc169"),
    ("2017", "#7de4a6"),
    ("2018", "#7da6e4"),
    ("2019", "#d07de4"),
]
complete_lines = []
for year, colour in year_colours:
    year_line, = ax.plot(theta, deaths_headlines_e['total_' + year], color=colour, label=year)
    complete_lines.append(year_line)

lmean, = ax.plot(theta, deaths_headlines_e['previous_mean'], color="black", linestyle='dashed', label="mean")
complete_lines.append(lmean)

# The 2020 trace is deliberately not closed into a loop.
# NOTE(review): presumably because the 2020 series is incomplete -- confirm.
l20, = ax.plot(theta, deaths_headlines_e['total_2020'], color="red", label="2020")

for complete_line in complete_lines:
    _closeline(complete_line)

ax.set_xticks(theta)
ax.set_xticklabels(deaths_headlines_e.index)
plt.legend()
plt.title("Deaths by week over years, England")
plt.savefig('deaths-radar_england.png')
plt.show()
452 ```
453
454 ```python
# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection="polar")

# One angle per row of the table, evenly spaced over a full turn.  The angles
# are reversed (so successive weeks step to smaller angles) and then rolled
# by 14 positions to choose where the first week sits on the circle.
n_weeks = len(deaths_headlines_w)
theta = np.roll(np.flip(np.arange(n_weeks) / float(n_weeks) * 2. * np.pi), 14)


def _closeline(line):
    """Append the line's first point to its end so the trace forms a closed loop."""
    x, y = line.get_data()
    line.set_data(np.concatenate((x, [x[0]])), np.concatenate((y, [y[0]])))


# (year, line colour) for each complete year, in plotting/legend order.
year_colours = [
    ("2015", "#e47d7d"),
    ("2016", "#afc169"),
    ("2017", "#7de4a6"),
    ("2018", "#7da6e4"),
    ("2019", "#d07de4"),
]
complete_lines = []
for year, colour in year_colours:
    year_line, = ax.plot(theta, deaths_headlines_w['total_' + year], color=colour, label=year)
    complete_lines.append(year_line)

lmean, = ax.plot(theta, deaths_headlines_w['previous_mean'], color="black", linestyle='dashed', label="mean")
complete_lines.append(lmean)

# The 2020 trace is deliberately not closed into a loop.
# NOTE(review): presumably because the 2020 series is incomplete -- confirm.
l20, = ax.plot(theta, deaths_headlines_w['total_2020'], color="red", label="2020")

for complete_line in complete_lines:
    _closeline(complete_line)

ax.set_xticks(theta)
ax.set_xticklabels(deaths_headlines_w.index)
plt.legend()
plt.title("Deaths by week over years, Wales")
plt.savefig('deaths-radar_wales.png')
plt.show()
490 ```
491
492 ```python
# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection="polar")

# One angle per row of the table, evenly spaced over a full turn.  The angles
# are reversed (so successive weeks step to smaller angles) and then rolled
# by 14 positions to choose where the first week sits on the circle.
n_weeks = len(deaths_headlines_s)
theta = np.roll(np.flip(np.arange(n_weeks) / float(n_weeks) * 2. * np.pi), 14)


def _closeline(line):
    """Append the line's first point to its end so the trace forms a closed loop."""
    x, y = line.get_data()
    line.set_data(np.concatenate((x, [x[0]])), np.concatenate((y, [y[0]])))


# (year, line colour) for each complete year, in plotting/legend order.
year_colours = [
    ("2015", "#e47d7d"),
    ("2016", "#afc169"),
    ("2017", "#7de4a6"),
    ("2018", "#7da6e4"),
    ("2019", "#d07de4"),
]
complete_lines = []
for year, colour in year_colours:
    year_line, = ax.plot(theta, deaths_headlines_s['total_' + year], color=colour, label=year)
    complete_lines.append(year_line)

lmean, = ax.plot(theta, deaths_headlines_s['previous_mean'], color="black", linestyle='dashed', label="mean")
complete_lines.append(lmean)

# The 2020 trace is deliberately not closed into a loop.
# NOTE(review): presumably because the 2020 series is incomplete -- confirm.
l20, = ax.plot(theta, deaths_headlines_s['total_2020'], color="red", label="2020")

for complete_line in complete_lines:
    _closeline(complete_line)

ax.set_xticks(theta)
ax.set_xticklabels(deaths_headlines_s.index)
plt.legend()
plt.title("Deaths by week over years, Scotland")
plt.savefig('deaths-radar_scotland.png')
plt.show()
528 ```
529
530 ```python
# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection="polar")

# One angle per row of the table, evenly spaced over a full turn.  The angles
# are reversed (so successive weeks step to smaller angles) and then rolled
# by 14 positions to choose where the first week sits on the circle.
n_weeks = len(deaths_headlines_i)
theta = np.roll(np.flip(np.arange(n_weeks) / float(n_weeks) * 2. * np.pi), 14)


def _closeline(line):
    """Append the line's first point to its end so the trace forms a closed loop."""
    x, y = line.get_data()
    line.set_data(np.concatenate((x, [x[0]])), np.concatenate((y, [y[0]])))


# (year, line colour) for each complete year, in plotting/legend order.
year_colours = [
    ("2015", "#e47d7d"),
    ("2016", "#afc169"),
    ("2017", "#7de4a6"),
    ("2018", "#7da6e4"),
    ("2019", "#d07de4"),
]
complete_lines = []
for year, colour in year_colours:
    year_line, = ax.plot(theta, deaths_headlines_i['total_' + year], color=colour, label=year)
    complete_lines.append(year_line)

lmean, = ax.plot(theta, deaths_headlines_i['previous_mean'], color="black", linestyle='dashed', label="mean")
complete_lines.append(lmean)

# The 2020 trace is deliberately not closed into a loop.
# NOTE(review): presumably because the 2020 series is incomplete -- confirm.
l20, = ax.plot(theta, deaths_headlines_i['total_2020'], color="red", label="2020")

for complete_line in complete_lines:
    _closeline(complete_line)

ax.set_xticks(theta)
ax.set_xticklabels(deaths_headlines_i.index)
plt.legend()
plt.title("Deaths by week over years, Northern Ireland")
plt.savefig('deaths-radar_northern_ireland.png')
plt.show()
566 ```
567
568 ```python
569
570 ```