Done deaths for UK nations
[covid19.git] / uk_deaths.md
1 ---
2 jupyter:
3 jupytext:
4 formats: ipynb,md
5 text_representation:
6 extension: .md
7 format_name: markdown
8 format_version: '1.2'
9 jupytext_version: 1.3.4
10 kernelspec:
11 display_name: Python 3
12 language: python
13 name: python3
14 ---
15
16 Data from:
17
18 * [Office of National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (England and Wales). Weeks start on a Saturday.
19 * [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
20 * [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.
21
22
23 ```python
24 import itertools
25 import collections
26 import pandas as pd
27 import numpy as np
28 from scipy.stats import gmean
29
30 import matplotlib as mpl
31 import matplotlib.pyplot as plt
32 %matplotlib inline
33 ```
34
35 ```python
36 !ls uk-deaths-data
37 ```
38
39 ```python
# Northern Ireland weekly deaths file for 2015.
# The CSV has two header rows; its first column becomes the row index.
# NOTE: the name raw_data_2015 is reused further down for the England and
# Wales file, so dh15i has to be extracted in this cell.
raw_data_2015 = pd.read_csv(
    'uk-deaths-data/Weekly_Deaths_NI_2015.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1, 2],
    dayfirst=True,
)
# Keep only the column at position 2 and give it a flat, year-specific name.
# NOTE(review): presumably the all-deaths total -- confirm against the CSV.
dh15i = raw_data_2015.iloc[:, [2]]
dh15i.columns = ['total_2015']
# dh15i.head()
48 ```
49
50 ```python
# Northern Ireland weekly deaths file for 2016.
# The CSV has two header rows; its first column becomes the row index.
# NOTE: the name raw_data_2016 is reused further down for the England and
# Wales file, so dh16i has to be extracted in this cell.
raw_data_2016 = pd.read_csv(
    'uk-deaths-data/Weekly_Deaths_NI_2016.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1, 2],
    dayfirst=True,
)
# Keep only the column at position 2 and give it a flat, year-specific name.
# NOTE(review): presumably the all-deaths total -- confirm against the CSV.
dh16i = raw_data_2016.iloc[:, [2]]
dh16i.columns = ['total_2016']
# dh16i.head()
59 ```
60
61 ```python
# Northern Ireland weekly deaths file for 2017.
# The CSV has two header rows; its first column becomes the row index.
# NOTE: the name raw_data_2017 is reused further down for the England and
# Wales file, so dh17i has to be extracted in this cell.
raw_data_2017 = pd.read_csv(
    'uk-deaths-data/Weekly_Deaths_NI_2017.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1, 2],
    dayfirst=True,
)
# Keep only the column at position 2 and give it a flat, year-specific name.
# NOTE(review): presumably the all-deaths total -- confirm against the CSV.
dh17i = raw_data_2017.iloc[:, [2]]
dh17i.columns = ['total_2017']
# dh17i.head()
70 ```
71
72 ```python
# Northern Ireland weekly deaths file for 2018.
# The CSV has two header rows; its first column becomes the row index.
# NOTE: the name raw_data_2018 is reused further down for the England and
# Wales file, so dh18i has to be extracted in this cell.
raw_data_2018 = pd.read_csv(
    'uk-deaths-data/Weekly_Deaths_NI_2018.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1, 2],
    dayfirst=True,
)
# Keep only the column at position 2 and give it a flat, year-specific name.
# NOTE(review): presumably the all-deaths total -- confirm against the CSV.
dh18i = raw_data_2018.iloc[:, [2]]
dh18i.columns = ['total_2018']
# dh18i.head()
81 ```
82
83 ```python
# Northern Ireland weekly deaths file for 2019.
# The CSV has two header rows; its first column becomes the row index.
# NOTE: the name raw_data_2019 is reused further down for the England and
# Wales file, so dh19i has to be extracted in this cell.
raw_data_2019 = pd.read_csv(
    'uk-deaths-data/Weekly_Deaths_NI_2019.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1, 2],
    dayfirst=True,
)
# Keep only the column at position 2 and give it a flat, year-specific name.
# NOTE(review): presumably the all-deaths total -- confirm against the CSV.
dh19i = raw_data_2019.iloc[:, [2]]
dh19i.columns = ['total_2019']
# dh19i.head()
92 ```
93
94 ```python
# Northern Ireland weekly deaths file for 2020.
# Unlike the earlier NI files only one date column is parsed, and the column
# kept is at position 1 rather than 2.
raw_data_2020_i = pd.read_csv(
    'uk-deaths-data/Weekly_Deaths_NI_2020.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
# This frame is the base that the earlier NI years are merged onto later.
# NOTE(review): presumably the all-deaths total -- confirm against the CSV.
deaths_headlines_i = raw_data_2020_i.iloc[:, [1]]
deaths_headlines_i.columns = ['total_2020']
deaths_headlines_i.head()
103 ```
104
105 ```python
106
107 ```
108
109 ```python
110
111 ```
112
113 ```python
# Scottish weekly deaths: skip the two leading lines of the file, take the
# next line as the header and use the first column as the row index.
raw_data_s = pd.read_csv(
    'uk-deaths-data/weekly-deaths-april-20-scotland.csv',
    skiprows=2,
    header=0,
    index_col=0,
)
# raw_data_s
120 ```
121
122 ```python
# Pick the yearly columns, newest first, as an explicit list (the original
# passed a bare `reversed` iterator as the key).  Copy so the renaming and
# re-indexing below never act on a view of raw_data_s.
deaths_headlines_s = raw_data_s[['2020', '2019', '2018', '2017', '2016', '2015']].copy()
deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns]
# Replace the original row labels with a 1-based running number.
deaths_headlines_s = deaths_headlines_s.reset_index(drop=True)
deaths_headlines_s.index = deaths_headlines_s.index + 1
deaths_headlines_s
128 ```
129
130 ```python
131
132 ```
133
134 ```python
135
136 ```
137
138 ```python
139
140 ```
141
142 ```python
143
144 ```
145
146 ```python
147
148 ```
149
150 ```python
# England and Wales weekly deaths file for 2020.
# The CSV has two header rows; its first column becomes the row index.
raw_data_2020 = pd.read_csv(
    'uk-deaths-data/publishedweek182020.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
155 ```
156
157 ```python
158 # raw_data_2020.head()
159 ```
160
161 ```python
# Show the single column addressed by both header levels.
raw_data_2020[('W92000004', 'Wales')]
163 ```
164
165 ```python
# England and Wales weekly deaths file for 2019.
# NOTE: this rebinds raw_data_2019, which earlier held the Northern Ireland file.
raw_data_2019 = pd.read_csv(
    'uk-deaths-data/publishedweek522019.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
# raw_data_2019.head()
171 ```
172
173 ```python
# England and Wales weekly deaths file for 2018.
# NOTE: this rebinds raw_data_2018, which earlier held the Northern Ireland file.
raw_data_2018 = pd.read_csv(
    'uk-deaths-data/publishedweek522018.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
# raw_data_2018.head()
179 ```
180
181 ```python
# England and Wales weekly deaths file for 2017.
# NOTE: this rebinds raw_data_2017, which earlier held the Northern Ireland file.
raw_data_2017 = pd.read_csv(
    'uk-deaths-data/publishedweek522017.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
# raw_data_2017.head()
187 ```
188
189 ```python
# England and Wales weekly deaths file for 2016.
# NOTE: this rebinds raw_data_2016, which earlier held the Northern Ireland file.
raw_data_2016 = pd.read_csv(
    'uk-deaths-data/publishedweek522016.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
# raw_data_2016.head()
195 ```
196
197 ```python
# England and Wales weekly deaths file for 2015.
# NOTE: this rebinds raw_data_2015, which earlier held the Northern Ireland file.
raw_data_2015 = pd.read_csv(
    'uk-deaths-data/publishedweek2015.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
# raw_data_2015.head()
203 ```
204
205 ```python
# Split the 2020 file into an England frame and a Wales frame.
# Copies are taken so the renaming and subtraction below never act on a
# selection of raw_data_2020 (avoids SettingWithCopyWarning).
# NOTE(review): column position 1 is presumably the combined England and
# Wales total -- confirm against the CSV.
deaths_headlines_e = raw_data_2020.iloc[:, [1]].copy()
deaths_headlines_w = raw_data_2020['W92000004'].copy()
deaths_headlines_e.columns = ['total_2020']
deaths_headlines_w.columns = ['total_2020']
# Remove the Wales figure from the combined column.
deaths_headlines_e['total_2020'] -= deaths_headlines_w['total_2020']
deaths_headlines_e
214 ```
215
216 ```python
# Split the 2019 file into an England frame and a Wales frame.
# Copies are taken so the renaming and subtraction below never act on a
# selection of raw_data_2019 (avoids SettingWithCopyWarning).
# NOTE(review): column position 1 is presumably the combined England and
# Wales total -- confirm against the CSV.
dh19e = raw_data_2019.iloc[:, [1]].copy()
dh19w = raw_data_2019['W92000004'].copy()
dh19e.columns = ['total_2019']
dh19w.columns = ['total_2019']
# Remove the Wales figure from the combined column.
dh19e['total_2019'] -= dh19w['total_2019']
dh19e.head()
223 ```
224
225 ```python
# Preview the first five rows of the 2019 Wales frame.
dh19w.head(5)
227 ```
228
229 ```python
# Split the 2018 file into an England frame and a Wales frame.
# Copies are taken so the renaming and subtraction below never act on a
# selection of raw_data_2018 (avoids SettingWithCopyWarning).
# NOTE(review): column position 1 is presumably the combined England and
# Wales total -- confirm against the CSV.
dh18e = raw_data_2018.iloc[:, [1]].copy()
dh18w = raw_data_2018['W92000004'].copy()
dh18e.columns = ['total_2018']
dh18w.columns = ['total_2018']
# Remove the Wales figure from the combined column.
dh18e['total_2018'] -= dh18w['total_2018']
# dh18e.head()
236 ```
237
238 ```python
# Split the 2017 file into an England frame and a Wales frame.
# Copies are taken so the renaming and subtraction below never act on a
# selection of raw_data_2017 (avoids SettingWithCopyWarning).
# NOTE(review): column position 1 is presumably the combined England and
# Wales total -- confirm against the CSV.
dh17e = raw_data_2017.iloc[:, [1]].copy()
dh17w = raw_data_2017['W92000004'].copy()
dh17e.columns = ['total_2017']
dh17w.columns = ['total_2017']
# Remove the Wales figure from the combined column.
dh17e['total_2017'] -= dh17w['total_2017']
# dh17e.head()
245 ```
246
247 ```python
# Split the 2016 file into an England frame and a Wales frame.
# Copies are taken so the renaming and subtraction below never act on a
# selection of raw_data_2016 (avoids SettingWithCopyWarning).
# NOTE(review): column position 1 is presumably the combined England and
# Wales total -- confirm against the CSV.
dh16e = raw_data_2016.iloc[:, [1]].copy()
dh16w = raw_data_2016['W92000004'].copy()
dh16e.columns = ['total_2016']
dh16w.columns = ['total_2016']
# Remove the Wales figure from the combined column.
dh16e['total_2016'] -= dh16w['total_2016']
# dh16e.head()
254 ```
255
256 ```python
# Split the 2015 file into an England frame and a Wales frame.
# Copies are taken so the renaming and subtraction below never act on a
# selection of raw_data_2015 (avoids SettingWithCopyWarning).
# NOTE(review): column position 1 is presumably the combined England and
# Wales total -- confirm against the CSV.
dh15e = raw_data_2015.iloc[:, [1]].copy()
dh15w = raw_data_2015['W92000004'].copy()
dh15e.columns = ['total_2015']
dh15w.columns = ['total_2015']
# Remove the Wales figure from the combined column.
dh15e['total_2015'] -= dh15w['total_2015']
# dh15e.head()
263 ```
264
265 ```python
266 # dh18 = raw_data_2018.iloc[:, [1, 2]]
267 # dh18.columns = ['total_2018', 'total_previous']
268 # # dh18.head()
269 ```
270
271 ```python
# Attach each earlier year to the 2020 England figures, matching rows on the index.
deaths_headlines_e = (
    deaths_headlines_e
    .merge(dh19e['total_2019'], how='outer', left_index=True, right_index=True)
    .merge(dh18e['total_2018'], how='outer', left_index=True, right_index=True)
    .merge(dh17e['total_2017'], how='outer', left_index=True, right_index=True)
    .merge(dh16e['total_2016'], how='outer', left_index=True, right_index=True)
    # 2015 is left-joined, so rows that exist only in the 2015 data are dropped.
    .merge(dh15e['total_2015'], how='left', left_index=True, right_index=True)
)
deaths_headlines_e
279 ```
280
281 ```python
# Pick the yearly columns, newest first, as an explicit list (the original
# passed a bare `reversed` iterator as the key).
deaths_headlines_s = raw_data_s[['2020', '2019', '2018', '2017', '2016', '2015']]
deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns]
# Replace the original row labels with a 1-based running number.
deaths_headlines_s = deaths_headlines_s.reset_index(drop=True)
deaths_headlines_s.index = deaths_headlines_s.index + 1
# Keep rows 1 to 52 only.  The copy makes this an independent frame, so the
# later cell that adds a column to it does not write into a slice.
deaths_headlines_s = deaths_headlines_s.loc[1:52].copy()
deaths_headlines_s
288 ```
289
290 ```python
# Attach each earlier year to the 2020 Wales figures, matching rows on the index.
deaths_headlines_w = (
    deaths_headlines_w
    .merge(dh19w['total_2019'], how='outer', left_index=True, right_index=True)
    .merge(dh18w['total_2018'], how='outer', left_index=True, right_index=True)
    .merge(dh17w['total_2017'], how='outer', left_index=True, right_index=True)
    .merge(dh16w['total_2016'], how='outer', left_index=True, right_index=True)
    # 2015 is left-joined, so rows that exist only in the 2015 data are dropped.
    .merge(dh15w['total_2015'], how='left', left_index=True, right_index=True)
)
deaths_headlines_w
298 ```
299
300 ```python
# Attach each earlier year to the 2020 Northern Ireland figures, matching rows on the index.
deaths_headlines_i = (
    deaths_headlines_i
    .merge(dh19i['total_2019'], how='outer', left_index=True, right_index=True)
    .merge(dh18i['total_2018'], how='outer', left_index=True, right_index=True)
    .merge(dh17i['total_2017'], how='outer', left_index=True, right_index=True)
    .merge(dh16i['total_2016'], how='outer', left_index=True, right_index=True)
    # 2015 is left-joined, so rows that exist only in the 2015 data are dropped.
    .merge(dh15i['total_2015'], how='left', left_index=True, right_index=True)
)
deaths_headlines_i
307 ```
308
309 ```python
# UK total: element-wise sum of the four nations' frames.  The frames are
# aligned on row and column labels, and a label missing from any one frame
# gives NaN in the result.
deaths_headlines = (
    deaths_headlines_e
    + deaths_headlines_w
    + deaths_headlines_i
    + deaths_headlines_s
)
deaths_headlines
312 ```
313
314 ```python
# Add a `previous_mean` column to every frame: the row-wise mean of the
# 2015-2019 totals.  DataFrame.mean(axis=1) is the vectorised equivalent of
# apply(np.mean, axis=1) and likewise skips missing values.
_previous_years = ['total_2019', 'total_2018', 'total_2017', 'total_2016', 'total_2015']
for _frame in (deaths_headlines_e,
               deaths_headlines_w,
               deaths_headlines_s,
               deaths_headlines_i,
               deaths_headlines):
    _frame['previous_mean'] = _frame[_previous_years].mean(axis=1)
deaths_headlines
321 ```
322
323 ```python
# Line plot of the UK totals, one line per year.
deaths_headlines[
    ['total_2020', 'total_2019', 'total_2018', 'total_2017', 'total_2016', 'total_2015']
].plot(figsize=(10, 8))
325 ```
326
327 ```python
# Line plot of every column of the Northern Ireland frame.
deaths_headlines_i.plot()
329 ```
330
331 ```python
# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection="polar")

# One angle per row, evenly spaced round the circle.  The flip makes the angle
# decrease from one row to the next and the roll shifts the start by 14
# places; with 52 rows the first row ends up at pi/2.
theta = np.roll(
    np.flip(
        np.arange(len(deaths_headlines)) / float(len(deaths_headlines)) * 2. * np.pi),
    14)
l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#e47d7d", label="2015")  # 0
l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#afc169", label="2016")  # 72 , d0e47d
l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#7de4a6", label="2017")  # 144
l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#7da6e4", label="2018")  # 216
l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#d07de4", label="2019")  # 288

lmean, = ax.plot(theta, deaths_headlines['previous_mean'], color="black", linestyle='dashed', label="mean")

l20, = ax.plot(theta, deaths_headlines['total_2020'], color="red", label="2020")


def _closeline(line):
    """Append the line's first point to its end so the curve joins up."""
    x, y = line.get_data()
    x = np.concatenate((x, [x[0]]))
    y = np.concatenate((y, [y[0]]))
    line.set_data(x, y)


# Close every curve except the 2020 one (l20), which is left open.
for _line in [l19, l18, l17, l16, l15, lmean]:
    _closeline(_line)

ax.set_xticks(theta)
ax.set_xticklabels(deaths_headlines.index)
plt.legend()
plt.title("Deaths by week over years, all UK")
plt.savefig('deaths-radar.png')
plt.show()
373 ```
374
375 # Excess deaths calculation
376
377 ```python
# Sum of (2020 total minus previous-years mean) over the rows labelled 12
# onward.  Rows where either value is missing are skipped by sum().
(deaths_headlines.total_2020 - deaths_headlines.previous_mean).loc[12:].sum()
379 ```
380
381 ```python
# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection="polar")

# One angle per row, evenly spaced round the circle.  The flip makes the angle
# decrease from one row to the next and the roll shifts the start by 14
# places; with 52 rows the first row ends up at pi/2.
theta = np.roll(
    np.flip(
        np.arange(len(deaths_headlines_e)) / float(len(deaths_headlines_e)) * 2. * np.pi),
    14)
l15, = ax.plot(theta, deaths_headlines_e['total_2015'], color="#e47d7d", label="2015")  # 0
l16, = ax.plot(theta, deaths_headlines_e['total_2016'], color="#afc169", label="2016")  # 72 , d0e47d
l17, = ax.plot(theta, deaths_headlines_e['total_2017'], color="#7de4a6", label="2017")  # 144
l18, = ax.plot(theta, deaths_headlines_e['total_2018'], color="#7da6e4", label="2018")  # 216
l19, = ax.plot(theta, deaths_headlines_e['total_2019'], color="#d07de4", label="2019")  # 288

lmean, = ax.plot(theta, deaths_headlines_e['previous_mean'], color="black", linestyle='dashed', label="mean")

l20, = ax.plot(theta, deaths_headlines_e['total_2020'], color="red", label="2020")


def _closeline(line):
    """Append the line's first point to its end so the curve joins up."""
    x, y = line.get_data()
    x = np.concatenate((x, [x[0]]))
    y = np.concatenate((y, [y[0]]))
    line.set_data(x, y)


# Close every curve except the 2020 one (l20), which is left open.
for _line in [l19, l18, l17, l16, l15, lmean]:
    _closeline(_line)

ax.set_xticks(theta)
ax.set_xticklabels(deaths_headlines_e.index)
plt.legend()
plt.title("Deaths by week over years, England")
plt.savefig('deaths-radar_england.png')
plt.show()
418 ```
419
420 ```python
# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection="polar")

# One angle per row, evenly spaced round the circle.  The flip makes the angle
# decrease from one row to the next and the roll shifts the start by 14
# places; with 52 rows the first row ends up at pi/2.
theta = np.roll(
    np.flip(
        np.arange(len(deaths_headlines_w)) / float(len(deaths_headlines_w)) * 2. * np.pi),
    14)
l15, = ax.plot(theta, deaths_headlines_w['total_2015'], color="#e47d7d", label="2015")  # 0
l16, = ax.plot(theta, deaths_headlines_w['total_2016'], color="#afc169", label="2016")  # 72 , d0e47d
l17, = ax.plot(theta, deaths_headlines_w['total_2017'], color="#7de4a6", label="2017")  # 144
l18, = ax.plot(theta, deaths_headlines_w['total_2018'], color="#7da6e4", label="2018")  # 216
l19, = ax.plot(theta, deaths_headlines_w['total_2019'], color="#d07de4", label="2019")  # 288

lmean, = ax.plot(theta, deaths_headlines_w['previous_mean'], color="black", linestyle='dashed', label="mean")

l20, = ax.plot(theta, deaths_headlines_w['total_2020'], color="red", label="2020")


def _closeline(line):
    """Append the line's first point to its end so the curve joins up."""
    x, y = line.get_data()
    x = np.concatenate((x, [x[0]]))
    y = np.concatenate((y, [y[0]]))
    line.set_data(x, y)


# Close every curve except the 2020 one (l20), which is left open.
for _line in [l19, l18, l17, l16, l15, lmean]:
    _closeline(_line)

ax.set_xticks(theta)
ax.set_xticklabels(deaths_headlines_w.index)
plt.legend()
plt.title("Deaths by week over years, Wales")
plt.savefig('deaths-radar_wales.png')
plt.show()
456 ```
457
458 ```python
# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection="polar")

# One angle per row, evenly spaced round the circle.  The flip makes the angle
# decrease from one row to the next and the roll shifts the start by 14
# places; with 52 rows the first row ends up at pi/2.
theta = np.roll(
    np.flip(
        np.arange(len(deaths_headlines_s)) / float(len(deaths_headlines_s)) * 2. * np.pi),
    14)
l15, = ax.plot(theta, deaths_headlines_s['total_2015'], color="#e47d7d", label="2015")  # 0
l16, = ax.plot(theta, deaths_headlines_s['total_2016'], color="#afc169", label="2016")  # 72 , d0e47d
l17, = ax.plot(theta, deaths_headlines_s['total_2017'], color="#7de4a6", label="2017")  # 144
l18, = ax.plot(theta, deaths_headlines_s['total_2018'], color="#7da6e4", label="2018")  # 216
l19, = ax.plot(theta, deaths_headlines_s['total_2019'], color="#d07de4", label="2019")  # 288

lmean, = ax.plot(theta, deaths_headlines_s['previous_mean'], color="black", linestyle='dashed', label="mean")

l20, = ax.plot(theta, deaths_headlines_s['total_2020'], color="red", label="2020")


def _closeline(line):
    """Append the line's first point to its end so the curve joins up."""
    x, y = line.get_data()
    x = np.concatenate((x, [x[0]]))
    y = np.concatenate((y, [y[0]]))
    line.set_data(x, y)


# Close every curve except the 2020 one (l20), which is left open.
for _line in [l19, l18, l17, l16, l15, lmean]:
    _closeline(_line)

ax.set_xticks(theta)
ax.set_xticklabels(deaths_headlines_s.index)
plt.legend()
plt.title("Deaths by week over years, Scotland")
plt.savefig('deaths-radar_scotland.png')
plt.show()
494 ```
495
496 ```python
# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection="polar")

# One angle per row, evenly spaced round the circle.  The flip makes the angle
# decrease from one row to the next and the roll shifts the start by 14
# places; with 52 rows the first row ends up at pi/2.
theta = np.roll(
    np.flip(
        np.arange(len(deaths_headlines_i)) / float(len(deaths_headlines_i)) * 2. * np.pi),
    14)
l15, = ax.plot(theta, deaths_headlines_i['total_2015'], color="#e47d7d", label="2015")  # 0
l16, = ax.plot(theta, deaths_headlines_i['total_2016'], color="#afc169", label="2016")  # 72 , d0e47d
l17, = ax.plot(theta, deaths_headlines_i['total_2017'], color="#7de4a6", label="2017")  # 144
l18, = ax.plot(theta, deaths_headlines_i['total_2018'], color="#7da6e4", label="2018")  # 216
l19, = ax.plot(theta, deaths_headlines_i['total_2019'], color="#d07de4", label="2019")  # 288

lmean, = ax.plot(theta, deaths_headlines_i['previous_mean'], color="black", linestyle='dashed', label="mean")

l20, = ax.plot(theta, deaths_headlines_i['total_2020'], color="red", label="2020")


def _closeline(line):
    """Append the line's first point to its end so the curve joins up."""
    x, y = line.get_data()
    x = np.concatenate((x, [x[0]]))
    y = np.concatenate((y, [y[0]]))
    line.set_data(x, y)


# Close every curve except the 2020 one (l20), which is left open.
for _line in [l19, l18, l17, l16, l15, lmean]:
    _closeline(_line)

ax.set_xticks(theta)
ax.set_xticklabels(deaths_headlines_i.index)
plt.legend()
plt.title("Deaths by week over years, Northern Ireland")
plt.savefig('deaths-radar_northern_ireland.png')
plt.show()
532 ```
533
534 ```python
535
536 ```