Finished UK death data analysis
[covid19.git] / uk_deaths.md
1 ---
2 jupyter:
3 jupytext:
4 formats: ipynb,md
5 text_representation:
6 extension: .md
7 format_name: markdown
8 format_version: '1.2'
9 jupytext_version: 1.3.4
10 kernelspec:
11 display_name: Python 3
12 language: python
13 name: python3
14 ---
15
16 Data from:
17
18 * [Office for National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (England and Wales). Weeks start on a Saturday.
19 * [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
20 * [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.
21
22
23 ```python
24 import itertools
25 import collections
26 import pandas as pd
27 import numpy as np
28 from scipy.stats import gmean
29
30 import matplotlib as mpl
31 import matplotlib.pyplot as plt
32 %matplotlib inline
33 ```
34
35 ```python
36 !ls uk-deaths-data
37 ```
38
39 ```python
# Northern Ireland weekly deaths, 2015.
# The CSV has two header rows; the first file column becomes the index and
# file columns 1 and 2 are parsed as day-first dates.
# NOTE: raw_data_2015 is rebound later in the notebook by the
# 'publishedweek2015.csv' load, so dh15i is derived here, straight away.
raw_data_2015 = pd.read_csv(
    'uk-deaths-data/Weekly_Deaths_NI_2015.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1, 2],
    dayfirst=True,
)
# Keep only the data column at position 2, as a one-column frame.
dh15i = raw_data_2015.iloc[:, 2].to_frame(name='total_2015')
48 ```
49
50 ```python
# Northern Ireland weekly deaths, 2016.
# The CSV has two header rows; the first file column becomes the index and
# file columns 1 and 2 are parsed as day-first dates.
# NOTE: raw_data_2016 is rebound later in the notebook by the
# 'publishedweek522016.csv' load, so dh16i is derived here, straight away.
raw_data_2016 = pd.read_csv(
    'uk-deaths-data/Weekly_Deaths_NI_2016.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1, 2],
    dayfirst=True,
)
# Keep only the data column at position 2, as a one-column frame.
dh16i = raw_data_2016.iloc[:, 2].to_frame(name='total_2016')
59 ```
60
61 ```python
# Northern Ireland weekly deaths, 2017.
# The CSV has two header rows; the first file column becomes the index and
# file columns 1 and 2 are parsed as day-first dates.
# NOTE: raw_data_2017 is rebound later in the notebook by the
# 'publishedweek522017.csv' load, so dh17i is derived here, straight away.
raw_data_2017 = pd.read_csv(
    'uk-deaths-data/Weekly_Deaths_NI_2017.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1, 2],
    dayfirst=True,
)
# Keep only the data column at position 2, as a one-column frame.
dh17i = raw_data_2017.iloc[:, 2].to_frame(name='total_2017')
70 ```
71
72 ```python
# Northern Ireland weekly deaths, 2018.
# The CSV has two header rows; the first file column becomes the index and
# file columns 1 and 2 are parsed as day-first dates.
# NOTE: raw_data_2018 is rebound later in the notebook by the
# 'publishedweek522018.csv' load, so dh18i is derived here, straight away.
raw_data_2018 = pd.read_csv(
    'uk-deaths-data/Weekly_Deaths_NI_2018.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1, 2],
    dayfirst=True,
)
# Keep only the data column at position 2, as a one-column frame.
dh18i = raw_data_2018.iloc[:, 2].to_frame(name='total_2018')
81 ```
82
83 ```python
# Northern Ireland weekly deaths, 2019.
# The CSV has two header rows; the first file column becomes the index and
# file columns 1 and 2 are parsed as day-first dates.
# NOTE: raw_data_2019 is rebound later in the notebook by the
# 'publishedweek522019.csv' load, so dh19i is derived here, straight away.
raw_data_2019 = pd.read_csv(
    'uk-deaths-data/Weekly_Deaths_NI_2019.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1, 2],
    dayfirst=True,
)
# Keep only the data column at position 2, as a one-column frame.
dh19i = raw_data_2019.iloc[:, 2].to_frame(name='total_2019')
92 ```
93
94 ```python
# Northern Ireland weekly deaths, 2020.
# Unlike the 2015-2019 loads, only file column 1 is parsed as a date and the
# data column kept is the one at position 1.
raw_data_2020_i = pd.read_csv(
    'uk-deaths-data/Weekly_Deaths_NI_2020.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
# This one-column frame is the base that the earlier NI years are merged onto.
deaths_headlines_i = raw_data_2020_i.iloc[:, 1].to_frame(name='total_2020')
deaths_headlines_i.head()
103 ```
104
105 ```python
106
107 ```
108
109 ```python
110
111 ```
112
113 ```python
# Scotland weekly deaths: skip the first two lines of the file, take the next
# line as the header row and use the first column as the index.
raw_data_s = pd.read_csv(
    'uk-deaths-data/weekly-deaths-april-20-scotland.csv',
    skiprows=2,
    header=0,
    index_col=0,
)
120 ```
121
122 ```python
# Scotland: pick the per-year columns (newest first), prefix them with
# 'total_' to match the other nations' frames, and replace the original index
# with consecutive numbers starting at 1.
scotland_years = ['2020', '2019', '2018', '2017', '2016', '2015']
deaths_headlines_s = raw_data_s[scotland_years]
deaths_headlines_s.columns = ['total_' + year for year in scotland_years]
deaths_headlines_s = deaths_headlines_s.reset_index(drop=True)
deaths_headlines_s.index += 1
deaths_headlines_s
128 ```
129
130 ```python
131
132 ```
133
134 ```python
135
136 ```
137
138 ```python
139
140 ```
141
142 ```python
143
144 ```
145
146 ```python
147
148 ```
149
150 ```python
# Weekly deaths file for 2020 (the one carrying the 'W92000004' / 'Wales'
# column used below). Two header rows give MultiIndex columns; the first file
# column is the index and file column 1 is parsed as a day-first date.
raw_data_2020 = pd.read_csv(
    'uk-deaths-data/publishedweek182020.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
155 ```
156
157 ```python
158 # raw_data_2020.head()
159 ```
160
161 ```python
# Show the 2020 Wales column, addressed by its two-level column label.
raw_data_2020.loc[:, ('W92000004', 'Wales')]
163 ```
164
165 ```python
# Weekly deaths file for 2019 (two header rows, first column as index,
# file column 1 parsed as a day-first date).
# NOTE: this rebinds raw_data_2019, which earlier held the Northern Ireland
# frame; dh19i was already extracted from that one.
raw_data_2019 = pd.read_csv(
    'uk-deaths-data/publishedweek522019.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
171 ```
172
173 ```python
# Weekly deaths file for 2018 (two header rows, first column as index,
# file column 1 parsed as a day-first date).
# NOTE: this rebinds raw_data_2018, which earlier held the Northern Ireland
# frame; dh18i was already extracted from that one.
raw_data_2018 = pd.read_csv(
    'uk-deaths-data/publishedweek522018.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
179 ```
180
181 ```python
# Weekly deaths file for 2017 (two header rows, first column as index,
# file column 1 parsed as a day-first date).
# NOTE: this rebinds raw_data_2017, which earlier held the Northern Ireland
# frame; dh17i was already extracted from that one.
raw_data_2017 = pd.read_csv(
    'uk-deaths-data/publishedweek522017.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
187 ```
188
189 ```python
# Weekly deaths file for 2016 (two header rows, first column as index,
# file column 1 parsed as a day-first date).
# NOTE: this rebinds raw_data_2016, which earlier held the Northern Ireland
# frame; dh16i was already extracted from that one.
raw_data_2016 = pd.read_csv(
    'uk-deaths-data/publishedweek522016.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
195 ```
196
197 ```python
# Weekly deaths file for 2015 (two header rows, first column as index,
# file column 1 parsed as a day-first date).
# NOTE: this rebinds raw_data_2015, which earlier held the Northern Ireland
# frame; dh15i was already extracted from that one.
raw_data_2015 = pd.read_csv(
    'uk-deaths-data/publishedweek2015.csv',
    header=[0, 1],
    index_col=0,
    parse_dates=[1],
    dayfirst=True,
)
203 ```
204
205 ```python
# Split the 2020 figures into a Wales frame and a "remainder" frame.
# Wales: the column(s) under the 'W92000004' top-level header, relabelled.
deaths_headlines_w = raw_data_2020['W92000004']
deaths_headlines_w.columns = ['total_2020']

# Data column at position 1 minus Wales (presumably the England-only figure;
# confirm that position 1 is the England-and-Wales total).
# Built as a fresh frame: subtracting in place on an iloc selection makes
# pandas emit SettingWithCopyWarning.
deaths_headlines_e = (
    raw_data_2020.iloc[:, 1] - deaths_headlines_w['total_2020']
).to_frame(name='total_2020')
deaths_headlines_e
214 ```
215
216 ```python
# Split the 2019 figures into a Wales frame and a "remainder" frame.
# Wales: the column(s) under the 'W92000004' top-level header, relabelled.
dh19w = raw_data_2019['W92000004']
dh19w.columns = ['total_2019']

# Data column at position 1 minus Wales (presumably the England-only figure;
# confirm that position 1 is the England-and-Wales total).
# Built as a fresh frame: subtracting in place on an iloc selection makes
# pandas emit SettingWithCopyWarning.
dh19e = (raw_data_2019.iloc[:, 1] - dh19w['total_2019']).to_frame(name='total_2019')
dh19e.head()
223 ```
224
225 ```python
# Show the first five rows of the 2019 Wales frame.
dh19w.iloc[:5]
227 ```
228
229 ```python
# Split the 2018 figures into a Wales frame and a "remainder" frame.
# Wales: the column(s) under the 'W92000004' top-level header, relabelled.
dh18w = raw_data_2018['W92000004']
dh18w.columns = ['total_2018']

# Data column at position 1 minus Wales (presumably the England-only figure;
# confirm that position 1 is the England-and-Wales total).
# Built as a fresh frame: subtracting in place on an iloc selection makes
# pandas emit SettingWithCopyWarning.
dh18e = (raw_data_2018.iloc[:, 1] - dh18w['total_2018']).to_frame(name='total_2018')
236 ```
237
238 ```python
# Split the 2017 figures into a Wales frame and a "remainder" frame.
# Wales: the column(s) under the 'W92000004' top-level header, relabelled.
dh17w = raw_data_2017['W92000004']
dh17w.columns = ['total_2017']

# Data column at position 1 minus Wales (presumably the England-only figure;
# confirm that position 1 is the England-and-Wales total).
# Built as a fresh frame: subtracting in place on an iloc selection makes
# pandas emit SettingWithCopyWarning.
dh17e = (raw_data_2017.iloc[:, 1] - dh17w['total_2017']).to_frame(name='total_2017')
245 ```
246
247 ```python
# Split the 2016 figures into a Wales frame and a "remainder" frame.
# Wales: the column(s) under the 'W92000004' top-level header, relabelled.
dh16w = raw_data_2016['W92000004']
dh16w.columns = ['total_2016']

# Data column at position 1 minus Wales (presumably the England-only figure;
# confirm that position 1 is the England-and-Wales total).
# Built as a fresh frame: subtracting in place on an iloc selection makes
# pandas emit SettingWithCopyWarning.
dh16e = (raw_data_2016.iloc[:, 1] - dh16w['total_2016']).to_frame(name='total_2016')
254 ```
255
256 ```python
# Split the 2015 figures into a Wales frame and a "remainder" frame.
# Wales: the column(s) under the 'W92000004' top-level header, relabelled.
dh15w = raw_data_2015['W92000004']
dh15w.columns = ['total_2015']

# Data column at position 1 minus Wales (presumably the England-only figure;
# confirm that position 1 is the England-and-Wales total).
# Built as a fresh frame: subtracting in place on an iloc selection makes
# pandas emit SettingWithCopyWarning.
dh15e = (raw_data_2015.iloc[:, 1] - dh15w['total_2015']).to_frame(name='total_2015')
263 ```
264
265 ```python
266 # dh18 = raw_data_2018.iloc[:, [1, 2]]
267 # dh18.columns = ['total_2018', 'total_previous']
268 # # dh18.head()
269 ```
270
271 ```python
# Attach each earlier year's column to the 2020 frame, matching rows on the
# index. 2016-2019 use outer joins (index labels from either side are kept);
# 2015 uses a left join, so it adds no index labels beyond those already
# present.
england_joins = (
    (dh19e, 'total_2019', 'outer'),
    (dh18e, 'total_2018', 'outer'),
    (dh17e, 'total_2017', 'outer'),
    (dh16e, 'total_2016', 'outer'),
    (dh15e, 'total_2015', 'left'),
)
for year_frame, year_column, join_kind in england_joins:
    deaths_headlines_e = deaths_headlines_e.merge(
        year_frame[year_column],
        how=join_kind,
        left_index=True,
        right_index=True,
    )
deaths_headlines_e
279 ```
280
281 ```python
# NOTE: this cell repeats the Scotland reshaping done earlier in the notebook;
# it rebuilds deaths_headlines_s from raw_data_s, so re-running it yields the
# same frame.
# Pick the per-year columns (newest first), prefix them with 'total_' and
# replace the original index with consecutive numbers starting at 1.
scotland_years = ['2020', '2019', '2018', '2017', '2016', '2015']
deaths_headlines_s = raw_data_s[scotland_years]
deaths_headlines_s.columns = ['total_' + year for year in scotland_years]
deaths_headlines_s = deaths_headlines_s.reset_index(drop=True)
deaths_headlines_s.index += 1
deaths_headlines_s
287 ```
288
289 ```python
# Attach each earlier year's Wales column to the 2020 Wales frame, matching
# rows on the index. 2016-2019 use outer joins; 2015 uses a left join, so it
# adds no index labels beyond those already present.
wales_joins = (
    (dh19w, 'total_2019', 'outer'),
    (dh18w, 'total_2018', 'outer'),
    (dh17w, 'total_2017', 'outer'),
    (dh16w, 'total_2016', 'outer'),
    (dh15w, 'total_2015', 'left'),
)
for year_frame, year_column, join_kind in wales_joins:
    deaths_headlines_w = deaths_headlines_w.merge(
        year_frame[year_column],
        how=join_kind,
        left_index=True,
        right_index=True,
    )
deaths_headlines_w
297 ```
298
299 ```python
# Attach each earlier year's Northern Ireland column to the 2020 NI frame,
# matching rows on the index. 2016-2019 use outer joins; 2015 uses a left
# join, so it adds no index labels beyond those already present.
ni_joins = (
    (dh19i, 'total_2019', 'outer'),
    (dh18i, 'total_2018', 'outer'),
    (dh17i, 'total_2017', 'outer'),
    (dh16i, 'total_2016', 'outer'),
    (dh15i, 'total_2015', 'left'),
)
for year_frame, year_column, join_kind in ni_joins:
    deaths_headlines_i = deaths_headlines_i.merge(
        year_frame[year_column],
        how=join_kind,
        left_index=True,
        right_index=True,
    )
deaths_headlines_i
306 ```
307
308 ```python
# UK-wide frame: element-wise sum of the four per-nation frames. pandas lines
# the frames up by index label and column name before adding.
# NOTE(review): the data-source notes say the Northern Ireland week numbers do
# not match the England and Wales ones, and Scotland is renumbered from 1;
# confirm that adding by index label is the intended alignment.
deaths_headlines = deaths_headlines_e
for nation_frame in (deaths_headlines_w, deaths_headlines_i, deaths_headlines_s):
    deaths_headlines = deaths_headlines + nation_frame
deaths_headlines
311 ```
312
313 ```python
# Row-wise mean of the five earlier years, stored as 'previous_mean'.
# DataFrame.mean(axis=1) replaces .apply(np.mean, axis=1): it computes the
# same per-row mean (missing values skipped) without calling a Python
# function once per row.
previous_years = [
    'total_2019',
    'total_2018',
    'total_2017',
    'total_2016',
    'total_2015',
]
deaths_headlines['previous_mean'] = deaths_headlines[previous_years].mean(axis=1)
deaths_headlines
316 ```
317
318 ```python
# Line chart of the per-year totals against the frame's index, 2020 first.
yearly_total_columns = [
    'total_2020',
    'total_2019',
    'total_2018',
    'total_2017',
    'total_2016',
    'total_2015',
]
deaths_headlines[yearly_total_columns].plot(figsize=(10, 8))
320 ```
321
322 ```python
# Polar ("radar") chart of deaths by week: one line per year 2015-2019, the
# dashed mean of those years, and 2020 in red. Saved to 'deaths-radar.png'.
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection="polar")

# One angle per row of deaths_headlines, evenly spaced over a full turn, then
# reversed in order (np.flip) and rotated by 14 positions (np.roll).
theta = np.roll(
    np.flip(
        np.arange(len(deaths_headlines))/float(len(deaths_headlines))*2.*np.pi),
    14)

l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#e47d7d", label="2015")
l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#afc169", label="2016")
l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#7de4a6", label="2017")
l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#7da6e4", label="2018")
l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#d07de4", label="2019")

lmean, = ax.plot(theta, deaths_headlines['previous_mean'], color="black", linestyle='dashed', label="mean")

l20, = ax.plot(theta, deaths_headlines['total_2020'], color="red", label="2020")


def _closeline(line):
    """Append the line's first point to its end so it draws as a closed loop."""
    x, y = line.get_data()
    x = np.concatenate((x, [x[0]]))
    y = np.concatenate((y, [y[0]]))
    line.set_data(x, y)


# Close the loops for the earlier years and their mean; l20 is left open.
for year_line in (l19, l18, l17, l16, l15, lmean):
    _closeline(year_line)

# Label each angular tick with the corresponding index value.
ax.set_xticks(theta)
ax.set_xticklabels(deaths_headlines.index)
plt.legend()
plt.title("Deaths by week over years, all UK")
plt.savefig('deaths-radar.png')
plt.show()
362 ```
363
364 ```python
# Total of (2020 deaths - mean of previous years) over the rows from index
# label 12 onwards.
from_week_12 = deaths_headlines.loc[12:]
(from_week_12['total_2020'] - from_week_12['previous_mean']).sum()
366 ```
367
368 ```python
369
370 ```