---
jupyter:
  jupytext:
    formats: ipynb,md
    text_representation:
      extension: .md
      format_name: markdown
      format_version: '1.2'
      jupytext_version: 1.3.4
  kernelspec:
    display_name: Python 3
    language: python
    name: python3
---

Data from:

* [Office for National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (England and Wales). Weeks start on a Saturday.
* [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
* [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO 8601 week numbers, which start on a Monday; see the sketch after this list.

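The differing week conventions mean the three sources don't line up week-for-week. As a small illustration, `datetime.date.isocalendar()` gives the ISO 8601 week number that the Scottish figures use; the date below is arbitrary and is only there to show the call, not part of the data preparation.

```python
from datetime import date

# ISO 8601 week number (weeks starting on a Monday, as used by National
# Records of Scotland) for an arbitrary date in spring 2020. A Saturday-based
# numbering, as in the ONS and NISRA files, can give a different week number
# for the same date.
print(date(2020, 4, 10).isocalendar())  # ISO year 2020, week 15, weekday 5 (Friday)
```
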
```python
import itertools
import collections
import pandas as pd
import numpy as np
from scipy.stats import gmean

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
```

```python
!ls uk-deaths-data
```

```python
# Northern Ireland weekly deaths, 2015
raw_data_2015 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2015.csv',
                            parse_dates=[1, 2], dayfirst=True,
                            index_col=0,
                            header=[0, 1]
                            )
dh15i = raw_data_2015.iloc[:, [2]]
dh15i.columns = ['total_2015']
# dh15i.head()
```

```python
# Northern Ireland weekly deaths, 2016
raw_data_2016 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2016.csv',
                            parse_dates=[1, 2], dayfirst=True,
                            index_col=0,
                            header=[0, 1]
                            )
dh16i = raw_data_2016.iloc[:, [2]]
dh16i.columns = ['total_2016']
# dh16i.head()
```

```python
# Northern Ireland weekly deaths, 2017
raw_data_2017 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2017.csv',
                            parse_dates=[1, 2], dayfirst=True,
                            index_col=0,
                            header=[0, 1]
                            )
dh17i = raw_data_2017.iloc[:, [2]]
dh17i.columns = ['total_2017']
# dh17i.head()
```

```python
# Northern Ireland weekly deaths, 2018
raw_data_2018 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2018.csv',
                            parse_dates=[1, 2], dayfirst=True,
                            index_col=0,
                            header=[0, 1]
                            )
dh18i = raw_data_2018.iloc[:, [2]]
dh18i.columns = ['total_2018']
# dh18i.head()
```

```python
# Northern Ireland weekly deaths, 2019
raw_data_2019 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2019.csv',
                            parse_dates=[1, 2], dayfirst=True,
                            index_col=0,
                            header=[0, 1]
                            )
dh19i = raw_data_2019.iloc[:, [2]]
dh19i.columns = ['total_2019']
# dh19i.head()
```

```python
# Northern Ireland weekly deaths, 2020 (this file has a slightly different layout)
raw_data_2020_i = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2020.csv',
                              parse_dates=[1], dayfirst=True,
                              index_col=0,
                              header=[0, 1]
                              )
deaths_headlines_i = raw_data_2020_i.iloc[:, [1]]
deaths_headlines_i.columns = ['total_2020']
deaths_headlines_i.head()
```

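The six Northern Ireland loads above all follow one pattern. The sketch below shows the same reads as a loop; it assumes each year's file keeps the layout used in the cells above (weekly total in column 2 for 2015–2019, column 1 for 2020).

```python
# Sketch: the per-year Northern Ireland loads above, condensed into a loop.
# Assumes the 2015-2019 files have the weekly total in column 2 and the
# 2020 file has it in column 1, as in the cells above.
ni_frames = {}
for year in range(2015, 2021):
    date_cols, total_col = ([1], 1) if year == 2020 else ([1, 2], 2)
    raw = pd.read_csv(f'uk-deaths-data/Weekly_Deaths_NI_{year}.csv',
                      parse_dates=date_cols, dayfirst=True,
                      index_col=0, header=[0, 1])
    frame = raw.iloc[:, [total_col]]
    frame.columns = [f'total_{year}']
    ni_frames[year] = frame
```
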
```python
# Scottish weekly deaths, 2015-2020
raw_data_s = pd.read_csv('uk-deaths-data/weekly-deaths-april-20-scotland.csv',
                         index_col=0,
                         header=0,
                         skiprows=2
                         )
# raw_data_s
```

```python
deaths_headlines_s = raw_data_s[list(reversed('2015 2016 2017 2018 2019 2020'.split()))]
deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns]
deaths_headlines_s.reset_index(drop=True, inplace=True)
deaths_headlines_s.index = deaths_headlines_s.index + 1
deaths_headlines_s
```

```python
# ONS weekly death registrations, England & Wales, 2020
raw_data_2020 = pd.read_csv('uk-deaths-data/publishedweek182020.csv',
                            parse_dates=[1], dayfirst=True,
                            index_col=0,
                            header=[0, 1])
```

```python
# raw_data_2020.head()
```

```python
# Wales column (ONS geography code W92000004)
raw_data_2020['W92000004', 'Wales']
```

```python
raw_data_2019 = pd.read_csv('uk-deaths-data/publishedweek522019.csv',
                            parse_dates=[1], dayfirst=True,
                            index_col=0,
                            header=[0, 1])
# raw_data_2019.head()
```

```python
raw_data_2018 = pd.read_csv('uk-deaths-data/publishedweek522018.csv',
                            parse_dates=[1], dayfirst=True,
                            index_col=0,
                            header=[0, 1])
# raw_data_2018.head()
```

```python
raw_data_2017 = pd.read_csv('uk-deaths-data/publishedweek522017.csv',
                            parse_dates=[1], dayfirst=True,
                            index_col=0,
                            header=[0, 1])
# raw_data_2017.head()
```

```python
raw_data_2016 = pd.read_csv('uk-deaths-data/publishedweek522016.csv',
                            parse_dates=[1], dayfirst=True,
                            index_col=0,
                            header=[0, 1])
# raw_data_2016.head()
```

```python
raw_data_2015 = pd.read_csv('uk-deaths-data/publishedweek2015.csv',
                            parse_dates=[1], dayfirst=True,
                            index_col=0,
                            header=[0, 1])
# raw_data_2015.head()
```

```python
# England only: England & Wales totals minus the Wales counts
deaths_headlines_e = raw_data_2020.iloc[:, [1]]
deaths_headlines_e.columns = ['total_2020']
deaths_headlines_w = raw_data_2020['W92000004']
deaths_headlines_w.columns = ['total_2020']
deaths_headlines_e.total_2020 -= deaths_headlines_w.total_2020
deaths_headlines_e
```

```python
dh19e = raw_data_2019.iloc[:, [1]]
dh19w = raw_data_2019['W92000004']
dh19e.columns = ['total_2019']
dh19w.columns = ['total_2019']
dh19e.total_2019 -= dh19w.total_2019
dh19e.head()
```

```python
dh19w.head()
```

```python
dh18e = raw_data_2018.iloc[:, [1]]
dh18w = raw_data_2018['W92000004']
dh18e.columns = ['total_2018']
dh18w.columns = ['total_2018']
dh18e.total_2018 -= dh18w.total_2018
# dh18e.head()
```

```python
dh17e = raw_data_2017.iloc[:, [1]]
dh17w = raw_data_2017['W92000004']
dh17e.columns = ['total_2017']
dh17w.columns = ['total_2017']
dh17e.total_2017 -= dh17w.total_2017
# dh17e.head()
```

```python
dh16e = raw_data_2016.iloc[:, [1]]
dh16w = raw_data_2016['W92000004']
dh16e.columns = ['total_2016']
dh16w.columns = ['total_2016']
dh16e.total_2016 -= dh16w.total_2016
# dh16e.head()
```

```python
dh15e = raw_data_2015.iloc[:, [1]]
dh15w = raw_data_2015['W92000004']
dh15e.columns = ['total_2015']
dh15w.columns = ['total_2015']
dh15e.total_2015 -= dh15w.total_2015
# dh15e.head()
```

```python
# dh18 = raw_data_2018.iloc[:, [1, 2]]
# dh18.columns = ['total_2018', 'total_previous']
# # dh18.head()
```

```python
# Combine the per-year England columns into one frame indexed by week number
deaths_headlines_e = deaths_headlines_e.merge(dh19e['total_2019'], how='outer', left_index=True, right_index=True)
deaths_headlines_e = deaths_headlines_e.merge(dh18e['total_2018'], how='outer', left_index=True, right_index=True)
deaths_headlines_e = deaths_headlines_e.merge(dh17e['total_2017'], how='outer', left_index=True, right_index=True)
deaths_headlines_e = deaths_headlines_e.merge(dh16e['total_2016'], how='outer', left_index=True, right_index=True)
# deaths_headlines = deaths_headlines.merge(dh15['total_2015'], how='outer', left_index=True, right_index=True)
deaths_headlines_e = deaths_headlines_e.merge(dh15e['total_2015'], how='left', left_index=True, right_index=True)
deaths_headlines_e
```

```python
# Scotland: per-year columns, keeping weeks 1-52
deaths_headlines_s = raw_data_s[list(reversed('2015 2016 2017 2018 2019 2020'.split()))]
deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns]
deaths_headlines_s.reset_index(drop=True, inplace=True)
deaths_headlines_s.index = deaths_headlines_s.index + 1
deaths_headlines_s = deaths_headlines_s.loc[1:52]
deaths_headlines_s
```

```python
# Combine the per-year Wales columns
deaths_headlines_w = deaths_headlines_w.merge(dh19w['total_2019'], how='outer', left_index=True, right_index=True)
deaths_headlines_w = deaths_headlines_w.merge(dh18w['total_2018'], how='outer', left_index=True, right_index=True)
deaths_headlines_w = deaths_headlines_w.merge(dh17w['total_2017'], how='outer', left_index=True, right_index=True)
deaths_headlines_w = deaths_headlines_w.merge(dh16w['total_2016'], how='outer', left_index=True, right_index=True)
# deaths_headlines = deaths_headlines.merge(dh15['total_2015'], how='outer', left_index=True, right_index=True)
deaths_headlines_w = deaths_headlines_w.merge(dh15w['total_2015'], how='left', left_index=True, right_index=True)
deaths_headlines_w
```

```python
# Combine the per-year Northern Ireland columns
deaths_headlines_i = deaths_headlines_i.merge(dh19i['total_2019'], how='outer', left_index=True, right_index=True)
deaths_headlines_i = deaths_headlines_i.merge(dh18i['total_2018'], how='outer', left_index=True, right_index=True)
deaths_headlines_i = deaths_headlines_i.merge(dh17i['total_2017'], how='outer', left_index=True, right_index=True)
deaths_headlines_i = deaths_headlines_i.merge(dh16i['total_2016'], how='outer', left_index=True, right_index=True)
deaths_headlines_i = deaths_headlines_i.merge(dh15i['total_2015'], how='left', left_index=True, right_index=True)
deaths_headlines_i
```

```python
# UK total: add the four nations, aligned by week number and year column
deaths_headlines = deaths_headlines_e + deaths_headlines_w + deaths_headlines_i + deaths_headlines_s
deaths_headlines
```
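
The addition above aligns the four frames on their week-number index and year columns, so a week present in one nation but missing from another becomes NaN in the UK total. A quick sketch for eyeballing how the indexes line up:

```python
# Sketch: show each nation's range of week numbers before relying on the sum above.
for name, df in [('England', deaths_headlines_e), ('Wales', deaths_headlines_w),
                 ('Northern Ireland', deaths_headlines_i), ('Scotland', deaths_headlines_s)]:
    print(name, df.index.min(), '-', df.index.max(), f'({len(df)} weeks)')
```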

```python
# Week-by-week mean of the five previous years (2015-2019)
deaths_headlines['previous_mean'] = deaths_headlines['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
deaths_headlines
```

```python
deaths_headlines['total_2020 total_2019 total_2018 total_2017 total_2016 total_2015'.split()].plot(figsize=(10, 8))
```

```python
# Radar plot code taken f
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection="polar")

# One angle per week of the year
theta = np.roll(
    np.flip(
        np.arange(len(deaths_headlines))/float(len(deaths_headlines))*2.*np.pi),
    14)
# l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#b56363", label="2015") # 0
# l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#a4b563", label="2016") # 72
# l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#63b584", label="2017") # 144
# l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#6384b5", label="2018") # 216
# l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#a4635b", label="2019") # 288
l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#e47d7d", label="2015") # 0
l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#7de4a6", label="2017") # 144
l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#7da6e4", label="2018") # 216
l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#d07de4", label="2019") # 288

lmean, = ax.plot(theta, deaths_headlines['previous_mean'], color="black", linestyle='dashed', label="mean")

l20, = ax.plot(theta, deaths_headlines['total_2020'], color="red", label="2020")

# deaths_headlines.total_2019.plot(ax=ax)

def _closeline(line):
    # Join the last point back to the first so each year's line forms a closed loop
    x, y = line.get_data()
    x = np.concatenate((x, [x[0]]))
    y = np.concatenate((y, [y[0]]))
    line.set_data(x, y)

# Close the loop for each complete year (2020 is partial, so its line is left open)
[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]

ax.set_xticks(theta)
ax.set_xticklabels(deaths_headlines.index)
plt.legend()
plt.title("Deaths by week over years, all UK")
plt.savefig('deaths-radar.png')
plt.show()
```

```python
# Cumulative excess deaths from week 12 of 2020 onwards, relative to the 2015-2019 mean
(deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum()
```
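
The figure above collapses the excess over the 2015–2019 mean from week 12 onwards into a single number. A sketch of the same quantity kept week by week, using only columns already defined above:

```python
# Sketch: weekly excess deaths (2020 minus the 2015-2019 mean) from week 12,
# plus the running total that the single figure above sums over.
excess_weekly = (deaths_headlines.loc[12:, 'total_2020']
                 - deaths_headlines.loc[12:, 'previous_mean'])
excess_weekly.cumsum()
```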