Changed European data source and format
[covid19.git] / uk_deaths_import.md
1 ---
2 jupyter:
3 jupytext:
4 formats: ipynb,md
5 text_representation:
6 extension: .md
7 format_name: markdown
8 format_version: '1.3'
9 jupytext_version: 1.10.2
10 kernelspec:
11 display_name: Python 3
12 language: python
13 name: python3
14 ---
15
16 <!-- #region Collapsed="false" -->
17 Data from:
18
19 * [Office of National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (Endland and Wales) Weeks start on a Saturday.
20 * [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
21 * [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.
22
23 <!-- #endregion -->
24
25 ```python Collapsed="false"
26 import itertools
27 import collections
28 import json
29 import pandas as pd
30 import numpy as np
31 from scipy.stats import gmean
32 import datetime
33
34 import matplotlib as mpl
35 import matplotlib.pyplot as plt
36 %matplotlib inline
37
38 from sqlalchemy.types import Integer, Text, String, DateTime, Float
39 from sqlalchemy import create_engine
40 %load_ext sql
41 ```
42
43 ```python Collapsed="false"
44 connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
45 ```
46
47 ```python Collapsed="false"
48 %sql $connection_string
49 ```
50
51 ```python Collapsed="false"
52 engine = create_engine(connection_string)
53 ```
54
55 ```python Collapsed="false"
56 england_wales_filename = 'uk-deaths-data/publishedweek052021.xlsx'
57 ```
58
59 ```python Collapsed="false"
60 scotland_filename = 'uk-deaths-data/Scottish Government COVID-19 data (15 February 2021).xlsx'
61 ```
62
63 ```python
64 n_ireland_filename = 'uk-deaths-data/Weekly_Deaths_0.xlsx'
65 ```
66
67 ```python Collapsed="false"
68 # eng_xls = pd.read_excel(england_wales_filename,
69 # sheet_name="Weekly figures 2021",
70 # skiprows=[0, 1, 2, 3],
71 # skipfooter=11,
72 # header=0,
73 # index_col=[1]
74 # ).iloc[:99].T
75 # eng_xls
76 ```
77
78 ```python Collapsed="false"
79 eng_xls = pd.read_excel(england_wales_filename,
80 sheet_name="Weekly figures 2021",
81 skiprows=[0, 1, 2, 3],
82 skipfooter=10,
83 header=0,
84 index_col=[1]
85 ).T
86 eng_xls
87 ```
88
89 ```python Collapsed="false"
90 eng_xls_columns = list(eng_xls.columns)
91
92 for i, c in enumerate(eng_xls_columns):
93 # print(i, c, type(c), isinstance(c, float))
94 if isinstance(c, float) and np.isnan(c):
95 if eng_xls.iloc[0].iloc[i] is not pd.NaT:
96 eng_xls_columns[i] = eng_xls.iloc[0].iloc[i]
97
98 # np.isnan(eng_xls_columns[0])
99 # eng_xls_columns
100
101 eng_xls.columns = eng_xls_columns
102 # eng_xls.columns
103 ```
104
105 ```python
106 eng_xls
107 ```
108
109 ```python
110 rd = eng_xls.iloc[1:][['Week ended', 'Wales']].reset_index(level=0).rename(
111 columns={'Week ended': 'date_up_to', 'Wales': 'deaths',
112 'index': 'week'}
113 )
114 rd['year'] = 2021
115 rd['nation'] = 'Wales'
116 rd.dropna(inplace=True)
117 rd.head()
118 ```
119
120 ```python
121 query_string = '''
122 delete from all_causes_deaths
123 where nation = 'Wales'
124 and year = 2021;
125 '''
126 with engine.connect() as connection:
127 connection.execute(query_string)
128 ```
129
130 ```python
131 rd.to_sql(
132 'all_causes_deaths',
133 engine,
134 if_exists='append',
135 index=False)
136 ```
137
138 ```python
139 %sql select * from all_causes_deaths where year = 2021 limit 10
140 ```
141
142 ```python
143 rd = eng_xls.iloc[1:][['Week ended', 'Total deaths, all ages (2021)', 'Wales']].reset_index(level=0).rename(
144 columns={'Week ended': 'date_up_to',
145 'Total deaths, all ages (2021)': 'ew_deaths',
146 'Wales': 'w_deaths',
147 'index': 'week'}
148 )
149 rd['year'] = 2021
150 rd['nation'] = 'England'
151 rd['deaths'] = rd['ew_deaths'] - rd['w_deaths']
152 rd.drop(labels=['ew_deaths', 'w_deaths'], axis='columns', inplace=True)
153 rd.dropna(inplace=True)
154 rd.tail()
155 ```
156
157 ```python
158 query_string = '''
159 delete from all_causes_deaths
160 where nation = 'England'
161 and year = 2021;
162 '''
163 with engine.connect() as connection:
164 connection.execute(query_string)
165 ```
166
167 ```python
168 rd.to_sql(
169 'all_causes_deaths',
170 engine,
171 if_exists='append',
172 index=False)
173 ```
174
175 ```python
176 %sql select nation, sum(deaths), count(*) from all_causes_deaths where year = 2021 group by nation
177 ```
178
179 ```python
180 ni_xls = pd.read_excel(n_ireland_filename,
181 sheet_name='Table 1',
182 skiprows=[0, 1, 2, 3],
183 header=0,
184 ).rename(
185 columns={'Week Ending (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2021P)': 'deaths',
186 'Registration Week': 'week'})
187 rd = ni_xls[ni_xls['deaths'].notna()][['week', 'date_up_to', 'deaths']]
188 rd['year'] = 2021
189 rd['nation'] = 'Northern Ireland'
190 rd
191 ```
192
193 ```python
194 query_string = '''
195 delete from all_causes_deaths
196 where nation = 'Northern Ireland'
197 and year = 2021;
198 '''
199 with engine.connect() as connection:
200 connection.execute(query_string)
201 ```
202
203 ```python
204 rd.to_sql(
205 'all_causes_deaths',
206 engine,
207 if_exists='append',
208 index=False)
209 ```
210
211 ```python
212 %sql select nation from all_causes_deaths group by nation
213 ```
214
215 ```python
216 # sco_xls = pd.read_excel(scotland_filename,
217 # sheet_name="2.2_excess",
218 # skiprows=[0, 1, 2, 3],
219 # skipfooter=3,
220 # header=0,
221 # index_col=[1]
222 # ).iloc[:91].T
223 ```
224
225 ```python
226 import openpyxl
227 ```
228
229 ```python
230 wb = openpyxl.load_workbook(scotland_filename, read_only=True)#, data_only=True, keep_links=False)
231 sheet = wb.worksheets[7]
232 sheet
233 ```
234
235 ```python
236 sheet.reset_dimensions()
237 ```
238
239 ```python
240 scot_elems = [[value for value in row] for row in sheet.values]
241 scot_cols = scot_elems[3]
242 scot_dicts = [{k: v for k, v in zip(scot_cols, row)} for row in scot_elems[4:]]
243 scot_data = pd.DataFrame(scot_dicts)
244 rd = scot_data[scot_data.date >= '2021'].rename(
245 columns={'week_number': 'week', 'date': 'date_up_to', 'total_deaths': 'deaths'})[['week', 'date_up_to', 'deaths']]
246 rd['year'] = 2021
247 rd['nation'] = 'Scotland'
248 rd
249 ```
250
251 ```python
252 query_string = '''
253 delete from all_causes_deaths
254 where nation = 'Scotland'
255 and year = 2021;
256 '''
257 with engine.connect() as connection:
258 connection.execute(query_string)
259 ```
260
261 ```python
262 rd.to_sql(
263 'all_causes_deaths',
264 engine,
265 if_exists='append',
266 index=False)
267 ```
268
269 ```python
270 %sql select nation, sum(deaths), count(*) from all_causes_deaths where year = 2021 group by nation
271 ```
272
273 ```python
274
275 ```
276
277 # Create graphs
278
279 ```python
280 qstr = '''select week, year, deaths
281 from all_causes_deaths
282 where nation = 'England' '''
283 deaths_e = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
284 deaths_e.head()
285 ```
286
287 ```python
288 qstr = '''select week, year, deaths
289 from all_causes_deaths
290 where nation = 'Wales' '''
291 deaths_w = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
292 deaths_w.head()
293 ```
294
295 ```python
296 qstr = '''select week, year, deaths
297 from all_causes_deaths
298 where nation = 'Scotland' '''
299 deaths_s = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
300 deaths_s.head()
301 ```
302
303 ```python
304 qstr = '''select week, year, deaths
305 from all_causes_deaths
306 where nation = 'Northern Ireland' '''
307 deaths_i = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
308 deaths_i.head()
309 ```
310
311 ```python
312 qstr = '''select week, avg(deaths) as prev_mean
313 from all_causes_deaths
314 where year <= 2019 and nation='England'
315 group by week
316 order by week'''
317 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
318 deaths_prev.head()
319 deaths_e = deaths_e.merge(deaths_prev, on='week')
320 deaths_e.head()
321 ```
322
323 ```python
324 qstr = '''select week, avg(deaths) as prev_mean
325 from all_causes_deaths
326 where year <= 2019 and nation='Wales'
327 group by week
328 order by week'''
329 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
330 deaths_prev.head()
331 deaths_w = deaths_w.merge(deaths_prev, on='week')
332 deaths_w.head()
333 ```
334
335 ```python
336 qstr = '''select week, avg(deaths) as prev_mean
337 from all_causes_deaths
338 where year <= 2019 and nation='Scotland'
339 group by week
340 order by week'''
341 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
342 deaths_prev.head()
343 deaths_s = deaths_s.merge(deaths_prev, on='week')
344 deaths_s.head()
345 ```
346
347 ```python
348 qstr = '''select week, avg(deaths) as prev_mean
349 from all_causes_deaths
350 where year <= 2019 and nation='Northern Ireland'
351 group by week
352 order by week'''
353 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
354 deaths_prev.head()
355 deaths_i = deaths_i.merge(deaths_prev, on='week')
356 deaths_i.head()
357 ```
358
359 ```python Collapsed="false"
360 deaths = deaths_e + deaths_w + deaths_i + deaths_s
361 deaths.head()
362 ```
363
364 ```python Collapsed="false"
365 deaths[[2021, 2020, 2019, 2018, 2017, 2016, 2015]].plot(figsize=(14, 8))
366 ```
367
368 ```python Collapsed="false"
369 deaths[[2021, 2020, 'prev_mean']].plot(figsize=(10, 8))
370 ```
371
372 ```python Collapsed="false"
373 deaths_i.plot()
374 ```
375
376 ```python
377 deaths[2020].sum() - deaths.prev_mean.sum()
378 ```
379
380 ```python
381 def _closeline(line):
382 x, y = line.get_data()
383 x = np.concatenate((x, [x[0]]))
384 y = np.concatenate((y, [y[0]]))
385 line.set_data(x, y)
386
387 def _closeline_connect(lines):
388 for line0, line1 in zip(lines, lines[1:]):
389 x0, y0 = line0.get_data()
390 x1, y1 = line1.get_data()
391
392 x0 = np.concatenate((x0, [x1[0]]))
393 y0 = np.concatenate((y0, [y1[0]]))
394 line0.set_data(x0, y0)
395 ```
396
397 ```python Collapsed="false"
398 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
399
400 def create_and_save_radar_plot(dataset, title_string, filename_suffix):
401
402 fig = plt.figure(figsize=(10, 10))
403 ax = fig.add_subplot(111, projection="polar")
404
405 theta = np.roll(
406 np.flip(
407 np.arange(len(dataset))/float(len(dataset))*2.*np.pi),
408 14)
409 l15, = ax.plot(theta, dataset[2015], color="#e47d7d", label="2015") # 0
410 l16, = ax.plot(theta, dataset[2016], color="#afc169", label="2016") # 72 , d0e47d
411 l17, = ax.plot(theta, dataset[2017], color="#7de4a6", label="2017") # 144
412 l18, = ax.plot(theta, dataset[2018], color="#7da6e4", label="2018") # 216
413 l19, = ax.plot(theta, dataset[2019], color="#d07de4", label="2019") # 288
414
415 lmean, = ax.plot(theta, dataset['prev_mean'], color="black", linestyle='dashed', label="mean, 15–19")
416
417 l20, = ax.plot(theta, dataset[2020], color="#bb0000", label="2020")
418 l21, = ax.plot(theta, dataset[2021], color="#ff0000", label="2021")
419
420 # deaths_headlines.total_2019.plot(ax=ax)
421
422 _closeline(lmean)
423 _closeline_connect([l15, l16, l17, l18, l19, l20, l21])
424
425 ax.set_xticks(theta)
426 ax.set_xticklabels(dataset.index)
427 plt.legend()
428 plt.title(f"Deaths by week over years, {title_string}")
429 plt.savefig(f'deaths_radar_2021{filename_suffix}.png')
430 plt.show()
431 ```
432
433 ```python
434 create_and_save_radar_plot(deaths, 'all UK', '')
435 ```
436
437 ```python Collapsed="false"
438 # # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
439
440 # dhna = deaths# .dropna()
441
442 # fig = plt.figure(figsize=(10, 10))
443 # ax = fig.add_subplot(111, projection="polar")
444
445 # theta = np.roll(
446 # np.flip(
447 # np.arange(len(dhna))/float(len(dhna))*2.*np.pi),
448 # 14)
449 # # l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#b56363", label="2015") # 0
450 # # l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#a4b563", label="2016") # 72
451 # # l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#63b584", label="2017") # 144
452 # # l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#6384b5", label="2018") # 216
453 # # l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#a4635b", label="2019") # 288
454 # l15, = ax.plot(theta, dhna[2015], color="#e47d7d", label="2015") # 0
455 # l16, = ax.plot(theta, dhna[2016], color="#afc169", label="2016") # 72 , d0e47d
456 # l17, = ax.plot(theta, dhna[2017], color="#7de4a6", label="2017") # 144
457 # l18, = ax.plot(theta, dhna[2018], color="#7da6e4", label="2018") # 216
458 # l19, = ax.plot(theta, dhna[2019], color="#d07de4", label="2019") # 288
459
460 # lmean, = ax.plot(theta, dhna['prev_mean'], color="black", linestyle='dashed', label="mean, 15–19")
461
462 # l20, = ax.plot(theta, dhna[2020], color="#bb0000", label="2020")
463 # l21, = ax.plot(theta, dhna[2021], color="#ff0000", label="2021")
464
465 # # deaths_headlines.total_2019.plot(ax=ax)
466
467 # _closeline(lmean)
468 # _closeline_connect([l15, l16, l17, l18, l19, l20, l21])
469
470 # ax.set_xticks(theta)
471 # ax.set_xticklabels(dhna.index)
472 # plt.legend()
473 # plt.title("Deaths by week over years, all UK")
474 # plt.savefig('deaths-radar-2021.png')
475 # plt.show()
476 ```
477
478 <!-- #region Collapsed="false" -->
479 # Plots for UK nations
480 <!-- #endregion -->
481
482 ```python
483 create_and_save_radar_plot(deaths_e, 'England', '_england')
484 ```
485
486 ```python
487 create_and_save_radar_plot(deaths_w, 'Wales', '_wales')
488 ```
489
490 ```python
491 create_and_save_radar_plot(deaths_s, 'Scotland', '_scotland')
492 ```
493
494 ```python
495 create_and_save_radar_plot(deaths_i, 'Northern Ireland', '_northern_ireland')
496 ```
497
498 ```python Collapsed="false"
499
500 ```