General updates
[covid19.git] / uk_deaths_import.md
1 ---
2 jupyter:
3 jupytext:
4 formats: ipynb,md
5 text_representation:
6 extension: .md
7 format_name: markdown
8 format_version: '1.3'
9 jupytext_version: 1.11.1
10 kernelspec:
11 display_name: Python 3
12 language: python
13 name: python3
14 ---
15
16 <!-- #region Collapsed="false" -->
17 Data from:
18
19 * [Office of National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (Endland and Wales) Weeks start on a Saturday.
20 * [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
21 * [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.
22
23 <!-- #endregion -->
24
25 ```python Collapsed="false"
26 import itertools
27 import collections
28 import json
29 import pandas as pd
30 import numpy as np
31 from scipy.stats import gmean
32 import datetime
33
34 import matplotlib as mpl
35 import matplotlib.pyplot as plt
36 %matplotlib inline
37
38 from sqlalchemy.types import Integer, Text, String, DateTime, Float
39 from sqlalchemy import create_engine
40 %load_ext sql
41 ```
42
43 ```python Collapsed="false"
44 connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
45 ```
46
47 ```python Collapsed="false"
48 %sql $connection_string
49 ```
50
51 ```python Collapsed="false"
52 engine = create_engine(connection_string)
53 ```
54
55 ```python
56 !ls uk-deaths-data/publishedweek*2021*xlsx
57 ```
58
59 ```python Collapsed="false"
60 england_wales_filename = 'uk-deaths-data/publishedweek122021.xlsx'
61 ```
62
63 ```python Collapsed="false"
64 scotland_filename = 'uk-deaths-data/Scottish Government COVID-19 data (08 April 2021).xlsx'
65 ```
66
67 ```python
68 n_ireland_filename = 'uk-deaths-data/Weekly_Deaths - w e 2nd April 2021.XLSX'
69 ```
70
71 ```python Collapsed="false"
72 # eng_xls = pd.read_excel(england_wales_filename,
73 # sheet_name="Weekly figures 2021",
74 # skiprows=[0, 1, 2, 3],
75 # skipfooter=11,
76 # header=0,
77 # index_col=[1]
78 # ).iloc[:99].T
79 # eng_xls
80 ```
81
82 ```python Collapsed="false"
83 eng_xls = pd.read_excel(england_wales_filename,
84 sheet_name="Weekly figures 2021",
85 skiprows=[0, 1, 2, 3],
86 skipfooter=10,
87 header=0,
88 index_col=[1]
89 ).T
90 eng_xls
91 ```
92
93 ```python Collapsed="false"
94 eng_xls_columns = list(eng_xls.columns)
95
96 for i, c in enumerate(eng_xls_columns):
97 # print(i, c, type(c), isinstance(c, float))
98 if isinstance(c, float) and np.isnan(c):
99 if eng_xls.iloc[0].iloc[i] is not pd.NaT:
100 eng_xls_columns[i] = eng_xls.iloc[0].iloc[i]
101
102 # np.isnan(eng_xls_columns[0])
103 # eng_xls_columns
104
105 eng_xls.columns = eng_xls_columns
106 # eng_xls.columns
107 ```
108
109 ```python
110 eng_xls
111 ```
112
113 ```python
114 rd = eng_xls.iloc[1:][['Week ended', 'Wales']].reset_index(level=0).rename(
115 columns={'Week ended': 'date_up_to', 'Wales': 'deaths',
116 'index': 'week'}
117 )
118 rd['year'] = 2021
119 rd['nation'] = 'Wales'
120 rd.dropna(inplace=True)
121 rd.tail()
122 ```
123
124 ```python
125 query_string = '''
126 delete from all_causes_deaths
127 where nation = 'Wales'
128 and year = 2021;
129 '''
130 with engine.connect() as connection:
131 connection.execute(query_string)
132 ```
133
134 ```python
135 rd.to_sql(
136 'all_causes_deaths',
137 engine,
138 if_exists='append',
139 index=False)
140 ```
141
142 ```python
143 %sql select * from all_causes_deaths where year = 2021 order by date_up_to desc limit 10
144 ```
145
146 ```python
147 rd = eng_xls.iloc[1:][['Week ended', 'Total deaths, all ages (2021)', 'Wales']].reset_index(level=0).rename(
148 columns={'Week ended': 'date_up_to',
149 'Total deaths, all ages (2021)': 'ew_deaths',
150 'Wales': 'w_deaths',
151 'index': 'week'}
152 )
153 rd['year'] = 2021
154 rd['nation'] = 'England'
155 rd['deaths'] = rd['ew_deaths'] - rd['w_deaths']
156 rd.drop(labels=['ew_deaths', 'w_deaths'], axis='columns', inplace=True)
157 rd.dropna(inplace=True)
158 rd.tail()
159 ```
160
161 ```python
162 query_string = '''
163 delete from all_causes_deaths
164 where nation = 'England'
165 and year = 2021;
166 '''
167 with engine.connect() as connection:
168 connection.execute(query_string)
169 ```
170
171 ```python
172 rd.to_sql(
173 'all_causes_deaths',
174 engine,
175 if_exists='append',
176 index=False)
177 ```
178
179 ```python
180 %sql select nation, sum(deaths), count(*) from all_causes_deaths where year = 2021 group by nation
181 ```
182
183 ```python
184 ni_xls = pd.read_excel(n_ireland_filename,
185 sheet_name='Table 1',
186 skiprows=[0, 1, 2],
187 header=0,
188 ).rename(
189 columns={'Week Ending (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2021P)': 'deaths',
190 'Registration Week': 'week'})
191 rd = ni_xls[ni_xls['deaths'].notna()][['week', 'date_up_to', 'deaths']]
192 rd['year'] = 2021
193 rd['nation'] = 'Northern Ireland'
194 rd.tail()
195 ```
196
197 ```python
198 query_string = '''
199 delete from all_causes_deaths
200 where nation = 'Northern Ireland'
201 and year = 2021;
202 '''
203 with engine.connect() as connection:
204 connection.execute(query_string)
205 ```
206
207 ```python
208 rd.to_sql(
209 'all_causes_deaths',
210 engine,
211 if_exists='append',
212 index=False)
213 ```
214
215 ```python
216 %sql select nation, count(*) from all_causes_deaths group by nation
217 ```
218
219 ```python
220 # sco_xls = pd.read_excel(scotland_filename,
221 # sheet_name="2.2_excess",
222 # skiprows=[0, 1, 2, 3],
223 # skipfooter=3,
224 # header=0,
225 # index_col=[1]
226 # ).iloc[:91].T
227 ```
228
229 ```python
230 import openpyxl
231 ```
232
233 ```python
234 wb = openpyxl.load_workbook(scotland_filename, read_only=True)#, data_only=True, keep_links=False)
235 sheet = wb.worksheets[7]
236 sheet
237 ```
238
239 ```python
240 sheet.reset_dimensions()
241 ```
242
243 ```python
244 scot_elems = [[value for value in row] for row in sheet.values]
245 scot_cols = scot_elems[3]
246 scot_dicts = [{k: v for k, v in zip(scot_cols, row)} for row in scot_elems[4:]]
247 scot_data = pd.DataFrame(scot_dicts)
248 rd = scot_data[scot_data.date >= '2021'].rename(
249 columns={'week_number': 'week', 'date': 'date_up_to', 'total_deaths': 'deaths'})[['week', 'date_up_to', 'deaths']]
250 rd['year'] = 2021
251 rd['nation'] = 'Scotland'
252 rd
253 ```
254
255 ```python
256 query_string = '''
257 delete from all_causes_deaths
258 where nation = 'Scotland'
259 and year = 2021;
260 '''
261 with engine.connect() as connection:
262 connection.execute(query_string)
263 ```
264
265 ```python
266 rd.to_sql(
267 'all_causes_deaths',
268 engine,
269 if_exists='append',
270 index=False)
271 ```
272
273 ```python
274 %sql select nation, sum(deaths), count(*) from all_causes_deaths where year = 2021 group by nation
275 ```
276
277 ```python
278
279 ```
280
281 # Create graphs
282
283 ```python
284 qstr = '''select week, year, deaths
285 from all_causes_deaths
286 where nation = 'England' '''
287 deaths_e = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
288 deaths_e.head()
289 ```
290
291 ```python
292 qstr = '''select week, year, deaths
293 from all_causes_deaths
294 where nation = 'Wales' '''
295 deaths_w = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
296 deaths_w.head()
297 ```
298
299 ```python
300 qstr = '''select week, year, deaths
301 from all_causes_deaths
302 where nation = 'Scotland' '''
303 deaths_s = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
304 deaths_s.head()
305 ```
306
307 ```python
308 qstr = '''select week, year, deaths
309 from all_causes_deaths
310 where nation = 'Northern Ireland' '''
311 deaths_i = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
312 deaths_i.head()
313 ```
314
315 ```python
316 qstr = '''select week, avg(deaths) as prev_mean
317 from all_causes_deaths
318 where year <= 2019 and nation='England'
319 group by week
320 order by week'''
321 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
322 deaths_prev.head()
323 deaths_e = deaths_e.merge(deaths_prev, on='week')
324 deaths_e.head()
325 ```
326
327 ```python
328 qstr = '''select week, avg(deaths) as prev_mean
329 from all_causes_deaths
330 where year <= 2019 and nation='Wales'
331 group by week
332 order by week'''
333 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
334 deaths_prev.head()
335 deaths_w = deaths_w.merge(deaths_prev, on='week')
336 deaths_w.head()
337 ```
338
339 ```python
340 qstr = '''select week, avg(deaths) as prev_mean
341 from all_causes_deaths
342 where year <= 2019 and nation='Scotland'
343 group by week
344 order by week'''
345 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
346 deaths_prev.head()
347 deaths_s = deaths_s.merge(deaths_prev, on='week')
348 deaths_s.head()
349 ```
350
351 ```python
352 qstr = '''select week, avg(deaths) as prev_mean
353 from all_causes_deaths
354 where year <= 2019 and nation='Northern Ireland'
355 group by week
356 order by week'''
357 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
358 deaths_prev.head()
359 deaths_i = deaths_i.merge(deaths_prev, on='week')
360 deaths_i.head()
361 ```
362
363 ```python Collapsed="false"
364 deaths = deaths_e + deaths_w + deaths_i + deaths_s
365 deaths.head()
366 ```
367
368 ```python Collapsed="false"
369 deaths[[2021, 2020, 2019, 2018, 2017, 2016, 2015]].plot(figsize=(14, 8))
370 ```
371
372 ```python Collapsed="false"
373 deaths[[2021, 2020, 'prev_mean']].plot(figsize=(10, 8))
374 ```
375
376 ```python Collapsed="false"
377 deaths_i.plot()
378 ```
379
380 ```python
381 deaths[2020].sum() - deaths.prev_mean.sum()
382 ```
383
384 ```python
385 def _closeline(line):
386 x, y = line.get_data()
387 x = np.concatenate((x, [x[0]]))
388 y = np.concatenate((y, [y[0]]))
389 line.set_data(x, y)
390
391 def _closeline_connect(lines):
392 for line0, line1 in zip(lines, lines[1:]):
393 x0, y0 = line0.get_data()
394 x1, y1 = line1.get_data()
395
396 x0 = np.concatenate((x0, [x1[0]]))
397 y0 = np.concatenate((y0, [y1[0]]))
398 line0.set_data(x0, y0)
399 ```
400
401 ```python Collapsed="false"
402 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
403
404 def create_and_save_radar_plot(dataset, title_string, filename_suffix):
405
406 fig = plt.figure(figsize=(10, 10))
407 ax = fig.add_subplot(111, projection="polar")
408
409 theta = np.roll(
410 np.flip(
411 np.arange(len(dataset))/float(len(dataset))*2.*np.pi),
412 14)
413 l15, = ax.plot(theta, dataset[2015], color="#e47d7d", label="2015") # 0
414 l16, = ax.plot(theta, dataset[2016], color="#afc169", label="2016") # 72 , d0e47d
415 l17, = ax.plot(theta, dataset[2017], color="#7de4a6", label="2017") # 144
416 l18, = ax.plot(theta, dataset[2018], color="#7da6e4", label="2018") # 216
417 l19, = ax.plot(theta, dataset[2019], color="#d07de4", label="2019") # 288
418
419 lmean, = ax.plot(theta, dataset['prev_mean'], color="black", linestyle='dashed', label="mean, 15–19")
420
421 l20, = ax.plot(theta, dataset[2020], color="#bb0000", label="2020")
422 l21, = ax.plot(theta, dataset[2021], color="#ff0000", label="2021")
423
424 # deaths_headlines.total_2019.plot(ax=ax)
425
426 _closeline(lmean)
427 _closeline_connect([l15, l16, l17, l18, l19, l20, l21])
428
429 ax.set_xticks(theta)
430 ax.set_xticklabels(dataset.index)
431 plt.legend()
432 plt.title(f"Deaths by week over years, {title_string}")
433 plt.savefig(f'deaths_radar_2021{filename_suffix}.png')
434 plt.show()
435 ```
436
437 ```python
438 create_and_save_radar_plot(deaths, 'all UK', '')
439 ```
440
441 ```python Collapsed="false"
442 # # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
443
444 # dhna = deaths# .dropna()
445
446 # fig = plt.figure(figsize=(10, 10))
447 # ax = fig.add_subplot(111, projection="polar")
448
449 # theta = np.roll(
450 # np.flip(
451 # np.arange(len(dhna))/float(len(dhna))*2.*np.pi),
452 # 14)
453 # # l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#b56363", label="2015") # 0
454 # # l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#a4b563", label="2016") # 72
455 # # l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#63b584", label="2017") # 144
456 # # l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#6384b5", label="2018") # 216
457 # # l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#a4635b", label="2019") # 288
458 # l15, = ax.plot(theta, dhna[2015], color="#e47d7d", label="2015") # 0
459 # l16, = ax.plot(theta, dhna[2016], color="#afc169", label="2016") # 72 , d0e47d
460 # l17, = ax.plot(theta, dhna[2017], color="#7de4a6", label="2017") # 144
461 # l18, = ax.plot(theta, dhna[2018], color="#7da6e4", label="2018") # 216
462 # l19, = ax.plot(theta, dhna[2019], color="#d07de4", label="2019") # 288
463
464 # lmean, = ax.plot(theta, dhna['prev_mean'], color="black", linestyle='dashed', label="mean, 15–19")
465
466 # l20, = ax.plot(theta, dhna[2020], color="#bb0000", label="2020")
467 # l21, = ax.plot(theta, dhna[2021], color="#ff0000", label="2021")
468
469 # # deaths_headlines.total_2019.plot(ax=ax)
470
471 # _closeline(lmean)
472 # _closeline_connect([l15, l16, l17, l18, l19, l20, l21])
473
474 # ax.set_xticks(theta)
475 # ax.set_xticklabels(dhna.index)
476 # plt.legend()
477 # plt.title("Deaths by week over years, all UK")
478 # plt.savefig('deaths-radar-2021.png')
479 # plt.show()
480 ```
481
482 <!-- #region Collapsed="false" -->
483 # Plots for UK nations
484 <!-- #endregion -->
485
486 ```python
487 create_and_save_radar_plot(deaths_e, 'England', '_england')
488 ```
489
490 ```python
491 create_and_save_radar_plot(deaths_w, 'Wales', '_wales')
492 ```
493
494 ```python
495 create_and_save_radar_plot(deaths_s, 'Scotland', '_scotland')
496 ```
497
498 ```python
499 create_and_save_radar_plot(deaths_i, 'Northern Ireland', '_northern_ireland')
500 ```
501
502 ```python Collapsed="false"
503
504 ```