9 jupytext_version: 1.10.2
11 display_name: Python 3
16 <!-- #region Collapsed="false" -->
19 * [Office of National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (Endland and Wales) Weeks start on a Saturday.
20 * [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
21 * [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.
25 ```python Collapsed="false"
31 from scipy.stats import gmean
34 import matplotlib as mpl
35 import matplotlib.pyplot as plt
38 from sqlalchemy.types import Integer, Text, String, DateTime, Float
39 from sqlalchemy import create_engine
43 ```python Collapsed="false"
44 connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
47 ```python Collapsed="false"
48 %sql $connection_string
51 ```python Collapsed="false"
52 engine = create_engine(connection_string)
55 ```python Collapsed="false"
56 england_wales_filename = 'uk-deaths-data/publishedweek052021.xlsx'
59 ```python Collapsed="false"
60 scotland_filename = 'uk-deaths-data/Scottish Government COVID-19 data (15 February 2021).xlsx'
64 n_ireland_filename = 'uk-deaths-data/Weekly_Deaths_0.xlsx'
67 ```python Collapsed="false"
68 # eng_xls = pd.read_excel(england_wales_filename,
69 # sheet_name="Weekly figures 2021",
70 # skiprows=[0, 1, 2, 3],
78 ```python Collapsed="false"
79 eng_xls = pd.read_excel(england_wales_filename,
80 sheet_name="Weekly figures 2021",
81 skiprows=[0, 1, 2, 3],
89 ```python Collapsed="false"
90 eng_xls_columns = list(eng_xls.columns)
92 for i, c in enumerate(eng_xls_columns):
93 # print(i, c, type(c), isinstance(c, float))
94 if isinstance(c, float) and np.isnan(c):
95 if eng_xls.iloc[0].iloc[i] is not pd.NaT:
96 eng_xls_columns[i] = eng_xls.iloc[0].iloc[i]
98 # np.isnan(eng_xls_columns[0])
101 eng_xls.columns = eng_xls_columns
110 rd = eng_xls.iloc[1:][['Week ended', 'Wales']].reset_index(level=0).rename(
111 columns={'Week ended': 'date_up_to', 'Wales': 'deaths',
115 rd['nation'] = 'Wales'
116 rd.dropna(inplace=True)
122 delete from all_causes_deaths
123 where nation = 'Wales'
126 with engine.connect() as connection:
127 connection.execute(query_string)
139 %sql select * from all_causes_deaths where year = 2021 limit 10
143 rd = eng_xls.iloc[1:][['Week ended', 'Total deaths, all ages (2021)', 'Wales']].reset_index(level=0).rename(
144 columns={'Week ended': 'date_up_to',
145 'Total deaths, all ages (2021)': 'ew_deaths',
150 rd['nation'] = 'England'
151 rd['deaths'] = rd['ew_deaths'] - rd['w_deaths']
152 rd.drop(labels=['ew_deaths', 'w_deaths'], axis='columns', inplace=True)
153 rd.dropna(inplace=True)
159 delete from all_causes_deaths
160 where nation = 'England'
163 with engine.connect() as connection:
164 connection.execute(query_string)
176 %sql select nation, sum(deaths), count(*) from all_causes_deaths where year = 2021 group by nation
180 ni_xls = pd.read_excel(n_ireland_filename,
181 sheet_name='Table 1',
182 skiprows=[0, 1, 2, 3],
185 columns={'Week Ending (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2021P)': 'deaths',
186 'Registration Week': 'week'})
187 rd = ni_xls[ni_xls['deaths'].notna()][['week', 'date_up_to', 'deaths']]
189 rd['nation'] = 'Northern Ireland'
195 delete from all_causes_deaths
196 where nation = 'Northern Ireland'
199 with engine.connect() as connection:
200 connection.execute(query_string)
212 %sql select nation from all_causes_deaths group by nation
216 # sco_xls = pd.read_excel(scotland_filename,
217 # sheet_name="2.2_excess",
218 # skiprows=[0, 1, 2, 3],
230 wb = openpyxl.load_workbook(scotland_filename, read_only=True)#, data_only=True, keep_links=False)
231 sheet = wb.worksheets[7]
236 sheet.reset_dimensions()
240 scot_elems = [[value for value in row] for row in sheet.values]
241 scot_cols = scot_elems[3]
242 scot_dicts = [{k: v for k, v in zip(scot_cols, row)} for row in scot_elems[4:]]
243 scot_data = pd.DataFrame(scot_dicts)
244 rd = scot_data[scot_data.date >= '2021'].rename(
245 columns={'week_number': 'week', 'date': 'date_up_to', 'total_deaths': 'deaths'})[['week', 'date_up_to', 'deaths']]
247 rd['nation'] = 'Scotland'
253 delete from all_causes_deaths
254 where nation = 'Scotland'
257 with engine.connect() as connection:
258 connection.execute(query_string)
270 %sql select nation, sum(deaths), count(*) from all_causes_deaths where year = 2021 group by nation
280 qstr = '''select week, year, deaths
281 from all_causes_deaths
282 where nation = 'England' '''
283 deaths_e = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
288 qstr = '''select week, year, deaths
289 from all_causes_deaths
290 where nation = 'Wales' '''
291 deaths_w = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
296 qstr = '''select week, year, deaths
297 from all_causes_deaths
298 where nation = 'Scotland' '''
299 deaths_s = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
304 qstr = '''select week, year, deaths
305 from all_causes_deaths
306 where nation = 'Northern Ireland' '''
307 deaths_i = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
312 qstr = '''select week, avg(deaths) as prev_mean
313 from all_causes_deaths
314 where year <= 2019 and nation='England'
317 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
319 deaths_e = deaths_e.merge(deaths_prev, on='week')
324 qstr = '''select week, avg(deaths) as prev_mean
325 from all_causes_deaths
326 where year <= 2019 and nation='Wales'
329 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
331 deaths_w = deaths_w.merge(deaths_prev, on='week')
336 qstr = '''select week, avg(deaths) as prev_mean
337 from all_causes_deaths
338 where year <= 2019 and nation='Scotland'
341 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
343 deaths_s = deaths_s.merge(deaths_prev, on='week')
348 qstr = '''select week, avg(deaths) as prev_mean
349 from all_causes_deaths
350 where year <= 2019 and nation='Northern Ireland'
353 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
355 deaths_i = deaths_i.merge(deaths_prev, on='week')
359 ```python Collapsed="false"
360 deaths = deaths_e + deaths_w + deaths_i + deaths_s
364 ```python Collapsed="false"
365 deaths[[2021, 2020, 2019, 2018, 2017, 2016, 2015]].plot(figsize=(14, 8))
368 ```python Collapsed="false"
369 deaths[[2021, 2020, 'prev_mean']].plot(figsize=(10, 8))
372 ```python Collapsed="false"
377 deaths[2020].sum() - deaths.prev_mean.sum()
381 def _closeline(line):
382 x, y = line.get_data()
383 x = np.concatenate((x, [x[0]]))
384 y = np.concatenate((y, [y[0]]))
387 def _closeline_connect(lines):
388 for line0, line1 in zip(lines, lines[1:]):
389 x0, y0 = line0.get_data()
390 x1, y1 = line1.get_data()
392 x0 = np.concatenate((x0, [x1[0]]))
393 y0 = np.concatenate((y0, [y1[0]]))
394 line0.set_data(x0, y0)
397 ```python Collapsed="false"
398 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
400 def create_and_save_radar_plot(dataset, title_string, filename_suffix):
402 fig = plt.figure(figsize=(10, 10))
403 ax = fig.add_subplot(111, projection="polar")
407 np.arange(len(dataset))/float(len(dataset))*2.*np.pi),
409 l15, = ax.plot(theta, dataset[2015], color="#e47d7d", label="2015") # 0
410 l16, = ax.plot(theta, dataset[2016], color="#afc169", label="2016") # 72 , d0e47d
411 l17, = ax.plot(theta, dataset[2017], color="#7de4a6", label="2017") # 144
412 l18, = ax.plot(theta, dataset[2018], color="#7da6e4", label="2018") # 216
413 l19, = ax.plot(theta, dataset[2019], color="#d07de4", label="2019") # 288
415 lmean, = ax.plot(theta, dataset['prev_mean'], color="black", linestyle='dashed', label="mean, 15–19")
417 l20, = ax.plot(theta, dataset[2020], color="#bb0000", label="2020")
418 l21, = ax.plot(theta, dataset[2021], color="#ff0000", label="2021")
420 # deaths_headlines.total_2019.plot(ax=ax)
423 _closeline_connect([l15, l16, l17, l18, l19, l20, l21])
426 ax.set_xticklabels(dataset.index)
428 plt.title(f"Deaths by week over years, {title_string}")
429 plt.savefig(f'deaths_radar_2021{filename_suffix}.png')
434 create_and_save_radar_plot(deaths, 'all UK', '')
437 ```python Collapsed="false"
438 # # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
440 # dhna = deaths# .dropna()
442 # fig = plt.figure(figsize=(10, 10))
443 # ax = fig.add_subplot(111, projection="polar")
447 # np.arange(len(dhna))/float(len(dhna))*2.*np.pi),
449 # # l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#b56363", label="2015") # 0
450 # # l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#a4b563", label="2016") # 72
451 # # l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#63b584", label="2017") # 144
452 # # l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#6384b5", label="2018") # 216
453 # # l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#a4635b", label="2019") # 288
454 # l15, = ax.plot(theta, dhna[2015], color="#e47d7d", label="2015") # 0
455 # l16, = ax.plot(theta, dhna[2016], color="#afc169", label="2016") # 72 , d0e47d
456 # l17, = ax.plot(theta, dhna[2017], color="#7de4a6", label="2017") # 144
457 # l18, = ax.plot(theta, dhna[2018], color="#7da6e4", label="2018") # 216
458 # l19, = ax.plot(theta, dhna[2019], color="#d07de4", label="2019") # 288
460 # lmean, = ax.plot(theta, dhna['prev_mean'], color="black", linestyle='dashed', label="mean, 15–19")
462 # l20, = ax.plot(theta, dhna[2020], color="#bb0000", label="2020")
463 # l21, = ax.plot(theta, dhna[2021], color="#ff0000", label="2021")
465 # # deaths_headlines.total_2019.plot(ax=ax)
468 # _closeline_connect([l15, l16, l17, l18, l19, l20, l21])
470 # ax.set_xticks(theta)
471 # ax.set_xticklabels(dhna.index)
473 # plt.title("Deaths by week over years, all UK")
474 # plt.savefig('deaths-radar-2021.png')
478 <!-- #region Collapsed="false" -->
479 # Plots for UK nations
483 create_and_save_radar_plot(deaths_e, 'England', '_england')
487 create_and_save_radar_plot(deaths_w, 'Wales', '_wales')
491 create_and_save_radar_plot(deaths_s, 'Scotland', '_scotland')
495 create_and_save_radar_plot(deaths_i, 'Northern Ireland', '_northern_ireland')
498 ```python Collapsed="false"