9 jupytext_version: 1.11.1
11 display_name: Python 3
16 <!-- #region Collapsed="false" -->
19 * [Office of National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (Endland and Wales) Weeks start on a Saturday.
20 * [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
21 * [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.
25 ```python Collapsed="false"
31 from scipy.stats import gmean
34 import matplotlib as mpl
35 import matplotlib.pyplot as plt
38 from sqlalchemy.types import Integer, Text, String, DateTime, Float
39 from sqlalchemy import create_engine
43 ```python Collapsed="false"
44 connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
47 ```python Collapsed="false"
48 %sql $connection_string
51 ```python Collapsed="false"
52 engine = create_engine(connection_string)
56 !ls uk-deaths-data/publishedweek*2021*xlsx
59 ```python Collapsed="false"
60 england_wales_filename = 'uk-deaths-data/publishedweek122021.xlsx'
63 ```python Collapsed="false"
64 scotland_filename = 'uk-deaths-data/Scottish Government COVID-19 data (08 April 2021).xlsx'
68 n_ireland_filename = 'uk-deaths-data/Weekly_Deaths - w e 2nd April 2021.XLSX'
71 ```python Collapsed="false"
72 # eng_xls = pd.read_excel(england_wales_filename,
73 # sheet_name="Weekly figures 2021",
74 # skiprows=[0, 1, 2, 3],
82 ```python Collapsed="false"
83 eng_xls = pd.read_excel(england_wales_filename,
84 sheet_name="Weekly figures 2021",
85 skiprows=[0, 1, 2, 3],
93 ```python Collapsed="false"
94 eng_xls_columns = list(eng_xls.columns)
96 for i, c in enumerate(eng_xls_columns):
97 # print(i, c, type(c), isinstance(c, float))
98 if isinstance(c, float) and np.isnan(c):
99 if eng_xls.iloc[0].iloc[i] is not pd.NaT:
100 eng_xls_columns[i] = eng_xls.iloc[0].iloc[i]
102 # np.isnan(eng_xls_columns[0])
105 eng_xls.columns = eng_xls_columns
114 rd = eng_xls.iloc[1:][['Week ended', 'Wales']].reset_index(level=0).rename(
115 columns={'Week ended': 'date_up_to', 'Wales': 'deaths',
119 rd['nation'] = 'Wales'
120 rd.dropna(inplace=True)
126 delete from all_causes_deaths
127 where nation = 'Wales'
130 with engine.connect() as connection:
131 connection.execute(query_string)
143 %sql select * from all_causes_deaths where year = 2021 order by date_up_to desc limit 10
147 rd = eng_xls.iloc[1:][['Week ended', 'Total deaths, all ages (2021)', 'Wales']].reset_index(level=0).rename(
148 columns={'Week ended': 'date_up_to',
149 'Total deaths, all ages (2021)': 'ew_deaths',
154 rd['nation'] = 'England'
155 rd['deaths'] = rd['ew_deaths'] - rd['w_deaths']
156 rd.drop(labels=['ew_deaths', 'w_deaths'], axis='columns', inplace=True)
157 rd.dropna(inplace=True)
163 delete from all_causes_deaths
164 where nation = 'England'
167 with engine.connect() as connection:
168 connection.execute(query_string)
180 %sql select nation, sum(deaths), count(*) from all_causes_deaths where year = 2021 group by nation
184 ni_xls = pd.read_excel(n_ireland_filename,
185 sheet_name='Table 1',
189 columns={'Week Ending (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2021P)': 'deaths',
190 'Registration Week': 'week'})
191 rd = ni_xls[ni_xls['deaths'].notna()][['week', 'date_up_to', 'deaths']]
193 rd['nation'] = 'Northern Ireland'
199 delete from all_causes_deaths
200 where nation = 'Northern Ireland'
203 with engine.connect() as connection:
204 connection.execute(query_string)
216 %sql select nation, count(*) from all_causes_deaths group by nation
220 # sco_xls = pd.read_excel(scotland_filename,
221 # sheet_name="2.2_excess",
222 # skiprows=[0, 1, 2, 3],
234 wb = openpyxl.load_workbook(scotland_filename, read_only=True)#, data_only=True, keep_links=False)
235 sheet = wb.worksheets[7]
240 sheet.reset_dimensions()
244 scot_elems = [[value for value in row] for row in sheet.values]
245 scot_cols = scot_elems[3]
246 scot_dicts = [{k: v for k, v in zip(scot_cols, row)} for row in scot_elems[4:]]
247 scot_data = pd.DataFrame(scot_dicts)
248 rd = scot_data[scot_data.date >= '2021'].rename(
249 columns={'week_number': 'week', 'date': 'date_up_to', 'total_deaths': 'deaths'})[['week', 'date_up_to', 'deaths']]
251 rd['nation'] = 'Scotland'
257 delete from all_causes_deaths
258 where nation = 'Scotland'
261 with engine.connect() as connection:
262 connection.execute(query_string)
274 %sql select nation, sum(deaths), count(*) from all_causes_deaths where year = 2021 group by nation
284 qstr = '''select week, year, deaths
285 from all_causes_deaths
286 where nation = 'England' '''
287 deaths_e = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
292 qstr = '''select week, year, deaths
293 from all_causes_deaths
294 where nation = 'Wales' '''
295 deaths_w = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
300 qstr = '''select week, year, deaths
301 from all_causes_deaths
302 where nation = 'Scotland' '''
303 deaths_s = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
308 qstr = '''select week, year, deaths
309 from all_causes_deaths
310 where nation = 'Northern Ireland' '''
311 deaths_i = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
316 qstr = '''select week, avg(deaths) as prev_mean
317 from all_causes_deaths
318 where year <= 2019 and nation='England'
321 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
323 deaths_e = deaths_e.merge(deaths_prev, on='week')
328 qstr = '''select week, avg(deaths) as prev_mean
329 from all_causes_deaths
330 where year <= 2019 and nation='Wales'
333 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
335 deaths_w = deaths_w.merge(deaths_prev, on='week')
340 qstr = '''select week, avg(deaths) as prev_mean
341 from all_causes_deaths
342 where year <= 2019 and nation='Scotland'
345 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
347 deaths_s = deaths_s.merge(deaths_prev, on='week')
352 qstr = '''select week, avg(deaths) as prev_mean
353 from all_causes_deaths
354 where year <= 2019 and nation='Northern Ireland'
357 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
359 deaths_i = deaths_i.merge(deaths_prev, on='week')
363 ```python Collapsed="false"
364 deaths = deaths_e + deaths_w + deaths_i + deaths_s
368 ```python Collapsed="false"
369 deaths[[2021, 2020, 2019, 2018, 2017, 2016, 2015]].plot(figsize=(14, 8))
372 ```python Collapsed="false"
373 deaths[[2021, 2020, 'prev_mean']].plot(figsize=(10, 8))
376 ```python Collapsed="false"
381 deaths[2020].sum() - deaths.prev_mean.sum()
385 def _closeline(line):
386 x, y = line.get_data()
387 x = np.concatenate((x, [x[0]]))
388 y = np.concatenate((y, [y[0]]))
391 def _closeline_connect(lines):
392 for line0, line1 in zip(lines, lines[1:]):
393 x0, y0 = line0.get_data()
394 x1, y1 = line1.get_data()
396 x0 = np.concatenate((x0, [x1[0]]))
397 y0 = np.concatenate((y0, [y1[0]]))
398 line0.set_data(x0, y0)
401 ```python Collapsed="false"
402 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
404 def create_and_save_radar_plot(dataset, title_string, filename_suffix):
406 fig = plt.figure(figsize=(10, 10))
407 ax = fig.add_subplot(111, projection="polar")
411 np.arange(len(dataset))/float(len(dataset))*2.*np.pi),
413 l15, = ax.plot(theta, dataset[2015], color="#e47d7d", label="2015") # 0
414 l16, = ax.plot(theta, dataset[2016], color="#afc169", label="2016") # 72 , d0e47d
415 l17, = ax.plot(theta, dataset[2017], color="#7de4a6", label="2017") # 144
416 l18, = ax.plot(theta, dataset[2018], color="#7da6e4", label="2018") # 216
417 l19, = ax.plot(theta, dataset[2019], color="#d07de4", label="2019") # 288
419 lmean, = ax.plot(theta, dataset['prev_mean'], color="black", linestyle='dashed', label="mean, 15–19")
421 l20, = ax.plot(theta, dataset[2020], color="#bb0000", label="2020")
422 l21, = ax.plot(theta, dataset[2021], color="#ff0000", label="2021")
424 # deaths_headlines.total_2019.plot(ax=ax)
427 _closeline_connect([l15, l16, l17, l18, l19, l20, l21])
430 ax.set_xticklabels(dataset.index)
432 plt.title(f"Deaths by week over years, {title_string}")
433 plt.savefig(f'deaths_radar_2021{filename_suffix}.png')
438 create_and_save_radar_plot(deaths, 'all UK', '')
441 ```python Collapsed="false"
442 # # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
444 # dhna = deaths# .dropna()
446 # fig = plt.figure(figsize=(10, 10))
447 # ax = fig.add_subplot(111, projection="polar")
451 # np.arange(len(dhna))/float(len(dhna))*2.*np.pi),
453 # # l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#b56363", label="2015") # 0
454 # # l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#a4b563", label="2016") # 72
455 # # l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#63b584", label="2017") # 144
456 # # l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#6384b5", label="2018") # 216
457 # # l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#a4635b", label="2019") # 288
458 # l15, = ax.plot(theta, dhna[2015], color="#e47d7d", label="2015") # 0
459 # l16, = ax.plot(theta, dhna[2016], color="#afc169", label="2016") # 72 , d0e47d
460 # l17, = ax.plot(theta, dhna[2017], color="#7de4a6", label="2017") # 144
461 # l18, = ax.plot(theta, dhna[2018], color="#7da6e4", label="2018") # 216
462 # l19, = ax.plot(theta, dhna[2019], color="#d07de4", label="2019") # 288
464 # lmean, = ax.plot(theta, dhna['prev_mean'], color="black", linestyle='dashed', label="mean, 15–19")
466 # l20, = ax.plot(theta, dhna[2020], color="#bb0000", label="2020")
467 # l21, = ax.plot(theta, dhna[2021], color="#ff0000", label="2021")
469 # # deaths_headlines.total_2019.plot(ax=ax)
472 # _closeline_connect([l15, l16, l17, l18, l19, l20, l21])
474 # ax.set_xticks(theta)
475 # ax.set_xticklabels(dhna.index)
477 # plt.title("Deaths by week over years, all UK")
478 # plt.savefig('deaths-radar-2021.png')
482 <!-- #region Collapsed="false" -->
483 # Plots for UK nations
487 create_and_save_radar_plot(deaths_e, 'England', '_england')
491 create_and_save_radar_plot(deaths_w, 'Wales', '_wales')
495 create_and_save_radar_plot(deaths_s, 'Scotland', '_scotland')
499 create_and_save_radar_plot(deaths_i, 'Northern Ireland', '_northern_ireland')
502 ```python Collapsed="false"