9 jupytext_version: 1.9.1
11 display_name: Python 3
16 <!-- #region Collapsed="false" -->
19 * [Office of National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (Endland and Wales) Weeks start on a Saturday.
20 * [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
21 * [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.
25 ```python Collapsed="false"
31 from scipy.stats import gmean
34 import matplotlib as mpl
35 import matplotlib.pyplot as plt
38 from sqlalchemy.types import Integer, Text, String, DateTime, Float
39 from sqlalchemy import create_engine
43 ```python Collapsed="false"
44 connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
47 ```python Collapsed="false"
48 %sql $connection_string
51 ```python Collapsed="false"
52 engine = create_engine(connection_string)
55 ```python Collapsed="false"
56 england_wales_filename = 'uk-deaths-data/copyofpublishedweek042021.xlsx'
59 ```python Collapsed="false"
60 scotland_filename = 'uk-deaths-data/Scottish Government COVID-19 data (10 February 2021).xlsx'
64 n_ireland_filename = 'uk-deaths-data/Weekly_Deaths_0.xlsx'
67 ```python Collapsed="false"
68 eng_xls = pd.read_excel(england_wales_filename,
69 sheet_name="Weekly figures 2021",
70 skiprows=[0, 1, 2, 3],
78 ```python Collapsed="false"
79 eng_xls_columns = list(eng_xls.columns)
81 for i, c in enumerate(eng_xls_columns):
82 # print(i, c, type(c), isinstance(c, float))
83 if isinstance(c, float) and np.isnan(c):
84 if eng_xls.iloc[0].iloc[i] is not pd.NaT:
85 eng_xls_columns[i] = eng_xls.iloc[0].iloc[i]
87 # np.isnan(eng_xls_columns[0])
90 eng_xls.columns = eng_xls_columns
99 rd = eng_xls.iloc[1:][['Week ended', 'Wales']].reset_index(level=0).rename(
100 columns={'Week ended': 'date_up_to', 'Wales': 'deaths',
104 rd['nation'] = 'Wales'
105 rd.dropna(inplace=True)
111 delete from all_causes_deaths
112 where nation = 'Wales'
115 with engine.connect() as connection:
116 connection.execute(query_string)
128 %sql select * from all_causes_deaths where year = 2021 limit 10
132 rd = eng_xls.iloc[1:][['Week ended', 'Total deaths, all ages (2021)', 'Wales']].reset_index(level=0).rename(
133 columns={'Week ended': 'date_up_to',
134 'Total deaths, all ages (2021)': 'ew_deaths',
139 rd['nation'] = 'England'
140 rd['deaths'] = rd['ew_deaths'] - rd['w_deaths']
141 rd.drop(labels=['ew_deaths', 'w_deaths'], axis='columns', inplace=True)
142 rd.dropna(inplace=True)
148 delete from all_causes_deaths
149 where nation = 'England'
152 with engine.connect() as connection:
153 connection.execute(query_string)
165 %sql select nation, sum(deaths), count(*) from all_causes_deaths where year = 2021 group by nation
169 ni_xls = pd.read_excel(n_ireland_filename,
170 sheet_name='Table 1',
171 skiprows=[0, 1, 2, 3],
174 columns={'Week Ending (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2021P)': 'deaths',
175 'Registration Week': 'week'})
176 rd = ni_xls[ni_xls['deaths'].notna()][['week', 'date_up_to', 'deaths']]
178 rd['nation'] = 'Northern Ireland'
184 delete from all_causes_deaths
185 where nation = 'Northern Ireland'
188 with engine.connect() as connection:
189 connection.execute(query_string)
201 %sql select nation from all_causes_deaths group by nation
205 # sco_xls = pd.read_excel(scotland_filename,
206 # sheet_name="2.2_excess",
207 # skiprows=[0, 1, 2, 3],
219 wb = openpyxl.load_workbook(scotland_filename, read_only=True)#, data_only=True, keep_links=False)
220 sheet = wb.worksheets[7]
225 sheet.reset_dimensions()
229 scot_elems = [[value for value in row] for row in sheet.values]
230 scot_cols = scot_elems[3]
231 scot_dicts = [{k: v for k, v in zip(scot_cols, row)} for row in scot_elems[4:]]
232 scot_data = pd.DataFrame(scot_dicts)
233 rd = scot_data[scot_data.date >= '2021'].rename(
234 columns={'week_number': 'week', 'date': 'date_up_to', 'total_deaths': 'deaths'})[['week', 'date_up_to', 'deaths']]
236 rd['nation'] = 'Scotland'
242 delete from all_causes_deaths
243 where nation = 'Scotland'
246 with engine.connect() as connection:
247 connection.execute(query_string)
259 %sql select nation, sum(deaths), count(*) from all_causes_deaths where year = 2021 group by nation
269 qstr = '''select week, year, deaths
270 from all_causes_deaths
271 where nation = 'England' '''
272 deaths_e = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
277 qstr = '''select week, year, deaths
278 from all_causes_deaths
279 where nation = 'Wales' '''
280 deaths_w = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
285 qstr = '''select week, year, deaths
286 from all_causes_deaths
287 where nation = 'Scotland' '''
288 deaths_s = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
293 qstr = '''select week, year, deaths
294 from all_causes_deaths
295 where nation = 'Northern Ireland' '''
296 deaths_i = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
301 qstr = '''select week, avg(deaths) as prev_mean
302 from all_causes_deaths
303 where year <= 2019 and nation='England'
306 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
308 deaths_e = deaths_e.merge(deaths_prev, on='week')
313 qstr = '''select week, avg(deaths) as prev_mean
314 from all_causes_deaths
315 where year <= 2019 and nation='Wales'
318 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
320 deaths_w = deaths_w.merge(deaths_prev, on='week')
325 qstr = '''select week, avg(deaths) as prev_mean
326 from all_causes_deaths
327 where year <= 2019 and nation='Scotland'
330 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
332 deaths_s = deaths_s.merge(deaths_prev, on='week')
337 qstr = '''select week, avg(deaths) as prev_mean
338 from all_causes_deaths
339 where year <= 2019 and nation='Northern Ireland'
342 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
344 deaths_i = deaths_i.merge(deaths_prev, on='week')
348 ```python Collapsed="false"
349 deaths = deaths_e + deaths_w + deaths_i + deaths_s
353 ```python Collapsed="false"
354 deaths[[2021, 2020, 2019, 2018, 2017, 2016, 2015]].plot(figsize=(14, 8))
357 ```python Collapsed="false"
358 deaths[[2021, 2020, 'prev_mean']].plot(figsize=(10, 8))
361 ```python Collapsed="false"
366 deaths[2020].sum() - deaths.prev_mean.sum()
370 def _closeline(line):
371 x, y = line.get_data()
372 x = np.concatenate((x, [x[0]]))
373 y = np.concatenate((y, [y[0]]))
376 def _closeline_connect(lines):
377 for line0, line1 in zip(lines, lines[1:]):
378 x0, y0 = line0.get_data()
379 x1, y1 = line1.get_data()
381 x0 = np.concatenate((x0, [x1[0]]))
382 y0 = np.concatenate((y0, [y1[0]]))
383 line0.set_data(x0, y0)
386 ```python Collapsed="false"
387 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
389 def create_and_save_radar_plot(dataset, title_string, filename_suffix):
391 fig = plt.figure(figsize=(10, 10))
392 ax = fig.add_subplot(111, projection="polar")
396 np.arange(len(dataset))/float(len(dataset))*2.*np.pi),
398 l15, = ax.plot(theta, dataset[2015], color="#e47d7d", label="2015") # 0
399 l16, = ax.plot(theta, dataset[2016], color="#afc169", label="2016") # 72 , d0e47d
400 l17, = ax.plot(theta, dataset[2017], color="#7de4a6", label="2017") # 144
401 l18, = ax.plot(theta, dataset[2018], color="#7da6e4", label="2018") # 216
402 l19, = ax.plot(theta, dataset[2019], color="#d07de4", label="2019") # 288
404 lmean, = ax.plot(theta, dataset['prev_mean'], color="black", linestyle='dashed', label="mean, 15–19")
406 l20, = ax.plot(theta, dataset[2020], color="#bb0000", label="2020")
407 l21, = ax.plot(theta, dataset[2021], color="#ff0000", label="2021")
409 # deaths_headlines.total_2019.plot(ax=ax)
412 _closeline_connect([l15, l16, l17, l18, l19, l20, l21])
415 ax.set_xticklabels(dataset.index)
417 plt.title(f"Deaths by week over years, {title_string}")
418 plt.savefig(f'deaths-radar-2021{filename_suffix}.png')
423 create_and_save_radar_plot(deaths, 'all UK', '')
426 ```python Collapsed="false"
427 # # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
429 # dhna = deaths# .dropna()
431 # fig = plt.figure(figsize=(10, 10))
432 # ax = fig.add_subplot(111, projection="polar")
436 # np.arange(len(dhna))/float(len(dhna))*2.*np.pi),
438 # # l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#b56363", label="2015") # 0
439 # # l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#a4b563", label="2016") # 72
440 # # l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#63b584", label="2017") # 144
441 # # l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#6384b5", label="2018") # 216
442 # # l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#a4635b", label="2019") # 288
443 # l15, = ax.plot(theta, dhna[2015], color="#e47d7d", label="2015") # 0
444 # l16, = ax.plot(theta, dhna[2016], color="#afc169", label="2016") # 72 , d0e47d
445 # l17, = ax.plot(theta, dhna[2017], color="#7de4a6", label="2017") # 144
446 # l18, = ax.plot(theta, dhna[2018], color="#7da6e4", label="2018") # 216
447 # l19, = ax.plot(theta, dhna[2019], color="#d07de4", label="2019") # 288
449 # lmean, = ax.plot(theta, dhna['prev_mean'], color="black", linestyle='dashed', label="mean, 15–19")
451 # l20, = ax.plot(theta, dhna[2020], color="#bb0000", label="2020")
452 # l21, = ax.plot(theta, dhna[2021], color="#ff0000", label="2021")
454 # # deaths_headlines.total_2019.plot(ax=ax)
457 # _closeline_connect([l15, l16, l17, l18, l19, l20, l21])
459 # ax.set_xticks(theta)
460 # ax.set_xticklabels(dhna.index)
462 # plt.title("Deaths by week over years, all UK")
463 # plt.savefig('deaths-radar-2021.png')
467 <!-- #region Collapsed="false" -->
468 # Plots for UK nations
472 create_and_save_radar_plot(deaths_e, 'England', '-england')
476 create_and_save_radar_plot(deaths_w, 'Wales', '-wales')
480 create_and_save_radar_plot(deaths_s, 'Scotland', '-scotland')
484 create_and_save_radar_plot(deaths_i, 'Northern Ireland', '-northern-ireland')
487 ```python Collapsed="false"