uk_deaths_import.md

   1 ---
   2 jupyter:
   3   jupytext:
   4     formats: ipynb,md
   5     text_representation:
   6       extension: .md
   7       format_name: markdown
   8       format_version: '1.2'
   9       jupytext_version: 1.9.1
  10   kernelspec:
  11     display_name: Python 3
  12     language: python
  13     name: python3
  14 ---
  15
  16 <!-- #region Collapsed="false" -->
  17 Data from:
  18
  19 * [Office of National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (Endland and Wales) Weeks start on a Saturday.
  20 * [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
  21 * [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.
  22
  23 <!-- #endregion -->
  24
  25 ```python Collapsed="false"
  26 import itertools
  27 import collections
  28 import json
  29 import pandas as pd
  30 import numpy as np
  31 from scipy.stats import gmean
  32 import datetime
  33
  34 import matplotlib as mpl
  35 import matplotlib.pyplot as plt
  36 %matplotlib inline
  37
  38 from sqlalchemy.types import Integer, Text, String, DateTime, Float
  39 from sqlalchemy import create_engine
  40 %load_ext sql
  41 ```
  42
  43 ```python Collapsed="false"
  44 connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
  45 ```
  46
  47 ```python Collapsed="false"
  48 %sql $connection_string
  49 ```
  50
  51 ```python Collapsed="false"
  52 engine = create_engine(connection_string)
  53 ```
  54
  55 ```python Collapsed="false"
  56 england_wales_filename = 'uk-deaths-data/copyofpublishedweek042021.xlsx'
  57 ```
  58
  59 ```python Collapsed="false"
  60 scotland_filename = 'uk-deaths-data/Scottish Government COVID-19 data (10 February 2021).xlsx'
  61 ```
  62
  63 ```python
  64 n_ireland_filename = 'uk-deaths-data/Weekly_Deaths_0.xlsx'
  65 ```
  66
  67 ```python Collapsed="false"
  68 eng_xls = pd.read_excel(england_wales_filename,
  69                         sheet_name="Weekly figures 2021",
  70                         skiprows=[0, 1, 2, 3],
  71                         skipfooter=11,
  72                         header=0,
  73                         index_col=[1]
  74                        ).iloc[:91].T
  75 eng_xls
  76 ```
  77
  78 ```python Collapsed="false"
  79 eng_xls_columns = list(eng_xls.columns)
  80
  81 for i, c in enumerate(eng_xls_columns):
  82 #     print(i, c, type(c), isinstance(c, float))
  83     if isinstance(c, float) and np.isnan(c):
  84         if eng_xls.iloc[0].iloc[i] is not pd.NaT:
  85             eng_xls_columns[i] = eng_xls.iloc[0].iloc[i]
  86
  87 # np.isnan(eng_xls_columns[0])
  88 # eng_xls_columns
  89
  90 eng_xls.columns = eng_xls_columns
  91 # eng_xls.columns
  92 ```
  93
  94 ```python
  95 eng_xls
  96 ```
  97
  98 ```python
  99 rd = eng_xls.iloc[1:][['Week ended', 'Wales']].reset_index(level=0).rename(
 100     columns={'Week ended': 'date_up_to', 'Wales': 'deaths',
 101             'index': 'week'}
 102     )
 103 rd['year'] = 2021
 104 rd['nation'] = 'Wales'
 105 rd.dropna(inplace=True)
 106 rd.head()
 107 ```
 108
 109 ```python
 110 query_string = '''
 111 delete from all_causes_deaths
 112 where nation = 'Wales'
 113   and year = 2021;
 114 '''
 115 with engine.connect() as connection:
 116     connection.execute(query_string)
 117 ```
 118
 119 ```python
 120 rd.to_sql(
 121     'all_causes_deaths',
 122     engine,
 123     if_exists='append',
 124     index=False)
 125 ```
 126
 127 ```python
 128 %sql select * from all_causes_deaths where year = 2021 limit 10
 129 ```
 130
 131 ```python
 132 rd = eng_xls.iloc[1:][['Week ended', 'Total deaths, all ages (2021)', 'Wales']].reset_index(level=0).rename(
 133     columns={'Week ended': 'date_up_to',
 134              'Total deaths, all ages (2021)': 'ew_deaths',
 135              'Wales': 'w_deaths',
 136             'index': 'week'}
 137     )
 138 rd['year'] = 2021
 139 rd['nation'] = 'England'
 140 rd['deaths'] = rd['ew_deaths'] - rd['w_deaths']
 141 rd.drop(labels=['ew_deaths', 'w_deaths'], axis='columns', inplace=True)
 142 rd.dropna(inplace=True)
 143 rd.tail()
 144 ```
 145
 146 ```python
 147 query_string = '''
 148 delete from all_causes_deaths
 149 where nation = 'England'
 150   and year = 2021;
 151 '''
 152 with engine.connect() as connection:
 153     connection.execute(query_string)
 154 ```
 155
 156 ```python
 157 rd.to_sql(
 158     'all_causes_deaths',
 159     engine,
 160     if_exists='append',
 161     index=False)
 162 ```
 163
 164 ```python
 165 %sql select nation, sum(deaths), count(*) from all_causes_deaths where year = 2021 group by nation
 166 ```
 167
 168 ```python
 169 ni_xls = pd.read_excel(n_ireland_filename,
 170                         sheet_name='Table 1',
 171                         skiprows=[0, 1, 2, 3],
 172                         header=0,
 173                       ).rename(
 174     columns={'Week Ending (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2021P)': 'deaths',
 175             'Registration Week': 'week'})
 176 rd = ni_xls[ni_xls['deaths'].notna()][['week', 'date_up_to', 'deaths']]
 177 rd['year'] = 2021
 178 rd['nation'] = 'Northern Ireland'
 179 rd
 180 ```
 181
 182 ```python
 183 query_string = '''
 184 delete from all_causes_deaths
 185 where nation = 'Northern Ireland'
 186   and year = 2021;
 187 '''
 188 with engine.connect() as connection:
 189     connection.execute(query_string)
 190 ```
 191
 192 ```python
 193 rd.to_sql(
 194     'all_causes_deaths',
 195     engine,
 196     if_exists='append',
 197     index=False)
 198 ```
 199
 200 ```python
 201 %sql select nation from all_causes_deaths group by nation
 202 ```
 203
 204 ```python
 205 # sco_xls = pd.read_excel(scotland_filename,
 206 #                         sheet_name="2.2_excess",
 207 #                         skiprows=[0, 1, 2, 3],
 208 #                         skipfooter=3,
 209 #                         header=0,
 210 #                         index_col=[1]
 211 #                        ).iloc[:91].T
 212 ```
 213
 214 ```python
 215 import openpyxl
 216 ```
 217
 218 ```python
 219 wb = openpyxl.load_workbook(scotland_filename, read_only=True)#, data_only=True, keep_links=False)
 220 sheet = wb.worksheets[7]
 221 sheet
 222 ```
 223
 224 ```python
 225 sheet.reset_dimensions()
 226 ```
 227
 228 ```python
 229 scot_elems = [[value for value in row] for row in sheet.values]
 230 scot_cols = scot_elems[3]
 231 scot_dicts = [{k: v for k, v in zip(scot_cols, row)} for row in scot_elems[4:]]
 232 scot_data = pd.DataFrame(scot_dicts)
 233 rd = scot_data[scot_data.date >= '2021'].rename(
 234     columns={'week_number': 'week', 'date': 'date_up_to', 'total_deaths': 'deaths'})[['week', 'date_up_to', 'deaths']]
 235 rd['year'] = 2021
 236 rd['nation'] = 'Scotland'
 237 rd
 238 ```
 239
 240 ```python
 241 query_string = '''
 242 delete from all_causes_deaths
 243 where nation = 'Scotland'
 244   and year = 2021;
 245 '''
 246 with engine.connect() as connection:
 247     connection.execute(query_string)
 248 ```
 249
 250 ```python
 251 rd.to_sql(
 252     'all_causes_deaths',
 253     engine,
 254     if_exists='append',
 255     index=False)
 256 ```
 257
 258 ```python
 259 %sql select nation, sum(deaths), count(*) from all_causes_deaths where year = 2021 group by nation
 260 ```
 261
 262 ```python
 263
 264 ```
 265
 266 # Create graphs
 267
 268 ```python
 269 qstr = '''select week, year, deaths
 270 from all_causes_deaths
 271 where nation = 'England' '''
 272 deaths_e = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
 273 deaths_e.head()
 274 ```
 275
 276 ```python
 277 qstr = '''select week, year, deaths
 278 from all_causes_deaths
 279 where nation = 'Wales' '''
 280 deaths_w = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
 281 deaths_w.head()
 282 ```
 283
 284 ```python
 285 qstr = '''select week, year, deaths
 286 from all_causes_deaths
 287 where nation = 'Scotland' '''
 288 deaths_s = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
 289 deaths_s.head()
 290 ```
 291
 292 ```python
 293 qstr = '''select week, year, deaths
 294 from all_causes_deaths
 295 where nation = 'Northern Ireland' '''
 296 deaths_i = pd.read_sql_query(qstr, engine).pivot(index='week', columns='year', values='deaths')
 297 deaths_i.head()
 298 ```
 299
 300 ```python
 301 qstr = '''select week, avg(deaths) as prev_mean
 302 from all_causes_deaths
 303 where year <= 2019 and nation='England'
 304 group by week
 305 order by week'''
 306 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
 307 deaths_prev.head()
 308 deaths_e = deaths_e.merge(deaths_prev, on='week')
 309 deaths_e.head()
 310 ```
 311
 312 ```python
 313 qstr = '''select week, avg(deaths) as prev_mean
 314 from all_causes_deaths
 315 where year <= 2019 and nation='Wales'
 316 group by week
 317 order by week'''
 318 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
 319 deaths_prev.head()
 320 deaths_w = deaths_w.merge(deaths_prev, on='week')
 321 deaths_w.head()
 322 ```
 323
 324 ```python
 325 qstr = '''select week, avg(deaths) as prev_mean
 326 from all_causes_deaths
 327 where year <= 2019 and nation='Scotland'
 328 group by week
 329 order by week'''
 330 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
 331 deaths_prev.head()
 332 deaths_s = deaths_s.merge(deaths_prev, on='week')
 333 deaths_s.head()
 334 ```
 335
 336 ```python
 337 qstr = '''select week, avg(deaths) as prev_mean
 338 from all_causes_deaths
 339 where year <= 2019 and nation='Northern Ireland'
 340 group by week
 341 order by week'''
 342 deaths_prev = pd.read_sql_query(qstr, engine, index_col='week')
 343 deaths_prev.head()
 344 deaths_i = deaths_i.merge(deaths_prev, on='week')
 345 deaths_i.head()
 346 ```
 347
 348 ```python Collapsed="false"
 349 deaths = deaths_e + deaths_w + deaths_i + deaths_s
 350 deaths.head()
 351 ```
 352
 353 ```python Collapsed="false"
 354 deaths[[2021, 2020, 2019, 2018, 2017, 2016, 2015]].plot(figsize=(14, 8))
 355 ```
 356
 357 ```python Collapsed="false"
 358 deaths[[2021, 2020, 'prev_mean']].plot(figsize=(10, 8))
 359 ```
 360
 361 ```python Collapsed="false"
 362 deaths_i.plot()
 363 ```
 364
 365 ```python
 366 deaths[2020].sum() - deaths.prev_mean.sum()
 367 ```
 368
 369 ```python
 370 def _closeline(line):
 371     x, y = line.get_data()
 372     x = np.concatenate((x, [x[0]]))
 373     y = np.concatenate((y, [y[0]]))
 374     line.set_data(x, y)
 375
 376 def _closeline_connect(lines):
 377     for line0, line1 in zip(lines, lines[1:]):
 378         x0, y0 = line0.get_data()
 379         x1, y1 = line1.get_data()
 380
 381         x0 = np.concatenate((x0, [x1[0]]))
 382         y0 = np.concatenate((y0, [y1[0]]))
 383         line0.set_data(x0, y0)
 384 ```
 385
 386 ```python Collapsed="false"
 387 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
 388
 389 def create_and_save_radar_plot(dataset, title_string, filename_suffix):
 390
 391     fig = plt.figure(figsize=(10, 10))
 392     ax = fig.add_subplot(111, projection="polar")
 393
 394     theta = np.roll(
 395         np.flip(
 396             np.arange(len(dataset))/float(len(dataset))*2.*np.pi),
 397         14)
 398     l15, = ax.plot(theta, dataset[2015], color="#e47d7d", label="2015") # 0
 399     l16, = ax.plot(theta, dataset[2016], color="#afc169", label="2016") # 72 , d0e47d
 400     l17, = ax.plot(theta, dataset[2017], color="#7de4a6", label="2017") # 144
 401     l18, = ax.plot(theta, dataset[2018], color="#7da6e4", label="2018") # 216
 402     l19, = ax.plot(theta, dataset[2019], color="#d07de4", label="2019") # 288
 403
 404     lmean, = ax.plot(theta, dataset['prev_mean'], color="black", linestyle='dashed', label="mean, 15–19")
 405
 406     l20, = ax.plot(theta, dataset[2020], color="#bb0000", label="2020")
 407     l21, = ax.plot(theta, dataset[2021], color="#ff0000", label="2021")
 408
 409     # deaths_headlines.total_2019.plot(ax=ax)
 410
 411     _closeline(lmean)
 412     _closeline_connect([l15, l16, l17, l18, l19, l20, l21])
 413
 414     ax.set_xticks(theta)
 415     ax.set_xticklabels(dataset.index)
 416     plt.legend()
 417     plt.title(f"Deaths by week over years, {title_string}")
 418     plt.savefig(f'deaths-radar-2021{filename_suffix}.png')
 419     plt.show()
 420 ```
 421
 422 ```python
 423 create_and_save_radar_plot(deaths, 'all UK', '')
 424 ```
 425
 426 ```python Collapsed="false"
 427 # # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
 428
 429 # dhna = deaths# .dropna()
 430
 431 # fig = plt.figure(figsize=(10, 10))
 432 # ax = fig.add_subplot(111, projection="polar")
 433
 434 # theta = np.roll(
 435 #     np.flip(
 436 #         np.arange(len(dhna))/float(len(dhna))*2.*np.pi),
 437 #     14)
 438 # # l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#b56363", label="2015") # 0
 439 # # l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#a4b563", label="2016") # 72
 440 # # l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#63b584", label="2017") # 144
 441 # # l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#6384b5", label="2018") # 216
 442 # # l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#a4635b", label="2019") # 288
 443 # l15, = ax.plot(theta, dhna[2015], color="#e47d7d", label="2015") # 0
 444 # l16, = ax.plot(theta, dhna[2016], color="#afc169", label="2016") # 72 , d0e47d
 445 # l17, = ax.plot(theta, dhna[2017], color="#7de4a6", label="2017") # 144
 446 # l18, = ax.plot(theta, dhna[2018], color="#7da6e4", label="2018") # 216
 447 # l19, = ax.plot(theta, dhna[2019], color="#d07de4", label="2019") # 288
 448
 449 # lmean, = ax.plot(theta, dhna['prev_mean'], color="black", linestyle='dashed', label="mean, 15–19")
 450
 451 # l20, = ax.plot(theta, dhna[2020], color="#bb0000", label="2020")
 452 # l21, = ax.plot(theta, dhna[2021], color="#ff0000", label="2021")
 453
 454 # # deaths_headlines.total_2019.plot(ax=ax)
 455
 456 # _closeline(lmean)
 457 # _closeline_connect([l15, l16, l17, l18, l19, l20, l21])
 458
 459 # ax.set_xticks(theta)
 460 # ax.set_xticklabels(dhna.index)
 461 # plt.legend()
 462 # plt.title("Deaths by week over years, all UK")
 463 # plt.savefig('deaths-radar-2021.png')
 464 # plt.show()
 465 ```
 466
 467 <!-- #region Collapsed="false" -->
 468 # Plots for UK nations
 469 <!-- #endregion -->
 470
 471 ```python
 472 create_and_save_radar_plot(deaths_e, 'England', '-england')
 473 ```
 474
 475 ```python
 476 create_and_save_radar_plot(deaths_w, 'Wales', '-wales')
 477 ```
 478
 479 ```python
 480 create_and_save_radar_plot(deaths_s, 'Scotland', '-scotland')
 481 ```
 482
 483 ```python
 484 create_and_save_radar_plot(deaths_i, 'Northern Ireland', '-northern-ireland')
 485 ```
 486
 487 ```python Collapsed="false"
 488
 489 ```