uk_deaths.md

   1 ---
   2 jupyter:
   3   jupytext:
   4     formats: ipynb,md
   5     text_representation:
   6       extension: .md
   7       format_name: markdown
   8       format_version: '1.2'
   9       jupytext_version: 1.3.4
  10   kernelspec:
  11     display_name: Python 3
  12     language: python
  13     name: python3
  14 ---
  15
  16 Data from:
  17
  18 * [Office for National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (England and Wales). Weeks start on a Saturday.
  19 * [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
  20 * [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO 8601 week numbers, which start on a Monday.
  21
  22
  23 ```python
  24 import itertools
  25 import collections
  26 import pandas as pd
  27 import numpy as np
  28 from scipy.stats import gmean
  29
  30 import matplotlib as mpl
  31 import matplotlib.pyplot as plt
  32 %matplotlib inline
  33 ```
  34
  35 ```python
  36 !ls uk-deaths-data
  37 ```
  38
  39 ```python
  40 raw_data_2015 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2015.csv',
  41                        parse_dates=[1, 2], dayfirst=True,
  42                       index_col=0,
  43                       header=[0, 1]
  44                            )
  45 dh15i = raw_data_2015.iloc[:, [2]]
  46 dh15i.columns = ['total_2015']
  47 # dh15i.head()
  48 ```
  49
  50 ```python
  51 raw_data_2016 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2016.csv',
  52                         parse_dates=[1, 2], dayfirst=True,
  53                       index_col=0,
  54                       header=[0, 1]
  55                            )
  56 dh16i = raw_data_2016.iloc[:, [2]]
  57 dh16i.columns = ['total_2016']
  58 # dh16i.head()
  59 ```
  60
  61 ```python
  62 raw_data_2017 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2017.csv',
  63                         parse_dates=[1, 2], dayfirst=True,
  64                       index_col=0,
  65                       header=[0, 1]
  66                            )
  67 dh17i = raw_data_2017.iloc[:, [2]]
  68 dh17i.columns = ['total_2017']
  69 # dh17i.head()
  70 ```
  71
  72 ```python
  73 raw_data_2018 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2018.csv',
  74                         parse_dates=[1, 2], dayfirst=True,
  75                       index_col=0,
  76                       header=[0, 1]
  77                            )
  78 dh18i = raw_data_2018.iloc[:, [2]]
  79 dh18i.columns = ['total_2018']
  80 # dh18i.head()
  81 ```
  82
  83 ```python
  84 raw_data_2019 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2019.csv',
  85                         parse_dates=[1, 2], dayfirst=True,
  86                       index_col=0,
  87                       header=[0, 1]
  88                            )
  89 dh19i = raw_data_2019.iloc[:, [2]]
  90 dh19i.columns = ['total_2019']
  91 # dh19i.head()
  92 ```
  93
  94 ```python
  95 raw_data_2020_i = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2020.csv',
  96                         parse_dates=[1], dayfirst=True,
  97                       index_col=0,
  98                       header=[0, 1]
  99                            )
 100 deaths_headlines_i = raw_data_2020_i.iloc[:, [1]]
 101 deaths_headlines_i.columns = ['total_2020']
 102 deaths_headlines_i.head()
 103 ```
 104
 105 ```python
 106
 107 ```
 108
 109 ```python
 110
 111 ```
 112
 113 ```python
 114 raw_data_s = pd.read_csv('uk-deaths-data/weekly-deaths-april-20-scotland.csv',
 115                       index_col=0,
 116                       header=0,
 117                         skiprows=2
 118                            )
 119 # raw_data_s
 120 ```
 121
 122 ```python
 123 deaths_headlines_s = raw_data_s[reversed('2015 2016 2017 2018 2019 2020'.split())]
 124 deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns]
 125 deaths_headlines_s.reset_index(drop=True, inplace=True)
 126 deaths_headlines_s.index = deaths_headlines_s.index + 1
 127 deaths_headlines_s
 128 ```
 129
 130 ```python
 131
 132 ```
 133
 134 ```python
 135
 136 ```
 137
 138 ```python
 139
 140 ```
 141
 142 ```python
 143
 144 ```
 145
 146 ```python
 147
 148 ```
 149
 150 ```python
 151 raw_data_2020 = pd.read_csv('uk-deaths-data/publishedweek182020.csv',
 152                        parse_dates=[1], dayfirst=True,
 153                       index_col=0,
 154                       header=[0, 1])
 155 ```
 156
 157 ```python
 158 # raw_data_2020.head()
 159 ```
 160
 161 ```python
 162 raw_data_2020['W92000004', 'Wales']
 163 ```
 164
 165 ```python
 166 raw_data_2019 = pd.read_csv('uk-deaths-data/publishedweek522019.csv',
 167                        parse_dates=[1], dayfirst=True,
 168                       index_col=0,
 169                       header=[0, 1])
 170 # raw_data_2019.head()
 171 ```
 172
 173 ```python
 174 raw_data_2018 = pd.read_csv('uk-deaths-data/publishedweek522018.csv',
 175                        parse_dates=[1], dayfirst=True,
 176                       index_col=0,
 177                       header=[0, 1])
 178 # raw_data_2018.head()
 179 ```
 180
 181 ```python
 182 raw_data_2017 = pd.read_csv('uk-deaths-data/publishedweek522017.csv',
 183                        parse_dates=[1], dayfirst=True,
 184                       index_col=0,
 185                       header=[0, 1])
 186 # raw_data_2017.head()
 187 ```
 188
 189 ```python
 190 raw_data_2016 = pd.read_csv('uk-deaths-data/publishedweek522016.csv',
 191                        parse_dates=[1], dayfirst=True,
 192                       index_col=0,
 193                       header=[0, 1])
 194 # raw_data_2016.head()
 195 ```
 196
 197 ```python
 198 raw_data_2015 = pd.read_csv('uk-deaths-data/publishedweek2015.csv',
 199                        parse_dates=[1], dayfirst=True,
 200                       index_col=0,
 201                       header=[0, 1])
 202 # raw_data_2015.head()
 203 ```
 204
 205 ```python
 206 deaths_headlines_e = raw_data_2020.iloc[:, [1]]
 207 deaths_headlines_e.columns = ['total_2020']
 208 deaths_headlines_w = raw_data_2020['W92000004']
 209 deaths_headlines_e.columns = ['total_2020']
 210 deaths_headlines_w.columns = ['total_2020']
 211 deaths_headlines_e.total_2020 -= deaths_headlines_w.total_2020
 212 deaths_headlines_e.head()
 213 deaths_headlines_e
 214 ```
 215
 216 ```python
 217 dh19e = raw_data_2019.iloc[:, [1]]
 218 dh19w = raw_data_2019['W92000004']
 219 dh19e.columns = ['total_2019']
 220 dh19w.columns = ['total_2019']
 221 dh19e.total_2019 -= dh19w.total_2019
 222 dh19e.head()
 223 ```
 224
 225 ```python
 226 dh19w.head()
 227 ```
 228
 229 ```python
 230 dh18e = raw_data_2018.iloc[:, [1]]
 231 dh18w = raw_data_2018['W92000004']
 232 dh18e.columns = ['total_2018']
 233 dh18w.columns = ['total_2018']
 234 dh18e.total_2018 -= dh18w.total_2018
 235 # dh18e.head()
 236 ```
 237
 238 ```python
 239 dh17e = raw_data_2017.iloc[:, [1]]
 240 dh17w = raw_data_2017['W92000004']
 241 dh17e.columns = ['total_2017']
 242 dh17w.columns = ['total_2017']
 243 dh17e.total_2017 -= dh17w.total_2017
 244 # dh17e.head()
 245 ```
 246
 247 ```python
 248 dh16e = raw_data_2016.iloc[:, [1]]
 249 dh16w = raw_data_2016['W92000004']
 250 dh16e.columns = ['total_2016']
 251 dh16w.columns = ['total_2016']
 252 dh16e.total_2016 -= dh16w.total_2016
 253 # dh16e.head()
 254 ```
 255
 256 ```python
 257 dh15e = raw_data_2015.iloc[:, [1]]
 258 dh15w = raw_data_2015['W92000004']
 259 dh15e.columns = ['total_2015']
 260 dh15w.columns = ['total_2015']
 261 dh15e.total_2015 -= dh15w.total_2015
 262 # dh15e.head()
 263 ```
 264
 265 ```python
 266 # dh18 = raw_data_2018.iloc[:, [1, 2]]
 267 # dh18.columns = ['total_2018', 'total_previous']
 268 # # dh18.head()
 269 ```
 270
 271 ```python
 272 deaths_headlines_e = deaths_headlines_e.merge(dh19e['total_2019'], how='outer', left_index=True, right_index=True)
 273 deaths_headlines_e = deaths_headlines_e.merge(dh18e['total_2018'], how='outer', left_index=True, right_index=True)
 274 deaths_headlines_e = deaths_headlines_e.merge(dh17e['total_2017'], how='outer', left_index=True, right_index=True)
 275 deaths_headlines_e = deaths_headlines_e.merge(dh16e['total_2016'], how='outer', left_index=True, right_index=True)
 276 # deaths_headlines = deaths_headlines.merge(dh15['total_2015'], how='outer', left_index=True, right_index=True)
 277 deaths_headlines_e = deaths_headlines_e.merge(dh15e['total_2015'], how='left', left_index=True, right_index=True)
 278 deaths_headlines_e
 279 ```
 280
 281 ```python
 282 deaths_headlines_s = raw_data_s[reversed('2015 2016 2017 2018 2019 2020'.split())]
 283 deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns]
 284 deaths_headlines_s.reset_index(drop=True, inplace=True)
 285 deaths_headlines_s.index = deaths_headlines_s.index + 1
 286 deaths_headlines_s = deaths_headlines_s.loc[1:52]
 287 deaths_headlines_s
 288 ```
 289
 290 ```python
 291 deaths_headlines_w = deaths_headlines_w.merge(dh19w['total_2019'], how='outer', left_index=True, right_index=True)
 292 deaths_headlines_w = deaths_headlines_w.merge(dh18w['total_2018'], how='outer', left_index=True, right_index=True)
 293 deaths_headlines_w = deaths_headlines_w.merge(dh17w['total_2017'], how='outer', left_index=True, right_index=True)
 294 deaths_headlines_w = deaths_headlines_w.merge(dh16w['total_2016'], how='outer', left_index=True, right_index=True)
 295 # deaths_headlines = deaths_headlines.merge(dh15['total_2015'], how='outer', left_index=True, right_index=True)
 296 deaths_headlines_w = deaths_headlines_w.merge(dh15w['total_2015'], how='left', left_index=True, right_index=True)
 297 deaths_headlines_w
 298 ```
 299
 300 ```python
 301 deaths_headlines_i = deaths_headlines_i.merge(dh19i['total_2019'], how='outer', left_index=True, right_index=True)
 302 deaths_headlines_i = deaths_headlines_i.merge(dh18i['total_2018'], how='outer', left_index=True, right_index=True)
 303 deaths_headlines_i = deaths_headlines_i.merge(dh17i['total_2017'], how='outer', left_index=True, right_index=True)
 304 deaths_headlines_i = deaths_headlines_i.merge(dh16i['total_2016'], how='outer', left_index=True, right_index=True)
 305 deaths_headlines_i = deaths_headlines_i.merge(dh15i['total_2015'], how='left', left_index=True, right_index=True)
 306 deaths_headlines_i
 307 ```
 308
 309 ```python
 310 deaths_headlines = deaths_headlines_e + deaths_headlines_w + deaths_headlines_i + deaths_headlines_s
 311 deaths_headlines
 312 ```
 313
 314 ```python
 315 deaths_headlines_e['previous_mean'] = deaths_headlines_e['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
 316 deaths_headlines_w['previous_mean'] = deaths_headlines_w['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
 317 deaths_headlines_s['previous_mean'] = deaths_headlines_s['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
 318 deaths_headlines_i['previous_mean'] = deaths_headlines_i['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
 319 deaths_headlines['previous_mean'] = deaths_headlines['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1)
 320 deaths_headlines
 321 ```
 322
 323 ```python
 324 deaths_headlines['total_2020 total_2019 total_2018 total_2017 total_2016 total_2015'.split()].plot(figsize=(10, 8))
 325 ```
 326
 327 ```python
 328 deaths_headlines_i.plot()
 329 ```
 330
 331 ```python
 332 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
 333
 334 fig = plt.figure(figsize=(10, 10))
 335 ax = fig.add_subplot(111, projection="polar")
 336
 337 theta = np.roll(
 338     np.flip(
 339         np.arange(len(deaths_headlines))/float(len(deaths_headlines))*2.*np.pi),
 340     14)
 341 # l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#b56363", label="2015") # 0
 342 # l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#a4b563", label="2016") # 72
 343 # l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#63b584", label="2017") # 144
 344 # l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#6384b5", label="2018") # 216
 345 # l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#a4635b", label="2019") # 288
 346 l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#e47d7d", label="2015") # 0
 347 l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
 348 l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#7de4a6", label="2017") # 144
 349 l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#7da6e4", label="2018") # 216
 350 l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#d07de4", label="2019") # 288
 351
 352 lmean, = ax.plot(theta, deaths_headlines['previous_mean'], color="black", linestyle='dashed', label="mean")
 353
 354 l20, = ax.plot(theta, deaths_headlines['total_2020'], color="red", label="2020")
 355
 356 # deaths_headlines.total_2019.plot(ax=ax)
 357
 358 def _closeline(line):
 359     x, y = line.get_data()
 360     x = np.concatenate((x, [x[0]]))
 361     y = np.concatenate((y, [y[0]]))
 362     line.set_data(x, y)
 363
 364 [_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
 365
 366
 367 ax.set_xticks(theta)
 368 ax.set_xticklabels(deaths_headlines.index)
 369 plt.legend()
 370 plt.title("Deaths by week over years, all UK")
 371 plt.savefig('deaths-radar.png')
 372 plt.show()
 373 ```
 374
 375 # Excess deaths calculation
 376
 377 ```python
 378 (deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum()
 379 ```
 380
 381 ```python
 382 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
 383
 384 fig = plt.figure(figsize=(10, 10))
 385 ax = fig.add_subplot(111, projection="polar")
 386
 387 theta = np.roll(
 388     np.flip(
 389         np.arange(len(deaths_headlines_e))/float(len(deaths_headlines_e))*2.*np.pi),
 390     14)
 391 l15, = ax.plot(theta, deaths_headlines_e['total_2015'], color="#e47d7d", label="2015") # 0
 392 l16, = ax.plot(theta, deaths_headlines_e['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
 393 l17, = ax.plot(theta, deaths_headlines_e['total_2017'], color="#7de4a6", label="2017") # 144
 394 l18, = ax.plot(theta, deaths_headlines_e['total_2018'], color="#7da6e4", label="2018") # 216
 395 l19, = ax.plot(theta, deaths_headlines_e['total_2019'], color="#d07de4", label="2019") # 288
 396
 397 lmean, = ax.plot(theta, deaths_headlines_e['previous_mean'], color="black", linestyle='dashed', label="mean")
 398
 399 l20, = ax.plot(theta, deaths_headlines_e['total_2020'], color="red", label="2020")
 400
 401 # deaths_headlines.total_2019.plot(ax=ax)
 402
 403 def _closeline(line):
 404     x, y = line.get_data()
 405     x = np.concatenate((x, [x[0]]))
 406     y = np.concatenate((y, [y[0]]))
 407     line.set_data(x, y)
 408
 409 [_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
 410
 411
 412 ax.set_xticks(theta)
 413 ax.set_xticklabels(deaths_headlines_e.index)
 414 plt.legend()
 415 plt.title("Deaths by week over years, England")
 416 plt.savefig('deaths-radar_england.png')
 417 plt.show()
 418 ```
 419
 420 ```python
 421 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
 422
 423 fig = plt.figure(figsize=(10, 10))
 424 ax = fig.add_subplot(111, projection="polar")
 425
 426 theta = np.roll(
 427     np.flip(
 428         np.arange(len(deaths_headlines_w))/float(len(deaths_headlines_w))*2.*np.pi),
 429     14)
 430 l15, = ax.plot(theta, deaths_headlines_w['total_2015'], color="#e47d7d", label="2015") # 0
 431 l16, = ax.plot(theta, deaths_headlines_w['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
 432 l17, = ax.plot(theta, deaths_headlines_w['total_2017'], color="#7de4a6", label="2017") # 144
 433 l18, = ax.plot(theta, deaths_headlines_w['total_2018'], color="#7da6e4", label="2018") # 216
 434 l19, = ax.plot(theta, deaths_headlines_w['total_2019'], color="#d07de4", label="2019") # 288
 435
 436 lmean, = ax.plot(theta, deaths_headlines_w['previous_mean'], color="black", linestyle='dashed', label="mean")
 437
 438 l20, = ax.plot(theta, deaths_headlines_w['total_2020'], color="red", label="2020")
 439
 440
 441 def _closeline(line):
 442     x, y = line.get_data()
 443     x = np.concatenate((x, [x[0]]))
 444     y = np.concatenate((y, [y[0]]))
 445     line.set_data(x, y)
 446
 447 [_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
 448
 449
 450 ax.set_xticks(theta)
 451 ax.set_xticklabels(deaths_headlines_w.index)
 452 plt.legend()
 453 plt.title("Deaths by week over years, Wales")
 454 plt.savefig('deaths-radar_wales.png')
 455 plt.show()
 456 ```
 457
 458 ```python
 459 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
 460
 461 fig = plt.figure(figsize=(10, 10))
 462 ax = fig.add_subplot(111, projection="polar")
 463
 464 theta = np.roll(
 465     np.flip(
 466         np.arange(len(deaths_headlines_s))/float(len(deaths_headlines_s))*2.*np.pi),
 467     14)
 468 l15, = ax.plot(theta, deaths_headlines_s['total_2015'], color="#e47d7d", label="2015") # 0
 469 l16, = ax.plot(theta, deaths_headlines_s['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
 470 l17, = ax.plot(theta, deaths_headlines_s['total_2017'], color="#7de4a6", label="2017") # 144
 471 l18, = ax.plot(theta, deaths_headlines_s['total_2018'], color="#7da6e4", label="2018") # 216
 472 l19, = ax.plot(theta, deaths_headlines_s['total_2019'], color="#d07de4", label="2019") # 288
 473
 474 lmean, = ax.plot(theta, deaths_headlines_s['previous_mean'], color="black", linestyle='dashed', label="mean")
 475
 476 l20, = ax.plot(theta, deaths_headlines_s['total_2020'], color="red", label="2020")
 477
 478
 479 def _closeline(line):
 480     x, y = line.get_data()
 481     x = np.concatenate((x, [x[0]]))
 482     y = np.concatenate((y, [y[0]]))
 483     line.set_data(x, y)
 484
 485 [_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
 486
 487
 488 ax.set_xticks(theta)
 489 ax.set_xticklabels(deaths_headlines_s.index)
 490 plt.legend()
 491 plt.title("Deaths by week over years, Scotland")
 492 plt.savefig('deaths-radar_scotland.png')
 493 plt.show()
 494 ```
 495
 496 ```python
 497 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
 498
 499 fig = plt.figure(figsize=(10, 10))
 500 ax = fig.add_subplot(111, projection="polar")
 501
 502 theta = np.roll(
 503     np.flip(
 504         np.arange(len(deaths_headlines_i))/float(len(deaths_headlines_i))*2.*np.pi),
 505     14)
 506 l15, = ax.plot(theta, deaths_headlines_i['total_2015'], color="#e47d7d", label="2015") # 0
 507 l16, = ax.plot(theta, deaths_headlines_i['total_2016'], color="#afc169", label="2016") # 72 , d0e47d
 508 l17, = ax.plot(theta, deaths_headlines_i['total_2017'], color="#7de4a6", label="2017") # 144
 509 l18, = ax.plot(theta, deaths_headlines_i['total_2018'], color="#7da6e4", label="2018") # 216
 510 l19, = ax.plot(theta, deaths_headlines_i['total_2019'], color="#d07de4", label="2019") # 288
 511
 512 lmean, = ax.plot(theta, deaths_headlines_i['previous_mean'], color="black", linestyle='dashed', label="mean")
 513
 514 l20, = ax.plot(theta, deaths_headlines_i['total_2020'], color="red", label="2020")
 515
 516
 517 def _closeline(line):
 518     x, y = line.get_data()
 519     x = np.concatenate((x, [x[0]]))
 520     y = np.concatenate((y, [y[0]]))
 521     line.set_data(x, y)
 522
 523 [_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
 524
 525
 526 ax.set_xticks(theta)
 527 ax.set_xticklabels(deaths_headlines_i.index)
 528 plt.legend()
 529 plt.title("Deaths by week over years, Northern Ireland")
 530 plt.savefig('deaths-radar_northern_ireland.png')
 531 plt.show()
 532 ```
 533
 534 ```python
 535
 536 ```