General updates
[covid19.git] / deaths_import.md
1 ---
2 jupyter:
3 jupytext:
4 formats: ipynb,md
5 text_representation:
6 extension: .md
7 format_name: markdown
8 format_version: '1.3'
9 jupytext_version: 1.10.2
10 kernelspec:
11 display_name: Python 3
12 language: python
13 name: python3
14 ---
15
16 <!-- #region Collapsed="false" -->
17 Data from:
18
19 * [Office of National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (England and Wales). Weeks start on a Saturday.
20 * [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
21 * [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.
22
23 <!-- #endregion -->
24
25 ```python Collapsed="false"
26 import itertools
27 import collections
28 import json
29 import pandas as pd
30 import numpy as np
31 from scipy.stats import gmean
32 import datetime
33
34 import matplotlib as mpl
35 import matplotlib.pyplot as plt
36 %matplotlib inline
37
38 from sqlalchemy.types import Integer, Text, String, DateTime, Float
39 from sqlalchemy import create_engine
40 %load_ext sql
41 ```
42
43 ```python Collapsed="false"
44 connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'  # FIXME(security): plaintext DB credentials committed in the notebook — rotate this password and load the connection string from an environment variable or config file
45 ```
46
47 ```python Collapsed="false"
48 %sql $connection_string
49 ```
50
51 ```python Collapsed="false"
52 conn = create_engine(connection_string)
53 ```
54
55 ```python Collapsed="false"
56 england_wales_filename = 'uk-deaths-data/publishedweek532020.xlsx'
57 ```
58
59 ```sql Collapsed="false"
60 drop table if exists all_causes_deaths;
61 create table all_causes_deaths (
62 week integer,
63 year integer,
64 date_up_to date,
65 nation varchar(20),
66 deaths integer,
67 CONSTRAINT week_nation PRIMARY KEY(year, week, nation)
68 );
69 ```
70
71 ```python Collapsed="false"
72 raw_data_2015 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2015.csv',
73 parse_dates=[1, 2], dayfirst=True,
74 # index_col=0,
75 header=[0, 1]
76 )
77 dh15i = raw_data_2015.iloc[:, [0, 3]]
78 dh15i.set_index(dh15i.columns[0], inplace=True)
79 dh15i.columns = ['total_2015']
80 dh15i.tail()
81 ```
82
83 ```python Collapsed="false"
84 raw_data_2015.head()
85 ```
86
87 ```python Collapsed="false"
88 rd = raw_data_2015.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
89 columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2015P)': 'deaths',
90 'Registration Week': 'week'}
91 )
92 rd['year'] = 2015
93 rd['nation'] = 'Northern Ireland'
94 rd.head()
95 ```
96
97 ```python Collapsed="false"
98 rd.to_sql(
99 'all_causes_deaths',
100 conn,
101 if_exists='append',
102 index=False)
103 ```
104
105 ```python Collapsed="false"
106 %sql select * from all_causes_deaths limit 10
107 ```
108
109 ```python Collapsed="false"
110 raw_data_2016 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2016.csv',
111 parse_dates=[1, 2], dayfirst=True,
112 # index_col=0,
113 header=[0, 1]
114 )
115 raw_data_2016.head()
116 # dh16i = raw_data_2016.iloc[:, [2]]
117 # dh16i.columns = ['total_2016']
118 # # dh16i.head()
119 dh16i = raw_data_2016.iloc[:, [0, 3]]
120 dh16i.set_index(dh16i.columns[0], inplace=True)
121 dh16i.columns = ['total_2016']
122 dh16i.tail()
123 ```
124
125 ```python Collapsed="false"
126 rd = raw_data_2016.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
127 columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2016P)': 'deaths',
128 'Registration Week': 'week'}
129 )
130 rd['year'] = 2016
131 rd['nation'] = 'Northern Ireland'
132 rd.head()
133 ```
134
135 ```python Collapsed="false"
136 rd.to_sql(
137 'all_causes_deaths',
138 conn,
139 if_exists='append',
140 index=False)
141 ```
142
143 ```python Collapsed="false"
144 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
145 ```
146
147 ```python Collapsed="false"
148 raw_data_2017 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2017.csv',
149 parse_dates=[1, 2], dayfirst=True,
150 # index_col=0,
151 header=[0, 1]
152 )
153 raw_data_2017.head()
154 dh17i = raw_data_2017.iloc[:, [0, 3]]
155 dh17i.set_index(dh17i.columns[0], inplace=True)
156 dh17i.columns = ['total_2017']
157 dh17i.tail()
158 ```
159
160 ```python Collapsed="false"
161 rd = raw_data_2017.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
162 columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2017P)': 'deaths',
163 'Registration Week': 'week'}
164 )
165 rd['year'] = 2017
166 rd['nation'] = 'Northern Ireland'
167 rd.head()
168 ```
169
170 ```python Collapsed="false"
171 rd.to_sql(
172 'all_causes_deaths',
173 conn,
174 if_exists='append',
175 index=False)
176 ```
177
178 ```python Collapsed="false"
179 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
180 ```
181
182 ```python Collapsed="false"
183 raw_data_2018 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2018.csv',
184 parse_dates=[1, 2], dayfirst=True,
185 # index_col=0,
186 header=[0, 1]
187 )
188 raw_data_2018.head()
189 dh18i = raw_data_2018.iloc[:, [0, 3]]
190 dh18i.set_index(dh18i.columns[0], inplace=True)
191 dh18i.columns = ['total_2018']
192 dh18i.tail()
193 ```
194
195 ```python Collapsed="false"
196 rd = raw_data_2018.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
197 columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2018P)': 'deaths',
198 'Registration Week': 'week'}
199 )
200 rd['year'] = 2018
201 rd['nation'] = 'Northern Ireland'
202 rd.head()
203 ```
204
205 ```python Collapsed="false"
206 rd.to_sql(
207 'all_causes_deaths',
208 conn,
209 if_exists='append',
210 index=False)
211 ```
212
213 ```python Collapsed="false"
214 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
215 ```
216
217 ```python Collapsed="false"
218 raw_data_2019 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2019.csv',
219 parse_dates=[1, 2], dayfirst=True,
220 # index_col=0,
221 header=[0, 1]
222 )
223 raw_data_2019.head()
224 dh19i = raw_data_2019.iloc[:, [0, 3]]
225 dh19i.set_index(dh19i.columns[0], inplace=True)
226 dh19i.columns = ['total_2019']
227 dh19i.tail()
228 ```
229
230 ```python Collapsed="false"
231 rd = raw_data_2019.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
232 columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2019P)': 'deaths',
233 'Registration Week': 'week'}
234 )
235 rd['year'] = 2019
236 rd['nation'] = 'Northern Ireland'
237 rd.head()
238 ```
239
240 ```python Collapsed="false"
241 rd.to_sql(
242 'all_causes_deaths',
243 conn,
244 if_exists='append',
245 index=False)
246 ```
247
248 ```python Collapsed="false"
249 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
250 ```
251
252 ```python Collapsed="false"
253 raw_data_2020_i = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2020.csv',
254 parse_dates=[1], dayfirst=True,
255 header=[0, 1]
256 )
257 raw_data_2020_i.head()
258 ```
259
260 ```python Collapsed="false"
261 rd = raw_data_2020_i.iloc[:, [0, 1, 2]].droplevel(1, axis=1).rename(
262 columns={'Week Ending (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2020P)': 'deaths',
263 'Registration Week': 'week'}
264 )
265 rd['year'] = 2020
266 rd['nation'] = 'Northern Ireland'
267 rd.head()
268 ```
269
270 ```python Collapsed="false"
271 rd.tail()
272 ```
273
274 ```python Collapsed="false"
275 rd.to_sql(
276 'all_causes_deaths',
277 conn,
278 if_exists='append',
279 index=False)
280 ```
281
282 ```python Collapsed="false"
283 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
284 ```
285
286 ```python
287 raw_data_2020_i.set_index(raw_data_2020_i.columns[0], inplace=True)
288 raw_data_2020_i.tail()
289 ```
290
291 ```python Collapsed="false"
292 datetime.datetime.now().isocalendar()
293 ```
294
295 ```python Collapsed="false"
296 datetime.datetime.fromisocalendar(2021, 3, 1)
297 ```
298
299 ```python Collapsed="false"
300
301 ```
302
303 ```python Collapsed="false"
304 raw_data_s = pd.read_csv('uk-deaths-data/weekly-deaths-scotland.csv',
305 index_col=0,
306 header=0,
307 skiprows=2
308 )
309 # raw_data_s
310 ```
311
312 ```python Collapsed="false"
313 deaths_headlines_s = raw_data_s[reversed('2015 2016 2017 2018 2019 2020'.split())]
314 deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns]
315 deaths_headlines_s.reset_index(drop=True, inplace=True)
316 deaths_headlines_s.index = deaths_headlines_s.index + 1
317 deaths_headlines_s
318 ```
319
320 ```python
321 %sql select * from all_causes_deaths limit 5
322 ```
323
324 ```python Collapsed="false"
325 for year, ser in deaths_headlines_s.items():
326 year_i = int(year[-4:])
327 # print(year_i)
328 for week, deaths in ser.dropna().iteritems():
329 # print(datetime.date.fromisocalendar(year_i, week, 7), deaths)
330 dut = datetime.date.fromisocalendar(year_i, week, 7)
331 %sql insert into all_causes_deaths(week, year, date_up_to, nation, deaths) values ({week}, {year_i}, :dut, 'Scotland', {deaths})
332 ```
333
334 ```python
335 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
336 ```
337
338 ```python
339 %sql select year, nation, date_up_to from all_causes_deaths where week=3 order by year, nation
340 ```
341
342 ```python Collapsed="false"
343 eng_xls = pd.read_excel(england_wales_filename,
344 sheet_name="Weekly figures 2020",
345 skiprows=[0, 1, 2, 3],
346 header=0,
347 index_col=[1]
348 ).iloc[:91].T
349 eng_xls
350 ```
351
352 ```python Collapsed="false"
353 # eng_xls_columns
354 ```
355
356 ```python Collapsed="false"
357 eng_xls_columns = list(eng_xls.columns)
358
359 for i, c in enumerate(eng_xls_columns):
360 # print(i, c, type(c), isinstance(c, float))
361 if isinstance(c, float) and np.isnan(c):
362 if eng_xls.iloc[0].iloc[i] is not pd.NaT:
363 eng_xls_columns[i] = eng_xls.iloc[0].iloc[i]
364
365 # np.isnan(eng_xls_columns[0])
366 # eng_xls_columns
367
368 eng_xls.columns = eng_xls_columns
369 # eng_xls.columns
370 ```
371
372 ```python
373 eng_xls
374 ```
375
376 ```python
377 rd = eng_xls.iloc[1:][['Week ended', 'Wales']].reset_index(level=0).rename(
378 columns={'Week ended': 'date_up_to', 'Wales': 'deaths',
379 'index': 'week'}
380 )
381 rd['year'] = 2020
382 rd['nation'] = 'Wales'
383 rd.head()
384 ```
385
386 ```python
387 rd.to_sql(
388 'all_causes_deaths',
389 conn,
390 if_exists='append',
391 index=False)
392 ```
393
394 ```python Collapsed="false"
395 eng_xls = eng_xls.iloc[1:]
396 eng_xls['England deaths'] = eng_xls.loc[:, 'Total deaths, all ages'] - eng_xls.loc[:, 'Wales']
397 ```
398
399 ```python
400 eng_xls.head()
401 ```
402
403 ```python
404 rd = eng_xls[['Week ended', 'England deaths']].reset_index(level=0).rename(
405 columns={'Week ended': 'date_up_to', 'England deaths': 'deaths',
406 'index': 'week'}
407 )
408 rd['year'] = 2020
409 rd['nation'] = 'England'
410 rd.head()
411 ```
412
413 ```python
414 %sql delete from all_causes_deaths where nation = 'England'
415 ```
416
417 ```python
418 rd.to_sql(
419 'all_causes_deaths',
420 conn,
421 if_exists='append',
422 index=False)
423 ```
424
425 ```python
426 rd.tail()
427 ```
428
429 ```python
430 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
431 ```
432
433 ```python Collapsed="false"
434 # raw_data_2020 = pd.read_csv('uk-deaths-data/publishedweek272020.csv',
435 # parse_dates=[1], dayfirst=True,
436 # index_col=0,
437 # header=[0, 1])
438 ```
439
440 ```python Collapsed="false"
441
442 ```
443
444 ```python Collapsed="false"
445 # raw_data_2020.head()
446 ```
447
448 ```python Collapsed="false"
449 # raw_data_2020['W92000004', 'Wales']
450 ```
451
452 ```python Collapsed="false"
453 raw_data_2019 = pd.read_csv('uk-deaths-data/publishedweek522019.csv',
454 parse_dates=[1], dayfirst=True,
455 # index_col=0,
456 header=[0, 1])
457 # raw_data_2019.head()
458 ```
459
460 ```python
461 rdew = raw_data_2019.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
462 rdew.head()
463 ```
464
465 ```python
466 rd = rdew.drop(columns=['Total deaths, all ages']).rename(
467 columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
468 'Week number': 'week'}
469 )
470 rd['year'] = 2019
471 rd['nation'] = 'Wales'
472 rd.head()
473 ```
474
475 ```python
476 rd.to_sql(
477 'all_causes_deaths',
478 conn,
479 if_exists='append',
480 index=False)
481 ```
482
483 ```python
484 rd = rdew.loc[:, ['Week ended','Week number']]
485 rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
486 rd = rd.rename(
487 columns={'Week ended': 'date_up_to', 'Week number': 'week'}
488 )
489 rd['year'] = 2019
490 rd['nation'] = 'England'
491 rd.head()
492 ```
493
494 ```python
495 rd.to_sql(
496 'all_causes_deaths',
497 conn,
498 if_exists='append',
499 index=False)
500 ```
501
502 ```python
503 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
504 ```
505
506 ```python Collapsed="false"
507 raw_data_2018 = pd.read_csv('uk-deaths-data/publishedweek522018.csv',
508 parse_dates=[1], dayfirst=True,
509 # index_col=0,
510 header=[0, 1])
511 # raw_data_2018.head()
512 ```
513
514 ```python
515 rdew = raw_data_2018.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
516 rdew.head()
517 ```
518
519 ```python
520 rd = rdew.drop(columns=['Total deaths, all ages']).rename(
521 columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
522 'Week number': 'week'}
523 )
524 rd['year'] = 2018
525 rd['nation'] = 'Wales'
526 rd.head()
527 ```
528
529 ```python
530 rd.to_sql(
531 'all_causes_deaths',
532 conn,
533 if_exists='append',
534 index=False)
535 ```
536
537 ```python
538 rd = rdew.loc[:, ['Week ended','Week number']]
539 rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
540 rd = rd.rename(
541 columns={'Week ended': 'date_up_to', 'Week number': 'week'}
542 )
543 rd['year'] = 2018
544 rd['nation'] = 'England'
545 rd.head()
546 ```
547
548 ```python
549 rd.to_sql(
550 'all_causes_deaths',
551 conn,
552 if_exists='append',
553 index=False)
554 ```
555
556 ```python
557 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
558 ```
559
560 ```python Collapsed="false"
561 raw_data_2017 = pd.read_csv('uk-deaths-data/publishedweek522017.csv',
562 parse_dates=[1], dayfirst=True,
563 # index_col=0,
564 header=[0, 1])
565 # raw_data_2017.head()
566 ```
567
568 ```python
569 rdew = raw_data_2017.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
570 rdew.head()
571 ```
572
573 ```python
574 rd = rdew.drop(columns=['Total deaths, all ages']).rename(
575 columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
576 'Week number': 'week'}
577 )
578 rd['year'] = 2017
579 rd['nation'] = 'Wales'
580 rd.head()
581 ```
582
583 ```python
584 rd.to_sql(
585 'all_causes_deaths',
586 conn,
587 if_exists='append',
588 index=False)
589 ```
590
591 ```python
592 rd = rdew.loc[:, ['Week ended','Week number']]
593 rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
594 rd = rd.rename(
595 columns={'Week ended': 'date_up_to', 'Week number': 'week'}
596 )
597 rd['year'] = 2017
598 rd['nation'] = 'England'
599 rd.head()
600 ```
601
602 ```python
603 rd.to_sql(
604 'all_causes_deaths',
605 conn,
606 if_exists='append',
607 index=False)
608 ```
609
610 ```python
611 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
612 ```
613
614 ```python
615
616 ```
617
618 ```python Collapsed="false"
619 raw_data_2016 = pd.read_csv('uk-deaths-data/publishedweek522016.csv',
620 parse_dates=[1], dayfirst=True,
621 # index_col=0,
622 header=[0, 1])
623 # raw_data_2016.head()
624 ```
625
626 ```python
627 raw_data_2016.head()
628 ```
629
630 ```python
631 rdew = raw_data_2016.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
632 rdew.head()
633 ```
634
635 ```python
636 rd = rdew.drop(columns=['Total deaths, all ages']).rename(
637 columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
638 'Week number': 'week'}
639 )
640 rd['year'] = 2016
641 rd['nation'] = 'Wales'
642 rd.head()
643 ```
644
645 ```python
646 rd.to_sql(
647 'all_causes_deaths',
648 conn,
649 if_exists='append',
650 index=False)
651 ```
652
653 ```python
654 rd = rdew.loc[:, ['Week ended','Week number']]
655 rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
656 rd = rd.rename(
657 columns={'Week ended': 'date_up_to', 'Week number': 'week'}
658 )
659 rd['year'] = 2016
660 rd['nation'] = 'England'
661 rd.head()
662 ```
663
664 ```python
665 rd.to_sql(
666 'all_causes_deaths',
667 conn,
668 if_exists='append',
669 index=False)
670 ```
671
672 ```python
673 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
674 ```
675
676 ```python Collapsed="false"
677 raw_data_2015 = pd.read_csv('uk-deaths-data/publishedweek2015.csv',
678 parse_dates=[1], dayfirst=True,
679 # index_col=0,
680 header=[0, 1])
681 # raw_data_2015.head()
682 ```
683
684 ```python
685 rdew = raw_data_2015.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
686 rdew.head()
687 ```
688
689 ```python
690 rd = rdew.drop(columns=['Total deaths, all ages']).rename(
691 columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
692 'Week number': 'week'}
693 )
694 rd['year'] = 2015
695 rd['nation'] = 'Wales'
696 rd.head()
697 ```
698
699 ```python
700 rd.to_sql(
701 'all_causes_deaths',
702 conn,
703 if_exists='append',
704 index=False)
705 ```
706
707 ```python
708 rd = rdew.loc[:, ['Week ended','Week number']]
709 rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
710 rd = rd.rename(
711 columns={'Week ended': 'date_up_to', 'Week number': 'week'}
712 )
713 rd['year'] = 2015
714 rd['nation'] = 'England'
715 rd.head()
716 ```
717
718 ```python
719 rd.to_sql(
720 'all_causes_deaths',
721 conn,
722 if_exists='append',
723 index=False)
724 ```
725
726 ```python
727 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by year, nation
728 ```
729
730 ```sql magic_args="res << select week, year, deaths"
731 from all_causes_deaths
732 where nation = 'England'
733 ```
734
735 ```python
736 deaths_headlines_e = res.DataFrame().pivot(index='week', columns='year', values='deaths')
737 deaths_headlines_e
738 ```
739
740 ```python
741
742 ```
743
744 ```sql magic_args="res << select week, year, deaths"
745 from all_causes_deaths
746 where nation = 'Scotland'
747 ```
748
749 ```python
750 deaths_headlines_s = res.DataFrame().pivot(index='week', columns='year', values='deaths')
751 deaths_headlines_s
752 ```
753
754 ```sql magic_args="res << select week, year, deaths"
755 from all_causes_deaths
756 where nation = 'Wales'
757 ```
758
759 ```python
760 deaths_headlines_w = res.DataFrame().pivot(index='week', columns='year', values='deaths')
761 deaths_headlines_w
762 ```
763
764 ```sql magic_args="res << select week, year, deaths"
765 from all_causes_deaths
766 where nation = 'Northern Ireland'
767 ```
768
769 ```python
770 deaths_headlines_i = res.DataFrame().pivot(index='week', columns='year', values='deaths')
771 deaths_headlines_i
772 ```
773
774 ```python Collapsed="false"
775 deaths_headlines = deaths_headlines_e + deaths_headlines_w + deaths_headlines_i + deaths_headlines_s
776 deaths_headlines
777 ```
778
779 ```python
780 deaths_headlines_e.columns
781 ```
782
783 ```python
784 deaths_headlines_e['previous_mean'] = deaths_headlines_e[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
785 deaths_headlines_w['previous_mean'] = deaths_headlines_w[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
786 deaths_headlines_s['previous_mean'] = deaths_headlines_s[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
787 deaths_headlines_i['previous_mean'] = deaths_headlines_i[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
788 deaths_headlines['previous_mean'] = deaths_headlines[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
789 deaths_headlines
790 ```
791
792 ```python Collapsed="false"
793 deaths_headlines[[2020, 2019, 2018, 2017, 2016, 2015]].plot(figsize=(14, 8))
794 ```
795
796 ```python Collapsed="false"
797 deaths_headlines[[2020, 'previous_mean']].plot(figsize=(10, 8))
798 ```
799
800 ```python Collapsed="false"
801 deaths_headlines_i.plot()
802 ```
803
804 ```python
805 deaths_headlines[2020].sum() - deaths_headlines.previous_mean.sum()
806 ```
807
808 ```python Collapsed="false"
809 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
810
811 dhna = deaths_headlines.dropna()
812
813 fig = plt.figure(figsize=(10, 10))
814 ax = fig.add_subplot(111, projection="polar")
815
816 theta = np.roll(
817 np.flip(
818 np.arange(len(dhna))/float(len(dhna))*2.*np.pi),
819 14)
820 # l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#b56363", label="2015") # 0
821 # l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#a4b563", label="2016") # 72
822 # l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#63b584", label="2017") # 144
823 # l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#6384b5", label="2018") # 216
824 # l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#a4635b", label="2019") # 288
825 l15, = ax.plot(theta, dhna[2015], color="#e47d7d", label="2015") # 0
826 l16, = ax.plot(theta, dhna[2016], color="#afc169", label="2016") # 72 , d0e47d
827 l17, = ax.plot(theta, dhna[2017], color="#7de4a6", label="2017") # 144
828 l18, = ax.plot(theta, dhna[2018], color="#7da6e4", label="2018") # 216
829 l19, = ax.plot(theta, dhna[2019], color="#d07de4", label="2019") # 288
830
831 lmean, = ax.plot(theta, dhna['previous_mean'], color="black", linestyle='dashed', label="mean")
832
833 l20, = ax.plot(theta, dhna[2020], color="red", label="2020")
834
835 # deaths_headlines.total_2019.plot(ax=ax)
836
837 def _closeline(line):
838 x, y = line.get_data()
839 x = np.concatenate((x, [x[0]]))
840 y = np.concatenate((y, [y[0]]))
841 line.set_data(x, y)
842
843 [_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
844
845
846 ax.set_xticks(theta)
847 ax.set_xticklabels(dhna.index)
848 plt.legend()
849 plt.title("Deaths by week over years, all UK")
850 plt.savefig('deaths-radar.png')
851 plt.show()
852 ```
853
854 <!-- #region Collapsed="false" -->
855 # Plots for UK nations
856 <!-- #endregion -->
857
858 ```python Collapsed="false"
859 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
860
861 fig = plt.figure(figsize=(10, 10))
862 ax = fig.add_subplot(111, projection="polar")
863
864 theta = np.roll(
865 np.flip(
866 np.arange(len(deaths_headlines_e))/float(len(deaths_headlines_e))*2.*np.pi),
867 14)
868 l15, = ax.plot(theta, deaths_headlines_e[2015], color="#e47d7d", label="2015") # 0
869 l16, = ax.plot(theta, deaths_headlines_e[2016], color="#afc169", label="2016") # 72 , d0e47d
870 l17, = ax.plot(theta, deaths_headlines_e[2017], color="#7de4a6", label="2017") # 144
871 l18, = ax.plot(theta, deaths_headlines_e[2018], color="#7da6e4", label="2018") # 216
872 l19, = ax.plot(theta, deaths_headlines_e[2019], color="#d07de4", label="2019") # 288
873
874 lmean, = ax.plot(theta, deaths_headlines_e['previous_mean'], color="black", linestyle='dashed', label="mean")
875
876 l20, = ax.plot(theta, deaths_headlines_e[2020], color="red", label="2020")
877
878 # deaths_headlines.total_2019.plot(ax=ax)
879
880 def _closeline(line):
881 x, y = line.get_data()
882 x = np.concatenate((x, [x[0]]))
883 y = np.concatenate((y, [y[0]]))
884 line.set_data(x, y)
885
886 [_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
887
888
889 ax.set_xticks(theta)
890 ax.set_xticklabels(deaths_headlines_e.index)
891 plt.legend()
892 plt.title("Deaths by week over years, England")
893 plt.savefig('deaths-radar_england.png')
894 plt.show()
895 ```
896
897 ```python Collapsed="false"
898 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
899
900 fig = plt.figure(figsize=(10, 10))
901 ax = fig.add_subplot(111, projection="polar")
902
903 theta = np.roll(
904 np.flip(
905 np.arange(len(deaths_headlines_w))/float(len(deaths_headlines_w))*2.*np.pi),
906 14)
907 l15, = ax.plot(theta, deaths_headlines_w[2015], color="#e47d7d", label="2015") # 0
908 l16, = ax.plot(theta, deaths_headlines_w[2016], color="#afc169", label="2016") # 72 , d0e47d
909 l17, = ax.plot(theta, deaths_headlines_w[2017], color="#7de4a6", label="2017") # 144
910 l18, = ax.plot(theta, deaths_headlines_w[2018], color="#7da6e4", label="2018") # 216
911 l19, = ax.plot(theta, deaths_headlines_w[2019], color="#d07de4", label="2019") # 288
912
913 lmean, = ax.plot(theta, deaths_headlines_w['previous_mean'], color="black", linestyle='dashed', label="mean")
914
915 l20, = ax.plot(theta, deaths_headlines_w[2020], color="red", label="2020")
916
917
918 def _closeline(line):
919 x, y = line.get_data()
920 x = np.concatenate((x, [x[0]]))
921 y = np.concatenate((y, [y[0]]))
922 line.set_data(x, y)
923
924 [_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
925
926
927 ax.set_xticks(theta)
928 ax.set_xticklabels(deaths_headlines_w.index)
929 plt.legend()
930 plt.title("Deaths by week over years, Wales")
931 plt.savefig('deaths-radar_wales.png')
932 plt.show()
933 ```
934
935 ```python Collapsed="false"
936 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
937
938 fig = plt.figure(figsize=(10, 10))
939 ax = fig.add_subplot(111, projection="polar")
940
941 theta = np.roll(
942 np.flip(
943 np.arange(len(deaths_headlines_s))/float(len(deaths_headlines_s))*2.*np.pi),
944 14)
945 l15, = ax.plot(theta, deaths_headlines_s[2015], color="#e47d7d", label="2015") # 0
946 l16, = ax.plot(theta, deaths_headlines_s[2016], color="#afc169", label="2016") # 72 , d0e47d
947 l17, = ax.plot(theta, deaths_headlines_s[2017], color="#7de4a6", label="2017") # 144
948 l18, = ax.plot(theta, deaths_headlines_s[2018], color="#7da6e4", label="2018") # 216
949 l19, = ax.plot(theta, deaths_headlines_s[2019], color="#d07de4", label="2019") # 288
950
951 lmean, = ax.plot(theta, deaths_headlines_s['previous_mean'], color="black", linestyle='dashed', label="mean")
952
953 l20, = ax.plot(theta, deaths_headlines_s[2020], color="red", label="2020")
954
955
956 def _closeline(line):
957 x, y = line.get_data()
958 x = np.concatenate((x, [x[0]]))
959 y = np.concatenate((y, [y[0]]))
960 line.set_data(x, y)
961
962 [_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
963
964
965 ax.set_xticks(theta)
966 ax.set_xticklabels(deaths_headlines_s.index)
967 plt.legend()
968 plt.title("Deaths by week over years, Scotland")
969 plt.savefig('deaths-radar_scotland.png')
970 plt.show()
971 ```
972
973 ```python Collapsed="false"
974 # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
975
976 fig = plt.figure(figsize=(10, 10))
977 ax = fig.add_subplot(111, projection="polar")
978
979 theta = np.roll(
980 np.flip(
981 np.arange(len(deaths_headlines_i))/float(len(deaths_headlines_i))*2.*np.pi),
982 14)
983 l15, = ax.plot(theta, deaths_headlines_i[2015], color="#e47d7d", label="2015") # 0
984 l16, = ax.plot(theta, deaths_headlines_i[2016], color="#afc169", label="2016") # 72 , d0e47d
985 l17, = ax.plot(theta, deaths_headlines_i[2017], color="#7de4a6", label="2017") # 144
986 l18, = ax.plot(theta, deaths_headlines_i[2018], color="#7da6e4", label="2018") # 216
987 l19, = ax.plot(theta, deaths_headlines_i[2019], color="#d07de4", label="2019") # 288
988
989 lmean, = ax.plot(theta, deaths_headlines_i['previous_mean'], color="black", linestyle='dashed', label="mean")
990
991 l20, = ax.plot(theta, deaths_headlines_i[2020], color="red", label="2020")
992
993
994 def _closeline(line):
995 x, y = line.get_data()
996 x = np.concatenate((x, [x[0]]))
997 y = np.concatenate((y, [y[0]]))
998 line.set_data(x, y)
999
1000 [_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
1001
1002
1003 ax.set_xticks(theta)
1004 ax.set_xticklabels(deaths_headlines_i.index)
1005 plt.legend()
1006 plt.title("Deaths by week over years, Northern Ireland")
1007 plt.savefig('deaths-radar_northern_ireland.png')
1008 plt.show()
1009 ```
1010
1011 ```python Collapsed="false"
1012
1013 ```
1014
1015 ```python Collapsed="false"
1016
1017 ```