General updates
[covid19.git] / historical_deaths_import.md
1 ---
2 jupyter:
3 jupytext:
4 formats: ipynb,md
5 text_representation:
6 extension: .md
7 format_name: markdown
8 format_version: '1.3'
9 jupytext_version: 1.11.1
10 kernelspec:
11 display_name: Python 3
12 language: python
13 name: python3
14 ---
15
16 <!-- #region Collapsed="false" -->
17 Data from:
18
19 * [Office for National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (England and Wales). Weeks start on a Saturday.
20 * [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
21 * [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.
22
23 <!-- #endregion -->
24
25 ```python Collapsed="false"
26 import itertools
27 import collections
28 import json
29 import pandas as pd
30 import numpy as np
31 from scipy.stats import gmean
32 import datetime
33
34 import matplotlib as mpl
35 import matplotlib.pyplot as plt
36 %matplotlib inline
37
38 from sqlalchemy.types import Integer, Text, String, DateTime, Float
39 from sqlalchemy import create_engine
40 %load_ext sql
41 ```
42
43 ```python Collapsed="false"
import os  # local import keeps this cell self-contained

# Database URL for the covid Postgres instance.
# NOTE(security): the credentials were previously hard-coded here. Prefer
# the COVID_DB_URL environment variable; the old literal is kept only as a
# backward-compatible local-development fallback and should be rotated.
connection_string = os.environ.get(
    'COVID_DB_URL',
    'postgresql://covid:3NbjJTkT63@localhost/covid')
45 ```
46
47 ```python Collapsed="false"
48 %sql $connection_string
49 ```
50
51 ```python Collapsed="false"
# One SQLAlchemy Engine is enough: an Engine manages its own connection
# pool, so creating two identical ones just doubles the pools. Both names
# are kept because later cells use `conn` (pandas.to_sql) and `engine`
# (raw DDL/DML) interchangeably.
engine = create_engine(connection_string)
conn = engine
54 ```
55
56 ```python Collapsed="false"
# ONS England & Wales weekly-deaths workbook (the "published week 53,
# 2020" edition downloaded from the ONS dataset page linked above).
england_wales_filename = 'uk-deaths-data/publishedweek532020.xlsx'
58 ```
59
60 ```python
# (Re)create the destination table: one row per nation-week. The composite
# primary key (year, week, nation) stops the same week being loaded twice.
query_string = '''
drop table if exists all_causes_deaths;
create table all_causes_deaths (
week integer,
year integer,
date_up_to date,
nation varchar(20),
deaths integer,
CONSTRAINT week_nation PRIMARY KEY(year, week, nation)
);
'''
with engine.connect() as connection:
    connection.execute(query_string)
74 ```
75
76 ```python Collapsed="false"
# Northern Ireland weekly deaths, 2015. The CSV has a two-row header and
# day-first dates in columns 1-2.
raw_data_2015 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2015.csv',
                            parse_dates=[1, 2], dayfirst=True,
#                             index_col=0,
                            header=[0, 1]
                            )
# Registration week -> total deaths, indexed by week, for quick inspection.
dh15i = raw_data_2015.iloc[:, [0, 3]]
dh15i.set_index(dh15i.columns[0], inplace=True)
dh15i.columns = ['total_2015']
dh15i.tail()
86 ```
87
88 ```python Collapsed="false"
89 raw_data_2015.head()
90 ```
91
92 ```python Collapsed="false"
# Reshape to the all_causes_deaths schema: week, date_up_to, deaths, plus
# constant year/nation columns.
rd = raw_data_2015.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
    columns={'Week Ends (Friday)': 'date_up_to',
             'Total Number of Deaths Registered in Week (2015P)': 'deaths',
             'Registration Week': 'week'}
)
rd['year'] = 2015
rd['nation'] = 'Northern Ireland'
rd.head()
100 ```
101
102 ```python Collapsed="false"
# Append the reshaped 2015 rows; the table's composite primary key guards
# against accidentally double-loading the same weeks.
rd.to_sql('all_causes_deaths', conn, if_exists='append', index=False)
108 ```
109
110 ```python Collapsed="false"
111 %sql select * from all_causes_deaths limit 10
112 ```
113
114 ```python Collapsed="false"
# Northern Ireland weekly deaths, 2016 (same layout as the 2015 file).
raw_data_2016 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2016.csv',
                            parse_dates=[1, 2], dayfirst=True,
#                             index_col=0,
                            header=[0, 1]
                            )
raw_data_2016.head()
# dh16i = raw_data_2016.iloc[:, [2]]
# dh16i.columns = ['total_2016']
# # dh16i.head()
# Registration week -> total deaths, indexed by week, for quick inspection.
dh16i = raw_data_2016.iloc[:, [0, 3]]
dh16i.set_index(dh16i.columns[0], inplace=True)
dh16i.columns = ['total_2016']
dh16i.tail()
```

```python Collapsed="false"
# Reshape to the all_causes_deaths schema.
rd = raw_data_2016.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
    columns={'Week Ends (Friday)': 'date_up_to',
             'Total Number of Deaths Registered in Week (2016P)': 'deaths',
             'Registration Week': 'week'}
)
rd['year'] = 2016
rd['nation'] = 'Northern Ireland'
rd.head()
```

```python Collapsed="false"
# Append this year's rows; the primary key rejects duplicate loads.
rd.to_sql(
    'all_causes_deaths',
    conn,
    if_exists='append',
    index=False)
146 ```
147
148 ```python Collapsed="false"
149 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
150 ```
151
152 ```python Collapsed="false"
# Northern Ireland weekly deaths, 2017 (same layout as previous years).
raw_data_2017 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2017.csv',
                            parse_dates=[1, 2], dayfirst=True,
#                             index_col=0,
                            header=[0, 1]
                            )
raw_data_2017.head()
dh17i = raw_data_2017.iloc[:, [0, 3]]
dh17i.set_index(dh17i.columns[0], inplace=True)
dh17i.columns = ['total_2017']
dh17i.tail()
```

```python Collapsed="false"
# Reshape to the all_causes_deaths schema.
rd = raw_data_2017.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
    columns={'Week Ends (Friday)': 'date_up_to',
             'Total Number of Deaths Registered in Week (2017P)': 'deaths',
             'Registration Week': 'week'}
)
rd['year'] = 2017
rd['nation'] = 'Northern Ireland'
rd.head()
```

```python Collapsed="false"
# Append this year's rows; the primary key rejects duplicate loads.
rd.to_sql(
    'all_causes_deaths',
    conn,
    if_exists='append',
    index=False)
181 ```
182
183 ```python Collapsed="false"
184 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
185 ```
186
187 ```python Collapsed="false"
# Northern Ireland weekly deaths, 2018 (same layout as previous years).
raw_data_2018 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2018.csv',
                            parse_dates=[1, 2], dayfirst=True,
#                             index_col=0,
                            header=[0, 1]
                            )
raw_data_2018.head()
dh18i = raw_data_2018.iloc[:, [0, 3]]
dh18i.set_index(dh18i.columns[0], inplace=True)
dh18i.columns = ['total_2018']
dh18i.tail()
```

```python Collapsed="false"
# Reshape to the all_causes_deaths schema.
rd = raw_data_2018.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
    columns={'Week Ends (Friday)': 'date_up_to',
             'Total Number of Deaths Registered in Week (2018P)': 'deaths',
             'Registration Week': 'week'}
)
rd['year'] = 2018
rd['nation'] = 'Northern Ireland'
rd.head()
```

```python Collapsed="false"
# Append this year's rows; the primary key rejects duplicate loads.
rd.to_sql(
    'all_causes_deaths',
    conn,
    if_exists='append',
    index=False)
216 ```
217
218 ```python Collapsed="false"
219 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
220 ```
221
222 ```python Collapsed="false"
# Northern Ireland weekly deaths, 2019 (same layout as previous years).
raw_data_2019 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2019.csv',
                            parse_dates=[1, 2], dayfirst=True,
#                             index_col=0,
                            header=[0, 1]
                            )
raw_data_2019.head()
dh19i = raw_data_2019.iloc[:, [0, 3]]
dh19i.set_index(dh19i.columns[0], inplace=True)
dh19i.columns = ['total_2019']
dh19i.tail()
```

```python Collapsed="false"
# Reshape to the all_causes_deaths schema.
rd = raw_data_2019.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
    columns={'Week Ends (Friday)': 'date_up_to',
             'Total Number of Deaths Registered in Week (2019P)': 'deaths',
             'Registration Week': 'week'}
)
rd['year'] = 2019
rd['nation'] = 'Northern Ireland'
rd.head()
```

```python Collapsed="false"
# Append this year's rows; the primary key rejects duplicate loads.
rd.to_sql(
    'all_causes_deaths',
    conn,
    if_exists='append',
    index=False)
251 ```
252
253 ```python Collapsed="false"
254 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
255 ```
256
257 ```python Collapsed="false"
# Northern Ireland weekly deaths, 2020. Column layout differs slightly
# from earlier years: only one date column, and the date/week/deaths
# columns sit at positions 0-2 with a 'Week Ending (Friday)' label.
raw_data_2020_i = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2020.csv',
                              parse_dates=[1], dayfirst=True,
                              header=[0, 1]
                              )
raw_data_2020_i.head()
```

```python Collapsed="false"
# Reshape to the all_causes_deaths schema.
rd = raw_data_2020_i.iloc[:, [0, 1, 2]].droplevel(1, axis=1).rename(
    columns={'Week Ending (Friday)': 'date_up_to',
             'Total Number of Deaths Registered in Week (2020P)': 'deaths',
             'Registration Week': 'week'}
)
rd['year'] = 2020
rd['nation'] = 'Northern Ireland'
rd.head()
```

```python Collapsed="false"
rd.tail()
```

```python Collapsed="false"
# Append this year's rows; the primary key rejects duplicate loads.
rd.to_sql(
    'all_causes_deaths',
    conn,
    if_exists='append',
    index=False)
285 ```
286
287 ```python Collapsed="false"
288 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
289 ```
290
291 ```python
292 raw_data_2020_i.set_index(raw_data_2020_i.columns[0], inplace=True)
293 raw_data_2020_i.tail()
294 ```
295
296 ```python Collapsed="false"
297 datetime.datetime.now().isocalendar()
298 ```
299
300 ```python Collapsed="false"
301 datetime.datetime.fromisocalendar(2021, 3, 1)
302 ```
303
304 ```python Collapsed="false"
305
306 ```
307
308 ```python Collapsed="false"
# Scotland weekly deaths: one column per year, rows are ISO-8601 week
# numbers (see the source notes at the top of this notebook).
raw_data_s = pd.read_csv('uk-deaths-data/weekly-deaths-scotland.csv',
                         index_col=0,
                         header=0,
                         skiprows=2
                         )
# raw_data_s
315 ```
316
317 ```python Collapsed="false"
# Keep just the per-year total columns, ordered newest-first, and rename
# them total_YYYY to match the Northern Ireland inspection frames.
year_columns = list(reversed('2015 2016 2017 2018 2019 2020'.split()))
deaths_headlines_s = raw_data_s[year_columns]
deaths_headlines_s.columns = ['total_' + year for year in year_columns]
# Re-number the rows from 1 so the index is the ISO week number.
deaths_headlines_s.reset_index(drop=True, inplace=True)
deaths_headlines_s.index = deaths_headlines_s.index + 1
deaths_headlines_s
323 ```
324
325 ```python
326 %sql select * from all_causes_deaths limit 5
327 ```
328
329 ```python
# Idempotency: clear any previously-loaded Scotland rows before the
# row-by-row insert in the next cell re-adds them.
query_string = '''
delete from all_causes_deaths where nation = 'Scotland';
'''
with engine.connect() as connection:
    connection.execute(query_string)
335 ```
336
337 ```python Collapsed="false"
# Load the Scottish week x year table into all_causes_deaths. Columns are
# named 'total_YYYY'; Scotland publishes ISO-8601 weeks, so the week's
# last day is Sunday (weekday 7 in fromisocalendar).
from sqlalchemy import text

# Bound parameters instead of f-string interpolation: safer against
# quoting problems and keeps dates/numbers correctly typed.
insert_stmt = text(
    'insert into all_causes_deaths (week, year, date_up_to, nation, deaths) '
    "values (:week, :year, :date_up_to, 'Scotland', :deaths)")

for year_col, year_series in deaths_headlines_s.items():
    year_num = int(year_col[-4:])
    # Series.items() replaces iteritems(), which is deprecated and was
    # removed in pandas 2.0.
    for week_num, week_deaths in year_series.dropna().items():
        week_end = datetime.date.fromisocalendar(year_num, week_num, 7)
        with engine.connect() as connection:
            connection.execute(insert_stmt, {
                'week': int(week_num),
                'year': year_num,
                'date_up_to': week_end,
                'deaths': int(week_deaths),
            })
349 ```
350
351 ```python
352 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
353 ```
354
355 ```python
356 %sql select year, nation, date_up_to from all_causes_deaths where week=3 order by year, nation
357 ```
358
359 ```python Collapsed="false"
# ONS England & Wales workbook, 2020 weekly-figures sheet. Takes the
# first 91 rows then transposes, so each row of eng_xls is one week.
# NOTE(review): the 91-row cut presumably trims footnote rows from the
# sheet — confirm against the workbook if the ONS layout changes.
eng_xls = pd.read_excel(england_wales_filename,
                        sheet_name="Weekly figures 2020",
                        skiprows=[0, 1, 2, 3],
                        header=0,
                        index_col=[1]
                        ).iloc[:91].T
eng_xls
367 ```
368
369 ```python Collapsed="false"
370 # eng_xls_columns
371 ```
372
373 ```python Collapsed="false"
# The transposed sheet leaves some column labels as NaN floats. Where the
# first data row holds a real value (not NaT), promote that value to be
# the column name instead.
eng_xls_columns = list(eng_xls.columns)

for i, c in enumerate(eng_xls_columns):
#     print(i, c, type(c), isinstance(c, float))
    if isinstance(c, float) and np.isnan(c):
        if eng_xls.iloc[0].iloc[i] is not pd.NaT:
            eng_xls_columns[i] = eng_xls.iloc[0].iloc[i]

# np.isnan(eng_xls_columns[0])
# eng_xls_columns

eng_xls.columns = eng_xls_columns
# eng_xls.columns
387 ```
388
389 ```python
390 eng_xls
391 ```
392
393 ```python
# Wales 2020 rows for the database. iloc[1:] skips the first row (it held
# the labels recovered by the column-fix cell above); the transposed
# frame's index becomes the week number.
rd = eng_xls.iloc[1:][['Week ended', 'Wales']].reset_index(level=0).rename(
    columns={'Week ended': 'date_up_to', 'Wales': 'deaths',
             'index': 'week'}
)
rd['year'] = 2020
rd['nation'] = 'Wales'
rd.head()
401 ```
402
403 ```python
# Append the Wales 2020 rows.
rd.to_sql(
    'all_causes_deaths',
    conn,
    if_exists='append',
    index=False)
409 ```
410
411 ```python Collapsed="false"
# Work on an explicit copy: eng_xls.iloc[1:] is a slice, and assigning a
# new column to it would otherwise risk pandas' SettingWithCopyWarning
# (silently writing to a view). Behaviour of the values is unchanged.
eng_xls = eng_xls.iloc[1:].copy()
# England is not published directly: derive it as the England-and-Wales
# total minus the Wales count.
eng_xls['England deaths'] = eng_xls.loc[:, 'Total deaths, all ages'] - eng_xls.loc[:, 'Wales']
414 ```
415
416 ```python
417 eng_xls.head()
418 ```
419
420 ```python
# England 2020 rows for the database, using the derived 'England deaths'
# column (England-and-Wales total minus Wales).
rd = eng_xls[['Week ended', 'England deaths']].reset_index(level=0).rename(
    columns={'Week ended': 'date_up_to', 'England deaths': 'deaths',
             'index': 'week'}
)
rd['year'] = 2020
rd['nation'] = 'England'
rd.head()
428 ```
429
430 ```python
# Idempotency: clear any previously-loaded England rows before appending.
query_string = '''
delete from all_causes_deaths where nation = 'England';
'''
with engine.connect() as connection:
    connection.execute(query_string)
436 ```
437
438 ```python
# Append the England 2020 rows.
rd.to_sql(
    'all_causes_deaths',
    conn,
    if_exists='append',
    index=False)
444 ```
445
446 ```python
447 rd.tail()
448 ```
449
450 ```python
451 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
452 ```
453
454 ```python Collapsed="false"
455 # raw_data_2020 = pd.read_csv('uk-deaths-data/publishedweek272020.csv',
456 # parse_dates=[1], dayfirst=True,
457 # index_col=0,
458 # header=[0, 1])
459 ```
460
461 ```python Collapsed="false"
462
463 ```
464
465 ```python Collapsed="false"
466 # raw_data_2020.head()
467 ```
468
469 ```python Collapsed="false"
470 # raw_data_2020['W92000004', 'Wales']
471 ```
472
473 ```python Collapsed="false"
# ONS England & Wales weekly registrations, 2019 (two-row CSV header).
raw_data_2019 = pd.read_csv('uk-deaths-data/publishedweek522019.csv',
                            parse_dates=[1], dayfirst=True,
#                             index_col=0,
                            header=[0, 1])
# raw_data_2019.head()
```

```python
# Keep week number, week-end date, the E&W total and the Wales
# (W92000004 area code) column.
rdew = raw_data_2019.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
rdew.head()
```

```python
# Wales rows for the database.
rd = rdew.drop(columns=['Total deaths, all ages']).rename(
    columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
             'Week number': 'week'}
)
rd['year'] = 2019
rd['nation'] = 'Wales'
rd.head()
```

```python
rd.to_sql(
    'all_causes_deaths',
    conn,
    if_exists='append',
    index=False)
```

```python
# England = England-and-Wales total minus Wales.
rd = rdew.loc[:, ['Week ended','Week number']]
rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
rd = rd.rename(
    columns={'Week ended': 'date_up_to', 'Week number': 'week'}
)
rd['year'] = 2019
rd['nation'] = 'England'
rd.head()
```

```python
rd.to_sql(
    'all_causes_deaths',
    conn,
    if_exists='append',
    index=False)
521 ```
522
523 ```python
524 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
525 ```
526
527 ```python Collapsed="false"
# ONS England & Wales weekly registrations, 2018 (same layout as 2019).
raw_data_2018 = pd.read_csv('uk-deaths-data/publishedweek522018.csv',
                            parse_dates=[1], dayfirst=True,
#                             index_col=0,
                            header=[0, 1])
# raw_data_2018.head()
```

```python
# Week number, week-end date, E&W total and Wales (W92000004) column.
rdew = raw_data_2018.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
rdew.head()
```

```python
# Wales rows for the database.
rd = rdew.drop(columns=['Total deaths, all ages']).rename(
    columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
             'Week number': 'week'}
)
rd['year'] = 2018
rd['nation'] = 'Wales'
rd.head()
```

```python
rd.to_sql(
    'all_causes_deaths',
    conn,
    if_exists='append',
    index=False)
```

```python
# England = England-and-Wales total minus Wales.
rd = rdew.loc[:, ['Week ended','Week number']]
rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
rd = rd.rename(
    columns={'Week ended': 'date_up_to', 'Week number': 'week'}
)
rd['year'] = 2018
rd['nation'] = 'England'
rd.head()
```

```python
rd.to_sql(
    'all_causes_deaths',
    conn,
    if_exists='append',
    index=False)
575 ```
576
577 ```python
578 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
579 ```
580
581 ```python Collapsed="false"
# ONS England & Wales weekly registrations, 2017 (same layout as 2019).
raw_data_2017 = pd.read_csv('uk-deaths-data/publishedweek522017.csv',
                            parse_dates=[1], dayfirst=True,
#                             index_col=0,
                            header=[0, 1])
# raw_data_2017.head()
```

```python
# Week number, week-end date, E&W total and Wales (W92000004) column.
rdew = raw_data_2017.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
rdew.head()
```

```python
# Wales rows for the database.
rd = rdew.drop(columns=['Total deaths, all ages']).rename(
    columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
             'Week number': 'week'}
)
rd['year'] = 2017
rd['nation'] = 'Wales'
rd.head()
```

```python
rd.to_sql(
    'all_causes_deaths',
    conn,
    if_exists='append',
    index=False)
```

```python
# England = England-and-Wales total minus Wales.
rd = rdew.loc[:, ['Week ended','Week number']]
rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
rd = rd.rename(
    columns={'Week ended': 'date_up_to', 'Week number': 'week'}
)
rd['year'] = 2017
rd['nation'] = 'England'
rd.head()
```

```python
rd.to_sql(
    'all_causes_deaths',
    conn,
    if_exists='append',
    index=False)
629 ```
630
631 ```python
632 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
633 ```
634
635 ```python
636
637 ```
638
639 ```python Collapsed="false"
# ONS England & Wales weekly registrations, 2016 (same layout as 2019).
raw_data_2016 = pd.read_csv('uk-deaths-data/publishedweek522016.csv',
                            parse_dates=[1], dayfirst=True,
#                             index_col=0,
                            header=[0, 1])
# raw_data_2016.head()
```

```python
raw_data_2016.head()
```

```python
# Week number, week-end date, E&W total and Wales (W92000004) column.
rdew = raw_data_2016.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
rdew.head()
```

```python
# Wales rows for the database.
rd = rdew.drop(columns=['Total deaths, all ages']).rename(
    columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
             'Week number': 'week'}
)
rd['year'] = 2016
rd['nation'] = 'Wales'
rd.head()
```

```python
rd.to_sql(
    'all_causes_deaths',
    conn,
    if_exists='append',
    index=False)
```

```python
# England = England-and-Wales total minus Wales.
rd = rdew.loc[:, ['Week ended','Week number']]
rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
rd = rd.rename(
    columns={'Week ended': 'date_up_to', 'Week number': 'week'}
)
rd['year'] = 2016
rd['nation'] = 'England'
rd.head()
```

```python
rd.to_sql(
    'all_causes_deaths',
    conn,
    if_exists='append',
    index=False)
691 ```
692
693 ```python
694 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
695 ```
696
697 ```python Collapsed="false"
# ONS England & Wales weekly registrations, 2015 (older filename pattern,
# same column layout).
raw_data_2015 = pd.read_csv('uk-deaths-data/publishedweek2015.csv',
                            parse_dates=[1], dayfirst=True,
#                             index_col=0,
                            header=[0, 1])
# raw_data_2015.head()
```

```python
# Week number, week-end date, E&W total and Wales (W92000004) column.
rdew = raw_data_2015.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
rdew.head()
```

```python
# Wales rows for the database.
rd = rdew.drop(columns=['Total deaths, all ages']).rename(
    columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
             'Week number': 'week'}
)
rd['year'] = 2015
rd['nation'] = 'Wales'
rd.head()
```

```python
rd.to_sql(
    'all_causes_deaths',
    conn,
    if_exists='append',
    index=False)
```

```python
# England = England-and-Wales total minus Wales.
rd = rdew.loc[:, ['Week ended','Week number']]
rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
rd = rd.rename(
    columns={'Week ended': 'date_up_to', 'Week number': 'week'}
)
rd['year'] = 2015
rd['nation'] = 'England'
rd.head()
```

```python
rd.to_sql(
    'all_causes_deaths',
    conn,
    if_exists='append',
    index=False)
745 ```
746
747 ```python
748 %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by year, nation
749 ```
750
751 ```sql magic_args="res << select week, year, deaths"
752 from all_causes_deaths
753 where nation = 'England'
754 ```
755
756 ```python
# Pivot the long-format query result (from the %%sql cell above) to a
# week x year table for England.
deaths_headlines_e = res.DataFrame().pivot(index='week', columns='year', values='deaths')
deaths_headlines_e
759 ```
760
761 ```python
762
763 ```
764
765 ```sql magic_args="res << select week, year, deaths"
766 from all_causes_deaths
767 where nation = 'Scotland'
768 ```
769
770 ```python
# Week x year table for Scotland. Note this rebinds deaths_headlines_s,
# replacing the earlier total_YYYY frame read from the CSV.
deaths_headlines_s = res.DataFrame().pivot(index='week', columns='year', values='deaths')
deaths_headlines_s
773 ```
774
775 ```sql magic_args="res << select week, year, deaths"
776 from all_causes_deaths
777 where nation = 'Wales'
778 ```
779
780 ```python
# Week x year table for Wales.
deaths_headlines_w = res.DataFrame().pivot(index='week', columns='year', values='deaths')
deaths_headlines_w
783 ```
784
785 ```sql magic_args="res << select week, year, deaths"
786 from all_causes_deaths
787 where nation = 'Northern Ireland'
788 ```
789
790 ```python
# Week x year table for Northern Ireland.
deaths_headlines_i = res.DataFrame().pivot(index='week', columns='year', values='deaths')
deaths_headlines_i
793 ```
794
795 ```python Collapsed="false"
# UK total: element-wise sum across the four nations. Any week missing
# from one nation's table (e.g. a week 53 that only some datasets have)
# propagates as NaN in the total.
deaths_headlines = deaths_headlines_e + deaths_headlines_w + deaths_headlines_i + deaths_headlines_s
deaths_headlines
798 ```
799
800 ```python
801 deaths_headlines_e.columns
802 ```
803
804 ```python
# Baseline column: mean weekly deaths over the pre-pandemic years
# 2015-2019, added to each nation's table and to the UK total.
prior_years = [2015, 2016, 2017, 2018, 2019]
for frame in (deaths_headlines_e, deaths_headlines_w, deaths_headlines_s,
              deaths_headlines_i, deaths_headlines):
    # mean(axis=1, skipna=False) reproduces the previous
    # apply(np.mean, axis=1) exactly: a week absent from any prior year
    # stays NaN rather than becoming a partial-year average — while
    # running vectorised instead of row-by-row.
    frame['previous_mean'] = frame[prior_years].mean(axis=1, skipna=False)
deaths_headlines
811 ```
812
813 ```python Collapsed="false"
# Weekly deaths per year, whole UK.
deaths_headlines[[2020, 2019, 2018, 2017, 2016, 2015]].plot(figsize=(14, 8))
```

```python Collapsed="false"
# 2020 against the 2015-19 baseline.
deaths_headlines[[2020, 'previous_mean']].plot(figsize=(10, 8))
```

```python Collapsed="false"
# Northern Ireland, all columns (years plus previous_mean).
deaths_headlines_i.plot()
```

```python
# Crude excess-deaths estimate: 2020 total minus the baseline total.
deaths_headlines[2020].sum() - deaths_headlines.previous_mean.sum()
827 ```
828
829 ```python Collapsed="false"
# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#

# Drop weeks missing in any year so every trace has the same length.
dhna = deaths_headlines.dropna()

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection="polar")

# One spoke per week; flipped so weeks run clockwise, rolled by 14 to
# rotate where week 1 sits on the dial.
theta = np.roll(
    np.flip(
        np.arange(len(dhna))/float(len(dhna))*2.*np.pi),
    14)
# l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#b56363", label="2015") # 0
# l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#a4b563", label="2016") # 72
# l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#63b584", label="2017") # 144
# l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#6384b5", label="2018") # 216
# l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#a4635b", label="2019") # 288
l15, = ax.plot(theta, dhna[2015], color="#e47d7d", label="2015") # 0
l16, = ax.plot(theta, dhna[2016], color="#afc169", label="2016") # 72 , d0e47d
l17, = ax.plot(theta, dhna[2017], color="#7de4a6", label="2017") # 144
l18, = ax.plot(theta, dhna[2018], color="#7da6e4", label="2018") # 216
l19, = ax.plot(theta, dhna[2019], color="#d07de4", label="2019") # 288

lmean, = ax.plot(theta, dhna['previous_mean'], color="black", linestyle='dashed', label="mean")

l20, = ax.plot(theta, dhna[2020], color="red", label="2020")

# deaths_headlines.total_2019.plot(ax=ax)

# Append each trace's first point to its end so the polygon closes.
def _closeline(line):
    x, y = line.get_data()
    x = np.concatenate((x, [x[0]]))
    y = np.concatenate((y, [y[0]]))
    line.set_data(x, y)

# NOTE(review): l20 (2020) is not in this list, so the red trace is left
# open — confirm whether that is deliberate (e.g. a partial year).
[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]


ax.set_xticks(theta)
ax.set_xticklabels(dhna.index)
plt.legend()
plt.title("Deaths by week over years, all UK")
plt.savefig('deaths-radar-2020.png')
plt.show()
873 ```
874
875 <!-- #region Collapsed="false" -->
876 # Plots for UK nations
877 <!-- #endregion -->
878
879 ```python Collapsed="false"
# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
# Same construction as the all-UK radar above, for England only.

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection="polar")

theta = np.roll(
    np.flip(
        np.arange(len(deaths_headlines_e))/float(len(deaths_headlines_e))*2.*np.pi),
    14)
l15, = ax.plot(theta, deaths_headlines_e[2015], color="#e47d7d", label="2015") # 0
l16, = ax.plot(theta, deaths_headlines_e[2016], color="#afc169", label="2016") # 72 , d0e47d
l17, = ax.plot(theta, deaths_headlines_e[2017], color="#7de4a6", label="2017") # 144
l18, = ax.plot(theta, deaths_headlines_e[2018], color="#7da6e4", label="2018") # 216
l19, = ax.plot(theta, deaths_headlines_e[2019], color="#d07de4", label="2019") # 288

lmean, = ax.plot(theta, deaths_headlines_e['previous_mean'], color="black", linestyle='dashed', label="mean")

l20, = ax.plot(theta, deaths_headlines_e[2020], color="red", label="2020")

# deaths_headlines.total_2019.plot(ax=ax)

def _closeline(line):
    x, y = line.get_data()
    x = np.concatenate((x, [x[0]]))
    y = np.concatenate((y, [y[0]]))
    line.set_data(x, y)

# NOTE(review): as in the UK plot, l20 (2020) is left unclosed.
[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]


ax.set_xticks(theta)
ax.set_xticklabels(deaths_headlines_e.index)
plt.legend()
plt.title("Deaths by week over years, England")
plt.savefig('deaths-radar-2020-england.png')
plt.show()
916 ```
917
918 ```python Collapsed="false"
# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
# Same construction as the all-UK radar above, for Wales only.

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection="polar")

theta = np.roll(
    np.flip(
        np.arange(len(deaths_headlines_w))/float(len(deaths_headlines_w))*2.*np.pi),
    14)
l15, = ax.plot(theta, deaths_headlines_w[2015], color="#e47d7d", label="2015") # 0
l16, = ax.plot(theta, deaths_headlines_w[2016], color="#afc169", label="2016") # 72 , d0e47d
l17, = ax.plot(theta, deaths_headlines_w[2017], color="#7de4a6", label="2017") # 144
l18, = ax.plot(theta, deaths_headlines_w[2018], color="#7da6e4", label="2018") # 216
l19, = ax.plot(theta, deaths_headlines_w[2019], color="#d07de4", label="2019") # 288

lmean, = ax.plot(theta, deaths_headlines_w['previous_mean'], color="black", linestyle='dashed', label="mean")

l20, = ax.plot(theta, deaths_headlines_w[2020], color="red", label="2020")


def _closeline(line):
    x, y = line.get_data()
    x = np.concatenate((x, [x[0]]))
    y = np.concatenate((y, [y[0]]))
    line.set_data(x, y)

# NOTE(review): as in the UK plot, l20 (2020) is left unclosed.
[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]


ax.set_xticks(theta)
ax.set_xticklabels(deaths_headlines_w.index)
plt.legend()
plt.title("Deaths by week over years, Wales")
plt.savefig('deaths-radar-2020-wales.png')
plt.show()
954 ```
955
956 ```python Collapsed="false"
# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
# Same construction as the all-UK radar above, for Scotland only.

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection="polar")

theta = np.roll(
    np.flip(
        np.arange(len(deaths_headlines_s))/float(len(deaths_headlines_s))*2.*np.pi),
    14)
l15, = ax.plot(theta, deaths_headlines_s[2015], color="#e47d7d", label="2015") # 0
l16, = ax.plot(theta, deaths_headlines_s[2016], color="#afc169", label="2016") # 72 , d0e47d
l17, = ax.plot(theta, deaths_headlines_s[2017], color="#7de4a6", label="2017") # 144
l18, = ax.plot(theta, deaths_headlines_s[2018], color="#7da6e4", label="2018") # 216
l19, = ax.plot(theta, deaths_headlines_s[2019], color="#d07de4", label="2019") # 288

lmean, = ax.plot(theta, deaths_headlines_s['previous_mean'], color="black", linestyle='dashed', label="mean")

l20, = ax.plot(theta, deaths_headlines_s[2020], color="red", label="2020")


def _closeline(line):
    x, y = line.get_data()
    x = np.concatenate((x, [x[0]]))
    y = np.concatenate((y, [y[0]]))
    line.set_data(x, y)

# NOTE(review): as in the UK plot, l20 (2020) is left unclosed.
[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]


ax.set_xticks(theta)
ax.set_xticklabels(deaths_headlines_s.index)
plt.legend()
plt.title("Deaths by week over years, Scotland")
plt.savefig('deaths-radar-2020-scotland.png')
plt.show()
992 ```
993
994 ```python Collapsed="false"
# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
# Same construction as the all-UK radar above, for Northern Ireland only.

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection="polar")

theta = np.roll(
    np.flip(
        np.arange(len(deaths_headlines_i))/float(len(deaths_headlines_i))*2.*np.pi),
    14)
l15, = ax.plot(theta, deaths_headlines_i[2015], color="#e47d7d", label="2015") # 0
l16, = ax.plot(theta, deaths_headlines_i[2016], color="#afc169", label="2016") # 72 , d0e47d
l17, = ax.plot(theta, deaths_headlines_i[2017], color="#7de4a6", label="2017") # 144
l18, = ax.plot(theta, deaths_headlines_i[2018], color="#7da6e4", label="2018") # 216
l19, = ax.plot(theta, deaths_headlines_i[2019], color="#d07de4", label="2019") # 288

lmean, = ax.plot(theta, deaths_headlines_i['previous_mean'], color="black", linestyle='dashed', label="mean")

l20, = ax.plot(theta, deaths_headlines_i[2020], color="red", label="2020")


def _closeline(line):
    x, y = line.get_data()
    x = np.concatenate((x, [x[0]]))
    y = np.concatenate((y, [y[0]]))
    line.set_data(x, y)

# NOTE(review): as in the UK plot, l20 (2020) is left unclosed.
[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]


ax.set_xticks(theta)
ax.set_xticklabels(deaths_headlines_i.index)
plt.legend()
plt.title("Deaths by week over years, Northern Ireland")
plt.savefig('deaths-radar-2020-northern-ireland.png')
plt.show()
1030 ```
1031
1032 ```python Collapsed="false"
1033
1034 ```
1035
1036 ```python Collapsed="false"
1037
1038 ```