Updated for imported data format
[covid19.git] / deaths_import.md
diff --git a/deaths_import.md b/deaths_import.md
deleted file mode 100644 (file)
index f704518..0000000
+++ /dev/null
@@ -1,1017 +0,0 @@
----
-jupyter:
-  jupytext:
-    formats: ipynb,md
-    text_representation:
-      extension: .md
-      format_name: markdown
-      format_version: '1.2'
-      jupytext_version: 1.9.1
-  kernelspec:
-    display_name: Python 3
-    language: python
-    name: python3
----
-
-<!-- #region Collapsed="false" -->
-Data from:
-
-* [Office of National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (England and Wales) Weeks start on a Saturday.
-* [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
-* [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.
-
-<!-- #endregion -->
-
-```python Collapsed="false"
-import itertools
-import collections
-import json
-import pandas as pd
-import numpy as np
-from scipy.stats import gmean
-import datetime
-
-import matplotlib as mpl
-import matplotlib.pyplot as plt
-%matplotlib inline
-
-from sqlalchemy.types import Integer, Text, String, DateTime, Float
-from sqlalchemy import create_engine
-%load_ext sql
-```
-
-```python Collapsed="false"
-connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
-```
-
-```python Collapsed="false"
-%sql $connection_string
-```
-
-```python Collapsed="false"
-conn = create_engine(connection_string)
-```
-
-```python Collapsed="false"
-england_wales_filename = 'uk-deaths-data/publishedweek532020.xlsx'
-```
-
-```sql Collapsed="false"
-drop table if exists all_causes_deaths;
-create table all_causes_deaths (
-    week integer,
-    year integer,
-    date_up_to date,
-    nation varchar(20),
-    deaths integer,
-    CONSTRAINT week_nation PRIMARY KEY(year, week, nation)
-);
-```
-
-```python Collapsed="false"
-raw_data_2015 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2015.csv', 
-                       parse_dates=[1, 2], dayfirst=True,
-#                       index_col=0,
-                      header=[0, 1]
-                           )
-dh15i = raw_data_2015.iloc[:, [0, 3]]
-dh15i.set_index(dh15i.columns[0], inplace=True)
-dh15i.columns = ['total_2015']
-dh15i.tail()
-```
-
-```python Collapsed="false"
-raw_data_2015.head()
-```
-
-```python Collapsed="false"
-rd = raw_data_2015.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
-    columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2015P)': 'deaths',
-            'Registration Week': 'week'}
-    )
-rd['year'] = 2015
-rd['nation'] = 'Northern Ireland'
-rd.head()
-```
-
-```python Collapsed="false"
-rd.to_sql(
-    'all_causes_deaths',
-    conn,
-    if_exists='append',
-    index=False)
-```
-
-```python Collapsed="false"
-%sql select * from all_causes_deaths limit 10
-```
-
-```python Collapsed="false"
-raw_data_2016 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2016.csv', 
-                        parse_dates=[1, 2], dayfirst=True,
-#                       index_col=0,
-                      header=[0, 1]
-                           )
-raw_data_2016.head()
-# dh16i = raw_data_2016.iloc[:, [2]]
-# dh16i.columns = ['total_2016']
-# # dh16i.head()
-dh16i = raw_data_2016.iloc[:, [0, 3]]
-dh16i.set_index(dh16i.columns[0], inplace=True)
-dh16i.columns = ['total_2016']
-dh16i.tail()
-```
-
-```python Collapsed="false"
-rd = raw_data_2016.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
-    columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2016P)': 'deaths',
-            'Registration Week': 'week'}
-    )
-rd['year'] = 2016
-rd['nation'] = 'Northern Ireland'
-rd.head()
-```
-
-```python Collapsed="false"
-rd.to_sql(
-    'all_causes_deaths',
-    conn,
-    if_exists='append',
-    index=False)
-```
-
-```python Collapsed="false"
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
-```
-
-```python Collapsed="false"
-raw_data_2017 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2017.csv', 
-                        parse_dates=[1, 2], dayfirst=True,
-#                       index_col=0,
-                      header=[0, 1]
-                           )
-raw_data_2017.head()
-dh17i = raw_data_2017.iloc[:, [0, 3]]
-dh17i.set_index(dh17i.columns[0], inplace=True)
-dh17i.columns = ['total_2017']
-dh17i.tail()
-```
-
-```python Collapsed="false"
-rd = raw_data_2017.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
-    columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2017P)': 'deaths',
-            'Registration Week': 'week'}
-    )
-rd['year'] = 2017
-rd['nation'] = 'Northern Ireland'
-rd.head()
-```
-
-```python Collapsed="false"
-rd.to_sql(
-    'all_causes_deaths',
-    conn,
-    if_exists='append',
-    index=False)
-```
-
-```python Collapsed="false"
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
-```
-
-```python Collapsed="false"
-raw_data_2018 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2018.csv', 
-                        parse_dates=[1, 2], dayfirst=True,
-#                       index_col=0,
-                      header=[0, 1]
-                           )
-raw_data_2018.head()
-dh18i = raw_data_2018.iloc[:, [0, 3]]
-dh18i.set_index(dh18i.columns[0], inplace=True)
-dh18i.columns = ['total_2018']
-dh18i.tail()
-```
-
-```python Collapsed="false"
-rd = raw_data_2018.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
-    columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2018P)': 'deaths',
-            'Registration Week': 'week'}
-    )
-rd['year'] = 2018
-rd['nation'] = 'Northern Ireland'
-rd.head()
-```
-
-```python Collapsed="false"
-rd.to_sql(
-    'all_causes_deaths',
-    conn,
-    if_exists='append',
-    index=False)
-```
-
-```python Collapsed="false"
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
-```
-
-```python Collapsed="false"
-raw_data_2019 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2019.csv', 
-                        parse_dates=[1, 2], dayfirst=True,
-#                       index_col=0,
-                      header=[0, 1]
-                           )
-raw_data_2019.head()
-dh19i = raw_data_2019.iloc[:, [0, 3]]
-dh19i.set_index(dh19i.columns[0], inplace=True)
-dh19i.columns = ['total_2019']
-dh19i.tail()
-```
-
-```python Collapsed="false"
-rd = raw_data_2019.iloc[:, [0, 2, 3]].droplevel(1, axis=1).rename(
-    columns={'Week Ends (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2019P)': 'deaths',
-            'Registration Week': 'week'}
-    )
-rd['year'] = 2019
-rd['nation'] = 'Northern Ireland'
-rd.head()
-```
-
-```python Collapsed="false"
-rd.to_sql(
-    'all_causes_deaths',
-    conn,
-    if_exists='append',
-    index=False)
-```
-
-```python Collapsed="false"
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation)
-```
-
-```python Collapsed="false"
-raw_data_2020_i = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2020.csv', 
-                        parse_dates=[1], dayfirst=True,
-                      header=[0, 1]
-                           )
-raw_data_2020_i.head()
-```
-
-```python Collapsed="false"
-rd = raw_data_2020_i.iloc[:, [0, 1, 2]].droplevel(1, axis=1).rename(
-    columns={'Week Ending (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2020P)': 'deaths',
-            'Registration Week': 'week'}
-    )
-rd['year'] = 2020
-rd['nation'] = 'Northern Ireland'
-rd.head()
-```
-
-```python Collapsed="false"
-rd.tail()
-```
-
-```python Collapsed="false"
-rd.to_sql(
-    'all_causes_deaths',
-    conn,
-    if_exists='append',
-    index=False)
-```
-
-```python Collapsed="false"
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python
-raw_data_2020_i.set_index(raw_data_2020_i.columns[0], inplace=True)
-raw_data_2020_i.tail()
-```
-
-```python Collapsed="false"
-datetime.datetime.now().isocalendar()
-```
-
-```python Collapsed="false"
-datetime.datetime.fromisocalendar(2021, 3, 1)
-```
-
-```python Collapsed="false"
-
-```
-
-```python Collapsed="false"
-raw_data_s = pd.read_csv('uk-deaths-data/weekly-deaths-scotland.csv', 
-                      index_col=0,
-                      header=0,
-                        skiprows=2
-                           )
-# raw_data_s
-```
-
-```python Collapsed="false"
-deaths_headlines_s = raw_data_s[reversed('2015 2016 2017 2018 2019 2020'.split())]
-deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns]
-deaths_headlines_s.reset_index(drop=True, inplace=True)
-deaths_headlines_s.index = deaths_headlines_s.index + 1
-deaths_headlines_s
-```
-
-```python
-%sql select * from all_causes_deaths limit 5
-```
-
-```python Collapsed="false"
-for year, ser in deaths_headlines_s.items():
-    year_i = int(year[-4:])
-#     print(year_i)
-    for week, deaths in ser.dropna().iteritems():
-#         print(datetime.date.fromisocalendar(year_i, week, 7), deaths)
-        dut = datetime.date.fromisocalendar(year_i, week, 7)
-        %sql insert into all_causes_deaths(week, year, date_up_to, nation, deaths) values ({week}, {year_i}, :dut, 'Scotland', {deaths})
-```
-
-```python
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python
-%sql select year, nation, date_up_to from all_causes_deaths where week=3 order by year, nation
-```
-
-```python Collapsed="false"
-eng_xls = pd.read_excel(england_wales_filename, 
-                        sheet_name="Weekly figures 2020",
-                        skiprows=[0, 1, 2, 3],
-                        header=0,
-                        index_col=[1]
-                       ).iloc[:91].T
-eng_xls
-```
-
-```python Collapsed="false"
-# eng_xls_columns
-```
-
-```python Collapsed="false"
-eng_xls_columns = list(eng_xls.columns)
-
-for i, c in enumerate(eng_xls_columns):
-#     print(i, c, type(c), isinstance(c, float))
-    if isinstance(c, float) and np.isnan(c):
-        if eng_xls.iloc[0].iloc[i] is not pd.NaT:
-            eng_xls_columns[i] = eng_xls.iloc[0].iloc[i]
-
-# np.isnan(eng_xls_columns[0])
-# eng_xls_columns
-
-eng_xls.columns = eng_xls_columns
-# eng_xls.columns
-```
-
-```python
-eng_xls
-```
-
-```python
-rd = eng_xls.iloc[1:][['Week ended', 'Wales']].reset_index(level=0).rename(
-    columns={'Week ended': 'date_up_to', 'Wales': 'deaths',
-            'index': 'week'}
-    )
-rd['year'] = 2020
-rd['nation'] = 'Wales'
-rd.head()
-```
-
-```python
-rd.to_sql(
-    'all_causes_deaths',
-    conn,
-    if_exists='append',
-    index=False)
-```
-
-```python Collapsed="false"
-eng_xls = eng_xls.iloc[1:]
-eng_xls['England deaths'] = eng_xls.loc[:, 'Total deaths, all ages'] - eng_xls.loc[:, 'Wales']
-```
-
-```python
-eng_xls.head()
-```
-
-```python
-rd = eng_xls[['Week ended', 'England deaths']].reset_index(level=0).rename(
-    columns={'Week ended': 'date_up_to', 'England deaths': 'deaths',
-            'index': 'week'}
-    )
-rd['year'] = 2020
-rd['nation'] = 'England'
-rd.head()
-```
-
-```python
-%sql delete from all_causes_deaths where nation = 'England'
-```
-
-```python
-rd.to_sql(
-    'all_causes_deaths',
-    conn,
-    if_exists='append',
-    index=False)
-```
-
-```python
-rd.tail()
-```
-
-```python
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python Collapsed="false"
-# raw_data_2020 = pd.read_csv('uk-deaths-data/publishedweek272020.csv', 
-#                        parse_dates=[1], dayfirst=True,
-#                       index_col=0,
-#                       header=[0, 1])
-```
-
-```python Collapsed="false"
-
-```
-
-```python Collapsed="false"
-# raw_data_2020.head()
-```
-
-```python Collapsed="false"
-# raw_data_2020['W92000004', 'Wales']
-```
-
-```python Collapsed="false"
-raw_data_2019 = pd.read_csv('uk-deaths-data/publishedweek522019.csv', 
-                       parse_dates=[1], dayfirst=True,
-#                       index_col=0,
-                      header=[0, 1])
-# raw_data_2019.head()
-```
-
-```python
-rdew = raw_data_2019.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
-rdew.head()
-```
-
-```python
-rd = rdew.drop(columns=['Total deaths, all ages']).rename(
-    columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
-            'Week number': 'week'}
-    )
-rd['year'] = 2019
-rd['nation'] = 'Wales'
-rd.head()
-```
-
-```python
-rd.to_sql(
-    'all_causes_deaths',
-    conn,
-    if_exists='append',
-    index=False)
-```
-
-```python
-rd = rdew.loc[:, ['Week ended','Week number']]
-rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
-rd = rd.rename(
-    columns={'Week ended': 'date_up_to', 'Week number': 'week'}
-    )
-rd['year'] = 2019
-rd['nation'] = 'England'
-rd.head()
-```
-
-```python
-rd.to_sql(
-    'all_causes_deaths',
-    conn,
-    if_exists='append',
-    index=False)
-```
-
-```python
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python Collapsed="false"
-raw_data_2018 = pd.read_csv('uk-deaths-data/publishedweek522018.csv', 
-                       parse_dates=[1], dayfirst=True,
-#                       index_col=0,
-                      header=[0, 1])
-# raw_data_2018.head()
-```
-
-```python
-rdew = raw_data_2018.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
-rdew.head()
-```
-
-```python
-rd = rdew.drop(columns=['Total deaths, all ages']).rename(
-    columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
-            'Week number': 'week'}
-    )
-rd['year'] = 2018
-rd['nation'] = 'Wales'
-rd.head()
-```
-
-```python
-rd.to_sql(
-    'all_causes_deaths',
-    conn,
-    if_exists='append',
-    index=False)
-```
-
-```python
-rd = rdew.loc[:, ['Week ended','Week number']]
-rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
-rd = rd.rename(
-    columns={'Week ended': 'date_up_to', 'Week number': 'week'}
-    )
-rd['year'] = 2018
-rd['nation'] = 'England'
-rd.head()
-```
-
-```python
-rd.to_sql(
-    'all_causes_deaths',
-    conn,
-    if_exists='append',
-    index=False)
-```
-
-```python
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python Collapsed="false"
-raw_data_2017 = pd.read_csv('uk-deaths-data/publishedweek522017.csv', 
-                       parse_dates=[1], dayfirst=True,
-#                       index_col=0,
-                      header=[0, 1])
-# raw_data_2017.head()
-```
-
-```python
-rdew = raw_data_2017.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
-rdew.head()
-```
-
-```python
-rd = rdew.drop(columns=['Total deaths, all ages']).rename(
-    columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
-            'Week number': 'week'}
-    )
-rd['year'] = 2017
-rd['nation'] = 'Wales'
-rd.head()
-```
-
-```python
-rd.to_sql(
-    'all_causes_deaths',
-    conn,
-    if_exists='append',
-    index=False)
-```
-
-```python
-rd = rdew.loc[:, ['Week ended','Week number']]
-rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
-rd = rd.rename(
-    columns={'Week ended': 'date_up_to', 'Week number': 'week'}
-    )
-rd['year'] = 2017
-rd['nation'] = 'England'
-rd.head()
-```
-
-```python
-rd.to_sql(
-    'all_causes_deaths',
-    conn,
-    if_exists='append',
-    index=False)
-```
-
-```python
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python
-
-```
-
-```python Collapsed="false"
-raw_data_2016 = pd.read_csv('uk-deaths-data/publishedweek522016.csv', 
-                       parse_dates=[1], dayfirst=True,
-#                       index_col=0,
-                      header=[0, 1])
-# raw_data_2016.head()
-```
-
-```python
-raw_data_2016.head()
-```
-
-```python
-rdew = raw_data_2016.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
-rdew.head()
-```
-
-```python
-rd = rdew.drop(columns=['Total deaths, all ages']).rename(
-    columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
-            'Week number': 'week'}
-    )
-rd['year'] = 2016
-rd['nation'] = 'Wales'
-rd.head()
-```
-
-```python
-rd.to_sql(
-    'all_causes_deaths',
-    conn,
-    if_exists='append',
-    index=False)
-```
-
-```python
-rd = rdew.loc[:, ['Week ended','Week number']]
-rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
-rd = rd.rename(
-    columns={'Week ended': 'date_up_to', 'Week number': 'week'}
-    )
-rd['year'] = 2016
-rd['nation'] = 'England'
-rd.head()
-```
-
-```python
-rd.to_sql(
-    'all_causes_deaths',
-    conn,
-    if_exists='append',
-    index=False)
-```
-
-```python
- %sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by nation, year
-```
-
-```python Collapsed="false"
-raw_data_2015 = pd.read_csv('uk-deaths-data/publishedweek2015.csv', 
-                       parse_dates=[1], dayfirst=True,
-#                       index_col=0,
-                      header=[0, 1])
-# raw_data_2015.head()
-```
-
-```python
-rdew = raw_data_2015.iloc[:, [0, 1, 2, -1]].droplevel(axis=1, level=1)
-rdew.head()
-```
-
-```python
-rd = rdew.drop(columns=['Total deaths, all ages']).rename(
-    columns={'Week ended': 'date_up_to', 'W92000004': 'deaths',
-            'Week number': 'week'}
-    )
-rd['year'] = 2015
-rd['nation'] = 'Wales'
-rd.head()
-```
-
-```python
-rd.to_sql(
-    'all_causes_deaths',
-    conn,
-    if_exists='append',
-    index=False)
-```
-
-```python
-rd = rdew.loc[:, ['Week ended','Week number']]
-rd['deaths'] = rdew['Total deaths, all ages'] - rdew['W92000004']
-rd = rd.rename(
-    columns={'Week ended': 'date_up_to', 'Week number': 'week'}
-    )
-rd['year'] = 2015
-rd['nation'] = 'England'
-rd.head()
-```
-
-```python
-rd.to_sql(
-    'all_causes_deaths',
-    conn,
-    if_exists='append',
-    index=False)
-```
-
-```python
-%sql select year, nation, count(date_up_to) from all_causes_deaths group by (year, nation) order by year, nation
-```
-
-```sql magic_args="res << select week, year, deaths"
-from all_causes_deaths
-where nation = 'England'
-```
-
-```python
-deaths_headlines_e = res.DataFrame().pivot(index='week', columns='year', values='deaths')
-deaths_headlines_e
-```
-
-```python
-
-```
-
-```sql magic_args="res << select week, year, deaths"
-from all_causes_deaths
-where nation = 'Scotland'
-```
-
-```python
-deaths_headlines_s = res.DataFrame().pivot(index='week', columns='year', values='deaths')
-deaths_headlines_s
-```
-
-```sql magic_args="res << select week, year, deaths"
-from all_causes_deaths
-where nation = 'Wales'
-```
-
-```python
-deaths_headlines_w = res.DataFrame().pivot(index='week', columns='year', values='deaths')
-deaths_headlines_w
-```
-
-```sql magic_args="res << select week, year, deaths"
-from all_causes_deaths
-where nation = 'Northern Ireland'
-```
-
-```python
-deaths_headlines_i = res.DataFrame().pivot(index='week', columns='year', values='deaths')
-deaths_headlines_i
-```
-
-```python Collapsed="false"
-deaths_headlines = deaths_headlines_e + deaths_headlines_w + deaths_headlines_i + deaths_headlines_s
-deaths_headlines
-```
-
-```python
-deaths_headlines_e.columns
-```
-
-```python
-deaths_headlines_e['previous_mean'] = deaths_headlines_e[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
-deaths_headlines_w['previous_mean'] = deaths_headlines_w[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
-deaths_headlines_s['previous_mean'] = deaths_headlines_s[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
-deaths_headlines_i['previous_mean'] = deaths_headlines_i[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
-deaths_headlines['previous_mean'] = deaths_headlines[[int(y) for y in '2019 2018 2017 2016 2015'.split()]].apply(np.mean, axis=1)
-deaths_headlines
-```
-
-```python Collapsed="false"
-deaths_headlines[[2020, 2019, 2018, 2017, 2016, 2015]].plot(figsize=(14, 8))
-```
-
-```python Collapsed="false"
-deaths_headlines[[2020, 'previous_mean']].plot(figsize=(10, 8))
-```
-
-```python Collapsed="false"
-deaths_headlines_i.plot()
-```
-
-```python
-deaths_headlines[2020].sum() - deaths_headlines.previous_mean.sum()
-```
-
-```python Collapsed="false"
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-dhna = deaths_headlines.dropna()
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
-    np.flip(
-        np.arange(len(dhna))/float(len(dhna))*2.*np.pi),
-    14)
-# l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#b56363", label="2015") # 0
-# l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#a4b563", label="2016") # 72
-# l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#63b584", label="2017") # 144
-# l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#6384b5", label="2018") # 216
-# l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#a4635b", label="2019") # 288
-l15, = ax.plot(theta, dhna[2015], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, dhna[2016], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, dhna[2017], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, dhna[2018], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, dhna[2019], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, dhna['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, dhna[2020], color="red", label="2020")
-
-# deaths_headlines.total_2019.plot(ax=ax)
-
-def _closeline(line):
-    x, y = line.get_data()
-    x = np.concatenate((x, [x[0]]))
-    y = np.concatenate((y, [y[0]]))
-    line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(dhna.index)
-plt.legend()
-plt.title("Deaths by week over years, all UK")
-plt.savefig('deaths-radar.png')
-plt.show()
-```
-
-<!-- #region Collapsed="false" -->
-# Plots for UK nations
-<!-- #endregion -->
-
-```python Collapsed="false"
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
-    np.flip(
-        np.arange(len(deaths_headlines_e))/float(len(deaths_headlines_e))*2.*np.pi),
-    14)
-l15, = ax.plot(theta, deaths_headlines_e[2015], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, deaths_headlines_e[2016], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, deaths_headlines_e[2017], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, deaths_headlines_e[2018], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, deaths_headlines_e[2019], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, deaths_headlines_e['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, deaths_headlines_e[2020], color="red", label="2020")
-
-# deaths_headlines.total_2019.plot(ax=ax)
-
-def _closeline(line):
-    x, y = line.get_data()
-    x = np.concatenate((x, [x[0]]))
-    y = np.concatenate((y, [y[0]]))
-    line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(deaths_headlines_e.index)
-plt.legend()
-plt.title("Deaths by week over years, England")
-plt.savefig('deaths-radar_england.png')
-plt.show()
-```
-
-```python Collapsed="false"
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
-    np.flip(
-        np.arange(len(deaths_headlines_w))/float(len(deaths_headlines_w))*2.*np.pi),
-    14)
-l15, = ax.plot(theta, deaths_headlines_w[2015], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, deaths_headlines_w[2016], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, deaths_headlines_w[2017], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, deaths_headlines_w[2018], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, deaths_headlines_w[2019], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, deaths_headlines_w['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, deaths_headlines_w[2020], color="red", label="2020")
-
-
-def _closeline(line):
-    x, y = line.get_data()
-    x = np.concatenate((x, [x[0]]))
-    y = np.concatenate((y, [y[0]]))
-    line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(deaths_headlines_w.index)
-plt.legend()
-plt.title("Deaths by week over years, Wales")
-plt.savefig('deaths-radar_wales.png')
-plt.show()
-```
-
-```python Collapsed="false"
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
-    np.flip(
-        np.arange(len(deaths_headlines_s))/float(len(deaths_headlines_s))*2.*np.pi),
-    14)
-l15, = ax.plot(theta, deaths_headlines_s[2015], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, deaths_headlines_s[2016], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, deaths_headlines_s[2017], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, deaths_headlines_s[2018], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, deaths_headlines_s[2019], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, deaths_headlines_s['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, deaths_headlines_s[2020], color="red", label="2020")
-
-
-def _closeline(line):
-    x, y = line.get_data()
-    x = np.concatenate((x, [x[0]]))
-    y = np.concatenate((y, [y[0]]))
-    line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(deaths_headlines_s.index)
-plt.legend()
-plt.title("Deaths by week over years, Scotland")
-plt.savefig('deaths-radar_scotland.png')
-plt.show()
-```
-
-```python Collapsed="false"
-# Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas#
-
-fig = plt.figure(figsize=(10, 10))
-ax = fig.add_subplot(111, projection="polar")
-
-theta = np.roll(
-    np.flip(
-        np.arange(len(deaths_headlines_i))/float(len(deaths_headlines_i))*2.*np.pi),
-    14)
-l15, = ax.plot(theta, deaths_headlines_i[2015], color="#e47d7d", label="2015") # 0
-l16, = ax.plot(theta, deaths_headlines_i[2016], color="#afc169", label="2016") # 72 , d0e47d
-l17, = ax.plot(theta, deaths_headlines_i[2017], color="#7de4a6", label="2017") # 144
-l18, = ax.plot(theta, deaths_headlines_i[2018], color="#7da6e4", label="2018") # 216
-l19, = ax.plot(theta, deaths_headlines_i[2019], color="#d07de4", label="2019") # 288
-
-lmean, = ax.plot(theta, deaths_headlines_i['previous_mean'], color="black", linestyle='dashed', label="mean")
-
-l20, = ax.plot(theta, deaths_headlines_i[2020], color="red", label="2020")
-
-
-def _closeline(line):
-    x, y = line.get_data()
-    x = np.concatenate((x, [x[0]]))
-    y = np.concatenate((y, [y[0]]))
-    line.set_data(x, y)
-
-[_closeline(l) for l in [l19, l18, l17, l16, l15, lmean]]
-
-
-ax.set_xticks(theta)
-ax.set_xticklabels(deaths_headlines_i.index)
-plt.legend()
-plt.title("Deaths by week over years, Northern Ireland")
-plt.savefig('deaths-radar_northern_ireland.png')
-plt.show()
-```
-
-```python Collapsed="false"
-
-```
-
-```python Collapsed="false"
-
-```