X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;ds=sidebyside;f=uk_deaths.md;fp=uk_deaths.md;h=a6baf23c12a5fdb5e4eea1a860401393245e5de0;hb=5afedd66506be7575034ae6deebcfaa7c2ced978;hp=4dc06c22db6369527160e729d9b6c92ed59f6e2d;hpb=4abff18d7988bdea04a267a08a0792ba570fe0bd;p=covid19.git diff --git a/uk_deaths.md b/uk_deaths.md index 4dc06c2..a6baf23 100644 --- a/uk_deaths.md +++ b/uk_deaths.md @@ -6,21 +6,23 @@ jupyter: extension: .md format_name: markdown format_version: '1.2' - jupytext_version: 1.3.4 + jupytext_version: 1.9.1 kernelspec: display_name: Python 3 language: python name: python3 --- + Data from: * [Office of National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (Endland and Wales) Weeks start on a Saturday. * [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data. * [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday. + -```python +```python Collapsed="false" import itertools import collections import json @@ -33,11 +35,11 @@ import matplotlib.pyplot as plt %matplotlib inline ``` -```python -!ls uk-deaths-data +```python Collapsed="false" +england_wales_filename = 'uk-deaths-data/publishedweek532020.xlsx' ``` -```python +```python Collapsed="false" raw_data_2015 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2015.csv', parse_dates=[1, 2], dayfirst=True, index_col=0, @@ -48,7 +50,7 @@ dh15i.columns = ['total_2015'] # dh15i.head() ``` -```python +```python Collapsed="false" raw_data_2016 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2016.csv', parse_dates=[1, 2], dayfirst=True, index_col=0, @@ -59,7 +61,7 @@ dh16i.columns = ['total_2016'] # dh16i.head() ``` -```python +```python Collapsed="false" raw_data_2017 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2017.csv', parse_dates=[1, 2], dayfirst=True, index_col=0, @@ -70,7 +72,7 @@ dh17i.columns = ['total_2017'] # dh17i.head() ``` -```python +```python Collapsed="false" raw_data_2018 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2018.csv', parse_dates=[1, 2], dayfirst=True, index_col=0, @@ -81,7 +83,7 @@ dh18i.columns = ['total_2018'] # dh18i.head() ``` -```python +```python Collapsed="false" raw_data_2019 = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2019.csv', parse_dates=[1, 2], dayfirst=True, index_col=0, @@ -92,7 +94,7 @@ dh19i.columns = ['total_2019'] # dh19i.head() ``` -```python +```python Collapsed="false" raw_data_2020_i = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2020.csv', parse_dates=[1], dayfirst=True, index_col=0, @@ -100,19 +102,19 @@ raw_data_2020_i = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2020.csv', ) deaths_headlines_i = raw_data_2020_i.iloc[:, [1]] deaths_headlines_i.columns = ['total_2020'] -deaths_headlines_i.head() +deaths_headlines_i.tail() ``` -```python - +```python Collapsed="false" +raw_data_2019 ``` -```python +```python Collapsed="false" ``` -```python -raw_data_s = pd.read_csv('uk-deaths-data/weekly-deaths-april-20-scotland.csv', +```python Collapsed="false" +raw_data_s = pd.read_csv('uk-deaths-data/weekly-deaths-scotland.csv', index_col=0, header=0, skiprows=2 @@ -120,7 +122,7 @@ raw_data_s = pd.read_csv('uk-deaths-data/weekly-deaths-april-20-scotland.csv', # raw_data_s ``` -```python +```python Collapsed="false" deaths_headlines_s = raw_data_s[reversed('2015 2016 2017 2018 2019 2020'.split())] deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns] deaths_headlines_s.reset_index(drop=True, inplace=True) @@ -128,12 +130,12 @@ deaths_headlines_s.index = deaths_headlines_s.index + 1 deaths_headlines_s ``` -```python +```python Collapsed="false" ``` -```python -eng_xls = pd.read_excel('uk-deaths-data/publishedweek312020.xlsx', +```python Collapsed="false" +eng_xls = pd.read_excel(england_wales_filename, sheet_name="Weekly figures 2020", skiprows=[0, 1, 2, 3], header=0, @@ -142,11 +144,11 @@ eng_xls = pd.read_excel('uk-deaths-data/publishedweek312020.xlsx', eng_xls ``` -```python +```python Collapsed="false" # eng_xls_columns ``` -```python +```python Collapsed="false" eng_xls_columns = list(eng_xls.columns) for i, c in enumerate(eng_xls_columns): @@ -162,34 +164,34 @@ eng_xls.columns = eng_xls_columns # eng_xls.columns ``` -```python +```python Collapsed="false" eng_xls['Total deaths, all ages'] ``` -```python +```python Collapsed="false" eng_xls['Wales'].iloc[1:] ``` -```python +```python Collapsed="false" # raw_data_2020 = pd.read_csv('uk-deaths-data/publishedweek272020.csv', # parse_dates=[1], dayfirst=True, # index_col=0, # header=[0, 1]) ``` -```python +```python Collapsed="false" ``` -```python +```python Collapsed="false" # raw_data_2020.head() ``` -```python +```python Collapsed="false" # raw_data_2020['W92000004', 'Wales'] ``` -```python +```python Collapsed="false" raw_data_2019 = pd.read_csv('uk-deaths-data/publishedweek522019.csv', parse_dates=[1], dayfirst=True, index_col=0, @@ -197,7 +199,7 @@ raw_data_2019 = pd.read_csv('uk-deaths-data/publishedweek522019.csv', # raw_data_2019.head() ``` -```python +```python Collapsed="false" raw_data_2018 = pd.read_csv('uk-deaths-data/publishedweek522018.csv', parse_dates=[1], dayfirst=True, index_col=0, @@ -205,7 +207,7 @@ raw_data_2018 = pd.read_csv('uk-deaths-data/publishedweek522018.csv', # raw_data_2018.head() ``` -```python +```python Collapsed="false" raw_data_2017 = pd.read_csv('uk-deaths-data/publishedweek522017.csv', parse_dates=[1], dayfirst=True, index_col=0, @@ -213,7 +215,7 @@ raw_data_2017 = pd.read_csv('uk-deaths-data/publishedweek522017.csv', # raw_data_2017.head() ``` -```python +```python Collapsed="false" raw_data_2016 = pd.read_csv('uk-deaths-data/publishedweek522016.csv', parse_dates=[1], dayfirst=True, index_col=0, @@ -221,7 +223,7 @@ raw_data_2016 = pd.read_csv('uk-deaths-data/publishedweek522016.csv', # raw_data_2016.head() ``` -```python +```python Collapsed="false" raw_data_2015 = pd.read_csv('uk-deaths-data/publishedweek2015.csv', parse_dates=[1], dayfirst=True, index_col=0, @@ -229,14 +231,14 @@ raw_data_2015 = pd.read_csv('uk-deaths-data/publishedweek2015.csv', # raw_data_2015.head() ``` -```python +```python Collapsed="false" dhw = eng_xls['Wales'].iloc[1:] dhe = eng_xls['Total deaths, all ages'].iloc[1:] - dhw deaths_headlines_e = pd.DataFrame({'total_2020': dhe.dropna()}) deaths_headlines_w = pd.DataFrame({'total_2020': dhw.dropna()}) ``` -```python +```python Collapsed="false" # deaths_headlines_e = raw_data_2020.iloc[:, [1]].copy() # deaths_headlines_e.columns = ['total_2020'] # deaths_headlines_w = raw_data_2020['W92000004'].copy() @@ -247,7 +249,7 @@ deaths_headlines_w = pd.DataFrame({'total_2020': dhw.dropna()}) # deaths_headlines_e ``` -```python +```python Collapsed="false" dh19e = raw_data_2019.iloc[:, [1]] dh19w = raw_data_2019['W92000004'] dh19e.columns = ['total_2019'] @@ -256,11 +258,11 @@ dh19e.total_2019 -= dh19w.total_2019 dh19e.tail() ``` -```python +```python Collapsed="false" dh19w.head() ``` -```python +```python Collapsed="false" dh18e = raw_data_2018.iloc[:, [1]] dh18w = raw_data_2018['W92000004'] dh18e.columns = ['total_2018'] @@ -269,7 +271,7 @@ dh18e.total_2018 -= dh18w.total_2018 # dh18e.head() ``` -```python +```python Collapsed="false" dh17e = raw_data_2017.iloc[:, [1]] dh17w = raw_data_2017['W92000004'] dh17e.columns = ['total_2017'] @@ -278,7 +280,7 @@ dh17e.total_2017 -= dh17w.total_2017 # dh17e.head() ``` -```python +```python Collapsed="false" dh16e = raw_data_2016.iloc[:, [1]] dh16w = raw_data_2016['W92000004'] dh16e.columns = ['total_2016'] @@ -287,7 +289,7 @@ dh16e.total_2016 -= dh16w.total_2016 # dh16e.head() ``` -```python +```python Collapsed="false" dh15e = raw_data_2015.iloc[:, [1]] dh15w = raw_data_2015['W92000004'] dh15e.columns = ['total_2015'] @@ -296,13 +298,13 @@ dh15e.total_2015 -= dh15w.total_2015 # dh15e.head() ``` -```python +```python Collapsed="false" # dh18 = raw_data_2018.iloc[:, [1, 2]] # dh18.columns = ['total_2018', 'total_previous'] # # dh18.head() ``` -```python +```python Collapsed="false" deaths_headlines_e = deaths_headlines_e.merge(dh19e['total_2019'], how='outer', left_index=True, right_index=True) deaths_headlines_e = deaths_headlines_e.merge(dh18e['total_2018'], how='outer', left_index=True, right_index=True) deaths_headlines_e = deaths_headlines_e.merge(dh17e['total_2017'], how='outer', left_index=True, right_index=True) @@ -312,7 +314,7 @@ deaths_headlines_e = deaths_headlines_e.merge(dh15e['total_2015'], how='left', l deaths_headlines_e ``` -```python +```python Collapsed="false" deaths_headlines_s = raw_data_s[reversed('2015 2016 2017 2018 2019 2020'.split())] deaths_headlines_s.columns = ['total_' + c for c in deaths_headlines_s.columns] deaths_headlines_s.reset_index(drop=True, inplace=True) @@ -321,14 +323,16 @@ deaths_headlines_s = deaths_headlines_s.loc[1:52] deaths_headlines_s ``` + # Correction for missing data + -```python +```python Collapsed="false" # deaths_headlines_s.loc[20, 'total_2020'] = 1000 # deaths_headlines_s ``` -```python +```python Collapsed="false" deaths_headlines_w = deaths_headlines_w.merge(dh19w['total_2019'], how='outer', left_index=True, right_index=True) deaths_headlines_w = deaths_headlines_w.merge(dh18w['total_2018'], how='outer', left_index=True, right_index=True) deaths_headlines_w = deaths_headlines_w.merge(dh17w['total_2017'], how='outer', left_index=True, right_index=True) @@ -338,7 +342,7 @@ deaths_headlines_w = deaths_headlines_w.merge(dh15w['total_2015'], how='left', l deaths_headlines_w ``` -```python +```python Collapsed="false" deaths_headlines_i = deaths_headlines_i.merge(dh19i['total_2019'], how='outer', left_index=True, right_index=True) deaths_headlines_i = deaths_headlines_i.merge(dh18i['total_2018'], how='outer', left_index=True, right_index=True) deaths_headlines_i = deaths_headlines_i.merge(dh17i['total_2017'], how='outer', left_index=True, right_index=True) @@ -347,12 +351,16 @@ deaths_headlines_i = deaths_headlines_i.merge(dh15i['total_2015'], how='left', l deaths_headlines_i ``` -```python +```python Collapsed="false" +deaths_headlines_s +``` + +```python Collapsed="false" deaths_headlines = deaths_headlines_e + deaths_headlines_w + deaths_headlines_i + deaths_headlines_s deaths_headlines ``` -```python +```python Collapsed="false" deaths_headlines_e['previous_mean'] = deaths_headlines_e['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1) deaths_headlines_w['previous_mean'] = deaths_headlines_w['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1) deaths_headlines_s['previous_mean'] = deaths_headlines_s['total_2019 total_2018 total_2017 total_2016 total_2015'.split()].apply(np.mean, axis=1) @@ -361,42 +369,44 @@ deaths_headlines['previous_mean'] = deaths_headlines['total_2019 total_2018 tota deaths_headlines ``` -```python +```python Collapsed="false" deaths_headlines['total_2020 total_2019 total_2018 total_2017 total_2016 total_2015'.split()].plot(figsize=(14, 8)) ``` -```python +```python Collapsed="false" deaths_headlines[['total_2020', 'previous_mean']].plot(figsize=(10, 8)) ``` -```python +```python Collapsed="false" deaths_headlines_i.plot() ``` -```python +```python Collapsed="false" # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas# +dhna = deaths_headlines.dropna() + fig = plt.figure(figsize=(10, 10)) ax = fig.add_subplot(111, projection="polar") theta = np.roll( np.flip( - np.arange(len(deaths_headlines))/float(len(deaths_headlines))*2.*np.pi), + np.arange(len(dhna))/float(len(dhna))*2.*np.pi), 14) # l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#b56363", label="2015") # 0 # l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#a4b563", label="2016") # 72 # l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#63b584", label="2017") # 144 # l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#6384b5", label="2018") # 216 # l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#a4635b", label="2019") # 288 -l15, = ax.plot(theta, deaths_headlines['total_2015'], color="#e47d7d", label="2015") # 0 -l16, = ax.plot(theta, deaths_headlines['total_2016'], color="#afc169", label="2016") # 72 , d0e47d -l17, = ax.plot(theta, deaths_headlines['total_2017'], color="#7de4a6", label="2017") # 144 -l18, = ax.plot(theta, deaths_headlines['total_2018'], color="#7da6e4", label="2018") # 216 -l19, = ax.plot(theta, deaths_headlines['total_2019'], color="#d07de4", label="2019") # 288 +l15, = ax.plot(theta, dhna['total_2015'], color="#e47d7d", label="2015") # 0 +l16, = ax.plot(theta, dhna['total_2016'], color="#afc169", label="2016") # 72 , d0e47d +l17, = ax.plot(theta, dhna['total_2017'], color="#7de4a6", label="2017") # 144 +l18, = ax.plot(theta, dhna['total_2018'], color="#7da6e4", label="2018") # 216 +l19, = ax.plot(theta, dhna['total_2019'], color="#d07de4", label="2019") # 288 -lmean, = ax.plot(theta, deaths_headlines['previous_mean'], color="black", linestyle='dashed', label="mean") +lmean, = ax.plot(theta, dhna['previous_mean'], color="black", linestyle='dashed', label="mean") -l20, = ax.plot(theta, deaths_headlines['total_2020'], color="red", label="2020") +l20, = ax.plot(theta, dhna['total_2020'], color="red", label="2020") # deaths_headlines.total_2019.plot(ax=ax) @@ -410,60 +420,62 @@ def _closeline(line): ax.set_xticks(theta) -ax.set_xticklabels(deaths_headlines.index) +ax.set_xticklabels(dhna.index) plt.legend() plt.title("Deaths by week over years, all UK") plt.savefig('deaths-radar.png') plt.show() ``` + # Excess deaths calculation + -```python +```python Collapsed="false" # raw_data_2020.loc[12, 'Week ended'] ``` -```python +```python Collapsed="false" eng_xls.loc[12, 'Week ended'] ``` -```python +```python Collapsed="false" # raw_data_2020.iloc[-1]['Week ended'] ``` -```python +```python Collapsed="false" deaths_headlines_e.total_2020.dropna().last_valid_index() ``` -```python +```python Collapsed="false" eng_xls.loc[deaths_headlines_e.total_2020.dropna().last_valid_index(), 'Week ended'] ``` -```python +```python Collapsed="false" eng_xls.loc[27, 'Week ended'] ``` -```python +```python Collapsed="false" # raw_data_2020.loc[12].droplevel(1)['Week ended'] ``` -```python +```python Collapsed="false" # raw_data_2020.iloc[-1].droplevel(1)['Week ended'] ``` -```python +```python Collapsed="false" (deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum() ``` -```python +```python Collapsed="false" (deaths_headlines.loc[12:27].total_2020 - deaths_headlines.loc[12:27].previous_mean).sum() ``` -```python +```python Collapsed="false" deaths_headlines.previous_mean.sum() ``` -```python +```python Collapsed="false" # excess_death_data = { # 'start_date': str(eng_xls.loc[12, 'Week ended']), # 'end_date': str(eng_xls.loc[deaths_headlines_e.total_2020.dropna().last_valid_index(), 'Week ended']), @@ -474,18 +486,18 @@ deaths_headlines.previous_mean.sum() # json.dump(excess_death_data, f) ``` -```python -excess_death_data = { - 'start_date': str(eng_xls.loc[12, 'Week ended']), - 'end_date': str(eng_xls.loc[27, 'Week ended']), - 'excess_deaths': (deaths_headlines.loc[12:27].total_2020 - deaths_headlines.loc[12:27].previous_mean).sum() -} +```python Collapsed="false" +# excess_death_data = { +# 'start_date': str(eng_xls.loc[12, 'Week ended']), +# 'end_date': str(eng_xls.loc[27, 'Week ended']), +# 'excess_deaths': (deaths_headlines.loc[12:27].total_2020 - deaths_headlines.loc[12:27].previous_mean).sum() +# } -with open('excess_deaths.json', 'w') as f: - json.dump(excess_death_data, f) +# with open('excess_deaths.json', 'w') as f: +# json.dump(excess_death_data, f) ``` -```python +```python Collapsed="false" # excess_death_data = { # 'start_date': str(raw_data_2020.loc[12].droplevel(1)['Week ended']), # 'end_date': str(raw_data_2020.iloc[-1].droplevel(1)['Week ended']), @@ -496,29 +508,31 @@ with open('excess_deaths.json', 'w') as f: # json.dump(excess_death_data, f) ``` -```python +```python Collapsed="false" eng_xls['Week ended'] ``` -```python +```python Collapsed="false" # raw_data_2020.droplevel(1, axis='columns')['Week ended'] ``` -```python +```python Collapsed="false" deaths_by_week = deaths_headlines.merge(eng_xls['Week ended'], left_index=True, right_index=True) deaths_by_week.rename(columns={'Week ended': 'week_ended'}, inplace=True) deaths_by_week.to_csv('deaths_by_week.csv', header=True, index=False) ``` -```python +```python Collapsed="false" # deaths_by_week = deaths_headlines.merge(raw_data_2020.droplevel(1, axis='columns')['Week ended'], left_index=True, right_index=True) # deaths_by_week.rename(columns={'Week ended': 'week_ended'}, inplace=True) # deaths_by_week.to_csv('deaths_by_week.csv', header=True, index=False) ``` + # Plots for UK nations + -```python +```python Collapsed="false" # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas# fig = plt.figure(figsize=(10, 10)) @@ -557,7 +571,7 @@ plt.savefig('deaths-radar_england.png') plt.show() ``` -```python +```python Collapsed="false" # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas# fig = plt.figure(figsize=(10, 10)) @@ -595,7 +609,7 @@ plt.savefig('deaths-radar_wales.png') plt.show() ``` -```python +```python Collapsed="false" # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas# fig = plt.figure(figsize=(10, 10)) @@ -633,7 +647,7 @@ plt.savefig('deaths-radar_scotland.png') plt.show() ``` -```python +```python Collapsed="false" # Radar plot code taken from example at https://stackoverflow.com/questions/42878485/getting-matplotlib-radar-plot-with-pandas# fig = plt.figure(figsize=(10, 10)) @@ -671,11 +685,11 @@ plt.savefig('deaths-radar_northern_ireland.png') plt.show() ``` -```python +```python Collapsed="false" # list(raw_data_2020.columns) ``` -```python +```python Collapsed="false" # deaths_headlines_e = raw_data_2020.iloc[:, [1]].copy() # deaths_headlines_e.columns = ['total_2020'] # deaths_headlines_w = raw_data_2020['W92000004'].copy() @@ -686,10 +700,10 @@ plt.show() # deaths_headlines_e ``` -```python +```python Collapsed="false" ``` -```python +```python Collapsed="false" ```