X-Git-Url: https://git.njae.me.uk/?p=covid19.git;a=blobdiff_plain;f=uk_deaths.md;h=4dc06c22db6369527160e729d9b6c92ed59f6e2d;hp=e0077b334078753e5cb299385bf8f8a17e188e1e;hb=4abff18d7988bdea04a267a08a0792ba570fe0bd;hpb=0fa9dcb25b4a7ee4e28c28f9c350add4f61f21ee diff --git a/uk_deaths.md b/uk_deaths.md index e0077b3..4dc06c2 100644 --- a/uk_deaths.md +++ b/uk_deaths.md @@ -92,13 +92,6 @@ dh19i.columns = ['total_2019'] # dh19i.head() ``` -```python -raw_data_2020 = pd.read_csv('uk-deaths-data/publishedweek192020.csv', - parse_dates=[1], dayfirst=True, - index_col=0, - header=[0, 1]) -``` - ```python raw_data_2020_i = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2020.csv', parse_dates=[1], dayfirst=True, @@ -140,15 +133,48 @@ deaths_headlines_s ``` ```python +eng_xls = pd.read_excel('uk-deaths-data/publishedweek312020.xlsx', + sheet_name="Weekly figures 2020", + skiprows=[0, 1, 2, 3], + header=0, + index_col=[1] + ).iloc[:91].T +eng_xls +``` +```python +# eng_xls_columns ``` ```python +eng_xls_columns = list(eng_xls.columns) + +for i, c in enumerate(eng_xls_columns): +# print(i, c, type(c), isinstance(c, float)) + if isinstance(c, float) and np.isnan(c): + if eng_xls.iloc[0].iloc[i] is not pd.NaT: + eng_xls_columns[i] = eng_xls.iloc[0].iloc[i] +# np.isnan(eng_xls_columns[0]) +# eng_xls_columns + +eng_xls.columns = eng_xls_columns +# eng_xls.columns ``` ```python +eng_xls['Total deaths, all ages'] +``` +```python +eng_xls['Wales'].iloc[1:] +``` + +```python +# raw_data_2020 = pd.read_csv('uk-deaths-data/publishedweek272020.csv', +# parse_dates=[1], dayfirst=True, +# index_col=0, +# header=[0, 1]) ``` ```python @@ -160,7 +186,7 @@ deaths_headlines_s ``` ```python -raw_data_2020['W92000004', 'Wales'] +# raw_data_2020['W92000004', 'Wales'] ``` ```python @@ -204,14 +230,21 @@ raw_data_2015 = pd.read_csv('uk-deaths-data/publishedweek2015.csv', ``` ```python -deaths_headlines_e = raw_data_2020.iloc[:, [1]].copy() -deaths_headlines_e.columns = ['total_2020'] -deaths_headlines_w = raw_data_2020['W92000004'].copy() -deaths_headlines_e.columns = ['total_2020'] -deaths_headlines_w.columns = ['total_2020'] -deaths_headlines_e.total_2020 -= deaths_headlines_w.total_2020 -deaths_headlines_e.head() -deaths_headlines_e +dhw = eng_xls['Wales'].iloc[1:] +dhe = eng_xls['Total deaths, all ages'].iloc[1:] - dhw +deaths_headlines_e = pd.DataFrame({'total_2020': dhe.dropna()}) +deaths_headlines_w = pd.DataFrame({'total_2020': dhw.dropna()}) +``` + +```python +# deaths_headlines_e = raw_data_2020.iloc[:, [1]].copy() +# deaths_headlines_e.columns = ['total_2020'] +# deaths_headlines_w = raw_data_2020['W92000004'].copy() +# deaths_headlines_e.columns = ['total_2020'] +# deaths_headlines_w.columns = ['total_2020'] +# deaths_headlines_e.total_2020 -= deaths_headlines_w.total_2020 +# deaths_headlines_e.head() +# deaths_headlines_e ``` ```python @@ -220,7 +253,7 @@ dh19w = raw_data_2019['W92000004'] dh19e.columns = ['total_2019'] dh19w.columns = ['total_2019'] dh19e.total_2019 -= dh19w.total_2019 -dh19e.head() +dh19e.tail() ``` ```python @@ -288,6 +321,13 @@ deaths_headlines_s = deaths_headlines_s.loc[1:52] deaths_headlines_s ``` +# Correction for missing data + +```python +# deaths_headlines_s.loc[20, 'total_2020'] = 1000 +# deaths_headlines_s +``` + ```python deaths_headlines_w = deaths_headlines_w.merge(dh19w['total_2019'], how='outer', left_index=True, right_index=True) deaths_headlines_w = deaths_headlines_w.merge(dh18w['total_2018'], how='outer', left_index=True, right_index=True) @@ -322,7 +362,11 @@ deaths_headlines ``` ```python -deaths_headlines['total_2020 total_2019 total_2018 total_2017 total_2016 total_2015'.split()].plot(figsize=(10, 8)) +deaths_headlines['total_2020 total_2019 total_2018 total_2017 total_2016 total_2015'.split()].plot(figsize=(14, 8)) +``` + +```python +deaths_headlines[['total_2020', 'previous_mean']].plot(figsize=(10, 8)) ``` ```python @@ -376,40 +420,102 @@ plt.show() # Excess deaths calculation ```python -raw_data_2020.loc[12, 'Week ended'] +# raw_data_2020.loc[12, 'Week ended'] +``` + +```python +eng_xls.loc[12, 'Week ended'] +``` + +```python +# raw_data_2020.iloc[-1]['Week ended'] ``` ```python -raw_data_2020.iloc[-1]['Week ended'] +deaths_headlines_e.total_2020.dropna().last_valid_index() ``` ```python -raw_data_2020.loc[12].droplevel(1)['Week ended'] +eng_xls.loc[deaths_headlines_e.total_2020.dropna().last_valid_index(), 'Week ended'] ``` ```python -raw_data_2020.iloc[-1].droplevel(1)['Week ended'] +eng_xls.loc[27, 'Week ended'] +``` + +```python +# raw_data_2020.loc[12].droplevel(1)['Week ended'] +``` + +```python +# raw_data_2020.iloc[-1].droplevel(1)['Week ended'] ``` ```python (deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum() ``` +```python +(deaths_headlines.loc[12:27].total_2020 - deaths_headlines.loc[12:27].previous_mean).sum() +``` + ```python deaths_headlines.previous_mean.sum() ``` +```python +# excess_death_data = { +# 'start_date': str(eng_xls.loc[12, 'Week ended']), +# 'end_date': str(eng_xls.loc[deaths_headlines_e.total_2020.dropna().last_valid_index(), 'Week ended']), +# 'excess_deaths': (deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum() +# } + +# with open('excess_deaths.json', 'w') as f: +# json.dump(excess_death_data, f) +``` + ```python excess_death_data = { - 'start_date': str(raw_data_2020.loc[12].droplevel(1)['Week ended']), - 'end_date': str(raw_data_2020.iloc[-1].droplevel(1)['Week ended']), - 'excess_deaths': (deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum() + 'start_date': str(eng_xls.loc[12, 'Week ended']), + 'end_date': str(eng_xls.loc[27, 'Week ended']), + 'excess_deaths': (deaths_headlines.loc[12:27].total_2020 - deaths_headlines.loc[12:27].previous_mean).sum() } with open('excess_deaths.json', 'w') as f: json.dump(excess_death_data, f) ``` +```python +# excess_death_data = { +# 'start_date': str(raw_data_2020.loc[12].droplevel(1)['Week ended']), +# 'end_date': str(raw_data_2020.iloc[-1].droplevel(1)['Week ended']), +# 'excess_deaths': (deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum() +# } + +# with open('excess_deaths.json', 'w') as f: +# json.dump(excess_death_data, f) +``` + +```python +eng_xls['Week ended'] +``` + +```python +# raw_data_2020.droplevel(1, axis='columns')['Week ended'] +``` + +```python +deaths_by_week = deaths_headlines.merge(eng_xls['Week ended'], left_index=True, right_index=True) +deaths_by_week.rename(columns={'Week ended': 'week_ended'}, inplace=True) +deaths_by_week.to_csv('deaths_by_week.csv', header=True, index=False) +``` + +```python +# deaths_by_week = deaths_headlines.merge(raw_data_2020.droplevel(1, axis='columns')['Week ended'], left_index=True, right_index=True) +# deaths_by_week.rename(columns={'Week ended': 'week_ended'}, inplace=True) +# deaths_by_week.to_csv('deaths_by_week.csv', header=True, index=False) +``` + # Plots for UK nations ```python @@ -565,6 +671,25 @@ plt.savefig('deaths-radar_northern_ireland.png') plt.show() ``` +```python +# list(raw_data_2020.columns) +``` + +```python +# deaths_headlines_e = raw_data_2020.iloc[:, [1]].copy() +# deaths_headlines_e.columns = ['total_2020'] +# deaths_headlines_w = raw_data_2020['W92000004'].copy() +# deaths_headlines_e.columns = ['total_2020'] +# deaths_headlines_w.columns = ['total_2020'] +# deaths_headlines_e.total_2020 -= deaths_headlines_w.total_2020 +# deaths_headlines_e.head() +# deaths_headlines_e +``` + +```python + +``` + ```python ```