# dh19i.head()
```
-```python
-raw_data_2020 = pd.read_csv('uk-deaths-data/publishedweek192020.csv',
- parse_dates=[1], dayfirst=True,
- index_col=0,
- header=[0, 1])
-```
-
```python
raw_data_2020_i = pd.read_csv('uk-deaths-data/Weekly_Deaths_NI_2020.csv',
parse_dates=[1], dayfirst=True,
```
```python
+eng_xls = pd.read_excel('uk-deaths-data/publishedweek312020.xlsx',
+ sheet_name="Weekly figures 2020",
+ skiprows=[0, 1, 2, 3],
+ header=0,
+ index_col=[1]
+ ).iloc[:91].T
+eng_xls
+```
+```python
+# eng_xls_columns
```
```python
+eng_xls_columns = list(eng_xls.columns)
+
+for i, c in enumerate(eng_xls_columns):
+# print(i, c, type(c), isinstance(c, float))
+ if isinstance(c, float) and np.isnan(c):
+ if eng_xls.iloc[0].iloc[i] is not pd.NaT:
+ eng_xls_columns[i] = eng_xls.iloc[0].iloc[i]
+# np.isnan(eng_xls_columns[0])
+# eng_xls_columns
+
+eng_xls.columns = eng_xls_columns
+# eng_xls.columns
```
```python
+eng_xls['Total deaths, all ages']
+```
+```python
+eng_xls['Wales'].iloc[1:]
+```
+
+```python
+# raw_data_2020 = pd.read_csv('uk-deaths-data/publishedweek272020.csv',
+# parse_dates=[1], dayfirst=True,
+# index_col=0,
+# header=[0, 1])
```
```python
```
```python
-raw_data_2020['W92000004', 'Wales']
+# raw_data_2020['W92000004', 'Wales']
```
```python
```
```python
-deaths_headlines_e = raw_data_2020.iloc[:, [1]].copy()
-deaths_headlines_e.columns = ['total_2020']
-deaths_headlines_w = raw_data_2020['W92000004'].copy()
-deaths_headlines_e.columns = ['total_2020']
-deaths_headlines_w.columns = ['total_2020']
-deaths_headlines_e.total_2020 -= deaths_headlines_w.total_2020
-deaths_headlines_e.head()
-deaths_headlines_e
+dhw = eng_xls['Wales'].iloc[1:]
+dhe = eng_xls['Total deaths, all ages'].iloc[1:] - dhw
+deaths_headlines_e = pd.DataFrame({'total_2020': dhe.dropna()})
+deaths_headlines_w = pd.DataFrame({'total_2020': dhw.dropna()})
+```
+
+```python
+# deaths_headlines_e = raw_data_2020.iloc[:, [1]].copy()
+# deaths_headlines_e.columns = ['total_2020']
+# deaths_headlines_w = raw_data_2020['W92000004'].copy()
+# deaths_headlines_e.columns = ['total_2020']
+# deaths_headlines_w.columns = ['total_2020']
+# deaths_headlines_e.total_2020 -= deaths_headlines_w.total_2020
+# deaths_headlines_e.head()
+# deaths_headlines_e
```
```python
dh19e.columns = ['total_2019']
dh19w.columns = ['total_2019']
dh19e.total_2019 -= dh19w.total_2019
-dh19e.head()
+dh19e.tail()
```
```python
deaths_headlines_s
```
+# Correction for missing data
+
+```python
+# deaths_headlines_s.loc[20, 'total_2020'] = 1000
+# deaths_headlines_s
+```
+
```python
deaths_headlines_w = deaths_headlines_w.merge(dh19w['total_2019'], how='outer', left_index=True, right_index=True)
deaths_headlines_w = deaths_headlines_w.merge(dh18w['total_2018'], how='outer', left_index=True, right_index=True)
```
```python
-deaths_headlines['total_2020 total_2019 total_2018 total_2017 total_2016 total_2015'.split()].plot(figsize=(10, 8))
+deaths_headlines['total_2020 total_2019 total_2018 total_2017 total_2016 total_2015'.split()].plot(figsize=(14, 8))
+```
+
+```python
+deaths_headlines[['total_2020', 'previous_mean']].plot(figsize=(10, 8))
```
```python
# Excess deaths calculation
```python
-raw_data_2020.loc[12, 'Week ended']
+# raw_data_2020.loc[12, 'Week ended']
+```
+
+```python
+eng_xls.loc[12, 'Week ended']
+```
+
+```python
+# raw_data_2020.iloc[-1]['Week ended']
```
```python
-raw_data_2020.iloc[-1]['Week ended']
+deaths_headlines_e.total_2020.dropna().last_valid_index()
```
```python
-raw_data_2020.loc[12].droplevel(1)['Week ended']
+eng_xls.loc[deaths_headlines_e.total_2020.dropna().last_valid_index(), 'Week ended']
```
```python
-raw_data_2020.iloc[-1].droplevel(1)['Week ended']
+eng_xls.loc[27, 'Week ended']
+```
+
+```python
+# raw_data_2020.loc[12].droplevel(1)['Week ended']
+```
+
+```python
+# raw_data_2020.iloc[-1].droplevel(1)['Week ended']
```
```python
(deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum()
```
+```python
+(deaths_headlines.loc[12:27].total_2020 - deaths_headlines.loc[12:27].previous_mean).sum()
+```
+
```python
deaths_headlines.previous_mean.sum()
```
+```python
+# excess_death_data = {
+# 'start_date': str(eng_xls.loc[12, 'Week ended']),
+# 'end_date': str(eng_xls.loc[deaths_headlines_e.total_2020.dropna().last_valid_index(), 'Week ended']),
+# 'excess_deaths': (deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum()
+# }
+
+# with open('excess_deaths.json', 'w') as f:
+# json.dump(excess_death_data, f)
+```
+
```python
excess_death_data = {
- 'start_date': str(raw_data_2020.loc[12].droplevel(1)['Week ended']),
- 'end_date': str(raw_data_2020.iloc[-1].droplevel(1)['Week ended']),
- 'excess_deaths': (deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum()
+ 'start_date': str(eng_xls.loc[12, 'Week ended']),
+ 'end_date': str(eng_xls.loc[27, 'Week ended']),
+ 'excess_deaths': (deaths_headlines.loc[12:27].total_2020 - deaths_headlines.loc[12:27].previous_mean).sum()
}
with open('excess_deaths.json', 'w') as f:
json.dump(excess_death_data, f)
```
+```python
+# excess_death_data = {
+# 'start_date': str(raw_data_2020.loc[12].droplevel(1)['Week ended']),
+# 'end_date': str(raw_data_2020.iloc[-1].droplevel(1)['Week ended']),
+# 'excess_deaths': (deaths_headlines.loc[12:].total_2020 - deaths_headlines.loc[12:].previous_mean).sum()
+# }
+
+# with open('excess_deaths.json', 'w') as f:
+# json.dump(excess_death_data, f)
+```
+
+```python
+eng_xls['Week ended']
+```
+
+```python
+# raw_data_2020.droplevel(1, axis='columns')['Week ended']
+```
+
+```python
+deaths_by_week = deaths_headlines.merge(eng_xls['Week ended'], left_index=True, right_index=True)
+deaths_by_week.rename(columns={'Week ended': 'week_ended'}, inplace=True)
+deaths_by_week.to_csv('deaths_by_week.csv', header=True, index=False)
+```
+
+```python
+# deaths_by_week = deaths_headlines.merge(raw_data_2020.droplevel(1, axis='columns')['Week ended'], left_index=True, right_index=True)
+# deaths_by_week.rename(columns={'Week ended': 'week_ended'}, inplace=True)
+# deaths_by_week.to_csv('deaths_by_week.csv', header=True, index=False)
+```
+
# Plots for UK nations
```python
plt.show()
```
+```python
+# list(raw_data_2020.columns)
+```
+
+```python
+# deaths_headlines_e = raw_data_2020.iloc[:, [1]].copy()
+# deaths_headlines_e.columns = ['total_2020']
+# deaths_headlines_w = raw_data_2020['W92000004'].copy()
+# deaths_headlines_e.columns = ['total_2020']
+# deaths_headlines_w.columns = ['total_2020']
+# deaths_headlines_e.total_2020 -= deaths_headlines_w.total_2020
+# deaths_headlines_e.head()
+# deaths_headlines_e
+```
+
+```python
+
+```
+
```python
```