X-Git-Url: https://git.njae.me.uk/?p=covid19.git;a=blobdiff_plain;f=covid.md;h=e86f0ed5e62e5037ca833da38741694937b12bcd;hp=0dcc3539f91d5996dc67b8c109c78a2af3201f69;hb=HEAD;hpb=a596078977bfdf8b1929a5e288cc70f745101f9d diff --git a/covid.md b/covid.md deleted file mode 100644 index 0dcc353..0000000 --- a/covid.md +++ /dev/null @@ -1,533 +0,0 @@ ---- -jupyter: - jupytext: - formats: ipynb,md - text_representation: - extension: .md - format_name: markdown - format_version: '1.2' - jupytext_version: 1.3.4 - kernelspec: - display_name: Python 3 - language: python - name: python3 ---- - -Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide) - -```python -import itertools -import collections -import pandas as pd -import numpy as np -from scipy.stats import gmean - -import matplotlib as mpl -import matplotlib.pyplot as plt -%matplotlib inline -``` - -```python -DEATH_COUNT_THRESHOLD = 10 -COUNTRIES_CORE = 'IT DE UK ES IE FR'.split() -COUNTRIES_NORDIC = 'SE NO DK FI UK'.split() -COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split() -COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS)) -COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC)) -``` - -```python -!curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv -``` - -```python -# First col is a date, treat geoId of NA as 'Namibia', not "NA" value -raw_data = pd.read_csv('covid.csv', parse_dates=[0], keep_default_na=False, dayfirst=True) -``` - -```python -raw_data.size -``` - -```python -raw_data.head() -``` - -```python -raw_data.dtypes -``` - -```python -base_data = raw_data.set_index(['geoId', 'dateRep']) -base_data.sort_index(inplace=True) -base_data -``` - -```python -base_data.loc['UK'] -``` - -```python -base_data.loc['UK', '2020-04-17'] -``` - -```python -countries = raw_data[['geoId', 'countriesAndTerritories', 'popData2018']] -countries = countries[countries['popData2018'] != ''] -countries = countries.drop_duplicates() -countries.set_index('geoId', inplace=True) -countries = countries.astype({'popData2018': 'int64'}) -countries.head() -``` - -```python -countries.shape -``` - -```python -countries[countries.countriesAndTerritories == 'Finland'] -``` - -```python -countries.loc[COUNTRIES_OF_INTEREST] -``` - -```python -data_by_date = base_data[['cases', 'deaths']] -data_by_date.head() -``` - -```python -data_by_date.loc['UK'] -``` - -```python -data_by_date.groupby(level=0).cumsum() -``` - -```python -data_by_date = data_by_date.merge( - data_by_date.groupby(level=0).cumsum(), - suffixes=('', '_culm'), - left_index=True, right_index=True) -data_by_date -``` - -```python -data_by_date = data_by_date.merge( - data_by_date[['cases', 'deaths']].groupby(level=0).diff(), - suffixes=('', '_diff'), - left_index=True, right_index=True) -data_by_date -``` - -```python -data_by_date.loc['UK', '2020-04-17'] -``` - -```python -data_by_date.loc['UK'] -``` - -```python -# data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD] -``` - -```python -# days_since_threshold = data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD].groupby(level=0).cumcount() -# days_since_threshold.rename('since_threshold', inplace=True) -``` - -```python -dbd = data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD].reset_index(level=1) -dbd['since_threshold'] = dbd.dateRep -dbd.set_index('dateRep', append=True, inplace=True) -dbd.sort_index(inplace=True) -days_since_threshold = dbd.groupby(level=0).diff().since_threshold.dt.days.fillna(0).astype(int).groupby(level=0).cumsum() -# days_since_threshold.groupby(level=0).cumsum() - -# days_since_threshold = dbd.rename('since_threshold') -days_since_threshold -``` - -```python -# days_since_threshold = (data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD] -# .reset_index(level=1).groupby(level=0) -# .diff().dateRep.dt.days -# .groupby(level=0).cumcount() -# ) -# days_since_threshold.rename('since_threshold', inplace=True) -# days_since_threshold -``` - -```python -data_since_threshold = data_by_date.merge(days_since_threshold, - left_index=True, right_index=True) -data_since_threshold -``` - -```python -data_since_threshold = data_since_threshold.set_index('since_threshold', append=True - ).reorder_levels(['since_threshold', 'geoId', 'dateRep'] - ).reset_index('dateRep') -data_since_threshold -``` - -```python -data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :] -``` - -```python -data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), ['deaths_culm']].unstack().plot(logy=True) -``` - -```python -# deaths = data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT', 'IE']), ['deaths_culm']].unstack().xs('deaths_culm', axis=1, drop_level=True) -``` - -```python -deaths = data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_culm']].unstack().sort_index().xs('deaths_culm', axis=1, drop_level=True) -``` - -```python -data_since_threshold.reset_index().merge(countries, on='geoId').set_index(['since_threshold', 'geoId']) -``` - -```python -data_since_threshold.reset_index().merge(countries, on='geoId').set_index(['since_threshold', 'geoId']).sort_index(inplace=True) -``` - -```python -data_since_threshold_per_capita = data_since_threshold.reset_index().merge(countries, on='geoId').set_index(['since_threshold', 'geoId']) -data_since_threshold_per_capita['cases_culm_pc'] = data_since_threshold_per_capita.cases_culm / data_since_threshold_per_capita.popData2018 -data_since_threshold_per_capita['deaths_culm_pc'] = data_since_threshold_per_capita.deaths_culm / data_since_threshold_per_capita.popData2018 -data_since_threshold_per_capita -``` - -```python -deaths_pc = data_since_threshold_per_capita.loc[(slice(None), ['UK', 'DE', 'IT', 'IE']), ['deaths_culm_pc']].unstack().sort_index().xs('deaths_culm_pc', axis=1, drop_level=True) -``` - -```python -deaths_pc -``` - -```python -deaths_pc.index -``` - -```python -deaths_pc = data_since_threshold_per_capita.loc[(slice(None), COUNTRIES_ALL), ['deaths_culm_pc']].unstack().xs('deaths_culm_pc', axis=1, drop_level=True) -``` - -```python -deaths[COUNTRIES_CORE].plot() -``` - -```python -deaths[COUNTRIES_FRIENDS].plot() -``` - -```python -ax = deaths[COUNTRIES_CORE].plot(figsize=(10, 6), title="Total deaths, linear") -for c in COUNTRIES_CORE: - lvi = deaths[c].last_valid_index() - ax.text(x = lvi + 1, y = deaths[c][lvi], s = c) -# plt.savefig('covid_deaths_total_linear.png') -``` - -```python -ax = deaths[COUNTRIES_NORDIC].plot(figsize=(10, 6), title="Total deaths, linear") -for c in COUNTRIES_NORDIC: - lvi = deaths[c].last_valid_index() - ax.text(x = lvi + 1, y = deaths[c][lvi], s = c) -# plt.savefig('covid_deaths_total_linear.png') -``` - -```python -ax = deaths[COUNTRIES_OF_INTEREST].plot(figsize=(10, 6), title="Total deaths, linear") -for c in COUNTRIES_OF_INTEREST: - lvi = deaths[c].last_valid_index() - ax.text(x = lvi + 1, y = deaths[c][lvi], s = c) -plt.savefig('covid_deaths_total_linear.png') -``` - -```python -ax = deaths[COUNTRIES_CORE].plot(logy=True, figsize=(10, 6), title="Total deaths, log") -for c in COUNTRIES_CORE: - lvi = deaths[c].last_valid_index() - ax.text(x = lvi + 1, y = deaths[c][lvi], s = c) - -plt.savefig('covid_deaths_total_log.png') -``` - -```python -ylim = (5*10**3, 5*10**4) -ax = deaths[COUNTRIES_CORE].plot(logy=True, figsize=(10, 6), ylim=ylim, title="Total deaths, log") -for c in COUNTRIES_CORE: - lvi = deaths[c].last_valid_index() - if ylim[0] < deaths[c][lvi] < ylim[1]: - ax.text(x = lvi + 1, y = deaths[c][lvi], s = c) - -# plt.savefig('covid_deaths_total_log.png') -``` - -```python -ax = deaths[COUNTRIES_FRIENDS].plot(logy=True, figsize=(10, 6), title="Total deaths, log") -for c in COUNTRIES_FRIENDS: - lvi = deaths[c].last_valid_index() - ax.text(x = lvi + 1, y = deaths[c][lvi], s = c) - -# plt.savefig('covid_deaths_total_log.png') -``` - -```python -ax = deaths[COUNTRIES_NORDIC].plot(logy=True, figsize=(10, 6), title="Total deaths, log") -for c in COUNTRIES_NORDIC: - lvi = deaths[c].last_valid_index() - ax.text(x = lvi + 1, y = deaths[c][lvi], s = c) - -# plt.savefig('covid_deaths_total_log.png') -``` - -```python -ax = deaths[COUNTRIES_OF_INTEREST].plot(logy=True, figsize=(10, 6), title="Total deaths, log") -for c in COUNTRIES_OF_INTEREST: - lvi = deaths[c].last_valid_index() - ax.text(x = lvi + 1, y = deaths[c][lvi], s = c) - -plt.savefig('covid_deaths_total_log.png') -``` - -```python -deaths_pc.plot(figsize=(10, 6), title="Deaths per capita, linear") -plt.savefig('covid_deaths_per_capita_linear.png') -``` - -```python -ax = deaths_pc.plot(logy=True, figsize=(10, 6), title="Deaths per capita, log") -for c in deaths_pc.columns: - lvi = deaths_pc[c].last_valid_index() - ax.text(x = lvi + 1, y = deaths_pc[c][lvi], s = c) -``` - -```python -deaths_pc[['UK', 'IE']].plot( figsize=(10, 6), title="Deaths per capita, linear") -``` - -```python -deaths_pc[['UK', 'IE']].plot(logy=True, figsize=(10, 6), title="Deaths per capita, log") -``` - -```python -deaths[['UK', 'ES', 'IT']].plot(logy=True, figsize=(10, 6), title="Deaths, log") -plt.savefig('covid_deaths_selected_log.png') -``` - -```python -deaths[['UK', 'ES', 'IT', 'MX']].plot(logy=True, figsize=(10, 6), title="Deaths, log") -``` - -```python -data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :] -``` - -```python -data_since_threshold['deaths_m4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).mean()) -data_since_threshold['deaths_m7'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(7, 1).mean()) -# data_since_threshold['deaths_diff_m4'] = data_since_threshold.groupby(level=1)['deaths_diff'].transform(lambda x: x.rolling(4, 1).mean()) -# data_since_threshold['deaths_diff_m7'] = data_since_threshold.groupby(level=1)['deaths_diff'].transform(lambda x: x.rolling(7, 1).mean()) -data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :] -``` - -```python -deaths_m4 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_m4']] - .unstack().sort_index().xs('deaths_m4', axis=1, drop_level=True)) -``` - -```python -deaths_m7 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_m7']] - .unstack().sort_index().xs('deaths_m7', axis=1, drop_level=True)) -``` - -```python -ax = deaths_m4.plot(figsize=(10, 6), title="Deaths per day, 4 day moving average") -for c in deaths_m4.columns: - lvi = deaths_m4[c].last_valid_index() - ax.text(x = lvi + 1, y = deaths_m4[c][lvi], s = c) -plt.savefig('covid_deaths_per_day.png') -``` - -```python -ax = deaths_m4[COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per day, 4 day moving average") -for c in COUNTRIES_CORE: - lvi = deaths_m4[c].last_valid_index() - ax.text(x = lvi + 1, y = deaths_m4[c][lvi], s = c) -plt.savefig('covid_deaths_per_day-core.png') -``` - -```python -ax = deaths_m4[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Deaths per day, 4 day moving average") -for c in COUNTRIES_FRIENDS: - lvi = deaths_m4[c].last_valid_index() - ax.text(x = lvi + 1, y = deaths_m4[c][lvi], s = c) -plt.savefig('covid_deaths_per_day-friends.png') -``` - -```python -C7s = 'ES FR IT UK'.split() -ax = deaths_m7[C7s].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average") -for c in C7s: - lvi = deaths_m7[c].last_valid_index() - ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c) -# plt.savefig('covid_deaths_per_day-friends.png') -``` - -```python -ax = deaths_m7[COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average") -for c in COUNTRIES_CORE: - lvi = deaths_m7[c].last_valid_index() - ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c) -plt.savefig('covid_deaths_per_day_7.png') -``` - -```python -ax = deaths_m7[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average") -for c in COUNTRIES_FRIENDS: - lvi = deaths_m7[c].last_valid_index() - ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c) -plt.savefig('covid_deaths_per_day_friends_7.png') -``` - -```python -def gmean_scale(items): - return gmean(items) / items[-1] -``` - -```python -def doubling_time(df): - return np.log(2) / np.log((df.deaths_culm + df.deaths_g4) / df.deaths_culm) - -def doubling_time_7(df): - return np.log(2) / np.log((df.deaths_culm + df.deaths_g7) / df.deaths_culm) -``` - -```python -# data_since_threshold['deaths_g4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).apply(gmean_scale, raw=True)) -# data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :] -``` - -```python -data_since_threshold['deaths_g4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).apply(gmean, raw=True)) -data_since_threshold['deaths_g7'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(7, 1).apply(gmean, raw=True)) -data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :] -``` - -```python -data_since_threshold['doubling_time'] = data_since_threshold.groupby(level=1).apply(doubling_time).reset_index(level=0, drop=True) -data_since_threshold['doubling_time_7'] = data_since_threshold.groupby(level=1).apply(doubling_time_7).reset_index(level=0, drop=True) -# data_since_threshold.loc[(slice(None), 'UK'), :] -``` - -```python -doubling_times = (data_since_threshold.loc[(slice(None), COUNTRIES_OF_INTEREST), ['doubling_time']] - .unstack().sort_index().xs('doubling_time', axis=1, drop_level=True)) -doubling_times.replace([np.inf, -np.inf], np.nan, inplace=True) -``` - -```python -doubling_times_7 = (data_since_threshold.loc[(slice(None), COUNTRIES_OF_INTEREST), ['doubling_time_7']] - .unstack().sort_index().xs('doubling_time_7', axis=1, drop_level=True)) -doubling_times_7.replace([np.inf, -np.inf], np.nan, inplace=True) -``` - -```python -ax = doubling_times.plot(figsize=(10, 6), title="Doubling times, 4 day average") -for c in doubling_times.columns: - lvi = doubling_times[c].last_valid_index() - ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c) -# plt.savefig('covid_deaths_per_day.png') -``` - -```python -ax = doubling_times_7[COUNTRIES_CORE].plot(figsize=(10, 6), title="Doubling times, 7 day average") -for c in COUNTRIES_CORE: - lvi = doubling_times_7[c].last_valid_index() - ax.text(x = lvi + 1, y = doubling_times_7[c][lvi], s = c) -plt.savefig('covid_doubling_times_7.png') -``` - -```python -ax = doubling_times[COUNTRIES_CORE].plot(figsize=(10, 6), title="Doubling times, 4 day average") -for c in COUNTRIES_CORE: - lvi = doubling_times[c].last_valid_index() - ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c) -plt.savefig('covid_doubling_times.png') -``` - -```python -ax = doubling_times[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Doubling times") -for c in COUNTRIES_FRIENDS: - lvi = doubling_times[c].last_valid_index() - ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c) -plt.savefig('covid_doubling_times_friends.png') -``` - -```python -ax = doubling_times[C7s].plot(figsize=(10, 6), title="Doubling times") -for c in C7s: - lvi = doubling_times[c].last_valid_index() - ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c) -# plt.savefig('covid_doubling_times_friends.png') -``` - -```python -# deaths_diff_m4 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_diff_m4']] -# .unstack().sort_index().xs('deaths_diff_m4', axis=1, drop_level=True)) -``` - -```python -# deaths_diff_m7 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_diff_m7']] -# .unstack().sort_index().xs('deaths_diff_m7', axis=1, drop_level=True)) -``` - -```python -# deaths_diff_m7 -``` - -```python -data_since_threshold.replace([np.inf, -np.inf], np.nan).groupby(level=1).last().loc[COUNTRIES_ALL]#, [doubling_time]] -``` - -```python -data_since_threshold.replace([np.inf, -np.inf], np.nan).groupby(level=1).last().loc[['UK', 'DE', 'IT']]#, [doubling_time]] -``` - -```python -it_since_threshold = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['IT']), :] -it_since_threshold.index.max()[0] -``` - -```python -uk_projection = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['UK']), :] -uk_projection.index.max()[0] -``` - -```python -proj = it_since_threshold.loc[(slice(60, 77), slice(None)), ['cases', 'deaths']] -proj.index = pd.MultiIndex.from_tuples([(n, 'UK') for n, _ in proj.index], names=proj.index.names) -proj -``` - -```python -uk_projection = uk_projection.append(proj, sort=True) -uk_projection.deaths.sum() -``` - -```python -it_since_threshold.deaths.sum() -``` - -```python - -```