9 jupytext_version: 1.3.4
11 display_name: Python 3
16 Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
23 from scipy.stats import gmean
25 import matplotlib as mpl
26 import matplotlib.pyplot as plt
# Cumulative death count a country must exceed before its day counter starts.
DEATH_COUNT_THRESHOLD = 10

# Groups of geoId codes used to select countries for the plots.
COUNTRIES_CORE = 'IT DE UK ES IE FR'.split()
COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()

# De-duplicate with dict.fromkeys rather than set(): iterating a set of strings
# follows a hash-dependent order that can differ between interpreter runs, which
# would reshuffle the column order (and so the line colours) of every plot.
# dict.fromkeys keeps the first occurrence of each code in listing order.
COUNTRIES_OF_INTEREST = list(dict.fromkeys(COUNTRIES_CORE + COUNTRIES_FRIENDS))
COUNTRIES_ALL = list(dict.fromkeys(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC))
40 !curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv
44 # First col is a date, treat geoId of NA as 'Namibia', not "NA" value
45 raw_data = pd.read_csv('covid.csv', parse_dates=[0], keep_default_na=False, dayfirst=True)
61 base_data = raw_data.set_index(['geoId', 'dateRep'])
62 base_data.sort_index(inplace=True)
71 base_data.loc['UK', '2020-04-17']
# Per-country lookup table (name and 2018 population), indexed by geoId.
# Rows with an empty population string are dropped before the integer cast;
# the column is read as text because NA detection is off when loading the CSV.
countries = (
    raw_data[['geoId', 'countriesAndTerritories', 'popData2018']]
    .loc[lambda frame: frame['popData2018'] != '']
    .drop_duplicates()
    .set_index('geoId')
    .astype({'popData2018': 'int64'})
)

countries[countries.countriesAndTerritories == 'Finland']

countries.loc[COUNTRIES_OF_INTEREST]
# Daily case and death counts, indexed by (geoId, dateRep).
data_by_date = base_data[['cases', 'deaths']]

data_by_date.loc['UK']

# Running totals within each country (index level 0 is geoId).
running_totals = data_by_date.groupby(level=0).cumsum()
running_totals

# Add the totals as cases_culm / deaths_culm next to the daily counts.
data_by_date = data_by_date.merge(
    running_totals,
    left_index=True,
    right_index=True,
    suffixes=('', '_culm'),
)

# Add the day-on-day change of the daily counts as cases_diff / deaths_diff.
daily_change = data_by_date[['cases', 'deaths']].groupby(level=0).diff()
data_by_date = data_by_date.merge(
    daily_change,
    left_index=True,
    right_index=True,
    suffixes=('', '_diff'),
)

data_by_date.loc['UK', '2020-04-17']

data_by_date.loc['UK']
133 # data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD]
137 # days_since_threshold = data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD].groupby(level=0).cumcount()
138 # days_since_threshold.rename('since_threshold', inplace=True)
# Keep only the rows where a country's cumulative deaths exceed the threshold,
# and carry a copy of the report date in a 'since_threshold' column so it can
# be differenced while dateRep itself goes back into the index.
dbd = (
    data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD]
    .reset_index(level=1)
    .assign(since_threshold=lambda frame: frame.dateRep)
    .set_index('dateRep', append=True)
    .sort_index()
)

# Per country: gap in days between consecutive rows (0 for the first row, whose
# diff is missing), accumulated into a day count since the first kept row.
days_since_threshold = (
    dbd.groupby(level=0).diff()
    .since_threshold.dt.days
    .fillna(0).astype(int)
    .groupby(level=0).cumsum()
)
147 # days_since_threshold.groupby(level=0).cumsum()
149 # days_since_threshold = dbd.rename('since_threshold')
154 # days_since_threshold = (data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD]
155 # .reset_index(level=1).groupby(level=0)
156 # .diff().dateRep.dt.days
157 # .groupby(level=0).cumcount()
159 # days_since_threshold.rename('since_threshold', inplace=True)
160 # days_since_threshold
# Attach the day counter to the daily data, then re-index by
# (since_threshold, geoId) with dateRep demoted to an ordinary column.
data_since_threshold = (
    data_by_date
    .merge(days_since_threshold, left_index=True, right_index=True)
    .set_index('since_threshold', append=True)
    .reorder_levels(['since_threshold', 'geoId', 'dateRep'])
    .reset_index('dateRep')
)

data_since_threshold.loc[pd.IndexSlice[:, ['UK', 'DE', 'IT']], :]

# Quick look: cumulative deaths for three countries on a log scale.
data_since_threshold.loc[pd.IndexSlice[:, ['UK', 'DE', 'IT']], ['deaths_culm']].unstack().plot(logy=True)
185 # deaths = data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT', 'IE']), ['deaths_culm']].unstack().xs('deaths_culm', axis=1, drop_level=True)
# Cumulative deaths: one row per day-since-threshold, one column per country.
deaths = (
    data_since_threshold
    .loc[(slice(None), COUNTRIES_ALL), ['deaths_culm']]
    .unstack()
    .sort_index()
    .xs('deaths_culm', axis=1, drop_level=True)
)

# Join country name and population onto the data.  The merged frame is built
# once here; the earlier version rebuilt it three times, including a
# `.sort_index(inplace=True)` call on a temporary whose result was discarded.
data_since_threshold_per_capita = (
    data_since_threshold.reset_index()
    .merge(countries, on='geoId')
    .set_index(['since_threshold', 'geoId'])
)
data_since_threshold_per_capita

# Cumulative counts divided by the popData2018 population column.
data_since_threshold_per_capita['cases_culm_pc'] = (
    data_since_threshold_per_capita.cases_culm / data_since_threshold_per_capita.popData2018
)
data_since_threshold_per_capita['deaths_culm_pc'] = (
    data_since_threshold_per_capita.deaths_culm / data_since_threshold_per_capita.popData2018
)
data_since_threshold_per_capita

# Per-capita cumulative deaths, one column per country.  A four-country
# version assigned to the same name just before this one was dead code
# (immediately overwritten) and has been dropped.
deaths_pc = (
    data_since_threshold_per_capita
    .loc[(slice(None), COUNTRIES_ALL), ['deaths_culm_pc']]
    .unstack()
    .xs('deaths_culm_pc', axis=1, drop_level=True)
)
def _label_line_ends(ax, frame, columns, y_range=None):
    """Write each column's name one x-unit right of its last valid point.

    ax: the axes returned by the corresponding ``.plot()`` call.
    frame: the DataFrame whose columns were plotted.
    columns: the column names to label.
    y_range: optional ``(low, high)`` pair; when given, a label is only drawn
        if the column's last valid value lies strictly between the two bounds.
    """
    for name in columns:
        series = frame[name]
        last = series.last_valid_index()
        value = series[last]
        if y_range is None or y_range[0] < value < y_range[1]:
            ax.text(x=last + 1, y=value, s=name)


deaths[COUNTRIES_CORE].plot()

deaths[COUNTRIES_FRIENDS].plot()

# --- Total deaths, linear scale ---
ax = deaths[COUNTRIES_CORE].plot(figsize=(10, 6), title="Total deaths, linear")
_label_line_ends(ax, deaths, COUNTRIES_CORE)
# plt.savefig('covid_deaths_total_linear.png')

ax = deaths[COUNTRIES_NORDIC].plot(figsize=(10, 6), title="Total deaths, linear")
_label_line_ends(ax, deaths, COUNTRIES_NORDIC)
# plt.savefig('covid_deaths_total_linear.png')

ax = deaths[COUNTRIES_OF_INTEREST].plot(figsize=(10, 6), title="Total deaths, linear")
_label_line_ends(ax, deaths, COUNTRIES_OF_INTEREST)
plt.savefig('covid_deaths_total_linear.png')

# --- Total deaths, log scale ---
# NOTE(review): 'covid_deaths_total_log.png' is written here and again by the
# COUNTRIES_OF_INTEREST plot below, so only the later image survives on disk.
# Confirm that is intended.
ax = deaths[COUNTRIES_CORE].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
_label_line_ends(ax, deaths, COUNTRIES_CORE)

plt.savefig('covid_deaths_total_log.png')

# Zoomed view: only label the lines whose last value falls inside the y-limits.
ylim = (5*10**3, 5*10**4)
ax = deaths[COUNTRIES_CORE].plot(logy=True, figsize=(10, 6), ylim=ylim, title="Total deaths, log")
_label_line_ends(ax, deaths, COUNTRIES_CORE, y_range=ylim)

# plt.savefig('covid_deaths_total_log.png')

ax = deaths[COUNTRIES_FRIENDS].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
_label_line_ends(ax, deaths, COUNTRIES_FRIENDS)

# plt.savefig('covid_deaths_total_log.png')

ax = deaths[COUNTRIES_NORDIC].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
_label_line_ends(ax, deaths, COUNTRIES_NORDIC)

# plt.savefig('covid_deaths_total_log.png')

ax = deaths[COUNTRIES_OF_INTEREST].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
_label_line_ends(ax, deaths, COUNTRIES_OF_INTEREST)

plt.savefig('covid_deaths_total_log.png')
# Per-capita cumulative deaths for every country in deaths_pc, linear scale.
deaths_pc.plot(figsize=(10, 6), title="Deaths per capita, linear")
plt.savefig('covid_deaths_per_capita_linear.png')

# Same data on a log scale, with each line labelled at its last valid point.
ax = deaths_pc.plot(logy=True, figsize=(10, 6), title="Deaths per capita, log")
for c in deaths_pc.columns:
    column = deaths_pc[c]
    lvi = column.last_valid_index()
    ax.text(lvi + 1, column[lvi], c)

# UK against IE, per capita, on both scales.
deaths_pc[['UK', 'IE']].plot(title="Deaths per capita, linear", figsize=(10, 6))

deaths_pc[['UK', 'IE']].plot(title="Deaths per capita, log", figsize=(10, 6), logy=True)

# Absolute cumulative deaths for selected countries, log scale.
deaths[['UK', 'ES', 'IT']].plot(title="Deaths, log", figsize=(10, 6), logy=True)
plt.savefig('covid_deaths_selected_log.png')

deaths[['UK', 'ES', 'IT', 'MX']].plot(title="Deaths, log", figsize=(10, 6), logy=True)
data_since_threshold.loc[pd.IndexSlice[:, ['UK', 'DE', 'IT']], :]

# Rolling means of daily deaths within each country (index level 1 is geoId).
# min_periods=1 lets the first rows of a country average over whatever is there.
data_since_threshold['deaths_m4'] = (
    data_since_threshold.groupby(level=1)['deaths']
    .transform(lambda daily: daily.rolling(window=4, min_periods=1).mean())
)
data_since_threshold['deaths_m7'] = (
    data_since_threshold.groupby(level=1)['deaths']
    .transform(lambda daily: daily.rolling(window=7, min_periods=1).mean())
)
# data_since_threshold['deaths_diff_m4'] = data_since_threshold.groupby(level=1)['deaths_diff'].transform(lambda x: x.rolling(4, 1).mean())
# data_since_threshold['deaths_diff_m7'] = data_since_threshold.groupby(level=1)['deaths_diff'].transform(lambda x: x.rolling(7, 1).mean())
data_since_threshold.loc[pd.IndexSlice[:, ['UK', 'DE', 'IT']], :]

# Wide tables: one row per day-since-threshold, one column per country.
deaths_m4 = (
    data_since_threshold.loc[pd.IndexSlice[:, COUNTRIES_ALL], ['deaths_m4']]
    .unstack()
    .sort_index()
    .xs('deaths_m4', axis=1, drop_level=True)
)

deaths_m7 = (
    data_since_threshold.loc[pd.IndexSlice[:, COUNTRIES_ALL], ['deaths_m7']]
    .unstack()
    .sort_index()
    .xs('deaths_m7', axis=1, drop_level=True)
)
# Each chart below plots a moving average of daily deaths and then writes the
# country code one x-unit to the right of each line's last valid point.

# --- 4 day moving average ---
ax = deaths_m4.plot(figsize=(10, 6), title="Deaths per day, 4 day moving average")
for c in deaths_m4.columns:
    lvi = deaths_m4[c].last_valid_index()
    ax.text(x=lvi + 1, y=deaths_m4[c][lvi], s=c)
plt.savefig('covid_deaths_per_day.png')

ax = deaths_m4[COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per day, 4 day moving average")
for c in COUNTRIES_CORE:
    lvi = deaths_m4[c].last_valid_index()
    ax.text(x=lvi + 1, y=deaths_m4[c][lvi], s=c)
plt.savefig('covid_deaths_per_day-core.png')

ax = deaths_m4[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Deaths per day, 4 day moving average")
for c in COUNTRIES_FRIENDS:
    lvi = deaths_m4[c].last_valid_index()
    ax.text(x=lvi + 1, y=deaths_m4[c][lvi], s=c)
plt.savefig('covid_deaths_per_day-friends.png')

# --- 7 day moving average ---
C7s = 'ES FR IT UK'.split()
ax = deaths_m7[C7s].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
# Label every country plotted on this chart.  Without this loop the labelling
# statements ran once with whatever `c` the previous loop left behind, putting
# a single label for the wrong country on the chart.
for c in C7s:
    lvi = deaths_m7[c].last_valid_index()
    ax.text(x=lvi + 1, y=deaths_m7[c][lvi], s=c)
# plt.savefig('covid_deaths_per_day-friends.png')

ax = deaths_m7[COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
for c in COUNTRIES_CORE:
    lvi = deaths_m7[c].last_valid_index()
    ax.text(x=lvi + 1, y=deaths_m7[c][lvi], s=c)
plt.savefig('covid_deaths_per_day_7.png')

ax = deaths_m7[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
for c in COUNTRIES_FRIENDS:
    lvi = deaths_m7[c].last_valid_index()
    ax.text(x=lvi + 1, y=deaths_m7[c][lvi], s=c)
plt.savefig('covid_deaths_per_day_friends_7.png')
def gmean_scale(items):
    """Return the geometric mean of *items* divided by the last item.

    *items* must support ``items[-1]`` and be accepted by
    ``scipy.stats.gmean`` (for example a list or a NumPy array).
    """
    geometric_mean = gmean(items)
    return geometric_mean / items[-1]
def _doubling_time(df, growth_column):
    """Return ``log(2) / log((total + growth) / total)`` for each row of *df*.

    df: frame with a ``deaths_culm`` column and the column named by
        *growth_column*.
    growth_column: name of the column holding the per-step increase to
        project forward from ``deaths_culm``.

    Rows where the growth is zero divide by ``log(1) == 0`` and come back as
    infinity; callers are expected to deal with those values.
    """
    growth_factor = (df.deaths_culm + df[growth_column]) / df.deaths_culm
    return np.log(2) / np.log(growth_factor)


def doubling_time(df):
    """Doubling time of ``deaths_culm`` based on the ``deaths_g4`` column."""
    return _doubling_time(df, 'deaths_g4')


def doubling_time_7(df):
    """Doubling time of ``deaths_culm`` based on the ``deaths_g7`` column."""
    return _doubling_time(df, 'deaths_g7')
416 # data_since_threshold['deaths_g4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).apply(gmean_scale, raw=True))
417 # data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
# Rolling geometric means of daily deaths within each country (index level 1
# is geoId), over 4 and 7 rows, with min_periods=1.
data_since_threshold['deaths_g4'] = (
    data_since_threshold.groupby(level=1)['deaths']
    .transform(lambda daily: daily.rolling(window=4, min_periods=1).apply(gmean, raw=True))
)
data_since_threshold['deaths_g7'] = (
    data_since_threshold.groupby(level=1)['deaths']
    .transform(lambda daily: daily.rolling(window=7, min_periods=1).apply(gmean, raw=True))
)
data_since_threshold.loc[pd.IndexSlice[:, ['UK', 'DE', 'IT']], :]

# Doubling times per country.  groupby().apply() prepends the group key as an
# extra index level, which is dropped so the result aligns with the frame.
data_since_threshold['doubling_time'] = (
    data_since_threshold.groupby(level=1)
    .apply(doubling_time)
    .reset_index(level=0, drop=True)
)
data_since_threshold['doubling_time_7'] = (
    data_since_threshold.groupby(level=1)
    .apply(doubling_time_7)
    .reset_index(level=0, drop=True)
)
# data_since_threshold.loc[(slice(None), 'UK'), :]

# Wide tables (one column per country) with infinities turned into NaN.
doubling_times = (
    data_since_threshold.loc[pd.IndexSlice[:, COUNTRIES_OF_INTEREST], ['doubling_time']]
    .unstack()
    .sort_index()
    .xs('doubling_time', axis=1, drop_level=True)
    .replace([np.inf, -np.inf], np.nan)
)

doubling_times_7 = (
    data_since_threshold.loc[pd.IndexSlice[:, COUNTRIES_OF_INTEREST], ['doubling_time_7']]
    .unstack()
    .sort_index()
    .xs('doubling_time_7', axis=1, drop_level=True)
    .replace([np.inf, -np.inf], np.nan)
)
# Each chart below plots doubling times and then writes the country code one
# x-unit to the right of each line's last valid point.

ax = doubling_times.plot(figsize=(10, 6), title="Doubling times, 4 day average")
for c in doubling_times.columns:
    lvi = doubling_times[c].last_valid_index()
    ax.text(x=lvi + 1, y=doubling_times[c][lvi], s=c)
# plt.savefig('covid_deaths_per_day.png')

ax = doubling_times_7[COUNTRIES_CORE].plot(figsize=(10, 6), title="Doubling times, 7 day average")
for c in COUNTRIES_CORE:
    lvi = doubling_times_7[c].last_valid_index()
    ax.text(x=lvi + 1, y=doubling_times_7[c][lvi], s=c)
plt.savefig('covid_doubling_times_7.png')

ax = doubling_times[COUNTRIES_CORE].plot(figsize=(10, 6), title="Doubling times, 4 day average")
for c in COUNTRIES_CORE:
    lvi = doubling_times[c].last_valid_index()
    ax.text(x=lvi + 1, y=doubling_times[c][lvi], s=c)
plt.savefig('covid_doubling_times.png')

ax = doubling_times[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Doubling times")
for c in COUNTRIES_FRIENDS:
    lvi = doubling_times[c].last_valid_index()
    ax.text(x=lvi + 1, y=doubling_times[c][lvi], s=c)
plt.savefig('covid_doubling_times_friends.png')

ax = doubling_times[C7s].plot(figsize=(10, 6), title="Doubling times")
# Label every country plotted on this chart.  Without this loop the labelling
# statements ran once with the `c` left over from the previous loop, so the
# chart got a single label belonging to a country it may not even show.
for c in C7s:
    lvi = doubling_times[c].last_valid_index()
    ax.text(x=lvi + 1, y=doubling_times[c][lvi], s=c)
# plt.savefig('covid_doubling_times_friends.png')
485 # deaths_diff_m4 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_diff_m4']]
486 # .unstack().sort_index().xs('deaths_diff_m4', axis=1, drop_level=True))
490 # deaths_diff_m7 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_diff_m7']]
491 # .unstack().sort_index().xs('deaths_diff_m7', axis=1, drop_level=True))
# Last row of each country (index level 1 is geoId), with infinities replaced
# by NaN first, for every country in COUNTRIES_ALL.
(
    data_since_threshold
    .replace([np.inf, -np.inf], np.nan)
    .groupby(level=1)
    .last()
    .loc[COUNTRIES_ALL]  # , [doubling_time]]
)