9 jupytext_version: 1.3.4
11 display_name: Python 3
16 Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
23 from scipy.stats import gmean
25 import matplotlib as mpl
26 import matplotlib.pyplot as plt
# Cumulative death count a country must exceed before its rows are kept for
# the "days since threshold" alignment.
DEATH_COUNT_THRESHOLD = 10

# Country groups, given as the geoId codes used to index the data.
COUNTRIES_CORE = 'IT DE UK ES IE FR'.split()
COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()

# De-duplicated unions of the groups above.  dict.fromkeys() keeps the first
# occurrence of every code in its original position, so column order (and with
# it legend order and line colours) is identical on every run.  list(set(...))
# gave an order that changes with string hash randomisation.
COUNTRIES_OF_INTEREST = list(dict.fromkeys(COUNTRIES_CORE + COUNTRIES_FRIENDS))
COUNTRIES_ALL = list(dict.fromkeys(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC))
40 !curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv
# Column 0 is parsed as a day-first date.  Default NA detection is switched
# off so that the geoId 'NA' (Namibia) stays a string instead of becoming a
# missing value.
raw_data = pd.read_csv(
    'covid.csv',
    parse_dates=[0],
    dayfirst=True,
    keep_default_na=False,
)

# Same rows, indexed by (geoId, dateRep) and sorted on that index.
base_data = raw_data.set_index(['geoId', 'dateRep']).sort_index()
# Lookup table keyed by geoId: distinct (name, 2018 population) rows, skipping
# rows whose population field is the empty string, with the population cast
# to an integer.
countries = (
    raw_data[['geoId', 'countriesAndTerritories', 'popData2018']]
    .loc[lambda frame: frame['popData2018'] != '']
    .drop_duplicates()
    .set_index('geoId')
    .astype({'popData2018': 'int64'})
)

# Notebook spot checks: the results are only displayed, never stored.
countries[countries.countriesAndTerritories == 'Finland']
countries.loc[COUNTRIES_OF_INTEREST]
# Keep only the daily counts.
data_by_date = base_data[['cases', 'deaths']]

# Notebook spot checks: the results are only displayed, never stored.
data_by_date.loc['UK']
data_by_date.groupby(level=0).cumsum()

# Running totals within each level-0 index group, attached beside the daily
# figures as cases_culm / deaths_culm.
running_totals = data_by_date.groupby(level=0).cumsum()
data_by_date = data_by_date.merge(
    running_totals,
    left_index=True,
    right_index=True,
    suffixes=('', '_culm'),
)
# For every row whose cumulative deaths exceed DEATH_COUNT_THRESHOLD, work out
# how many calendar days separate it from the first such row in the same
# level-0 group.  Date differences are used (not a row counter) so a gap in
# the reported dates still advances the count by the right number of days.
over_threshold = data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD]

# Copy the date into an ordinary column so it can be differenced, then put it
# back into the index and sort.
dbd = over_threshold.reset_index(level=1)
dbd['since_threshold'] = dbd.dateRep
dbd = dbd.set_index('dateRep', append=True).sort_index()

# Per-group gap in days between consecutive rows; the first row of each group
# has no predecessor and counts as day 0.
day_gaps = dbd.groupby(level=0).diff().since_threshold.dt.days
days_since_threshold = (
    day_gaps
    .fillna(0)
    .astype(int)
    .groupby(level=0)
    .cumsum()
)
# Attach the day counter to the daily data by joining on the shared index.
with_day_counter = data_by_date.merge(
    days_since_threshold,
    left_index=True,
    right_index=True,
)

# Re-key on (since_threshold, geoId); dateRep becomes an ordinary column.
data_since_threshold = (
    with_day_counter
    .set_index('since_threshold', append=True)
    .reorder_levels(['since_threshold', 'geoId', 'dateRep'])
    .reset_index('dateRep')
)

# Notebook spot check and a quick log-scale look at three countries.
sample_rows = (slice(None), ['UK', 'DE', 'IT'])
data_since_threshold.loc[sample_rows, :]
data_since_threshold.loc[sample_rows, ['deaths_culm']].unstack().plot(logy=True)

# Cumulative deaths as a table: one row per day count, one column per geoId.
# The four-country version is replaced straight away by the full set.
deaths = (
    data_since_threshold
    .loc[(slice(None), ['UK', 'DE', 'IT', 'IE']), ['deaths_culm']]
    .unstack()
    .xs('deaths_culm', axis=1, drop_level=True)
)
deaths = (
    data_since_threshold
    .loc[(slice(None), COUNTRIES_ALL), ['deaths_culm']]
    .unstack()
    .xs('deaths_culm', axis=1, drop_level=True)
)
# Notebook spot check of the join with the countries table (display only).
data_since_threshold.reset_index().merge(countries, on='geoId').set_index(['since_threshold', 'geoId'])

# Same join, kept: every row gains the columns of the countries table.
data_since_threshold_per_capita = (
    data_since_threshold
    .reset_index()
    .merge(countries, on='geoId')
    .set_index(['since_threshold', 'geoId'])
)

# Cumulative counts divided by the popData2018 column.
population = data_since_threshold_per_capita.popData2018
data_since_threshold_per_capita['cases_culm_pc'] = data_since_threshold_per_capita.cases_culm / population
data_since_threshold_per_capita['deaths_culm_pc'] = data_since_threshold_per_capita.deaths_culm / population
data_since_threshold_per_capita

# Per-capita cumulative deaths as a table: one row per day count, one column
# per geoId.  The four-country version is replaced straight away by the full
# set.
deaths_pc = (
    data_since_threshold_per_capita
    .loc[(slice(None), ['UK', 'DE', 'IT', 'IE']), ['deaths_culm_pc']]
    .unstack()
    .xs('deaths_culm_pc', axis=1, drop_level=True)
)
deaths_pc = (
    data_since_threshold_per_capita
    .loc[(slice(None), COUNTRIES_ALL), ['deaths_culm_pc']]
    .unstack()
    .xs('deaths_culm_pc', axis=1, drop_level=True)
)
# Quick-look charts with default styling.
deaths[COUNTRIES_CORE].plot()
deaths[COUNTRIES_FRIENDS].plot()

# One labelled linear chart per group.  Each line gets its country code
# written one step to the right of its last valid point.
for group in (COUNTRIES_CORE, COUNTRIES_NORDIC, COUNTRIES_OF_INTEREST):
    ax = deaths[group].plot(figsize=(10, 6), title="Total deaths, linear")
    for c in group:
        lvi = deaths[c].last_valid_index()
        ax.text(x=lvi + 1, y=deaths[c][lvi], s=c)

# Only the last chart drawn (COUNTRIES_OF_INTEREST) is written to disk.
plt.savefig('covid_deaths_total_linear.png')
# One labelled log-scale chart per group.  The flag says whether the chart is
# written to disk; both saved charts use the same file name, so the one for
# COUNTRIES_OF_INTEREST overwrites the one for COUNTRIES_CORE.
log_chart_groups = (
    (COUNTRIES_CORE, True),
    (COUNTRIES_FRIENDS, False),
    (COUNTRIES_NORDIC, False),
    (COUNTRIES_OF_INTEREST, True),
)
for group, save_chart in log_chart_groups:
    ax = deaths[group].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
    # Write each country code one step to the right of its last valid point.
    for c in group:
        lvi = deaths[c].last_valid_index()
        ax.text(x=lvi + 1, y=deaths[c][lvi], s=c)
    if save_chart:
        plt.savefig('covid_deaths_total_log.png')
chart_size = (10, 6)

# Per-capita deaths for every country: linear (saved), then log with the
# country code written beside the end of each line.
deaths_pc.plot(figsize=chart_size, title="Deaths per capita, linear")
plt.savefig('covid_deaths_per_capita_linear.png')

ax = deaths_pc.plot(logy=True, figsize=chart_size, title="Deaths per capita, log")
for c in deaths_pc.columns:
    lvi = deaths_pc[c].last_valid_index()
    ax.text(x=lvi + 1, y=deaths_pc[c][lvi], s=c)

# UK against IE, per capita, on both scales.
uk_ie_per_capita = deaths_pc[['UK', 'IE']]
uk_ie_per_capita.plot(figsize=chart_size, title="Deaths per capita, linear")
uk_ie_per_capita.plot(logy=True, figsize=chart_size, title="Deaths per capita, log")

# Absolute deaths for a hand-picked few; only the three-country chart is saved.
deaths[['UK', 'ES', 'IT']].plot(logy=True, figsize=chart_size, title="Deaths, log")
plt.savefig('covid_deaths_selected_log.png')

deaths[['UK', 'ES', 'IT', 'MX']].plot(logy=True, figsize=chart_size, title="Deaths, log")
# Notebook spot check before the new columns are added (display only).
data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]

# Rolling means of the daily deaths, computed separately for each level-1
# index group.  min_periods=1 lets the first rows of a group average over
# however many rows exist so far.
data_since_threshold['deaths_m4'] = (
    data_since_threshold
    .groupby(level=1)['deaths']
    .transform(lambda daily: daily.rolling(window=4, min_periods=1).mean())
)
data_since_threshold['deaths_m7'] = (
    data_since_threshold
    .groupby(level=1)['deaths']
    .transform(lambda daily: daily.rolling(window=7, min_periods=1).mean())
)

# Notebook spot check with the new columns in place (display only).
data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]

# Wide tables: one row per day count, one column per geoId.
all_country_rows = (slice(None), COUNTRIES_ALL)
deaths_m4 = (
    data_since_threshold
    .loc[all_country_rows, ['deaths_m4']]
    .unstack()
    .xs('deaths_m4', axis=1, drop_level=True)
)
deaths_m7 = (
    data_since_threshold
    .loc[all_country_rows, ['deaths_m7']]
    .unstack()
    .xs('deaths_m7', axis=1, drop_level=True)
)
# Every country in the table on one chart, each line labelled with its code
# one step to the right of its last valid point.
ax = deaths_m4.plot(figsize=(10, 6), title="Deaths per day, 4 day moving average")
for c in deaths_m4.columns:
    lvi = deaths_m4[c].last_valid_index()
    ax.text(x=lvi + 1, y=deaths_m4[c][lvi], s=c)
plt.savefig('covid_deaths_per_day.png')

# The same chart for two country groups, each saved under its own name.
per_group_charts = (
    (COUNTRIES_CORE, 'covid_deaths_per_day-core.png'),
    (COUNTRIES_FRIENDS, 'covid_deaths_per_day-friends.png'),
)
for group, chart_file in per_group_charts:
    ax = deaths_m4[group].plot(figsize=(10, 6), title="Deaths per day, 4 day moving average")
    for c in group:
        lvi = deaths_m4[c].last_valid_index()
        ax.text(x=lvi + 1, y=deaths_m4[c][lvi], s=c)
    plt.savefig(chart_file)
# Countries shown on the first 7-day moving-average chart.
C7s = 'ES FR IT UK'.split()

ax = deaths_m7[C7s].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
# Label every plotted line with its country code, one step to the right of
# its last valid point.  Without this loop header a single label was drawn
# using whatever value `c` still held from an earlier loop, i.e. a country
# that is not on this chart.
for c in C7s:
    lvi = deaths_m7[c].last_valid_index()
    ax.text(x=lvi + 1, y=deaths_m7[c][lvi], s=c)
# Saving is disabled for this chart:
# plt.savefig('covid_deaths_per_day-friends.png')

ax = deaths_m7[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
for c in COUNTRIES_FRIENDS:
    lvi = deaths_m7[c].last_valid_index()
    ax.text(x=lvi + 1, y=deaths_m7[c][lvi], s=c)
# Saving is disabled for this chart:
# plt.savefig('covid_deaths_per_day-friends.png')
def gmean_scale(items):
    """Return the geometric mean of *items* divided by its last element.

    *items* must support ``items[-1]``.
    """
    geometric_mean = gmean(items)
    latest = items[-1]
    return geometric_mean / latest
def doubling_time(df):
    """Return ln(2) / ln(growth factor) for each row of *df*.

    The growth factor is ``(deaths_culm + deaths_g4) / deaths_culm``, so the
    result is the number of such growth steps needed for ``deaths_culm`` to
    double.  *df* must expose ``deaths_culm`` and ``deaths_g4`` attributes.
    """
    growth_factor = (df.deaths_culm + df.deaths_g4) / df.deaths_culm
    return np.log(2) / np.log(growth_factor)
# Rolling 4-row geometric mean of the daily deaths, computed separately for
# each level-1 index group (min_periods=1, so early rows use what is
# available).
data_since_threshold['deaths_g4'] = (
    data_since_threshold
    .groupby(level=1)['deaths']
    .transform(lambda daily: daily.rolling(window=4, min_periods=1).apply(gmean, raw=True))
)

# Notebook spot check (display only).
data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]

# Apply doubling_time to each group; dropping the group level that apply()
# prepends lets the result line up with the frame's own index again.
per_group_doubling = data_since_threshold.groupby(level=1).apply(doubling_time)
data_since_threshold['doubling_time'] = per_group_doubling.reset_index(level=0, drop=True)

# Wide table: one row per day count, one column per geoId.  Infinite values
# are turned into NaN.
doubling_times = (
    data_since_threshold
    .loc[(slice(None), COUNTRIES_OF_INTEREST), ['doubling_time']]
    .unstack()
    .xs('doubling_time', axis=1, drop_level=True)
)
doubling_times = doubling_times.replace([np.inf, -np.inf], np.nan)
# Every column of the table on one chart (not saved), each line labelled with
# its code one step to the right of its last valid point.
ax = doubling_times.plot(figsize=(10, 6), title="Doubling times")
for c in doubling_times.columns:
    lvi = doubling_times[c].last_valid_index()
    ax.text(x=lvi + 1, y=doubling_times[c][lvi], s=c)

# The same chart for two country groups, each saved under its own name.
doubling_charts = (
    (COUNTRIES_CORE, 'covid_doubling_times.png'),
    (COUNTRIES_FRIENDS, 'covid_doubling_times_friends.png'),
)
for group, chart_file in doubling_charts:
    ax = doubling_times[group].plot(figsize=(10, 6), title="Doubling times")
    for c in group:
        lvi = doubling_times[c].last_valid_index()
        ax.text(x=lvi + 1, y=doubling_times[c][lvi], s=c)
    plt.savefig(chart_file)

# Notebook summary (display only): the last row of each level-1 group, with
# infinities blanked out, for all listed countries.
data_since_threshold.replace([np.inf, -np.inf], np.nan).groupby(level=1).last().loc[COUNTRIES_ALL]