Finished UK death data analysis
[covid19.git] / covid.md
1 ---
2 jupyter:
3 jupytext:
4 formats: ipynb,md
5 text_representation:
6 extension: .md
7 format_name: markdown
8 format_version: '1.2'
9 jupytext_version: 1.3.4
10 kernelspec:
11 display_name: Python 3
12 language: python
13 name: python3
14 ---
15
16 Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
17
18 ```python
19 import itertools
20 import collections
21 import pandas as pd
22 import numpy as np
23 from scipy.stats import gmean
24
25 import matplotlib as mpl
26 import matplotlib.pyplot as plt
27 %matplotlib inline
28 ```
29
30 ```python
# Cumulative death count a country must exceed before its rows are included
# in the "days since threshold" comparisons.
DEATH_COUNT_THRESHOLD = 10

# Country groups, given as the two-letter geoId codes used in the data set.
COUNTRIES_CORE = 'IT DE UK ES IE FR'.split()
COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()

# Unions of the groups above.  dict.fromkeys() drops duplicates while keeping
# first-seen order, so column and legend order is identical on every run
# (list(set(...)) depends on string hash randomisation).
COUNTRIES_OF_INTEREST = list(dict.fromkeys(COUNTRIES_CORE + COUNTRIES_FRIENDS))
COUNTRIES_ALL = list(dict.fromkeys(
    COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC))
37 ```
38
39 ```python
40 !curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv
41 ```
42
43 ```python
# The first column holds day-first dates.  pandas' default NA markers are
# switched off so that the geoId "NA" (Namibia) stays a string instead of
# being read as a missing value.
raw_data = pd.read_csv(
    'covid.csv',
    parse_dates=[0],
    dayfirst=True,
    keep_default_na=False,
)
46 ```
47
48 ```python
49 raw_data.size
50 ```
51
52 ```python
53 raw_data.head()
54 ```
55
56 ```python
57 raw_data.dtypes
58 ```
59
60 ```python
# Index the raw rows by (geoId, dateRep) and sort on both levels.
base_data = raw_data.set_index(['geoId', 'dateRep']).sort_index()
base_data
64 ```
65
66 ```python
67 base_data.loc['UK']
68 ```
69
70 ```python
71 base_data.loc['UK', '2020-04-17']
72 ```
73
74 ```python
# One row per geoId with its name and 2018 population.  Rows whose
# population field is an empty string are dropped before the column is
# converted to integers.
countries = (
    raw_data[['geoId', 'countriesAndTerritories', 'popData2018']]
    .loc[lambda df: df['popData2018'] != '']
    .drop_duplicates()
    .set_index('geoId')
    .astype({'popData2018': 'int64'})
)
countries.head()
81 ```
82
83 ```python
84 countries.shape
85 ```
86
87 ```python
88 countries[countries.countriesAndTerritories == 'Finland']
89 ```
90
91 ```python
92 countries.loc[COUNTRIES_OF_INTEREST]
93 ```
94
95 ```python
96 data_by_date = base_data[['cases', 'deaths']]
97 data_by_date.head()
98 ```
99
100 ```python
101 data_by_date.loc['UK']
102 ```
103
104 ```python
105 data_by_date.groupby(level=0).cumsum()
106 ```
107
108 ```python
# Running totals of every column within each geoId, attached next to the
# daily figures with a '_culm' suffix.
running_totals = data_by_date.groupby(level=0).cumsum()
data_by_date = data_by_date.merge(
    running_totals,
    left_index=True,
    right_index=True,
    suffixes=('', '_culm'),
)
data_by_date
114 ```
115
116 ```python
# Row-to-row change in daily cases and deaths within each geoId, attached
# with a '_diff' suffix.
daily_changes = data_by_date[['cases', 'deaths']].groupby(level=0).diff()
data_by_date = data_by_date.merge(
    daily_changes,
    left_index=True,
    right_index=True,
    suffixes=('', '_diff'),
)
data_by_date
122 ```
123
124 ```python
125 data_by_date.loc['UK', '2020-04-17']
126 ```
127
128 ```python
129 data_by_date.loc['UK']
130 ```
131
132 ```python
133 # data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD]
134 ```
135
136 ```python
137 # days_since_threshold = data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD].groupby(level=0).cumcount()
138 # days_since_threshold.rename('since_threshold', inplace=True)
139 ```
140
141 ```python
# Keep only the rows where cumulative deaths exceed the threshold.
above_threshold = data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD]
dbd = above_threshold.reset_index(level=1)

# Copy the report date into a data column so it can be differenced, then put
# dateRep back into the index and sort.
dbd['since_threshold'] = dbd.dateRep
dbd = dbd.set_index('dateRep', append=True).sort_index()

# Day gaps between consecutive rows within each geoId (0 for the first row);
# their running sum is the number of days since the first row above the
# threshold.
day_gaps = dbd.groupby(level=0).diff().since_threshold.dt.days.fillna(0).astype(int)
days_since_threshold = day_gaps.groupby(level=0).cumsum()
days_since_threshold
151 ```
152
153 ```python
154 # days_since_threshold = (data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD]
155 # .reset_index(level=1).groupby(level=0)
156 # .diff().dateRep.dt.days
157 # .groupby(level=0).cumcount()
158 # )
159 # days_since_threshold.rename('since_threshold', inplace=True)
160 # days_since_threshold
161 ```
162
163 ```python
164 data_since_threshold = data_by_date.merge(days_since_threshold,
165 left_index=True, right_index=True)
166 data_since_threshold
167 ```
168
169 ```python
170 data_since_threshold = data_since_threshold.set_index('since_threshold', append=True
171 ).reorder_levels(['since_threshold', 'geoId', 'dateRep']
172 ).reset_index('dateRep')
173 data_since_threshold
174 ```
175
176 ```python
177 data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
178 ```
179
180 ```python
181 data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), ['deaths_culm']].unstack().plot(logy=True)
182 ```
183
184 ```python
185 # deaths = data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT', 'IE']), ['deaths_culm']].unstack().xs('deaths_culm', axis=1, drop_level=True)
186 ```
187
188 ```python
189 deaths = data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_culm']].unstack().sort_index().xs('deaths_culm', axis=1, drop_level=True)
190 ```
191
192 ```python
193 data_since_threshold.reset_index().merge(countries, on='geoId').set_index(['since_threshold', 'geoId'])
194 ```
195
196 ```python
# Preview the merged frame sorted by (since_threshold, geoId).  sort_index()
# is called without inplace=True so that the sorted frame is the value of
# the cell; the in-place form returns None and would display nothing.
(data_since_threshold.reset_index()
    .merge(countries, on='geoId')
    .set_index(['since_threshold', 'geoId'])
    .sort_index())
198 ```
199
200 ```python
# Attach each geoId's population and derive cumulative cases and deaths as a
# fraction of that population.
data_since_threshold_per_capita = (
    data_since_threshold.reset_index()
    .merge(countries, on='geoId')
    .set_index(['since_threshold', 'geoId'])
    .assign(
        cases_culm_pc=lambda df: df.cases_culm / df.popData2018,
        deaths_culm_pc=lambda df: df.deaths_culm / df.popData2018,
    )
)
data_since_threshold_per_capita
205 ```
206
207 ```python
208 deaths_pc = data_since_threshold_per_capita.loc[(slice(None), ['UK', 'DE', 'IT', 'IE']), ['deaths_culm_pc']].unstack().sort_index().xs('deaths_culm_pc', axis=1, drop_level=True)
209 ```
210
211 ```python
212 deaths_pc
213 ```
214
215 ```python
216 deaths_pc.index
217 ```
218
219 ```python
220 deaths_pc = data_since_threshold_per_capita.loc[(slice(None), COUNTRIES_ALL), ['deaths_culm_pc']].unstack().xs('deaths_culm_pc', axis=1, drop_level=True)
221 ```
222
223 ```python
224 deaths[COUNTRIES_CORE].plot()
225 ```
226
227 ```python
228 deaths[COUNTRIES_FRIENDS].plot()
229 ```
230
231 ```python
232 ax = deaths[COUNTRIES_CORE].plot(figsize=(10, 6), title="Total deaths, linear")
233 for c in COUNTRIES_CORE:
234 lvi = deaths[c].last_valid_index()
235 ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
236 # plt.savefig('covid_deaths_total_linear.png')
237 ```
238
239 ```python
240 ax = deaths[COUNTRIES_NORDIC].plot(figsize=(10, 6), title="Total deaths, linear")
241 for c in COUNTRIES_NORDIC:
242 lvi = deaths[c].last_valid_index()
243 ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
244 # plt.savefig('covid_deaths_total_linear.png')
245 ```
246
247 ```python
248 ax = deaths[COUNTRIES_OF_INTEREST].plot(figsize=(10, 6), title="Total deaths, linear")
249 for c in COUNTRIES_OF_INTEREST:
250 lvi = deaths[c].last_valid_index()
251 ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
252 plt.savefig('covid_deaths_total_linear.png')
253 ```
254
255 ```python
256 ax = deaths[COUNTRIES_CORE].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
257 for c in COUNTRIES_CORE:
258 lvi = deaths[c].last_valid_index()
259 ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
260
261 plt.savefig('covid_deaths_total_log.png')
262 ```
263
264 ```python
265 ylim = (5*10**3, 5*10**4)
266 ax = deaths[COUNTRIES_CORE].plot(logy=True, figsize=(10, 6), ylim=ylim, title="Total deaths, log")
267 for c in COUNTRIES_CORE:
268 lvi = deaths[c].last_valid_index()
269 if ylim[0] < deaths[c][lvi] < ylim[1]:
270 ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
271
272 # plt.savefig('covid_deaths_total_log.png')
273 ```
274
275 ```python
276 ax = deaths[COUNTRIES_FRIENDS].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
277 for c in COUNTRIES_FRIENDS:
278 lvi = deaths[c].last_valid_index()
279 ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
280
281 # plt.savefig('covid_deaths_total_log.png')
282 ```
283
284 ```python
285 ax = deaths[COUNTRIES_NORDIC].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
286 for c in COUNTRIES_NORDIC:
287 lvi = deaths[c].last_valid_index()
288 ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
289
290 # plt.savefig('covid_deaths_total_log.png')
291 ```
292
293 ```python
294 ax = deaths[COUNTRIES_OF_INTEREST].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
295 for c in COUNTRIES_OF_INTEREST:
296 lvi = deaths[c].last_valid_index()
297 ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
298
299 plt.savefig('covid_deaths_total_log.png')
300 ```
301
302 ```python
303 deaths_pc.plot(figsize=(10, 6), title="Deaths per capita, linear")
304 plt.savefig('covid_deaths_per_capita_linear.png')
305 ```
306
307 ```python
308 ax = deaths_pc.plot(logy=True, figsize=(10, 6), title="Deaths per capita, log")
309 for c in deaths_pc.columns:
310 lvi = deaths_pc[c].last_valid_index()
311 ax.text(x = lvi + 1, y = deaths_pc[c][lvi], s = c)
312 ```
313
314 ```python
315 deaths_pc[['UK', 'IE']].plot( figsize=(10, 6), title="Deaths per capita, linear")
316 ```
317
318 ```python
319 deaths_pc[['UK', 'IE']].plot(logy=True, figsize=(10, 6), title="Deaths per capita, log")
320 ```
321
322 ```python
323 deaths[['UK', 'ES', 'IT']].plot(logy=True, figsize=(10, 6), title="Deaths, log")
324 plt.savefig('covid_deaths_selected_log.png')
325 ```
326
327 ```python
328 deaths[['UK', 'ES', 'IT', 'MX']].plot(logy=True, figsize=(10, 6), title="Deaths, log")
329 ```
330
331 ```python
332 data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
333 ```
334
335 ```python
336 data_since_threshold['deaths_m4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).mean())
337 data_since_threshold['deaths_m7'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(7, 1).mean())
338 # data_since_threshold['deaths_diff_m4'] = data_since_threshold.groupby(level=1)['deaths_diff'].transform(lambda x: x.rolling(4, 1).mean())
339 # data_since_threshold['deaths_diff_m7'] = data_since_threshold.groupby(level=1)['deaths_diff'].transform(lambda x: x.rolling(7, 1).mean())
340 data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
341 ```
342
343 ```python
344 deaths_m4 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_m4']]
345 .unstack().sort_index().xs('deaths_m4', axis=1, drop_level=True))
346 ```
347
348 ```python
349 deaths_m7 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_m7']]
350 .unstack().sort_index().xs('deaths_m7', axis=1, drop_level=True))
351 ```
352
353 ```python
354 ax = deaths_m4.plot(figsize=(10, 6), title="Deaths per day, 4 day moving average")
355 for c in deaths_m4.columns:
356 lvi = deaths_m4[c].last_valid_index()
357 ax.text(x = lvi + 1, y = deaths_m4[c][lvi], s = c)
358 plt.savefig('covid_deaths_per_day.png')
359 ```
360
361 ```python
362 ax = deaths_m4[COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per day, 4 day moving average")
363 for c in COUNTRIES_CORE:
364 lvi = deaths_m4[c].last_valid_index()
365 ax.text(x = lvi + 1, y = deaths_m4[c][lvi], s = c)
366 plt.savefig('covid_deaths_per_day-core.png')
367 ```
368
369 ```python
370 ax = deaths_m4[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Deaths per day, 4 day moving average")
371 for c in COUNTRIES_FRIENDS:
372 lvi = deaths_m4[c].last_valid_index()
373 ax.text(x = lvi + 1, y = deaths_m4[c][lvi], s = c)
374 plt.savefig('covid_deaths_per_day-friends.png')
375 ```
376
377 ```python
378 C7s = 'ES FR IT UK'.split()
379 ax = deaths_m7[C7s].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
380 for c in C7s:
381 lvi = deaths_m7[c].last_valid_index()
382 ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)
383 # plt.savefig('covid_deaths_per_day-friends.png')
384 ```
385
386 ```python
387 ax = deaths_m7[COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
388 for c in COUNTRIES_CORE:
389 lvi = deaths_m7[c].last_valid_index()
390 ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)
391 plt.savefig('covid_deaths_per_day_7.png')
392 ```
393
394 ```python
395 ax = deaths_m7[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
396 for c in COUNTRIES_FRIENDS:
397 lvi = deaths_m7[c].last_valid_index()
398 ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)
399 plt.savefig('covid_deaths_per_day_friends_7.png')
400 ```
401
402 ```python
def gmean_scale(items):
    """Return the geometric mean of *items* divided by its last element.

    Returns NaN when *items* is empty or its last element is zero, because
    the ratio is undefined in both cases.
    """
    if len(items) == 0 or items[-1] == 0:
        return np.nan
    return gmean(items) / items[-1]
405 ```
406
407 ```python
def _doubling_time(cumulative, daily):
    """Return the time for *cumulative* to double at a growth of *daily* per step.

    Solves ``(1 + daily / cumulative) ** t == 2`` for ``t``.  ``log1p`` keeps
    precision when the growth is small relative to the total.  Zero growth
    yields ``inf``; the notebook replaces infinities with NaN before plotting.
    """
    return np.log(2) / np.log1p(daily / cumulative)


def doubling_time(df):
    """Doubling time of cumulative deaths based on the ``deaths_g4`` column.

    Reads the ``deaths_culm`` and ``deaths_g4`` columns of *df* and returns
    one value per row.
    """
    return _doubling_time(df.deaths_culm, df.deaths_g4)


def doubling_time_7(df):
    """Doubling time of cumulative deaths based on the ``deaths_g7`` column.

    Reads the ``deaths_culm`` and ``deaths_g7`` columns of *df* and returns
    one value per row.
    """
    return _doubling_time(df.deaths_culm, df.deaths_g7)
413 ```
414
415 ```python
416 # data_since_threshold['deaths_g4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).apply(gmean_scale, raw=True))
417 # data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
418 ```
419
420 ```python
421 data_since_threshold['deaths_g4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).apply(gmean, raw=True))
422 data_since_threshold['deaths_g7'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(7, 1).apply(gmean, raw=True))
423 data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
424 ```
425
426 ```python
427 data_since_threshold['doubling_time'] = data_since_threshold.groupby(level=1).apply(doubling_time).reset_index(level=0, drop=True)
428 data_since_threshold['doubling_time_7'] = data_since_threshold.groupby(level=1).apply(doubling_time_7).reset_index(level=0, drop=True)
429 # data_since_threshold.loc[(slice(None), 'UK'), :]
430 ```
431
432 ```python
433 doubling_times = (data_since_threshold.loc[(slice(None), COUNTRIES_OF_INTEREST), ['doubling_time']]
434 .unstack().sort_index().xs('doubling_time', axis=1, drop_level=True))
435 doubling_times.replace([np.inf, -np.inf], np.nan, inplace=True)
436 ```
437
438 ```python
439 doubling_times_7 = (data_since_threshold.loc[(slice(None), COUNTRIES_OF_INTEREST), ['doubling_time_7']]
440 .unstack().sort_index().xs('doubling_time_7', axis=1, drop_level=True))
441 doubling_times_7.replace([np.inf, -np.inf], np.nan, inplace=True)
442 ```
443
444 ```python
445 ax = doubling_times.plot(figsize=(10, 6), title="Doubling times, 4 day average")
446 for c in doubling_times.columns:
447 lvi = doubling_times[c].last_valid_index()
448 ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)
449 # plt.savefig('covid_deaths_per_day.png')
450 ```
451
452 ```python
453 ax = doubling_times_7[COUNTRIES_CORE].plot(figsize=(10, 6), title="Doubling times, 7 day average")
454 for c in COUNTRIES_CORE:
455 lvi = doubling_times_7[c].last_valid_index()
456 ax.text(x = lvi + 1, y = doubling_times_7[c][lvi], s = c)
457 plt.savefig('covid_doubling_times_7.png')
458 ```
459
460 ```python
461 ax = doubling_times[COUNTRIES_CORE].plot(figsize=(10, 6), title="Doubling times, 4 day average")
462 for c in COUNTRIES_CORE:
463 lvi = doubling_times[c].last_valid_index()
464 ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)
465 plt.savefig('covid_doubling_times.png')
466 ```
467
468 ```python
469 ax = doubling_times[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Doubling times")
470 for c in COUNTRIES_FRIENDS:
471 lvi = doubling_times[c].last_valid_index()
472 ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)
473 plt.savefig('covid_doubling_times_friends.png')
474 ```
475
476 ```python
477 ax = doubling_times[C7s].plot(figsize=(10, 6), title="Doubling times")
478 for c in C7s:
479 lvi = doubling_times[c].last_valid_index()
480 ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)
481 # plt.savefig('covid_doubling_times_friends.png')
482 ```
483
484 ```python
485 # deaths_diff_m4 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_diff_m4']]
486 # .unstack().sort_index().xs('deaths_diff_m4', axis=1, drop_level=True))
487 ```
488
489 ```python
490 # deaths_diff_m7 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_diff_m7']]
491 # .unstack().sort_index().xs('deaths_diff_m7', axis=1, drop_level=True))
492 ```
493
494 ```python
495 # deaths_diff_m7
496 ```
497
498 ```python
499 data_since_threshold.replace([np.inf, -np.inf], np.nan).groupby(level=1).last().loc[COUNTRIES_ALL]#, [doubling_time]]
500 ```
501
502 ```python
503 data_since_threshold.replace([np.inf, -np.inf], np.nan).groupby(level=1).last().loc[['UK', 'DE', 'IT']]#, [doubling_time]]
504 ```
505
506 ```python
507 it_since_threshold = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['IT']), :]
508 it_since_threshold.index.max()[0]
509 ```
510
511 ```python
512 uk_projection = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['UK']), :]
513 uk_projection.index.max()[0]
514 ```
515
516 ```python
517 proj = it_since_threshold.loc[(slice(60, 77), slice(None)), ['cases', 'deaths']]
518 proj.index = pd.MultiIndex.from_tuples([(n, 'UK') for n, _ in proj.index], names=proj.index.names)
519 proj
520 ```
521
522 ```python
# Extend the UK rows with the relabelled Italian rows.  pd.concat is used
# because DataFrame.append is deprecated and no longer exists in pandas 2.x.
uk_projection = pd.concat([uk_projection, proj], sort=True)
uk_projection.deaths.sum()
525 ```
526
527 ```python
528 it_since_threshold.deaths.sum()
529 ```
530
531 ```python
532
533 ```