Initial commit
[covid19.git] / covid.md
1 ---
2 jupyter:
3 jupytext:
4 formats: ipynb,md
5 text_representation:
6 extension: .md
7 format_name: markdown
8 format_version: '1.2'
9 jupytext_version: 1.3.4
10 kernelspec:
11 display_name: Python 3
12 language: python
13 name: python3
14 ---
15
16 Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
17
18 ```python
19 import itertools
20 import collections
21 import pandas as pd
22 import numpy as np
23 from scipy.stats import gmean
24
25 import matplotlib as mpl
26 import matplotlib.pyplot as plt
27 %matplotlib inline
28 ```
29
30 ```python
# Cumulative-death count a country must exceed before its rows are used
# for the "days since threshold" axis further down.
DEATH_COUNT_THRESHOLD = 10

# Country groups, written as the geoId codes used to index the data.
COUNTRIES_CORE = 'IT DE UK ES IE FR'.split()
COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()

# De-duplicate with dict.fromkeys rather than set(): a set of strings has a
# different iteration order from run to run (hash randomisation), which would
# reshuffle plot columns/legends and table rows each time the notebook runs.
# dict.fromkeys keeps first-seen order, so the result is reproducible.
COUNTRIES_OF_INTEREST = list(dict.fromkeys(COUNTRIES_CORE + COUNTRIES_FRIENDS))
COUNTRIES_ALL = list(dict.fromkeys(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC))
37 ```
38
39 ```python
40 !curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv
41 ```
42
43 ```python
44 # First col is a date, treat geoId of NA as 'Namibia', not "NA" value
45 raw_data = pd.read_csv('covid.csv', parse_dates=[0], keep_default_na=False, dayfirst=True)
46 ```
47
48 ```python
49 raw_data.size
50 ```
51
52 ```python
53 raw_data.head()
54 ```
55
56 ```python
57 raw_data.dtypes
58 ```
59
60 ```python
61 base_data = raw_data.set_index(['geoId', 'dateRep'])
62 base_data.sort_index(inplace=True)
63 base_data
64 ```
65
66 ```python
67 base_data.loc['UK']
68 ```
69
70 ```python
# Per-country lookup table indexed by geoId: country name and 2018 population.
country_columns = ['geoId', 'countriesAndTerritories', 'popData2018']
# Rows with a blank population are dropped before the integer cast below.
has_population = raw_data['popData2018'] != ''
countries = (
    raw_data.loc[has_population, country_columns]
    .drop_duplicates()
    .set_index('geoId')
    .astype({'popData2018': 'int64'})
)
countries.head()
77 ```
78
79 ```python
80 countries.shape
81 ```
82
83 ```python
84 countries[countries.countriesAndTerritories == 'Finland']
85 ```
86
87 ```python
88 countries.loc[COUNTRIES_OF_INTEREST]
89 ```
90
91 ```python
92 data_by_date = base_data[['cases', 'deaths']]
93 data_by_date.head()
94 ```
95
96 ```python
97 data_by_date.loc['UK']
98 ```
99
100 ```python
101 data_by_date.groupby(level=0).cumsum()
102 ```
103
104 ```python
# Running totals of cases and deaths within each geoId (index level 0),
# attached next to the daily figures as cases_culm / deaths_culm.
running_totals = data_by_date.groupby(level=0).cumsum()
data_by_date = data_by_date.merge(
    running_totals,
    left_index=True,
    right_index=True,
    suffixes=('', '_culm'),
)
data_by_date
110 ```
111
112 ```python
113 # data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD]
114 ```
115
116 ```python
117 # days_since_threshold = data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD].groupby(level=0).cumcount()
118 # days_since_threshold.rename('since_threshold', inplace=True)
119 ```
120
121 ```python
# Rows where a country's cumulative deaths exceed the threshold, with the
# report date copied into an ordinary column so it can be differenced.
past_threshold = data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD
dbd = data_by_date[past_threshold].reset_index(level=1)
dbd['since_threshold'] = dbd.dateRep
dbd = dbd.set_index('dateRep', append=True).sort_index()

# Gap in days between consecutive rows of the same country (the first row of
# each country has no predecessor, so its gap is filled with 0) ...
day_gaps = dbd.groupby(level=0).diff().since_threshold.dt.days
day_gaps = day_gaps.fillna(0).astype(int)
# ... accumulated per country to give days elapsed since its first such row.
days_since_threshold = day_gaps.groupby(level=0).cumsum()
days_since_threshold
131 ```
132
133 ```python
134 # days_since_threshold = (data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD]
135 # .reset_index(level=1).groupby(level=0)
136 # .diff().dateRep.dt.days
137 # .groupby(level=0).cumcount()
138 # )
139 # days_since_threshold.rename('since_threshold', inplace=True)
140 # days_since_threshold
141 ```
142
143 ```python
144 data_since_threshold = data_by_date.merge(days_since_threshold,
145 left_index=True, right_index=True)
146 data_since_threshold
147 ```
148
149 ```python
150 data_since_threshold = data_since_threshold.set_index('since_threshold', append=True
151 ).reorder_levels(['since_threshold', 'geoId', 'dateRep']
152 ).reset_index('dateRep')
153 data_since_threshold
154 ```
155
156 ```python
157 data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
158 ```
159
160 ```python
161 data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), ['deaths_culm']].unstack().plot(logy=True)
162 ```
163
164 ```python
165 deaths = data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT', 'IE']), ['deaths_culm']].unstack().xs('deaths_culm', axis=1, drop_level=True)
166 ```
167
168 ```python
169 deaths = data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_culm']].unstack().xs('deaths_culm', axis=1, drop_level=True)
170 ```
171
172 ```python
173 data_since_threshold.reset_index().merge(countries, on='geoId').set_index(['since_threshold', 'geoId'])
174 ```
175
176 ```python
# Join the country table (name, 2018 population) onto the since-threshold
# data, then express the running totals as a fraction of that population.
data_since_threshold_per_capita = (
    data_since_threshold.reset_index()
    .merge(countries, on='geoId')
    .set_index(['since_threshold', 'geoId'])
)
population = data_since_threshold_per_capita.popData2018
data_since_threshold_per_capita['cases_culm_pc'] = data_since_threshold_per_capita.cases_culm / population
data_since_threshold_per_capita['deaths_culm_pc'] = data_since_threshold_per_capita.deaths_culm / population
data_since_threshold_per_capita
181 ```
182
183 ```python
184 deaths_pc = data_since_threshold_per_capita.loc[(slice(None), ['UK', 'DE', 'IT', 'IE']), ['deaths_culm_pc']].unstack().xs('deaths_culm_pc', axis=1, drop_level=True)
185 ```
186
187 ```python
188 deaths_pc
189 ```
190
191 ```python
192 deaths_pc = data_since_threshold_per_capita.loc[(slice(None), COUNTRIES_ALL), ['deaths_culm_pc']].unstack().xs('deaths_culm_pc', axis=1, drop_level=True)
193 ```
194
195 ```python
196 deaths[COUNTRIES_CORE].plot()
197 ```
198
199 ```python
200 deaths[COUNTRIES_FRIENDS].plot()
201 ```
202
203 ```python
204 ax = deaths[COUNTRIES_CORE].plot(figsize=(10, 6), title="Total deaths, linear")
205 for c in COUNTRIES_CORE:
206 lvi = deaths[c].last_valid_index()
207 ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
208 # plt.savefig('covid_deaths_total_linear.png')
209 ```
210
211 ```python
212 ax = deaths[COUNTRIES_NORDIC].plot(figsize=(10, 6), title="Total deaths, linear")
213 for c in COUNTRIES_NORDIC:
214 lvi = deaths[c].last_valid_index()
215 ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
216 # plt.savefig('covid_deaths_total_linear.png')
217 ```
218
219 ```python
220 ax = deaths[COUNTRIES_OF_INTEREST].plot(figsize=(10, 6), title="Total deaths, linear")
221 for c in COUNTRIES_OF_INTEREST:
222 lvi = deaths[c].last_valid_index()
223 ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
224 plt.savefig('covid_deaths_total_linear.png')
225 ```
226
227 ```python
228 ax = deaths[COUNTRIES_CORE].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
229 for c in COUNTRIES_CORE:
230 lvi = deaths[c].last_valid_index()
231 ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
232
233 plt.savefig('covid_deaths_total_log.png')
234 ```
235
236 ```python
237 ax = deaths[COUNTRIES_FRIENDS].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
238 for c in COUNTRIES_FRIENDS:
239 lvi = deaths[c].last_valid_index()
240 ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
241
242 # plt.savefig('covid_deaths_total_log.png')
243 ```
244
245 ```python
246 ax = deaths[COUNTRIES_NORDIC].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
247 for c in COUNTRIES_NORDIC:
248 lvi = deaths[c].last_valid_index()
249 ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
250
251 # plt.savefig('covid_deaths_total_log.png')
252 ```
253
254 ```python
255 ax = deaths[COUNTRIES_OF_INTEREST].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
256 for c in COUNTRIES_OF_INTEREST:
257 lvi = deaths[c].last_valid_index()
258 ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)
259
260 plt.savefig('covid_deaths_total_log.png')
261 ```
262
263 ```python
264 deaths_pc.plot(figsize=(10, 6), title="Deaths per capita, linear")
265 plt.savefig('covid_deaths_per_capita_linear.png')
266 ```
267
268 ```python
269 ax = deaths_pc.plot(logy=True, figsize=(10, 6), title="Deaths per capita, log")
270 for c in deaths_pc.columns:
271 lvi = deaths_pc[c].last_valid_index()
272 ax.text(x = lvi + 1, y = deaths_pc[c][lvi], s = c)
273 ```
274
275 ```python
276 deaths_pc[['UK', 'IE']].plot( figsize=(10, 6), title="Deaths per capita, linear")
277 ```
278
279 ```python
280 deaths_pc[['UK', 'IE']].plot(logy=True, figsize=(10, 6), title="Deaths per capita, log")
281 ```
282
283 ```python
284 deaths[['UK', 'ES', 'IT']].plot(logy=True, figsize=(10, 6), title="Deaths, log")
285 plt.savefig('covid_deaths_selected_log.png')
286 ```
287
288 ```python
289 deaths[['UK', 'ES', 'IT', 'MX']].plot(logy=True, figsize=(10, 6), title="Deaths, log")
290 ```
291
292 ```python
293 data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
294 ```
295
296 ```python
# Trailing means of daily deaths within each geoId (index level 1) over
# windows of 4 and 7 rows. min_periods=1 lets the earliest rows of a country
# average over however many rows exist so far rather than yielding NaN.
deaths_by_country = data_since_threshold.groupby(level=1)['deaths']
data_since_threshold['deaths_m4'] = deaths_by_country.transform(
    lambda daily: daily.rolling(window=4, min_periods=1).mean())
data_since_threshold['deaths_m7'] = deaths_by_country.transform(
    lambda daily: daily.rolling(window=7, min_periods=1).mean())
data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
300 ```
301
302 ```python
303 deaths_m4 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_m4']]
304 .unstack().xs('deaths_m4', axis=1, drop_level=True))
305 ```
306
307 ```python
308 deaths_m7 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_m7']]
309 .unstack().xs('deaths_m7', axis=1, drop_level=True))
310 ```
311
312 ```python
313 ax = deaths_m4.plot(figsize=(10, 6), title="Deaths per day, 4 day moving average")
314 for c in deaths_m4.columns:
315 lvi = deaths_m4[c].last_valid_index()
316 ax.text(x = lvi + 1, y = deaths_m4[c][lvi], s = c)
317 plt.savefig('covid_deaths_per_day.png')
318 ```
319
320 ```python
321 ax = deaths_m4[COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per day, 4 day moving average")
322 for c in COUNTRIES_CORE:
323 lvi = deaths_m4[c].last_valid_index()
324 ax.text(x = lvi + 1, y = deaths_m4[c][lvi], s = c)
325 plt.savefig('covid_deaths_per_day-core.png')
326 ```
327
328 ```python
329 ax = deaths_m4[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Deaths per day, 4 day moving average")
330 for c in COUNTRIES_FRIENDS:
331 lvi = deaths_m4[c].last_valid_index()
332 ax.text(x = lvi + 1, y = deaths_m4[c][lvi], s = c)
333 plt.savefig('covid_deaths_per_day-friends.png')
334 ```
335
336 ```python
337 C7s = 'ES FR IT UK'.split()
338 ax = deaths_m7[C7s].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
339 for c in C7s:
340 lvi = deaths_m7[c].last_valid_index()
341 ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)
342 # plt.savefig('covid_deaths_per_day-friends.png')
343 ```
344
345 ```python
346 ax = deaths_m7[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
347 for c in COUNTRIES_FRIENDS:
348 lvi = deaths_m7[c].last_valid_index()
349 ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)
350 # plt.savefig('covid_deaths_per_day-friends.png')
351 ```
352
353 ```python
def gmean_scale(items):
    """Return the geometric mean of ``items`` divided by its last element.

    ``items`` must be indexable with ``items[-1]`` and acceptable to
    ``scipy.stats.gmean``.
    """
    mean = gmean(items)
    return mean / items[-1]
356 ```
357
358 ```python
def doubling_time(df):
    """Return, per row of ``df``, the doubling time of cumulative deaths.

    The per-step growth factor is ``(deaths_culm + deaths_g4) / deaths_culm``
    and the result is ``log(2) / log(growth factor)``, i.e. how many such
    steps it takes for the total to double. ``df`` must provide
    ``deaths_culm`` and ``deaths_g4``; the computation is element-wise, so the
    result carries the same index as ``df``.
    """
    growth_factor = (df.deaths_culm + df.deaths_g4) / df.deaths_culm
    return np.log(2) / np.log(growth_factor)
361 ```
362
363 ```python
364 # data_since_threshold['deaths_g4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).apply(gmean_scale, raw=True))
365 # data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
366 ```
367
368 ```python
369 data_since_threshold['deaths_g4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).apply(gmean, raw=True))
370 data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
371 ```
372
373 ```python
# doubling_time() is purely row-wise, so it can be applied to the whole frame
# directly. The result already carries the frame's own index, which avoids
# going through groupby.apply and then stripping index level 0 on the
# assumption that apply prepended the group key (that depends on the pandas
# version and on group_keys).
data_since_threshold['doubling_time'] = doubling_time(data_since_threshold)
375 # data_since_threshold.loc[(slice(None), 'UK'), :]
376 ```
377
378 ```python
# Wide table of doubling times: one row per since_threshold value, one column
# per country of interest. Infinite values are blanked to NaN.
doubling_times = (
    data_since_threshold
    .loc[(slice(None), COUNTRIES_OF_INTEREST), ['doubling_time']]
    .unstack()
    .xs('doubling_time', axis=1, drop_level=True)
    .replace([np.inf, -np.inf], np.nan)
)
382 ```
383
384 ```python
385 ax = doubling_times.plot(figsize=(10, 6), title="Doubling times")
386 for c in doubling_times.columns:
387 lvi = doubling_times[c].last_valid_index()
388 ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)
389 # plt.savefig('covid_deaths_per_day.png')
390 ```
391
392 ```python
393 ax = doubling_times[COUNTRIES_CORE].plot(figsize=(10, 6), title="Doubling times")
394 for c in COUNTRIES_CORE:
395 lvi = doubling_times[c].last_valid_index()
396 ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)
397 plt.savefig('covid_doubling_times.png')
398 ```
399
400 ```python
401 ax = doubling_times[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Doubling times")
402 for c in COUNTRIES_FRIENDS:
403 lvi = doubling_times[c].last_valid_index()
404 ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)
405 plt.savefig('covid_doubling_times_friends.png')
406 ```
407
408 ```python
409 data_since_threshold.replace([np.inf, -np.inf], np.nan).groupby(level=1).last().loc[COUNTRIES_ALL]#, [doubling_time]]
410 ```
411
412 ```python
413
414 ```