More analysis
[covid19.git] / covid.md
index c5ada44c47552ecdc018dcf27bfa9e4ce155ab77..5b762de0275f32355e221821b654967a178b797e 100644 (file)
--- a/covid.md
+++ b/covid.md
@@ -67,6 +67,10 @@ base_data
 base_data.loc['UK']
 ```
 
+```python
+base_data.loc['UK', '2020-04-17']
+```
+
 ```python
 countries = raw_data[['geoId', 'countriesAndTerritories', 'popData2018']]
 countries = countries[countries['popData2018'] != '']
@@ -109,6 +113,22 @@ data_by_date = data_by_date.merge(
 data_by_date
 ```
 
+```python
+data_by_date = data_by_date.merge(
+    data_by_date[['cases', 'deaths']].groupby(level=0).diff(),  # row-to-row change of cases/deaths within each group of index level 0
+    suffixes=('', '_diff'),  # existing columns keep their names; the merged-in copies become cases_diff / deaths_diff
+    left_index=True, right_index=True)  # join the two frames on their (shared) index
+data_by_date
+```
+
+```python
+data_by_date.loc['UK', '2020-04-17']
+```
+
+```python
+data_by_date.loc['UK']
+```
+
 ```python
 # data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD]
 ```
@@ -162,17 +182,21 @@ data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), ['deaths_culm']].uns
 ```
 
 ```python
-deaths = data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT', 'IE']), ['deaths_culm']].unstack().xs('deaths_culm', axis=1, drop_level=True)
+deaths = data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT', 'IE']), ['deaths_culm']].unstack().xs('deaths_culm', axis=1, drop_level=True)
 ```
 
 ```python
-deaths = data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_culm']].unstack().xs('deaths_culm', axis=1, drop_level=True)
+deaths = data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_culm']].unstack().sort_index().xs('deaths_culm', axis=1, drop_level=True)
 ```
 
 ```python
 data_since_threshold.reset_index().merge(countries, on='geoId').set_index(['since_threshold', 'geoId'])
 ```
 
+```python
+data_since_threshold.reset_index().merge(countries, on='geoId').set_index(['since_threshold', 'geoId']).sort_index()  # return the sorted copy so the cell displays it
+```
+
 ```python
 data_since_threshold_per_capita = data_since_threshold.reset_index().merge(countries, on='geoId').set_index(['since_threshold', 'geoId'])
 data_since_threshold_per_capita['cases_culm_pc'] = data_since_threshold_per_capita.cases_culm / data_since_threshold_per_capita.popData2018
@@ -181,13 +205,17 @@ data_since_threshold_per_capita
 ```
 
 ```python
-deaths_pc = data_since_threshold_per_capita.loc[(slice(None), ['UK', 'DE', 'IT', 'IE']), ['deaths_culm_pc']].unstack().xs('deaths_culm_pc', axis=1, drop_level=True)
+deaths_pc = data_since_threshold_per_capita.loc[(slice(None), ['UK', 'DE', 'IT', 'IE']), ['deaths_culm_pc']].unstack().sort_index().xs('deaths_culm_pc', axis=1, drop_level=True)
 ```
 
 ```python
 deaths_pc
 ```
 
+```python
+deaths_pc.index
+```
+
 ```python
 deaths_pc = data_since_threshold_per_capita.loc[(slice(None), COUNTRIES_ALL), ['deaths_culm_pc']].unstack().xs('deaths_culm_pc', axis=1, drop_level=True)
 ```
@@ -233,6 +261,17 @@ for c in COUNTRIES_CORE:
 plt.savefig('covid_deaths_total_log.png')
 ```
 
+```python
+ylim = (5*10**3, 5*10**4)  # y-axis window handed to plot(): 5,000 to 50,000
+ax = deaths[COUNTRIES_CORE].plot(logy=True, figsize=(10, 6), ylim=ylim, title="Total deaths, log")
+for c in COUNTRIES_CORE:
+    lvi = deaths[c].last_valid_index()  # index label of the last non-missing value in this column
+    if ylim[0] < deaths[c][lvi] < ylim[1]:  # only label columns whose final value lies strictly inside the y-axis window
+        ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)  # column name at x = lvi + 1, just past the last point (assumes a numeric index)
+
+# plt.savefig('covid_deaths_total_log.png')
+```
+
 ```python
 ax = deaths[COUNTRIES_FRIENDS].plot(logy=True, figsize=(10, 6), title="Total deaths, log")
 for c in COUNTRIES_FRIENDS:
@@ -296,17 +335,19 @@ data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
 ```python
 data_since_threshold['deaths_m4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).mean())
 data_since_threshold['deaths_m7'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(7, 1).mean())
+# data_since_threshold['deaths_diff_m4'] = data_since_threshold.groupby(level=1)['deaths_diff'].transform(lambda x: x.rolling(4, 1).mean())
+# data_since_threshold['deaths_diff_m7'] = data_since_threshold.groupby(level=1)['deaths_diff'].transform(lambda x: x.rolling(7, 1).mean())
 data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
 ```
 
 ```python
 deaths_m4 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_m4']]
-             .unstack().xs('deaths_m4', axis=1, drop_level=True))
+             .unstack().sort_index().xs('deaths_m4', axis=1, drop_level=True))
 ```
 
 ```python
 deaths_m7 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_m7']]
-             .unstack().xs('deaths_m7', axis=1, drop_level=True))
+             .unstack().sort_index().xs('deaths_m7', axis=1, drop_level=True))
 ```
 
 ```python
@@ -342,12 +383,20 @@ for c in C7s:
 # plt.savefig('covid_deaths_per_day-friends.png') 
 ```
 
+```python
+ax = deaths_m7[COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
+for c in COUNTRIES_CORE:
+    lvi = deaths_m7[c].last_valid_index()  # index label of the last non-missing value in this column
+    ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)  # column name at x = lvi + 1, just past the last point (assumes a numeric index)
+plt.savefig('covid_deaths_per_day_7.png') 
+```
+
 ```python
 ax = deaths_m7[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title="Deaths per day, 7 day moving average")
 for c in COUNTRIES_FRIENDS:
     lvi = deaths_m7[c].last_valid_index()
     ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)
-# plt.savefig('covid_deaths_per_day-friends.png') 
+plt.savefig('covid_deaths_per_day_friends_7.png') 
 ```
 
 ```python
@@ -358,6 +407,9 @@ def gmean_scale(items):
 ```python
 def doubling_time(df):
     return np.log(2) / np.log((df.deaths_culm + df.deaths_g4) / df.deaths_culm)
+
+def doubling_time_7(df):  # same formula as doubling_time, but reads df.deaths_g7 instead of df.deaths_g4
+    return np.log(2) / np.log((df.deaths_culm + df.deaths_g7) / df.deaths_culm)  # ln(2) / ln(1 + deaths_g7 / deaths_culm); infinite when deaths_g7 is 0
 ```
 
 ```python
@@ -367,22 +419,30 @@ def doubling_time(df):
 
 ```python
 data_since_threshold['deaths_g4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).apply(gmean, raw=True))
+data_since_threshold['deaths_g7'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(7, 1).apply(gmean, raw=True))
 data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]
 ```
 
 ```python
 data_since_threshold['doubling_time'] = data_since_threshold.groupby(level=1).apply(doubling_time).reset_index(level=0, drop=True)
+data_since_threshold['doubling_time_7'] = data_since_threshold.groupby(level=1).apply(doubling_time_7).reset_index(level=0, drop=True)
 # data_since_threshold.loc[(slice(None), 'UK'), :]
 ```
 
 ```python
 doubling_times = (data_since_threshold.loc[(slice(None), COUNTRIES_OF_INTEREST), ['doubling_time']]
-             .unstack().xs('doubling_time', axis=1, drop_level=True))
+             .unstack().sort_index().xs('doubling_time', axis=1, drop_level=True))
 doubling_times.replace([np.inf, -np.inf], np.nan, inplace=True)
 ```
 
 ```python
-ax = doubling_times.plot(figsize=(10, 6), title="Doubling times")
+doubling_times_7 = (data_since_threshold.loc[(slice(None), COUNTRIES_OF_INTEREST), ['doubling_time_7']]  # every index-level-0 value, only the listed level-1 keys, one column
+             .unstack().sort_index().xs('doubling_time_7', axis=1, drop_level=True))  # pivot the inner index level into columns, sort rows, then drop the outer column level
+doubling_times_7.replace([np.inf, -np.inf], np.nan, inplace=True)  # turn infinite values into NaN
+```
+
+```python
+ax = doubling_times.plot(figsize=(10, 6), title="Doubling times, 4 day average")
 for c in doubling_times.columns:
     lvi = doubling_times[c].last_valid_index()
     ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)
@@ -390,7 +450,15 @@ for c in doubling_times.columns:
 ```
 
 ```python
-ax = doubling_times[COUNTRIES_CORE].plot(figsize=(10, 6), title="Doubling times")
+ax = doubling_times_7[COUNTRIES_CORE].plot(figsize=(10, 6), title="Doubling times, 7 day average")
+for c in COUNTRIES_CORE:
+    lvi = doubling_times_7[c].last_valid_index()  # index label of the last non-missing value in this column
+    ax.text(x = lvi + 1, y = doubling_times_7[c][lvi], s = c)  # column name at x = lvi + 1, just past the last point (assumes a numeric index)
+plt.savefig('covid_doubling_times_7.png') 
+```
+
+```python
+ax = doubling_times[COUNTRIES_CORE].plot(figsize=(10, 6), title="Doubling times, 4 day average")
 for c in COUNTRIES_CORE:
     lvi = doubling_times[c].last_valid_index()
     ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)
@@ -405,6 +473,28 @@ for c in COUNTRIES_FRIENDS:
 plt.savefig('covid_doubling_times_friends.png')
 ```
 
+```python
+ax = doubling_times[C7s].plot(figsize=(10, 6), title="Doubling times")
+for c in C7s:
+    lvi = doubling_times[c].last_valid_index()
+    ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)
+# plt.savefig('covid_doubling_times_friends.png')
+```
+
+```python
+# deaths_diff_m4 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_diff_m4']]
+#              .unstack().sort_index().xs('deaths_diff_m4', axis=1, drop_level=True))
+```
+
+```python
+# deaths_diff_m7 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_diff_m7']]
+#              .unstack().sort_index().xs('deaths_diff_m7', axis=1, drop_level=True))
+```
+
+```python
+# deaths_diff_m7
+```
+
 ```python
 data_since_threshold.replace([np.inf, -np.inf], np.nan).groupby(level=1).last().loc[COUNTRIES_ALL]#, [doubling_time]]
 ```