New gitignore
[covid19.git] / test_and_case_data-be.md
diff --git a/test_and_case_data-be.md b/test_and_case_data-be.md
new file mode 100644 (file)
index 0000000..4ed0c55
--- /dev/null
@@ -0,0 +1,216 @@
+---
+jupyter:
+  jupytext:
+    formats: ipynb,md
+    text_representation:
+      extension: .md
+      format_name: markdown
+      format_version: '1.3'
+      jupytext_version: 1.10.2
+  kernelspec:
+    display_name: Python 3
+    language: python
+    name: python3
+---
+
+```python Collapsed="false"
+import itertools
+import collections
+import json
+import pandas as pd
+import numpy as np
+from scipy.stats import gmean
+import datetime
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import matplotlib.animation as ani
+%matplotlib inline
+```
+
+<!-- #region Collapsed="false" -->
+Belgian data from Sciensano's Epistat service, https://epistat.sciensano.be/covid/ (previously published at https://epistat.wiv-isp.be/covid/)
+<!-- #endregion -->
+
+```python Collapsed="false"
+!curl "https://epistat.sciensano.be/Data/COVID19BE_tests.csv" > COVID19BE_tests.csv
+```
+
+```python Collapsed="false"
+raw_data = pd.read_csv('COVID19BE_tests.csv', 
+                       parse_dates=[0], dayfirst=True,
+                       keep_default_na=False, na_values = ['']
+                      )
+```
+
+```python Collapsed="false"
+raw_data.dtypes
+```
+
+```python Collapsed="false"
+tests_data = raw_data.set_index('DATE').sort_index().groupby('DATE').sum()[:-1]
+tests_data
+```
+
+```python Collapsed="false"
+tests_data.plot()
+```
+
+```python Collapsed="false"
+# data_by_day.newAdmissions.dropna()
+```
+
+```python Collapsed="false"
+# Load the per-day data from a local CSV, using its 'dateRep' column (parsed
+# as dates) as the index.
+# NOTE(review): nothing in this notebook creates data_by_day_be.csv --
+# presumably another notebook writes it; confirm it exists before running.
+data_by_day = pd.read_csv('data_by_day_be.csv', index_col='dateRep', parse_dates=True)
+data_by_day
+```
+
+```python Collapsed="false"
+data_by_day.loc['2020-07-03']
+```
+
+```python Collapsed="false"
+# Attach the daily test totals by aligning on the date index. The outer join
+# keeps dates found in either table; dropna() then removes every row that has
+# any missing value, so only fully populated dates remain.
+data_by_day = data_by_day.merge(tests_data[['TESTS_ALL']], how='outer',
+    left_index=True, right_index=True).dropna()
+```
+
+```python Collapsed="false"
+data_by_day
+```
+
+```python Collapsed="false"
+data_by_day['deaths_m7'] = data_by_day.deaths.transform(lambda x: x.rolling(7, 1).mean())
+data_by_day['cases_m7'] = data_by_day.cases.transform(lambda x: x.rolling(7, 1).mean())
+data_by_day['tests_m7'] = data_by_day.TESTS_ALL.transform(lambda x: x.rolling(7, 1).mean())
+```
+
+```python Collapsed="false"
+# Remove any rows that still contain a missing value, then display the result.
+data_by_day = data_by_day.dropna()
+data_by_day
+```
+
+```python Collapsed="false"
+data_by_day.loc['2020-06-22']
+```
+
+```python Collapsed="false"
+# Smoothed cases and tests on one axis, 15 April to 26 August 2020.
+data_by_day.loc['2020-04-15':'2020-08-26', ['cases_m7', 'tests_m7']].plot()
+```
+
+```python Collapsed="false"
+# Ratio of cases to tests: once from the raw daily counts and once from the
+# 7-day moving averages. The rest of the notebook treats this ratio as the
+# fraction of tests that were positive.
+data_by_day['fraction_positive'] = data_by_day.cases / data_by_day.TESTS_ALL
+data_by_day['fraction_positive_m7'] = data_by_day.cases_m7 / data_by_day.tests_m7
+```
+
+```python Collapsed="false"
+# Plot the daily and smoothed ratios, skipping rows where either is missing.
+data_by_day[['fraction_positive', 'fraction_positive_m7']].dropna().plot()
+```
+
+```python Collapsed="false"
+# ax = data_by_day.dropna().loc['2020-06-15': , ['fraction_positive', 'fraction_positive_m7']].plot(figsize=(10, 8), title='Fraction of tests with positive results')
+# ax.legend(['Fraction positive per day', 'Fraction positive, 7 day moving average'])
+# ax.set_ylabel('Fraction positive')
+# plt.savefig('fraction_positive_tests.png')
+```
+
+```python Collapsed="false"
+pri_y_max = int((data_by_day.dropna().loc['2020-06-15': , 'tests_m7'].max() * 1.1) / 100 ) * 100
+ax = data_by_day.dropna().loc['2020-06-15': , 'tests_m7'].plot(figsize=(10, 8), 
+                                                               style=['k-'], 
+                                                               legend=False,
+                                                               ylim=(0, pri_y_max))
+ax.set_title('Tests done and new cases (7 day moving average)')
+ax.legend(['Tests, 7 day moving average'], loc='lower left')
+ax.set_ylabel('Tests')
+sec_y_max = int((data_by_day.dropna().loc['2020-06-15':, 'cases_m7'].max() * 1.1) / 100) * 100
+ax = data_by_day.dropna().loc['2020-06-15':, 'cases_m7'].plot(ax=ax, secondary_y=True, style='r--')
+ax.set_ylim((0, sec_y_max))
+ax.legend(['Cases (7 day moving average)'], loc='lower right')
+ax.set_ylabel('New cases')
+plt.savefig('tests_and_cases_be.png')
+```
+
+```python Collapsed="false"
+# Scratch check: applies the same 10% headroom and truncation to a multiple of
+# 100 to sec_y_max a second time. The value is only displayed, never stored.
+int((sec_y_max * 1.1) / 100) * 100
+```
+
+```python Collapsed="false"
+ax = (data_by_day.dropna().loc['2020-06-15': , ['fraction_positive', 'fraction_positive_m7']] * 100).plot(figsize=(10, 8), 
+                                                                                                  style=['b:', 'k-'], legend=False)
+ax.set_title('Fraction of tests with positive results')
+ax.legend(['Fraction positive (%)', 'Fraction positive (%), 7 day moving average'], loc='lower left')
+ax.set_ylabel('Fraction positive')
+ax = data_by_day.dropna().loc['2020-06-15':, 'cases_m7'].plot(ax=ax, secondary_y=True, style='r--')
+ax.legend(['Cases (7 day moving average)'], loc='lower right')
+ax.set_ylabel('New cases')
+plt.savefig('fraction_positive_tests_be.png')
+```
+
+```python Collapsed="false"
+ax = data_by_day.dropna().loc['2020-06-15':].plot(x='fraction_positive_m7', y='tests_m7', 
+                                                  figsize=(8, 8),
+                                                  legend=None)
+ax.set_xlabel("Fraction of tests that are positive")
+ax.set_ylabel("Number of tests")
+for d in data_by_day.dropna().loc['2020-06-15'::15].index:
+    ax.plot(data_by_day.loc[d, 'fraction_positive_m7'], data_by_day.loc[d, 'tests_m7'], 'o', 
+                        markersize=8)#, markerfacecolor=marker_col, markeredgecolor=marker_col)
+    ax.text(data_by_day.loc[d, 'fraction_positive_m7'] + 0.0002, data_by_day.loc[d, 'tests_m7'], 
+            s = d.strftime("%d %B %Y"))
+plt.savefig('fraction_positive_tests_vs_tests_be.png')
+```
+
+```python Collapsed="false"
+data_by_day.dropna().loc['2020-06-15':][:2]
+```
+
+```python Collapsed="false"
+fig = plt.figure(figsize=(8, 8))
+plt.ylabel('Number of tests')
+plt.xlabel('Fraction of tests that are positive')
+all_data = data_by_day.dropna().loc['2020-06-15':]
+
+
+minx = all_data.fraction_positive_m7.min() * 0.9
+maxx = all_data.fraction_positive_m7.max() * 1.1
+miny = all_data.tests_m7.min() * 0.9
+maxy = all_data.tests_m7.max() * 1.1
+
+plt.xlim(minx, maxx)
+plt.ylim(miny, maxy)
+# plt.legend(None)
+
+def build_state_frame(i):
+    this_data = all_data[:i]
+    plt.clf()
+    plt.ylabel('Number of tests')
+    plt.xlabel('Fraction of tests that are positive')
+    plt.xlim(minx, maxx)
+    plt.ylim(miny, maxy)
+    p = plt.plot(this_data.fraction_positive_m7, this_data.tests_m7)
+    p[0].set_color('r')
+    for d in this_data[::15].index:
+        plt.plot(this_data.loc[d, 'fraction_positive_m7'], 
+                this_data.loc[d, 'tests_m7'], 'o', 
+                markersize=8, markerfacecolor='r', markeredgecolor='r')
+        plt.text(this_data.loc[d, 'fraction_positive_m7'] + 0.0002, 
+                this_data.loc[d, 'tests_m7'], 
+            s = d.strftime("%d %B %Y"))
+
+animator = ani.FuncAnimation(fig, build_state_frame, 
+                             frames=all_data.shape[0]+1,
+                             interval=100,
+                             repeat_delay=200,
+                             repeat=True)
+animator.save('tests_vs_fraction_positive_be.mp4')
+plt.show()
+```
+
+```python Collapsed="false"
+!rm tests_vs_fraction_positive_animation_be.png
+!ffmpeg -i tests_vs_fraction_positive_be.mp4 -plays 0 -final_delay 1 -f apng tests_vs_fraction_positive_animation_be.png
+```
+
+```python Collapsed="false"
+
+```