Updated for imported data format
[covid19.git] / international_comparison.md
diff --git a/international_comparison.md b/international_comparison.md
deleted file mode 100644 (file)
index 801ee4c..0000000
+++ /dev/null
@@ -1,193 +0,0 @@
----
-jupyter:
-  jupytext:
-    formats: ipynb,md
-    text_representation:
-      extension: .md
-      format_name: markdown
-      format_version: '1.2'
-      jupytext_version: 1.9.1
-  kernelspec:
-    display_name: Python 3
-    language: python
-    name: python3
----
-
-<!-- #region Collapsed="false" -->
-Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
-<!-- #endregion -->
-
-```python Collapsed="false"
-import itertools
-import collections
-import json
-import pandas as pd
-import numpy as np
-from scipy.stats import gmean
-import datetime
-
-from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float
-from sqlalchemy import create_engine
-
-import matplotlib as mpl
-import matplotlib.pyplot as plt
-%matplotlib inline
-%load_ext sql
-```
-
-```python Collapsed="false"
-connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
-```
-
-```python Collapsed="false"
-%sql $connection_string
-```
-
-```python
-engine = create_engine(connection_string)
-```
-
-```python Collapsed="false"
-DEATH_COUNT_THRESHOLD = 10
-COUNTRIES_CORE = tuple(sorted('IT DE UK ES IE FR BE'.split()))
-COUNTRIES_NORDIC = tuple('SE NO DK FI UK'.split())
-COUNTRIES_FRIENDS = tuple('IT UK ES BE SI MX'.split())
-# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
-
-COUNTRIES_AMERICAS = ('AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
-       'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
-       'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
-       'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE')
-COUNTRIES_OF_INTEREST = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
-COUNTRIES_ALL = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
-```
-
-```python Collapsed="false"
-# res = %sql select report_date, geo_id, deaths_weekly, culm_deaths from weekly_cases where geo_id in $COUNTRIES_CORE order by report_date, geo_id
-```
-
-```python Collapsed="false"
-# country_data = res.DataFrame()
-# country_data['report_date'] = country_data.report_date.astype('datetime64[ns]')
-# country_data.set_index('report_date', inplace=True)
-# country_data.tail(10)
-```
-
-```python
-query_string = f'''select report_date, geo_id, deaths_weekly, culm_deaths 
-from weekly_cases 
-where geo_id in {COUNTRIES_CORE} 
-order by report_date, geo_id'''
-
-country_data = pd.read_sql_query(query_string,
-                  engine,
-                  index_col = 'report_date',
-                  parse_dates = ['report_date']
-                 )
-```
-
-```python Collapsed="false"
-deaths_culm = country_data.pivot(columns='geo_id', values='culm_deaths')
-deaths_culm.tail()
-```
-
-```python Collapsed="false"
-deaths_culm.plot()
-```
-
-```python Collapsed="false"
-ax = deaths_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Total deaths, linear")
-ax.set_xlabel(f"Date")
-for c in COUNTRIES_CORE:
-    lvi = deaths_culm[c].last_valid_index()
-    ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_culm[c][lvi], s = f"{c}: {deaths_culm[c][lvi]:.0f}")
-plt.savefig('covid_deaths_total_linear.png')    
-```
-
-```python Collapsed="false"
-deaths_weekly = country_data.pivot(columns='geo_id', values='deaths_weekly')
-deaths_weekly.tail()
-```
-
-```python Collapsed="false"
-deaths_weekly.plot()
-```
-
-```python Collapsed="false"
-ax = deaths_weekly.loc['2020-03-01':, COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per week")
-ax.set_xlabel('Date')
-for c in COUNTRIES_CORE:
-    lvi = deaths_weekly[c].last_valid_index()
-    ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_weekly[c][lvi], s = c)
-plt.savefig('covid_deaths_per_week.png') 
-```
-
-```python Collapsed="false"
-ax = deaths_weekly.iloc[-6:].plot(figsize=(10, 6), title="Deaths per week")#, ylim=(-10, 100))
-ax.set_xlabel("Date")
-
-text_x_pos = deaths_weekly.last_valid_index() + pd.Timedelta(days=0.5)
-
-for c in COUNTRIES_CORE:
-    lvi = deaths_weekly[c].last_valid_index()
-#     if c != 'ES':
-    ax.text(x = text_x_pos, y = deaths_weekly[c][lvi], s = f"{c}: {deaths_weekly[c][lvi]:.0f}")
-plt.savefig('deaths_by_date_last_6_weeks.png') 
-```
-
-```python
-query_string = f'''select report_date, geo_id, 
-(cast(deaths_weekly as float) / population_2019) deaths_weekly_pc,
-(cast(culm_deaths as float) / population_2019) as culm_deaths_pc
-from weekly_cases join countries using (geo_id)
-where  geo_id in {COUNTRIES_CORE} 
-order by report_date, geo_id'''
-
-deaths_pc_data = pd.read_sql_query(query_string,
-                  engine,
-                  index_col = 'report_date',
-                  parse_dates = ['report_date']
-                 )
-deaths_pc_data.tail()
-```
-
-```python Collapsed="false"
-deaths_pc_culm = deaths_pc_data.pivot(columns='geo_id', values='culm_deaths_pc')
-deaths_pc_culm.tail()
-```
-
-```python
-ax = deaths_pc_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear")
-ax.set_xlabel(f"Date")
-for c in COUNTRIES_CORE:
-    lvi = deaths_pc_culm[c].last_valid_index()
-    ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc_culm[c][lvi], s = f"{c}: {deaths_pc_culm[c][lvi]*10**6:.0f}")
-# plt.savefig('covid_deaths_total_linear.png')    
-```
-
-```python Collapsed="false"
-deaths_pc = deaths_pc_data.pivot(columns='geo_id', values='deaths_weekly_pc')
-deaths_pc.tail()
-```
-
-```python
-ax = deaths_pc.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear")
-ax.set_xlabel(f"Date")
-for c in COUNTRIES_CORE:
-    lvi = deaths_pc[c].last_valid_index()
-    ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]*10**6:.0f}")
-# plt.savefig('covid_deaths_total_linear.png')    
-```
-
-```python
-ax = deaths_pc.iloc[-6:].plot(figsize=(10, 6), title="Deaths per million, linear")
-ax.set_xlabel(f"Date")
-for c in COUNTRIES_CORE:
-    lvi = deaths_pc[c].last_valid_index()
-    ax.text(x = lvi, y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]*10**6:.0f}")
-# plt.savefig('covid_deaths_total_linear.png')    
-```
-
-```python
-
-```