Removed data files
[covid19.git] / international_comparison.md
1 ---
2 jupyter:
3 jupytext:
4 formats: ipynb,md
5 text_representation:
6 extension: .md
7 format_name: markdown
8 format_version: '1.2'
9 jupytext_version: 1.9.1
10 kernelspec:
11 display_name: Python 3
12 language: python
13 name: python3
14 ---
15
16 <!-- #region Collapsed="false" -->
17 Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
18 <!-- #endregion -->
19
20 ```python Collapsed="false"
21 import itertools
22 import collections
23 import json
24 import pandas as pd
25 import numpy as np
26 from scipy.stats import gmean
27 import datetime
28
29 from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float
30 from sqlalchemy import create_engine
31
32 import matplotlib as mpl
33 import matplotlib.pyplot as plt
34 %matplotlib inline
35 %load_ext sql
36 ```
37
38 ```python Collapsed="false"
39 connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
40 ```
41
42 ```python Collapsed="false"
43 %sql $connection_string
44 ```
45
46 ```python
47 engine = create_engine(connection_string)
48 ```
49
50 ```python Collapsed="false"
51 DEATH_COUNT_THRESHOLD = 10
52 COUNTRIES_CORE = tuple(sorted('IT DE UK ES IE FR BE'.split()))
53 COUNTRIES_NORDIC = tuple('SE NO DK FI UK'.split())
54 COUNTRIES_FRIENDS = tuple('IT UK ES BE SI MX'.split())
55 # COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
56
57 COUNTRIES_AMERICAS = ('AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
58 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
59 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
60 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE')
61 COUNTRIES_OF_INTEREST = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
62 COUNTRIES_ALL = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
63 ```
64
65 ```python Collapsed="false"
66 # res = %sql select report_date, geo_id, deaths_weekly, culm_deaths from weekly_cases where geo_id in $COUNTRIES_CORE order by report_date, geo_id
67 ```
68
69 ```python Collapsed="false"
70 # country_data = res.DataFrame()
71 # country_data['report_date'] = country_data.report_date.astype('datetime64[ns]')
72 # country_data.set_index('report_date', inplace=True)
73 # country_data.tail(10)
74 ```
75
76 ```python
77 query_string = f'''select report_date, geo_id, deaths_weekly, culm_deaths
78 from weekly_cases
79 where geo_id in {COUNTRIES_CORE}
80 order by report_date, geo_id'''
81
82 country_data = pd.read_sql_query(query_string,
83 engine,
84 index_col = 'report_date',
85 parse_dates = ['report_date']
86 )
87 ```
88
89 ```python Collapsed="false"
90 deaths_culm = country_data.pivot(columns='geo_id', values='culm_deaths')
91 deaths_culm.tail()
92 ```
93
94 ```python Collapsed="false"
95 deaths_culm.plot()
96 ```
97
98 ```python Collapsed="false"
99 ax = deaths_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Total deaths, linear")
100 ax.set_xlabel(f"Date")
101 for c in COUNTRIES_CORE:
102 lvi = deaths_culm[c].last_valid_index()
103 ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_culm[c][lvi], s = f"{c}: {deaths_culm[c][lvi]:.0f}")
104 plt.savefig('covid_deaths_total_linear.png')
105 ```
106
107 ```python Collapsed="false"
108 deaths_weekly = country_data.pivot(columns='geo_id', values='deaths_weekly')
109 deaths_weekly.tail()
110 ```
111
112 ```python Collapsed="false"
113 deaths_weekly.plot()
114 ```
115
116 ```python Collapsed="false"
117 ax = deaths_weekly.loc['2020-03-01':, COUNTRIES_CORE].plot(figsize=(10, 6), title="Deaths per week")
118 ax.set_xlabel('Date')
119 for c in COUNTRIES_CORE:
120 lvi = deaths_weekly[c].last_valid_index()
121 ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_weekly[c][lvi], s = c)
122 plt.savefig('covid_deaths_per_week.png')
123 ```
124
125 ```python Collapsed="false"
126 ax = deaths_weekly.iloc[-6:].plot(figsize=(10, 6), title="Deaths per week")#, ylim=(-10, 100))
127 ax.set_xlabel("Date")
128
129 text_x_pos = deaths_weekly.last_valid_index() + pd.Timedelta(days=0.5)
130
131 for c in COUNTRIES_CORE:
132 lvi = deaths_weekly[c].last_valid_index()
133 # if c != 'ES':
134 ax.text(x = text_x_pos, y = deaths_weekly[c][lvi], s = f"{c}: {deaths_weekly[c][lvi]:.0f}")
135 plt.savefig('deaths_by_date_last_6_weeks.png')
136 ```
137
138 ```python
139 query_string = f'''select report_date, geo_id,
140 (cast(deaths_weekly as float) / population_2019) deaths_weekly_pc,
141 (cast(culm_deaths as float) / population_2019) as culm_deaths_pc
142 from weekly_cases join countries using (geo_id)
143 where geo_id in {COUNTRIES_CORE}
144 order by report_date, geo_id'''
145
146 deaths_pc_data = pd.read_sql_query(query_string,
147 engine,
148 index_col = 'report_date',
149 parse_dates = ['report_date']
150 )
151 deaths_pc_data.tail()
152 ```
153
154 ```python Collapsed="false"
155 deaths_pc_culm = deaths_pc_data.pivot(columns='geo_id', values='culm_deaths_pc')
156 deaths_pc_culm.tail()
157 ```
158
159 ```python
160 ax = deaths_pc_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear")
161 ax.set_xlabel(f"Date")
162 for c in COUNTRIES_CORE:
163 lvi = deaths_pc_culm[c].last_valid_index()
164 ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc_culm[c][lvi], s = f"{c}: {deaths_pc_culm[c][lvi]*10**6:.0f}")
165 # plt.savefig('covid_deaths_total_linear.png')
166 ```
167
168 ```python Collapsed="false"
169 deaths_pc = deaths_pc_data.pivot(columns='geo_id', values='deaths_weekly_pc')
170 deaths_pc.tail()
171 ```
172
173 ```python
174 ax = deaths_pc.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear")
175 ax.set_xlabel(f"Date")
176 for c in COUNTRIES_CORE:
177 lvi = deaths_pc[c].last_valid_index()
178 ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]*10**6:.0f}")
179 # plt.savefig('covid_deaths_total_linear.png')
180 ```
181
182 ```python
183 ax = deaths_pc.iloc[-6:].plot(figsize=(10, 6), title="Deaths per million, linear")
184 ax.set_xlabel(f"Date")
185 for c in COUNTRIES_CORE:
186 lvi = deaths_pc[c].last_valid_index()
187 ax.text(x = lvi, y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]*10**6:.0f}")
188 # plt.savefig('covid_deaths_total_linear.png')
189 ```
190
191 ```python
192
193 ```