9 jupytext_version: 1.9.1
11 display_name: Python 3
16 <!-- #region Collapsed="false" -->
17 Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
20 ```python Collapsed="false"
26 from scipy.stats import gmean
29 from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float
30 from sqlalchemy import create_engine
32 import matplotlib as mpl
33 import matplotlib.pyplot as plt
38 ```python Collapsed="false"
39 connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
42 ```python Collapsed="false"
43 %sql $connection_string
47 engine = create_engine(connection_string)
50 ```python Collapsed="false"
51 DEATH_COUNT_THRESHOLD = 10
52 COUNTRIES_CORE = tuple(sorted('IT DE UK ES IE FR BE'.split()))
53 COUNTRIES_NORDIC = tuple('SE NO DK FI UK'.split())
54 COUNTRIES_FRIENDS = tuple('IT UK ES BE SI MX'.split())
55 # COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
57 COUNTRIES_AMERICAS = ('AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
58 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
59 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
60 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE')
61 COUNTRIES_OF_INTEREST = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
62 COUNTRIES_ALL = tuple(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
65 ```python Collapsed="false"
66 # res = %sql select report_date, geo_id, deaths_weekly, culm_deaths from weekly_cases where geo_id in $COUNTRIES_CORE order by report_date, geo_id
69 ```python Collapsed="false"
70 # country_data = res.DataFrame()
71 # country_data['report_date'] = country_data.report_date.astype('datetime64[ns]')
72 # country_data.set_index('report_date', inplace=True)
73 # country_data.tail(10)
77 %sql select geo_id, country_name, population_2019 from countries where population_2019 > 30000000
81 query_string = f'''select report_date, weekly_cases.geo_id, country_name,
82 10000000.0 * deaths_weekly / population_2019 as deaths_weekly_pc,
83 10000000.0 * culm_deaths / population_2019 as culm_deaths_pc
84 from weekly_cases, countries
85 where weekly_cases.geo_id = countries.geo_id
86 and countries.population_2019 > 20000000
87 order by report_date, geo_id'''
89 country_data = pd.read_sql_query(query_string,
91 index_col = 'report_date',
92 parse_dates = ['report_date']
101 latest_date = country_data.index.max()
106 # highest_deaths_geo_ids = country_data.loc[latest_date, :].nlargest(10, 'culm_deaths_pc')['geo_id'].values
107 highest_deaths_geo_ids = (
108 country_data.loc[latest_date, :]
109 .nlargest(10, 'culm_deaths_pc')
114 highest_deaths_geo_ids
118 high_deaths_country_data = (
120 country_data.country_name.isin(highest_deaths_geo_ids)]
122 high_deaths_country_data.tail()
125 ```python Collapsed="false"
126 deaths_culm = high_deaths_country_data.pivot(columns='country_name',
127 values='culm_deaths_pc')
131 ```python Collapsed="false"
135 ```python Collapsed="false"
136 ax = deaths_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Total deaths, linear")
137 ax.set_xlabel(f"Date")
138 for c in highest_deaths_geo_ids:
139 lvi = deaths_culm[c].last_valid_index()
140 ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_culm[c][lvi], s = f"{c}: {deaths_culm[c][lvi]:.0f}")
141 # plt.savefig('covid_deaths_total_linear.png')
145 deaths_culm.loc[latest_date].sort_values()
148 ```python Collapsed="false"
149 deaths_weekly = high_deaths_country_data.pivot(columns='country_name', values='deaths_weekly_pc')
153 ```python Collapsed="false"
157 ```python Collapsed="false"
158 ax = deaths_weekly.loc['2020-03-01':].plot(figsize=(10, 6), title="Deaths per week")
159 ax.set_xlabel('Date')
160 for c in highest_deaths_geo_ids:
161 lvi = deaths_weekly[c].last_valid_index()
162 ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_weekly[c][lvi], s = c)
163 # plt.savefig('covid_deaths_per_week.png')
166 ```python Collapsed="false"
167 ax = deaths_weekly.iloc[-6:].plot(figsize=(10, 6), title="Deaths per week")#, ylim=(-10, 100))
168 ax.set_xlabel("Date")
170 text_x_pos = deaths_weekly.last_valid_index() + pd.Timedelta(days=0.5)
172 for c in highest_deaths_geo_ids:
173 lvi = deaths_weekly[c].last_valid_index()
175 ax.text(x = text_x_pos, y = deaths_weekly[c][lvi], s = f"{c}: {deaths_weekly[c][lvi]:.0f}")
176 plt.savefig('deaths_by_date_last_6_weeks.png')
180 query_string = f'''select report_date, geo_id,
181 (cast(deaths_weekly as float) / population_2019) deaths_weekly_pc,
182 (cast(culm_deaths as float) / population_2019) as culm_deaths_pc
183 from weekly_cases join countries using (geo_id)
184 where geo_id in {COUNTRIES_CORE}
185 order by report_date, geo_id'''
187 deaths_pc_data = pd.read_sql_query(query_string,
189 index_col = 'report_date',
190 parse_dates = ['report_date']
192 deaths_pc_data.tail()
195 ```python Collapsed="false"
196 deaths_pc_culm = deaths_pc_data.pivot(columns='geo_id', values='culm_deaths_pc')
197 deaths_pc_culm.tail()
201 ax = deaths_pc_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear")
202 ax.set_xlabel(f"Date")
203 for c in COUNTRIES_CORE:
204 lvi = deaths_pc_culm[c].last_valid_index()
205 ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc_culm[c][lvi], s = f"{c}: {deaths_pc_culm[c][lvi]:.0f}")
206 # plt.savefig('covid_deaths_total_linear.png')
209 ```python Collapsed="false"
210 deaths_pc = deaths_pc_data.pivot(columns='geo_id', values='deaths_weekly_pc')
215 ax = deaths_pc.loc['2020-03-15':].plot(figsize=(10, 6), title="Deaths per million, linear")
216 ax.set_xlabel(f"Date")
217 for c in COUNTRIES_CORE:
218 lvi = deaths_pc[c].last_valid_index()
219 ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]:.0f}")
220 # plt.savefig('covid_deaths_total_linear.png')
224 ax = deaths_pc.iloc[-6:].plot(figsize=(10, 6), title="Deaths per million, linear")
225 ax.set_xlabel(f"Date")
226 for c in COUNTRIES_CORE:
227 lvi = deaths_pc[c].last_valid_index()
228 ax.text(x = lvi, y = deaths_pc[c][lvi], s = f"{c}: {deaths_pc[c][lvi]*10**6:.0f}")
229 # plt.savefig('covid_deaths_total_linear.png')