Updated for imported data format
[covid19.git] / international_comparison.py
1 #!/usr/bin/env python
2 # coding: utf-8
3 # %%
4 # Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
5
6 # %%
7 import itertools
8 import collections
9 import json
10 import pandas as pd
11 import numpy as np
12 from scipy.stats import gmean
13 import datetime
14
15 from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float
16 from sqlalchemy import create_engine
17
18 import matplotlib as mpl
19 import matplotlib.pyplot as plt
20 plt.ioff()
21 # # %matplotlib inline
22
23
# %%
# Database URL handed to SQLAlchemy. Set the COVID_DB_URL environment variable
# to target a different server or to supply credentials without editing this
# file; when it is unset the previously hard-coded localhost URL is used, so
# default behaviour is unchanged.
# NOTE(review): the fallback embeds a password in source control -- consider
# rotating it and removing the default once COVID_DB_URL is set everywhere.
import os

connection_string = os.environ.get(
    'COVID_DB_URL',
    'postgresql://covid:3NbjJTkT63@localhost/covid',
)
26
27
# %%
# SQLAlchemy engine built from the connection string; it is the connection
# object passed to pandas when the weekly case data is read.
engine = create_engine(
    connection_string,
)
30
31
# %%
# Not referenced in the visible part of this script.
DEATH_COUNT_THRESHOLD = 10

# Country groups, given as the codes stored in the geo_id column of the
# weekly_cases table (note 'UK', as used by the data, for the United Kingdom).
COUNTRIES_CORE = tuple(sorted('IT DE UK ES IE FR BE'.split()))
COUNTRIES_NORDIC = tuple('SE NO DK FI UK'.split())
COUNTRIES_FRIENDS = tuple('IT UK ES BE SI MX'.split())
# An earlier selection of COUNTRIES_FRIENDS used PT in place of MX.

COUNTRIES_AMERICAS = (  # excluding Canada and USA
    'AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY',
    'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
    'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
    'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE',
)

# De-duplicated unions of the groups above. They are sorted because iterating
# a set of strings yields a different order from run to run (string hash
# randomisation), which would make any output derived from these tuples
# (legends, column order, generated SQL) non-reproducible.
COUNTRIES_OF_INTEREST = tuple(sorted(set(COUNTRIES_CORE + COUNTRIES_FRIENDS)))
COUNTRIES_ALL = tuple(sorted(set(
    COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS
)))
45
46
# %%
# Weekly and cumulative death counts for the core countries, ordered by report
# date and then country code.
#
# The IN-list is assembled explicitly instead of interpolating the tuple's
# repr: a one-element tuple would render as "('IT',)", which is not valid SQL.
# For the current COUNTRIES_CORE the generated SQL is unchanged.
# COUNTRIES_CORE is a constant defined in this script, not external input.
core_geo_ids_sql = ', '.join(f"'{geo_id}'" for geo_id in COUNTRIES_CORE)

query_string = f'''select report_date, geo_id, deaths_weekly, culm_deaths
from weekly_cases
where geo_id in ({core_geo_ids_sql})
order by report_date, geo_id'''

# report_date is parsed as a date and becomes the frame's index; the result
# holds one row per (report_date, geo_id) pair returned by the query.
country_data = pd.read_sql_query(
    query_string,
    engine,
    index_col='report_date',
    parse_dates=['report_date'],
)
58
59
# %%
# Cumulative deaths reshaped to wide form: the existing index is kept and
# there is one column per geo_id.
deaths_culm = country_data.pivot(columns='geo_id', values='culm_deaths')


# %%
# Linear-scale plot of cumulative deaths from 2020-03-15 onwards. Each core
# country is annotated at its last non-missing data point with its code and
# its cumulative total.
ax = deaths_culm.loc['2020-03-15':].plot(
    figsize=(10, 6),
    title="Total deaths, linear",
)
ax.set_xlabel("Date")
for c in COUNTRIES_CORE:
    culm_series = deaths_culm[c]
    lvi = culm_series.last_valid_index()
    latest_total = culm_series[lvi]
    # Place the label one day to the right of the final point.
    ax.text(
        x=lvi + pd.Timedelta(days=1),
        y=latest_total,
        s=f"{c}: {latest_total:.0f}",
    )
plt.savefig('covid_deaths_total_linear.png')
71
72
# %%
# Weekly deaths reshaped to wide form: the existing index is kept and there
# is one column per geo_id.
deaths_weekly = country_data.pivot(columns='geo_id', values='deaths_weekly')


# %%
# Weekly deaths for the core countries from 2020-03-01 onwards. Each line is
# tagged with its country code just past its last non-missing data point.
weekly_since_march = deaths_weekly.loc['2020-03-01':, COUNTRIES_CORE]
ax = weekly_since_march.plot(figsize=(10, 6), title="Deaths per week")
ax.set_xlabel('Date')
for c in COUNTRIES_CORE:
    weekly_series = deaths_weekly[c]
    lvi = weekly_series.last_valid_index()
    # Place the label one day to the right of the final point.
    label_x = lvi + pd.Timedelta(days=1)
    ax.text(x=label_x, y=weekly_series[lvi], s=c)
plt.savefig('covid_deaths_per_week.png')
84
85
# %%
# Close-up of weekly deaths over the last six rows of the table (presumably
# six weeks, given the weekly source data -- confirm against the database).
recent_weekly = deaths_weekly.iloc[-6:]
ax = recent_weekly.plot(figsize=(10, 6), title="Deaths per week")
ax.set_xlabel("Date")

# Every label shares one x position: half a day past the frame's last valid
# index, so the labels line up in a column at the right-hand edge.
text_x_pos = deaths_weekly.last_valid_index() + pd.Timedelta(days=0.5)

for c in COUNTRIES_CORE:
    weekly_series = deaths_weekly[c]
    lvi = weekly_series.last_valid_index()
    # The label height is the country's own most recent non-missing value.
    latest_weekly = weekly_series[lvi]
    ax.text(x=text_x_pos, y=latest_weekly, s=f"{c}: {latest_weekly:.0f}")
plt.savefig('deaths_by_date_last_6_weeks.png')
97
98
99 # %%
100
101
102
103