Now using py files, for automation
[covid19.git] / international_comparison.py
1 #!/usr/bin/env python
2 # coding: utf-8
3 # %%
4 # Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
5
6 # %%
7 import itertools
8 import collections
9 import json
10 import pandas as pd
11 import numpy as np
12 from scipy.stats import gmean
13 import datetime
14
15 from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float
16 from sqlalchemy import create_engine
17
18 import matplotlib as mpl
19 import matplotlib.pyplot as plt
20 plt.ioff()
21
22
23 # %%
import os

# Database URL for the PostgreSQL instance queried below.
# The COVID_DATABASE_URL environment variable takes precedence when set, so
# automated runs can supply credentials without editing this file.
# NOTE(review): the fallback is the original hard-coded URL, kept only so that
# existing runs keep working; the password should be moved out of source
# control and rotated.
connection_string = os.environ.get(
    'COVID_DATABASE_URL',
    'postgresql://covid:3NbjJTkT63@localhost/covid',
)


# %%
# SQLAlchemy engine handed to pandas for the queries in this script.
engine = create_engine(connection_string)
29
30
31 # %%
# Death-count threshold. Not referenced in this part of the script
# (presumably used further down to filter or align series -- TODO confirm).
DEATH_COUNT_THRESHOLD = 10

# Country groups, as two-letter codes. COUNTRIES_CORE is matched against the
# geo_id column of the weekly_cases table and is kept alphabetically sorted.
COUNTRIES_CORE = tuple(sorted('IT DE UK ES IE FR BE'.split()))
# NOTE(review): UK is listed with the Nordic group -- presumably as a
# comparison baseline; confirm this is intended.
COUNTRIES_NORDIC = tuple('SE NO DK FI UK'.split())
COUNTRIES_FRIENDS = tuple('IT UK ES BE SI MX'.split())
# Alternative selection: COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()

# Excluding Canada and USA.
COUNTRIES_AMERICAS = (
    'AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY',
    'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
    'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
    'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE',
)

# De-duplicated unions of the groups above. They are sorted because the
# iteration order of a set of strings differs between interpreter runs (hash
# randomisation), which would otherwise make anything derived from these
# tuples -- column order, legend order, generated SQL -- vary from run to run.
COUNTRIES_OF_INTEREST = tuple(sorted(set(COUNTRIES_CORE + COUNTRIES_FRIENDS)))
COUNTRIES_ALL = tuple(sorted(set(
    COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS
)))
44
45
46 # %%
# Build the SQL IN list by quoting each code explicitly rather than
# interpolating the tuple's repr: the repr of a one-element tuple ends in a
# trailing comma ("('BE',)"), which is not valid SQL. For the current codes the
# generated text is unchanged.
# The codes are constants defined in this file, not external input.
core_geo_ids_sql = ', '.join(f"'{geo_id}'" for geo_id in COUNTRIES_CORE)

query_string = f'''select report_date, geo_id, deaths_weekly, culm_deaths
from weekly_cases
where geo_id in ({core_geo_ids_sql})
order by report_date, geo_id'''

# One row per (report_date, geo_id); report_date is parsed as a date and used
# as the index so the frame can be pivoted per country below.
country_data = pd.read_sql_query(
    query_string,
    engine,
    index_col='report_date',
    parse_dates=['report_date'],
)
57
58
59 # %%
# Cumulative deaths with one column per geo_id, indexed by report_date.
deaths_culm = country_data.pivot(columns='geo_id', values='culm_deaths')


# %%
# Total deaths from 2020-03-15 onwards, with each line labelled at its last
# data point with the country code and the latest cumulative count.
ax = deaths_culm.loc['2020-03-15':].plot(figsize=(10, 6), title="Total deaths, linear")
ax.set_xlabel("Date")
for c in COUNTRIES_CORE:
    series = deaths_culm[c]
    lvi = series.last_valid_index()
    if lvi is None:
        # Column holds no valid values: there is no point to label, and
        # None + Timedelta below would raise.
        continue
    latest = series.loc[lvi]
    # Offset the label one day to the right so it does not overlap the line end.
    ax.text(x=lvi + pd.Timedelta(days=1), y=latest, s=f"{c}: {latest:.0f}")
plt.savefig('covid_deaths_total_linear.png')
70
71
72 # %%
# Weekly deaths with one column per geo_id, indexed by report_date.
deaths_weekly = country_data.pivot(columns='geo_id', values='deaths_weekly')


# %%
# Weekly deaths from 2020-03-01 onwards for the core countries, with each line
# labelled with its country code at its last data point.
# The column selector is passed as a list so that .loc treats it as a
# collection of labels.
ax = deaths_weekly.loc['2020-03-01':, list(COUNTRIES_CORE)].plot(figsize=(10, 6), title="Deaths per week")
ax.set_xlabel('Date')
for c in COUNTRIES_CORE:
    series = deaths_weekly[c]
    lvi = series.last_valid_index()
    if lvi is None:
        # Column holds no valid values: there is no point to label, and
        # None + Timedelta below would raise.
        continue
    # Offset the label one day to the right so it does not overlap the line end.
    ax.text(x=lvi + pd.Timedelta(days=1), y=series.loc[lvi], s=c)
plt.savefig('covid_deaths_per_week.png')
83
84
85 # %%
# Weekly deaths for the last six rows (report dates) of the table.
ax = deaths_weekly.iloc[-6:].plot(figsize=(10, 6), title="Deaths per week")
ax.set_xlabel("Date")

# All labels share one x position: half a day past the last date on which any
# country has a value.
text_x_pos = deaths_weekly.last_valid_index() + pd.Timedelta(days=0.5)

for c in COUNTRIES_CORE:
    series = deaths_weekly[c]
    lvi = series.last_valid_index()
    if lvi is None:
        # Column holds no valid values, so there is no latest figure to show.
        continue
    latest = series.loc[lvi]
    # The label's height is the country's own latest valid value, which may
    # come from an earlier week than text_x_pos if its newest rows are missing.
    ax.text(x=text_x_pos, y=latest, s=f"{c}: {latest:.0f}")
plt.savefig('deaths_by_date_last_6_weeks.png')
96
97
98 # %%
99
100
101
102