4 # Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
from sqlalchemy import create_engine
from sqlalchemy.types import Date, DateTime, Float, Integer, String, Text
14 # from scipy.stats import gmean
# Connection to the local covid reporting database.
# NOTE(review): the password is hard-coded in the DSN — consider moving the
# credentials to an environment variable or a .pgpass file.
connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
eng = create_engine(connection_string)
# AUTOCOMMIT so the DDL/UPDATE statements executed later in this script take
# effect immediately without explicit transaction management.
engine = eng.execution_options(isolation_level="AUTOCOMMIT")
# Minimum death count used by downstream analysis.
DEATH_COUNT_THRESHOLD = 10

# Country groups, as ECDC geoIds.
COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()
COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()
# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()

COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
    'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
    'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
    'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']

# Deduplicate while preserving first-seen order.  The previous
# list(set(...)) form produced a nondeterministic ordering (string hash
# randomisation), so anything iterated/plotted from these lists changed
# order from run to run; dict.fromkeys keeps the result stable.
COUNTRIES_OF_INTEREST = list(dict.fromkeys(COUNTRIES_CORE + COUNTRIES_FRIENDS))
COUNTRIES_ALL = list(dict.fromkeys(COUNTRIES_CORE + COUNTRIES_FRIENDS
                                   + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
# Download the latest weekly ECDC case/death data.
# NOTE(review): previously the os.system exit status was discarded, so a
# failed download silently left a stale or empty covid.csv for the load below.
status = os.system('curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv')
if status != 0:
    raise RuntimeError(f'download of ECDC data failed with exit status {status}')
# First column is a date; treat a geoId of NA as 'Namibia', not as an "NA"
# missing value (hence keep_default_na=False with only '' as a NaN marker).
COLUMN_RENAMES = {
    'dateRep': 'report_date', 'geoId': 'geo_id',
    'countriesAndTerritories': 'country_name',
    'countryterritoryCode': 'country_territory_code',
    'popData2019': 'population_2019',
    'continentExp': 'continent',
}
raw_data = (
    pd.read_csv('covid.csv',
                parse_dates=[0], dayfirst=True,
                keep_default_na=False, na_values=[''])
    .fillna(0)
    .rename(columns=COLUMN_RENAMES)
)
# Write the weekly per-country figures to the weekly_cases table.
# NOTE(review): reconstructed from a truncated source — the table name is
# taken from the ALTER TABLE statements below; confirm the if_exists/index
# arguments and any additional dtype entries against the original.
raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id',
          'notification_rate_per_100000_population_14-days']].to_sql(
    'weekly_cases',
    engine,
    if_exists='replace',
    index=False,
    dtype={
        "cases_weekly": Integer,
        "deaths_weekly": Integer,
        "notification_rate_per_100000_population_14-days": Float,
    })
# One row per country with its static attributes (name, codes, population).
# NOTE(review): reconstructed from a truncated source — the table name is
# taken from the ALTER TABLE statements below; confirm the if_exists/index
# arguments and any additional dtype entries against the original.
raw_data[['country_name', 'geo_id', 'country_territory_code',
          'population_2019', 'continent']].drop_duplicates().to_sql(
    'countries',
    engine,
    if_exists='replace',
    index=False,
    dtype={
        "country_name": Text,
        "country_territory_code": String,
        "population_2019": Integer,
    })
# Add keys and the cumulative-total columns that to_sql does not create.
DDL_STATEMENTS = (
    'alter table weekly_cases add primary key (geo_id, report_date)',
    'alter table countries add primary key (geo_id);',
    'alter table weekly_cases add foreign key (geo_id) references countries(geo_id);',
    'alter table weekly_cases add culm_cases integer;',
    'alter table weekly_cases add culm_deaths integer;',
)
with engine.connect() as connection:
    for ddl in DDL_STATEMENTS:
        connection.execute(ddl)
# Populate the culm_cases / culm_deaths running totals per country.
# NOTE(review): the two original queries were identical apart from the summed
# and updated columns, and parts of the SQL were truncated in the source;
# reconstructed here and factored into one parameterised helper — verify the
# FROM/UPDATE clauses against the original.
def _update_cumulative(weekly_column, culm_column):
    """Set culm_column to the running sum of weekly_column, per geo_id,
    ordered by report_date."""
    query_string = f'''with culm as
    (select report_date, geo_id,
            sum({weekly_column}) over (partition by geo_id
                                       order by report_date) as culm_data
       from weekly_cases)
    update weekly_cases
       set {culm_column} = culm_data
      from culm
     where weekly_cases.report_date = culm.report_date and
           weekly_cases.geo_id = culm.geo_id'''
    with engine.connect() as connection:
        connection.execute(query_string)

_update_cumulative('cases_weekly', 'culm_cases')
_update_cumulative('deaths_weekly', 'culm_deaths')
# Pull the UK overview metrics from the coronavirus dashboard API.
# NOTE(review): the open/close of this string were truncated in the source;
# the metric list is as seen, 'format=csv' is assumed — confirm.  The curl
# exit status is now checked so a failed download does not go unnoticed.
uk_query_string = (
    "https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
    "metric=covidOccupiedMVBeds&"
    "metric=newAdmissions&"
    "metric=newCasesBySpecimenDate&"
    "metric=hospitalCases&"
    "metric=newDeaths28DaysByPublishDate&"
    "format=csv"
)
if os.system(f'curl "{uk_query_string}" > uk_data.csv') != 0:
    raise RuntimeError('download of UK dashboard data failed')
# Pull the UK testing metrics from the coronavirus dashboard API.
# NOTE(review): the closing of this string was truncated in the source;
# 'format=csv' is assumed — confirm.  The curl exit status is now checked
# so a failed download does not go unnoticed.
test_query_string = (
    "https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
    "metric=newPCRTestsByPublishDate&"
    "metric=newTestsByPublishDate&"
    "metric=newPillarOneTwoTestsByPublishDate&"
    "format=csv"
)
if os.system(f'curl "{test_query_string}" > test_data.csv') != 0:
    raise RuntimeError('download of UK testing data failed')
# Load the two dashboard downloads; first column of each is the date.
uk_data = pd.read_csv('uk_data.csv', parse_dates=[0], dayfirst=True)
test_data = pd.read_csv('test_data.csv', parse_dates=[0], dayfirst=True)

# Join the testing series onto the main frame by date.  An outer join keeps
# dates that appear in only one of the two downloads.
testing_columns = ['date', 'newPCRTestsByPublishDate',
                   'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate']
uk_data = uk_data.merge(test_data[testing_columns], how='outer', on='date')
# Normalise the dashboard column names to snake_case.
uk_data.rename(columns={'covidOccupiedMVBeds': 'ventilator_beds',
                        'newCasesBySpecimenDate': 'new_cases',
                        'hospitalCases': 'hospital_cases',
                        'newDeaths28DaysByPublishDate': 'new_deaths',
                        'newAdmissions': 'new_admissions',
                        'newPCRTestsByPublishDate': 'new_pcr_tests',
                        'newTestsByPublishDate': 'new_tests',
                        'newPillarOneTwoTestsByPublishDate': 'new_pillar_1_2_tests'},
               inplace=True)

# Write the daily UK series to the uk_data table.
# NOTE(review): reconstructed from a truncated source — 'hospital_cases'
# appeared twice in the original column selection (and twice in the dtype
# map); deduplicated here.  Table name inferred from the uk_data_7 queries
# below; confirm the if_exists/index arguments against the original.
uk_data[['date', 'hospital_cases', 'ventilator_beds',
         'new_cases', 'new_deaths', 'new_admissions',
         'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests']].to_sql(
    'uk_data',
    engine,
    if_exists='replace',
    index=False,
    dtype={
        "hospital_cases": Integer,
        "ventilator_beds": Integer,
        "new_cases": Integer,
        "new_deaths": Integer,
        "new_admissions": Integer,
        'new_pcr_tests': Integer,
        'new_tests': Integer,
        'new_pillar_1_2_tests': Integer,
    })
# Recreate the table holding the 7-day centred averages of the UK series.
# NOTE(review): reconstructed from a truncated source — the column list was
# cut off; the columns below mirror the averaged columns used by the insert
# that follows.  Confirm against the original.
query_string = '''drop table if exists uk_data_7;
create table uk_data_7
(date date primary key,
hospital_cases real,
ventilator_beds real,
new_cases real,
new_deaths real,
new_admissions real,
new_pcr_tests real,
new_tests real,
new_pillar_1_2_tests real
);'''
with engine.connect() as connection:
    connection.execute(query_string)
# Fill uk_data_7 with 7-day centred averages (window of 3 days before to
# 3 days after each date).  w_size counts the rows actually in the window
# so incomplete windows at the edges of the series can be excluded.
# NOTE(review): reconstructed from a truncated source — the insert column
# list, the select list, and the trailing filter were cut off; verify the
# column order and the `w_size = 7` condition against the original.
query_string = '''with ownd as (
select date,
    avg(hospital_cases) over wnd as w_hospital_cases,
    avg(ventilator_beds) over wnd as w_ventilator_beds,
    avg(new_cases) over wnd as w_new_cases,
    avg(new_deaths) over wnd as w_new_deaths,
    avg(new_admissions) over wnd as w_new_admissions,
    avg(new_pcr_tests) over wnd as w_new_pcr_tests,
    avg(new_tests) over wnd as w_new_tests,
    avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
    count(*) over wnd as w_size
from uk_data
window wnd as (
    order by uk_data.date
    rows between 3 preceding and 3 following
)
)
insert into uk_data_7(date,
    hospital_cases, ventilator_beds,
    new_cases, new_deaths, new_admissions,
    new_pcr_tests, new_tests, new_pillar_1_2_tests)
select date,
    w_hospital_cases, w_ventilator_beds,
    w_new_cases, w_new_deaths, w_new_admissions,
    w_new_pcr_tests, w_new_tests,
    w_new_pillar_1_2_tests
from ownd
where w_size = 7'''
with engine.connect() as connection:
    connection.execute(query_string)