# Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
import os

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float

# from scipy.stats import gmean
# Database connection.
# NOTE(review): the original hard-coded the DB credential in source; allow an
# environment override so the password can be kept out of the repository.
# The fallback value preserves the original behaviour exactly.
connection_string = os.environ.get(
    'COVID_DB_URL', 'postgresql://covid:3NbjJTkT63@localhost/covid')

eng = create_engine(connection_string)
# AUTOCOMMIT so the DDL/UPDATE statements below take effect without
# explicit transaction management.
engine = eng.execution_options(isolation_level="AUTOCOMMIT")
27 DEATH_COUNT_THRESHOLD
= 10
28 COUNTRIES_CORE
= 'IT DE UK ES IE FR BE'.split()
29 COUNTRIES_NORDIC
= 'SE NO DK FI UK'.split()
30 COUNTRIES_FRIENDS
= 'IT UK ES BE SI MX'.split()
31 # COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
33 COUNTRIES_AMERICAS
= ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
34 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
35 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
36 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']
37 COUNTRIES_OF_INTEREST
= list(set(COUNTRIES_CORE
+ COUNTRIES_FRIENDS
))
38 COUNTRIES_ALL
= list(set(COUNTRIES_CORE
+ COUNTRIES_FRIENDS
+ COUNTRIES_NORDIC
+ COUNTRIES_AMERICAS
))
# Download the latest world-wide weekly case/death figures from the ECDC.
os.system('curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv')

# First col is a date, treat geoId of NA as 'Namibia', not "NA" value
raw_data = pd.read_csv('covid.csv',
                       parse_dates=[0], dayfirst=True,
                       keep_default_na=False, na_values=[''])

# Any remaining missing counts are treated as zero.
raw_data.fillna(0, inplace=True)
# Map the ECDC's column names onto snake_case, database-friendly names.
# NOTE(review): the closing of this call was not visible in the source
# extract; `}, inplace=True)` reconstructed from the superseded version of
# the same call that was left commented out alongside it — confirm.
raw_data.rename(columns={'dateRep': 'report_date', 'geoId': 'geo_id',
                         'countriesAndTerritories': 'country_name',
                         'countryterritoryCode': 'country_territory_code',
                         'cases': 'cases_weekly',
                         'deaths': 'deaths_weekly',
                         'popData2019': 'population_2019',
                         'continentExp': 'continent',
                         'Cumulative_number_for_14_days_of_COVID-19_cases_per_100000': 'notification_rate_per_100000_population_14-days'},
                inplace=True)
# Write the weekly case/death figures to the `weekly_cases` table.
# NOTE(review): the to_sql() arguments between the column selection and the
# visible dtype entries were missing from this extract.  Table name inferred
# from the `alter table weekly_cases ...` DDL later in the file; connection,
# if_exists, index and the report_date/geo_id dtype entries are conventional
# reconstructions — confirm against the original script.
raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id',
          'notification_rate_per_100000_population_14-days']].to_sql(
    'weekly_cases',
    engine,
    if_exists='replace',
    index=False,
    dtype={
        'report_date': Date,
        'geo_id': String,
        'cases_weekly': Integer,
        'deaths_weekly': Integer,
        'notification_rate_per_100000_population_14-days': Float,
    })
# One row per country with its static attributes, in a `countries` table.
# NOTE(review): the to_sql() arguments were missing from this extract.  Table
# name inferred from the `alter table countries ...` DDL later in the file;
# connection, if_exists, index and the geo_id/continent dtype entries are
# conventional reconstructions — confirm against the original script.
raw_data[['country_name', 'geo_id', 'country_territory_code',
          'population_2019', 'continent']].drop_duplicates().to_sql(
    'countries',
    engine,
    if_exists='replace',
    index=False,
    dtype={
        'country_name': Text,
        'geo_id': String,
        'country_territory_code': String,
        'population_2019': Integer,
        'continent': Text,
    })
# Key the freshly written tables and add the cumulative-count columns that
# the running-total update queries below will populate.
ddl_statements = [
    'alter table weekly_cases add primary key (geo_id, report_date)',
    'alter table countries add primary key (geo_id);',
    'alter table weekly_cases add foreign key (geo_id) references countries(geo_id);',
    'alter table weekly_cases add culm_cases integer;',
    'alter table weekly_cases add culm_deaths integer;',
]
with engine.connect() as connection:
    for statement in ddl_statements:
        connection.execute(statement)
# Fill weekly_cases.culm_cases with the running total of cases per country.
# NOTE(review): the lines joining the CTE to the update ("from weekly_cases)",
# "update weekly_cases", "from culm") were missing from this extract; they are
# reconstructed from the standard Postgres `update ... from` pattern implied
# by the visible set/where clauses — confirm against the original script.
query_string = '''with culm as
(select report_date, geo_id,
        sum(cases_weekly) over (partition by geo_id
                                order by report_date) as culm_data
from weekly_cases)
update weekly_cases
set culm_cases = culm_data
from culm
where weekly_cases.report_date = culm.report_date and
      weekly_cases.geo_id = culm.geo_id'''
with engine.connect() as connection:
    connection.execute(query_string)
# Fill weekly_cases.culm_deaths with the running total of deaths per country.
# NOTE(review): the lines joining the CTE to the update ("from weekly_cases)",
# "update weekly_cases", "from culm") were missing from this extract; they are
# reconstructed from the standard Postgres `update ... from` pattern implied
# by the visible set/where clauses — confirm against the original script.
query_string = '''with culm as
(select report_date, geo_id,
        sum(deaths_weekly) over (partition by geo_id
                                 order by report_date) as culm_data
from weekly_cases)
update weekly_cases
set culm_deaths = culm_data
from culm
where weekly_cases.report_date = culm.report_date and
      weekly_cases.geo_id = culm.geo_id'''
with engine.connect() as connection:
    connection.execute(query_string)
# UK hospital/admissions series from the coronavirus.data.gov.uk API.
# NOTE(review): the opening assignment line and the trailing lines of this
# string were missing from this extract; the `uk_query_string = (` opener and
# the closing "format=csv" (the API's CSV output selector, matching the
# .csv filename below) are reconstructed — confirm against the original.
uk_query_string = (
    "https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
    "metric=covidOccupiedMVBeds&"
    "metric=newAdmissions&"
    "metric=newCasesBySpecimenDate&"
    "metric=hospitalCases&"
    "metric=newDeaths28DaysByPublishDate&"
    "format=csv"
)
os.system(f'curl "{uk_query_string}" > uk_data.csv')
# UK testing-volume series from the coronavirus.data.gov.uk API.
# NOTE(review): the trailing lines of this string were missing from this
# extract; the closing "format=csv" (the API's CSV output selector, matching
# the .csv filename below) is reconstructed — confirm against the original.
test_query_string = (
    "https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
    "metric=newPCRTestsByPublishDate&"
    "metric=newTestsByPublishDate&"
    "metric=newPillarOneTwoTestsByPublishDate&"
    "format=csv"
)
os.system(f'curl "{test_query_string}" > test_data.csv')
# Load the two downloads; the first column of each is the date.
uk_data = pd.read_csv('uk_data.csv', parse_dates=[0], dayfirst=True)
test_data = pd.read_csv('test_data.csv', parse_dates=[0], dayfirst=True)

# Attach the testing figures to the admissions data, keeping dates that
# appear in either file.
testing_columns = ['date', 'newPCRTestsByPublishDate',
                   'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate']
uk_data = uk_data.merge(test_data[testing_columns], how='outer', on='date')
# Rename the API's camelCase columns to snake_case and store the UK series.
# NOTE(review): the rename/select/to_sql scaffolding around these visible
# entries was missing from this extract; the call shapes, table name and
# to_sql options are conventional reconstructions — confirm against the
# original script.
uk_data.rename(columns={
    'covidOccupiedMVBeds': 'ventilator_beds',
    'newCasesBySpecimenDate': 'new_cases',
    'hospitalCases': 'hospital_cases',
    'newDeaths28DaysByPublishDate': 'new_deaths',
    'newAdmissions': 'new_admissions',
    'newPCRTestsByPublishDate': 'new_pcr_tests',
    'newTestsByPublishDate': 'new_tests',
    'newPillarOneTwoTestsByPublishDate': 'new_pillar_1_2_tests'
}, inplace=True)

# The visible original listed 'hospital_cases' twice in the column selection
# (and gave it two identical dtype entries); the duplicate is dropped here —
# selecting a column twice duplicates it in the frame and breaks the write.
uk_data[['date',
         'hospital_cases', 'ventilator_beds',
         'new_cases', 'new_deaths',
         'new_admissions',
         'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests']].to_sql(
    'uk_data',
    engine,
    if_exists='replace',
    index=False,
    dtype={
        'date': Date,
        'hospital_cases': Integer,
        'ventilator_beds': Integer,
        'new_cases': Integer,
        'new_deaths': Integer,
        'new_admissions': Integer,
        'new_pcr_tests': Integer,
        'new_tests': Integer,
        'new_pillar_1_2_tests': Integer,
    })
# (Re)create the table holding 7-day centred moving averages of the UK series.
# NOTE(review): the column lines between `date` and `new_pillar_1_2_tests`
# were missing from this extract; they are reconstructed to mirror the series
# averaged by the window query that populates this table — confirm against
# the original script.
query_string = '''drop table if exists uk_data_7;
create table uk_data_7
(date date primary key,
 hospital_cases real,
 ventilator_beds real,
 new_cases real,
 new_deaths real,
 new_admissions real,
 new_pcr_tests real,
 new_tests real,
 new_pillar_1_2_tests real
)'''
with engine.connect() as connection:
    connection.execute(query_string)
# Populate uk_data_7 with 7-day centred moving averages (a window of 3 rows
# either side of each date).
# NOTE(review): large parts of this query — the select-list opening, the
# from/window clauses, the insert column list and the final select — were
# missing from this extract and are reconstructed from the visible fragments
# (the `w_*` aliases, `w_size`, and the `order by uk_data.date rows between 3
# preceding and 3 following` window frame).  The `where w_size = 7` filter,
# which keeps only dates with a full window, is an inference from w_size
# being computed — confirm everything against the original script.
query_string = '''with ownd as (
select date,
       avg(hospital_cases) over wnd as w_hospital_cases,
       avg(ventilator_beds) over wnd as w_ventilator_beds,
       avg(new_cases) over wnd as w_new_cases,
       avg(new_deaths) over wnd as w_new_deaths,
       avg(new_admissions) over wnd as w_new_admissions,
       avg(new_pcr_tests) over wnd as w_new_pcr_tests,
       avg(new_tests) over wnd as w_new_tests,
       avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
       count(*) over wnd as w_size
from uk_data
window wnd as (
    order by uk_data.date
    rows between 3 preceding and 3 following
)
)
insert into uk_data_7(date,
    hospital_cases,
    ventilator_beds,
    new_cases,
    new_deaths,
    new_admissions,
    new_pcr_tests,
    new_tests,
    new_pillar_1_2_tests)
select date,
    w_hospital_cases,
    w_ventilator_beds,
    w_new_cases,
    w_new_deaths,
    w_new_admissions,
    w_new_pcr_tests,
    w_new_tests,
    w_new_pillar_1_2_tests
from ownd
where w_size = 7'''
with engine.connect() as connection:
    connection.execute(query_string)