4 # Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
6 from sqlalchemy
.types
import Integer
, Text
, String
, DateTime
, Date
, Float
7 from sqlalchemy
import create_engine
14 # from scipy.stats import gmean
17 # import matplotlib as mpl
18 # import matplotlib.pyplot as plt
19 # import matplotlib.animation as ani
21 # # %matplotlib inline
25 connection_string
= 'postgresql://covid:3NbjJTkT63@localhost/covid'
27 eng
= create_engine(connection_string
)
28 engine
= eng
.execution_options(isolation_level
="AUTOCOMMIT")
32 # DEATH_COUNT_THRESHOLD = 10
33 # COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()
34 # COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
35 # COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()
36 # # COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
38 # COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
39 # 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
40 # 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
41 # 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']
42 # COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
43 # COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
48 # os.system('curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv')
50 ## In outer shell script, as it doesn't seem to work here
51 os
.system('curl https://opendata.ecdc.europa.eu/covid19/nationalcasedeath/csv/ > covid.csv')
55 # First col is a date, treat geoId of NA as 'Namibia', not "NA" value
56 raw_data
= pd
.read_csv('covid.csv',
57 parse_dates
=[7], dayfirst
=True,
58 keep_default_na
=False, na_values
= [''],
59 # dtype = {'day': np.int64,
64 # 'countriesAndTerritories': str,
66 # 'countryterritoryCode': str,
67 # 'popData2019': np.int64,
68 # 'continentExp': str,
72 raw_data
.fillna(0, inplace
=True)
73 raw_data
= raw_data
[raw_data
.country_code
!= 0]
78 raw_data
['date'] = raw_data
.year_week
.apply(lambda yw
: datetime
.datetime
.fromisocalendar(int(yw
[:4]), int(yw
[5:]), 7 ))
88 # raw_data[raw_data.country_code == 'GBR'].sort_values(by='date').tail(20)
91 case_data
= raw_data
[raw_data
.indicator
== 'cases'][['country_code', 'date', 'weekly_count', 'cumulative_count']]
92 case_data
.rename(columns
={'weekly_count': 'cases_weekly', 'cumulative_count': 'culm_cases'}, inplace
=True)
96 deaths_data
= raw_data
[raw_data
.indicator
== 'deaths'][['country_code', 'date', 'weekly_count', 'cumulative_count']]
97 deaths_data
.rename(columns
={'weekly_count': 'deaths_weekly', 'cumulative_count': 'culm_deaths'}, inplace
=True)
101 case_death_data
= case_data
.merge(deaths_data
)
108 country_data
= raw_data
[['country', 'country_code', 'continent', 'population']].drop_duplicates()
114 # raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',
115 # 'countriesAndTerritories': 'country_name',
116 # 'countryterritoryCode': 'country_territory_code',
117 # 'popData2019': 'population_2019',
118 # 'continentExp': 'continent'}, inplace=True)
120 # raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',
121 # 'countriesAndTerritories': 'country_name',
122 # 'countryterritoryCode': 'country_territory_code',
123 # 'cases': 'cases_weekly',
124 # 'deaths': 'deaths_weekly',
125 # 'popData2019': 'population_2019',
126 # 'continentExp': 'continent',
127 # # 'Cumulative_number_for_14_days_of_COVID-19_cases_per_100000': 'notification_rate_per_100000_population_14-days'
133 case_death_data
.to_sql(
141 "cases_weekly": Integer
,
142 "cases_culm": Integer
,
143 "deaths_weekly": Integer
,
144 "deaths_culm": Integer
,
145 "country_code": String
158 "country_name": Text
,
159 "country_code": String
,
160 "population": Integer
,
168 "https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
169 "metric=covidOccupiedMVBeds&"
170 "metric=newAdmissions&"
171 "metric=newCasesBySpecimenDate&"
172 "metric=hospitalCases&"
173 "metric=newDeaths28DaysByPublishDate&"
177 os
.system(f
'curl "{uk_query_string}" > uk_data.csv')
181 test_query_string
= (
182 "https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
183 "metric=newPCRTestsByPublishDate&"
184 "metric=newTestsByPublishDate&"
185 "metric=newPillarOneTwoTestsByPublishDate&"
188 os
.system(f
'curl "{test_query_string}" > test_data.csv')
192 uk_data
= pd
.read_csv('uk_data.csv',
193 parse_dates
=[0], dayfirst
=True)
197 test_data
= pd
.read_csv('test_data.csv',
198 parse_dates
=[0], dayfirst
=True)
202 uk_data
= uk_data
.merge(test_data
[['date', 'newPCRTestsByPublishDate',
203 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate']], how
='outer', on
='date')
209 'covidOccupiedMVBeds': 'ventilator_beds',
210 'newCasesBySpecimenDate': 'new_cases',
211 'hospitalCases': 'hospital_cases',
212 'newDeaths28DaysByPublishDate': 'new_deaths',
213 'newAdmissions': 'new_admissions',
214 'newPCRTestsByPublishDate': 'new_pcr_tests',
215 'newTestsByPublishDate': 'new_tests',
216 'newPillarOneTwoTestsByPublishDate': 'new_pillar_1_2_tests'
222 'hospital_cases', 'ventilator_beds',
223 'new_cases', 'new_deaths',
224 'hospital_cases', 'new_admissions',
225 'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'
234 "hospital_cases": Integer
,
235 "ventilator_beds": Integer
,
236 "new_cases": Integer
,
237 "hospital_cases": Integer
,
238 "new_deaths": Integer
,
239 "new_admissions": Integer
,
240 'new_pcr_tests': Integer
,
241 'new_tests': Integer
,
242 'new_pillar_1_2_tests': Integer
248 query_string
= '''drop table if exists uk_data_7;
249 create table uk_data_7
250 (date date primary key,
252 ventilator_beds real,
258 new_pillar_1_2_tests real
261 with engine
.connect() as connection
:
262 connection
.execute(query_string
)
266 query_string
= '''with ownd as (
268 avg(hospital_cases) over wnd as w_hospital_cases,
269 avg(ventilator_beds) over wnd as w_ventilator_beds,
270 avg(new_cases) over wnd as w_new_cases,
271 avg(new_deaths) over wnd as w_new_deaths,
272 avg(new_admissions) over wnd as w_new_admissions,
273 avg(new_pcr_tests) over wnd as w_new_pcr_tests,
274 avg(new_tests) over wnd as w_new_tests,
275 avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
276 count(*) over wnd as w_size
279 order by uk_data.date
280 rows between 3 preceding and 3 following
283 insert into uk_data_7(date,
301 w_new_pillar_1_2_tests
305 with engine
.connect() as connection
:
306 connection
.execute(query_string
)