9 jupytext_version: 1.9.1
11 display_name: Python 3
16 <!-- #region Collapsed="false" -->
17 Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
20 ```python Collapsed="false"
21 from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float
22 from sqlalchemy import create_engine
25 ```python Collapsed="false"
31 from scipy.stats import gmean
34 import matplotlib as mpl
35 import matplotlib.pyplot as plt
39 ```python Collapsed="false"
43 ```python Collapsed="false"
44 connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
47 ```python Collapsed="false"
48 %sql $connection_string
51 ```python Collapsed="false"
52 eng = create_engine(connection_string)
53 engine = eng.execution_options(isolation_level="AUTOCOMMIT")
56 ```python Collapsed="false"
57 DEATH_COUNT_THRESHOLD = 10
58 COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()
59 COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
60 COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()
61 # COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
63 COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
64 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
65 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
66 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']
67 COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
68 COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
71 ```python Collapsed="false"
72 !curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv
75 ```python Collapsed="false"
76 # First col is a date, treat geoId of NA as 'Namibia', not "NA" value
77 raw_data = pd.read_csv('covid.csv',
78 parse_dates=[0], dayfirst=True,
79 keep_default_na=False, na_values = [''],
80 # dtype = {'day': np.int64,
85 # 'countriesAndTerritories': str,
87 # 'countryterritoryCode': str,
88 # 'popData2019': np.int64,
89 # 'continentExp': str,
94 ```python Collapsed="false"
98 ```python Collapsed="false"
99 raw_data.fillna(0, inplace=True)
102 ```python Collapsed="false"
106 ```python Collapsed="false"
107 raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',
108 'countriesAndTerritories': 'country_name',
109 'countryterritoryCode': 'country_territory_code',
110 'popData2019': 'population_2019',
111 'continentExp': 'continent'}, inplace=True)
114 ```python Collapsed="false"
118 ```python Collapsed="false"
122 ```python Collapsed="false"
123 raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']]
126 ```python Collapsed="false"
127 raw_data[['country_name', 'geo_id', 'country_territory_code',
128 'population_2019', 'continent']]
131 ```python Collapsed="false"
132 raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']].to_sql(
140 "cases_weekly": Integer,
141 "deaths_weekly": Integer,
143 "notification_rate_per_100000_population_14-days": Float
148 ```python Collapsed="false"
149 raw_data[['country_name', 'geo_id', 'country_territory_code',
150 'population_2019', 'continent']].drop_duplicates().to_sql(
157 "country_name": Text,
159 "country_territory_code": String,
160 "population_2019": Integer,
166 ```python Collapsed="false"
167 # %sql select geo_id from weekly_cases limit 10
170 ```python Collapsed="false"
171 # %%sql alter table weekly_cases add primary key (geo_id, report_date);
172 # alter table countries add primary key (geo_id);
173 # alter table weekly_cases add foreign key (geo_id) references countries(geo_id);
174 # alter table weekly_cases add culm_cases integer;
175 # alter table weekly_cases add culm_deaths integer;
179 with engine.connect() as connection:
180 connection.execute('alter table weekly_cases add primary key (geo_id, report_date)')
181 connection.execute('alter table countries add primary key (geo_id);')
182 connection.execute('alter table weekly_cases add foreign key (geo_id) references countries(geo_id);')
183 connection.execute('alter table weekly_cases add culm_cases integer;')
184 connection.execute('alter table weekly_cases add culm_deaths integer;')
187 ```python Collapsed="false"
188 # %sql select report_date, cases_weekly, country_name from weekly_cases join countries using (geo_id) order by report_date desc limit 10
191 ```python Collapsed="false"
192 # %sql select report_date, cases_weekly, country_name from weekly_cases join countries on weekly_cases.geo_id = countries.geo_id order by report_date desc limit 10
195 ```python Collapsed="false"
196 %sql select * from weekly_cases limit 10
199 ```python Collapsed="false"
202 # (select report_date, geo_id,
203 # sum(cases_weekly) over (partition by geo_id
204 # order by report_date) as culm_data
207 # update weekly_cases
208 # set culm_cases = culm_data
210 # where weekly_cases.report_date = culm.report_date and
211 # weekly_cases.geo_id = culm.geo_id
214 ```python Collapsed="false"
215 query_string = '''with culm as
216 (select report_date, geo_id,
217 sum(cases_weekly) over (partition by geo_id
218 order by report_date) as culm_data
222 set culm_cases = culm_data
224 where weekly_cases.report_date = culm.report_date and
225 weekly_cases.geo_id = culm.geo_id'''
226 with engine.connect() as connection:
227 connection.execute(query_string)
230 ```python Collapsed="false"
233 # (select report_date, geo_id,
234 # sum(deaths_weekly) over (partition by geo_id
235 # order by report_date) as culm_data
238 # update weekly_cases
239 # set culm_deaths = culm_data
241 # where weekly_cases.report_date = culm.report_date and
242 # weekly_cases.geo_id = culm.geo_id
245 ```python Collapsed="false"
246 query_string = '''with culm as
247 (select report_date, geo_id,
248 sum(deaths_weekly) over (partition by geo_id
249 order by report_date) as culm_data
253 set culm_deaths = culm_data
255 where weekly_cases.report_date = culm.report_date and
256 weekly_cases.geo_id = culm.geo_id'''
257 with engine.connect() as connection:
258 connection.execute(query_string)
261 ```python Collapsed="false"
263 "https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
264 "metric=covidOccupiedMVBeds&"
265 "metric=newAdmissions&"
266 "metric=newCasesBySpecimenDate&"
267 "metric=hospitalCases&"
268 "metric=newDeaths28DaysByPublishDate&"
272 !curl "$uk_query_string" > uk_data.csv
275 ```python Collapsed="false"
276 test_query_string = (
277 "https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
278 "metric=newPCRTestsByPublishDate&"
279 "metric=newTestsByPublishDate&"
280 "metric=newPillarOneTwoTestsByPublishDate&"
283 !curl "$test_query_string" > test_data.csv
286 ```python Collapsed="false"
287 # hospital_query_string = (
288 # "https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"
289 # "metric=newAdmissions&"
292 # !curl "$hospital_query_string" > hospital_admissions.csv
295 ```python Collapsed="false"
296 # hospital_query_string = (
297 # "https://api.coronavirus.data.gov.uk/v1/data?"
298 # "filters=areaName=United%2520Kingdom;areaType=overview&"
299 # "structure=%7B%22date%22:%22date%22,%22areaName%22:%22areaName%22,%22areaType%22:%22areaType%22,"
300 # "%22newAdmissions%22:%22newAdmissions%22,%22cumAdmissions%22:%22cumAdmissions%22%7D&format=csv"
303 # !curl "$hospital_query_string" | gunzip > hospital_admissions.csv
306 ```python Collapsed="false"
307 uk_data = pd.read_csv('uk_data.csv',
308 parse_dates=[0], dayfirst=True)
312 ```python Collapsed="false"
313 test_data = pd.read_csv('test_data.csv',
314 parse_dates=[0], dayfirst=True)
318 ```python Collapsed="false"
322 ```python Collapsed="false"
323 uk_data = uk_data.merge(test_data[['date', 'newPCRTestsByPublishDate',
324 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate']], how='outer', on='date')
327 ```python Collapsed="false"
331 ```python Collapsed="false"
334 'covidOccupiedMVBeds': 'ventilator_beds',
335 'newCasesBySpecimenDate': 'new_cases',
336 'hospitalCases': 'hospital_cases',
337 'newDeaths28DaysByPublishDate': 'new_deaths',
338 'newAdmissions': 'new_admissions',
339 'newPCRTestsByPublishDate': 'new_pcr_tests',
340 'newTestsByPublishDate': 'new_tests',
341 'newPillarOneTwoTestsByPublishDate': 'new_pillar_1_2_tests'
345 ```python Collapsed="false"
349 ```python Collapsed="false"
353 ```python Collapsed="false"
355 'hospital_cases', 'ventilator_beds',
356 'new_cases', 'new_deaths',
357 'hospital_cases', 'new_admissions',
358 'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'
367 "hospital_cases": Integer,
368 "ventilator_beds": Integer,
369 "new_cases": Integer,
370 "hospital_cases": Integer,
371 "new_deaths": Integer,
372 "new_admissions": Integer,
373 'new_pcr_tests': Integer,
374 'new_tests': Integer,
375 'new_pillar_1_2_tests': Integer
380 ```python Collapsed="false"
381 # %sql select * from uk_data order by date desc limit 10
384 ```python Collapsed="false"
385 query_string = '''drop table if exists uk_data_7;
386 create table uk_data_7
387 (date date primary key,
389 ventilator_beds real,
395 new_pillar_1_2_tests real
398 with engine.connect() as connection:
399 connection.execute(query_string)
403 update_string = '''with ownd as (
405 avg(hospital_cases) over wnd as w_hospital_cases,
406 avg(ventilator_beds) over wnd as w_ventilator_beds,
407 avg(new_cases) over wnd as w_new_cases,
408 avg(new_deaths) over wnd as w_new_deaths,
409 avg(new_admissions) over wnd as w_new_admissions,
410 avg(new_pcr_tests) over wnd as w_new_pcr_tests,
411 avg(new_tests) over wnd as w_new_tests,
412 avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
413 count(*) over wnd as w_size
416 order by uk_data.date
417 rows between 3 preceding and 3 following
420 insert into uk_data_7(date,
438 w_new_pillar_1_2_tests
442 with engine.connect() as connection:
443 connection.execute(update_string)
446 ```python Collapsed="false"
447 # %%sql insert into uk_data_7(date, ventilator_beds, new_cases, hospital_cases, new_deaths, new_admissions)
450 # avg(ventilator_beds) over (order by date rows between 6 preceding and current row)
455 ```python Collapsed="false"
456 # query_string = '''insert into uk_data_7(date, hospital_cases)
457 # select uk_data.date,
458 # avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as hospital_cases
460 # with engine.connect() as connection:
461 # connection.execute(query_string)
464 ```python Collapsed="false"
467 # (select uk_data.date as date7,
468 # avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 6 preceding and current row) as nc7
472 # set ventilator_beds = nc7
474 # where uk_data_7.date = m7.date7
477 ```python Collapsed="false"
480 # (select uk_data.date as date7,
481 # avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7
485 # set ventilator_beds = nc7
487 # where uk_data_7.date = m7.date7
490 ```python Collapsed="false"
493 # (select uk_data.date as date7,
494 # avg(uk_data.new_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7
498 # set new_cases = nc7
500 # where uk_data_7.date = m7.date7
503 ```python Collapsed="false"
506 # (select uk_data.date as date7,
507 # avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
511 # set hospital_cases = d7
513 # where uk_data_7.date = m7.date7
516 ```python Collapsed="false"
519 # (select uk_data.date as date7,
520 # avg(uk_data.new_deaths) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
524 # set new_deaths = d7
526 # where uk_data_7.date = m7.date7
529 ```python Collapsed="false"
532 # (select uk_data.date as date7,
533 # avg(uk_data.new_admissions) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
537 # set new_admissions = d7
539 # where uk_data_7.date = m7.date7
542 ```python Collapsed="false"
545 # (select uk_data.date as date7,
546 # avg(uk_data.new_pcr_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
550 # set new_pcr_tests = d7
552 # where uk_data_7.date = m7.date7
555 ```python Collapsed="false"
558 # (select uk_data.date as date7,
559 # avg(uk_data.new_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
565 # where uk_data_7.date = m7.date7
568 ```python Collapsed="false"
571 # (select uk_data.date as date7,
572 # avg(uk_data.new_pillar_1_2_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7
576 # set new_pillar_1_2_tests = d7
578 # where uk_data_7.date = m7.date7
581 ```python Collapsed="false"
585 # avg(new_pillar_1_2_tests)
586 # over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests
590 # set new_pillar_1_2_tests = wnd.a_new_pillar_1_2_tests
592 # where uk_data_7.date = wnd.date
593 # and (select count(*) from wnd) = 7
596 ```python Collapsed="false"
600 # avg(new_pillar_1_2_tests)
601 # over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests,
602 # count(new_pillar_1_2_tests)
603 # over (order by uk_data.date rows between 3 preceding and 3 following) as a_count
606 # select uk_data_7.date, new_pillar_1_2_tests, wnd.a_new_pillar_1_2_tests, wnd.a_count
607 # from uk_data_7, wnd
608 # where uk_data_7.date = wnd.date
609 # order by uk_data_7.date desc limit 10
612 # count(*) over wnd as w_size
615 # order by uk_data.date
616 # rows between 3 preceding and 3 following
620 ```python Collapsed="false"
623 # count(*) over wnd as w_size
626 # order by uk_data.date
627 # rows between 3 preceding and 3 following
629 # order by date desc limit 10
636 # avg(hospital_cases) over wnd as w_hospital_cases,
637 # avg(ventilator_beds) over wnd as w_ventilator_beds,
638 # avg(new_cases) over wnd as w_new_cases,
639 # avg(new_deaths) over wnd as w_new_deaths,
640 # avg(new_admissions) over wnd as w_new_admissions,
641 # avg(new_pcr_tests) over wnd as w_new_pcr_tests,
642 # avg(new_tests) over wnd as w_new_tests,
643 # avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
644 # count(*) over wnd as w_size
647 # order by uk_data.date
648 # rows between 3 preceding and 3 following
650 # insert into uk_data_7(date,
658 # new_pillar_1_2_tests
661 # avg(hospital_cases) over wnd as w_hospital_cases,
662 # avg(ventilator_beds) over wnd as w_ventilator_beds,
663 # avg(new_cases) over wnd as w_new_cases,
664 # avg(new_deaths) over wnd as w_new_deaths,
665 # avg(new_admissions) over wnd as w_new_admissions,
666 # avg(new_pcr_tests) over wnd as w_new_pcr_tests,
667 # avg(new_tests) over wnd as w_new_tests,
668 # avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
669 # count(*) over wnd as w_size
672 # order by uk_data.date
673 # rows between 3 preceding and 3 following
676 # set date = ownd.date,
677 # hospital_cases = w_hospital_cases,
678 # ventilator_beds = w_ventilator_beds,
679 # new_cases = w_new_cases,
680 # new_deaths = w_new_deaths,
681 # new_admissions = w_new_admissions,
682 # new_pcr_tests = w_new_pcr_tests,
683 # new_tests = w_new_tests,
684 # new_pillar_1_2_tests = w_new_pillar_1_2_tests
693 # avg(hospital_cases) over wnd as w_hospital_cases,
694 # avg(ventilator_beds) over wnd as w_ventilator_beds,
695 # avg(new_cases) over wnd as w_new_cases,
696 # avg(new_deaths) over wnd as w_new_deaths,
697 # avg(new_admissions) over wnd as w_new_admissions,
698 # avg(new_pcr_tests) over wnd as w_new_pcr_tests,
699 # avg(new_tests) over wnd as w_new_tests,
700 # avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,
701 # count(*) over wnd as w_size
704 # order by uk_data.date
705 # rows between 3 preceding and 3 following
708 # insert into uk_data_7(date,
716 # new_pillar_1_2_tests
726 # w_new_pillar_1_2_tests
732 ```python Collapsed="false"
733 %sql select * from uk_data_7 order by date desc limit 10
736 ```python Collapsed="false"
737 %sql select * from uk_data order by date desc limit 10
740 ```python Collapsed="false"