# from scipy.stats import gmean
import datetime
import os
+# import matplotlib as mpl
+# import matplotlib.pyplot as plt
+# import matplotlib.animation as ani
+
+# # %matplotlib inline
# %%
# %%
-DEATH_COUNT_THRESHOLD = 10
-COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()
-COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
-COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()
-# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
+# DEATH_COUNT_THRESHOLD = 10
+# COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()
+# COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
+# COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()
+# # COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
-COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
- 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
- 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
- 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']
-COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
-COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
+# COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
+# 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
+# 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
+# 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']
+# COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
+# COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
# %%
-os.system('curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv')
+## Old data source
+# os.system('curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv')
+
+## In outer shell script, as it doesn't seem to work here
+os.system('curl https://opendata.ecdc.europa.eu/covid19/nationalcasedeath/csv/ > covid.csv')
# %%
# First col is a date, treat geoId of NA as 'Namibia', not "NA" value
raw_data = pd.read_csv('covid.csv',
- parse_dates=[0], dayfirst=True,
+ parse_dates=[7], dayfirst=True,
keep_default_na=False, na_values = [''],
# dtype = {'day': np.int64,
# 'month': np.int64,
)
raw_data.fillna(0, inplace=True)
+raw_data = raw_data[raw_data.country_code != 0]
+# raw_data
+
+# %%
+raw_data['date'] = raw_data.year_week.apply(lambda yw: datetime.datetime.fromisocalendar(int(yw[:4]), int(yw[5:]), 7 ))
+# raw_data
+
+# %%
+# raw_data.dtypes
+
+# %%
+# raw_data.head()
+
+# %%
+# raw_data[raw_data.country_code == 'GBR'].sort_values(by='date').tail(20)
+
+# %%
+case_data = raw_data[raw_data.indicator == 'cases'][['country_code', 'date', 'weekly_count', 'cumulative_count']]
+case_data.rename(columns={'weekly_count': 'cases_weekly', 'cumulative_count': 'culm_cases'}, inplace=True)
+# case_data.head()
+
+# %%
+deaths_data = raw_data[raw_data.indicator == 'deaths'][['country_code', 'date', 'weekly_count', 'cumulative_count']]
+deaths_data.rename(columns={'weekly_count': 'deaths_weekly', 'cumulative_count': 'culm_deaths'}, inplace=True)
+# deaths_data.head()
+
+# %%
+case_death_data = case_data.merge(deaths_data)
+# case_death_data
+
+# %%
+# raw_data.columns
+
+# %%
+country_data = raw_data[['country', 'country_code', 'continent', 'population']].drop_duplicates()
+# country_data
# %%
-COUNTRIES_ALL
# %%
# raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',
# 'countryterritoryCode': 'country_territory_code',
# 'popData2019': 'population_2019',
# 'continentExp': 'continent'}, inplace=True)
-raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',
- 'countriesAndTerritories': 'country_name',
- 'countryterritoryCode': 'country_territory_code',
- 'cases': 'cases_weekly',
- 'deaths': 'deaths_weekly',
- 'popData2019': 'population_2019',
- 'continentExp': 'continent',
- 'Cumulative_number_for_14_days_of_COVID-19_cases_per_100000': 'notification_rate_per_100000_population_14-days'
- },
- inplace=True)
+
+# raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',
+# 'countriesAndTerritories': 'country_name',
+# 'countryterritoryCode': 'country_territory_code',
+# 'cases': 'cases_weekly',
+# 'deaths': 'deaths_weekly',
+# 'popData2019': 'population_2019',
+# 'continentExp': 'continent',
+# # 'Cumulative_number_for_14_days_of_COVID-19_cases_per_100000': 'notification_rate_per_100000_population_14-days'
+# },
+# inplace=True)
# %%
-raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']].to_sql(
+case_death_data.to_sql(
'weekly_cases',
engine,
if_exists='replace',
dtype={
"report_date": Date,
"cases_weekly": Integer,
+ "cases_culm": Integer,
"deaths_weekly": Integer,
- "geo_id": String,
- "notification_rate_per_100000_population_14-days": Float
+ "deaths_culm": Integer,
+ "country_code": String
}
)
# %%
-raw_data[['country_name', 'geo_id', 'country_territory_code',
- 'population_2019', 'continent']].drop_duplicates().to_sql(
+country_data.to_sql(
'countries',
engine,
if_exists='replace',
chunksize=500,
dtype={
"country_name": Text,
- "geo_id": String,
- "country_territory_code": String,
- "population_2019": Integer,
+ "country_code": String,
+ "population": Integer,
"continent": Text
}
)
-# %%
-with engine.connect() as connection:
- connection.execute('alter table weekly_cases add primary key (geo_id, report_date)')
- connection.execute('alter table countries add primary key (geo_id);')
- connection.execute('alter table weekly_cases add foreign key (geo_id) references countries(geo_id);')
- connection.execute('alter table weekly_cases add culm_cases integer;')
- connection.execute('alter table weekly_cases add culm_deaths integer;')
-
-
-# %%
-query_string = '''with culm as
- (select report_date, geo_id,
- sum(cases_weekly) over (partition by geo_id
- order by report_date) as culm_data
- from weekly_cases
- )
-update weekly_cases
- set culm_cases = culm_data
- from culm
- where weekly_cases.report_date = culm.report_date and
- weekly_cases.geo_id = culm.geo_id'''
-with engine.connect() as connection:
- connection.execute(query_string)
-
-
-# %%
-query_string = '''with culm as
- (select report_date, geo_id,
- sum(deaths_weekly) over (partition by geo_id
- order by report_date) as culm_data
- from weekly_cases
- )
-update weekly_cases
- set culm_deaths = culm_data
- from culm
- where weekly_cases.report_date = culm.report_date and
- weekly_cases.geo_id = culm.geo_id'''
-with engine.connect() as connection:
- connection.execute(query_string)
-
-
# %%
uk_query_string = (
"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&"