4 "cell_type": "markdown",
9 "Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)"
14 "execution_count": 314,
20 "from sqlalchemy.types import Integer, Text, String, DateTime, Date, Float\n",
21 "from sqlalchemy import create_engine"
26 "execution_count": 315,
33 "import collections\n",
35 "import pandas as pd\n",
36 "import numpy as np\n",
37 "from scipy.stats import gmean\n",
40 "import matplotlib as mpl\n",
41 "import matplotlib.pyplot as plt\n",
47 "execution_count": 316,
54 "output_type": "stream",
56 "The sql extension is already loaded. To reload it, use:\n",
67 "execution_count": 317,
73 "connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'"
78 "execution_count": 318,
86 "'Connected: covid@covid'"
89 "execution_count": 318,
91 "output_type": "execute_result"
95 "%sql $connection_string"
100 "execution_count": 319,
106 "eng = create_engine(connection_string)\n",
107 "engine = eng.execution_options(isolation_level=\"AUTOCOMMIT\")"
112 "execution_count": 320,
118 "DEATH_COUNT_THRESHOLD = 10\n",
119 "COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()\n",
120 "COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()\n",
121 "COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()\n",
122 "# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()\n",
124 "COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA\n",
125 " 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',\n",
126 " 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',\n",
127 " 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']\n",
128 "COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))\n",
129 "COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))"
134 "execution_count": 321,
141 "output_type": "stream",
143 " % Total % Received % Xferd Average Speed Time Time Time Current\n",
144 " Dload Upload Total Spent Left Speed\n",
145 "100 623k 100 623k 0 0 927k 0 --:--:-- --:--:-- --:--:-- 925k\n"
150 "!curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv"
155 "execution_count": 322,
161 "# First col is a date, treat geoId of NA as 'Namibia', not \"NA\" value\n",
162 "raw_data = pd.read_csv('covid.csv', \n",
163 " parse_dates=[0], dayfirst=True,\n",
164 " keep_default_na=False, na_values = [''],\n",
165 "# dtype = {'day': np.int64, \n",
166 "# 'month': np.int64, \n",
167 "# 'year': np.int64, \n",
168 "# 'cases': np.int64, \n",
169 "# 'deaths': np.int64, \n",
170 "# 'countriesAndTerritories': str, \n",
171 "# 'geoId': str, \n",
172 "# 'countryterritoryCode': str, \n",
173 "# 'popData2019': np.int64, \n",
174 "# 'continentExp': str, \n",
181 "execution_count": 323,
192 "execution_count": 323,
194 "output_type": "execute_result"
203 "execution_count": 324,
209 "raw_data.fillna(0, inplace=True)"
214 "execution_count": 325,
224 " .dataframe tbody tr th:only-of-type {\n",
225 " vertical-align: middle;\n",
228 " .dataframe tbody tr th {\n",
229 " vertical-align: top;\n",
232 " .dataframe thead th {\n",
233 " text-align: right;\n",
236 "<table border=\"1\" class=\"dataframe\">\n",
238 " <tr style=\"text-align: right;\">\n",
240 " <th>dateRep</th>\n",
241 " <th>year_week</th>\n",
242 " <th>cases_weekly</th>\n",
243 " <th>deaths_weekly</th>\n",
244 " <th>countriesAndTerritories</th>\n",
246 " <th>countryterritoryCode</th>\n",
247 " <th>popData2019</th>\n",
248 " <th>continentExp</th>\n",
249 " <th>notification_rate_per_100000_population_14-days</th>\n",
255 " <td>2021-01-18</td>\n",
256 " <td>2021-02</td>\n",
259 " <td>Afghanistan</td>\n",
262 " <td>38041757.0</td>\n",
268 " <td>2021-01-11</td>\n",
269 " <td>2021-01</td>\n",
272 " <td>Afghanistan</td>\n",
275 " <td>38041757.0</td>\n",
281 " <td>2021-01-04</td>\n",
282 " <td>2020-53</td>\n",
285 " <td>Afghanistan</td>\n",
288 " <td>38041757.0</td>\n",
294 " <td>2020-12-28</td>\n",
295 " <td>2020-52</td>\n",
298 " <td>Afghanistan</td>\n",
301 " <td>38041757.0</td>\n",
307 " <td>2020-12-21</td>\n",
308 " <td>2020-51</td>\n",
311 " <td>Afghanistan</td>\n",
314 " <td>38041757.0</td>\n",
323 " dateRep year_week cases_weekly deaths_weekly countriesAndTerritories \\\n",
324 "0 2021-01-18 2021-02 557 45 Afghanistan \n",
325 "1 2021-01-11 2021-01 675 71 Afghanistan \n",
326 "2 2021-01-04 2020-53 902 60 Afghanistan \n",
327 "3 2020-12-28 2020-52 1994 88 Afghanistan \n",
328 "4 2020-12-21 2020-51 740 111 Afghanistan \n",
330 " geoId countryterritoryCode popData2019 continentExp \\\n",
331 "0 AF AFG 38041757.0 Asia \n",
332 "1 AF AFG 38041757.0 Asia \n",
333 "2 AF AFG 38041757.0 Asia \n",
334 "3 AF AFG 38041757.0 Asia \n",
335 "4 AF AFG 38041757.0 Asia \n",
337 " notification_rate_per_100000_population_14-days \n",
345 "execution_count": 325,
347 "output_type": "execute_result"
356 "execution_count": 326,
362 "raw_data.rename(columns={'dateRep':'report_date', 'geoId': 'geo_id',\n",
363 " 'countriesAndTerritories': 'country_name',\n",
364 " 'countryterritoryCode': 'country_territory_code',\n",
365 " 'popData2019': 'population_2019',\n",
366 " 'continentExp': 'continent'}, inplace=True)"
371 "execution_count": 327,
379 "Index(['report_date', 'year_week', 'cases_weekly', 'deaths_weekly',\n",
380 " 'country_name', 'geo_id', 'country_territory_code', 'population_2019',\n",
381 " 'continent', 'notification_rate_per_100000_population_14-days'],\n",
385 "execution_count": 327,
387 "output_type": "execute_result"
396 "execution_count": 328,
404 "report_date datetime64[ns]\n",
405 "year_week object\n",
406 "cases_weekly int64\n",
407 "deaths_weekly int64\n",
408 "country_name object\n",
410 "country_territory_code object\n",
411 "population_2019 float64\n",
412 "continent object\n",
413 "notification_rate_per_100000_population_14-days float64\n",
417 "execution_count": 328,
419 "output_type": "execute_result"
428 "execution_count": 329,
438 " .dataframe tbody tr th:only-of-type {\n",
439 " vertical-align: middle;\n",
442 " .dataframe tbody tr th {\n",
443 " vertical-align: top;\n",
446 " .dataframe thead th {\n",
447 " text-align: right;\n",
450 "<table border=\"1\" class=\"dataframe\">\n",
452 " <tr style=\"text-align: right;\">\n",
454 " <th>report_date</th>\n",
455 " <th>cases_weekly</th>\n",
456 " <th>deaths_weekly</th>\n",
457 " <th>geo_id</th>\n",
458 " <th>notification_rate_per_100000_population_14-days</th>\n",
464 " <td>2021-01-18</td>\n",
472 " <td>2021-01-11</td>\n",
480 " <td>2021-01-04</td>\n",
488 " <td>2020-12-28</td>\n",
496 " <td>2020-12-21</td>\n",
512 " <td>2020-04-20</td>\n",
520 " <td>2020-04-13</td>\n",
528 " <td>2020-04-06</td>\n",
536 " <td>2020-03-30</td>\n",
544 " <td>2020-03-23</td>\n",
552 "<p>10005 rows × 5 columns</p>\n",
556 " report_date cases_weekly deaths_weekly geo_id \\\n",
557 "0 2021-01-18 557 45 AF \n",
558 "1 2021-01-11 675 71 AF \n",
559 "2 2021-01-04 902 60 AF \n",
560 "3 2020-12-28 1994 88 AF \n",
561 "4 2020-12-21 740 111 AF \n",
562 "... ... ... ... ... \n",
563 "10000 2020-04-20 11 0 ZW \n",
564 "10001 2020-04-13 5 2 ZW \n",
565 "10002 2020-04-06 2 0 ZW \n",
566 "10003 2020-03-30 5 1 ZW \n",
567 "10004 2020-03-23 2 0 ZW \n",
569 " notification_rate_per_100000_population_14-days \n",
582 "[10005 rows x 5 columns]"
585 "execution_count": 329,
587 "output_type": "execute_result"
591 "raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']]"
596 "execution_count": 330,
606 " .dataframe tbody tr th:only-of-type {\n",
607 " vertical-align: middle;\n",
610 " .dataframe tbody tr th {\n",
611 " vertical-align: top;\n",
614 " .dataframe thead th {\n",
615 " text-align: right;\n",
618 "<table border=\"1\" class=\"dataframe\">\n",
620 " <tr style=\"text-align: right;\">\n",
622 " <th>country_name</th>\n",
623 " <th>geo_id</th>\n",
624 " <th>country_territory_code</th>\n",
625 " <th>population_2019</th>\n",
626 " <th>continent</th>\n",
632 " <td>Afghanistan</td>\n",
635 " <td>38041757.0</td>\n",
640 " <td>Afghanistan</td>\n",
643 " <td>38041757.0</td>\n",
648 " <td>Afghanistan</td>\n",
651 " <td>38041757.0</td>\n",
656 " <td>Afghanistan</td>\n",
659 " <td>38041757.0</td>\n",
664 " <td>Afghanistan</td>\n",
667 " <td>38041757.0</td>\n",
680 " <td>Zimbabwe</td>\n",
683 " <td>14645473.0</td>\n",
684 " <td>Africa</td>\n",
688 " <td>Zimbabwe</td>\n",
691 " <td>14645473.0</td>\n",
692 " <td>Africa</td>\n",
696 " <td>Zimbabwe</td>\n",
699 " <td>14645473.0</td>\n",
700 " <td>Africa</td>\n",
704 " <td>Zimbabwe</td>\n",
707 " <td>14645473.0</td>\n",
708 " <td>Africa</td>\n",
712 " <td>Zimbabwe</td>\n",
715 " <td>14645473.0</td>\n",
716 " <td>Africa</td>\n",
720 "<p>10005 rows × 5 columns</p>\n",
724 " country_name geo_id country_territory_code population_2019 continent\n",
725 "0 Afghanistan AF AFG 38041757.0 Asia\n",
726 "1 Afghanistan AF AFG 38041757.0 Asia\n",
727 "2 Afghanistan AF AFG 38041757.0 Asia\n",
728 "3 Afghanistan AF AFG 38041757.0 Asia\n",
729 "4 Afghanistan AF AFG 38041757.0 Asia\n",
730 "... ... ... ... ... ...\n",
731 "10000 Zimbabwe ZW ZWE 14645473.0 Africa\n",
732 "10001 Zimbabwe ZW ZWE 14645473.0 Africa\n",
733 "10002 Zimbabwe ZW ZWE 14645473.0 Africa\n",
734 "10003 Zimbabwe ZW ZWE 14645473.0 Africa\n",
735 "10004 Zimbabwe ZW ZWE 14645473.0 Africa\n",
737 "[10005 rows x 5 columns]"
740 "execution_count": 330,
742 "output_type": "execute_result"
746 "raw_data[['country_name', 'geo_id', 'country_territory_code',\n",
747 " 'population_2019', 'continent']]"
752 "execution_count": 331,
758 "raw_data[['report_date', 'cases_weekly', 'deaths_weekly', 'geo_id', 'notification_rate_per_100000_population_14-days']].to_sql(\n",
759 " 'weekly_cases',\n",
761 " if_exists='replace',\n",
765 " \"report_date\": Date,\n",
766 " \"cases_weekly\": Integer,\n",
767 " \"deaths_weekly\": Integer,\n",
768 " \"geo_id\": String,\n",
769 " \"notification_rate_per_100000_population_14-days\": Float\n",
776 "execution_count": 332,
782 "raw_data[['country_name', 'geo_id', 'country_territory_code',\n",
783 " 'population_2019', 'continent']].drop_duplicates().to_sql(\n",
786 " if_exists='replace',\n",
790 " \"country_name\": Text,\n",
791 " \"geo_id\": String,\n",
792 " \"country_territory_code\": String,\n",
793 " \"population_2019\": Integer,\n",
794 " \"continent\": Text\n",
801 "execution_count": 333,
803 "Collapsed": "false",
808 "# %sql select geo_id from weekly_cases limit 10"
813 "execution_count": 334,
819 "# %%sql alter table weekly_cases add primary key (geo_id, report_date);\n",
820 "# alter table countries add primary key (geo_id);\n",
821 "# alter table weekly_cases add foreign key (geo_id) references countries(geo_id);\n",
822 "# alter table weekly_cases add culm_cases integer;\n",
823 "# alter table weekly_cases add culm_deaths integer;"
828 "execution_count": 335,
832 "with engine.connect() as connection:\n",
833 " connection.execute('alter table weekly_cases add primary key (geo_id, report_date)')\n",
834 " connection.execute('alter table countries add primary key (geo_id);')\n",
835 " connection.execute('alter table weekly_cases add foreign key (geo_id) references countries(geo_id);')\n",
836 " connection.execute('alter table weekly_cases add culm_cases integer;')\n",
837 " connection.execute('alter table weekly_cases add culm_deaths integer;')"
842 "execution_count": 336,
848 "# %sql select report_date, cases_weekly, country_name from weekly_cases join countries using (geo_id) order by report_date desc limit 10"
853 "execution_count": 337,
859 "# %sql select report_date, cases_weekly, country_name from weekly_cases join countries on weekly_cases.geo_id = countries.geo_id order by report_date desc limit 10"
864 "execution_count": 338,
871 "output_type": "stream",
873 " * postgresql://covid:***@localhost/covid\n",
874 "10 rows affected.\n"
883 " <th>report_date</th>\n",
884 " <th>cases_weekly</th>\n",
885 " <th>deaths_weekly</th>\n",
886 " <th>geo_id</th>\n",
887 " <th>notification_rate_per_100000_population_14-days</th>\n",
888 " <th>culm_cases</th>\n",
889 " <th>culm_deaths</th>\n",
894 " <td>2021-01-18</td>\n",
903 " <td>2021-01-11</td>\n",
912 " <td>2021-01-04</td>\n",
921 " <td>2020-12-28</td>\n",
930 " <td>2020-12-21</td>\n",
939 " <td>2020-12-14</td>\n",
948 " <td>2020-12-07</td>\n",
957 " <td>2020-11-30</td>\n",
966 " <td>2020-11-23</td>\n",
975 " <td>2020-11-16</td>\n",
987 "[(datetime.date(2021, 1, 18), 557, 45, 'AF', 3.24, None, None),\n",
988 " (datetime.date(2021, 1, 11), 675, 71, 'AF', 4.15, None, None),\n",
989 " (datetime.date(2021, 1, 4), 902, 60, 'AF', 7.61, None, None),\n",
990 " (datetime.date(2020, 12, 28), 1994, 88, 'AF', 7.19, None, None),\n",
991 " (datetime.date(2020, 12, 21), 740, 111, 'AF', 6.56, None, None),\n",
992 " (datetime.date(2020, 12, 14), 1757, 71, 'AF', 9.01, None, None),\n",
993 " (datetime.date(2020, 12, 7), 1672, 137, 'AF', 7.22, None, None),\n",
994 " (datetime.date(2020, 11, 30), 1073, 68, 'AF', 6.42, None, None),\n",
995 " (datetime.date(2020, 11, 23), 1368, 69, 'AF', 6.66, None, None),\n",
996 " (datetime.date(2020, 11, 16), 1164, 61, 'AF', 4.65, None, None)]"
999 "execution_count": 338,
1001 "output_type": "execute_result"
1005 "%sql select * from weekly_cases limit 10"
1009 "cell_type": "code",
1010 "execution_count": 339,
1012 "Collapsed": "false"
1017 "# with culm as \n",
1018 "# (select report_date, geo_id,\n",
1019 "# sum(cases_weekly) over (partition by geo_id \n",
1020 "# order by report_date) as culm_data\n",
1021 "# from weekly_cases\n",
1023 "# update weekly_cases\n",
1024 "# set culm_cases = culm_data\n",
1026 "# where weekly_cases.report_date = culm.report_date and\n",
1027 "# weekly_cases.geo_id = culm.geo_id"
1031 "cell_type": "code",
1032 "execution_count": 340,
1034 "Collapsed": "false"
1038 "query_string = '''with culm as \n",
1039 " (select report_date, geo_id,\n",
1040 " sum(cases_weekly) over (partition by geo_id \n",
1041 " order by report_date) as culm_data\n",
1042 " from weekly_cases\n",
1044 "update weekly_cases\n",
1045 " set culm_cases = culm_data\n",
1047 " where weekly_cases.report_date = culm.report_date and\n",
1048 " weekly_cases.geo_id = culm.geo_id'''\n",
1049 "with engine.connect() as connection:\n",
1050 " connection.execute(query_string)"
1054 "cell_type": "code",
1055 "execution_count": 341,
1057 "Collapsed": "false"
1062 "# with culm as \n",
1063 "# (select report_date, geo_id,\n",
1064 "# sum(deaths_weekly) over (partition by geo_id \n",
1065 "# order by report_date) as culm_data\n",
1066 "# from weekly_cases\n",
1068 "# update weekly_cases\n",
1069 "# set culm_deaths = culm_data\n",
1071 "# where weekly_cases.report_date = culm.report_date and\n",
1072 "# weekly_cases.geo_id = culm.geo_id"
1076 "cell_type": "code",
1077 "execution_count": 342,
1079 "Collapsed": "false"
1083 "query_string = '''with culm as \n",
1084 " (select report_date, geo_id,\n",
1085 " sum(deaths_weekly) over (partition by geo_id \n",
1086 " order by report_date) as culm_data\n",
1087 " from weekly_cases\n",
1089 "update weekly_cases\n",
1090 " set culm_deaths = culm_data\n",
1092 " where weekly_cases.report_date = culm.report_date and\n",
1093 " weekly_cases.geo_id = culm.geo_id'''\n",
1094 "with engine.connect() as connection:\n",
1095 " connection.execute(query_string)"
1099 "cell_type": "code",
1100 "execution_count": 343,
1102 "Collapsed": "false"
1107 "output_type": "stream",
1109 " % Total % Received % Xferd Average Speed Time Time Time Current\n",
1110 " Dload Upload Total Spent Left Speed\n",
1111 "100 26053 100 26053 0 0 54277 0 --:--:-- --:--:-- --:--:-- 54277\n"
1116 "uk_query_string = (\n",
1117 "\"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n",
1118 "\"metric=covidOccupiedMVBeds&\"\n",
1119 "\"metric=newAdmissions&\"\n",
1120 "\"metric=newCasesBySpecimenDate&\"\n",
1121 "\"metric=hospitalCases&\"\n",
1122 "\"metric=newDeaths28DaysByPublishDate&\"\n",
1126 "!curl \"$uk_query_string\" > uk_data.csv"
1130 "cell_type": "code",
1131 "execution_count": 344,
1133 "Collapsed": "false"
1138 "output_type": "stream",
1140 " % Total % Received % Xferd Average Speed Time Time Time Current\n",
1141 " Dload Upload Total Spent Left Speed\n",
1142 "100 20223 100 20223 0 0 78688 0 --:--:-- --:--:-- --:--:-- 78383\n"
1147 "test_query_string = (\n",
1148 "\"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n",
1149 "\"metric=newPCRTestsByPublishDate&\"\n",
1150 "\"metric=newTestsByPublishDate&\"\n",
1151 "\"metric=newPillarOneTwoTestsByPublishDate&\"\n",
1154 "!curl \"$test_query_string\" > test_data.csv"
1158 "cell_type": "code",
1159 "execution_count": 345,
1161 "Collapsed": "false"
1165 "# hospital_query_string = (\n",
1166 "# \"https://api.coronavirus.data.gov.uk/v2/data?areaType=overview&\"\n",
1167 "# \"metric=newAdmissions&\"\n",
1168 "# \"format=csv\"\n",
1170 "# !curl \"$hospital_query_string\" > hospital_admissions.csv"
1174 "cell_type": "code",
1175 "execution_count": 346,
1177 "Collapsed": "false"
1181 "# hospital_query_string = (\n",
1182 "# \"https://api.coronavirus.data.gov.uk/v1/data?\"\n",
1183 "# \"filters=areaName=United%2520Kingdom;areaType=overview&\"\n",
1184 "# \"structure=%7B%22date%22:%22date%22,%22areaName%22:%22areaName%22,%22areaType%22:%22areaType%22,\"\n",
1185 "# \"%22newAdmissions%22:%22newAdmissions%22,%22cumAdmissions%22:%22cumAdmissions%22%7D&format=csv\"\n",
1188 "# !curl \"$hospital_query_string\" | gunzip > hospital_admissions.csv"
1192 "cell_type": "code",
1193 "execution_count": 347,
1195 "Collapsed": "false"
1203 " .dataframe tbody tr th:only-of-type {\n",
1204 " vertical-align: middle;\n",
1207 " .dataframe tbody tr th {\n",
1208 " vertical-align: top;\n",
1211 " .dataframe thead th {\n",
1212 " text-align: right;\n",
1215 "<table border=\"1\" class=\"dataframe\">\n",
1217 " <tr style=\"text-align: right;\">\n",
1220 " <th>areaType</th>\n",
1221 " <th>areaCode</th>\n",
1222 " <th>areaName</th>\n",
1223 " <th>covidOccupiedMVBeds</th>\n",
1224 " <th>newAdmissions</th>\n",
1225 " <th>newCasesBySpecimenDate</th>\n",
1226 " <th>hospitalCases</th>\n",
1227 " <th>newDeaths28DaysByPublishDate</th>\n",
1233 " <td>2021-01-26</td>\n",
1234 " <td>overview</td>\n",
1235 " <td>K02000001</td>\n",
1236 " <td>United Kingdom</td>\n",
1245 " <td>2021-01-25</td>\n",
1246 " <td>overview</td>\n",
1247 " <td>K02000001</td>\n",
1248 " <td>United Kingdom</td>\n",
1249 " <td>4032.0</td>\n",
1251 " <td>4482.0</td>\n",
1257 " <td>2021-01-24</td>\n",
1258 " <td>overview</td>\n",
1259 " <td>K02000001</td>\n",
1260 " <td>United Kingdom</td>\n",
1261 " <td>4077.0</td>\n",
1263 " <td>14266.0</td>\n",
1264 " <td>37561.0</td>\n",
1269 " <td>2021-01-23</td>\n",
1270 " <td>overview</td>\n",
1271 " <td>K02000001</td>\n",
1272 " <td>United Kingdom</td>\n",
1273 " <td>4066.0</td>\n",
1275 " <td>20495.0</td>\n",
1276 " <td>37266.0</td>\n",
1281 " <td>2021-01-22</td>\n",
1282 " <td>overview</td>\n",
1283 " <td>K02000001</td>\n",
1284 " <td>United Kingdom</td>\n",
1285 " <td>4076.0</td>\n",
1286 " <td>3341.0</td>\n",
1287 " <td>29094.0</td>\n",
1288 " <td>38144.0</td>\n",
1305 " <td>2020-01-06</td>\n",
1306 " <td>overview</td>\n",
1307 " <td>K02000001</td>\n",
1308 " <td>United Kingdom</td>\n",
1317 " <td>2020-01-05</td>\n",
1318 " <td>overview</td>\n",
1319 " <td>K02000001</td>\n",
1320 " <td>United Kingdom</td>\n",
1329 " <td>2020-01-04</td>\n",
1330 " <td>overview</td>\n",
1331 " <td>K02000001</td>\n",
1332 " <td>United Kingdom</td>\n",
1341 " <td>2020-01-03</td>\n",
1342 " <td>overview</td>\n",
1343 " <td>K02000001</td>\n",
1344 " <td>United Kingdom</td>\n",
1353 " <td>2020-06-14</td>\n",
1354 " <td>overview</td>\n",
1355 " <td>K02000001</td>\n",
1356 " <td>United Kingdom</td>\n",
1357 " <td>393.0</td>\n",
1358 " <td>364.0</td>\n",
1359 " <td>821.0</td>\n",
1360 " <td>4907.0</td>\n",
1365 "<p>390 rows × 9 columns</p>\n",
1369 " date areaType areaCode areaName covidOccupiedMVBeds \\\n",
1370 "0 2021-01-26 overview K02000001 United Kingdom NaN \n",
1371 "1 2021-01-25 overview K02000001 United Kingdom 4032.0 \n",
1372 "2 2021-01-24 overview K02000001 United Kingdom 4077.0 \n",
1373 "3 2021-01-23 overview K02000001 United Kingdom 4066.0 \n",
1374 "4 2021-01-22 overview K02000001 United Kingdom 4076.0 \n",
1375 ".. ... ... ... ... ... \n",
1376 "385 2020-01-06 overview K02000001 United Kingdom NaN \n",
1377 "386 2020-01-05 overview K02000001 United Kingdom NaN \n",
1378 "387 2020-01-04 overview K02000001 United Kingdom NaN \n",
1379 "388 2020-01-03 overview K02000001 United Kingdom NaN \n",
1380 "389 2020-06-14 overview K02000001 United Kingdom 393.0 \n",
1382 " newAdmissions newCasesBySpecimenDate hospitalCases \\\n",
1384 "1 NaN 4482.0 NaN \n",
1385 "2 NaN 14266.0 37561.0 \n",
1386 "3 NaN 20495.0 37266.0 \n",
1387 "4 3341.0 29094.0 38144.0 \n",
1388 ".. ... ... ... \n",
1389 "385 NaN NaN NaN \n",
1390 "386 NaN NaN NaN \n",
1391 "387 NaN NaN NaN \n",
1392 "388 NaN NaN NaN \n",
1393 "389 364.0 821.0 4907.0 \n",
1395 " newDeaths28DaysByPublishDate \n",
1408 "[390 rows x 9 columns]"
1411 "execution_count": 347,
1413 "output_type": "execute_result"
1417 "uk_data = pd.read_csv('uk_data.csv', \n",
1418 " parse_dates=[0], dayfirst=True)\n",
1423 "cell_type": "code",
1424 "execution_count": 348,
1426 "Collapsed": "false"
1434 " .dataframe tbody tr th:only-of-type {\n",
1435 " vertical-align: middle;\n",
1438 " .dataframe tbody tr th {\n",
1439 " vertical-align: top;\n",
1442 " .dataframe thead th {\n",
1443 " text-align: right;\n",
1446 "<table border=\"1\" class=\"dataframe\">\n",
1448 " <tr style=\"text-align: right;\">\n",
1451 " <th>areaType</th>\n",
1452 " <th>areaCode</th>\n",
1453 " <th>areaName</th>\n",
1454 " <th>newPCRTestsByPublishDate</th>\n",
1455 " <th>newTestsByPublishDate</th>\n",
1456 " <th>newPillarOneTwoTestsByPublishDate</th>\n",
1462 " <td>2021-01-25</td>\n",
1463 " <td>overview</td>\n",
1464 " <td>K02000001</td>\n",
1465 " <td>United Kingdom</td>\n",
1466 " <td>196510.0</td>\n",
1467 " <td>546734</td>\n",
1468 " <td>531104</td>\n",
1472 " <td>2021-01-24</td>\n",
1473 " <td>overview</td>\n",
1474 " <td>K02000001</td>\n",
1475 " <td>United Kingdom</td>\n",
1476 " <td>262111.0</td>\n",
1477 " <td>412775</td>\n",
1478 " <td>394479</td>\n",
1482 " <td>2021-01-23</td>\n",
1483 " <td>overview</td>\n",
1484 " <td>K02000001</td>\n",
1485 " <td>United Kingdom</td>\n",
1486 " <td>389209.0</td>\n",
1487 " <td>486425</td>\n",
1488 " <td>465231</td>\n",
1492 " <td>2021-01-22</td>\n",
1493 " <td>overview</td>\n",
1494 " <td>K02000001</td>\n",
1495 " <td>United Kingdom</td>\n",
1496 " <td>401075.0</td>\n",
1497 " <td>631901</td>\n",
1498 " <td>608829</td>\n",
1502 " <td>2021-01-21</td>\n",
1503 " <td>overview</td>\n",
1504 " <td>K02000001</td>\n",
1505 " <td>United Kingdom</td>\n",
1506 " <td>439408.0</td>\n",
1507 " <td>668989</td>\n",
1508 " <td>644537</td>\n",
1522 " <td>2020-04-03</td>\n",
1523 " <td>overview</td>\n",
1524 " <td>K02000001</td>\n",
1525 " <td>United Kingdom</td>\n",
1527 " <td>14629</td>\n",
1528 " <td>14293</td>\n",
1532 " <td>2020-04-02</td>\n",
1533 " <td>overview</td>\n",
1534 " <td>K02000001</td>\n",
1535 " <td>United Kingdom</td>\n",
1537 " <td>13623</td>\n",
1538 " <td>13457</td>\n",
1542 " <td>2020-04-01</td>\n",
1543 " <td>overview</td>\n",
1544 " <td>K02000001</td>\n",
1545 " <td>United Kingdom</td>\n",
1547 " <td>11947</td>\n",
1548 " <td>11924</td>\n",
1552 " <td>2020-03-31</td>\n",
1553 " <td>overview</td>\n",
1554 " <td>K02000001</td>\n",
1555 " <td>United Kingdom</td>\n",
1557 " <td>11896</td>\n",
1558 " <td>11896</td>\n",
1562 " <td>2020-06-14</td>\n",
1563 " <td>overview</td>\n",
1564 " <td>K02000001</td>\n",
1565 " <td>United Kingdom</td>\n",
1566 " <td>69529.0</td>\n",
1567 " <td>83965</td>\n",
1568 " <td>66688</td>\n",
1572 "<p>301 rows × 7 columns</p>\n",
1576 " date areaType areaCode areaName newPCRTestsByPublishDate \\\n",
1577 "0 2021-01-25 overview K02000001 United Kingdom 196510.0 \n",
1578 "1 2021-01-24 overview K02000001 United Kingdom 262111.0 \n",
1579 "2 2021-01-23 overview K02000001 United Kingdom 389209.0 \n",
1580 "3 2021-01-22 overview K02000001 United Kingdom 401075.0 \n",
1581 "4 2021-01-21 overview K02000001 United Kingdom 439408.0 \n",
1582 ".. ... ... ... ... ... \n",
1583 "296 2020-04-03 overview K02000001 United Kingdom NaN \n",
1584 "297 2020-04-02 overview K02000001 United Kingdom NaN \n",
1585 "298 2020-04-01 overview K02000001 United Kingdom NaN \n",
1586 "299 2020-03-31 overview K02000001 United Kingdom NaN \n",
1587 "300 2020-06-14 overview K02000001 United Kingdom 69529.0 \n",
1589 " newTestsByPublishDate newPillarOneTwoTestsByPublishDate \n",
1590 "0 546734 531104 \n",
1591 "1 412775 394479 \n",
1592 "2 486425 465231 \n",
1593 "3 631901 608829 \n",
1594 "4 668989 644537 \n",
1596 "296 14629 14293 \n",
1597 "297 13623 13457 \n",
1598 "298 11947 11924 \n",
1599 "299 11896 11896 \n",
1600 "300 83965 66688 \n",
1602 "[301 rows x 7 columns]"
1605 "execution_count": 348,
1607 "output_type": "execute_result"
1611 "test_data = pd.read_csv('test_data.csv', \n",
1612 " parse_dates=[0], dayfirst=True)\n",
1617 "cell_type": "code",
1618 "execution_count": 349,
1620 "Collapsed": "false"
1626 "Index(['date', 'areaType', 'areaCode', 'areaName', 'newPCRTestsByPublishDate',\n",
1627 " 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate'],\n",
1631 "execution_count": 349,
1633 "output_type": "execute_result"
1641 "cell_type": "code",
1642 "execution_count": 350,
1644 "Collapsed": "false"
1648 "uk_data = uk_data.merge(test_data[['date', 'newPCRTestsByPublishDate',\n",
1649 " 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate']], how='outer', on='date')"
1653 "cell_type": "code",
1654 "execution_count": 351,
1656 "Collapsed": "false"
1662 "Index(['date', 'areaType', 'areaCode', 'areaName', 'covidOccupiedMVBeds',\n",
1663 " 'newAdmissions', 'newCasesBySpecimenDate', 'hospitalCases',\n",
1664 " 'newDeaths28DaysByPublishDate', 'newPCRTestsByPublishDate',\n",
1665 " 'newTestsByPublishDate', 'newPillarOneTwoTestsByPublishDate'],\n",
1669 "execution_count": 351,
1671 "output_type": "execute_result"
1679 "cell_type": "code",
1680 "execution_count": 352,
1682 "Collapsed": "false"
1686 "uk_data.rename(\n",
1688 " 'covidOccupiedMVBeds': 'ventilator_beds',\n",
1689 " 'newCasesBySpecimenDate': 'new_cases',\n",
1690 " 'hospitalCases': 'hospital_cases', \n",
1691 " 'newDeaths28DaysByPublishDate': 'new_deaths',\n",
1692 " 'newAdmissions': 'new_admissions',\n",
1693 " 'newPCRTestsByPublishDate': 'new_pcr_tests',\n",
1694 " 'newTestsByPublishDate': 'new_tests',\n",
1695 " 'newPillarOneTwoTestsByPublishDate': 'new_pillar_1_2_tests'\n",
1700 "cell_type": "code",
1701 "execution_count": 353,
1703 "Collapsed": "false"
1709 "date datetime64[ns]\n",
1710 "areaType object\n",
1711 "areaCode object\n",
1712 "areaName object\n",
1713 "ventilator_beds float64\n",
1714 "new_admissions float64\n",
1715 "new_cases float64\n",
1716 "hospital_cases float64\n",
1717 "new_deaths int64\n",
1718 "new_pcr_tests float64\n",
1719 "new_tests float64\n",
1720 "new_pillar_1_2_tests float64\n",
1724 "execution_count": 353,
1726 "output_type": "execute_result"
1734 "cell_type": "code",
1735 "execution_count": 354,
1737 "Collapsed": "false"
1743 "Index(['date', 'areaType', 'areaCode', 'areaName', 'ventilator_beds',\n",
1744 " 'new_admissions', 'new_cases', 'hospital_cases', 'new_deaths',\n",
1745 " 'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'],\n",
1749 "execution_count": 354,
1751 "output_type": "execute_result"
1759 "cell_type": "code",
1760 "execution_count": 355,
1762 "Collapsed": "false"
1766 "uk_data[['date', \n",
1767 " 'hospital_cases', 'ventilator_beds',\n",
1768 " 'new_cases', 'new_deaths', \n",
1769 " 'hospital_cases', 'new_admissions',\n",
1770 " 'new_pcr_tests', 'new_tests', 'new_pillar_1_2_tests'\n",
1774 " if_exists='replace',\n",
1776 " chunksize=500,\n",
1778 " \"date\": Date,\n",
1779 " \"hospital_cases\": Integer,\n",
1780 " \"ventilator_beds\": Integer,\n",
1781 " \"new_cases\": Integer,\n",
1782 " \"hospital_cases\": Integer,\n",
1783 " \"new_deaths\": Integer,\n",
1784 " \"new_admissions\": Integer,\n",
1785 " 'new_pcr_tests': Integer, \n",
1786 " 'new_tests': Integer, \n",
1787 " 'new_pillar_1_2_tests': Integer\n",
1793 "cell_type": "code",
1794 "execution_count": 356,
1796 "Collapsed": "false"
1800 "# %sql select * from uk_data order by date desc limit 10"
1804 "cell_type": "code",
1805 "execution_count": 424,
1807 "Collapsed": "false"
1811 "query_string = '''drop table if exists uk_data_7;\n",
1812 "create table uk_data_7 \n",
1813 "(date date primary key,\n",
1814 " hospital_cases real,\n",
1815 " ventilator_beds real,\n",
1816 " new_cases real,\n",
1817 " new_deaths real,\n",
1818 " new_admissions real,\n",
1819 " new_pcr_tests real,\n",
1820 " new_tests real,\n",
1821 " new_pillar_1_2_tests real\n",
1824 "with engine.connect() as connection:\n",
1825 " connection.execute(query_string)"
1829 "cell_type": "code",
1830 "execution_count": 425,
1834 "update_string = '''with ownd as (\n",
1836 " avg(hospital_cases) over wnd as w_hospital_cases,\n",
1837 " avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
1838 " avg(new_cases) over wnd as w_new_cases,\n",
1839 " avg(new_deaths) over wnd as w_new_deaths,\n",
1840 " avg(new_admissions) over wnd as w_new_admissions,\n",
1841 " avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
1842 " avg(new_tests) over wnd as w_new_tests,\n",
1843 " avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
1844 " count(*) over wnd as w_size\n",
1846 " window wnd as (\n",
1847 " order by uk_data.date\n",
1848 " rows between 3 preceding and 3 following\n",
1851 "insert into uk_data_7(date, \n",
1852 " hospital_cases, \n",
1853 " ventilator_beds, \n",
1856 " new_admissions, \n",
1857 " new_pcr_tests, \n",
1859 " new_pillar_1_2_tests\n",
1862 " w_hospital_cases,\n",
1863 " w_ventilator_beds,\n",
1866 " w_new_admissions,\n",
1867 " w_new_pcr_tests,\n",
1869 " w_new_pillar_1_2_tests\n",
1871 " where w_size = 7\n",
1873 "with engine.connect() as connection:\n",
1874 " connection.execute(update_string)"
1878 "cell_type": "code",
1879 "execution_count": 398,
1881 "Collapsed": "false"
1885 "# %%sql insert into uk_data_7(date, ventilator_beds, new_cases, hospital_cases, new_deaths, new_admissions)\n",
1887 "# select date, \n",
1888 "# avg(ventilator_beds) over (order by date rows between 6 preceding and current row)\n",
1894 "cell_type": "code",
1895 "execution_count": 399,
1897 "Collapsed": "false"
1901 "# query_string = '''insert into uk_data_7(date, hospital_cases)\n",
1902 "# select uk_data.date, \n",
1903 "# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as hospital_cases\n",
1904 "# from uk_data'''\n",
1905 "# with engine.connect() as connection:\n",
1906 "# connection.execute(query_string) "
1910 "cell_type": "code",
1911 "execution_count": 360,
1913 "Collapsed": "false"
1919 "# (select uk_data.date as date7, \n",
1920 "# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 6 preceding and current row) as nc7\n",
1923 "# update uk_data_7\n",
1924 "# set ventilator_beds = nc7\n",
1926 "# where uk_data_7.date = m7.date7"
1930 "cell_type": "code",
1931 "execution_count": 361,
1933 "Collapsed": "false"
1938 "output_type": "stream",
1940 " * postgresql://covid:***@localhost/covid\n",
1941 "390 rows affected.\n"
1950 "execution_count": 361,
1952 "output_type": "execute_result"
1958 "# (select uk_data.date as date7, \n",
1959 "# avg(uk_data.ventilator_beds) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7\n",
1962 "# update uk_data_7\n",
1963 "# set ventilator_beds = nc7\n",
1965 "# where uk_data_7.date = m7.date7"
1969 "cell_type": "code",
1970 "execution_count": 362,
1972 "Collapsed": "false"
1977 "output_type": "stream",
1979 " * postgresql://covid:***@localhost/covid\n",
1980 "390 rows affected.\n"
1989 "execution_count": 362,
1991 "output_type": "execute_result"
1997 "# (select uk_data.date as date7, \n",
1998 "# avg(uk_data.new_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as nc7\n",
2001 "# update uk_data_7\n",
2002 "# set new_cases = nc7\n",
2004 "# where uk_data_7.date = m7.date7"
2008 "cell_type": "code",
2009 "execution_count": 363,
2011 "Collapsed": "false"
2016 "output_type": "stream",
2018 " * postgresql://covid:***@localhost/covid\n",
2019 "390 rows affected.\n"
2028 "execution_count": 363,
2030 "output_type": "execute_result"
2036 "# (select uk_data.date as date7, \n",
2037 "# avg(uk_data.hospital_cases) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
2040 "# update uk_data_7\n",
2041 "# set hospital_cases = d7\n",
2043 "# where uk_data_7.date = m7.date7"
2047 "cell_type": "code",
2048 "execution_count": 364,
2050 "Collapsed": "false"
2055 "output_type": "stream",
2057 " * postgresql://covid:***@localhost/covid\n",
2058 "390 rows affected.\n"
2067 "execution_count": 364,
2069 "output_type": "execute_result"
2075 "# (select uk_data.date as date7, \n",
2076 "# avg(uk_data.new_deaths) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
2079 "# update uk_data_7\n",
2080 "# set new_deaths = d7\n",
2082 "# where uk_data_7.date = m7.date7"
2086 "cell_type": "code",
2087 "execution_count": 365,
2089 "Collapsed": "false"
2094 "output_type": "stream",
2096 " * postgresql://covid:***@localhost/covid\n",
2097 "390 rows affected.\n"
2106 "execution_count": 365,
2108 "output_type": "execute_result"
2114 "# (select uk_data.date as date7, \n",
2115 "# avg(uk_data.new_admissions) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
2118 "# update uk_data_7\n",
2119 "# set new_admissions = d7\n",
2121 "# where uk_data_7.date = m7.date7"
2125 "cell_type": "code",
2126 "execution_count": 366,
2128 "Collapsed": "false"
2133 "output_type": "stream",
2135 " * postgresql://covid:***@localhost/covid\n",
2136 "390 rows affected.\n"
2145 "execution_count": 366,
2147 "output_type": "execute_result"
2153 "# (select uk_data.date as date7, \n",
2154 "# avg(uk_data.new_pcr_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
2157 "# update uk_data_7\n",
2158 "# set new_pcr_tests = d7\n",
2160 "# where uk_data_7.date = m7.date7"
2164 "cell_type": "code",
2165 "execution_count": 367,
2167 "Collapsed": "false"
2172 "output_type": "stream",
2174 " * postgresql://covid:***@localhost/covid\n",
2175 "390 rows affected.\n"
2184 "execution_count": 367,
2186 "output_type": "execute_result"
2192 "# (select uk_data.date as date7, \n",
2193 "# avg(uk_data.new_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
2196 "# update uk_data_7\n",
2197 "# set new_tests = d7\n",
2199 "# where uk_data_7.date = m7.date7"
2203 "cell_type": "code",
2204 "execution_count": 309,
2206 "Collapsed": "false"
2212 "# (select uk_data.date as date7, \n",
2213 "# avg(uk_data.new_pillar_1_2_tests) over (order by uk_data.date rows between 3 preceding and 3 following) as d7\n",
2216 "# update uk_data_7\n",
2217 "# set new_pillar_1_2_tests = d7\n",
2219 "# where uk_data_7.date = m7.date7"
2223 "cell_type": "code",
2224 "execution_count": 310,
2226 "Collapsed": "false"
2231 "output_type": "stream",
2233 " * postgresql://covid:***@localhost/covid\n",
2234 "0 rows affected.\n"
2243 "execution_count": 310,
2245 "output_type": "execute_result"
2251 "# ( select date, \n",
2252 "# avg(new_pillar_1_2_tests)\n",
2253 "# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests\n",
2256 "# update uk_data_7\n",
2257 "# set new_pillar_1_2_tests = wnd.a_new_pillar_1_2_tests\n",
2259 "# where uk_data_7.date = wnd.date\n",
2260 "# and (select count(*) from wnd) = 7"
2264 "cell_type": "code",
2265 "execution_count": 379,
2267 "Collapsed": "false"
2272 "output_type": "stream",
2274 " * postgresql://covid:***@localhost/covid\n",
2275 "10 rows affected.\n"
2285 " <th>new_pillar_1_2_tests</th>\n",
2286 " <th>a_new_pillar_1_2_tests</th>\n",
2287 " <th>a_count</th>\n",
2292 " <td>2021-01-26</td>\n",
2294 " <td>463604.666666666667</td>\n",
2298 " <td>2021-01-25</td>\n",
2300 " <td>499910.750000000000</td>\n",
2304 " <td>2021-01-24</td>\n",
2306 " <td>528836.000000000000</td>\n",
2310 " <td>2021-01-23</td>\n",
2312 " <td>543566.833333333333</td>\n",
2316 " <td>2021-01-22</td>\n",
2318 " <td>545780.285714285714</td>\n",
2322 " <td>2021-01-21</td>\n",
2324 " <td>546626.571428571429</td>\n",
2328 " <td>2021-01-20</td>\n",
2330 " <td>546806.285714285714</td>\n",
2334 " <td>2021-01-19</td>\n",
2336 " <td>547402.428571428571</td>\n",
2340 " <td>2021-01-18</td>\n",
2342 " <td>542617.857142857143</td>\n",
2346 " <td>2021-01-17</td>\n",
2348 " <td>545088.142857142857</td>\n",
2355 "[(datetime.date(2021, 1, 26), None, Decimal('463604.666666666667'), 3),\n",
2356 " (datetime.date(2021, 1, 25), None, Decimal('499910.750000000000'), 4),\n",
2357 " (datetime.date(2021, 1, 24), None, Decimal('528836.000000000000'), 5),\n",
2358 " (datetime.date(2021, 1, 23), None, Decimal('543566.833333333333'), 6),\n",
2359 " (datetime.date(2021, 1, 22), None, Decimal('545780.285714285714'), 7),\n",
2360 " (datetime.date(2021, 1, 21), None, Decimal('546626.571428571429'), 7),\n",
2361 " (datetime.date(2021, 1, 20), None, Decimal('546806.285714285714'), 7),\n",
2362 " (datetime.date(2021, 1, 19), None, Decimal('547402.428571428571'), 7),\n",
2363 " (datetime.date(2021, 1, 18), None, Decimal('542617.857142857143'), 7),\n",
2364 " (datetime.date(2021, 1, 17), None, Decimal('545088.142857142857'), 7)]"
2367 "execution_count": 379,
2369 "output_type": "execute_result"
2375 "# ( select date, \n",
2376 "# avg(new_pillar_1_2_tests)\n",
2377 "# over (order by uk_data.date rows between 3 preceding and 3 following) as a_new_pillar_1_2_tests,\n",
2378 "# count(new_pillar_1_2_tests)\n",
2379 "# over (order by uk_data.date rows between 3 preceding and 3 following) as a_count\n",
2382 "# select uk_data_7.date, new_pillar_1_2_tests, wnd.a_new_pillar_1_2_tests, wnd.a_count\n",
2383 "# from uk_data_7, wnd\n",
2384 "# where uk_data_7.date = wnd.date\n",
2385 "# order by uk_data_7.date desc limit 10\n",
2387 "# select date, \n",
2388 "# count(*) over wnd as w_size\n",
2390 "# window wnd as (\n",
2391 "# order by uk_data.date\n",
2392 "# rows between 3 preceding and 3 following\n",
2397 "cell_type": "code",
2398 "execution_count": 401,
2400 "Collapsed": "false"
2405 "# select date, \n",
2406 "# count(*) over wnd as w_size\n",
2408 "# window wnd as (\n",
2409 "# order by uk_data.date\n",
2410 "# rows between 3 preceding and 3 following\n",
2412 "# order by date desc limit 10"
2416 "cell_type": "code",
2417 "execution_count": 407,
2422 "output_type": "stream",
2424 " * postgresql://covid:***@localhost/covid\n",
2425 "0 rows affected.\n"
2434 "execution_count": 407,
2436 "output_type": "execute_result"
2441 "# with ownd as (\n",
2443 "# avg(hospital_cases) over wnd as w_hospital_cases,\n",
2444 "# avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
2445 "# avg(new_cases) over wnd as w_new_cases,\n",
2446 "# avg(new_deaths) over wnd as w_new_deaths,\n",
2447 "# avg(new_admissions) over wnd as w_new_admissions,\n",
2448 "# avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
2449 "# avg(new_tests) over wnd as w_new_tests,\n",
2450 "# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
2451 "# count(*) over wnd as w_size\n",
2453 "# window wnd as (\n",
2454 "# order by uk_data.date\n",
2455 "# rows between 3 preceding and 3 following\n",
2457 "# insert into uk_data_7(date, \n",
2458 "# hospital_cases, \n",
2459 "# ventilator_beds, \n",
2462 "# new_admissions, \n",
2463 "# new_pcr_tests, \n",
2465 "# new_pillar_1_2_tests\n",
2467 "# (select date,\n",
2468 "# avg(hospital_cases) over wnd as w_hospital_cases,\n",
2469 "# avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
2470 "# avg(new_cases) over wnd as w_new_cases,\n",
2471 "# avg(new_deaths) over wnd as w_new_deaths,\n",
2472 "# avg(new_admissions) over wnd as w_new_admissions,\n",
2473 "# avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
2474 "# avg(new_tests) over wnd as w_new_tests,\n",
2475 "# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
2476 "# count(*) over wnd as w_size\n",
2478 "# window wnd as (\n",
2479 "# order by uk_data.date\n",
2480 "# rows between 3 preceding and 3 following\n",
2483 "# set date = ownd.date,\n",
2484 "# hospital_cases = w_hospital_cases,\n",
2485 "# ventilator_beds = w_ventilator_beds,\n",
2486 "# new_cases = w_new_cases,\n",
2487 "# new_deaths = w_new_deaths,\n",
2488 "# new_admissions = w_new_admissions,\n",
2489 "# new_pcr_tests = w_new_pcr_tests,\n",
2490 "# new_tests = w_new_tests,\n",
2491 "# new_pillar_1_2_tests = w_new_pillar_1_2_tests\n",
2493 "# where w_size = 7"
2497 "cell_type": "code",
2498 "execution_count": 417,
2503 "output_type": "stream",
2505 " * postgresql://covid:***@localhost/covid\n",
2506 "384 rows affected.\n"
2515 "execution_count": 417,
2517 "output_type": "execute_result"
2522 "# with ownd as (\n",
2524 "# avg(hospital_cases) over wnd as w_hospital_cases,\n",
2525 "# avg(ventilator_beds) over wnd as w_ventilator_beds,\n",
2526 "# avg(new_cases) over wnd as w_new_cases,\n",
2527 "# avg(new_deaths) over wnd as w_new_deaths,\n",
2528 "# avg(new_admissions) over wnd as w_new_admissions,\n",
2529 "# avg(new_pcr_tests) over wnd as w_new_pcr_tests,\n",
2530 "# avg(new_tests) over wnd as w_new_tests,\n",
2531 "# avg(new_pillar_1_2_tests) over wnd as w_new_pillar_1_2_tests,\n",
2532 "# count(*) over wnd as w_size\n",
2534 "# window wnd as (\n",
2535 "# order by uk_data.date\n",
2536 "# rows between 3 preceding and 3 following\n",
2539 "# insert into uk_data_7(date, \n",
2540 "# hospital_cases, \n",
2541 "# ventilator_beds, \n",
2544 "# new_admissions, \n",
2545 "# new_pcr_tests, \n",
2547 "# new_pillar_1_2_tests\n",
2549 "# (select date,\n",
2550 "# w_hospital_cases,\n",
2551 "# w_ventilator_beds,\n",
2553 "# w_new_deaths,\n",
2554 "# w_new_admissions,\n",
2555 "# w_new_pcr_tests,\n",
2557 "# w_new_pillar_1_2_tests\n",
2559 "# where w_size = 7\n",
2564 "cell_type": "code",
2565 "execution_count": 427,
2567 "Collapsed": "false"
2572 "output_type": "stream",
2574 " * postgresql://covid:***@localhost/covid\n",
2575 "10 rows affected.\n"
2585 " <th>hospital_cases</th>\n",
2586 " <th>ventilator_beds</th>\n",
2587 " <th>new_cases</th>\n",
2588 " <th>new_deaths</th>\n",
2589 " <th>new_admissions</th>\n",
2590 " <th>new_pcr_tests</th>\n",
2591 " <th>new_tests</th>\n",
2592 " <th>new_pillar_1_2_tests</th>\n",
2597 " <td>2021-01-23</td>\n",
2598 " <td>37915.6</td>\n",
2599 " <td>4027.3333</td>\n",
2600 " <td>22463.666</td>\n",
2601 " <td>1241.7142</td>\n",
2602 " <td>3651.6667</td>\n",
2603 " <td>353735.34</td>\n",
2604 " <td>565312.3</td>\n",
2605 " <td>543566.8</td>\n",
2608 " <td>2021-01-22</td>\n",
2609 " <td>38057.168</td>\n",
2610 " <td>4015.8572</td>\n",
2611 " <td>24870.428</td>\n",
2612 " <td>1238.7142</td>\n",
2613 " <td>3771.75</td>\n",
2614 " <td>354323.16</td>\n",
2615 " <td>567830.3</td>\n",
2616 " <td>545780.3</td>\n",
2619 " <td>2021-01-21</td>\n",
2620 " <td>38217.715</td>\n",
2621 " <td>3999.2856</td>\n",
2622 " <td>30620.428</td>\n",
2623 " <td>1239.7142</td>\n",
2624 " <td>3828.2</td>\n",
2625 " <td>363750.16</td>\n",
2626 " <td>569716.7</td>\n",
2627 " <td>546626.56</td>\n",
2630 " <td>2021-01-20</td>\n",
2631 " <td>38294.0</td>\n",
2632 " <td>3969.8572</td>\n",
2633 " <td>32707.428</td>\n",
2634 " <td>1248.4286</td>\n",
2635 " <td>3811.0</td>\n",
2636 " <td>372037.44</td>\n",
2637 " <td>570458.1</td>\n",
2638 " <td>546806.3</td>\n",
2641 " <td>2021-01-19</td>\n",
2642 " <td>38348.43</td>\n",
2643 " <td>3939.5715</td>\n",
2644 " <td>34143.285</td>\n",
2645 " <td>1240.8572</td>\n",
2646 " <td>3825.8572</td>\n",
2647 " <td>374771.16</td>\n",
2648 " <td>571336.56</td>\n",
2649 " <td>547402.44</td>\n",
2652 " <td>2021-01-18</td>\n",
2653 " <td>38338.145</td>\n",
2654 " <td>3898.5715</td>\n",
2655 " <td>35766.57</td>\n",
2656 " <td>1223.5714</td>\n",
2657 " <td>3872.8572</td>\n",
2658 " <td>377019.28</td>\n",
2659 " <td>566901.1</td>\n",
2660 " <td>542617.9</td>\n",
2663 " <td>2021-01-17</td>\n",
2664 " <td>38292.285</td>\n",
2665 " <td>3857.4285</td>\n",
2666 " <td>37337.855</td>\n",
2667 " <td>1217.5714</td>\n",
2668 " <td>3939.0</td>\n",
2669 " <td>389469.84</td>\n",
2670 " <td>571242.56</td>\n",
2671 " <td>545088.1</td>\n",
2674 " <td>2021-01-16</td>\n",
2675 " <td>38119.855</td>\n",
2676 " <td>3810.7144</td>\n",
2677 " <td>38824.0</td>\n",
2678 " <td>1181.0</td>\n",
2679 " <td>3967.2856</td>\n",
2680 " <td>395066.72</td>\n",
2681 " <td>569445.0</td>\n",
2682 " <td>541938.0</td>\n",
2685 " <td>2021-01-15</td>\n",
2686 " <td>37858.715</td>\n",
2687 " <td>3746.2856</td>\n",
2688 " <td>40268.57</td>\n",
2689 " <td>1128.5714</td>\n",
2690 " <td>4029.2856</td>\n",
2691 " <td>403526.72</td>\n",
2692 " <td>570188.7</td>\n",
2693 " <td>540199.9</td>\n",
2696 " <td>2021-01-14</td>\n",
2697 " <td>37505.57</td>\n",
2698 " <td>3667.2856</td>\n",
2699 " <td>41989.145</td>\n",
2700 " <td>1118.5714</td>\n",
2701 " <td>4065.2856</td>\n",
2702 " <td>412496.44</td>\n",
2703 " <td>567178.7</td>\n",
2704 " <td>537417.1</td>\n",
2710 "[(datetime.date(2021, 1, 23), 37915.6, 4027.3333, 22463.666, 1241.7142, 3651.6667, 353735.34, 565312.3, 543566.8),\n",
2711 " (datetime.date(2021, 1, 22), 38057.168, 4015.8572, 24870.428, 1238.7142, 3771.75, 354323.16, 567830.3, 545780.3),\n",
2712 " (datetime.date(2021, 1, 21), 38217.715, 3999.2856, 30620.428, 1239.7142, 3828.2, 363750.16, 569716.7, 546626.56),\n",
2713 " (datetime.date(2021, 1, 20), 38294.0, 3969.8572, 32707.428, 1248.4286, 3811.0, 372037.44, 570458.1, 546806.3),\n",
2714 " (datetime.date(2021, 1, 19), 38348.43, 3939.5715, 34143.285, 1240.8572, 3825.8572, 374771.16, 571336.56, 547402.44),\n",
2715 " (datetime.date(2021, 1, 18), 38338.145, 3898.5715, 35766.57, 1223.5714, 3872.8572, 377019.28, 566901.1, 542617.9),\n",
2716 " (datetime.date(2021, 1, 17), 38292.285, 3857.4285, 37337.855, 1217.5714, 3939.0, 389469.84, 571242.56, 545088.1),\n",
2717 " (datetime.date(2021, 1, 16), 38119.855, 3810.7144, 38824.0, 1181.0, 3967.2856, 395066.72, 569445.0, 541938.0),\n",
2718 " (datetime.date(2021, 1, 15), 37858.715, 3746.2856, 40268.57, 1128.5714, 4029.2856, 403526.72, 570188.7, 540199.9),\n",
2719 " (datetime.date(2021, 1, 14), 37505.57, 3667.2856, 41989.145, 1118.5714, 4065.2856, 412496.44, 567178.7, 537417.1)]"
2722 "execution_count": 427,
2724 "output_type": "execute_result"
2728 "%sql select * from uk_data_7 order by date desc limit 10"
2732 "cell_type": "code",
2733 "execution_count": 428,
2735 "Collapsed": "false"
2740 "output_type": "stream",
2742 " * postgresql://covid:***@localhost/covid\n",
2743 "10 rows affected.\n"
2753 " <th>hospital_cases</th>\n",
2754 " <th>ventilator_beds</th>\n",
2755 " <th>new_cases</th>\n",
2756 " <th>new_deaths</th>\n",
2757 " <th>new_admissions</th>\n",
2758 " <th>new_pcr_tests</th>\n",
2759 " <th>new_tests</th>\n",
2760 " <th>new_pillar_1_2_tests</th>\n",
2765 " <td>2021-01-26</td>\n",
2776 " <td>2021-01-25</td>\n",
2782 " <td>196510</td>\n",
2783 " <td>546734</td>\n",
2784 " <td>531104</td>\n",
2787 " <td>2021-01-24</td>\n",
2788 " <td>37561</td>\n",
2790 " <td>14266</td>\n",
2793 " <td>262111</td>\n",
2794 " <td>412775</td>\n",
2795 " <td>394479</td>\n",
2798 " <td>2021-01-23</td>\n",
2799 " <td>37266</td>\n",
2801 " <td>20495</td>\n",
2804 " <td>389209</td>\n",
2805 " <td>486425</td>\n",
2806 " <td>465231</td>\n",
2809 " <td>2021-01-22</td>\n",
2810 " <td>38144</td>\n",
2812 " <td>29094</td>\n",
2815 " <td>401075</td>\n",
2816 " <td>631901</td>\n",
2817 " <td>608829</td>\n",
2820 " <td>2021-01-21</td>\n",
2821 " <td>37957</td>\n",
2823 " <td>31430</td>\n",
2826 " <td>439408</td>\n",
2827 " <td>668989</td>\n",
2828 " <td>644537</td>\n",
2831 " <td>2021-01-20</td>\n",
2832 " <td>38650</td>\n",
2834 " <td>35015</td>\n",
2837 " <td>434099</td>\n",
2838 " <td>645050</td>\n",
2839 " <td>617221</td>\n",
2842 " <td>2021-01-19</td>\n",
2843 " <td>38765</td>\n",
2845 " <td>39311</td>\n",
2848 " <td>357850</td>\n",
2849 " <td>582938</td>\n",
2850 " <td>559061</td>\n",
2853 " <td>2021-01-18</td>\n",
2854 " <td>39181</td>\n",
2856 " <td>44732</td>\n",
2859 " <td>262499</td>\n",
2860 " <td>559939</td>\n",
2861 " <td>537028</td>\n",
2864 " <td>2021-01-17</td>\n",
2865 " <td>38095</td>\n",
2867 " <td>28875</td>\n",
2870 " <td>320122</td>\n",
2871 " <td>417965</td>\n",
2872 " <td>395737</td>\n",
2878 "[(datetime.date(2021, 1, 26), None, None, None, 1631, None, None, None, None),\n",
2879 " (datetime.date(2021, 1, 25), None, 4032, 4482, 592, None, 196510, 546734, 531104),\n",
2880 " (datetime.date(2021, 1, 24), 37561, 4077, 14266, 610, None, 262111, 412775, 394479),\n",
2881 " (datetime.date(2021, 1, 23), 37266, 4066, 20495, 1348, None, 389209, 486425, 465231),\n",
2882 " (datetime.date(2021, 1, 22), 38144, 4076, 29094, 1401, 3341, 401075, 631901, 608829),\n",
2883 " (datetime.date(2021, 1, 21), 37957, 3960, 31430, 1290, 3598, 439408, 668989, 644537),\n",
2884 " (datetime.date(2021, 1, 20), 38650, 3953, 35015, 1820, 4016, 434099, 645050, 617221),\n",
2885 " (datetime.date(2021, 1, 19), 38765, 3947, 39311, 1610, 4132, 357850, 582938, 559061),\n",
2886 " (datetime.date(2021, 1, 18), 39181, 3916, 44732, 599, 4054, 262499, 559939, 537028),\n",
2887 " (datetime.date(2021, 1, 17), 38095, 3871, 28875, 671, 3725, 320122, 417965, 395737)]"
2890 "execution_count": 428,
2892 "output_type": "execute_result"
2896 "%sql select * from uk_data order by date desc limit 10"
2900 "cell_type": "code",
2901 "execution_count": null,
2903 "Collapsed": "false"
2911 "formats": "ipynb,md"
2914 "display_name": "Python 3",
2915 "language": "python",
2919 "codemirror_mode": {
2923 "file_extension": ".py",
2924 "mimetype": "text/x-python",
2926 "nbconvert_exporter": "python",
2927 "pygments_lexer": "ipython3",