X-Git-Url: https://git.njae.me.uk/?p=covid19.git;a=blobdiff_plain;f=covid-old.ipynb;fp=covid-old.ipynb;h=954e0eacce989488eba6a4044135e53ac4a7b0ed;hp=0000000000000000000000000000000000000000;hb=5afedd66506be7575034ae6deebcfaa7c2ced978;hpb=4abff18d7988bdea04a267a08a0792ba570fe0bd diff --git a/covid-old.ipynb b/covid-old.ipynb new file mode 100644 index 0000000..954e0ea --- /dev/null +++ b/covid-old.ipynb @@ -0,0 +1,3562 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "import itertools\n", + "import collections\n", + "import json\n", + "import pandas as pd\n", + "import numpy as np\n", + "from scipy.stats import gmean\n", + "import datetime\n", + "\n", + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "DEATH_COUNT_THRESHOLD = 10\n", + "COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()\n", + "COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()\n", + "COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()\n", + "# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()\n", + "\n", + "COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA\n", + " 'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',\n", + " 'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',\n", + " 'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']\n", + "COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))\n", + "COUNTRIES_ALL = list(set(COUNTRIES_CORE + 
COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))" + ] + }, + { + "cell_type": "code", + "execution_count": 4843, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 553k 100 553k 0 0 564k 0 --:--:-- --:--:-- --:--:-- 564k\n" + ] + } + ], + "source": [ + "!curl https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 4844, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# First col is a date, treat geoId of NA as 'Namibia', not \"NA\" value\n", + "raw_data = pd.read_csv('covid.csv', \n", + " parse_dates=[0], dayfirst=True,\n", + " keep_default_na=False, na_values = [''],\n", + "# dtype = {'day': np.int64, \n", + "# 'month': np.int64, \n", + "# 'year': np.int64, \n", + "# 'cases': np.int64, \n", + "# 'deaths': np.int64, \n", + "# 'countriesAndTerritories': str, \n", + "# 'geoId': str, \n", + "# 'countryterritoryCode': str, \n", + "# 'popData2019': np.int64, \n", + "# 'continentExp': str, \n", + "# }\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 4845, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "89150" + ] + }, + "execution_count": 4845, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data.size" + ] + }, + { + "cell_type": "code", + "execution_count": 4846, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "raw_data.fillna(0, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 4847, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateRepyear_weekcases_weeklydeaths_weeklycountriesAndTerritoriesgeoIdcountryterritoryCodepopData2019continentExpnotification_rate_per_100000_population_14-days
02020-12-142020-50175771AfghanistanAFAFG38041757.0Asia9.01
12020-12-072020-491672137AfghanistanAFAFG38041757.0Asia7.22
22020-11-302020-48107368AfghanistanAFAFG38041757.0Asia6.42
32020-11-232020-47136869AfghanistanAFAFG38041757.0Asia6.66
42020-11-162020-46116461AfghanistanAFAFG38041757.0Asia4.65
\n", + "
" + ], + "text/plain": [ + " dateRep year_week cases_weekly deaths_weekly countriesAndTerritories \\\n", + "0 2020-12-14 2020-50 1757 71 Afghanistan \n", + "1 2020-12-07 2020-49 1672 137 Afghanistan \n", + "2 2020-11-30 2020-48 1073 68 Afghanistan \n", + "3 2020-11-23 2020-47 1368 69 Afghanistan \n", + "4 2020-11-16 2020-46 1164 61 Afghanistan \n", + "\n", + " geoId countryterritoryCode popData2019 continentExp \\\n", + "0 AF AFG 38041757.0 Asia \n", + "1 AF AFG 38041757.0 Asia \n", + "2 AF AFG 38041757.0 Asia \n", + "3 AF AFG 38041757.0 Asia \n", + "4 AF AFG 38041757.0 Asia \n", + "\n", + " notification_rate_per_100000_population_14-days \n", + "0 9.01 \n", + "1 7.22 \n", + "2 6.42 \n", + "3 6.66 \n", + "4 4.65 " + ] + }, + "execution_count": 4847, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4848, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "dateRep datetime64[ns]\n", + "year_week object\n", + "cases_weekly int64\n", + "deaths_weekly int64\n", + "countriesAndTerritories object\n", + "geoId object\n", + "countryterritoryCode object\n", + "popData2019 float64\n", + "continentExp object\n", + "notification_rate_per_100000_population_14-days float64\n", + "dtype: object" + ] + }, + "execution_count": 4848, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 4849, + "metadata": { + "Collapsed": "false" + }, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'Only a column name can be used for the key in a dtype mappings argument.'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in 
\u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;34m'countryterritoryCode'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m'popData2019'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mint64\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m 'continentExp': str })\n\u001b[0m", + "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors, **kwargs)\u001b[0m\n\u001b[1;32m 5855\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcol_name\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5856\u001b[0m raise KeyError(\n\u001b[0;32m-> 5857\u001b[0;31m \u001b[0;34m\"Only a column name can be used for the \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5858\u001b[0m \u001b[0;34m\"key in a dtype mappings argument.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5859\u001b[0m )\n", + "\u001b[0;31mKeyError\u001b[0m: 'Only a column name can be used for the key in a dtype mappings argument.'" + ] + } + ], + "source": [ + "# raw_data = raw_data.astype({'dateRep': np.datetime64, \n", + "# 'day': np.int64, \n", + "# 'month': np.int64, \n", + "# 'year': np.int64, \n", + "# 'cases': np.int64, \n", + "# 'deaths': np.int64, \n", + "# 'countriesAndTerritories': str, \n", + "# 'geoId': str, \n", + "# 'countryterritoryCode': str, \n", + "# 'popData2019': np.int64, \n", + "# 'continentExp': str })\n", + "raw_data = raw_data.astype({'dateRep': np.datetime64, \n", + " 'day': np.int64, \n", + " 'month': np.int64, \n", + " 'year': np.int64, \n", + " 'cases': np.int64, \n", + " 'deaths': np.int64, \n", + " 
'countriesAndTerritories': str, \n", + " 'geoId': str, \n", + " 'countryterritoryCode': str, \n", + " 'popData2019': np.int64, \n", + " 'continentExp': str })" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "raw_data.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "raw_data[((raw_data.geoId == 'UK') & (raw_data.dateRep >= '2020-07-10'))]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# raw_data = raw_data[~ ((raw_data.geoId == 'ES') & (raw_data.dateRep >= '2020-05-22'))]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "base_data = raw_data.set_index(['geoId', 'dateRep'])\n", + "base_data.sort_index(inplace=True)\n", + "base_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "base_data.loc['ES'].loc['2020-05-10':]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "countries = raw_data[['geoId', 'countriesAndTerritories', 'popData2019', 'continentExp']]\n", + "countries = countries[countries['popData2019'] != 0]  # missing populations were filled with 0 by raw_data.fillna(0), so compare with 0 rather than ''\n", + "countries = countries.drop_duplicates()\n", + "countries.set_index('geoId', inplace=True)\n", + "countries = countries.astype({'popData2019': 'int64'})\n", + "countries.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "countries.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + 
"countries[countries.countriesAndTerritories == 'Finland']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "countries.loc[COUNTRIES_OF_INTEREST]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "countries[countries.continentExp == 'America'].index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_by_date = base_data[['cases', 'deaths']]\n", + "data_by_date.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_by_date.loc['UK']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# data_by_date.deaths.drop_duplicates().sort_values().to_csv('dth.csv', header=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_by_date.groupby(level=0).cumsum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_by_date = data_by_date.merge(\n", + " data_by_date.groupby(level=0).cumsum(), \n", + " suffixes=('', '_culm'), \n", + " left_index=True, right_index=True)\n", + "data_by_date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_by_date = data_by_date.merge(\n", + " data_by_date[['cases', 'deaths']].groupby(level=0).diff(), \n", + " suffixes=('', '_diff'), \n", + " left_index=True, right_index=True)\n", + "data_by_date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + 
"outputs": [], + "source": [ + "data_by_date.loc['UK', '2020-04-17']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_by_date.loc['UK']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# days_since_threshold = data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD].groupby(level=0).cumcount()\n", + "# days_since_threshold.rename('since_threshold', inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "dbd = data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD].reset_index(level=1)\n", + "dbd['since_threshold'] = dbd.dateRep\n", + "dbd.set_index('dateRep', append=True, inplace=True)\n", + "dbd.sort_index(inplace=True)\n", + "days_since_threshold = dbd.groupby(level=0).diff().since_threshold.dt.days.fillna(0).astype(int).groupby(level=0).cumsum()\n", + "# days_since_threshold.groupby(level=0).cumsum()\n", + "\n", + "# days_since_threshold = dbd.rename('since_threshold')\n", + "days_since_threshold" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# days_since_threshold = (data_by_date[data_by_date.deaths_culm > DEATH_COUNT_THRESHOLD]\n", + "# .reset_index(level=1).groupby(level=0)\n", + "# .diff().dateRep.dt.days\n", + "# .groupby(level=0).cumcount()\n", + "# )\n", + "# days_since_threshold.rename('since_threshold', inplace=True)\n", + "# days_since_threshold" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + 
"outputs": [], + "source": [ + "data_since_threshold = data_by_date.merge(days_since_threshold, \n", + " left_index=True, right_index=True)\n", + "data_since_threshold" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_since_threshold = data_since_threshold.set_index('since_threshold', append=True\n", + " ).reorder_levels(['since_threshold', 'geoId', 'dateRep']\n", + " ).reset_index('dateRep')\n", + "data_since_threshold.sort_index(inplace=True)\n", + "data_since_threshold" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_since_threshold.loc[(slice(None), ['ES']), :].tail(8)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), ['deaths_culm']].unstack().plot(logy=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# deaths = data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT', 'IE']), ['deaths_culm']].unstack().xs('deaths_culm', axis=1, drop_level=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "deaths = data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_culm']].unstack().sort_index().xs('deaths_culm', axis=1, drop_level=True)\n", + "deaths_by_date = data_by_date.loc[COUNTRIES_ALL, ['deaths_culm']].unstack().sort_index().xs('deaths_culm', axis=1, drop_level=True).T" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "cases = data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['cases_culm']].unstack().sort_index().xs('cases_culm', axis=1, drop_level=True)\n", + "cases_by_date = data_by_date.loc[ COUNTRIES_ALL, ['cases_culm']].unstack().sort_index().xs('cases_culm', axis=1, drop_level=True).T" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "COUNTRIES_AMERICAS_DEAD = list(set(deaths.columns) & set(COUNTRIES_AMERICAS))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_since_threshold.reset_index().merge(countries, on='geoId').set_index(['since_threshold', 'geoId'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_since_threshold.reset_index().merge(countries, on='geoId').set_index(['since_threshold', 'geoId']).sort_index(inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_since_threshold_per_capita = data_since_threshold.reset_index().merge(countries, on='geoId').set_index(['since_threshold', 'geoId'])\n", + "data_since_threshold_per_capita['cases_culm_pc'] = data_since_threshold_per_capita.cases_culm / data_since_threshold_per_capita.popData2019\n", + "data_since_threshold_per_capita['deaths_culm_pc'] = data_since_threshold_per_capita.deaths_culm / data_since_threshold_per_capita.popData2019\n", + "data_since_threshold_per_capita" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "deaths_pc = data_since_threshold_per_capita.loc[(slice(None), ['UK', 'DE', 'IT', 'IE']), 
['deaths_culm_pc']].unstack().sort_index().xs('deaths_culm_pc', axis=1, drop_level=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "deaths_pc.index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "deaths_pc = data_since_threshold_per_capita.loc[(slice(None), COUNTRIES_ALL), ['deaths_culm_pc']].unstack().xs('deaths_culm_pc', axis=1, drop_level=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "deaths[COUNTRIES_CORE].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "deaths[COUNTRIES_FRIENDS].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title=\"Total deaths, linear\")\n", + "ax.set_xlabel(f\"Days since {DEATH_COUNT_THRESHOLD} deaths\")\n", + "for c in COUNTRIES_FRIENDS:\n", + " lvi = deaths[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths[c][lvi], s = f\"{c}: {deaths[c][lvi]:.0f}\")\n", + "# plt.savefig('covid_deaths_total_linear.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths[COUNTRIES_CORE].plot(figsize=(10, 6), title=\"Total deaths, linear\")\n", + "ax.set_xlabel(f\"Days since {DEATH_COUNT_THRESHOLD} deaths\")\n", + "for c in COUNTRIES_CORE:\n", + " lvi = deaths[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths[c][lvi], s = f\"{c}: {deaths[c][lvi]:.0f}\")\n", + "# plt.savefig('covid_deaths_total_linear.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + 
"Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths_by_date.loc['2020-03-15':, COUNTRIES_CORE].plot(figsize=(10, 6), title=\"Total deaths, linear\")\n", + "# data_by_date.loc[COUNTRIES_CORE]\n", + "# deaths_by_date = data_by_date.loc[COUNTRIES_ALL, ['deaths_culm']].unstack().sort_index().xs('deaths_culm', axis=1, drop_level=True)\n", + "ax.set_xlabel(f\"Date\")\n", + "for c in COUNTRIES_CORE:\n", + " lvi = deaths_by_date[c].last_valid_index()\n", + " ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_by_date[c][lvi], s = f\"{c}: {deaths_by_date[c][lvi]:.0f}\")\n", + "plt.savefig('covid_deaths_total_linear.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "deaths_prime = deaths[COUNTRIES_CORE].copy()\n", + "deaths_prime.loc[73:, 'ES'] = np.NaN\n", + "# deaths_prime['ES'][70:]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths_prime[COUNTRIES_CORE].plot(figsize=(10, 6), title=\"Total deaths, linear\")\n", + "for c in COUNTRIES_CORE:\n", + " lvi = deaths_prime[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths_prime[c][lvi], s = f\"{c}: {deaths_prime[c][lvi]:.0f}\")\n", + "# plt.savefig('covid_deaths_total_linear.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = cases[COUNTRIES_CORE].plot(figsize=(10, 6), title=\"Total cases, linear\")\n", + "for c in COUNTRIES_CORE:\n", + " lvi = cases[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = cases[c][lvi], s = c)\n", + "plt.savefig('covid_cases_total_linear.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths[COUNTRIES_AMERICAS_DEAD].plot(figsize=(10, 6), title=\"Total deaths, 
linear\")\n", + "for c in COUNTRIES_AMERICAS_DEAD:\n", + " lvi = deaths[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)\n", + "# plt.savefig('covid_deaths_total_linear.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths[COUNTRIES_CORE + ['BR', 'MX']].plot(figsize=(10, 6), title=\"Total deaths, linear\")\n", + "for c in COUNTRIES_CORE + ['BR', 'MX']:\n", + " lvi = deaths[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)\n", + "# plt.savefig('covid_deaths_total_linear.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths[COUNTRIES_NORDIC].plot(figsize=(10, 6), title=\"Total deaths, linear\")\n", + "for c in COUNTRIES_NORDIC:\n", + " lvi = deaths[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)\n", + "# plt.savefig('covid_deaths_total_linear.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths[COUNTRIES_OF_INTEREST].plot(figsize=(10, 6), title=\"Total deaths, linear\")\n", + "for c in COUNTRIES_OF_INTEREST:\n", + " lvi = deaths[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)\n", + "plt.savefig('covid_deaths_total_linear_of_interest.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths[COUNTRIES_CORE].plot(logy=True, figsize=(10, 6), title=\"Total deaths, log\")\n", + "for c in COUNTRIES_CORE:\n", + " lvi = deaths[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)\n", + "\n", + "plt.savefig('covid_deaths_total_log.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + 
"Collapsed": "false" + }, + "outputs": [], + "source": [ + "ylim = (5*10**3, 5*10**4)\n", + "ax = deaths[COUNTRIES_CORE].plot(logy=True, figsize=(10, 6), ylim=ylim, title=\"Total deaths, log\")\n", + "for c in COUNTRIES_CORE:\n", + " lvi = deaths[c].last_valid_index()\n", + " if ylim[0] < deaths[c][lvi] < ylim[1]:\n", + " ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)\n", + "\n", + "# plt.savefig('covid_deaths_total_log.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths[COUNTRIES_FRIENDS].plot(logy=True, figsize=(10, 6), title=\"Total deaths, log\")\n", + "for c in COUNTRIES_FRIENDS:\n", + " lvi = deaths[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)\n", + "\n", + "# plt.savefig('covid_deaths_total_log.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths[COUNTRIES_NORDIC].plot(logy=True, figsize=(10, 6), title=\"Total deaths, log\")\n", + "for c in COUNTRIES_NORDIC:\n", + " lvi = deaths[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)\n", + "\n", + "# plt.savefig('covid_deaths_total_log.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths[COUNTRIES_OF_INTEREST].plot(logy=True, figsize=(10, 6), title=\"Total deaths, log\")\n", + "for c in COUNTRIES_OF_INTEREST:\n", + " lvi = deaths[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths[c][lvi], s = c)\n", + "\n", + "plt.savefig('covid_deaths_total_log.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths_pc.plot(figsize=(10, 6), title=\"Deaths per capita, linear\")\n", + "for c in deaths_pc.columns:\n", + " lvi = 
deaths_pc[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths_pc[c][lvi], s = c)\n", + "plt.savefig('covid_deaths_per_capita_linear.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths_pc.plot(logy=True, figsize=(10, 6), title=\"Deaths per capita, log\")\n", + "for c in deaths_pc.columns:\n", + " lvi = deaths_pc[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths_pc[c][lvi], s = c)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "deaths_pc[['UK', 'IE']].plot( figsize=(10, 6), title=\"Deaths per capita, linear\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "deaths_pc[['UK', 'IE']].plot(logy=True, figsize=(10, 6), title=\"Deaths per capita, log\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "deaths[['UK', 'ES', 'IT']].plot(logy=True, figsize=(10, 6), title=\"Deaths, log\")\n", + "plt.savefig('covid_deaths_selected_log.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "deaths[['UK', 'ES', 'IT', 'MX']].plot(logy=True, figsize=(10, 6), title=\"Deaths, log\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_since_threshold['deaths_m4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).mean())\n", + "data_since_threshold['deaths_m7'] = 
data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(7, 1).mean())\n", + "data_since_threshold['cases_m7'] = data_since_threshold.groupby(level=1)['cases'].transform(lambda x: x.rolling(7, 1).mean())\n", + "# data_since_threshold['deaths_diff_m4'] = data_since_threshold.groupby(level=1)['deaths_diff'].transform(lambda x: x.rolling(4, 1).mean())\n", + "# data_since_threshold['deaths_diff_m7'] = data_since_threshold.groupby(level=1)['deaths_diff'].transform(lambda x: x.rolling(7, 1).mean())\n", + "data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "deaths_m4 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_m4']]\n", + " .unstack().sort_index().xs('deaths_m4', axis=1, drop_level=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "deaths_m7 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_m7']]\n", + " .unstack().sort_index().xs('deaths_m7', axis=1, drop_level=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "cases_m7 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['cases_m7']]\n", + " .unstack().sort_index().xs('cases_m7', axis=1, drop_level=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_by_date['cases_m7'] = data_by_date.groupby(level=0)['cases'].transform(lambda x: x.rolling(7, 1).mean())\n", + "data_by_date['deaths_m7'] = data_by_date.groupby(level=0)['deaths'].transform(lambda x: x.rolling(7, 1).mean())\n", + "data_by_date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], 
+ "source": [ + "data_by_date.loc[('UK', '2020-07-15'):'UK', 'cases'].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "cases_by_date_m7 = data_by_date.loc[COUNTRIES_ALL, 'cases_m7'].unstack(level=0).sort_index()\n", + "cases_by_date_m7[COUNTRIES_CORE].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "deaths_by_date_m7 = data_by_date.loc[COUNTRIES_ALL, 'deaths_m7'].unstack(level=0).sort_index()\n", + "deaths_by_date_m7[COUNTRIES_CORE].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths_m4.plot(figsize=(10, 6), title=\"Deaths per day, 4 day moving average\")\n", + "for c in deaths_m4.columns:\n", + " lvi = deaths_m4[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths_m4[c][lvi], s = c)\n", + "plt.savefig('covid_deaths_per_day.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths_m4[COUNTRIES_CORE].plot(figsize=(10, 6), title=\"Deaths per day, 4 day moving average\")\n", + "for c in COUNTRIES_CORE:\n", + " lvi = deaths_m4[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths_m4[c][lvi], s = c)\n", + "plt.savefig('covid_deaths_per_day-core.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths_m4[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title=\"Deaths per day, 4 day moving average\")\n", + "for c in COUNTRIES_FRIENDS:\n", + " lvi = deaths_m4[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths_m4[c][lvi], s = c)\n", + "# plt.savefig('covid_deaths_per_day-friends.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, 
+ "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "C7s = 'ES FR IT UK'.split()\n", + "ax = deaths_m7[C7s].plot(figsize=(10, 6), title=\"Deaths per day, 7 day moving average\")\n", + "for c in C7s:\n", + " lvi = deaths_m7[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)\n", + "# plt.savefig('covid_deaths_per_day-friends.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths_m7[COUNTRIES_CORE].plot(figsize=(10, 6), title=\"Deaths per day, 7 day moving average\")\n", + "ax.set_xlabel(f\"Days since {DEATH_COUNT_THRESHOLD} deaths\")\n", + "for c in COUNTRIES_CORE:\n", + " lvi = deaths_m7[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)\n", + "# plt.axhline(0, color='0.7')\n", + "plt.savefig('covid_deaths_per_day_7.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths_m7[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title=\"Deaths per day, 7 day moving average\")\n", + "ax.set_xlabel(f\"Days since {DEATH_COUNT_THRESHOLD} deaths\")\n", + "for c in COUNTRIES_FRIENDS:\n", + " lvi = deaths_m7[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)\n", + "# plt.axhline(0, color='0.7')\n", + "plt.savefig('covid_deaths_per_day-friends.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "deaths_m7_prime = deaths_m7[COUNTRIES_CORE].copy()\n", + "deaths_m7_prime.loc[73:, 'ES'] = np.nan\n", + "deaths_m7_prime['ES'][70:]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths_m7_prime[COUNTRIES_CORE].plot(figsize=(10, 6), title=\"Deaths per day, 7 day moving average\")\n", + 
"for c in COUNTRIES_CORE:\n", + " lvi = deaths_m7_prime[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths_m7_prime[c][lvi], s = c)\n", + "plt.savefig('covid_deaths_per_day_7.png') # see below for where this is written, with the projection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths_by_date_m7.loc['2020-03-01':, COUNTRIES_CORE].plot(figsize=(10, 6), title=\"Deaths per day, 7 day moving average\")\n", + "ax.set_xlabel('Date')\n", + "for c in COUNTRIES_CORE:\n", + " lvi = deaths_by_date_m7[c].last_valid_index()\n", + " ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_by_date_m7[c][lvi], s = c)\n", + "plt.savefig('covid_deaths_per_day_7.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths_m7[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title=\"Deaths per day, 7 day moving average\")\n", + "for c in COUNTRIES_FRIENDS:\n", + " lvi = deaths_m7[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)\n", + "plt.savefig('covid_deaths_per_day_friends_7.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths_m7[COUNTRIES_CORE + ['BR', 'MX']].plot(figsize=(10, 6), title=\"Deaths per day, 7 day moving average\")\n", + "for c in COUNTRIES_CORE + ['BR', 'MX']:\n", + " lvi = deaths_m7[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)\n", + "# plt.savefig('covid_deaths_per_day_7.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths_by_date_m7.iloc[-30:][COUNTRIES_CORE].plot(figsize=(10, 6), title=\"Deaths per day, 7 day moving average\")#, ylim=(-10, 100))\n", + "ax.set_xlabel(\"Date\")\n", + 
"\n", + "\n", + "text_x_pos = deaths_by_date_m7.last_valid_index() + pd.Timedelta(days=1)\n", + "\n", + "for c in COUNTRIES_CORE:\n", + " lvi = deaths_by_date_m7[c].last_valid_index()\n", + "# if c != 'ES':\n", + " ax.text(x = text_x_pos, y = deaths_by_date_m7[c][lvi], s = f\"{c}: {deaths_by_date_m7[c][lvi]:.0f}\")\n", + "plt.savefig('deaths_by_date_last_30_days.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths_by_date_m7.iloc[-30:][COUNTRIES_FRIENDS].plot(figsize=(10, 6), title=\"Deaths per day, 7 day moving average\")#, ylim=(-10, 100))\n", + "ax.set_xlabel(\"Date\")\n", + "\n", + "text_x_pos = deaths_by_date_m7.last_valid_index() + pd.Timedelta(days=1)\n", + "\n", + "for c in COUNTRIES_FRIENDS:\n", + " lvi = deaths_by_date_m7[c].last_valid_index()\n", + "# if c != 'ES':\n", + " ax.text(x = text_x_pos, y = deaths_by_date_m7[c][lvi], s = f\"{c}: {deaths_by_date_m7[c][lvi]:.0f}\")\n", + "plt.savefig('deaths_by_date_last_30_days_friends.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = cases_m7[COUNTRIES_CORE].plot(figsize=(10, 6), title=\"Cases per day, 7 day moving average\")\n", + "for c in COUNTRIES_CORE:\n", + " lvi = cases_m7[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = cases_m7[c][lvi], s = c)\n", + "plt.savefig('covid_cases_per_day-core.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = cases_m7[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title=\"Cases per day, 7 day moving average\")\n", + "for c in COUNTRIES_FRIENDS:\n", + " lvi = cases_m7[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = cases_m7[c][lvi], s = c)\n", + "plt.savefig('covid_cases_per_day-friends.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": 
{ + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = cases_by_date_m7.iloc[-30:][COUNTRIES_FRIENDS].plot(figsize=(10, 6), title=\"Cases per day, 7 day moving average\")#, ylim=(-10, 100))\n", + "ax.set_xlabel(\"Date\")\n", + "\n", + "text_x_pos = cases_by_date_m7.last_valid_index() + pd.Timedelta(days=1)\n", + "\n", + "for c in COUNTRIES_FRIENDS:\n", + " lvi = cases_by_date_m7[c].last_valid_index()\n", + "# if c != 'ES':\n", + " ax.text(x = text_x_pos, y = cases_by_date_m7[c][lvi], s = f\"{c}: {cases_by_date_m7[c][lvi]:.0f}\")\n", + "plt.savefig('cases_by_date_last_30_days_friends.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "def gmean_scale(items):\n", + " return gmean(items) / items[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "def doubling_time(df):\n", + " return np.log(2) / np.log((df.deaths_culm + df.deaths_g4) / df.deaths_culm)\n", + "\n", + "def doubling_time_7(df):\n", + " return np.log(2) / np.log((df.deaths_culm + df.deaths_g7) / df.deaths_culm)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# data_since_threshold['deaths_g4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).apply(gmean_scale, raw=True))\n", + "# data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_since_threshold['deaths_g4'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(4, 1).apply(gmean, raw=True))\n", + "data_since_threshold['deaths_g7'] = data_since_threshold.groupby(level=1)['deaths'].transform(lambda x: x.rolling(7, 1).apply(gmean, raw=True))\n", + 
"data_since_threshold.loc[(slice(None), ['UK', 'DE', 'IT']), :]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_since_threshold['doubling_time'] = data_since_threshold.groupby(level=1).apply(doubling_time).reset_index(level=0, drop=True).sort_index()\n", + "data_since_threshold['doubling_time_7'] = data_since_threshold.groupby(level=1).apply(doubling_time_7).reset_index(level=0, drop=True).sort_index()\n", + "# data_since_threshold.loc[(slice(None), 'UK'), :]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_by_date['deaths_g4'] = data_by_date.groupby(level=0)['deaths'].transform(lambda x: x.rolling(4, 1).apply(gmean, raw=True))\n", + "data_by_date['deaths_g7'] = data_by_date.groupby(level=0)['deaths'].transform(lambda x: x.rolling(7, 1).apply(gmean, raw=True))\n", + "data_by_date['doubling_time'] = data_by_date.groupby(level=0).apply(doubling_time).reset_index(level=0, drop=True).sort_index()\n", + "data_by_date['doubling_time_7'] = data_by_date.groupby(level=0).apply(doubling_time_7).reset_index(level=0, drop=True).sort_index()\n", + "data_by_date.loc['UK']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "doubling_times = (data_since_threshold.loc[(slice(None), COUNTRIES_OF_INTEREST), ['doubling_time']]\n", + " .unstack().sort_index().xs('doubling_time', axis=1, drop_level=True))\n", + "doubling_times.replace([np.inf, -np.inf], np.nan, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "doubling_times_7 = (data_since_threshold.loc[(slice(None), COUNTRIES_OF_INTEREST), ['doubling_time_7']]\n", + " .unstack().sort_index().xs('doubling_time_7', axis=1, drop_level=True))\n", + 
"doubling_times_7.replace([np.inf, -np.inf], np.nan, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = doubling_times.plot(figsize=(10, 6), title=\"Doubling times, 4 day average\")\n", + "for c in doubling_times.columns:\n", + " lvi = doubling_times[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)\n", + "# plt.savefig('covid_deaths_per_day.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = doubling_times_7[COUNTRIES_CORE].plot(figsize=(10, 6), title=\"Doubling times, 7 day average\")\n", + "ax.legend(loc=\"upper left\")\n", + "for c in COUNTRIES_CORE:\n", + " lvi = doubling_times_7[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = doubling_times_7[c][lvi], s = c)\n", + "plt.savefig('covid_doubling_times_7.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = doubling_times[COUNTRIES_CORE].plot(figsize=(10, 6), title=\"Doubling times, 4 day average\")\n", + "for c in COUNTRIES_CORE:\n", + " lvi = doubling_times[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)\n", + "plt.savefig('covid_doubling_times.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = doubling_times[COUNTRIES_FRIENDS].plot(figsize=(10, 6), title=\"Doubling times\")\n", + "for c in COUNTRIES_FRIENDS:\n", + " lvi = doubling_times[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)\n", + "plt.savefig('covid_doubling_times_friends.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = 
doubling_times[C7s].plot(figsize=(10, 6), title=\"Doubling times\")\n", + "for c in C7s:\n", + " lvi = doubling_times[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = doubling_times[c][lvi], s = c)\n", + "# plt.savefig('covid_doubling_times_friends.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# deaths_diff_m4 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_diff_m4']]\n", + "# .unstack().sort_index().xs('deaths_diff_m4', axis=1, drop_level=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# deaths_diff_m7 = (data_since_threshold.loc[(slice(None), COUNTRIES_ALL), ['deaths_diff_m7']]\n", + "# .unstack().sort_index().xs('deaths_diff_m7', axis=1, drop_level=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# deaths_diff_m7" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# data_since_threshold.replace([np.inf, -np.inf], np.nan).groupby(level=1).last().loc[COUNTRIES_ALL]#, [doubling_time]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "dstl = data_since_threshold.replace([np.inf, -np.inf], np.nan).groupby(level=1).last()\n", + "dstl.loc[dstl.index.intersection(COUNTRIES_ALL)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# data_since_threshold.replace([np.inf, -np.inf], np.nan).groupby(level=1).last().loc[['UK', 'DE', 'IT']]#, [doubling_time]]\n", + "dstl.loc[['UK', 'DE', 'IT', 'FR', 'ES']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + 
"Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_since_threshold.loc[(slice(None), ['UK']), :].tail(20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_since_threshold.loc[(slice(None), ['ES']), :].tail(20)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "## Death projections" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_since_threshold.loc[(slice(None), ['UK']), :].tail(15)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "it_since_threshold = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['IT']), :]\n", + "s_end = it_since_threshold.index.max()[0]\n", + "s_end" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "uk_projection = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['UK']), :]\n", + "uk_current_end = uk_projection.index.max()[0] + 1\n", + "# s_start = uk_projection.index.max()[0] + 1\n", + "uk_current_end" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "current_uk_deaths_m7 = uk_projection[uk_projection.deaths_m7 >= 0].iloc[-1].deaths_m7\n", + "current_uk_deaths_m7" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "it_since_threshold[it_since_threshold.deaths_m7 <= current_uk_deaths_m7].loc[60:].first_valid_index()[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "s_start = 
it_since_threshold[it_since_threshold.deaths_m7 <= current_uk_deaths_m7].loc[60:].first_valid_index()[0]\n", + "s_start" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "s_start_date = data_since_threshold.loc[(s_start, 'IT'), 'dateRep']# .iloc[0]\n", + "s_start_date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "s_end - s_start" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "uk_end = s_end - s_start + uk_current_end\n", + "uk_end" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "proj = it_since_threshold.loc[(slice(s_start, s_end), slice(None)), ['cases', 'deaths', 'deaths_m7']]\n", + "ndiff = uk_current_end - s_start\n", + "proj.index = pd.MultiIndex.from_tuples([(n + ndiff, 'UK') for n, _ in proj.index], names=proj.index.names)\n", + "proj" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "it_since_threshold.loc[(slice(s_start - 8, s_start + 2), slice(None)), ['cases', 'deaths', 'deaths_m7']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "uk_projection[['cases', 'deaths', 'deaths_m7']].tail()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# proj['deaths_m7'] = proj['deaths_m7'] + 20\n", + "# proj" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "Projected deaths, UK following IT trend from now." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "uk_projection = pd.concat([uk_projection, proj], sort=True)\n", + "uk_projection.deaths.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "uk_projection = uk_projection.droplevel(1)\n", + "uk_projection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "uk_projection.loc[152, 'deaths']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "## Correction for cumulative deaths correction on 14 August" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# uk_projection.loc[152, 'deaths'] = 50" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "uk_projection['deaths_m7'] = uk_projection['deaths'].transform(lambda x: x.rolling(7, 1).mean())\n", + "uk_projection.loc[(uk_current_end - 20):(uk_current_end + 5)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "uk_projection.loc[(uk_current_end - 5):]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "uk_projection.deaths_m7.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "proj.droplevel(level=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths_m7[COUNTRIES_CORE].plot()\n", + "# 
uk_projection['deaths_m7'].plot(figsize=(10, 6), title=\"Deaths per day, 7 day moving average\", label=\"Projection\", style='--', ax=ax)\n", + "proj.droplevel(level=1)['deaths_m7'].plot(figsize=(10, 6), title=\"Deaths per day, 7 day moving average\", label=\"Projection\", style='--', ax=ax)\n", + "ax.set_xlabel(f\"Days since {DEATH_COUNT_THRESHOLD} deaths\")\n", + "for c in COUNTRIES_CORE:\n", + " lvi = deaths_m7[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = c)\n", + "# plt.savefig('covid_deaths_per_day_7.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "it_since_threshold.deaths.sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "# Excess deaths calculation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "with open('excess_deaths.json') as f:\n", + " excess_deaths_data = json.load(f)\n", + " \n", + "# with open('excess_death_accuracy.json') as f:\n", + "# excess_death_accuracy = json.load(f)\n", + " \n", + "excess_deaths_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "additional_deaths = data_by_date.loc[('UK', excess_deaths_data['end_date']):('UK')].iloc[1:].deaths.sum()\n", + "additional_deaths" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "uk_covid_deaths = data_by_date.loc['UK'].deaths.sum()\n", + "uk_covid_deaths" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "uk_deaths_to_date = int(excess_deaths_data['excess_deaths']) + additional_deaths\n", + "uk_deaths_to_date" + ] + }, + { + "cell_type": "code", 
+ "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# excess_deaths_upto = '2020-05-08'\n", + "# excess_deaths = 54500" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# excess_deaths_upto = excess_deaths_data['end_date']\n", + "# excess_deaths = excess_deaths_data['excess_deaths']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "Recorded deaths in period where ONS has reported total deaths" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# ons_reported_deaths = base_data.loc['UK'][:excess_deaths_upto]['deaths'].sum()\n", + "# ons_reported_deaths" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# excess_deaths_upto" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "## Correction for deaths total correction on 14 August" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# ons_unreported_deaths_data = base_data.loc['UK'][excess_deaths_upto:].iloc[1:]['deaths']\n", + "# ons_unreported_deaths_data['2020-08-14'] = 50" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# ons_unreported_deaths = ons_unreported_deaths_data.sum()\n", + "# ons_unreported_deaths" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# scaled_ons_unreported_deaths = ons_unreported_deaths * excess_death_accuracy\n", + "# scaled_ons_unreported_deaths" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# uk_deaths_to_date = excess_deaths + scaled_ons_unreported_deaths\n", + "# uk_deaths_to_date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# data_since_threshold.loc[(slice(None), 'UK'), :][data_since_threshold.dateRep == excess_deaths_data['end_date']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# data_since_threshold[data_since_threshold.dateRep == excess_deaths_data['end_date']].loc[(slice(None), 'UK'), :]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# ons_unreported_start = data_since_threshold[data_since_threshold.dateRep == excess_deaths_data['end_date']].loc[(slice(None), 'UK'), :].first_valid_index()[0] + 1\n", + "# ons_unreported_start" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# unreported_projected_deaths = uk_projection.loc[ons_unreported_start:].deaths.sum()\n", + "# unreported_projected_deaths" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# unreported_projected_deaths_scaled = unreported_projected_deaths * excess_death_accuracy\n", + "# unreported_projected_deaths_scaled" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# uk_projection.loc[(s_start):].deaths.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# deaths_actual_projected_scaled = uk_deaths_to_date + 
uk_projection.loc[(s_start):].deaths.sum() * excess_death_accuracy\n", + "# deaths_actual_projected_scaled" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# excess_deaths / reported_deaths" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "True deaths to date, if we follow the scaling of excess deaths over reported deaths so far." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# uk_covid_deaths = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['UK']), 'deaths_culm'].iloc[-1]\n", + "# uk_covid_deaths" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# uk_covid_deaths_scaled = excess_deaths + unreported_deaths * excess_death_accuracy\n", + "# uk_covid_deaths_scaled" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['IT']), 'dateRep'].iloc[-1] + pd.Timedelta(s_end - s_start, unit='days')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['UK']), 'dateRep'].iloc[-1].strftime(\"%Y-%m-%d\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# uk_covid_deaths * excess_deaths / reported_deaths" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# uk_projection.deaths.sum() * excess_deaths / reported_deaths" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# data_since_threshold.loc[(slice(None), 'FR'), :]\n", + "# data_since_threshold[data_since_threshold.dateRep == '2020-05-18'].loc[(slice(None), 'FR'), :]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "## School reopenings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "school_reopenings = {\n", + " 'ES': {'date': '2020-05-18'},\n", + " 'FR': {'date': '2020-05-18'}, # some areas only\n", + "# 'IT': {'date': '2020-09-01'},\n", + " # 'IE': {'date': '2020-09-01'},\n", + " 'DE': {'date': '2020-05-04'},\n", + " 'UK': {'date': '2020-06-01'}\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_since_threshold[data_since_threshold.dateRep == '2020-05-04'].loc[(slice(None), ['DE']), :].first_valid_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_since_threshold[data_since_threshold.dateRep == '2020-05-04'].loc[(slice(None), ['DE']), :].iloc[0].deaths_m7" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "for cID in school_reopenings:\n", + " dst_in = data_since_threshold[data_since_threshold.dateRep == (school_reopenings[cID]['date'])].loc[(slice(None), [cID]), :]\n", + " dst_i = dst_in.first_valid_index()\n", + " dst_n = dst_in.iloc[0].deaths_m7\n", + " school_reopenings[cID]['since_threshold'] = dst_i[0]\n", + " school_reopenings[cID]['deaths_m7'] = dst_n\n", + "school_reopenings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": 
[], + "source": [ + "ax = deaths_m7[COUNTRIES_CORE].plot(figsize=(15, 9), title=\"Deaths per day, 7 day moving average\")\n", + "# uk_projection.deaths_m7.plot(ax=ax)\n", + "for c in COUNTRIES_CORE:\n", + " lvi = deaths_m7[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = f\"{c}: {deaths_m7[c][lvi]:.0f}\")\n", + " if c in school_reopenings:\n", + " marker_col = [l for l in ax.lines if l.get_label() == c][0].get_color()\n", + " ax.plot(school_reopenings[c]['since_threshold'], school_reopenings[c]['deaths_m7'], '*', \n", + " markersize=18, markerfacecolor=marker_col, markeredgecolor=marker_col)\n", + " ax.text(x = school_reopenings[c]['since_threshold'] + 1, y = school_reopenings[c]['deaths_m7'], \n", + " s = f\"{school_reopenings[c]['date']}: {school_reopenings[c]['deaths_m7']:.0f}\")\n", + "plt.savefig('school_reopenings.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# ax = deaths_m7[COUNTRIES_CORE].plot(figsize=(15, 9), title=\"Deaths per day, 7 day moving average\",\n", + "# xlim=(46, 91), ylim=(0, 400))\n", + "# # uk_projection.deaths_m7.plot(ax=ax)\n", + "# for c in COUNTRIES_CORE:\n", + "# lvi = deaths_m7[c].last_valid_index()\n", + "# ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = f\"{c}: {deaths_m7[c][lvi]:.0f}\", fontsize=14)\n", + "# if c in school_reopenings:\n", + "# marker_col = [l for l in ax.lines if l.get_label() == c][0].get_color()\n", + "# ax.plot(school_reopenings[c]['since_threshold'], school_reopenings[c]['deaths_m7'], '*', \n", + "# markersize=18, markerfacecolor=marker_col, markeredgecolor=marker_col)\n", + "# ax.text(x = school_reopenings[c]['since_threshold'] + 1, y = school_reopenings[c]['deaths_m7'], \n", + "# s = f\"{school_reopenings[c]['date']}: {school_reopenings[c]['deaths_m7']:.0f}\",\n", + "# fontsize=14)\n", + "# plt.savefig('school_reopenings_detail.png')" + ] + }, + { + "cell_type": "markdown", + 
"metadata": { + "Collapsed": "false" + }, + "source": [ + "# Lockdown graphs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "lockdown_dates = {\n", + " 'ES': { 'part_start': {'date': '2020-03-14'}\n", + " , 'full_start': {'date': '2020-03-15'}\n", + " , 'part_finish': {'date': '2020-05-18'}\n", + " },\n", + " 'FR': { 'part_start': {'date': '2020-03-13'}\n", + " , 'full_start': {'date': '2020-03-17'}\n", + " , 'part_finish': {'date': '2020-05-11'}\n", + " },\n", + " 'IT': { 'part_start': {'date': '2020-03-08'}\n", + " , 'full_start': {'date': '2020-03-10'}\n", + " , 'part_finish': {'date': '2020-05-04'}\n", + " },\n", + " 'DE': { #'part_start': {'date': '2020-03-13'}\n", + " 'full_start': {'date': '2020-03-22'}\n", + " , 'part_finish': {'date': '2020-05-06'}\n", + " },\n", + " 'UK': { 'part_start': {'date': '2020-03-23'}\n", + " , 'full_start': {'date': '2020-03-23'}\n", + " , 'part_finish': {'date': '2020-05-31'}\n", + " },\n", + " 'IE': { #'part_start': {'date': '2020-03-12'}\n", + " 'full_start': {'date': '2020-03-27'}\n", + " , 'part_finish': {'date': '2020-05-18'}\n", + " },\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "for cID in lockdown_dates:\n", + " for phase in lockdown_dates[cID]:\n", + " dst_in = data_since_threshold[data_since_threshold.dateRep == (lockdown_dates[cID][phase]['date'])].loc[(slice(None), [cID]), :]\n", + " dst_i = dst_in.first_valid_index()\n", + " dst_n = dst_in.iloc[0].deaths_m7\n", + " dst_c = dst_in.iloc[0].cases_m7\n", + " lockdown_dates[cID][phase]['since_threshold'] = dst_i[0]\n", + " lockdown_dates[cID][phase]['deaths_m7'] = dst_n\n", + " lockdown_dates[cID][phase]['cases_m7'] = dst_c\n", + "\n", + "lockdown_dates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": 
[], + "source": [ + "ax = deaths_m7[COUNTRIES_CORE].plot(figsize=(15, 9), title=\"Deaths per day, 7 day moving average, with lockdown dates\")\n", + "ax.set_xlabel(f\"Days since {DEATH_COUNT_THRESHOLD} deaths\")\n", + "# uk_projection.deaths_m7.plot(ax=ax)\n", + "for c in COUNTRIES_CORE:\n", + " lvi = deaths_m7[c].last_valid_index()\n", + " if c != 'UK':\n", + " ax.text(x = lvi + 1, y = deaths_m7[c][lvi], s = f\"{c}: {deaths_m7[c][lvi]:.0f}\")\n", + " if c in lockdown_dates:\n", + " for phase in lockdown_dates[c]:\n", + " marker_col = [l for l in ax.lines if l.get_label() == c][0].get_color()\n", + " ax.plot(lockdown_dates[c][phase]['since_threshold'], lockdown_dates[c][phase]['deaths_m7'], '*',\n", + " markersize=18, markerfacecolor=marker_col, markeredgecolor=marker_col)\n", + " if 'start' not in phase:\n", + " ax.text(x = lockdown_dates[c][phase]['since_threshold'] + 1, y = lockdown_dates[c][phase]['deaths_m7'], \n", + " s = f\"{lockdown_dates[c][phase]['date']}: {lockdown_dates[c][phase]['deaths_m7']:.0f}\")\n", + "# plt.savefig('school_reopenings.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = cases_m7.iloc[-50:][COUNTRIES_CORE].plot(figsize=(15, 9), title=\"Cases per day, 7 day moving average, with lockdown dates\") #, ylim=(-10, 1500))\n", + "ax.set_xlabel(f\"Days since {DEATH_COUNT_THRESHOLD} deaths\")\n", + "# uk_projection.deaths_m7.plot(ax=ax)\n", + "for c in COUNTRIES_CORE:\n", + " lvi = cases_m7[c].last_valid_index()\n", + "# if c != 'UK':\n", + " ax.text(x = lvi + 1, y = cases_m7[c][lvi], s = f\"{c}: {cases_m7[c][lvi]:.0f}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = cases_m7[COUNTRIES_CORE].plot(figsize=(15, 9), title=\"Cases per day, 7 day moving average, with lockdown dates\")\n", + "ax.set_xlabel(f\"Days since {DEATH_COUNT_THRESHOLD}
deaths\")\n", + "# uk_projection.deaths_m7.plot(ax=ax)\n", + "for c in COUNTRIES_CORE:\n", + " lvi = cases_m7[c].last_valid_index()\n", + "# if c != 'UK':\n", + " ax.text(x = lvi + 1, y = cases_m7[c][lvi], s = f\"{c}: {cases_m7[c][lvi]:.0f}\")\n", + " if c in lockdown_dates:\n", + " for phase in lockdown_dates[c]:\n", + " marker_col = [l for l in ax.lines if l.get_label() == c][0].get_color()\n", + " if 'start' in phase:\n", + " marker_shape = '^'\n", + " else:\n", + " marker_shape = 'v'\n", + " ax.plot(lockdown_dates[c][phase]['since_threshold'], lockdown_dates[c][phase]['cases_m7'], \n", + " marker_shape,\n", + " markersize=18, markerfacecolor=marker_col, markeredgecolor=marker_col)\n", + " if 'start' not in phase:\n", + " ax.text(x = lockdown_dates[c][phase]['since_threshold'] + 1, y = lockdown_dates[c][phase]['cases_m7'], \n", + " s = f\"{lockdown_dates[c][phase]['date']}: {lockdown_dates[c][phase]['cases_m7']:.0f}\")\n", + "# plt.savefig('cases_per_day_with_lockdown.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "plot_start_date = '2020-03-01'\n", + "ax = cases_by_date_m7.loc[plot_start_date:, COUNTRIES_CORE].plot(figsize=(15, 9), title=\"Cases per day, 7 day moving average, with lockdown dates\")\n", + "ax.set_xlabel(f\"Date\")\n", + "ax.set_ylabel(\"Number of cases\")\n", + "# uk_projection.deaths_m7.plot(ax=ax)\n", + "for c in COUNTRIES_CORE:\n", + " lvi = cases_by_date_m7[c].last_valid_index()\n", + "# if c != 'UK':\n", + " ax.text(x = lvi + pd.Timedelta(days=1), y = cases_by_date_m7[c][lvi], s = f\"{c}: {cases_by_date_m7[c][lvi]:.0f}\")\n", + " if c in lockdown_dates:\n", + " for phase in lockdown_dates[c]:\n", + " marker_col = [l for l in ax.lines if l.get_label() == c][0].get_color()\n", + " if 'start' in phase:\n", + " marker_shape = '^'\n", + " else:\n", + " marker_shape = 'v'\n", + " marker_x_pos = ax.get_xlim()[0] + 
mpl.dates.date2num(pd.to_datetime(lockdown_dates[c][phase]['date'])) - mpl.dates.date2num(pd.to_datetime(plot_start_date))\n", + " ax.plot(marker_x_pos, lockdown_dates[c][phase]['cases_m7'], \n", + " marker_shape,\n", + " markersize=18, markerfacecolor=marker_col, markeredgecolor=marker_col)\n", + " if 'start' not in phase:\n", + " ax.text(x = marker_x_pos + 3, y = lockdown_dates[c][phase]['cases_m7'], \n", + " s = f\"{lockdown_dates[c][phase]['date']}: {lockdown_dates[c][phase]['cases_m7']:.0f}\")\n", + "plt.savefig('cases_per_day_with_lockdown.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = cases_m7[COUNTRIES_CORE].plot(figsize=(10, 6), title=\"Cases per day, 7 day moving average\")\n", + "for c in COUNTRIES_CORE:\n", + " lvi = cases_m7[c].last_valid_index()\n", + " ax.text(x = lvi + 1, y = cases_m7[c][lvi], s = c)\n", + "plt.savefig('covid_cases_per_day-core.png') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths_m7[COUNTRIES_CORE].plot(figsize=(15, 9), title=\"Deaths per day, 7 day moving average\",\n", + " xlim=(0, 15), \n", + " ylim=(0, 66)\n", + " )\n", + "# uk_projection.deaths_m7.plot(ax=ax)\n", + "for c in COUNTRIES_CORE:\n", + " lvi = deaths_m7[c].last_valid_index()\n", + " if c in lockdown_dates:\n", + " for phase in lockdown_dates[c]:\n", + " if 'start' in phase:\n", + " print(c, phase)\n", + " marker_col = [l for l in ax.lines if l.get_label() == c][0].get_color()\n", + " ax.plot(lockdown_dates[c][phase]['since_threshold'], lockdown_dates[c][phase]['deaths_m7'], '*', \n", + " markersize=18, markerfacecolor=marker_col, markeredgecolor=marker_col)\n", + " ax.text(x = lockdown_dates[c][phase]['since_threshold'] + 0.3, y = lockdown_dates[c][phase]['deaths_m7'], \n", + " s = f\"{lockdown_dates[c][phase]['date']}: 
{lockdown_dates[c][phase]['deaths_m7']:.0f}\")\n", + "# plt.savefig('school_reopenings.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false" + }, + "source": [ + "# Write results to summary file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "with open('covid_summary.md', 'w') as f:\n", + " f.write('% Covid death data summary\\n')\n", + " f.write('% Neil Smith\\n')\n", + " f.write(f'% Created on {datetime.datetime.now().strftime(\"%Y-%m-%d\")}\\n')\n", + " f.write('\\n')\n", + " \n", + " last_uk_date = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['UK']), 'dateRep'].iloc[-1]\n", + " f.write(f'> Last UK data from {last_uk_date.strftime(\"%Y-%m-%d\")}\\n')\n", + " f.write('\\n') " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "with open('covid_summary.md', 'a') as f:\n", + " f.write('## Headlines\\n')\n", + " f.write('\\n')\n", + " f.write('| []() | |\\n')\n", + " f.write('|:---|---:|\\n')\n", + " f.write(f'| Deaths reported so far | {uk_covid_deaths} | \\n')\n", + " f.write(f'| Total Covid deaths to date (estimated) | {uk_deaths_to_date:.0f} |\\n')\n", + " projection_date = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['IT']), 'dateRep'].iloc[-1] + pd.Timedelta(s_end - s_start, unit='days')\n", + "# f.write(f'| Projected total deaths up to {projection_date.strftime(\"%Y-%m-%d\")} | {deaths_actual_projected_scaled:.0f} | \\n')\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + 
"Collapsed": "false" + }, + "outputs": [], + "source": [ + "with open('covid_summary.md', 'a') as f:\n", + " f.write('## Total deaths\\n')\n", + "# f.write(f'Time based on days since {DEATH_COUNT_THRESHOLD} deaths\\n')\n", + " f.write('\\n')\n", + " f.write('![Total deaths](covid_deaths_total_linear.png)\\n')\n", + " f.write('\\n')\n", + " f.write('| Country ID | Country name | Total deaths |\\n')\n", + " f.write('|:-----------|:-------------|-------------:|\\n')\n", + " for c in sorted(COUNTRIES_CORE):\n", + " lvi = deaths_by_date[c].last_valid_index()\n", + " f.write(f'| {c} | {countries.loc[c].countriesAndTerritories} | {int(deaths_by_date[c][lvi])} |\\n')\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "with open('covid_summary.md', 'a') as f:\n", + " f.write('## All-causes deaths, UK\\n')\n", + " f.write('\\n')\n", + " f.write('![All-causes deaths](deaths-radar.png)\\n')\n", + " f.write('\\n')\n", + " f.write('### True deaths\\n')\n", + " f.write('\\n')\n", + " f.write(f'The number of deaths reported in official statistics, {uk_covid_deaths}, is an underestimate '\n", + " 'of the true number of Covid deaths.\\n'\n", + " 'This is especially true early in the pandemic, approximately March to May 2020.\\n')\n", + " f.write('We can get a better understanding of the impact of Covid by looking at the number of deaths, '\n", + " 'over and above what would be expected at each week of the year.\\n')\n", + " f.write(f'The ONS (and other bodies in Scotland and Northern Ireland) have released data on the number of deaths '\n", + " f'up to {pd.to_datetime(excess_deaths_data[\"end_date\"]).strftime(\"%d %B %Y\")}.\\n\\n')\n", + " f.write('If, for each of those weeks, I take the largest of the excess deaths or the reported Covid deaths, ')\n", + " f.write(f'I estimate there have been **{uk_deaths_to_date}** total deaths so far.\\n')\n", + " f.write('\\n')" + ] 
+ }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# with open('covid_summary.md', 'a') as f:\n", + "# f.write(f'In that period, the UK reported {ons_reported_deaths} Covid deaths.\\n')\n", + "# f.write(f'In the last three weeks for which excess deaths have been reported, the excess deaths have been {excess_death_accuracy:.3f} higher than the Covid-reported deaths.\\n')\n", + "# # f.write(f'That means the actual number of Covid death is about {excess_deaths / reported_deaths:.2f} times higher than the reported figures.\\n')\n", + "# f.write('\\n')\n", + "# f.write(f'The UK has reported {uk_covid_deaths} deaths so far.\\n')\n", + "# f.write(f'Using the scaling factor above (for Covid-19 deaths after the ONS figures), I infer that there have been **{uk_deaths_to_date:.0f}** total deaths so far.\\n')\n", + "# f.write('\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "with open('covid_summary.md', 'a') as f:\n", + " f.write('## Deaths per day\\n')\n", + " f.write(f'Based on a 7-day moving average\\n')\n", + " f.write('\\n')\n", + " f.write('![Deaths per day](covid_deaths_per_day_7.png)\\n')\n", + " f.write('\\n')\n", + " f.write('![Deaths per day, last 30 days](deaths_by_date_last_30_days.png)\\n')\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "s_end - s_start - 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "with open('covid_summary.md', 'a') as f:\n", + " f.write('## Projected deaths\\n')\n", + " f.write(\"Previously, I was using Italy's deaths data to predict the UK's deaths data. 
\"\n", + " \"This worked when both countries' trends of deaths were falling or constant, \"\n", + " \"as they were until September.\\n\")\n", + " f.write(\"\\n\")\n", + " f.write(\"As of mid-September, with cases rising in both countries at around the same time, \"\n", + " \"I can't use Italian data to predict the UK's future deaths.\\n\")\n", + " f.write(\"\\n\")\n", + "# f.write(f\"The UK's daily deaths data is very similar to Italy's.\\n\")\n", + "# f.write(f'If I use the Italian data for the next {s_end - s_start - 1} days (from {s_start_date.strftime(\"%d %B %Y\")} onwards),')\n", + "# f.write(f' the UK will report {uk_projection.deaths.sum()} deaths on day {uk_end} of the epidemic.\\n')\n", + "# f.write('\\n')\n", + "# f.write('Using the excess deaths scaling from above, that will translate into ')\n", + "# f.write(f'**{deaths_actual_projected_scaled:.0f}** Covid deaths total.\\n')\n", + "# f.write('\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "with open('covid_summary.md', 'a') as f:\n", + " f.write('## Deaths doubling times\\n')\n", + " f.write(f'Based on a 7-day moving average\\n')\n", + " f.write('\\n')\n", + " f.write('![Deaths doubling times](covid_doubling_times_7.png)\\n')\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "with open('covid_summary.md', 'a') as f:\n", + " f.write('\\n')\n", + " f.write('## Cases per day and lockdown dates\\n')\n", + " f.write(f'Based on a 7-day moving average\\n')\n", + " f.write('\\n')\n", + " f.write('![Cases per day](cases_per_day_with_lockdown.png)\\n')\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "with open('covid_summary.md', 'a') as f:\n", + " f.write('| Country ID | Country name | 
Most recent daily cases | Most recent daily deaths |\\n')\n", + " f.write('|:-----------|:-------------|------------------------:|-------------------------:|\\n')\n", + " for c in sorted(COUNTRIES_CORE):\n", + " lvic = cases_m7[c].last_valid_index()\n", + " lvid = deaths_m7[c].last_valid_index()\n", + " f.write(f'| {c} | {countries.loc[c].countriesAndTerritories} | {cases_m7[c][lvic]:.0f} | {deaths_m7[c][lvid]:.0f} | \\n')\n", + " f.write('\\n')\n", + " f.write('(Figures are 7-day averages)\\n')\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "with open('hospital_normalisation_date.json') as f:\n", + " hospital_normalisation_date_data = json.load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "with open('covid_summary.md', 'a') as f:\n", + " f.write('## Hospital care\\n')\n", + " f.write(f'Based on a 7-day moving average\\n')\n", + " f.write('\\n')\n", + " f.write('![Cases, admissions, deaths](cases_admissions_deaths.png)\\n')\n", + " f.write('\\n')\n", + "# f.write('Admissions are shifted by 10 days, deaths by 25 days. 
'\n", + "# 'This reflects the typical timescales of infection: '\n", + "# 'patients are admitted 10 days after onset of symptoms, '\n", + "# 'and die 15 days after admission.\\n')\n", + "# f.write('\\n')\n", + "# f.write('Plotting this data with offsets shows more clearly '\n", + "# 'the relative changes in these three metrics.\\n')\n", + " f.write('Due to the large scale differences between the three '\n", + " 'measures, they are all normalised to show changes ')\n", + " f.write(f'since {pd.to_datetime(hospital_normalisation_date_data[\"hospital_normalisation_date\"]).strftime(\"%d %B %Y\")}.\\n')\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "with open('covid_summary.md', 'a') as f:\n", + " f.write('## Testing effectiveness\\n')\n", + " f.write('\\n')\n", + " f.write('A question about testing is whether more detected cases is a result of more tests being '\n", + " 'done or is because the number of cases is increasing. 
One way of telling the differeence '\n", + " 'is by looking at the fraction of tests that are positive.\\n')\n", + " f.write('\\n')\n", + " f.write('![Positive tests and cases](tests_and_cases.png)\\n')\n", + " f.write('\\n')\n", + " f.write('Numbers of positive tests and cases, '\n", + " '7-day moving average.\\n'\n", + " 'Note the different y-axes\\n')\n", + " f.write('\\n') \n", + " f.write('![Fraction of tests with positive result](fraction_positive_tests.png)\\n')\n", + " f.write('\\n')\n", + " f.write('Fraction of tests with a positive result, both daily figures and '\n", + " '7-day moving average.\\n')\n", + " f.write('\\n') \n", + " f.write('\\n')\n", + " f.write('![Tests against fraction positive, trajectory](fraction_positive_tests_vs_tests.png)\\n')\n", + " f.write('\\n')\n", + " f.write('The trajectory of tests done vs fraction positive tests.\\n')\n", + " f.write('\\n')\n", + " f.write('Points higher indicate more tests; points to the right indicate more positive tests.'\n", + " 'More tests being done with the same infection prevelance will move the point up '\n", + " 'and to the left.\\n')\n", + " f.write('\\n')\n", + " f.write('\\n')\n", + " f.write('![Tests against fraction positive, trajectory](tests_vs_fraction_positive_animation.png)\\n')\n", + " f.write('\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "with open('covid_summary.md', 'a') as f:\n", + " f.write('# Data sources\\n')\n", + " f.write('\\n')\n", + " f.write('> Covid data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)\\n')\n", + " f.write('\\n') \n", + " f.write(\"\"\"> Population data from:\n", + "\n", + "* [Office of National 
Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (England and Wales). Weeks start on a Saturday.\n", + "* [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.\n", + "* [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.\"\"\")\n", + " \n", + " f.write('\\n\\n')\n", + " f.write('> [Source code available](https://git.njae.me.uk/?p=covid19.git;a=tree)\\n')\n", + " f.write('\\n') \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "!pandoc --toc -s covid_summary.md > covid_summary.html" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "!scp covid_summary.html neil@ogedei:/var/www/scripts.njae.me.uk/covid/index.html\n", + "!scp covid_deaths_total_linear.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "!scp deaths-radar.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "!scp covid_deaths_per_day_7.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "!scp covid_doubling_times_7.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "!scp cases_per_day_with_lockdown.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "!scp cases_admissions_deaths.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "!scp fraction_positive_tests.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ \n", + "!scp tests_and_cases.png
neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "!scp deaths_by_date_last_30_days.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "!scp fraction_positive_tests_vs_tests.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "!scp tests_vs_fraction_positive_animation.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/ " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "with open('uk_covid_deaths.js', 'w') as f:\n", + " f.write(f\"document.write('{uk_covid_deaths}');\")\n", + " \n", + "with open('estimated_total_deaths.js', 'w') as f:\n", + " f.write(f\"document.write('{uk_deaths_to_date:.0f}');\")\n", + "\n", + "# with open('projection_date.js', 'w') as f:\n", + "# f.write(f\"document.write(\\'{projection_date.strftime('%d %B %Y')}\\');\")\n", + "\n", + "# with open('projected_deaths.js', 'w') as f:\n", + "# f.write(f\"document.write('{uk_projection.deaths.sum():.0f}');\")\n", + "\n", + "# with open('projected_excess_deaths.js', 'w') as f:\n", + "# f.write(f\"document.write('{deaths_actual_projected_scaled:.0f}');\")\n", + "\n", + "edut = pd.to_datetime(excess_deaths_data[\"end_date\"]).strftime('%d %B %Y')\n", + "with open('excess_deaths_upto.js', 'w') as f:\n", + " f.write(f\"document.write('{edut}');\")\n", + "\n", + "# with open('excess_deaths.js', 'w') as f:\n", + "# f.write(f\"document.write('{excess_deaths:.0f}');\")\n", + " \n", + "# with open('reported_deaths.js', 'w') as f:\n", + "# f.write(f\"document.write('{ons_reported_deaths:.0f}');\")\n", + " \n", + "# with open('scaling_factor.js', 'w') as f:\n", + "# f.write(f\"document.write('{excess_death_accuracy:.2f}');\") \n", + "\n", + "# with open('projection_length.js', 'w') as f:\n", + "# f.write(f\"document.write('{s_end - s_start - 1}');\")\n", + " \n", + "# with open('s_end.js', 'w') as f:\n", + "# f.write(f\"document.write('{s_end}');\")\n", + " \n", + "# s_start_date_str = 
s_start_date.strftime(\"%d %B %Y\")\n", + "# with open('s_start_date.js', 'w') as f:\n", + "# f.write(f\"document.write('{s_start_date_str}');\")\n", + " \n", + "# with open('uk_end.js', 'w') as f:\n", + "# f.write(f\"document.write('{uk_end}');\")\n", + " \n", + "with open('last_uk_date.js', 'w') as f:\n", + " f.write(f\"document.write('{pd.to_datetime(last_uk_date).strftime('%d %B %Y')}');\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "# pd.to_datetime(excess_deaths_upto).strftime('%d %B %Y')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "!scp uk_covid_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "!scp estimated_total_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "# !scp projection_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "# !scp projected_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "# !scp projected_excess_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "!scp excess_deaths_upto.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "# !scp excess_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "# !scp reported_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "# !scp scaling_factor.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "# !scp projection_length.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "# !scp s_end.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "# !scp s_start_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "# !scp uk_end.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "!scp last_uk_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/\n", + "!scp hospital_normalisation_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_by_date.loc['UK'].to_csv('data_by_day_uk.csv', header=True, index=True)\n", + "data_by_date.loc['BE'].to_csv('data_by_day_be.csv', header=True, index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ukd = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['UK']), ['deaths', 'deaths_m7']].droplevel(1)\n", + "ax = ukd.deaths.plot.bar(figsize=(12, 8))\n", + "ukd.deaths_m7.plot.line(ax=ax, color='red')\n", + "# ax = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['UK']), 'deaths_m7'].plot.line(figsize=(12, 8), color='red')\n", + "# ax = data_since_threshold.replace([np.inf, -np.inf], np.nan).loc[(slice(None), ['UK']), 'deaths'].plot.bar(ax=ax)\n", + "ax.set_xticks(range(0, 120, 20))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ukdd = data_by_date.loc['UK'].iloc[-30:]\n", + "ax = ukdd.deaths_m7.plot.line(figsize=(12, 8), color='red')\n", + "# ukdd.deaths.plot.bar(ax=ax)\n", + "ax.bar(ukdd.index, ukdd.deaths)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ukdd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "np.arange(0, 130, 20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_by_date.loc['UK']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_by_date.loc['UK'].plot(x='deaths_culm', y='deaths', logx=True, logy=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_by_date.loc['UK'].plot(x='cases_culm', y='cases')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ukdbd = data_by_date.loc['UK'].copy()\n", + "ukdbd['deaths_m7'] = ukdbd.deaths.transform(lambda x: x.rolling(7, 1).mean())\n", + "ukdbd['cases_m7'] = ukdbd.cases.transform(lambda x: x.rolling(7, 1).mean())\n", + "ukdbd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ukdbd.plot(x='deaths_culm', y='deaths_m7', logx=True, logy=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(figsize=(12, 8))\n", + "xmax = 10\n", + "for c in COUNTRIES_CORE:\n", + " if data_since_threshold.loc[(slice(None), c), 'deaths_culm'].max() > xmax:\n", + " xmax = data_since_threshold.loc[(slice(None), c), 'deaths_culm'].max()\n", + " data_since_threshold.loc[(slice(None), c), :].plot(x='deaths_culm', y='deaths_m7', logx=True, logy=True, xlim=(10, xmax * 1.1), label=c, ax=ax)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_since_threshold.loc[(slice(None), 'UK'), 'deaths_culm'].max()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "countries.continentExp.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "countries.loc['KW']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_by_date.groupby(level=0)['deaths'].shift(-25)" + ] + }, 
+ { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "offset_data = data_by_date.loc[:, ['cases']]\n", + "offset_data['deaths'] = data_by_date.groupby(level=0)['deaths'].shift(-25)\n", + "offset_data['cases_m7'] = offset_data.groupby(level=0)['cases'].transform(lambda x: x.rolling(7, 1).mean())\n", + "offset_data['deaths_m7'] = offset_data['deaths'].dropna().groupby(level=0).transform(lambda x: x.rolling(7, 1).mean())\n", + "offset_data['deaths_per_case'] = offset_data.deaths_m7 / offset_data.cases_m7\n", + "offset_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "deaths_m7" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "offset_deaths_m7 = (offset_data.loc[COUNTRIES_ALL, ['deaths_m7']]\n", + " .unstack().sort_index().xs('deaths_m7', axis=1, drop_level=True)).T.sort_index()\n", + "offset_deaths_m7" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "offset_deaths_m7['UK']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "data_since_threshold.loc[(slice(None), 'UK'), :].tail()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "countries.loc['PT']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = cases_by_date_m7.iloc[-50:][COUNTRIES_FRIENDS].plot(figsize=(15, 9), title=\"Cases per day, 7 day moving average\")#, ylim=(-10, 1500))\n", + "# ax.set_xlabel(f\"Days since {DEATH_COUNT_THRESHOLD} deaths\")\n", + "# 
uk_projection.deaths_m7.plot(ax=ax)\n", + "for c in COUNTRIES_FRIENDS:\n", + " lvi = cases_by_date_m7[c].last_valid_index()\n", + " ax.text(x = lvi + pd.Timedelta(days=1), y = cases_by_date_m7[c][lvi], s = f\"{c}: {cases_by_date_m7[c][lvi]:.0f}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "ax = deaths_by_date_m7.iloc[-50:][COUNTRIES_FRIENDS].plot(figsize=(15, 9), title=\"Deaths per day, 7 day moving average\")#, ylim=(-10, 100))\n", + "# ax.set_xlabel(f\"Days since {DEATH_COUNT_THRESHOLD} deaths\")\n", + "# uk_projection.deaths_m7.plot(ax=ax)\n", + "for c in COUNTRIES_FRIENDS:\n", + " lvi = deaths_by_date_m7[c].last_valid_index()\n", + "# if c != 'ES':\n", + " ax.text(x = lvi + pd.Timedelta(days=1), y = deaths_by_date_m7[c][lvi], s = f\"{c}: {deaths_by_date_m7[c][lvi]:.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,md" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}