{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "Collapsed": "false"
   },
   "source": [
    "Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "Collapsed": "false"
   },
   "outputs": [],
   "source": [
    "import itertools\n",
    "import collections\n",
    "import json\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from scipy.stats import gmean\n",
    "import datetime\n",
    "\n",
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "Collapsed": "false"
   },
   "outputs": [],
   "source": [
    "# Notebook-wide configuration: one numeric threshold and several groups of\n",
    "# two-letter country codes (presumably matched against the dataset's geoId\n",
    "# column -- confirm against the cells that use them).\n",
    "DEATH_COUNT_THRESHOLD = 10\n",
    "COUNTRIES_CORE = 'IT DE UK ES IE FR BE'.split()\n",
    "COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()\n",
    "COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()\n",
    "# COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()\n",
    "\n",
    "# The Americas, excluding Canada and USA\n",
    "COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY',\n",
    "                      'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',\n",
    "                      'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',\n",
    "                      'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']\n",
    "\n",
    "# De-duplicated unions of the groups above. sorted() fixes the order: a bare\n",
    "# list(set(...)) of strings can come out in a different order in every kernel\n",
    "# session (string hashes are randomised per process), which would reshuffle\n",
    "# anything that iterates over these lists.\n",
    "COUNTRIES_OF_INTEREST = sorted(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))\n",
    "COUNTRIES_ALL = sorted(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4843,
   "metadata": {
    "Collapsed": "false"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " % Total % Received % Xferd Average Speed Time Time Time Current\n",
      " Dload Upload Total Spent Left Speed\n",
      "100 553k 100 553k 0 0 564k 0 --:--:-- --:--:-- --:--:-- 564k\n"
     ]
    }
   ],
   "source": [
    "# Download the latest ECDC case-distribution CSV into covid.csv.\n",
    "# --fail makes curl exit with an error on an HTTP failure instead of saving the\n",
    "# server's error page as if it were data; --location follows redirects.\n",
    "!curl --fail --location https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/ > covid.csv"
   ]
  },
  {
   "cell_type": "code",
"execution_count": 4844,
   "metadata": {
    "Collapsed": "false"
   },
   "outputs": [],
   "source": [
    "# The first column is a day-first date. A geoId of 'NA' is Namibia, not a\n",
    "# missing value, so pandas' default NA strings are switched off and only\n",
    "# empty fields are read as missing.\n",
    "raw_data = pd.read_csv('covid.csv',\n",
    "                       parse_dates=[0], dayfirst=True,\n",
    "                       keep_default_na=False, na_values=[''])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4845,
   "metadata": {
    "Collapsed": "false"
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "89150"
      ]
     },
     "execution_count": 4845,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# .size is the total number of cells (rows x columns), not the row count.\n",
    "raw_data.size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4846,
   "metadata": {
    "Collapsed": "false"
   },
   "outputs": [],
   "source": [
    "# Replace every missing value with 0; this applies to all columns, text ones included.\n",
    "raw_data = raw_data.fillna(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4847,
   "metadata": {
    "Collapsed": "false"
   },
   "outputs": [
    {
     "data": {
      "text/html": [ "
\n", " | dateRep | \n", "year_week | \n", "cases_weekly | \n", "deaths_weekly | \n", "countriesAndTerritories | \n", "geoId | \n", "countryterritoryCode | \n", "popData2019 | \n", "continentExp | \n", "notification_rate_per_100000_population_14-days | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "2020-12-14 | \n", "2020-50 | \n", "1757 | \n", "71 | \n", "Afghanistan | \n", "AF | \n", "AFG | \n", "38041757.0 | \n", "Asia | \n", "9.01 | \n", "
1 | \n", "2020-12-07 | \n", "2020-49 | \n", "1672 | \n", "137 | \n", "Afghanistan | \n", "AF | \n", "AFG | \n", "38041757.0 | \n", "Asia | \n", "7.22 | \n", "
2 | \n", "2020-11-30 | \n", "2020-48 | \n", "1073 | \n", "68 | \n", "Afghanistan | \n", "AF | \n", "AFG | \n", "38041757.0 | \n", "Asia | \n", "6.42 | \n", "
3 | \n", "2020-11-23 | \n", "2020-47 | \n", "1368 | \n", "69 | \n", "Afghanistan | \n", "AF | \n", "AFG | \n", "38041757.0 | \n", "Asia | \n", "6.66 | \n", "
4 | \n", "2020-11-16 | \n", "2020-46 | \n", "1164 | \n", "61 | \n", "Afghanistan | \n", "AF | \n", "AFG | \n", "38041757.0 | \n", "Asia | \n", "4.65 | \n", "