Removed data files
[covid19.git] / euromomo / euromomo.ipynb
diff --git a/euromomo/euromomo.ipynb b/euromomo/euromomo.ipynb
deleted file mode 100644 (file)
index 2d4218e..0000000
+++ /dev/null
@@ -1,314 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import json\n",
-    "import requests\n",
-    "from bs4 import BeautifulSoup"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Destination file name for the scraped data; change this to save elsewhere.\n",
-    "# NOTE(review): confirm that the cell writing the JSON file actually reads this constant.\n",
-    "OUTPUT_FILE = 'euromomo.json'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Change this if the script breaks.\n",
-    "# BASE_PAGE is the page that gets downloaded and searched for script links;\n",
-    "# JS_FILE_PATTERN is the substring a script link's href must contain to be selected.\n",
-    "BASE_PAGE = \"https://www.euromomo.eu/graphs-and-maps/\"\n",
-    "JS_FILE_PATTERN = \"src-templates-graphs-and-maps-js\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# First, we need to find the JS link inside the webpage.\n",
-    "# The candidates are the <link as=\"script\" href=...> tags of the page.\n",
-    "# A timeout keeps the cell from hanging forever on an unresponsive server.\n",
-    "r = requests.get(BASE_PAGE, timeout=30)\n",
-    "if r.status_code != 200:\n",
-    "    raise Exception(f\"Cannot reach webpage {BASE_PAGE} {r.status_code}\")\n",
-    "soup = BeautifulSoup(r.text, features=\"lxml\")\n",
-    "link_to_file = None  # stays None when no tag matches JS_FILE_PATTERN\n",
-    "for possible in soup.find_all(\"link\", attrs={\"as\": \"script\"}):\n",
-    "    # .get() skips tags that carry no href instead of raising KeyError\n",
-    "    href = possible.get(\"href\") or \"\"\n",
-    "    if JS_FILE_PATTERN in href:\n",
-    "        link_to_file = \"https://www.euromomo.eu\" + href\n",
-    "        break"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[<link as=\"script\" href=\"/50fe40790468168eec944b4bf3a7387c41e08258-80a60393842707b4ef3f.js\" rel=\"preload\"/>,\n",
-       " <link as=\"script\" href=\"/framework-e94c62ba7ecbd1156bac.js\" rel=\"preload\"/>,\n",
-       " <link as=\"script\" href=\"/app-c8f47833a3ed3eeb35b5.js\" rel=\"preload\"/>,\n",
-       " <link as=\"script\" href=\"/component---src-templates-graphs-and-maps-js-4be2e0d0712cbdd396d8.js\" rel=\"preload\"/>,\n",
-       " <link as=\"script\" href=\"/commons-5c085e7cd1a604626d58.js\" rel=\"preload\"/>,\n",
-       " <link as=\"script\" href=\"/5e2a4920-b8478647cb0804da9c90.js\" rel=\"preload\"/>,\n",
-       " <link as=\"script\" href=\"/styles-c41920853eed0685ae46.js\" rel=\"preload\"/>,\n",
-       " <link as=\"script\" href=\"/webpack-runtime-ba300874af730e00e5e4.js\" rel=\"preload\"/>]"
-      ]
-     },
-     "execution_count": 31,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Inspect every <link as=\"script\"> tag of the page (useful when JS_FILE_PATTERN stops matching)\n",
-    "soup.find_all(\"link\", attrs={\"as\": \"script\"})"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Abort with a real error when no matching script link was found.\n",
-    "# An exception stops a \"Run All\" at this cell with a traceback, whereas exit()\n",
-    "# inside a notebook acts on the kernel session instead of reporting a failure.\n",
-    "if link_to_file is None:\n",
-    "    raise RuntimeError(f\"Could not find a JS file with {JS_FILE_PATTERN} in its name :-(\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'https://www.euromomo.eu/component---src-templates-graphs-and-maps-js-4be2e0d0712cbdd396d8.js'"
-      ]
-     },
-     "execution_count": 33,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Display the URL that was built for the matching script\n",
-    "link_to_file"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# We need a bit of renaming\n",
-    "def rename_key(key, value):\n",
-    "    \"\"\"Pick a descriptive name for one top-level entry of the scraped JSON.\n",
-    "\n",
-    "    The name is inferred from the shape and content of ``value`` only;\n",
-    "    ``key`` is used for the progress message and the error text.\n",
-    "\n",
-    "    Returns:\n",
-    "        One of \"countries\", \"age_groups\", \"week\", \"year\", \"data_totals\",\n",
-    "        \"excess_mortality\", \"z_scores_country_age_groups\" or \"z_scores_country\".\n",
-    "\n",
-    "    Raises:\n",
-    "        Exception: if ``value`` matches none of the known shapes.\n",
-    "    \"\"\"\n",
-    "    print(f'doing {key}')\n",
-    "    if isinstance(value, list):\n",
-    "        if \"Belgium\" in value:\n",
-    "            return \"countries\"\n",
-    "        if \"Total\" in value:\n",
-    "            return \"age_groups\"\n",
-    "    elif isinstance(value, str):\n",
-    "        # Keep `value` untouched so the error below reports the real input type,\n",
-    "        # and let non-numeric strings reach that error instead of a bare ValueError.\n",
-    "        try:\n",
-    "            number = int(value)\n",
-    "        except ValueError:\n",
-    "            number = None\n",
-    "        if number is not None:\n",
-    "            # ISO week numbering allows a week 53 in long years\n",
-    "            if number <= 53:\n",
-    "                return \"week\"\n",
-    "            if number >= 2020:\n",
-    "                return \"year\"\n",
-    "    elif isinstance(value, dict):\n",
-    "        fields = set(value.keys())\n",
-    "        if fields == {\"counts\", \"zscores\", \"weeks\"}:\n",
-    "            return \"data_totals\"\n",
-    "        if fields == {\"data\", \"years\"}:\n",
-    "            return \"excess_mortality\"\n",
-    "        if fields == {\"data\", \"weeks\"}:\n",
-    "            # The two z-score tables share their keys; only the size of \"data\" differs\n",
-    "            if len(value[\"data\"]) == 24:\n",
-    "                return \"z_scores_country_age_groups\"\n",
-    "            if len(value[\"data\"]) >= 276:\n",
-    "                return \"z_scores_country\"\n",
-    "    raise Exception(f\"Cannot find valid key name. Key is {key} with value of type {type(value)}\")\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Download the script, checking the HTTP status the same way as for the page fetch\n",
-    "js_response = requests.get(link_to_file, timeout=30)\n",
-    "if js_response.status_code != 200:\n",
-    "    raise Exception(f\"Cannot download JS file {link_to_file} {js_response.status_code}\")\n",
-    "js_file: str = js_response.text"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Collect the text between JSON.parse(' and the next ') for every occurrence in the script\n",
-    "PARSE_PREFIX = \"JSON.parse('\"\n",
-    "pos = 0\n",
-    "found = []\n",
-    "while True:\n",
-    "    pos = js_file.find(PARSE_PREFIX, pos)\n",
-    "    if pos == -1:\n",
-    "        break\n",
-    "    start = pos + len(PARSE_PREFIX)\n",
-    "    end_pos = js_file.find(\"')\", pos)\n",
-    "    if end_pos == -1:\n",
-    "        # No closing ') left: slicing up to -1 would swallow almost the rest of the file\n",
-    "        break\n",
-    "\n",
-    "    content = js_file[start:end_pos]\n",
-    "    # Explicit check rather than assert, so it still runs under python -O\n",
-    "    if \"'\" in content:\n",
-    "        raise ValueError(\"Unexpected quote inside a JSON.parse('...') payload\")\n",
-    "    found.append(content)\n",
-    "    # The prefix cannot overlap itself, so the search can resume right after it\n",
-    "    pos = start\n",
-    "\n",
-    "if not found:\n",
-    "    raise ValueError(\"No JSON.parse('...') call found in the JS file\")\n",
-    "\n",
-    "# the biggest JSON part in the file is probably the good one ;-)\n",
-    "biggest = max(found, key=len)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Decode the selected payload; json.loads raises if the text is not valid JSON\n",
-    "raw_data = json.loads(biggest)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "doing weeks\n"
-     ]
-    },
-    {
-     "ename": "Exception",
-     "evalue": "Cannot find valid key name. Key is weeks with value of type <class 'list'>",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mException\u001b[0m                                 Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-38-7fbed730e1a3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mrename_key\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mraw_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;32m<ipython-input-38-7fbed730e1a3>\u001b[0m in \u001b[0;36m<dictcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mrename_key\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mraw_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;32m<ipython-input-34-f58db1eb5796>\u001b[0m in \u001b[0;36mrename_key\u001b[0;34m(key, value)\u001b[0m\n\u001b[1;32m     23\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"data\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>=\u001b[0m \u001b[0;36m276\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     24\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0;34m\"z_scores_country\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 25\u001b[0;31m     \u001b[0;32mraise\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Cannot find valid key name. Key is {key} with value of type {type(value)}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;31mException\u001b[0m: Cannot find valid key name. Key is weeks with value of type <class 'list'>"
-     ]
-    }
-   ],
-   "source": [
-    "# Re-key every top-level entry with the name rename_key infers from its value.\n",
-    "# rename_key raises when an entry matches none of the shapes it knows about.\n",
-    "data = {rename_key(k, v): v for k, v in raw_data.items()}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 39,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Write the renamed data to the configured OUTPUT_FILE rather than a hard-coded name\n",
-    "with open(OUTPUT_FILE, 'w') as f:\n",
-    "    json.dump(data, f)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "1"
-      ]
-     },
-     "execution_count": 13,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Number of JSON.parse payloads that were collected\n",
-    "len(found)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['weeks', 'ages', 'pooled', 'countries', 'reportYear', 'reportWeek']"
-      ]
-     },
-     "execution_count": 23,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Top-level keys of the decoded payload, in order\n",
-    "list(raw_data.keys())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "list"
-      ]
-     },
-     "execution_count": 26,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Scratch check: shows the type of a list literal\n",
-    "type([1,2,3])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.4"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}