From: Neil Smith Date: Sat, 10 Apr 2021 17:01:07 +0000 (+0100) Subject: General updates X-Git-Url: https://git.njae.me.uk/?a=commitdiff_plain;p=covid19.git General updates --- diff --git a/.gitignore b/.gitignore index c5755ed..0f71ab1 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ *zip *xls *xlsx +*XLSX *ods *csv *png diff --git a/covid_summary.md b/covid_summary.md index c7b1620..6f37ffd 100644 --- a/covid_summary.md +++ b/covid_summary.md @@ -1,20 +1,20 @@ % Covid death data summary % Neil Smith -% Created on 2021-03-11 +% Created on 2021-04-10 -> Last UK data from 11 Mar 2021. Last international data from 07 Mar 2021. +> Last UK data from 10 Apr 2021. Last international data from 04 Apr 2021. ## Headlines (UK data) | []() | | |:---|---:| -| Deaths reported so far | 125168 | -| Deaths in last 30 days | 11147 | -| Cases in last 30 days | 237470 | +| Deaths reported so far | 127080 | +| Deaths in last 30 days | 1914 | +| Cases in last 30 days | 120981 | ## International comparison -Based on weekly data. Last data from 07 Mar 2021 +Based on weekly data. Last data from 04 Apr 2021 ### Total deaths @@ -22,13 +22,13 @@ Based on weekly data. Last data from 07 Mar 2021 | Country ID | Country name | Total deaths | |:-----------|:-------------|-------------:| -| BEL | Belgium | 22287 | -| DEU | Germany | 71934 | -| ESP | Spain | 71436 | -| FRA | France | 88600 | -| GBR | United Kingdom | 124501 | -| IRL | Ireland | 4422 | -| ITA | Italy | 99785 | +| BEL | Belgium | 23198 | +| DEU | Germany | 77013 | +| ESP | Spain | 75783 | +| FRA | France | 96678 | +| GBR | United Kingdom | 126836 | +| IRL | Ireland | 4718 | +| ITA | Italy | 111030 | ### Deaths per week @@ -40,7 +40,7 @@ Based on weekly data. Last data from 07 Mar 2021 ### Total deaths -Deaths reported up to 11 Mar 2021: 125168 +Deaths reported up to 10 Apr 2021: 127080 ![Total deaths](cases_and_deaths.png) diff --git a/historical_deaths_import.md b/historical_deaths_import.md index 9a7acd8..1a226be 100644 --- a/historical_deaths_import.md +++ b/historical_deaths_import.md @@ -5,8 +5,8 @@ jupyter: text_representation: extension: .md format_name: markdown - format_version: '1.2' - jupytext_version: 1.9.1 + format_version: '1.3' + jupytext_version: 1.11.1 kernelspec: display_name: Python 3 language: python @@ -50,13 +50,15 @@ connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid' ```python Collapsed="false" conn = create_engine(connection_string) +engine = create_engine(connection_string) ``` ```python Collapsed="false" england_wales_filename = 'uk-deaths-data/publishedweek532020.xlsx' ``` -```sql Collapsed="false" +```python +query_string = ''' drop table if exists all_causes_deaths; create table all_causes_deaths ( week integer, @@ -66,6 +68,9 @@ create table all_causes_deaths ( deaths integer, CONSTRAINT week_nation PRIMARY KEY(year, week, nation) ); +''' +with engine.connect() as connection: + connection.execute(query_string) ``` ```python Collapsed="false" @@ -321,6 +326,14 @@ deaths_headlines_s %sql select * from all_causes_deaths limit 5 ``` +```python +query_string = ''' +delete from all_causes_deaths where nation = 'Scotland'; +''' +with engine.connect() as connection: + connection.execute(query_string) +``` + ```python Collapsed="false" for year, ser in deaths_headlines_s.items(): year_i = int(year[-4:]) @@ -328,7 +341,11 @@ for year, ser in deaths_headlines_s.items(): for week, deaths in ser.dropna().iteritems(): # print(datetime.date.fromisocalendar(year_i, week, 7), deaths) dut = datetime.date.fromisocalendar(year_i, week, 7) - %sql insert into all_causes_deaths(week, year, date_up_to, nation, deaths) values ({week}, {year_i}, :dut, 'Scotland', {deaths}) + query_string = f'''insert into + all_causes_deaths(week, year, date_up_to, nation, deaths) + values ({week}, {year_i}, '{dut}', 'Scotland', {deaths});''' + with engine.connect() as connection: + connection.execute(query_string) ``` ```python @@ -411,7 +428,11 @@ rd.head() ``` ```python -%sql delete from all_causes_deaths where nation = 'England' +query_string = ''' +delete from all_causes_deaths where nation = 'England'; +''' +with engine.connect() as connection: + connection.execute(query_string) ``` ```python diff --git a/publish.py b/publish.py index d60a7ef..82a3a25 100644 --- a/publish.py +++ b/publish.py @@ -36,7 +36,7 @@ COUNTRIES_FRIENDS = tuple('ITA GBR ESP BEL SVN MEX'.split()) def singleton_sql_value(engine, query_string): with engine.connect() as conn: result = conn.execute(query_string) - return result.next()[0] + return result.first()[0] # %% diff --git a/test_and_case_data.py b/test_and_case_data.py index d2ef00a..a5e8db2 100644 --- a/test_and_case_data.py +++ b/test_and_case_data.py @@ -1,6 +1,19 @@ -#!/usr/bin/env python -# coding: utf-8 -# %% +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.10.2 +# kernelspec: +# display_name: Python 3 +# language: python +# name: python3 +# --- + +# %% Collapsed="false" import itertools import collections import json @@ -8,29 +21,30 @@ import pandas as pd import numpy as np from scipy.stats import gmean import datetime -import os + import sqlalchemy import matplotlib as mpl import matplotlib.pyplot as plt import matplotlib.animation as ani -plt.ioff() # # %matplotlib inline +# # %load_ext sql +plt.ioff() - -# %% +# %% Collapsed="false" chart_start_date = '2020-09-15' - -# %% +# %% Collapsed="false" connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid' +# %% Collapsed="false" +# %sql $connection_string # %% engine = sqlalchemy.create_engine(connection_string) # %% -query_string = '''select uk_data.date, +qstr = '''select uk_data.date, uk_data.new_cases, uk_data_7.new_cases as new_cases_7, uk_data.new_tests, uk_data_7.new_tests as new_tests_7, uk_data.new_pcr_tests, uk_data_7.new_pcr_tests as new_pcr_7, @@ -39,11 +53,42 @@ query_string = '''select uk_data.date, uk_data_7.new_cases / uk_data_7.new_tests as fraction_positive_7 from uk_data left outer join uk_data_7 using (date) order by uk_data.date''' -tests_data = pd.read_sql_query(query_string, engine, +tests_data = pd.read_sql_query(qstr, engine, index_col='date', parse_dates = ['date']) # %% +tests_data.tail(10) + +# %% Collapsed="false" +# tests_data[['new_tests', 'new_cases']].plot() + +# %% Collapsed="false" +# tests_data[['fraction_positive', 'fraction_positive_7']].dropna().plot() + +# %% Collapsed="false" +# ax = data_by_day.dropna().loc['2020-06-15': , ['fraction_positive', 'fraction_positive_m7']].plot(figsize=(10, 8), title='Fraction of tests with positive results') +# ax.legend(['Fraction positive per day', 'Fraction positive, 7 day moving average']) +# ax.set_ylabel('Fraction positive') +# plt.savefig('fraction_positive_tests.png') + +# %% Collapsed="false" +# pri_y_max = int((tests_data.dropna().loc['2020-06-15': , 'new_tests_7'].max() * 1.1) / 100 ) * 100 +# ax = tests_data.dropna().loc['2020-06-15': , 'new_tests_7'].plot(figsize=(10, 8), +# style=['k-'], +# legend=False, +# ylim=(0, pri_y_max)) +# ax.set_title('Tests done and new cases (7 day moving average)') +# ax.legend(['Tests, 7 day moving average'], loc='lower left') +# ax.set_ylabel('Tests') +# sec_y_max = int((tests_data.dropna().loc['2020-06-15':, 'new_cases_7'].max() * 1.1) / 100) * 100 +# ax = tests_data.dropna().loc['2020-06-15':, 'new_cases_7'].plot(ax=ax, secondary_y=True, style='r--') +# ax.set_ylim((0, sec_y_max)) +# ax.legend(['Cases (7 day moving average)'], loc='lower right') +# ax.set_ylabel('New cases') +# # plt.savefig('tests_and_cases.png') + +# %% Collapsed="false" pri_y_max = int((tests_data.dropna().loc[chart_start_date: , 'new_tests_7'].max() * 1.1) / 100 ) * 100 ax = tests_data.dropna().loc[chart_start_date: , 'new_tests_7'].plot(figsize=(10, 8), style=['k-'], @@ -59,16 +104,18 @@ ax.legend(['Cases (7 day moving average)'], loc='lower right') ax.set_ylabel('New cases') plt.savefig('tests_and_cases.png') +# %% Collapsed="false" +pri_y_max = int((tests_data.dropna().loc[chart_start_date: , 'fraction_positive_7'].max() * 1.1) * 100) -# %% -ax = (tests_data.loc[chart_start_date: , ['fraction_positive', 'fraction_positive_7']] * 100).plot(figsize=(10, 8), - style=['b:', 'k-'], legend=False) +ax = (tests_data.loc[chart_start_date: , + ['fraction_positive', 'fraction_positive_7']] * 100).plot( + figsize=(10, 8), style=['b:', 'k-'], legend=False, ylim=(0, pri_y_max)) ax.set_title('Fraction of tests with positive results') ax.legend(['Fraction positive (%)', 'Fraction positive (%), 7 day moving average'], loc='upper left') ax.set_ylabel('Fraction positive') -cases_axis_max = (1.0 * tests_data.loc[chart_start_date:].new_cases_7.max() - * tests_data.loc[chart_start_date:].fraction_positive.max() - / tests_data.loc[chart_start_date:].fraction_positive_7.max() +cases_axis_max = (1.1 * tests_data.loc[chart_start_date:].new_cases_7.max() +# * tests_data.loc[chart_start_date:].fraction_positive.max() +# / tests_data.loc[chart_start_date:].fraction_positive_7.max() ) ax2 = ax.twinx() @@ -78,8 +125,7 @@ ax2.legend(['Cases (7 day moving average)'], loc='center left') ax2.set_ylabel('New cases') plt.savefig('fraction_positive_tests.png') - -# %% +# %% Collapsed="false" ax = tests_data.dropna().loc[chart_start_date:].plot(x='fraction_positive_7', y='new_tests_7', figsize=(8, 8), legend=None) @@ -92,8 +138,7 @@ for d in tests_data.dropna().loc[chart_start_date::15].index: s = d.strftime("%d %B %Y")) plt.savefig('fraction_positive_tests_vs_tests.png') - -# %% +# %% Collapsed="false" fig = plt.figure(figsize=(8, 8)) plt.ylabel('Number of tests') plt.xlabel('Fraction of tests that are positive') @@ -135,13 +180,41 @@ animator.save('tests_vs_fraction_positive.mp4') # plt.show() -# %% -os.system('rm tests_vs_fraction_positive_animation.png') -os.system('ffmpeg -i tests_vs_fraction_positive.mp4 -plays 0 -final_delay 1 -f apng tests_vs_fraction_positive_animation.png') - - -# %% - - - +# %% Collapsed="false" +# !rm tests_vs_fraction_positive_animation.png +# !ffmpeg -i tests_vs_fraction_positive.mp4 -plays 0 -final_delay 1 -f apng tests_vs_fraction_positive_animation.png + +# %% Collapsed="false" +# fig = plt.figure(figsize=(8, 8)) +# plt.ylabel('Number of tests') +# plt.xlabel('Fraction of tests that are positive') + +# all_data = data_by_day.dropna().loc[chart_start_date:] + +# minx = all_data.fraction_positive_m7.min() * 0.9 +# maxx = all_data.fraction_positive_m7.max() * 1.1 +# miny = all_data.tests_m7.min() * 0.9 +# maxy = all_data.tests_m7.max() * 1.1 + +# plt.xlim(minx, maxx) +# plt.ylim(miny, maxy) +# # plt.legend(None) + +# def build_state_frame(i): +# this_data = all_data[:i] +# p = plt.plot(this_data.fraction_positive_m7, this_data.tests_m7) +# p[0].set_color('r') +# for d in this_data[::15].index: +# plt.plot(this_data.loc[d, 'fraction_positive_m7'], +# this_data.loc[d, 'tests_m7'], 'o', +# markersize=8, markerfacecolor='r', markeredgecolor='r') +# plt.text(this_data.loc[d, 'fraction_positive_m7'] + 0.0002, +# this_data.loc[d, 'tests_m7'], +# s = d.strftime("%d %B %Y")) + +# # animator = ani.FuncAnimation(fig, build_state_frame, interval=200) +# # animator.save('myfirstAnimation.mp4') +# build_state_frame(103) +# plt.show() +# %% Collapsed="false" diff --git a/uk_deaths_import.md b/uk_deaths_import.md index baa5ae6..1d34486 100644 --- a/uk_deaths_import.md +++ b/uk_deaths_import.md @@ -6,7 +6,7 @@ jupyter: extension: .md format_name: markdown format_version: '1.3' - jupytext_version: 1.10.2 + jupytext_version: 1.11.1 kernelspec: display_name: Python 3 language: python @@ -57,15 +57,15 @@ engine = create_engine(connection_string) ``` ```python Collapsed="false" -england_wales_filename = 'uk-deaths-data/publishedweek082021.xlsx' +england_wales_filename = 'uk-deaths-data/publishedweek122021.xlsx' ``` ```python Collapsed="false" -scotland_filename = 'uk-deaths-data/Scottish Government COVID-19 data (10 March 2021).xlsx' +scotland_filename = 'uk-deaths-data/Scottish Government COVID-19 data (08 April 2021).xlsx' ``` ```python -n_ireland_filename = 'uk-deaths-data/Weekly_Deaths.xlsx' +n_ireland_filename = 'uk-deaths-data/Weekly_Deaths - w e 2nd April 2021.XLSX' ``` ```python Collapsed="false" @@ -118,7 +118,7 @@ rd = eng_xls.iloc[1:][['Week ended', 'Wales']].reset_index(level=0).rename( rd['year'] = 2021 rd['nation'] = 'Wales' rd.dropna(inplace=True) -rd.head() +rd.tail() ``` ```python @@ -183,7 +183,7 @@ rd.to_sql( ```python ni_xls = pd.read_excel(n_ireland_filename, sheet_name='Table 1', - skiprows=[0, 1, 2, 3], + skiprows=[0, 1, 2], header=0, ).rename( columns={'Week Ending (Friday)': 'date_up_to', 'Total Number of Deaths Registered in Week (2021P)': 'deaths', @@ -191,7 +191,7 @@ ni_xls = pd.read_excel(n_ireland_filename, rd = ni_xls[ni_xls['deaths'].notna()][['week', 'date_up_to', 'deaths']] rd['year'] = 2021 rd['nation'] = 'Northern Ireland' -rd +rd.tail() ``` ```python @@ -213,7 +213,7 @@ rd.to_sql( ``` ```python -%sql select nation from all_causes_deaths group by nation +%sql select nation, count(*) from all_causes_deaths group by nation ``` ```python