Now using py files, for automation
[covid19.git] / publish.md
diff --git a/publish.md b/publish.md
new file mode 100644 (file)
index 0000000..6b4ed59
--- /dev/null
@@ -0,0 +1,343 @@
+---
+jupyter:
+  jupytext:
+    formats: ipynb,md
+    text_representation:
+      extension: .md
+      format_name: markdown
+      format_version: '1.2'
+      jupytext_version: 1.9.1
+  kernelspec:
+    display_name: Python 3
+    language: python
+    name: python3
+---
+
+<!-- #region Collapsed="false" -->
+Data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)
+<!-- #endregion -->
+
+```python Collapsed="false"
+import itertools
+import collections
+import json
+import pandas as pd
+import numpy as np
+from scipy.stats import gmean
+import datetime
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+%matplotlib inline
+%load_ext sql
+```
+
+```python Collapsed="false"
+connection_string = 'postgresql://covid:3NbjJTkT63@localhost/covid'
+```
+
+```python Collapsed="false"
+%sql $connection_string
+```
+
+```python Collapsed="false"
+# DEATH_COUNT_THRESHOLD = 10
+COUNTRIES_CORE = tuple('IT DE UK ES IE FR BE'.split())
+# COUNTRIES_NORDIC = 'SE NO DK FI UK'.split()
+# COUNTRIES_FRIENDS = 'IT UK ES BE SI MX'.split()
+# # COUNTRIES_FRIENDS = 'IT UK ES BE SI PT'.split()
+
+# COUNTRIES_AMERICAS = ['AG', 'AR', 'AW', 'BS', 'BB', 'BZ', 'BM', 'BO', 'BR', 'VG', 'KY', # excluding Canada and USA
+#        'CL', 'CO', 'CR', 'CU', 'CW', 'DM', 'DO', 'EC', 'SV', 'GL', 'GD', 'GT',
+#        'GY', 'HT', 'HN', 'JM', 'MX', 'MS', 'NI', 'PA', 'PY', 'PE', 'PR', 'KN',
+#        'LC', 'VC', 'SX', 'SR', 'TT', 'TC', 'VI', 'UY', 'VE']
+# COUNTRIES_OF_INTEREST = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS))
+# COUNTRIES_ALL = list(set(COUNTRIES_CORE + COUNTRIES_FRIENDS + COUNTRIES_NORDIC + COUNTRIES_AMERICAS))
+```
+
+<!-- #region Collapsed="false" -->
+# Write results to summary file
+<!-- #endregion -->
+
+```python Collapsed="false"
+last_uk_date = %sql select date from uk_data order by date desc limit 1
+last_uk_date = last_uk_date[0][0]
+last_uk_date
+```
+
+```python Collapsed="false"
+last_intl_date = %sql select report_date from weekly_cases order by report_date desc limit 1
+last_intl_date = last_intl_date[0][0]
+last_intl_date
+```
+
+```sql magic_args="results << select date, new_cases, new_deaths "
+from uk_data 
+order by date
+```
+
+```python
+uk_data = results.DataFrame()
+uk_data['date'] = uk_data.date.astype('datetime64[ns]')
+uk_data.set_index('date', inplace=True)
+uk_data.tail(10)
+```
+
+```python
+most_recent_uk_date = %sql select max(date) from uk_data
+most_recent_uk_date = most_recent_uk_date[0][0]
+most_recent_uk_date
+```
+
+```sql Collapsed="false" magic_args="results << select geo_id, country_name, culm_deaths "
+from weekly_cases join countries using (geo_id)
+where geo_id in :COUNTRIES_CORE 
+    and (geo_id, report_date) in (select geo_id, max(report_date) from weekly_cases group by geo_id)
+order by geo_id
+```
+
+```python
+thirty_days_ago = most_recent_uk_date - datetime.timedelta(days=30)
+thirty_days_ago
+```
+
+```python
+# thirty_days_ago = most_recent_uk_date - datetime.interval(days=30)
+total_uk_deaths = %sql select sum(new_deaths) from uk_data
+total_uk_deaths = total_uk_deaths[0][0]
+deaths_in_past_month = %sql select sum(new_deaths) from uk_data where date > :thirty_days_ago
+deaths_in_past_month = deaths_in_past_month[0][0]
+cases_in_past_month = %sql select sum(new_cases) from uk_data where date > :thirty_days_ago
+cases_in_past_month = cases_in_past_month[0][0]
+total_uk_deaths, deaths_in_past_month, cases_in_past_month
+```
+
+```python Collapsed="false"
+with open('covid_summary.md', 'w') as f:
+    f.write('% Covid death data summary\n')
+    f.write('% Neil Smith\n')
+    f.write(f'% Created on {datetime.datetime.now().strftime("%Y-%m-%d")}\n')
+    f.write('\n')       
+    f.write(f'> Last UK data from {last_uk_date.strftime("%d %b %Y")}. ')
+    f.write(f' Last international data from {last_intl_date.strftime("%d %b %Y")}.\n')
+    f.write('\n')    
+```
+
+```python Collapsed="false"
+with open('covid_summary.md', 'a') as f:
+    f.write('## Headlines (UK data)\n')
+    f.write('\n')
+    f.write('| []() | |\n')
+    f.write('|:---|---:|\n')
+    f.write(f'| Deaths reported so far | {total_uk_deaths} | \n')
+    f.write(f'| Deaths in last 30 days | {deaths_in_past_month} | \n')
+    f.write(f'| Cases in last 30 days  | {cases_in_past_month} | \n')
+#     f.write(f'| Total Covid deaths to date (estimated) | {uk_deaths_to_date:.0f} |\n')
+    f.write('\n')
+```
+
+```python Collapsed="false"
+with open('covid_summary.md', 'a') as f:
+    f.write('## International comparison\n')
+    f.write('\n')
+    f.write(f'Based on weekly data. Last data from {last_intl_date.strftime("%d %b %Y")}\n')
+    f.write('\n')
+    f.write('### Total deaths\n')
+    f.write('\n')
+    f.write('![Total deaths](covid_deaths_total_linear.png)\n')
+    f.write('\n')
+    f.write('| Country ID | Country name | Total deaths |\n')
+    f.write('|:-----------|:-------------|-------------:|\n')
+    for c_id, c_name, t_deaths in results:
+        f.write(f'| {c_id} | {c_name} | {t_deaths} |\n')
+    f.write('\n')
+```
+
+```python Collapsed="false"
+# with open('covid_summary.md', 'a') as f:
+#     f.write('## All-causes deaths, UK\n')
+#     f.write('\n')
+#     f.write('![All-causes deaths](deaths-radar.png)\n')
+#     f.write('\n')
+#     f.write('### True deaths\n')
+#     f.write('\n')
+#     f.write(f'The number of deaths reported in official statistics, {uk_covid_deaths}, is an underestimate '
+#             'of the true number of Covid deaths.\n'
+#             'This is especially true early in the pandemic, approximately March to May 2020.\n')
+#     f.write('We can get a better understanding of the impact of Covid by looking at the number of deaths, '
+#             'over and above what would be expected at each week of the year.\n')
+#     f.write(f'The ONS (and other bodies in Scotland and Northern Ireland) have released data on the number of deaths '
+#             f'up to {pd.to_datetime(excess_deaths_data["end_date"]).strftime("%d %B %Y")}.\n\n')
+#     f.write('If, for each of those weeks, I take the largest of the excess deaths or the reported Covid deaths, ')
+#     f.write(f'I estimate there have been **{uk_deaths_to_date}** total deaths so far.\n')
+#     f.write('\n')
+```
+
+```python Collapsed="false"
+with open('covid_summary.md', 'a') as f:
+    f.write('### Deaths per week\n')
+    f.write('\n')
+    f.write('![Deaths per week](covid_deaths_per_week.png)\n')
+    f.write('\n')
+    f.write('![Deaths per week, last 6 weeks](deaths_by_date_last_6_weeks.png)\n')
+    f.write('\n')
+```
+
+```python
+with open('covid_summary.md', 'a') as f:
+    f.write('## UK data\n')
+    f.write('\n')
+    f.write('### Total deaths\n')
+    f.write('\n')
+    f.write(f'Deaths reported up to {last_uk_date.strftime("%d %b %Y")}: {total_uk_deaths}\n')
+    f.write('\n')    
+    f.write('![Total deaths](cases_and_deaths.png)\n')
+    f.write('\n')    
+    f.write('![Cases and deaths in last 60 days](cases_and_deaths_last_60_days.png)\n')
+    f.write('\n')
+    f.write('![Deaths compared to past five years](deaths-radar-2020.png)\n')
+    f.write('\n')
+    
+```
+
+```python Collapsed="false"
+with open('hospital_normalisation_date.json') as f:
+    hospital_normalisation_date_data = json.load(f)
+```
+
+```python Collapsed="false"
+with open('covid_summary.md', 'a') as f:
+    f.write('### Hospital care\n')
+    f.write(f'Based on a 7-day moving average\n')
+    f.write('\n')
+    f.write('![Cases, admissions, deaths](cases_admissions_deaths.png)\n')
+    f.write('\n')
+    f.write('Due to the large scale differences between the three '
+            'measures, they are all normalised to show changes ')
+    f.write(f'since {pd.to_datetime(hospital_normalisation_date_data["hospital_normalisation_date"]).strftime("%d %B %Y")}.\n')
+    f.write('\n')
+    f.write('People in hospital, and on mechanical ventilators\n')
+    f.write('\n')
+    f.write('![People in hospital and on mechancial ventilators](people_in_hospital.png)\n')
+    f.write('\n')
+```
+
+```python Collapsed="false"
+with open('covid_summary.md', 'a') as f:
+    f.write('### Testing effectiveness\n')
+    f.write('\n')
+    f.write('A question about testing is whether more detected cases is a result of more tests being '
+            'done or is because the number of cases is increasing. One way of telling the differeence '
+            'is by looking at the fraction of tests that are positive.\n')
+    f.write('\n')
+    f.write('![Positive tests and cases](tests_and_cases.png)\n')
+    f.write('\n')
+    f.write('Numbers of positive tests and cases, '
+            '7-day moving average.\n'
+            'Note the different y-axes\n')
+    f.write('\n')    
+    f.write('![Fraction of tests with positive result](fraction_positive_tests.png)\n')
+    f.write('\n')
+    f.write('Fraction of tests with a positive result, both daily figures and '
+            '7-day moving average.\n')
+    f.write('\n')    
+    f.write('\n')
+    f.write('![Tests against fraction positive, trajectory](fraction_positive_tests_vs_tests.png)\n')
+    f.write('\n')
+    f.write('The trajectory of tests done vs fraction positive tests.\n')
+    f.write('\n')
+    f.write('Points higher indicate more tests; points to the right indicate more positive tests.'
+            'More tests being done with the same infection prevelance will move the point up '
+            'and to the left.\n')
+    f.write('\n')
+    f.write('\n')
+    f.write('![Tests against fraction positive, trajectory](tests_vs_fraction_positive_animation.png)\n')
+    f.write('\n')
+```
+
+```python Collapsed="false"
+
+```
+
+```python Collapsed="false"
+with open('covid_summary.md', 'a') as f:
+    f.write('# Data sources\n')
+    f.write('\n')
+    f.write('> Covid data from [European Centre for Disease Prevention and Control](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide)\n')
+    f.write('\n')    
+    f.write("""> Population data from:
+
+* [Office of National Statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales) (Endland and Wales) Weeks start on a Saturday.
+* [Northern Ireland Statistics and Research Agency](https://www.nisra.gov.uk/publications/weekly-deaths) (Northern Ireland). Weeks start on a Saturday. Note that the week numbers don't match the England and Wales data.
+* [National Records of Scotland](https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/vital-events/general-publications/weekly-and-monthly-data-on-births-and-deaths/weekly-data-on-births-and-deaths) (Scotland). Note that Scotland uses ISO8601 week numbers, which start on a Monday.""")
+    
+    f.write('\n\n')
+    f.write('> [Source code available](https://git.njae.me.uk/?p=covid19.git;a=tree)\n')
+    f.write('\n') 
+
+```
+
+```python Collapsed="false"
+!pandoc --toc -s covid_summary.md > covid_summary.html
+```
+
+```python Collapsed="false"
+!scp covid_summary.html neil@ogedei:/var/www/scripts.njae.me.uk/covid/index.html
+!scp covid_deaths_total_linear.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp cases_and_deaths.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp cases_and_deaths_last_60_days.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+# !scp deaths-radar.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp deaths-radar-2020.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp covid_deaths_per_week.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp fraction_positive_tests.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/    
+!scp tests_and_cases.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp deaths_by_date_last_6_weeks.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp fraction_positive_tests_vs_tests.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp tests_vs_fraction_positive_animation.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/   
+!scp people_in_hospital.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp cases_admissions_deaths.png neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+```
+
+```python Collapsed="false"
+with open('uk_covid_deaths.js', 'w') as f:
+    f.write(f"document.write('{total_uk_deaths}');")
+    
+with open('uk_deaths_30_days.js', 'w') as f:
+    f.write(f"document.write('{deaths_in_past_month}');")
+
+with open('uk_cases_30_days.js', 'w') as f:
+    f.write(f"document.write('{cases_in_past_month}');")    
+    
+# with open('estimated_total_deaths.js', 'w') as f:
+#     f.write(f"document.write('{uk_deaths_to_date:.0f}');")
+
+# edut = pd.to_datetime(excess_deaths_data["end_date"]).strftime('%d %B %Y')
+# with open('excess_deaths_upto.js', 'w') as f:
+#     f.write(f"document.write('{edut}');")
+    
+with open('last_uk_date.js', 'w') as f:
+    f.write(f"document.write('{pd.to_datetime(last_uk_date).strftime('%d %B %Y')}');")
+
+with open('last_intl_date.js', 'w') as f:
+    f.write(f"document.write('{pd.to_datetime(last_intl_date).strftime('%d %B %Y')}');")
+
+```
+
+```python Collapsed="false"
+# pd.to_datetime(excess_deaths_upto).strftime('%d %B %Y')
+```
+
+```python Collapsed="false"
+!scp uk_covid_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp uk_deaths_30_days.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp uk_cases_30_days.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+# !scp estimated_total_deaths.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+# !scp excess_deaths_upto.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp last_uk_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp last_intl_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+!scp hospital_normalisation_date.js neil@ogedei:/var/www/scripts.njae.me.uk/covid/
+```
+
+```python Collapsed="false"
+
+```