From 4c6bfcba885846a2abe23497cb097f91c469abe5 Mon Sep 17 00:00:00 2001 From: Neil Smith Date: Wed, 4 Jan 2017 16:07:42 +0000 Subject: [PATCH] Added standalone script to make database tables --- complete.ipynb | 2 +- dmarc_to_database.py | 127 +++++++++++++++++++++--------------------- make_database.ipynb | 28 +++++----- make_database.py | 129 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 208 insertions(+), 78 deletions(-) create mode 100644 make_database.py diff --git a/complete.ipynb b/complete.ipynb index ccbdaf0..3663338 100644 --- a/complete.ipynb +++ b/complete.ipynb @@ -912,7 +912,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.4.3+" + "version": "3.5.2" } }, "nbformat": 4, diff --git a/dmarc_to_database.py b/dmarc_to_database.py index 4624964..272998f 100644 --- a/dmarc_to_database.py +++ b/dmarc_to_database.py @@ -15,9 +15,9 @@ def fetch_msg(num): def xml_of_part(part): with zipfile.ZipFile(io.BytesIO(part.get_payload(decode=True))) as zf: - fn = zf.infolist()[0].filename - contents = zf.read(fn).decode('utf-8') - return xml.etree.ElementTree.fromstring(contents) + fn = zf.infolist()[0].filename + contents = zf.read(fn).decode('utf-8') + return xml.etree.ElementTree.fromstring(contents) def xml_of(message): @@ -40,66 +40,67 @@ def maybe_strip(text): else: return '' -field_maps = {'./policy_published/adkim': {'pg_field_name': 'policy_published_adkim', - 'pg_table': 'reports', - 'pg_type': 'varchar'}, - './policy_published/aspf': {'pg_field_name': 'policy_published_aspf', - 'pg_table': 'reports', - 'pg_type': 'varchar'}, - './policy_published/domain': {'pg_field_name': 'policy_published_domain', - 'pg_table': 'reports', - 'pg_type': 'varchar'}, - './policy_published/p': {'pg_field_name': 'policy_published_p', - 'pg_table': 'reports', - 'pg_type': 'varchar'}, - './policy_published/pct': {'pg_field_name': 'policy_published_pct', - 'pg_table': 'reports', - 'pg_type': 'int'}, - './record[{}]/auth_results/dkim/domain': {'pg_field_name': 'auth_results_dkim_domain', - 'pg_table': 'report_items', - 'pg_type': 'varchar'}, - './record[{}]/auth_results/dkim/result': {'pg_field_name': 'auth_results_dkim_result', - 'pg_table': 'report_items', - 'pg_type': 'varchar'}, - './record[{}]/auth_results/spf/domain': {'pg_field_name': 'auth_results_spf_domain', - 'pg_table': 'report_items', - 'pg_type': 'varchar'}, - './record[{}]/auth_results/spf/result': {'pg_field_name': 'auth_results_spf_result', - 'pg_table': 'report_items', - 'pg_type': 'varchar'}, - './record[{}]/identifiers/header_from': {'pg_field_name': 'identifiers_header_from', - 'pg_table': 'report_items', - 'pg_type': 'varchar'}, - './record[{}]/row/count': {'pg_field_name': 'count', - 'pg_table': 'report_items', - 'pg_type': 'int'}, - './record[{}]/row/policy_evaluated/disposition': {'pg_field_name': 'policy_evaluated_disposition', - 'pg_table': 'report_items', - 'pg_type': 'varchar'}, - './record[{}]/row/policy_evaluated/dkim': {'pg_field_name': 'policy_evaluated_dkim', - 'pg_table': 'report_items', - 'pg_type': 'varchar'}, - './record[{}]/row/policy_evaluated/spf': {'pg_field_name': 'policy_evaluated_spf', - 'pg_table': 'report_items', - 'pg_type': 'varchar'}, - './record[{}]/row/source_ip': {'pg_field_name': 'source_ip', - 'pg_table': 'report_items', - 'pg_type': 'inet'}, - './report_metadata/date_range/begin': {'pg_field_name': 'report_metadata_date_range_begin', - 'pg_table': 'reports', - 'pg_type': 'timestamp'}, - './report_metadata/date_range/end': {'pg_field_name': 'report_metadata_date_range_end', - 'pg_table': 'reports', - 'pg_type': 'timestamp'}, - './report_metadata/email': {'pg_field_name': 'report_metadata_email', - 'pg_table': 'reports', - 'pg_type': 'varchar'}, - './report_metadata/org_name': {'pg_field_name': 'report_metadata_org_name', - 'pg_table': 'reports', - 'pg_type': 'varchar'}, - './report_metadata/report_id': {'pg_field_name': 'report_metadata_report_id', - 'pg_table': 'reports', - 'pg_type': 'varchar'}} +field_maps = { + './policy_published/adkim': + {'pg_field_name': 'policy_published_adkim', + 'pg_table': 'reports', 'pg_type': 'varchar'}, + './policy_published/aspf': + {'pg_field_name': 'policy_published_aspf', + 'pg_table': 'reports', 'pg_type': 'varchar'}, + './policy_published/domain': + {'pg_field_name': 'policy_published_domain', + 'pg_table': 'reports', 'pg_type': 'varchar'}, + './policy_published/p': + {'pg_field_name': 'policy_published_p', + 'pg_table': 'reports', 'pg_type': 'varchar'}, + './policy_published/pct': + {'pg_field_name': 'policy_published_pct', + 'pg_table': 'reports', 'pg_type': 'int'}, + './record[{}]/auth_results/dkim/domain': + {'pg_field_name': 'auth_results_dkim_domain', + 'pg_table': 'report_items', 'pg_type': 'varchar'}, + './record[{}]/auth_results/dkim/result': + {'pg_field_name': 'auth_results_dkim_result', + 'pg_table': 'report_items', 'pg_type': 'varchar'}, + './record[{}]/auth_results/spf/domain': + {'pg_field_name': 'auth_results_spf_domain', + 'pg_table': 'report_items', 'pg_type': 'varchar'}, + './record[{}]/auth_results/spf/result': + {'pg_field_name': 'auth_results_spf_result', + 'pg_table': 'report_items', 'pg_type': 'varchar'}, + './record[{}]/identifiers/header_from': + {'pg_field_name': 'identifiers_header_from', + 'pg_table': 'report_items', 'pg_type': 'varchar'}, + './record[{}]/row/count': + {'pg_field_name': 'count', + 'pg_table': 'report_items', 'pg_type': 'int'}, + './record[{}]/row/policy_evaluated/disposition': + {'pg_field_name': 'policy_evaluated_disposition', + 'pg_table': 'report_items', 'pg_type': 'varchar'}, + './record[{}]/row/policy_evaluated/dkim': + {'pg_field_name': 'policy_evaluated_dkim', + 'pg_table': 'report_items', 'pg_type': 'varchar'}, + './record[{}]/row/policy_evaluated/spf': + {'pg_field_name': 'policy_evaluated_spf', + 'pg_table': 'report_items', 'pg_type': 'varchar'}, + './record[{}]/row/source_ip': + {'pg_field_name': 'source_ip', + 'pg_table': 'report_items', 'pg_type': 'inet'}, + './report_metadata/date_range/begin': + {'pg_field_name': 'report_metadata_date_range_begin', + 'pg_table': 'reports', 'pg_type': 'timestamp'}, + './report_metadata/date_range/end': + {'pg_field_name': 'report_metadata_date_range_end', + 'pg_table': 'reports', 'pg_type': 'timestamp'}, + './report_metadata/email': + {'pg_field_name': 'report_metadata_email', + 'pg_table': 'reports', 'pg_type': 'varchar'}, + './report_metadata/org_name': + {'pg_field_name': 'report_metadata_org_name', + 'pg_table': 'reports', 'pg_type': 'varchar'}, + './report_metadata/report_id': + {'pg_field_name': 'report_metadata_report_id', + 'pg_table': 'reports', 'pg_type': 'varchar'}} diff --git a/make_database.ipynb b/make_database.ipynb index a44d6d9..73c1cf6 100644 --- a/make_database.ipynb +++ b/make_database.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 9, + "execution_count": 1, "metadata": { "collapsed": true }, @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 2, "metadata": { "collapsed": true }, @@ -84,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 3, "metadata": { "collapsed": false }, @@ -95,7 +95,7 @@ "['dmarc.ini']" ] }, - "execution_count": 11, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -107,7 +107,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 8, "metadata": { "collapsed": false }, @@ -121,7 +121,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 9, "metadata": { "collapsed": false }, @@ -139,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 10, "metadata": { "collapsed": false }, @@ -147,10 +147,10 @@ { "data": { "text/plain": [ - "'create table reports (id serial primary key, policy_published_domain varchar, report_metadata_date_range_begin timestamptz, report_metadata_report_id varchar, report_metadata_date_range_end timestamptz, report_metadata_org_name varchar, policy_published_pct int, policy_published_p varchar, policy_published_aspf varchar, policy_published_adkim varchar, report_metadata_email varchar);'" + "'create table reports (id serial primary key, policy_published_domain varchar, policy_published_pct int, report_metadata_report_id varchar, report_metadata_date_range_end timestamptz, report_metadata_org_name varchar, report_metadata_date_range_begin timestamptz, policy_published_aspf varchar, report_metadata_email varchar, policy_published_p varchar, policy_published_adkim varchar);'" ] }, - "execution_count": 14, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -165,7 +165,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 11, "metadata": { "collapsed": false }, @@ -173,10 +173,10 @@ { "data": { "text/plain": [ - "'create table report_items (id serial primary key, report_id integer references reports, auth_results_dkim_domain varchar, policy_evaluated_dkim varchar, source_ip inet, count int, auth_results_spf_domain varchar, policy_evaluated_spf varchar, policy_evaluated_disposition varchar, identifiers_header_from varchar, auth_results_dkim_result varchar, auth_results_spf_result varchar);'" + "'create table report_items (id serial primary key, report_id integer references reports, auth_results_dkim_domain varchar, policy_evaluated_dkim varchar, policy_evaluated_spf varchar, policy_evaluated_disposition varchar, source_ip inet, auth_results_dkim_result varchar, count int, auth_results_spf_result varchar, auth_results_spf_domain varchar, identifiers_header_from varchar);'" ] }, - "execution_count": 15, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -192,7 +192,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 12, "metadata": { "collapsed": true }, @@ -232,7 +232,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.4.3+" + "version": "3.5.2+" } }, "nbformat": 4, diff --git a/make_database.py b/make_database.py new file mode 100644 index 0000000..ee955ec --- /dev/null +++ b/make_database.py @@ -0,0 +1,129 @@ + +# coding: utf-8 + +# In[1]: + +import configparser +import psycopg2 + + +# In[2]: + +field_maps = {'./policy_published/adkim': {'pg_field_name': 'policy_published_adkim', + 'pg_table': 'reports', + 'pg_type': 'varchar'}, + './policy_published/aspf': {'pg_field_name': 'policy_published_aspf', + 'pg_table': 'reports', + 'pg_type': 'varchar'}, + './policy_published/domain': {'pg_field_name': 'policy_published_domain', + 'pg_table': 'reports', + 'pg_type': 'varchar'}, + './policy_published/p': {'pg_field_name': 'policy_published_p', + 'pg_table': 'reports', + 'pg_type': 'varchar'}, + './policy_published/pct': {'pg_field_name': 'policy_published_pct', + 'pg_table': 'reports', + 'pg_type': 'int'}, + './record[{}]/auth_results/dkim/domain': {'pg_field_name': 'auth_results_dkim_domain', + 'pg_table': 'report_items', + 'pg_type': 'varchar'}, + './record[{}]/auth_results/dkim/result': {'pg_field_name': 'auth_results_dkim_result', + 'pg_table': 'report_items', + 'pg_type': 'varchar'}, + './record[{}]/auth_results/spf/domain': {'pg_field_name': 'auth_results_spf_domain', + 'pg_table': 'report_items', + 'pg_type': 'varchar'}, + './record[{}]/auth_results/spf/result': {'pg_field_name': 'auth_results_spf_result', + 'pg_table': 'report_items', + 'pg_type': 'varchar'}, + './record[{}]/identifiers/header_from': {'pg_field_name': 'identifiers_header_from', + 'pg_table': 'report_items', + 'pg_type': 'varchar'}, + './record[{}]/row/count': {'pg_field_name': 'count', + 'pg_table': 'report_items', + 'pg_type': 'int'}, + './record[{}]/row/policy_evaluated/disposition': {'pg_field_name': 'policy_evaluated_disposition', + 'pg_table': 'report_items', + 'pg_type': 'varchar'}, + './record[{}]/row/policy_evaluated/dkim': {'pg_field_name': 'policy_evaluated_dkim', + 'pg_table': 'report_items', + 'pg_type': 'varchar'}, + './record[{}]/row/policy_evaluated/spf': {'pg_field_name': 'policy_evaluated_spf', + 'pg_table': 'report_items', + 'pg_type': 'varchar'}, + './record[{}]/row/source_ip': {'pg_field_name': 'source_ip', + 'pg_table': 'report_items', + 'pg_type': 'inet'}, + './report_metadata/date_range/begin': {'pg_field_name': 'report_metadata_date_range_begin', + 'pg_table': 'reports', + 'pg_type': 'timestamptz'}, + './report_metadata/date_range/end': {'pg_field_name': 'report_metadata_date_range_end', + 'pg_table': 'reports', + 'pg_type': 'timestamptz'}, + './report_metadata/email': {'pg_field_name': 'report_metadata_email', + 'pg_table': 'reports', + 'pg_type': 'varchar'}, + './report_metadata/org_name': {'pg_field_name': 'report_metadata_org_name', + 'pg_table': 'reports', + 'pg_type': 'varchar'}, + './report_metadata/report_id': {'pg_field_name': 'report_metadata_report_id', + 'pg_table': 'reports', + 'pg_type': 'varchar'}} + + +# In[3]: + +config = configparser.ConfigParser() +config.read('dmarc.ini') + + +# In[8]: + +conn = psycopg2.connect(host=config['database']['server'], + database=config['database']['database'], + user=config['database']['username'], + password=config['database']['password']) + + +# In[9]: + +with conn.cursor() as cur: + conn.autocommit = True + cur.execute("select exists(select * from information_schema.tables where table_name=%s)", ('report_items',)) + if cur.fetchone()[0]: + cur.execute("drop table report_items;") + cur.execute("select exists(select * from information_schema.tables where table_name=%s)", ('reports',)) + if cur.fetchone()[0]: + cur.execute("drop table reports;") + + +# In[10]: + +create_report_table_string = 'create table reports (id serial primary key, ' + ', '.join(field_maps[p]['pg_field_name'] + ' ' + field_maps[p]['pg_type'] + for p in field_maps if field_maps[p]['pg_table'] == 'reports') + \ +');' +create_report_table_string + + +# In[11]: + +create_report_item_table_string = 'create table report_items (id serial primary key, ' + 'report_id integer references reports, ' + ', '.join(field_maps[p]['pg_field_name'] + ' ' + field_maps[p]['pg_type'] + for p in field_maps if field_maps[p]['pg_table'] == 'report_items') + \ +');' +create_report_item_table_string + + +# In[12]: + +with conn.cursor() as cur: + cur.execute(create_report_table_string) + cur.execute(create_report_item_table_string) + cur.execute('create index on reports (report_metadata_date_range_end);') + cur.execute('create index on reports (report_metadata_report_id);') +conn.commit() + + +# In[ ]: + + + -- 2.34.1