From: Neil Smith <neil.git@njae.me.uk>
Date: Wed, 4 Jan 2017 16:07:42 +0000 (+0000)
Subject: Added standalone script to make database tables
X-Git-Url: https://git.njae.me.uk/?a=commitdiff_plain;h=4c6bfcba885846a2abe23497cb097f91c469abe5;p=dmarc.git

Added standalone script to make database tables
---

diff --git a/complete.ipynb b/complete.ipynb
index ccbdaf0..3663338 100644
--- a/complete.ipynb
+++ b/complete.ipynb
@@ -912,7 +912,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.4.3+"
+   "version": "3.5.2"
   }
  },
  "nbformat": 4,
diff --git a/dmarc_to_database.py b/dmarc_to_database.py
index 4624964..272998f 100644
--- a/dmarc_to_database.py
+++ b/dmarc_to_database.py
@@ -15,9 +15,9 @@ def fetch_msg(num):
 
 def xml_of_part(part):
     with zipfile.ZipFile(io.BytesIO(part.get_payload(decode=True))) as zf:
-            fn = zf.infolist()[0].filename
-            contents = zf.read(fn).decode('utf-8')
-            return xml.etree.ElementTree.fromstring(contents)
+        fn = zf.infolist()[0].filename
+        contents = zf.read(fn).decode('utf-8')
+        return xml.etree.ElementTree.fromstring(contents)
 
 
 def xml_of(message):
@@ -40,66 +40,67 @@ def maybe_strip(text):
     else:
         return ''
 
-field_maps = {'./policy_published/adkim': {'pg_field_name': 'policy_published_adkim',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'},
- './policy_published/aspf': {'pg_field_name': 'policy_published_aspf',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'},
- './policy_published/domain': {'pg_field_name': 'policy_published_domain',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'},
- './policy_published/p': {'pg_field_name': 'policy_published_p',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'},
- './policy_published/pct': {'pg_field_name': 'policy_published_pct',
-  'pg_table': 'reports',
-  'pg_type': 'int'},
- './record[{}]/auth_results/dkim/domain': {'pg_field_name': 'auth_results_dkim_domain',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/auth_results/dkim/result': {'pg_field_name': 'auth_results_dkim_result',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/auth_results/spf/domain': {'pg_field_name': 'auth_results_spf_domain',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/auth_results/spf/result': {'pg_field_name': 'auth_results_spf_result',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/identifiers/header_from': {'pg_field_name': 'identifiers_header_from',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/row/count': {'pg_field_name': 'count',
-  'pg_table': 'report_items',
-  'pg_type': 'int'},
- './record[{}]/row/policy_evaluated/disposition': {'pg_field_name': 'policy_evaluated_disposition',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/row/policy_evaluated/dkim': {'pg_field_name': 'policy_evaluated_dkim',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/row/policy_evaluated/spf': {'pg_field_name': 'policy_evaluated_spf',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/row/source_ip': {'pg_field_name': 'source_ip',
-  'pg_table': 'report_items',
-  'pg_type': 'inet'},
- './report_metadata/date_range/begin': {'pg_field_name': 'report_metadata_date_range_begin',
-  'pg_table': 'reports',
-  'pg_type': 'timestamp'},
- './report_metadata/date_range/end': {'pg_field_name': 'report_metadata_date_range_end',
-  'pg_table': 'reports',
-  'pg_type': 'timestamp'},
- './report_metadata/email': {'pg_field_name': 'report_metadata_email',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'},
- './report_metadata/org_name': {'pg_field_name': 'report_metadata_org_name',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'},
- './report_metadata/report_id': {'pg_field_name': 'report_metadata_report_id',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'}}
+field_maps = {
+    './policy_published/adkim': 
+        {'pg_field_name': 'policy_published_adkim',
+         'pg_table': 'reports', 'pg_type': 'varchar'},
+    './policy_published/aspf': 
+        {'pg_field_name': 'policy_published_aspf',
+         'pg_table': 'reports', 'pg_type': 'varchar'},
+     './policy_published/domain':
+        {'pg_field_name': 'policy_published_domain',
+         'pg_table': 'reports', 'pg_type': 'varchar'},
+     './policy_published/p':
+        {'pg_field_name': 'policy_published_p',
+         'pg_table': 'reports', 'pg_type': 'varchar'},
+     './policy_published/pct':
+        {'pg_field_name': 'policy_published_pct',
+         'pg_table': 'reports', 'pg_type': 'int'},
+     './record[{}]/auth_results/dkim/domain':
+        {'pg_field_name': 'auth_results_dkim_domain',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/auth_results/dkim/result':
+        {'pg_field_name': 'auth_results_dkim_result',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/auth_results/spf/domain':
+        {'pg_field_name': 'auth_results_spf_domain',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/auth_results/spf/result':
+        {'pg_field_name': 'auth_results_spf_result',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/identifiers/header_from':
+        {'pg_field_name': 'identifiers_header_from',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/row/count':
+        {'pg_field_name': 'count',
+         'pg_table': 'report_items', 'pg_type': 'int'},
+     './record[{}]/row/policy_evaluated/disposition':
+        {'pg_field_name': 'policy_evaluated_disposition',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/row/policy_evaluated/dkim':
+        {'pg_field_name': 'policy_evaluated_dkim',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/row/policy_evaluated/spf':
+        {'pg_field_name': 'policy_evaluated_spf',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/row/source_ip':
+        {'pg_field_name': 'source_ip',
+         'pg_table': 'report_items', 'pg_type': 'inet'},
+     './report_metadata/date_range/begin':
+        {'pg_field_name': 'report_metadata_date_range_begin',
+         'pg_table': 'reports', 'pg_type': 'timestamp'},
+     './report_metadata/date_range/end':
+        {'pg_field_name': 'report_metadata_date_range_end',
+         'pg_table': 'reports', 'pg_type': 'timestamp'},
+     './report_metadata/email':
+        {'pg_field_name': 'report_metadata_email',
+         'pg_table': 'reports', 'pg_type': 'varchar'},
+     './report_metadata/org_name':
+        {'pg_field_name': 'report_metadata_org_name',
+         'pg_table': 'reports', 'pg_type': 'varchar'},
+     './report_metadata/report_id':
+        {'pg_field_name': 'report_metadata_report_id',
+         'pg_table': 'reports', 'pg_type': 'varchar'}}
 
 
 
diff --git a/make_database.ipynb b/make_database.ipynb
index a44d6d9..73c1cf6 100644
--- a/make_database.ipynb
+++ b/make_database.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 1,
    "metadata": {
     "collapsed": true
    },
@@ -14,7 +14,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 2,
    "metadata": {
     "collapsed": true
    },
@@ -84,7 +84,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 3,
    "metadata": {
     "collapsed": false
    },
@@ -95,7 +95,7 @@
        "['dmarc.ini']"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -107,7 +107,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 8,
    "metadata": {
     "collapsed": false
    },
@@ -121,7 +121,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 9,
    "metadata": {
     "collapsed": false
    },
@@ -139,7 +139,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 10,
    "metadata": {
     "collapsed": false
    },
@@ -147,10 +147,10 @@
     {
      "data": {
       "text/plain": [
-       "'create table reports (id serial primary key, policy_published_domain varchar, report_metadata_date_range_begin timestamptz, report_metadata_report_id varchar, report_metadata_date_range_end timestamptz, report_metadata_org_name varchar, policy_published_pct int, policy_published_p varchar, policy_published_aspf varchar, policy_published_adkim varchar, report_metadata_email varchar);'"
+       "'create table reports (id serial primary key, policy_published_domain varchar, policy_published_pct int, report_metadata_report_id varchar, report_metadata_date_range_end timestamptz, report_metadata_org_name varchar, report_metadata_date_range_begin timestamptz, policy_published_aspf varchar, report_metadata_email varchar, policy_published_p varchar, policy_published_adkim varchar);'"
       ]
      },
-     "execution_count": 14,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -165,7 +165,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 11,
    "metadata": {
     "collapsed": false
    },
@@ -173,10 +173,10 @@
     {
      "data": {
       "text/plain": [
-       "'create table report_items (id serial primary key, report_id integer references reports, auth_results_dkim_domain varchar, policy_evaluated_dkim varchar, source_ip inet, count int, auth_results_spf_domain varchar, policy_evaluated_spf varchar, policy_evaluated_disposition varchar, identifiers_header_from varchar, auth_results_dkim_result varchar, auth_results_spf_result varchar);'"
+       "'create table report_items (id serial primary key, report_id integer references reports, auth_results_dkim_domain varchar, policy_evaluated_dkim varchar, policy_evaluated_spf varchar, policy_evaluated_disposition varchar, source_ip inet, auth_results_dkim_result varchar, count int, auth_results_spf_result varchar, auth_results_spf_domain varchar, identifiers_header_from varchar);'"
       ]
      },
-     "execution_count": 15,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -192,7 +192,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 12,
    "metadata": {
     "collapsed": true
    },
@@ -232,7 +232,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.4.3+"
+   "version": "3.5.2+"
   }
  },
  "nbformat": 4,
diff --git a/make_database.py b/make_database.py
new file mode 100644
index 0000000..ee955ec
--- /dev/null
+++ b/make_database.py
@@ -0,0 +1,129 @@
+
+# coding: utf-8
+
+# In[1]:
+
+import configparser
+import psycopg2
+
+
+# In[2]:
+
+field_maps = {'./policy_published/adkim': {'pg_field_name': 'policy_published_adkim',
+  'pg_table': 'reports',
+  'pg_type': 'varchar'},
+ './policy_published/aspf': {'pg_field_name': 'policy_published_aspf',
+  'pg_table': 'reports',
+  'pg_type': 'varchar'},
+ './policy_published/domain': {'pg_field_name': 'policy_published_domain',
+  'pg_table': 'reports',
+  'pg_type': 'varchar'},
+ './policy_published/p': {'pg_field_name': 'policy_published_p',
+  'pg_table': 'reports',
+  'pg_type': 'varchar'},
+ './policy_published/pct': {'pg_field_name': 'policy_published_pct',
+  'pg_table': 'reports',
+  'pg_type': 'int'},
+ './record[{}]/auth_results/dkim/domain': {'pg_field_name': 'auth_results_dkim_domain',
+  'pg_table': 'report_items',
+  'pg_type': 'varchar'},
+ './record[{}]/auth_results/dkim/result': {'pg_field_name': 'auth_results_dkim_result',
+  'pg_table': 'report_items',
+  'pg_type': 'varchar'},
+ './record[{}]/auth_results/spf/domain': {'pg_field_name': 'auth_results_spf_domain',
+  'pg_table': 'report_items',
+  'pg_type': 'varchar'},
+ './record[{}]/auth_results/spf/result': {'pg_field_name': 'auth_results_spf_result',
+  'pg_table': 'report_items',
+  'pg_type': 'varchar'},
+ './record[{}]/identifiers/header_from': {'pg_field_name': 'identifiers_header_from',
+  'pg_table': 'report_items',
+  'pg_type': 'varchar'},
+ './record[{}]/row/count': {'pg_field_name': 'count',
+  'pg_table': 'report_items',
+  'pg_type': 'int'},
+ './record[{}]/row/policy_evaluated/disposition': {'pg_field_name': 'policy_evaluated_disposition',
+  'pg_table': 'report_items',
+  'pg_type': 'varchar'},
+ './record[{}]/row/policy_evaluated/dkim': {'pg_field_name': 'policy_evaluated_dkim',
+  'pg_table': 'report_items',
+  'pg_type': 'varchar'},
+ './record[{}]/row/policy_evaluated/spf': {'pg_field_name': 'policy_evaluated_spf',
+  'pg_table': 'report_items',
+  'pg_type': 'varchar'},
+ './record[{}]/row/source_ip': {'pg_field_name': 'source_ip',
+  'pg_table': 'report_items',
+  'pg_type': 'inet'},
+ './report_metadata/date_range/begin': {'pg_field_name': 'report_metadata_date_range_begin',
+  'pg_table': 'reports',
+  'pg_type': 'timestamptz'},
+ './report_metadata/date_range/end': {'pg_field_name': 'report_metadata_date_range_end',
+  'pg_table': 'reports',
+  'pg_type': 'timestamptz'},
+ './report_metadata/email': {'pg_field_name': 'report_metadata_email',
+  'pg_table': 'reports',
+  'pg_type': 'varchar'},
+ './report_metadata/org_name': {'pg_field_name': 'report_metadata_org_name',
+  'pg_table': 'reports',
+  'pg_type': 'varchar'},
+ './report_metadata/report_id': {'pg_field_name': 'report_metadata_report_id',
+  'pg_table': 'reports',
+  'pg_type': 'varchar'}}
+
+
+# In[3]:
+
+config = configparser.ConfigParser()
+config.read('dmarc.ini')
+
+
+# In[8]:
+
+conn = psycopg2.connect(host=config['database']['server'],
+                        database=config['database']['database'], 
+                        user=config['database']['username'], 
+                        password=config['database']['password'])
+
+
+# In[9]:
+
+with conn.cursor() as cur:
+    conn.autocommit = True
+    cur.execute("select exists(select * from information_schema.tables where table_name=%s)", ('report_items',))
+    if cur.fetchone()[0]:
+        cur.execute("drop table report_items;")
+    cur.execute("select exists(select * from information_schema.tables where table_name=%s)", ('reports',))
+    if cur.fetchone()[0]:
+        cur.execute("drop table reports;")
+
+
+# In[10]:
+
+create_report_table_string = 'create table reports (id serial primary key, ' + ', '.join(field_maps[p]['pg_field_name'] + ' ' + field_maps[p]['pg_type'] 
+ for p in field_maps if field_maps[p]['pg_table'] == 'reports') + \
+');'
+create_report_table_string
+
+
+# In[11]:
+
+create_report_item_table_string = 'create table report_items (id serial primary key, ' +   'report_id integer references reports, ' + ', '.join(field_maps[p]['pg_field_name'] + ' ' + field_maps[p]['pg_type'] 
+ for p in field_maps if field_maps[p]['pg_table'] == 'report_items') + \
+');'
+create_report_item_table_string
+
+
+# In[12]:
+
+with conn.cursor() as cur:
+    cur.execute(create_report_table_string)
+    cur.execute(create_report_item_table_string)
+    cur.execute('create index on reports (report_metadata_date_range_end);')
+    cur.execute('create index on reports (report_metadata_report_id);')
+conn.commit()
+
+
+# In[ ]:
+
+
+