Added standalone script to make database tables
authorNeil Smith <neil.git@njae.me.uk>
Wed, 4 Jan 2017 16:07:42 +0000 (16:07 +0000)
committerNeil Smith <neil.git@njae.me.uk>
Wed, 4 Jan 2017 16:07:42 +0000 (16:07 +0000)
complete.ipynb
dmarc_to_database.py
make_database.ipynb
make_database.py [new file with mode: 0644]

index ccbdaf02d1fa8e43df49d295c40bf12539e9cf57..3663338363a0176dafcfb28db685802532549601 100644 (file)
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.4.3+"
+   "version": "3.5.2"
   }
  },
  "nbformat": 4,
index 46249649141088ccf310859496d7f36ac9a41326..272998f8ccce56e494c949fcca7017fe4fb96251 100644 (file)
@@ -15,9 +15,9 @@ def fetch_msg(num):
 
 def xml_of_part(part):
     with zipfile.ZipFile(io.BytesIO(part.get_payload(decode=True))) as zf:
-            fn = zf.infolist()[0].filename
-            contents = zf.read(fn).decode('utf-8')
-            return xml.etree.ElementTree.fromstring(contents)
+        fn = zf.infolist()[0].filename
+        contents = zf.read(fn).decode('utf-8')
+        return xml.etree.ElementTree.fromstring(contents)
 
 
 def xml_of(message):
@@ -40,66 +40,67 @@ def maybe_strip(text):
     else:
         return ''
 
-field_maps = {'./policy_published/adkim': {'pg_field_name': 'policy_published_adkim',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'},
- './policy_published/aspf': {'pg_field_name': 'policy_published_aspf',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'},
- './policy_published/domain': {'pg_field_name': 'policy_published_domain',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'},
- './policy_published/p': {'pg_field_name': 'policy_published_p',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'},
- './policy_published/pct': {'pg_field_name': 'policy_published_pct',
-  'pg_table': 'reports',
-  'pg_type': 'int'},
- './record[{}]/auth_results/dkim/domain': {'pg_field_name': 'auth_results_dkim_domain',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/auth_results/dkim/result': {'pg_field_name': 'auth_results_dkim_result',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/auth_results/spf/domain': {'pg_field_name': 'auth_results_spf_domain',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/auth_results/spf/result': {'pg_field_name': 'auth_results_spf_result',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/identifiers/header_from': {'pg_field_name': 'identifiers_header_from',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/row/count': {'pg_field_name': 'count',
-  'pg_table': 'report_items',
-  'pg_type': 'int'},
- './record[{}]/row/policy_evaluated/disposition': {'pg_field_name': 'policy_evaluated_disposition',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/row/policy_evaluated/dkim': {'pg_field_name': 'policy_evaluated_dkim',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/row/policy_evaluated/spf': {'pg_field_name': 'policy_evaluated_spf',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/row/source_ip': {'pg_field_name': 'source_ip',
-  'pg_table': 'report_items',
-  'pg_type': 'inet'},
- './report_metadata/date_range/begin': {'pg_field_name': 'report_metadata_date_range_begin',
-  'pg_table': 'reports',
-  'pg_type': 'timestamp'},
- './report_metadata/date_range/end': {'pg_field_name': 'report_metadata_date_range_end',
-  'pg_table': 'reports',
-  'pg_type': 'timestamp'},
- './report_metadata/email': {'pg_field_name': 'report_metadata_email',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'},
- './report_metadata/org_name': {'pg_field_name': 'report_metadata_org_name',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'},
- './report_metadata/report_id': {'pg_field_name': 'report_metadata_report_id',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'}}
+field_maps = {
+    './policy_published/adkim': 
+        {'pg_field_name': 'policy_published_adkim',
+         'pg_table': 'reports', 'pg_type': 'varchar'},
+    './policy_published/aspf': 
+        {'pg_field_name': 'policy_published_aspf',
+         'pg_table': 'reports', 'pg_type': 'varchar'},
+     './policy_published/domain':
+        {'pg_field_name': 'policy_published_domain',
+         'pg_table': 'reports', 'pg_type': 'varchar'},
+     './policy_published/p':
+        {'pg_field_name': 'policy_published_p',
+         'pg_table': 'reports', 'pg_type': 'varchar'},
+     './policy_published/pct':
+        {'pg_field_name': 'policy_published_pct',
+         'pg_table': 'reports', 'pg_type': 'int'},
+     './record[{}]/auth_results/dkim/domain':
+        {'pg_field_name': 'auth_results_dkim_domain',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/auth_results/dkim/result':
+        {'pg_field_name': 'auth_results_dkim_result',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/auth_results/spf/domain':
+        {'pg_field_name': 'auth_results_spf_domain',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/auth_results/spf/result':
+        {'pg_field_name': 'auth_results_spf_result',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/identifiers/header_from':
+        {'pg_field_name': 'identifiers_header_from',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/row/count':
+        {'pg_field_name': 'count',
+         'pg_table': 'report_items', 'pg_type': 'int'},
+     './record[{}]/row/policy_evaluated/disposition':
+        {'pg_field_name': 'policy_evaluated_disposition',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/row/policy_evaluated/dkim':
+        {'pg_field_name': 'policy_evaluated_dkim',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/row/policy_evaluated/spf':
+        {'pg_field_name': 'policy_evaluated_spf',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/row/source_ip':
+        {'pg_field_name': 'source_ip',
+         'pg_table': 'report_items', 'pg_type': 'inet'},
+     './report_metadata/date_range/begin':
+        {'pg_field_name': 'report_metadata_date_range_begin',
+         'pg_table': 'reports', 'pg_type': 'timestamp'},
+     './report_metadata/date_range/end':
+        {'pg_field_name': 'report_metadata_date_range_end',
+         'pg_table': 'reports', 'pg_type': 'timestamp'},
+     './report_metadata/email':
+        {'pg_field_name': 'report_metadata_email',
+         'pg_table': 'reports', 'pg_type': 'varchar'},
+     './report_metadata/org_name':
+        {'pg_field_name': 'report_metadata_org_name',
+         'pg_table': 'reports', 'pg_type': 'varchar'},
+     './report_metadata/report_id':
+        {'pg_field_name': 'report_metadata_report_id',
+         'pg_table': 'reports', 'pg_type': 'varchar'}}
 
 
 
index a44d6d9fbb93e8887dba617a6cfa227d1754a113..73c1cf6bc80b056cace7f28d1f5fd1d85425d8fd 100644 (file)
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 1,
    "metadata": {
     "collapsed": true
    },
@@ -14,7 +14,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 2,
    "metadata": {
     "collapsed": true
    },
@@ -84,7 +84,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 3,
    "metadata": {
     "collapsed": false
    },
@@ -95,7 +95,7 @@
        "['dmarc.ini']"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 8,
    "metadata": {
     "collapsed": false
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 9,
    "metadata": {
     "collapsed": false
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 10,
    "metadata": {
     "collapsed": false
    },
     {
      "data": {
       "text/plain": [
-       "'create table reports (id serial primary key, policy_published_domain varchar, report_metadata_date_range_begin timestamptz, report_metadata_report_id varchar, report_metadata_date_range_end timestamptz, report_metadata_org_name varchar, policy_published_pct int, policy_published_p varchar, policy_published_aspf varchar, policy_published_adkim varchar, report_metadata_email varchar);'"
+       "'create table reports (id serial primary key, policy_published_domain varchar, policy_published_pct int, report_metadata_report_id varchar, report_metadata_date_range_end timestamptz, report_metadata_org_name varchar, report_metadata_date_range_begin timestamptz, policy_published_aspf varchar, report_metadata_email varchar, policy_published_p varchar, policy_published_adkim varchar);'"
       ]
      },
-     "execution_count": 14,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 11,
    "metadata": {
     "collapsed": false
    },
     {
      "data": {
       "text/plain": [
-       "'create table report_items (id serial primary key, report_id integer references reports, auth_results_dkim_domain varchar, policy_evaluated_dkim varchar, source_ip inet, count int, auth_results_spf_domain varchar, policy_evaluated_spf varchar, policy_evaluated_disposition varchar, identifiers_header_from varchar, auth_results_dkim_result varchar, auth_results_spf_result varchar);'"
+       "'create table report_items (id serial primary key, report_id integer references reports, auth_results_dkim_domain varchar, policy_evaluated_dkim varchar, policy_evaluated_spf varchar, policy_evaluated_disposition varchar, source_ip inet, auth_results_dkim_result varchar, count int, auth_results_spf_result varchar, auth_results_spf_domain varchar, identifiers_header_from varchar);'"
       ]
      },
-     "execution_count": 15,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 12,
    "metadata": {
     "collapsed": true
    },
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.4.3+"
+   "version": "3.5.2+"
   }
  },
  "nbformat": 4,
diff --git a/make_database.py b/make_database.py
new file mode 100644 (file)
index 0000000..ee955ec
--- /dev/null
@@ -0,0 +1,129 @@
+
+# coding: utf-8
+
+# In[1]:
+
+import configparser
+import psycopg2
+
+
+# In[2]:
+
+field_maps = {'./policy_published/adkim': {'pg_field_name': 'policy_published_adkim',
+  'pg_table': 'reports',
+  'pg_type': 'varchar'},
+ './policy_published/aspf': {'pg_field_name': 'policy_published_aspf',
+  'pg_table': 'reports',
+  'pg_type': 'varchar'},
+ './policy_published/domain': {'pg_field_name': 'policy_published_domain',
+  'pg_table': 'reports',
+  'pg_type': 'varchar'},
+ './policy_published/p': {'pg_field_name': 'policy_published_p',
+  'pg_table': 'reports',
+  'pg_type': 'varchar'},
+ './policy_published/pct': {'pg_field_name': 'policy_published_pct',
+  'pg_table': 'reports',
+  'pg_type': 'int'},
+ './record[{}]/auth_results/dkim/domain': {'pg_field_name': 'auth_results_dkim_domain',
+  'pg_table': 'report_items',
+  'pg_type': 'varchar'},
+ './record[{}]/auth_results/dkim/result': {'pg_field_name': 'auth_results_dkim_result',
+  'pg_table': 'report_items',
+  'pg_type': 'varchar'},
+ './record[{}]/auth_results/spf/domain': {'pg_field_name': 'auth_results_spf_domain',
+  'pg_table': 'report_items',
+  'pg_type': 'varchar'},
+ './record[{}]/auth_results/spf/result': {'pg_field_name': 'auth_results_spf_result',
+  'pg_table': 'report_items',
+  'pg_type': 'varchar'},
+ './record[{}]/identifiers/header_from': {'pg_field_name': 'identifiers_header_from',
+  'pg_table': 'report_items',
+  'pg_type': 'varchar'},
+ './record[{}]/row/count': {'pg_field_name': 'count',
+  'pg_table': 'report_items',
+  'pg_type': 'int'},
+ './record[{}]/row/policy_evaluated/disposition': {'pg_field_name': 'policy_evaluated_disposition',
+  'pg_table': 'report_items',
+  'pg_type': 'varchar'},
+ './record[{}]/row/policy_evaluated/dkim': {'pg_field_name': 'policy_evaluated_dkim',
+  'pg_table': 'report_items',
+  'pg_type': 'varchar'},
+ './record[{}]/row/policy_evaluated/spf': {'pg_field_name': 'policy_evaluated_spf',
+  'pg_table': 'report_items',
+  'pg_type': 'varchar'},
+ './record[{}]/row/source_ip': {'pg_field_name': 'source_ip',
+  'pg_table': 'report_items',
+  'pg_type': 'inet'},
+ './report_metadata/date_range/begin': {'pg_field_name': 'report_metadata_date_range_begin',
+  'pg_table': 'reports',
+  'pg_type': 'timestamptz'},
+ './report_metadata/date_range/end': {'pg_field_name': 'report_metadata_date_range_end',
+  'pg_table': 'reports',
+  'pg_type': 'timestamptz'},
+ './report_metadata/email': {'pg_field_name': 'report_metadata_email',
+  'pg_table': 'reports',
+  'pg_type': 'varchar'},
+ './report_metadata/org_name': {'pg_field_name': 'report_metadata_org_name',
+  'pg_table': 'reports',
+  'pg_type': 'varchar'},
+ './report_metadata/report_id': {'pg_field_name': 'report_metadata_report_id',
+  'pg_table': 'reports',
+  'pg_type': 'varchar'}}
+
+
+# In[3]:
+
+config = configparser.ConfigParser()
+config.read('dmarc.ini')
+
+
+# In[8]:
+
+conn = psycopg2.connect(host=config['database']['server'],
+                        database=config['database']['database'], 
+                        user=config['database']['username'], 
+                        password=config['database']['password'])
+
+
+# In[9]:
+
+with conn.cursor() as cur:
+    conn.autocommit = True
+    cur.execute("select exists(select * from information_schema.tables where table_name=%s)", ('report_items',))
+    if cur.fetchone()[0]:
+        cur.execute("drop table report_items;")
+    cur.execute("select exists(select * from information_schema.tables where table_name=%s)", ('reports',))
+    if cur.fetchone()[0]:
+        cur.execute("drop table reports;")
+
+
+# In[10]:
+
+create_report_table_string = 'create table reports (id serial primary key, ' + ', '.join(field_maps[p]['pg_field_name'] + ' ' + field_maps[p]['pg_type'] 
+ for p in field_maps if field_maps[p]['pg_table'] == 'reports') + \
+');'
+create_report_table_string
+
+
+# In[11]:
+
+create_report_item_table_string = 'create table report_items (id serial primary key, ' +   'report_id integer references reports, ' + ', '.join(field_maps[p]['pg_field_name'] + ' ' + field_maps[p]['pg_type'] 
+ for p in field_maps if field_maps[p]['pg_table'] == 'report_items') + \
+');'
+create_report_item_table_string
+
+
+# In[12]:
+
+with conn.cursor() as cur:
+    cur.execute(create_report_table_string)
+    cur.execute(create_report_item_table_string)
+    cur.execute('create index on reports (report_metadata_date_range_end);')
+    cur.execute('create index on reports (report_metadata_report_id);')
+conn.commit()
+
+
+# In[ ]:
+
+
+