Added standalone script to make database tables
[dmarc.git] / dmarc_to_database.py
index 2fd237568b03a466ac5ed2f16d56215344d8ce23..272998f8ccce56e494c949fcca7017fe4fb96251 100644 (file)
@@ -1,3 +1,5 @@
+#!/usr/bin/python3
+
 import configparser
 import imaplib
 import email
@@ -13,9 +15,9 @@ def fetch_msg(num):
 
 def xml_of_part(part):
     with zipfile.ZipFile(io.BytesIO(part.get_payload(decode=True))) as zf:
-            fn = zf.infolist()[0].filename
-            contents = zf.read(fn).decode('utf-8')
-            return xml.etree.ElementTree.fromstring(contents)
+        fn = zf.infolist()[0].filename
+        contents = zf.read(fn).decode('utf-8')
+        return xml.etree.ElementTree.fromstring(contents)
 
 
 def xml_of(message):
@@ -38,66 +40,67 @@ def maybe_strip(text):
     else:
         return ''
 
-field_maps = {'./policy_published/adkim': {'pg_field_name': 'policy_published_adkim',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'},
- './policy_published/aspf': {'pg_field_name': 'policy_published_aspf',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'},
- './policy_published/domain': {'pg_field_name': 'policy_published_domain',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'},
- './policy_published/p': {'pg_field_name': 'policy_published_p',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'},
- './policy_published/pct': {'pg_field_name': 'policy_published_pct',
-  'pg_table': 'reports',
-  'pg_type': 'int'},
- './record[{}]/auth_results/dkim/domain': {'pg_field_name': 'auth_results_dkim_domain',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/auth_results/dkim/result': {'pg_field_name': 'auth_results_dkim_result',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/auth_results/spf/domain': {'pg_field_name': 'auth_results_spf_domain',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/auth_results/spf/result': {'pg_field_name': 'auth_results_spf_result',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/identifiers/header_from': {'pg_field_name': 'identifiers_header_from',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/row/count': {'pg_field_name': 'count',
-  'pg_table': 'report_items',
-  'pg_type': 'int'},
- './record[{}]/row/policy_evaluated/disposition': {'pg_field_name': 'policy_evaluated_disposition',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/row/policy_evaluated/dkim': {'pg_field_name': 'policy_evaluated_dkim',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/row/policy_evaluated/spf': {'pg_field_name': 'policy_evaluated_spf',
-  'pg_table': 'report_items',
-  'pg_type': 'varchar'},
- './record[{}]/row/source_ip': {'pg_field_name': 'source_ip',
-  'pg_table': 'report_items',
-  'pg_type': 'inet'},
- './report_metadata/date_range/begin': {'pg_field_name': 'report_metadata_date_range_begin',
-  'pg_table': 'reports',
-  'pg_type': 'timestamp'},
- './report_metadata/date_range/end': {'pg_field_name': 'report_metadata_date_range_end',
-  'pg_table': 'reports',
-  'pg_type': 'timestamp'},
- './report_metadata/email': {'pg_field_name': 'report_metadata_email',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'},
- './report_metadata/org_name': {'pg_field_name': 'report_metadata_org_name',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'},
- './report_metadata/report_id': {'pg_field_name': 'report_metadata_report_id',
-  'pg_table': 'reports',
-  'pg_type': 'varchar'}}
+field_maps = {
+    './policy_published/adkim': 
+        {'pg_field_name': 'policy_published_adkim',
+         'pg_table': 'reports', 'pg_type': 'varchar'},
+    './policy_published/aspf': 
+        {'pg_field_name': 'policy_published_aspf',
+         'pg_table': 'reports', 'pg_type': 'varchar'},
+     './policy_published/domain':
+        {'pg_field_name': 'policy_published_domain',
+         'pg_table': 'reports', 'pg_type': 'varchar'},
+     './policy_published/p':
+        {'pg_field_name': 'policy_published_p',
+         'pg_table': 'reports', 'pg_type': 'varchar'},
+     './policy_published/pct':
+        {'pg_field_name': 'policy_published_pct',
+         'pg_table': 'reports', 'pg_type': 'int'},
+     './record[{}]/auth_results/dkim/domain':
+        {'pg_field_name': 'auth_results_dkim_domain',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/auth_results/dkim/result':
+        {'pg_field_name': 'auth_results_dkim_result',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/auth_results/spf/domain':
+        {'pg_field_name': 'auth_results_spf_domain',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/auth_results/spf/result':
+        {'pg_field_name': 'auth_results_spf_result',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/identifiers/header_from':
+        {'pg_field_name': 'identifiers_header_from',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/row/count':
+        {'pg_field_name': 'count',
+         'pg_table': 'report_items', 'pg_type': 'int'},
+     './record[{}]/row/policy_evaluated/disposition':
+        {'pg_field_name': 'policy_evaluated_disposition',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/row/policy_evaluated/dkim':
+        {'pg_field_name': 'policy_evaluated_dkim',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/row/policy_evaluated/spf':
+        {'pg_field_name': 'policy_evaluated_spf',
+         'pg_table': 'report_items', 'pg_type': 'varchar'},
+     './record[{}]/row/source_ip':
+        {'pg_field_name': 'source_ip',
+         'pg_table': 'report_items', 'pg_type': 'inet'},
+     './report_metadata/date_range/begin':
+        {'pg_field_name': 'report_metadata_date_range_begin',
+         'pg_table': 'reports', 'pg_type': 'timestamp'},
+     './report_metadata/date_range/end':
+        {'pg_field_name': 'report_metadata_date_range_end',
+         'pg_table': 'reports', 'pg_type': 'timestamp'},
+     './report_metadata/email':
+        {'pg_field_name': 'report_metadata_email',
+         'pg_table': 'reports', 'pg_type': 'varchar'},
+     './report_metadata/org_name':
+        {'pg_field_name': 'report_metadata_org_name',
+         'pg_table': 'reports', 'pg_type': 'varchar'},
+     './report_metadata/report_id':
+        {'pg_field_name': 'report_metadata_report_id',
+         'pg_table': 'reports', 'pg_type': 'varchar'}}
 
 
 
@@ -155,13 +158,14 @@ def write_report(connection, cursor, report):
 config = configparser.ConfigParser()
 config.read('dmarc.ini')
 
-with psycopg2.connect(host=config['database']['server'],
+conn = psycopg2.connect(host=config['database']['server'],
                         database=config['database']['database'], 
                         user=config['database']['username'], 
-                        password=config['database']['password']) as conn:
-    with conn.cursor() as cur:
-        cur.execute('select max(report_metadata_date_range_end) from reports')
-        results = cur.fetchall()
+                        password=config['database']['password']) 
+
+cur = conn.cursor()
+cur.execute('select max(report_metadata_date_range_end) from reports')
+results = cur.fetchall()
 most_recent_date = results[0][0]
 
 mailbox = imaplib.IMAP4(host=config['imap']['server'], 
@@ -184,19 +188,12 @@ dmarc_reports = [report for report_set in [extract_report(fetch_msg(n)) for n in
 mailbox.close()
 mailbox.logout()
 
+for report in dmarc_reports:
+    cur.execute('select id, report_metadata_report_id from reports where report_metadata_report_id = %s;', 
+        [report.find('./report_metadata/report_id').text])
+    results = cur.fetchall()
+    if not results:
+        print('write', report.find('./report_metadata/report_id').text)
+        write_report(conn, cur, report)
 
-with psycopg2.connect(host=config['database']['server'],
-                        database=config['database']['database'], 
-                        user=config['database']['username'], 
-                        password=config['database']['password']) as conn:
-    with conn.cursor() as cur:
-        for report in dmarc_reports:
-            cur.execute('select id, report_metadata_report_id from reports where report_metadata_report_id = %s;', 
-                [report.find('./report_metadata/report_id').text])
-            results = cur.fetchall()
-            if not results:
-                print('write', report.find('./report_metadata/report_id').text)
-                write_report(conn, cur, report)
-
-
-
+conn.close()