X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=write-to-database.ipynb;h=331d569fa7000155053a444936b8df04a42c8406;hb=HEAD;hp=96884afeb6272390af65c98a3a143d2e2be5c972;hpb=cbc82ab0d7821ee263e1eabf32b1f932810775a2;p=dmarc.git diff --git a/write-to-database.ipynb b/write-to-database.ipynb index 96884af..331d569 100644 --- a/write-to-database.ipynb +++ b/write-to-database.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 92, + "execution_count": 1, "metadata": { "collapsed": true }, @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": { "collapsed": false }, @@ -51,7 +51,7 @@ "['imap', 'database']" ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -62,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": { "collapsed": false }, @@ -73,7 +73,7 @@ "['server', 'username', 'port', 'password']" ] }, - "execution_count": 6, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -84,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": { "collapsed": false }, @@ -95,7 +95,7 @@ "['server', 'database', 'username', 'password']" ] }, - "execution_count": 7, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -106,7 +106,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 6, "metadata": { "collapsed": true }, @@ -124,7 +124,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 7, "metadata": { "collapsed": false }, @@ -144,7 +144,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 8, "metadata": { "collapsed": false }, @@ -159,7 +159,7 @@ " 'yahoo.com!njae.me.uk!1459036800!1459123199.xml']" ] }, - "execution_count": 17, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -171,7 +171,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 9, "metadata": { "collapsed": false }, @@ -179,14 +179,14 @@ { "data": { "text/plain": [ - "{'google.com!njae.me.uk!1458777600!1458863999.xml': ,\n", - " 'google.com!njae.me.uk!1458950400!1459036799.xml': ,\n", - " 'hotmail.com!njae.me.uk!1459011600!1459098000.xml': ,\n", - " 'yahoo.com!njae.me.uk!1458864000!1458950399.xml': ,\n", - " 'yahoo.com!njae.me.uk!1459036800!1459123199.xml': }" + "{'google.com!njae.me.uk!1458777600!1458863999.xml': ,\n", + " 'google.com!njae.me.uk!1458950400!1459036799.xml': ,\n", + " 'hotmail.com!njae.me.uk!1459011600!1459098000.xml': ,\n", + " 'yahoo.com!njae.me.uk!1458864000!1458950399.xml': ,\n", + " 'yahoo.com!njae.me.uk!1459036800!1459123199.xml': }" ] }, - "execution_count": 19, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -198,7 +198,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 10, "metadata": { "collapsed": false }, @@ -211,10 +211,10 @@ " org_name : google.com\n", " email : noreply-dmarc-support@google.com\n", " extra_contact_info : https://support.google.com/a/answer/2466580\n", - " report_id : 2150510829392606201\n", + " report_id : 13032699446207263761\n", " date_range : \n", - " begin : 1458950400\n", - " end : 1459036799\n", + " begin : 1458777600\n", + " end : 1458863999\n", " policy_published : \n", " domain : njae.me.uk\n", " adkim : r\n", @@ -224,29 +224,12 @@ " pct : 100\n", " record : \n", " row : \n", - " source_ip : 82.109.184.9\n", - " count : 1\n", - " policy_evaluated : \n", - " disposition : none\n", - " dkim : fail\n", - " spf : fail\n", - " identifiers : \n", - " header_from : njae.me.uk\n", - " auth_results : \n", - " dkim : \n", - " domain : clublloyds.com\n", - " result : pass\n", - " spf : \n", - " domain : clublloyds.com\n", - " result : pass\n", - " record : \n", - " row : \n", - " source_ip : 212.69.55.62\n", + " source_ip : 146.185.136.235\n", " count : 1\n", " policy_evaluated : \n", " disposition : none\n", " dkim : pass\n", - " spf : pass\n", + " spf : fail\n", " identifiers : \n", " header_from : njae.me.uk\n", " auth_results : \n", @@ -255,7 +238,7 @@ " result : pass\n", " spf : \n", " domain : njae.me.uk\n", - " result : pass\n", + " result : softfail\n", "\n", " report_metadata : \n", " org_name : Yahoo! Inc.\n", @@ -289,22 +272,41 @@ " result : softfail\n", "\n", " report_metadata : \n", - " org_name : Yahoo! Inc.\n", - " email : postmaster@dmarc.yahoo.com\n", - " report_id : 1458957186.548175\n", + " org_name : google.com\n", + " email : noreply-dmarc-support@google.com\n", + " extra_contact_info : https://support.google.com/a/answer/2466580\n", + " report_id : 2150510829392606201\n", " date_range : \n", - " begin : 1458864000\n", - " end : 1458950399\n", + " begin : 1458950400\n", + " end : 1459036799\n", " policy_published : \n", " domain : njae.me.uk\n", " adkim : r\n", " aspf : r\n", " p : none\n", + " sp : none\n", " pct : 100\n", " record : \n", " row : \n", + " source_ip : 82.109.184.9\n", + " count : 1\n", + " policy_evaluated : \n", + " disposition : none\n", + " dkim : fail\n", + " spf : fail\n", + " identifiers : \n", + " header_from : njae.me.uk\n", + " auth_results : \n", + " dkim : \n", + " domain : clublloyds.com\n", + " result : pass\n", + " spf : \n", + " domain : clublloyds.com\n", + " result : pass\n", + " record : \n", + " row : \n", " source_ip : 212.69.55.62\n", - " count : 2\n", + " count : 1\n", " policy_evaluated : \n", " disposition : none\n", " dkim : pass\n", @@ -352,28 +354,26 @@ " result : pass\n", "\n", " report_metadata : \n", - " org_name : google.com\n", - " email : noreply-dmarc-support@google.com\n", - " extra_contact_info : https://support.google.com/a/answer/2466580\n", - " report_id : 13032699446207263761\n", + " org_name : Yahoo! Inc.\n", + " email : postmaster@dmarc.yahoo.com\n", + " report_id : 1458957186.548175\n", " date_range : \n", - " begin : 1458777600\n", - " end : 1458863999\n", + " begin : 1458864000\n", + " end : 1458950399\n", " policy_published : \n", " domain : njae.me.uk\n", " adkim : r\n", " aspf : r\n", " p : none\n", - " sp : none\n", " pct : 100\n", " record : \n", " row : \n", - " source_ip : 146.185.136.235\n", - " count : 1\n", + " source_ip : 212.69.55.62\n", + " count : 2\n", " policy_evaluated : \n", " disposition : none\n", " dkim : pass\n", - " spf : fail\n", + " spf : pass\n", " identifiers : \n", " header_from : njae.me.uk\n", " auth_results : \n", @@ -382,7 +382,7 @@ " result : pass\n", " spf : \n", " domain : njae.me.uk\n", - " result : softfail\n", + " result : pass\n", "\n" ] } @@ -395,7 +395,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 11, "metadata": { "collapsed": false }, @@ -444,7 +444,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 12, "metadata": { "collapsed": false }, @@ -458,7 +458,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 13, "metadata": { "collapsed": false }, @@ -466,10 +466,10 @@ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 30, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -481,7 +481,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 14, "metadata": { "collapsed": false }, @@ -489,10 +489,14 @@ { "data": { "text/plain": [ - "{'email': 'varchar', 'org_name': 'varchar', 'report_id': 'varchar'}" + "{'date_range_begin': 'timestamp',\n", + " 'date_range_end': 'timestamp',\n", + " 'email': 'varchar',\n", + " 'org_name': 'varchar',\n", + " 'report_id': 'varchar'}" ] }, - "execution_count": 31, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -509,7 +513,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 15, "metadata": { "collapsed": false }, @@ -532,7 +536,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 16, "metadata": { "collapsed": false }, @@ -562,7 +566,7 @@ " 'feedback/report_metadata/report_id': '1459129809.695034'}" ] }, - "execution_count": 66, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -573,7 +577,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 17, "metadata": { "collapsed": false }, @@ -584,7 +588,7 @@ "'1459036800'" ] }, - "execution_count": 63, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -595,7 +599,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 18, "metadata": { "collapsed": false }, @@ -606,7 +610,7 @@ "'./report_metadata/date_range/begin'" ] }, - "execution_count": 77, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -617,7 +621,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 19, "metadata": { "collapsed": false }, @@ -628,7 +632,7 @@ "'report_metadata_date_range_begin'" ] }, - "execution_count": 82, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -640,7 +644,7 @@ }, { "cell_type": "code", - "execution_count": 156, + "execution_count": 20, "metadata": { "collapsed": false, "scrolled": true @@ -711,7 +715,7 @@ " 'pg_type': 'varchar'}}" ] }, - "execution_count": 156, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -729,8 +733,8 @@ " 'pg_type': 'varchar'}\n", " for name in xpath_of(root2)\n", " if 'record' in name})\n", - "field_maps['./report_metadata/date_range/begin']['pg_type'] = 'timestamp'\n", - "field_maps['./report_metadata/date_range/end']['pg_type'] = 'timestamp'\n", + "field_maps['./report_metadata/date_range/begin']['pg_type'] = 'timestamptz'\n", + "field_maps['./report_metadata/date_range/end']['pg_type'] = 'timestamptz'\n", "field_maps['./policy_published/pct']['pg_type'] = 'int'\n", "field_maps['./record[{}]/row/count']['pg_type'] = 'int'\n", "field_maps['./record[{}]/row/source_ip']['pg_type'] = 'inet'\n", @@ -1287,8 +1291,9 @@ " field_names += [field_maps[f]['pg_field_name']]\n", " if field_maps[f]['pg_type'] == 'int':\n", " values[field_maps[f]['pg_field_name']] = int(rep.find(f).text)\n", - " elif field_maps[f]['pg_type'] == 'timestamp':\n", - " values[field_maps[f]['pg_field_name']] = datetime.datetime.utcfromtimestamp(int(rep.find(f).text))\n", + " elif field_maps[f]['pg_type'] == 'timestamptz':\n", + " values[field_maps[f]['pg_field_name']] = datetime.datetime.fromtimestamp(int(rep.find(f).text), \n", + " tz=datetime.timezone.utc)\n", " elif field_maps[f]['pg_type'] == 'inet':\n", " values[field_maps[f]['pg_field_name']] = maybe_strip(rep.find(f).text)\n", " else:\n",