Fixed error handling
[dmarc.git] / write-to-database.ipynb
index 96884afeb6272390af65c98a3a143d2e2be5c972..331d569fa7000155053a444936b8df04a42c8406 100644 (file)
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 92,
+   "execution_count": 1,
    "metadata": {
     "collapsed": true
    },
@@ -40,7 +40,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "metadata": {
     "collapsed": false
    },
@@ -51,7 +51,7 @@
        "['imap', 'database']"
       ]
      },
-     "execution_count": 4,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -62,7 +62,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
    "metadata": {
     "collapsed": false
    },
@@ -73,7 +73,7 @@
        "['server', 'username', 'port', 'password']"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -84,7 +84,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 5,
    "metadata": {
     "collapsed": false
    },
@@ -95,7 +95,7 @@
        "['server', 'database', 'username', 'password']"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 6,
    "metadata": {
     "collapsed": true
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 7,
    "metadata": {
     "collapsed": false
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 8,
    "metadata": {
     "collapsed": false
    },
        " 'yahoo.com!njae.me.uk!1459036800!1459123199.xml']"
       ]
      },
-     "execution_count": 17,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 9,
    "metadata": {
     "collapsed": false
    },
     {
      "data": {
       "text/plain": [
-       "{'google.com!njae.me.uk!1458777600!1458863999.xml': <Element 'feedback' at 0x7fa9a8169a98>,\n",
-       " 'google.com!njae.me.uk!1458950400!1459036799.xml': <Element 'feedback' at 0x7fa9a8170a48>,\n",
-       " 'hotmail.com!njae.me.uk!1459011600!1459098000.xml': <Element 'feedback' at 0x7fa9a80f4a48>,\n",
-       " 'yahoo.com!njae.me.uk!1458864000!1458950399.xml': <Element 'feedback' at 0x7fa9a80f74a8>,\n",
-       " 'yahoo.com!njae.me.uk!1459036800!1459123199.xml': <Element 'feedback' at 0x7fa9a80f7ea8>}"
+       "{'google.com!njae.me.uk!1458777600!1458863999.xml': <Element 'feedback' at 0x7f1fd40b4318>,\n",
+       " 'google.com!njae.me.uk!1458950400!1459036799.xml': <Element 'feedback' at 0x7f1fd40b4db8>,\n",
+       " 'hotmail.com!njae.me.uk!1459011600!1459098000.xml': <Element 'feedback' at 0x7f1fd40bbdb8>,\n",
+       " 'yahoo.com!njae.me.uk!1458864000!1458950399.xml': <Element 'feedback' at 0x7f1fd40bf818>,\n",
+       " 'yahoo.com!njae.me.uk!1459036800!1459123199.xml': <Element 'feedback' at 0x7f1fd4043278>}"
       ]
      },
-     "execution_count": 19,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 10,
    "metadata": {
     "collapsed": false
    },
       "   org_name : google.com\n",
       "   email : noreply-dmarc-support@google.com\n",
       "   extra_contact_info : https://support.google.com/a/answer/2466580\n",
-      "   report_id : 2150510829392606201\n",
+      "   report_id : 13032699446207263761\n",
       "   date_range : \n",
-      "     begin : 1458950400\n",
-      "     end : 1459036799\n",
+      "     begin : 1458777600\n",
+      "     end : 1458863999\n",
       " policy_published : \n",
       "   domain : njae.me.uk\n",
       "   adkim : r\n",
       "   pct : 100\n",
       " record : \n",
       "   row : \n",
-      "     source_ip : 82.109.184.9\n",
-      "     count : 1\n",
-      "     policy_evaluated : \n",
-      "       disposition : none\n",
-      "       dkim : fail\n",
-      "       spf : fail\n",
-      "   identifiers : \n",
-      "     header_from : njae.me.uk\n",
-      "   auth_results : \n",
-      "     dkim : \n",
-      "       domain : clublloyds.com\n",
-      "       result : pass\n",
-      "     spf : \n",
-      "       domain : clublloyds.com\n",
-      "       result : pass\n",
-      " record : \n",
-      "   row : \n",
-      "     source_ip : 212.69.55.62\n",
+      "     source_ip : 146.185.136.235\n",
       "     count : 1\n",
       "     policy_evaluated : \n",
       "       disposition : none\n",
       "       dkim : pass\n",
-      "       spf : pass\n",
+      "       spf : fail\n",
       "   identifiers : \n",
       "     header_from : njae.me.uk\n",
       "   auth_results : \n",
       "       result : pass\n",
       "     spf : \n",
       "       domain : njae.me.uk\n",
-      "       result : pass\n",
+      "       result : softfail\n",
       "\n",
       " report_metadata : \n",
       "   org_name : Yahoo! Inc.\n",
       "       result : softfail\n",
       "\n",
       " report_metadata : \n",
-      "   org_name : Yahoo! Inc.\n",
-      "   email : postmaster@dmarc.yahoo.com\n",
-      "   report_id : 1458957186.548175\n",
+      "   org_name : google.com\n",
+      "   email : noreply-dmarc-support@google.com\n",
+      "   extra_contact_info : https://support.google.com/a/answer/2466580\n",
+      "   report_id : 2150510829392606201\n",
       "   date_range : \n",
-      "     begin : 1458864000\n",
-      "     end : 1458950399\n",
+      "     begin : 1458950400\n",
+      "     end : 1459036799\n",
       " policy_published : \n",
       "   domain : njae.me.uk\n",
       "   adkim : r\n",
       "   aspf : r\n",
       "   p : none\n",
+      "   sp : none\n",
       "   pct : 100\n",
       " record : \n",
       "   row : \n",
+      "     source_ip : 82.109.184.9\n",
+      "     count : 1\n",
+      "     policy_evaluated : \n",
+      "       disposition : none\n",
+      "       dkim : fail\n",
+      "       spf : fail\n",
+      "   identifiers : \n",
+      "     header_from : njae.me.uk\n",
+      "   auth_results : \n",
+      "     dkim : \n",
+      "       domain : clublloyds.com\n",
+      "       result : pass\n",
+      "     spf : \n",
+      "       domain : clublloyds.com\n",
+      "       result : pass\n",
+      " record : \n",
+      "   row : \n",
       "     source_ip : 212.69.55.62\n",
-      "     count : 2\n",
+      "     count : 1\n",
       "     policy_evaluated : \n",
       "       disposition : none\n",
       "       dkim : pass\n",
       "       result : pass\n",
       "\n",
       " report_metadata : \n",
-      "   org_name : google.com\n",
-      "   email : noreply-dmarc-support@google.com\n",
-      "   extra_contact_info : https://support.google.com/a/answer/2466580\n",
-      "   report_id : 13032699446207263761\n",
+      "   org_name : Yahoo! Inc.\n",
+      "   email : postmaster@dmarc.yahoo.com\n",
+      "   report_id : 1458957186.548175\n",
       "   date_range : \n",
-      "     begin : 1458777600\n",
-      "     end : 1458863999\n",
+      "     begin : 1458864000\n",
+      "     end : 1458950399\n",
       " policy_published : \n",
       "   domain : njae.me.uk\n",
       "   adkim : r\n",
       "   aspf : r\n",
       "   p : none\n",
-      "   sp : none\n",
       "   pct : 100\n",
       " record : \n",
       "   row : \n",
-      "     source_ip : 146.185.136.235\n",
-      "     count : 1\n",
+      "     source_ip : 212.69.55.62\n",
+      "     count : 2\n",
       "     policy_evaluated : \n",
       "       disposition : none\n",
       "       dkim : pass\n",
-      "       spf : fail\n",
+      "       spf : pass\n",
       "   identifiers : \n",
       "     header_from : njae.me.uk\n",
       "   auth_results : \n",
       "       result : pass\n",
       "     spf : \n",
       "       domain : njae.me.uk\n",
-      "       result : softfail\n",
+      "       result : pass\n",
       "\n"
      ]
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 11,
    "metadata": {
     "collapsed": false
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 12,
    "metadata": {
     "collapsed": false
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 13,
    "metadata": {
     "collapsed": false
    },
     {
      "data": {
       "text/plain": [
-       "<cursor object at 0x7fa9a819b7c8; closed: 0>"
+       "<cursor object at 0x7f1fdd2e25e8; closed: 0>"
       ]
      },
-     "execution_count": 30,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 14,
    "metadata": {
     "collapsed": false
    },
     {
      "data": {
       "text/plain": [
-       "{'email': 'varchar', 'org_name': 'varchar', 'report_id': 'varchar'}"
+       "{'date_range_begin': 'timestamp',\n",
+       " 'date_range_end': 'timestamp',\n",
+       " 'email': 'varchar',\n",
+       " 'org_name': 'varchar',\n",
+       " 'report_id': 'varchar'}"
       ]
      },
-     "execution_count": 31,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 65,
+   "execution_count": 15,
    "metadata": {
     "collapsed": false
    },
   },
   {
    "cell_type": "code",
-   "execution_count": 66,
+   "execution_count": 16,
    "metadata": {
     "collapsed": false
    },
        " 'feedback/report_metadata/report_id': '1459129809.695034'}"
       ]
      },
-     "execution_count": 66,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 63,
+   "execution_count": 17,
    "metadata": {
     "collapsed": false
    },
        "'1459036800'"
       ]
      },
-     "execution_count": 63,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 77,
+   "execution_count": 18,
    "metadata": {
     "collapsed": false
    },
        "'./report_metadata/date_range/begin'"
       ]
      },
-     "execution_count": 77,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 82,
+   "execution_count": 19,
    "metadata": {
     "collapsed": false
    },
        "'report_metadata_date_range_begin'"
       ]
      },
-     "execution_count": 82,
+     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
-   "execution_count": 156,
+   "execution_count": 20,
    "metadata": {
     "collapsed": false,
     "scrolled": true
        "  'pg_type': 'varchar'}}"
       ]
      },
-     "execution_count": 156,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
     "              'pg_type': 'varchar'}\n",
     "             for name in xpath_of(root2)\n",
     "             if 'record' in name})\n",
-    "field_maps['./report_metadata/date_range/begin']['pg_type'] = 'timestamp'\n",
-    "field_maps['./report_metadata/date_range/end']['pg_type'] = 'timestamp'\n",
+    "field_maps['./report_metadata/date_range/begin']['pg_type'] = 'timestamptz'\n",
+    "field_maps['./report_metadata/date_range/end']['pg_type'] = 'timestamptz'\n",
     "field_maps['./policy_published/pct']['pg_type'] = 'int'\n",
     "field_maps['./record[{}]/row/count']['pg_type'] = 'int'\n",
     "field_maps['./record[{}]/row/source_ip']['pg_type'] = 'inet'\n",
     "    field_names += [field_maps[f]['pg_field_name']]\n",
     "    if field_maps[f]['pg_type'] == 'int':\n",
     "        values[field_maps[f]['pg_field_name']] = int(rep.find(f).text)\n",
-    "    elif field_maps[f]['pg_type'] == 'timestamp':\n",
-    "        values[field_maps[f]['pg_field_name']] = datetime.datetime.utcfromtimestamp(int(rep.find(f).text))\n",
+    "    elif field_maps[f]['pg_type'] == 'timestamptz':\n",
+    "        values[field_maps[f]['pg_field_name']] = datetime.datetime.fromtimestamp(int(rep.find(f).text), \n",
+    "                                                                                 tz=datetime.timezone.utc)\n",
     "    elif field_maps[f]['pg_type'] == 'inet':\n",
     "        values[field_maps[f]['pg_field_name']] = maybe_strip(rep.find(f).text)\n",
     "    else:\n",