{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import configparser\n", "import imaplib\n", "import email\n", "import io\n", "import zipfile\n", "import xml.etree.ElementTree\n", "import psycopg2\n", "import re\n", "import datetime" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def fetch_msg(num):\n", " return mailbox.fetch(num, '(RFC822)')[1][0][1]" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def xml_of_part(part):\n", " with zipfile.ZipFile(io.BytesIO(part.get_payload(decode=True))) as zf:\n", " fn = zf.infolist()[0].filename\n", " contents = zf.read(fn).decode('utf-8')\n", " return xml.etree.ElementTree.fromstring(contents)" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def xml_of(message):\n", " reports = []\n", " if message.is_multipart():\n", " for p in message.get_payload():\n", " if 'zip' in p.get_content_type():\n", " reports += [xml_of_part(p)]\n", " else:\n", " reports = [xml_of_part(message)]\n", " return reports" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def extract_report(msg):\n", " pmsg = email.message_from_bytes(msg)\n", " return xml_of(pmsg)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['dmarc.ini']" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "config = configparser.ConfigParser()\n", "config.read('dmarc.ini')" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "('OK', [b'174'])" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mailbox = imaplib.IMAP4(host=config['imap']['server'], \n", " port=config['imap']['port'])\n", "mailbox.starttls()\n", "mailbox.login(config['imap']['username'], config['imap']['password'])\n", "mailbox.select('INBOX', readonly=True)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "('OK',\n", " [b'1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174'])" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "resp, nums = mailbox.search(None, \"ALL\")\n", "resp, nums" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "b'1'\n", "b'1' []\n", "b'2'\n", "b'2' []\n", "b'3'\n", "b'3' []\n", "b'4'\n", "b'4' []\n", "b'5'\n", "b'5' []\n", "b'6'\n", "b'6' []\n", "b'7'\n", "b'7' []\n", "b'8'\n", "b'8' []\n", "b'9'\n", "b'9' []\n", "b'10'\n", "b'10' []\n", "b'11'\n", "b'11' []\n", "b'12'\n", "b'12' []\n", "b'13'\n", "b'13' []\n", "b'14'\n", "b'14' []\n", "b'15'\n", "b'15' []\n", "b'16'\n", "b'16' []\n", "b'17'\n", "b'17' []\n", "b'18'\n", "b'18' []\n", "b'19'\n", "b'19' []\n", "b'20'\n", "b'20' []\n", "b'21'\n", "b'21' []\n", "b'22'\n", "b'22' []\n", "b'23'\n", "b'23' []\n", "b'24'\n", "b'24' []\n", "b'25'\n", "b'25' []\n", "b'26'\n", "b'26' []\n", "b'27'\n", "b'27' []\n", "b'28'\n", "b'28' []\n", "b'29'\n", "b'29' []\n", "b'30'\n", "b'30' []\n", "b'31'\n", "b'31' []\n", "b'32'\n", "b'32' []\n", "b'33'\n", "b'33' []\n", "b'34'\n", "b'34' []\n", "b'35'\n", "b'35' []\n", "b'36'\n", "b'36' []\n", "b'37'\n", "b'37' []\n", "b'38'\n", "b'38' []\n", "b'39'\n", "b'39' []\n", "b'40'\n", "b'40' []\n", "b'41'\n", "b'41' []\n", "b'42'\n", "b'42' []\n", "b'43'\n", "b'43' []\n", "b'44'\n", "b'44' []\n", "b'45'\n", "b'45' []\n", "b'46'\n", "b'46' []\n", "b'47'\n", "b'47' []\n", "b'48'\n", "b'48' []\n", "b'49'\n", "b'49' []\n", "b'50'\n", "b'50' []\n", "b'51'\n", "b'51' []\n", "b'52'\n", "b'52' []\n", "b'53'\n", "b'53' []\n", "b'54'\n", "b'54' []\n", "b'55'\n", "b'55' []\n", "b'56'\n", "b'56' []\n", "b'57'\n", "b'57' []\n", "b'58'\n", "b'58' []\n", "b'59'\n", "b'59' []\n", "b'60'\n", "b'60' []\n", "b'61'\n", "b'61' []\n", "b'62'\n", "b'62' []\n", "b'63'\n", "b'63' []\n", "b'64'\n", "b'64' []\n", "b'65'\n", "b'65' []\n", "b'66'\n", "b'66' []\n", "b'67'\n", "b'67' []\n", "b'68'\n", "b'68' []\n", "b'69'\n", "b'69' []\n", "b'70'\n", "b'70' []\n", "b'71'\n", "b'71' []\n", "b'72'\n", "b'72' []\n", "b'73'\n", "b'73' []\n", "b'74'\n", "b'74' []\n", "b'75'\n", "b'75' []\n", "b'76'\n", "b'76' []\n", "b'77'\n", "b'77' []\n", "b'78'\n", "b'78' []\n", "b'79'\n", "b'79' []\n", "b'80'\n", "b'80' []\n", "b'81'\n", "b'81' []\n", "b'82'\n", "b'82' []\n", "b'83'\n", "b'83' []\n", "b'84'\n", "b'84' []\n", "b'85'\n", "b'85' []\n", "b'86'\n", "b'86' []\n", "b'87'\n", "b'87' []\n", "b'88'\n", "b'88' []\n", "b'89'\n", "b'89' []\n", "b'90'\n", "b'90' []\n", "b'91'\n", "b'91' []\n", "b'92'\n", "b'92' []\n", "b'93'\n", "b'93' []\n", "b'94'\n", "b'94' []\n", "b'95'\n", "b'95' []\n", "b'96'\n", "b'96' []\n", "b'97'\n", "b'97' []\n", "b'98'\n", "b'98' []\n", "b'99'\n", "b'99' []\n", "b'100'\n", "b'100' []\n", "b'101'\n", "b'101' []\n", "b'102'\n", "b'102' []\n", "b'103'\n", "b'103' []\n", "b'104'\n", "b'104' []\n", "b'105'\n", "b'105' []\n", "b'106'\n", "b'106' []\n", "b'107'\n", "b'107' []\n", "b'108'\n", "b'108' []\n", "b'109'\n", "b'109' []\n", "b'110'\n", "b'110' []\n", "b'111'\n", "b'111' []\n", "b'112'\n", "b'112' []\n", "b'113'\n", "b'113' []\n", "b'114'\n", "b'114' []\n", "b'115'\n", "b'115' []\n", "b'116'\n", "b'116' []\n", "b'117'\n", "b'117' []\n", "b'118'\n", "b'118' []\n", "b'119'\n", "b'119' []\n", "b'120'\n", "b'120' []\n", "b'121'\n", "b'121' []\n", "b'122'\n", "b'122' []\n", "b'123'\n", "b'123' []\n", "b'124'\n", "b'124' []\n", "b'125'\n", "b'125' []\n", "b'126'\n", "b'126' []\n", "b'127'\n", "b'127' []\n", "b'128'\n", "b'128' []\n", "b'129'\n", "b'129' []\n", "b'130'\n", "b'130' []\n", "b'131'\n", "b'131' []\n", "b'132'\n", "b'132' []\n", "b'133'\n", "b'133' []\n", "b'134'\n", "b'134' []\n", "b'135'\n", "b'135' []\n", "b'136'\n", "b'136' []\n", "b'137'\n", "b'137' []\n", "b'138'\n", "b'138' []\n", "b'139'\n", "b'139' []\n", "b'140'\n", "b'140' []\n", "b'141'\n", "b'141' []\n", "b'142'\n", "b'142' []\n", "b'143'\n", "b'143' []\n", "b'144'\n", "b'144' []\n", "b'145'\n", "b'145' []\n", "b'146'\n", "b'146' []\n", "b'147'\n", "b'147' []\n", "b'148'\n", "b'148' []\n", "b'149'\n", "b'149' []\n", "b'150'\n", "b'150' []\n", "b'151'\n", "b'151' []\n", "b'152'\n", "b'152' []\n", "b'153'\n", "b'153' []\n", "b'154'\n", "b'154' []\n", "b'155'\n", "b'155' []\n", "b'156'\n", "b'156' []\n", "b'157'\n", "b'157' []\n", "b'158'\n", "b'158' []\n", "b'159'\n", "b'159' []\n", "b'160'\n", "b'160' []\n", "b'161'\n", "b'161' []\n", "b'162'\n", "b'162' []\n", "b'163'\n", "b'163' []\n", "b'164'\n", "b'164' []\n", "b'165'\n", "b'165' []\n", "b'166'\n", "b'166' []\n", "b'167'\n", "b'167' []\n", "b'168'\n", "b'168' []\n", "b'169'\n", "b'169' []\n", "b'170'\n", "b'170' []\n", "b'171'\n", "b'171' []\n", "b'172'\n", "b'172' []\n", "b'173'\n", "b'173' []\n", "b'174'\n", "b'174' []\n" ] } ], "source": [ "for n in nums[0].split():\n", " print(n)\n", " reports = extract_report(fetch_msg(n))\n", " print(n, reports)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ]" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dmarc_reports = [report for report_set in [extract_report(fetch_msg(n)) for n in nums[0].split()]\n", " for report in report_set]\n", "dmarc_reports" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "('BYE', [b'Logging out'])" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mailbox.close()\n", "mailbox.logout()" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": true }, "outputs": [], "source": [ "field_maps = {'./policy_published/adkim': {'pg_field_name': 'policy_published_adkim',\n", " 'pg_table': 'reports',\n", " 'pg_type': 'varchar'},\n", " './policy_published/aspf': {'pg_field_name': 'policy_published_aspf',\n", " 'pg_table': 'reports',\n", " 'pg_type': 'varchar'},\n", " './policy_published/domain': {'pg_field_name': 'policy_published_domain',\n", " 'pg_table': 'reports',\n", " 'pg_type': 'varchar'},\n", " './policy_published/p': {'pg_field_name': 'policy_published_p',\n", " 'pg_table': 'reports',\n", " 'pg_type': 'varchar'},\n", " './policy_published/pct': {'pg_field_name': 'policy_published_pct',\n", " 'pg_table': 'reports',\n", " 'pg_type': 'int'},\n", " './record[{}]/auth_results/dkim/domain': {'pg_field_name': 'auth_results_dkim_domain',\n", " 'pg_table': 'report_items',\n", " 'pg_type': 'varchar'},\n", " './record[{}]/auth_results/dkim/result': {'pg_field_name': 'auth_results_dkim_result',\n", " 'pg_table': 'report_items',\n", " 'pg_type': 'varchar'},\n", " './record[{}]/auth_results/spf/domain': {'pg_field_name': 'auth_results_spf_domain',\n", " 'pg_table': 'report_items',\n", " 'pg_type': 'varchar'},\n", " './record[{}]/auth_results/spf/result': {'pg_field_name': 'auth_results_spf_result',\n", " 'pg_table': 'report_items',\n", " 'pg_type': 'varchar'},\n", " './record[{}]/identifiers/header_from': {'pg_field_name': 'identifiers_header_from',\n", " 'pg_table': 'report_items',\n", " 'pg_type': 'varchar'},\n", " './record[{}]/row/count': {'pg_field_name': 'count',\n", " 'pg_table': 'report_items',\n", " 'pg_type': 'int'},\n", " './record[{}]/row/policy_evaluated/disposition': {'pg_field_name': 'policy_evaluated_disposition',\n", " 'pg_table': 'report_items',\n", " 'pg_type': 'varchar'},\n", " './record[{}]/row/policy_evaluated/dkim': {'pg_field_name': 'policy_evaluated_dkim',\n", " 'pg_table': 'report_items',\n", " 'pg_type': 'varchar'},\n", " './record[{}]/row/policy_evaluated/spf': {'pg_field_name': 'policy_evaluated_spf',\n", " 'pg_table': 'report_items',\n", " 'pg_type': 'varchar'},\n", " './record[{}]/row/source_ip': {'pg_field_name': 'source_ip',\n", " 'pg_table': 'report_items',\n", " 'pg_type': 'inet'},\n", " './report_metadata/date_range/begin': {'pg_field_name': 'report_metadata_date_range_begin',\n", " 'pg_table': 'reports',\n", " 'pg_type': 'timestamp'},\n", " './report_metadata/date_range/end': {'pg_field_name': 'report_metadata_date_range_end',\n", " 'pg_table': 'reports',\n", " 'pg_type': 'timestamp'},\n", " './report_metadata/email': {'pg_field_name': 'report_metadata_email',\n", " 'pg_table': 'reports',\n", " 'pg_type': 'varchar'},\n", " './report_metadata/org_name': {'pg_field_name': 'report_metadata_org_name',\n", " 'pg_table': 'reports',\n", " 'pg_type': 'varchar'},\n", " './report_metadata/report_id': {'pg_field_name': 'report_metadata_report_id',\n", " 'pg_table': 'reports',\n", " 'pg_type': 'varchar'}}" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def maybe_strip(text):\n", " if text:\n", " return text.strip()\n", " else:\n", " return ''" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def write_report(connection, cursor, report):\n", " \n", " field_names = []\n", " values = {}\n", " for f in [f for f in field_maps if field_maps[f]['pg_table'] == 'reports']:\n", " field_names += [field_maps[f]['pg_field_name']]\n", " if field_maps[f]['pg_type'] == 'int':\n", " values[field_maps[f]['pg_field_name']] = int(report.find(f).text)\n", " elif field_maps[f]['pg_type'] == 'timestamp':\n", " values[field_maps[f]['pg_field_name']] = datetime.datetime.utcfromtimestamp(int(report.find(f).text))\n", " elif field_maps[f]['pg_type'] == 'inet':\n", " values[field_maps[f]['pg_field_name']] = maybe_strip(report.find(f).text)\n", " else:\n", " values[field_maps[f]['pg_field_name']] = maybe_strip(report.find(f).text)\n", " insert_string = 'insert into reports (' + ', '.join(field_names) + ') '\n", " insert_string += 'values (' + ', '.join('%({})s'.format(f) for f in field_names) + ');'\n", " cursor.execute(insert_string, values)\n", " \n", " for i in range(1, len(report.findall('./record'))+1):\n", " field_names = []\n", " cursor.execute('select id, report_metadata_report_id from reports where report_metadata_report_id = %s;', \n", " [report.find('./report_metadata/report_id').text])\n", " results = cursor.fetchall()\n", " if len(results) != 1:\n", " raise\n", " else:\n", " report_id = results[0][0]\n", " values = {'report_id': report_id}\n", " for f in [f for f in field_maps if field_maps[f]['pg_table'] == 'report_items']:\n", " field_names += [field_maps[f]['pg_field_name']]\n", " if field_maps[f]['pg_type'] == 'int':\n", " values[field_maps[f]['pg_field_name']] = int(report.find(f.format(i)).text)\n", " elif field_maps[f]['pg_type'] == 'timestamp':\n", " values[field_maps[f]['pg_field_name']] = datetime.datetime.utcfromtimestamp(int(report.find(f.format(i)).text))\n", " elif field_maps[f]['pg_type'] == 'inet':\n", " values[field_maps[f]['pg_field_name']] = maybe_strip(report.find(f.format(i)).text)\n", " else:\n", " values[field_maps[f]['pg_field_name']] = maybe_strip(report.find(f.format(i)).text)\n", " insert_string = 'insert into report_items (report_id, ' + ', '.join(field_names) + ') '\n", " insert_string += 'values (%(report_id)s, ' + ', '.join('%({})s'.format(f) for f in field_names) + ');'\n", " cursor.execute(insert_string, values)\n", " connection.commit()" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": true }, "outputs": [], "source": [ "conn = psycopg2.connect(host=config['database']['server'],\n", " database=config['database']['database'], \n", " user=config['database']['username'], \n", " password=config['database']['password'])\n", "with conn.cursor() as cur:\n", " for report in dmarc_reports:\n", " write_report(conn, cur, report)" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "collapsed": true }, "outputs": [], "source": [ "conn.close()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.3+" } }, "nbformat": 4, "nbformat_minor": 0 }