From ffcd7c45aeb60112bacfb559db75295ebbccf515 Mon Sep 17 00:00:00 2001 From: Neil Smith Date: Sat, 21 Jan 2017 18:19:35 +0000 Subject: [PATCH] Handling invalid zip files --- complete.ipynb | 369 +++++++++++++++++++------------------------ dmarc_to_database.py | 14 +- 2 files changed, 168 insertions(+), 215 deletions(-) diff --git a/complete.ipynb b/complete.ipynb index 3663338..ebd8329 100644 --- a/complete.ipynb +++ b/complete.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 31, + "execution_count": 1, "metadata": { "collapsed": true }, @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 2, "metadata": { "collapsed": true }, @@ -35,20 +35,23 @@ "cell_type": "code", "execution_count": 33, "metadata": { - "collapsed": true + "collapsed": false }, "outputs": [], "source": [ "def xml_of_part(part):\n", - " with zipfile.ZipFile(io.BytesIO(part.get_payload(decode=True))) as zf:\n", + " try:\n", + " with zipfile.ZipFile(io.BytesIO(part.get_payload(decode=True))) as zf:\n", " fn = zf.infolist()[0].filename\n", " contents = zf.read(fn).decode('utf-8')\n", - " return xml.etree.ElementTree.fromstring(contents)" + " return xml.etree.ElementTree.fromstring(contents)\n", + " except zipfile.BadZipFile:\n", + " return None" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 28, "metadata": { "collapsed": true }, @@ -59,7 +62,9 @@ " if message.is_multipart():\n", " for p in message.get_payload():\n", " if 'zip' in p.get_content_type():\n", - " reports += [xml_of_part(p)]\n", + " report = xml_of_part(p)\n", + " if report:\n", + " reports += [report]\n", " else:\n", " reports = [xml_of_part(message)]\n", " return reports" @@ -67,7 +72,32 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[1, 2, 3, 4]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xs = [1, 2, 3]\n", + "x = 4\n", + "if x: xs += [x]\n", + "xs" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "metadata": { "collapsed": true }, @@ -80,7 +110,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 6, "metadata": { "collapsed": true }, @@ -95,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 7, "metadata": { "collapsed": true }, @@ -165,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 8, "metadata": { "collapsed": true }, @@ -209,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 9, "metadata": { "collapsed": true }, @@ -238,7 +268,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 10, "metadata": { "collapsed": false }, @@ -249,7 +279,7 @@ "['dmarc.ini']" ] }, - "execution_count": 40, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -261,7 +291,29 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 17, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'ogedei.njae.me.uk'" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "config['database']['server']" + ] + }, + { + "cell_type": "code", + "execution_count": 11, "metadata": { "collapsed": false }, @@ -276,11 +328,22 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 18, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "datetime.datetime(2017, 1, 13, 23, 59, 59, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None))" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "cur.execute('select max(report_metadata_date_range_end) from reports')\n", "results = cur.fetchall()\n", @@ -290,7 +353,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 19, "metadata": { "collapsed": false }, @@ -298,10 +361,10 @@ { "data": { "text/plain": [ - "('OK', [b'182'])" + "('OK', [b'541'])" ] }, - "execution_count": 43, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -316,7 +379,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 30, "metadata": { "collapsed": false }, @@ -324,12 +387,12 @@ { "data": { "text/plain": [ - "('ALL',\n", + "('SINCE 11-Jan-2017',\n", " 'OK',\n", - " [b'2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183'])" + " [b'519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542'])" ] }, - "execution_count": 44, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -345,7 +408,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 36, "metadata": { "collapsed": false }, @@ -353,198 +416,84 @@ { "data": { "text/plain": [ - "['1448246712.259522',\n", - " '75eef2128eb84e9ca8e4837f3d4e31bd@hotmail.com',\n", - " '9138782308848375092',\n", - " '840fc02d5cb847ec9f007fb236c0c190@hotmail.com',\n", - " '15111277194568576101',\n", - " '1448419746.206772',\n", - " '1448592471.754446',\n", - " 'c8e4fe772ae3434594e8396a8abf77b1@hotmail.com',\n", - " '2273327349069127175',\n", - " '1448851661.602960',\n", - " '1448938177.97045',\n", - " '18203685646345145151',\n", - " '12700770160536483846',\n", - " '1449024342.601561',\n", - " '1fadaaf3d9d544568a41042a1f42df05@hotmail.com',\n", - " '3544432548848738700',\n", - " '1449110875.443288',\n", - " '1449197294.177690',\n", - " '10356602625290246518',\n", - " '1449283514.868026',\n", - " '4654132158287507198',\n", - " '5164714352250096219',\n", - " '1449542804.259788',\n", - " '20246ec4a70041caa276560032fdc595@hotmail.com',\n", - " '14089985608998676943',\n", - " '1449629119.120001',\n", - " 'c8f339f8f3364d78820dd03a84d11daa@hotmail.com',\n", - " '1233953228184721030',\n", - " '17984785070730076605',\n", - " '1449888489.668429',\n", - " '121bb39637c14847a40669dd3e7cdf49@hotmail.com',\n", - " '14661842628106423589',\n", - " '1449974899.615312',\n", - " '7834597727856283739',\n", - " 'f94f00dee713479fb8bfdbcad539da88@hotmail.com',\n", - " '17659080448126516391',\n", - " '1450234735.690828',\n", - " '17880573400612806781',\n", - " '1451184218.12383',\n", - " '6505353761044749597',\n", - " '02aca4c6bb144bb6b54773374c14a43a@hotmail.com',\n", - " '1451443783.692950',\n", - " '12137203242660163862',\n", - " 'd1019291012e4f4589f59f1dacef31a3@hotmail.com',\n", - " '1451616408.476341',\n", - " '11743474449139138027',\n", - " 'e245ef81f05b4697bea34bed977d8286@hotmail.com',\n", - " '1452134765.308922',\n", - " '271c80b9234a4c99bb9608abc5470c99@hotmail.com',\n", - " '15974729567081493290',\n", - " 'ba1e9c35eec5428b8c3d6a32d25daa46@hotmail.com',\n", - " '1452393953.83099',\n", - " 'd71792d8afc246e9a4756bd13e600b59@hotmail.com',\n", - " '1452480188.666644',\n", - " '1452566814.805501',\n", - " 'fc0750780e0d4b1395c4c9f41cb9791f@hotmail.com',\n", - " '1452653275.294149',\n", - " '1452741832.342075',\n", - " '2884439811958006165',\n", - " '3508955360705645678',\n", - " '1452826445.971145',\n", - " '1452912512.114840',\n", - " 'eda16c25bbdb48b2bae0d18eab13f736@hotmail.com',\n", - " '724976339745122723',\n", - " '1453084984.369330',\n", - " '1453171440.157305',\n", - " '1862812391966551302',\n", - " '1453258067.440890',\n", - " '1453344550.144698',\n", - " '2002148329698541727',\n", - " '6110291457768008366',\n", - " 'de651cefbc0749a3aa3d91cdc5084f5a@hotmail.com',\n", - " '1453517236.399019',\n", - " '14593873841710243963',\n", - " 'ba64911adb0743d0b926588647ec5dcf@hotmail.com',\n", - " '1453689981.768611',\n", - " '13959504937502303973',\n", - " '3eba8b9e31724809aaf8d517e5656c4b@hotmail.com',\n", - " '1453776417.898739',\n", - " '9594298247075547297',\n", - " 'd77c5b90344149f5bb7b50ff69400aeb@hotmail.com',\n", - " '1453862958.482897',\n", - " '3267388508667988463',\n", - " '580ca0a74948493ba8b7c6e62b8c81d9@hotmail.com',\n", - " '6876229717666715946',\n", - " '6474240574237085621',\n", - " '1454381343.228960',\n", - " '77b6c2aa32bf440aa240195db229cd4a@hotmail.com',\n", - " '1922265692629464594',\n", - " '1454467952.258012',\n", - " '1454640482.903992',\n", - " '16409503372348966326',\n", - " '456ffff78456494fbaed47b465259d46@hotmail.com',\n", - " '543258397004515473',\n", - " '4158049796460953654',\n", - " 'a6b6aec1af1647eb8c6fd83baaed6bbe@hotmail.com',\n", - " '1454986282.636073',\n", - " '8986675713054039362',\n", - " '30d5ef78d6d746a4a777c56e7ca6f1c6@hotmail.com',\n", - " '1455072659.49250',\n", - " '10744929815548178249',\n", - " '1178c4fe34364ff39cb6e24a51219093@hotmail.com',\n", - " '730219275619457',\n", - " '164180405175632647',\n", - " '16520810497197721925',\n", - " '13f28e952d67451281b90d55eee9c7c3@hotmail.com',\n", - " '1455418081.446180',\n", - " '641b4cc500334a7eb3f649b21af003ca@hotmail.com',\n", - " '14416654486328871570',\n", - " 'dbe2f49dbea947428b179e0ecec5f813@hotmail.com',\n", - " '1455679107.469767',\n", - " '7815164892280952980',\n", - " '511d6fbb8484454b916ad8d799938390@hotmail.com',\n", - " '1455763965.140751',\n", - " '10491829640821461358',\n", - " '13702792868066092542',\n", - " '1456109267.60200',\n", - " '2cfe3d0ec75b43f89a6a7ae57777e25c@hotmail.com',\n", - " '9037150256193522538',\n", - " 'e327753089e04950906848d104a93695@hotmail.com',\n", - " '1456282325.455667',\n", - " '3584764172722215181',\n", - " '1456455410.661669',\n", - " '16824795799639390032',\n", - " '14217880258295310962',\n", - " '1456541727.873510',\n", - " '1102163566627474204',\n", - " '1456713880.905762',\n", - " '1456800606.884839',\n", - " '1bcc49be66744cc7a70ae1cc53515bbf@hotmail.com',\n", - " '11019573554877343650',\n", - " '1456886761.643377',\n", - " '36dd4e2b3f424bb6919d2d5d4c77c8dd@hotmail.com',\n", - " 'b6d15133598b4e00b21c8e2b7cbec05e@hotmail.com',\n", - " '1457059537.625770',\n", - " '85136d21c514431e9f5cf9d36faa4a22@hotmail.com',\n", - " '1457146943.63077',\n", - " '1457232742.472142',\n", - " 'ea3a96df66a845c5a666068ef8ca1368@hotmail.com',\n", - " '17044038014166863505',\n", - " '1457318880.887129',\n", - " '3689737760348689620',\n", - " '726a3261dfab4b4590b5fc898c561b08@hotmail.com',\n", - " '129a9dc3ff2e40ae8a3890ecc1c291d9@hotmail.com',\n", - " '1457491692.793016',\n", - " '1207168150882246442',\n", - " '10706879170593875813',\n", - " '4226319448276355598',\n", - " '1457750956.253773',\n", - " '12434676377746978866',\n", - " '43e7b5e2e0fb400aa914058360c8aaab@hotmail.com',\n", - " '1457919850.989656',\n", - " '594749737641304465',\n", - " '18c9a966da81401e80ca2a22310ffee6@hotmail.com',\n", - " '1458006833.731671',\n", - " '6996084840178215735',\n", - " '1458093082.193350',\n", - " '3188090359734380692',\n", - " '7006513620611789399',\n", - " '1458353823.362308',\n", - " '17071564483265101388',\n", - " '1458524834.32161',\n", - " '1458611429.595569',\n", - " '14662396456930987863',\n", - " '1458784545.303847',\n", - " '13032699446207263761',\n", - " '8426152571808739514',\n", - " '1458957186.548175',\n", - " '2150510829392606201',\n", - " '68aad5080a774e2c997d159b546569b9@hotmail.com',\n", - " '1459129809.695034',\n", - " '16143280651570354241',\n", - " '8c177254c3cb41869dc3afab59f74c76@hotmail.com',\n", - " '15410706527896810898',\n", - " '1459216304.582931',\n", - " '15497495941279624940',\n", - " '1459302353.261157',\n", - " '7773a696f4a54f1e8c01f4644fbb94ee@hotmail.com',\n", - " '15185964531645951164',\n", - " '1459391203.751219',\n", - " '9452413620101638402',\n", - " '1459475662.735732']" + "[,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ]" ] }, - "execution_count": 45, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dmarc_reports = [report for report_set in [extract_report(fetch_msg(n)) for n in nums[0].split()]\n", - " for report in report_set]\n", + " for report in report_set\n", + " if report]\n", + "dmarc_reports" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['3644362597779170059',\n", + " '1484103023.660115',\n", + " '16703417802819394712',\n", + " '1484189549.594225',\n", + " '1484276013.355502',\n", + " '4b1a497aa31d4fdbaaa56c13eb7987fb@hotmail.com',\n", + " '12376654726187971850',\n", + " '1484362840.261197',\n", + " '16723890724451093500',\n", + " '1484448519.463747',\n", + " '3eb8d9f114044eb7bd0509a2e327451d@hotmail.com',\n", + " '17985849130842520792',\n", + " 'b5559b13fcd04062ae46c911334f47e2@hotmail.com',\n", + " '3557556884331174573',\n", + " '80eff7abd7b844c0830df5cdd3fd476c@hotmail.com',\n", + " '13353913580885545629',\n", + " '1484707828.212792',\n", + " '8880815782389375117',\n", + " '1484794248.174834',\n", + " '15941205824990790163',\n", + " '1484880971.613805',\n", + " '12141824316076722331',\n", + " '1484966874.391661']" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ "[r.find('./report_metadata/report_id').text for r in dmarc_reports]" ] }, @@ -912,7 +861,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2" + "version": "3.5.2+" } }, "nbformat": 4, diff --git a/dmarc_to_database.py b/dmarc_to_database.py index 272998f..4bb2049 100644 --- a/dmarc_to_database.py +++ b/dmarc_to_database.py @@ -14,10 +14,13 @@ def fetch_msg(num): return mailbox.uid('FETCH', num, '(RFC822)')[1][0][1] def xml_of_part(part): - with zipfile.ZipFile(io.BytesIO(part.get_payload(decode=True))) as zf: - fn = zf.infolist()[0].filename - contents = zf.read(fn).decode('utf-8') - return xml.etree.ElementTree.fromstring(contents) + try: + with zipfile.ZipFile(io.BytesIO(part.get_payload(decode=True))) as zf: + fn = zf.infolist()[0].filename + contents = zf.read(fn).decode('utf-8') + return xml.etree.ElementTree.fromstring(contents) + except zipfile.BadZipFile: + return None def xml_of(message): @@ -183,7 +186,8 @@ resp, nums = mailbox.uid('SEARCH', None, mails_from) dmarc_reports = [report for report_set in [extract_report(fetch_msg(n)) for n in nums[0].split()] - for report in report_set] + for report in report_set + if report] mailbox.close() mailbox.logout() -- 2.34.1