Got the initial set into the database
[dmarc.git] / complete.ipynb
1 {
2 "cells": [
3 {
4 "cell_type": "code",
5 "execution_count": 1,
6 "metadata": {
7 "collapsed": true
8 },
9 "outputs": [],
10 "source": [
11 "import configparser\n",
12 "import imaplib\n",
13 "import email\n",
14 "import io\n",
15 "import zipfile\n",
16 "import xml.etree.ElementTree\n",
17 "import psycopg2\n",
18 "import re\n",
19 "import datetime"
20 ]
21 },
22 {
23 "cell_type": "code",
24 "execution_count": 11,
25 "metadata": {
26 "collapsed": true
27 },
28 "outputs": [],
29 "source": [
30 "def fetch_msg(num):\n",
31 " return mailbox.fetch(num, '(RFC822)')[1][0][1]"
32 ]
33 },
34 {
35 "cell_type": "code",
36 "execution_count": 36,
37 "metadata": {
38 "collapsed": true
39 },
40 "outputs": [],
41 "source": [
42 "def xml_of_part(part):\n",
43 " with zipfile.ZipFile(io.BytesIO(part.get_payload(decode=True))) as zf:\n",
44 " fn = zf.infolist()[0].filename\n",
45 " contents = zf.read(fn).decode('utf-8')\n",
46 " return xml.etree.ElementTree.fromstring(contents)"
47 ]
48 },
49 {
50 "cell_type": "code",
51 "execution_count": 42,
52 "metadata": {
53 "collapsed": true
54 },
55 "outputs": [],
56 "source": [
57 "def xml_of(message):\n",
58 " reports = []\n",
59 " if message.is_multipart():\n",
60 " for p in message.get_payload():\n",
61 " if 'zip' in p.get_content_type():\n",
62 " reports += [xml_of_part(p)]\n",
63 " else:\n",
64 " reports = [xml_of_part(message)]\n",
65 " return reports"
66 ]
67 },
68 {
69 "cell_type": "code",
70 "execution_count": 35,
71 "metadata": {
72 "collapsed": true
73 },
74 "outputs": [],
75 "source": [
76 "def extract_report(msg):\n",
77 " pmsg = email.message_from_bytes(msg)\n",
78 " return xml_of(pmsg)"
79 ]
80 },
81 {
82 "cell_type": "code",
83 "execution_count": 38,
84 "metadata": {
85 "collapsed": false
86 },
87 "outputs": [
88 {
89 "data": {
90 "text/plain": [
91 "['dmarc.ini']"
92 ]
93 },
94 "execution_count": 38,
95 "metadata": {},
96 "output_type": "execute_result"
97 }
98 ],
99 "source": [
100 "config = configparser.ConfigParser()\n",
101 "config.read('dmarc.ini')"
102 ]
103 },
104 {
105 "cell_type": "code",
106 "execution_count": 39,
107 "metadata": {
108 "collapsed": false
109 },
110 "outputs": [
111 {
112 "data": {
113 "text/plain": [
114 "('OK', [b'174'])"
115 ]
116 },
117 "execution_count": 39,
118 "metadata": {},
119 "output_type": "execute_result"
120 }
121 ],
122 "source": [
123 "mailbox = imaplib.IMAP4(host=config['imap']['server'], \n",
124 " port=config['imap']['port'])\n",
125 "mailbox.starttls()\n",
126 "mailbox.login(config['imap']['username'], config['imap']['password'])\n",
127 "mailbox.select('INBOX', readonly=True)"
128 ]
129 },
130 {
131 "cell_type": "code",
132 "execution_count": 40,
133 "metadata": {
134 "collapsed": false
135 },
136 "outputs": [
137 {
138 "data": {
139 "text/plain": [
140 "('OK',\n",
141 " [b'1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174'])"
142 ]
143 },
144 "execution_count": 40,
145 "metadata": {},
146 "output_type": "execute_result"
147 }
148 ],
149 "source": [
150 "resp, nums = mailbox.search(None, \"ALL\")\n",
151 "resp, nums"
152 ]
153 },
154 {
155 "cell_type": "code",
156 "execution_count": 44,
157 "metadata": {
158 "collapsed": false
159 },
160 "outputs": [
161 {
162 "name": "stdout",
163 "output_type": "stream",
164 "text": [
165 "b'1'\n",
166 "b'1' [<Element 'feedback' at 0x7f9ea4237278>]\n",
167 "b'2'\n",
168 "b'2' [<Element 'feedback' at 0x7f9ea4274138>]\n",
169 "b'3'\n",
170 "b'3' [<Element 'feedback' at 0x7f9ea4211368>]\n",
171 "b'4'\n",
172 "b'4' [<Element 'feedback' at 0x7f9ea427ae08>]\n",
173 "b'5'\n",
174 "b'5' [<Element 'feedback' at 0x7f9ea420d4a8>]\n",
175 "b'6'\n",
176 "b'6' [<Element 'feedback' at 0x7f9ea42743b8>]\n",
177 "b'7'\n",
178 "b'7' [<Element 'feedback' at 0x7f9ea4211868>]\n",
179 "b'8'\n",
180 "b'8' [<Element 'feedback' at 0x7f9ea427ae08>]\n",
181 "b'9'\n",
182 "b'9' [<Element 'feedback' at 0x7f9ea4274138>]\n",
183 "b'10'\n",
184 "b'10' [<Element 'feedback' at 0x7f9ea422ff48>]\n",
185 "b'11'\n",
186 "b'11' [<Element 'feedback' at 0x7f9ea41c6cc8>]\n",
187 "b'12'\n",
188 "b'12' [<Element 'feedback' at 0x7f9ea427aa98>]\n",
189 "b'13'\n",
190 "b'13' [<Element 'feedback' at 0x7f9ea42743b8>]\n",
191 "b'14'\n",
192 "b'14' [<Element 'feedback' at 0x7f9ea427aef8>]\n",
193 "b'15'\n",
194 "b'15' [<Element 'feedback' at 0x7f9ea4237278>]\n",
195 "b'16'\n",
196 "b'16' [<Element 'feedback' at 0x7f9ea4237db8>]\n",
197 "b'17'\n",
198 "b'17' [<Element 'feedback' at 0x7f9ea422ff48>]\n",
199 "b'18'\n",
200 "b'18' [<Element 'feedback' at 0x7f9ea41c6a48>]\n",
201 "b'19'\n",
202 "b'19' [<Element 'feedback' at 0x7f9ea41c6cc8>]\n",
203 "b'20'\n",
204 "b'20' [<Element 'feedback' at 0x7f9ea41c6908>]\n",
205 "b'21'\n",
206 "b'21' [<Element 'feedback' at 0x7f9ea41c6c28>]\n",
207 "b'22'\n",
208 "b'22' [<Element 'feedback' at 0x7f9ea41c6f98>]\n",
209 "b'23'\n",
210 "b'23' [<Element 'feedback' at 0x7f9ea41c6ae8>]\n",
211 "b'24'\n",
212 "b'24' [<Element 'feedback' at 0x7f9ea41c6f48>]\n",
213 "b'25'\n",
214 "b'25' [<Element 'feedback' at 0x7f9ea4211228>]\n",
215 "b'26'\n",
216 "b'26' [<Element 'feedback' at 0x7f9ea4211188>]\n",
217 "b'27'\n",
218 "b'27' [<Element 'feedback' at 0x7f9ea41c6a48>]\n",
219 "b'28'\n",
220 "b'28' [<Element 'feedback' at 0x7f9ea41c66d8>]\n",
221 "b'29'\n",
222 "b'29' [<Element 'feedback' at 0x7f9ea422f598>]\n",
223 "b'30'\n",
224 "b'30' [<Element 'feedback' at 0x7f9ea4211548>]\n",
225 "b'31'\n",
226 "b'31' [<Element 'feedback' at 0x7f9ea41c6ae8>]\n",
227 "b'32'\n",
228 "b'32' [<Element 'feedback' at 0x7f9ea41c6ea8>]\n",
229 "b'33'\n",
230 "b'33' [<Element 'feedback' at 0x7f9ea41c6c28>]\n",
231 "b'34'\n",
232 "b'34' [<Element 'feedback' at 0x7f9ea42374f8>]\n",
233 "b'35'\n",
234 "b'35' [<Element 'feedback' at 0x7f9ea41c6a48>]\n",
235 "b'36'\n",
236 "b'36' [<Element 'feedback' at 0x7f9ea41c67c8>]\n",
237 "b'37'\n",
238 "b'37' [<Element 'feedback' at 0x7f9ea4211cc8>]\n",
239 "b'38'\n",
240 "b'38' [<Element 'feedback' at 0x7f9ea4211318>]\n",
241 "b'39'\n",
242 "b'39' [<Element 'feedback' at 0x7f9ea426cd18>]\n",
243 "b'40'\n",
244 "b'40' [<Element 'feedback' at 0x7f9ea41c64f8>]\n",
245 "b'41'\n",
246 "b'41' [<Element 'feedback' at 0x7f9ea42743b8>]\n",
247 "b'42'\n",
248 "b'42' [<Element 'feedback' at 0x7f9ea4211318>]\n",
249 "b'43'\n",
250 "b'43' [<Element 'feedback' at 0x7f9ea427a318>]\n",
251 "b'44'\n",
252 "b'44' [<Element 'feedback' at 0x7f9ea42b7c78>]\n",
253 "b'45'\n",
254 "b'45' [<Element 'feedback' at 0x7f9ea42b7d68>]\n",
255 "b'46'\n",
256 "b'46' [<Element 'feedback' at 0x7f9ea42b7bd8>]\n",
257 "b'47'\n",
258 "b'47' [<Element 'feedback' at 0x7f9ea42b7c28>]\n",
259 "b'48'\n",
260 "b'48' [<Element 'feedback' at 0x7f9ea42743b8>]\n",
261 "b'49'\n",
262 "b'49' [<Element 'feedback' at 0x7f9ea42b79a8>]\n",
263 "b'50'\n",
264 "b'50' [<Element 'feedback' at 0x7f9ea42b7958>]\n",
265 "b'51'\n",
266 "b'51' [<Element 'feedback' at 0x7f9ea42b7c78>]\n",
267 "b'52'\n",
268 "b'52' [<Element 'feedback' at 0x7f9ea46e1408>]\n",
269 "b'53'\n",
270 "b'53' [<Element 'feedback' at 0x7f9ea46e1b88>]\n",
271 "b'54'\n",
272 "b'54' [<Element 'feedback' at 0x7f9ea42b7c28>]\n",
273 "b'55'\n",
274 "b'55' [<Element 'feedback' at 0x7f9ea46e1778>]\n",
275 "b'56'\n",
276 "b'56' [<Element 'feedback' at 0x7f9ea4274f98>]\n",
277 "b'57'\n",
278 "b'57' [<Element 'feedback' at 0x7f9ea422ff48>]\n",
279 "b'58'\n",
280 "b'58' [<Element 'feedback' at 0x7f9ea42480e8>]\n",
281 "b'59'\n",
282 "b'59' [<Element 'feedback' at 0x7f9ea42b79a8>]\n",
283 "b'60'\n",
284 "b'60' [<Element 'feedback' at 0x7f9ea427a958>]\n",
285 "b'61'\n",
286 "b'61' [<Element 'feedback' at 0x7f9ea427aa48>]\n",
287 "b'62'\n",
288 "b'62' [<Element 'feedback' at 0x7f9ea41c60e8>]\n",
289 "b'63'\n",
290 "b'63' [<Element 'feedback' at 0x7f9ea41c6d68>]\n",
291 "b'64'\n",
292 "b'64' [<Element 'feedback' at 0x7f9ea427a548>]\n",
293 "b'65'\n",
294 "b'65' [<Element 'feedback' at 0x7f9ea427aa48>]\n",
295 "b'66'\n",
296 "b'66' [<Element 'feedback' at 0x7f9ea41c60e8>]\n",
297 "b'67'\n",
298 "b'67' [<Element 'feedback' at 0x7f9ea41c64f8>]\n",
299 "b'68'\n",
300 "b'68' [<Element 'feedback' at 0x7f9ea46f27c8>]\n",
301 "b'69'\n",
302 "b'69' [<Element 'feedback' at 0x7f9ea427a8b8>]\n",
303 "b'70'\n",
304 "b'70' [<Element 'feedback' at 0x7f9ea41c64a8>]\n",
305 "b'71'\n",
306 "b'71' [<Element 'feedback' at 0x7f9ea41c69a8>]\n",
307 "b'72'\n",
308 "b'72' [<Element 'feedback' at 0x7f9ea41c6a98>]\n",
309 "b'73'\n",
310 "b'73' [<Element 'feedback' at 0x7f9ea42110e8>]\n",
311 "b'74'\n",
312 "b'74' [<Element 'feedback' at 0x7f9ea41c6458>]\n",
313 "b'75'\n",
314 "b'75' [<Element 'feedback' at 0x7f9ea41c6f98>]\n",
315 "b'76'\n",
316 "b'76' [<Element 'feedback' at 0x7f9ea427a548>]\n",
317 "b'77'\n",
318 "b'77' [<Element 'feedback' at 0x7f9ea426cdb8>]\n",
319 "b'78'\n",
320 "b'78' [<Element 'feedback' at 0x7f9ea4274f98>]\n",
321 "b'79'\n",
322 "b'79' [<Element 'feedback' at 0x7f9ea41c6188>]\n",
323 "b'80'\n",
324 "b'80' [<Element 'feedback' at 0x7f9ea4211e08>]\n",
325 "b'81'\n",
326 "b'81' [<Element 'feedback' at 0x7f9ea4237b88>]\n",
327 "b'82'\n",
328 "b'82' [<Element 'feedback' at 0x7f9ea4237188>]\n",
329 "b'83'\n",
330 "b'83' [<Element 'feedback' at 0x7f9ea4211868>]\n",
331 "b'84'\n",
332 "b'84' [<Element 'feedback' at 0x7f9ea46e1408>]\n",
333 "b'85'\n",
334 "b'85' [<Element 'feedback' at 0x7f9ea4237b38>]\n",
335 "b'86'\n",
336 "b'86' [<Element 'feedback' at 0x7f9ea42379f8>]\n",
337 "b'87'\n",
338 "b'87' [<Element 'feedback' at 0x7f9ea46e14a8>]\n",
339 "b'88'\n",
340 "b'88' [<Element 'feedback' at 0x7f9ea427aae8>]\n",
341 "b'89'\n",
342 "b'89' [<Element 'feedback' at 0x7f9ea42743b8>]\n",
343 "b'90'\n",
344 "b'90' [<Element 'feedback' at 0x7f9ea426cf48>]\n",
345 "b'91'\n",
346 "b'91' [<Element 'feedback' at 0x7f9ebd87d958>]\n",
347 "b'92'\n",
348 "b'92' [<Element 'feedback' at 0x7f9ea4248048>]\n",
349 "b'93'\n",
350 "b'93' [<Element 'feedback' at 0x7f9ea4274ea8>]\n",
351 "b'94'\n",
352 "b'94' [<Element 'feedback' at 0x7f9ea4248138>]\n",
353 "b'95'\n",
354 "b'95' [<Element 'feedback' at 0x7f9ebd87d958>]\n",
355 "b'96'\n",
356 "b'96' [<Element 'feedback' at 0x7f9ea4248048>]\n",
357 "b'97'\n",
358 "b'97' [<Element 'feedback' at 0x7f9ea4237688>]\n",
359 "b'98'\n",
360 "b'98' [<Element 'feedback' at 0x7f9ea4248138>]\n",
361 "b'99'\n",
362 "b'99' [<Element 'feedback' at 0x7f9ebd87d958>]\n",
363 "b'100'\n",
364 "b'100' [<Element 'feedback' at 0x7f9ea427a278>]\n",
365 "b'101'\n",
366 "b'101' [<Element 'feedback' at 0x7f9ea426cf48>]\n",
367 "b'102'\n",
368 "b'102' [<Element 'feedback' at 0x7f9ea4bb24f8>]\n",
369 "b'103'\n",
370 "b'103' [<Element 'feedback' at 0x7f9ebd87d958>]\n",
371 "b'104'\n",
372 "b'104' [<Element 'feedback' at 0x7f9ea4211138>]\n",
373 "b'105'\n",
374 "b'105' [<Element 'feedback' at 0x7f9ea4274f98>]\n",
375 "b'106'\n",
376 "b'106' [<Element 'feedback' at 0x7f9ea4248138>]\n",
377 "b'107'\n",
378 "b'107' [<Element 'feedback' at 0x7f9ebd87d958>]\n",
379 "b'108'\n",
380 "b'108' [<Element 'feedback' at 0x7f9ea4248048>]\n",
381 "b'109'\n",
382 "b'109' [<Element 'feedback' at 0x7f9ea4274f98>]\n",
383 "b'110'\n",
384 "b'110' [<Element 'feedback' at 0x7f9ea4248138>]\n",
385 "b'111'\n",
386 "b'111' [<Element 'feedback' at 0x7f9ebd87d958>]\n",
387 "b'112'\n",
388 "b'112' [<Element 'feedback' at 0x7f9ea4248048>]\n",
389 "b'113'\n",
390 "b'113' [<Element 'feedback' at 0x7f9ebcb50548>]\n",
391 "b'114'\n",
392 "b'114' [<Element 'feedback' at 0x7f9ea4b7dea8>]\n",
393 "b'115'\n",
394 "b'115' [<Element 'feedback' at 0x7f9ebd87d958>]\n",
395 "b'116'\n",
396 "b'116' [<Element 'feedback' at 0x7f9ea42b7d68>]\n",
397 "b'117'\n",
398 "b'117' [<Element 'feedback' at 0x7f9ebcb50a48>]\n",
399 "b'118'\n",
400 "b'118' [<Element 'feedback' at 0x7f9ea427aae8>]\n",
401 "b'119'\n",
402 "b'119' [<Element 'feedback' at 0x7f9ebd87d958>]\n",
403 "b'120'\n",
404 "b'120' [<Element 'feedback' at 0x7f9ea46e1408>]\n",
405 "b'121'\n",
406 "b'121' [<Element 'feedback' at 0x7f9ea46e1778>]\n",
407 "b'122'\n",
408 "b'122' [<Element 'feedback' at 0x7f9ea46e1598>]\n",
409 "b'123'\n",
410 "b'123' [<Element 'feedback' at 0x7f9ea427a8b8>]\n",
411 "b'124'\n",
412 "b'124' [<Element 'feedback' at 0x7f9ea46e1408>]\n",
413 "b'125'\n",
414 "b'125' [<Element 'feedback' at 0x7f9ea427aa48>]\n",
415 "b'126'\n",
416 "b'126' [<Element 'feedback' at 0x7f9ea46e1598>]\n",
417 "b'127'\n",
418 "b'127' [<Element 'feedback' at 0x7f9ea427a8b8>]\n",
419 "b'128'\n",
420 "b'128' [<Element 'feedback' at 0x7f9ea46e1408>]\n",
421 "b'129'\n",
422 "b'129' [<Element 'feedback' at 0x7f9ea4211548>]\n",
423 "b'130'\n",
424 "b'130' [<Element 'feedback' at 0x7f9ea42119f8>]\n",
425 "b'131'\n",
426 "b'131' [<Element 'feedback' at 0x7f9ea427a8b8>]\n",
427 "b'132'\n",
428 "b'132' [<Element 'feedback' at 0x7f9ebd87d958>]\n",
429 "b'133'\n",
430 "b'133' [<Element 'feedback' at 0x7f9ea4b7d1d8>]\n",
431 "b'134'\n",
432 "b'134' [<Element 'feedback' at 0x7f9ebcb50c28>]\n",
433 "b'135'\n",
434 "b'135' [<Element 'feedback' at 0x7f9ea427a8b8>]\n",
435 "b'136'\n",
436 "b'136' [<Element 'feedback' at 0x7f9ea426c9a8>]\n",
437 "b'137'\n",
438 "b'137' [<Element 'feedback' at 0x7f9ea4b7d1d8>]\n",
439 "b'138'\n",
440 "b'138' [<Element 'feedback' at 0x7f9ea42b79a8>]\n",
441 "b'139'\n",
442 "b'139' [<Element 'feedback' at 0x7f9ea46f26d8>]\n",
443 "b'140'\n",
444 "b'140' [<Element 'feedback' at 0x7f9ea426c9a8>]\n",
445 "b'141'\n",
446 "b'141' [<Element 'feedback' at 0x7f9ea427a8b8>]\n",
447 "b'142'\n",
448 "b'142' [<Element 'feedback' at 0x7f9ea46e1598>]\n",
449 "b'143'\n",
450 "b'143' [<Element 'feedback' at 0x7f9ea4bb24f8>]\n",
451 "b'144'\n",
452 "b'144' [<Element 'feedback' at 0x7f9ea426c9a8>]\n",
453 "b'145'\n",
454 "b'145' [<Element 'feedback' at 0x7f9ea426cdb8>]\n",
455 "b'146'\n",
456 "b'146' [<Element 'feedback' at 0x7f9ea4bb2458>]\n",
457 "b'147'\n",
458 "b'147' [<Element 'feedback' at 0x7f9ea426cd18>]\n",
459 "b'148'\n",
460 "b'148' [<Element 'feedback' at 0x7f9ea4bb2688>]\n",
461 "b'149'\n",
462 "b'149' [<Element 'feedback' at 0x7f9ea426cdb8>]\n",
463 "b'150'\n",
464 "b'150' [<Element 'feedback' at 0x7f9ea46e1408>]\n",
465 "b'151'\n",
466 "b'151' [<Element 'feedback' at 0x7f9ea4211188>]\n",
467 "b'152'\n",
468 "b'152' [<Element 'feedback' at 0x7f9ea4bb2688>]\n",
469 "b'153'\n",
470 "b'153' [<Element 'feedback' at 0x7f9ea426cdb8>]\n",
471 "b'154'\n",
472 "b'154' [<Element 'feedback' at 0x7f9ea4274138>]\n",
473 "b'155'\n",
474 "b'155' [<Element 'feedback' at 0x7f9ea422ff48>]\n",
475 "b'156'\n",
476 "b'156' [<Element 'feedback' at 0x7f9ea4bb2688>]\n",
477 "b'157'\n",
478 "b'157' [<Element 'feedback' at 0x7f9ea426cdb8>]\n",
479 "b'158'\n",
480 "b'158' [<Element 'feedback' at 0x7f9ea427ab88>]\n",
481 "b'159'\n",
482 "b'159' [<Element 'feedback' at 0x7f9ea422ff48>]\n",
483 "b'160'\n",
484 "b'160' [<Element 'feedback' at 0x7f9ea4237638>]\n",
485 "b'161'\n",
486 "b'161' [<Element 'feedback' at 0x7f9ea426cdb8>]\n",
487 "b'162'\n",
488 "b'162' [<Element 'feedback' at 0x7f9ea42480e8>]\n",
489 "b'163'\n",
490 "b'163' [<Element 'feedback' at 0x7f9ea422ff48>]\n",
491 "b'164'\n",
492 "b'164' [<Element 'feedback' at 0x7f9ea4237638>]\n",
493 "b'165'\n",
494 "b'165' [<Element 'feedback' at 0x7f9ea426cdb8>]\n",
495 "b'166'\n",
496 "b'166' [<Element 'feedback' at 0x7f9ea427ab88>]\n",
497 "b'167'\n",
498 "b'167' [<Element 'feedback' at 0x7f9ea422ff48>]\n",
499 "b'168'\n",
500 "b'168' [<Element 'feedback' at 0x7f9ea42b79a8>]\n",
501 "b'169'\n",
502 "b'169' [<Element 'feedback' at 0x7f9ea426cdb8>]\n",
503 "b'170'\n",
504 "b'170' [<Element 'feedback' at 0x7f9ea4b7dae8>]\n",
505 "b'171'\n",
506 "b'171' [<Element 'feedback' at 0x7f9ea422ff48>]\n",
507 "b'172'\n",
508 "b'172' [<Element 'feedback' at 0x7f9ea42b79a8>]\n",
509 "b'173'\n",
510 "b'173' [<Element 'feedback' at 0x7f9ea426cdb8>]\n",
511 "b'174'\n",
512 "b'174' [<Element 'feedback' at 0x7f9ea4b7d3b8>]\n"
513 ]
514 }
515 ],
516 "source": [
517 "for n in nums[0].split():\n",
518 " print(n)\n",
519 " reports = extract_report(fetch_msg(n))\n",
520 " print(n, reports)"
521 ]
522 },
523 {
524 "cell_type": "code",
525 "execution_count": 45,
526 "metadata": {
527 "collapsed": false
528 },
529 "outputs": [
530 {
531 "data": {
532 "text/plain": [
533 "[<Element 'feedback' at 0x7f9ea422ff48>,\n",
534 " <Element 'feedback' at 0x7f9ea4b7d048>,\n",
535 " <Element 'feedback' at 0x7f9ebcb50bd8>,\n",
536 " <Element 'feedback' at 0x7f9ea427aef8>,\n",
537 " <Element 'feedback' at 0x7f9ea42113b8>,\n",
538 " <Element 'feedback' at 0x7f9ea42376d8>,\n",
539 " <Element 'feedback' at 0x7f9ea46f25e8>,\n",
540 " <Element 'feedback' at 0x7f9ebcb34db8>,\n",
541 " <Element 'feedback' at 0x7f9ea4246098>,\n",
542 " <Element 'feedback' at 0x7f9ea4246bd8>,\n",
543 " <Element 'feedback' at 0x7f9ebcb497c8>,\n",
544 " <Element 'feedback' at 0x7f9ea46f2048>,\n",
545 " <Element 'feedback' at 0x7f9ea41ce908>,\n",
546 " <Element 'feedback' at 0x7f9ea41dc4a8>,\n",
547 " <Element 'feedback' at 0x7f9ea41d0048>,\n",
548 " <Element 'feedback' at 0x7f9ea41d0b38>,\n",
549 " <Element 'feedback' at 0x7f9ea41c26d8>,\n",
550 " <Element 'feedback' at 0x7f9ea41dc598>,\n",
551 " <Element 'feedback' at 0x7f9ea41c0e08>,\n",
552 " <Element 'feedback' at 0x7f9ea41c0ef8>,\n",
553 " <Element 'feedback' at 0x7f9ea41be188>,\n",
554 " <Element 'feedback' at 0x7f9ea41bec78>,\n",
555 " <Element 'feedback' at 0x7f9ea41df818>,\n",
556 " <Element 'feedback' at 0x7f9ea41e43b8>,\n",
557 " <Element 'feedback' at 0x7f9ea41e4ea8>,\n",
558 " <Element 'feedback' at 0x7f9ea41e8a48>,\n",
559 " <Element 'feedback' at 0x7f9ea41ed688>,\n",
560 " <Element 'feedback' at 0x7f9ea41f11d8>,\n",
561 " <Element 'feedback' at 0x7f9ea41f1368>,\n",
562 " <Element 'feedback' at 0x7f9ea41ee408>,\n",
563 " <Element 'feedback' at 0x7f9ea41eef48>,\n",
564 " <Element 'feedback' at 0x7f9ea41f6a98>,\n",
565 " <Element 'feedback' at 0x7f9ea41f9638>,\n",
566 " <Element 'feedback' at 0x7f9ea417e278>,\n",
567 " <Element 'feedback' at 0x7f9ea4182f48>,\n",
568 " <Element 'feedback' at 0x7f9ea4182ea8>,\n",
569 " <Element 'feedback' at 0x7f9ea41869a8>,\n",
570 " <Element 'feedback' at 0x7f9ea4186cc8>,\n",
571 " <Element 'feedback' at 0x7f9ea418a7c8>,\n",
572 " <Element 'feedback' at 0x7f9ea418b408>,\n",
573 " <Element 'feedback' at 0x7f9ea418bef8>,\n",
574 " <Element 'feedback' at 0x7f9ea4190a98>,\n",
575 " <Element 'feedback' at 0x7f9ea4195638>,\n",
576 " <Element 'feedback' at 0x7f9ea4199228>,\n",
577 " <Element 'feedback' at 0x7f9ea4199d68>,\n",
578 " <Element 'feedback' at 0x7f9ea419e908>,\n",
579 " <Element 'feedback' at 0x7f9ea419e9f8>,\n",
580 " <Element 'feedback' at 0x7f9ea41a15e8>,\n",
581 " <Element 'feedback' at 0x7f9ea41a2188>,\n",
582 " <Element 'feedback' at 0x7f9ea41a2d18>,\n",
583 " <Element 'feedback' at 0x7f9ea41a5908>,\n",
584 " <Element 'feedback' at 0x7f9ea41a94a8>,\n",
585 " <Element 'feedback' at 0x7f9ea41ad048>,\n",
586 " <Element 'feedback' at 0x7f9ea41adb88>,\n",
587 " <Element 'feedback' at 0x7f9ea41adcc8>,\n",
588 " <Element 'feedback' at 0x7f9ea41b17c8>,\n",
589 " <Element 'feedback' at 0x7f9ea41b4ef8>,\n",
590 " <Element 'feedback' at 0x7f9ea41b1958>,\n",
591 " <Element 'feedback' at 0x7f9ea41b8908>,\n",
592 " <Element 'feedback' at 0x7f9ea41ba4f8>,\n",
593 " <Element 'feedback' at 0x7f9ea413d098>,\n",
594 " <Element 'feedback' at 0x7f9ea4142138>,\n",
595 " <Element 'feedback' at 0x7f9ea4142c78>,\n",
596 " <Element 'feedback' at 0x7f9ea4146868>,\n",
597 " <Element 'feedback' at 0x7f9ea414b458>,\n",
598 " <Element 'feedback' at 0x7f9ea41ba4a8>,\n",
599 " <Element 'feedback' at 0x7f9ea414bbd8>,\n",
600 " <Element 'feedback' at 0x7f9ebcb43778>,\n",
601 " <Element 'feedback' at 0x7f9ea4b93368>,\n",
602 " <Element 'feedback' at 0x7f9ea4b93ea8>,\n",
603 " <Element 'feedback' at 0x7f9ea414fa98>,\n",
604 " <Element 'feedback' at 0x7f9ea4153688>,\n",
605 " <Element 'feedback' at 0x7f9ea4159188>,\n",
606 " <Element 'feedback' at 0x7f9ea4159cc8>,\n",
607 " <Element 'feedback' at 0x7f9ea4159db8>,\n",
608 " <Element 'feedback' at 0x7f9ea415fea8>,\n",
609 " <Element 'feedback' at 0x7f9ea415da48>,\n",
610 " <Element 'feedback' at 0x7f9ea4161638>,\n",
611 " <Element 'feedback' at 0x7f9ea4165728>,\n",
612 " <Element 'feedback' at 0x7f9ea416a2c8>,\n",
613 " <Element 'feedback' at 0x7f9ea416e188>,\n",
614 " <Element 'feedback' at 0x7f9ea416ef48>,\n",
615 " <Element 'feedback' at 0x7f9ea416e048>,\n",
616 " <Element 'feedback' at 0x7f9ea4173cc8>,\n",
617 " <Element 'feedback' at 0x7f9ea416de08>,\n",
618 " <Element 'feedback' at 0x7f9ea4176958>,\n",
619 " <Element 'feedback' at 0x7f9ea417a4f8>,\n",
620 " <Element 'feedback' at 0x7f9ea40fe098>,\n",
621 " <Element 'feedback' at 0x7f9ea40feb88>,\n",
622 " <Element 'feedback' at 0x7f9ea4104c78>,\n",
623 " <Element 'feedback' at 0x7f9ea4108868>,\n",
624 " <Element 'feedback' at 0x7f9ea40feb38>,\n",
625 " <Element 'feedback' at 0x7f9ea410fa98>,\n",
626 " <Element 'feedback' at 0x7f9ea410b688>,\n",
627 " <Element 'feedback' at 0x7f9ea41121d8>,\n",
628 " <Element 'feedback' at 0x7f9ea4112d68>,\n",
629 " <Element 'feedback' at 0x7f9ea41158b8>,\n",
630 " <Element 'feedback' at 0x7f9ea411b458>,\n",
631 " <Element 'feedback' at 0x7f9ea411e048>,\n",
632 " <Element 'feedback' at 0x7f9ea411eb88>,\n",
633 " <Element 'feedback' at 0x7f9ea411b408>,\n",
634 " <Element 'feedback' at 0x7f9ea4123818>,\n",
635 " <Element 'feedback' at 0x7f9ea411f958>,\n",
636 " <Element 'feedback' at 0x7f9ea41264a8>,\n",
637 " <Element 'feedback' at 0x7f9ea412a098>,\n",
638 " <Element 'feedback' at 0x7f9ea412abd8>,\n",
639 " <Element 'feedback' at 0x7f9ea412f728>,\n",
640 " <Element 'feedback' at 0x7f9ea41352c8>,\n",
641 " <Element 'feedback' at 0x7f9ea4135e58>,\n",
642 " <Element 'feedback' at 0x7f9ea413aa48>,\n",
643 " <Element 'feedback' at 0x7f9ea41159a8>,\n",
644 " <Element 'feedback' at 0x7f9ea4132638>,\n",
645 " <Element 'feedback' at 0x7f9ea40bf228>,\n",
646 " <Element 'feedback' at 0x7f9ea40c1318>,\n",
647 " <Element 'feedback' at 0x7f9ea40c1e58>,\n",
648 " <Element 'feedback' at 0x7f9ea40c7a48>,\n",
649 " <Element 'feedback' at 0x7f9ea40cb5e8>,\n",
650 " <Element 'feedback' at 0x7f9ea40d0188>,\n",
651 " <Element 'feedback' at 0x7f9ea40d0c78>,\n",
652 " <Element 'feedback' at 0x7f9ea40d0e08>,\n",
653 " <Element 'feedback' at 0x7f9ea40d6958>,\n",
654 " <Element 'feedback' at 0x7f9ea40c84f8>,\n",
655 " <Element 'feedback' at 0x7f9ea40cd098>,\n",
656 " <Element 'feedback' at 0x7f9ea40dc228>,\n",
657 " <Element 'feedback' at 0x7f9ea40dcd18>,\n",
658 " <Element 'feedback' at 0x7f9ea40e08b8>,\n",
659 " <Element 'feedback' at 0x7f9ea40e7408>,\n",
660 " <Element 'feedback' at 0x7f9ea40eb4a8>,\n",
661 " <Element 'feedback' at 0x7f9ea40eb778>,\n",
662 " <Element 'feedback' at 0x7f9ea40e5868>,\n",
663 " <Element 'feedback' at 0x7f9ea40f1458>,\n",
664 " <Element 'feedback' at 0x7f9ea40f1f98>,\n",
665 " <Element 'feedback' at 0x7f9ea40f5bd8>,\n",
666 " <Element 'feedback' at 0x7f9ea40f9728>,\n",
667 " <Element 'feedback' at 0x7f9ea407e818>,\n",
668 " <Element 'feedback' at 0x7f9ea4081458>,\n",
669 " <Element 'feedback' at 0x7f9ea40f5228>,\n",
670 " <Element 'feedback' at 0x7f9ea40830e8>,\n",
671 " <Element 'feedback' at 0x7f9ea4083cc8>,\n",
672 " <Element 'feedback' at 0x7f9ea4085e08>,\n",
673 " <Element 'feedback' at 0x7f9ea4089ef8>,\n",
674 " <Element 'feedback' at 0x7f9ea408eb38>,\n",
675 " <Element 'feedback' at 0x7f9ea4092688>,\n",
676 " <Element 'feedback' at 0x7f9ea40967c8>,\n",
677 " <Element 'feedback' at 0x7f9ea4096868>,\n",
678 " <Element 'feedback' at 0x7f9ea409c958>,\n",
679 " <Element 'feedback' at 0x7f9ea4099548>,\n",
680 " <Element 'feedback' at 0x7f9ea409e688>,\n",
681 " <Element 'feedback' at 0x7f9ea40a2188>,\n",
682 " <Element 'feedback' at 0x7f9ea40a2cc8>,\n",
683 " <Element 'feedback' at 0x7f9ea40a8818>,\n",
684 " <Element 'feedback' at 0x7f9ea40ad3b8>,\n",
685 " <Element 'feedback' at 0x7f9ea40adef8>,\n",
686 " <Element 'feedback' at 0x7f9ea40b2ae8>,\n",
687 " <Element 'feedback' at 0x7f9ea40b2bd8>,\n",
688 " <Element 'feedback' at 0x7f9ea40a77c8>,\n",
689 " <Element 'feedback' at 0x7f9ea40b6368>,\n",
690 " <Element 'feedback' at 0x7f9ea40b94f8>,\n",
691 " <Element 'feedback' at 0x7f9ea403e048>,\n",
692 " <Element 'feedback' at 0x7f9ea403eb88>,\n",
693 " <Element 'feedback' at 0x7f9ea4044c78>,\n",
694 " <Element 'feedback' at 0x7f9ea40b9f98>,\n",
695 " <Element 'feedback' at 0x7f9ea40bb138>,\n",
696 " <Element 'feedback' at 0x7f9ea40bbc28>,\n",
697 " <Element 'feedback' at 0x7f9ea404e818>,\n",
698 " <Element 'feedback' at 0x7f9ea4053458>,\n",
699 " <Element 'feedback' at 0x7f9ea40594f8>,\n",
700 " <Element 'feedback' at 0x7f9ea405e098>,\n",
701 " <Element 'feedback' at 0x7f9ea405eb88>,\n",
702 " <Element 'feedback' at 0x7f9ea405ec78>,\n",
703 " <Element 'feedback' at 0x7f9ea40634f8>,\n",
704 " <Element 'feedback' at 0x7f9ea4137598>,\n",
705 " <Element 'feedback' at 0x7f9ea40511d8>,\n",
706 " <Element 'feedback' at 0x7f9ea4051d18>]"
707 ]
708 },
709 "execution_count": 45,
710 "metadata": {},
711 "output_type": "execute_result"
712 }
713 ],
714 "source": [
715 "dmarc_reports = [report for report_set in [extract_report(fetch_msg(n)) for n in nums[0].split()]\n",
716 " for report in report_set]\n",
717 "dmarc_reports"
718 ]
719 },
720 {
721 "cell_type": "code",
722 "execution_count": 46,
723 "metadata": {
724 "collapsed": false
725 },
726 "outputs": [
727 {
728 "data": {
729 "text/plain": [
730 "('BYE', [b'Logging out'])"
731 ]
732 },
733 "execution_count": 46,
734 "metadata": {},
735 "output_type": "execute_result"
736 }
737 ],
738 "source": [
739 "mailbox.close()\n",
740 "mailbox.logout()"
741 ]
742 },
743 {
744 "cell_type": "code",
745 "execution_count": 47,
746 "metadata": {
747 "collapsed": true
748 },
749 "outputs": [],
750 "source": [
751 "field_maps = {'./policy_published/adkim': {'pg_field_name': 'policy_published_adkim',\n",
752 " 'pg_table': 'reports',\n",
753 " 'pg_type': 'varchar'},\n",
754 " './policy_published/aspf': {'pg_field_name': 'policy_published_aspf',\n",
755 " 'pg_table': 'reports',\n",
756 " 'pg_type': 'varchar'},\n",
757 " './policy_published/domain': {'pg_field_name': 'policy_published_domain',\n",
758 " 'pg_table': 'reports',\n",
759 " 'pg_type': 'varchar'},\n",
760 " './policy_published/p': {'pg_field_name': 'policy_published_p',\n",
761 " 'pg_table': 'reports',\n",
762 " 'pg_type': 'varchar'},\n",
763 " './policy_published/pct': {'pg_field_name': 'policy_published_pct',\n",
764 " 'pg_table': 'reports',\n",
765 " 'pg_type': 'int'},\n",
766 " './record[{}]/auth_results/dkim/domain': {'pg_field_name': 'auth_results_dkim_domain',\n",
767 " 'pg_table': 'report_items',\n",
768 " 'pg_type': 'varchar'},\n",
769 " './record[{}]/auth_results/dkim/result': {'pg_field_name': 'auth_results_dkim_result',\n",
770 " 'pg_table': 'report_items',\n",
771 " 'pg_type': 'varchar'},\n",
772 " './record[{}]/auth_results/spf/domain': {'pg_field_name': 'auth_results_spf_domain',\n",
773 " 'pg_table': 'report_items',\n",
774 " 'pg_type': 'varchar'},\n",
775 " './record[{}]/auth_results/spf/result': {'pg_field_name': 'auth_results_spf_result',\n",
776 " 'pg_table': 'report_items',\n",
777 " 'pg_type': 'varchar'},\n",
778 " './record[{}]/identifiers/header_from': {'pg_field_name': 'identifiers_header_from',\n",
779 " 'pg_table': 'report_items',\n",
780 " 'pg_type': 'varchar'},\n",
781 " './record[{}]/row/count': {'pg_field_name': 'count',\n",
782 " 'pg_table': 'report_items',\n",
783 " 'pg_type': 'int'},\n",
784 " './record[{}]/row/policy_evaluated/disposition': {'pg_field_name': 'policy_evaluated_disposition',\n",
785 " 'pg_table': 'report_items',\n",
786 " 'pg_type': 'varchar'},\n",
787 " './record[{}]/row/policy_evaluated/dkim': {'pg_field_name': 'policy_evaluated_dkim',\n",
788 " 'pg_table': 'report_items',\n",
789 " 'pg_type': 'varchar'},\n",
790 " './record[{}]/row/policy_evaluated/spf': {'pg_field_name': 'policy_evaluated_spf',\n",
791 " 'pg_table': 'report_items',\n",
792 " 'pg_type': 'varchar'},\n",
793 " './record[{}]/row/source_ip': {'pg_field_name': 'source_ip',\n",
794 " 'pg_table': 'report_items',\n",
795 " 'pg_type': 'inet'},\n",
796 " './report_metadata/date_range/begin': {'pg_field_name': 'report_metadata_date_range_begin',\n",
797 " 'pg_table': 'reports',\n",
798 " 'pg_type': 'timestamp'},\n",
799 " './report_metadata/date_range/end': {'pg_field_name': 'report_metadata_date_range_end',\n",
800 " 'pg_table': 'reports',\n",
801 " 'pg_type': 'timestamp'},\n",
802 " './report_metadata/email': {'pg_field_name': 'report_metadata_email',\n",
803 " 'pg_table': 'reports',\n",
804 " 'pg_type': 'varchar'},\n",
805 " './report_metadata/org_name': {'pg_field_name': 'report_metadata_org_name',\n",
806 " 'pg_table': 'reports',\n",
807 " 'pg_type': 'varchar'},\n",
808 " './report_metadata/report_id': {'pg_field_name': 'report_metadata_report_id',\n",
809 " 'pg_table': 'reports',\n",
810 " 'pg_type': 'varchar'}}"
811 ]
812 },
813 {
814 "cell_type": "code",
815 "execution_count": 48,
816 "metadata": {
817 "collapsed": true
818 },
819 "outputs": [],
820 "source": [
821 "def maybe_strip(text):\n",
822 " if text:\n",
823 " return text.strip()\n",
824 " else:\n",
825 " return ''"
826 ]
827 },
828 {
829 "cell_type": "code",
830 "execution_count": 49,
831 "metadata": {
832 "collapsed": true
833 },
834 "outputs": [],
835 "source": [
836 "def write_report(connection, cursor, report):\n",
837 " \n",
838 " field_names = []\n",
839 " values = {}\n",
840 " for f in [f for f in field_maps if field_maps[f]['pg_table'] == 'reports']:\n",
841 " field_names += [field_maps[f]['pg_field_name']]\n",
842 " if field_maps[f]['pg_type'] == 'int':\n",
843 " values[field_maps[f]['pg_field_name']] = int(report.find(f).text)\n",
844 " elif field_maps[f]['pg_type'] == 'timestamp':\n",
845 " values[field_maps[f]['pg_field_name']] = datetime.datetime.utcfromtimestamp(int(report.find(f).text))\n",
846 " elif field_maps[f]['pg_type'] == 'inet':\n",
847 " values[field_maps[f]['pg_field_name']] = maybe_strip(report.find(f).text)\n",
848 " else:\n",
849 " values[field_maps[f]['pg_field_name']] = maybe_strip(report.find(f).text)\n",
850 " insert_string = 'insert into reports (' + ', '.join(field_names) + ') '\n",
851 " insert_string += 'values (' + ', '.join('%({})s'.format(f) for f in field_names) + ');'\n",
852 " cursor.execute(insert_string, values)\n",
853 " \n",
854 " for i in range(1, len(report.findall('./record'))+1):\n",
855 " field_names = []\n",
856 " cursor.execute('select id, report_metadata_report_id from reports where report_metadata_report_id = %s;', \n",
857 " [report.find('./report_metadata/report_id').text])\n",
858 " results = cursor.fetchall()\n",
859 " if len(results) != 1:\n",
860 " raise\n",
861 " else:\n",
862 " report_id = results[0][0]\n",
863 " values = {'report_id': report_id}\n",
864 " for f in [f for f in field_maps if field_maps[f]['pg_table'] == 'report_items']:\n",
865 " field_names += [field_maps[f]['pg_field_name']]\n",
866 " if field_maps[f]['pg_type'] == 'int':\n",
867 " values[field_maps[f]['pg_field_name']] = int(report.find(f.format(i)).text)\n",
868 " elif field_maps[f]['pg_type'] == 'timestamp':\n",
869 " values[field_maps[f]['pg_field_name']] = datetime.datetime.utcfromtimestamp(int(report.find(f.format(i)).text))\n",
870 " elif field_maps[f]['pg_type'] == 'inet':\n",
871 " values[field_maps[f]['pg_field_name']] = maybe_strip(report.find(f.format(i)).text)\n",
872 " else:\n",
873 " values[field_maps[f]['pg_field_name']] = maybe_strip(report.find(f.format(i)).text)\n",
874 " insert_string = 'insert into report_items (report_id, ' + ', '.join(field_names) + ') '\n",
875 " insert_string += 'values (%(report_id)s, ' + ', '.join('%({})s'.format(f) for f in field_names) + ');'\n",
876 " cursor.execute(insert_string, values)\n",
877 " connection.commit()"
878 ]
879 },
880 {
881 "cell_type": "code",
882 "execution_count": 51,
883 "metadata": {
884 "collapsed": true
885 },
886 "outputs": [],
887 "source": [
888 "conn = psycopg2.connect(host=config['database']['server'],\n",
889 " database=config['database']['database'], \n",
890 " user=config['database']['username'], \n",
891 " password=config['database']['password'])\n",
892 "with conn.cursor() as cur:\n",
893 " for report in dmarc_reports:\n",
894 " write_report(conn, cur, report)"
895 ]
896 },
897 {
898 "cell_type": "code",
899 "execution_count": 52,
900 "metadata": {
901 "collapsed": true
902 },
903 "outputs": [],
904 "source": [
905 "conn.close()"
906 ]
907 },
908 {
909 "cell_type": "code",
910 "execution_count": null,
911 "metadata": {
912 "collapsed": true
913 },
914 "outputs": [],
915 "source": []
916 }
917 ],
918 "metadata": {
919 "kernelspec": {
920 "display_name": "Python 3",
921 "language": "python",
922 "name": "python3"
923 },
924 "language_info": {
925 "codemirror_mode": {
926 "name": "ipython",
927 "version": 3
928 },
929 "file_extension": ".py",
930 "mimetype": "text/x-python",
931 "name": "python",
932 "nbconvert_exporter": "python",
933 "pygments_lexer": "ipython3",
934 "version": "3.4.3+"
935 }
936 },
937 "nbformat": 4,
938 "nbformat_minor": 0
939 }