Fixed error handling
[dmarc.git] / write-to-database.ipynb
1 {
2 "cells": [
3 {
4 "cell_type": "code",
5 "execution_count": 1,
6 "metadata": {
7 "collapsed": true
8 },
9 "outputs": [],
10 "source": [
11 "import configparser\n",
12 "import xml.etree.ElementTree\n",
13 "import psycopg2\n",
14 "import re\n",
15 "import datetime"
16 ]
17 },
18 {
19 "cell_type": "code",
20 "execution_count": 2,
21 "metadata": {
22 "collapsed": false
23 },
24 "outputs": [
25 {
26 "data": {
27 "text/plain": [
28 "['dmarc.ini']"
29 ]
30 },
31 "execution_count": 2,
32 "metadata": {},
33 "output_type": "execute_result"
34 }
35 ],
36 "source": [
37 "config = configparser.ConfigParser()\n",
38 "config.read('dmarc.ini')"
39 ]
40 },
41 {
42 "cell_type": "code",
43 "execution_count": 3,
44 "metadata": {
45 "collapsed": false
46 },
47 "outputs": [
48 {
49 "data": {
50 "text/plain": [
51 "['imap', 'database']"
52 ]
53 },
54 "execution_count": 3,
55 "metadata": {},
56 "output_type": "execute_result"
57 }
58 ],
59 "source": [
60 "config.sections()"
61 ]
62 },
63 {
64 "cell_type": "code",
65 "execution_count": 4,
66 "metadata": {
67 "collapsed": false
68 },
69 "outputs": [
70 {
71 "data": {
72 "text/plain": [
73 "['server', 'username', 'port', 'password']"
74 ]
75 },
76 "execution_count": 4,
77 "metadata": {},
78 "output_type": "execute_result"
79 }
80 ],
81 "source": [
82 "[k for k in config['imap']]"
83 ]
84 },
85 {
86 "cell_type": "code",
87 "execution_count": 5,
88 "metadata": {
89 "collapsed": false
90 },
91 "outputs": [
92 {
93 "data": {
94 "text/plain": [
95 "['server', 'database', 'username', 'password']"
96 ]
97 },
98 "execution_count": 5,
99 "metadata": {},
100 "output_type": "execute_result"
101 }
102 ],
103 "source": [
104 "[k for k in config['database']]"
105 ]
106 },
107 {
108 "cell_type": "code",
109 "execution_count": 6,
110 "metadata": {
111 "collapsed": true
112 },
113 "outputs": [],
114 "source": [
115 "def walk(node, indent=0):\n",
116 " for child in node:\n",
117 " if child.text:\n",
118 " txt = child.text.strip()\n",
119 " else:\n",
120 " txt = ''\n",
121 " print(' '*indent, child.tag, ':', txt)\n",
122 " walk(child, indent+2)"
123 ]
124 },
125 {
126 "cell_type": "code",
127 "execution_count": 7,
128 "metadata": {
129 "collapsed": false
130 },
131 "outputs": [
132 {
133 "name": "stdout",
134 "output_type": "stream",
135 "text": [
136 "google.com!njae.me.uk!1458777600!1458863999.xml hotmail.com!njae.me.uk!1459011600!1459098000.xml yahoo.com!njae.me.uk!1459036800!1459123199.xml\r\n",
137 "google.com!njae.me.uk!1458950400!1459036799.xml yahoo.com!njae.me.uk!1458864000!1458950399.xml\r\n"
138 ]
139 }
140 ],
141 "source": [
142 "!ls *xml"
143 ]
144 },
145 {
146 "cell_type": "code",
147 "execution_count": 8,
148 "metadata": {
149 "collapsed": false
150 },
151 "outputs": [
152 {
153 "data": {
154 "text/plain": [
155 "['google.com!njae.me.uk!1458777600!1458863999.xml',\n",
156 " 'google.com!njae.me.uk!1458950400!1459036799.xml',\n",
157 " 'hotmail.com!njae.me.uk!1459011600!1459098000.xml',\n",
158 " 'yahoo.com!njae.me.uk!1458864000!1458950399.xml',\n",
159 " 'yahoo.com!njae.me.uk!1459036800!1459123199.xml']"
160 ]
161 },
162 "execution_count": 8,
163 "metadata": {},
164 "output_type": "execute_result"
165 }
166 ],
167 "source": [
168 "xmls = !ls *xml\n",
169 "xmls"
170 ]
171 },
172 {
173 "cell_type": "code",
174 "execution_count": 9,
175 "metadata": {
176 "collapsed": false
177 },
178 "outputs": [
179 {
180 "data": {
181 "text/plain": [
182 "{'google.com!njae.me.uk!1458777600!1458863999.xml': <Element 'feedback' at 0x7f1fd40b4318>,\n",
183 " 'google.com!njae.me.uk!1458950400!1459036799.xml': <Element 'feedback' at 0x7f1fd40b4db8>,\n",
184 " 'hotmail.com!njae.me.uk!1459011600!1459098000.xml': <Element 'feedback' at 0x7f1fd40bbdb8>,\n",
185 " 'yahoo.com!njae.me.uk!1458864000!1458950399.xml': <Element 'feedback' at 0x7f1fd40bf818>,\n",
186 " 'yahoo.com!njae.me.uk!1459036800!1459123199.xml': <Element 'feedback' at 0x7f1fd4043278>}"
187 ]
188 },
189 "execution_count": 9,
190 "metadata": {},
191 "output_type": "execute_result"
192 }
193 ],
194 "source": [
195 "dmarc_reports = {f: xml.etree.ElementTree.fromstring(open(f).read()) for f in xmls}\n",
196 "dmarc_reports"
197 ]
198 },
199 {
200 "cell_type": "code",
201 "execution_count": 10,
202 "metadata": {
203 "collapsed": false
204 },
205 "outputs": [
206 {
207 "name": "stdout",
208 "output_type": "stream",
209 "text": [
210 " report_metadata : \n",
211 " org_name : google.com\n",
212 " email : noreply-dmarc-support@google.com\n",
213 " extra_contact_info : https://support.google.com/a/answer/2466580\n",
214 " report_id : 13032699446207263761\n",
215 " date_range : \n",
216 " begin : 1458777600\n",
217 " end : 1458863999\n",
218 " policy_published : \n",
219 " domain : njae.me.uk\n",
220 " adkim : r\n",
221 " aspf : r\n",
222 " p : none\n",
223 " sp : none\n",
224 " pct : 100\n",
225 " record : \n",
226 " row : \n",
227 " source_ip : 146.185.136.235\n",
228 " count : 1\n",
229 " policy_evaluated : \n",
230 " disposition : none\n",
231 " dkim : pass\n",
232 " spf : fail\n",
233 " identifiers : \n",
234 " header_from : njae.me.uk\n",
235 " auth_results : \n",
236 " dkim : \n",
237 " domain : njae.me.uk\n",
238 " result : pass\n",
239 " spf : \n",
240 " domain : njae.me.uk\n",
241 " result : softfail\n",
242 "\n",
243 " report_metadata : \n",
244 " org_name : Yahoo! Inc.\n",
245 " email : postmaster@dmarc.yahoo.com\n",
246 " report_id : 1459129809.695034\n",
247 " date_range : \n",
248 " begin : 1459036800\n",
249 " end : 1459123199\n",
250 " policy_published : \n",
251 " domain : njae.me.uk\n",
252 " adkim : r\n",
253 " aspf : r\n",
254 " p : none\n",
255 " pct : 100\n",
256 " record : \n",
257 " row : \n",
258 " source_ip : 65.20.0.12\n",
259 " count : 1\n",
260 " policy_evaluated : \n",
261 " disposition : none\n",
262 " dkim : pass\n",
263 " spf : fail\n",
264 " identifiers : \n",
265 " header_from : njae.me.uk\n",
266 " auth_results : \n",
267 " dkim : \n",
268 " domain : njae.me.uk\n",
269 " result : pass\n",
270 " spf : \n",
271 " domain : njae.me.uk\n",
272 " result : softfail\n",
273 "\n",
274 " report_metadata : \n",
275 " org_name : google.com\n",
276 " email : noreply-dmarc-support@google.com\n",
277 " extra_contact_info : https://support.google.com/a/answer/2466580\n",
278 " report_id : 2150510829392606201\n",
279 " date_range : \n",
280 " begin : 1458950400\n",
281 " end : 1459036799\n",
282 " policy_published : \n",
283 " domain : njae.me.uk\n",
284 " adkim : r\n",
285 " aspf : r\n",
286 " p : none\n",
287 " sp : none\n",
288 " pct : 100\n",
289 " record : \n",
290 " row : \n",
291 " source_ip : 82.109.184.9\n",
292 " count : 1\n",
293 " policy_evaluated : \n",
294 " disposition : none\n",
295 " dkim : fail\n",
296 " spf : fail\n",
297 " identifiers : \n",
298 " header_from : njae.me.uk\n",
299 " auth_results : \n",
300 " dkim : \n",
301 " domain : clublloyds.com\n",
302 " result : pass\n",
303 " spf : \n",
304 " domain : clublloyds.com\n",
305 " result : pass\n",
306 " record : \n",
307 " row : \n",
308 " source_ip : 212.69.55.62\n",
309 " count : 1\n",
310 " policy_evaluated : \n",
311 " disposition : none\n",
312 " dkim : pass\n",
313 " spf : pass\n",
314 " identifiers : \n",
315 " header_from : njae.me.uk\n",
316 " auth_results : \n",
317 " dkim : \n",
318 " domain : njae.me.uk\n",
319 " result : pass\n",
320 " spf : \n",
321 " domain : njae.me.uk\n",
322 " result : pass\n",
323 "\n",
324 " report_metadata : \n",
325 " org_name : Microsoft Corp.\n",
326 " email : dmarcrep@microsoft.com\n",
327 " report_id : 68aad5080a774e2c997d159b546569b9@hotmail.com\n",
328 " date_range : \n",
329 " begin : 1459011600\n",
330 " end : 1459098000\n",
331 " policy_published : \n",
332 " domain : njae.me.uk\n",
333 " adkim : r\n",
334 " aspf : r\n",
335 " p : none\n",
336 " sp : none\n",
337 " pct : 100\n",
338 " record : \n",
339 " row : \n",
340 " source_ip : 212.69.55.62\n",
341 " count : 2\n",
342 " policy_evaluated : \n",
343 " disposition : none\n",
344 " dkim : pass\n",
345 " spf : pass\n",
346 " identifiers : \n",
347 " header_from : njae.me.uk\n",
348 " auth_results : \n",
349 " spf : \n",
350 " domain : njae.me.uk\n",
351 " result : pass\n",
352 " dkim : \n",
353 " domain : njae.me.uk\n",
354 " result : pass\n",
355 "\n",
356 " report_metadata : \n",
357 " org_name : Yahoo! Inc.\n",
358 " email : postmaster@dmarc.yahoo.com\n",
359 " report_id : 1458957186.548175\n",
360 " date_range : \n",
361 " begin : 1458864000\n",
362 " end : 1458950399\n",
363 " policy_published : \n",
364 " domain : njae.me.uk\n",
365 " adkim : r\n",
366 " aspf : r\n",
367 " p : none\n",
368 " pct : 100\n",
369 " record : \n",
370 " row : \n",
371 " source_ip : 212.69.55.62\n",
372 " count : 2\n",
373 " policy_evaluated : \n",
374 " disposition : none\n",
375 " dkim : pass\n",
376 " spf : pass\n",
377 " identifiers : \n",
378 " header_from : njae.me.uk\n",
379 " auth_results : \n",
380 " dkim : \n",
381 " domain : njae.me.uk\n",
382 " result : pass\n",
383 " spf : \n",
384 " domain : njae.me.uk\n",
385 " result : pass\n",
386 "\n"
387 ]
388 }
389 ],
390 "source": [
391 "for f in dmarc_reports:\n",
392 " walk(dmarc_reports[f])\n",
393 " print()"
394 ]
395 },
396 {
397 "cell_type": "code",
398 "execution_count": 11,
399 "metadata": {
400 "collapsed": false
401 },
402 "outputs": [
403 {
404 "name": "stdout",
405 "output_type": "stream",
406 "text": [
407 " report_metadata : \n",
408 " org_name : Yahoo! Inc.\n",
409 " email : postmaster@dmarc.yahoo.com\n",
410 " report_id : 1459129809.695034\n",
411 " date_range : \n",
412 " begin : 1459036800\n",
413 " end : 1459123199\n",
414 " policy_published : \n",
415 " domain : njae.me.uk\n",
416 " adkim : r\n",
417 " aspf : r\n",
418 " p : none\n",
419 " pct : 100\n",
420 " record : \n",
421 " row : \n",
422 " source_ip : 65.20.0.12\n",
423 " count : 1\n",
424 " policy_evaluated : \n",
425 " disposition : none\n",
426 " dkim : pass\n",
427 " spf : fail\n",
428 " identifiers : \n",
429 " header_from : njae.me.uk\n",
430 " auth_results : \n",
431 " dkim : \n",
432 " domain : njae.me.uk\n",
433 " result : pass\n",
434 " spf : \n",
435 " domain : njae.me.uk\n",
436 " result : softfail\n"
437 ]
438 }
439 ],
440 "source": [
441 "root2 = xml.etree.ElementTree.fromstring(open('yahoo.com!njae.me.uk!1459036800!1459123199.xml').read())\n",
442 "walk(root2)"
443 ]
444 },
445 {
446 "cell_type": "code",
447 "execution_count": 12,
448 "metadata": {
449 "collapsed": false
450 },
451 "outputs": [],
452 "source": [
453 "conn = psycopg2.connect(host=config['database']['server'],\n",
454 " database=config['database']['database'], \n",
455 " user=config['database']['username'], \n",
456 " password=config['database']['password'])"
457 ]
458 },
459 {
460 "cell_type": "code",
461 "execution_count": 13,
462 "metadata": {
463 "collapsed": false
464 },
465 "outputs": [
466 {
467 "data": {
468 "text/plain": [
469 "<cursor object at 0x7f1fdd2e25e8; closed: 0>"
470 ]
471 },
472 "execution_count": 13,
473 "metadata": {},
474 "output_type": "execute_result"
475 }
476 ],
477 "source": [
478 "cur = conn.cursor()\n",
479 "cur"
480 ]
481 },
482 {
483 "cell_type": "code",
484 "execution_count": 14,
485 "metadata": {
486 "collapsed": false
487 },
488 "outputs": [
489 {
490 "data": {
491 "text/plain": [
492 "{'date_range_begin': 'timestamp',\n",
493 " 'date_range_end': 'timestamp',\n",
494 " 'email': 'varchar',\n",
495 " 'org_name': 'varchar',\n",
496 " 'report_id': 'varchar'}"
497 ]
498 },
499 "execution_count": 14,
500 "metadata": {},
501 "output_type": "execute_result"
502 }
503 ],
504 "source": [
505 "report_fields = {'date_range_begin': 'timestamp', 'date_range_end': 'timestamp'}\n",
506 "for child in root2:\n",
507 " if child.tag == 'report_metadata':\n",
508 " for c in child:\n",
509 " if c.tag != 'date_range':\n",
510 " report_fields[c.tag] = 'varchar'\n",
511 "report_fields"
512 ]
513 },
514 {
515 "cell_type": "code",
516 "execution_count": 15,
517 "metadata": {
518 "collapsed": false
519 },
520 "outputs": [],
521 "source": [
522 "def xpath_of(node, prefix=''):\n",
523 " if len(node) == 0:\n",
524 " if node.text:\n",
525 " nodes = {prefix + node.tag: node.text.strip()}\n",
526 " else:\n",
527 " nodes = {prefix + node.tag: ''}\n",
528 " else:\n",
529 " nodes = {}\n",
530 " if node:\n",
531 " for child in node:\n",
532 " nodes.update(xpath_of(child, \n",
533 " prefix + node.tag + '/'))\n",
534 " return nodes"
535 ]
536 },
537 {
538 "cell_type": "code",
539 "execution_count": 16,
540 "metadata": {
541 "collapsed": false
542 },
543 "outputs": [
544 {
545 "data": {
546 "text/plain": [
547 "{'feedback/policy_published/adkim': 'r',\n",
548 " 'feedback/policy_published/aspf': 'r',\n",
549 " 'feedback/policy_published/domain': 'njae.me.uk',\n",
550 " 'feedback/policy_published/p': 'none',\n",
551 " 'feedback/policy_published/pct': '100',\n",
552 " 'feedback/record/auth_results/dkim/domain': 'njae.me.uk',\n",
553 " 'feedback/record/auth_results/dkim/result': 'pass',\n",
554 " 'feedback/record/auth_results/spf/domain': 'njae.me.uk',\n",
555 " 'feedback/record/auth_results/spf/result': 'softfail',\n",
556 " 'feedback/record/identifiers/header_from': 'njae.me.uk',\n",
557 " 'feedback/record/row/count': '1',\n",
558 " 'feedback/record/row/policy_evaluated/disposition': 'none',\n",
559 " 'feedback/record/row/policy_evaluated/dkim': 'pass',\n",
560 " 'feedback/record/row/policy_evaluated/spf': 'fail',\n",
561 " 'feedback/record/row/source_ip': '65.20.0.12',\n",
562 " 'feedback/report_metadata/date_range/begin': '1459036800',\n",
563 " 'feedback/report_metadata/date_range/end': '1459123199',\n",
564 " 'feedback/report_metadata/email': 'postmaster@dmarc.yahoo.com',\n",
565 " 'feedback/report_metadata/org_name': 'Yahoo! Inc.',\n",
566 " 'feedback/report_metadata/report_id': '1459129809.695034'}"
567 ]
568 },
569 "execution_count": 16,
570 "metadata": {},
571 "output_type": "execute_result"
572 }
573 ],
574 "source": [
575 "xpath_of(root2)"
576 ]
577 },
578 {
579 "cell_type": "code",
580 "execution_count": 17,
581 "metadata": {
582 "collapsed": false
583 },
584 "outputs": [
585 {
586 "data": {
587 "text/plain": [
588 "'1459036800'"
589 ]
590 },
591 "execution_count": 17,
592 "metadata": {},
593 "output_type": "execute_result"
594 }
595 ],
596 "source": [
597 "root2.find('./report_metadata/date_range/begin').text"
598 ]
599 },
600 {
601 "cell_type": "code",
602 "execution_count": 18,
603 "metadata": {
604 "collapsed": false
605 },
606 "outputs": [
607 {
608 "data": {
609 "text/plain": [
610 "'./report_metadata/date_range/begin'"
611 ]
612 },
613 "execution_count": 18,
614 "metadata": {},
615 "output_type": "execute_result"
616 }
617 ],
618 "source": [
619 "re.sub(r'^\\w*(/.*)$', r'.\\1', 'feedback/report_metadata/date_range/begin')"
620 ]
621 },
622 {
623 "cell_type": "code",
624 "execution_count": 19,
625 "metadata": {
626 "collapsed": false
627 },
628 "outputs": [
629 {
630 "data": {
631 "text/plain": [
632 "'report_metadata_date_range_begin'"
633 ]
634 },
635 "execution_count": 19,
636 "metadata": {},
637 "output_type": "execute_result"
638 }
639 ],
640 "source": [
641 "re.sub(r'/', r'_',\n",
642 " re.sub(r'^[^\\/]*/(.*)$', r'\\1', 'feedback/report_metadata/date_range/begin'))"
643 ]
644 },
645 {
646 "cell_type": "code",
647 "execution_count": 20,
648 "metadata": {
649 "collapsed": false,
650 "scrolled": true
651 },
652 "outputs": [
653 {
654 "data": {
655 "text/plain": [
656 "{'./policy_published/adkim': {'pg_field_name': 'policy_published_adkim',\n",
657 " 'pg_table': 'reports',\n",
658 " 'pg_type': 'varchar'},\n",
659 " './policy_published/aspf': {'pg_field_name': 'policy_published_aspf',\n",
660 " 'pg_table': 'reports',\n",
661 " 'pg_type': 'varchar'},\n",
662 " './policy_published/domain': {'pg_field_name': 'policy_published_domain',\n",
663 " 'pg_table': 'reports',\n",
664 " 'pg_type': 'varchar'},\n",
665 " './policy_published/p': {'pg_field_name': 'policy_published_p',\n",
666 " 'pg_table': 'reports',\n",
667 " 'pg_type': 'varchar'},\n",
668 " './policy_published/pct': {'pg_field_name': 'policy_published_pct',\n",
669 " 'pg_table': 'reports',\n",
670 " 'pg_type': 'int'},\n",
671 " './record[{}]/auth_results/dkim/domain': {'pg_field_name': 'auth_results_dkim_domain',\n",
672 " 'pg_table': 'report_items',\n",
673 " 'pg_type': 'varchar'},\n",
674 " './record[{}]/auth_results/dkim/result': {'pg_field_name': 'auth_results_dkim_result',\n",
675 " 'pg_table': 'report_items',\n",
676 " 'pg_type': 'varchar'},\n",
677 " './record[{}]/auth_results/spf/domain': {'pg_field_name': 'auth_results_spf_domain',\n",
678 " 'pg_table': 'report_items',\n",
679 " 'pg_type': 'varchar'},\n",
680 " './record[{}]/auth_results/spf/result': {'pg_field_name': 'auth_results_spf_result',\n",
681 " 'pg_table': 'report_items',\n",
682 " 'pg_type': 'varchar'},\n",
683 " './record[{}]/identifiers/header_from': {'pg_field_name': 'identifiers_header_from',\n",
684 " 'pg_table': 'report_items',\n",
685 " 'pg_type': 'varchar'},\n",
686 " './record[{}]/row/count': {'pg_field_name': 'count',\n",
687 " 'pg_table': 'report_items',\n",
688 " 'pg_type': 'int'},\n",
689 " './record[{}]/row/policy_evaluated/disposition': {'pg_field_name': 'policy_evaluated_disposition',\n",
690 " 'pg_table': 'report_items',\n",
691 " 'pg_type': 'varchar'},\n",
692 " './record[{}]/row/policy_evaluated/dkim': {'pg_field_name': 'policy_evaluated_dkim',\n",
693 " 'pg_table': 'report_items',\n",
694 " 'pg_type': 'varchar'},\n",
695 " './record[{}]/row/policy_evaluated/spf': {'pg_field_name': 'policy_evaluated_spf',\n",
696 " 'pg_table': 'report_items',\n",
697 " 'pg_type': 'varchar'},\n",
698 " './record[{}]/row/source_ip': {'pg_field_name': 'source_ip',\n",
699 " 'pg_table': 'report_items',\n",
700 " 'pg_type': 'inet'},\n",
701 " './report_metadata/date_range/begin': {'pg_field_name': 'report_metadata_date_range_begin',\n",
702 " 'pg_table': 'reports',\n",
703 " 'pg_type': 'timestamp'},\n",
704 " './report_metadata/date_range/end': {'pg_field_name': 'report_metadata_date_range_end',\n",
705 " 'pg_table': 'reports',\n",
706 " 'pg_type': 'timestamp'},\n",
707 " './report_metadata/email': {'pg_field_name': 'report_metadata_email',\n",
708 " 'pg_table': 'reports',\n",
709 " 'pg_type': 'varchar'},\n",
710 " './report_metadata/org_name': {'pg_field_name': 'report_metadata_org_name',\n",
711 " 'pg_table': 'reports',\n",
712 " 'pg_type': 'varchar'},\n",
713 " './report_metadata/report_id': {'pg_field_name': 'report_metadata_report_id',\n",
714 " 'pg_table': 'reports',\n",
715 " 'pg_type': 'varchar'}}"
716 ]
717 },
718 "execution_count": 20,
719 "metadata": {},
720 "output_type": "execute_result"
721 }
722 ],
723 "source": [
724 "field_maps = {re.sub(r'^\\w*(/.*)$', r'.\\1', name): \n",
725 " {'pg_field_name': re.sub(r'/', r'_', re.sub(r'^[^\\/]*/(.*)$', r'\\1', name)),\n",
726 " 'pg_table': 'reports',\n",
727 " 'pg_type': 'varchar'}\n",
728 " for name in xpath_of(root2)\n",
729 " if 'record' not in name}\n",
730 "field_maps.update({re.sub(r'^\\w*/record(/.*)$', r'./record[{}]\\1', name): \n",
731 " {'pg_field_name': re.sub(r'/', r'_', re.sub(r'^.*/record(/row)?/(.*)$', r'\\2', name)),\n",
732 " 'pg_table': 'report_items',\n",
733 " 'pg_type': 'varchar'}\n",
734 " for name in xpath_of(root2)\n",
735 " if 'record' in name})\n",
736 "field_maps['./report_metadata/date_range/begin']['pg_type'] = 'timestamptz'\n",
737 "field_maps['./report_metadata/date_range/end']['pg_type'] = 'timestamptz'\n",
738 "field_maps['./policy_published/pct']['pg_type'] = 'int'\n",
739 "field_maps['./record[{}]/row/count']['pg_type'] = 'int'\n",
740 "field_maps['./record[{}]/row/source_ip']['pg_type'] = 'inet'\n",
741 "\n",
742 "field_maps"
743 ]
744 },
745 {
746 "cell_type": "code",
747 "execution_count": 95,
748 "metadata": {
749 "collapsed": false
750 },
751 "outputs": [
752 {
753 "data": {
754 "text/plain": [
755 "datetime.datetime(2016, 3, 27, 23, 59, 59)"
756 ]
757 },
758 "execution_count": 95,
759 "metadata": {},
760 "output_type": "execute_result"
761 }
762 ],
763 "source": [
764 "datetime.datetime.utcfromtimestamp(1459123199)"
765 ]
766 },
767 {
768 "cell_type": "code",
769 "execution_count": 100,
770 "metadata": {
771 "collapsed": false
772 },
773 "outputs": [
774 {
775 "data": {
776 "text/plain": [
777 "1459119599.0"
778 ]
779 },
780 "execution_count": 100,
781 "metadata": {},
782 "output_type": "execute_result"
783 }
784 ],
785 "source": [
786 "datetime.datetime.utcfromtimestamp(1459123199).timestamp()"
787 ]
788 },
789 {
790 "cell_type": "code",
791 "execution_count": 157,
792 "metadata": {
793 "collapsed": false
794 },
795 "outputs": [
796 {
797 "data": {
798 "text/plain": [
799 "'create table reports (id serial primary key, policy_published_p varchar, report_metadata_date_range_end timestamp, report_metadata_org_name varchar, policy_published_aspf varchar, policy_published_adkim varchar, policy_published_pct int, report_metadata_email varchar, report_metadata_date_range_begin timestamp, report_metadata_report_id varchar, policy_published_domain varchar);'"
800 ]
801 },
802 "execution_count": 157,
803 "metadata": {},
804 "output_type": "execute_result"
805 }
806 ],
807 "source": [
808 "create_report_table_string = 'create table reports (id serial primary key, ' + \\\n",
809 "', '.join(field_maps[p]['pg_field_name'] + ' ' + field_maps[p]['pg_type'] \n",
810 " for p in field_maps if field_maps[p]['pg_table'] == 'reports') + \\\n",
811 "');'\n",
812 "create_report_table_string"
813 ]
814 },
815 {
816 "cell_type": "code",
817 "execution_count": 193,
818 "metadata": {
819 "collapsed": false
820 },
821 "outputs": [
822 {
823 "data": {
824 "text/plain": [
825 "'create table report_items (id serial primary key, report_id integer references reports, auth_results_spf_domain varchar, auth_results_dkim_domain varchar, policy_evaluated_dkim varchar, auth_results_spf_result varchar, source_ip inet, policy_evaluated_spf varchar, count int, identifiers_header_from varchar, policy_evaluated_disposition varchar, auth_results_dkim_result varchar);'"
826 ]
827 },
828 "execution_count": 193,
829 "metadata": {},
830 "output_type": "execute_result"
831 }
832 ],
833 "source": [
834 "create_report_item_table_string = 'create table report_items (id serial primary key, ' + \\\n",
835 " 'report_id integer references reports, ' + \\\n",
836 "', '.join(field_maps[p]['pg_field_name'] + ' ' + field_maps[p]['pg_type'] \n",
837 " for p in field_maps if field_maps[p]['pg_table'] == 'report_items') + \\\n",
838 "');'\n",
839 "create_report_item_table_string"
840 ]
841 },
842 {
843 "cell_type": "code",
844 "execution_count": 117,
845 "metadata": {
846 "collapsed": false
847 },
848 "outputs": [
849 {
850 "data": {
851 "text/plain": [
852 "<cursor object at 0x7fa9a819b7c8; closed: 0>"
853 ]
854 },
855 "execution_count": 117,
856 "metadata": {},
857 "output_type": "execute_result"
858 }
859 ],
860 "source": [
861 "cur"
862 ]
863 },
864 {
865 "cell_type": "code",
866 "execution_count": 194,
867 "metadata": {
868 "collapsed": false
869 },
870 "outputs": [],
871 "source": [
872 "cur.execute(create_report_table_string)"
873 ]
874 },
875 {
876 "cell_type": "code",
877 "execution_count": 195,
878 "metadata": {
879 "collapsed": false
880 },
881 "outputs": [],
882 "source": [
883 "cur.execute(create_report_item_table_string)"
884 ]
885 },
886 {
887 "cell_type": "code",
888 "execution_count": 196,
889 "metadata": {
890 "collapsed": true
891 },
892 "outputs": [],
893 "source": [
894 "conn.commit()"
895 ]
896 },
897 {
898 "cell_type": "code",
899 "execution_count": 128,
900 "metadata": {
901 "collapsed": false
902 },
903 "outputs": [],
904 "source": [
905 "rep = dmarc_reports['google.com!njae.me.uk!1458777600!1458863999.xml']"
906 ]
907 },
908 {
909 "cell_type": "code",
910 "execution_count": 134,
911 "metadata": {
912 "collapsed": false
913 },
914 "outputs": [
915 {
916 "data": {
917 "text/plain": [
918 "'1'"
919 ]
920 },
921 "execution_count": 134,
922 "metadata": {},
923 "output_type": "execute_result"
924 }
925 ],
926 "source": [
927 "rep.find('./record/row/count').text"
928 ]
929 },
930 {
931 "cell_type": "code",
932 "execution_count": 135,
933 "metadata": {
934 "collapsed": false
935 },
936 "outputs": [
937 {
938 "data": {
939 "text/plain": [
940 "[('google.com!njae.me.uk!1458950400!1459036799.xml', '1'),\n",
941 " ('yahoo.com!njae.me.uk!1459036800!1459123199.xml', '1'),\n",
942 " ('yahoo.com!njae.me.uk!1458864000!1458950399.xml', '2'),\n",
943 " ('hotmail.com!njae.me.uk!1459011600!1459098000.xml', '2'),\n",
944 " ('google.com!njae.me.uk!1458777600!1458863999.xml', '1')]"
945 ]
946 },
947 "execution_count": 135,
948 "metadata": {},
949 "output_type": "execute_result"
950 }
951 ],
952 "source": [
953 "[(r, dmarc_reports[r].find('./record/row/count').text) for r in dmarc_reports]"
954 ]
955 },
956 {
957 "cell_type": "code",
958 "execution_count": 136,
959 "metadata": {
960 "collapsed": false
961 },
962 "outputs": [
963 {
964 "name": "stdout",
965 "output_type": "stream",
966 "text": [
967 " report_metadata : \n",
968 " org_name : Microsoft Corp.\n",
969 " email : dmarcrep@microsoft.com\n",
970 " report_id : 68aad5080a774e2c997d159b546569b9@hotmail.com\n",
971 " date_range : \n",
972 " begin : 1459011600\n",
973 " end : 1459098000\n",
974 " policy_published : \n",
975 " domain : njae.me.uk\n",
976 " adkim : r\n",
977 " aspf : r\n",
978 " p : none\n",
979 " sp : none\n",
980 " pct : 100\n",
981 " record : \n",
982 " row : \n",
983 " source_ip : 212.69.55.62\n",
984 " count : 2\n",
985 " policy_evaluated : \n",
986 " disposition : none\n",
987 " dkim : pass\n",
988 " spf : pass\n",
989 " identifiers : \n",
990 " header_from : njae.me.uk\n",
991 " auth_results : \n",
992 " spf : \n",
993 " domain : njae.me.uk\n",
994 " result : pass\n",
995 " dkim : \n",
996 " domain : njae.me.uk\n",
997 " result : pass\n"
998 ]
999 }
1000 ],
1001 "source": [
1002 "walk(dmarc_reports['hotmail.com!njae.me.uk!1459011600!1459098000.xml'])"
1003 ]
1004 },
1005 {
1006 "cell_type": "code",
1007 "execution_count": 137,
1008 "metadata": {
1009 "collapsed": false
1010 },
1011 "outputs": [
1012 {
1013 "name": "stdout",
1014 "output_type": "stream",
1015 "text": [
1016 " report_metadata : \n",
1017 " org_name : google.com\n",
1018 " email : noreply-dmarc-support@google.com\n",
1019 " extra_contact_info : https://support.google.com/a/answer/2466580\n",
1020 " report_id : 2150510829392606201\n",
1021 " date_range : \n",
1022 " begin : 1458950400\n",
1023 " end : 1459036799\n",
1024 " policy_published : \n",
1025 " domain : njae.me.uk\n",
1026 " adkim : r\n",
1027 " aspf : r\n",
1028 " p : none\n",
1029 " sp : none\n",
1030 " pct : 100\n",
1031 " record : \n",
1032 " row : \n",
1033 " source_ip : 82.109.184.9\n",
1034 " count : 1\n",
1035 " policy_evaluated : \n",
1036 " disposition : none\n",
1037 " dkim : fail\n",
1038 " spf : fail\n",
1039 " identifiers : \n",
1040 " header_from : njae.me.uk\n",
1041 " auth_results : \n",
1042 " dkim : \n",
1043 " domain : clublloyds.com\n",
1044 " result : pass\n",
1045 " spf : \n",
1046 " domain : clublloyds.com\n",
1047 " result : pass\n",
1048 " record : \n",
1049 " row : \n",
1050 " source_ip : 212.69.55.62\n",
1051 " count : 1\n",
1052 " policy_evaluated : \n",
1053 " disposition : none\n",
1054 " dkim : pass\n",
1055 " spf : pass\n",
1056 " identifiers : \n",
1057 " header_from : njae.me.uk\n",
1058 " auth_results : \n",
1059 " dkim : \n",
1060 " domain : njae.me.uk\n",
1061 " result : pass\n",
1062 " spf : \n",
1063 " domain : njae.me.uk\n",
1064 " result : pass\n"
1065 ]
1066 }
1067 ],
1068 "source": [
1069 "walk(dmarc_reports['google.com!njae.me.uk!1458950400!1459036799.xml'])"
1070 ]
1071 },
1072 {
1073 "cell_type": "code",
1074 "execution_count": 159,
1075 "metadata": {
1076 "collapsed": false
1077 },
1078 "outputs": [
1079 {
1080 "data": {
1081 "text/plain": [
1082 "['./record[{}]/auth_results/spf/domain',\n",
1083 " './record[{}]/auth_results/dkim/domain',\n",
1084 " './record[{}]/row/policy_evaluated/dkim',\n",
1085 " './record[{}]/auth_results/spf/result',\n",
1086 " './record[{}]/row/source_ip',\n",
1087 " './record[{}]/row/policy_evaluated/spf',\n",
1088 " './record[{}]/row/count',\n",
1089 " './record[{}]/identifiers/header_from',\n",
1090 " './record[{}]/row/policy_evaluated/disposition',\n",
1091 " './record[{}]/auth_results/dkim/result']"
1092 ]
1093 },
1094 "execution_count": 159,
1095 "metadata": {},
1096 "output_type": "execute_result"
1097 }
1098 ],
1099 "source": [
1100 "[p for p in field_maps if field_maps[p]['pg_table'] == 'report_items']"
1101 ]
1102 },
1103 {
1104 "cell_type": "code",
1105 "execution_count": 140,
1106 "metadata": {
1107 "collapsed": false
1108 },
1109 "outputs": [
1110 {
1111 "data": {
1112 "text/plain": [
1113 "2"
1114 ]
1115 },
1116 "execution_count": 140,
1117 "metadata": {},
1118 "output_type": "execute_result"
1119 }
1120 ],
1121 "source": [
1122 "len(dmarc_reports['google.com!njae.me.uk!1458950400!1459036799.xml'].findall('./record'))"
1123 ]
1124 },
1125 {
1126 "cell_type": "code",
1127 "execution_count": 144,
1128 "metadata": {
1129 "collapsed": false
1130 },
1131 "outputs": [
1132 {
1133 "data": {
1134 "text/plain": [
1135 "'212.69.55.62'"
1136 ]
1137 },
1138 "execution_count": 144,
1139 "metadata": {},
1140 "output_type": "execute_result"
1141 }
1142 ],
1143 "source": [
1144 "dmarc_reports['google.com!njae.me.uk!1458950400!1459036799.xml'].find('./record[2]/row/source_ip').text"
1145 ]
1146 },
1147 {
1148 "cell_type": "code",
1149 "execution_count": 153,
1150 "metadata": {
1151 "collapsed": false
1152 },
1153 "outputs": [
1154 {
1155 "data": {
1156 "text/plain": [
1157 "'82.109.184.9'"
1158 ]
1159 },
1160 "execution_count": 153,
1161 "metadata": {},
1162 "output_type": "execute_result"
1163 }
1164 ],
1165 "source": [
1166 "dmarc_reports['google.com!njae.me.uk!1458950400!1459036799.xml'].find('./record[{}]/row/source_ip'.format(1)).text"
1167 ]
1168 },
1169 {
1170 "cell_type": "code",
1171 "execution_count": 163,
1172 "metadata": {
1173 "collapsed": false
1174 },
1175 "outputs": [
1176 {
1177 "name": "stdout",
1178 "output_type": "stream",
1179 "text": [
1180 "policy_published_p :> none\n",
1181 "report_metadata_date_range_end :> 1459036799\n",
1182 "report_metadata_org_name :> google.com\n",
1183 "policy_published_aspf :> r\n",
1184 "policy_published_adkim :> r\n",
1185 "policy_published_pct :> 100\n",
1186 "report_metadata_email :> noreply-dmarc-support@google.com\n",
1187 "report_metadata_date_range_begin :> 1458950400\n",
1188 "report_metadata_report_id :> 2150510829392606201\n",
1189 "policy_published_domain :> njae.me.uk\n"
1190 ]
1191 }
1192 ],
1193 "source": [
1194 "rep = dmarc_reports['google.com!njae.me.uk!1458950400!1459036799.xml']\n",
1195 "for f in [f for f in field_maps if field_maps[f]['pg_table'] == 'reports']:\n",
1196 " print(field_maps[f]['pg_field_name'], ':>', rep.find(f).text)"
1197 ]
1198 },
1199 {
1200 "cell_type": "code",
1201 "execution_count": 165,
1202 "metadata": {
1203 "collapsed": false
1204 },
1205 "outputs": [
1206 {
1207 "name": "stdout",
1208 "output_type": "stream",
1209 "text": [
1210 "1\n",
1211 "auth_results_spf_domain :> clublloyds.com\n",
1212 "auth_results_dkim_domain :> clublloyds.com\n",
1213 "policy_evaluated_dkim :> fail\n",
1214 "auth_results_spf_result :> pass\n",
1215 "source_ip :> 82.109.184.9\n",
1216 "policy_evaluated_spf :> fail\n",
1217 "count :> 1\n",
1218 "identifiers_header_from :> njae.me.uk\n",
1219 "policy_evaluated_disposition :> none\n",
1220 "auth_results_dkim_result :> pass\n",
1221 "2\n",
1222 "auth_results_spf_domain :> njae.me.uk\n",
1223 "auth_results_dkim_domain :> njae.me.uk\n",
1224 "policy_evaluated_dkim :> pass\n",
1225 "auth_results_spf_result :> pass\n",
1226 "source_ip :> 212.69.55.62\n",
1227 "policy_evaluated_spf :> pass\n",
1228 "count :> 1\n",
1229 "identifiers_header_from :> njae.me.uk\n",
1230 "policy_evaluated_disposition :> none\n",
1231 "auth_results_dkim_result :> pass\n"
1232 ]
1233 }
1234 ],
1235 "source": [
1236 "rep = dmarc_reports['google.com!njae.me.uk!1458950400!1459036799.xml']\n",
1237 "for i in range(1, len(rep.findall('./record'))+1):\n",
1238 " print(i)\n",
1239 " for f in [f for f in field_maps if field_maps[f]['pg_table'] == 'report_items']:\n",
1240 " print(field_maps[f]['pg_field_name'], ':>', rep.find(f.format(i)).text)"
1241 ]
1242 },
1243 {
1244 "cell_type": "code",
1245 "execution_count": 166,
1246 "metadata": {
1247 "collapsed": true
1248 },
1249 "outputs": [],
1250 "source": [
1251 "def maybe_strip(text):\n",
1252 " if text:\n",
1253 " return text.strip()\n",
1254 " else:\n",
1255 " return ''"
1256 ]
1257 },
1258 {
1259 "cell_type": "code",
1260 "execution_count": 197,
1261 "metadata": {
1262 "collapsed": false
1263 },
1264 "outputs": [
1265 {
1266 "data": {
1267 "text/plain": [
1268 "('insert into reports (policy_published_p, report_metadata_date_range_end, report_metadata_org_name, policy_published_aspf, policy_published_adkim, policy_published_pct, report_metadata_email, report_metadata_date_range_begin, report_metadata_report_id, policy_published_domain) values (%(policy_published_p)s, %(report_metadata_date_range_end)s, %(report_metadata_org_name)s, %(policy_published_aspf)s, %(policy_published_adkim)s, %(policy_published_pct)s, %(report_metadata_email)s, %(report_metadata_date_range_begin)s, %(report_metadata_report_id)s, %(policy_published_domain)s);',\n",
1269 " {'policy_published_adkim': 'r',\n",
1270 " 'policy_published_aspf': 'r',\n",
1271 " 'policy_published_domain': 'njae.me.uk',\n",
1272 " 'policy_published_p': 'none',\n",
1273 " 'policy_published_pct': 100,\n",
1274 " 'report_metadata_date_range_begin': datetime.datetime(2016, 3, 26, 0, 0),\n",
1275 " 'report_metadata_date_range_end': datetime.datetime(2016, 3, 26, 23, 59, 59),\n",
1276 " 'report_metadata_email': 'noreply-dmarc-support@google.com',\n",
1277 " 'report_metadata_org_name': 'google.com',\n",
1278 " 'report_metadata_report_id': '2150510829392606201'})"
1279 ]
1280 },
1281 "execution_count": 197,
1282 "metadata": {},
1283 "output_type": "execute_result"
1284 }
1285 ],
1286 "source": [
1287 "rep = dmarc_reports['google.com!njae.me.uk!1458950400!1459036799.xml']\n",
1288 "field_names = []\n",
1289 "values = {}\n",
1290 "for f in [f for f in field_maps if field_maps[f]['pg_table'] == 'reports']:\n",
1291 " field_names += [field_maps[f]['pg_field_name']]\n",
1292 " if field_maps[f]['pg_type'] == 'int':\n",
1293 " values[field_maps[f]['pg_field_name']] = int(rep.find(f).text)\n",
1294 " elif field_maps[f]['pg_type'] == 'timestamptz':\n",
1295 " values[field_maps[f]['pg_field_name']] = datetime.datetime.fromtimestamp(int(rep.find(f).text), \n",
1296 " tz=datetime.timezone.utc)\n",
1297 " elif field_maps[f]['pg_type'] == 'inet':\n",
1298 " values[field_maps[f]['pg_field_name']] = maybe_strip(rep.find(f).text)\n",
1299 " else:\n",
1300 " values[field_maps[f]['pg_field_name']] = maybe_strip(rep.find(f).text)\n",
1301 "insert_string = 'insert into reports (' + ', '.join(field_names) +') '\n",
1302 "insert_string += 'values (' + ', '.join('%({})s'.format(f) for f in field_names) + ');'\n",
1303 "insert_string, values"
1304 ]
1305 },
1306 {
1307 "cell_type": "code",
1308 "execution_count": 198,
1309 "metadata": {
1310 "collapsed": false
1311 },
1312 "outputs": [],
1313 "source": [
1314 "cur.execute(insert_string, values)\n",
1315 "conn.commit()"
1316 ]
1317 },
1318 {
1319 "cell_type": "code",
1320 "execution_count": 201,
1321 "metadata": {
1322 "collapsed": false
1323 },
1324 "outputs": [
1325 {
1326 "data": {
1327 "text/plain": [
1328 "1"
1329 ]
1330 },
1331 "execution_count": 201,
1332 "metadata": {},
1333 "output_type": "execute_result"
1334 }
1335 ],
1336 "source": [
1337 "cur.execute('select id, report_metadata_report_id from reports where report_metadata_report_id = %s;', \n",
1338 " [rep.find('./report_metadata/report_id').text])\n",
1339 "cur.fetchall()[0][0]"
1340 ]
1341 },
1342 {
1343 "cell_type": "code",
1344 "execution_count": 203,
1345 "metadata": {
1346 "collapsed": false
1347 },
1348 "outputs": [
1349 {
1350 "name": "stdout",
1351 "output_type": "stream",
1352 "text": [
1353 "1\n",
1354 "insert into report_items (report_id, auth_results_spf_domain, auth_results_dkim_domain, policy_evaluated_dkim, auth_results_spf_result, source_ip, policy_evaluated_spf, count, identifiers_header_from, policy_evaluated_disposition, auth_results_dkim_result) values (%(report_id)s, %(auth_results_spf_domain)s, %(auth_results_dkim_domain)s, %(policy_evaluated_dkim)s, %(auth_results_spf_result)s, %(source_ip)s, %(policy_evaluated_spf)s, %(count)s, %(identifiers_header_from)s, %(policy_evaluated_disposition)s, %(auth_results_dkim_result)s); {'auth_results_spf_result': 'pass', 'count': 1, 'report_id': 1, 'identifiers_header_from': 'njae.me.uk', 'policy_evaluated_disposition': 'none', 'auth_results_dkim_result': 'pass', 'source_ip': '82.109.184.9', 'policy_evaluated_spf': 'fail', 'auth_results_dkim_domain': 'clublloyds.com', 'auth_results_spf_domain': 'clublloyds.com', 'policy_evaluated_dkim': 'fail'}\n",
1355 "2\n",
1356 "insert into report_items (report_id, auth_results_spf_domain, auth_results_dkim_domain, policy_evaluated_dkim, auth_results_spf_result, source_ip, policy_evaluated_spf, count, identifiers_header_from, policy_evaluated_disposition, auth_results_dkim_result) values (%(report_id)s, %(auth_results_spf_domain)s, %(auth_results_dkim_domain)s, %(policy_evaluated_dkim)s, %(auth_results_spf_result)s, %(source_ip)s, %(policy_evaluated_spf)s, %(count)s, %(identifiers_header_from)s, %(policy_evaluated_disposition)s, %(auth_results_dkim_result)s); {'auth_results_spf_result': 'pass', 'count': 1, 'report_id': 1, 'identifiers_header_from': 'njae.me.uk', 'policy_evaluated_disposition': 'none', 'auth_results_dkim_result': 'pass', 'source_ip': '212.69.55.62', 'policy_evaluated_spf': 'pass', 'auth_results_dkim_domain': 'njae.me.uk', 'auth_results_spf_domain': 'njae.me.uk', 'policy_evaluated_dkim': 'pass'}\n"
1357 ]
1358 }
1359 ],
1360 "source": [
1361 "rep = dmarc_reports['google.com!njae.me.uk!1458950400!1459036799.xml']\n",
1362 "for i in range(1, len(rep.findall('./record'))+1):\n",
1363 " print(i)\n",
1364 " field_names = []\n",
1365 " cur.execute('select id, report_metadata_report_id from reports where report_metadata_report_id = %s;', \n",
1366 " [rep.find('./report_metadata/report_id').text])\n",
1367 " results = cur.fetchall()\n",
1368 " if len(results) != 1:\n",
1369 " raise\n",
1370 " else:\n",
1371 " report_id = results[0][0]\n",
1372 " values = {'report_id': report_id}\n",
1373 " for f in [f for f in field_maps if field_maps[f]['pg_table'] == 'report_items']:\n",
1374 " field_names += [field_maps[f]['pg_field_name']]\n",
1375 " if field_maps[f]['pg_type'] == 'int':\n",
1376 " values[field_maps[f]['pg_field_name']] = int(rep.find(f.format(i)).text)\n",
1377 " elif field_maps[f]['pg_type'] == 'timestamp':\n",
1378 " values[field_maps[f]['pg_field_name']] = datetime.datetime.utcfromtimestamp(int(rep.find(f.format(i)).text))\n",
1379 " elif field_maps[f]['pg_type'] == 'inet':\n",
1380 " values[field_maps[f]['pg_field_name']] = maybe_strip(rep.find(f.format(i)).text)\n",
1381 " else:\n",
1382 " values[field_maps[f]['pg_field_name']] = maybe_strip(rep.find(f.format(i)).text)\n",
1383 " insert_string = 'insert into report_items (report_id, ' + ', '.join(field_names) +') '\n",
1384 " insert_string += 'values (%(report_id)s, ' + ', '.join('%({})s'.format(f) for f in field_names) + ');'\n",
1385 " print(insert_string, values)\n",
1386 " cur.execute(insert_string, values)\n",
1387 "conn.commit()"
1388 ]
1389 },
1390 {
1391 "cell_type": "code",
1392 "execution_count": 191,
1393 "metadata": {
1394 "collapsed": true
1395 },
1396 "outputs": [],
1397 "source": [
1398 "conn.rollback()"
1399 ]
1400 },
1401 {
1402 "cell_type": "code",
1403 "execution_count": 209,
1404 "metadata": {
1405 "collapsed": true
1406 },
1407 "outputs": [],
1408 "source": [
1409 "def write_report(connection, cursor, report):\n",
1410 " \n",
1411 " field_names = []\n",
1412 " values = {}\n",
1413 " for f in [f for f in field_maps if field_maps[f]['pg_table'] == 'reports']:\n",
1414 " field_names += [field_maps[f]['pg_field_name']]\n",
1415 " if field_maps[f]['pg_type'] == 'int':\n",
1416 " values[field_maps[f]['pg_field_name']] = int(report.find(f).text)\n",
1417 " elif field_maps[f]['pg_type'] == 'timestamp':\n",
1418 " values[field_maps[f]['pg_field_name']] = datetime.datetime.utcfromtimestamp(int(report.find(f).text))\n",
1419 " elif field_maps[f]['pg_type'] == 'inet':\n",
1420 " values[field_maps[f]['pg_field_name']] = maybe_strip(report.find(f).text)\n",
1421 " else:\n",
1422 " values[field_maps[f]['pg_field_name']] = maybe_strip(report.find(f).text)\n",
1423 " insert_string = 'insert into reports (' + ', '.join(field_names) + ') '\n",
1424 " insert_string += 'values (' + ', '.join('%({})s'.format(f) for f in field_names) + ');'\n",
1425 " cursor.execute(insert_string, values)\n",
1426 " \n",
1427 " for i in range(1, len(report.findall('./record'))+1):\n",
1428 " field_names = []\n",
1429 " cursor.execute('select id, report_metadata_report_id from reports where report_metadata_report_id = %s;', \n",
1430 " [report.find('./report_metadata/report_id').text])\n",
1431 " results = cursor.fetchall()\n",
1432 " if len(results) != 1:\n",
1433 " raise\n",
1434 " else:\n",
1435 " report_id = results[0][0]\n",
1436 " values = {'report_id': report_id}\n",
1437 " for f in [f for f in field_maps if field_maps[f]['pg_table'] == 'report_items']:\n",
1438 " field_names += [field_maps[f]['pg_field_name']]\n",
1439 " if field_maps[f]['pg_type'] == 'int':\n",
1440 " values[field_maps[f]['pg_field_name']] = int(report.find(f.format(i)).text)\n",
1441 " elif field_maps[f]['pg_type'] == 'timestamp':\n",
1442 " values[field_maps[f]['pg_field_name']] = datetime.datetime.utcfromtimestamp(int(report.find(f.format(i)).text))\n",
1443 " elif field_maps[f]['pg_type'] == 'inet':\n",
1444 " values[field_maps[f]['pg_field_name']] = maybe_strip(report.find(f.format(i)).text)\n",
1445 " else:\n",
1446 " values[field_maps[f]['pg_field_name']] = maybe_strip(report.find(f.format(i)).text)\n",
1447 " insert_string = 'insert into report_items (report_id, ' + ', '.join(field_names) + ') '\n",
1448 " insert_string += 'values (%(report_id)s, ' + ', '.join('%({})s'.format(f) for f in field_names) + ');'\n",
1449 " cursor.execute(insert_string, values)\n",
1450 " connection.commit()"
1451 ]
1452 },
1453 {
1454 "cell_type": "code",
1455 "execution_count": 210,
1456 "metadata": {
1457 "collapsed": false
1458 },
1459 "outputs": [],
1460 "source": [
1461 "for rep in dmarc_reports:\n",
1462 " write_report(conn, cur, dmarc_reports[rep])"
1463 ]
1464 },
1465 {
1466 "cell_type": "code",
1467 "execution_count": 211,
1468 "metadata": {
1469 "collapsed": true
1470 },
1471 "outputs": [],
1472 "source": [
1473 "conn.close()"
1474 ]
1475 },
1476 {
1477 "cell_type": "code",
1478 "execution_count": 208,
1479 "metadata": {
1480 "collapsed": true
1481 },
1482 "outputs": [],
1483 "source": [
1484 "conn.rollback()"
1485 ]
1486 },
1487 {
1488 "cell_type": "code",
1489 "execution_count": null,
1490 "metadata": {
1491 "collapsed": true
1492 },
1493 "outputs": [],
1494 "source": []
1495 }
1496 ],
1497 "metadata": {
1498 "kernelspec": {
1499 "display_name": "Python 3",
1500 "language": "python",
1501 "name": "python3"
1502 },
1503 "language_info": {
1504 "codemirror_mode": {
1505 "name": "ipython",
1506 "version": 3
1507 },
1508 "file_extension": ".py",
1509 "mimetype": "text/x-python",
1510 "name": "python",
1511 "nbconvert_exporter": "python",
1512 "pygments_lexer": "ipython3",
1513 "version": "3.4.3+"
1514 }
1515 },
1516 "nbformat": 4,
1517 "nbformat_minor": 0
1518 }