Initial files
authorNeil Smith <neil.git@njae.me.uk>
Thu, 3 Apr 2014 17:30:43 +0000 (18:30 +0100)
committerNeil Smith <neil.git@njae.me.uk>
Thu, 3 Apr 2014 17:30:43 +0000 (18:30 +0100)
data-cleaning.ipynb [new file with mode: 0755]
data-use.ipynb [new file with mode: 0755]
make-small-accidents.ipynb [new file with mode: 0644]

diff --git a/data-cleaning.ipynb b/data-cleaning.ipynb
new file mode 100755 (executable)
index 0000000..30913ac
--- /dev/null
@@ -0,0 +1,219 @@
+{
+ "metadata": {
+  "name": ""
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import pymongo\n",
+      "client = pymongo.MongoClient('mongodb://ogedei:27017/')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 1
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "db = client.accidents\n",
+      "accidents = db.accidents\n",
+      "vehicles = db.vehicles\n",
+      "casualties = db.casualties"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 4
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "accidents.find().count()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 5,
+       "text": [
+        "1355615"
+       ]
+      }
+     ],
+     "prompt_number": 5
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "casualties.find().count()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 9,
+       "text": [
+        "0"
+       ]
+      }
+     ],
+     "prompt_number": 9
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "[accidents.find_one({'Accident_Index':v['Acc_Index']}, ['Accident_Index', 'Date']) \n",
+      " for v in vehicles.find(fields=['Acc_Index'], limit=10)]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 6,
+       "text": [
+        "[]"
+       ]
+      }
+     ],
+     "prompt_number": 6
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Find all the vehicles that don't have an accident\n",
+      "for v in vehicles.find(fields=['Acc_Index']):\n",
+      "    if not accidents.find_one({'Accident_Index': v['Acc_Index']}):\n",
+      "        print(v)"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 7
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "for v in vehicles.find():\n",
+      "    accidents.update({'Accident_Index': v['Acc_Index']}, {'$push' : {\"Involved_Vehicles\" : v}})"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 7
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "for v in vehicles.find():\n",
+      "    accidents.update({'Accident_Index': v['Acc_Index']}, {'$push' : {\"Vehicles\" : v}})"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 8
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "for c in casualties.find():\n",
+      "    accidents.update({'Accident_Index': c['Acc_Index']}, {'$push' : {\"Casualties\" : c}})"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 9
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "accidents.update({\"Involved_Vehicles\": {\"$exists\": True}}, {\"$unset\": {\"Involved_Vehicles\": 1}}, multi=True)  # filter on the field itself ($exists is a per-field operator, not a top-level one); multi=True so every matching accident is cleaned, not just the first"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 11,
+       "text": [
+        "{'connectionId': 20, 'err': None, 'n': 0, 'ok': 1.0, 'updatedExisting': False}"
+       ]
+      }
+     ],
+     "prompt_number": 11
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "accidents.find({\"Involved_Vehicles\" : {\"$exists\" : 1}}).count()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 22,
+       "text": [
+        "0"
+       ]
+      }
+     ],
+     "prompt_number": 22
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "accidents.update({\"Involved_Vehicles\" : {\"$exists\" : 1}}, {\"$unset\": {\"Involved_Vehicles\":1}}, multi=True)"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 21,
+       "text": [
+        "{'connectionId': 20, 'err': None, 'n': 57, 'ok': 1.0, 'updatedExisting': True}"
+       ]
+      }
+     ],
+     "prompt_number": 21
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
\ No newline at end of file
diff --git a/data-use.ipynb b/data-use.ipynb
new file mode 100755 (executable)
index 0000000..e69de29
diff --git a/make-small-accidents.ipynb b/make-small-accidents.ipynb
new file mode 100644 (file)
index 0000000..8635c80
--- /dev/null
@@ -0,0 +1,168 @@
+{
+ "metadata": {
+  "name": ""
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import pymongo\n",
+      "client = pymongo.MongoClient('mongodb://ogedei:27017/')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 2
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Connect to the `accidents` database and get a handle on its `accidents` collection."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "db = client.accidents\n",
+      "accidents = db.accidents"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 3
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "accidents.find().count()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 4,
+       "text": [
+        "1355615"
+       ]
+      }
+     ],
+     "prompt_number": 4
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Drop any existing small accidents database (`asmall`), so the copy below starts from an empty collection."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "client.drop_database('asmall')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 21
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Create a new database with a new collection.\n",
+      "\n",
+      "Note that both are created lazily: neither the database nor the collection will appear on the server until we've put some data into the collection."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "small_db = client.asmall\n",
+      "small_accidents = small_db.accidents"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 22
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Copy 100 accidents across (the first 100 the server returns; no sort order is specified), then confirm the count."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "for a in accidents.find(limit=100):\n",
+      "    small_accidents.insert(a)\n",
+      "small_accidents.find().count()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 23,
+       "text": [
+        "100"
+       ]
+      }
+     ],
+     "prompt_number": 23
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Create an index on `Accident_Index` in the small collection."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "small_accidents.create_index('Accident_Index')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 24,
+       "text": [
+        "'Accident_Index_1'"
+       ]
+      }
+     ],
+     "prompt_number": 24
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
\ No newline at end of file