X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=data-analysis%2Fimport-data.ipynb;h=24c5691c597aa2a9f53b6a9bfcaab4f7fb661c2d;hb=256cad02e44f18b53ee0ca8c5a2737b3c1cd71bc;hp=da7863ee62b45b0f1a8540fa1d46ef39e4538635;hpb=2f9c7346690126c33c311b31f2c71d6442753a04;p=ou-summer-of-code-2017.git diff --git a/data-analysis/import-data.ipynb b/data-analysis/import-data.ipynb index da7863e..24c5691 100644 --- a/data-analysis/import-data.ipynb +++ b/data-analysis/import-data.ipynb @@ -2,8 +2,10 @@ "cells": [ { "cell_type": "code", - "execution_count": 20, - "metadata": {}, + "execution_count": 1, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# Import the required libraries and open the connection to Mongo\n", @@ -27,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 141, + "execution_count": 2, "metadata": { "collapsed": true }, @@ -41,25 +43,34 @@ }, { "cell_type": "code", - "execution_count": 139, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "14.1 Basic CRUD.ipynb\r\n", - "2016J_TMA01_q1-TN.ipynb\r\n", - "2016J_TMA01_q2-withTN.ipynb\r\n", - "import-data-Copy1.ipynb\r\n", + "all-starts-hours.png\r\n", + "attempts-per-day.png\r\n", + "completion-hours.png\r\n", "import-data.ipynb\r\n", + "participated-in-days.png\r\n", + "scores-per-day.png\r\n", "SUMMEROFCODE-Day 0 Warm up holiday prices-grades.csv\r\n", + "SUMMEROFCODE-Day 10 Word search-grades.csv\r\n", "SUMMEROFCODE-Day 1 Choosing a holiday-grades.csv\r\n", "SUMMEROFCODE-Day 2 Lifts-grades.csv\r\n", "SUMMEROFCODE-Day 3 Door codes-grades.csv\r\n", "SUMMEROFCODE-Day 4 Beach labyrinth-grades.csv\r\n", "SUMMEROFCODE-Day 5 Laser display boards-grades.csv\r\n", - "TMA01_Question3_Solutions.ipynb\r\n" + "SUMMEROFCODE-Day 6 Tour guides-grades.csv\r\n", + "SUMMEROFCODE-Day 7 Fixing the minibar-grades.csv\r\n", + "SUMMEROFCODE-Day 8 Visa woes-grades.csv\r\n", + "SUMMEROFCODE-Day 9 Resolving the bill-grades.csv\r\n", + "survey-email_1.csv\r\n", + "survey-email.csv\r\n", + "task-analysis.ipynb\r\n", + "winners.ipynb\r\n" ] } ], @@ -69,33 +80,7 @@ }, { "cell_type": "code", - "execution_count": 102, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['SUMMEROFCODE-Day 0 Warm up holiday prices-grades.csv',\n", - " 'SUMMEROFCODE-Day 1 Choosing a holiday-grades.csv',\n", - " 'SUMMEROFCODE-Day 2 Lifts-grades.csv',\n", - " 'SUMMEROFCODE-Day 3 Door codes-grades.csv',\n", - " 'SUMMEROFCODE-Day 4 Beach labyrinth-grades.csv',\n", - " 'SUMMEROFCODE-Day 5 Laser display boards-grades.csv']" - ] - }, - "execution_count": 102, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "csvs = !ls *csv\n", - "csvs" - ] - }, - { - "cell_type": "code", - "execution_count": 114, + "execution_count": 4, "metadata": { "collapsed": true }, @@ -112,8 +97,10 @@ }, { "cell_type": "code", - "execution_count": 109, - "metadata": {}, + "execution_count": 5, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def update_dates():\n", @@ -133,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 6, "metadata": { "collapsed": true }, @@ -150,42 +137,47 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['SUMMEROFCODE-Day 0 Warm up holiday prices-grades.csv',\n", + " 'SUMMEROFCODE-Day 10 Word search-grades.csv',\n", " 'SUMMEROFCODE-Day 1 Choosing a holiday-grades.csv',\n", " 'SUMMEROFCODE-Day 2 Lifts-grades.csv',\n", " 'SUMMEROFCODE-Day 3 Door codes-grades.csv',\n", " 'SUMMEROFCODE-Day 4 Beach labyrinth-grades.csv',\n", - " 'SUMMEROFCODE-Day 5 Laser display boards-grades.csv']" + " 'SUMMEROFCODE-Day 5 Laser display boards-grades.csv',\n", + " 'SUMMEROFCODE-Day 6 Tour guides-grades.csv',\n", + " 'SUMMEROFCODE-Day 7 Fixing the minibar-grades.csv',\n", + " 'SUMMEROFCODE-Day 8 Visa woes-grades.csv',\n", + " 'SUMMEROFCODE-Day 9 Resolving the bill-grades.csv']" ] }, - "execution_count": 111, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "csvs = !ls *csv\n", + "csvs = !ls SUMMEROFCODE*csv\n", "csvs" ] }, { "cell_type": "code", - "execution_count": 123, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 123, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -196,7 +188,7 @@ }, { "cell_type": "code", - "execution_count": 132, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -204,29 +196,49 @@ "output_type": "stream", "text": [ "SUMMEROFCODE-Day 0 Warm up holiday prices-grades.csv\n", - "2017-07-22T15:24:32.622+0100\tconnected to: localhost\n", - "2017-07-22T15:24:32.622+0100\tdropping: summerofcode.imp\n", - "2017-07-22T15:24:32.629+0100\timported 285 documents\n", + "2017-08-01T10:27:51.256+0100\tconnected to: localhost\n", + "2017-08-01T10:27:51.256+0100\tdropping: summerofcode.imp\n", + "2017-08-01T10:27:51.285+0100\timported 293 documents\n", + "SUMMEROFCODE-Day 10 Word search-grades.csv\n", + "2017-08-01T10:27:51.655+0100\tconnected to: localhost\n", + "2017-08-01T10:27:51.655+0100\tdropping: summerofcode.imp\n", + "2017-08-01T10:27:51.657+0100\timported 61 documents\n", "SUMMEROFCODE-Day 1 Choosing a holiday-grades.csv\n", - "2017-07-22T15:24:32.971+0100\tconnected to: localhost\n", - "2017-07-22T15:24:32.971+0100\tdropping: summerofcode.imp\n", - "2017-07-22T15:24:32.974+0100\timported 137 documents\n", + "2017-08-01T10:27:51.821+0100\tconnected to: localhost\n", + "2017-08-01T10:27:51.821+0100\tdropping: summerofcode.imp\n", + "2017-08-01T10:27:51.824+0100\timported 153 documents\n", "SUMMEROFCODE-Day 2 Lifts-grades.csv\n", - "2017-07-22T15:24:33.202+0100\tconnected to: localhost\n", - "2017-07-22T15:24:33.202+0100\tdropping: summerofcode.imp\n", - "2017-07-22T15:24:33.204+0100\timported 92 documents\n", + "2017-08-01T10:27:52.064+0100\tconnected to: localhost\n", + "2017-08-01T10:27:52.064+0100\tdropping: summerofcode.imp\n", + "2017-08-01T10:27:52.069+0100\timported 103 documents\n", "SUMMEROFCODE-Day 3 Door codes-grades.csv\n", - "2017-07-22T15:24:33.394+0100\tconnected to: localhost\n", - "2017-07-22T15:24:33.394+0100\tdropping: summerofcode.imp\n", - "2017-07-22T15:24:33.396+0100\timported 77 documents\n", + "2017-08-01T10:27:52.287+0100\tconnected to: localhost\n", + "2017-08-01T10:27:52.287+0100\tdropping: summerofcode.imp\n", + "2017-08-01T10:27:52.290+0100\timported 88 documents\n", "SUMMEROFCODE-Day 4 Beach labyrinth-grades.csv\n", - "2017-07-22T15:24:33.573+0100\tconnected to: localhost\n", - "2017-07-22T15:24:33.573+0100\tdropping: summerofcode.imp\n", - "2017-07-22T15:24:33.575+0100\timported 79 documents\n", + "2017-08-01T10:27:52.485+0100\tconnected to: localhost\n", + "2017-08-01T10:27:52.485+0100\tdropping: summerofcode.imp\n", + "2017-08-01T10:27:52.488+0100\timported 97 documents\n", "SUMMEROFCODE-Day 5 Laser display boards-grades.csv\n", - "2017-07-22T15:24:33.753+0100\tconnected to: localhost\n", - "2017-07-22T15:24:33.753+0100\tdropping: summerofcode.imp\n", - "2017-07-22T15:24:33.755+0100\timported 57 documents\n" + "2017-08-01T10:27:52.693+0100\tconnected to: localhost\n", + "2017-08-01T10:27:52.694+0100\tdropping: summerofcode.imp\n", + "2017-08-01T10:27:52.696+0100\timported 77 documents\n", + "SUMMEROFCODE-Day 6 Tour guides-grades.csv\n", + "2017-08-01T10:27:52.882+0100\tconnected to: localhost\n", + "2017-08-01T10:27:52.882+0100\tdropping: summerofcode.imp\n", + "2017-08-01T10:27:52.885+0100\timported 100 documents\n", + "SUMMEROFCODE-Day 7 Fixing the minibar-grades.csv\n", + "2017-08-01T10:27:53.081+0100\tconnected to: localhost\n", + "2017-08-01T10:27:53.081+0100\tdropping: summerofcode.imp\n", + "2017-08-01T10:27:53.083+0100\timported 54 documents\n", + "SUMMEROFCODE-Day 8 Visa woes-grades.csv\n", + "2017-08-01T10:27:53.255+0100\tconnected to: localhost\n", + "2017-08-01T10:27:53.255+0100\tdropping: summerofcode.imp\n", + "2017-08-01T10:27:53.257+0100\timported 61 documents\n", + "SUMMEROFCODE-Day 9 Resolving the bill-grades.csv\n", + "2017-08-01T10:27:53.421+0100\tconnected to: localhost\n", + "2017-08-01T10:27:53.421+0100\tdropping: summerofcode.imp\n", + "2017-08-01T10:27:53.422+0100\timported 42 documents\n" ] } ], @@ -243,16 +255,16 @@ }, { "cell_type": "code", - "execution_count": 134, + "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "721" + "1118" ] }, - "execution_count": 134, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -263,27 +275,33 @@ }, { "cell_type": "code", - "execution_count": 138, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[{'_id': 3, 'count': 76},\n", - " {'_id': 2, 'count': 91},\n", - " {'_id': 5, 'count': 56},\n", - " {'_id': 4, 'count': 78},\n", - " {'_id': 1, 'count': 136},\n", - " {'_id': 0, 'count': 284}]" + "[{'_id': 0, 'count': 292},\n", + " {'_id': 1, 'count': 152},\n", + " {'_id': 2, 'count': 102},\n", + " {'_id': 3, 'count': 87},\n", + " {'_id': 4, 'count': 96},\n", + " {'_id': 5, 'count': 76},\n", + " {'_id': 6, 'count': 99},\n", + " {'_id': 7, 'count': 53},\n", + " {'_id': 8, 'count': 60},\n", + " {'_id': 9, 'count': 41},\n", + " {'_id': 10, 'count': 60}]" ] }, - "execution_count": 138, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pipeline = [{\"$group\": {\"_id\": \"$icma_number\", \"count\": {\"$sum\": 1}}}]\n", + "pipeline = [{\"$group\": {\"_id\": \"$icma_number\", \"count\": {\"$sum\": 1}}},\n", + " {'$sort': {'_id': 1}}]\n", "list(icmas.aggregate(pipeline))" ] },