In [1]:
# Import the required libraries and open the connection to Mongo

import collections
from datetime import datetime
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
import pandas as pd
import scipy.stats

import pymongo
from bson.objectid import ObjectId
# Alternative MongoDB endpoints, kept for reference but currently disabled:
# client = pymongo.MongoClient('mongodb://localhost:27117/')
# client = pymongo.MongoClient('mongodb://ogedei:27017')
# Active connection: the MongoDB server on localhost, port 27017.
client = pymongo.MongoClient('mongodb://localhost:27017')

In [2]:
# Handles on the `summerofcode` database and the two collections used below:
# `imp` (the collection each CSV is imported into) and `icmas` (the
# collection the imported documents are merged into).
summerofcode_db = client['summerofcode']
imp = summerofcode_db['imp']
icmas = summerofcode_db['icmas']

In [3]:
# Shell escape: list the working directory, which holds the exported
# SUMMEROFCODE-*-grades.csv files imported further down.
!ls

all-starts-hours.png
attempts-per-day.png
completion-hours.png
import-data.ipynb
participated-in-days.png
scores-per-day.png
SUMMEROFCODE-Day 0 Warm up holiday prices-grades.csv
SUMMEROFCODE-Day 10 Word search-grades.csv
SUMMEROFCODE-Day 1 Choosing a holiday-grades.csv
SUMMEROFCODE-Day 2 Lifts-grades.csv
SUMMEROFCODE-Day 3 Door codes-grades.csv
SUMMEROFCODE-Day 4 Beach labyrinth-grades.csv
SUMMEROFCODE-Day 5 Laser display boards-grades.csv
SUMMEROFCODE-Day 6 Tour guides-grades.csv
SUMMEROFCODE-Day 7 Fixing the minibar-grades.csv
SUMMEROFCODE-Day 8 Visa woes-grades.csv
SUMMEROFCODE-Day 9 Resolving the bill-grades.csv
task-analysis.ipynb
winners.ipynb


In [4]:
def update_grades():
    """Flatten the nested grade fields of every document in ``imp``.

    For each document the values at ``Grade/10 -> 00``, ``Q -> ' 1 /5' -> 00``
    and ``Q -> ' 2 /5' -> 00`` are copied to the top-level fields ``grade``,
    ``q1`` and ``q2``; the original ``Grade/10`` and ``Q`` fields are then
    removed.

    Raises:
        KeyError: if a document lacks any of the nested fields read here.
    """
    # NOTE(review): the nested shape presumably comes from mongoimport
    # splitting dotted CSV headers such as "Grade/10.00" -- confirm against
    # the CSV files.
    #
    # The cursor is materialised up front so the loop walks a fixed set of
    # documents while it rewrites them. This replaces the former
    # find(modifiers={"$snapshot": True}) call, which relies on a parameter
    # that newer PyMongo releases no longer accept.
    for doc in list(imp.find()):
        imp.update_one(
            {'_id': doc['_id']},
            {'$set': {'grade': doc['Grade/10']['00'],
                      'q1': doc['Q'][' 1 /5']['00'],
                      'q2': doc['Q'][' 2 /5']['00']},
             '$unset': {'Grade/10': '', 'Q': ''}})

In [5]:
def update_dates():
    """Convert the textual timestamps of every document in ``imp`` to datetimes.

    ``Started on`` is parsed with the format ``'%d %b %Y %H:%M'`` and stored
    as ``started``. Unless ``Completed`` is exactly ``'-'``, it is parsed the
    same way and stored as ``completed``. In both cases the original
    ``Started on``, ``Completed`` and ``Time taken`` fields are removed.

    Raises:
        KeyError: if a document has no ``Started on`` or ``Completed`` field.
        ValueError: if a timestamp does not match the expected format.
    """
    timestamp_format = '%d %b %Y %H:%M'
    raw_fields = {'Started on': '', 'Completed': '', 'Time taken': ''}

    # Materialise the cursor so the loop walks a fixed set of documents while
    # it rewrites them. This replaces find(modifiers={"$snapshot": True}),
    # which relies on a parameter newer PyMongo releases no longer accept.
    for doc in list(imp.find()):
        parsed = {'started': datetime.strptime(doc['Started on'].strip(),
                                               timestamp_format)}
        # A literal '-' is the only value treated as "no completion time".
        if doc['Completed'] != '-':
            parsed['completed'] = datetime.strptime(doc['Completed'].strip(),
                                                    timestamp_format)
        imp.update_one({'_id': doc['_id']},
                       {'$set': parsed, '$unset': raw_fields})

In [6]:
def merge_imported(icma_number):
    """Copy every document from ``imp`` into ``icmas`` under ``icma_number``.

    Any documents already stored in ``icmas`` with the same ``icma_number``
    are deleted first, so calling this again for the same number replaces
    the earlier copy instead of duplicating it.

    Args:
        icma_number: value written to the ``icma_number`` field of every
            copied document, and used to select the documents to delete.
    """
    icmas.delete_many({'icma_number': icma_number})

    # `imp` is only read here, so a plain find() is sufficient; the former
    # modifiers={"$snapshot": True} argument is not accepted by newer
    # PyMongo releases.
    staged = []
    for doc in imp.find():
        # Remove the `_id` carried over from `imp` before inserting.
        del doc['_id']
        doc['icma_number'] = icma_number
        staged.append(doc)

    # insert_many() rejects an empty list, so skip the call when `imp` is empty.
    if staged:
        icmas.insert_many(staged)

In [7]:
# Capture the shell listing of files matching SUMMEROFCODE*csv (one entry
# per file name), then display it.
csvs = !ls SUMMEROFCODE*csv
csvs

['SUMMEROFCODE-Day 0 Warm up holiday prices-grades.csv',
 'SUMMEROFCODE-Day 10 Word search-grades.csv',
 'SUMMEROFCODE-Day 1 Choosing a holiday-grades.csv',
 'SUMMEROFCODE-Day 2 Lifts-grades.csv',
 'SUMMEROFCODE-Day 3 Door codes-grades.csv',
 'SUMMEROFCODE-Day 4 Beach labyrinth-grades.csv',
 'SUMMEROFCODE-Day 5 Laser display boards-grades.csv',
 'SUMMEROFCODE-Day 6 Tour guides-grades.csv',
 'SUMMEROFCODE-Day 7 Fixing the minibar-grades.csv',
 'SUMMEROFCODE-Day 8 Visa woes-grades.csv',
 'SUMMEROFCODE-Day 9 Resolving the bill-grades.csv']

In [8]:
# Remove every document from `icmas` so the import below starts from an
# empty collection.
icmas.delete_many({})



In [9]:
# For each exported CSV: load it into `imp`, clean the imported documents,
# then copy them into `icmas` tagged with the number taken from the file name.
for csv in csvs:
 print(csv)
 # The file names contain spaces, so wrap the name in single quotes before
 # it is interpolated into the shell command below.
 qcsv = "'{}'".format(csv)
 # --drop empties `imp` before each import; --headerline takes field names
 # from the first CSV row; --ignoreBlanks omits empty fields.
 !mongoimport --drop --db summerofcode --collection imp --type csv --headerline --ignoreBlanks --file {qcsv}
 # Discard documents whose Surname is 'Overall average'
 # (presumably a summary row in the export, not a participant -- confirm).
 imp.delete_many({'Surname': 'Overall average'})
 update_grades()
 update_dates()
 # The second whitespace-separated token of the file name is the number,
 # e.g. 'SUMMEROFCODE-Day 10 Word search-grades.csv' -> 10.
 merge_imported(int(csv.split()[1]))

SUMMEROFCODE-Day 0 Warm up holiday prices-grades.csv
2017-08-07T10:55:24.121+0100	connected to: localhost
2017-08-07T10:55:24.121+0100	dropping: summerofcode.imp
2017-08-07T10:55:24.190+0100	imported 294 documents
SUMMEROFCODE-Day 10 Word search-grades.csv
2017-08-07T10:55:24.567+0100	connected to: localhost
2017-08-07T10:55:24.567+0100	dropping: summerofcode.imp
2017-08-07T10:55:24.577+0100	imported 64 documents
SUMMEROFCODE-Day 1 Choosing a holiday-grades.csv
2017-08-07T10:55:24.746+0100	connected to: localhost
2017-08-07T10:55:24.746+0100	dropping: summerofcode.imp
2017-08-07T10:55:24.751+0100	imported 158 documents
SUMMEROFCODE-Day 2 Lifts-grades.csv
2017-08-07T10:55:24.990+0100	connected to: localhost
2017-08-07T10:55:24.990+0100	dropping: summerofcode.imp
2017-08-07T10:55:24.993+0100	imported 105 documents
SUMMEROFCODE-Day 3 Door codes-grades.csv
2017-08-07T10:55:25.187+0100	connected to: localhost
2017-08-07T10:55:25.187+0100	dropping: summerofcode.imp
2017-08-07T10:55:25.198+01

In [10]:
# Total number of documents in `icmas`. count_documents() replaces
# Cursor.count(), which newer PyMongo releases no longer provide; the former
# projection argument had no effect on the count and is dropped.
icmas.count_documents({})

1131

In [11]:
# Count the documents in `icmas` per icma_number, listed in ascending
# icma_number order.
count_per_icma = {"$group": {"_id": "$icma_number", "count": {"$sum": 1}}}
ascending_by_icma = {'$sort': {'_id': 1}}
pipeline = [count_per_icma, ascending_by_icma]
list(icmas.aggregate(pipeline))

[{'_id': 0, 'count': 293},
 {'_id': 1, 'count': 157},
 {'_id': 2, 'count': 104},
 {'_id': 3, 'count': 88},
 {'_id': 4, 'count': 96},
 {'_id': 5, 'count': 76},
 {'_id': 6, 'count': 100},
 {'_id': 7, 'count': 53},
 {'_id': 8, 'count': 60},
 {'_id': 9, 'count': 41},
 {'_id': 10, 'count': 63}]