Imported all the notebooks
[tm351-notebooks.git] / notebooks / 10. Keys and Normalisation / normalisation demo.ipynb
diff --git a/notebooks/10. Keys and Normalisation/normalisation demo.ipynb b/notebooks/10. Keys and Normalisation/normalisation demo.ipynb
new file mode 100644 (file)
index 0000000..ecde4cd
--- /dev/null
@@ -0,0 +1,3231 @@
+{
+ "metadata": {
+  "name": "",
+  "signature": "sha256:05ab0f6d326d0cdd26a6cc999707fac5ac79983144798a0a865c2312c35bad99"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "heading",
+     "level": 1,
+     "metadata": {},
+     "source": [
+      "Normalisation Demo - Prescriptions"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "A worked example of how to normalise the example prescription data, using prescription data records of the following form:\n",
+      "\n",
+      "![Example of a patient record](images/tm351-patient_record.png)"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import pandas as pd"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 4
+    },
+    {
+     "cell_type": "heading",
+     "level": 2,
+     "metadata": {},
+     "source": [
+      "Unnormalised Form (UNF)"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#Read in the data file\n",
+      "df=pd.read_csv('data/normalisation-prescription.csv')\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>patient_id</th>\n",
+        "      <th>patient_name</th>\n",
+        "      <th>doctor_id</th>\n",
+        "      <th>doctor_name</th>\n",
+        "      <th>date</th>\n",
+        "      <th>drug_code</th>\n",
+        "      <th>drug_name</th>\n",
+        "      <th>dosage</th>\n",
+        "      <th>duration</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> Thornton</td>\n",
+        "      <td> d06</td>\n",
+        "      <td> Gibson</td>\n",
+        "      <td> 15-May-14</td>\n",
+        "      <td> T02378</td>\n",
+        "      <td>      Tramadol</td>\n",
+        "      <td>  50 mg 3 x day</td>\n",
+        "      <td> As required</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td>  NaN</td>\n",
+        "      <td>      NaN</td>\n",
+        "      <td> NaN</td>\n",
+        "      <td>    NaN</td>\n",
+        "      <td> 15-May-14</td>\n",
+        "      <td> O17663</td>\n",
+        "      <td>    Omeprazole</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>       Daily</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td>  NaN</td>\n",
+        "      <td>      NaN</td>\n",
+        "      <td> NaN</td>\n",
+        "      <td>    NaN</td>\n",
+        "      <td> 23-May-14</td>\n",
+        "      <td> S33558</td>\n",
+        "      <td>   Simvastatin</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>       Daily</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td>  NaN</td>\n",
+        "      <td>      NaN</td>\n",
+        "      <td> NaN</td>\n",
+        "      <td>    NaN</td>\n",
+        "      <td> 15-Jun-14</td>\n",
+        "      <td> A12458</td>\n",
+        "      <td> Amitriptyline</td>\n",
+        "      <td>  10 mg 5 x day</td>\n",
+        "      <td> As required</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> p007</td>\n",
+        "      <td>  Tennent</td>\n",
+        "      <td> d07</td>\n",
+        "      <td> Paxton</td>\n",
+        "      <td> 01-Jun-14</td>\n",
+        "      <td> C31319</td>\n",
+        "      <td> Ciprofloxacin</td>\n",
+        "      <td> 500 mg 2 x day</td>\n",
+        "      <td>     20 days</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td>  NaN</td>\n",
+        "      <td>      NaN</td>\n",
+        "      <td> NaN</td>\n",
+        "      <td>    NaN</td>\n",
+        "      <td> 01-Jun-14</td>\n",
+        "      <td> T05223</td>\n",
+        "      <td>    Tamsulosin</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>     20 days</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>6</th>\n",
+        "      <td>  NaN</td>\n",
+        "      <td>      NaN</td>\n",
+        "      <td> NaN</td>\n",
+        "      <td>    NaN</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> S33558</td>\n",
+        "      <td>   Simvastatin</td>\n",
+        "      <td>  20 mg 1 x day</td>\n",
+        "      <td>     6 weeks</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 88,
+       "text": [
+        "  patient_id patient_name doctor_id doctor_name       date drug_code  \\\n",
+        "0       p001     Thornton       d06      Gibson  15-May-14    T02378   \n",
+        "1        NaN          NaN       NaN         NaN  15-May-14    O17663   \n",
+        "2        NaN          NaN       NaN         NaN  23-May-14    S33558   \n",
+        "3        NaN          NaN       NaN         NaN  15-Jun-14    A12458   \n",
+        "4       p007      Tennent       d07      Paxton  01-Jun-14    C31319   \n",
+        "5        NaN          NaN       NaN         NaN  01-Jun-14    T05223   \n",
+        "6        NaN          NaN       NaN         NaN  01-Jul-14    S33558   \n",
+        "\n",
+        "       drug_name          dosage     duration  \n",
+        "0       Tramadol   50 mg 3 x day  As required  \n",
+        "1     Omeprazole   40 mg 1 x day        Daily  \n",
+        "2    Simvastatin   40 mg 1 x day        Daily  \n",
+        "3  Amitriptyline   10 mg 5 x day  As required  \n",
+        "4  Ciprofloxacin  500 mg 2 x day      20 days  \n",
+        "5     Tamsulosin   40 mg 1 x day      20 days  \n",
+        "6    Simvastatin   20 mg 1 x day      6 weeks  "
+       ]
+      }
+     ],
+     "prompt_number": 88
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Note that there are cells in the table with null values that should actually be read as having the value of the previously populated cell.\n",
+      "\n",
+      "Note that the contents of these empty cells is thus highly dependent on the order of the rows in the table. Maintaining such a table by hand in an spreadsheet could thus be prone to significant errors. If new precscription items are added to each patient's record at the bottom of their precscription list, it could be easy to make a mistake in making sure the prescription is added to the correct person's list, especially if the list is long and you can't accurately see whose list you are adding a new item too (for example, if their name has scrolled off the top of the page).\n"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Let's tidy up the data by filing in the blanks. As the `.fillna()` documentation describes, the *forward fill* method (`ffill`) \"can propagate [the] last valid observation forward to next valid [one]\"."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df.fillna(method='ffill', inplace=True)\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>patient_id</th>\n",
+        "      <th>patient_name</th>\n",
+        "      <th>doctor_id</th>\n",
+        "      <th>doctor_name</th>\n",
+        "      <th>date</th>\n",
+        "      <th>drug_code</th>\n",
+        "      <th>drug_name</th>\n",
+        "      <th>dosage</th>\n",
+        "      <th>duration</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> Thornton</td>\n",
+        "      <td> d06</td>\n",
+        "      <td> Gibson</td>\n",
+        "      <td> 15-May-14</td>\n",
+        "      <td> T02378</td>\n",
+        "      <td>      Tramadol</td>\n",
+        "      <td>  50 mg 3 x day</td>\n",
+        "      <td> As required</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> Thornton</td>\n",
+        "      <td> d06</td>\n",
+        "      <td> Gibson</td>\n",
+        "      <td> 15-May-14</td>\n",
+        "      <td> O17663</td>\n",
+        "      <td>    Omeprazole</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>       Daily</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> Thornton</td>\n",
+        "      <td> d06</td>\n",
+        "      <td> Gibson</td>\n",
+        "      <td> 23-May-14</td>\n",
+        "      <td> S33558</td>\n",
+        "      <td>   Simvastatin</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>       Daily</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> Thornton</td>\n",
+        "      <td> d06</td>\n",
+        "      <td> Gibson</td>\n",
+        "      <td> 15-Jun-14</td>\n",
+        "      <td> A12458</td>\n",
+        "      <td> Amitriptyline</td>\n",
+        "      <td>  10 mg 5 x day</td>\n",
+        "      <td> As required</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> p007</td>\n",
+        "      <td>  Tennent</td>\n",
+        "      <td> d07</td>\n",
+        "      <td> Paxton</td>\n",
+        "      <td> 01-Jun-14</td>\n",
+        "      <td> C31319</td>\n",
+        "      <td> Ciprofloxacin</td>\n",
+        "      <td> 500 mg 2 x day</td>\n",
+        "      <td>     20 days</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> p007</td>\n",
+        "      <td>  Tennent</td>\n",
+        "      <td> d07</td>\n",
+        "      <td> Paxton</td>\n",
+        "      <td> 01-Jun-14</td>\n",
+        "      <td> T05223</td>\n",
+        "      <td>    Tamsulosin</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>     20 days</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>6</th>\n",
+        "      <td> p007</td>\n",
+        "      <td>  Tennent</td>\n",
+        "      <td> d07</td>\n",
+        "      <td> Paxton</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> S33558</td>\n",
+        "      <td>   Simvastatin</td>\n",
+        "      <td>  20 mg 1 x day</td>\n",
+        "      <td>     6 weeks</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 89,
+       "text": [
+        "  patient_id patient_name doctor_id doctor_name       date drug_code  \\\n",
+        "0       p001     Thornton       d06      Gibson  15-May-14    T02378   \n",
+        "1       p001     Thornton       d06      Gibson  15-May-14    O17663   \n",
+        "2       p001     Thornton       d06      Gibson  23-May-14    S33558   \n",
+        "3       p001     Thornton       d06      Gibson  15-Jun-14    A12458   \n",
+        "4       p007      Tennent       d07      Paxton  01-Jun-14    C31319   \n",
+        "5       p007      Tennent       d07      Paxton  01-Jun-14    T05223   \n",
+        "6       p007      Tennent       d07      Paxton  01-Jul-14    S33558   \n",
+        "\n",
+        "       drug_name          dosage     duration  \n",
+        "0       Tramadol   50 mg 3 x day  As required  \n",
+        "1     Omeprazole   40 mg 1 x day        Daily  \n",
+        "2    Simvastatin   40 mg 1 x day        Daily  \n",
+        "3  Amitriptyline   10 mg 5 x day  As required  \n",
+        "4  Ciprofloxacin  500 mg 2 x day      20 days  \n",
+        "5     Tamsulosin   40 mg 1 x day      20 days  \n",
+        "6    Simvastatin   20 mg 1 x day      6 weeks  "
+       ]
+      }
+     ],
+     "prompt_number": 89
+    },
+    {
+     "cell_type": "heading",
+     "level": 2,
+     "metadata": {},
+     "source": [
+      "First Normal Form (1NF)"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "<blockquote>To represent the data in 1NF we remove any repeating groups of data to separate relations, and choose a primary key for each new relation. A repeating group of data is defined as any attribute or group of attributes that may occur with multiple values for a single value of the primary key.</blockquote>\n",
+      "\n",
+      "So what data elements repeat?"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "for c in df.columns:\n",
+      "    print(c,df[c].value_counts(),sep='\\n',end='\\n\\n')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "patient_id\n",
+        "p001    4\n",
+        "p007    3\n",
+        "dtype: int64\n",
+        "\n",
+        "patient_name\n",
+        "Thornton    4\n",
+        "Tennent     3\n",
+        "dtype: int64\n",
+        "\n",
+        "doctor_id\n",
+        "d06    4\n",
+        "d07    3\n",
+        "dtype: int64\n",
+        "\n",
+        "doctor_name\n",
+        "Gibson    4\n",
+        "Paxton    3\n",
+        "dtype: int64\n",
+        "\n",
+        "date\n",
+        "15-May-14    2\n",
+        "01-Jun-14    2\n",
+        "01-Jul-14    1\n",
+        "15-Jun-14    1\n",
+        "23-May-14    1\n",
+        "dtype: int64\n",
+        "\n",
+        "drug_code\n",
+        "S33558    2\n",
+        "C31319    1\n",
+        "T02378    1\n",
+        "T05223    1\n",
+        "O17663    1\n",
+        "A12458    1\n",
+        "dtype: int64\n",
+        "\n",
+        "drug_name\n",
+        "Simvastatin      2\n",
+        "Tramadol         1\n",
+        "Ciprofloxacin    1\n",
+        "Tamsulosin       1\n",
+        "Omeprazole       1\n",
+        "Amitriptyline    1\n",
+        "dtype: int64\n",
+        "\n",
+        "dosage\n",
+        "40 mg 1 x day     3\n",
+        "500 mg 2 x day    1\n",
+        "20 mg 1 x day     1\n",
+        "10 mg 5 x day     1\n",
+        "50 mg 3 x day     1\n",
+        "dtype: int64\n",
+        "\n",
+        "duration\n",
+        "As required    2\n",
+        "Daily          2\n",
+        "20 days        2\n",
+        "6 weeks        1\n",
+        "dtype: int64\n",
+        "\n"
+       ]
+      }
+     ],
+     "prompt_number": 90
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "By inspection, some of the columns appear to have similar structures, based on the counts of unique items.\n",
+      "\n",
+      "For example, the `patient_id`, `patient_name`, `doctor_id` and `doctor_name` tables each contain two unique values, with  4 occurrences of one value and 3 of the other. Let's separate them out into another table."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df_1a=df[['patient_id', 'patient_name', 'doctor_id','doctor_name']]\n",
+      "df_1a"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>patient_id</th>\n",
+        "      <th>patient_name</th>\n",
+        "      <th>doctor_id</th>\n",
+        "      <th>doctor_name</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> Thornton</td>\n",
+        "      <td> d06</td>\n",
+        "      <td> Gibson</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> Thornton</td>\n",
+        "      <td> d06</td>\n",
+        "      <td> Gibson</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> Thornton</td>\n",
+        "      <td> d06</td>\n",
+        "      <td> Gibson</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> Thornton</td>\n",
+        "      <td> d06</td>\n",
+        "      <td> Gibson</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> p007</td>\n",
+        "      <td>  Tennent</td>\n",
+        "      <td> d07</td>\n",
+        "      <td> Paxton</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> p007</td>\n",
+        "      <td>  Tennent</td>\n",
+        "      <td> d07</td>\n",
+        "      <td> Paxton</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>6</th>\n",
+        "      <td> p007</td>\n",
+        "      <td>  Tennent</td>\n",
+        "      <td> d07</td>\n",
+        "      <td> Paxton</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 39,
+       "text": [
+        "  patient_id patient_name doctor_id doctor_name\n",
+        "0       p001     Thornton       d06      Gibson\n",
+        "1       p001     Thornton       d06      Gibson\n",
+        "2       p001     Thornton       d06      Gibson\n",
+        "3       p001     Thornton       d06      Gibson\n",
+        "4       p007      Tennent       d07      Paxton\n",
+        "5       p007      Tennent       d07      Paxton\n",
+        "6       p007      Tennent       d07      Paxton"
+       ]
+      }
+     ],
+     "prompt_number": 39
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "We actually want to retain the unique combinations of these."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df_1a=df_1a.drop_duplicates()\n",
+      "df_1a"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>patient_id</th>\n",
+        "      <th>patient_name</th>\n",
+        "      <th>doctor_id</th>\n",
+        "      <th>doctor_name</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> Thornton</td>\n",
+        "      <td> d06</td>\n",
+        "      <td> Gibson</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> p007</td>\n",
+        "      <td>  Tennent</td>\n",
+        "      <td> d07</td>\n",
+        "      <td> Paxton</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 40,
+       "text": [
+        "  patient_id patient_name doctor_id doctor_name\n",
+        "0       p001     Thornton       d06      Gibson\n",
+        "4       p007      Tennent       d07      Paxton"
+       ]
+      }
+     ],
+     "prompt_number": 40
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "We need to retain one of these columns as a key element in the actual prescriptions table. As the prescriptions are applied to patients, it perhaps makes sense to use a unique paitent identifier as the link which is to say, `patient id`. Let's create a new table by dropping the `patient_name`, `doctor_id` and `doctor_name` columns from the original table, but retaining the  `patient id` column and the other columns relating to the prescription."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df_1b=df.drop(['patient_name', 'doctor_id','doctor_name'], 1)\n",
+      "df_1b"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>patient_id</th>\n",
+        "      <th>date</th>\n",
+        "      <th>drug_code</th>\n",
+        "      <th>drug_name</th>\n",
+        "      <th>dosage</th>\n",
+        "      <th>duration</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> 15-May-14</td>\n",
+        "      <td> T02378</td>\n",
+        "      <td>      Tramadol</td>\n",
+        "      <td>  50 mg 3 x day</td>\n",
+        "      <td> As required</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> 15-May-14</td>\n",
+        "      <td> O17663</td>\n",
+        "      <td>    Omeprazole</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>       Daily</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> 23-May-14</td>\n",
+        "      <td> S33558</td>\n",
+        "      <td>   Simvastatin</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>       Daily</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> 15-Jun-14</td>\n",
+        "      <td> A12458</td>\n",
+        "      <td> Amitriptyline</td>\n",
+        "      <td>  10 mg 5 x day</td>\n",
+        "      <td> As required</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> p007</td>\n",
+        "      <td> 01-Jun-14</td>\n",
+        "      <td> C31319</td>\n",
+        "      <td> Ciprofloxacin</td>\n",
+        "      <td> 500 mg 2 x day</td>\n",
+        "      <td>     20 days</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> p007</td>\n",
+        "      <td> 01-Jun-14</td>\n",
+        "      <td> T05223</td>\n",
+        "      <td>    Tamsulosin</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>     20 days</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>6</th>\n",
+        "      <td> p007</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> S33558</td>\n",
+        "      <td>   Simvastatin</td>\n",
+        "      <td>  20 mg 1 x day</td>\n",
+        "      <td>     6 weeks</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 36,
+       "text": [
+        "  patient_id       date drug_code      drug_name          dosage     duration\n",
+        "0       p001  15-May-14    T02378       Tramadol   50 mg 3 x day  As required\n",
+        "1       p001  15-May-14    O17663     Omeprazole   40 mg 1 x day        Daily\n",
+        "2       p001  23-May-14    S33558    Simvastatin   40 mg 1 x day        Daily\n",
+        "3       p001  15-Jun-14    A12458  Amitriptyline   10 mg 5 x day  As required\n",
+        "4       p007  01-Jun-14    C31319  Ciprofloxacin  500 mg 2 x day      20 days\n",
+        "5       p007  01-Jun-14    T05223     Tamsulosin   40 mg 1 x day      20 days\n",
+        "6       p007  01-Jul-14    S33558    Simvastatin   20 mg 1 x day      6 weeks"
+       ]
+      }
+     ],
+     "prompt_number": 36
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "<blockquote>As both new relations have an attribute in common, patient_id, the original relation can be recreated from these relations by performing a join operation on patient_id.</blockquote>\n"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "pd.merge(df_1a,df_1b,on='patient_id')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>patient_id</th>\n",
+        "      <th>patient_name</th>\n",
+        "      <th>doctor_id</th>\n",
+        "      <th>doctor_name</th>\n",
+        "      <th>date</th>\n",
+        "      <th>drug_code</th>\n",
+        "      <th>drug_name</th>\n",
+        "      <th>dosage</th>\n",
+        "      <th>duration</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> Thornton</td>\n",
+        "      <td> d06</td>\n",
+        "      <td> Gibson</td>\n",
+        "      <td> 15-May-14</td>\n",
+        "      <td> T02378</td>\n",
+        "      <td>      Tramadol</td>\n",
+        "      <td>  50 mg 3 x day</td>\n",
+        "      <td> As required</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> Thornton</td>\n",
+        "      <td> d06</td>\n",
+        "      <td> Gibson</td>\n",
+        "      <td> 15-May-14</td>\n",
+        "      <td> O17663</td>\n",
+        "      <td>    Omeprazole</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>       Daily</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> Thornton</td>\n",
+        "      <td> d06</td>\n",
+        "      <td> Gibson</td>\n",
+        "      <td> 23-May-14</td>\n",
+        "      <td> S33558</td>\n",
+        "      <td>   Simvastatin</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>       Daily</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> Thornton</td>\n",
+        "      <td> d06</td>\n",
+        "      <td> Gibson</td>\n",
+        "      <td> 15-Jun-14</td>\n",
+        "      <td> A12458</td>\n",
+        "      <td> Amitriptyline</td>\n",
+        "      <td>  10 mg 5 x day</td>\n",
+        "      <td> As required</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> p007</td>\n",
+        "      <td>  Tennent</td>\n",
+        "      <td> d07</td>\n",
+        "      <td> Paxton</td>\n",
+        "      <td> 01-Jun-14</td>\n",
+        "      <td> C31319</td>\n",
+        "      <td> Ciprofloxacin</td>\n",
+        "      <td> 500 mg 2 x day</td>\n",
+        "      <td>     20 days</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> p007</td>\n",
+        "      <td>  Tennent</td>\n",
+        "      <td> d07</td>\n",
+        "      <td> Paxton</td>\n",
+        "      <td> 01-Jun-14</td>\n",
+        "      <td> T05223</td>\n",
+        "      <td>    Tamsulosin</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>     20 days</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>6</th>\n",
+        "      <td> p007</td>\n",
+        "      <td>  Tennent</td>\n",
+        "      <td> d07</td>\n",
+        "      <td> Paxton</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> S33558</td>\n",
+        "      <td>   Simvastatin</td>\n",
+        "      <td>  20 mg 1 x day</td>\n",
+        "      <td>     6 weeks</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 75,
+       "text": [
+        "  patient_id patient_name doctor_id doctor_name       date drug_code  \\\n",
+        "0       p001     Thornton       d06      Gibson  15-May-14    T02378   \n",
+        "1       p001     Thornton       d06      Gibson  15-May-14    O17663   \n",
+        "2       p001     Thornton       d06      Gibson  23-May-14    S33558   \n",
+        "3       p001     Thornton       d06      Gibson  15-Jun-14    A12458   \n",
+        "4       p007      Tennent       d07      Paxton  01-Jun-14    C31319   \n",
+        "5       p007      Tennent       d07      Paxton  01-Jun-14    T05223   \n",
+        "6       p007      Tennent       d07      Paxton  01-Jul-14    S33558   \n",
+        "\n",
+        "       drug_name          dosage     duration  \n",
+        "0       Tramadol   50 mg 3 x day  As required  \n",
+        "1     Omeprazole   40 mg 1 x day        Daily  \n",
+        "2    Simvastatin   40 mg 1 x day        Daily  \n",
+        "3  Amitriptyline   10 mg 5 x day  As required  \n",
+        "4  Ciprofloxacin  500 mg 2 x day      20 days  \n",
+        "5     Tamsulosin   40 mg 1 x day      20 days  \n",
+        "6    Simvastatin   20 mg 1 x day      6 weeks  "
+       ]
+      }
+     ],
+     "prompt_number": 75
+    },
+    {
+     "cell_type": "heading",
+     "level": 2,
+     "metadata": {},
+     "source": [
+      "Second Normal Form (2NF)"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "<blockquote>In the first of the two 1NF relations shown above, the combination of patient_id, date and drug_code attributes together determine the dosage and duration attributes, but only drug_code determines drug_name. Thus, drug_name is removed from the relation, and drug_code and drug_name form a new relation, with drug_code as the primary key.</blockquote>\n"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#Split out the drug code and drug name\n",
+      "df_2a=df_1b[['drug_code','drug_name']].drop_duplicates()\n",
+      "df_2a"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>drug_code</th>\n",
+        "      <th>drug_name</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> T02378</td>\n",
+        "      <td>      Tramadol</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> O17663</td>\n",
+        "      <td>    Omeprazole</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> S33558</td>\n",
+        "      <td>   Simvastatin</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> A12458</td>\n",
+        "      <td> Amitriptyline</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> C31319</td>\n",
+        "      <td> Ciprofloxacin</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> T05223</td>\n",
+        "      <td>    Tamsulosin</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 42,
+       "text": [
+        "  drug_code      drug_name\n",
+        "0    T02378       Tramadol\n",
+        "1    O17663     Omeprazole\n",
+        "2    S33558    Simvastatin\n",
+        "3    A12458  Amitriptyline\n",
+        "4    C31319  Ciprofloxacin\n",
+        "5    T05223     Tamsulosin"
+       ]
+      }
+     ],
+     "prompt_number": 42
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#Remove the drug name from table\n",
+      "df_2b=df_1b.drop(['drug_name'], 1)\n",
+      "df_2b"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>patient_id</th>\n",
+        "      <th>date</th>\n",
+        "      <th>drug_code</th>\n",
+        "      <th>dosage</th>\n",
+        "      <th>duration</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> 15-May-14</td>\n",
+        "      <td> T02378</td>\n",
+        "      <td>  50 mg 3 x day</td>\n",
+        "      <td> As required</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> 15-May-14</td>\n",
+        "      <td> O17663</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>       Daily</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> 23-May-14</td>\n",
+        "      <td> S33558</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>       Daily</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> 15-Jun-14</td>\n",
+        "      <td> A12458</td>\n",
+        "      <td>  10 mg 5 x day</td>\n",
+        "      <td> As required</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> p007</td>\n",
+        "      <td> 01-Jun-14</td>\n",
+        "      <td> C31319</td>\n",
+        "      <td> 500 mg 2 x day</td>\n",
+        "      <td>     20 days</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> p007</td>\n",
+        "      <td> 01-Jun-14</td>\n",
+        "      <td> T05223</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>     20 days</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>6</th>\n",
+        "      <td> p007</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> S33558</td>\n",
+        "      <td>  20 mg 1 x day</td>\n",
+        "      <td>     6 weeks</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 44,
+       "text": [
+        "  patient_id       date drug_code          dosage     duration\n",
+        "0       p001  15-May-14    T02378   50 mg 3 x day  As required\n",
+        "1       p001  15-May-14    O17663   40 mg 1 x day        Daily\n",
+        "2       p001  23-May-14    S33558   40 mg 1 x day        Daily\n",
+        "3       p001  15-Jun-14    A12458   10 mg 5 x day  As required\n",
+        "4       p007  01-Jun-14    C31319  500 mg 2 x day      20 days\n",
+        "5       p007  01-Jun-14    T05223   40 mg 1 x day      20 days\n",
+        "6       p007  01-Jul-14    S33558   20 mg 1 x day      6 weeks"
+       ]
+      }
+     ],
+     "prompt_number": 44
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "<blockquote>As both new relations have an attribute in common, drug_code, the original relation can be recreated from these relations by performing a join operation on drug_code.</blockquote>"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#Test the join\n",
+      "pd.merge(df_2a,df_2b,on='drug_code')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>drug_code</th>\n",
+        "      <th>drug_name</th>\n",
+        "      <th>patient_id</th>\n",
+        "      <th>date</th>\n",
+        "      <th>dosage</th>\n",
+        "      <th>duration</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> T02378</td>\n",
+        "      <td>      Tramadol</td>\n",
+        "      <td> p001</td>\n",
+        "      <td> 15-May-14</td>\n",
+        "      <td>  50 mg 3 x day</td>\n",
+        "      <td> As required</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> O17663</td>\n",
+        "      <td>    Omeprazole</td>\n",
+        "      <td> p001</td>\n",
+        "      <td> 15-May-14</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>       Daily</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> S33558</td>\n",
+        "      <td>   Simvastatin</td>\n",
+        "      <td> p001</td>\n",
+        "      <td> 23-May-14</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>       Daily</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> S33558</td>\n",
+        "      <td>   Simvastatin</td>\n",
+        "      <td> p007</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td>  20 mg 1 x day</td>\n",
+        "      <td>     6 weeks</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> A12458</td>\n",
+        "      <td> Amitriptyline</td>\n",
+        "      <td> p001</td>\n",
+        "      <td> 15-Jun-14</td>\n",
+        "      <td>  10 mg 5 x day</td>\n",
+        "      <td> As required</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> C31319</td>\n",
+        "      <td> Ciprofloxacin</td>\n",
+        "      <td> p007</td>\n",
+        "      <td> 01-Jun-14</td>\n",
+        "      <td> 500 mg 2 x day</td>\n",
+        "      <td>     20 days</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>6</th>\n",
+        "      <td> T05223</td>\n",
+        "      <td>    Tamsulosin</td>\n",
+        "      <td> p007</td>\n",
+        "      <td> 01-Jun-14</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>     20 days</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 76,
+       "text": [
+        "  drug_code      drug_name patient_id       date          dosage     duration\n",
+        "0    T02378       Tramadol       p001  15-May-14   50 mg 3 x day  As required\n",
+        "1    O17663     Omeprazole       p001  15-May-14   40 mg 1 x day        Daily\n",
+        "2    S33558    Simvastatin       p001  23-May-14   40 mg 1 x day        Daily\n",
+        "3    S33558    Simvastatin       p007  01-Jul-14   20 mg 1 x day      6 weeks\n",
+        "4    A12458  Amitriptyline       p001  15-Jun-14   10 mg 5 x day  As required\n",
+        "5    C31319  Ciprofloxacin       p007  01-Jun-14  500 mg 2 x day      20 days\n",
+        "6    T05223     Tamsulosin       p007  01-Jun-14   40 mg 1 x day      20 days"
+       ]
+      }
+     ],
+     "prompt_number": 76
+    },
+    {
+     "cell_type": "heading",
+     "level": 2,
+     "metadata": {},
+     "source": [
+      "Third Normal Form (3NF)"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "<blockquote>To represent the data in 3NF we remove any attributes that are not directly dependent upon the primary key to separate relations, and choose a primary key for each new relation.\n",
+      "<br/>\n",
+      "In the second of the two 1NF relations shown above, the patient_name and doctor_id attributes are all directly dependent on patient_id but, doctor_name is directly dependent on doctor_id not patient_id. Therefore create a new relation from doctor_id and doctor_name where doctor_id is the primary key. The doctor_id remains in the original relation, as its value is determined by patient_id and where it acts as a foreign key referencing the new relation.</blockquote>"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#Separate out the doctor and patient details\n",
+      "df_3a=df_1a[['doctor_id','doctor_name']]\n",
+      "df_3a"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>doctor_id</th>\n",
+        "      <th>doctor_name</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> d06</td>\n",
+        "      <td> Gibson</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> d07</td>\n",
+        "      <td> Paxton</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 47,
+       "text": [
+        "  doctor_id doctor_name\n",
+        "0       d06      Gibson\n",
+        "4       d07      Paxton"
+       ]
+      }
+     ],
+     "prompt_number": 47
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df_3b=df_1a.drop(['doctor_name'],1)\n",
+      "df_3b"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>patient_id</th>\n",
+        "      <th>patient_name</th>\n",
+        "      <th>doctor_id</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> Thornton</td>\n",
+        "      <td> d06</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> p007</td>\n",
+        "      <td>  Tennent</td>\n",
+        "      <td> d07</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 49,
+       "text": [
+        "  patient_id patient_name doctor_id\n",
+        "0       p001     Thornton       d06\n",
+        "4       p007      Tennent       d07"
+       ]
+      }
+     ],
+     "prompt_number": 49
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "<blockquote>As both new relations have an attribute in common, doctor_id, the original relation can be recreated from these relations by performing a join operation on doctor_id.</blockquote>"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "pd.merge(df_3a,df_3b,on='doctor_id')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>doctor_id</th>\n",
+        "      <th>doctor_name</th>\n",
+        "      <th>patient_id</th>\n",
+        "      <th>patient_name</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> d06</td>\n",
+        "      <td> Gibson</td>\n",
+        "      <td> p001</td>\n",
+        "      <td> Thornton</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> d07</td>\n",
+        "      <td> Paxton</td>\n",
+        "      <td> p007</td>\n",
+        "      <td>  Tennent</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 78,
+       "text": [
+        "  doctor_id doctor_name patient_id patient_name\n",
+        "0       d06      Gibson       p001     Thornton\n",
+        "1       d07      Paxton       p007      Tennent"
+       ]
+      }
+     ],
+     "prompt_number": 78
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "*That's where the example stops. Don't we need to go a step further?*"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df_3c=df_3b[['patient_id','patient_name']]\n",
+      "df_3c"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>patient_id</th>\n",
+        "      <th>patient_name</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> Thornton</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> p007</td>\n",
+        "      <td>  Tennent</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 51,
+       "text": [
+        "  patient_id patient_name\n",
+        "0       p001     Thornton\n",
+        "4       p007      Tennent"
+       ]
+      }
+     ],
+     "prompt_number": 51
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df_3d=df_3b.drop(['patient_name'],1)\n",
+      "df_3d"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>patient_id</th>\n",
+        "      <th>doctor_id</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> d06</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> p007</td>\n",
+        "      <td> d07</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 52,
+       "text": [
+        "  patient_id doctor_id\n",
+        "0       p001       d06\n",
+        "4       p007       d07"
+       ]
+      }
+     ],
+     "prompt_number": 52
+    },
+    {
+     "cell_type": "heading",
+     "level": 2,
+     "metadata": {},
+     "source": [
+      "Making a Function of It..."
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Looking at the steps abovem can we make a function to help perform some of the above operations?"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "def tableNorming(df,newTableCols,keyCol):\n",
+      "    df1=df[newTableCols].drop_duplicates()\n",
+      "    df2=df.drop(set(newTableCols)-set([keyCol]),1)\n",
+      "    return df1,df2"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 70
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "a,b=tableNorming(df,['patient_id', 'patient_name', 'doctor_id','doctor_name'],'patient_id')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 72
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "a"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>patient_id</th>\n",
+        "      <th>patient_name</th>\n",
+        "      <th>doctor_id</th>\n",
+        "      <th>doctor_name</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> Thornton</td>\n",
+        "      <td> d06</td>\n",
+        "      <td> Gibson</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> p007</td>\n",
+        "      <td>  Tennent</td>\n",
+        "      <td> d07</td>\n",
+        "      <td> Paxton</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 73,
+       "text": [
+        "  patient_id patient_name doctor_id doctor_name\n",
+        "0       p001     Thornton       d06      Gibson\n",
+        "4       p007      Tennent       d07      Paxton"
+       ]
+      }
+     ],
+     "prompt_number": 73
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "b"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>patient_id</th>\n",
+        "      <th>date</th>\n",
+        "      <th>drug_code</th>\n",
+        "      <th>drug_name</th>\n",
+        "      <th>dosage</th>\n",
+        "      <th>duration</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> 15-May-14</td>\n",
+        "      <td> T02378</td>\n",
+        "      <td>      Tramadol</td>\n",
+        "      <td>  50 mg 3 x day</td>\n",
+        "      <td> As required</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> 15-May-14</td>\n",
+        "      <td> O17663</td>\n",
+        "      <td>    Omeprazole</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>       Daily</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> 23-May-14</td>\n",
+        "      <td> S33558</td>\n",
+        "      <td>   Simvastatin</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>       Daily</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> p001</td>\n",
+        "      <td> 15-Jun-14</td>\n",
+        "      <td> A12458</td>\n",
+        "      <td> Amitriptyline</td>\n",
+        "      <td>  10 mg 5 x day</td>\n",
+        "      <td> As required</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> p007</td>\n",
+        "      <td> 01-Jun-14</td>\n",
+        "      <td> C31319</td>\n",
+        "      <td> Ciprofloxacin</td>\n",
+        "      <td> 500 mg 2 x day</td>\n",
+        "      <td>     20 days</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> p007</td>\n",
+        "      <td> 01-Jun-14</td>\n",
+        "      <td> T05223</td>\n",
+        "      <td>    Tamsulosin</td>\n",
+        "      <td>  40 mg 1 x day</td>\n",
+        "      <td>     20 days</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>6</th>\n",
+        "      <td> p007</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> S33558</td>\n",
+        "      <td>   Simvastatin</td>\n",
+        "      <td>  20 mg 1 x day</td>\n",
+        "      <td>     6 weeks</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 74,
+       "text": [
+        "  patient_id       date drug_code      drug_name          dosage     duration\n",
+        "0       p001  15-May-14    T02378       Tramadol   50 mg 3 x day  As required\n",
+        "1       p001  15-May-14    O17663     Omeprazole   40 mg 1 x day        Daily\n",
+        "2       p001  23-May-14    S33558    Simvastatin   40 mg 1 x day        Daily\n",
+        "3       p001  15-Jun-14    A12458  Amitriptyline   10 mg 5 x day  As required\n",
+        "4       p007  01-Jun-14    C31319  Ciprofloxacin  500 mg 2 x day      20 days\n",
+        "5       p007  01-Jun-14    T05223     Tamsulosin   40 mg 1 x day      20 days\n",
+        "6       p007  01-Jul-14    S33558    Simvastatin   20 mg 1 x day      6 weeks"
+       ]
+      }
+     ],
+     "prompt_number": 74
+    },
+    {
+     "cell_type": "heading",
+     "level": 1,
+     "metadata": {},
+     "source": [
+      "Exercise - Invoice Data"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Data from a dataset generated from invoice records taking the following form:\n",
+      "\n",
+      "![Example inovice record](images/tm351-invoice.png)\n",
+      "\n",
+      "is presented in an unnormalised form:"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#Read in the data file\n",
+      "ex1=pd.read_csv('data/normalisation-books.csv')\n",
+      "ex1"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>invoice_no</th>\n",
+        "      <th>date</th>\n",
+        "      <th>customer_no</th>\n",
+        "      <th>customer_name</th>\n",
+        "      <th>isbn</th>\n",
+        "      <th>title</th>\n",
+        "      <th>quantity</th>\n",
+        "      <th>cost</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> 123789</td>\n",
+        "      <td> Dimity Stone</td>\n",
+        "      <td> 978-1292025827</td>\n",
+        "      <td>    A First Course in Database Systems</td>\n",
+        "      <td> 10</td>\n",
+        "      <td> \u00a310.00</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td>    NaN</td>\n",
+        "      <td>       NaN</td>\n",
+        "      <td>    NaN</td>\n",
+        "      <td>          NaN</td>\n",
+        "      <td> 978-1558604568</td>\n",
+        "      <td>                              SQL:1999</td>\n",
+        "      <td> 10</td>\n",
+        "      <td> \u00a354.99</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td>    NaN</td>\n",
+        "      <td>       NaN</td>\n",
+        "      <td>    NaN</td>\n",
+        "      <td>          NaN</td>\n",
+        "      <td> 978-0071005296</td>\n",
+        "      <td>              Database System Concepts</td>\n",
+        "      <td> 10</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td>    NaN</td>\n",
+        "      <td>       NaN</td>\n",
+        "      <td>    NaN</td>\n",
+        "      <td>          NaN</td>\n",
+        "      <td> 978-0130402646</td>\n",
+        "      <td>        Database System Implementation</td>\n",
+        "      <td> 10</td>\n",
+        "      <td> \u00a348.78</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td>    NaN</td>\n",
+        "      <td>       NaN</td>\n",
+        "      <td>    NaN</td>\n",
+        "      <td>          NaN</td>\n",
+        "      <td> 978-1852330088</td>\n",
+        "      <td> A Guided Tour of Relational Databases</td>\n",
+        "      <td> 10</td>\n",
+        "      <td> \u00a341.69</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> 966048</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> 234678</td>\n",
+        "      <td>   Roger Monk</td>\n",
+        "      <td> 978-0071005296</td>\n",
+        "      <td>              Database System Concepts</td>\n",
+        "      <td>  1</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>6</th>\n",
+        "      <td>    NaN</td>\n",
+        "      <td>       NaN</td>\n",
+        "      <td>    NaN</td>\n",
+        "      <td>          NaN</td>\n",
+        "      <td> 978-0471141617</td>\n",
+        "      <td>           Building the Data Warehouse</td>\n",
+        "      <td>  1</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>7</th>\n",
+        "      <td>    NaN</td>\n",
+        "      <td>       NaN</td>\n",
+        "      <td>    NaN</td>\n",
+        "      <td>          NaN</td>\n",
+        "      <td> 978-1558604896</td>\n",
+        "      <td>  Data Mining: Concepts and Techniques</td>\n",
+        "      <td>  1</td>\n",
+        "      <td> \u00a318.55</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 85,
+       "text": [
+        "   invoice_no       date  customer_no customer_name            isbn  \\\n",
+        "0      966047  01-Jul-14       123789  Dimity Stone  978-1292025827   \n",
+        "1         NaN        NaN          NaN           NaN  978-1558604568   \n",
+        "2         NaN        NaN          NaN           NaN  978-0071005296   \n",
+        "3         NaN        NaN          NaN           NaN  978-0130402646   \n",
+        "4         NaN        NaN          NaN           NaN  978-1852330088   \n",
+        "5      966048  01-Jul-14       234678    Roger Monk  978-0071005296   \n",
+        "6         NaN        NaN          NaN           NaN  978-0471141617   \n",
+        "7         NaN        NaN          NaN           NaN  978-1558604896   \n",
+        "\n",
+        "                                   title  quantity    cost  \n",
+        "0     A First Course in Database Systems        10  \u00a310.00  \n",
+        "1                               SQL:1999        10  \u00a354.99  \n",
+        "2               Database System Concepts        10   \u00a39.55  \n",
+        "3         Database System Implementation        10  \u00a348.78  \n",
+        "4  A Guided Tour of Relational Databases        10  \u00a341.69  \n",
+        "5               Database System Concepts         1   \u00a39.55  \n",
+        "6            Building the Data Warehouse         1   \u00a39.55  \n",
+        "7   Data Mining: Concepts and Techniques         1  \u00a318.55  "
+       ]
+      }
+     ],
+     "prompt_number": 85
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Clean the dataset as required and then put it into a set of normalised relations (tables) that avoid unnecessary duplication of data, and minimise the chances of update, deletion and addition anomalies."
+     ]
+    },
+    {
+     "cell_type": "heading",
+     "level": 4,
+     "metadata": {},
+     "source": [
+      "Discussion"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#Clean\n",
+      "ex1.fillna(method='ffill', inplace=True)\n",
+      "ex1"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>invoice_no</th>\n",
+        "      <th>date</th>\n",
+        "      <th>customer_no</th>\n",
+        "      <th>customer_name</th>\n",
+        "      <th>isbn</th>\n",
+        "      <th>title</th>\n",
+        "      <th>quantity</th>\n",
+        "      <th>cost</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> 123789</td>\n",
+        "      <td> Dimity Stone</td>\n",
+        "      <td> 978-1292025827</td>\n",
+        "      <td>    A First Course in Database Systems</td>\n",
+        "      <td> 10</td>\n",
+        "      <td> \u00a310.00</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> 123789</td>\n",
+        "      <td> Dimity Stone</td>\n",
+        "      <td> 978-1558604568</td>\n",
+        "      <td>                              SQL:1999</td>\n",
+        "      <td> 10</td>\n",
+        "      <td> \u00a354.99</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> 123789</td>\n",
+        "      <td> Dimity Stone</td>\n",
+        "      <td> 978-0071005296</td>\n",
+        "      <td>              Database System Concepts</td>\n",
+        "      <td> 10</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> 123789</td>\n",
+        "      <td> Dimity Stone</td>\n",
+        "      <td> 978-0130402646</td>\n",
+        "      <td>        Database System Implementation</td>\n",
+        "      <td> 10</td>\n",
+        "      <td> \u00a348.78</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> 123789</td>\n",
+        "      <td> Dimity Stone</td>\n",
+        "      <td> 978-1852330088</td>\n",
+        "      <td> A Guided Tour of Relational Databases</td>\n",
+        "      <td> 10</td>\n",
+        "      <td> \u00a341.69</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> 966048</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> 234678</td>\n",
+        "      <td>   Roger Monk</td>\n",
+        "      <td> 978-0071005296</td>\n",
+        "      <td>              Database System Concepts</td>\n",
+        "      <td>  1</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>6</th>\n",
+        "      <td> 966048</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> 234678</td>\n",
+        "      <td>   Roger Monk</td>\n",
+        "      <td> 978-0471141617</td>\n",
+        "      <td>           Building the Data Warehouse</td>\n",
+        "      <td>  1</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>7</th>\n",
+        "      <td> 966048</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> 234678</td>\n",
+        "      <td>   Roger Monk</td>\n",
+        "      <td> 978-1558604896</td>\n",
+        "      <td>  Data Mining: Concepts and Techniques</td>\n",
+        "      <td>  1</td>\n",
+        "      <td> \u00a318.55</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 86,
+       "text": [
+        "   invoice_no       date  customer_no customer_name            isbn  \\\n",
+        "0      966047  01-Jul-14       123789  Dimity Stone  978-1292025827   \n",
+        "1      966047  01-Jul-14       123789  Dimity Stone  978-1558604568   \n",
+        "2      966047  01-Jul-14       123789  Dimity Stone  978-0071005296   \n",
+        "3      966047  01-Jul-14       123789  Dimity Stone  978-0130402646   \n",
+        "4      966047  01-Jul-14       123789  Dimity Stone  978-1852330088   \n",
+        "5      966048  01-Jul-14       234678    Roger Monk  978-0071005296   \n",
+        "6      966048  01-Jul-14       234678    Roger Monk  978-0471141617   \n",
+        "7      966048  01-Jul-14       234678    Roger Monk  978-1558604896   \n",
+        "\n",
+        "                                   title  quantity    cost  \n",
+        "0     A First Course in Database Systems        10  \u00a310.00  \n",
+        "1                               SQL:1999        10  \u00a354.99  \n",
+        "2               Database System Concepts        10   \u00a39.55  \n",
+        "3         Database System Implementation        10  \u00a348.78  \n",
+        "4  A Guided Tour of Relational Databases        10  \u00a341.69  \n",
+        "5               Database System Concepts         1   \u00a39.55  \n",
+        "6            Building the Data Warehouse         1   \u00a39.55  \n",
+        "7   Data Mining: Concepts and Techniques         1  \u00a318.55  "
+       ]
+      }
+     ],
+     "prompt_number": 86
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#Review the data\n",
+      "for c in ex1.columns:\n",
+      "    print(c,ex1[c].value_counts(),sep='\\n',end='\\n\\n')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "invoice_no\n",
+        "966047    5\n",
+        "966048    3\n",
+        "dtype: int64\n",
+        "\n",
+        "date\n",
+        "01-Jul-14    8\n",
+        "dtype: int64\n",
+        "\n",
+        "customer_no\n",
+        "123789    5\n",
+        "234678    3\n",
+        "dtype: int64\n",
+        "\n",
+        "customer_name\n",
+        "Dimity Stone    5\n",
+        "Roger Monk      3\n",
+        "dtype: int64\n",
+        "\n",
+        "isbn\n",
+        "978-0071005296    2\n",
+        "978-1558604896    1\n",
+        "978-0471141617    1\n",
+        "978-0130402646    1\n",
+        "978-1292025827    1\n",
+        "978-1852330088    1\n",
+        "978-1558604568    1\n",
+        "dtype: int64\n",
+        "\n",
+        "title\n",
+        "Database System Concepts                 2\n",
+        "A Guided Tour of Relational Databases    1\n",
+        "Building the Data Warehouse              1\n",
+        "Data Mining: Concepts and Techniques     1\n",
+        "Database System Implementation           1\n",
+        "A First Course in Database Systems       1\n",
+        "SQL:1999                                 1\n",
+        "dtype: int64\n",
+        "\n",
+        "quantity\n",
+        "10    5\n",
+        "1     3\n",
+        "dtype: int64\n",
+        "\n",
+        "cost\n",
+        "\u00a39.55     3\n",
+        "\u00a318.55    1\n",
+        "\u00a348.78    1\n",
+        "\u00a310.00    1\n",
+        "\u00a341.69    1\n",
+        "\u00a354.99    1\n",
+        "dtype: int64\n",
+        "\n"
+       ]
+      }
+     ],
+     "prompt_number": 92
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "If we were to simply inspect the unique counts, they might suggest that the `invoice_no`, `customer_no`, `customer_name` and `quantity` columns might share common sets of values as a repeating group. Looking at the column names and values, as well as the original invoice, we might anticipate that there is a meaningful relation between `customer_no` and `customer_name`, that an `invoice_no` relates to a particular transaction with a particular customer on a particular `date`, and the `quantity` is actually an independent value relating to the individual book purchase transactions detailed by a particular invoice."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#Convert to 1NF\n",
+      "ex1_1a,ex1_1b=tableNorming(ex1,['invoice_no', 'customer_no', 'customer_name','date'],'invoice_no')\n"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 93
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "ex1_1a"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>invoice_no</th>\n",
+        "      <th>customer_no</th>\n",
+        "      <th>customer_name</th>\n",
+        "      <th>date</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 123789</td>\n",
+        "      <td> Dimity Stone</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> 966048</td>\n",
+        "      <td> 234678</td>\n",
+        "      <td>   Roger Monk</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 95,
+       "text": [
+        "   invoice_no  customer_no customer_name       date\n",
+        "0      966047       123789  Dimity Stone  01-Jul-14\n",
+        "5      966048       234678    Roger Monk  01-Jul-14"
+       ]
+      }
+     ],
+     "prompt_number": 95
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "ex1_1b"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>invoice_no</th>\n",
+        "      <th>isbn</th>\n",
+        "      <th>title</th>\n",
+        "      <th>quantity</th>\n",
+        "      <th>cost</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 978-1292025827</td>\n",
+        "      <td>    A First Course in Database Systems</td>\n",
+        "      <td> 10</td>\n",
+        "      <td> \u00a310.00</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 978-1558604568</td>\n",
+        "      <td>                              SQL:1999</td>\n",
+        "      <td> 10</td>\n",
+        "      <td> \u00a354.99</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 978-0071005296</td>\n",
+        "      <td>              Database System Concepts</td>\n",
+        "      <td> 10</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 978-0130402646</td>\n",
+        "      <td>        Database System Implementation</td>\n",
+        "      <td> 10</td>\n",
+        "      <td> \u00a348.78</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 978-1852330088</td>\n",
+        "      <td> A Guided Tour of Relational Databases</td>\n",
+        "      <td> 10</td>\n",
+        "      <td> \u00a341.69</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> 966048</td>\n",
+        "      <td> 978-0071005296</td>\n",
+        "      <td>              Database System Concepts</td>\n",
+        "      <td>  1</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>6</th>\n",
+        "      <td> 966048</td>\n",
+        "      <td> 978-0471141617</td>\n",
+        "      <td>           Building the Data Warehouse</td>\n",
+        "      <td>  1</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>7</th>\n",
+        "      <td> 966048</td>\n",
+        "      <td> 978-1558604896</td>\n",
+        "      <td>  Data Mining: Concepts and Techniques</td>\n",
+        "      <td>  1</td>\n",
+        "      <td> \u00a318.55</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 96,
+       "text": [
+        "   invoice_no            isbn                                  title  \\\n",
+        "0      966047  978-1292025827     A First Course in Database Systems   \n",
+        "1      966047  978-1558604568                               SQL:1999   \n",
+        "2      966047  978-0071005296               Database System Concepts   \n",
+        "3      966047  978-0130402646         Database System Implementation   \n",
+        "4      966047  978-1852330088  A Guided Tour of Relational Databases   \n",
+        "5      966048  978-0071005296               Database System Concepts   \n",
+        "6      966048  978-0471141617            Building the Data Warehouse   \n",
+        "7      966048  978-1558604896   Data Mining: Concepts and Techniques   \n",
+        "\n",
+        "   quantity    cost  \n",
+        "0        10  \u00a310.00  \n",
+        "1        10  \u00a354.99  \n",
+        "2        10   \u00a39.55  \n",
+        "3        10  \u00a348.78  \n",
+        "4        10  \u00a341.69  \n",
+        "5         1   \u00a39.55  \n",
+        "6         1   \u00a39.55  \n",
+        "7         1  \u00a318.55  "
+       ]
+      }
+     ],
+     "prompt_number": 96
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#Test\n",
+      "pd.merge(ex1_1a,ex1_1b,on='invoice_no')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>invoice_no</th>\n",
+        "      <th>customer_no</th>\n",
+        "      <th>customer_name</th>\n",
+        "      <th>date</th>\n",
+        "      <th>isbn</th>\n",
+        "      <th>title</th>\n",
+        "      <th>quantity</th>\n",
+        "      <th>cost</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 123789</td>\n",
+        "      <td> Dimity Stone</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> 978-1292025827</td>\n",
+        "      <td>    A First Course in Database Systems</td>\n",
+        "      <td> 10</td>\n",
+        "      <td> \u00a310.00</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 123789</td>\n",
+        "      <td> Dimity Stone</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> 978-1558604568</td>\n",
+        "      <td>                              SQL:1999</td>\n",
+        "      <td> 10</td>\n",
+        "      <td> \u00a354.99</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 123789</td>\n",
+        "      <td> Dimity Stone</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> 978-0071005296</td>\n",
+        "      <td>              Database System Concepts</td>\n",
+        "      <td> 10</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 123789</td>\n",
+        "      <td> Dimity Stone</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> 978-0130402646</td>\n",
+        "      <td>        Database System Implementation</td>\n",
+        "      <td> 10</td>\n",
+        "      <td> \u00a348.78</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 123789</td>\n",
+        "      <td> Dimity Stone</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> 978-1852330088</td>\n",
+        "      <td> A Guided Tour of Relational Databases</td>\n",
+        "      <td> 10</td>\n",
+        "      <td> \u00a341.69</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> 966048</td>\n",
+        "      <td> 234678</td>\n",
+        "      <td>   Roger Monk</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> 978-0071005296</td>\n",
+        "      <td>              Database System Concepts</td>\n",
+        "      <td>  1</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>6</th>\n",
+        "      <td> 966048</td>\n",
+        "      <td> 234678</td>\n",
+        "      <td>   Roger Monk</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> 978-0471141617</td>\n",
+        "      <td>           Building the Data Warehouse</td>\n",
+        "      <td>  1</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>7</th>\n",
+        "      <td> 966048</td>\n",
+        "      <td> 234678</td>\n",
+        "      <td>   Roger Monk</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "      <td> 978-1558604896</td>\n",
+        "      <td>  Data Mining: Concepts and Techniques</td>\n",
+        "      <td>  1</td>\n",
+        "      <td> \u00a318.55</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 101,
+       "text": [
+        "   invoice_no  customer_no customer_name       date            isbn  \\\n",
+        "0      966047       123789  Dimity Stone  01-Jul-14  978-1292025827   \n",
+        "1      966047       123789  Dimity Stone  01-Jul-14  978-1558604568   \n",
+        "2      966047       123789  Dimity Stone  01-Jul-14  978-0071005296   \n",
+        "3      966047       123789  Dimity Stone  01-Jul-14  978-0130402646   \n",
+        "4      966047       123789  Dimity Stone  01-Jul-14  978-1852330088   \n",
+        "5      966048       234678    Roger Monk  01-Jul-14  978-0071005296   \n",
+        "6      966048       234678    Roger Monk  01-Jul-14  978-0471141617   \n",
+        "7      966048       234678    Roger Monk  01-Jul-14  978-1558604896   \n",
+        "\n",
+        "                                   title  quantity    cost  \n",
+        "0     A First Course in Database Systems        10  \u00a310.00  \n",
+        "1                               SQL:1999        10  \u00a354.99  \n",
+        "2               Database System Concepts        10   \u00a39.55  \n",
+        "3         Database System Implementation        10  \u00a348.78  \n",
+        "4  A Guided Tour of Relational Databases        10  \u00a341.69  \n",
+        "5               Database System Concepts         1   \u00a39.55  \n",
+        "6            Building the Data Warehouse         1   \u00a39.55  \n",
+        "7   Data Mining: Concepts and Techniques         1  \u00a318.55  "
+       ]
+      }
+     ],
+     "prompt_number": 101
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#Convert to 2NF\n",
+      "#In ex1_1b, the combination of invoice_no and isbn attributes together determine the quantity attribute.\n",
+      "# Only isbn determines cost. cost is removed from the relation, and isbn and cost form a new relation, with isbn as key.\n",
+      "\n",
+      "ex1_2a,ex1_2b=tableNorming(ex1_1b,['isbn', 'title', 'cost'],'isbn')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 104
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "ex1_2a"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>isbn</th>\n",
+        "      <th>title</th>\n",
+        "      <th>cost</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 978-1292025827</td>\n",
+        "      <td>    A First Course in Database Systems</td>\n",
+        "      <td> \u00a310.00</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 978-1558604568</td>\n",
+        "      <td>                              SQL:1999</td>\n",
+        "      <td> \u00a354.99</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 978-0071005296</td>\n",
+        "      <td>              Database System Concepts</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> 978-0130402646</td>\n",
+        "      <td>        Database System Implementation</td>\n",
+        "      <td> \u00a348.78</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> 978-1852330088</td>\n",
+        "      <td> A Guided Tour of Relational Databases</td>\n",
+        "      <td> \u00a341.69</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>6</th>\n",
+        "      <td> 978-0471141617</td>\n",
+        "      <td>           Building the Data Warehouse</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>7</th>\n",
+        "      <td> 978-1558604896</td>\n",
+        "      <td>  Data Mining: Concepts and Techniques</td>\n",
+        "      <td> \u00a318.55</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 105,
+       "text": [
+        "             isbn                                  title    cost\n",
+        "0  978-1292025827     A First Course in Database Systems  \u00a310.00\n",
+        "1  978-1558604568                               SQL:1999  \u00a354.99\n",
+        "2  978-0071005296               Database System Concepts   \u00a39.55\n",
+        "3  978-0130402646         Database System Implementation  \u00a348.78\n",
+        "4  978-1852330088  A Guided Tour of Relational Databases  \u00a341.69\n",
+        "6  978-0471141617            Building the Data Warehouse   \u00a39.55\n",
+        "7  978-1558604896   Data Mining: Concepts and Techniques  \u00a318.55"
+       ]
+      }
+     ],
+     "prompt_number": 105
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "ex1_2b"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>invoice_no</th>\n",
+        "      <th>isbn</th>\n",
+        "      <th>quantity</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 978-1292025827</td>\n",
+        "      <td> 10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 978-1558604568</td>\n",
+        "      <td> 10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 978-0071005296</td>\n",
+        "      <td> 10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 978-0130402646</td>\n",
+        "      <td> 10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 978-1852330088</td>\n",
+        "      <td> 10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> 966048</td>\n",
+        "      <td> 978-0071005296</td>\n",
+        "      <td>  1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>6</th>\n",
+        "      <td> 966048</td>\n",
+        "      <td> 978-0471141617</td>\n",
+        "      <td>  1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>7</th>\n",
+        "      <td> 966048</td>\n",
+        "      <td> 978-1558604896</td>\n",
+        "      <td>  1</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 106,
+       "text": [
+        "   invoice_no            isbn  quantity\n",
+        "0      966047  978-1292025827        10\n",
+        "1      966047  978-1558604568        10\n",
+        "2      966047  978-0071005296        10\n",
+        "3      966047  978-0130402646        10\n",
+        "4      966047  978-1852330088        10\n",
+        "5      966048  978-0071005296         1\n",
+        "6      966048  978-0471141617         1\n",
+        "7      966048  978-1558604896         1"
+       ]
+      }
+     ],
+     "prompt_number": 106
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#Test\n",
+      "pd.merge(ex1_2a,ex1_2b,on='isbn')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>isbn</th>\n",
+        "      <th>title</th>\n",
+        "      <th>cost</th>\n",
+        "      <th>invoice_no</th>\n",
+        "      <th>quantity</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 978-1292025827</td>\n",
+        "      <td>    A First Course in Database Systems</td>\n",
+        "      <td> \u00a310.00</td>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 978-1558604568</td>\n",
+        "      <td>                              SQL:1999</td>\n",
+        "      <td> \u00a354.99</td>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 978-0071005296</td>\n",
+        "      <td>              Database System Concepts</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> 978-0071005296</td>\n",
+        "      <td>              Database System Concepts</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "      <td> 966048</td>\n",
+        "      <td>  1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> 978-0130402646</td>\n",
+        "      <td>        Database System Implementation</td>\n",
+        "      <td> \u00a348.78</td>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> 978-1852330088</td>\n",
+        "      <td> A Guided Tour of Relational Databases</td>\n",
+        "      <td> \u00a341.69</td>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>6</th>\n",
+        "      <td> 978-0471141617</td>\n",
+        "      <td>           Building the Data Warehouse</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "      <td> 966048</td>\n",
+        "      <td>  1</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>7</th>\n",
+        "      <td> 978-1558604896</td>\n",
+        "      <td>  Data Mining: Concepts and Techniques</td>\n",
+        "      <td> \u00a318.55</td>\n",
+        "      <td> 966048</td>\n",
+        "      <td>  1</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 107,
+       "text": [
+        "             isbn                                  title    cost  invoice_no  \\\n",
+        "0  978-1292025827     A First Course in Database Systems  \u00a310.00      966047   \n",
+        "1  978-1558604568                               SQL:1999  \u00a354.99      966047   \n",
+        "2  978-0071005296               Database System Concepts   \u00a39.55      966047   \n",
+        "3  978-0071005296               Database System Concepts   \u00a39.55      966048   \n",
+        "4  978-0130402646         Database System Implementation  \u00a348.78      966047   \n",
+        "5  978-1852330088  A Guided Tour of Relational Databases  \u00a341.69      966047   \n",
+        "6  978-0471141617            Building the Data Warehouse   \u00a39.55      966048   \n",
+        "7  978-1558604896   Data Mining: Concepts and Techniques  \u00a318.55      966048   \n",
+        "\n",
+        "   quantity  \n",
+        "0        10  \n",
+        "1        10  \n",
+        "2        10  \n",
+        "3         1  \n",
+        "4        10  \n",
+        "5        10  \n",
+        "6         1  \n",
+        "7         1  "
+       ]
+      }
+     ],
+     "prompt_number": 107
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#Convert to 3NF\n",
+      "#In ex1_1a, the date and customer_no attributes are all directly dependent on invoice_no \n",
+      "#customer_name is directly dependent on customer_no not invoice_no.\n",
+      "#Therefore create a new relation from customer_no and customer_name where customer_no is the primary key.\n",
+      "#The customer_no remains in the original relation as a foreign key, as its value is determined by invoice_no\n",
+      "\n",
+      "ex1_3a,ex1_3b=tableNorming(ex1_1a,['customer_no', 'customer_name'],'customer_no')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 108
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "ex1_3a"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>customer_no</th>\n",
+        "      <th>customer_name</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 123789</td>\n",
+        "      <td> Dimity Stone</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> 234678</td>\n",
+        "      <td>   Roger Monk</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 109,
+       "text": [
+        "   customer_no customer_name\n",
+        "0       123789  Dimity Stone\n",
+        "5       234678    Roger Monk"
+       ]
+      }
+     ],
+     "prompt_number": 109
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "ex1_3b"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>invoice_no</th>\n",
+        "      <th>customer_no</th>\n",
+        "      <th>date</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 123789</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> 966048</td>\n",
+        "      <td> 234678</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 110,
+       "text": [
+        "   invoice_no  customer_no       date\n",
+        "0      966047       123789  01-Jul-14\n",
+        "5      966048       234678  01-Jul-14"
+       ]
+      }
+     ],
+     "prompt_number": 110
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#Test\n",
+      "pd.merge(ex1_3a,ex1_3b,on='customer_no')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>customer_no</th>\n",
+        "      <th>customer_name</th>\n",
+        "      <th>invoice_no</th>\n",
+        "      <th>date</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 123789</td>\n",
+        "      <td> Dimity Stone</td>\n",
+        "      <td> 966047</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 234678</td>\n",
+        "      <td>   Roger Monk</td>\n",
+        "      <td> 966048</td>\n",
+        "      <td> 01-Jul-14</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 111,
+       "text": [
+        "   customer_no customer_name  invoice_no       date\n",
+        "0       123789  Dimity Stone      966047  01-Jul-14\n",
+        "1       234678    Roger Monk      966048  01-Jul-14"
+       ]
+      }
+     ],
+     "prompt_number": 111
+    },
+    {
+     "cell_type": "heading",
+     "level": 2,
+     "metadata": {},
+     "source": [
+      "Exercise"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "ex3=pd.read_csv('data/normalisation-authors.csv')\n",
+      "ex3"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>isbn</th>\n",
+        "      <th>title</th>\n",
+        "      <th>authors</th>\n",
+        "      <th>cost</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 978-1292025827</td>\n",
+        "      <td>    A First Course in Database Systems</td>\n",
+        "      <td>                  Jeffrey D Ullman, Jennifer Widom</td>\n",
+        "      <td> \u00a310.00</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 978-1558604568</td>\n",
+        "      <td>                              SQL:1999</td>\n",
+        "      <td>                          Jim Melton, Alan R Simon</td>\n",
+        "      <td> \u00a354.99</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 978-0071005296</td>\n",
+        "      <td>              Database System Concepts</td>\n",
+        "      <td>               Henry F Korth, Abraham Silberschatz</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> 978-0130402646</td>\n",
+        "      <td>        Database System Implementation</td>\n",
+        "      <td> Hector Garcia-Molina, Jeffrey D Ullman, Jennif...</td>\n",
+        "      <td> \u00a348.78</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> 978-1852330088</td>\n",
+        "      <td> A Guided Tour of Relational Databases</td>\n",
+        "      <td>                        Mark Levene, George Loizou</td>\n",
+        "      <td> \u00a341.69</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> 978-0471141617</td>\n",
+        "      <td>           Building the Data Warehouse</td>\n",
+        "      <td>                                   William H Inmon</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>6</th>\n",
+        "      <td> 978-1558604896</td>\n",
+        "      <td>  Data Mining: Concepts and Techniques</td>\n",
+        "      <td>                      Jiawei Han, Micheline Kamber</td>\n",
+        "      <td> \u00a318.55</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 114,
+       "text": [
+        "             isbn                                  title  \\\n",
+        "0  978-1292025827     A First Course in Database Systems   \n",
+        "1  978-1558604568                               SQL:1999   \n",
+        "2  978-0071005296               Database System Concepts   \n",
+        "3  978-0130402646         Database System Implementation   \n",
+        "4  978-1852330088  A Guided Tour of Relational Databases   \n",
+        "5  978-0471141617            Building the Data Warehouse   \n",
+        "6  978-1558604896   Data Mining: Concepts and Techniques   \n",
+        "\n",
+        "                                             authors    cost  \n",
+        "0                   Jeffrey D Ullman, Jennifer Widom  \u00a310.00  \n",
+        "1                           Jim Melton, Alan R Simon  \u00a354.99  \n",
+        "2                Henry F Korth, Abraham Silberschatz   \u00a39.55  \n",
+        "3  Hector Garcia-Molina, Jeffrey D Ullman, Jennif...  \u00a348.78  \n",
+        "4                         Mark Levene, George Loizou  \u00a341.69  \n",
+        "5                                    William H Inmon   \u00a39.55  \n",
+        "6                       Jiawei Han, Micheline Kamber  \u00a318.55  "
+       ]
+      }
+     ],
+     "prompt_number": 114
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "To be able to list books by author, we need to reshape this dataset by splitting on the authors column. In the original table, authors are essentially specified in a comma separated list."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#Sort of via http://stackoverflow.com/a/12681217/454773\n",
+      "ex3_authors=pd.concat([pd.DataFrame({'isbn':row['isbn'], 'author':row['authors'].split(',') })             \n",
+      "                    for _, row in ex3.iterrows()])\n",
+      "ex3_authors"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>author</th>\n",
+        "      <th>isbn</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>      Jeffrey D Ullman</td>\n",
+        "      <td> 978-1292025827</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td>        Jennifer Widom</td>\n",
+        "      <td> 978-1292025827</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>            Jim Melton</td>\n",
+        "      <td> 978-1558604568</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td>          Alan R Simon</td>\n",
+        "      <td> 978-1558604568</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>         Henry F Korth</td>\n",
+        "      <td> 978-0071005296</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td>  Abraham Silberschatz</td>\n",
+        "      <td> 978-0071005296</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>  Hector Garcia-Molina</td>\n",
+        "      <td> 978-0130402646</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td>      Jeffrey D Ullman</td>\n",
+        "      <td> 978-0130402646</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td>        Jennifer Widom</td>\n",
+        "      <td> 978-0130402646</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>           Mark Levene</td>\n",
+        "      <td> 978-1852330088</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td>         George Loizou</td>\n",
+        "      <td> 978-1852330088</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>       William H Inmon</td>\n",
+        "      <td> 978-0471141617</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>            Jiawei Han</td>\n",
+        "      <td> 978-1558604896</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td>      Micheline Kamber</td>\n",
+        "      <td> 978-1558604896</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 133,
+       "text": [
+        "                  author            isbn\n",
+        "0       Jeffrey D Ullman  978-1292025827\n",
+        "1         Jennifer Widom  978-1292025827\n",
+        "0             Jim Melton  978-1558604568\n",
+        "1           Alan R Simon  978-1558604568\n",
+        "0          Henry F Korth  978-0071005296\n",
+        "1   Abraham Silberschatz  978-0071005296\n",
+        "0   Hector Garcia-Molina  978-0130402646\n",
+        "1       Jeffrey D Ullman  978-0130402646\n",
+        "2         Jennifer Widom  978-0130402646\n",
+        "0            Mark Levene  978-1852330088\n",
+        "1          George Loizou  978-1852330088\n",
+        "0        William H Inmon  978-0471141617\n",
+        "0             Jiawei Han  978-1558604896\n",
+        "1       Micheline Kamber  978-1558604896"
+       ]
+      }
+     ],
+     "prompt_number": 133
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "ex3_books=ex3.drop('authors',1)\n",
+      "ex3_books"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>isbn</th>\n",
+        "      <th>title</th>\n",
+        "      <th>cost</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> 978-1292025827</td>\n",
+        "      <td>    A First Course in Database Systems</td>\n",
+        "      <td> \u00a310.00</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> 978-1558604568</td>\n",
+        "      <td>                              SQL:1999</td>\n",
+        "      <td> \u00a354.99</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> 978-0071005296</td>\n",
+        "      <td>              Database System Concepts</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> 978-0130402646</td>\n",
+        "      <td>        Database System Implementation</td>\n",
+        "      <td> \u00a348.78</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> 978-1852330088</td>\n",
+        "      <td> A Guided Tour of Relational Databases</td>\n",
+        "      <td> \u00a341.69</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> 978-0471141617</td>\n",
+        "      <td>           Building the Data Warehouse</td>\n",
+        "      <td>  \u00a39.55</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>6</th>\n",
+        "      <td> 978-1558604896</td>\n",
+        "      <td>  Data Mining: Concepts and Techniques</td>\n",
+        "      <td> \u00a318.55</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 135,
+       "text": [
+        "             isbn                                  title    cost\n",
+        "0  978-1292025827     A First Course in Database Systems  \u00a310.00\n",
+        "1  978-1558604568                               SQL:1999  \u00a354.99\n",
+        "2  978-0071005296               Database System Concepts   \u00a39.55\n",
+        "3  978-0130402646         Database System Implementation  \u00a348.78\n",
+        "4  978-1852330088  A Guided Tour of Relational Databases  \u00a341.69\n",
+        "5  978-0471141617            Building the Data Warehouse   \u00a39.55\n",
+        "6  978-1558604896   Data Mining: Concepts and Techniques  \u00a318.55"
+       ]
+      }
+     ],
+     "prompt_number": 135
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
\ No newline at end of file