Imported all the notebooks
[tm351-notebooks.git] / notebooks / 10. Keys and Normalisation / 10.1 Keys and Normalisation.ipynb
diff --git a/notebooks/10. Keys and Normalisation/10.1 Keys and Normalisation.ipynb b/notebooks/10. Keys and Normalisation/10.1 Keys and Normalisation.ipynb
new file mode 100644 (file)
index 0000000..75c939c
--- /dev/null
@@ -0,0 +1,511 @@
+{
+ "metadata": {
+  "name": "",
+  "signature": "sha256:2b3f613860107207495ca10e32bd62179d2093a54a590024894b044c19173ae4"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "heading",
+     "level": 1,
+     "metadata": {},
+     "source": [
+      "Section 10 Keys and Normalisation Part 1"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "In this activity you will be asked to write and execute SQL SELECT statements to answer questions about the *doctor* and *patient* tables as given in Section 10 Part 2. Each SELECT statement will require you to effect the appropriate type of join (INNER, LEFT OUTER, RIGHT OUTER or FULL OUTER) between the *doctor* and *patient* tables.\n",
+      "\n",
+      "The *doctor* and *patient* tables are defined and populated as follows:"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import pandas as pd"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 1
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import pandas.io.sql as psql"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 2
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import psycopg2 as pg"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 3
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "conn = pg.connect(\"dbname='postgres'  host='localhost' user='test' password='test' \")"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 4
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "c = conn.cursor()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 5
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Create the doctor and patient tables and populate them with the same data as shown in Section 10 Part 2."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "c.execute(\"DROP TABLE IF EXISTS patient\")\n",
+      "c.execute(\"DROP TABLE IF EXISTS doctor\")\n",
+      "c.execute(\"CREATE TABLE doctor (\\\n",
+      "            doctor_id CHAR(3) NOT NULL CHECK (doctor_id SIMILAR TO 'd[0-9][0-9]'),\\\n",
+      "            doctor_name VARCHAR(20) NOT NULL,\\\n",
+      "            date_of_birth DATE NOT NULL,\\\n",
+      "            PRIMARY KEY (doctor_id)\\\n",
+      "           )\")\n",
+      "c.execute(\"CREATE TABLE patient (\\\n",
+      "            patient_id CHAR(4) NOT NULL CHECK (patient_id SIMILAR TO 'p[0-9][0-9][0-9]'),\\\n",
+      "            patient_name VARCHAR(20) NOT NULL,\\\n",
+      "            date_of_birth DATE NOT NULL,\\\n",
+      "            gender CHAR(1) NOT NULL CHECK (gender = 'F' OR gender = 'M'),\\\n",
+      "            height DECIMAL(4,1) CHECK (height > 0),\\\n",
+      "            weight DECIMAL(4,1) CHECK (weight > 0),\\\n",
+      "            doctor_id CHAR(3),\\\n",
+      "            PRIMARY KEY (patient_id),\\\n",
+      "            FOREIGN KEY (doctor_id) REFERENCES doctor(doctor_id)\\\n",
+      "           )\")\n",
+      "\n",
+      "c.execute(\"INSERT INTO doctor (doctor_id, doctor_name, date_of_birth)\\\n",
+      "            VALUES ('d06','Gibson','1954-02-24')\")\n",
+      "c.execute(\"INSERT INTO doctor (doctor_id, doctor_name, date_of_birth)\\\n",
+      "            VALUES ('d07','Paxton','1960-05-23')\")\n",
+      "c.execute(\"INSERT INTO doctor (doctor_id, doctor_name, date_of_birth)\\\n",
+      "            VALUES ('d09','Tamblin','1972-12-22')\")\n",
+      "c.execute(\"INSERT INTO doctor (doctor_id, doctor_name, date_of_birth)\\\n",
+      "            VALUES ('d10','Rampton','1980-09-25')\")\n",
+      "c.execute(\"INSERT INTO doctor (doctor_id, doctor_name, date_of_birth)\\\n",
+      "            VALUES ('d11','Nolan','1988-04-01')\")\n",
+      "\n",
+      "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+      "            VALUES ('p001', 'Thornton', '1980-01-22', 'F', 162.3, 71.6,'d06')\")\n",
+      "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+      "            VALUES ('p007', 'Tennent', '1980-04-01', 'M', 176.8, 70.9,'d07')\")\n",
+      "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+      "            VALUES ('p008', 'James', '1980-07-08', 'M', 167.9, 70.5,'d07')\")\n",
+      "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+      "            VALUES ('p009', 'Kay', '1980-09-25', 'F', 164.7, 53.2,'d06')\")\n",
+      "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+      "            VALUES ('p015', 'Harris', '1980-12-04', 'M', 180.6, 64.3,'d06')\")\n",
+      "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender)\\\n",
+      "            VALUES ('p031', 'Rubinstein', '1980-12-23', 'F')\")\n",
+      "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender)\\\n",
+      "            VALUES ('p037', 'Boswell', '1981-06-11', 'F')\")\n",
+      "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+      "            VALUES ('p038', 'Ming', '1981-09-23', 'M', 186.3, 85.4,'d11')\")\n",
+      "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+      "            VALUES ('p039', 'Maher', '1981-10-09', 'F', 161.9, 73.0,'d11')\")\n",
+      "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+      "            VALUES ('p068', 'Monroe', '1981-10-21', 'F', 165.0, 62.6,'d10')\")\n",
+      "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+      "            VALUES ('p071', 'Harris', '1981-12-12', 'M', 186.3, 76.7,'d10')\")\n",
+      "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+      "            VALUES ('p078', 'Hunt', '1982-02-25', 'M', 179.9, 74.3,'d10')\")\n",
+      "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+      "            VALUES ('p079', 'Dixon', '1982-05-05', 'F', 163.9, 56.5,'d06')\")\n",
+      "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+      "            VALUES ('p080', 'Bell', '1982-06-11', 'F', 171.3, 49.2,'d07')\")\n",
+      "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+      "            VALUES ('p087', 'Reed', '1982-06-14', 'F', 160.0, 59.1,'d07')\")\n",
+      "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+      "            VALUES ('p088', 'Boswell', '1982-08-23', 'M', 168.4, 91.4,'d06')\")\n",
+      "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight)\\\n",
+      "            VALUES ('p089', 'Jarvis', '1982-11-09', 'F', 172.9, 53.4)\")\n",
+      "c.execute(\"commit\")"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 6
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Display the contents of the doctor table."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df = pd.read_sql(\"SELECT * FROM doctor \\\n",
+      "                  ORDER BY doctor_id\", conn)\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>doctor_id</th>\n",
+        "      <th>doctor_name</th>\n",
+        "      <th>date_of_birth</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td> d06</td>\n",
+        "      <td>  Gibson</td>\n",
+        "      <td> 1954-02-24</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> d07</td>\n",
+        "      <td>  Paxton</td>\n",
+        "      <td> 1960-05-23</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td> d09</td>\n",
+        "      <td> Tamblin</td>\n",
+        "      <td> 1972-12-22</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> d10</td>\n",
+        "      <td> Rampton</td>\n",
+        "      <td> 1980-09-25</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td> d11</td>\n",
+        "      <td>   Nolan</td>\n",
+        "      <td> 1988-04-01</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 7,
+       "text": [
+        "  doctor_id doctor_name date_of_birth\n",
+        "0       d06      Gibson    1954-02-24\n",
+        "1       d07      Paxton    1960-05-23\n",
+        "2       d09     Tamblin    1972-12-22\n",
+        "3       d10     Rampton    1980-09-25\n",
+        "4       d11       Nolan    1988-04-01"
+       ]
+      }
+     ],
+     "prompt_number": 7
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Display the contents of the patient table."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df = pd.read_sql(\"SELECT * FROM patient \\\n",
+      "                  ORDER BY patient_id\", conn)\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>patient_id</th>\n",
+        "      <th>patient_name</th>\n",
+        "      <th>date_of_birth</th>\n",
+        "      <th>gender</th>\n",
+        "      <th>height</th>\n",
+        "      <th>weight</th>\n",
+        "      <th>doctor_id</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0 </th>\n",
+        "      <td> p001</td>\n",
+        "      <td>   Thornton</td>\n",
+        "      <td> 1980-01-22</td>\n",
+        "      <td> F</td>\n",
+        "      <td> 162.3</td>\n",
+        "      <td> 71.6</td>\n",
+        "      <td>  d06</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1 </th>\n",
+        "      <td> p007</td>\n",
+        "      <td>    Tennent</td>\n",
+        "      <td> 1980-04-01</td>\n",
+        "      <td> M</td>\n",
+        "      <td> 176.8</td>\n",
+        "      <td> 70.9</td>\n",
+        "      <td>  d07</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2 </th>\n",
+        "      <td> p008</td>\n",
+        "      <td>      James</td>\n",
+        "      <td> 1980-07-08</td>\n",
+        "      <td> M</td>\n",
+        "      <td> 167.9</td>\n",
+        "      <td> 70.5</td>\n",
+        "      <td>  d07</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3 </th>\n",
+        "      <td> p009</td>\n",
+        "      <td>        Kay</td>\n",
+        "      <td> 1980-09-25</td>\n",
+        "      <td> F</td>\n",
+        "      <td> 164.7</td>\n",
+        "      <td> 53.2</td>\n",
+        "      <td>  d06</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4 </th>\n",
+        "      <td> p015</td>\n",
+        "      <td>     Harris</td>\n",
+        "      <td> 1980-12-04</td>\n",
+        "      <td> M</td>\n",
+        "      <td> 180.6</td>\n",
+        "      <td> 64.3</td>\n",
+        "      <td>  d06</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5 </th>\n",
+        "      <td> p031</td>\n",
+        "      <td> Rubinstein</td>\n",
+        "      <td> 1980-12-23</td>\n",
+        "      <td> F</td>\n",
+        "      <td>   NaN</td>\n",
+        "      <td>  NaN</td>\n",
+        "      <td> None</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>6 </th>\n",
+        "      <td> p037</td>\n",
+        "      <td>    Boswell</td>\n",
+        "      <td> 1981-06-11</td>\n",
+        "      <td> F</td>\n",
+        "      <td>   NaN</td>\n",
+        "      <td>  NaN</td>\n",
+        "      <td> None</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>7 </th>\n",
+        "      <td> p038</td>\n",
+        "      <td>       Ming</td>\n",
+        "      <td> 1981-09-23</td>\n",
+        "      <td> M</td>\n",
+        "      <td> 186.3</td>\n",
+        "      <td> 85.4</td>\n",
+        "      <td>  d11</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>8 </th>\n",
+        "      <td> p039</td>\n",
+        "      <td>      Maher</td>\n",
+        "      <td> 1981-10-09</td>\n",
+        "      <td> F</td>\n",
+        "      <td> 161.9</td>\n",
+        "      <td> 73.0</td>\n",
+        "      <td>  d11</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>9 </th>\n",
+        "      <td> p068</td>\n",
+        "      <td>     Monroe</td>\n",
+        "      <td> 1981-10-21</td>\n",
+        "      <td> F</td>\n",
+        "      <td> 165.0</td>\n",
+        "      <td> 62.6</td>\n",
+        "      <td>  d10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>10</th>\n",
+        "      <td> p071</td>\n",
+        "      <td>     Harris</td>\n",
+        "      <td> 1981-12-12</td>\n",
+        "      <td> M</td>\n",
+        "      <td> 186.3</td>\n",
+        "      <td> 76.7</td>\n",
+        "      <td>  d10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>11</th>\n",
+        "      <td> p078</td>\n",
+        "      <td>       Hunt</td>\n",
+        "      <td> 1982-02-25</td>\n",
+        "      <td> M</td>\n",
+        "      <td> 179.9</td>\n",
+        "      <td> 74.3</td>\n",
+        "      <td>  d10</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>12</th>\n",
+        "      <td> p079</td>\n",
+        "      <td>      Dixon</td>\n",
+        "      <td> 1982-05-05</td>\n",
+        "      <td> F</td>\n",
+        "      <td> 163.9</td>\n",
+        "      <td> 56.5</td>\n",
+        "      <td>  d06</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>13</th>\n",
+        "      <td> p080</td>\n",
+        "      <td>       Bell</td>\n",
+        "      <td> 1982-06-11</td>\n",
+        "      <td> F</td>\n",
+        "      <td> 171.3</td>\n",
+        "      <td> 49.2</td>\n",
+        "      <td>  d07</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>14</th>\n",
+        "      <td> p087</td>\n",
+        "      <td>       Reed</td>\n",
+        "      <td> 1982-06-14</td>\n",
+        "      <td> F</td>\n",
+        "      <td> 160.0</td>\n",
+        "      <td> 59.1</td>\n",
+        "      <td>  d07</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>15</th>\n",
+        "      <td> p088</td>\n",
+        "      <td>    Boswell</td>\n",
+        "      <td> 1982-08-23</td>\n",
+        "      <td> M</td>\n",
+        "      <td> 168.4</td>\n",
+        "      <td> 91.4</td>\n",
+        "      <td>  d06</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>16</th>\n",
+        "      <td> p089</td>\n",
+        "      <td>     Jarvis</td>\n",
+        "      <td> 1982-11-09</td>\n",
+        "      <td> F</td>\n",
+        "      <td> 172.9</td>\n",
+        "      <td> 53.4</td>\n",
+        "      <td> None</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 8,
+       "text": [
+        "   patient_id patient_name date_of_birth gender  height  weight doctor_id\n",
+        "0        p001     Thornton    1980-01-22      F   162.3    71.6       d06\n",
+        "1        p007      Tennent    1980-04-01      M   176.8    70.9       d07\n",
+        "2        p008        James    1980-07-08      M   167.9    70.5       d07\n",
+        "3        p009          Kay    1980-09-25      F   164.7    53.2       d06\n",
+        "4        p015       Harris    1980-12-04      M   180.6    64.3       d06\n",
+        "5        p031   Rubinstein    1980-12-23      F     NaN     NaN      None\n",
+        "6        p037      Boswell    1981-06-11      F     NaN     NaN      None\n",
+        "7        p038         Ming    1981-09-23      M   186.3    85.4       d11\n",
+        "8        p039        Maher    1981-10-09      F   161.9    73.0       d11\n",
+        "9        p068       Monroe    1981-10-21      F   165.0    62.6       d10\n",
+        "10       p071       Harris    1981-12-12      M   186.3    76.7       d10\n",
+        "11       p078         Hunt    1982-02-25      M   179.9    74.3       d10\n",
+        "12       p079        Dixon    1982-05-05      F   163.9    56.5       d06\n",
+        "13       p080         Bell    1982-06-11      F   171.3    49.2       d07\n",
+        "14       p087         Reed    1982-06-14      F   160.0    59.1       d07\n",
+        "15       p088      Boswell    1982-08-23      M   168.4    91.4       d06\n",
+        "16       p089       Jarvis    1982-11-09      F   172.9    53.4      None"
+       ]
+      }
+     ],
+     "prompt_number": 8
+    },
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      "Exercise"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Write and execute SQL SELECT statements to answer the following questions about doctors and their patients:\n",
+      "    \n",
+      "    1. For each doctor who has patients under their care, give their identifier, name, and the minimum and maximum weights of their patients.\n",
+      "    \n",
+      "    2. Give the identifier and name of those doctors who have no patients under their care.\n",
+      "    \n",
+      "    3. For every doctor, give their identifier, name, and the number of patients under their care.\n",
+      "    \n",
+      "    4. Give the number of doctors who have no patients under their care, and number of patients who are not registered with a doctor.\n",
+      "    \n",
+      "    "
+     ]
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
\ No newline at end of file