--- /dev/null
+{
+ "metadata": {
+ "name": "",
+ "signature": "sha256:2b3f613860107207495ca10e32bd62179d2093a54a590024894b044c19173ae4"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+ {
+ "cells": [
+ {
+ "cell_type": "heading",
+ "level": 1,
+ "metadata": {},
+ "source": [
+ "Section 10 Keys and Normalisation Part 1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In this activity you will be asked to write and execute SQL SELECT statements to answer questions about the *doctor* and *patient* tables as given in Section 10 Part 2. Each SELECT statement will require you to effect the appropriate type of join (INNER, LEFT OUTER, RIGHT OUTER or FULL OUTER) between the *doctor* and *patient* tables.\n",
+ "\n",
+ "The *doctor* and *patient* tables are defined and populated as follows:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import pandas as pd"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 1
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import pandas.io.sql as psql"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 2
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import psycopg2 as pg"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 3
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "conn = pg.connect(\"dbname='postgres' host='localhost' user='test' password='test' \")"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 4
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "c = conn.cursor()"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 5
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Create the doctor and patient tables and populate them with the same data as shown in Section 10 Part 2."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "c.execute(\"DROP TABLE IF EXISTS patient\")\n",
+ "c.execute(\"DROP TABLE IF EXISTS doctor\")\n",
+ "c.execute(\"CREATE TABLE doctor (\\\n",
+ " doctor_id CHAR(3) NOT NULL CHECK (doctor_id SIMILAR TO 'd[0-9][0-9]'),\\\n",
+ " doctor_name VARCHAR(20) NOT NULL,\\\n",
+ " date_of_birth DATE NOT NULL,\\\n",
+ " PRIMARY KEY (doctor_id)\\\n",
+ " )\")\n",
+ "c.execute(\"CREATE TABLE patient (\\\n",
+ " patient_id CHAR(4) NOT NULL CHECK (patient_id SIMILAR TO 'p[0-9][0-9][0-9]'),\\\n",
+ " patient_name VARCHAR(20) NOT NULL,\\\n",
+ " date_of_birth DATE NOT NULL,\\\n",
+ " gender CHAR(1) NOT NULL CHECK (gender = 'F' OR gender = 'M'),\\\n",
+ " height DECIMAL(4,1) CHECK (height > 0),\\\n",
+ " weight DECIMAL(4,1) CHECK (weight > 0),\\\n",
+ " doctor_id CHAR(3),\\\n",
+ " PRIMARY KEY (patient_id),\\\n",
+ " FOREIGN KEY (doctor_id) REFERENCES doctor(doctor_id)\\\n",
+ " )\")\n",
+ "\n",
+ "c.execute(\"INSERT INTO doctor (doctor_id, doctor_name, date_of_birth)\\\n",
+ " VALUES ('d06','Gibson','1954-02-24')\")\n",
+ "c.execute(\"INSERT INTO doctor (doctor_id, doctor_name, date_of_birth)\\\n",
+ " VALUES ('d07','Paxton','1960-05-23')\")\n",
+ "c.execute(\"INSERT INTO doctor (doctor_id, doctor_name, date_of_birth)\\\n",
+ " VALUES ('d09','Tamblin','1972-12-22')\")\n",
+ "c.execute(\"INSERT INTO doctor (doctor_id, doctor_name, date_of_birth)\\\n",
+ " VALUES ('d10','Rampton','1980-09-25')\")\n",
+ "c.execute(\"INSERT INTO doctor (doctor_id, doctor_name, date_of_birth)\\\n",
+ " VALUES ('d11','Nolan','1988-04-01')\")\n",
+ "\n",
+ "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+ " VALUES ('p001', 'Thornton', '1980-01-22', 'F', 162.3, 71.6,'d06')\")\n",
+ "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+ " VALUES ('p007', 'Tennent', '1980-04-01', 'M', 176.8, 70.9,'d07')\")\n",
+ "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+ " VALUES ('p008', 'James', '1980-07-08', 'M', 167.9, 70.5,'d07')\")\n",
+ "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+ " VALUES ('p009', 'Kay', '1980-09-25', 'F', 164.7, 53.2,'d06')\")\n",
+ "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+ " VALUES ('p015', 'Harris', '1980-12-04', 'M', 180.6, 64.3,'d06')\")\n",
+ "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender)\\\n",
+ " VALUES ('p031', 'Rubinstein', '1980-12-23', 'F')\")\n",
+ "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender)\\\n",
+ " VALUES ('p037', 'Boswell', '1981-06-11', 'F')\")\n",
+ "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+ " VALUES ('p038', 'Ming', '1981-09-23', 'M', 186.3, 85.4,'d11')\")\n",
+ "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+ " VALUES ('p039', 'Maher', '1981-10-09', 'F', 161.9, 73.0,'d11')\")\n",
+ "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+ " VALUES ('p068', 'Monroe', '1981-10-21', 'F', 165.0, 62.6,'d10')\")\n",
+ "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+ " VALUES ('p071', 'Harris', '1981-12-12', 'M', 186.3, 76.7,'d10')\")\n",
+ "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+ " VALUES ('p078', 'Hunt', '1982-02-25', 'M', 179.9, 74.3,'d10')\")\n",
+ "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+ " VALUES ('p079', 'Dixon', '1982-05-05', 'F', 163.9, 56.5,'d06')\")\n",
+ "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+ " VALUES ('p080', 'Bell', '1982-06-11', 'F', 171.3, 49.2,'d07')\")\n",
+ "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+ " VALUES ('p087', 'Reed', '1982-06-14', 'F', 160.0, 59.1,'d07')\")\n",
+ "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight, doctor_id)\\\n",
+ " VALUES ('p088', 'Boswell', '1982-08-23', 'M', 168.4, 91.4,'d06')\")\n",
+ "c.execute(\"INSERT INTO patient (patient_id, patient_name, date_of_birth, gender, height, weight)\\\n",
+ " VALUES ('p089', 'Jarvis', '1982-11-09', 'F', 172.9, 53.4)\")\n",
+ "c.execute(\"commit\")"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 6
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Display the contents of the doctor table."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "df = pd.read_sql(\"SELECT * FROM doctor \\\n",
+ " ORDER BY doctor_id\", conn)\n",
+ "df"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "html": [
+ "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>doctor_id</th>\n",
+ " <th>doctor_name</th>\n",
+ " <th>date_of_birth</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td> d06</td>\n",
+ " <td> Gibson</td>\n",
+ " <td> 1954-02-24</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td> d07</td>\n",
+ " <td> Paxton</td>\n",
+ " <td> 1960-05-23</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td> d09</td>\n",
+ " <td> Tamblin</td>\n",
+ " <td> 1972-12-22</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td> d10</td>\n",
+ " <td> Rampton</td>\n",
+ " <td> 1980-09-25</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td> d11</td>\n",
+ " <td> Nolan</td>\n",
+ " <td> 1988-04-01</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 7,
+ "text": [
+ " doctor_id doctor_name date_of_birth\n",
+ "0 d06 Gibson 1954-02-24\n",
+ "1 d07 Paxton 1960-05-23\n",
+ "2 d09 Tamblin 1972-12-22\n",
+ "3 d10 Rampton 1980-09-25\n",
+ "4 d11 Nolan 1988-04-01"
+ ]
+ }
+ ],
+ "prompt_number": 7
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Display the contents of the patient table."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "df = pd.read_sql(\"SELECT * FROM patient \\\n",
+ " ORDER BY patient_id\", conn)\n",
+ "df"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "html": [
+ "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>patient_id</th>\n",
+ " <th>patient_name</th>\n",
+ " <th>date_of_birth</th>\n",
+ " <th>gender</th>\n",
+ " <th>height</th>\n",
+ " <th>weight</th>\n",
+ " <th>doctor_id</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0 </th>\n",
+ " <td> p001</td>\n",
+ " <td> Thornton</td>\n",
+ " <td> 1980-01-22</td>\n",
+ " <td> F</td>\n",
+ " <td> 162.3</td>\n",
+ " <td> 71.6</td>\n",
+ " <td> d06</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1 </th>\n",
+ " <td> p007</td>\n",
+ " <td> Tennent</td>\n",
+ " <td> 1980-04-01</td>\n",
+ " <td> M</td>\n",
+ " <td> 176.8</td>\n",
+ " <td> 70.9</td>\n",
+ " <td> d07</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2 </th>\n",
+ " <td> p008</td>\n",
+ " <td> James</td>\n",
+ " <td> 1980-07-08</td>\n",
+ " <td> M</td>\n",
+ " <td> 167.9</td>\n",
+ " <td> 70.5</td>\n",
+ " <td> d07</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3 </th>\n",
+ " <td> p009</td>\n",
+ " <td> Kay</td>\n",
+ " <td> 1980-09-25</td>\n",
+ " <td> F</td>\n",
+ " <td> 164.7</td>\n",
+ " <td> 53.2</td>\n",
+ " <td> d06</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4 </th>\n",
+ " <td> p015</td>\n",
+ " <td> Harris</td>\n",
+ " <td> 1980-12-04</td>\n",
+ " <td> M</td>\n",
+ " <td> 180.6</td>\n",
+ " <td> 64.3</td>\n",
+ " <td> d06</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>5 </th>\n",
+ " <td> p031</td>\n",
+ " <td> Rubinstein</td>\n",
+ " <td> 1980-12-23</td>\n",
+ " <td> F</td>\n",
+ " <td> NaN</td>\n",
+ " <td> NaN</td>\n",
+ " <td> None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>6 </th>\n",
+ " <td> p037</td>\n",
+ " <td> Boswell</td>\n",
+ " <td> 1981-06-11</td>\n",
+ " <td> F</td>\n",
+ " <td> NaN</td>\n",
+ " <td> NaN</td>\n",
+ " <td> None</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>7 </th>\n",
+ " <td> p038</td>\n",
+ " <td> Ming</td>\n",
+ " <td> 1981-09-23</td>\n",
+ " <td> M</td>\n",
+ " <td> 186.3</td>\n",
+ " <td> 85.4</td>\n",
+ " <td> d11</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>8 </th>\n",
+ " <td> p039</td>\n",
+ " <td> Maher</td>\n",
+ " <td> 1981-10-09</td>\n",
+ " <td> F</td>\n",
+ " <td> 161.9</td>\n",
+ " <td> 73.0</td>\n",
+ " <td> d11</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>9 </th>\n",
+ " <td> p068</td>\n",
+ " <td> Monroe</td>\n",
+ " <td> 1981-10-21</td>\n",
+ " <td> F</td>\n",
+ " <td> 165.0</td>\n",
+ " <td> 62.6</td>\n",
+ " <td> d10</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>10</th>\n",
+ " <td> p071</td>\n",
+ " <td> Harris</td>\n",
+ " <td> 1981-12-12</td>\n",
+ " <td> M</td>\n",
+ " <td> 186.3</td>\n",
+ " <td> 76.7</td>\n",
+ " <td> d10</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>11</th>\n",
+ " <td> p078</td>\n",
+ " <td> Hunt</td>\n",
+ " <td> 1982-02-25</td>\n",
+ " <td> M</td>\n",
+ " <td> 179.9</td>\n",
+ " <td> 74.3</td>\n",
+ " <td> d10</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>12</th>\n",
+ " <td> p079</td>\n",
+ " <td> Dixon</td>\n",
+ " <td> 1982-05-05</td>\n",
+ " <td> F</td>\n",
+ " <td> 163.9</td>\n",
+ " <td> 56.5</td>\n",
+ " <td> d06</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>13</th>\n",
+ " <td> p080</td>\n",
+ " <td> Bell</td>\n",
+ " <td> 1982-06-11</td>\n",
+ " <td> F</td>\n",
+ " <td> 171.3</td>\n",
+ " <td> 49.2</td>\n",
+ " <td> d07</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>14</th>\n",
+ " <td> p087</td>\n",
+ " <td> Reed</td>\n",
+ " <td> 1982-06-14</td>\n",
+ " <td> F</td>\n",
+ " <td> 160.0</td>\n",
+ " <td> 59.1</td>\n",
+ " <td> d07</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>15</th>\n",
+ " <td> p088</td>\n",
+ " <td> Boswell</td>\n",
+ " <td> 1982-08-23</td>\n",
+ " <td> M</td>\n",
+ " <td> 168.4</td>\n",
+ " <td> 91.4</td>\n",
+ " <td> d06</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>16</th>\n",
+ " <td> p089</td>\n",
+ " <td> Jarvis</td>\n",
+ " <td> 1982-11-09</td>\n",
+ " <td> F</td>\n",
+ " <td> 172.9</td>\n",
+ " <td> 53.4</td>\n",
+ " <td> None</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 8,
+ "text": [
+ " patient_id patient_name date_of_birth gender height weight doctor_id\n",
+ "0 p001 Thornton 1980-01-22 F 162.3 71.6 d06\n",
+ "1 p007 Tennent 1980-04-01 M 176.8 70.9 d07\n",
+ "2 p008 James 1980-07-08 M 167.9 70.5 d07\n",
+ "3 p009 Kay 1980-09-25 F 164.7 53.2 d06\n",
+ "4 p015 Harris 1980-12-04 M 180.6 64.3 d06\n",
+ "5 p031 Rubinstein 1980-12-23 F NaN NaN None\n",
+ "6 p037 Boswell 1981-06-11 F NaN NaN None\n",
+ "7 p038 Ming 1981-09-23 M 186.3 85.4 d11\n",
+ "8 p039 Maher 1981-10-09 F 161.9 73.0 d11\n",
+ "9 p068 Monroe 1981-10-21 F 165.0 62.6 d10\n",
+ "10 p071 Harris 1981-12-12 M 186.3 76.7 d10\n",
+ "11 p078 Hunt 1982-02-25 M 179.9 74.3 d10\n",
+ "12 p079 Dixon 1982-05-05 F 163.9 56.5 d06\n",
+ "13 p080 Bell 1982-06-11 F 171.3 49.2 d07\n",
+ "14 p087 Reed 1982-06-14 F 160.0 59.1 d07\n",
+ "15 p088 Boswell 1982-08-23 M 168.4 91.4 d06\n",
+ "16 p089 Jarvis 1982-11-09 F 172.9 53.4 None"
+ ]
+ }
+ ],
+ "prompt_number": 8
+ },
+ {
+ "cell_type": "heading",
+ "level": 3,
+ "metadata": {},
+ "source": [
+ "Exercise"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Write and execute SQL SELECT statements to answer the following questions about doctors and their patients:\n",
+ " \n",
+ " 1. For each doctor who has patients under their care, give their identifier, name, and the minimum and maximum weights of their patients.\n",
+ " \n",
+ " 2. Give the identifier and name of those doctors who have no patients under their care.\n",
+ " \n",
+ " 3. For every doctor, give their identifier, name, and the number of patients under their care.\n",
+ " \n",
+ " 4. Give the number of doctors who have no patients under their care, and number of patients who are not registered with a doctor.\n",
+ " \n",
+ " "
+ ]
+ }
+ ],
+ "metadata": {}
+ }
+ ]
+}
\ No newline at end of file