X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;ds=sidebyside;f=section5.1solutions.ipynb;fp=section5.1solutions.ipynb;h=614f03e2764bd508d912bc48bfd6ece16cdc151d;hb=42eadd5803ad1ca3804d800cd25dd72071c751eb;hp=785c7017b925f3a860d7b6e9ab75ae0acdc20078;hpb=eb2e022359b221c7712502aa5802bb85c692c94f;p=ou-jupyter-r-demo.git

diff --git a/section5.1solutions.ipynb b/section5.1solutions.ipynb
index 785c701..614f03e 100644
--- a/section5.1solutions.ipynb
+++ b/section5.1solutions.ipynb
@@ -38,82 +38,11 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "hidden": true,
-    "init_cell": true
-   },
-   "outputs": [],
-   "source": [
-    "# From http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/\n",
-    "# Multiple plot function\n",
-    "#\n",
-    "# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)\n",
-    "# - cols:   Number of columns in layout\n",
-    "# - layout: A matrix specifying the layout. If present, 'cols' is ignored.\n",
-    "#\n",
-    "# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),\n",
-    "# then plot 1 will go in the upper left, 2 will go in the upper right, and\n",
-    "# 3 will go all the way across the bottom.\n",
-    "#\n",
-    "multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {\n",
-    "  library(grid)\n",
-    "\n",
-    "  # Make a list from the ... arguments and plotlist\n",
-    "  plots <- c(list(...), plotlist)\n",
-    "\n",
-    "  numPlots = length(plots)\n",
-    "\n",
-    "  # If layout is NULL, then use 'cols' to determine layout\n",
-    "  if (is.null(layout)) {\n",
-    "    # Make the panel\n",
-    "    # ncol: Number of columns of plots\n",
-    "    # nrow: Number of rows needed, calculated from # of cols\n",
-    "    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),\n",
-    "                    ncol = cols, nrow = ceiling(numPlots/cols))\n",
-    "  }\n",
-    "\n",
-    " if (numPlots==1) {\n",
-    "    print(plots[[1]])\n",
-    "\n",
-    "  } else {\n",
-    "    # Set up the page\n",
-    "    grid.newpage()\n",
-    "    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))\n",
-    "\n",
-    "    # Make each plot, in the correct location\n",
-    "    for (i in 1:numPlots) {\n",
-    "      # Get the i,j matrix positions of the regions that contain this subplot\n",
-    "      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))\n",
-    "\n",
-    "      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,\n",
-    "                                      layout.pos.col = matchidx$col))\n",
-    "    }\n",
-    "  }\n",
-    "}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "hidden": true,
     "init_cell": true
    },
    "outputs": [],
    "source": [
-    "# From https://sejohnston.com/2012/08/09/a-quick-and-easy-function-to-plot-lm-results-in-r/\n",
-    "ggplotRegression <- function (fit) {\n",
-    "\n",
-    "require(ggplot2)\n",
-    "\n",
-    "ggplot(fit$model, aes_string(x = names(fit$model)[2], y = names(fit$model)[1])) + \n",
-    "    geom_point() +\n",
-    "    stat_smooth(method = \"lm\", col = \"red\") +\n",
-    "    labs(title = paste(\"Adj R2 = \",signif(summary(fit)$adj.r.squared, 5),\n",
-    "                     \"Intercept =\",signif(fit$coef[[1]],5 ),\n",
-    "                     \" Slope =\",signif(fit$coef[[2]], 5),\n",
-    "                     \" P =\",signif(summary(fit)$coef[2,4], 5))) + \n",
-    "    theme(plot.title = element_text(size=12))\n",
-    "}"
+    "source('plot_extensions.R')"
    ]
   },
   {
@@ -132,7 +61,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -149,7 +78,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -159,6 +88,13 @@
     "multiplot(taheat, tsheat, cols=2)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Blah, blah, comment, blah."
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -168,53 +104,115 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
-    "fit <- lm(heat ~ TA, data = cemheat)\n",
-    "summary(fit)\n",
-    "anova(fit)"
+    "fit.ta <- lm(heat ~ TA, data = cemheat)\n",
+    "summary(fit.ta)\n",
+    "anova(fit.ta)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
-    "ggplotRegression(fit)"
+    "ggplotRegression(fit.ta)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
-    "fit <- lm(heat ~ TS, data = cemheat)\n",
-    "summary(fit)\n",
-    "anova(fit)"
+    "fit.ts <- lm(heat ~ TS, data = cemheat)\n",
+    "summary(fit.ts)\n",
+    "anova(fit.ts)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [],
    "source": [
-    "ggplotRegression(fit)"
+    "ggplotRegression(fit.ts)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [],
    "source": [
-    "fit <- lm(heat ~ TA + TS, data = cemheat)\n",
-    "summary(fit)\n",
-    "anova(fit)"
+    "fit.tats <- lm(heat ~ TA + TS, data = cemheat)\n",
+    "summary(fit.tats)\n",
+    "anova(fit.tats)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now combine the results into one dataframe for easy comparison."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 112,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "<thead><tr><th scope=col>Vars</th><th scope=col>Adj.R.2</th></tr></thead>\n",
+       "<tbody>\n",
+       "\t<tr><td>TA       </td><td>0.4915797</td></tr>\n",
+       "\t<tr><td>TS       </td><td>0.6359290</td></tr>\n",
+       "\t<tr><td>TA, TS   </td><td>0.9744140</td></tr>\n",
+       "</tbody>\n",
+       "</table>\n"
+      ],
+      "text/latex": [
+       "\\begin{tabular}{r|ll}\n",
+       " Vars & Adj.R.2\\\\\n",
+       "\\hline\n",
+       "\t TA        & 0.4915797\\\\\n",
+       "\t TS        & 0.6359290\\\\\n",
+       "\t TA, TS    & 0.9744140\\\\\n",
+       "\\end{tabular}\n"
+      ],
+      "text/markdown": [
+       "\n",
+       "Vars | Adj.R.2 | \n",
+       "|---|---|---|\n",
+       "| TA        | 0.4915797 | \n",
+       "| TS        | 0.6359290 | \n",
+       "| TA, TS    | 0.9744140 | \n",
+       "\n",
+       "\n"
+      ],
+      "text/plain": [
+       "  Vars   Adj.R.2  \n",
+       "1 TA     0.4915797\n",
+       "2 TS     0.6359290\n",
+       "3 TA, TS 0.9744140"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "fits <- list(fit.ta, fit.ts, fit.tats)\n",
+    "data.frame(\n",
+    "    \"Vars\" = sapply(fits, function(x) toString(attr(summary(x)$terms, \"variables\")[-(1:2)]) ),\n",
+    "    \"Adj R^2\" = sapply(fits, function(x) summary(x)$adj.r.squared)\n",
+    ")"
    ]
   },
   {
@@ -226,11 +224,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 113,
    "metadata": {},
    "outputs": [],
    "source": [
-    "predict(fit, data.frame(\"TA\" = 15, \"TS\" = 55))"
+    "predict(fit.tats, data.frame(\"TA\" = 15, \"TS\" = 55))"
    ]
   },
   {
@@ -242,11 +240,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 114,
    "metadata": {},
    "outputs": [],
    "source": [
-    "autoplot(fit)"
+    "autoplot(fit.tats)"
    ]
   },
   {
@@ -258,7 +256,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -275,7 +273,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 36,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -286,7 +284,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 37,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -295,7 +293,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 38,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -307,12 +305,12 @@
    "metadata": {},
    "source": [
     "### Now form a new variable `oxy2`, say, by squaring oxygen.\n",
-    "(Create a new column in the `anearobic` dataframe which is `anaerobic$oxygen ^ 2`.) Perform the regression of ventil on `oxygen` and `oxy2`. Comment on the fit of this model according to the printed output (and with recourse to Figure 3.2 in Example 3.1)."
+    "(Create a new column in the `anaerobic` dataframe which is `anaerobic$oxygen ^ 2`.) Perform the regression of `ventil` on `oxygen` and `oxy2`. Comment on the fit of this model according to the printed output (and with recourse to Figure 3.2 in Example 3.1)."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 39,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -322,7 +320,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 40,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -340,7 +338,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 41,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -349,7 +347,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 42,
    "metadata": {},
    "outputs": [],
    "source": [