Tweaked to have separate script for boilerplate
[ou-jupyter-r-demo.git] / section5.1solutions.ipynb
index 4c7507abcabe361a0a4de9dc6951fdad9355f2d8..1ef5c8ed7b6f0fe166321e15c9debcbac63adbd7 100644 (file)
@@ -18,7 +18,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {
     "hidden": true,
     "init_cell": true
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {
-    "hidden": true,
     "init_cell": true
    },
    "outputs": [],
    "source": [
-    "# Multiple plot function\n",
-    "#\n",
-    "# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)\n",
-    "# - cols:   Number of columns in layout\n",
-    "# - layout: A matrix specifying the layout. If present, 'cols' is ignored.\n",
-    "#\n",
-    "# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),\n",
-    "# then plot 1 will go in the upper left, 2 will go in the upper right, and\n",
-    "# 3 will go all the way across the bottom.\n",
-    "#\n",
-    "multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {\n",
-    "  library(grid)\n",
-    "\n",
-    "  # Make a list from the ... arguments and plotlist\n",
-    "  plots <- c(list(...), plotlist)\n",
-    "\n",
-    "  numPlots = length(plots)\n",
-    "\n",
-    "  # If layout is NULL, then use 'cols' to determine layout\n",
-    "  if (is.null(layout)) {\n",
-    "    # Make the panel\n",
-    "    # ncol: Number of columns of plots\n",
-    "    # nrow: Number of rows needed, calculated from # of cols\n",
-    "    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),\n",
-    "                    ncol = cols, nrow = ceiling(numPlots/cols))\n",
-    "  }\n",
-    "\n",
-    " if (numPlots==1) {\n",
-    "    print(plots[[1]])\n",
-    "\n",
-    "  } else {\n",
-    "    # Set up the page\n",
-    "    grid.newpage()\n",
-    "    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))\n",
-    "\n",
-    "    # Make each plot, in the correct location\n",
-    "    for (i in 1:numPlots) {\n",
-    "      # Get the i,j matrix positions of the regions that contain this subplot\n",
-    "      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))\n",
-    "\n",
-    "      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,\n",
-    "                                      layout.pos.col = matchidx$col))\n",
-    "    }\n",
-    "  }\n",
-    "}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "hidden": true,
-    "init_cell": true
-   },
-   "outputs": [],
-   "source": [
-    "# From https://sejohnston.com/2012/08/09/a-quick-and-easy-function-to-plot-lm-results-in-r/\n",
-    "ggplotRegression <- function (fit) {\n",
-    "\n",
-    "require(ggplot2)\n",
-    "\n",
-    "ggplot(fit$model, aes_string(x = names(fit$model)[2], y = names(fit$model)[1])) + \n",
-    "    geom_point() +\n",
-    "    stat_smooth(method = \"lm\", col = \"red\") +\n",
-    "    labs(title = paste(\"Adj R2 = \",signif(summary(fit)$adj.r.squared, 5),\n",
-    "                     \"Intercept =\",signif(fit$coef[[1]],5 ),\n",
-    "                     \" Slope =\",signif(fit$coef[[2]], 5),\n",
-    "                     \" P =\",signif(summary(fit)$coef[2,4], 5))) + \n",
-    "    theme(plot.title = element_text(size=12))\n",
-    "}"
+    "source('plot_extensions.R')"
    ]
   },
   {
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
     "multiplot(taheat, tsheat, cols=2)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Blah, blah, comment, blah."
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
     }
    ],
    "source": [
-    "fit <- lm(heat ~ TA, data = cemheat)\n",
-    "summary(fit)\n",
-    "anova(fit)"
+    "fit.ta <- lm(heat ~ TA, data = cemheat)\n",
+    "summary(fit.ta)\n",
+    "anova(fit.ta)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
     }
    ],
    "source": [
-    "ggplotRegression(fit)"
+    "ggplotRegression(fit.ta)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
     }
    ],
    "source": [
-    "fit <- lm(heat ~ TS, data = cemheat)\n",
-    "summary(fit)\n",
-    "anova(fit)"
+    "fit.ts <- lm(heat ~ TS, data = cemheat)\n",
+    "summary(fit.ts)\n",
+    "anova(fit.ts)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
     }
    ],
    "source": [
-    "ggplotRegression(fit)"
+    "ggplotRegression(fit.ts)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
     }
    ],
    "source": [
-    "fit <- lm(heat ~ TA + TS, data = cemheat)\n",
-    "summary(fit)\n",
-    "anova(fit)"
+    "fit.tats <- lm(heat ~ TA + TS, data = cemheat)\n",
+    "summary(fit.tats)\n",
+    "anova(fit.tats)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now combine the results into one dataframe for easy comparison."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 112,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "<thead><tr><th scope=col>Vars</th><th scope=col>Adj.R.2</th></tr></thead>\n",
+       "<tbody>\n",
+       "\t<tr><td>TA       </td><td>0.4915797</td></tr>\n",
+       "\t<tr><td>TS       </td><td>0.6359290</td></tr>\n",
+       "\t<tr><td>TA, TS   </td><td>0.9744140</td></tr>\n",
+       "</tbody>\n",
+       "</table>\n"
+      ],
+      "text/latex": [
+       "\\begin{tabular}{r|ll}\n",
+       " Vars & Adj.R.2\\\\\n",
+       "\\hline\n",
+       "\t TA        & 0.4915797\\\\\n",
+       "\t TS        & 0.6359290\\\\\n",
+       "\t TA, TS    & 0.9744140\\\\\n",
+       "\\end{tabular}\n"
+      ],
+      "text/markdown": [
+       "\n",
+       "Vars | Adj.R.2 | \n",
+       "|---|---|---|\n",
+       "| TA        | 0.4915797 | \n",
+       "| TS        | 0.6359290 | \n",
+       "| TA, TS    | 0.9744140 | \n",
+       "\n",
+       "\n"
+      ],
+      "text/plain": [
+       "  Vars   Adj.R.2  \n",
+       "1 TA     0.4915797\n",
+       "2 TS     0.6359290\n",
+       "3 TA, TS 0.9744140"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "fits <- list(fit.ta, fit.ts, fit.tats)\n",
+    "data.frame(\n",
+    "    \"Vars\" = sapply(fits, function(x) toString(attr(summary(x)$terms, \"variables\")[-(1:2)]) ),\n",
+    "    \"Adj R^2\" = sapply(fits, function(x) summary(x)$adj.r.squared)\n",
+    ")"
    ]
   },
   {
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 113,
    "metadata": {},
    "outputs": [
     {
     }
    ],
    "source": [
-    "predict(fit, data.frame(\"TA\" = 15, \"TS\" = 55))"
+    "predict(fit.tats, data.frame(\"TA\" = 15, \"TS\" = 55))"
    ]
   },
   {
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 114,
    "metadata": {},
    "outputs": [
     {
     }
    ],
    "source": [
-    "autoplot(fit)"
+    "autoplot(fit.tats)"
    ]
   },
   {
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 36,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 37,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 38,
    "metadata": {},
    "outputs": [
     {
    "metadata": {},
    "source": [
     "### Now form a new variable `oxy2`, say, by squaring oxygen.\n",
-    "(Create a new column in the `anearobic` dataframe which is `anaerobic$oxygen ^ 2`.) Perform the regression of ventil on `oxygen` and `oxy2`. Comment on the fit of this model according to the printed output (and with recourse to Figure 3.2 in Example 3.1)."
+    "(Create a new column in the `anaerobic` dataframe which is `anaerobic$oxygen ^ 2`.) Perform the regression of `ventil` on `oxygen` and `oxy2`. Comment on the fit of this model according to the printed output (and with recourse to Figure 3.2 in Example 3.1)."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 39,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 40,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 41,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 42,
    "metadata": {},
    "outputs": [
     {