Cleared outputs, set some init cells

[ou-jupyter-r-demo.git] / section5.1solutions.ipynb
diff --git a/section5.1solutions.ipynb b/section5.1solutions.ipynb

index 785c7017b925f3a860d7b6e9ab75ae0acdc20078..a4bb428b333b277f50ae55b15daa0de33b301bb5 100644 (file)
--- a/section5.1solutions.ipynb
+++ b/section5.1solutions.ipynb
@@ -43,77 +43,7 @@
     },
     "outputs": [],
     "source": [
-    "# From http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/\n",
-    "# Multiple plot function\n",
-    "#\n",
-    "# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)\n",
-    "# - cols:   Number of columns in layout\n",
-    "# - layout: A matrix specifying the layout. If present, 'cols' is ignored.\n",
-    "#\n",
-    "# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),\n",
-    "# then plot 1 will go in the upper left, 2 will go in the upper right, and\n",
-    "# 3 will go all the way across the bottom.\n",
-    "#\n",
-    "multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {\n",
-    "  library(grid)\n",
-    "\n",
-    "  # Make a list from the ... arguments and plotlist\n",
-    "  plots <- c(list(...), plotlist)\n",
-    "\n",
-    "  numPlots = length(plots)\n",
-    "\n",
-    "  # If layout is NULL, then use 'cols' to determine layout\n",
-    "  if (is.null(layout)) {\n",
-    "    # Make the panel\n",
-    "    # ncol: Number of columns of plots\n",
-    "    # nrow: Number of rows needed, calculated from # of cols\n",
-    "    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),\n",
-    "                    ncol = cols, nrow = ceiling(numPlots/cols))\n",
-    "  }\n",
-    "\n",
-    " if (numPlots==1) {\n",
-    "    print(plots[[1]])\n",
-    "\n",
-    "  } else {\n",
-    "    # Set up the page\n",
-    "    grid.newpage()\n",
-    "    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))\n",
-    "\n",
-    "    # Make each plot, in the correct location\n",
-    "    for (i in 1:numPlots) {\n",
-    "      # Get the i,j matrix positions of the regions that contain this subplot\n",
-    "      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))\n",
-    "\n",
-    "      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,\n",
-    "                                      layout.pos.col = matchidx$col))\n",
-    "    }\n",
-    "  }\n",
-    "}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "hidden": true,
-    "init_cell": true
-   },
-   "outputs": [],
-   "source": [
-    "# From https://sejohnston.com/2012/08/09/a-quick-and-easy-function-to-plot-lm-results-in-r/\n",
-    "ggplotRegression <- function (fit) {\n",
-    "\n",
-    "require(ggplot2)\n",
-    "\n",
-    "ggplot(fit$model, aes_string(x = names(fit$model)[2], y = names(fit$model)[1])) + \n",
-    "    geom_point() +\n",
-    "    stat_smooth(method = \"lm\", col = \"red\") +\n",
-    "    labs(title = paste(\"Adj R2 = \",signif(summary(fit)$adj.r.squared, 5),\n",
-    "                     \"Intercept =\",signif(fit$coef[[1]],5 ),\n",
-    "                     \" Slope =\",signif(fit$coef[[2]], 5),\n",
-    "                     \" P =\",signif(summary(fit)$coef[2,4], 5))) + \n",
-    "    theme(plot.title = element_text(size=12))\n",
-    "}"
+    "source('plot_extensions.R')"
     ]
    },
    {
@@ -159,6 +89,13 @@
      "multiplot(taheat, tsheat, cols=2)"
     ]
    },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Blah, blah, comment, blah."
+   ]
+  },
    {
     "cell_type": "markdown",
     "metadata": {},
@@ -172,9 +109,29 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "fit <- lm(heat ~ TA, data = cemheat)\n",
-    "summary(fit)\n",
-    "anova(fit)"
+    "fit.ta <- lm(heat ~ TA, data = cemheat)\n",
+    "summary(fit.ta)\n",
+    "anova(fit.ta)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ggplotRegression(fit.ta)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fit.ts <- lm(heat ~ TS, data = cemheat)\n",
+    "summary(fit.ts)\n",
+    "anova(fit.ts)"
     ]
    },
    {
@@ -183,7 +140,7 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "ggplotRegression(fit)"
+    "ggplotRegression(fit.ts)"
     ]
    },
    {
@@ -192,9 +149,9 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "fit <- lm(heat ~ TS, data = cemheat)\n",
-    "summary(fit)\n",
-    "anova(fit)"
+    "fit.tats <- lm(heat ~ TA + TS, data = cemheat)\n",
+    "summary(fit.tats)\n",
+    "anova(fit.tats)"
     ]
    },
    {
@@ -203,7 +160,14 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "ggplotRegression(fit)"
+    "ggplotRegression(fit.tats)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now combine the results into one dataframe for easy comparison."
     ]
    },
    {
@@ -212,9 +176,11 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "fit <- lm(heat ~ TA + TS, data = cemheat)\n",
-    "summary(fit)\n",
-    "anova(fit)"
+    "fits <- list(fit.ta, fit.ts, fit.tats)\n",
+    "data.frame(\n",
+    "    \"Vars\" = sapply(fits, function(x) toString(attr(summary(x)$terms, \"variables\")[-(1:2)]) ),\n",
+    "    \"Adj R^2\" = sapply(fits, function(x) summary(x)$adj.r.squared)\n",
+    ")"
     ]
    },
    {
@@ -230,7 +196,7 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "predict(fit, data.frame(\"TA\" = 15, \"TS\" = 55))"
+    "predict(fit.tats, data.frame(\"TA\" = 15, \"TS\" = 55))"
     ]
    },
    {
@@ -246,7 +212,7 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "autoplot(fit)"
+    "autoplot(fit.tats)"
     ]
    },
    {
@@ -279,9 +245,9 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "fit <- lm(ventil ~ oxygen, data = anaerobic)\n",
-    "summary(fit)\n",
-    "anova(fit)"
+    "fit.o <- lm(ventil ~ oxygen, data = anaerobic)\n",
+    "summary(fit.o)\n",
+    "anova(fit.o)"
     ]
    },
    {
@@ -290,7 +256,7 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "ggplotRegression(fit)"
+    "ggplotRegression(fit.o)"
     ]
    },
    {
@@ -299,7 +265,7 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "autoplot(fit)"
+    "autoplot(fit.o)"
     ]
    },
    {
@@ -307,7 +273,7 @@
     "metadata": {},
     "source": [
      "### Now form a new variable `oxy2`, say, by squaring oxygen.\n",
-    "(Create a new column in the `anearobic` dataframe which is `anaerobic$oxygen ^ 2`.) Perform the regression of ventil on `oxygen` and `oxy2`. Comment on the fit of this model according to the printed output (and with recourse to Figure 3.2 in Example 3.1)."
+    "(Create a new column in the `anaerobic` dataframe which is `anaerobic$oxygen ^ 2`.) Perform the regression of `ventil` on `oxygen` and `oxy2`. Comment on the fit of this model according to the printed output (and with recourse to Figure 3.2 in Example 3.1)."
     ]
    },
    {
@@ -326,9 +292,9 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "fit <- lm(ventil ~ oxygen + oxy2, data = anaerobic)\n",
-    "summary(fit)\n",
-    "anova(fit)"
+    "fit.o2 <- lm(ventil ~ oxygen + oxy2, data = anaerobic)\n",
+    "summary(fit.o2)\n",
+    "anova(fit.o2)"
     ]
    },
    {
@@ -344,7 +310,7 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "ggplotRegression(fit)"
+    "ggplotRegression(fit.o2)"
     ]
    },
    {
@@ -353,7 +319,7 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "autoplot(fit)"
+    "autoplot(fit.o2)"
     ]
    },
    {