X-Git-Url: https://git.njae.me.uk/?p=ou-jupyter-r-demo.git;a=blobdiff_plain;f=section5.1solutions.ipynb;h=1ef5c8ed7b6f0fe166321e15c9debcbac63adbd7;hp=4c7507abcabe361a0a4de9dc6951fdad9355f2d8;hb=923ed635a8798313db455184de7a575f49e7d05c;hpb=ff5d747cb96e0759c2ba2c7f8e057a78ce2206f4 diff --git a/section5.1solutions.ipynb b/section5.1solutions.ipynb index 4c7507a..1ef5c8e 100644 --- a/section5.1solutions.ipynb +++ b/section5.1solutions.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "hidden": true, "init_cell": true @@ -36,83 +36,13 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { - "hidden": true, "init_cell": true }, "outputs": [], "source": [ - "# Multiple plot function\n", - "#\n", - "# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)\n", - "# - cols: Number of columns in layout\n", - "# - layout: A matrix specifying the layout. If present, 'cols' is ignored.\n", - "#\n", - "# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),\n", - "# then plot 1 will go in the upper left, 2 will go in the upper right, and\n", - "# 3 will go all the way across the bottom.\n", - "#\n", - "multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {\n", - " library(grid)\n", - "\n", - " # Make a list from the ... arguments and plotlist\n", - " plots <- c(list(...), plotlist)\n", - "\n", - " numPlots = length(plots)\n", - "\n", - " # If layout is NULL, then use 'cols' to determine layout\n", - " if (is.null(layout)) {\n", - " # Make the panel\n", - " # ncol: Number of columns of plots\n", - " # nrow: Number of rows needed, calculated from # of cols\n", - " layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),\n", - " ncol = cols, nrow = ceiling(numPlots/cols))\n", - " }\n", - "\n", - " if (numPlots==1) {\n", - " print(plots[[1]])\n", - "\n", - " } else {\n", - " # Set up the page\n", - " grid.newpage()\n", - " pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))\n", - "\n", - " # Make each plot, in the correct location\n", - " for (i in 1:numPlots) {\n", - " # Get the i,j matrix positions of the regions that contain this subplot\n", - " matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))\n", - "\n", - " print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,\n", - " layout.pos.col = matchidx$col))\n", - " }\n", - " }\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "hidden": true, - "init_cell": true - }, - "outputs": [], - "source": [ - "# From https://sejohnston.com/2012/08/09/a-quick-and-easy-function-to-plot-lm-results-in-r/\n", - "ggplotRegression <- function (fit) {\n", - "\n", - "require(ggplot2)\n", - "\n", - "ggplot(fit$model, aes_string(x = names(fit$model)[2], y = names(fit$model)[1])) + \n", - " geom_point() +\n", - " stat_smooth(method = \"lm\", col = \"red\") +\n", - " labs(title = paste(\"Adj R2 = \",signif(summary(fit)$adj.r.squared, 5),\n", - " \"Intercept =\",signif(fit$coef[[1]],5 ),\n", - " \" Slope =\",signif(fit$coef[[2]], 5),\n", - " \" P =\",signif(summary(fit)$coef[2,4], 5))) + \n", - " theme(plot.title = element_text(size=12))\n", - "}" + "source('plot_extensions.R')" ] }, { @@ -131,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -202,7 +132,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -223,6 +153,13 @@ "multiplot(taheat, tsheat, cols=2)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Blah, blah, comment, blah." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -232,7 +169,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -300,14 +237,14 @@ } ], "source": [ - "fit <- lm(heat ~ TA, data = cemheat)\n", - "summary(fit)\n", - "anova(fit)" + "fit.ta <- lm(heat ~ TA, data = cemheat)\n", + "summary(fit.ta)\n", + "anova(fit.ta)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -327,12 +264,12 @@ } ], "source": [ - "ggplotRegression(fit)" + "ggplotRegression(fit.ta)" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -400,14 +337,14 @@ } ], "source": [ - "fit <- lm(heat ~ TS, data = cemheat)\n", - "summary(fit)\n", - "anova(fit)" + "fit.ts <- lm(heat ~ TS, data = cemheat)\n", + "summary(fit.ts)\n", + "anova(fit.ts)" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -427,12 +364,12 @@ } ], "source": [ - "ggplotRegression(fit)" + "ggplotRegression(fit.ts)" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -505,9 +442,71 @@ } ], "source": [ - "fit <- lm(heat ~ TA + TS, data = cemheat)\n", - "summary(fit)\n", - "anova(fit)" + "fit.tats <- lm(heat ~ TA + TS, data = cemheat)\n", + "summary(fit.tats)\n", + "anova(fit.tats)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now combine the results into one dataframe for easy comparison." + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
VarsAdj.R.2
TA 0.4915797
TS 0.6359290
TA, TS 0.9744140
\n" + ], + "text/latex": [ + "\\begin{tabular}{r|ll}\n", + " Vars & Adj.R.2\\\\\n", + "\\hline\n", + "\t TA & 0.4915797\\\\\n", + "\t TS & 0.6359290\\\\\n", + "\t TA, TS & 0.9744140\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "Vars | Adj.R.2 | \n", + "|---|---|---|\n", + "| TA | 0.4915797 | \n", + "| TS | 0.6359290 | \n", + "| TA, TS | 0.9744140 | \n", + "\n", + "\n" + ], + "text/plain": [ + " Vars Adj.R.2 \n", + "1 TA 0.4915797\n", + "2 TS 0.6359290\n", + "3 TA, TS 0.9744140" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fits <- list(fit.ta, fit.ts, fit.tats)\n", + "data.frame(\n", + " \"Vars\" = sapply(fits, function(x) toString(attr(summary(x)$terms, \"variables\")[-(1:2)]) ),\n", + " \"Adj R^2\" = sapply(fits, function(x) summary(x)$adj.r.squared)\n", + ")" ] }, { @@ -519,7 +518,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 113, "metadata": {}, "outputs": [ { @@ -543,7 +542,7 @@ } ], "source": [ - "predict(fit, data.frame(\"TA\" = 15, \"TS\" = 55))" + "predict(fit.tats, data.frame(\"TA\" = 15, \"TS\" = 55))" ] }, { @@ -555,7 +554,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 114, "metadata": {}, "outputs": [ { @@ -575,7 +574,7 @@ } ], "source": [ - "autoplot(fit)" + "autoplot(fit.tats)" ] }, { @@ -587,7 +586,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -620,7 +619,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 36, "metadata": {}, "outputs": [ { @@ -695,7 +694,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 37, "metadata": {}, "outputs": [ { @@ -720,7 +719,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 38, "metadata": {}, "outputs": [ { @@ -748,12 +747,12 @@ "metadata": {}, "source": [ "### Now form a new variable `oxy2`, say, by squaring oxygen.\n", - "(Create a new column in the `anearobic` dataframe which is `anaerobic$oxygen ^ 2`.) Perform the regression of ventil on `oxygen` and `oxy2`. Comment on the fit of this model according to the printed output (and with recourse to Figure 3.2 in Example 3.1)." + "(Create a new column in the `anearobic` dataframe which is `anaerobic$oxygen ^ 2`.) Perform the regression of `ventil` on `oxygen` and `oxy2`. Comment on the fit of this model according to the printed output (and with recourse to Figure 3.2 in Example 3.1)." ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 39, "metadata": {}, "outputs": [ { @@ -817,7 +816,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 40, "metadata": {}, "outputs": [ { @@ -904,7 +903,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -929,7 +928,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 42, "metadata": {}, "outputs": [ {