X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;ds=sidebyside;f=section5.1solutions.ipynb;fp=section5.1solutions.ipynb;h=614f03e2764bd508d912bc48bfd6ece16cdc151d;hb=42eadd5803ad1ca3804d800cd25dd72071c751eb;hp=785c7017b925f3a860d7b6e9ab75ae0acdc20078;hpb=eb2e022359b221c7712502aa5802bb85c692c94f;p=ou-jupyter-r-demo.git diff --git a/section5.1solutions.ipynb b/section5.1solutions.ipynb index 785c701..614f03e 100644 --- a/section5.1solutions.ipynb +++ b/section5.1solutions.ipynb @@ -38,82 +38,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "hidden": true, - "init_cell": true - }, - "outputs": [], - "source": [ - "# From http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/\n", - "# Multiple plot function\n", - "#\n", - "# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)\n", - "# - cols: Number of columns in layout\n", - "# - layout: A matrix specifying the layout. If present, 'cols' is ignored.\n", - "#\n", - "# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),\n", - "# then plot 1 will go in the upper left, 2 will go in the upper right, and\n", - "# 3 will go all the way across the bottom.\n", - "#\n", - "multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {\n", - " library(grid)\n", - "\n", - " # Make a list from the ... 
arguments and plotlist\n", - " plots <- c(list(...), plotlist)\n", - "\n", - " numPlots = length(plots)\n", - "\n", - " # If layout is NULL, then use 'cols' to determine layout\n", - " if (is.null(layout)) {\n", - " # Make the panel\n", - " # ncol: Number of columns of plots\n", - " # nrow: Number of rows needed, calculated from # of cols\n", - " layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),\n", - " ncol = cols, nrow = ceiling(numPlots/cols))\n", - " }\n", - "\n", - " if (numPlots==1) {\n", - " print(plots[[1]])\n", - "\n", - " } else {\n", - " # Set up the page\n", - " grid.newpage()\n", - " pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))\n", - "\n", - " # Make each plot, in the correct location\n", - " for (i in 1:numPlots) {\n", - " # Get the i,j matrix positions of the regions that contain this subplot\n", - " matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))\n", - "\n", - " print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,\n", - " layout.pos.col = matchidx$col))\n", - " }\n", - " }\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hidden": true, "init_cell": true }, "outputs": [], "source": [ - "# From https://sejohnston.com/2012/08/09/a-quick-and-easy-function-to-plot-lm-results-in-r/\n", - "ggplotRegression <- function (fit) {\n", - "\n", - "require(ggplot2)\n", - "\n", - "ggplot(fit$model, aes_string(x = names(fit$model)[2], y = names(fit$model)[1])) + \n", - " geom_point() +\n", - " stat_smooth(method = \"lm\", col = \"red\") +\n", - " labs(title = paste(\"Adj R2 = \",signif(summary(fit)$adj.r.squared, 5),\n", - " \"Intercept =\",signif(fit$coef[[1]],5 ),\n", - " \" Slope =\",signif(fit$coef[[2]], 5),\n", - " \" P =\",signif(summary(fit)$coef[2,4], 5))) + \n", - " theme(plot.title = element_text(size=12))\n", - "}" + "source('plot_extensions.R')" ] }, { @@ -132,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, 
"metadata": {}, "outputs": [], "source": [ @@ -149,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -159,6 +88,13 @@ "multiplot(taheat, tsheat, cols=2)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Blah, blah, comment, blah." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -168,53 +104,115 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "fit <- lm(heat ~ TA, data = cemheat)\n", - "summary(fit)\n", - "anova(fit)" + "fit.ta <- lm(heat ~ TA, data = cemheat)\n", + "summary(fit.ta)\n", + "anova(fit.ta)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ - "ggplotRegression(fit)" + "ggplotRegression(fit.ta)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "fit <- lm(heat ~ TS, data = cemheat)\n", - "summary(fit)\n", - "anova(fit)" + "fit.ts <- lm(heat ~ TS, data = cemheat)\n", + "summary(fit.ts)\n", + "anova(fit.ts)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ - "ggplotRegression(fit)" + "ggplotRegression(fit.ts)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ - "fit <- lm(heat ~ TA + TS, data = cemheat)\n", - "summary(fit)\n", - "anova(fit)" + "fit.tats <- lm(heat ~ TA + TS, data = cemheat)\n", + "summary(fit.tats)\n", + "anova(fit.tats)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now combine the results into one dataframe for easy comparison." + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
VarsAdj.R.2
TA 0.4915797
TS 0.6359290
TA, TS 0.9744140
\n" + ], + "text/latex": [ + "\\begin{tabular}{r|ll}\n", + " Vars & Adj.R.2\\\\\n", + "\\hline\n", + "\t TA & 0.4915797\\\\\n", + "\t TS & 0.6359290\\\\\n", + "\t TA, TS & 0.9744140\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "Vars | Adj.R.2 | \n", + "|---|---|---|\n", + "| TA | 0.4915797 | \n", + "| TS | 0.6359290 | \n", + "| TA, TS | 0.9744140 | \n", + "\n", + "\n" + ], + "text/plain": [ + " Vars Adj.R.2 \n", + "1 TA 0.4915797\n", + "2 TS 0.6359290\n", + "3 TA, TS 0.9744140" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fits <- list(fit.ta, fit.ts, fit.tats)\n", + "data.frame(\n", + " \"Vars\" = sapply(fits, function(x) toString(attr(summary(x)$terms, \"variables\")[-(1:2)]) ),\n", + " \"Adj R^2\" = sapply(fits, function(x) summary(x)$adj.r.squared)\n", + ")" ] }, { @@ -226,11 +224,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 113, "metadata": {}, "outputs": [], "source": [ - "predict(fit, data.frame(\"TA\" = 15, \"TS\" = 55))" + "predict(fit.tats, data.frame(\"TA\" = 15, \"TS\" = 55))" ] }, { @@ -242,11 +240,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 114, "metadata": {}, "outputs": [], "source": [ - "autoplot(fit)" + "autoplot(fit.tats)" ] }, { @@ -258,7 +256,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "metadata": {}, "outputs": [], "source": [ @@ -275,7 +273,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -286,7 +284,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ @@ -295,7 +293,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ @@ -307,12 +305,12 @@ "metadata": {}, "source": [ "### Now form a new variable `oxy2`, say, by squaring oxygen.\n", - "(Create a new 
column in the `anearobic` dataframe which is `anaerobic$oxygen ^ 2`.) Perform the regression of ventil on `oxygen` and `oxy2`. Comment on the fit of this model according to the printed output (and with recourse to Figure 3.2 in Example 3.1)." + "(Create a new column in the `anaerobic` dataframe which is `anaerobic$oxygen ^ 2`.) Perform the regression of `ventil` on `oxygen` and `oxy2`. Comment on the fit of this model according to the printed output (and with recourse to Figure 3.2 in Example 3.1)." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ @@ -322,7 +320,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ @@ -340,7 +338,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ @@ -349,7 +347,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [