#--------------------------------- # Three simple models #--------------------------------- # The main points with respect to programming R are: # (i) lm() and other modeling functions return # "lm" objects, aka "fit" objects, # (ii) the language for specifying formulas is a # little different than the language for doing # math, and the two can be used simultaneously # (iii) so-called "extractor" functions, aka "accessor" # functions work on "lm" objects, and # (iv) many generic functions have "lm" methods. #--------------------------------- # Example 1 - One sample t-test # and its related linear model #--------------------------------- y <- rnorm(25) + 5 ### y <- rnorm(25, mean=5) hist(y) t.test(y, mu=5) t.test(y) # the mu=0 model fit1 <- lm(y ~ 1) # In the related linear model, we estimate mu fit1 # from print.lm() coef(fit1) confint(fit1) summary(fit1) # from summary.lm() str(fit1) plot(fit1) # from plot.lm() qqnorm(residuals(fit1)) qqline(residuals(fit1)) #--------------------------------- # Example 2 - Two group t-test # paired t-test, and related # linear models #--------------------------------- sleep ?sleep with(sleep, tapply(extra, group, mean)) with(sleep, tapply(extra, group, sd)) t.test(extra ~ group, data=sleep, var.equal=TRUE) summary(fit2 <- lm(extra ~ group, data=sleep)) summary(fit3 <- update(fit2, . ~ . -group)) #plot(fit2) # These data are actually paired/repeated measures plot(extra ~ ID, data=sleep) plot(extra ~ as.integer(ID), data=sleep) points(as.integer(sleep$ID)[1:10], sleep$extra[1:10], col="red") t.test(extra ~ group, data=sleep, paired=TRUE) # Doing math within a model formula summary(fit2a <- lm(extra[group==1]-extra[group==2] ~ 1, data=sleep)) # A similar model summary(fit2b <- lm(extra ~ ID+group, data=sleep)) # How do the residuals from fit2a relate to the residuals in # fit2b? abs(abs(residuals(fit2a)) - 2*abs(residuals(fit2b))) abs(abs(residuals(fit2a)) - 2*abs(residuals(fit2b))) < .Machine$double.eps #--------------------------------- # Example 3 - Classic Bivariate # Regression #--------------------------------- library(faraway) ?nels88 str(nels88) cor(nels88$math, nels88$ses) plot(math ~ ses, data=nels88) summary(fit3 <- lm(math ~ ses, data=nels88)) plot(math ~ ses, data=nels88) abline(fit3) segments(x0=nels88$ses, y0=predict(fit3), y1=nels88$math) #--------------------------------- # Example 4 - A Factor and a # Covariate #--------------------------------- x <- rep(c(0, 1), 20) z <- runif(40, -5, 5) y <- 3 + 6*x + 2.5*z + rnorm(40, sd=2.5) plot(y ~ z) points(z[x==1], y[x==1], col="blue") plot(y ~ factor(x)) fit4 <- lm(y ~ 1 + x +z) fit4 summary(fit4) anova(fit4) plot(y ~ z) points(predict(fit4) ~ z, pch="+") segments(z, predict(fit4), y1=y) fit5 <- lm(y ~ x * z) # crossed summary(fit5) plot(y ~ z) points(predict(fit5) ~ z, pch="+") fit6 <- lm(math ~ (sex + race)*ses, data=nels88) summary(fit6) anova(fit6) fit7 <- lm(math ~ (sex + race)*ses - sex:ses - sex, data=nels88) anova(fit7) fit7a <- update(fit6, .~.-sex-sex:ses) anova(fit7a) summary(fit7a) anova(fit7a, fit6) plot(math ~ ses, data=nels88) points(predict(fit7) ~ ses, data=nels88, pch="+") levels(nels88$race) raceb <- relevel(nels88$race, "Black") summary(update(fit7, .~.-race+raceb))