# The Graphics package
#---------------------

# Setup
#-----------------
# install.packages("faraway")  # one-time install, if the package is missing
library(faraway)
str(nels88)
summary(nels88)

# The plot() function has different methods for
# different kinds of objects.  Contrast
plot(nels88$paredu)  # a factor
plot(nels88)         # a data frame
methods(plot)

# Univariate plots
#-----------------
# par() returns the previous settings, saved here so they can be restored.
opar <- par(mfrow = c(2, 2))
# for a factor, plot() returns a bar chart
plot(nels88$paredu, main = "Factor plot()")
# ERROR expected: character data first must be converted to a factor.
# The call is wrapped in try() so the failure is still reported, but the
# script keeps running when it is source()d or run with Rscript, and the
# par(opar) below is still reached.
try(plot(as.character(nels88$paredu), main = "Character"))
# for a numeric vector, a "scatterplot"
plot(nels88$math, main = "Numeric plot()")
# boolean (logical) values coerced to numeric
plot(nels88$math > mean(nels88$math), main = "Boolean plot()")
par(opar)

# Converting the logical vector to a factor gives a bar chart of counts.
plot(
  factor(nels88$math > mean(nels88$math)),
  main = "Boolean plot()",
  ylab = "Count",
  xlab = "Above average"
)

# Notice the order of the categories in these factors is alphabetical.
# This is probably an artifact of reading character data into a factor.
# Rearrange the factor levels by reinvoking factor().  This is really
# an ordered factor.
# Rebuild the factor with its levels listed in the intended order and
# mark it as ordered.
parentedu <- factor(
  nels88$paredu,
  levels = c("lesshs", "hs", "college", "ba", "ma", "phd"),
  ordered = TRUE
)
# Check your logic: cross-tabulate the original against the releveled factor.
xtabs(~ paredu + parentedu, data = nels88)
plot(parentedu)

# Other Univariate factor plots
#------------------------------
opar <- par(mfrow = c(2, 2))
plot(nels88$race, main = "plot()")
# note that pie() and barplot() start from tabular data
pie(table(nels88$race), main = "pie()")
barplot(table(nels88$race), main = "barplot()", ylab = "Count")
par(opar)

# Histogram
#-------------------
opar <- par(mfrow = c(1, 2))
# Left panel: density-scaled histogram on its own.
hist(nels88$ses, probability = TRUE)
# Right panel: the same histogram with a standard normal density on top.
# hist() invisibly returns the histogram object, kept in `a` as before.
a <- hist(nels88$ses, probability = TRUE)
# add = TRUE draws the curve in the histogram's existing coordinate system,
# so there is no need for par(new = TRUE), a hand-matched ylim, or blanked
# axis labels, and the axes are not drawn a second time.
curve(dnorm(x), add = TRUE, col = "red")
par(opar)

# Q-Q Plot
#-------------------
qqnorm(nels88$ses)
qqline(nels88$ses, col = "blue")

# Stem Plot
#-------------------
stem(nels88$ses)  # prints directly to the Console

# Bivariate (conditional) statistics
#-----------------------------------
# Each call summarizes math separately within every level of paredu.
tapply(nels88$math, nels88$paredu, mean)
tapply(nels88$math, nels88$paredu, sd)
tapply(nels88$math, nels88$paredu, min)
tapply(nels88$math, nels88$paredu, max)
tapply(nels88$math, nels88$paredu, length)
tapply(nels88$math, nels88$paredu, quantile)

# Bivariate graphs
#-----------------------------------
plot(nels88$ses, nels88$math)          # scatterplot, two parameters
plot(math ~ ses, data = nels88)        # scatterplot, formula
plot(nels88$paredu, nels88$math)       # boxplot, two parameters
plot(math ~ paredu, data = nels88)     # boxplot, a "formula" parameter
boxplot(math ~ paredu, data = nels88)

# As x=,y= parameters, both vectors are treated as numeric
plot(nels88$math, nels88$paredu)       # scatterplot, two parameters
# As a formula, both vectors are treated as factors!!!
# Factor response against a numeric predictor, written as a formula:
# a "mosaic" plot, aka "spineplot".
plot(paredu ~ math, data = nels88)
spineplot(paredu ~ math, data = nels88)

# Factor against factor.
plot(nels88$race, nels88$paredu)         # spine plot by two parameters
plot(paredu ~ race, data = nels88)       # spine plot by formula
mosaicplot(paredu ~ race, data = nels88)
spineplot(paredu ~ race, data = nels88)

# Scatterplot matrix revisited
#-----------------------------
# Columns 3 to 5 of the data frame; plot() on a data frame and pairs()
# are shown side by side for comparison.
plot(nels88[, 3:5])
pairs(nels88[, 3:5])

# Bar chart from a one-way table of the reordered factor.
ptab <- xtabs(~ parentedu)
barplot(ptab)

# Graphing counts and percents
plot(parentedu)  # basic: counts
# Percent scale: each count times 100, divided by the number of observations.
barplot(
  table(parentedu) * 100 / length(parentedu),
  ylab = "Percent"
)