# The Graphics package ----
#
# Demonstrates how the generic plot() dispatches to different methods
# depending on the class of its argument, using the nels88 data set
# shipped with the faraway package.

# Setup ----
# install.packages("faraway")
library(faraway)
str(nels88)
summary(nels88)

# The plot() function has different methods for
# different kinds of objects. Contrast
plot(nels88$paredu) # a factor
plot(nels88)        # a data frame
methods(plot)

# Univariate plots ----
# Save the current graphics settings so they can be restored afterwards.
opar <- par(mfrow = c(2, 2))
# for a factor, plot() returns a bar chart
plot(nels88$paredu, main = "Factor plot()")
# ERROR, character data first must be converted to a factor.
# Wrapped in try() so the failure is still reported but does not abort
# the rest of the script when it is run with source().
try(plot(as.character(nels88$paredu), main = "Character"))
# for a numeric vector, a "scatterplot"
plot(nels88$math, main = "Numeric plot()")
# boolean values coerced to numeric
plot(nels88$math > mean(nels88$math), main = "Boolean plot()")
par(opar)

# Converting the logical vector to a factor gives a bar chart of counts.
plot(
  factor(nels88$math > mean(nels88$math)),
  main = "Boolean plot()",
  ylab = "Count",
  xlab = "Above average"
)

# Notice the order of the categories in these factors is alphabetical.
# This is probably an artifact of reading character data into a factor.
# Rearrange the factor levels by reinvoking factor(). This is really
# an ordered factor.
# Requires the nels88 data frame from the faraway package
# (library(faraway)).

# Re-create parent education as an ordered factor with the levels in
# their natural order rather than alphabetical order.
parentedu <- factor(
  nels88$paredu,
  levels = c("lesshs", "hs", "college", "ba", "ma", "phd"),
  ordered = TRUE
)
xtabs(~ paredu + parentedu, data = nels88) # Check your logic
plot(parentedu)

# Other univariate factor plots ----
opar <- par(mfrow = c(2, 2))
plot(nels88$race, main = "plot()")
# note that pie() and barplot() start from tabular data
pie(table(nels88$race), main = "pie()")
barplot(table(nels88$race), main = "barplot()", ylab = "Count")
par(opar)

# Histogram ----
# Left panel: plain density-scaled histogram.
# Right panel: the same histogram with a standard normal density on top.
opar <- par(mfrow = c(1, 2))
hist(nels88$ses, probability = TRUE)
hist(nels88$ses, probability = TRUE)
# add = TRUE draws into the histogram's own coordinate system, so the
# curve spans the histogram's x-range and shares its axes. Overplotting
# with par(new = TRUE) would start a new plot on curve()'s default 0-1
# x-range and would not line up with the bars.
curve(dnorm, add = TRUE, col = "red")
par(opar)

# Q-Q plot ----
qqnorm(nels88$ses)
qqline(nels88$ses, col = "blue")

# Stem plot ----
stem(nels88$ses) # prints directly to the Console

# Bivariate (conditional) statistics ----
# Summaries of math score within each parent-education group.
tapply(nels88$math, nels88$paredu, mean)
tapply(nels88$math, nels88$paredu, sd)
tapply(nels88$math, nels88$paredu, min)
tapply(nels88$math, nels88$paredu, max)
tapply(nels88$math, nels88$paredu, length)
tapply(nels88$math, nels88$paredu, quantile)

# Bivariate graphs ----
plot(nels88$ses, nels88$math)       # scatterplot, two parameters
plot(math ~ ses, data = nels88)     # scatterplot, formula
plot(nels88$paredu, nels88$math)    # boxplot, two parameters
plot(math ~ paredu, data = nels88)  # boxplot, a "formula" parameter
boxplot(math ~ paredu, data = nels88)
# As x=,y= parameters, both vectors are treated as numeric
plot(nels88$math, nels88$paredu)    # scatterplot, two parameters
# As a formula with a factor response, plot() draws a spine plot instead:
# the numeric predictor is cut into intervals and treated like a factor!
# Requires the nels88 data frame from the faraway package and the
# ordered factor `parentedu` built from nels88$paredu earlier in the file.

# Factor response against a numeric predictor.
plot(paredu ~ math, data = nels88) # "mosaic" plot, aka "spineplot"
spineplot(paredu ~ math, data = nels88)

# Factor response against a factor predictor.
plot(nels88$race, nels88$paredu)   # spine plot by two parameters
plot(paredu ~ race, data = nels88) # spine plot by formula
mosaicplot(paredu ~ race, data = nels88)
spineplot(paredu ~ race, data = nels88)

# Scatterplot matrix revisited ----
# plot() on a data frame and pairs() both draw a scatterplot matrix.
plot(nels88[, 3:5])
pairs(nels88[, 3:5])

# Bar chart from a one-way table of counts.
ptab <- xtabs(~ parentedu)
barplot(ptab)

# Graphing counts and percents ----
plot(parentedu) # basic
# Rescale the counts to percentages of all observations.
barplot(table(parentedu) * 100 / length(parentedu), ylab = "Percent")