# Frequencies and Crosstabs ----
#
# Tutorial script: one-way frequency tables, proportions, one-sample
# chi-square tests, and two- and three-way crosstabs, with lattice plots.
# Intended to be stepped through line by line at the console.

# barchart() and dotplot() below are lattice functions.
library(lattice)

# airquality and mtcars ship with R (package "datasets").
str(airquality)
if (interactive()) View(airquality) # the data viewer needs a live session

# Frequencies ----
table(airquality$Month)
table(airquality$Day)

# Notice that `summary` does not give you a
# frequency table, because this data is numeric (integer)
summary(airquality$Month)

# One solution is to change the type of the data
month <- factor(
  airquality$Month,
  labels = c("May", "June", "July", "Aug", "Sep")
)
summary(month)

# Percents ----
prop.table(table(airquality$Month))
prop.table(summary(month))

# Hypothesis tests - one sample chi-square ----
chisq.test(summary(month)) # default null: all categories equally likely
# Unequal expected proportions given as weights; rescale.p = TRUE
# rescales them so they sum to 1.
chisq.test(summary(month), p = c(2, 1, 1, 1, 2), rescale.p = TRUE)

# Crosstabs ----
table(airquality$Day, airquality$Month)

str(mtcars)
if (interactive()) View(mtcars)

# Number of cylinders by V- or straight engine
table(mtcars$cyl, mtcars$vs)

summary(lm(Ozone ~ Month, data = airquality))
tapply(airquality$Ozone, airquality$Month, mean, na.rm = TRUE)

# NOTE(review): `hsb` is not created anywhere in this script. It must
# already exist in the workspace as a data frame with (at least) the
# columns `gender`, `read`, `ses` and `schtyp` used below -- confirm
# where it is loaded.
summary(hsb$gender) # This works on factors,
                    # if there aren't too many categories
summary(hsb$read)   # but not on numeric variables
summary(as.factor(hsb$read), maxsum = 10)

(gender.table <- table(hsb$gender)) # More generally useful
(read.tab <- table(hsb$read))       # Example with lots of categories
str(read.tab)                       # A vector with "named" elements
# Plot the frequency table itself (the original passed the base
# function `read.table` here by mistake).
barchart(read.tab)
sort(read.tab, decreasing = TRUE)
cumsum(read.tab / sum(read.tab))

# Percents ----
??percents
??proportions
prop.table(gender.table) # starting with the "table" object
prop.test(gender.table)
?prop.test
(gender.test <- prop.test(gender.table, correct = FALSE))
str(gender.test)

(read.prop <- prop.table(read.tab))
# Cumulative proportions; divide by the observed total rather than a
# hard-coded sample size so the result stays correct if the data change.
(cumsum(read.tab) / sum(read.tab))
barchart(read.prop, xlab = "proportion")

# Crosstabs ----
??crosstabulation
(ses.sch <- with(hsb, table(ses, schtyp)))
xtabs(~ ses + schtyp, data = hsb) # alt a formula-style command
str(ses.sch)
methods(class = "table")
summary(ses.sch)
barchart(ses.sch, auto.key = TRUE)

(ses.read <- with(hsb, table(read, ses)))

prop.table(ses.sch)                  # cell percents
(ses.prop <- prop.table(ses.sch, 1)) # rows, dimension "1"
# The lattice argument is `groups` (the original `group` only worked via
# partial matching). `beside` is a graphics::barplot() argument, not a
# lattice one, so it is dropped here.
barchart(ses.prop, groups = FALSE)
prop.table(ses.sch, 2)               # columns, dimension "2"

margin.table(ses.sch, 1) # marginal counts
ses.sch
rowSums(ses.sch)
summary(ses.sch)
prop.table(margin.table(ses.sch, 1)) # marginal percents

??chisquare
chisq.test(ses.sch)

# Three-way tables ----
str(ses.tab <- with(hsb, table(gender, schtyp, ses)))
ftable(ses.tab)
prop.table(ses.tab, 2)
apply(ses.tab, 2, sum)
margin.table(ses.tab, 2)
dotplot(ses.tab, groups = FALSE)