library(UsingR)
##Example: Homedata
##The dataset homedata contains assessed values for Maplewood, NJ for the year 1970 and the year 2000. What
##is the shape of the distribution?
# Load the Maplewood, NJ assessed-value data (available once UsingR is loaded).
data(homedata)
# Evaluate each histogram inside the data frame with with() instead of
# attach()/detach(): nothing is left on the search path if a call fails, and
# the columns cannot be masked by same-named globals. Titles and axis labels
# are unchanged because the plotted expressions are still `y1970` / `y2000`.
with(homedata, hist(y1970))
with(homedata, hist(y2000))
##At first glance (figure 35), the 1970 data looks more normal, while the year 2000 data has a heavier tail. Let's see
##using our simple.eda function.
# Run the simple.eda summary plots on each year's assessed values.
# with() scopes the column lookup to homedata without attach()/detach(),
# so the search path is never modified.
with(homedata, simple.eda(y1970))
with(homedata, simple.eda(y2000))
##Example: CEO salaries
#The data set exec.pay gives the total direct compensation for CEOs at 200 large publicly traded companies in
#the U.S. for the year 2000 (in units of $100,000). What can we say about this distribution, besides that it looks like good
#work if you can get it? Using simple.eda yields
data(exec.pay) # or read in from file
simple.eda(exec.pay)
# The raw values do not look normal: the right tail blows up, so look at the
# data on a base-10 log scale instead.
# Only strictly positive values are kept because log10(0) is -Inf.
# log10() replaces the hand-rolled log(x) / log(10).
log.exec.pay <- log10(exec.pay[exec.pay > 0])
simple.eda(log.exec.pay)
## Example: Taxi time at EWR
## The dataset ewr contains taxi in and taxi out times at Newark airport (EWR). Let's see what the trends are.
# Load the Newark taxi-time data and inspect its column names.
data(ewr)
names(ewr)                 # only columns 3 through 10 are raw data
airnames <- names(ewr)     # remember the column names for plot titles later
ewr.actual <- ewr[, 3:10]  # restrict to the raw-data columns
# One box per retained column, all on a common axis.
boxplot(ewr.actual)
##All of them look skewed. Let's see if there is a difference between taxi in and out times.
# Draw one panel per raw-data column (3 through 10), splitting each column by
# the in/out indicator, on a 2-row by 4-column grid.
# par() returns the previous settings, so saving them lets us restore the
# caller's actual layout afterwards rather than assuming it was c(1, 1).
old_par <- par(mfrow = c(2, 4))
for (i in 3:10) {
  # `inorout` is resolved through `data = ewr` (no attach() needed); `ewr` and
  # `i` are not columns of the data, so they are taken from the calling
  # environment. The formula text, and hence the axis labels, is unchanged.
  boxplot(ewr[, i] ~ as.factor(inorout), data = ewr, main = airnames[i])
}
par(old_par) # put the graphics layout back as it was
library(lattice)
# Lattice views of the same raw-data columns.
# NOTE: lattice functions return trellis objects that are drawn when
# auto-printed at the console; wrap the calls in print() if this script is
# run via source() or inside a function.

# Bar chart of the matrix form, one panel per column on a 4 x 2 layout.
# `groups = FALSE` spells out what `group=F` reached only through partial
# argument matching and the reassignable shorthand `F`.
mewr <- as.matrix(ewr.actual)
barchart(mewr, groups = FALSE, layout = c(4, 2))

# stack() reshapes to long form: `values` holds the measurements and `ind`
# the name of the column each value came from.
sewr <- stack(ewr.actual)
# Box-and-whisker plot per original column.
bwplot(~sewr$values | factor(sewr$ind), layout = c(4, 2))
# Same conditioning, drawn as violin plots on a transposed layout.
bwplot(~sewr$values | factor(sewr$ind), layout = c(2, 4), panel = panel.violin)
#Example: Symmetric or skewed, Long or short?
#For unimodal data, there are 6 basic possibilities: the distribution is either symmetric or skewed, and its tails are short, regular
#or long. Here are some examples with random data from known distributions (figure 42).
## symmetric: short, regular then long
# Symmetric samples with progressively heavier tails. Each draw overwrites X
# and is shown as a horizontal boxplot. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
X <- runif(100) # uniform: short tails
boxplot(X, horizontal = TRUE, bty = "n")
X <- rnorm(100) # normal: regular tails
boxplot(X, horizontal = TRUE, bty = "n")
X <- rt(100, 2) # t with 2 degrees of freedom: long tails
boxplot(X, horizontal = TRUE, bty = "n")
## skewed: short, regular then long
# triangle distribution
# Skewed samples with progressively heavier tails. Each draw overwrites X and
# is shown as a horizontal boxplot.
# Short tail: values 1..6 drawn with weights 6, 5, ..., 1 (sample() rescales
# them to probabilities). `prob` and `TRUE` are written out in full instead of
# relying on partial argument matching (`p=`) and the reassignable `T`.
X <- sample(1:6, 100, replace = TRUE, prob = 7 - (1:6))
boxplot(X, horizontal = TRUE, bty = "n")
X <- abs(rnorm(200)) # half-normal: regular tail
boxplot(X, horizontal = TRUE, bty = "n")
X <- rexp(200) # exponential: long tail
boxplot(X, horizontal = TRUE, bty = "n")