Chapter 6 Answers
6.1 Answer for Exercise4.4
library(ggplot2)
data("mpg")
A1
length(unique(mpg$manufacturer))
## [1] 15
A2
table(mpg[mpg$class=="suv","manufacturer"])
##
## chevrolet dodge ford jeep land rover lincoln mercury
## 9 7 9 8 4 3 4
## nissan subaru toyota
## 4 6 8
A3
grepl("auto",mpg$trans)&mpg$cyl==4,] mpg[
## # A tibble: 41 x 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto(~ f 18 29 p compa~
## 2 audi a4 2 2008 4 auto(~ f 21 30 p compa~
## 3 audi a4 quat~ 1.8 1999 4 auto(~ 4 16 25 p compa~
## 4 audi a4 quat~ 2 2008 4 auto(~ 4 19 27 p compa~
## 5 chevrolet malibu 2.4 1999 4 auto(~ f 19 27 r midsi~
## 6 chevrolet malibu 2.4 2008 4 auto(~ f 22 30 r midsi~
## 7 dodge caravan~ 2.4 1999 4 auto(~ f 18 24 r miniv~
## 8 honda civic 1.6 1999 4 auto(~ f 24 32 r subco~
## 9 honda civic 1.6 1999 4 auto(~ f 24 32 r subco~
## 10 honda civic 1.8 2008 4 auto(~ f 25 36 r subco~
## # ... with 31 more rows
A4
$trans_subtype <- gsub(".*\\((.*)\\)","\\1",mpg$trans) mpg
A5
$model=="toyota tacoma 4wd","model"] <- "tacoma 4wd"
mpg[mpg#or
$model <- gsub("^toyota ", "",mpg$model) mpg
A6
$ID <- paste0(toupper(substr(mpg$manufacturer,1,3)),"_",mpg$year) mpg
A7
table(mpg[grepl('\\d',mpg$model),"manufacturer"])
##
## audi chevrolet dodge ford jeep lincoln mercury nissan
## 18 9 37 16 8 3 4 4
## toyota
## 15
A8
<- unique(mpg$manufacturer)
manufacturers sapply(manufacturers, function(x){
mean(mpg[mpg$manufacturer==x,]$cty)
})
## audi chevrolet dodge ford honda hyundai jeep
## 17.61111 15.00000 13.13514 14.00000 24.44444 18.64286 13.50000
## land rover lincoln mercury nissan pontiac subaru toyota
## 11.50000 11.33333 13.25000 18.07692 17.00000 19.28571 18.52941
## volkswagen
## 20.92593
A9
<- unique(mpg$manufacturer)
manufacturers sapply(manufacturers, function(x){
<- mean(mpg[mpg$manufacturer==x&grepl('\\d',mpg$model),]$cty)
x if(is.na(x)){0}else{x}
})
## audi chevrolet dodge ford honda hyundai jeep
## 17.61111 12.66667 13.13514 12.93750 0.00000 0.00000 13.50000
## land rover lincoln mercury nissan pontiac subaru toyota
## 0.00000 11.33333 13.25000 13.75000 0.00000 0.00000 14.93333
## volkswagen
## 0.00000
A10
<- data.frame(A=character(), B=character(), C=character(), D=character(),
mpg_summary E=character(), F=character(), G=character(), H=character(),
I=character(), J=character(), K=character(), L=character())
for(x in unique(mpg$manufacturer)){
<- mpg[mpg$manufacturer==x,]
subset <- x
A <- length(unique(subset$model))
B <- mean(subset$displ)
C <- paste0(min(subset$year),"-",max(subset$year))
D <- mean(subset$cyl)
E <- paste(names(which(table(subset$trans)==max(table(subset$trans)))),collapse = "|")
F <- paste(names(which(table(subset$trans)==min(table(subset$trans)))),collapse = "|")
G <- max(subset$cty)
H <- min(subset$hwy)
I <- paste(unique(subset$fl),collapse = "|")
J <- subset$class[which.max(nchar(subset$class))]
K
<- max(gsub(".(\\d)","\\1",subset$trans_subtype[grepl("\\d",subset$trans_subtype)]))
largest_value <- paste(unique(subset$trans_subtype[grepl(largest_value,subset$trans_subtype)]), collapse = "|")
L nrow(mpg_summary)+1,] <- c(A,B,C,D,E,F,G,H,I,J,K,L)
mpg_summary[ }
A11
<-function(x){
get_hwy_ranage <- min(mpg[mpg$manufacturer==x,"hwy"])
lower <- max(mpg[mpg$manufacturer==x,"hwy"])
upper print(paste0(lower,"-",upper))
}
6.2 Answer for Exercise5.4
PlantGrowth
## weight group
## 1 4.17 ctrl
## 2 5.58 ctrl
## 3 5.18 ctrl
## 4 6.11 ctrl
## 5 4.50 ctrl
## 6 4.61 ctrl
## 7 5.17 ctrl
## 8 4.53 ctrl
## 9 5.33 ctrl
## 10 5.14 ctrl
## 11 4.81 trt1
## 12 4.17 trt1
## 13 4.41 trt1
## 14 3.59 trt1
## 15 5.87 trt1
## 16 3.83 trt1
## 17 6.03 trt1
## 18 4.89 trt1
## 19 4.32 trt1
## 20 4.69 trt1
## 21 6.31 trt2
## 22 5.12 trt2
## 23 5.54 trt2
## 24 5.50 trt2
## 25 5.37 trt2
## 26 5.29 trt2
## 27 4.92 trt2
## 28 6.15 trt2
## 29 5.80 trt2
## 30 5.26 trt2
A1
<- PlantGrowth[PlantGrowth[, 2] == "ctrl",1]
ctrl_grp <- PlantGrowth[PlantGrowth[, 2] == "trt1",1] trt1_grp
A2
plot(density(ctrl_grp), main = "Control Group vs Treatment 1", xlab = "weight", xlim = c(2.5,7.5), ylim = c(0,0.65))
par(new = TRUE)
plot(density(trt1_grp), main = "", axes = FALSE, xlab = "", ylab = "", lty = 2, xlim = c(2.5,7.5), ylim = c(0,0.65))
A3
t.test(ctrl_grp, trt1_grp)
##
## Welch Two Sample t-test
##
## data: ctrl_grp and trt1_grp
## t = 1.1913, df = 16.524, p-value = 0.2504
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2875162 1.0295162
## sample estimates:
## mean of x mean of y
## 5.032 4.661