Validación cruzada y Bootstrap
El conjunto de validación
# --- Validation-set approach (seed 1) --------------------------------------
# Randomly split the 392 Auto observations in half, fit on the training
# half, and estimate test MSE on the held-out half.
library(ISLR)
## Warning: package 'ISLR' was built under R version 3.1.1
set.seed(1)
# `train` holds 196 row indices drawn without replacement from 1:392.
train <- sample(x=392, size=196)
# Degree-1 fit on the training half only.
lm.fit <- lm(mpg ~ horsepower, data=Auto, subset=train)
# NOTE(review): attach() is discouraged; the bare `mpg` references below
# (and the lm() call in the seed-2 section) depend on it.
attach(Auto)
# Validation MSE: mean squared error over the rows NOT in `train`.
mean((mpg - predict(lm.fit,Auto))[-train]^2)
## [1] 26.14
# Quadratic fit: validation MSE drops markedly.
lm.fit2 <- lm(mpg ~ poly(horsepower,2), data=Auto, subset=train)
mean((mpg - predict(lm.fit2,Auto))[-train]^2)
## [1] 19.82
# Cubic fit: little further improvement over the quadratic.
lm.fit3 <- lm(mpg ~ poly(horsepower,3), data=Auto, subset=train)
mean((mpg - predict(lm.fit3,Auto))[-train]^2)
## [1] 19.78
# --- Validation-set approach (seed 2) --------------------------------------
# A different random split yields different (but qualitatively similar)
# validation-MSE estimates, illustrating the variability of this approach.
set.seed(2)
train <- sample(x=392, size=196)
# FIX: the original call omitted `data=Auto` and silently relied on the
# earlier attach(Auto); make the data source explicit like every other fit.
lm.fit <- lm(mpg ~ horsepower, data=Auto, subset=train)
# Use Auto$mpg explicitly rather than depending on the attached search path.
mean((Auto$mpg - predict(lm.fit,Auto))[-train]^2)
## [1] 23.3
lm.fit2 <- lm(mpg ~ poly(horsepower,2), data=Auto, subset=train)
mean((Auto$mpg - predict(lm.fit2,Auto))[-train]^2)
## [1] 18.9
lm.fit3 <- lm(mpg ~ poly(horsepower,3), data=Auto, subset=train)
mean((Auto$mpg - predict(lm.fit3,Auto))[-train]^2)
## [1] 19.26
Validación cruzada leave-one-out
# --- Leave-one-out cross-validation (LOOCV) --------------------------------
# glm() with no `family` argument performs ordinary least squares, but
# returns an object that cv.glm() accepts. Same coefficients as lm().
glm.fit <- glm(mpg ~ horsepower, data=Auto)
coef(glm.fit)
## (Intercept) horsepower
## 39.9359 -0.1578
library(boot)
# With no K argument, cv.glm() performs LOOCV (K = n).
cv.err <- cv.glm(Auto, glm.fit)
# delta[1] is the raw CV estimate of test MSE; delta[2] is an adjusted
# version — the printed output shows they coincide here.
cv.err$delta
## [1] 24.23 24.23
# LOOCV error for polynomial fits of degree 1 through 5.
cv.error <- rep(0,5)
for (i in 1:5){
glm.fit <- glm(mpg ~ poly(horsepower,i), data=Auto)
cv.error[i] <- cv.glm(Auto, glm.fit)$delta[1]
}
# Sharp drop from degree 1 to 2; little change beyond degree 2.
cv.error
## [1] 24.23 19.25 19.33 19.42 19.03
Validación cruzada K-fold
# --- K-fold cross-validation (K = 10) --------------------------------------
# Folds are formed randomly, so fix the seed for reproducibility.
set.seed(17)
# 10-fold CV error for polynomial fits of degree 1 through 10.
cv.error.10 <- rep(0,10)
for (i in 1:10){
glm.fit <- glm(mpg ~ poly(horsepower,i), data=Auto)
cv.error.10[i] <- cv.glm(Auto, glm.fit, K=10)$delta[1]
}
# Same pattern as LOOCV: big gain at degree 2, flat afterwards —
# but computed far faster than n model fits.
cv.error.10
## [1] 24.21 19.19 19.31 19.34 18.88 19.02 18.90 19.71 18.95 19.50
Bootstrap
# Bootstrap statistic for the minimum-variance two-asset portfolio weight:
#   alpha = (var(Y) - cov(X,Y)) / (var(X) + var(Y) - 2*cov(X,Y))
#
# data  : a data frame with numeric columns X and Y (asset returns).
# index : row indices defining the (re)sample on which to estimate alpha.
# Returns the estimated allocation alpha for asset X.
#
# FIX: use `<-` for assignment (tidyverse style) and return the final
# expression directly instead of wrapping it in return().
alpha.fn <- function(data, index) {
  X <- data$X[index]
  Y <- data$Y[index]
  (var(Y) - cov(X, Y)) / (var(X) + var(Y) - 2 * cov(X, Y))
}
# Point estimate of alpha on the full Portfolio data (100 observations).
alpha.fn(Portfolio, 1:100)
## [1] 0.5758
set.seed(1)
# One hand-rolled bootstrap draw: resample 100 rows with replacement.
# FIX: spell out TRUE instead of the reassignable shorthand T.
alpha.fn(Portfolio, sample(x=100, size=100, replace=TRUE))
## [1] 0.5964
# Full bootstrap: 1000 resamples; boot() reports the original estimate,
# the bootstrap bias, and the bootstrap standard error of alpha.
boot(Portfolio, alpha.fn, R=1000)
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = Portfolio, statistic = alpha.fn, R = 1000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 0.5758 -7.315e-05 0.08862
Evaluando la precisión de la regresión lineal simple
# Bootstrap statistic for simple linear regression: the intercept and
# slope of mpg ~ horsepower fit on the rows selected by `index`.
boot.fn <- function(data, index) {
  fit <- lm(mpg ~ horsepower, data = data, subset = index)
  coef(fit)
}
# Coefficients on the full data set (all 392 Auto rows).
boot.fn(Auto, 1:392)
## (Intercept) horsepower
## 39.9359 -0.1578
set.seed(1)
# Two hand-rolled bootstrap draws: coefficients vary across resamples.
# FIX: spell out TRUE instead of the reassignable shorthand T.
boot.fn(Auto,sample(x=392, size=392, replace=TRUE))
## (Intercept) horsepower
## 38.7387 -0.1482
boot.fn(Auto,sample(x=392, size=392, replace=TRUE))
## (Intercept) horsepower
## 40.0383 -0.1596
# Bootstrap standard errors for the intercept and slope
# (1000 resamples of the 392 Auto rows).
boot(Auto,boot.fn, 1000)
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = Auto, statistic = boot.fn, R = 1000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 39.9359 0.0297219 0.860008
## t2* -0.1578 -0.0003082 0.007404
# Compare with the formula-based standard errors from summary.lm():
# 0.717 vs 0.860 (intercept) and 0.00645 vs 0.00740 (slope) — the
# bootstrap estimates are somewhat larger here.
summary(lm(mpg ~ horsepower, data=Auto))$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.9359 0.717499 55.66 1.220e-187
## horsepower -0.1578 0.006446 -24.49 7.032e-81
# Bootstrap statistic for the quadratic model: the three coefficients of
# mpg ~ horsepower + horsepower^2 fit on the rows selected by `index`.
# (Redefines boot.fn from the linear-model section above.)
boot.fn <- function(data, index) {
  fit <- lm(mpg ~ horsepower+I(horsepower^2), data = data, subset = index)
  coef(fit)
}
set.seed(1)
# Bootstrap standard errors for the three quadratic-model coefficients.
boot(Auto,boot.fn,1000)
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = Auto, statistic = boot.fn, R = 1000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 56.900100 6.098e-03 2.0944856
## t2* -0.466190 -1.777e-04 0.0334124
## t3* 0.001231 1.324e-06 0.0001208
# Formula-based standard errors for the quadratic fit: these agree much
# more closely with the bootstrap values (e.g. 1.80 vs 2.09 for the
# intercept) than in the linear case above.
summary(lm(mpg ~ horsepower+I(horsepower^2), data=Auto))$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.900100 1.8004268 31.60 1.741e-109
## horsepower -0.466190 0.0311246 -14.98 2.289e-40
## I(horsepower^2) 0.001231 0.0001221 10.08 2.196e-21