Selección de modelos
Best subset selection
library(ISLR)
head(Hitters)
## AtBat Hits HmRun Runs RBI Walks Years CAtBat CHits
## -Andy Allanson 293 66 1 30 29 14 1 293 66
## -Alan Ashby 315 81 7 24 38 39 14 3449 835
## -Alvin Davis 479 130 18 66 72 76 3 1624 457
## -Andre Dawson 496 141 20 65 78 37 11 5628 1575
## -Andres Galarraga 321 87 10 39 42 30 2 396 101
## -Alfredo Griffin 594 169 4 74 51 35 11 4408 1133
## CHmRun CRuns CRBI CWalks League Division PutOuts Assists
## -Andy Allanson 1 30 29 14 A E 446 33
## -Alan Ashby 69 321 414 375 N W 632 43
## -Alvin Davis 63 224 266 263 A W 880 82
## -Andre Dawson 225 828 838 354 N E 200 11
## -Andres Galarraga 12 48 46 33 N E 805 40
## -Alfredo Griffin 19 501 336 194 A W 282 421
## Errors Salary NewLeague
## -Andy Allanson 20 NA A
## -Alan Ashby 10 475.0 N
## -Alvin Davis 14 480.0 A
## -Andre Dawson 3 500.0 N
## -Andres Galarraga 4 91.5 N
## -Alfredo Griffin 25 750.0 A
# Column names and dimensions of the raw data
colnames(Hitters)
## [1] "AtBat" "Hits" "HmRun" "Runs" "RBI"
## [6] "Walks" "Years" "CAtBat" "CHits" "CHmRun"
## [11] "CRuns" "CRBI" "CWalks" "League" "Division"
## [16] "PutOuts" "Assists" "Errors" "Salary" "NewLeague"
dim(Hitters)
## [1] 322 20
# Number of players whose Salary is missing
sum(is.na(Hitters[["Salary"]]))
## [1] 59
# Drop every row that contains a missing value, then confirm none remain
Hitters <- na.omit(Hitters)
dim(Hitters)
## [1] 263 20
sum(is.na(Hitters))
## [1] 0
library(leaps)
# Best subset selection with the default maximum model size
regfit.full <- regsubsets(Salary ~ ., Hitters)
summary(regfit.full)
## Subset selection object
## Call: regsubsets.formula(Salary ~ ., Hitters)
## 19 Variables (and intercept)
## Forced in Forced out
## AtBat FALSE FALSE
## Hits FALSE FALSE
## HmRun FALSE FALSE
## Runs FALSE FALSE
## RBI FALSE FALSE
## Walks FALSE FALSE
## Years FALSE FALSE
## CAtBat FALSE FALSE
## CHits FALSE FALSE
## CHmRun FALSE FALSE
## CRuns FALSE FALSE
## CRBI FALSE FALSE
## CWalks FALSE FALSE
## LeagueN FALSE FALSE
## DivisionW FALSE FALSE
## PutOuts FALSE FALSE
## Assists FALSE FALSE
## Errors FALSE FALSE
## NewLeagueN FALSE FALSE
## 1 subsets of each size up to 8
## Selection Algorithm: exhaustive
## AtBat Hits HmRun Runs RBI Walks Years CAtBat CHits CHmRun CRuns
## 1 ( 1 ) " " " " " " " " " " " " " " " " " " " " " "
## 2 ( 1 ) " " "*" " " " " " " " " " " " " " " " " " "
## 3 ( 1 ) " " "*" " " " " " " " " " " " " " " " " " "
## 4 ( 1 ) " " "*" " " " " " " " " " " " " " " " " " "
## 5 ( 1 ) "*" "*" " " " " " " " " " " " " " " " " " "
## 6 ( 1 ) "*" "*" " " " " " " "*" " " " " " " " " " "
## 7 ( 1 ) " " "*" " " " " " " "*" " " "*" "*" "*" " "
## 8 ( 1 ) "*" "*" " " " " " " "*" " " " " " " "*" "*"
## CRBI CWalks LeagueN DivisionW PutOuts Assists Errors NewLeagueN
## 1 ( 1 ) "*" " " " " " " " " " " " " " "
## 2 ( 1 ) "*" " " " " " " " " " " " " " "
## 3 ( 1 ) "*" " " " " " " "*" " " " " " "
## 4 ( 1 ) "*" " " " " "*" "*" " " " " " "
## 5 ( 1 ) "*" " " " " "*" "*" " " " " " "
## 6 ( 1 ) "*" " " " " "*" "*" " " " " " "
## 7 ( 1 ) " " " " " " "*" "*" " " " " " "
## 8 ( 1 ) " " "*" " " "*" "*" " " " " " "
# Best subset selection allowing models with up to all 19 predictors
regfit.full <- regsubsets(Salary ~ ., data = Hitters, nvmax = 19)
reg.summary <- summary(regfit.full)
names(reg.summary)
## [1] "which" "rsq" "rss" "adjr2" "cp" "bic" "outmat" "obj"
reg.summary$rsq
## [1] 0.3215 0.4252 0.4514 0.4754 0.4908 0.5087 0.5141 0.5286 0.5346 0.5405
## [11] 0.5426 0.5436 0.5445 0.5452 0.5455 0.5458 0.5460 0.5461 0.5461
plot(reg.summary$rss, xlab = "Number of Variables", ylab = "RSS", type = "l")
plot(reg.summary$adjr2, xlab = "Number of Variables", ylab = "Adjusted RSq",
     type = "l")
# The selected sizes are computed once and reused, so the highlighted point
# always matches the criterion; the outer parentheses print the value.
(best.adjr2 <- which.max(reg.summary$adjr2))
## [1] 11
points(best.adjr2, reg.summary$adjr2[best.adjr2], col = "red", cex = 2,
       pch = 20)
plot(reg.summary$cp, xlab = "Number of Variables", ylab = "Cp", type = "l")
(best.cp <- which.min(reg.summary$cp))
## [1] 10
points(best.cp, reg.summary$cp[best.cp], col = "red", cex = 2, pch = 20)
(best.bic <- which.min(reg.summary$bic))
## [1] 6
plot(reg.summary$bic, xlab = "Number of Variables", ylab = "BIC", type = "l")
points(best.bic, reg.summary$bic[best.bic], col = "red", cex = 2, pch = 20)
# Built-in plots of the selected variables, ranked by each criterion
plot(regfit.full, scale = "r2")
plot(regfit.full, scale = "adjr2")
plot(regfit.full, scale = "Cp")
plot(regfit.full, scale = "bic")
# Coefficients of the model with the lowest BIC
coef(regfit.full, best.bic)
## (Intercept) AtBat Hits Walks CRBI DivisionW
## 91.5118 -1.8686 7.6044 3.6976 0.6430 -122.9515
## PutOuts
## 0.2643
Selección stepwise hacia adelante (forward) y hacia atrás (backward)
# Forward stepwise selection over all 19 predictors
regfit.fwd <- regsubsets(
  Salary ~ .,
  data = Hitters,
  nvmax = 19,
  method = "forward"
)
summary(regfit.fwd)
## Subset selection object
## Call: regsubsets.formula(Salary ~ ., data = Hitters, nvmax = 19, method = "forward")
## 19 Variables (and intercept)
## Forced in Forced out
## AtBat FALSE FALSE
## Hits FALSE FALSE
## HmRun FALSE FALSE
## Runs FALSE FALSE
## RBI FALSE FALSE
## Walks FALSE FALSE
## Years FALSE FALSE
## CAtBat FALSE FALSE
## CHits FALSE FALSE
## CHmRun FALSE FALSE
## CRuns FALSE FALSE
## CRBI FALSE FALSE
## CWalks FALSE FALSE
## LeagueN FALSE FALSE
## DivisionW FALSE FALSE
## PutOuts FALSE FALSE
## Assists FALSE FALSE
## Errors FALSE FALSE
## NewLeagueN FALSE FALSE
## 1 subsets of each size up to 19
## Selection Algorithm: forward
## AtBat Hits HmRun Runs RBI Walks Years CAtBat CHits CHmRun CRuns
## 1 ( 1 ) " " " " " " " " " " " " " " " " " " " " " "
## 2 ( 1 ) " " "*" " " " " " " " " " " " " " " " " " "
## 3 ( 1 ) " " "*" " " " " " " " " " " " " " " " " " "
## 4 ( 1 ) " " "*" " " " " " " " " " " " " " " " " " "
## 5 ( 1 ) "*" "*" " " " " " " " " " " " " " " " " " "
## 6 ( 1 ) "*" "*" " " " " " " "*" " " " " " " " " " "
## 7 ( 1 ) "*" "*" " " " " " " "*" " " " " " " " " " "
## 8 ( 1 ) "*" "*" " " " " " " "*" " " " " " " " " "*"
## 9 ( 1 ) "*" "*" " " " " " " "*" " " "*" " " " " "*"
## 10 ( 1 ) "*" "*" " " " " " " "*" " " "*" " " " " "*"
## 11 ( 1 ) "*" "*" " " " " " " "*" " " "*" " " " " "*"
## 12 ( 1 ) "*" "*" " " "*" " " "*" " " "*" " " " " "*"
## 13 ( 1 ) "*" "*" " " "*" " " "*" " " "*" " " " " "*"
## 14 ( 1 ) "*" "*" "*" "*" " " "*" " " "*" " " " " "*"
## 15 ( 1 ) "*" "*" "*" "*" " " "*" " " "*" "*" " " "*"
## 16 ( 1 ) "*" "*" "*" "*" "*" "*" " " "*" "*" " " "*"
## 17 ( 1 ) "*" "*" "*" "*" "*" "*" " " "*" "*" " " "*"
## 18 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" " " "*"
## 19 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
## CRBI CWalks LeagueN DivisionW PutOuts Assists Errors NewLeagueN
## 1 ( 1 ) "*" " " " " " " " " " " " " " "
## 2 ( 1 ) "*" " " " " " " " " " " " " " "
## 3 ( 1 ) "*" " " " " " " "*" " " " " " "
## 4 ( 1 ) "*" " " " " "*" "*" " " " " " "
## 5 ( 1 ) "*" " " " " "*" "*" " " " " " "
## 6 ( 1 ) "*" " " " " "*" "*" " " " " " "
## 7 ( 1 ) "*" "*" " " "*" "*" " " " " " "
## 8 ( 1 ) "*" "*" " " "*" "*" " " " " " "
## 9 ( 1 ) "*" "*" " " "*" "*" " " " " " "
## 10 ( 1 ) "*" "*" " " "*" "*" "*" " " " "
## 11 ( 1 ) "*" "*" "*" "*" "*" "*" " " " "
## 12 ( 1 ) "*" "*" "*" "*" "*" "*" " " " "
## 13 ( 1 ) "*" "*" "*" "*" "*" "*" "*" " "
## 14 ( 1 ) "*" "*" "*" "*" "*" "*" "*" " "
## 15 ( 1 ) "*" "*" "*" "*" "*" "*" "*" " "
## 16 ( 1 ) "*" "*" "*" "*" "*" "*" "*" " "
## 17 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*"
## 18 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*"
## 19 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*"
# Backward stepwise selection over all 19 predictors
regfit.bwd <- regsubsets(
  Salary ~ .,
  data = Hitters,
  nvmax = 19,
  method = "backward"
)
summary(regfit.bwd)
## Subset selection object
## Call: regsubsets.formula(Salary ~ ., data = Hitters, nvmax = 19, method = "backward")
## 19 Variables (and intercept)
## Forced in Forced out
## AtBat FALSE FALSE
## Hits FALSE FALSE
## HmRun FALSE FALSE
## Runs FALSE FALSE
## RBI FALSE FALSE
## Walks FALSE FALSE
## Years FALSE FALSE
## CAtBat FALSE FALSE
## CHits FALSE FALSE
## CHmRun FALSE FALSE
## CRuns FALSE FALSE
## CRBI FALSE FALSE
## CWalks FALSE FALSE
## LeagueN FALSE FALSE
## DivisionW FALSE FALSE
## PutOuts FALSE FALSE
## Assists FALSE FALSE
## Errors FALSE FALSE
## NewLeagueN FALSE FALSE
## 1 subsets of each size up to 19
## Selection Algorithm: backward
## AtBat Hits HmRun Runs RBI Walks Years CAtBat CHits CHmRun CRuns
## 1 ( 1 ) " " " " " " " " " " " " " " " " " " " " "*"
## 2 ( 1 ) " " "*" " " " " " " " " " " " " " " " " "*"
## 3 ( 1 ) " " "*" " " " " " " " " " " " " " " " " "*"
## 4 ( 1 ) "*" "*" " " " " " " " " " " " " " " " " "*"
## 5 ( 1 ) "*" "*" " " " " " " "*" " " " " " " " " "*"
## 6 ( 1 ) "*" "*" " " " " " " "*" " " " " " " " " "*"
## 7 ( 1 ) "*" "*" " " " " " " "*" " " " " " " " " "*"
## 8 ( 1 ) "*" "*" " " " " " " "*" " " " " " " " " "*"
## 9 ( 1 ) "*" "*" " " " " " " "*" " " "*" " " " " "*"
## 10 ( 1 ) "*" "*" " " " " " " "*" " " "*" " " " " "*"
## 11 ( 1 ) "*" "*" " " " " " " "*" " " "*" " " " " "*"
## 12 ( 1 ) "*" "*" " " "*" " " "*" " " "*" " " " " "*"
## 13 ( 1 ) "*" "*" " " "*" " " "*" " " "*" " " " " "*"
## 14 ( 1 ) "*" "*" "*" "*" " " "*" " " "*" " " " " "*"
## 15 ( 1 ) "*" "*" "*" "*" " " "*" " " "*" "*" " " "*"
## 16 ( 1 ) "*" "*" "*" "*" "*" "*" " " "*" "*" " " "*"
## 17 ( 1 ) "*" "*" "*" "*" "*" "*" " " "*" "*" " " "*"
## 18 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" " " "*"
## 19 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
## CRBI CWalks LeagueN DivisionW PutOuts Assists Errors NewLeagueN
## 1 ( 1 ) " " " " " " " " " " " " " " " "
## 2 ( 1 ) " " " " " " " " " " " " " " " "
## 3 ( 1 ) " " " " " " " " "*" " " " " " "
## 4 ( 1 ) " " " " " " " " "*" " " " " " "
## 5 ( 1 ) " " " " " " " " "*" " " " " " "
## 6 ( 1 ) " " " " " " "*" "*" " " " " " "
## 7 ( 1 ) " " "*" " " "*" "*" " " " " " "
## 8 ( 1 ) "*" "*" " " "*" "*" " " " " " "
## 9 ( 1 ) "*" "*" " " "*" "*" " " " " " "
## 10 ( 1 ) "*" "*" " " "*" "*" "*" " " " "
## 11 ( 1 ) "*" "*" "*" "*" "*" "*" " " " "
## 12 ( 1 ) "*" "*" "*" "*" "*" "*" " " " "
## 13 ( 1 ) "*" "*" "*" "*" "*" "*" "*" " "
## 14 ( 1 ) "*" "*" "*" "*" "*" "*" "*" " "
## 15 ( 1 ) "*" "*" "*" "*" "*" "*" "*" " "
## 16 ( 1 ) "*" "*" "*" "*" "*" "*" "*" " "
## 17 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*"
## 18 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*"
## 19 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*"
# Seven-variable model chosen by each search strategy: exhaustive, forward,
# backward
coef(regfit.full, id = 7)
## (Intercept) Hits Walks CAtBat CHits CHmRun
## 79.4509 1.2834 3.2274 -0.3752 1.4957 1.4421
## DivisionW PutOuts
## -129.9866 0.2367
coef(regfit.fwd, id = 7)
## (Intercept) AtBat Hits Walks CRBI CWalks
## 109.7873 -1.9589 7.4499 4.9131 0.8538 -0.3053
## DivisionW PutOuts
## -127.1224 0.2533
coef(regfit.bwd, id = 7)
## (Intercept) AtBat Hits Walks CRuns CWalks
## 105.6487 -1.9763 6.7575 6.0559 1.1293 -0.7163
## DivisionW PutOuts
## -116.1692 0.3029
Validación usando data de entrenamiento y prueba
set.seed(1)
# Randomly flag each row as training (TRUE) or test (FALSE).
# `replace = TRUE` is spelled out rather than relying on partial argument
# matching and on the reassignable shorthand `T`.
train <- sample(c(TRUE, FALSE), nrow(Hitters), replace = TRUE) # training rows
test <- !train # test rows
# Best subset selection fitted on the training rows only
regfit.best <- regsubsets(Salary ~ ., data = Hitters[train, ], nvmax = 19)
summary(regfit.best)
## Subset selection object
## Call: regsubsets.formula(Salary ~ ., data = Hitters[train, ], nvmax = 19)
## 19 Variables (and intercept)
## Forced in Forced out
## AtBat FALSE FALSE
## Hits FALSE FALSE
## HmRun FALSE FALSE
## Runs FALSE FALSE
## RBI FALSE FALSE
## Walks FALSE FALSE
## Years FALSE FALSE
## CAtBat FALSE FALSE
## CHits FALSE FALSE
## CHmRun FALSE FALSE
## CRuns FALSE FALSE
## CRBI FALSE FALSE
## CWalks FALSE FALSE
## LeagueN FALSE FALSE
## DivisionW FALSE FALSE
## PutOuts FALSE FALSE
## Assists FALSE FALSE
## Errors FALSE FALSE
## NewLeagueN FALSE FALSE
## 1 subsets of each size up to 19
## Selection Algorithm: exhaustive
## AtBat Hits HmRun Runs RBI Walks Years CAtBat CHits CHmRun CRuns
## 1 ( 1 ) " " "*" " " " " " " " " " " " " " " " " " "
## 2 ( 1 ) " " "*" " " " " " " " " " " " " "*" " " " "
## 3 ( 1 ) " " "*" " " " " " " " " " " " " "*" " " " "
## 4 ( 1 ) " " "*" " " " " " " " " " " "*" "*" " " " "
## 5 ( 1 ) " " "*" " " " " " " " " " " "*" "*" " " " "
## 6 ( 1 ) " " "*" " " " " " " " " " " "*" "*" " " " "
## 7 ( 1 ) "*" "*" "*" " " " " " " " " "*" "*" " " " "
## 8 ( 1 ) "*" "*" " " " " " " "*" " " " " "*" "*" " "
## 9 ( 1 ) "*" "*" " " " " " " "*" " " "*" "*" "*" " "
## 10 ( 1 ) "*" "*" " " " " " " "*" " " "*" "*" "*" " "
## 11 ( 1 ) "*" "*" " " " " " " "*" " " "*" "*" "*" " "
## 12 ( 1 ) "*" "*" " " " " " " "*" " " "*" "*" "*" " "
## 13 ( 1 ) "*" "*" " " " " " " "*" " " "*" "*" "*" " "
## 14 ( 1 ) "*" "*" " " " " " " "*" " " "*" "*" "*" "*"
## 15 ( 1 ) "*" "*" " " " " "*" "*" " " "*" "*" "*" "*"
## 16 ( 1 ) "*" "*" " " " " "*" "*" "*" "*" "*" "*" "*"
## 17 ( 1 ) "*" "*" " " " " "*" "*" "*" "*" "*" "*" "*"
## 18 ( 1 ) "*" "*" " " "*" "*" "*" "*" "*" "*" "*" "*"
## 19 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
## CRBI CWalks LeagueN DivisionW PutOuts Assists Errors NewLeagueN
## 1 ( 1 ) " " " " " " " " " " " " " " " "
## 2 ( 1 ) " " " " " " " " " " " " " " " "
## 3 ( 1 ) " " " " " " " " " " "*" " " " "
## 4 ( 1 ) " " " " " " " " "*" " " " " " "
## 5 ( 1 ) " " " " " " " " "*" "*" " " " "
## 6 ( 1 ) " " " " "*" " " "*" "*" " " " "
## 7 ( 1 ) " " " " "*" " " "*" " " " " " "
## 8 ( 1 ) " " "*" "*" " " "*" " " " " " "
## 9 ( 1 ) " " "*" "*" " " "*" " " " " " "
## 10 ( 1 ) " " "*" "*" "*" "*" " " " " " "
## 11 ( 1 ) " " "*" "*" "*" "*" " " "*" " "
## 12 ( 1 ) " " "*" "*" "*" "*" " " "*" "*"
## 13 ( 1 ) "*" "*" "*" "*" "*" " " "*" "*"
## 14 ( 1 ) "*" "*" "*" "*" "*" " " "*" "*"
## 15 ( 1 ) "*" "*" "*" "*" "*" " " "*" "*"
## 16 ( 1 ) "*" "*" "*" "*" "*" " " "*" "*"
## 17 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*"
## 18 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*"
## 19 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*"
# Design matrix (X) built from the test rows
test.mat <- model.matrix(Salary ~ ., data = Hitters[test, ])
# Test-set MSE of the best model of each size (1 to 19 predictors)
val.errors <- rep(NA_real_, 19)
for (i in seq_len(19)) {
  coefi <- coef(regfit.best, id = i)
  # Keep only the columns used by this model, then form X %*% beta
  pred <- test.mat[, names(coefi)] %*% coefi
  val.errors[i] <- mean((Hitters$Salary[test] - pred)^2)
}
val.errors
## [1] 220968 169157 178518 163426 168418 171271 162377 157909 154056 148162
## [11] 151156 151742 152214 157359 158541 158743 159973 159860 160106
# Size with the lowest test MSE; the outer parentheses print the value.
# It is reused below instead of retyping the number.
(best.val.size <- which.min(val.errors))
## [1] 10
coef(regfit.best, best.val.size)
## (Intercept) AtBat Hits Walks CAtBat CHits
## -80.2751 -1.4684 7.1625 3.6430 -0.1856 1.1053
## CHmRun CWalks LeagueN DivisionW PutOuts
## 1.3845 -0.7483 84.5576 -53.0290 0.2382
# Prediction helper for regsubsets fits.
#   object  : a fitted regsubsets object
#   newdata : data frame to predict on
#   id      : which model to use, passed on to coef()
# Returns the matrix product of the design-matrix columns used by the model
# and that model's coefficients.
predict.regsubsets <- function(object, newdata, id, ...) {
  # The formula is the first argument stored in the fit's call
  model_formula <- as.formula(object$call[[2]])
  design <- model.matrix(model_formula, newdata)
  beta <- coef(object, id = id)
  design[, names(beta)] %*% beta
}
# The best 10-variable model is re-estimated using the full data set
regfit.best <- regsubsets(Salary ~ ., data = Hitters, nvmax = 19)
coef(regfit.best, id = 10)
## (Intercept) AtBat Hits Walks CAtBat CRuns
## 162.5354 -2.1687 6.9180 5.7732 -0.1301 1.4082
## CRBI CWalks DivisionW PutOuts Assists
## 0.7743 -0.8308 -112.3801 0.2974 0.2832
Validación cruzada con K = 10 (10-fold)
k <- 10
set.seed(1)
# Assign every row to one of the k folds at random
folds <- sample(seq_len(k), nrow(Hitters), replace = TRUE)
# cv.errors[j, i] holds the MSE on fold j of the best i-variable model
# fitted on the remaining folds
cv.errors <- matrix(NA, k, 19, dimnames = list(NULL, paste(1:19)))
for (j in seq_len(k)) {
  best.fit <- regsubsets(Salary ~ ., data = Hitters[folds != j, ], nvmax = 19)
  for (i in seq_len(19)) {
    pred <- predict.regsubsets(best.fit, Hitters[folds == j, ], id = i)
    cv.errors[j, i] <- mean((Hitters$Salary[folds == j] - pred)^2)
  }
}
# Average over folds: one cross-validated MSE per model size
mean.cv.errors <- apply(cv.errors, 2, mean)
mean.cv.errors
## 1 2 3 4 5 6 7 8 9 10
## 160093 140197 153117 151159 146841 138303 144346 130208 129460 125335
## 11 12 13 14 15 16 17 18 19
## 125154 128274 133461 133975 131826 131883 132751 133096 132805
par(mfrow = c(1, 1))
plot(mean.cv.errors, type = "b")
# Refit on the full data and report the size with the lowest CV error.
# The size is computed from mean.cv.errors instead of being typed in by hand.
reg.best <- regsubsets(Salary ~ ., data = Hitters, nvmax = 19)
coef(reg.best, unname(which.min(mean.cv.errors)))
## (Intercept) AtBat Hits Walks CAtBat CRuns
## 135.7512 -2.1277 6.9237 5.6203 -0.1390 1.4553
## CRBI CWalks LeagueN DivisionW PutOuts Assists
## 0.7853 -0.8229 43.1116 -111.1460 0.2894 0.2688
Métodos de encogimiento
Regresión Ridge
# Predictor matrix without the intercept column, and the response vector
x <- model.matrix(Salary ~ ., Hitters)[, -1]
y <- Hitters$Salary
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 1.9-8
# 100 lambda values from 10^10 down to 10^-2; `length.out` is spelled out
# rather than relying on partial matching of `length`
grid <- 10^seq(10, -2, length.out = 100)
# alpha = 0 requests the ridge penalty
ridge.mod <- glmnet(x, y, alpha = 0, lambda = grid)
# One column of coefficients (intercept + 19 predictors) per lambda
dim(coef(ridge.mod))
## [1] 20 100
ridge.mod$lambda[50]
## [1] 11498
coef(ridge.mod)[, 50]
## (Intercept) AtBat Hits HmRun Runs RBI
## 407.356050 0.036957 0.138180 0.524630 0.230702 0.239841
## Walks Years CAtBat CHits CHmRun CRuns
## 0.289619 1.107703 0.003132 0.011654 0.087546 0.023380
## CRBI CWalks LeagueN DivisionW PutOuts Assists
## 0.024138 0.025015 0.085028 -6.215441 0.016483 0.002613
## Errors NewLeagueN
## -0.020503 0.301434
# Euclidean norm of the coefficients, intercept excluded
sqrt(sum(coef(ridge.mod)[-1, 50]^2))
## [1] 6.361
ridge.mod$lambda[60]
## [1] 705.5
coef(ridge.mod)[, 60]
## (Intercept) AtBat Hits HmRun Runs RBI
## 54.32520 0.11211 0.65622 1.17981 0.93770 0.84719
## Walks Years CAtBat CHits CHmRun CRuns
## 1.31988 2.59640 0.01083 0.04675 0.33777 0.09356
## CRBI CWalks LeagueN DivisionW PutOuts Assists
## 0.09780 0.07190 13.68370 -54.65878 0.11852 0.01606
## Errors NewLeagueN
## -0.70359 8.61181
sqrt(sum(coef(ridge.mod)[-1, 60]^2))
## [1] 57.11
Regresión Lasso
# alpha = 1 requests the lasso penalty; same lambda grid as the ridge fit
lasso.mod <- glmnet(x, y, alpha = 1, lambda = grid)
# Coefficient paths across the lambda grid
plot(lasso.mod)
Métodos de reducción de la dimensión
Regresión por Componentes Principales
library(pls)
##
## Attaching package: 'pls'
##
## The following object is masked from 'package:stats':
##
## loadings
set.seed(2)
# Principal components regression on standardized predictors, with
# cross-validation for each number of components
pcr.fit <- pcr(
  Salary ~ .,
  data = Hitters,
  scale = TRUE,
  validation = "CV"
)
summary(pcr.fit)
## Data: X dimension: 263 19
## Y dimension: 263 1
## Fit method: svdpc
## Number of components considered: 19
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## CV 452 348.9 352.2 353.5 352.8 350.1 349.1
## adjCV 452 348.7 351.8 352.9 352.1 349.3 348.0
## 7 comps 8 comps 9 comps 10 comps 11 comps 12 comps 13 comps
## CV 349.6 350.9 352.9 353.8 355.0 356.2 363.5
## adjCV 348.5 349.8 351.6 352.3 353.4 354.5 361.6
## 14 comps 15 comps 16 comps 17 comps 18 comps 19 comps
## CV 355.2 357.4 347.6 350.1 349.2 352.6
## adjCV 352.8 355.2 345.5 347.6 346.7 349.8
##
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps
## X 38.31 60.16 70.84 79.03 84.29 88.63 92.26
## Salary 40.63 41.58 42.17 43.22 44.90 46.48 46.69
## 8 comps 9 comps 10 comps 11 comps 12 comps 13 comps 14 comps
## X 94.96 96.28 97.26 97.98 98.65 99.15 99.47
## Salary 46.75 46.86 47.76 47.82 47.85 48.10 50.40
## 15 comps 16 comps 17 comps 18 comps 19 comps
## X 99.75 99.89 99.97 99.99 100.00
## Salary 50.55 53.01 53.85 54.61 54.61
# Regression coefficients, one slice per number of components. The element
# name is written in full instead of relying on `$` partial matching.
pcr.fit$coefficients
## , , 1 comps
##
## Salary
## AtBat 21.13208
## Hits 20.87321
## HmRun 21.77988
## Runs 21.13706
## RBI 25.06280
## Walks 22.26530
## Years 30.11446
## CAtBat 35.21789
## CHits 35.24760
## CHmRun 33.99409
## CRuns 36.04328
## CRBI 36.27081
## CWalks 33.76213
## LeagueN -5.80504
## DivisionW -2.74158
## PutOuts 8.28030
## Assists -0.08969
## Errors -0.83758
## NewLeagueN -4.46644
##
## , , 2 comps
##
## Salary
## AtBat 29.439
## Hits 29.039
## HmRun 26.913
## Runs 29.313
## RBI 31.871
## Walks 27.235
## Years 24.435
## CAtBat 31.043
## CHits 31.289
## CHmRun 31.260
## CRuns 32.314
## CRBI 32.633
## CWalks 29.600
## LeagueN -7.866
## DivisionW -3.535
## PutOuts 11.651
## Assists 3.561
## Errors 3.508
## NewLeagueN -6.146
##
## , , 3 comps
##
## Salary
## AtBat 31.596
## Hits 30.841
## HmRun 21.651
## Runs 28.895
## RBI 30.092
## Walks 28.346
## Years 25.277
## CAtBat 33.077
## CHits 33.388
## CHmRun 29.161
## CRuns 33.604
## CRBI 32.997
## CWalks 30.625
## LeagueN 5.466
## DivisionW -3.930
## PutOuts 12.899
## Assists 13.245
## Errors 12.827
## NewLeagueN 7.110
##
## , , 4 comps
##
## Salary
## AtBat 30.412
## Hits 30.175
## HmRun 30.390
## Runs 30.746
## RBI 35.242
## Walks 33.186
## Years 21.745
## CAtBat 29.700
## CHits 30.285
## CHmRun 31.913
## CRuns 31.041
## CRBI 32.750
## CWalks 29.499
## LeagueN 20.141
## DivisionW -5.514
## PutOuts 23.556
## Assists -6.174
## Errors -2.808
## NewLeagueN 22.589
##
## , , 5 comps
##
## Salary
## AtBat 28.766
## Hits 30.447
## HmRun 25.844
## Runs 33.001
## RBI 33.820
## Walks 35.088
## Years 22.351
## CAtBat 29.015
## CHits 29.786
## CHmRun 30.002
## CRuns 32.069
## CRBI 31.112
## CWalks 31.487
## LeagueN 19.439
## DivisionW -63.204
## PutOuts 17.360
## Assists -5.523
## Errors -6.044
## NewLeagueN 21.743
##
## , , 6 comps
##
## Salary
## AtBat 24.363
## Hits 25.321
## HmRun 16.518
## Runs 24.484
## RBI 26.860
## Walks 33.874
## Years 24.423
## CAtBat 30.534
## CHits 31.618
## CHmRun 27.460
## CRuns 32.498
## CRBI 31.828
## CWalks 33.606
## LeagueN 10.911
## DivisionW -68.868
## PutOuts 74.954
## Assists -3.328
## Errors 3.192
## NewLeagueN 11.960
##
## , , 7 comps
##
## Salary
## AtBat 27.005
## Hits 28.531
## HmRun 4.031
## Runs 29.464
## RBI 18.974
## Walks 47.659
## Years 24.126
## CAtBat 30.832
## CHits 32.112
## CHmRun 21.812
## CRuns 34.054
## CRBI 28.901
## CWalks 37.991
## LeagueN 9.022
## DivisionW -66.069
## PutOuts 74.483
## Assists -3.655
## Errors -6.005
## NewLeagueN 11.401
##
## , , 8 comps
##
## Salary
## AtBat 31.2842
## Hits 34.6957
## HmRun 0.4426
## Runs 31.2803
## RBI 19.0549
## Walks 37.7735
## Years 26.3194
## CAtBat 33.1633
## CHits 35.1968
## CHmRun 17.8551
## CRuns 35.3957
## CRBI 28.8660
## CWalks 33.8181
## LeagueN 8.6066
## DivisionW -66.0226
## PutOuts 75.5157
## Assists -4.8535
## Errors -10.7815
## NewLeagueN 12.4752
##
## , , 9 comps
##
## Salary
## AtBat 30.923
## Hits 32.870
## HmRun 4.219
## Runs 26.206
## RBI 22.941
## Walks 37.138
## Years 26.004
## CAtBat 31.544
## CHits 32.384
## CHmRun 22.832
## CRuns 32.624
## CRBI 30.388
## CWalks 33.742
## LeagueN 8.555
## DivisionW -65.920
## PutOuts 78.304
## Assists 16.091
## Errors -28.836
## NewLeagueN 13.615
##
## , , 10 comps
##
## Salary
## AtBat 45.462
## Hits 45.897
## HmRun -30.836
## Runs 29.425
## RBI 5.746
## Walks 25.064
## Years -25.164
## CAtBat 21.446
## CHits 24.674
## CHmRun 87.780
## CRuns 33.634
## CRBI 58.444
## CWalks 25.593
## LeagueN 10.145
## DivisionW -67.068
## PutOuts 77.713
## Assists 10.070
## Errors -26.041
## NewLeagueN 11.293
##
## , , 11 comps
##
## Salary
## AtBat 42.525
## Hits 45.461
## HmRun -24.736
## Runs 38.844
## RBI -1.598
## Walks 19.742
## Years -30.943
## CAtBat 22.359
## CHits 25.575
## CHmRun 85.373
## CRuns 38.211
## CRBI 55.511
## CWalks 31.342
## LeagueN 27.666
## DivisionW -66.659
## PutOuts 78.785
## Assists 12.698
## Errors -28.331
## NewLeagueN -5.118
##
## , , 12 comps
##
## Salary
## AtBat 44.695
## Hits 48.006
## HmRun -31.358
## Runs 32.078
## RBI 5.716
## Walks 23.639
## Years -23.484
## CAtBat 21.046
## CHits 23.819
## CHmRun 86.932
## CRuns 33.392
## CRBI 58.049
## CWalks 26.806
## LeagueN 36.219
## DivisionW -66.672
## PutOuts 77.911
## Assists 10.634
## Errors -28.138
## NewLeagueN -14.150
##
## , , 13 comps
##
## Salary
## AtBat 41.804
## Hits 48.267
## HmRun -39.242
## Runs 4.424
## RBI 37.644
## Walks 26.615
## Years -67.372
## CAtBat 31.825
## CHits 43.173
## CHmRun 62.986
## CRuns 48.067
## CRBI 60.434
## CWalks 38.898
## LeagueN 36.880
## DivisionW -66.620
## PutOuts 75.142
## Assists 8.196
## Errors -30.678
## NewLeagueN -18.672
##
## , , 14 comps
##
## Salary
## AtBat -43.09
## Hits -10.12
## HmRun -38.87
## Runs 78.32
## RBI 39.72
## Walks 92.57
## Years -91.23
## CAtBat 78.44
## CHits 123.65
## CHmRun 51.94
## CRuns 83.86
## CRBI 118.31
## CWalks -165.39
## LeagueN 41.45
## DivisionW -68.33
## PutOuts 78.46
## Assists 31.95
## Errors -37.21
## NewLeagueN -26.82
##
## , , 15 comps
##
## Salary
## AtBat -82.658
## Hits -8.376
## HmRun -65.282
## Runs 113.125
## RBI 74.012
## Walks 79.455
## Years -86.312
## CAtBat 66.717
## CHits 113.498
## CHmRun 52.643
## CRuns 80.151
## CRBI 123.467
## CWalks -147.199
## LeagueN 38.267
## DivisionW -64.632
## PutOuts 80.503
## Assists 35.048
## Errors -36.151
## NewLeagueN -22.178
##
## , , 16 comps
##
## Salary
## AtBat -298.80
## Hits 296.64
## HmRun 19.60
## Runs 19.73
## RBI -26.52
## Walks 122.88
## Years -96.53
## CAtBat 42.65
## CHits 160.27
## CHmRun 41.29
## CRuns 11.37
## CRBI 184.00
## CWalks -145.30
## LeagueN 28.22
## DivisionW -66.13
## PutOuts 75.89
## Assists 42.46
## Errors -26.03
## NewLeagueN -16.87
##
## , , 17 comps
##
## Salary
## AtBat -347.25
## Hits 354.83
## HmRun 3.01
## Runs -29.28
## RBI 10.58
## Walks 137.57
## Years -63.30
## CAtBat -20.73
## CHits 149.15
## CHmRun 142.73
## CRuns 227.83
## CRBI -27.97
## CWalks -204.32
## LeagueN 30.84
## DivisionW -60.59
## PutOuts 81.31
## Assists 47.01
## Errors -22.95
## NewLeagueN -14.96
##
## , , 18 comps
##
## Salary
## AtBat -287.1639
## Hits 330.3183
## HmRun 35.8569
## Runs -55.7545
## RBI -25.4324
## Walks 133.8275
## Years -15.0312
## CAtBat -425.9233
## CHits 151.1037
## CHmRun -0.3535
## CRuns 452.9583
## CRBI 239.5045
## CWalks -206.9835
## LeagueN 31.7984
## DivisionW -58.5995
## PutOuts 78.7188
## Assists 54.5750
## Errors -22.7108
## NewLeagueN -13.0026
##
## , , 19 comps
##
## Salary
## AtBat -291.65
## Hits 338.47
## HmRun 37.93
## Runs -60.69
## RBI -27.05
## Walks 135.33
## Years -16.73
## CAtBat -391.78
## CHits 86.85
## CHmRun -14.21
## CRuns 481.66
## CRBI 261.19
## CWalks -214.30
## LeagueN 31.31
## DivisionW -58.53
## PutOuts 78.91
## Assists 53.83
## Errors -22.20
## NewLeagueN -12.37
Mínimos Cuadrados Parciales
# Partial least squares fitted on the training rows only, with standardized
# predictors and cross-validation for each number of components
pls.fit <- plsr(
  Salary ~ .,
  data = Hitters,
  subset = train,
  scale = TRUE,
  validation = "CV"
)
summary(pls.fit)
## Data: X dimension: 134 19
## Y dimension: 134 1
## Fit method: kernelpls
## Number of components considered: 19
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## CV 408.2 310.8 304.7 303.6 301.8 301.8 306
## adjCV 408.2 310.2 303.9 302.3 300.3 300.4 304
## 7 comps 8 comps 9 comps 10 comps 11 comps 12 comps 13 comps
## CV 310.5 311.2 306.5 304.7 302.9 306.9 308.6
## adjCV 307.5 308.0 303.7 302.2 300.4 304.0 305.5
## 14 comps 15 comps 16 comps 17 comps 18 comps 19 comps
## CV 310.8 309.9 312.2 313.3 314.1 311.5
## adjCV 307.7 306.7 308.9 309.9 310.5 308.2
##
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps
## X 36.10 57.12 67.46 73.46 80.01 85.49 88.51
## Salary 46.12 50.07 53.47 55.63 56.72 58.01 59.55
## 8 comps 9 comps 10 comps 11 comps 12 comps 13 comps 14 comps
## X 90.89 93.07 95.40 97.29 97.71 98.16 98.87
## Salary 60.41 60.95 61.18 61.35 61.53 61.62 61.67
## 15 comps 16 comps 17 comps 18 comps 19 comps
## X 99.06 99.48 99.75 99.98 100.00
## Salary 61.83 61.91 62.07 62.20 62.24
# Cross-validated MSEP against the number of PLS components
validationplot(pls.fit, val.type = "MSEP")
# Regression coefficients, one slice per number of components. The element
# name is written in full instead of relying on `$` partial matching.
pls.fit$coefficients
## , , 1 comps
##
## Salary
## AtBat 30.889
## Hits 34.526
## HmRun 25.582
## Runs 32.319
## RBI 33.600
## Walks 26.066
## Years 20.993
## CAtBat 28.869
## CHits 30.941
## CHmRun 26.349
## CRuns 30.905
## CRBI 29.480
## CWalks 24.524
## LeagueN 4.988
## DivisionW -11.386
## PutOuts 18.288
## Assists 1.040
## Errors 2.086
## NewLeagueN 3.916
##
## , , 2 comps
##
## Salary
## AtBat 42.430
## Hits 56.147
## HmRun 24.169
## Runs 44.821
## RBI 42.174
## Walks 25.737
## Years 12.178
## CAtBat 23.754
## CHits 30.299
## CHmRun 14.101
## CRuns 26.179
## CRBI 22.825
## CWalks 8.018
## LeagueN 20.586
## DivisionW -24.558
## PutOuts 39.861
## Assists -5.033
## Errors 1.241
## NewLeagueN 14.468
##
## , , 3 comps
##
## Salary
## AtBat 36.666
## Hits 75.787
## HmRun 4.921
## Runs 43.778
## RBI 36.263
## Walks 3.979
## Years 19.010
## CAtBat 34.893
## CHits 52.000
## CHmRun 2.594
## CRuns 36.533
## CRBI 29.114
## CWalks -11.594
## LeagueN 49.090
## DivisionW -42.306
## PutOuts 64.908
## Assists -34.303
## Errors -17.618
## NewLeagueN 27.889
##
## , , 4 comps
##
## Salary
## AtBat 35.193
## Hits 113.434
## HmRun -16.853
## Runs 53.328
## RBI 32.389
## Walks -18.487
## Years 18.617
## CAtBat 44.500
## CHits 78.593
## CHmRun -13.577
## CRuns 48.780
## CRBI 33.796
## CWalks -46.314
## LeagueN 42.265
## DivisionW -44.956
## PutOuts 70.014
## Assists -52.029
## Errors -28.759
## NewLeagueN -5.666
##
## , , 5 comps
##
## Salary
## AtBat 20.766
## Hits 141.265
## HmRun -30.311
## Runs 56.104
## RBI 29.661
## Walks -16.374
## Years 8.979
## CAtBat 50.504
## CHits 103.303
## CHmRun -18.758
## CRuns 59.569
## CRBI 42.254
## CWalks -77.604
## LeagueN 65.835
## DivisionW -21.849
## PutOuts 58.632
## Assists -53.192
## Errors -14.858
## NewLeagueN -15.906
##
## , , 6 comps
##
## Salary
## AtBat -24.115
## Hits 170.465
## HmRun -21.820
## Runs 54.280
## RBI 37.574
## Walks 8.762
## Years -18.611
## CAtBat 47.718
## CHits 132.008
## CHmRun -10.416
## CRuns 69.730
## CRBI 56.496
## CWalks -126.308
## LeagueN 93.344
## DivisionW -4.856
## PutOuts 49.072
## Assists -79.511
## Errors -15.046
## NewLeagueN -43.147
##
## , , 7 comps
##
## Salary
## AtBat -99.191
## Hits 206.142
## HmRun -24.037
## Runs 38.743
## RBI 38.118
## Walks 51.441
## Years -50.882
## CAtBat 46.603
## CHits 177.140
## CHmRun 1.540
## CRuns 86.082
## CRBI 77.466
## CWalks -189.546
## LeagueN 122.080
## DivisionW -36.264
## PutOuts 44.797
## Assists -72.430
## Errors 5.026
## NewLeagueN -81.141
##
## , , 8 comps
##
## Salary
## AtBat -149.64
## Hits 239.11
## HmRun -23.47
## Runs 14.40
## RBI 28.72
## Walks 87.30
## Years -67.53
## CAtBat 44.43
## CHits 210.96
## CHmRun 13.60
## CRuns 94.29
## CRBI 88.07
## CWalks -234.57
## LeagueN 115.85
## DivisionW -25.16
## PutOuts 75.14
## Assists -38.33
## Errors -15.98
## NewLeagueN -85.00
##
## , , 9 comps
##
## Salary
## AtBat -181.676
## Hits 274.183
## HmRun -3.513
## Runs -5.563
## RBI 26.857
## Walks 97.684
## Years -72.527
## CAtBat 41.060
## CHits 237.528
## CHmRun 22.077
## CRuns 98.246
## CRBI 89.554
## CWalks -274.669
## LeagueN 104.294
## DivisionW -29.639
## PutOuts 62.282
## Assists -11.655
## Errors -40.100
## NewLeagueN -64.953
##
## , , 10 comps
##
## Salary
## AtBat -199.63
## Hits 297.11
## HmRun 16.13
## Runs -23.50
## RBI 19.38
## Walks 98.17
## Years -64.36
## CAtBat 36.81
## CHits 252.54
## CHmRun 29.86
## CRuns 98.28
## CRBI 83.08
## CWalks -293.70
## LeagueN 87.20
## DivisionW -28.13
## PutOuts 67.58
## Assists -13.70
## Errors -25.80
## NewLeagueN -51.19
##
## , , 11 comps
##
## Salary
## AtBat -210.592
## Hits 322.540
## HmRun 21.138
## Runs -34.056
## RBI 1.126
## Walks 115.823
## Years -54.999
## CAtBat 29.855
## CHits 263.917
## CHmRun 34.488
## CRuns 99.025
## CRBI 68.974
## CWalks -299.461
## LeagueN 73.769
## DivisionW -29.564
## PutOuts 63.995
## Assists -22.812
## Errors -19.979
## NewLeagueN -39.291
##
## , , 12 comps
##
## Salary
## AtBat -220.401
## Hits 371.429
## HmRun 36.624
## Runs -60.637
## RBI -23.680
## Walks 113.596
## Years -45.708
## CAtBat 5.409
## CHits 276.334
## CHmRun 64.597
## CRuns 100.567
## CRBI 31.368
## CWalks -284.077
## LeagueN 86.644
## DivisionW -30.836
## PutOuts 63.748
## Assists -19.903
## Errors -21.251
## NewLeagueN -50.099
##
## , , 13 comps
##
## Salary
## AtBat -242.485
## Hits 388.371
## HmRun 22.980
## Runs -58.200
## RBI -8.088
## Walks 103.082
## Years -58.111
## CAtBat -15.883
## CHits 306.100
## CHmRun 86.480
## CRuns 105.043
## CRBI -7.824
## CWalks -261.590
## LeagueN 78.786
## DivisionW -28.993
## PutOuts 63.952
## Assists -21.224
## Errors -18.566
## NewLeagueN -41.558
##
## , , 14 comps
##
## Salary
## AtBat -256.572
## Hits 389.104
## HmRun 20.571
## Runs -54.636
## RBI 6.932
## Walks 102.057
## Years -48.680
## CAtBat -29.329
## CHits 334.688
## CHmRun 89.903
## CRuns 103.560
## CRBI -43.979
## CWalks -252.328
## LeagueN 80.137
## DivisionW -26.276
## PutOuts 64.937
## Assists -18.701
## Errors -19.296
## NewLeagueN -42.645
##
## , , 15 comps
##
## Salary
## AtBat -235.91
## Hits 350.94
## HmRun -1.55
## Runs -50.23
## RBI 32.98
## Walks 105.04
## Years -34.69
## CAtBat -86.75
## CHits 474.03
## CHmRun 166.69
## CRuns 77.30
## CRBI -166.09
## CWalks -268.27
## LeagueN 82.70
## DivisionW -26.34
## PutOuts 65.79
## Assists -17.72
## Errors -20.48
## NewLeagueN -44.22
##
## , , 16 comps
##
## Salary
## AtBat -225.681
## Hits 329.686
## HmRun 7.986
## Runs -39.208
## RBI 23.408
## Walks 101.728
## Years -50.205
## CAtBat -110.816
## CHits 569.664
## CHmRun 186.440
## CRuns 47.206
## CRBI -220.016
## CWalks -258.024
## LeagueN 84.373
## DivisionW -27.465
## PutOuts 67.973
## Assists -15.753
## Errors -21.148
## NewLeagueN -45.831
##
## , , 17 comps
##
## Salary
## AtBat -201.889
## Hits 311.275
## HmRun -2.059
## Runs -48.959
## RBI 40.706
## Walks 100.993
## Years -57.970
## CAtBat -187.059
## CHits 783.281
## CHmRun 223.803
## CRuns -94.259
## CRBI -282.539
## CWalks -216.615
## LeagueN 87.350
## DivisionW -28.258
## PutOuts 64.304
## Assists -18.840
## Errors -22.648
## NewLeagueN -53.068
##
## , , 18 comps
##
## Salary
## AtBat -204.427
## Hits 296.228
## HmRun -9.730
## Runs -8.319
## RBI 34.481
## Walks 89.287
## Years -41.866
## CAtBat -267.919
## CHits 944.691
## CHmRun 273.293
## CRuns -219.627
## CRBI -317.800
## CWalks -190.703
## LeagueN 91.619
## DivisionW -28.073
## PutOuts 64.105
## Assists -14.569
## Errors -24.810
## NewLeagueN -55.913
##
## , , 19 comps
##
## Salary
## AtBat -188.361
## Hits 281.008
## HmRun -5.012
## Runs -8.912
## RBI 31.295
## Walks 86.037
## Years -33.608
## CAtBat -374.346
## CHits 1002.834
## CHmRun 256.077
## CRuns -197.615
## CRBI -293.615
## CWalks -179.663
## LeagueN 90.309
## DivisionW -27.712
## PutOuts 65.116
## Assists -13.355
## Errors -24.026
## NewLeagueN -54.165