Regresión logÃstica
library(ISLR)
## Warning: package 'ISLR' was built under R version 3.1.1
attach(Default)
head(Default)
## default student balance income
## 1 No No 729.5 44362
## 2 No Yes 817.2 12106
## 3 No No 1073.5 31767
## 4 No No 529.3 35704
## 5 No No 785.7 38463
## 6 No Yes 919.6 7492
plot(balance,income,type="n")
points(balance[default=="No"],income[default=="No"],col="blue")
points(balance[default=="Yes"],income[default=="Yes"],col="red")

par(mfrow=c(1,2))
boxplot(balance ~ default, xlab="Default", ylab="Balance", col=c("blue","red"))
boxplot(income ~ default, xlab="Default", ylab="Income", col=c("blue","red"))

m1 <- glm(default ~ balance, family=binomial(link=logit))
summary(m1)
##
## Call:
## glm(formula = default ~ balance, family = binomial(link = logit))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.270 -0.146 -0.059 -0.022 3.759
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.65133 0.36116 -29.5 <2e-16 ***
## balance 0.00550 0.00022 24.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2920.6 on 9999 degrees of freedom
## Residual deviance: 1596.5 on 9998 degrees of freedom
## AIC: 1600
##
## Number of Fisher Scoring iterations: 8
predict(m1, data.frame(balance=1000), type="response")
## 1
## 0.005752
predict(m1, data.frame(balance=2000), type="response")
## 1
## 0.5858
m2 <- glm(default ~ student, family=binomial(link=logit))
summary(m2)
##
## Call:
## glm(formula = default ~ student, family = binomial(link = logit))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.297 -0.297 -0.243 -0.243 2.659
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.5041 0.0707 -49.55 < 2e-16 ***
## studentYes 0.4049 0.1150 3.52 0.00043 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2920.6 on 9999 degrees of freedom
## Residual deviance: 2908.7 on 9998 degrees of freedom
## AIC: 2913
##
## Number of Fisher Scoring iterations: 6
predict(m2, data.frame(student="Yes"), type="response")
## 1
## 0.04314
predict(m2, data.frame(student="No"), type="response")
## 1
## 0.0292
m3 <- glm(default ~ balance + income + student, family=binomial(link=logit))
summary(m3)
##
## Call:
## glm(formula = default ~ balance + income + student, family = binomial(link = logit))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.469 -0.142 -0.056 -0.020 3.738
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.09e+01 4.92e-01 -22.08 <2e-16 ***
## balance 5.74e-03 2.32e-04 24.74 <2e-16 ***
## income 3.03e-06 8.20e-06 0.37 0.7115
## studentYes -6.47e-01 2.36e-01 -2.74 0.0062 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2920.6 on 9999 degrees of freedom
## Residual deviance: 1571.5 on 9996 degrees of freedom
## AIC: 1580
##
## Number of Fisher Scoring iterations: 8
m4 <- glm(default ~ balance + student, family=binomial(link=logit))
m4.probs <- predict(m4, type="response")
m4.pred <- ifelse(m4.probs>=0.5, "Yes", "No")
table(m4.pred, default)
## default
## m4.pred No Yes
## No 9628 228
## Yes 39 105
Smarket data
data(Smarket)
attach(Smarket)
head(Smarket)
## Year Lag1 Lag2 Lag3 Lag4 Lag5 Volume Today Direction
## 1 2001 0.381 -0.192 -2.624 -1.055 5.010 1.191 0.959 Up
## 2 2001 0.959 0.381 -0.192 -2.624 -1.055 1.296 1.032 Up
## 3 2001 1.032 0.959 0.381 -0.192 -2.624 1.411 -0.623 Down
## 4 2001 -0.623 1.032 0.959 0.381 -0.192 1.276 0.614 Up
## 5 2001 0.614 -0.623 1.032 0.959 0.381 1.206 0.213 Up
## 6 2001 0.213 0.614 -0.623 1.032 0.959 1.349 1.392 Up
log.m1 <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume ,family=binomial(link=logit))
log.m1.probs <- predict(log.m1, type="response")
log.m1.pred <- ifelse(log.m1.probs>0.5, "Up", "Down")
table(log.m1.pred,Direction)
## Direction
## log.m1.pred Down Up
## Down 145 141
## Up 457 507
mean(log.m1.pred==Direction)
## [1] 0.5216
Data de entrenamiento y de prueba
entrenamiento <- Year < 2005
log.m2 <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume, family=binomial, subset=entrenamiento)
log.m2.probs <- predict(log.m2, newdata=Smarket[!entrenamiento,], type="response")
log.m2.pred <- ifelse(log.m2.probs>0.5, "Up", "Down")
Direction.2005 <- Smarket$Direction[!entrenamiento]
table(log.m2.pred, Direction.2005)
## Direction.2005
## log.m2.pred Down Up
## Down 77 97
## Up 34 44
mean(log.m2.pred==Direction.2005)
## [1] 0.4802
Analisis Discriminante Lineal
library(MASS)
lda.m1 <- lda(Direction ~ Lag1 + Lag2, subset=Year<2005)
lda.m1
## Call:
## lda(Direction ~ Lag1 + Lag2, subset = Year < 2005)
##
## Prior probabilities of groups:
## Down Up
## 0.492 0.508
##
## Group means:
## Lag1 Lag2
## Down 0.04279 0.03389
## Up -0.03955 -0.03133
##
## Coefficients of linear discriminants:
## LD1
## Lag1 -0.6420
## Lag2 -0.5135
plot(lda.m1)

Smarket.2005 <- subset(Smarket, Year==2005)
lda.m1.pred <- predict(lda.m1, Smarket.2005)
table(lda.m1.pred$class, Smarket.2005$Direction)
##
## Down Up
## Down 35 35
## Up 76 106
mean(lda.m1.pred$class==Smarket.2005$Direction)
## [1] 0.5595
Analisis Discriminante Cuadratico
library(MASS)
qda.m1 <- qda(Direction ~ Lag1 + Lag2, subset=Year<2005)
qda.m1
## Call:
## qda(Direction ~ Lag1 + Lag2, subset = Year < 2005)
##
## Prior probabilities of groups:
## Down Up
## 0.492 0.508
##
## Group means:
## Lag1 Lag2
## Down 0.04279 0.03389
## Up -0.03955 -0.03133
qda.m1.pred <- predict(qda.m1, Smarket.2005)
table(qda.m1.pred$class, Smarket.2005$Direction)
##
## Down Up
## Down 30 20
## Up 81 121
mean(qda.m1.pred$class==Smarket.2005$Direction)
## [1] 0.5992