Capitulo 5: Regresión polinomial y factores

Regresión polinomial

x <- seq(-1, 1,length = 51)
y1 <- -x^2 + 1
y2 <- x^2
par(mfrow = c(1, 2))
plot(x, y1, type = "l", ylab = "E[Y/X]", xlab = "(a)")
abline(v = 0.3, lty = 2) 
abline(v = 0.7, lty = 2) 
plot(x, y2, type = "l", ylab = "E[Y/X]", xlab = "(b)")
abline(v = 0.3, lty = 2) 
abline(v = 0.7, lty = 2)

Polinomios con varios predictores

Pastel.data <- read.table(file = "http://tarwi.lamolina.edu.pe/~clopez/Regresion/Pastel.txt", header = T)
attach(Pastel.data)
Pastel.data
##    bloque       X1       X2    Y
## 1       0 33.00000 340.0000 3.89
## 2       0 37.00000 340.0000 6.36
## 3       0 33.00000 360.0000 7.65
## 4       0 37.00000 360.0000 6.79
## 5       0 35.00000 350.0000 8.36
## 6       0 35.00000 350.0000 7.63
## 7       0 35.00000 350.0000 8.12
## 8       1 37.82843 350.0000 8.40
## 9       1 32.17157 350.0000 5.38
## 10      1 35.00000 364.1421 7.00
## 11      1 35.00000 335.8579 4.51
## 12      1 35.00000 350.0000 7.81
## 13      1 35.00000 350.0000 8.44
## 14      1 35.00000 350.0000 8.06
Pastel.m1 <- lm(Y ~ X1 + X2 + I(X1^2) + I(X2^2) + X1:X2)
par(mfrow = c(1, 2))
plot(X1, Y, type = "n", xlab = "(a) X1")
X1nuevo <- seq(32, 38, length = 50) 
lines(X1nuevo, predict(Pastel.m1, newdata = data.frame(X1 = X1nuevo, X2 = rep(340, 50))))
lines(X1nuevo, predict(Pastel.m1, newdata = data.frame(X1 = X1nuevo, X2 = rep(350, 50))))
lines(X1nuevo, predict(Pastel.m1, newdata = data.frame(X1 = X1nuevo, X2 = rep(360, 50))))
text(34, 4.7, "X2=340", adj = 0, cex = 0.7)
text(32, 5.7, "X2=350", adj = 0, cex = 0.7) 
text(32, 7.6,"X2=360",adj=0,cex=0.7)
plot(X2, Y, type = "n", xlab = "(b) X2")
X2nuevo <- seq(330, 370, length = 50)
lines(X2nuevo, predict(Pastel.m1, newdata = data.frame(X1 = rep(33, 50), X2 = X2nuevo)))
lines(X2nuevo, predict(Pastel.m1, newdata = data.frame(X1 = rep(35, 50), X2 = X2nuevo)))
lines(X2nuevo, predict(Pastel.m1, newdata = data.frame(X1 = rep(37, 50), X2 = X2nuevo)))
text(342, 4, "X1=33", adj = 0, cex = 0.7) 
text(335, 4.55, "X1=35", adj = 0, cex = 0.7) 
text(336, 7.3, "X1=37", adj = 0, cex = 0.7)

Pastel.m2 <- update(Pastel.m1, ~ . -X1:X2) 
par(mfrow = c(1, 2))
plot(X1, Y, type = "n", xlab = "(a) X1")
X1nuevo <- seq(32, 38, length = 50)
lines(X1nuevo, predict(Pastel.m2, newdata = data.frame(X1 = X1nuevo, X2 = rep(340, 50)))) 
lines(X1nuevo, predict(Pastel.m2, newdata = data.frame(X1 = X1nuevo, X2 = rep(350, 50)))) 
lines(X1nuevo, predict(Pastel.m2, newdata = data.frame(X1 = X1nuevo, X2 = rep(360, 50)))) 
text(33, 4.3, "X2=340", adj = 0, cex = 0.7) 
text(32, 7.2, "X2=350", adj = 0, cex = 0.7) 
text(34, 7.1, "X2=360", adj = 0, cex = 0.7)
plot(X2, Y, type = "n", xlab = "(b) X2") 
X2nuevo <- seq(330, 370, length = 50) 
lines(X2nuevo, predict(Pastel.m2, newdata = data.frame(X1 = rep(33, 50), X2 = X2nuevo))) 
lines(X2nuevo, predict(Pastel.m2, newdata = data.frame(X1 = rep(35, 50), X2 = X2nuevo))) 
lines(X2nuevo, predict(Pastel.m2, newdata = data.frame(X1 = rep(37, 50), X2 = X2nuevo))) 
text(340, 4.3, "X1=33", adj = 0, cex = 0.7) 
text(336, 7.0, "X1=35", adj = 0, cex = 0.7) 
text(346, 7.3, "X1=37", adj = 0, cex = 0.7)

Factores

Mamiferos.data <- read.table(file = "http://tarwi.lamolina.edu.pe/~clopez/Regresion/Mamiferos.txt", header = T)
attach(Mamiferos.data)
plot(D, TS, xlab = "Indice de peligro", ylab = "Total horas de sueño"

D <- as.factor(D)

Mamiferos.m1 <- lm(TS ~ D)
coef(Mamiferos.m1)
## (Intercept)          D2          D3          D4          D5 
##   13.083333   -1.333333   -2.773333   -4.272222   -9.011905
Mamiferos.m2 <- lm(TS ~ -1 + D)
coef(Mamiferos.m2)
##        D1        D2        D3        D4        D5 
## 13.083333 11.750000 10.310000  8.811111  4.071429
x <- logb(PesoCuerpo,2)
Mamiferos.n1 <- lm(TS ~ D + D*x)
coef(Mamiferos.n1)
## (Intercept)          D2          D3          D4          D5           x 
## 13.86818223 -2.35924292 -3.50086642 -4.22738075 -7.03108292 -0.40274346 
##        D2:x        D3:x        D4:x        D5:x 
## -0.02388549 -0.23869576  0.11760239 -0.06680545
plot(logb(PesoCuerpo, 2), TS, type = "n")
text(logb(PesoCuerpo, 2), TS, labels  =as.character(D), cex = 0.75)
abline(coef(Mamiferos.n1)[1], coef(Mamiferos.n1)[6], col = "blue") 
abline(coef(Mamiferos.n1)[1] + coef(Mamiferos.n1)[2], coef(Mamiferos.n1)[7], col = "red") 
abline(coef(Mamiferos.n1)[1] + coef(Mamiferos.n1)[3], coef(Mamiferos.n1)[8], col = "green") 
abline(coef(Mamiferos.n1)[1] + coef(Mamiferos.n1)[4], coef(Mamiferos.n1)[9], col = "orange") 
abline(coef(Mamiferos.n1)[1] + coef(Mamiferos.n1)[5], coef(Mamiferos.n1)[10], col = "pink")

Mamiferos.n2 <- lm(TS ~ D + x)
coef(Mamiferos.n2)
## (Intercept)          D2          D3          D4          D5           x 
##   13.932502   -2.428716   -3.583566   -3.853468   -7.294486   -0.435749
plot(logb(PesoCuerpo, 2), TS, type = "n")
text(logb(PesoCuerpo, 2), TS, labels = as.character(D), cex = 0.75)
abline(coef(Mamiferos.n2)[1], coef(Mamiferos.n2)[6], col = "blue")
abline(coef(Mamiferos.n2)[1] + coef(Mamiferos.n2)[2], coef(Mamiferos.n2)[6], col = "red")
abline(coef(Mamiferos.n2)[1] + coef(Mamiferos.n2)[3], coef(Mamiferos.n2)[6], col = "green")
abline(coef(Mamiferos.n2)[1] + coef(Mamiferos.n2)[4], coef(Mamiferos.n2)[6], col = "orange")
abline(coef(Mamiferos.n2)[1] + coef(Mamiferos.n2)[5], coef(Mamiferos.n2)[6], col = "pink")

Mamiferos.n3 <- lm(TS ~ x + x:D)
coef(Mamiferos.n3)
## (Intercept)           x        x:D2        x:D3        x:D4        x:D5 
##  11.6258529  -0.2004724  -0.2105761  -0.4458769  -0.2441391  -0.9491168
plot(logb(PesoCuerpo,2), TS, type = "n")
text(logb(PesoCuerpo, 2), TS, labels = as.character(D), cex = 0.75)
abline(coef(Mamiferos.n3)[1], coef(Mamiferos.n3)[2], col = "blue")
abline(coef(Mamiferos.n3)[1], coef(Mamiferos.n3)[2] + coef(Mamiferos.n3)[3], col = "red")
abline(coef(Mamiferos.n3)[1], coef(Mamiferos.n3)[2] + coef(Mamiferos.n3)[4], col = "green")
abline(coef(Mamiferos.n3)[1], coef(Mamiferos.n3)[2] + coef(Mamiferos.n3)[5], col = "orange")
abline(coef(Mamiferos.n3)[1], coef(Mamiferos.n3)[2] + coef(Mamiferos.n3)[6], col = "pink")

Mamiferos.n4 <- lm(TS ~ x)
coef(Mamiferos.n4)
## (Intercept)           x 
##  11.4377412  -0.5497446
plot(logb(PesoCuerpo, 2), TS, type = "n")
text(logb(PesoCuerpo, 2), TS, labels = as.character(D), cex = 0.75)
abline(Mamiferos.n4, col = "blue")

AIC(Mamiferos.n1)
## [1] 318.6739
AIC(Mamiferos.n2)
## [1] 312.2683
AIC(Mamiferos.n3)
## [1] 323.8351
AIC(Mamiferos.n4)
## [1] 327.4118