Capítulo 6: Transformaciones
Diagramas de dispersión
# Load the mammals data set from the course server and preview its first rows.
# read.table() is called with its default separator, so the file is parsed as
# whitespace-delimited text; the first line supplies the column names.
Mamiferos.data <- read.table(
  file = "http://tarwi.lamolina.edu.pe/~clopez/Regresion/Mamiferos.txt",
  header = TRUE  # spelled out: `T` is an ordinary variable that can be reassigned
)
head(Mamiferos.data)
## SWS PS TS PesoCuerpo PesoCerebro Life GP P SE D
## 1 NA NA 3.3 6654.000 5712.0 38.6 645 3 5 3
## 2 6.3 2.0 8.3 1.000 6.6 4.5 42 3 1 3
## 3 NA NA 12.5 3.385 44.5 14.0 60 1 1 1
## 4 NA NA 16.5 0.920 5.7 NA 25 5 2 3
## 5 2.1 1.8 3.9 2547.000 4603.0 69.0 624 3 5 4
## 6 9.1 0.7 9.8 10.550 179.5 27.0 180 4 4 4
## Especie
## 1 African_elephant
## 2 African_giant_pouched_rat
## 3 Arctic_Fox
## 4 Arctic_ground_squirrel
## 5 Asian_elephant
## 6 Baboon
# Put the columns of Mamiferos.data on the search path; the statements that
# follow refer to PesoCuerpo and PesoCerebro by bare name.
attach(Mamiferos.data)

# Brain weight against body weight on the original (untransformed) scale.
plot(
  x = PesoCuerpo, y = PesoCerebro,
  xlab = "Peso del cuerpo (kg)", ylab = "Peso del cerebro (g)"
)

# Left-aligned (adj = 0) species labels at hand-picked coordinates, drawn with
# a single vectorised text() call.
text(
  x = c(4400, 1600, 300),
  y = c(5750, 4300, 1400),
  labels = c("African_elephant", "Asian_elephant", "Human"),
  adj = 0
)
Transformaciones potencia
# 2 x 2 panel comparing four transformations applied to both body weight and
# brain weight. Every panel shows the scatterplot, the least-squares line
# (solid) and a lowess smooth (dashed, red; span f = 2/3, iter = 1).
#
# par() returns the previous values of the settings it changes. They are kept
# so the device can be put back after the last panel; otherwise the 2 x 2
# layout, margins and reduced cex would leak into every later plot.
old.par <- par(mfrow = c(2, 2), mai = c(0.6, 0.6, 0.1, 0.1), mgp = c(2, 1, 0), cex.lab = 1.0, cex = 0.6)

# (a) Reciprocal: power -1.
PesoCuerpo1 <- 1 / PesoCuerpo
PesoCerebro1 <- 1 / PesoCerebro
Mamiferos.m1 <- lm(PesoCerebro1 ~ PesoCuerpo1)
plot(PesoCuerpo1, PesoCerebro1, xlab = expression(paste("(a) ", PesoCuerpo^-1)), ylab = expression(PesoCerebro^-1))
abline(Mamiferos.m1)
lines(lowess(PesoCerebro1 ~ PesoCuerpo1, f = 2/3, iter = 1), lty = 2, col = "red")

# (b) Natural logarithm.
PesoCuerpo2 <- log(PesoCuerpo)
PesoCerebro2 <- log(PesoCerebro)
Mamiferos.m2 <- lm(PesoCerebro2 ~ PesoCuerpo2)
plot(PesoCuerpo2, PesoCerebro2, xlab = expression(paste("(b) ", log[e](PesoCuerpo))), ylab = expression(log[e](PesoCerebro)))
abline(Mamiferos.m2)
lines(lowess(PesoCerebro2 ~ PesoCuerpo2, f = 2/3, iter = 1), lty = 2, col = "red")

# (c) Cube root: power 1/3 (shown as 0.33 in the axis labels).
PesoCuerpo3 <- (PesoCuerpo)^(1/3)
PesoCerebro3 <- (PesoCerebro)^(1/3)
Mamiferos.m3 <- lm(PesoCerebro3 ~ PesoCuerpo3)
plot(PesoCuerpo3, PesoCerebro3, xlab = expression(paste("(c) ", PesoCuerpo^0.33)), ylab = expression(PesoCerebro^0.33))
abline(Mamiferos.m3)
lines(lowess(PesoCerebro3 ~ PesoCuerpo3, f = 2/3, iter = 1), lty = 2, col = "red")

# (d) Square root: power 1/2.
PesoCuerpo4 <- (PesoCuerpo)^(1/2)
PesoCerebro4 <- (PesoCerebro)^(1/2)
Mamiferos.m4 <- lm(PesoCerebro4 ~ PesoCuerpo4)
plot(PesoCuerpo4, PesoCerebro4, xlab = expression(paste("(d) ", PesoCuerpo^.5)), ylab = expression(PesoCerebro^.5))
abline(Mamiferos.m4)
lines(lowess(PesoCerebro4 ~ PesoCuerpo4, f = 2/3, iter = 1), lty = 2, col = "red")

# Put back the graphical parameters that were in effect before the panel.
par(old.par)
Transformando solo la variable predictora
# Load the trees data set from the course server and preview its first rows.
# read.table() is called with its default separator, so the file is parsed as
# whitespace-delimited text; the first line supplies the column names.
Arboles.data <- read.table(
  file = "http://tarwi.lamolina.edu.pe/~clopez/Regresion/Arboles.txt",
  header = TRUE  # spelled out: `T` is an ordinary variable that can be reassigned
)
head(Arboles.data)
## Plot Tree Species Dbh Altura
## 1 3 5 WC 360 207
## 2 3 8 WC 380 225
## 3 4 1 WC 460 180
## 4 6 2 WC 290 220
## 5 6 3 WC 294 320
## 6 6 4 WC 685 260
# Expose the columns of Arboles.data (Dbh, Altura, ...) by bare name for the
# statements below.
attach(Arboles.data)
library(alr3)
## Warning: package 'alr3' was built under R version 3.3.3
## Loading required package: car

# Grid of 100 equally spaced diameters across the observed range, used to draw
# each fitted curve. `length.out` is written in full rather than relying on
# partial matching of `length`.
nuevo <- seq(min(Dbh), max(Dbh), length.out = 100)

# Scatterplot of height against diameter; the x label names the plotted
# variable Dbh (it previously read "Dhn").
plot(Dbh, Altura, xlab = "X = Dbh", ylab = "Y = Altura", cex = 0.7)

# Regress Altura on a Box-Cox power transform of Dbh for three values of
# lambda and overlay each fitted curve. Because the transform sits inside the
# formula, predict() only needs the raw Dbh values in its new data.
Arboles.m1 <- lm(Altura ~ bcPower(Dbh, lambda = 1))
lines(nuevo, predict(Arboles.m1, data.frame(Dbh = nuevo)), col = "blue")
Arboles.m2 <- lm(Altura ~ bcPower(Dbh, lambda = 0))
lines(nuevo, predict(Arboles.m2, data.frame(Dbh = nuevo)), col = "red")
Arboles.m3 <- lm(Altura ~ bcPower(Dbh, lambda = -1))
lines(nuevo, predict(Arboles.m3, data.frame(Dbh = nuevo)), col = "purple")

# Legend keyed by lambda; xjust = 1 / yjust = 1 anchor the box at (940, 200)
# by its right and top edges.
legend(940, 200, legend = c(1, 0, -1), cex = 0.75, lty = 1, col = c("blue", "red", "purple"), xjust = 1, yjust = 1)
# Coefficient table and fit statistics for the lambda = 1 model (Arboles.m1).
summary(Arboles.m1)
##
## Call:
## lm(formula = Altura ~ bcPower(Dbh, lambda = 1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -77.693 -29.467 0.713 28.959 115.237
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 111.34002 7.44300 14.96 <2e-16 ***
## bcPower(Dbh, lambda = 1) 0.31885 0.01736 18.37 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37.61 on 137 degrees of freedom
## Multiple R-squared: 0.7113, Adjusted R-squared: 0.7091
## F-statistic: 337.5 on 1 and 137 DF, p-value: < 2.2e-16
# Coefficient table and fit statistics for the lambda = 0 model (Arboles.m2).
summary(Arboles.m2)
##
## Call:
## lm(formula = Altura ~ bcPower(Dbh, lambda = 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -89.485 -20.046 3.652 22.586 104.017
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -463.314 32.438 -14.28 <2e-16 ***
## bcPower(Dbh, lambda = 0) 119.519 5.532 21.61 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.33 on 137 degrees of freedom
## Multiple R-squared: 0.7731, Adjusted R-squared: 0.7715
## F-statistic: 466.8 on 1 and 137 DF, p-value: < 2.2e-16
# Height against the base-2 logarithm of diameter, with the least-squares
# line of that regression overlaid in red.
plot(
  x = log(Dbh, base = 2), y = Altura,
  xlab = expression(log[2](Dbh)), ylab = "Y = Altura"
)
abline(reg = lm(Altura ~ log(Dbh, base = 2)), col = "red")
Transformación de Box y Cox
# R's built-in cars data set; attach() exposes its speed and dist columns by
# bare name for the statements below.
data("cars")
attach(cars)

# Scatterplot of dist against speed, then the simple linear regression.
plot(x = speed, y = dist)
cars.m1 <- lm(formula = dist ~ speed)

# Anderson-Darling normality test (from nortest) on the model residuals.
library("nortest")
ad.test(resid(cars.m1))
##
## Anderson-Darling normality test
##
## data: resid(cars.m1)
## A = 0.79406, p-value = 0.0369
# Box-Cox plot for the fitted model over lambda = 0, 0.1, ..., 1.
# NOTE(review): boxCox() is presumably supplied by car, which library(alr3)
# reported loading as a dependency -- confirm.
boxCox(object = cars.m1, lambda = seq(from = 0, to = 1, by = 0.1))
library("MASS")
##
## Attaching package: 'MASS'
## The following object is masked from 'package:alr3':
##
## forbes
# Box-Cox profile log-likelihood computed directly from the model formula.
# MASS::boxcox() is used because its formula method forwards extra arguments
# such as `data` to lm(). Calling boxCox() with the same arguments also passed
# `data` on to the plotting functions, which raised a series of
# '"data" is not a graphical parameter' warnings.
MASS::boxcox(dist ~ speed, lambda = seq(0, 1, by = 0.1), data = cars)
# Refit the regression with the square root of dist as the response, then
# repeat the Anderson-Darling normality test on the new residuals.
dist1 <- sqrt(x = dist)
cars.m2 <- lm(formula = dist1 ~ speed)
ad.test(resid(cars.m2))
##
## Anderson-Darling normality test
##
## data: resid(cars.m2)
## A = 0.39752, p-value = 0.3551