Capitulo 4: Regresión ponderada y falta de ajuste
Data Bland
Bland.data <- read.table(file = "http://tarwi.lamolina.edu.pe/~clopez/Regresion/Bland.txt", header = T)
CapPequeño <- subset(Bland.data, Tipo == 0)
CapGrande <- subset(Bland.data, Tipo == 1)
head(CapPequeño)
## Dia n ybar SD Tipo
## 1 0 5 10.00 0.00 0
## 2 6 5 11.00 0.72 0
## 3 9 5 10.00 0.72 0
## 4 19 11 13.36 1.03 0
## 5 27 7 14.29 0.95 0
## 6 30 8 14.50 1.19 0
head(CapGrande)
## Dia n ybar SD Tipo
## 31 0 5 10.2 0.83 1
## 32 3 5 10.4 0.54 1
## 33 7 5 10.6 0.54 1
## 34 13 6 12.5 0.83 1
## 35 18 5 12.0 1.41 1
## 36 24 4 15.0 0.82 1
Bland.m1 <- lm(ybar ~ Dia, weights = n, data = CapGrande)
summary(Bland.m1)
##
## Call:
## lm(formula = ybar ~ Dia, data = CapGrande, weights = n)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -4.217 -1.173 0.022 1.099 2.975
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.973754 0.314272 31.74 <2e-16 ***
## Dia 0.217330 0.005339 40.71 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.929 on 20 degrees of freedom
## Multiple R-squared: 0.9881, Adjusted R-squared: 0.9875
## F-statistic: 1657 on 1 and 20 DF, p-value: < 2.2e-16
anova(Bland.m1)
## Analysis of Variance Table
##
## Response: ybar
## Df Sum Sq Mean Sq F value Pr(>F)
## Dia 1 6164.3 6164.3 1657.2 < 2.2e-16 ***
## Residuals 20 74.4 3.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
attach(CapGrande)
plot(Dia, ybar, xlab = "Dias de inactividad", ylab = "Número promedio de tallos")
abline(Bland.m1, lty = 2)
detach(CapGrande)
Falta de ajuste
x1 <- c(1, 1, 1, 2, 3, 3, 4, 4, 4, 4)
y1 <- c(2.55, 2.75, 2.57, 2.40, 4.19, 4.70, 3.81, 4.87, 2.93, 4.52)
Modelo <- lm(y1 ~ x1)
library(alr3)
## Warning: package 'alr3' was built under R version 3.3.3
## Loading required package: car
pureErrorAnova(Modelo)
## Analysis of Variance Table
##
## Response: y1
## Df Sum Sq Mean Sq F value Pr(>F)
## x1 1 4.5693 4.5693 11.6247 0.01433 *
## Residuals 8 4.2166 0.5271
## Lack of fit 2 1.8582 0.9291 2.3638 0.17496
## Pure Error 6 2.3584 0.3931
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Prueba F general
Grasa.data <- read.table(file = "http://tarwi.lamolina.edu.pe/~clopez/Regresion/Grasa.txt", header = T)
Grasa.m1 <- lm(Grasa ~ ., data = Grasa.data)
summary(Grasa.m1)
##
## Call:
## lm(formula = Grasa ~ ., data = Grasa.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.1687 -2.8639 -0.1014 3.2085 10.0068
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -18.18849 17.34857 -1.048 0.29551
## Edad 0.06208 0.03235 1.919 0.05618 .
## Peso -0.08844 0.05353 -1.652 0.09978 .
## Altura -0.06959 0.09601 -0.725 0.46925
## Cuello -0.47060 0.23247 -2.024 0.04405 *
## Pecho -0.02386 0.09915 -0.241 0.81000
## Abdomen 0.95477 0.08645 11.044 < 2e-16 ***
## Cadera -0.20754 0.14591 -1.422 0.15622
## Muslo 0.23610 0.14436 1.636 0.10326
## Rodilla 0.01528 0.24198 0.063 0.94970
## Tobillo 0.17400 0.22147 0.786 0.43285
## Biceps 0.18160 0.17113 1.061 0.28966
## Antebrazo 0.45202 0.19913 2.270 0.02410 *
## Muñeca -1.62064 0.53495 -3.030 0.00272 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.305 on 238 degrees of freedom
## Multiple R-squared: 0.749, Adjusted R-squared: 0.7353
## F-statistic: 54.65 on 13 and 238 DF, p-value: < 2.2e-16
Grasa.m2 <- lm(Grasa ~ Abdomen + Muñeca + Antebrazo + Cuello + Edad + Peso, data = Grasa.data)
Grasa.m3 <- lm(Grasa ~ Abdomen + Muñeca + Antebrazo + Cuello, data = Grasa.data)
anova(Grasa.m3, Grasa.m2)
## Analysis of Variance Table
##
## Model 1: Grasa ~ Abdomen + Muñeca + Antebrazo + Cuello
## Model 2: Grasa ~ Abdomen + Muñeca + Antebrazo + Cuello + Edad + Peso
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 247 5029.6
## 2 245 4559.2 2 470.33 12.637 5.982e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Regiones conjuntas de confianza
UN.data <- read.table(file = "http://tarwi.lamolina.edu.pe/~clopez/Regresion/UN.txt", header = T)
UN.m1 <- lm(log(Fertilidad) ~ log2(PBIpp) + Purban, data = UN.data)
vcov(UN.m1)
## (Intercept) log2(PBIpp) Purban
## (Intercept) 0.0215689669 -2.450423e-03 1.111217e-04
## log2(PBIpp) -0.0024504230 3.646103e-04 -2.804833e-05
## Purban 0.0001111217 -2.804833e-05 3.550960e-06
cov2cor(vcov(UN.m1))
## (Intercept) log2(PBIpp) Purban
## (Intercept) 1.0000000 -0.8737997 0.4015238
## log2(PBIpp) -0.8737997 1.0000000 -0.7795064
## Purban 0.4015238 -0.7795064 1.0000000
confint(UN.m1,level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 2.303302995 2.8826888443
## log2(PBIpp) -0.163140103 -0.0878101520
## Purban -0.007239249 0.0001948123
library(car)
confidenceEllipse(UN.m1, levels = 0.95)
confidenceEllipse(UN.m1, levels = 0.92, which.coef = c(1, 3))