Capitulo 3 Regresión lineal multiple

Data Naciones Unidas

UN.data <- read.table(file = "http://tarwi.lamolina.edu.pe/~clopez/Regresion/UN.txt", header = T)
attach(UN.data)
UN.m1 <- lm(log(Fertilidad) ~ log2(PBIpp))
UN.m1

## 
## Call:
## lm(formula = log(Fertilidad) ~ log2(PBIpp))
## 
## Coefficients:
## (Intercept)  log2(PBIpp)  
##      2.7032      -0.1533

UN.m2 <- lm(log(Fertilidad) ~ Purban)
UN.m2

## 
## Call:
## lm(formula = log(Fertilidad) ~ Purban)
## 
## Coefficients:
## (Intercept)       Purban  
##     1.74972     -0.01317

Grafico de variable añadida

par(mfrow = c(2, 2))
plot(log2(PBIpp), log(Fertilidad), xlab = "log2(PBIpp) (a)", cex.lab = 0.9)
abline(UN.m1)
plot(Purban, log(Fertilidad), xlab = "Purban (b)", cex.lab = 0.9)
abline(UN.m2)
UN.m3 <- lm(Purban ~ log2(PBIpp))
plot(log2(PBIpp), Purban, xlab = "log2(PBIpp) (c)", cex.lab = 0.9)
abline(UN.m3)
UN.m4 <- lm(resid(UN.m1) ~ resid(UN.m3))
plot(resid(UN.m3), resid(UN.m1), xlab = expression(paste(hat(e), " Purban vs log2(PBIpp) (d)")), ylab = expression(paste(hat(e), " log(Fertilidad) vs log2(PBIpp)")), cex.lab = 0.9)
abline(UN.m4)
abline(h = 0, lty = 2)
abline(v = 0, lty = 2)

Consumo de Gasolina

Gasolina2001.data <- read.table(file = "http://tarwi.lamolina.edu.pe/~clopez/Regresion/Gasolina2001.txt", header = T)
attach(Gasolina2001.data)
TasaLic <- 1000*Licencias/Poblacion
TasaComb <- 1000*Combustible/Poblacion
Ingreso <- Ingreso/1000
logMillas <- log2(Millas)
Gasolina2001.m5 <- lm(TasaComb ~ Impuesto + TasaLic + Ingreso + logMillas)
summary(Gasolina2001.m5)

## 
## Call:
## lm(formula = TasaComb ~ Impuesto + TasaLic + Ingreso + logMillas)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -163.145  -33.039    5.895   31.989  183.499 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 154.1928   194.9062   0.791 0.432938    
## Impuesto     -4.2280     2.0301  -2.083 0.042873 *  
## TasaLic       0.4719     0.1285   3.672 0.000626 ***
## Ingreso      -6.1353     2.1936  -2.797 0.007508 ** 
## logMillas    18.5453     6.4722   2.865 0.006259 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 64.89 on 46 degrees of freedom
## Multiple R-squared:  0.5105, Adjusted R-squared:  0.4679 
## F-statistic: 11.99 on 4 and 46 DF,  p-value: 9.331e-07

anova(Gasolina2001.m5)

## Analysis of Variance Table
## 
## Response: TasaComb
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## Impuesto   1  26635   26635  6.3254 0.0154602 *  
## TasaLic    1  79378   79378 18.8506 7.692e-05 ***
## Ingreso    1  61408   61408 14.5833 0.0003997 ***
## logMillas  1  34573   34573  8.2104 0.0062592 ** 
## Residuals 46 193700    4211                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

X <- cbind(Impuesto, TasaLic, Ingreso, logMillas)
anova(lm(TasaComb ~ X))

## Analysis of Variance Table
## 
## Response: TasaComb
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## X          4 201994   50499  11.992 9.331e-07 ***
## Residuals 46 193700    4211                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Gasolina2001.m6 <-update(Gasolina2001.m5, ~ . -Impuesto)
anova(Gasolina2001.m6, Gasolina2001.m5)

## Analysis of Variance Table
## 
## Model 1: TasaComb ~ TasaLic + Ingreso + logMillas
## Model 2: TasaComb ~ Impuesto + TasaLic + Ingreso + logMillas
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1     47 211964                              
## 2     46 193700  1     18264 4.3373 0.04287 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Predicción y valores estimados

predict(Gasolina2001.m5, data.frame(Impuesto = 20, TasaLic = 900, Ingreso = 30, logMillas = 15),level = 0.98, interval = "confidence")

##        fit      lwr      upr
## 1 588.4365 563.3869 613.4861

predict(Gasolina2001.m5, data.frame(Impuesto = 20, TasaLic = 900, Ingreso = 30, logMillas = 15),level = 0.98, interval = "prediction")

##        fit      lwr      upr
## 1 588.4365 430.0431 746.8298

Estudio Berkeley

Berkeley.data <- read.table(file = "http://tarwi.lamolina.edu.pe/~clopez/Regresion/Berkeley.txt", header = T)
attach(Berkeley.data)
pairs(Puntuacion ~ Peso2 + Peso9 + Peso18)

PesoG9 <- Peso9 - Peso2
PesoG18 <- Peso18 - Peso9
Berkeley.m1 <- lm(Puntuacion ~ Peso2 + Peso9 + Peso18)
Berkeley.m1

## 
## Call:
## lm(formula = Puntuacion ~ Peso2 + Peso9 + Peso18)
## 
## Coefficients:
## (Intercept)        Peso2        Peso9       Peso18  
##     1.59210     -0.11564      0.05625      0.04834

Berkeley.m2 <- lm(Puntuacion ~ Peso2 + PesoG9 + PesoG18)
Berkeley.m2

## 
## Call:
## lm(formula = Puntuacion ~ Peso2 + PesoG9 + PesoG18)
## 
## Coefficients:
## (Intercept)        Peso2       PesoG9      PesoG18  
##     1.59210     -0.01106      0.10459      0.04834

Berkeley.m3 <- lm(Puntuacion ~ Peso2 + Peso9 + Peso18 + PesoG9 + PesoG18)
Berkeley.m3

## 
## Call:
## lm(formula = Puntuacion ~ Peso2 + Peso9 + Peso18 + PesoG9 + PesoG18)
## 
## Coefficients:
## (Intercept)        Peso2        Peso9       Peso18       PesoG9  
##     1.59210     -0.11564      0.05625      0.04834           NA  
##     PesoG18  
##          NA