Regresion logística nominal
Data Opiniones
Opiniones.data <- read.table(file="http://tarwi.lamolina.edu.pe/~clopez/Categoricos/Opinion.txt", header=T)
attach(Opiniones.data)
Opiniones.data
## Raza Genero Opinion Frecuencia
## 1 Blanca Femenino Si 371
## 2 Blanca Femenino Nosabe 49
## 3 Blanca Femenino No 74
## 4 Blanca Masculino Si 250
## 5 Blanca Masculino Nosabe 45
## 6 Blanca Masculino No 71
## 7 Negra Femenino Si 64
## 8 Negra Femenino Nosabe 9
## 9 Negra Femenino No 15
## 10 Negra Masculino Si 25
## 11 Negra Masculino Nosabe 5
## 12 Negra Masculino No 13
library(nnet)
m1 <- multinom(Opinion ~ Raza + Genero, weights=Frecuencia)
## # weights: 12 (6 variable)
## initial value 1088.724778
## iter 10 value 773.731422
## final value 773.726510
## converged
summary(m1)
## Call:
## multinom(formula = Opinion ~ Raza + Genero, weights = Frecuencia)
##
## Coefficients:
## (Intercept) RazaNegra GeneroMasculino
## Nosabe -0.382 -0.2709 -0.1051
## Si 1.643 -0.3418 -0.4185
##
## Std. Errors:
## (Intercept) RazaNegra GeneroMasculino
## Nosabe 0.1788 0.3541 0.2465
## Si 0.1241 0.2370 0.1713
##
## Residual Deviance: 1547
## AIC: 1559
Raza <- relevel(Raza, ref="Negra")
Genero <- relevel(Genero, ref="Masculino")
Opinion <- relevel(Opinion, ref="Nosabe")
m2 <- multinom(Opinion ~ Raza + Genero, weights=Frecuencia)
## # weights: 12 (6 variable)
## initial value 1088.724778
## iter 10 value 773.726529
## final value 773.726510
## converged
summary(m2)
## Call:
## multinom(formula = Opinion ~ Raza + Genero, weights = Frecuencia)
##
## Coefficients:
## (Intercept) RazaBlanca GeneroFemenino
## No 0.758 -0.27099 -0.1051
## Si 1.641 0.07079 0.3135
##
## Std. Errors:
## (Intercept) RazaBlanca GeneroFemenino
## No 0.3614 0.3541 0.2465
## Si 0.3172 0.3092 0.2083
##
## Residual Deviance: 1547
## AIC: 1559
fitted.values(m1)
## No Nosabe Si
## 1 0.1459 0.09956 0.7546
## 2 0.1459 0.09956 0.7546
## 3 0.1459 0.09956 0.7546
## 4 0.1993 0.12245 0.6783
## 5 0.1993 0.12245 0.6783
## 6 0.1993 0.12245 0.6783
## 7 0.1925 0.10019 0.7073
## 8 0.1925 0.10019 0.7073
## 9 0.1925 0.10019 0.7073
## 10 0.2573 0.12056 0.6222
## 11 0.2573 0.12056 0.6222
## 12 0.2573 0.12056 0.6222
fitted.values(m2)
## Nosabe No Si
## 1 0.09956 0.1459 0.7546
## 2 0.09956 0.1459 0.7546
## 3 0.09956 0.1459 0.7546
## 4 0.12245 0.1993 0.6783
## 5 0.12245 0.1993 0.6783
## 6 0.12245 0.1993 0.6783
## 7 0.10018 0.1925 0.7074
## 8 0.10018 0.1925 0.7074
## 9 0.10018 0.1925 0.7074
## 10 0.12056 0.2573 0.6222
## 11 0.12056 0.2573 0.6222
## 12 0.12056 0.2573 0.6222
Residuales de Pearson y Devianza
res.pearson <- resid(m1, type="pearson")
X2 <- sum(res.pearson^2)
X2
## [1] 10.39
res.devianza <- resid(m1, type="deviance")
Estadistico C y Pseudo R2
m0 <- multinom(Opinion ~ 1, weights=Frecuencia)
## # weights: 6 (2 variable)
## initial value 1088.724778
## final value 778.098356
## converged
C <- 2*(logLik(m1)[1] - logLik(m0)[1])
C
## [1] 8.744
p.valor <- 1-pchisq(q=C, df=4)
p.valor
## [1] 0.06784
pseudoR2 <- (logLik(m0)[1] - logLik(m1)[1])/logLik(m0)[1]
pseudoR2
## [1] 0.005619
detach(Opiniones.data)
rm(Genero)
Regresion logística ordinal
Data Discapacidad mental
library(VGAM)
## Warning: package 'VGAM' was built under R version 3.1.2
## Loading required package: stats4
## Loading required package: splines
Mental <- read.table(file="http://tarwi.lamolina.edu.pe/~clopez/Categoricos/Discapacidad.txt", header=T)
attach(Mental)
Discapacidad <- ordered(Discapacidad, labels=c("Ausente", "Leve", "Moderado", "Presente"))
Modelo logit acumulativo
m1 <- vglm(Discapacidad ~ x1 + x2, family=cumulative)
summary(m1)
##
## Call:
## vglm(formula = Discapacidad ~ x1 + x2, family = cumulative)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logit(P[Y<=1]) -1.51 -0.671 -0.213 0.831 2.79
## logit(P[Y<=2]) -2.58 -0.603 0.249 0.628 1.79
## logit(P[Y<=3]) -3.84 0.113 0.185 0.410 2.06
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -0.193 0.739 -0.26 0.7938
## (Intercept):2 0.828 0.704 1.18 0.2394
## (Intercept):3 2.805 0.962 2.92 0.0035 **
## x1:1 -0.318 0.160 -1.99 0.0463 *
## x1:2 -0.274 0.137 -2.00 0.0458 *
## x1:3 -0.396 0.159 -2.49 0.0128 *
## x2:1 0.973 0.772 1.26 0.2074
## x2:2 1.496 0.746 2.01 0.0449 *
## x2:3 0.752 0.836 0.90 0.3684
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of linear predictors: 3
##
## Names of linear predictors:
## logit(P[Y<=1]), logit(P[Y<=2]), logit(P[Y<=3])
##
## Dispersion Parameter for cumulative family: 1
##
## Residual deviance: 96.75 on 111 degrees of freedom
##
## Log-likelihood: -48.37 on 111 degrees of freedom
##
## Number of iterations: 14
Modelo de odds proporcionales
m2 <- vglm(Discapacidad ~ x1 + x2, family=cumulative(parallel=TRUE))
summary(m2)
##
## Call:
## vglm(formula = Discapacidad ~ x1 + x2, family = cumulative(parallel = TRUE))
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logit(P[Y<=1]) -1.57 -0.705 -0.210 0.807 2.71
## logit(P[Y<=2]) -2.33 -0.467 0.266 0.690 1.61
## logit(P[Y<=3]) -3.69 0.120 0.204 0.419 1.89
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -0.282 0.623 -0.45 0.6510
## (Intercept):2 1.213 0.651 1.86 0.0625 .
## (Intercept):3 2.209 0.717 3.08 0.0021 **
## x1 -0.319 0.119 -2.67 0.0076 **
## x2 1.111 0.614 1.81 0.0704 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of linear predictors: 3
##
## Names of linear predictors:
## logit(P[Y<=1]), logit(P[Y<=2]), logit(P[Y<=3])
##
## Dispersion Parameter for cumulative family: 1
##
## Residual deviance: 99.1 on 115 degrees of freedom
##
## Log-likelihood: -49.55 on 115 degrees of freedom
##
## Number of iterations: 5
Modelo logit de categorias adyacentes
m3 <- vglm(Discapacidad ~ x1 + x2, family=acat(reverse=TRUE, parallel=TRUE))
summary(m3)
##
## Call:
## vglm(formula = Discapacidad ~ x1 + x2, family = acat(reverse = TRUE,
## parallel = TRUE))
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## loge(P[Y=1]/P[Y=2]) -1.35 -0.8788 -0.173 0.866 2.59
## loge(P[Y=2]/P[Y=3]) -2.17 -0.5072 0.207 0.769 1.60
## loge(P[Y=3]/P[Y=4]) -4.30 0.0318 0.157 0.396 1.69
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 0.1965 0.4850 0.41 0.685
## (Intercept):2 0.9608 0.5635 1.70 0.088 .
## (Intercept):3 0.4110 0.6467 0.64 0.525
## x1 -0.1759 0.0696 -2.53 0.011 *
## x2 0.6576 0.3509 1.87 0.061 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of linear predictors: 3
##
## Names of linear predictors:
## loge(P[Y=1]/P[Y=2]), loge(P[Y=2]/P[Y=3]), loge(P[Y=3]/P[Y=4])
##
## Dispersion Parameter for acat family: 1
##
## Residual deviance: 98.7 on 115 degrees of freedom
##
## Log-likelihood: -49.35 on 115 degrees of freedom
##
## Number of iterations: 4
m4 <- vglm(Discapacidad ~ x1 + x2, family=acat(reverse=FALSE, parallel=FALSE))
summary(m4)
##
## Call:
## vglm(formula = Discapacidad ~ x1 + x2, family = acat(reverse = FALSE,
## parallel = FALSE))
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## loge(P[Y=2]/P[Y=1]) -2.86 -0.865 0.170 0.7632 1.35
## loge(P[Y=3]/P[Y=2]) -1.70 -0.666 -0.181 0.5682 2.58
## loge(P[Y=4]/P[Y=3]) -1.73 -0.445 -0.141 -0.0345 4.03
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -0.65523 0.88537 -0.74 0.46
## (Intercept):2 -0.00698 0.98207 -0.01 0.99
## (Intercept):3 -1.24050 1.15427 -1.07 0.28
## x1:1 0.22854 0.18321 1.25 0.21
## x1:2 0.06447 0.20012 0.32 0.75
## x1:3 0.26717 0.20731 1.29 0.20
## x2:1 -0.24871 0.90476 -0.27 0.78
## x2:2 -1.69505 1.07639 -1.57 0.12
## x2:3 0.33748 1.13538 0.30 0.77
##
## Number of linear predictors: 3
##
## Names of linear predictors:
## loge(P[Y=2]/P[Y=1]), loge(P[Y=3]/P[Y=2]), loge(P[Y=4]/P[Y=3])
##
## Dispersion Parameter for acat family: 1
##
## Residual deviance: 96.7 on 111 degrees of freedom
##
## Log-likelihood: -48.35 on 111 degrees of freedom
##
## Number of iterations: 4
Comparación de modelos
AIC(m1)
## [1] 114.7
AIC(m2)
## [1] 109.1
AIC(m3)
## [1] 108.7
AIC(m4)
## [1] 114.7
fitted(m1)[1:4,]
## Ausente Leve Moderado Presente
## 1 0.6135 0.2725 0.07335 0.04069
## 2 0.1107 0.3541 0.03251 0.50268
## 3 0.3793 0.3943 0.10419 0.12227
## 4 0.4565 0.3614 0.09640 0.08568
fitted(m4)[1:4,]
## Ausente Leve Moderado Presente
## 1 0.6023 0.3065 0.05961 0.03156
## 2 0.1017 0.3222 0.10495 0.47107
## 3 0.3953 0.3993 0.09422 0.11119
## 4 0.4668 0.3752 0.08301 0.07499
C y Pseudo R2
m0 <- vglm(Discapacidad ~ 1, family=cumulative(parallel=TRUE))
summary(m0)
##
## Call:
## vglm(formula = Discapacidad ~ 1, family = cumulative(parallel = TRUE))
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logit(P[Y<=1]) -1.02 -1.024 -0.256 1.461 1.46
## logit(P[Y<=2]) -1.75 -0.580 0.382 1.073 1.07
## logit(P[Y<=3]) -1.74 0.232 0.232 0.367 1.22
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -0.847 0.345 -2.46 0.0141 *
## (Intercept):2 0.405 0.323 1.26 0.2090
## (Intercept):3 1.237 0.379 3.27 0.0011 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of linear predictors: 3
##
## Names of linear predictors:
## logit(P[Y<=1]), logit(P[Y<=2]), logit(P[Y<=3])
##
## Dispersion Parameter for cumulative family: 1
##
## Residual deviance: 109 on 117 degrees of freedom
##
## Log-likelihood: -54.52 on 117 degrees of freedom
##
## Number of iterations: 1
C <- 2*(logLik(m3)[1] - logLik(m0)[1])
C
## [1] 10.34
p.valor <- 1-pchisq(q=C, df=2)
p.valor
## [1] 0.005685
pseudoR2 <- (logLik(m0)[1] - logLik(m3)[1])/logLik(m0)[1]
pseudoR2
## [1] 0.09482
Otros modelos
m5 <- vglm(Discapacidad ~ x1 + x2, family=cratio(reverse=TRUE, parallel=TRUE))
summary(m5)
##
## Call:
## vglm(formula = Discapacidad ~ x1 + x2, family = cratio(reverse = TRUE,
## parallel = TRUE))
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logit(P[Y<2|Y<=2]) -1.79 -9.33e-01 1.22e-05 0.699 2.70
## logit(P[Y<3|Y<=3]) -2.56 3.26e-06 3.61e-01 0.520 1.19
## logit(P[Y<4|Y<=4]) -3.44 2.47e-01 3.78e-01 0.558 1.22
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 0.251 0.594 0.42 0.6727
## (Intercept):2 1.748 0.641 2.73 0.0064 **
## (Intercept):3 1.952 0.637 3.06 0.0022 **
## x1 -0.262 0.100 -2.62 0.0089 **
## x2 1.040 0.520 2.00 0.0456 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of linear predictors: 3
##
## Names of linear predictors:
## logit(P[Y<2|Y<=2]), logit(P[Y<3|Y<=3]), logit(P[Y<4|Y<=4])
##
## Dispersion Parameter for cratio family: 1
##
## Residual deviance: 99.28 on 115 degrees of freedom
##
## Log-likelihood: -49.64 on 115 degrees of freedom
##
## Number of iterations: 5
m6 <- vglm(Discapacidad ~ x1 + x2, family=cratio(reverse=TRUE, parallel=FALSE))
summary(m6)
##
## Call:
## vglm(formula = Discapacidad ~ x1 + x2, family = cratio(reverse = TRUE,
## parallel = FALSE))
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logit(P[Y<2|Y<=2]) -1.50 -9.44e-01 4.90e-06 0.808 2.61
## logit(P[Y<3|Y<=3]) -3.19 9.91e-06 3.04e-01 0.617 1.32
## logit(P[Y<4|Y<=4]) -4.04 1.96e-01 3.30e-01 0.480 1.48
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 0.567 0.888 0.64 0.5230
## (Intercept):2 1.015 0.863 1.18 0.2396
## (Intercept):3 2.595 0.975 2.66 0.0078 **
## x1:1 -0.218 0.185 -1.17 0.2402
## x1:2 -0.173 0.193 -0.89 0.3709
## x1:3 -0.376 0.166 -2.26 0.0239 *
## x2:1 0.322 0.919 0.35 0.7260
## x2:2 1.874 1.015 1.85 0.0647 .
## x2:3 0.947 0.868 1.09 0.2751
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of linear predictors: 3
##
## Names of linear predictors:
## logit(P[Y<2|Y<=2]), logit(P[Y<3|Y<=3]), logit(P[Y<4|Y<=4])
##
## Dispersion Parameter for cratio family: 1
##
## Residual deviance: 96.68 on 111 degrees of freedom
##
## Log-likelihood: -48.34 on 111 degrees of freedom
##
## Number of iterations: 5
AIC(m5)
## [1] 109.3
AIC(m6)
## [1] 114.7
Data Cinturon de seguridad
Condiciones.data <- read.table(file="http://tarwi.lamolina.edu.pe/~clopez/Categoricos/Condicion.txt", header=T)
attach(Condiciones.data)
Condicion <- ordered(Condicion, labels=c("y1", "y2", "y3", "y4", "y5"))
Modelo logit acumulativo
modelo1 <- vglm(Condicion ~ Genero + Localizacion + Cinturon, family=cumulative, weights=Frecuencia)
summary(modelo1)
##
## Call:
## vglm(formula = Condicion ~ Genero + Localizacion + Cinturon,
## family = cumulative, weights = Frecuencia)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logit(P[Y<=1]) -93.3 -47.47 -7.09 -0.743 25.9
## logit(P[Y<=2]) -97.7 -10.55 -1.40 17.297 57.0
## logit(P[Y<=3]) -109.8 -9.80 2.38 6.379 20.4
## logit(P[Y<=4]) -135.4 0.34 1.19 2.751 12.1
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 1.2157 0.0265 45.91 < 2e-16 ***
## (Intercept):2 1.3332 0.0278 48.00 < 2e-16 ***
## (Intercept):3 3.1011 0.0604 51.36 < 2e-16 ***
## (Intercept):4 4.8604 0.1479 32.85 < 2e-16 ***
## GeneroMujer:1 0.5454 0.0273 20.01 < 2e-16 ***
## GeneroMujer:2 0.5774 0.0292 19.81 < 2e-16 ***
## GeneroMujer:3 0.2884 0.0671 4.30 1.7e-05 ***
## GeneroMujer:4 0.2621 0.1713 1.53 0.13
## LocalizacionUrbano:1 0.7588 0.0270 28.14 < 2e-16 ***
## LocalizacionUrbano:2 0.8308 0.0289 28.72 < 2e-16 ***
## LocalizacionUrbano:3 1.2458 0.0706 17.65 < 2e-16 ***
## LocalizacionUrbano:4 1.6942 0.1996 8.49 < 2e-16 ***
## CinturonSi:1 0.8173 0.0276 29.56 < 2e-16 ***
## CinturonSi:2 0.8377 0.0296 28.29 < 2e-16 ***
## CinturonSi:3 1.0627 0.0723 14.71 < 2e-16 ***
## CinturonSi:4 1.1888 0.1906 6.24 4.4e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of linear predictors: 4
##
## Names of linear predictors:
## logit(P[Y<=1]), logit(P[Y<=2]), logit(P[Y<=3]), logit(P[Y<=4])
##
## Dispersion Parameter for cumulative family: 1
##
## Residual deviance: 50744 on 144 degrees of freedom
##
## Log-likelihood: -25372 on 144 degrees of freedom
##
## Number of iterations: 5
modelo2 <- vglm(Condicion ~ Genero + Localizacion + Cinturon, family=cumulative(parallel=TRUE), weights=Frecuencia)
summary(modelo2)
##
## Call:
## vglm(formula = Condicion ~ Genero + Localizacion + Cinturon,
## family = cumulative(parallel = TRUE), weights = Frecuencia)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logit(P[Y<=1]) -106.1 -46.191 -7.62 -1.09 26.3
## logit(P[Y<=2]) -98.7 -10.225 -1.41 16.94 65.9
## logit(P[Y<=3]) -102.5 -9.973 2.76 6.60 17.7
## logit(P[Y<=4]) -115.0 0.396 1.40 2.92 11.9
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 1.2034 0.0264 45.7 <2e-16 ***
## (Intercept):2 1.3777 0.0268 51.5 <2e-16 ***
## (Intercept):3 3.2448 0.0399 81.3 <2e-16 ***
## (Intercept):4 5.1516 0.0880 58.6 <2e-16 ***
## GeneroMujer 0.5449 0.0272 20.0 <2e-16 ***
## LocalizacionUrbano 0.7732 0.0269 28.7 <2e-16 ***
## CinturonSi 0.8241 0.0276 29.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of linear predictors: 4
##
## Names of linear predictors:
## logit(P[Y<=1]), logit(P[Y<=2]), logit(P[Y<=3]), logit(P[Y<=4])
##
## Dispersion Parameter for cumulative family: 1
##
## Residual deviance: 50879 on 153 degrees of freedom
##
## Log-likelihood: -25440 on 153 degrees of freedom
##
## Number of iterations: 4
AIC(modelo1)
## [1] 50776
AIC(modelo2)
## [1] 50893