Regresion logística nominal

Data Opiniones

Opiniones.data <- read.table(file="http://tarwi.lamolina.edu.pe/~clopez/Categoricos/Opinion.txt", header=T)
attach(Opiniones.data)
Opiniones.data

##      Raza    Genero Opinion Frecuencia
## 1  Blanca  Femenino      Si        371
## 2  Blanca  Femenino  Nosabe         49
## 3  Blanca  Femenino      No         74
## 4  Blanca Masculino      Si        250
## 5  Blanca Masculino  Nosabe         45
## 6  Blanca Masculino      No         71
## 7   Negra  Femenino      Si         64
## 8   Negra  Femenino  Nosabe          9
## 9   Negra  Femenino      No         15
## 10  Negra Masculino      Si         25
## 11  Negra Masculino  Nosabe          5
## 12  Negra Masculino      No         13

library(nnet)
m1 <- multinom(Opinion ~ Raza + Genero, weights=Frecuencia)

## # weights:  12 (6 variable)
## initial  value 1088.724778 
## iter  10 value 773.731422
## final  value 773.726510 
## converged

summary(m1)

## Call:
## multinom(formula = Opinion ~ Raza + Genero, weights = Frecuencia)
## 
## Coefficients:
##        (Intercept) RazaNegra GeneroMasculino
## Nosabe      -0.382   -0.2709         -0.1051
## Si           1.643   -0.3418         -0.4185
## 
## Std. Errors:
##        (Intercept) RazaNegra GeneroMasculino
## Nosabe      0.1788    0.3541          0.2465
## Si          0.1241    0.2370          0.1713
## 
## Residual Deviance: 1547 
## AIC: 1559

Raza <- relevel(Raza, ref="Negra")
Genero <- relevel(Genero, ref="Masculino")
Opinion <- relevel(Opinion, ref="Nosabe")
m2 <- multinom(Opinion ~ Raza + Genero, weights=Frecuencia)

## # weights:  12 (6 variable)
## initial  value 1088.724778 
## iter  10 value 773.726529
## final  value 773.726510 
## converged

summary(m2)

## Call:
## multinom(formula = Opinion ~ Raza + Genero, weights = Frecuencia)
## 
## Coefficients:
##    (Intercept) RazaBlanca GeneroFemenino
## No       0.758   -0.27099        -0.1051
## Si       1.641    0.07079         0.3135
## 
## Std. Errors:
##    (Intercept) RazaBlanca GeneroFemenino
## No      0.3614     0.3541         0.2465
## Si      0.3172     0.3092         0.2083
## 
## Residual Deviance: 1547 
## AIC: 1559

fitted.values(m1)

##        No  Nosabe     Si
## 1  0.1459 0.09956 0.7546
## 2  0.1459 0.09956 0.7546
## 3  0.1459 0.09956 0.7546
## 4  0.1993 0.12245 0.6783
## 5  0.1993 0.12245 0.6783
## 6  0.1993 0.12245 0.6783
## 7  0.1925 0.10019 0.7073
## 8  0.1925 0.10019 0.7073
## 9  0.1925 0.10019 0.7073
## 10 0.2573 0.12056 0.6222
## 11 0.2573 0.12056 0.6222
## 12 0.2573 0.12056 0.6222

fitted.values(m2)

##     Nosabe     No     Si
## 1  0.09956 0.1459 0.7546
## 2  0.09956 0.1459 0.7546
## 3  0.09956 0.1459 0.7546
## 4  0.12245 0.1993 0.6783
## 5  0.12245 0.1993 0.6783
## 6  0.12245 0.1993 0.6783
## 7  0.10018 0.1925 0.7074
## 8  0.10018 0.1925 0.7074
## 9  0.10018 0.1925 0.7074
## 10 0.12056 0.2573 0.6222
## 11 0.12056 0.2573 0.6222
## 12 0.12056 0.2573 0.6222

Residuales de Pearson y Devianza

res.pearson <- resid(m1, type="pearson")
X2 <- sum(res.pearson^2)
X2

## [1] 10.39

res.devianza <- resid(m1, type="deviance")

Estadistico C y Pseudo R2

m0 <- multinom(Opinion ~ 1, weights=Frecuencia)

## # weights:  6 (2 variable)
## initial  value 1088.724778 
## final  value 778.098356 
## converged

C <- 2*(logLik(m1)[1] - logLik(m0)[1])
C

## [1] 8.744

p.valor <- 1-pchisq(q=C, df=4)
p.valor

## [1] 0.06784

pseudoR2 <- (logLik(m0)[1] - logLik(m1)[1])/logLik(m0)[1]
pseudoR2

## [1] 0.005619

detach(Opiniones.data)
rm(Genero)

Regresion logística ordinal

Data Discapacidad mental

library(VGAM)

## Warning: package 'VGAM' was built under R version 3.1.2

## Loading required package: stats4
## Loading required package: splines

Mental <- read.table(file="http://tarwi.lamolina.edu.pe/~clopez/Categoricos/Discapacidad.txt", header=T)
attach(Mental)
Discapacidad <- ordered(Discapacidad, labels=c("Ausente", "Leve", "Moderado", "Presente"))

Modelo logit acumulativo

m1 <- vglm(Discapacidad ~ x1 + x2, family=cumulative)
summary(m1)

## 
## Call:
## vglm(formula = Discapacidad ~ x1 + x2, family = cumulative)
## 
## Pearson residuals:
##                  Min     1Q Median    3Q  Max
## logit(P[Y<=1]) -1.51 -0.671 -0.213 0.831 2.79
## logit(P[Y<=2]) -2.58 -0.603  0.249 0.628 1.79
## logit(P[Y<=3]) -3.84  0.113  0.185 0.410 2.06
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept):1   -0.193      0.739   -0.26   0.7938   
## (Intercept):2    0.828      0.704    1.18   0.2394   
## (Intercept):3    2.805      0.962    2.92   0.0035 **
## x1:1            -0.318      0.160   -1.99   0.0463 * 
## x1:2            -0.274      0.137   -2.00   0.0458 * 
## x1:3            -0.396      0.159   -2.49   0.0128 * 
## x2:1             0.973      0.772    1.26   0.2074   
## x2:2             1.496      0.746    2.01   0.0449 * 
## x2:3             0.752      0.836    0.90   0.3684   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  3 
## 
## Names of linear predictors: 
## logit(P[Y<=1]), logit(P[Y<=2]), logit(P[Y<=3])
## 
## Dispersion Parameter for cumulative family:   1
## 
## Residual deviance: 96.75 on 111 degrees of freedom
## 
## Log-likelihood: -48.37 on 111 degrees of freedom
## 
## Number of iterations: 14

Modelo de odds proporcionales

m2 <- vglm(Discapacidad ~ x1 + x2, family=cumulative(parallel=TRUE))
summary(m2)

## 
## Call:
## vglm(formula = Discapacidad ~ x1 + x2, family = cumulative(parallel = TRUE))
## 
## Pearson residuals:
##                  Min     1Q Median    3Q  Max
## logit(P[Y<=1]) -1.57 -0.705 -0.210 0.807 2.71
## logit(P[Y<=2]) -2.33 -0.467  0.266 0.690 1.61
## logit(P[Y<=3]) -3.69  0.120  0.204 0.419 1.89
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept):1   -0.282      0.623   -0.45   0.6510   
## (Intercept):2    1.213      0.651    1.86   0.0625 . 
## (Intercept):3    2.209      0.717    3.08   0.0021 **
## x1              -0.319      0.119   -2.67   0.0076 **
## x2               1.111      0.614    1.81   0.0704 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  3 
## 
## Names of linear predictors: 
## logit(P[Y<=1]), logit(P[Y<=2]), logit(P[Y<=3])
## 
## Dispersion Parameter for cumulative family:   1
## 
## Residual deviance: 99.1 on 115 degrees of freedom
## 
## Log-likelihood: -49.55 on 115 degrees of freedom
## 
## Number of iterations: 5

Modelo logit de categorias adyacentes

m3 <- vglm(Discapacidad ~ x1 + x2, family=acat(reverse=TRUE, parallel=TRUE))
summary(m3)

## 
## Call:
## vglm(formula = Discapacidad ~ x1 + x2, family = acat(reverse = TRUE, 
##     parallel = TRUE))
## 
## Pearson residuals:
##                       Min      1Q Median    3Q  Max
## loge(P[Y=1]/P[Y=2]) -1.35 -0.8788 -0.173 0.866 2.59
## loge(P[Y=2]/P[Y=3]) -2.17 -0.5072  0.207 0.769 1.60
## loge(P[Y=3]/P[Y=4]) -4.30  0.0318  0.157 0.396 1.69
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)  
## (Intercept):1   0.1965     0.4850    0.41    0.685  
## (Intercept):2   0.9608     0.5635    1.70    0.088 .
## (Intercept):3   0.4110     0.6467    0.64    0.525  
## x1             -0.1759     0.0696   -2.53    0.011 *
## x2              0.6576     0.3509    1.87    0.061 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  3 
## 
## Names of linear predictors: 
## loge(P[Y=1]/P[Y=2]), loge(P[Y=2]/P[Y=3]), loge(P[Y=3]/P[Y=4])
## 
## Dispersion Parameter for acat family:   1
## 
## Residual deviance: 98.7 on 115 degrees of freedom
## 
## Log-likelihood: -49.35 on 115 degrees of freedom
## 
## Number of iterations: 4

m4 <- vglm(Discapacidad ~ x1 + x2, family=acat(reverse=FALSE, parallel=FALSE))
summary(m4)

## 
## Call:
## vglm(formula = Discapacidad ~ x1 + x2, family = acat(reverse = FALSE, 
##     parallel = FALSE))
## 
## Pearson residuals:
##                       Min     1Q Median      3Q  Max
## loge(P[Y=2]/P[Y=1]) -2.86 -0.865  0.170  0.7632 1.35
## loge(P[Y=3]/P[Y=2]) -1.70 -0.666 -0.181  0.5682 2.58
## loge(P[Y=4]/P[Y=3]) -1.73 -0.445 -0.141 -0.0345 4.03
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -0.65523    0.88537   -0.74     0.46
## (Intercept):2 -0.00698    0.98207   -0.01     0.99
## (Intercept):3 -1.24050    1.15427   -1.07     0.28
## x1:1           0.22854    0.18321    1.25     0.21
## x1:2           0.06447    0.20012    0.32     0.75
## x1:3           0.26717    0.20731    1.29     0.20
## x2:1          -0.24871    0.90476   -0.27     0.78
## x2:2          -1.69505    1.07639   -1.57     0.12
## x2:3           0.33748    1.13538    0.30     0.77
## 
## Number of linear predictors:  3 
## 
## Names of linear predictors: 
## loge(P[Y=2]/P[Y=1]), loge(P[Y=3]/P[Y=2]), loge(P[Y=4]/P[Y=3])
## 
## Dispersion Parameter for acat family:   1
## 
## Residual deviance: 96.7 on 111 degrees of freedom
## 
## Log-likelihood: -48.35 on 111 degrees of freedom
## 
## Number of iterations: 4

Comparación de modelos

AIC(m1)

## [1] 114.7

AIC(m2)

## [1] 109.1

AIC(m3)

## [1] 108.7

AIC(m4)

## [1] 114.7

fitted(m1)[1:4,]

##   Ausente   Leve Moderado Presente
## 1  0.6135 0.2725  0.07335  0.04069
## 2  0.1107 0.3541  0.03251  0.50268
## 3  0.3793 0.3943  0.10419  0.12227
## 4  0.4565 0.3614  0.09640  0.08568

fitted(m4)[1:4,]

##   Ausente   Leve Moderado Presente
## 1  0.6023 0.3065  0.05961  0.03156
## 2  0.1017 0.3222  0.10495  0.47107
## 3  0.3953 0.3993  0.09422  0.11119
## 4  0.4668 0.3752  0.08301  0.07499

C y Pseudo R2

m0 <- vglm(Discapacidad ~ 1, family=cumulative(parallel=TRUE))
summary(m0)

## 
## Call:
## vglm(formula = Discapacidad ~ 1, family = cumulative(parallel = TRUE))
## 
## Pearson residuals:
##                  Min     1Q Median    3Q  Max
## logit(P[Y<=1]) -1.02 -1.024 -0.256 1.461 1.46
## logit(P[Y<=2]) -1.75 -0.580  0.382 1.073 1.07
## logit(P[Y<=3]) -1.74  0.232  0.232 0.367 1.22
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept):1   -0.847      0.345   -2.46   0.0141 * 
## (Intercept):2    0.405      0.323    1.26   0.2090   
## (Intercept):3    1.237      0.379    3.27   0.0011 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  3 
## 
## Names of linear predictors: 
## logit(P[Y<=1]), logit(P[Y<=2]), logit(P[Y<=3])
## 
## Dispersion Parameter for cumulative family:   1
## 
## Residual deviance: 109 on 117 degrees of freedom
## 
## Log-likelihood: -54.52 on 117 degrees of freedom
## 
## Number of iterations: 1

C <- 2*(logLik(m3)[1] - logLik(m0)[1])
C

## [1] 10.34

p.valor <- 1-pchisq(q=C, df=2)
p.valor

## [1] 0.005685

pseudoR2 <- (logLik(m0)[1] - logLik(m3)[1])/logLik(m0)[1]
pseudoR2

## [1] 0.09482

Otros modelos

m5 <- vglm(Discapacidad ~ x1 + x2, family=cratio(reverse=TRUE, parallel=TRUE))
summary(m5)

## 
## Call:
## vglm(formula = Discapacidad ~ x1 + x2, family = cratio(reverse = TRUE, 
##     parallel = TRUE))
## 
## Pearson residuals:
##                      Min        1Q   Median    3Q  Max
## logit(P[Y<2|Y<=2]) -1.79 -9.33e-01 1.22e-05 0.699 2.70
## logit(P[Y<3|Y<=3]) -2.56  3.26e-06 3.61e-01 0.520 1.19
## logit(P[Y<4|Y<=4]) -3.44  2.47e-01 3.78e-01 0.558 1.22
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept):1    0.251      0.594    0.42   0.6727   
## (Intercept):2    1.748      0.641    2.73   0.0064 **
## (Intercept):3    1.952      0.637    3.06   0.0022 **
## x1              -0.262      0.100   -2.62   0.0089 **
## x2               1.040      0.520    2.00   0.0456 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  3 
## 
## Names of linear predictors: 
## logit(P[Y<2|Y<=2]), logit(P[Y<3|Y<=3]), logit(P[Y<4|Y<=4])
## 
## Dispersion Parameter for cratio family:   1
## 
## Residual deviance: 99.28 on 115 degrees of freedom
## 
## Log-likelihood: -49.64 on 115 degrees of freedom
## 
## Number of iterations: 5

m6 <- vglm(Discapacidad ~ x1 + x2, family=cratio(reverse=TRUE, parallel=FALSE))
summary(m6)

## 
## Call:
## vglm(formula = Discapacidad ~ x1 + x2, family = cratio(reverse = TRUE, 
##     parallel = FALSE))
## 
## Pearson residuals:
##                      Min        1Q   Median    3Q  Max
## logit(P[Y<2|Y<=2]) -1.50 -9.44e-01 4.90e-06 0.808 2.61
## logit(P[Y<3|Y<=3]) -3.19  9.91e-06 3.04e-01 0.617 1.32
## logit(P[Y<4|Y<=4]) -4.04  1.96e-01 3.30e-01 0.480 1.48
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept):1    0.567      0.888    0.64   0.5230   
## (Intercept):2    1.015      0.863    1.18   0.2396   
## (Intercept):3    2.595      0.975    2.66   0.0078 **
## x1:1            -0.218      0.185   -1.17   0.2402   
## x1:2            -0.173      0.193   -0.89   0.3709   
## x1:3            -0.376      0.166   -2.26   0.0239 * 
## x2:1             0.322      0.919    0.35   0.7260   
## x2:2             1.874      1.015    1.85   0.0647 . 
## x2:3             0.947      0.868    1.09   0.2751   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  3 
## 
## Names of linear predictors: 
## logit(P[Y<2|Y<=2]), logit(P[Y<3|Y<=3]), logit(P[Y<4|Y<=4])
## 
## Dispersion Parameter for cratio family:   1
## 
## Residual deviance: 96.68 on 111 degrees of freedom
## 
## Log-likelihood: -48.34 on 111 degrees of freedom
## 
## Number of iterations: 5

AIC(m5)

## [1] 109.3

AIC(m6)

## [1] 114.7

Data Cinturon de seguridad

Condiciones.data <- read.table(file="http://tarwi.lamolina.edu.pe/~clopez/Categoricos/Condicion.txt", header=T)
attach(Condiciones.data)
Condicion <- ordered(Condicion, labels=c("y1", "y2", "y3", "y4", "y5"))

Modelo logit acumulativo

modelo1 <- vglm(Condicion ~ Genero + Localizacion + Cinturon, family=cumulative, weights=Frecuencia)
summary(modelo1)

## 
## Call:
## vglm(formula = Condicion ~ Genero + Localizacion + Cinturon, 
##     family = cumulative, weights = Frecuencia)
## 
## Pearson residuals:
##                   Min     1Q Median     3Q  Max
## logit(P[Y<=1])  -93.3 -47.47  -7.09 -0.743 25.9
## logit(P[Y<=2])  -97.7 -10.55  -1.40 17.297 57.0
## logit(P[Y<=3]) -109.8  -9.80   2.38  6.379 20.4
## logit(P[Y<=4]) -135.4   0.34   1.19  2.751 12.1
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1          1.2157     0.0265   45.91  < 2e-16 ***
## (Intercept):2          1.3332     0.0278   48.00  < 2e-16 ***
## (Intercept):3          3.1011     0.0604   51.36  < 2e-16 ***
## (Intercept):4          4.8604     0.1479   32.85  < 2e-16 ***
## GeneroMujer:1          0.5454     0.0273   20.01  < 2e-16 ***
## GeneroMujer:2          0.5774     0.0292   19.81  < 2e-16 ***
## GeneroMujer:3          0.2884     0.0671    4.30  1.7e-05 ***
## GeneroMujer:4          0.2621     0.1713    1.53     0.13    
## LocalizacionUrbano:1   0.7588     0.0270   28.14  < 2e-16 ***
## LocalizacionUrbano:2   0.8308     0.0289   28.72  < 2e-16 ***
## LocalizacionUrbano:3   1.2458     0.0706   17.65  < 2e-16 ***
## LocalizacionUrbano:4   1.6942     0.1996    8.49  < 2e-16 ***
## CinturonSi:1           0.8173     0.0276   29.56  < 2e-16 ***
## CinturonSi:2           0.8377     0.0296   28.29  < 2e-16 ***
## CinturonSi:3           1.0627     0.0723   14.71  < 2e-16 ***
## CinturonSi:4           1.1888     0.1906    6.24  4.4e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  4 
## 
## Names of linear predictors: 
## logit(P[Y<=1]), logit(P[Y<=2]), logit(P[Y<=3]), logit(P[Y<=4])
## 
## Dispersion Parameter for cumulative family:   1
## 
## Residual deviance: 50744 on 144 degrees of freedom
## 
## Log-likelihood: -25372 on 144 degrees of freedom
## 
## Number of iterations: 5

modelo2 <- vglm(Condicion ~ Genero + Localizacion + Cinturon, family=cumulative(parallel=TRUE), weights=Frecuencia)
summary(modelo2)

## 
## Call:
## vglm(formula = Condicion ~ Genero + Localizacion + Cinturon, 
##     family = cumulative(parallel = TRUE), weights = Frecuencia)
## 
## Pearson residuals:
##                   Min      1Q Median    3Q  Max
## logit(P[Y<=1]) -106.1 -46.191  -7.62 -1.09 26.3
## logit(P[Y<=2])  -98.7 -10.225  -1.41 16.94 65.9
## logit(P[Y<=3]) -102.5  -9.973   2.76  6.60 17.7
## logit(P[Y<=4]) -115.0   0.396   1.40  2.92 11.9
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1        1.2034     0.0264    45.7   <2e-16 ***
## (Intercept):2        1.3777     0.0268    51.5   <2e-16 ***
## (Intercept):3        3.2448     0.0399    81.3   <2e-16 ***
## (Intercept):4        5.1516     0.0880    58.6   <2e-16 ***
## GeneroMujer          0.5449     0.0272    20.0   <2e-16 ***
## LocalizacionUrbano   0.7732     0.0269    28.7   <2e-16 ***
## CinturonSi           0.8241     0.0276    29.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  4 
## 
## Names of linear predictors: 
## logit(P[Y<=1]), logit(P[Y<=2]), logit(P[Y<=3]), logit(P[Y<=4])
## 
## Dispersion Parameter for cumulative family:   1
## 
## Residual deviance: 50879 on 153 degrees of freedom
## 
## Log-likelihood: -25440 on 153 degrees of freedom
## 
## Number of iterations: 4

AIC(modelo1)

## [1] 50776

AIC(modelo2)

## [1] 50893