The dataset below contains data by state, including population, area in square miles, percent urban population, percent below poverty line, whether there are gun registration laws or not, and the number of homicides. The socioeconomic data are from 1990/91. The gun registration indicator is taken from a USA Today article (Tuesday, January 7, 1992, PAGE 5A).1

Variables: Socioeconomic data from 1990/1991

Research question:

Do gun registration laws affect the rate of homicides in a state?

Read in data

"GunReg" <- 
structure(.Data = list(
"pop" = c(4089, 2372, 30380, 3291, 598, 13277, 1135, 2795, 11543, 5996, 
   4860, 9368, 4432, 5158, 6737, 635, 7760, 18058, 10939, 11961, 1004,
   3560, 4953, 17349, 1770, 5018, 570, 3750, 3377, 680, 6623,
   1039, 5610, 2495, 3713, 4252, 1235, 2592, 808, 1593, 1105,
   1548, 1284, 3175, 2922, 703, 6286, 567, 4955, 1801, 460.), 
"area" = c(52.4, 53.2, 163.7, 5.5, 0.1, 65.8, 10.9, 56.3, 57.9, 10.6, 
   12.4, 96.8, 86.9, 69.7, 53.8, 70.7, 8.7, 54.5, 44.8, 46.1, 1.5, 32, 
   42.1, 268.6, 84.9, 71.3, 656.4, 114, 104.1, 2.5, 59.4, 83.6, 36.4, 
   82.3, 40.4, 51.8, 35.4, 48.4, 147, 77.4, 9.4, 121.6, 110.6, 69.9, 
   98.4, 77.1, 42.8, 9.6, 65.5, 24.2, 97.8), 
"urban" = c(60, 54, 93, 79, 100, 85, 89, 61, 85, 84, 81, 70, 71, 53, 
   50, 53, 89, 84, 74, 69, 86, 55, 61, 80, 87, 76, 68, 88, 82,
   73, 63, 57, 65, 69, 52, 68, 45, 47, 53, 66, 51, 73, 88,
   68, 71, 50, 69, 32, 66, 36, 65.), 
"poverty" = c(19, 18.4, 14.2, 5.8, 19.2, 14.1, 10, 10.1, 13.3, 10.2, 
   9.3, 13.9, 12, 13.6, 13.2, 13.5, 9, 14.1, 11.8, 10.8, 8.2, 16.5, 
   16.9, 16.8, 9.8, 26.2, 11.2, 14.2, 12.1, 8.1, 16, 13.7, 14.1, 11.1, 
   17.4, 22, 12.5, 23.8, 15.8, 10.9, 7.1, 20.9, 10.7, 15.8, 11.3, 13.5, 
   10.6, 7.1, 9.2, 17.2, 10.6), 
"gunreg" = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.), 
"homicides" = c(410, 240, 3710, 170, 489, 1300, 44, 62, 1270, 200, 540, 
   1020, 100, 550, 730, 11, 350, 2550, 760, 740, 38, 350, 470, 2660,
   43, 220, 56, 290, 155, 32, 720, 21, 380, 150, 260, 760,
   23, 370, 29, 43, 32, 160, 135, 220, 120, 9, 550, 24, 240,
   135, 20.)), 
names = c("pop", "area", "urban", "poverty", "gunreg", "homicides"), 
row.names = c("AL", "AR", "CA", "CT", "DC", "FL", "HI", "IA", "IL", "MA", 
   "MD", "MI", "MN", "MO", "NC", "ND", "NJ", "NY", "OH", "PA", "RI", "SC", 
   "TN", "TX", "UT", "WA", "AK", "AZ", "CO", "DE", "GA", "ID", "IN", "KS", 
   "KY", "LA", "ME", "MS", "MT", "NE", "NH", "NM", "NV", "OK", "OR", "SD", 
   "VA", "VT", "WI", "WV", "WY"), class = "data.frame")

Exploratory Data Analysis

Create a “population density” variable

GunReg <- GunReg %>% mutate(pop_density = (pop*1000)/(area*1000))

Scatterplot matrix

plot(GunReg)

scatterplotMatrix(GunReg)

Create homicide rate variable:

GunReg <- GunReg %>% mutate(homicide_rate = homicides/pop)
GunReg <- data.frame(state = row.names(GunReg), GunReg)

Correlations

cor(GunReg[,c(2:8)])
##                     pop        area       urban      poverty       gunreg
## pop          1.00000000  0.09445559  0.38102617  0.080474205  0.433288255
## area         0.09445559  1.00000000  0.06176086  0.061633537 -0.169263080
## urban        0.38102617  0.06176086  1.00000000 -0.138967877  0.381711182
## poverty      0.08047421  0.06163354 -0.13896788  1.000000000 -0.002147141
## gunreg       0.43328825 -0.16926308  0.38171118 -0.002147141  1.000000000
## homicides    0.94933164  0.15569769  0.35523752  0.176080637  0.375505900
## pop_density -0.05754515 -0.17864052  0.37838034  0.104020415  0.230611294
##             homicides pop_density
## pop         0.9493316 -0.05754515
## area        0.1556977 -0.17864052
## urban       0.3552375  0.37838034
## poverty     0.1760806  0.10402042
## gunreg      0.3755059  0.23061129
## homicides   1.0000000  0.03171880
## pop_density 0.0317188  1.00000000
GunReg %>% ggplot(aes(x = poverty, y = homicide_rate)) +
  geom_point()

plot(GunReg$poverty, GunReg$homicide_rate,
     xlab = "Percent poverty", ylab = "Homicide rate per 1000 people")

# Use code below to interactively identify outliers:
# identify(GunReg$poverty, GunReg$homicide_rate)
boxplot(GunReg$homicide_rate ~ GunReg$gunreg,
        xlab = "Gun Registration Law (0 = No)",
        ylab = "Homicide Rate per 1,000 people")

Fitting Poisson regression with rates

mod <- glm(homicides ~ gunreg, family = poisson, 
           data = GunReg, offset = log(pop))
summary(mod)
## 
## Call:
## glm(formula = homicides ~ gunreg, family = poisson, data = GunReg, 
##     offset = log(pop))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -19.827   -8.012   -2.853    2.114   34.513  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.54925    0.01424 -179.07   <2e-16 ***
## gunreg       0.25316    0.01598   15.85   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 5742.1  on 50  degrees of freedom
## Residual deviance: 5478.5  on 49  degrees of freedom
## AIC: 5843.6
## 
## Number of Fisher Scoring iterations: 4
# Incorrect model since no offset is included
mod.wrong <- glm(homicides ~ gunreg, family = poisson, data = GunReg)
summary(mod.wrong)
## 
## Call:
## glm(formula = homicides ~ gunreg, family = poisson, data = GunReg)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -36.732  -15.944   -7.440    3.592   78.027  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  5.28503    0.01424  371.24   <2e-16 ***
## gunreg       1.31049    0.01598   82.03   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 35577  on 50  degrees of freedom
## Residual deviance: 27272  on 49  degrees of freedom
## AIC: 27637
## 
## Number of Fisher Scoring iterations: 5

Something strange…

GunReg$x <- rnorm(51)
mod <- glm(homicides ~ gunreg + x, family = poisson, 
           data = GunReg, offset = log(pop))
summary(mod)
## 
## Call:
## glm(formula = homicides ~ gunreg + x, family = poisson, data = GunReg, 
##     offset = log(pop))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -19.851   -7.986   -2.861    2.088   34.520  
## 
## Coefficients:
##              Estimate Std. Error  z value Pr(>|z|)    
## (Intercept) -2.550845   0.014326 -178.059   <2e-16 ***
## gunreg       0.253389   0.015977   15.859   <2e-16 ***
## x           -0.005565   0.005533   -1.006    0.315    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 5742.1  on 50  degrees of freedom
## Residual deviance: 5477.5  on 48  degrees of freedom
## AIC: 5844.6
## 
## Number of Fisher Scoring iterations: 4

Quasi-Poisson model

mod_q <- glm(homicides ~ gunreg, family = quasipoisson, 
           data = GunReg, offset = log(pop))
summary(mod_q)
## 
## Call:
## glm(formula = homicides ~ gunreg, family = quasipoisson, data = GunReg, 
##     offset = log(pop))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -19.827   -8.012   -2.853    2.114   34.513  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -2.5493     0.1695 -15.039   <2e-16 ***
## gunreg        0.2532     0.1902   1.331    0.189    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 141.7833)
## 
##     Null deviance: 5742.1  on 50  degrees of freedom
## Residual deviance: 5478.5  on 49  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 4

Check calculations:

pres <- residuals(mod, type = "pearson")
phihat <- sum(pres^2)/(51 - 2)
orig_se <- summary(mod)$coefficients[,2]
corrected_se <- orig_se*sqrt(phihat)

Model comparison tests:

mod0 <- glm(homicides ~ gunreg, family = poisson, 
           data = GunReg, offset = log(pop))
mod0_q <- glm(homicides ~ gunreg, family = quasipoisson, 
           data = GunReg, offset = log(pop))
mod1 <- glm(homicides ~ gunreg + poverty + urban, family = poisson, 
           data = GunReg, offset = log(pop))
mod1_q <- glm(homicides ~ gunreg + poverty + urban, family = quasipoisson, 
           data = GunReg, offset = log(pop))

anova(mod0, mod1, test = "LRT")
## Analysis of Deviance Table
## 
## Model 1: homicides ~ gunreg
## Model 2: homicides ~ gunreg + poverty + urban
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1        49     5478.5                          
## 2        47     3919.4  2   1559.2 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod0_q, mod1_q, test = "LRT")
## Analysis of Deviance Table
## 
## Model 1: homicides ~ gunreg
## Model 2: homicides ~ gunreg + poverty + urban
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1        49     5478.5                          
## 2        47     3919.4  2   1559.2 0.0001154 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Same deviance, but different p-values!

dev <- anova(mod0, mod1, test = "LRT")$Deviance[2]
# p-values
pchisq(dev, df = 2, lower.tail=FALSE) # p-value in original LRT
## [1] 0
pchisq(dev/phihat, df = 2, lower.tail=FALSE) # corrected p-value...?
## [1] 0.004117905

Other way to deal with overdispersion: negative binomial regression

library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
mod_nb <- glm.nb(homicides ~ gunreg + poverty + urban + offset(log(pop)), 
           data = GunReg)
summary(mod_nb)
## 
## Call:
## glm.nb(formula = homicides ~ gunreg + poverty + urban + offset(log(pop)), 
##     data = GunReg, init.theta = 3.418144423, link = log)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.78734  -0.84712  -0.06245   0.28497   2.73334  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -5.262017   0.478178 -11.004  < 2e-16 ***
## gunreg       0.158352   0.166324   0.952  0.34106    
## poverty      0.100595   0.018321   5.491 4.01e-08 ***
## urban        0.017749   0.005608   3.165  0.00155 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(3.4181) family taken to be 1)
## 
##     Null deviance: 106.179  on 50  degrees of freedom
## Residual deviance:  53.877  on 47  degrees of freedom
## AIC: 634.09
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  3.418 
##           Std. Err.:  0.677 
## 
##  2 x log-likelihood:  -624.088

  1. Original source: http://people.reed.edu/~jones/141/Guns.html↩︎