# Add packages to library
library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Handle labelled data.
library(essurvey) # Add ESS API package.
library(ggplot2) # Allows us to create nice figures.
library(estimatr) # Allows us to estimate (cluster-)robust standard errors.
library(texreg) # Allows us to make nicely-formatted Html & Latex regression tables.
library(broom) # Allows us to turn parts of model objects into tibbles.
library(modelr) # Model predictions (can also do resampling).
library(lmtest) # Allows us to estimate (cluster-)robust standard errors for GLMs.
library(sandwich) # Allows us to estimate (cluster-)robust standard errors for GLMs.
library(margins) # Averaged marginal effects
ESS <- ESS %>% transmute( # Create new variables and keep only those
cntry = as_factor(cntry), # Country of interview
gndr = as_factor(gndr),
facntr = as_factor(facntr), # Father born in cntry
mocntr = as_factor(mocntr), # Mother born in cntry
dscrgrp = as_factor(dscrgrp), # Belonging to discriminated group #<<
dscrrlg = as_factor(dscrrlg), # Discriminated because of religion #<<
dscrrce = as_factor(dscrrce), # Discriminated because of race #<<
dscrlng = as_factor(dscrlng), # Discriminated because of language #<<
dscretn = as_factor(dscretn), # Discriminated because of ethnicity #<<
discr = case_when( #<<
dscrgrp == "Yes" & (dscrrlg == "Marked" | dscrrce == "Marked" | #<<
dscrlng == "Marked" | dscretn == "Marked") ~ "Yes", #<<
dscrgrp == "No" | (dscrgrp == "Yes" & dscrrlg != "Marked" & dscrrce != "Marked" #<<
& dscrlng != "Marked" & dscretn != "Marked") ~ "No", #<<
TRUE ~ as.character(NA)) %>% as_factor(), # all others missing, then change to factor #<<
pspwght = zap_label(pspwght),
# Regigion
rlgblg = as_factor(rlgblg), # Belonging to a religion
rlgdgr = zap_labels(rlgdgr), # How religios
rlgatnd = max(rlgatnd, na.rm = TRUE) - zap_labels(rlgatnd), # Service attendance
pray = max(pray, na.rm = TRUE) - zap_labels(pray), # Prayer frequency
agea = zap_label(agea),
eduyrs = case_when( # Education
eduyrs > 21 ~ 21, # Recode to max 21 years of edu.
eduyrs < 9 ~ 9, # Recode to min 9 years of edu.
TRUE ~ zap_labels(eduyrs))) %>% # make it numeric, then
dplyr::filter(# Keep only respondents of immigrant origin,
facntr == "No" & mocntr == "No" &
# filter cases from four countries, then
(cntry == "Denmark" | cntry == "Sweden" | cntry == "Norway" | cntry == "Germany"))
# Conduct PCA of climate change concerns
(pca <- ESS %>% prcomp(
formula = ~ rlgdgr + rlgatnd + pray,
data = ., na.action = na.exclude,
center = TRUE, scale = TRUE))
# Standard deviations (1, .., p=3):
# [1] 1.49 0.69 0.54
#
# Rotation (n x k) = (3 x 3):
# PC1 PC2 PC3
# rlgdgr -0.59 0.37 0.714
# rlgatnd -0.55 -0.84 -0.017
# pray -0.59 0.40 -0.700
# Show importance of single components.
summary(pca)
# Importance of components:
# PC1 PC2 PC3
# Standard deviation 1.494 0.686 0.5435
# Proportion of Variance 0.744 0.157 0.0985
# Cumulative Proportion 0.744 0.902 1.0000
# Add new religiosity variable to ESS; beware to turn it around using "-"
ESS$relig <- -pca$x[, "PC1"] %>% scale() %>% as.numeric()
# Make histogram of religiosity
ggplot(data = ESS, aes(x = relig)) +
geom_histogram() +
labs(y = "Count", x = "Religiosity") +
theme_minimal()
cntry
, and make a design matrix like I did in my slides (i.e., keep only discr
, relig
, cntry
, agea
, gndr
, eduyrs
, pspwght
, and apply casewise deletion).ESS <- ESS %>%
dplyr::select(discr, relig, cntry, agea, gndr, eduyrs, pspwght) %>%
mutate( # Drop unused factor levels
cntry = fct_drop(cntry),
gndr = fct_drop(gndr)) %>%
drop_na()
(n <- nrow(ESS))
# [1] 818
That design matrix has observations left.
lpm <- lm_robust(as.numeric(discr) - 1 ~ relig, weights = pspwght, data = ESS)
log_model <- glm(discr ~ relig, weights = pspwght, data = ESS, family = binomial(link="logit"))
log_model_robSE <- coeftest(log_model, sandwich::vcovHC(log_model))
# Summary of LPMs, plug in SEs by hand
screenreg(list(lpm, log_model_robSE), include.ci = FALSE, custom.model.names = c("LPM", "Logistic"))
#
# ==================================
# LPM Logistic
# ----------------------------------
# (Intercept) 0.14 *** -1.81 ***
# (0.01) (0.11)
# relig 0.02 0.16
# (0.01) (0.11)
# ----------------------------------
# R^2 0.00
# Adj. R^2 0.00
# Num. obs. 818
# RMSE 0.37
# ==================================
# *** p < 0.001; ** p < 0.01; * p < 0.05
cntry
, agea
, and gndr
as control variables to your logistic regression, what happens to our two religiosity estimates? log_model2 <- glm(discr ~ relig + agea + gndr + cntry, weights = pspwght, data = ESS, family = binomial(link="logit"))
log_model2_robSE <- coeftest(log_model2, sandwich::vcovHC(log_model2))
# Summary
screenreg(list(log_model_robSE, log_model2_robSE), include.ci = FALSE)
#
# ==================================
# Model 1 Model 2
# ----------------------------------
# (Intercept) -1.81 *** -0.76 *
# (0.11) (0.30)
# relig 0.16 0.14
# (0.11) (0.12)
# agea -0.03 ***
# (0.01)
# gndrFemale -0.05
# (0.24)
# cntryDenmark 0.32
# (0.37)
# cntryNorway 0.09
# (0.32)
# cntrySweden 0.11
# (0.29)
# ==================================
# *** p < 0.001; ** p < 0.01; * p < 0.05
AME_log_model2 <- margins(log_model2, vcov = sandwich::vcovHC(log_model2))
lpm2 <- lm_robust(as.numeric(discr) - 1 ~ relig + agea + gndr + cntry, weights = pspwght, data = ESS)
# Summary of LPMs versus AMEs
screenreg(list(lpm2, AME_log_model2), include.ci = FALSE, custom.model.names = c("LPM", "AME"))
#
# ========================================
# LPM AME
# ----------------------------------------
# (Intercept) 0.26 ***
# (0.04)
# relig 0.02 0.02
# (0.01) (0.01)
# agea -0.00 *** -0.00 ***
# (0.00) (0.00)
# gndrFemale -0.01 -0.01
# (0.03) (0.03)
# cntryDenmark 0.04 0.04
# (0.05) (0.05)
# cntryNorway 0.01 0.01
# (0.04) (0.04)
# cntrySweden 0.02 0.01
# (0.03) (0.03)
# ----------------------------------------
# R^2 0.03
# Adj. R^2 0.02
# Num. obs. 818
# RMSE 0.37
# Deviance (Null) 753.78
# df.null 817
# Log Likelihood -368.58
# AIC 751.16
# BIC 784.11
# Deviance 724.80
# DF Resid. 811
# nobs 818
# ========================================
# *** p < 0.001; ** p < 0.01; * p < 0.05
log_model3 <- glm(discr ~ relig*cntry + agea + gndr, weights = pspwght, data = ESS, family = binomial(link="logit")) %>%
coeftest(., sandwich::vcovHC(.))
# Summary of LPMs, plug in SEs by hand
screenreg(list(log_model, log_model2, log_model3),
include.ci = FALSE)
#
# =======================================================
# Model 1 Model 2 Model 3
# -------------------------------------------------------
# (Intercept) -1.81 *** -0.76 ** -0.83 *
# (0.10) (0.29) (0.32)
# relig 0.16 0.14 0.47 *
# (0.09) (0.09) (0.20)
# agea -0.03 *** -0.03 ***
# (0.01) (0.01)
# gndrFemale -0.05 -0.05
# (0.19) (0.24)
# cntryDenmark 0.32 0.45
# (0.32) (0.38)
# cntryNorway 0.09 0.21
# (0.28) (0.33)
# cntrySweden 0.11 0.24
# (0.23) (0.29)
# relig:cntryDenmark -0.37
# (0.34)
# relig:cntryNorway -0.73 *
# (0.36)
# relig:cntrySweden -0.53
# (0.28)
# -------------------------------------------------------
# AIC 767.24 751.16
# BIC 776.66 784.11
# Log Likelihood -381.62 -368.58
# Deviance 750.69 724.80
# Num. obs. 818 818
# =======================================================
# *** p < 0.001; ** p < 0.01; * p < 0.05
log_model3 <- glm(discr ~ relig*cntry + agea + gndr, weights = pspwght, data = ESS, family = binomial(link="logit"))
synth_data <- ESS %>% # Use ESS_minority, then
# Create synthetic data, then
data_grid(agea, gndr, cntry, relig) %>%
mutate( # Set confounders to a constant value, then
agea = mean(ESS$agea, na.rm = TRUE),
gndr = "Female") %>%
unique() %>% # Drop duplicates, then
mutate( # Get the predictions
fit = predict(log_model3,
type = "response",
newdata = .),
fit_se = predict(log_model3,
type = "response",
newdata = .,
se.fit = TRUE)$se.fit,
min95 = fit - fit_se*qt(0.975, df = nrow(ESS)),
max95 = fit + fit_se*qt(0.975, df = nrow(ESS)))
ggplot(data = synth_data,
aes(y = fit, x = relig)) +
geom_ribbon(aes(ymin = min95, ymax = max95,
fill = cntry), alpha = 1/10) +
geom_line(aes(color = cntry)) +
labs(x = "Religiosity",
y = "Probability of belonging to a group \n discriminated based on \n race, ethnicity, language, or religion",
caption = "(Covariates set to: Women at mean age)") +
theme_minimal() +
theme(legend.position="bottom",
legend.title=element_blank())
ggplot(data = synth_data %>% dplyr::filter(cntry != "Denmark" & cntry != "Sweden"),
aes(y = fit, x = relig)) +
geom_ribbon(aes(ymin = min95, ymax = max95,
fill = cntry), alpha = 1/10) +
geom_line(aes(color = cntry)) +
labs(x = "Religiosity",
y = "Probability of belonging to a group \n discriminated based on \n race, ethnicity, language, or religion",
caption = "(Covariates set to: Women at mean age)") +
theme_minimal() +
theme(legend.position="bottom",
legend.title=element_blank())