# Add packages to library
library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Handle labelled data.
library(essurvey) # Add ESS API package.
library(ggplot2) # Allows us to create nice figures.
library(factoextra) # Allows us to visualize PCAs
library(psych) # Supports PCA & factor analysis with several useful functions.
library(estimatr) # Allows us to estimate (cluster-)robust standard errors.
library(texreg) # Allows us to make nicely-formatted Html & Latex regression tables.
library(broom) # Allows us to turn parts of model objects into tibbles.
library(modelr) # Model predictions (can also do resampling).
library(manifestoR) # Add Manifesto API package.
library(lme4) # Allows mixed mulktilevel modeling
library(parameters) # Re-scale weights for mixed multilevel models.
library(merTools) # Prediction intervals for mixed multilevel models.
ESS <- ESS %>% transmute( # Create new variables and keep only those
cntry = as_factor(cntry), # Country of interview
gndr = as_factor(gndr),
facntr = as_factor(facntr), # Father born in cntry
mocntr = as_factor(mocntr), # Mother born in cntry
imbgeco = max(imbgeco, na.rm = TRUE) - zap_labels(imbgeco),
imueclt = max(imueclt, na.rm = TRUE) - zap_labels(imueclt),
imwbcnt = max(imwbcnt, na.rm = TRUE) - zap_labels(imwbcnt),
pspwght = zap_label(pspwght),
# Regigion
rlgblg = as_factor(rlgblg), # Belonging to a religion
rlgdgr = zap_labels(rlgdgr), # How religios
rlgatnd = max(rlgatnd, na.rm = TRUE) - zap_labels(rlgatnd), # Service attendance
pray = max(pray, na.rm = TRUE) - zap_labels(pray), # Prayer frequency
# Parental education (ISCED)
eiscedf = zap_labels(eiscedf),
eiscedm = zap_labels(eiscedm),
par_edu = case_when(
# If father's edu is missing but not mother's, take mother's
(eiscedf == 55 | is.na(eiscedf)) & eiscedm != 55 & !is.na(eiscedm) ~ eiscedm,
# If mother's edu is missing but not father's, take father's
(eiscedm == 55 | is.na(eiscedm)) & eiscedf != 55 & !is.na(eiscedf) ~ eiscedf,
# If both are missing, its missing
eiscedf == 55 & eiscedm == 55 ~ as.numeric(NA),
# For all others, take the average of both parents
TRUE ~ (eiscedm + eiscedf) / 2),
eduyrs = case_when( # Education
eduyrs > 21 ~ 21, # Recode to max 21 years of edu.
eduyrs < 9 ~ 9, # Recode to min 9 years of edu.
TRUE ~ zap_labels(eduyrs))) %>% # Make it numeric, then
dplyr::filter(
facntr == "Yes" & mocntr == "Yes")
# Conduct PCA of climate change concerns
(pca <- ESS %>% prcomp(
formula = ~ imbgeco + imueclt + imwbcnt +
rlgdgr + rlgatnd + pray,
data = ., na.action = na.exclude,
center = TRUE, scale = TRUE))
# Standard deviations (1, .., p=6):
# [1] 1.60 1.48 0.62 0.57 0.56 0.50
#
# Rotation (n x k) = (6 x 6):
# PC1 PC2 PC3 PC4 PC5 PC6
# imbgeco -0.46 0.33 0.1449 0.780 -0.219 0.0303
# imueclt -0.48 0.33 -0.1257 -0.380 0.162 0.6901
# imwbcnt -0.47 0.35 0.0022 -0.379 0.064 -0.7147
# rlgdgr -0.33 -0.48 0.5448 0.077 0.602 -0.0025
# rlgatnd -0.34 -0.46 -0.7844 0.185 0.140 -0.0911
# pray -0.34 -0.47 0.2259 -0.251 -0.735 0.0612
# Show importance of single components.
summary(pca)
# Importance of components:
# PC1 PC2 PC3 PC4 PC5 PC6
# Standard deviation 1.596 1.477 0.6181 0.5705 0.5594 0.4996
# Proportion of Variance 0.425 0.364 0.0637 0.0542 0.0522 0.0416
# Cumulative Proportion 0.425 0.788 0.8520 0.9062 0.9584 1.0000
# Oblique rotation
(oblique_solution <- promax(
# of first two PCs
pca$rotation[, c("PC1", "PC2")]))
# $loadings
#
# Loadings:
# PC1 PC2
# imbgeco -0.567
# imueclt -0.581
# imwbcnt -0.584
# rlgdgr -0.579
# rlgatnd -0.568
# pray -0.585
#
# PC1 PC2
# SS loadings 1.00 1.00
# Proportion Var 0.17 0.17
# Cumulative Var 0.17 0.33
#
# $rotmat
# [,1] [,2]
# [1,] 0.81 0.58
# [2,] -0.58 0.81
# Visualize the results
pca$rotation <- oblique_solution$loadings
fviz_pca_var(pca) # Visualize PCA object
# Generate new component scores for every respondent based on the
# rotated principal components and write the into object "rotated_PCs"
rotated_PCs <- ESS %>%
dplyr::select(imbgeco, imueclt, imwbcnt,
rlgdgr, rlgatnd, pray) %>%
factor.scores(x = ., f = pca$rotation)
# Add a "xeno" and "relig" from "rotated_PCs" to our data.
ESS[, c("xeno", "relig")] <- rotated_PCs$scores
# Turn xenophobia variable around
ESS$xeno <- max(ESS$xeno, na.rm = TRUE) - ESS$xeno
# Make histogram of religiosity
ggplot(data = ESS, aes(y = xeno, x = relig)) +
geom_point(alpha = 0.1, aes(size = pspwght)) +
geom_smooth(aes(weight = pspwght)) +
geom_smooth(method = "lm", color = "red", se = FALSE, aes(weight = pspwght)) +
labs(y = "Xenophobia", x = "Religiosity") +
theme_minimal()
cntry
, make a design matrix, and re-scale the weight for multilevel modeling.ESS <- ESS %>%
dplyr::select(xeno, relig, cntry, gndr, eduyrs, par_edu, pspwght) %>%
mutate(cntry = fct_drop(cntry),
gndr = fct_drop(gndr)) %>%
drop_na()
# Rescale the post-stratification weight for multilevel modeling #<<
ESS <- rescale_weights(data = ESS, group = "cntry", probability_weights = "pspwght") #<<
(n <- nrow(ESS))
# [1] 35592
That design matrix has observations left.
eduyrs
, par_edu
, and gndr
as control variables. Use a mixed effects multilevel model. What do you learn about religiosity and xenophobia? model_1 <- lmer(xeno ~ relig + eduyrs + par_edu + gndr + (1 | cntry), weights = pweights_a, data = ESS)
screenreg(model_1, include.ci = FALSE)
#
# =====================================
# Model 1
# -------------------------------------
# (Intercept) 3.06 ***
# (0.07)
# relig 0.05 ***
# (0.01)
# eduyrs -0.05 ***
# (0.00)
# par_edu -0.07 ***
# (0.00)
# gndrFemale -0.01
# (0.01)
# -------------------------------------
# AIC 97742.56
# BIC 97801.92
# Log Likelihood -48864.28
# Num. obs. 35592
# Num. groups: cntry 29
# Var: cntry (Intercept) 0.13
# Var: Residual 0.80
# =====================================
# *** p < 0.001; ** p < 0.01; * p < 0.05
(synth_data <- ESS %>% # Use ESS_minority, then
# reduce complexity to speed up data_grid(), then
mutate(
# religiosity & cntry: leave as is, we want to vary it
par_edu = mean(par_edu, na.rm = TRUE),
eduyrs = mean(eduyrs, na.rm = TRUE),
gndr = "Female") %>%
# Create synthetic data, then
data_grid(relig, par_edu, eduyrs, gndr, cntry) %>%
unique() %>% # Drop duplicates, then
mutate( # Get the predictions
fit = predict(
model_1,
newdata = .,
# Predictions by random effect (i.e., cntry)
re.form = NULL)))
# # A tibble: 572,576 × 6
# relig par_edu eduyrs gndr cntry fit
# <dbl> <dbl> <dbl> <chr> <fct> <dbl>
# 1 -2.55 2.83 13.3 Female Austria 2.15
# 2 -2.55 2.83 13.3 Female Belgium 2.02
# 3 -2.55 2.83 13.3 Female Bulgaria 2.46
# 4 -2.55 2.83 13.3 Female Switzerland 1.72
# 5 -2.55 2.83 13.3 Female Cyprus 2.44
# 6 -2.55 2.83 13.3 Female Czechia 2.66
# 7 -2.55 2.83 13.3 Female Germany 1.90
# 8 -2.55 2.83 13.3 Female Denmark 1.97
# 9 -2.55 2.83 13.3 Female Estonia 2.23
# 10 -2.55 2.83 13.3 Female Spain 1.72
# # … with 572,566 more rows
ggplot(data = synth_data,
aes(y = fit, x = relig)) +
geom_line(aes(color = cntry)) +
labs(x = "Religiosity",
y = "Xenophobia",
caption = "(Covariates set to: Women at mean (parental) education") +
theme_minimal() +
theme(legend.position="bottom",
legend.title=element_blank())
rile
. Does the average left-right orientation of political parties in a country predict the population's average level of xenophobia? mp_setapikey("manifesto_apikey.txt")
(ESS <- mp_maindataset() %>% # Read Manifesto data, then
dplyr::select(countryname, edate, rile) %>% # Select vars, then
mutate(cntry = countryname) %>% # Rename cntry key
dplyr::group_by(cntry) %>% # Group the data by country, then
dplyr::filter(edate == max(edate)) %>% # Use only the most recent pre-2016 manifestos from each country, then
dplyr::summarize( # Calculate the average orientations of parties, then
party_rile = mean(rile, na.rm = TRUE)
) %>% # drop countries with NA on predictors.
drop_na() %>%
inner_join(ESS, ., by = "cntry") %>%
as_tibble())
# Connecting to Manifesto Project DB API...
# Connecting to Manifesto Project DB API... corpus version: 2021-1
# # A tibble: 33,777 × 10
# xeno relig cntry gndr eduyrs par_edu pspwght pweights_a pweights_b party_rile
# <dbl> <dbl> <chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 2.96 1.34 Austria Male 12 3 0.218 0.226 0.129 -13.3
# 2 2.42 0.512 Austria Male 12 2.5 0.413 0.428 0.245 -13.3
# 3 1.99 -0.145 Austria Female 12 3 2.27 2.35 1.35 -13.3
# 4 2.31 0.759 Austria Male 11 2 0.386 0.400 0.229 -13.3
# 5 2.50 -1.71 Austria Female 9 2 1.03 1.07 0.612 -13.3
# 6 1.83 -0.462 Austria Male 13 2 0.576 0.595 0.341 -13.3
# 7 2.23 -0.945 Austria Male 12 2.5 0.721 0.745 0.427 -13.3
# 8 4.48 0.379 Austria Male 12 2 1.77 1.83 1.05 -13.3
# 9 1.73 0.352 Austria Female 12 2.5 0.743 0.768 0.441 -13.3
# 10 0.126 1.21 Austria Male 12 3 0.160 0.165 0.0949 -13.3
# # … with 33,767 more rows
model_2 <- lmer(xeno ~ relig + eduyrs + par_edu + gndr + party_rile + (1 | cntry), weights = pweights_a, data = ESS)
screenreg(list(model_1, model_2), include.ci = FALSE)
#
# ====================================================
# Model 1 Model 2
# ----------------------------------------------------
# (Intercept) 3.06 *** 3.21 ***
# (0.07) (0.07)
# relig 0.05 *** 0.05 ***
# (0.01) (0.01)
# eduyrs -0.05 *** -0.05 ***
# (0.00) (0.00)
# par_edu -0.07 *** -0.07 ***
# (0.00) (0.00)
# gndrFemale -0.01 -0.02 *
# (0.01) (0.01)
# party_rile 0.02 ***
# (0.01)
# ----------------------------------------------------
# AIC 97742.56 92974.73
# BIC 97801.92 93042.15
# Log Likelihood -48864.28 -46479.37
# Num. obs. 35592 33777
# Num. groups: cntry 29 28
# Var: cntry (Intercept) 0.13 0.08
# Var: Residual 0.80 0.81
# ====================================================
# *** p < 0.001; ** p < 0.01; * p < 0.05
predict()
!# Using predict() and no uncertainty interval
(synth_data <- ESS %>% # Use ESS_minority, then
# reduce complexity to speed up data_grid(), then
mutate(
# party_rile & cntry: leave as is, we want to vary it
relig = mean(relig, na.rm = TRUE),
par_edu = mean(par_edu, na.rm = TRUE),
eduyrs = mean(eduyrs, na.rm = TRUE),
gndr = "Female") %>%
# Create synthetic data, then
data_grid(relig, par_edu, eduyrs, gndr, cntry, party_rile) %>%
unique() %>% # Drop duplicates, then
mutate( # Get the predictions
fit = predict(
model_2,
newdata = .,
# Predictions NOT by random effect (i.e., cntry)
re.form = ~0)))
# # A tibble: 784 × 7
# relig par_edu eduyrs gndr cntry party_rile fit
# <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl>
# 1 -0.0244 2.79 13.3 Female Austria -26.0 1.77
# 2 -0.0244 2.79 13.3 Female Austria -23.3 1.82
# 3 -0.0244 2.79 13.3 Female Austria -21.5 1.86
# 4 -0.0244 2.79 13.3 Female Austria -17.8 1.94
# 5 -0.0244 2.79 13.3 Female Austria -17.0 1.96
# 6 -0.0244 2.79 13.3 Female Austria -15.4 1.99
# 7 -0.0244 2.79 13.3 Female Austria -14.8 2.00
# 8 -0.0244 2.79 13.3 Female Austria -13.3 2.03
# 9 -0.0244 2.79 13.3 Female Austria -11.6 2.07
# 10 -0.0244 2.79 13.3 Female Austria -11.3 2.07
# # … with 774 more rows
ggplot(data = synth_data,
aes(y = fit, x = party_rile)) +
geom_line() +
labs(x = "Average left-right orientation of political parties",
y = "Xenophobia",
caption = "(Covariates set to: Women at mean (parental) education and religiosity)") +
theme_minimal() +
theme(legend.position="bottom",
legend.title=element_blank())
predictInterval()
to also get an uncertainty interval of say 50%.# Using predictInterval() and a 50% uncertainty interval
(synth_data <- ESS %>% # Use ESS_minority, then
# reduce complexity to speed up data_grid(), then
mutate(
# party_rile & cntry: leave as is, we want to vary it
relig = mean(relig, na.rm = TRUE),
par_edu = mean(par_edu, na.rm = TRUE),
eduyrs = mean(eduyrs, na.rm = TRUE),
gndr = "Female") %>%
# Create synthetic data, then
data_grid(relig, par_edu, eduyrs, gndr, cntry, party_rile) %>%
unique()) # Drop duplicates
# # A tibble: 784 × 6
# relig par_edu eduyrs gndr cntry party_rile
# <dbl> <dbl> <dbl> <chr> <chr> <dbl>
# 1 -0.0244 2.79 13.3 Female Austria -26.0
# 2 -0.0244 2.79 13.3 Female Austria -23.3
# 3 -0.0244 2.79 13.3 Female Austria -21.5
# 4 -0.0244 2.79 13.3 Female Austria -17.8
# 5 -0.0244 2.79 13.3 Female Austria -17.0
# 6 -0.0244 2.79 13.3 Female Austria -15.4
# 7 -0.0244 2.79 13.3 Female Austria -14.8
# 8 -0.0244 2.79 13.3 Female Austria -13.3
# 9 -0.0244 2.79 13.3 Female Austria -11.6
# 10 -0.0244 2.79 13.3 Female Austria -11.3
# # … with 774 more rows
(synth_data <- predictInterval(
merMod = model_2, newdata = synth_data,
which = "fixed", level = 0.5, n.sims = 10000,
include.resid.var = TRUE) %>%
as_tibble() %>% # Turn into a tibble, then
bind_cols(synth_data, .)) # Add to the synthetic data.
# # A tibble: 784 × 9
# relig par_edu eduyrs gndr cntry party_rile fit upr lwr
# <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
# 1 -0.0244 2.79 13.3 Female Austria -26.0 1.78 2.37 1.17
# 2 -0.0244 2.79 13.3 Female Austria -23.3 1.85 2.43 1.21
# 3 -0.0244 2.79 13.3 Female Austria -21.5 1.87 2.48 1.26
# 4 -0.0244 2.79 13.3 Female Austria -17.8 1.95 2.55 1.32
# 5 -0.0244 2.79 13.3 Female Austria -17.0 1.94 2.56 1.33
# 6 -0.0244 2.79 13.3 Female Austria -15.4 1.99 2.60 1.38
# 7 -0.0244 2.79 13.3 Female Austria -14.8 2.01 2.63 1.42
# 8 -0.0244 2.79 13.3 Female Austria -13.3 2.04 2.64 1.44
# 9 -0.0244 2.79 13.3 Female Austria -11.6 2.05 2.65 1.45
# 10 -0.0244 2.79 13.3 Female Austria -11.3 2.06 2.68 1.47
# # … with 774 more rows
ggplot(data = synth_data,
aes(y = fit, x = party_rile)) +
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.5) +
geom_line() +
labs(x = "Average left-right orientation of political parties",
y = "Xenophobia",
caption = "(Covariates set to: Women at mean education and age)") +
theme_minimal() +
theme(legend.position="bottom",
legend.title=element_blank())