1. Use the set up on my slides. Add a religiosity and a xenophobia variable using PCA. Z-standardize religiosity and visualize their association in a scatter plot.
# Add packages to library
library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Handle labelled data.
library(essurvey) # Add ESS API package.
library(ggplot2) # Allows us to create nice figures.
library(factoextra) # Allows us to visualize PCAs
library(psych) # Supports PCA & factor analysis with several useful functions.
library(estimatr) # Allows us to estimate (cluster-)robust standard errors.
library(texreg) # Allows us to make nicely-formatted Html & Latex regression tables.
library(broom) # Allows us to turn parts of model objects into tibbles.
library(modelr) # Model predictions (can also do resampling).
library(manifestoR) # Add Manifesto API package.
library(lme4) # Allows mixed mulktilevel modeling
library(parameters) # Re-scale weights for mixed multilevel models.
library(merTools) # Prediction intervals for mixed multilevel models.
ESS <- ESS %>% transmute( # Create new variables and keep only those
    cntry = as_factor(cntry), # Country of interview
    gndr = as_factor(gndr),
    facntr = as_factor(facntr), # Father born in cntry
    mocntr = as_factor(mocntr), # Mother born in cntry
    imbgeco = max(imbgeco, na.rm = TRUE) - zap_labels(imbgeco),
    imueclt = max(imueclt, na.rm = TRUE) - zap_labels(imueclt),
    imwbcnt = max(imwbcnt, na.rm = TRUE) - zap_labels(imwbcnt),
    pspwght = zap_label(pspwght),
    # Regigion
    rlgblg = as_factor(rlgblg), # Belonging to a religion
    rlgdgr = zap_labels(rlgdgr), # How religios
    rlgatnd = max(rlgatnd, na.rm = TRUE) - zap_labels(rlgatnd), # Service attendance
    pray = max(pray, na.rm = TRUE) - zap_labels(pray), # Prayer frequency
    # Parental education (ISCED)
    eiscedf = zap_labels(eiscedf),
    eiscedm = zap_labels(eiscedm),
    par_edu = case_when(
      # If father's edu is missing but not mother's, take mother's
      (eiscedf == 55 | is.na(eiscedf)) & eiscedm != 55 & !is.na(eiscedm) ~ eiscedm,
      # If mother's edu is missing but not father's, take father's
      (eiscedm == 55 | is.na(eiscedm)) & eiscedf != 55 & !is.na(eiscedf)  ~ eiscedf,
      # If both are missing, its missing
      eiscedf == 55 & eiscedm == 55 ~ as.numeric(NA),
      # For all others, take the average of both parents
      TRUE ~ (eiscedm + eiscedf) / 2),
    eduyrs = case_when( # Education
      eduyrs > 21 ~ 21, # Recode to max 21 years of edu.
      eduyrs < 9 ~ 9, # Recode to min 9 years of edu.
      TRUE ~ zap_labels(eduyrs))) %>% # Make it numeric, then
  dplyr::filter(
    facntr == "Yes" & mocntr == "Yes")

# Conduct PCA of climate change concerns
(pca <- ESS %>% prcomp(
  formula = ~ imbgeco + imueclt + imwbcnt + 
    rlgdgr + rlgatnd + pray,
  data = ., na.action = na.exclude,
  center = TRUE, scale = TRUE))
# Standard deviations (1, .., p=6):
# [1] 1.60 1.48 0.62 0.57 0.56 0.50
# 
# Rotation (n x k) = (6 x 6):
#           PC1   PC2     PC3    PC4    PC5     PC6
# imbgeco -0.46  0.33  0.1449  0.780 -0.219  0.0303
# imueclt -0.48  0.33 -0.1257 -0.380  0.162  0.6901
# imwbcnt -0.47  0.35  0.0022 -0.379  0.064 -0.7147
# rlgdgr  -0.33 -0.48  0.5448  0.077  0.602 -0.0025
# rlgatnd -0.34 -0.46 -0.7844  0.185  0.140 -0.0911
# pray    -0.34 -0.47  0.2259 -0.251 -0.735  0.0612
# Show importance of single components.
summary(pca) 
# Importance of components:
#                          PC1   PC2    PC3    PC4    PC5    PC6
# Standard deviation     1.596 1.477 0.6181 0.5705 0.5594 0.4996
# Proportion of Variance 0.425 0.364 0.0637 0.0542 0.0522 0.0416
# Cumulative Proportion  0.425 0.788 0.8520 0.9062 0.9584 1.0000
# Oblique rotation
(oblique_solution <- promax(
  # of first two PCs
  pca$rotation[, c("PC1", "PC2")]))
# $loadings
# 
# Loadings:
#         PC1    PC2   
# imbgeco -0.567       
# imueclt -0.581       
# imwbcnt -0.584       
# rlgdgr         -0.579
# rlgatnd        -0.568
# pray           -0.585
# 
#                 PC1  PC2
# SS loadings    1.00 1.00
# Proportion Var 0.17 0.17
# Cumulative Var 0.17 0.33
# 
# $rotmat
#       [,1] [,2]
# [1,]  0.81 0.58
# [2,] -0.58 0.81
# Visualize the results
pca$rotation <- oblique_solution$loadings
fviz_pca_var(pca) # Visualize PCA object

# Generate new component scores for every respondent based on the 
# rotated principal components and write the into object "rotated_PCs"
rotated_PCs <- ESS %>% 
  dplyr::select(imbgeco, imueclt, imwbcnt, 
    rlgdgr, rlgatnd, pray) %>% 
  factor.scores(x = ., f = pca$rotation)
# Add a "xeno" and "relig" from "rotated_PCs" to our data.
ESS[, c("xeno", "relig")] <- rotated_PCs$scores

# Turn xenophobia variable around
ESS$xeno <- max(ESS$xeno, na.rm = TRUE) - ESS$xeno

# Make histogram of religiosity
ggplot(data = ESS, aes(y = xeno, x = relig)) +
  geom_point(alpha = 0.1, aes(size = pspwght)) +
  geom_smooth(aes(weight = pspwght)) +
  geom_smooth(method = "lm", color = "red", se = FALSE, aes(weight = pspwght)) +
  labs(y = "Xenophobia", x = "Religiosity") +
  theme_minimal()

  1. Drop unused factor levels in cntry, make a design matrix, and re-scale the weight for multilevel modeling.
ESS <- ESS %>%
  dplyr::select(xeno, relig, cntry, gndr, eduyrs, par_edu, pspwght) %>%
  mutate(cntry = fct_drop(cntry),
         gndr = fct_drop(gndr)) %>%
  drop_na()

# Rescale the post-stratification weight for multilevel modeling #<<
ESS <- rescale_weights(data = ESS, group = "cntry", probability_weights = "pspwght") #<<

(n <- nrow(ESS))
# [1] 35592

That design matrix has observations left.

  1. Regress xenophobia on religiosity, as well as eduyrs, par_edu, and gndr as control variables. Use a mixed effects multilevel model. What do you learn about religiosity and xenophobia?
model_1 <- lmer(xeno ~ relig + eduyrs + par_edu + gndr + (1 | cntry), weights = pweights_a, data = ESS)

screenreg(model_1, include.ci = FALSE)
# 
# =====================================
#                         Model 1      
# -------------------------------------
# (Intercept)                  3.06 ***
#                             (0.07)   
# relig                        0.05 ***
#                             (0.01)   
# eduyrs                      -0.05 ***
#                             (0.00)   
# par_edu                     -0.07 ***
#                             (0.00)   
# gndrFemale                  -0.01    
#                             (0.01)   
# -------------------------------------
# AIC                      97742.56    
# BIC                      97801.92    
# Log Likelihood          -48864.28    
# Num. obs.                35592       
# Num. groups: cntry          29       
# Var: cntry (Intercept)       0.13    
# Var: Residual                0.80    
# =====================================
# *** p < 0.001; ** p < 0.01; * p < 0.05
  1. Visualize the association between religiosity and xenophobia to get a better impression of it. Make sure to also show the variation across countries.
(synth_data <- ESS %>% # Use ESS_minority, then
   # reduce complexity to speed up data_grid(), then
   mutate(
     # religiosity & cntry: leave as is, we want to vary it
     par_edu = mean(par_edu, na.rm = TRUE),
     eduyrs = mean(eduyrs, na.rm = TRUE),
     gndr = "Female") %>%
   # Create synthetic data, then
   data_grid(relig, par_edu, eduyrs, gndr, cntry) %>%
   unique() %>% # Drop duplicates, then
   mutate( # Get the predictions
     fit = predict(
       model_1, 
       newdata = .,
       # Predictions by random effect (i.e., cntry)
       re.form = NULL)))
# # A tibble: 572,576 × 6
#    relig par_edu eduyrs gndr   cntry         fit
#    <dbl>   <dbl>  <dbl> <chr>  <fct>       <dbl>
#  1 -2.55    2.83   13.3 Female Austria      2.15
#  2 -2.55    2.83   13.3 Female Belgium      2.02
#  3 -2.55    2.83   13.3 Female Bulgaria     2.46
#  4 -2.55    2.83   13.3 Female Switzerland  1.72
#  5 -2.55    2.83   13.3 Female Cyprus       2.44
#  6 -2.55    2.83   13.3 Female Czechia      2.66
#  7 -2.55    2.83   13.3 Female Germany      1.90
#  8 -2.55    2.83   13.3 Female Denmark      1.97
#  9 -2.55    2.83   13.3 Female Estonia      2.23
# 10 -2.55    2.83   13.3 Female Spain        1.72
# # … with 572,566 more rows
ggplot(data = synth_data, 
       aes(y = fit, x = relig)) +
  geom_line(aes(color = cntry)) +
  labs(x = "Religiosity",
  y = "Xenophobia",
  caption = "(Covariates set to: Women at mean (parental) education") + 
  theme_minimal() +
  theme(legend.position="bottom",
        legend.title=element_blank())

  1. The manifesto data contain a left-right variable called rile. Does the average left-right orientation of political parties in a country predict the population's average level of xenophobia?
mp_setapikey("manifesto_apikey.txt")
(ESS <- mp_maindataset() %>% # Read Manifesto data, then
    dplyr::select(countryname, edate, rile) %>% # Select vars, then
    mutate(cntry = countryname) %>% # Rename cntry key 
    dplyr::group_by(cntry) %>% # Group the data by country, then
    dplyr::filter(edate == max(edate)) %>% # Use only the most recent pre-2016 manifestos from each country, then
    dplyr::summarize( # Calculate the average orientations of parties, then
      party_rile = mean(rile, na.rm = TRUE)
    ) %>% # drop countries with NA on predictors.
    drop_na() %>%
    inner_join(ESS, ., by = "cntry") %>%
    as_tibble())
# Connecting to Manifesto Project DB API... 
# Connecting to Manifesto Project DB API... corpus version: 2021-1
# # A tibble: 33,777 × 10
#     xeno  relig cntry   gndr   eduyrs par_edu pspwght pweights_a pweights_b party_rile
#    <dbl>  <dbl> <chr>   <fct>   <dbl>   <dbl>   <dbl>      <dbl>      <dbl>      <dbl>
#  1 2.96   1.34  Austria Male       12     3     0.218      0.226     0.129       -13.3
#  2 2.42   0.512 Austria Male       12     2.5   0.413      0.428     0.245       -13.3
#  3 1.99  -0.145 Austria Female     12     3     2.27       2.35      1.35        -13.3
#  4 2.31   0.759 Austria Male       11     2     0.386      0.400     0.229       -13.3
#  5 2.50  -1.71  Austria Female      9     2     1.03       1.07      0.612       -13.3
#  6 1.83  -0.462 Austria Male       13     2     0.576      0.595     0.341       -13.3
#  7 2.23  -0.945 Austria Male       12     2.5   0.721      0.745     0.427       -13.3
#  8 4.48   0.379 Austria Male       12     2     1.77       1.83      1.05        -13.3
#  9 1.73   0.352 Austria Female     12     2.5   0.743      0.768     0.441       -13.3
# 10 0.126  1.21  Austria Male       12     3     0.160      0.165     0.0949      -13.3
# # … with 33,767 more rows
model_2 <- lmer(xeno ~ relig + eduyrs + par_edu + gndr + party_rile + (1 | cntry), weights = pweights_a, data = ESS)

screenreg(list(model_1, model_2), include.ci = FALSE)
# 
# ====================================================
#                         Model 1        Model 2      
# ----------------------------------------------------
# (Intercept)                  3.06 ***       3.21 ***
#                             (0.07)         (0.07)   
# relig                        0.05 ***       0.05 ***
#                             (0.01)         (0.01)   
# eduyrs                      -0.05 ***      -0.05 ***
#                             (0.00)         (0.00)   
# par_edu                     -0.07 ***      -0.07 ***
#                             (0.00)         (0.00)   
# gndrFemale                  -0.01          -0.02 *  
#                             (0.01)         (0.01)   
# party_rile                                  0.02 ***
#                                            (0.01)   
# ----------------------------------------------------
# AIC                      97742.56       92974.73    
# BIC                      97801.92       93042.15    
# Log Likelihood          -48864.28      -46479.37    
# Num. obs.                35592          33777       
# Num. groups: cntry          29             28       
# Var: cntry (Intercept)       0.13           0.08    
# Var: Residual                0.80           0.81    
# ====================================================
# *** p < 0.001; ** p < 0.01; * p < 0.05
  1. Visualize the relation between the average left-right orientation in a country and xenophobia; use predict()!
# Using predict() and no uncertainty interval
(synth_data <- ESS %>% # Use ESS_minority, then
   # reduce complexity to speed up data_grid(), then
   mutate(
     # party_rile & cntry: leave as is, we want to vary it
     relig = mean(relig, na.rm = TRUE),
     par_edu = mean(par_edu, na.rm = TRUE),
     eduyrs = mean(eduyrs, na.rm = TRUE),
     gndr = "Female") %>%
   # Create synthetic data, then
   data_grid(relig, par_edu, eduyrs, gndr, cntry, party_rile) %>%
   unique() %>% # Drop duplicates, then
   mutate( # Get the predictions
     fit = predict(
       model_2, 
       newdata = .,
       # Predictions NOT by random effect (i.e., cntry)
       re.form = ~0)))
# # A tibble: 784 × 7
#      relig par_edu eduyrs gndr   cntry   party_rile   fit
#      <dbl>   <dbl>  <dbl> <chr>  <chr>        <dbl> <dbl>
#  1 -0.0244    2.79   13.3 Female Austria      -26.0  1.77
#  2 -0.0244    2.79   13.3 Female Austria      -23.3  1.82
#  3 -0.0244    2.79   13.3 Female Austria      -21.5  1.86
#  4 -0.0244    2.79   13.3 Female Austria      -17.8  1.94
#  5 -0.0244    2.79   13.3 Female Austria      -17.0  1.96
#  6 -0.0244    2.79   13.3 Female Austria      -15.4  1.99
#  7 -0.0244    2.79   13.3 Female Austria      -14.8  2.00
#  8 -0.0244    2.79   13.3 Female Austria      -13.3  2.03
#  9 -0.0244    2.79   13.3 Female Austria      -11.6  2.07
# 10 -0.0244    2.79   13.3 Female Austria      -11.3  2.07
# # … with 774 more rows
ggplot(data = synth_data, 
       aes(y = fit, x = party_rile)) +
  geom_line() +
  labs(x = "Average left-right orientation of political parties",
  y = "Xenophobia",
  caption = "(Covariates set to: Women at mean (parental) education and religiosity)") + 
  theme_minimal() +
  theme(legend.position="bottom",
        legend.title=element_blank())

  1. Now use predictInterval() to also get an uncertainty interval of say 50%.
# Using predictInterval() and a 50% uncertainty interval
(synth_data <- ESS %>% # Use ESS_minority, then
   # reduce complexity to speed up data_grid(), then
   mutate(
     # party_rile & cntry: leave as is, we want to vary it
     relig = mean(relig, na.rm = TRUE),
     par_edu = mean(par_edu, na.rm = TRUE),
     eduyrs = mean(eduyrs, na.rm = TRUE),
     gndr = "Female") %>%
   # Create synthetic data, then
   data_grid(relig, par_edu, eduyrs, gndr, cntry, party_rile) %>%
   unique()) # Drop duplicates
# # A tibble: 784 × 6
#      relig par_edu eduyrs gndr   cntry   party_rile
#      <dbl>   <dbl>  <dbl> <chr>  <chr>        <dbl>
#  1 -0.0244    2.79   13.3 Female Austria      -26.0
#  2 -0.0244    2.79   13.3 Female Austria      -23.3
#  3 -0.0244    2.79   13.3 Female Austria      -21.5
#  4 -0.0244    2.79   13.3 Female Austria      -17.8
#  5 -0.0244    2.79   13.3 Female Austria      -17.0
#  6 -0.0244    2.79   13.3 Female Austria      -15.4
#  7 -0.0244    2.79   13.3 Female Austria      -14.8
#  8 -0.0244    2.79   13.3 Female Austria      -13.3
#  9 -0.0244    2.79   13.3 Female Austria      -11.6
# 10 -0.0244    2.79   13.3 Female Austria      -11.3
# # … with 774 more rows
(synth_data <- predictInterval(
      merMod = model_2, newdata = synth_data,
      which = "fixed", level = 0.5, n.sims = 10000,
      include.resid.var = TRUE) %>%
    as_tibble() %>% # Turn into a tibble, then
    bind_cols(synth_data, .)) # Add to the synthetic data.
# # A tibble: 784 × 9
#      relig par_edu eduyrs gndr   cntry   party_rile   fit   upr   lwr
#      <dbl>   <dbl>  <dbl> <chr>  <chr>        <dbl> <dbl> <dbl> <dbl>
#  1 -0.0244    2.79   13.3 Female Austria      -26.0  1.78  2.37  1.17
#  2 -0.0244    2.79   13.3 Female Austria      -23.3  1.85  2.43  1.21
#  3 -0.0244    2.79   13.3 Female Austria      -21.5  1.87  2.48  1.26
#  4 -0.0244    2.79   13.3 Female Austria      -17.8  1.95  2.55  1.32
#  5 -0.0244    2.79   13.3 Female Austria      -17.0  1.94  2.56  1.33
#  6 -0.0244    2.79   13.3 Female Austria      -15.4  1.99  2.60  1.38
#  7 -0.0244    2.79   13.3 Female Austria      -14.8  2.01  2.63  1.42
#  8 -0.0244    2.79   13.3 Female Austria      -13.3  2.04  2.64  1.44
#  9 -0.0244    2.79   13.3 Female Austria      -11.6  2.05  2.65  1.45
# 10 -0.0244    2.79   13.3 Female Austria      -11.3  2.06  2.68  1.47
# # … with 774 more rows
ggplot(data = synth_data, 
       aes(y = fit, x = party_rile)) +
  geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.5) +
  geom_line() +
  labs(x = "Average left-right orientation of political parties",
       y = "Xenophobia",
       caption = "(Covariates set to: Women at mean education and age)") + 
  theme_minimal() +
  theme(legend.position="bottom",
        legend.title=element_blank())