1. Use today's set-up but add three further variables on prayer frequency, attendance of religious services, and general importance of religion. The tibble has observations.
# Add packages to library
library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Handle labelled data.
library(essurvey) # Add ESS API package.
library(ggplot2) # Allows us to create nice figures.
library(factoextra) # Allows us to visualize PCAs
library(psych) # Supports PCA & factor analysis with several useful functions.
library(estimatr) # Allows us to estimate (cluster-)robust standard errors
library(texreg) # Allows us to make nicely-formatted Html & Latex regression tables.
library(broom) # Allows us to turn parts of model objects into tibbles.
ESS <- ESS %>% transmute( # Create new variables and keep only those
    cntry = as_factor(cntry), # Country of interview
    gndr = as_factor(gndr),
    facntr = as_factor(facntr), # Father born in cntry
    mocntr = as_factor(mocntr), # Mother born in cntry
    imbgeco = max(imbgeco, na.rm = TRUE) - zap_labels(imbgeco),
    imueclt = max(imueclt, na.rm = TRUE) - zap_labels(imueclt),
    imwbcnt = max(imwbcnt, na.rm = TRUE) - zap_labels(imwbcnt),
    # Regigion
    rlgblg = as_factor(rlgblg), # Belonging to a religion
    rlgdgr = zap_labels(rlgdgr), # How religios
    rlgatnd = max(rlgatnd, na.rm = TRUE) - zap_labels(rlgatnd), # Service attendance
    pray = max(pray, na.rm = TRUE) - zap_labels(pray), # Prayer frequency
    pspwght = zap_label(pspwght),
    # Parental education (ISCED) #<<
    eiscedf = zap_labels(eiscedf), #<<
    eiscedm = zap_labels(eiscedm), #<<
    par_edu = case_when( #<<
      # If father's edu is missing but not mother's, take mother's #<<
      (eiscedf == 55 | is.na(eiscedf)) & eiscedm != 55 & !is.na(eiscedm) ~ eiscedm, #<<
      # If mother's edu is missing but not father's, take father's #<<
      (eiscedm == 55 | is.na(eiscedm)) & eiscedf != 55 & !is.na(eiscedf)  ~ eiscedf, #<<
      # If both are missing, its missing #<<
      eiscedf == 55 & eiscedm == 55 ~ as.numeric(NA), #<<
      # For all others, take the average of both parents #<<
      TRUE ~ (eiscedm + eiscedf) / 2), #<<
    eduyrs = case_when( # Education
      eduyrs > 21 ~ 21, # Recode to max 21 years of edu.
      eduyrs < 9 ~ 9, # Recode to min 9 years of edu.
      TRUE ~ zap_labels(eduyrs))) %>% # Make it numeric, then
  dplyr::filter(
    # Keep only respondents with native-born parents,
    facntr == "Yes" & mocntr == "Yes" &
      (cntry == "Denmark" | cntry == "Sweden" | cntry == "Norway" | cntry == "Germany"))
  1. Among these religious respondents, the ESS contains three questions on religiosity: Prayer frequency, attendance of religious services, and general importance. Use PCA to identify a xenophobia and a religiosity variable. Using oblique rotation, the two correlate by r = .
# Conduct PCA of climate change concerns
(pca <- ESS %>% prcomp(
  formula = ~ imbgeco + imueclt + imwbcnt + 
    rlgdgr + rlgatnd + pray,
  data = ., na.action = na.exclude,
  center = TRUE, scale = TRUE))
# Standard deviations (1, .., p=6):
# [1] 1.52 1.47 0.68 0.66 0.60 0.52
# 
# Rotation (n x k) = (6 x 6):
#           PC1   PC2   PC3    PC4    PC5    PC6
# imbgeco  0.54 -0.10 -0.20  0.808 -0.010 -0.049
# imueclt  0.57 -0.14  0.02 -0.438 -0.039 -0.679
# imwbcnt  0.57 -0.14  0.14 -0.325  0.032  0.723
# rlgdgr  -0.12 -0.58  0.22  0.067  0.770 -0.064
# rlgatnd -0.14 -0.54 -0.78 -0.167 -0.181  0.091
# pray    -0.13 -0.56  0.52  0.132 -0.610 -0.026
# Show importance of single components.
summary(pca) 
# Importance of components:
#                          PC1   PC2    PC3    PC4    PC5    PC6
# Standard deviation     1.517 1.471 0.6846 0.6553 0.6034 0.5221
# Proportion of Variance 0.383 0.361 0.0781 0.0716 0.0607 0.0454
# Cumulative Proportion  0.383 0.744 0.8223 0.8939 0.9546 1.0000
# Oblique rotation
(oblique_solution <- promax(
  # of first two PCs
  pca$rotation[, c("PC1", "PC2")]))
# $loadings
# 
# Loadings:
#         PC1    PC2   
# imbgeco  0.552       
# imueclt  0.587       
# imwbcnt  0.591       
# rlgdgr         -0.591
# rlgatnd        -0.561
# pray           -0.579
# 
#                 PC1  PC2
# SS loadings    1.00 1.00
# Proportion Var 0.17 0.17
# Cumulative Var 0.17 0.33
# 
# $rotmat
#       [,1] [,2]
# [1,]  0.97 0.22
# [2,] -0.22 0.98
# Visualize the results
pca$rotation <- oblique_solution$loadings
fviz_pca_var(pca) # Visualize PCA object

# Generate new component scores for every respondent based on the 
# rotated principal components and write the into object "rotated_PCs"
rotated_PCs <- ESS %>% 
  dplyr::select(imbgeco, imueclt, imwbcnt, 
    rlgdgr, rlgatnd, pray) %>% 
  factor.scores(x = ., f = pca$rotation)
# Add a "xeno" and "relig" from "rotated_PCs" to our data.
ESS[, c("xeno", "relig")] <- rotated_PCs$scores

# Correlation
ESS %>%
  select(xeno, relig) %>%
  drop_na() %>%
  cor()
#          xeno   relig
# xeno   1.0000 -0.0017
# relig -0.0017  1.0000
  1. Model 1: Regress xenophobia on religiosity; \(\beta\) = and the relation is
ols1 <- lm_robust(xeno ~ relig, w = pspwght, data = ESS)
summary(ols1)
# 
# Call:
# lm_robust(formula = xeno ~ relig, data = ESS, weights = pspwght)
# 
# Weighted, Standard error type:  HC2 
# 
# Coefficients:
#             Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper   DF
# (Intercept)  0.05738     0.0160   3.578 0.000349   0.0259   0.0888 5368
# relig       -0.00368     0.0158  -0.233 0.815578  -0.0346   0.0273 5368
# 
# Multiple R-squared:  1.3e-05 ,    Adjusted R-squared:  -0.000173 
# F-statistic: 0.0544 on 1 and 5368 DF,  p-value: 0.816
  1. Model 2: Add cntry, par_edu, eduyrs, and gndr as control variables, what happens to our religiosity estimate?
ols2 <- lm_robust(xeno ~ relig + eduyrs + par_edu + cntry + gndr, w = pspwght, data = ESS)
summary(ols2)
# 
# Call:
# lm_robust(formula = xeno ~ relig + eduyrs + par_edu + cntry + 
#     gndr, data = ESS, weights = pspwght)
# 
# Weighted, Standard error type:  HC2 
# 
# Coefficients:
#              Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper   DF
# (Intercept)    1.3400    0.07602   17.63 1.40e-67  1.19095   1.4890 5189
# relig          0.0269    0.01636    1.64 1.01e-01 -0.00520   0.0589 5189
# eduyrs        -0.0605    0.00474  -12.76 9.32e-37 -0.06980  -0.0512 5189
# par_edu       -0.0945    0.01006   -9.40 8.31e-21 -0.11427  -0.0748 5189
# cntryDenmark   0.0752    0.04278    1.76 7.88e-02 -0.00867   0.1591 5189
# cntryNorway   -0.0942    0.03977   -2.37 1.79e-02 -0.17216  -0.0162 5189
# cntrySweden   -0.3821    0.04377   -8.73 3.40e-18 -0.46787  -0.2963 5189
# gndrFemale    -0.1276    0.03101   -4.11 3.94e-05 -0.18839  -0.0668 5189
# 
# Multiple R-squared:  0.107 ,  Adjusted R-squared:  0.106 
# F-statistic: 63.8 on 7 and 5189 DF,  p-value: <2e-16
  1. Make Denmark the refernce category (i.e., the first category) in the cntry variable.
ESS$cntry <- fct_relevel(ESS$cntry, "Denmark")
  1. Estimate Model 2 again. What changed? Discuss among each other.
ols2 <- lm_robust(xeno ~ relig + eduyrs + par_edu + cntry + gndr, w = pspwght, data = ESS)
summary(ols2)
# 
# Call:
# lm_robust(formula = xeno ~ relig + eduyrs + par_edu + cntry + 
#     gndr, data = ESS, weights = pspwght)
# 
# Weighted, Standard error type:  HC2 
# 
# Coefficients:
#              Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper   DF
# (Intercept)    1.4152    0.07719   18.33 8.52e-73   1.2639  1.56652 5189
# relig          0.0269    0.01636    1.64 1.01e-01  -0.0052  0.05893 5189
# eduyrs        -0.0605    0.00474  -12.76 9.32e-37  -0.0698 -0.05121 5189
# par_edu       -0.0945    0.01006   -9.40 8.31e-21  -0.1143 -0.07482 5189
# cntryGermany  -0.0752    0.04278   -1.76 7.88e-02  -0.1591  0.00867 5189
# cntryNorway   -0.1694    0.04414   -3.84 1.26e-04  -0.2559 -0.08287 5189
# cntrySweden   -0.4573    0.04676   -9.78 2.17e-22  -0.5490 -0.36560 5189
# gndrFemale    -0.1276    0.03101   -4.11 3.94e-05  -0.1884 -0.06680 5189
# 
# Multiple R-squared:  0.107 ,  Adjusted R-squared:  0.106 
# F-statistic: 63.8 on 7 and 5189 DF,  p-value: <2e-16
  1. Make a regression table of the results of Models 1 and (the revised) 2.
screenreg(list(ols1, ols2), include.ci = FALSE,
          custom.coef.names = c("Intercept", 
                                "Religiosity",
                                "Years of education",
                                "Parental education", 
                                "Germany", "Norway", "Sweden", 
                                "Women"),
          caption = "My regression table", 
          caption.above = TRUE)
# 
# ============================================
#                     Model 1      Model 2    
# --------------------------------------------
# Intercept              0.06 ***     1.42 ***
#                       (0.02)       (0.08)   
# Religiosity           -0.00         0.03    
#                       (0.02)       (0.02)   
# Years of education                 -0.06 ***
#                                    (0.00)   
# Parental education                 -0.09 ***
#                                    (0.01)   
# Germany                            -0.08    
#                                    (0.04)   
# Norway                             -0.17 ***
#                                    (0.04)   
# Sweden                             -0.46 ***
#                                    (0.05)   
# Women                              -0.13 ***
#                                    (0.03)   
# --------------------------------------------
# R^2                    0.00         0.11    
# Adj. R^2              -0.00         0.11    
# Num. obs.           5370         5197       
# RMSE                   1.00         0.95    
# ============================================
# *** p < 0.001; ** p < 0.01; * p < 0.05
  1. Make a coefficient plot of the results of Model 1 and 2, and emphasize religiosity as predictor of interest.
bind_rows(tidy(ols1), tidy(ols2), # Stack the data of model objects into one tibble.
          .id = "model") %>% # Identify which model the data came from, then
  mutate(
    model = case_when(model == 1 ~ "Model 1", # Rename model identifyer.
                      model == 2 ~ "Model 2"),
    term = fct_recode(factor(term), # Recode predictor names, then
                      "Intercept" = "(Intercept)",
                      "Religiosity" = "relig",
                      "Years of Education" = "eduyrs",
                      "Parental education" = "par_edu",
                      "Germany" = "cntryGermany",
                      "Norway" = "cntryNorway",
                      "Sweden" = "cntrySweden",
                      "Women" = "gndrFemale"),
    emphasize = case_when(
      term == "Religiosity" ~ "Predictor",
      TRUE ~ "Confonfounder")) %>% 
  select(term, estimate, conf.low, conf.high, model, emphasize) %>% # keep only some of all the info, then
  ggplot(aes(x = reorder(term, estimate),
             y = estimate, 
             ymin = conf.low, ymax = conf.high,
             color = emphasize,
             shape = model)) +
  geom_hline(yintercept = 0, # Null-hypothesis line.
             color = "#901A1E", lty = "dashed") + 
  geom_pointrange(# Coefs & 95% CI. #<<
    position = position_dodge(width=0.5)) + #<<
  coord_flip() + 
  theme_minimal() +
  labs(y = expression(beta), # Axis title: greek beta.
       x = "") +
  theme(legend.position="bottom",
        legend.title=element_blank(),
        legend.box = 'vertical')

  1. Drop the two intercepts and the Sweden dummy from the coefficients plot, but mention this in the plot's caption.
bind_rows(tidy(ols1), tidy(ols2), # Stack the data of model objects into one tibble.
          .id = "model") %>% # Identify which model the data came from, then
  dplyr::filter(term != "(Intercept)" & term != "cntrySweden") %>% # drop the two intercepts and Sweden dummy, then
  mutate(
    model = case_when(model == 1 ~ "Model 1", # Rename model identifyer.
                      model == 2 ~ "Model 2"),
    term = fct_recode(factor(term), # Recode predictor names, then
                      "Religiosity" = "relig",
                      "Years of Education" = "eduyrs",
                      "Parental education" = "par_edu",
                      "Germany" = "cntryGermany",
                      "Norway" = "cntryNorway",
                      "Sweden" = "cntrySweden",
                      "Women" = "gndrFemale"),
    emphasize = case_when(
      term == "Religiosity" ~ "Predictor",
      TRUE ~ "Confonfounder")) %>%  
  select(term, estimate, conf.low, conf.high, model, emphasize) %>% # keep only some of all the info, then
  ggplot(aes(x = reorder(term, estimate),
             y = estimate, 
             ymin = conf.low, ymax = conf.high,
             color = emphasize,
             shape = model)) +
  geom_hline(yintercept = 0, # Null-hypothesis line.
             color = "#901A1E", lty = "dashed") + 
  geom_pointrange(# Coefs & 95% CI. #<<
    position = position_dodge(width=0.5)) + #<<
  coord_flip() + 
  theme_minimal() +
  labs(y = expression(beta), # Axis title: Greek beta.
       x = "",
       caption = "(Estimates of the intercept 
       and a Sweden dummy are not shown)") +
  theme(legend.position="bottom",
        legend.title=element_blank(),
        legend.box = 'vertical')