1. Go through ESS 9 codebook, identify variables that allow you to generate an fairness of the political system scale. In particular identify variables measuring the aspects listed below. Use the set up of my slides, focus on the same four countries, and add these variables. Use the set up of my slides and add these variables. Conduct a PCA; does it identify a single underlying latent construct?
    • Political system in country ensures everyone fair chance to participate in politics,
    • Every one in country fair chance achieve level of education they seek,
    • Every one in country fair chance get job they seek,
    • Differences in wealth in country, how fair.
    • Influence decision to recruit in country: person's gender
ESS <- ESS %>% # Import the ESS, then (pipe result into)
  transmute( # Create new variables and keep only those
    cntry = as_factor(cntry), # Country of interview
    gndr = as_factor(gndr),
    facntr = as_factor(facntr), # Father born in cntry
    mocntr = as_factor(mocntr), # Mother born in cntry
    # Homophobia
    freehms = zap_labels(freehms),
    hmsfmlsh = max(hmsfmlsh, na.rm = TRUE) - zap_labels(hmsfmlsh),
    hmsacld = zap_labels(hmsacld),
    # Xenophobia
    imbgeco = max(imbgeco, na.rm = TRUE) - zap_labels(imbgeco),
    imueclt = max(imueclt, na.rm = TRUE) - zap_labels(imueclt),
    imwbcnt = max(imwbcnt, na.rm = TRUE) - zap_labels(imwbcnt),
    # Fairness
    frprtpl = zap_labels(frprtpl),
    evfredu = zap_labels(evfredu),
    evfrjob = zap_labels(evfrjob),
    wltdffr = zap_labels(wltdffr),
    recgndr = zap_labels(recgndr),
    pspwght = zap_label(pspwght),
    eduyrs = case_when( # Education
      eduyrs > 21 ~ 21, # Recode to max 21 years of edu.
      eduyrs < 9 ~ 9, # Recode to min 9 years of edu.
      TRUE ~ zap_labels(eduyrs))) %>% # Make it numeric, then
  dplyr::filter(
    # Keep only respondents with native-born parents,
    facntr == "Yes" & mocntr == "Yes",
    # filter cases from four countries
    cntry == "Denmark" | cntry == "Germany" | cntry == "Sweden" | cntry == "Norway")

# Conduct PCA of society's fairness
(fairness_pca <- ESS %>% prcomp(
  formula = ~ frprtpl + evfredu + evfrjob + wltdffr + recgndr,
  data = ., na.action = na.exclude,
  center = TRUE, scale = TRUE))
# Standard deviations (1, .., p=5):
# [1] 1.39 0.99 0.95 0.91 0.59
# 
# Rotation (n x k) = (5 x 5):
#           PC1  PC2      PC3    PC4    PC5
# frprtpl -0.38 0.24 -0.00091 -0.890  0.045
# evfredu -0.61 0.11  0.20454  0.256 -0.715
# evfrjob -0.60 0.11  0.18934  0.324  0.698
# wltdffr  0.19 0.94 -0.22187  0.176 -0.015
# recgndr  0.30 0.18  0.93439 -0.081  0.012
# Show importance of single components.
summary(fairness_pca) 
# Importance of components:
#                          PC1   PC2   PC3   PC4    PC5
# Standard deviation     1.393 0.987 0.952 0.911 0.5922
# Proportion of Variance 0.388 0.195 0.181 0.166 0.0701
# Cumulative Proportion  0.388 0.583 0.764 0.930 1.0000
# Visualize the results
fviz_pca_var(fairness_pca)

  1. Extract a fariness of the system variable from a PCA of the three items that work together well. Make a plot that shows whether fairness is predicted by years of education. What is the strength of the correlation (rounded to two digits)?
# Conduct PCA of society's fairness
(fairness_pca <- ESS %>% prcomp(
  formula = ~ frprtpl + evfredu + evfrjob,
  data = ., na.action = na.exclude, scale = TRUE))
# Standard deviations (1, .., p=3):
# [1] 1.35 0.91 0.59
# 
# Rotation (n x k) = (3 x 3):
#           PC1   PC2    PC3
# frprtpl -0.41 -0.91  0.045
# evfredu -0.65  0.26 -0.715
# evfrjob -0.64  0.32  0.697
# Make a new "fairness_pc" variable and fill it with PC1.
ESS$fairness_pc <- fairness_pca$x[ , "PC1"]

# Plot association with education.
ggplot(data = ESS, aes(y = fairness_pc, x = eduyrs)) +
  geom_jitter(alpha = 1/5) +
  geom_smooth() +
  labs(y = "Perception that society is fair",
       x = "Years of education") +
  theme_minimal()

# Estimate correlation between the two variables.
ESS %>% 
  select(fairness_pc, eduyrs) %>%
  drop_na() %>% cor()
#             fairness_pc eduyrs
# fairness_pc       1.000 -0.026
# eduyrs           -0.026  1.000
  1. Perform a large PCA of all xenophobia, homophobia, and the three fairness variables. Can you identify three distinct latent concepts based on all those variables: Xenophobia, Homophobia, and Fairness?
    • Tip: Use rotation methods to extract meaningful latent variables. Note, with the argument axes = c(1, 2), you can change, which PCs fviz_pca_var() visualizes.
# conduct a large PCA of all ten variables.
(large_pca <- ESS %>% prcomp(
  formula = ~ frprtpl + evfredu + evfrjob +
    freehms + hmsfmlsh + hmsacld + 
    imbgeco + imueclt + imwbcnt,
  data = ., na.action = na.exclude, scale = TRUE))
# Standard deviations (1, .., p=9):
# [1] 1.70 1.34 1.19 0.89 0.76 0.71 0.66 0.59 0.52
# 
# Rotation (n x k) = (9 x 9):
#             PC1     PC2    PC3    PC4    PC5    PC6    PC7     PC8    PC9
# frprtpl  -0.219 -0.3531  0.031 -0.888  0.106 -0.122  0.081 -0.0660  0.021
# evfredu  -0.071 -0.6369 -0.180  0.204  0.034 -0.023 -0.154  0.6922 -0.104
# evfrjob  -0.096 -0.6254 -0.141  0.315 -0.043  0.066  0.097 -0.6690  0.133
# freehms   0.327 -0.1579  0.473  0.069 -0.297 -0.738  0.030 -0.0016  0.071
# hmsfmlsh  0.302 -0.1338  0.475  0.069  0.796  0.148 -0.023 -0.0352 -0.051
# hmsacld   0.347 -0.1821  0.396 -0.160 -0.505  0.627 -0.120  0.0326 -0.030
# imbgeco   0.419  0.0034 -0.368 -0.150  0.065 -0.125 -0.781 -0.1895 -0.019
# imueclt   0.467 -0.0479 -0.324 -0.073 -0.012 -0.038  0.453 -0.0603 -0.677
# imwbcnt   0.474 -0.0131 -0.316 -0.086  0.066  0.058  0.359  0.1643  0.710
# Show importance of single components.
summary(large_pca) 
# Importance of components:
#                          PC1   PC2   PC3    PC4    PC5    PC6    PC7    PC8   PC9
# Standard deviation     1.695 1.339 1.194 0.8854 0.7616 0.7058 0.6558 0.5868 0.519
# Proportion of Variance 0.319 0.199 0.158 0.0871 0.0644 0.0554 0.0478 0.0383 0.030
# Cumulative Proportion  0.319 0.519 0.677 0.7641 0.8286 0.8840 0.9317 0.9700 1.000
# Oblique rotation
(oblique_solution <- promax(
  # of first three PCs
  large_pca$rotation[, c("PC1", "PC2", "PC3")]))
# $loadings
# 
# Loadings:
#          PC1    PC2    PC3   
# frprtpl  -0.183 -0.364       
# evfredu         -0.666       
# evfrjob         -0.649       
# freehms                 0.596
# hmsfmlsh                0.577
# hmsacld                 0.554
# imbgeco   0.556              
# imueclt   0.571              
# imwbcnt   0.570              
# 
#                 PC1  PC2  PC3
# SS loadings    1.00 1.00 1.00
# Proportion Var 0.11 0.11 0.11
# Cumulative Var 0.11 0.22 0.33
# 
# $rotmat
#        [,1] [,2]  [,3]
# [1,]  0.806 0.14  0.56
# [2,] -0.034 0.96 -0.27
# [3,] -0.593 0.24  0.78
# Overwrite the original PCA loadings with the new rotated ones.
large_pca$rotation <- oblique_solution$loadings

perspective1 <- fviz_pca_var(large_pca, axes = c(1, 2)) +
  labs(title = "")

perspective2 <- fviz_pca_var(large_pca, axes = c(1, 3)) +
  labs(title = "")

perspective3 <- fviz_pca_var(large_pca, axes = c(2, 3)) +
  labs(title = "")

library(ggpubr)
ggarrange(perspective1, perspective2, perspective3,
          labels = c('PC1 & PC2', 'PC1 & PC3', 'PC2 & PC3'))