Use today's set-up but add three further variables on prayer frequency, attendance of religious services, and general importance of religion. The tibble has observations.
# Add packages to library
library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Handle labelled data.
library(essurvey) # Add ESS API package.
library(ggplot2) # Allows us to create nice figures.
library(factoextra) # Allows us to visualize PCAs
library(psych) # Supports PCA & factor analysis with several useful functions.
library(estimatr) # Allows us to estimate (cluster-)robust standard errors
library(texreg) # Allows us to make nicely-formatted Html & Latex regression tables.
library(broom) # Allows us to turn parts of model objects into tibbles.
ESS <- ESS %>% transmute( # Create new variables and keep only those
cntry = as_factor(cntry), # Country of interview
gndr = as_factor(gndr),
facntr = as_factor(facntr), # Father born in cntry
mocntr = as_factor(mocntr), # Mother born in cntry
imbgeco = max(imbgeco, na.rm = TRUE) - zap_labels(imbgeco),
imueclt = max(imueclt, na.rm = TRUE) - zap_labels(imueclt),
imwbcnt = max(imwbcnt, na.rm = TRUE) - zap_labels(imwbcnt),
# Regigion
rlgblg = as_factor(rlgblg), # Belonging to a religion
rlgdgr = zap_labels(rlgdgr), # How religios
rlgatnd = max(rlgatnd, na.rm = TRUE) - zap_labels(rlgatnd), # Service attendance
pray = max(pray, na.rm = TRUE) - zap_labels(pray), # Prayer frequency
pspwght = zap_label(pspwght),
# Parental education (ISCED) #<<
eiscedf = zap_labels(eiscedf), #<<
eiscedm = zap_labels(eiscedm), #<<
par_edu = case_when( #<<
# If father's edu is missing but not mother's, take mother's #<<
(eiscedf == 55 | is.na(eiscedf)) & eiscedm != 55 & !is.na(eiscedm) ~ eiscedm, #<<
# If mother's edu is missing but not father's, take father's #<<
(eiscedm == 55 | is.na(eiscedm)) & eiscedf != 55 & !is.na(eiscedf) ~ eiscedf, #<<
# If both are missing, its missing #<<
eiscedf == 55 & eiscedm == 55 ~ as.numeric(NA), #<<
# For all others, take the average of both parents #<<
TRUE ~ (eiscedm + eiscedf) / 2), #<<
eduyrs = case_when( # Education
eduyrs > 21 ~ 21, # Recode to max 21 years of edu.
eduyrs < 9 ~ 9, # Recode to min 9 years of edu.
TRUE ~ zap_labels(eduyrs))) %>% # Make it numeric, then
dplyr::filter(
# Keep only respondents with native-born parents,
facntr == "Yes" & mocntr == "Yes" &
(cntry == "Denmark" | cntry == "Sweden" | cntry == "Norway" | cntry == "Germany"))
Among these religious respondents, the ESS contains three questions on religiosity: Prayer frequency, attendance of religious services, and general importance. Use PCA to identify a xenophobia and a religiosity variable. Using oblique rotation, the two correlate by r = .
# Generate new component scores for every respondent based on the
# rotated principal components and write the into object "rotated_PCs"
rotated_PCs <- ESS %>%
dplyr::select(imbgeco, imueclt, imwbcnt,
rlgdgr, rlgatnd, pray) %>%
factor.scores(x = ., f = pca$rotation)
# Add a "xeno" and "relig" from "rotated_PCs" to our data.
ESS[, c("xeno", "relig")] <- rotated_PCs$scores
# Correlation
ESS %>%
select(xeno, relig) %>%
drop_na() %>%
cor()
# xeno relig
# xeno 1.0000 -0.0017
# relig -0.0017 1.0000
Model 1: Regress xenophobia on religiosity; \(\beta\) = and the relation is
ols1 <- lm_robust(xeno ~ relig, w = pspwght, data = ESS)
summary(ols1)
#
# Call:
# lm_robust(formula = xeno ~ relig, data = ESS, weights = pspwght)
#
# Weighted, Standard error type: HC2
#
# Coefficients:
# Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper DF
# (Intercept) 0.05738 0.0160 3.578 0.000349 0.0259 0.0888 5368
# relig -0.00368 0.0158 -0.233 0.815578 -0.0346 0.0273 5368
#
# Multiple R-squared: 1.3e-05 , Adjusted R-squared: -0.000173
# F-statistic: 0.0544 on 1 and 5368 DF, p-value: 0.816
Model 2: Add cntry, par_edu, eduyrs, and gndr as control variables, what happens to our religiosity estimate?
Make a coefficient plot of the results of Model 1 and 2, and emphasize religiosity as predictor of interest.
bind_rows(tidy(ols1), tidy(ols2), # Stack the data of model objects into one tibble.
.id = "model") %>% # Identify which model the data came from, then
mutate(
model = case_when(model == 1 ~ "Model 1", # Rename model identifyer.
model == 2 ~ "Model 2"),
term = fct_recode(factor(term), # Recode predictor names, then
"Intercept" = "(Intercept)",
"Religiosity" = "relig",
"Years of Education" = "eduyrs",
"Parental education" = "par_edu",
"Germany" = "cntryGermany",
"Norway" = "cntryNorway",
"Sweden" = "cntrySweden",
"Women" = "gndrFemale"),
emphasize = case_when(
term == "Religiosity" ~ "Predictor",
TRUE ~ "Confonfounder")) %>%
select(term, estimate, conf.low, conf.high, model, emphasize) %>% # keep only some of all the info, then
ggplot(aes(x = reorder(term, estimate),
y = estimate,
ymin = conf.low, ymax = conf.high,
color = emphasize,
shape = model)) +
geom_hline(yintercept = 0, # Null-hypothesis line.
color = "#901A1E", lty = "dashed") +
geom_pointrange(# Coefs & 95% CI. #<<
position = position_dodge(width=0.5)) + #<<
coord_flip() +
theme_minimal() +
labs(y = expression(beta), # Axis title: greek beta.
x = "") +
theme(legend.position="bottom",
legend.title=element_blank(),
legend.box = 'vertical')
Drop the two intercepts and the Sweden dummy from the coefficients plot, but mention this in the plot's caption.
bind_rows(tidy(ols1), tidy(ols2), # Stack the data of model objects into one tibble.
.id = "model") %>% # Identify which model the data came from, then
dplyr::filter(term != "(Intercept)" & term != "cntrySweden") %>% # drop the two intercepts and Sweden dummy, then
mutate(
model = case_when(model == 1 ~ "Model 1", # Rename model identifyer.
model == 2 ~ "Model 2"),
term = fct_recode(factor(term), # Recode predictor names, then
"Religiosity" = "relig",
"Years of Education" = "eduyrs",
"Parental education" = "par_edu",
"Germany" = "cntryGermany",
"Norway" = "cntryNorway",
"Sweden" = "cntrySweden",
"Women" = "gndrFemale"),
emphasize = case_when(
term == "Religiosity" ~ "Predictor",
TRUE ~ "Confonfounder")) %>%
select(term, estimate, conf.low, conf.high, model, emphasize) %>% # keep only some of all the info, then
ggplot(aes(x = reorder(term, estimate),
y = estimate,
ymin = conf.low, ymax = conf.high,
color = emphasize,
shape = model)) +
geom_hline(yintercept = 0, # Null-hypothesis line.
color = "#901A1E", lty = "dashed") +
geom_pointrange(# Coefs & 95% CI. #<<
position = position_dodge(width=0.5)) + #<<
coord_flip() +
theme_minimal() +
labs(y = expression(beta), # Axis title: Greek beta.
x = "",
caption = "(Estimates of the intercept
and a Sweden dummy are not shown)") +
theme(legend.position="bottom",
legend.title=element_blank(),
legend.box = 'vertical')