# Add packages to library
library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Handle labelled data.
library(essurvey) # Add ESS API package to library.
# Import the ESS round 9 data via the API
# import_rounds() comes from the essurvey package loaded above; the result is
# stored in ESS and used by every later step of this script.
# NOTE(review): "YOUR-EMAIL" looks like a placeholder that has to be replaced
# with your own registered e-mail address before this call can work - confirm.
# format = "spss" requests the SPSS version of the data; presumably this is why
# haven is loaded above (labelled variables) - verify.
ESS <- import_rounds(rounds = 9, ess_email = "YOUR-EMAIL", format = "spss")
Keep only a person and a country identifier, the post-stratification weights, eduyrs, agea, dscrgrp, and the homophobia variables that you identified. The function that allows you to do that is called `select()`.
Z-standardize eduyrs and agea. Remember, \(z_x = \frac{x - \bar{x}}{\text{SD}(x)}\).
# Step 1: inspect the raw distributions before transforming anything.
summary(ESS$eduyrs) # 0 or 60 years of education are implausible.
# Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
# 0 11 12 13 16 60 708
summary(ESS$agea) # Ages between 15 and 90 look plausible.
# Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
# 15 36 52 51 66 90 222
# Step 2: clamp eduyrs to the range 9-21, then z-standardize both variables,
# i.e. z = (x - mean(x)) / sd(x), ignoring missing values in mean and sd.
ESS <- ESS %>%
  mutate(
    # Values below 9 become 9, values above 21 become 21; everything else
    # (including NA) is kept, converted to a plain numeric.
    eduyrs = case_when(
      eduyrs < 9 ~ 9,
      eduyrs > 21 ~ 21,
      TRUE ~ as.numeric(eduyrs)
    )
  ) %>%
  mutate(
    # The z-scores are based on the clamped eduyrs from the step above.
    z_eduyrs = (eduyrs - mean(eduyrs, na.rm = TRUE)) /
      sd(eduyrs, na.rm = TRUE),
    z_agea = (agea - mean(agea, na.rm = TRUE)) /
      sd(agea, na.rm = TRUE)
  )
# Step 3: check the result; both z-variables should be centred on 0.
summary(ESS$z_eduyrs)
# Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
# -1 -1 0 0 1 2 708
summary(ESS$z_agea)
# Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
# -2 -1 0 0 1 2 222
Recode dscrgrp to "Yes" for all persons older than 80 in Germany, and to "No" for all respondents with more than average years of education in France. Use the table() function to show the frequency of dscrgrp by country. After the recode, 166 respondents from Germany answered that they belong to a discriminated minority, and 263 from France.
# Conditional recode of dscrgrp; the first matching rule wins.
# NOTE(review): the comparisons assume cntry still holds the two-letter
# country codes ("DE", "FR") at this point - confirm against earlier steps.
ESS <- ESS %>%
  mutate(
    dscrgrp = factor( # case_when() returns character; turn it into a factor.
      case_when(
        # Germany, older than 80 -> "Yes".
        cntry == "DE" & agea > 80 ~ "Yes",
        # France, more years of education than the mean -> "No". The mean is
        # taken over all rows passed to mutate(), not over France only.
        cntry == "FR" & eduyrs > mean(eduyrs, na.rm = TRUE) ~ "No",
        # Everyone else keeps the previous answer; as.character() is needed
        # because all case_when() outcomes must share one type.
        TRUE ~ as.character(dscrgrp)
      )
    )
  )
# Always verify a recode: absolute frequencies of dscrgrp by country.
# NOTE(review): an earlier remark here singled out Israel, but Israel does
# not appear in the recorded output below.
table(ESS$dscrgrp, ESS$cntry)
#
# Austria Belgium Bulgaria Switzerland Cyprus Czechia Germany Denmark Estonia Spain Finland France
# No 2344 1605 1980 1443 724 2296 2184 1480 1754 1503 1577 1735
# Yes 142 155 161 82 56 88 166 84 148 150 170 263
#
# United Kingdom Croatia Hungary Ireland Iceland Italy Lithuania Latvia Montenegro Netherlands Norway Poland
# No 1859 1646 1593 2061 720 2585 1731 817 1030 1520 1294 1431
# Yes 335 145 48 143 136 104 63 90 163 144 109 56
#
# Portugal Serbia Sweden Slovenia Slovakia
# No 985 1819 1387 1250 1026
# Yes 62 180 142 61 51
Now check out prop.table() and find out how to use it. In which country do most people report discrimination (apart from Germany and France, which we messed up ...)?
# Relative frequencies cross-table.
# The rows of the table are the dscrgrp answers and the columns are the
# countries. margin = 2 normalizes within each column, so every country sums
# to 1 and the "Yes" row is the share of respondents in that country who report
# discrimination. (margin = 1 would instead give each country's share of all
# "Yes" answers, which mostly reflects sample size.)
# Result: apart from Germany and France, Iceland has the highest share (15.9%),
# closely followed by the United Kingdom (15.3%).
prop.table(table(ESS$dscrgrp, ESS$cntry), margin = 2)
# Output below recomputed from the absolute frequencies of the preceding
# cross-table, rounded to three decimals.
#
# Austria Belgium Bulgaria Switzerland Cyprus Czechia Germany Denmark Estonia Spain Finland France
# No 0.943 0.912 0.925 0.946 0.928 0.963 0.929 0.946 0.922 0.909 0.903 0.868
# Yes 0.057 0.088 0.075 0.054 0.072 0.037 0.071 0.054 0.078 0.091 0.097 0.132
#
# United Kingdom Croatia Hungary Ireland Iceland Italy Lithuania Latvia Montenegro Netherlands Norway Poland
# No 0.847 0.919 0.971 0.935 0.841 0.961 0.965 0.901 0.863 0.913 0.922 0.962
# Yes 0.153 0.081 0.029 0.065 0.159 0.039 0.035 0.099 0.137 0.087 0.078 0.038
#
# Portugal Serbia Sweden Slovenia Slovakia
# No 0.941 0.910 0.907 0.953 0.953
# Yes 0.059 0.090 0.093 0.047 0.047
Show the frequency table only for respondents who are older than 45.
# Use the index to select cases
# Deliberate mistake for illustration: only cntry is subset here, so the two
# vectors differ in length and table() fails. The call is wrapped in try() so
# that the error is still reported but the rest of the script keeps running
# when the file is source()-d.
try(prop.table(table(ESS$cntry[ESS$agea > 45], ESS$dscrgrp)))
# Error in table(ESS$cntry[ESS$agea > 45], ESS$dscrgrp): all arguments must have the same length
# You need to make sure to select the same cases for both variables!
# Without a margin argument, prop.table() divides by the grand total, so all
# cells of the table below sum to 1.
prop.table(table(ESS$cntry[ESS$agea > 45], ESS$dscrgrp[ESS$agea > 45]))
#
# No Yes
# Austria 0.04978 0.00222
# Belgium 0.03027 0.00209
# Bulgaria 0.04631 0.00363
# Switzerland 0.02583 0.00101
# Cyprus 0.01594 0.00104
# Czechia 0.04376 0.00185
# Germany 0.04487 0.00212
# Denmark 0.02923 0.00158
# Estonia 0.03474 0.00259
# Spain 0.02882 0.00202
# Finland 0.03273 0.00272
# France 0.03794 0.00400
# United Kingdom 0.03895 0.00693
# Croatia 0.03310 0.00309
# Hungary 0.03195 0.00091
# Ireland 0.04282 0.00225
# Iceland 0.01436 0.00303
# Italy 0.05348 0.00148
# Lithuania 0.04278 0.00178
# Latvia 0.01988 0.00198
# Montenegro 0.01944 0.00309
# Netherlands 0.03030 0.00185
# Norway 0.02368 0.00124
# Poland 0.02472 0.00104
# Portugal 0.02109 0.00087
# Serbia 0.04083 0.00360
# Sweden 0.03081 0.00161
# Slovenia 0.02412 0.00081
# Slovakia 0.02381 0.00121
How did you filter the cases; would there have been another way?
# Alternative to indexing: subset the rows once with filter() and keep the
# result as a new tibble that holds only respondents older than 45.
ESS_46Plus <- ESS %>% filter(agea > 45)
# Both variables now come from the same subset, so their lengths match.
table(ESS_46Plus$cntry, ESS_46Plus$dscrgrp) %>% prop.table()
#
# No Yes
# Austria 0.04978 0.00222
# Belgium 0.03027 0.00209
# Bulgaria 0.04631 0.00363
# Switzerland 0.02583 0.00101
# Cyprus 0.01594 0.00104
# Czechia 0.04376 0.00185
# Germany 0.04487 0.00212
# Denmark 0.02923 0.00158
# Estonia 0.03474 0.00259
# Spain 0.02882 0.00202
# Finland 0.03273 0.00272
# France 0.03794 0.00400
# United Kingdom 0.03895 0.00693
# Croatia 0.03310 0.00309
# Hungary 0.03195 0.00091
# Ireland 0.04282 0.00225
# Iceland 0.01436 0.00303
# Italy 0.05348 0.00148
# Lithuania 0.04278 0.00178
# Latvia 0.01988 0.00198
# Montenegro 0.01944 0.00309
# Netherlands 0.03030 0.00185
# Norway 0.02368 0.00124
# Poland 0.02472 0.00104
# Portugal 0.02109 0.00087
# Serbia 0.04083 0.00360
# Sweden 0.03081 0.00161
# Slovenia 0.02412 0.00081
# Slovakia 0.02381 0.00121