Web Exercise 3

Import the ESS round 9 data using the API.

# Add packages to library
library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Handle labelled data.
library(essurvey) # Add ESS API package to library.

# Download ESS round 9 through the API; put your own e-mail in place of
# "YOUR-EMAIL". The SPSS format is requested, so the columns arrive with their
# value labels attached.
ESS <- import_rounds(
  rounds = 9,
  ess_email = "YOUR-EMAIL",
  format = "spss"
)

Keep only a person and a country identifier, the post-stratification weights, eduyrs, agea, dscrgrp, and the homophobia variables that you identified. The function that allows you to do that is called `select()`.

# Reduce the data to the identifiers, the weight, and the analysis variables.
# The outer parentheses print the resulting tibble right after the assignment.
(ESS <- ESS %>%
  select(
    idno, cntry, pspwght,       # person id, country, post-stratification weight
    eduyrs, agea, dscrgrp,      # education, age, discriminated group
    freehms, hmsfmlsh, hmsacld  # homophobia items
  ))
# # A tibble: 49,519 × 9
#     idno cntry        pspwght    eduyrs      agea   dscrgrp                        freehms     hmsfmlsh     hmsacld
#    <dbl> <chr+lbl>      <dbl> <dbl+lbl> <dbl+lbl> <dbl+lbl>                      <dbl+lbl>    <dbl+lbl>   <dbl+lbl>
#  1    27 AT [Austria]   0.218        12        43    2 [No] 1 [Agree strongly]             5 [Disagree… 1 [Agree s…
#  2   137 AT [Austria]   0.413        12        67    2 [No] 3 [Neither agree nor disagree] 3 [Neither … 4 [Disagre…
#  3   194 AT [Austria]   2.27         12        40    2 [No] 3 [Neither agree nor disagree] 4 [Disagree] 3 [Neither…
#  4   208 AT [Austria]   0.386        11        63    2 [No] 1 [Agree strongly]             5 [Disagree… 1 [Agree s…
#  5   220 AT [Austria]   1.03          8        71    2 [No] 2 [Agree]                      2 [Agree]    4 [Disagre…
#  6   254 AT [Austria]   0.576        13        64    2 [No] 3 [Neither agree nor disagree] 4 [Disagree] 3 [Neither…
#  7   290 AT [Austria]   0.721        12        56    2 [No] 2 [Agree]                      4 [Disagree] 1 [Agree s…
#  8   301 AT [Austria]   0.130        11        74    2 [No] 4 [Disagree]                   3 [Neither … 5 [Disagre…
#  9   305 AT [Austria]   1.77         12        37    2 [No] 1 [Agree strongly]             5 [Disagree… 1 [Agree s…
# 10   400 AT [Austria]   0.743        12        22    2 [No] 2 [Agree]                      2 [Agree]    2 [Agree]  
# # … with 49,509 more rows

Change the variables into numeric and factors appropriately. For that, you need two functions: `zap_labels()` for numeric and `as_factor()` for categorical variables.

# Drop the value labels from the numeric columns and convert the two
# categorical columns (country and discrimination item) into factors.
ESS <- ESS %>%
  mutate(
    across(
      c(idno, pspwght, eduyrs, agea, freehms, hmsfmlsh, hmsacld),
      zap_labels
    ),
    across(c(cntry, dscrgrp), as_factor)
  )

Z-standardize eduyrs and agea. Remember, \(z_x = \frac{x - \bar{x}}{\text{SD(x)}}\).

# Step 1: inspect the raw distributions before transforming anything.
summary(ESS$eduyrs) # 0 up to 60 years of education? That looks implausible.
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#       0      11      12      13      16      60     708
summary(ESS$agea) # The age range looks plausible.
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#      15      36      52      51      66      90     222
# Step 2: clamp eduyrs, then z-standardize both variables.
ESS <- ESS %>%
  mutate(
    # Clamp to the range 9-21: values below 9 become 9, values above 21 become
    # 21, and missing values stay missing. as.numeric() returns a plain double.
    eduyrs = as.numeric(pmin(pmax(eduyrs, 9), 21)),
    # z = (x - mean) / SD; NAs are ignored when computing mean and SD.
    z_eduyrs = (eduyrs - mean(eduyrs, na.rm = TRUE)) / sd(eduyrs, na.rm = TRUE),
    z_agea = (agea - mean(agea, na.rm = TRUE)) / sd(agea, na.rm = TRUE)
  )

summary(ESS$z_eduyrs)
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#      -1      -1       0       0       1       2     708
summary(ESS$z_agea)
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#      -2      -1       0       0       1       2     222

Recode dscrgrp to "Yes" for all persons older than 80 in Germany, and to "No" for all respondents with more than average years of education in France. Use the table() function to show the frequency of dscrgrp by country. After the recode, how many respondents from Germany answered that they belong to a discriminated minority, and how many from France?

# Recode dscrgrp: "Yes" for everyone older than 80 in Germany, and "No" for
# respondents in France with more than average years of education.
# cntry was converted with as_factor() earlier, so its values are the country
# labels ("Germany", "France") rather than the ISO codes ("DE", "FR").
# Comparing against the codes never matches and silently leaves dscrgrp as is.
ESS <- mutate(ESS,
              dscrgrp = case_when( # Start complex recode
                # Older than 80 & Germany ~ "Yes"
                agea > 80 & cntry == "Germany" ~ "Yes",
                # More than mean years of education & France ~ "No".
                # mean() is taken over the whole sample (all countries).
                # NOTE(review): if the French mean is intended, compute it
                # within France instead.
                eduyrs > mean(eduyrs, na.rm = TRUE) & cntry == "France" ~ "No",
                # All others ~ as before; character for type consistency.
                TRUE ~ as.character(dscrgrp)
              ),
              # Now make it a factor again.
              dscrgrp = factor(dscrgrp)
)

# ... don't forget to check your recode for bugs!

# Cross-table of absolute frequencies: rows are the dscrgrp answers, columns
# are the countries.
# NOTE(review): an earlier remark here singled out Israel, but the output below
# contains no Israel column.
table(ESS[["dscrgrp"]], ESS[["cntry"]])
#      
#       Austria Belgium Bulgaria Switzerland Cyprus Czechia Germany Denmark Estonia Spain Finland France
#   No     2344    1605     1980        1443    724    2296    2184    1480    1754  1503    1577   1735
#   Yes     142     155      161          82     56      88     166      84     148   150     170    263
#      
#       United Kingdom Croatia Hungary Ireland Iceland Italy Lithuania Latvia Montenegro Netherlands Norway Poland
#   No            1859    1646    1593    2061     720  2585      1731    817       1030        1520   1294   1431
#   Yes            335     145      48     143     136   104        63     90        163         144    109     56
#      
#       Portugal Serbia Sweden Slovenia Slovakia
#   No       985   1819   1387     1250     1026
#   Yes       62    180    142       61       51

Now check out prop.table() and find out how to use it. In which country do most people report discrimination (apart from Germany and France, which we messed up ...)?

# Relative frequencies cross-table.
# Countries are the columns, so margin = 2 makes every country column sum to 1
# and the "Yes" row is the share of respondents per country who report
# discrimination. (margin = 1 would instead give each country's share of all
# "No" or all "Yes" answers, which mostly reflects sample size.)
# The shares below are derived from the absolute frequencies shown in the
# cross-table above: with 15.9% it is Iceland, followed by the UK (15.3%).
prop.table(table(ESS$dscrgrp, ESS$cntry), margin = 2)
#      
#       Austria Belgium Bulgaria Switzerland Cyprus Czechia Germany Denmark Estonia Spain Finland France
#   No    0.943   0.912    0.925       0.946  0.928   0.963   0.929   0.946   0.922 0.909   0.903  0.868
#   Yes   0.057   0.088    0.075       0.054  0.072   0.037   0.071   0.054   0.078 0.091   0.097  0.132
#      
#       United Kingdom Croatia Hungary Ireland Iceland Italy Lithuania Latvia Montenegro Netherlands Norway Poland
#   No           0.847   0.919   0.971   0.935   0.841 0.961     0.965  0.901      0.863       0.913  0.922  0.962
#   Yes          0.153   0.081   0.029   0.065   0.159 0.039     0.035  0.099      0.137       0.087  0.078  0.038
#      
#       Portugal Serbia Sweden Slovenia Slovakia
#   No     0.941  0.910  0.907    0.953    0.953
#   Yes    0.059  0.090  0.093    0.047    0.047

Show the frequency table only for respondents who are older than 45.

# Use the index to select cases
# The first call is a deliberate counter-example: only cntry is subset by the
# age condition, so the two vectors passed to table() differ in length and the
# call stops with the error quoted below.
prop.table(table(ESS$cntry[ESS$agea > 45], ESS$dscrgrp))
# Error in table(ESS$cntry[ESS$agea > 45], ESS$dscrgrp): all arguments must have the same length
# You need to make sure to select the same cases for both variables!
# Here the same logical index is applied to both variables. No margin is given,
# so every cell is a share of all tabulated cases, not a share within a country.
prop.table(table(ESS$cntry[ESS$agea > 45], ESS$dscrgrp[ESS$agea > 45]))
#                 
#                       No     Yes
#   Austria        0.04978 0.00222
#   Belgium        0.03027 0.00209
#   Bulgaria       0.04631 0.00363
#   Switzerland    0.02583 0.00101
#   Cyprus         0.01594 0.00104
#   Czechia        0.04376 0.00185
#   Germany        0.04487 0.00212
#   Denmark        0.02923 0.00158
#   Estonia        0.03474 0.00259
#   Spain          0.02882 0.00202
#   Finland        0.03273 0.00272
#   France         0.03794 0.00400
#   United Kingdom 0.03895 0.00693
#   Croatia        0.03310 0.00309
#   Hungary        0.03195 0.00091
#   Ireland        0.04282 0.00225
#   Iceland        0.01436 0.00303
#   Italy          0.05348 0.00148
#   Lithuania      0.04278 0.00178
#   Latvia         0.01988 0.00198
#   Montenegro     0.01944 0.00309
#   Netherlands    0.03030 0.00185
#   Norway         0.02368 0.00124
#   Poland         0.02472 0.00104
#   Portugal       0.02109 0.00087
#   Serbia         0.04083 0.00360
#   Sweden         0.03081 0.00161
#   Slovenia       0.02412 0.00081
#   Slovakia       0.02381 0.00121

How did you filter the cases; would there have been another way?

# Alternative: build a separate tibble holding only respondents older than 45
# with filter(), then tabulate that tibble directly.
ESS_46Plus <- ESS %>%
  filter(agea > 45)

table(ESS_46Plus$cntry, ESS_46Plus$dscrgrp) %>%
  prop.table()
#                 
#                       No     Yes
#   Austria        0.04978 0.00222
#   Belgium        0.03027 0.00209
#   Bulgaria       0.04631 0.00363
#   Switzerland    0.02583 0.00101
#   Cyprus         0.01594 0.00104
#   Czechia        0.04376 0.00185
#   Germany        0.04487 0.00212
#   Denmark        0.02923 0.00158
#   Estonia        0.03474 0.00259
#   Spain          0.02882 0.00202
#   Finland        0.03273 0.00272
#   France         0.03794 0.00400
#   United Kingdom 0.03895 0.00693
#   Croatia        0.03310 0.00309
#   Hungary        0.03195 0.00091
#   Ireland        0.04282 0.00225
#   Iceland        0.01436 0.00303
#   Italy          0.05348 0.00148
#   Lithuania      0.04278 0.00178
#   Latvia         0.01988 0.00198
#   Montenegro     0.01944 0.00309
#   Netherlands    0.03030 0.00185
#   Norway         0.02368 0.00124
#   Poland         0.02472 0.00104
#   Portugal       0.02109 0.00087
#   Serbia         0.04083 0.00360
#   Sweden         0.03081 0.00161
#   Slovenia       0.02412 0.00081
#   Slovakia       0.02381 0.00121

Web Exercise 3

Merlin Schaeffer

2021-02-15