05 - Sampling Designs in {srvyr}

Slides

View slides in full screen

Your Turn

Set-up

Load necessary packages

library(dplyr)
library(srvyr)
library(srvyrexploR)

Load in data and preview it

# Preview CHIS 2023: each column's name, type, and first few values
chis_2023 %>% glimpse()
Rows: 21,671
Columns: 98
$ PUF1Y_ID    <chr> "23021436", "23009146", "23005039", "23025815", "23010158"…
$ AH1V2       <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes…
$ AH22        <fct> No, No, No, Yes, No, No, No, No, No, Yes, No, No, No, No, …
$ SMKCUR30    <fct> No, No, No, No, No, No, No, No, No, No, No, No, No, No, No…
$ AB1         <fct> Very good, Excellent, Good, Fair, Good, Excellent, Fair, E…
$ DIABETES    <fct> No, No, No, No, Yes, No, No, No, No, No, No, No, Yes, No, …
$ BMI_P       <dbl> 35.55, 22.96, 25.61, 42.52, 24.71, 19.14, 34.19, 31.89, 24…
$ RBMI        <fct> Obese 30.0+, Normal 18.5-24.99, Overweight 25.0-29.99, Obe…
$ AB17        <fct> No, No, Yes, No, No, No, No, No, No, No, No, No, No, No, N…
$ DSTRS12     <fct> No, No, No, No, No, No, No, No, No, No, No, No, No, No, No…
$ AB29V2      <fct> No, No, Borderline hypertension, Borderline hypertension, …
$ SPK_ENG     <fct> Speak only English, Speak only English, Speak English very…
$ POVLL2_P1V2 <dbl> 1.91, 6.00, 6.00, 3.31, 6.00, 6.00, 3.64, 1.21, 6.00, 1.78…
$ POVLL       <fct> 100-199% FPL, 300% FPL and above, 300% FPL and above, 300%…
$ SRAGE_P1    <ord> 35-39, 30-34, 40-44, 60-64, 60-64, 70-74, 60-64, 80-84, 55…
$ SRSEX       <fct> Female, Male, Female, Male, Male, Female, Male, Female, Fe…
$ OMBSRR_P1   <fct> "White, NH", "White, NH", "White, NH", "Hispanic", "Asian,…
$ RAKEDW0     <dbl> 377.76342, 5440.82230, 1510.73568, 189.35127, 816.45231, 1…
$ RAKEDW1     <dbl> 379.67385, 5423.54632, 1524.34483, 189.04169, 823.97981, 1…
$ RAKEDW2     <dbl> 377.67206, 5501.09481, 1518.55832, 189.20316, 813.52862, 1…
$ RAKEDW3     <dbl> 391.87241, 5422.38467, 1518.87859, 189.67529, 812.89534, 1…
$ RAKEDW4     <dbl> 383.52718, 5498.89734, 1501.34474, 187.64245, 811.32652, 1…
$ RAKEDW5     <dbl> 379.36716, 5426.12142, 1497.12070, 189.18938, 797.03189, 1…
$ RAKEDW6     <dbl> 372.56373, 5456.04102, 1525.81079, 188.55471, 807.12411, 1…
$ RAKEDW7     <dbl> 373.88851, 5518.54856, 1507.19963, 191.93340, 814.08142, 1…
$ RAKEDW8     <dbl> 386.89715, 5481.25082, 1532.46184, 185.18966, 829.15080, 1…
$ RAKEDW9     <dbl> 380.30036, 5393.13753, 1482.86902, 192.35734, 799.39751, 1…
$ RAKEDW10    <dbl> 396.80874, 5520.21994, 1512.59631, 188.96238, 823.09128, 1…
$ RAKEDW11    <dbl> 381.20852, 5560.46991, 1513.11707, 191.01037, 809.26274, 1…
$ RAKEDW12    <dbl> 370.08881, 5471.17062, 1490.55583, 190.17599, 828.57677, 1…
$ RAKEDW13    <dbl> 381.63377, 5539.08920, 1503.27857, 190.02121, 825.39467, 1…
$ RAKEDW14    <dbl> 387.73313, 5531.07340, 1518.70073, 185.98445, 816.56755, 1…
$ RAKEDW15    <dbl> 373.96319, 5461.81096, 1488.12767, 186.04791, 1643.84520, …
$ RAKEDW16    <dbl> 375.63387, 5425.88871, 1539.72594, 185.85065, 821.55347, 1…
$ RAKEDW17    <dbl> 375.90868, 5413.12792, 1532.15331, 190.06499, 817.35716, 1…
$ RAKEDW18    <dbl> 374.06907, 5409.66538, 1533.86418, 188.83553, 812.23683, 1…
$ RAKEDW19    <dbl> 370.70155, 5335.39076, 3030.67675, 190.35148, 816.76742, 1…
$ RAKEDW20    <dbl> 376.16361, 5542.36138, 1520.98004, 190.40463, 795.17561, 1…
$ RAKEDW21    <dbl> 376.66374, 5438.99627, 1529.29484, 188.40847, 836.53256, 1…
$ RAKEDW22    <dbl> 372.88810, 5407.19403, 1512.71368, 194.52411, 815.79660, 1…
$ RAKEDW23    <dbl> 377.08123, 5511.05350, 1501.40871, 187.07666, 822.90284, 1…
$ RAKEDW24    <dbl> 378.14045, 5447.60615, 1532.24763, 191.06993, 819.86697, 1…
$ RAKEDW25    <dbl> 376.63905, 5469.76063, 1499.22198, 190.37025, 826.91579, 1…
$ RAKEDW26    <dbl> 381.21627, 5424.59692, 1503.23199, 184.82446, 821.61634, 1…
$ RAKEDW27    <dbl> 736.12313, 5392.15220, 1508.96013, 189.40821, 812.01521, 1…
$ RAKEDW28    <dbl> 374.87207, 5399.98490, 1502.29676, 189.41890, 825.64499, 1…
$ RAKEDW29    <dbl> 370.48273, 5609.82334, 1504.46387, 193.33981, 822.45949, 1…
$ RAKEDW30    <dbl> 374.08421, 5478.56452, 1529.79756, 190.29283, 823.20287, 1…
$ RAKEDW31    <dbl> 375.93944, 5531.46321, 1517.66913, 187.70034, 831.32689, 1…
$ RAKEDW32    <dbl> 376.77720, 5454.02908, 1523.81288, 190.89860, 818.75318, 1…
$ RAKEDW33    <dbl> 373.90451, 5359.04687, 1496.22835, 187.71859, 808.17450, 1…
$ RAKEDW34    <dbl> 384.70325, 5372.82318, 1513.76344, 0.00000, 804.31669, 108…
$ RAKEDW35    <dbl> 385.93101, 5540.75364, 1515.22343, 189.41755, 841.06640, 1…
$ RAKEDW36    <dbl> 380.05346, 5367.44428, 1540.28237, 189.23365, 807.56729, 1…
$ RAKEDW37    <dbl> 383.44112, 5499.94872, 1514.47617, 185.00184, 816.35734, 1…
$ RAKEDW38    <dbl> 380.96955, 5365.76141, 1533.75380, 190.40014, 803.55967, 1…
$ RAKEDW39    <dbl> 378.53806, 5408.72203, 1509.70452, 190.37006, 820.94697, 1…
$ RAKEDW40    <dbl> 373.32674, 5618.84749, 1512.48396, 185.96514, 835.63063, 1…
$ RAKEDW41    <dbl> 375.60203, 5442.86619, 1507.47738, 188.91416, 797.63435, 1…
$ RAKEDW42    <dbl> 388.49579, 5468.25392, 1514.51889, 192.55026, 831.84420, 1…
$ RAKEDW43    <dbl> 379.04804, 5657.00804, 1504.12435, 187.34036, 821.21227, 1…
$ RAKEDW44    <dbl> 372.79291, 5412.42624, 1510.57340, 189.99224, 811.94509, 1…
$ RAKEDW45    <dbl> 387.31702, 5380.19192, 1524.80976, 189.34965, 796.24619, 1…
$ RAKEDW46    <dbl> 378.68040, 5455.72685, 1499.24639, 190.23263, 804.89197, 0…
$ RAKEDW47    <dbl> 379.80074, 5479.65824, 1491.39890, 189.03908, 816.88308, 1…
$ RAKEDW48    <dbl> 377.62516, 5370.25650, 1513.32532, 190.51240, 817.18117, 1…
$ RAKEDW49    <dbl> 370.27108, 5374.99529, 1534.30372, 195.08625, 819.42334, 1…
$ RAKEDW50    <dbl> 381.09146, 5623.57965, 1529.57536, 191.14493, 799.42589, 1…
$ RAKEDW51    <dbl> 372.93695, 5609.68675, 1518.73015, 185.10356, 810.56250, 1…
$ RAKEDW52    <dbl> 376.83386, 5416.01179, 1502.63013, 190.52319, 825.65017, 1…
$ RAKEDW53    <dbl> 389.22721, 5464.54688, 1510.69820, 191.24272, 856.36362, 1…
$ RAKEDW54    <dbl> 375.42718, 5516.58306, 1487.37912, 190.05101, 801.45826, 1…
$ RAKEDW55    <dbl> 384.84576, 5492.52884, 1505.72717, 187.44860, 811.25627, 1…
$ RAKEDW56    <dbl> 383.56347, 5647.00544, 1511.08240, 191.69014, 831.28855, 1…
$ RAKEDW57    <dbl> 380.48633, 5606.80831, 1511.94020, 192.65707, 794.17496, 1…
$ RAKEDW58    <dbl> 373.62795, 5757.47977, 1512.46551, 187.94741, 815.14317, 1…
$ RAKEDW59    <dbl> 378.91924, 5477.69162, 1507.52377, 197.38367, 817.99470, 1…
$ RAKEDW60    <dbl> 379.50452, 5477.71051, 1544.63224, 183.65976, 816.89856, 1…
$ RAKEDW61    <dbl> 376.64977, 5452.65149, 1514.34498, 188.45472, 814.88040, 1…
$ RAKEDW62    <dbl> 383.56295, 5491.55744, 1501.05195, 190.40998, 800.40138, 1…
$ RAKEDW63    <dbl> 373.75339, 5419.57694, 1516.86197, 190.99482, 781.46844, 1…
$ RAKEDW64    <dbl> 377.09404, 5438.36416, 1476.57247, 190.49051, 821.19084, 1…
$ RAKEDW65    <dbl> 382.93142, 5410.64353, 1528.48797, 187.00515, 825.62659, 1…
$ RAKEDW66    <dbl> 371.80213, 5483.77954, 1508.08229, 194.32258, 824.90959, 1…
$ RAKEDW67    <dbl> 376.80663, 5494.01669, 1506.71439, 192.39086, 814.74078, 1…
$ RAKEDW68    <dbl> 376.07679, 5382.78726, 1502.21384, 186.68741, 802.94914, 1…
$ RAKEDW69    <dbl> 378.27932, 5393.35827, 1514.61013, 190.74435, 810.13806, 1…
$ RAKEDW70    <dbl> 369.99802, 5501.78894, 1521.28708, 186.60201, 830.13613, 1…
$ RAKEDW71    <dbl> 374.53516, 5499.46675, 1512.58341, 191.23757, 805.99356, 1…
$ RAKEDW72    <dbl> 387.86304, 5404.68519, 1492.87434, 190.87869, 815.55435, 1…
$ RAKEDW73    <dbl> 374.74585, 5491.12022, 1498.81821, 187.83433, 819.17877, 1…
$ RAKEDW74    <dbl> 379.83108, 5413.06749, 1508.31555, 189.78261, 815.79475, 1…
$ RAKEDW75    <dbl> 382.49719, 5487.47759, 1551.93120, 186.79749, 823.61018, 1…
$ RAKEDW76    <dbl> 380.05337, 5418.50610, 1520.87989, 184.52873, 814.85938, 1…
$ RAKEDW77    <dbl> 379.98450, 5434.79433, 1480.95731, 190.16368, 812.64372, 1…
$ RAKEDW78    <dbl> 382.62349, 5523.48197, 1491.87493, 186.64661, 814.11781, 1…
$ RAKEDW79    <dbl> 372.87396, 5462.95628, 1516.71710, 189.64531, 813.59217, 1…
$ RAKEDW80    <dbl> 382.94937, 5420.72648, 1523.64843, 185.78273, 811.93065, 1…
# Preview NSDUH 2023: each column's name, type, and first few values
nsduh_2023 %>% glimpse()
Rows: 56,705
Columns: 22
$ QUESTID2    <dbl> 10000053, 10000679, 10001208, 10001260, 10001588, 10004996…
$ ANALWT2_C   <dbl> 3276.46987, 15630.08295, 4018.17239, 10711.70954, 8195.104…
$ VESTR_C     <dbl> 40031, 40021, 40043, 40030, 40023, 40048, 40003, 40038, 40…
$ VEREP       <dbl> 2, 2, 1, 2, 2, 1, 1, 1, 2, 1, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1…
$ NICVAPMON   <int> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ TOBMON      <int> 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
$ ALCMON      <int> 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0…
$ ILLMON      <int> 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1…
$ ILTOBVAPALC <int> 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1…
$ BNGDRKMON   <int> 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ IRPYUD5ALC  <int> 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ UD5ILLANY   <int> 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
$ UD5ILALANY  <int> 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
$ YMDELT      <fct> NA, NA, NA, No, NA, NA, NA, NA, NA, Yes, NA, NA, Yes, No, …
$ YMDEYR      <fct> NA, NA, NA, No, NA, NA, NA, NA, NA, No, NA, NA, Yes, No, N…
$ MDEIMPY     <fct> NA, NA, NA, No, NA, NA, NA, NA, NA, No, NA, NA, Yes, No, N…
$ AMIPY       <int> 0, 1, 1, NA, 0, 0, 0, 0, 0, NA, 0, 0, NA, NA, 1, 0, 1, 0, …
$ SMIPY       <int> 0, 0, 0, NA, 0, 0, 0, 0, 0, NA, 0, 0, NA, NA, 0, 0, 1, 0, …
$ AGE3        <fct> 50-64, 35-49, 35-49, 12-13, 50-64, 18-20, 30-34, 65+, 30-3…
$ NEWRACE2    <fct> "Other", "White, NH", "Native HI/PI, NH", "Other", "More t…
$ IRSEX       <fct> Male, Male, Female, Male, Male, Male, Male, Male, Female, …
$ POVERTY3    <fct> 201%+ FPL, 201%+ FPL, 0-100% FPL, 0-100% FPL, 201%+ FPL, 2…

Exercises

In these exercises, you will be given a study and the data. How would you create the survey object with design variables or replicate weights, as applicable?

  1. California Health Interview Survey (CHIS) - 2023

# Start with chis_2023
  2. National Survey on Drug Use and Health (NSDUH) - 2023

# Start with nsduh_2023
  3. Advanced bonus exercise

    • Find a public use file of your own
    • Download the data
    • Read in the data
    • Create the survey object

Solutions

See the solutions

In these exercises, you will be given a study and the data. How would you create the survey object with design variables or replicate weights, as applicable?

  1. California Health Interview Survey (CHIS) - 2023

Show code
# The data carry a full-sample weight (RAKEDW0) and 80 replicate weights
# (RAKEDW1 through RAKEDW80), so build a replicate-weight design object.
chis_des <- chis_2023 %>%
  as_survey_rep(
    weights = RAKEDW0,
    repweights = RAKEDW1:RAKEDW80,
    type = "other",
    scale = 1,
    rscales = 1,
    mse = TRUE
  )

# or: same weights and scaling, with the replicate type given as "JKn"
chis_des2 <- chis_2023 %>%
  as_survey_rep(
    weights = RAKEDW0,
    repweights = RAKEDW1:RAKEDW80,
    type = "JKn",
    scale = 1,
    rscales = 1,
    mse = TRUE
  )

# Print the design object to review the replicate and weight variables
chis_des
Call: Called via srvyr
with 80 replicates and MSE variances.
Sampling variables:
  - repweights: `RAKEDW1 + RAKEDW2 + RAKEDW3 + RAKEDW4 + RAKEDW5 + RAKEDW6 +
    RAKEDW7 + RAKEDW8 + RAKEDW9 + RAKEDW10 + RAKEDW11 + RAKEDW12 + RAKEDW13 +
    RAKEDW14 + RAKEDW15 + RAKEDW16 + RAKEDW17 + RAKEDW18 + RAKEDW19 + RAKEDW20
    + RAKEDW21 + RAKEDW22 + RAKEDW23 + RAKEDW24 + RAKEDW25 + RAKEDW26 +
    RAKEDW27 + RAKEDW28 + RAKEDW29 + RAKEDW30 + RAKEDW31 + RAKEDW32 + RAKEDW33
    + RAKEDW34 + RAKEDW35 + RAKEDW36 + RAKEDW37 + RAKEDW38 + RAKEDW39 +
    RAKEDW40 + RAKEDW41 + RAKEDW42 + RAKEDW43 + RAKEDW44 + RAKEDW45 + RAKEDW46
    + RAKEDW47 + RAKEDW48 + RAKEDW49 + RAKEDW50 + RAKEDW51 + RAKEDW52 +
    RAKEDW53 + RAKEDW54 + RAKEDW55 + RAKEDW56 + RAKEDW57 + RAKEDW58 + RAKEDW59
    + RAKEDW60 + RAKEDW61 + RAKEDW62 + RAKEDW63 + RAKEDW64 + RAKEDW65 +
    RAKEDW66 + RAKEDW67 + RAKEDW68 + RAKEDW69 + RAKEDW70 + RAKEDW71 + RAKEDW72
    + RAKEDW73 + RAKEDW74 + RAKEDW75 + RAKEDW76 + RAKEDW77 + RAKEDW78 +
    RAKEDW79 + RAKEDW80` 
  - weights: RAKEDW0 
Data variables: 
  - PUF1Y_ID (chr), AH1V2 (fct), AH22 (fct), SMKCUR30 (fct), AB1 (fct),
    DIABETES (fct), BMI_P (dbl), RBMI (fct), AB17 (fct), DSTRS12 (fct), AB29V2
    (fct), SPK_ENG (fct), POVLL2_P1V2 (dbl), POVLL (fct), SRAGE_P1 (ord), SRSEX
    (fct), OMBSRR_P1 (fct), RAKEDW0 (dbl), RAKEDW1 (dbl), RAKEDW2 (dbl),
    RAKEDW3 (dbl), RAKEDW4 (dbl), RAKEDW5 (dbl), RAKEDW6 (dbl), RAKEDW7 (dbl),
    RAKEDW8 (dbl), RAKEDW9 (dbl), RAKEDW10 (dbl), RAKEDW11 (dbl), RAKEDW12
    (dbl), RAKEDW13 (dbl), RAKEDW14 (dbl), RAKEDW15 (dbl), RAKEDW16 (dbl),
    RAKEDW17 (dbl), RAKEDW18 (dbl), RAKEDW19 (dbl), RAKEDW20 (dbl), RAKEDW21
    (dbl), RAKEDW22 (dbl), RAKEDW23 (dbl), RAKEDW24 (dbl), RAKEDW25 (dbl),
    RAKEDW26 (dbl), RAKEDW27 (dbl), RAKEDW28 (dbl), RAKEDW29 (dbl), RAKEDW30
    (dbl), RAKEDW31 (dbl), RAKEDW32 (dbl), RAKEDW33 (dbl), RAKEDW34 (dbl),
    RAKEDW35 (dbl), RAKEDW36 (dbl), RAKEDW37 (dbl), RAKEDW38 (dbl), RAKEDW39
    (dbl), RAKEDW40 (dbl), RAKEDW41 (dbl), RAKEDW42 (dbl), RAKEDW43 (dbl),
    RAKEDW44 (dbl), RAKEDW45 (dbl), RAKEDW46 (dbl), RAKEDW47 (dbl), RAKEDW48
    (dbl), RAKEDW49 (dbl), RAKEDW50 (dbl), RAKEDW51 (dbl), RAKEDW52 (dbl),
    RAKEDW53 (dbl), RAKEDW54 (dbl), RAKEDW55 (dbl), RAKEDW56 (dbl), RAKEDW57
    (dbl), RAKEDW58 (dbl), RAKEDW59 (dbl), RAKEDW60 (dbl), RAKEDW61 (dbl),
    RAKEDW62 (dbl), RAKEDW63 (dbl), RAKEDW64 (dbl), RAKEDW65 (dbl), RAKEDW66
    (dbl), RAKEDW67 (dbl), RAKEDW68 (dbl), RAKEDW69 (dbl), RAKEDW70 (dbl),
    RAKEDW71 (dbl), RAKEDW72 (dbl), RAKEDW73 (dbl), RAKEDW74 (dbl), RAKEDW75
    (dbl), RAKEDW76 (dbl), RAKEDW77 (dbl), RAKEDW78 (dbl), RAKEDW79 (dbl),
    RAKEDW80 (dbl)
  2. National Survey on Drug Use and Health (NSDUH) - 2023

Show code
# Build a design object from the design variables in the data:
# analysis weight (ANALWT2_C), strata (VESTR_C), and cluster ids (VEREP).
# nest = TRUE because VEREP values (1, 2) repeat across strata.
nsduh_des <- nsduh_2023 %>%
  as_survey_design(
    weights = ANALWT2_C,
    strata = VESTR_C,
    ids = VEREP,
    nest = TRUE
  )

# Print the design object to review the sampling variables
nsduh_des
Stratified 1 - level Cluster Sampling design (with replacement)
With (100) clusters.
Called via srvyr
Sampling variables:
  - ids: VEREP 
  - strata: VESTR_C 
  - weights: ANALWT2_C 
Data variables: 
  - QUESTID2 (dbl), ANALWT2_C (dbl), VESTR_C (dbl), VEREP (dbl), NICVAPMON
    (int), TOBMON (int), ALCMON (int), ILLMON (int), ILTOBVAPALC (int),
    BNGDRKMON (int), IRPYUD5ALC (int), UD5ILLANY (int), UD5ILALANY (int),
    YMDELT (fct), YMDEYR (fct), MDEIMPY (fct), AMIPY (int), SMIPY (int), AGE3
    (fct), NEWRACE2 (fct), IRSEX (fct), POVERTY3 (fct)