Unrotated booklet design Rasch model with latent regressors - tmatta/lsasim GitHub Wiki


We examined known- and estimated-group differences under the following conditions: 1 (IRT model) x 2 (IRT R packages) x 3 (sample sizes) x 3 (test lengths) x 1 (test booklet). Latent trait and five corresponding background variables were generated based on the specified marginal distributions and correlation matrix.


  • One IRT model was included: Rasch model
    • Item parameters were randomly generated
    • The bounds of the item difficulty parameter, b, are constrained to b_bounds = (-2, 2) where -2 is the lowest generating value and 2 is the highest generating value
  • Two IRT R packages were evaluated: TAM (version 2.4-9) and mirt (version 1.25). Different estimation methods were used:
    • Warm's weighted likelihood estimates (WLE) with/without regressors using TAM
    • Expected-a-posteriori (EAP) using mirt
  • Three sample sizes were used: 2000, 4000, and 6000
    • Simulated samples were generated based on the specified marginal distributions and correlation matrix
  • Three test lengths were used: 20, 40, and 60
  • A single booklet was used

  • One hundred replications were used for each condition for the calibration

  • Latent trait recovery:
    • We compared the known group-differences and group differences estimated by WLE and EAP. The results showed that WLE reproduced known group differences better than EAP

 

# Load libraries
if(!require(lsasim)){  
  install.packages("lsasim")
  library(lsasim) #version 1.0.1
}

if(!require(mirt)){  
  install.packages("mirt")
  library(mirt) #version 1.25
}

if(!require(TAM)){
  install.packages("TAM")
  library(TAM) #version 2.4-9
}
### -- Background questionnaire generation
# Define marginal probabilities 
var_q0 <- 1
var_q1 <- c(.25, 1)
var_q2 <- c(.33, 1)
var_q3 <- c(.50, 1)
var_q4 <- c(.67, 1)
var_q5 <- c(.88, 1)
cat_pr <-  list(var_q0, var_q1, var_q2, var_q3, var_q4, var_q5)

# Define correlation matrix
cor_mat <- matrix(c( 1.000, -0.237,  0.454, -0.487, -0.170, -0.343,
                     -0.237,  1.000, -0.712, -0.307, -0.263, -0.537,
                     0.454, -0.712,  1.000,  0.140, -0.331,  0.523,
                     -0.487, -0.307,  0.140,  1.000,  0.467,  0.239,
                     -0.170, -0.263, -0.331,  0.467,  1.000, -0.258,
                     -0.343, -0.537,  0.523,  0.239, -0.258,  1.000), nrow=6)
# Set up conditions
N.cond <- c(2000, 4000, 6000) #number of sample sizes
I.cond <- c(20, 40, 60)       #number of items 
K.cond  <- 1                  #number of booklets 

# Set up number of replications
reps <- 100

# Create space for outputs
results <- NULL
#==============================================================================#
# START SIMULATION
#==============================================================================#

for (N in N.cond) { #sample size
  
  for (I in I.cond) { #number of items
    
    for (K in K.cond) { #number of booklets
      
      for (r in 1:reps) { #replication
        
        set.seed(8883*(r+1))
        
        ### -- Background questionnaire generation
        # generate background questionnaire data
        backgroud_q <- questionnaire_gen(n_obs = N, 
                                         cat_prop = cat_pr, 
                                         cor_matrix = cor_mat, theta = TRUE)
        
        ### -- Cognitive assessment generation
        # generate item parameters for a 1PL model
        set.seed(4362) # fix item parameters across replications
        item_pool <- lsasim::item_gen(n_1pl = I, 
                                      thresholds = 1, 
                                      b_bounds = c(-2, 2))
        
        set.seed(8883*(r+1))
        
        # assign items to block
        block_bk1 <- lsasim::block_design(n_blocks = K, 
                                          item_parameters = item_pool)
        
        #assign block to booklet
        book_bk1 <- lsasim::booklet_design(item_block_assignment = 
                                             block_bk1$block_assignment,
                                           book_design = matrix(K))
        #assign booklet to subjects
        book_samp <- lsasim::booklet_sample(n_subj = N, 
                                            book_item_design = book_bk1, 
                                            book_prob = NULL)
        
        # generate item responses 
        cog <- lsasim::response_gen(subject = book_samp$subject, item = book_samp$item, 
                                    theta = backgroud_q$theta, 
                                    b_par = item_pool$b)
        
        # -- Merge questionnaire data  with cognitive assessment data
        final_data <- merge(backgroud_q, cog, by = "subject")
        
        # extract item responses (excluding "subject" column)
        resp <- cog[, c(1:I)]
        
        #------------------------------------------------------------------------------#
        # Model estimation
        #------------------------------------------------------------------------------#
        
        # model 1: fit Rasch model using mirt package
        mirt.mod <- NULL
        mirt.mod <- mirt::mirt(resp, 1, itemtype = 'Rasch', verbose = F)
        
        # model 2: fit Rasch model using TAM package
        tam.mod <- NULL
        tam.mod <- TAM::tam.mml(resp)
        
        # model 3: fit Rasch model with latent regressors using TAM package
        
        # latent regressors Y
        Y <- final_data[, paste0("q", 1:5)] # define regressors
        indx <- sapply(Y, is.factor) # convert factor to numeric
        Y[indx] <- lapply(Y[indx], function(x) as.numeric(as.character(x)))
        Y[Y == 2] <- 0 # recode 2 as 0
        
        tam.mod.2 <- NULL
        tam.mod.2 <- TAM::tam.mml(resp,  Y=Y)
        
        #------------------------------------------------------------------------------#
        # Person parameter extraction
        #------------------------------------------------------------------------------#
        
        # extract thetas
        final_data$mirt_EAP <- c(fscores(mirt.mod, method="EAP")) 
        final_data$tam_WLE <- tam.wle(tam.mod)$theta  
        final_data$tam_REG <- tam.wle(tam.mod.2)$theta  
        
        # summarize background variables (qs), generalized theta, and estimated thetas
        FS <- final_data[,c("subject", "theta", paste0("q", 1:5), 
                            "mirt_EAP", "tam_WLE", "tam_REG")]
        
        # summarize results
        person <- data.frame(matrix(c(N, I, K, r), nrow = 1))
        colnames(person) <- c("N", "I", "K", "rep")
        person <- cbind(person, FS)
        
        # combine results
        results <- rbind(results, person)
        
      }
    }
  }
}

 

Summary:

 

  • The five binary background variables were labeled as q1, q2, q3, q4, and q5. For each variable, we compared the generating theta difference between group 1 and group 2 ( θg1 - θg2 ) to estimated theta difference ( $\hat{\theta_{g1}}$ - $\hat{\theta_{g2}}$ ) by WLE and EAP estimates.

  • theta stands for the generating theta value, mirt_EAP stands for the EAP estimator calibrated by mirt package, tam_WLE stands for the WLE estimator calibrated by TAM package, and tam_REG stands for the WLE estimator generated using a model with regressors by TAM package.


 

  • Group differences by variable 1 (q1)

 

q1 <- aggregate(cbind(theta, mirt_EAP, tam_WLE, tam_REG) ~ N + I + K + q1 , 
                data=results, mean, na.rm=TRUE)
q1 <- merge(q1[q1$q1==1,], q1[q1$q1==2,], by=c("N", "I", "K"))

q1$gentheta_diff <- round(q1$theta.x - q1$theta.y, 3)
q1$mirt_EAP_diff <- round(q1$mirt_EAP.x - q1$mirt_EAP.y, 3)
q1$tam_WLE_diff <- round(q1$tam_WLE.x - q1$tam_WLE.y, 3)
q1$tam_REG_diff <- round(q1$tam_REG.x - q1$tam_REG.y, 3)

person.q1 <- q1[, c("N", "I", "K", "q1.x", "q1.y", 
                    "gentheta_diff", "mirt_EAP_diff", "tam_WLE_diff", "tam_REG_diff")]
print(person.q1)
##      N  I K q1.x q1.y gentheta_diff mirt_EAP_diff tam_WLE_diff tam_REG_diff
## 1 2000 20 1    1    2         0.396         0.308        0.398        0.398
## 2 2000 40 1    1    2         0.396         0.347        0.397        0.397
## 3 2000 60 1    1    2         0.396         0.362        0.398        0.398
## 4 4000 20 1    1    2         0.407         0.315        0.407        0.407
## 5 4000 40 1    1    2         0.407         0.356        0.408        0.408
## 6 4000 60 1    1    2         0.407         0.371        0.407        0.407
## 7 6000 20 1    1    2         0.401         0.312        0.403        0.403
## 8 6000 40 1    1    2         0.401         0.349        0.400        0.400
## 9 6000 60 1    1    2         0.401         0.365        0.401        0.402

 

  • Group differences by variable 2 (q2)

 

q2 <- aggregate(cbind(theta, mirt_EAP, tam_WLE, tam_REG) ~ N + I + K + q2 , 
                data=results, mean, na.rm=TRUE)
q2 <- merge(q2[q2$q2==1,], q2[q2$q2==2,], by=c("N", "I", "K"))

q2$gentheta_diff <- round(q2$theta.x - q2$theta.y, 3)
q2$mirt_EAP_diff <- round(q2$mirt_EAP.x - q2$mirt_EAP.y, 3)
q2$tam_WLE_diff <- round(q2$tam_WLE.x - q2$tam_WLE.y, 3)
q2$tam_REG_diff <- round(q2$tam_REG.x - q2$tam_REG.y, 3)

person.q2 <- q2[, c("N", "I", "K", "q2.x", "q2.y", 
                    "gentheta_diff", "mirt_EAP_diff", "tam_WLE_diff", "tam_REG_diff")]
print(person.q2)
##      N  I K q2.x q2.y gentheta_diff mirt_EAP_diff tam_WLE_diff tam_REG_diff
## 1 2000 20 1    1    2        -0.743        -0.575       -0.742       -0.743
## 2 2000 40 1    1    2        -0.743        -0.651       -0.744       -0.745
## 3 2000 60 1    1    2        -0.743        -0.679       -0.745       -0.746
## 4 4000 20 1    1    2        -0.744        -0.577       -0.743       -0.744
## 5 4000 40 1    1    2        -0.744        -0.652       -0.744       -0.745
## 6 4000 60 1    1    2        -0.744        -0.679       -0.744       -0.745
## 7 6000 20 1    1    2        -0.747        -0.579       -0.745       -0.745
## 8 6000 40 1    1    2        -0.747        -0.653       -0.746       -0.746
## 9 6000 60 1    1    2        -0.747        -0.682       -0.748       -0.748

 

  • Group differences by variable 3 (q3)

 

q3 <- aggregate(cbind(theta, mirt_EAP, tam_WLE, tam_REG) ~ N + I + K + q3 , 
                data=results, mean, na.rm=TRUE)
q3 <- merge(q3[q3$q3==1,], q3[q3$q3==2,], by=c("N", "I", "K"))

q3$gentheta_diff <- round(q3$theta.x - q3$theta.y, 3)
q3$mirt_EAP_diff <- round(q3$mirt_EAP.x - q3$mirt_EAP.y, 3)
q3$tam_WLE_diff <- round(q3$tam_WLE.x - q3$tam_WLE.y, 3)
q3$tam_REG_diff <- round(q3$tam_REG.x - q3$tam_REG.y, 3)

person.q3 <- q3[, c("N", "I", "K", "q3.x", "q3.y", 
                    "gentheta_diff", "mirt_EAP_diff", "tam_WLE_diff", "tam_REG_diff")]
print(person.q3)
##      N  I K q3.x q3.y gentheta_diff mirt_EAP_diff tam_WLE_diff tam_REG_diff
## 1 2000 20 1    1    2         0.775         0.602        0.775        0.776
## 2 2000 40 1    1    2         0.775         0.681        0.777        0.777
## 3 2000 60 1    1    2         0.775         0.709        0.777        0.777
## 4 4000 20 1    1    2         0.776         0.603        0.776        0.777
## 5 4000 40 1    1    2         0.776         0.680        0.776        0.776
## 6 4000 60 1    1    2         0.776         0.708        0.776        0.776
## 7 6000 20 1    1    2         0.778         0.603        0.776        0.776
## 8 6000 40 1    1    2         0.778         0.682        0.778        0.779
## 9 6000 60 1    1    2         0.778         0.710        0.778        0.778

 

  • Group differences by variable 4 (q4)

 

q4 <- aggregate(cbind(theta, mirt_EAP, tam_WLE, tam_REG) ~ N + I + K + q4 , 
                data=results, mean, na.rm=TRUE)
q4 <- merge(q4[q4$q4==1,], q4[q4$q4==2,], by=c("N", "I", "K"))

q4$gentheta_diff <- round(q4$theta.x - q4$theta.y, 3)
q4$mirt_EAP_diff <- round(q4$mirt_EAP.x - q4$mirt_EAP.y, 3)
q4$tam_WLE_diff <- round(q4$tam_WLE.x - q4$tam_WLE.y, 3)
q4$tam_REG_diff <- round(q4$tam_REG.x - q4$tam_REG.y, 3)

person.q4 <- q4[, c("N", "I", "K", "q4.x", "q4.y", 
                    "gentheta_diff", "mirt_EAP_diff", "tam_WLE_diff", "tam_REG_diff")]
print(person.q4)
##      N  I K q4.x q4.y gentheta_diff mirt_EAP_diff tam_WLE_diff tam_REG_diff
## 1 2000 20 1    1    2         0.269         0.206        0.267        0.267
## 2 2000 40 1    1    2         0.269         0.237        0.271        0.271
## 3 2000 60 1    1    2         0.269         0.243        0.267        0.267
## 4 4000 20 1    1    2         0.276         0.212        0.274        0.274
## 5 4000 40 1    1    2         0.276         0.242        0.277        0.277
## 6 4000 60 1    1    2         0.276         0.251        0.276        0.276
## 7 6000 20 1    1    2         0.282         0.217        0.280        0.280
## 8 6000 40 1    1    2         0.282         0.247        0.282        0.282
## 9 6000 60 1    1    2         0.282         0.257        0.283        0.283

 

  • Group differences by variable 5 (q5)

 

q5 <- aggregate(cbind(theta, mirt_EAP, tam_WLE, tam_REG) ~ N + I + K + q5 , 
                data=results, mean, na.rm=TRUE)
q5 <- merge(q5[q5$q5==1,], q5[q5$q5==2,], by=c("N", "I", "K"))

q5$gentheta_diff <- round(q5$theta.x - q5$theta.y, 3)
q5$mirt_EAP_diff <- round(q5$mirt_EAP.x - q5$mirt_EAP.y, 3)
q5$tam_WLE_diff <- round(q5$tam_WLE.x - q5$tam_WLE.y, 3)
q5$tam_REG_diff <- round(q5$tam_REG.x - q5$tam_REG.y, 3)

person.q5 <- q5[, c("N", "I", "K", "q5.x", "q5.y", 
                    "gentheta_diff", "mirt_EAP_diff", "tam_WLE_diff", "tam_REG_diff")]
print(person.q5)
##      N  I K q5.x q5.y gentheta_diff mirt_EAP_diff tam_WLE_diff tam_REG_diff
## 1 2000 20 1    1    2         0.661         0.508        0.657        0.658
## 2 2000 40 1    1    2         0.661         0.575        0.658        0.659
## 3 2000 60 1    1    2         0.661         0.603        0.663        0.664
## 4 4000 20 1    1    2         0.651         0.500        0.647        0.647
## 5 4000 40 1    1    2         0.651         0.566        0.649        0.649
## 6 4000 60 1    1    2         0.651         0.594        0.653        0.653
## 7 6000 20 1    1    2         0.651         0.503        0.650        0.650
## 8 6000 40 1    1    2         0.651         0.569        0.652        0.652
## 9 6000 60 1    1    2         0.651         0.591        0.650        0.650
⚠️ **GitHub.com Fallback** ⚠️