Rotated booklet design Rasch model with latent regressors - tmatta/lsasim GitHub Wiki
We examined latent trait recovery under the following conditions: 1 (IRT model) x 2 (IRT R packages) x 3 (sample sizes) x 3 (test lengths) x 3 (test booklets). Latent trait and five corresponding background variables were generated based on the specified marginal distributions and correlation matrix.
- One IRT model was included: Rasch model
- Item parameters were randomly generated
- The bounds of the item difficulty parameter, b, are constrained to
b_bounds = (-2, 2)
where -2 is the lowest generating value and 2 is the highest generating value
- Two IRT R packages were evaluated:
TAM
(version 2.4-9) andmirt
(version 1.25). These employed different estimation methods. These are:- Warm's weighted likelihood estimates (WLE) with/without regressors using
TAM
- Expected-a-posteriori (EAP) using
mirt
- Warm's weighted likelihood estimates (WLE) with/without regressors using
- Three sample sizes were used: 2000, 4000, and 6000
- Simulated samples were generated based on the specified marginal distributions and correlation matrix
- Three test lengths were used: 20, 40, and 60
- Three test booklets were used: 5, 10, and 15 booklets
- One hundred replications were used for each condition for the calibration
- Latent trait recovery:
- We compared the known group-differences and group differences estimated by WLE and EAP. The results showed that WLE reproduced known group differences better than EAP
# Load libraries
if(!require(lsasim)){
install.packages("lsasim")
library(lsasim) #version 1.0.1
}
if(!require(mirt)){
install.packages("mirt")
library(mirt) #version 1.25
}
if(!require(TAM)){
install.packages("TAM")
library(TAM) #version 2.4-9
}
### -- Background questionnaire generation
# Define marginal probabilities
var_q0 <- 1
var_q1 <- c(.15, 1)
var_q2 <- c(.30, 1)
var_q3 <- c(.45, 1)
var_q4 <- c(.60, 1)
var_q5 <- c(.88, 1)
cat_pr <- list(var_q0, var_q1, var_q2, var_q3, var_q4, var_q5)
# Define correlation matrix
cor_mat <- matrix(c( 1.000, -0.012, -0.343, -0.211, 0.489, -0.632,
-0.012, 1.000, 0.359, -0.229, -0.503, 0.461,
-0.343, 0.359, 1.000, -0.386, -0.803, 0.189,
-0.211, -0.229, -0.386, 1.000, -0.029, 0.071,
0.489, -0.503, -0.803, -0.029, 1.000, -0.504,
-0.632, 0.461, 0.189, 0.071, -0.504, 1.000), nrow=6)
# Set up conditions
N.cond <- c(2000, 4000, 6000) #number of sample sizes
I.cond <- c(20, 40, 60) #number of items
K.cond <- c(5, 10, 15) #number of booklets
# Set up number of replications
reps <- 100
# Create space for outputs
results <- NULL
#==============================================================================#
# START SIMULATION
#==============================================================================#
for (N in N.cond) { #sample size
for (I in I.cond) { #number of items
for (K in K.cond) { #number of booklets
for (r in 1:reps) { #replication
set.seed(8088*(r+2))
### -- Background questionnaire generation
# generate background questionnaire data
backgroud_q <- questionnaire_gen(n_obs = N,
cat_prop = cat_pr,
cor_matrix = cor_mat, theta = TRUE)
### -- Cognitive assessment generation
# generate item parameters for a 1PL model
set.seed(4362) # fix item parameters across replications
item_pool <- lsasim::item_gen(n_1pl = I,
thresholds = 1,
b_bounds = c(-2, 2))
set.seed(8883*(r+2))
# assign items to blocks
blocks <- lsasim::block_design(n_blocks = K,
item_parameters = item_pool,
item_block_matrix = NULL)
#assign blocks to booklets
books <- lsasim::booklet_design(item_block_assignment = blocks$block_assignment,
book_design = NULL)
#assign booklets to subjects
book_samp <- lsasim::booklet_sample(n_subj = N,
book_item_design = books,
book_prob = NULL)
# generate item responses
cog <- lsasim::response_gen(subject = book_samp$subject,
item = book_samp$item,
theta = backgroud_q$theta,
b_par = item_pool$b)
# -- Merge questionnaire data with cognitive assessment data
final_data <- merge(backgroud_q, cog, by = "subject")
# extract item responses (excluding "subject" column)
resp <- cog[, c(1:I)]
#------------------------------------------------------------------------------#
# Model estimation
#------------------------------------------------------------------------------#
# model 1: fit Rasch model using mirt package
mirt.mod <- NULL
mirt.mod <- mirt::mirt(resp, 1, itemtype = 'Rasch', verbose = F)
# model 2: fit Rasch model using TAM package
tam.mod <- NULL
tam.mod <- TAM::tam.mml(resp)
# model 3: fit Rasch model with latent regressors using TAM package
# latent regressors Y
Y <- final_data[, paste0("q", 1:5)] # define regressors
indx <- sapply(Y, is.factor) # convert factor to numeric
Y[indx] <- lapply(Y[indx], function(x) as.numeric(as.character(x)))
Y[Y == 2] <- 0 # recode 2 as 0
tam.mod.2 <- NULL
tam.mod.2 <- TAM::tam.mml(resp, Y=Y)
#------------------------------------------------------------------------------#
# Person parameter extraction
#------------------------------------------------------------------------------#
# extract thetas
final_data$mirt_EAP <- c(fscores(mirt.mod, method="EAP"))
final_data$tam_WLE <- tam.wle(tam.mod)$theta
final_data$tam_REG <- tam.wle(tam.mod.2)$theta
# summarize background variables (qs), generalized theta, and estimated thetas
FS <- final_data[,c("subject", "theta", paste0("q", 1:5),
"mirt_EAP", "tam_WLE", "tam_REG")]
# summarize results
person <- data.frame(matrix(c(N, I, K, r), nrow = 1))
colnames(person) <- c("N", "I", "K", "rep")
person <- cbind(person, FS)
# combine results
results <- rbind(results, person)
}
}
}
}
Summary:
-
The five binary background variables were labeled as q1, q2, q3, q4, and q5. For each variable, we compared the generating theta difference between group 1 and group 2 ( θg1 - θg2 ) to estimated theta difference (
$\hat{\theta_{g1}}$ -$\hat{\theta_{g2}}$ ) by WLE and EAP estimates. -
theta
stands for generating theta,mirt_EAP
stands for EAP calibrated bymirt
,tam_WLE
stands for WLE calibrated byTAM
, andtam_REG
stands for WLE generated using a model with regressors byTAM
.
- Group differences by variable 1 (q1)
q1 <- aggregate(cbind(theta, mirt_EAP, tam_WLE, tam_REG) ~ N + I + K + q1 ,
data=results, mean, na.rm=TRUE)
q1 <- merge(q1[q1$q1==1,], q1[q1$q1==2,], by=c("N", "I", "K"))
q1$gentheta_diff <- round(q1$theta.x - q1$theta.y, 3)
q1$mirt_EAP_diff <- round(q1$mirt_EAP.x - q1$mirt_EAP.y, 3)
q1$tam_WLE_diff <- round(q1$tam_WLE.x - q1$tam_WLE.y, 3)
q1$tam_REG_diff <- round(q1$tam_REG.x - q1$tam_REG.y, 3)
person.q1 <- q1[, c("N", "I", "K", "q1.x", "q1.y",
"gentheta_diff", "mirt_EAP_diff", "tam_WLE_diff", "tam_REG_diff")]
person.q1 <- person.q1[order(person.q1$N, person.q1$I, person.q1$K),]
row.names(person.q1) <- NULL; print(person.q1)
## N I K q1.x q1.y gentheta_diff mirt_EAP_diff tam_WLE_diff tam_REG_diff
## 1 2000 20 5 1 2 0.015 0.005 0.008 0.009
## 2 2000 20 10 1 2 0.015 0.005 0.010 0.012
## 3 2000 20 15 1 2 0.015 -0.001 -0.003 -0.003
## 4 2000 40 5 1 2 0.015 0.008 0.011 0.012
## 5 2000 40 10 1 2 0.015 0.005 0.008 0.009
## 6 2000 40 15 1 2 0.015 0.004 0.007 0.008
## 7 2000 60 5 1 2 0.015 0.016 0.019 0.019
## 8 2000 60 10 1 2 0.015 0.007 0.010 0.011
## 9 2000 60 15 1 2 0.015 0.005 0.009 0.010
## 10 4000 20 5 1 2 0.029 0.019 0.034 0.034
## 11 4000 20 10 1 2 0.029 0.008 0.017 0.016
## 12 4000 20 15 1 2 0.029 0.006 0.014 0.014
## 13 4000 40 5 1 2 0.029 0.021 0.029 0.029
## 14 4000 40 10 1 2 0.029 0.017 0.028 0.029
## 15 4000 40 15 1 2 0.029 0.016 0.031 0.031
## 16 4000 60 5 1 2 0.029 0.020 0.025 0.026
## 17 4000 60 10 1 2 0.029 0.020 0.030 0.030
## 18 4000 60 15 1 2 0.029 0.018 0.030 0.030
## 19 6000 20 5 1 2 0.022 0.011 0.019 0.019
## 20 6000 20 10 1 2 0.022 0.009 0.020 0.020
## 21 6000 20 15 1 2 0.022 0.008 0.020 0.020
## 22 6000 40 5 1 2 0.022 0.016 0.021 0.022
## 23 6000 40 10 1 2 0.022 0.015 0.025 0.025
## 24 6000 40 15 1 2 0.022 0.009 0.018 0.018
## 25 6000 60 5 1 2 0.022 0.019 0.023 0.023
## 26 6000 60 10 1 2 0.022 0.019 0.028 0.028
## 27 6000 60 15 1 2 0.022 0.014 0.024 0.024
- Group differences by variable 2 (q2)
q2 <- aggregate(cbind(theta, mirt_EAP, tam_WLE, tam_REG) ~ N + I + K + q2 ,
data=results, mean, na.rm=TRUE)
q2 <- merge(q2[q2$q2==1,], q2[q2$q2==2,], by=c("N", "I", "K"))
q2$gentheta_diff <- round(q2$theta.x - q2$theta.y, 3)
q2$mirt_EAP_diff <- round(q2$mirt_EAP.x - q2$mirt_EAP.y, 3)
q2$tam_WLE_diff <- round(q2$tam_WLE.x - q2$tam_WLE.y, 3)
q2$tam_REG_diff <- round(q2$tam_REG.x - q2$tam_REG.y, 3)
person.q2 <- q2[, c("N", "I", "K", "q2.x", "q2.y",
"gentheta_diff", "mirt_EAP_diff", "tam_WLE_diff", "tam_REG_diff")]
person.q2 <- person.q2[order(person.q2$N, person.q2$I, person.q2$K),]
row.names(person.q2) <- NULL; print(person.q2)
## N I K q2.x q2.y gentheta_diff mirt_EAP_diff tam_WLE_diff tam_REG_diff
## 1 2000 20 5 1 2 0.569 0.331 0.554 0.555
## 2 2000 20 10 1 2 0.569 0.235 0.500 0.502
## 3 2000 20 15 1 2 0.569 0.170 0.409 0.411
## 4 2000 40 5 1 2 0.569 0.420 0.568 0.569
## 5 2000 40 10 1 2 0.569 0.335 0.553 0.554
## 6 2000 40 15 1 2 0.569 0.277 0.533 0.535
## 7 2000 60 5 1 2 0.569 0.459 0.569 0.570
## 8 2000 60 10 1 2 0.569 0.385 0.566 0.567
## 9 2000 60 15 1 2 0.569 0.325 0.547 0.549
## 10 4000 20 5 1 2 0.569 0.327 0.552 0.552
## 11 4000 20 10 1 2 0.569 0.227 0.487 0.488
## 12 4000 20 15 1 2 0.569 0.174 0.417 0.418
## 13 4000 40 5 1 2 0.569 0.419 0.567 0.567
## 14 4000 40 10 1 2 0.569 0.330 0.549 0.550
## 15 4000 40 15 1 2 0.569 0.274 0.530 0.531
## 16 4000 60 5 1 2 0.569 0.457 0.568 0.568
## 17 4000 60 10 1 2 0.569 0.384 0.567 0.567
## 18 4000 60 15 1 2 0.569 0.330 0.555 0.556
## 19 6000 20 5 1 2 0.565 0.325 0.547 0.548
## 20 6000 20 10 1 2 0.565 0.230 0.492 0.492
## 21 6000 20 15 1 2 0.565 0.171 0.414 0.414
## 22 6000 40 5 1 2 0.565 0.417 0.564 0.564
## 23 6000 40 10 1 2 0.565 0.329 0.548 0.548
## 24 6000 40 15 1 2 0.565 0.274 0.529 0.530
## 25 6000 60 5 1 2 0.565 0.455 0.565 0.565
## 26 6000 60 10 1 2 0.565 0.381 0.561 0.561
## 27 6000 60 15 1 2 0.565 0.324 0.545 0.546
- Group differences by variable 3 (q3)
q3 <- aggregate(cbind(theta, mirt_EAP, tam_WLE, tam_REG) ~ N + I + K + q3 ,
data=results, mean, na.rm=TRUE)
q3 <- merge(q3[q3$q3==1,], q3[q3$q3==2,], by=c("N", "I", "K"))
q3$gentheta_diff <- round(q3$theta.x - q3$theta.y, 3)
q3$mirt_EAP_diff <- round(q3$mirt_EAP.x - q3$mirt_EAP.y, 3)
q3$tam_WLE_diff <- round(q3$tam_WLE.x - q3$tam_WLE.y, 3)
q3$tam_REG_diff <- round(q3$tam_REG.x - q3$tam_REG.y, 3)
person.q3 <- q3[, c("N", "I", "K", "q3.x", "q3.y",
"gentheta_diff", "mirt_EAP_diff", "tam_WLE_diff", "tam_REG_diff")]
person.q3 <- person.q3[order(person.q3$N, person.q3$I, person.q3$K),]
row.names(person.q3) <- NULL; print(person.q3)
## N I K q3.x q3.y gentheta_diff mirt_EAP_diff tam_WLE_diff tam_REG_diff
## 1 2000 20 5 1 2 0.344 0.202 0.339 0.339
## 2 2000 20 10 1 2 0.344 0.146 0.311 0.311
## 3 2000 20 15 1 2 0.344 0.104 0.253 0.253
## 4 2000 40 5 1 2 0.344 0.253 0.342 0.343
## 5 2000 40 10 1 2 0.344 0.201 0.333 0.333
## 6 2000 40 15 1 2 0.344 0.166 0.320 0.321
## 7 2000 60 5 1 2 0.344 0.283 0.351 0.351
## 8 2000 60 10 1 2 0.344 0.232 0.343 0.343
## 9 2000 60 15 1 2 0.344 0.201 0.338 0.339
## 10 4000 20 5 1 2 0.337 0.193 0.324 0.325
## 11 4000 20 10 1 2 0.337 0.137 0.295 0.296
## 12 4000 20 15 1 2 0.337 0.105 0.251 0.252
## 13 4000 40 5 1 2 0.337 0.247 0.334 0.335
## 14 4000 40 10 1 2 0.337 0.193 0.320 0.321
## 15 4000 40 15 1 2 0.337 0.161 0.311 0.312
## 16 4000 60 5 1 2 0.337 0.271 0.337 0.337
## 17 4000 60 10 1 2 0.337 0.225 0.332 0.332
## 18 4000 60 15 1 2 0.337 0.195 0.327 0.328
## 19 6000 20 5 1 2 0.337 0.194 0.327 0.327
## 20 6000 20 10 1 2 0.337 0.136 0.293 0.293
## 21 6000 20 15 1 2 0.337 0.102 0.246 0.247
## 22 6000 40 5 1 2 0.337 0.248 0.336 0.337
## 23 6000 40 10 1 2 0.337 0.195 0.324 0.325
## 24 6000 40 15 1 2 0.337 0.162 0.313 0.313
## 25 6000 60 5 1 2 0.337 0.270 0.335 0.336
## 26 6000 60 10 1 2 0.337 0.225 0.332 0.332
## 27 6000 60 15 1 2 0.337 0.195 0.328 0.328
- Group differences by variable 4 (q4)
q4 <- aggregate(cbind(theta, mirt_EAP, tam_WLE, tam_REG) ~ N + I + K + q4 ,
data=results, mean, na.rm=TRUE)
q4 <- merge(q4[q4$q4==1,], q4[q4$q4==2,], by=c("N", "I", "K"))
q4$gentheta_diff <- round(q4$theta.x - q4$theta.y, 3)
q4$mirt_EAP_diff <- round(q4$mirt_EAP.x - q4$mirt_EAP.y, 3)
q4$tam_WLE_diff <- round(q4$tam_WLE.x - q4$tam_WLE.y, 3)
q4$tam_REG_diff <- round(q4$tam_REG.x - q4$tam_REG.y, 3)
person.q4 <- q4[, c("N", "I", "K", "q4.x", "q4.y",
"gentheta_diff", "mirt_EAP_diff", "tam_WLE_diff", "tam_REG_diff")]
person.q4 <- person.q4[order(person.q4$N, person.q4$I, person.q4$K),]
row.names(person.q4) <- NULL; print(person.q4)
## N I K q4.x q4.y gentheta_diff mirt_EAP_diff tam_WLE_diff tam_REG_diff
## 1 2000 20 5 1 2 -0.794 -0.467 -0.781 -0.782
## 2 2000 20 10 1 2 -0.794 -0.330 -0.703 -0.705
## 3 2000 20 15 1 2 -0.794 -0.242 -0.585 -0.586
## 4 2000 40 5 1 2 -0.794 -0.589 -0.794 -0.795
## 5 2000 40 10 1 2 -0.794 -0.466 -0.770 -0.771
## 6 2000 40 15 1 2 -0.794 -0.384 -0.737 -0.740
## 7 2000 60 5 1 2 -0.794 -0.643 -0.796 -0.797
## 8 2000 60 10 1 2 -0.794 -0.539 -0.790 -0.792
## 9 2000 60 15 1 2 -0.794 -0.456 -0.767 -0.769
## 10 4000 20 5 1 2 -0.786 -0.453 -0.763 -0.763
## 11 4000 20 10 1 2 -0.786 -0.318 -0.685 -0.687
## 12 4000 20 15 1 2 -0.786 -0.240 -0.573 -0.574
## 13 4000 40 5 1 2 -0.786 -0.581 -0.784 -0.785
## 14 4000 40 10 1 2 -0.786 -0.454 -0.753 -0.754
## 15 4000 40 15 1 2 -0.786 -0.381 -0.736 -0.738
## 16 4000 60 5 1 2 -0.786 -0.634 -0.786 -0.786
## 17 4000 60 10 1 2 -0.786 -0.531 -0.780 -0.781
## 18 4000 60 15 1 2 -0.786 -0.459 -0.770 -0.771
## 19 6000 20 5 1 2 -0.785 -0.454 -0.764 -0.765
## 20 6000 20 10 1 2 -0.785 -0.321 -0.689 -0.689
## 21 6000 20 15 1 2 -0.785 -0.240 -0.580 -0.581
## 22 6000 40 5 1 2 -0.785 -0.580 -0.783 -0.783
## 23 6000 40 10 1 2 -0.785 -0.459 -0.761 -0.761
## 24 6000 40 15 1 2 -0.785 -0.381 -0.735 -0.736
## 25 6000 60 5 1 2 -0.785 -0.634 -0.787 -0.787
## 26 6000 60 10 1 2 -0.785 -0.534 -0.784 -0.784
## 27 6000 60 15 1 2 -0.785 -0.454 -0.763 -0.764
- Group differences by variable 5 (q5)
q5 <- aggregate(cbind(theta, mirt_EAP, tam_WLE, tam_REG) ~ N + I + K + q5 ,
data=results, mean, na.rm=TRUE)
q5 <- merge(q5[q5$q5==1,], q5[q5$q5==2,], by=c("N", "I", "K"))
q5$gentheta_diff <- round(q5$theta.x - q5$theta.y, 3)
q5$mirt_EAP_diff <- round(q5$mirt_EAP.x - q5$mirt_EAP.y, 3)
q5$tam_WLE_diff <- round(q5$tam_WLE.x - q5$tam_WLE.y, 3)
q5$tam_REG_diff <- round(q5$tam_REG.x - q5$tam_REG.y, 3)
person.q5 <- q5[, c("N", "I", "K", "q5.x", "q5.y",
"gentheta_diff", "mirt_EAP_diff", "tam_WLE_diff", "tam_REG_diff")]
person.q5 <- person.q5[order(person.q5$N, person.q5$I, person.q5$K),]
row.names(person.q5) <- NULL; print(person.q5)
## N I K q5.x q5.y gentheta_diff mirt_EAP_diff tam_WLE_diff tam_REG_diff
## 1 2000 20 5 1 2 1.200 0.691 1.161 1.164
## 2 2000 20 10 1 2 1.200 0.485 1.033 1.035
## 3 2000 20 15 1 2 1.200 0.361 0.873 0.877
## 4 2000 40 5 1 2 1.200 0.882 1.199 1.200
## 5 2000 40 10 1 2 1.200 0.697 1.154 1.156
## 6 2000 40 15 1 2 1.200 0.571 1.100 1.103
## 7 2000 60 5 1 2 1.200 0.966 1.202 1.203
## 8 2000 60 10 1 2 1.200 0.807 1.188 1.189
## 9 2000 60 15 1 2 1.200 0.690 1.162 1.165
## 10 4000 20 5 1 2 1.203 0.691 1.170 1.171
## 11 4000 20 10 1 2 1.203 0.487 1.048 1.049
## 12 4000 20 15 1 2 1.203 0.366 0.871 0.874
## 13 4000 40 5 1 2 1.203 0.883 1.201 1.202
## 14 4000 40 10 1 2 1.203 0.689 1.151 1.153
## 15 4000 40 15 1 2 1.203 0.573 1.110 1.112
## 16 4000 60 5 1 2 1.203 0.965 1.202 1.202
## 17 4000 60 10 1 2 1.203 0.808 1.193 1.194
## 18 4000 60 15 1 2 1.203 0.696 1.171 1.173
## 19 6000 20 5 1 2 1.192 0.685 1.155 1.155
## 20 6000 20 10 1 2 1.192 0.482 1.035 1.036
## 21 6000 20 15 1 2 1.192 0.362 0.872 0.873
## 22 6000 40 5 1 2 1.192 0.872 1.186 1.186
## 23 6000 40 10 1 2 1.192 0.686 1.144 1.144
## 24 6000 40 15 1 2 1.192 0.568 1.099 1.100
## 25 6000 60 5 1 2 1.192 0.959 1.194 1.195
## 26 6000 60 10 1 2 1.192 0.799 1.179 1.180
## 27 6000 60 15 1 2 1.192 0.686 1.156 1.157