Unrotated booklet design Rasch model with latent regressors - tmatta/lsasim GitHub Wiki
We examined known- and estimated-group differences under the following conditions: 1 (IRT model) x 2 (IRT R packages) x 3 (sample sizes) x 3 (test lengths) x 1 (test booklet). Latent trait and five corresponding background variables were generated based on the specified marginal distributions and correlation matrix.
- One IRT model was included: Rasch model
- Item parameters were randomly generated
- The bounds of the item difficulty parameter, b, are constrained to
b_bounds = (-2, 2)
where -2 is the lowest generating value and 2 is the highest generating value
- Two IRT R packages were evaluated:
TAM
(version 2.4-9) andmirt
(version 1.25). Different estimation methods were used:- Warm's weighted likelihood estimates (WLE) with/without regressors using
TAM
- Expected-a-posteriori (EAP) using
mirt
- Warm's weighted likelihood estimates (WLE) with/without regressors using
- Three sample sizes were used: 2000, 4000, and 6000
- Simulated samples were generated based on the specified marginal distributions and correlation matrix
- Three test lengths were used: 20, 40, and 60
- A single booklet was used
- One hundred replications were used for each condition for the calibration
- Latent trait recovery:
- We compared the known group-differences and group differences estimated by WLE and EAP. The results showed that WLE reproduced known group differences better than EAP
# Load libraries
if(!require(lsasim)){
install.packages("lsasim")
library(lsasim) #version 1.0.1
}
if(!require(mirt)){
install.packages("mirt")
library(mirt) #version 1.25
}
if(!require(TAM)){
install.packages("TAM")
library(TAM) #version 2.4-9
}
### -- Background questionnaire generation
# Define marginal probabilities
var_q0 <- 1
var_q1 <- c(.25, 1)
var_q2 <- c(.33, 1)
var_q3 <- c(.50, 1)
var_q4 <- c(.67, 1)
var_q5 <- c(.88, 1)
cat_pr <- list(var_q0, var_q1, var_q2, var_q3, var_q4, var_q5)
# Define correlation matrix
cor_mat <- matrix(c( 1.000, -0.237, 0.454, -0.487, -0.170, -0.343,
-0.237, 1.000, -0.712, -0.307, -0.263, -0.537,
0.454, -0.712, 1.000, 0.140, -0.331, 0.523,
-0.487, -0.307, 0.140, 1.000, 0.467, 0.239,
-0.170, -0.263, -0.331, 0.467, 1.000, -0.258,
-0.343, -0.537, 0.523, 0.239, -0.258, 1.000), nrow=6)
# Set up conditions
N.cond <- c(2000, 4000, 6000) #number of sample sizes
I.cond <- c(20, 40, 60) #number of items
K.cond <- 1 #number of booklets
# Set up number of replications
reps <- 100
# Create space for outputs
results <- NULL
#==============================================================================#
# START SIMULATION
#==============================================================================#
for (N in N.cond) { #sample size
for (I in I.cond) { #number of items
for (K in K.cond) { #number of booklets
for (r in 1:reps) { #replication
set.seed(8883*(r+1))
### -- Background questionnaire generation
# generate background questionnaire data
backgroud_q <- questionnaire_gen(n_obs = N,
cat_prop = cat_pr,
cor_matrix = cor_mat, theta = TRUE)
### -- Cognitive assessment generation
# generate item parameters for a 1PL model
set.seed(4362) # fix item parameters across replications
item_pool <- lsasim::item_gen(n_1pl = I,
thresholds = 1,
b_bounds = c(-2, 2))
set.seed(8883*(r+1))
# assign items to block
block_bk1 <- lsasim::block_design(n_blocks = K,
item_parameters = item_pool)
#assign block to booklet
book_bk1 <- lsasim::booklet_design(item_block_assignment =
block_bk1$block_assignment,
book_design = matrix(K))
#assign booklet to subjects
book_samp <- lsasim::booklet_sample(n_subj = N,
book_item_design = book_bk1,
book_prob = NULL)
# generate item responses
cog <- lsasim::response_gen(subject = book_samp$subject, item = book_samp$item,
theta = backgroud_q$theta,
b_par = item_pool$b)
# -- Merge questionnaire data with cognitive assessment data
final_data <- merge(backgroud_q, cog, by = "subject")
# extract item responses (excluding "subject" column)
resp <- cog[, c(1:I)]
#------------------------------------------------------------------------------#
# Model estimation
#------------------------------------------------------------------------------#
# model 1: fit Rasch model using mirt package
mirt.mod <- NULL
mirt.mod <- mirt::mirt(resp, 1, itemtype = 'Rasch', verbose = F)
# model 2: fit Rasch model using TAM package
tam.mod <- NULL
tam.mod <- TAM::tam.mml(resp)
# model 3: fit Rasch model with latent regressors using TAM package
# latent regressors Y
Y <- final_data[, paste0("q", 1:5)] # define regressors
indx <- sapply(Y, is.factor) # convert factor to numeric
Y[indx] <- lapply(Y[indx], function(x) as.numeric(as.character(x)))
Y[Y == 2] <- 0 # recode 2 as 0
tam.mod.2 <- NULL
tam.mod.2 <- TAM::tam.mml(resp, Y=Y)
#------------------------------------------------------------------------------#
# Person parameter extraction
#------------------------------------------------------------------------------#
# extract thetas
final_data$mirt_EAP <- c(fscores(mirt.mod, method="EAP"))
final_data$tam_WLE <- tam.wle(tam.mod)$theta
final_data$tam_REG <- tam.wle(tam.mod.2)$theta
# summarize background variables (qs), generalized theta, and estimated thetas
FS <- final_data[,c("subject", "theta", paste0("q", 1:5),
"mirt_EAP", "tam_WLE", "tam_REG")]
# summarize results
person <- data.frame(matrix(c(N, I, K, r), nrow = 1))
colnames(person) <- c("N", "I", "K", "rep")
person <- cbind(person, FS)
# combine results
results <- rbind(results, person)
}
}
}
}
Summary:
-
The five binary background variables were labeled as q1, q2, q3, q4, and q5. For each variable, we compared the generating theta difference between group 1 and group 2 ( θg1 - θg2 ) to estimated theta difference (
$\hat{\theta_{g1}}$ -$\hat{\theta_{g2}}$ ) by WLE and EAP estimates. -
theta
stands for the generating theta value,mirt_EAP
stands for the EAP estimator calibrated bymirt
package,tam_WLE
stands for the WLE estimator calibrated byTAM
package, andtam_REG
stands for the WLE estimator generated using a model with regressors byTAM
package.
- Group differences by variable 1 (q1)
q1 <- aggregate(cbind(theta, mirt_EAP, tam_WLE, tam_REG) ~ N + I + K + q1 ,
data=results, mean, na.rm=TRUE)
q1 <- merge(q1[q1$q1==1,], q1[q1$q1==2,], by=c("N", "I", "K"))
q1$gentheta_diff <- round(q1$theta.x - q1$theta.y, 3)
q1$mirt_EAP_diff <- round(q1$mirt_EAP.x - q1$mirt_EAP.y, 3)
q1$tam_WLE_diff <- round(q1$tam_WLE.x - q1$tam_WLE.y, 3)
q1$tam_REG_diff <- round(q1$tam_REG.x - q1$tam_REG.y, 3)
person.q1 <- q1[, c("N", "I", "K", "q1.x", "q1.y",
"gentheta_diff", "mirt_EAP_diff", "tam_WLE_diff", "tam_REG_diff")]
print(person.q1)
## N I K q1.x q1.y gentheta_diff mirt_EAP_diff tam_WLE_diff tam_REG_diff
## 1 2000 20 1 1 2 0.396 0.308 0.398 0.398
## 2 2000 40 1 1 2 0.396 0.347 0.397 0.397
## 3 2000 60 1 1 2 0.396 0.362 0.398 0.398
## 4 4000 20 1 1 2 0.407 0.315 0.407 0.407
## 5 4000 40 1 1 2 0.407 0.356 0.408 0.408
## 6 4000 60 1 1 2 0.407 0.371 0.407 0.407
## 7 6000 20 1 1 2 0.401 0.312 0.403 0.403
## 8 6000 40 1 1 2 0.401 0.349 0.400 0.400
## 9 6000 60 1 1 2 0.401 0.365 0.401 0.402
- Group differences by variable 2 (q2)
q2 <- aggregate(cbind(theta, mirt_EAP, tam_WLE, tam_REG) ~ N + I + K + q2 ,
data=results, mean, na.rm=TRUE)
q2 <- merge(q2[q2$q2==1,], q2[q2$q2==2,], by=c("N", "I", "K"))
q2$gentheta_diff <- round(q2$theta.x - q2$theta.y, 3)
q2$mirt_EAP_diff <- round(q2$mirt_EAP.x - q2$mirt_EAP.y, 3)
q2$tam_WLE_diff <- round(q2$tam_WLE.x - q2$tam_WLE.y, 3)
q2$tam_REG_diff <- round(q2$tam_REG.x - q2$tam_REG.y, 3)
person.q2 <- q2[, c("N", "I", "K", "q2.x", "q2.y",
"gentheta_diff", "mirt_EAP_diff", "tam_WLE_diff", "tam_REG_diff")]
print(person.q2)
## N I K q2.x q2.y gentheta_diff mirt_EAP_diff tam_WLE_diff tam_REG_diff
## 1 2000 20 1 1 2 -0.743 -0.575 -0.742 -0.743
## 2 2000 40 1 1 2 -0.743 -0.651 -0.744 -0.745
## 3 2000 60 1 1 2 -0.743 -0.679 -0.745 -0.746
## 4 4000 20 1 1 2 -0.744 -0.577 -0.743 -0.744
## 5 4000 40 1 1 2 -0.744 -0.652 -0.744 -0.745
## 6 4000 60 1 1 2 -0.744 -0.679 -0.744 -0.745
## 7 6000 20 1 1 2 -0.747 -0.579 -0.745 -0.745
## 8 6000 40 1 1 2 -0.747 -0.653 -0.746 -0.746
## 9 6000 60 1 1 2 -0.747 -0.682 -0.748 -0.748
- Group differences by variable 3 (q3)
q3 <- aggregate(cbind(theta, mirt_EAP, tam_WLE, tam_REG) ~ N + I + K + q3 ,
data=results, mean, na.rm=TRUE)
q3 <- merge(q3[q3$q3==1,], q3[q3$q3==2,], by=c("N", "I", "K"))
q3$gentheta_diff <- round(q3$theta.x - q3$theta.y, 3)
q3$mirt_EAP_diff <- round(q3$mirt_EAP.x - q3$mirt_EAP.y, 3)
q3$tam_WLE_diff <- round(q3$tam_WLE.x - q3$tam_WLE.y, 3)
q3$tam_REG_diff <- round(q3$tam_REG.x - q3$tam_REG.y, 3)
person.q3 <- q3[, c("N", "I", "K", "q3.x", "q3.y",
"gentheta_diff", "mirt_EAP_diff", "tam_WLE_diff", "tam_REG_diff")]
print(person.q3)
## N I K q3.x q3.y gentheta_diff mirt_EAP_diff tam_WLE_diff tam_REG_diff
## 1 2000 20 1 1 2 0.775 0.602 0.775 0.776
## 2 2000 40 1 1 2 0.775 0.681 0.777 0.777
## 3 2000 60 1 1 2 0.775 0.709 0.777 0.777
## 4 4000 20 1 1 2 0.776 0.603 0.776 0.777
## 5 4000 40 1 1 2 0.776 0.680 0.776 0.776
## 6 4000 60 1 1 2 0.776 0.708 0.776 0.776
## 7 6000 20 1 1 2 0.778 0.603 0.776 0.776
## 8 6000 40 1 1 2 0.778 0.682 0.778 0.779
## 9 6000 60 1 1 2 0.778 0.710 0.778 0.778
- Group differences by variable 4 (q4)
q4 <- aggregate(cbind(theta, mirt_EAP, tam_WLE, tam_REG) ~ N + I + K + q4 ,
data=results, mean, na.rm=TRUE)
q4 <- merge(q4[q4$q4==1,], q4[q4$q4==2,], by=c("N", "I", "K"))
q4$gentheta_diff <- round(q4$theta.x - q4$theta.y, 3)
q4$mirt_EAP_diff <- round(q4$mirt_EAP.x - q4$mirt_EAP.y, 3)
q4$tam_WLE_diff <- round(q4$tam_WLE.x - q4$tam_WLE.y, 3)
q4$tam_REG_diff <- round(q4$tam_REG.x - q4$tam_REG.y, 3)
person.q4 <- q4[, c("N", "I", "K", "q4.x", "q4.y",
"gentheta_diff", "mirt_EAP_diff", "tam_WLE_diff", "tam_REG_diff")]
print(person.q4)
## N I K q4.x q4.y gentheta_diff mirt_EAP_diff tam_WLE_diff tam_REG_diff
## 1 2000 20 1 1 2 0.269 0.206 0.267 0.267
## 2 2000 40 1 1 2 0.269 0.237 0.271 0.271
## 3 2000 60 1 1 2 0.269 0.243 0.267 0.267
## 4 4000 20 1 1 2 0.276 0.212 0.274 0.274
## 5 4000 40 1 1 2 0.276 0.242 0.277 0.277
## 6 4000 60 1 1 2 0.276 0.251 0.276 0.276
## 7 6000 20 1 1 2 0.282 0.217 0.280 0.280
## 8 6000 40 1 1 2 0.282 0.247 0.282 0.282
## 9 6000 60 1 1 2 0.282 0.257 0.283 0.283
- Group differences by variable 5 (q5)
q5 <- aggregate(cbind(theta, mirt_EAP, tam_WLE, tam_REG) ~ N + I + K + q5 ,
data=results, mean, na.rm=TRUE)
q5 <- merge(q5[q5$q5==1,], q5[q5$q5==2,], by=c("N", "I", "K"))
q5$gentheta_diff <- round(q5$theta.x - q5$theta.y, 3)
q5$mirt_EAP_diff <- round(q5$mirt_EAP.x - q5$mirt_EAP.y, 3)
q5$tam_WLE_diff <- round(q5$tam_WLE.x - q5$tam_WLE.y, 3)
q5$tam_REG_diff <- round(q5$tam_REG.x - q5$tam_REG.y, 3)
person.q5 <- q5[, c("N", "I", "K", "q5.x", "q5.y",
"gentheta_diff", "mirt_EAP_diff", "tam_WLE_diff", "tam_REG_diff")]
print(person.q5)
## N I K q5.x q5.y gentheta_diff mirt_EAP_diff tam_WLE_diff tam_REG_diff
## 1 2000 20 1 1 2 0.661 0.508 0.657 0.658
## 2 2000 40 1 1 2 0.661 0.575 0.658 0.659
## 3 2000 60 1 1 2 0.661 0.603 0.663 0.664
## 4 4000 20 1 1 2 0.651 0.500 0.647 0.647
## 5 4000 40 1 1 2 0.651 0.566 0.649 0.649
## 6 4000 60 1 1 2 0.651 0.594 0.653 0.653
## 7 6000 20 1 1 2 0.651 0.503 0.650 0.650
## 8 6000 40 1 1 2 0.651 0.569 0.652 0.652
## 9 6000 60 1 1 2 0.651 0.591 0.650 0.650