Overview
This guide shows how, to the best of our knowledge, to correctly estimate theta (ability) values after fitting a two-parameter logistic (2PL) IRT model.
Step 1: Generating data
######## Generate the data ###########
# Simulates dichotomous item responses under the 2PL model
#   P(X_ij = 1) = 1 / (1 + exp(-a_j * (theta_i - b_j)))
# and stores them in `dummy_data2` (one row per subject, one column per item).
# Set seed for reproducibility
set.seed(123)
# Number of subjects and items
n_subjects <- 800
n_items <- 15
# Step 1: Simulate subject abilities (theta) from N(0, 1)
theta <- rnorm(n_subjects, mean = 0, sd = 1)
# Step 2: Define item parameters
b <- seq(-2, 2, length.out = n_items) # Difficulties
a <- runif(n_items, min = 0.5, max = 2.0) # Discriminations
# Step 3: Generate response probabilities using 2PL formula
# outer() calls the function once with two vectors of length
# n_subjects * n_items, so the item parameters are looked up by item index.
# Referring to a bare `a` inside the function would silently recycle it down
# the subjects instead of pairing a[j] with b[j].
probs <- outer(
  theta, seq_len(n_items),
  function(th, item) 1 / (1 + exp(-a[item] * (th - b[item])))
)
# Step 4: Simulate binary responses (0s and 1s) based on probabilities
# as.vector(probs) is column-major, matching how matrix() refills the draws
response_data2 <- matrix(rbinom(n_subjects * n_items, 1, as.vector(probs)),
                         nrow = n_subjects, ncol = n_items)
# Step 5: Create data frame with IDs and proper column names
ids <- seq_len(n_subjects)
dummy_data2 <- data.frame(ID = ids, response_data2)
colnames(dummy_data2) <- c("ID", paste0("Item", seq_len(n_items)))
################ Reorder items by difficulty (descending number correct)
# Item columns are rearranged so the most frequently correct item comes first
# and the least frequently correct item comes last; ID stays in column 1.
items_only <- dummy_data2[, -1]
# Base sort(decreasing = TRUE) keeps the item names on the column totals and
# does not require dplyr to be attached (desc() is a dplyr function)
descened_item_names <- names(sort(colSums(items_only), decreasing = TRUE))
dummy_data3 <- dummy_data2[, c("ID", descened_item_names)]
items_only2 <- dummy_data3[, -1]
# Print the per-item number correct to confirm the descending order
colSums(items_only2)
Step 2: Running the IRT Model
################### Run IRT model
# Fit the latent trait model to the reordered item responses and pull out
# EAP ability estimates for the response patterns in items_only2.
library(ltm)
irt_mod <- ltm(items_only2 ~ z1, IRT.param = TRUE)
theta_scores <- factor.scores(
  irt_mod,
  resp.patterns = items_only2,
  method = "EAP"
)
# Note: this reuses the name `theta` for the rounded estimates
theta <- round(theta_scores[["score.dat"]][["z1"]], digits = 3)
Step 3: Introduce Estimates and Metrics Back into the Original Dataset
############## Calculate number correct and z-scores
# Number of items answered correctly by each subject
num_correct <- rowSums(items_only2)
# scale() returns a one-column matrix carrying centering/scaling attributes;
# as.numeric() flattens it so the data frame gets an ordinary numeric column
z_scores <- round(as.numeric(scale(num_correct)), 3)
################## Add scores back to the dataset
# Work on a copy so dummy_data3 keeps only the ID and item columns
dummy_data4 <- dummy_data3
dummy_data4$num_correct <- num_correct
dummy_data4$z_scores <- z_scores
dummy_data4$theta <- theta
Step 4: Visualize the table
################# Visually inspect the data
# Print the estimated item difficulties, rounded to 3 decimals
round(data.frame(coef(irt_mod))$Dffclt, 3)
# Sort subjects by standardized number correct, then by estimated theta.
# dplyr verbs are namespace-qualified so they work without library(dplyr)
# and cannot be confused with stats::filter().
arranged_data <- dplyr::arrange(dummy_data4, z_scores, theta)
# Build one data frame per distinct number-correct score, in sorted order
unique_num_correct <- unique(arranged_data$num_correct)
arranged_data_list <- lapply(
  unique_num_correct,
  function(n_right) dplyr::filter(arranged_data, num_correct == n_right)
)
arranged_data_list
Correlation Matrix
## Correlation matrix
# Correlations among the columns from num_correct through theta,
# printed to 3 decimal places.
round(
  cor(dplyr::select(arranged_data, num_correct:theta)),
  3
)
Correlation matrix between the number of correct items, z-scores, and theta values.