Exploring the Theta Estimate in the IRT model (2PL) - Private-Projects237/Statistics GitHub Wiki

Overview

This guide shows how, to the best of our knowledge, to correctly estimate theta (ability) values after fitting a two-parameter logistic (2PL) IRT model.

Step 1: Generating data

######## Generate the data ###########
# Simulates binary item responses from a 2PL IRT model and stores them in
# `dummy_data2` (one row per subject: an ID column followed by Item1..ItemN).

# Set seed for reproducibility
set.seed(123)

# Number of subjects and items
n_subjects <- 800
n_items <- 15

# Step 1: Simulate subject abilities (theta) from N(0, 1)
theta <- rnorm(n_subjects, mean = 0, sd = 1)

# Step 2: Define item parameters
b <- seq(-2, 2, length.out = n_items)      # Difficulties
a <- runif(n_items, min = 0.5, max = 2.0)  # Discriminations

# Step 3: Generate response probabilities using the 2PL formula
#   P(X_ij = 1) = 1 / (1 + exp(-a_j * (theta_i - b_j)))
# outer() calls the function once with both arguments expanded to
# n_subjects * n_items elements, so referring to the bare length-n_items
# vector `a` inside it would recycle `a` down the subjects instead of pairing
# it with the items. Passing the item index and looking up a[j] and b[j]
# keeps each item's discrimination and difficulty together.
probs <- outer(
  theta, seq_len(n_items),
  function(th, j) 1 / (1 + exp(-a[j] * (th - b[j])))
)

# Step 4: Simulate binary responses (0s and 1s) based on probabilities
# as.vector() and matrix() are both column-major, so cell [i, j] of the
# response matrix is drawn with probability probs[i, j].
response_data2 <- matrix(
  rbinom(n_subjects * n_items, 1, as.vector(probs)),
  nrow = n_subjects, ncol = n_items
)

# Step 5: Create data frame with IDs and proper column names
ids <- 1:n_subjects
dummy_data2 <- data.frame(ID = ids, response_data2)
colnames(dummy_data2) <- c("ID", paste0("Item", 1:n_items))

################ Reorder items by difficulty (descending number correct)
# Columns are reordered so the item answered correctly most often comes first.
items_only <- dummy_data2[, -1]  # drop the ID column

# Base-R sort with decreasing = TRUE; this avoids depending on dplyr::desc(),
# so the step runs without dplyr attached.
descened_item_names <- names(sort(colSums(items_only), decreasing = TRUE))

dummy_data3 <- dummy_data2[, c("ID", descened_item_names)]
items_only2 <- dummy_data3[, -1]

# Print the per-item number correct to confirm the new column order
colSums(items_only2)

Step 2: Running the IRT Model

################### Run IRT model
library(ltm)

# Fit a 2PL model with one latent trait (z1); IRT.param = TRUE requests the
# usual IRT parameterization of the coefficients.
irt_mod <- ltm(items_only2 ~ z1, IRT.param = TRUE)

# EAP ability estimates, requested for every subject's response pattern
theta_scores <- factor.scores(
  irt_mod,
  resp.patterns = items_only2,
  method = "EAP"
)

# NOTE: from here on `theta` holds the rounded model-based estimates and
# replaces any earlier object of the same name (e.g. simulated abilities).
theta <- round(theta_scores$score.dat$z1, 3)

Step 3: Introduce Estimates and Metrics Back into the Original Dataset

############## Calculate number correct and z-scores
# Raw sum score per subject
num_correct <- rowSums(items_only2)

# scale() returns a one-column matrix; as.vector() flattens it so `z_scores`
# is stored below as an ordinary numeric column, not a matrix column.
z_scores <- round(as.vector(scale(num_correct)), 3)

################## Add scores back to the dataset
dummy_data4 <- dummy_data3
dummy_data4$num_correct <- num_correct
dummy_data4$z_scores <- z_scores
dummy_data4$theta <- theta

Step 4: Visualize the table

################# Visually inspect the data
# Estimated item difficulties, rounded for display
round(data.frame(coef(irt_mod))$Dffclt, 3)

# Sort subjects by standardized sum score, then by estimated theta
arranged_data <- dplyr::arrange(dummy_data4, z_scores, theta)

# Build one data frame per distinct number-correct value, in sorted order
unique_num_correct <- unique(arranged_data$num_correct)
arranged_data_list <- vector("list", length(unique_num_correct))
for (ii in seq_along(unique_num_correct)) {

  # Extract each dataset; `[[ii]]` stores the whole data frame as one element
  arranged_data_list[[ii]] <- dplyr::filter(
    arranged_data,
    num_correct == unique_num_correct[ii]
  )

}

arranged_data_list
Item Difficulty
0 correct responses
1 correct responses
2 correct responses
3 correct responses

Correlation Matrix

## Correlation matrix
# Pairwise correlations between the columns from num_correct through theta,
# rounded to 3 decimals. dplyr::select is namespace-qualified and no pipe is
# used, so this runs without dplyr or magrittr attached.
score_columns <- dplyr::select(arranged_data, num_correct:theta)
round(cor(score_columns), 3)
Correlation Matrix Between Num of Correct Items, z-scores, and theta values