Plotting Asterisk Lines in Follow‐Up Tests - Private-Projects237/Statistics GitHub Wiki

Overview

Here we will review two examples that use dummy data to plot graphs with significance lines and asterisks, which make your plots much more informative.

Part 1: Contrasting the Levels of a Factor (Main Effect)

Create the data

### Part 1: Simulating Dummy Data (No Interaction)

library(lme4)
library(car)
library(emmeans)
library(ggplot2)
library(ggpubr)

set.seed(123)

# Long-format dummy dataset: n subjects, three rows per subject, with the
# three condition labels cycling down the rows
n <- 30
data <- data.frame(
  subject = factor(rep(seq_len(n), each = 3)),
  condition = factor(
    rep(c("baseline", "exercise", "sleep_deprivation"), times = n)
  )
)

# Outcome = 70 + a condition-specific shift + normal noise (sd = 4).
# The shifts live in a named lookup vector indexed by the condition label;
# unname() keeps the resulting column a plain numeric vector.
data$performance <- with(
  data,
  70 +
    unname(
      c(baseline = 0, exercise = -5, sleep_deprivation = -10)[
        as.character(condition)
      ]
    ) +
    rnorm(nrow(data), mean = 0, sd = 4)
)

# Layer a second, larger noise term (sd = 20) on top of the outcome
data$performance <- data$performance +
  rnorm(n = nrow(data), mean = 0, sd = 20)
Dataset Description
Screenshot 2025-03-31 at 7 43 29 PM

Run a model and follow it with the Omnibus Test

# Mixed model with condition as the only fixed effect and a random
# intercept for each subject
model <- lmer(
  formula = performance ~ condition + (1 | subject),
  data = data
)

# Omnibus test (Type III) of the fixed effects
Anova(model, type = "III")
Omnibus Test
Screenshot 2025-03-31 at 7 48 59 PM

Calculate the Estimated Marginal Means for Conditions and their pairwise comparison contrasts

# Estimated marginal means for each level of condition, kept both as an
# emmGrid object (for contrasts) and as a data frame (for plotting)
emm <- emmeans(model, specs = ~ condition)
emm_df <- as.data.frame(emm)

# All pairwise comparisons between conditions, Tukey-adjusted
contrasts <- contrast(
  emm,
  method = "pairwise",
  adjust = "tukey"
)
contrasts_df <- as.data.frame(contrasts)
summary(contrasts)

Take the contrast information to create a dataset with asterisk information

# Height on the y-axis at which each bracket/asterisk is drawn, one value
# per row of contrasts_df (same order as the contrasts)
y.position <- c(82, 85, 78)

# Guard against silent recycling if the number of contrasts ever changes
stopifnot(length(y.position) == nrow(contrasts_df))

# Each contrast is labelled "level1 - level2". Split on the literal " - "
# separator with base R (the stringr package is not loaded in this script).
contrast_levels <- strsplit(
  as.character(contrasts_df$contrast),
  " - ",
  fixed = TRUE
)

# One row per bracket, in the column layout stat_pvalue_manual() expects:
# the two groups being compared, the bracket height and the adjusted p-value
pval_df <- data.frame(
  group1 = vapply(contrast_levels, function(x) x[1], character(1)),
  group2 = vapply(contrast_levels, function(x) x[2], character(1)),
  y.position = y.position,
  p.adj = contrasts_df$p.value
)

# Create a label that converts p-values into asterisks.
# include.lowest = TRUE keeps a p-value of exactly 0 in the "***" bin
# instead of turning it into NA.
pval_df$label <- cut(pval_df$p.adj,
                     breaks = c(0, 0.001, 0.01, 0.05, 1),
                     labels = c("***", "**", "*", "ns"),
                     include.lowest = TRUE)
Contrast Output Asterisk Dataframe

Plot the data

# Point estimates with confidence-interval error bars for each condition,
# overlaid with the significance brackets described in pval_df
ggplot(data = emm_df, mapping = aes(x = condition, y = emmean)) +
  geom_point(size = 3) +
  geom_errorbar(
    mapping = aes(ymin = lower.CL, ymax = upper.CL),
    width = 0.2
  ) +
  stat_pvalue_manual(
    pval_df,
    label = "label",
    tip.length = 0.01,
    bracket.size = 0.5
  ) +
  labs(
    title = "Estimated Marginal Means with Significance and 95% CI",
    x = "Condition",
    y = "Performance"
  ) +
  theme_minimal() +
  # Zoom the y-axis without dropping any data from the layers
  coord_cartesian(ylim = c(45, 88))
Plot with Asterisk Information

Part 2: Contrasting the Levels of a Factor within Another Factor (Interaction)

Create the data

set.seed(123)
library(lme4)
library(car)
library(emmeans)
library(ggplot2)
library(ggpubr)

# Fully crossed design: one row for every subject x stress x sex combination
n <- 20
data <- expand.grid(
  subject = factor(seq_len(n)),
  stress = c("baseline", "exercise", "sleep_deprivation"),
  sex = c("M", "F")
)

# Outcome = 70 + sex shift + stress shift + normal noise (sd = 5).
# Shifts are read from named lookup vectors indexed by the factor labels;
# unname() keeps the resulting column a plain numeric vector.
data$performance <- with(
  data,
  70 +
    unname(c(M = 0, F = 5)[as.character(sex)]) +
    unname(
      c(baseline = 0, exercise = -10, sleep_deprivation = -15)[
        as.character(stress)
      ]
    ) +
    rnorm(nrow(data), mean = 0, sd = 5)
)
Data Description

Run a model and follow it with the Omnibus Test

# Fit mixed model: stress, sex and their interaction as fixed effects,
# with a random intercept for each subject.
# Sum-to-zero contrasts are requested for both factors because Type III
# tests of main effects in a model that contains an interaction are only
# meaningful under orthogonal coding (e.g. contr.sum). With R's default
# treatment coding, the "main effect" rows would instead test simple
# effects at the reference level of the other factor.
# The estimated marginal means and pairwise contrasts computed later do
# not depend on this coding choice.
model <- lmer(
  performance ~ stress * sex + (1 | subject),
  data = data,
  contrasts = list(stress = "contr.sum", sex = "contr.sum")
)

# Omnibus Test
Anova(model, type = "III")
Omnibus Test (not significant, but we continue with the follow-up contrasts for demonstration purposes)

Calculate the Estimated Marginal Means for Conditions and their pairwise comparison contrasts

# Estimated marginal means of stress computed separately within each sex,
# kept both as an emmGrid object and as a data frame for plotting
emm <- emmeans(model, specs = ~ stress | sex)
emm_df <- as.data.frame(emm)

# Pairwise comparisons between stress levels (Tukey-adjusted), one set
# per level of sex
contrasts <- contrast(
  emm,
  method = "pairwise",
  adjust = "tukey"
)
contrasts_df <- as.data.frame(contrasts)

Take the contrast information to create a dataset with asterisk information

# Height on the y-axis at which each bracket/asterisk is drawn, one value
# per row of contrasts_df; adjust so they sit above the highest mean
y.position <- c(74, 76, 63, 78, 80, 70)

# Guard against silent recycling if the number of contrasts ever changes
stopifnot(length(y.position) == nrow(contrasts_df))

# Name of the moderator (the "by" factor): the second column of emm_df.
# The same column name appears in contrasts_df, so it is selected by name.
moderator <- names(emm_df)[2]

# Each contrast is labelled "level1 - level2". Split on the literal " - "
# separator with base R (the stringr package is not loaded in this script).
contrast_levels <- strsplit(
  as.character(contrasts_df$contrast),
  " - ",
  fixed = TRUE
)

# One row per bracket: the moderator level (used for faceting), the two
# groups being compared, the bracket height and the adjusted p-value
pval_df <- data.frame(
  mod = as.character(contrasts_df[[moderator]]),
  group1 = vapply(contrast_levels, function(x) x[1], character(1)),
  group2 = vapply(contrast_levels, function(x) x[2], character(1)),
  y.position = y.position,
  p.adj = contrasts_df$p.value
)

# Rename the mod to the name of the actual moderator so the facet variable
# in the plot can match it
names(pval_df)[1] <- moderator

# Turn into asterisks if preferred.
# include.lowest = TRUE keeps a p-value of exactly 0 in the "***" bin
# instead of turning it into NA.
pval_df$label <- cut(pval_df$p.adj,
                     breaks = c(0, 0.001, 0.01, 0.05, 1),
                     labels = c("***", "**", "*", "ns"),
                     include.lowest = TRUE)
Contrast Output Asterisk Dataframe

Plot the data

# Point estimates with confidence-interval error bars for each stress
# level, one panel per sex, overlaid with the brackets from pval_df
ggplot(
  data = emm_df,
  mapping = aes(x = stress, y = emmean, color = sex, group = sex)
) +
  geom_point(size = 3, position = position_dodge(width = 0.3)) +
  geom_errorbar(
    mapping = aes(ymin = lower.CL, ymax = upper.CL),
    position = position_dodge(width = 0.3),
    width = 0.2
  ) +
  facet_wrap(~ sex) +
  # The bracket layer carries its own columns, so it must not inherit the
  # plot-level aesthetics
  stat_pvalue_manual(
    pval_df,
    label = "label",
    tip.length = 0.01,
    bracket.size = 0.5,
    inherit.aes = FALSE
  ) +
  labs(
    title = "Estimated Marginal Means with Significance and 95% CI",
    x = "Stress Condition",
    y = "Performance"
  ) +
  # Zoom the y-axis without dropping any data from the layers
  coord_cartesian(ylim = c(50, 80)) +
  theme_classic()
Plot with Asterisk Information