Plotting Asterisk Lines in Follow‐Up Tests - Private-Projects237/Statistics GitHub Wiki
Overview
Here we will review two examples that use dummy data to plot graphs with significance brackets and asterisks, which make your plots much more informative.
Part 1: Contrasting the Levels of a Factor (Main Effect)
Create the data
### Part 1: Simulating Dummy Data (No Interaction)
library(lme4)
library(car)
library(emmeans)
library(ggplot2)
library(ggpubr)
# Reproducible dummy data: n subjects, each measured once under every
# condition (long format, one row per subject x condition).
set.seed(123)
n <- 30
data <- data.frame(
  subject = factor(rep(seq_len(n), each = 3)),
  condition = factor(rep(c("baseline", "exercise", "sleep_deprivation"), times = n))
)
# Outcome = grand mean of 70 + condition shift + two independent noise terms.
# The two rnorm() draws are taken in this exact order (sd = 4 first, then
# sd = 20) so the simulated values stay the same for a given seed.
data$performance <- local({
  condition_shift <- c(baseline = 0, exercise = -5, sleep_deprivation = -10)
  small_noise <- rnorm(nrow(data), 0, 4)
  large_noise <- rnorm(n = nrow(data), mean = 0, sd = 20)
  signal <- 70 + unname(condition_shift[as.character(data$condition)]) + small_noise
  signal + large_noise
})
Dataset Description |
---|
Run a model and follow it with the Omnibus Test
# Fit a simple model with only the main effect:
# performance is predicted by condition (fixed effect) with a random
# intercept for each subject to account for the repeated measures.
model <- lmer(performance ~ condition + (1 | subject), data = data)
# Omnibus Test: Type III test of the fixed effect of condition
Anova(model, type = "III")
Omnibus Test |
---|
Calculate the Estimated Marginal Means for Conditions and their pairwise comparison contrasts
# Estimated marginal mean of performance for each condition
emm <- emmeans(object = model, specs = ~ condition)
# Every pairwise difference between conditions, with Tukey-adjusted p-values
contrasts <- contrast(emm, method = "pairwise", adjust = "tukey")
# Plain data frames are what the plotting code works with
emm_df <- as.data.frame(emm)
contrasts_df <- as.data.frame(contrasts)
# Show the contrast table
summary(contrasts)
Take the contrast information to create a dataset with asterisk information
# Build the annotation table consumed by stat_pvalue_manual(): one row per
# pairwise contrast holding the two compared levels (group1 / group2), the
# height at which the bracket is drawn (y.position) and the adjusted p-value.
y.position <- c(82, 85, 78) # Indicates the position of the lines/asterisks
# Contrast labels look like "<level A> - <level B>". Split on the literal
# " - " separator with base strsplit() (stringr is never loaded in this
# script), which also leaves level names that contain a hyphen intact.
# as.character() is needed because the contrast column may be a factor.
contrast_levels <- strsplit(as.character(contrasts_df$contrast), " - ", fixed = TRUE)
pval_df <- data.frame(
  group1 = trimws(vapply(contrast_levels, function(x) x[1], character(1))),
  group2 = trimws(vapply(contrast_levels, function(x) x[2], character(1))),
  y.position = y.position,
  p.adj = contrasts_df$p.value
)
# Create a label that converts p-values into asterisks.
# include.lowest = TRUE keeps a p-value of exactly 0 in the "***" bin
# instead of turning it into NA.
pval_df$label <- cut(pval_df$p.adj,
  breaks = c(0, 0.001, 0.01, 0.05, 1),
  labels = c("***", "**", "*", "ns"),
  include.lowest = TRUE
)
Contrast Output | Asterisk Dataframe |
---|---|
Plot the data
# Plot each condition's estimated marginal mean with its confidence interval,
# then overlay the significance brackets described by pval_df.
ggplot(data = emm_df, mapping = aes(x = condition, y = emmean)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  # Brackets are drawn at pval_df$y.position and labelled from the "label" column
  stat_pvalue_manual(
    pval_df,
    label = "label",
    tip.length = 0.01,
    bracket.size = 0.5
  ) +
  # Fix the visible y-range so the brackets placed above the means fit
  coord_cartesian(ylim = c(45, 88)) +
  labs(
    title = "Estimated Marginal Means with Significance and 95% CI",
    x = "Condition",
    y = "Performance"
  ) +
  theme_minimal()
Plot with Asterisk Information |
---|
Part 2: Contrasting the Levels of a Factor within Another Factor (Interaction)
Create the data
# Fix the random seed so the simulated values below are reproducible
set.seed(123)
# Packages used in this part: lme4 (lmer), car (Anova),
# emmeans (emmeans, contrast), ggplot2 (plotting), ggpubr (stat_pvalue_manual)
library(lme4)
library(car)
library(emmeans)
library(ggplot2)
library(ggpubr)
# Simulate a fully crossed design: every subject x stress x sex combination
# appears exactly once (expand.grid converts the character vectors to factors).
n <- 20
data <- expand.grid(
  subject = factor(seq_len(n)),
  stress = c("baseline", "exercise", "sleep_deprivation"),
  sex = c("M", "F")
)
# Outcome = grand mean of 70 + additive sex shift + additive stress shift +
# Gaussian noise (sd = 5). No interaction is built into the simulated means.
data$performance <- local({
  sex_shift <- c(M = 0, F = 5)
  stress_shift <- c(baseline = 0, exercise = -10, sleep_deprivation = -15)
  noise <- rnorm(nrow(data), 0, 5)
  70 +
    unname(sex_shift[as.character(data$sex)]) +
    unname(stress_shift[as.character(data$stress)]) +
    noise
})
Data Description |
---|
Run a model and follow it with the Omnibus Test
# Fit mixed model: stress, sex and their interaction as fixed effects,
# with a random intercept for each subject
model <- lmer(performance ~ stress * sex + (1 | subject), data = data)
# Omnibus Test: Type III tests of the fixed effects
# NOTE(review): with an interaction in the model, Type III main-effect tests
# depend on the contrast coding; R's default treatment contrasts are in use
# here. Consider sum-to-zero contrasts (contr.sum) -- confirm against the
# car::Anova documentation.
Anova(model, type = "III")
Omnibus Test (not significant, but we run the follow-up contrasts anyway for illustration) |
---|
Calculate the Estimated Marginal Means for each stress condition within each sex, and the pairwise contrasts between stress conditions within each sex
# Estimated marginal means of stress, computed separately within each sex
emm <- emmeans(object = model, specs = ~ stress | sex)
# Pairwise stress contrasts within each sex, with Tukey-adjusted p-values
contrasts <- contrast(emm, method = "pairwise", adjust = "tukey")
# Plain data frames are what the plotting code works with
emm_df <- as.data.frame(emm)
contrasts_df <- as.data.frame(contrasts)
Take the contrast information to create a dataset with asterisk information
# Build the annotation table consumed by stat_pvalue_manual(): one row per
# pairwise contrast, carrying the moderator level it belongs to so that each
# bracket is drawn in the matching facet.
y.position <- c(74, 76, 63, 78, 80, 70) # adjust above highest mean
# Contrast labels look like "<level A> - <level B>". Split on the literal
# " - " separator with base strsplit() (stringr is never loaded in this
# script), which also leaves level names that contain a hyphen intact.
# as.character() is needed because the contrast column may be a factor.
contrast_levels <- strsplit(as.character(contrasts_df$contrast), " - ", fixed = TRUE)
pval_df <- data.frame(
  # Second column of the contrast table holds the moderator (the `by` variable)
  mod = as.character(contrasts_df[[2]]),
  group1 = trimws(vapply(contrast_levels, function(x) x[1], character(1))),
  group2 = trimws(vapply(contrast_levels, function(x) x[2], character(1))),
  y.position = y.position,
  p.adj = contrasts_df$p.value
)
# Rename the mod to the name of the actual moderator, so the column matches
# the faceting variable used in the plot
names(pval_df)[1] <- names(emm_df)[2]
# Turn into asterisks if preferred.
# include.lowest = TRUE keeps a p-value of exactly 0 in the "***" bin
# instead of turning it into NA.
pval_df$label <- cut(pval_df$p.adj,
  breaks = c(0, 0.001, 0.01, 0.05, 1),
  labels = c("***", "**", "*", "ns"),
  include.lowest = TRUE
)
Contrast Output | Asterisk Dataframe |
---|---|
Plot the data
# Plot the estimated marginal means per stress condition, one facet per sex,
# and overlay the within-sex significance brackets described by pval_df.
ggplot(
  data = emm_df,
  mapping = aes(x = stress, y = emmean, color = sex, group = sex)
) +
  geom_point(position = position_dodge(0.3), size = 3) +
  geom_errorbar(
    aes(ymin = lower.CL, ymax = upper.CL),
    width = 0.2,
    position = position_dodge(0.3)
  ) +
  facet_wrap(~ sex) +
  # inherit.aes = FALSE: the bracket layer takes everything from pval_df and
  # must not pick up the color/group mappings of the main plot
  stat_pvalue_manual(
    pval_df,
    label = "label",
    tip.length = 0.01,
    bracket.size = 0.5,
    inherit.aes = FALSE
  ) +
  # Fix the visible y-range used for the means and the brackets
  coord_cartesian(ylim = c(50, 80)) +
  labs(
    title = "Estimated Marginal Means with Significance and 95% CI",
    x = "Stress Condition",
    y = "Performance"
  ) +
  theme_classic()
Plot with Asterisk Information |
---|