Quantile Regression - erynmcfarlane/StatsGenLabProtocols GitHub Wiki
Tutorial
Install the quantreg package first (only needed once), then load the libraries.
# Install quantreg only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("quantreg", quietly = TRUE)) {
  install.packages("quantreg")
}

# Load the packages used below: quantreg supplies rq(), ggplot2 the scatter
# plot.
library(quantreg)
library(ggplot2)
# Fix the RNG seed so the simulated data are reproducible.
set.seed(123)

# Number of simulated observations.
n <- 50

# Draw n integer temperatures from 10..40, sampling with replacement.
temperature <- sample(10:40, n, replace = TRUE)
Simulating sales
# Noise-free sales: a baseline of 25, a linear term in temperature, and a
# quadratic term centred on 30.
iced_coffee_sales_base <- 25 + 2 * (temperature - 10) + (temperature - 30)^2

# Add Gaussian noise (mean 0, sd 2) and round to whole numbers.
iced_coffee_sales <- round(
  iced_coffee_sales_base + rnorm(n, mean = 0, sd = 2),
  0
)
Make a data frame with temperature and sales.
# Combine predictor and response into one data frame for plotting and fitting.
data1 <- data.frame(
  temperature = temperature,
  iced_coffee_sales = iced_coffee_sales
)
Show the simulated points.
# Scatter plot of the simulated sales against temperature.
ggplot(data = data1, mapping = aes(x = temperature, y = iced_coffee_sales)) +
  geom_point(alpha = 0.5) +
  labs(
    title = "Iced Coffee Sales vs. Temperature",
    x = "Temperature (°C)",
    y = "Iced Coffee Sales"
  )
# Fit one quantile regression of sales on temperature per quantile (tau).
# Each fit is its own statement so that the trailing comment on the median
# fit cannot swallow the fit that follows it.
rq_25 <- rq(iced_coffee_sales ~ temperature, tau = 0.25, data = data1)
rq_50 <- rq(iced_coffee_sales ~ temperature, tau = 0.50, data = data1) # Median
rq_75 <- rq(iced_coffee_sales ~ temperature, tau = 0.75, data = data1)
# Print the summary of each quantile fit, lowest quantile first.
print(summary(rq_25))
print(summary(rq_50))
print(summary(rq_75))
# Prediction grid: one row per unit step of temperature from 10 to 40.
new_data <- data.frame(temperature = seq(from = 10, to = 40, by = 1))
## predict
# Evaluate each fitted quantile model on the prediction grid and store the
# results as columns of new_data. The assignments sit on their own lines so
# they are executed rather than being part of the "##predict" comment.
new_data$pred_25 <- predict(rq_25, newdata = new_data)
new_data$pred_50 <- predict(rq_50, newdata = new_data)
new_data$pred_75 <- predict(rq_75, newdata = new_data)
# Base-graphics scatter plot of the simulated data.
plot(
  iced_coffee_sales ~ temperature,
  data = data1,
  pch = 16,
  main = "Iced Coffee Sales ~ Temperature",
  xlab = "Temperature (°C)",
  ylab = "Iced Coffee Sales"
)

# Overlay the least-squares line and the three quantile-regression lines.
# Each call is its own statement so the trailing comments only annotate their
# own line instead of commenting out the calls that follow.
abline(
  lm(iced_coffee_sales ~ temperature, data = data1),
  col = "red",
  lty = 2
) # Linear regression
lines(new_data$temperature, new_data$pred_25, col = "blue", lty = 2) # 25th
lines(new_data$temperature, new_data$pred_50, col = "orange", lty = 2) # 50th
lines(new_data$temperature, new_data$pred_75, col = "green", lty = 2) # 75th
# Key for the four dashed lines, placed in the top-right corner of the plot.
legend(
  "topright",
  legend = c(
    "Linear Model",
    "25th Percentile",
    "50th Percentile",
    "75th Percentile"
  ),
  col = c("red", "blue", "orange", "green"),
  lty = 2
)