Lecture 9 R Script - mlloyd23/bio211_JAN2018 GitHub Wiki
#Let's go back to our linear model for the effect of seed number on distance
model<-lm()
summary()
plot()
#this command allows us to view 4 plots at once
par(mfrow=c(2,2))
#plot the diagnostic plots for the model
plot()
#calculate residuals
residuals<-resid()
hist()
shapiro.test()
#Let's try a power transformation
tuk<-transformTukey()
hist()
seed.data<-cbind()
head()
#run the model again
model2<-lm()
summary()
#look at the diagnostic plots
par(mfrow=c(2,2))
plot()
#calculate residuals
residuals2<-resid()
hist()
shapiro.test()
#Remove the outlier
no.outlier<-subset()
dim()
dim()
model3<-lm()
summary()
par(mfrow=c(2,2))
plot()
par(mfrow=c(1,1))
plot()
int =
slope =
abline()
##Diagnostic plots for cars linear model
cars.model<-lm()
plot()
summary()
par(mfrow=c(2,2))
plot()
#calculate the residuals
cars.residuals<-resid()
hist()
shapiro.test()
##########################
#Logistic regression
##########################
#How do GRE (Graduate Record Exam scores),
#GPA (grade point average) and prestige of
#the undergraduate institution effect admission into graduate school?
install.packages("aod")
library(aod)
admit.data <- read.csv("https://stats.idre.ucla.edu/stat/data/binary.csv")
head(admit.data)
#two-way contingency table of categorical outcome and predictors we want
#to make sure there are not 0 cells
xtabs()
admit.data$rank <- factor(admit.data$rank)
binom.model <- glm()
summary()
#For every one unit change in gre, the log odds of admission
#(versus non-admission) increases by 0.002.
#For a one unit increase in gpa, the log odds of being
#admitted to graduate school increases by 0.804.
#Ranks 2, 3, and 4 all compared to 1
#having attended an undergraduate institution with rank of 2,
#versus an institution with a rank of 1, changes the log odds
#of admission by -0.675.
#Predict admission to graduate school
#YOU WILL NOT BE EXPECTED TO RECREATE THIS CODE!!!
#First we want to calculate the predicted probability of admission
#at each value of rank, holding gre and gpa at their means
#Star by making a new data frame
newdata1 <- with(admit.data, data.frame(gre = mean(gre),
gpa = mean(gpa), rank = factor(1:4)))
newdata1
#add rankP column which contains the predicted probability
#of admission of the different ranked schools if gpa and gre are constant
newdata1$rankP <- predict(binom.model, newdata = newdata1, type = "response")
newdata1
#the next line of code sets up a data frame with varied values
#for gre and gpa
newdata2 <- with(admit.data, data.frame(gre = rep(seq(from = 200,
to = 800, length.out = 100),4), gpa = mean(gpa),
rank = factor(rep(1:4, each = 100))))
head(newdata2)
#and the next lines of code predicts the admission probabilities
newdata3 <- cbind(newdata2, predict(binom.model, newdata = newdata2,
type = "link", se = TRUE))
newdata3 <- within(newdata3, {
PredictedProb <- plogis(fit)
LL <- plogis(fit - (1.96 * se.fit))
UL <- plogis(fit + (1.96 * se.fit))
})
head(newdata3)
#Now we can plot this to make a nice visual of the model
library(ggplot2)
ggplot(newdata3, aes(x = gre, y = PredictedProb)) +
geom_ribbon(aes(ymin = LL, ymax = UL, fill = rank), alpha = 0.2) +
geom_line(aes(colour = rank), size = 1)