Lecture 9 R Script - mlloyd23/bio211_JAN2018 GitHub Wiki


#Let's go back to our linear model for the effect of seed number on distance

model<-lm()
summary()

plot()

#this command allows us to view 4 plots at once
par(mfrow=c(2,2))

#plot the diagnostic plots for the model
plot()

#calculate residuals
residuals<-resid()
hist()
shapiro.test()

#Let's try a power transformation
tuk<-transformTukey()
hist()
seed.data<-cbind()
head()

#run the model again
model2<-lm()
summary()

#look at the diagnostic plots
par(mfrow=c(2,2))
plot()

#calculate residuals
residuals2<-resid()
hist()
shapiro.test()

#Remove the outlier
no.outlier<-subset()
dim()
dim()

model3<-lm()
summary()

par(mfrow=c(2,2))
plot()

par(mfrow=c(1,1))
plot()
int =  
slope =
abline()

##Diagnostic plots for cars linear model
cars.model<-lm()
plot()
summary()

par(mfrow=c(2,2))
plot()

#calculate the residuals
cars.residuals<-resid()
hist()
shapiro.test()

##########################
#Logistic regression
##########################
#How do GRE (Graduate Record Exam scores), 
#GPA (grade point average) and prestige of 
#the undergraduate institution effect admission into graduate school?

install.packages("aod")
library(aod)
admit.data <- read.csv("https://stats.idre.ucla.edu/stat/data/binary.csv")
head(admit.data)

#two-way contingency table of categorical outcome and predictors we want
#to make sure there are not 0 cells
xtabs()

admit.data$rank <- factor(admit.data$rank)
binom.model <- glm()
summary()

#For every one unit change in gre, the log odds of admission 
#(versus non-admission) increases by 0.002.

#For a one unit increase in gpa, the log odds of being 
#admitted to graduate school increases by 0.804.

#Ranks 2, 3, and 4 all compared to 1
#having attended an undergraduate institution with rank of 2, 
#versus an institution with a rank of 1, changes the log odds 
#of admission by -0.675.

#Predict admission to graduate school
#YOU WILL NOT BE EXPECTED TO RECREATE THIS CODE!!! 


#First we want to calculate the predicted probability of admission 
#at each value of rank, holding gre and gpa at their means

#Star by making a new data frame
newdata1 <- with(admit.data, data.frame(gre = mean(gre), 
                                        gpa = mean(gpa), rank = factor(1:4)))
newdata1

#add rankP column which contains the predicted probability
#of admission of the different ranked schools if gpa and gre are constant
newdata1$rankP <- predict(binom.model, newdata = newdata1, type = "response")
newdata1

#the next line of code sets up a data frame with varied values 
#for gre and gpa
newdata2 <- with(admit.data, data.frame(gre = rep(seq(from = 200, 
                                                      to = 800, length.out = 100),4), gpa = mean(gpa), 
                                        rank = factor(rep(1:4, each = 100))))
head(newdata2)

#and the next lines of code predicts the admission probabilities
newdata3 <- cbind(newdata2, predict(binom.model, newdata = newdata2, 
                                    type = "link", se = TRUE))

newdata3 <- within(newdata3, {
  PredictedProb <- plogis(fit)
  LL <- plogis(fit - (1.96 * se.fit))
  UL <- plogis(fit + (1.96 * se.fit))
})

head(newdata3)

#Now we can plot this to make a nice visual of the model

library(ggplot2)
ggplot(newdata3, aes(x = gre, y = PredictedProb)) + 
  geom_ribbon(aes(ymin = LL, ymax = UL, fill = rank), alpha = 0.2) + 
  geom_line(aes(colour = rank), size = 1)