5B. Day5 Code and Scripts - bioinfokushwaha/Livestock_Genomics GitHub Wiki
This code will be used for Affymetrix expression data analysis
- Set the working directory first before running this code
The working directory is the folder where you want to keep the input file and save the output file
Graphically: Rstudio: Session--->Set working directory ---> Select folder
by Command : setwd("D:/PROJECT/Therese/Final_Processing/QTL_Region_DEGs/")
dir()
- How to run the R script and instructions
put the blinking cursor on the line and press the Ctrl + Enter
or
Select the command and press run button
- Load the libraries
library(affyio)
library(affy)
library(limma)
library(gcrma)
library(biomaRt)
library(maskBAD)
human = useMart("ensembl", dataset = "hsapiens_gene_ensembl")
annotLookup <- getBM(
mart = human,
attributes = c(
'affy_hg_u133_plus_2',
'external_gene_name',
'hgnc_symbol'))
- loading the annotation file
annotfile <- 'G://My Drive//NIAB workshop//affy_hg_u133_plus_22021_01_15.tsv'
annotLookup <- read.csv(annotfile,header = TRUE,sep = '\t',stringsAsFactors = FALSE)
colnames(annotLookup)[1] <- 'AffyID'
NoSymbol <- (annotLookup$hgnc_symbol=="") | is.na(annotLookup$hgnc_symbol)
annotLookup <- annotLookup[-which(NoSymbol),]
library(stringr)
celpath = 'G://My Drive//NIAB workshop//examples//example 2//E-GEOD-29850'
list = list.files(celpath,full.names=TRUE)
data = ReadAffy(celfile.path=celpath) # Reading the affymetrix CEL files
- reading intensities
ap <- mas5calls(data)
ap <- exprs(ap)
ap.table <- apply(ap,2,table)
ap.freq.table <- ap.table / apply(ap.table, 2, sum)
rsum <- rowSums(ap=='A', na.rm=TRUE)
apr=rownames(ap)[which(rsum==0)]
# running GCRMA normalization
data.gcrma = gcrma(data,background=TRUE, normalize=TRUE,target="core")
data.gcmatrix = exprs(data.gcrma)
idx=match(rownames(data.gcmatrix),apr)
data.gcmatrix=data.gcmatrix[which(!is.na(idx)),]
idx=match(rownames(data.gcmatrix),annotLookup$AffyID)
data.gcmatrix=data.gcmatrix[which(!is.na(idx)),]
idx=idx[which(!is.na(idx))]
rownames(data.gcmatrix)=annotLookup$hgnc_symbol[idx]
matx=as.data.frame(data.gcmatrix)
#matx=cbind(rownames(matx),matx)
#matx$X=rownames(as.data.frame(matx))
#rownames(matx) <- NULL
fname="E-GEOD-29850.csv"
write.csv(matx,paste0("mapped_",fname))