5B. Day5 Code and Scripts - bioinfokushwaha/Livestock_Genomics GitHub Wiki

This code will be used for Affymetrix expression data analysis

  1. Set the working directory first before running this code
The working directory is the folder where you want to keep the input file and save the output file

Graphically: Rstudio: Session--->Set working directory ---> Select folder

by Command : setwd("D:/PROJECT/Therese/Final_Processing/QTL_Region_DEGs/")

dir()
  1. How to run the R script and instructions
put the blinking cursor on the line and press the Ctrl + Enter

or

Select the command and press run button

  1. Load the libraries
library(affyio)
library(affy)
library(limma)
library(gcrma)
library(biomaRt)
library(maskBAD)

human = useMart("ensembl", dataset = "hsapiens_gene_ensembl")
  annotLookup <- getBM(
   mart = human,
   attributes = c(
     'affy_hg_u133_plus_2',
      'external_gene_name',
      'hgnc_symbol'))
  1. loading the annotation file
annotfile <- 'G://My Drive//NIAB workshop//affy_hg_u133_plus_22021_01_15.tsv'
annotLookup <- read.csv(annotfile,header = TRUE,sep = '\t',stringsAsFactors = FALSE)

colnames(annotLookup)[1] <- 'AffyID'

NoSymbol <- (annotLookup$hgnc_symbol=="") | is.na(annotLookup$hgnc_symbol)
annotLookup <- annotLookup[-which(NoSymbol),]

library(stringr)

celpath = 'G://My Drive//NIAB workshop//examples//example 2//E-GEOD-29850'

list = list.files(celpath,full.names=TRUE)
data = ReadAffy(celfile.path=celpath) # Reading the affymetrix CEL files

  1. reading intensities
ap <- mas5calls(data)
ap <- exprs(ap)
ap.table <- apply(ap,2,table)
ap.freq.table <- ap.table / apply(ap.table, 2, sum)

rsum <- rowSums(ap=='A', na.rm=TRUE)
apr=rownames(ap)[which(rsum==0)]

# running GCRMA normalization
data.gcrma = gcrma(data,background=TRUE, normalize=TRUE,target="core")
data.gcmatrix = exprs(data.gcrma)


idx=match(rownames(data.gcmatrix),apr)
data.gcmatrix=data.gcmatrix[which(!is.na(idx)),]

idx=match(rownames(data.gcmatrix),annotLookup$AffyID)
data.gcmatrix=data.gcmatrix[which(!is.na(idx)),]
idx=idx[which(!is.na(idx))]
rownames(data.gcmatrix)=annotLookup$hgnc_symbol[idx]
matx=as.data.frame(data.gcmatrix)
#matx=cbind(rownames(matx),matx)
#matx$X=rownames(as.data.frame(matx))
#rownames(matx) <- NULL

fname="E-GEOD-29850.csv"
write.csv(matx,paste0("mapped_",fname))