R包 biomaRt介绍 - ricket-sjtu/bioinformatics GitHub Wiki

关于biomaRt

  • mart: an object of class Mart, which is created by the useMart function.
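    • host: The URL of the server that hosts the BioMart service (e.g. www.ensembl.org)
    • biomart: The name of the mart (database) to connect to, as listed by listMarts together with its version
    • dataset: The dataset within that mart to query, typically one per organism, as listed by listDatasets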
  • filters: A vector of filters that one will use as input to the query
  • values: A vector of values for the filters. In case multiple filters are in use, the values argument requires a list of values where each position in the list corresponds to the position of the filters in the filters argument
  • attributes: A vector of attributes that one wants to retrieve (= the output of the query)

1. 安装

# Install biomaRt from Bioconductor and attach it.
# The legacy biocLite.R installer has been retired (and was fetched over plain
# HTTP); BiocManager is the supported way to install Bioconductor packages.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("biomaRt")
library(biomaRt)

2. 有哪些marts(Marts)

# Ask the Ensembl BioMart server which marts (databases) it offers.
marts <- listMarts(host = "www.ensembl.org")
# Example contents of `marts` (recorded against Ensembl release 90):
#               biomart               version
#1 ENSEMBL_MART_ENSEMBL      Ensembl Genes 90
#2   ENSEMBL_MART_MOUSE      Mouse strains 90
#3     ENSEMBL_MART_SNP  Ensembl Variation 90
#4 ENSEMBL_MART_FUNCGEN Ensembl Regulation 90

3. 有哪些数据集

# Connect to the Ensembl Genes mart (no dataset chosen yet) and list the
# datasets it provides.
ensembl <- useMart(
  biomart = "ENSEMBL_MART_ENSEMBL",
  host = "www.ensembl.org"
)
datasets <- listDatasets(mart = ensembl)

4. 有哪些信息可以进行过滤(filters)

# listFilters() requires a Mart with a dataset selected, but the Mart created
# in the previous step has none. Select the human gene dataset first so the
# call does not fail with "No dataset selected".
ensembl <- useDataset("hsapiens_gene_ensembl", mart = ensembl)
filters <- listFilters(ensembl)
# Find candidate filters by keyword: each call prints the row indices of
# `filters` whose description matches, ignoring case.
grep(pattern = "refseq", x = filters$description, ignore.case = TRUE)
grep(pattern = "ucsc", x = filters$description, ignore.case = TRUE)

4. 有哪些属性(attributes)

# Connect to the human gene dataset of the Ensembl Genes mart and list the
# attributes (output columns) it can return.
ensembl <- useMart(
  biomart = "ENSEMBL_MART_ENSEMBL",
  dataset = "hsapiens_gene_ensembl",
  host = "www.ensembl.org"
)
attributes <- listAttributes(ensembl)
# Print the row indices of the attributes whose description mentions "entrez"
# (case-insensitive) ...
grep("entrez", attributes$description, ignore.case = TRUE)
# ... and then the matching rows themselves.
attributes[grepl("entrez", attributes$description, ignore.case = TRUE), ]

5. 根据filters和attributes返回结果

# Chromosomes to query. Ensembl names the human chromosomes "1".."22", "X",
# "Y" and "MT"; values such as "23" or "M" match nothing, so using them would
# silently drop the mitochondrial genes.
my_chr <- c(1:22, "X", "Y", "MT")

# One query per identifier type: every value of that attribute found on the
# selected chromosomes.
my_refseq_mrna <- getBM(
  mart = ensembl,
  filters = "chromosome_name", values = my_chr,
  attributes = "refseq_mrna"
)

# The Entrez gene attribute is called "entrezgene_id" in current Ensembl
# releases (it was "entrezgene" in older ones such as release 90).
my_entrez_gene <- getBM(
  mart = ensembl,
  filters = "chromosome_name", values = my_chr,
  attributes = "entrezgene_id"
)

my_ucsc_gene <- getBM(
  mart = ensembl,
  filters = "chromosome_name", values = my_chr,
  attributes = "ucsc"
)

my_ensembl_gene_id <- getBM(
  mart = ensembl,
  filters = "chromosome_name", values = my_chr,
  attributes = "ensembl_gene_id"
)

# Combined query: one row per combination of the four identifiers. The column
# order (UCSC, Ensembl, RefSeq, Entrez) is relied on by the code below, which
# indexes the columns by position.
my_annotation <- getBM(
  mart = ensembl,
  filters = "chromosome_name", values = my_chr,
  attributes = c("ucsc", "ensembl_gene_id", "refseq_mrna", "entrezgene_id")
)
# Replace every non-missing value with a label and keep missing values as NA.
#
# x   : label to write wherever a value is present; only its first element is
#       used, coerced to character.
# ... : values to inspect (vectors, or a one-row data frame); they are
#       flattened in argument order.
#
# Returns a character vector with one element per inspected value: the label
# where the value is not NA, NA_character_ where it is.
#
# The result is built by logical indexing instead of running gsub() on the
# strings "FALSE"/"TRUE", so a label that itself contains "TRUE" is no longer
# turned into NA, and the label is never interpreted as a regex replacement.
mark_na <- function(x, ...) {
  # unlist(lapply()) always yields a flat logical vector, whereas sapply()
  # changes shape depending on the lengths of the arguments.
  is_missing <- unlist(lapply(list(...), is.na), use.names = FALSE)
  marked <- rep(as.character(x)[1L], length(is_missing))
  marked[is_missing] <- NA_character_
  marked
}

# Build my_venn: same shape as my_annotation, but every cell holds the row id
# (row name) when that identifier is present for the row and NA when it is
# missing. The row ids then act as set members for the Venn diagram.
#
# This is done one column at a time rather than one row at a time: assigning
# into a data frame row by row is extremely slow for the hundreds of thousands
# of rows a genome-wide query returns, and 1:nrow misbehaves on zero rows.
my_venn <- my_annotation
my_venn[] <- lapply(my_annotation, function(column) {
  marked <- row.names(my_annotation)
  # Missing character identifiers appear to come back from getBM() as empty
  # strings rather than NA (verify against your biomaRt version), so both are
  # treated as missing here.
  marked[is.na(column) | column == ""] <- NA_character_
  marked
})

# For each identifier source, keep the row ids that are not NA in the
# corresponding column of my_venn (columns are taken by position: UCSC,
# Ensembl, RefSeq, Entrez).
my_venn_ucsc <- as.vector(na.omit(my_venn[[1]]))
my_venn_ensembl <- as.vector(na.omit(my_venn[[2]]))
my_venn_refseq <- as.vector(na.omit(my_venn[[3]]))
my_venn_entrez <- as.vector(na.omit(my_venn[[4]]))

# Draw a four-set Venn diagram of the row ids with gplots::venn().
library(gplots)
VennList <- list(
  UCSC = my_venn_ucsc,
  Ensembl = my_venn_ensembl,
  RefSeq = my_venn_refseq,
  Entrez = my_venn_entrez
)
venn(VennList)