关于biomaRt
mart
: an object of class Mart, which is created by the useMart
function.
host
: The URI to host the service
biomart
: The name of the mart (BioMart database) to connect to; listMarts shows the available marts and their versions
dataset
: The data for different organisms
filters
: A vector of filters that one will use as input to the query
values
: A vector of values for the filters. In case multiple filters are in use, the values argument requires a list of values where each position in the list corresponds to the position of the filters in the filters argument
attributes
: A vector of attributes that one wants to retrieve (= the output of the query)
1. 安装
# Install biomaRt from Bioconductor only when it is not already available, so
# re-running these notes does not download and reinstall the package each time.
# The installer script is fetched over HTTPS because it is executed as code.
# NOTE(review): biocLite() is the legacy Bioconductor installer; recent
# Bioconductor releases document BiocManager::install("biomaRt") instead --
# confirm which one the target R/Bioconductor version supports.
if (!requireNamespace("biomaRt", quietly = TRUE)) {
  source("https://bioconductor.org/biocLite.R")
  biocLite("biomaRt")
}
library(biomaRt)
2. 有哪些marts(Marts)
# Ask the Ensembl host which marts it serves.
marts <- listMarts(host = "www.ensembl.org")
# Example result recorded when these notes were written:
#                biomart               version
# 1 ENSEMBL_MART_ENSEMBL      Ensembl Genes 90
# 2   ENSEMBL_MART_MOUSE      Mouse strains 90
# 3     ENSEMBL_MART_SNP Ensembl Variation 90
# 4 ENSEMBL_MART_FUNCGEN Ensembl Regulation 90
3. 有哪些数据集
# Connect to the Ensembl gene mart (no dataset selected yet), then list the
# datasets that mart offers.
ensembl <- useMart(
  biomart = "ENSEMBL_MART_ENSEMBL",
  host = "www.ensembl.org"
)
datasets <- listDatasets(mart = ensembl)
4. 有哪些信息可以进行过滤(filters)
# Table of filters that the current mart connection accepts as query input.
filters <- listFilters(mart = ensembl)
# Find candidate filters by keyword: case-insensitive match on the description
# column; each call prints the matching row indices.
grep("refseq", filters$description, ignore.case = TRUE)
grep("ucsc", filters$description, ignore.case = TRUE)
4. 有哪些属性(attributes)
# Reconnect, this time selecting the human gene dataset, and list the
# attributes (output columns) that can be requested from it.
ensembl <- useMart(
  biomart = "ENSEMBL_MART_ENSEMBL",
  dataset = "hsapiens_gene_ensembl",
  host = "www.ensembl.org"
)
attributes <- listAttributes(mart = ensembl)
# Keyword search on the attribute descriptions: first the matching row
# indices, then the matching rows themselves.
grep("entrez", attributes$description, ignore.case = TRUE)
attributes[grep("entrez", attributes$description, ignore.case = TRUE), ]
5. 根据filters和attributes返回结果
# Chromosomes to query. Ensembl names the human chromosomes 1-22, X, Y and MT:
# there is no chromosome "23", and the mitochondrial genome is "MT" rather than
# "M", so the previous values silently dropped all mitochondrial genes.
my_chr <- c(1:22, "X", "Y", "MT")

# One query per identifier system, each restricted to the chromosomes above.
# NOTE(review): the "entrezgene" attribute name matches the Ensembl release 90
# mart listed earlier in these notes; confirm it against listAttributes() when
# running against a newer release.
my_refseq_mrna <- getBM(
  mart = ensembl,
  filters = "chromosome_name", values = my_chr,
  attributes = "refseq_mrna"
)
my_entrez_gene <- getBM(
  mart = ensembl,
  filters = "chromosome_name", values = my_chr,
  attributes = "entrezgene"
)
my_ucsc_gene <- getBM(
  mart = ensembl,
  filters = "chromosome_name", values = my_chr,
  attributes = "ucsc"
)
my_ensembl_gene_id <- getBM(
  mart = ensembl,
  filters = "chromosome_name", values = my_chr,
  attributes = "ensembl_gene_id"
)

# Combined query: one row per combination of the four identifiers, used below
# to compare how the ID systems overlap.
my_annotation <- getBM(
  mart = ensembl,
  filters = "chromosome_name", values = my_chr,
  attributes = c("ucsc", "ensembl_gene_id", "refseq_mrna", "entrezgene")
)
#' Label the non-missing entries of the supplied values.
#'
#' @param x Label written into every position whose value is not `NA`.
#'   Coerced to character; only the first element is used.
#' @param ... Values tested with `is.na()` (vectors, or a one-row data
#'   frame). Their NA masks are flattened in argument order.
#' @return An unnamed character vector with one element per tested value:
#'   `x` where the value is present, `NA` where it is missing.
mark_na <- function(x, ...) {
  label <- as.character(x)[[1L]]
  # Flatten the per-argument NA masks into a single unnamed logical vector.
  missing_mask <- as.logical(
    unlist(lapply(list(...), is.na), use.names = FALSE)
  )
  # Fill by logical indexing instead of regex substitution on the strings
  # "TRUE"/"FALSE": a label that itself contains "TRUE" or a backslash is
  # then kept verbatim instead of being turned into NA or mangled.
  marked <- rep(label, length(missing_mask))
  marked[missing_mask] <- NA_character_
  marked
}
# Build a table shaped like my_annotation in which every present annotation is
# replaced by the name of its row and every missing one is NA, so that rows can
# be compared across ID systems as set members.
# This is done column-wise: looping over `1:dim(my_annotation)[1]` would
# iterate over c(1, 0) when the query returns no rows, and assigning into the
# data frame one row at a time is very slow on a genome-wide result.
# NOTE(review): only NA counts as missing here; if getBM() reports absent
# character IDs as "" they are treated as present -- verify on real output.
row_ids <- row.names(my_annotation)
my_venn <- my_annotation
my_venn[] <- lapply(my_annotation, function(column) {
  present <- row_ids
  present[is.na(column)] <- NA_character_
  present
})

# One vector of row names per ID system. Columns are taken by position
# (1 = ucsc, 2 = ensembl_gene_id, 3 = refseq_mrna, 4 = entrezgene), which
# assumes getBM() returned them in the requested order -- TODO confirm.
my_venn_ucsc <- as.vector(na.omit(my_venn[[1]]))
my_venn_ensembl <- as.vector(na.omit(my_venn[[2]]))
my_venn_refseq <- as.vector(na.omit(my_venn[[3]]))
my_venn_entrez <- as.vector(na.omit(my_venn[[4]]))
# gplots supplies venn(), which draws the overlap between the named sets.
library(gplots)

# One named set of row identifiers per ID system.
VennList <- list(
  UCSC = my_venn_ucsc,
  Ensembl = my_venn_ensembl,
  RefSeq = my_venn_refseq,
  Entrez = my_venn_entrez
)
venn(VennList)