lecture 8 id转换

ID转换,就是找到对应关系,然后match所需转换的ID!

  1. 针对芯片数据,可用芯片对应的R包,如下:

1.1 affymetrix芯片处理

# Affymetrix HG-U95Av2: translate probe IDs into Entrez IDs, gene symbols and
# gene names using the chip's annotation package.
# NOTE: the original `rm(list = ls())` was dropped on purpose -- run this in a
# fresh R session rather than wiping whatever the caller has in the workspace.

library("hgu95av2.db")
ls("package:hgu95av2.db") # the 36 mapping objects shipped with the package

# Flatten each probe -> annotation map into a data frame keyed by probe_id.
probe2entrezID <- toTable(hgu95av2ENTREZID)
probe2symbol <- toTable(hgu95av2SYMBOL)
probe2genename <- toTable(hgu95av2GENENAME)

# Draw 30 random probes among those that have an Entrez ID mapping.
# The draw is unseeded, so it differs between runs.
my_probe <- sample(unique(mappedLkeys(hgu95av2ENTREZID)), 30)

# match() returns, for every sampled probe, the row index of its first hit in
# the lookup table, so the result rows follow the order of my_probe.
tmp1 <- probe2symbol[match(my_probe, probe2symbol$probe_id), ]
tmp2 <- probe2entrezID[match(my_probe, probe2entrezID$probe_id), ]
tmp3 <- probe2genename[match(my_probe, probe2genename$probe_id), ]

# One ID per line, no header, no row names, no quoting.
write.table(my_probe, "my_probe.txt",
  quote = FALSE, col.names = FALSE, row.names = FALSE
)
write.table(tmp1$symbol, "my_symbol.txt",
  quote = FALSE, col.names = FALSE, row.names = FALSE
)
write.table(tmp2$gene_id, "my_geneID.txt",
  quote = FALSE, col.names = FALSE, row.names = FALSE
)

1.2 illumina芯片的探针:

# Illumina HumanHT-12 v4: same probe -> gene lookup as for the Affymetrix chip,
# using the illuminaHumanv4.db annotation package.
library("illuminaHumanv4.db")
ls("package:illuminaHumanv4.db") # list the mapping objects in the package

# Flatten each probe -> annotation map into a data frame keyed by probe_id.
probe2entrezID <- toTable(illuminaHumanv4ENTREZID)
probe2symbol <- toTable(illuminaHumanv4SYMBOL)
probe2genename <- toTable(illuminaHumanv4GENENAME)

# Draw 30 random probes among those that have an Entrez ID mapping (unseeded).
my_probe <- sample(unique(mappedLkeys(illuminaHumanv4ENTREZID)), 30)

# Show the annotation rows for the sampled probes, in the order of my_probe.
probe2symbol[match(my_probe, probe2symbol$probe_id), ]
probe2entrezID[match(my_probe, probe2entrezID$probe_id), ]
probe2genename[match(my_probe, probe2genename$probe_id), ]

  2. 基因数据
    # Gene-level IDs: draw 30 random Entrez IDs and 30 random gene symbols from
    # the right-hand (gene) side of the Illumina probe maps to use as examples.
    library("illuminaHumanv4.db")
    ls("package:illuminaHumanv4.db")
    my_entrez_gene <- sample(unique(mappedRkeys(illuminaHumanv4ENTREZID)), 30)
    my_symbol_gene <- sample(unique(mappedRkeys(illuminaHumanv4SYMBOL)), 30)

# org.Hs.eg.db holds the organism-level Entrez ID <-> symbol table.
library("org.Hs.eg.db")
ls("package:org.Hs.eg.db")

entrezID2symbol <- toTable(org.Hs.egSYMBOL)

# Look the examples up in both directions: Entrez ID -> symbol, then
# symbol -> Entrez ID. Rows follow the order of the query vector.
entrezID2symbol[match(my_entrez_gene, entrezID2symbol$gene_id), ]
entrezID2symbol[match(my_symbol_gene, entrezID2symbol$symbol), ]

此外,还有biomaRt包

# Convert Entrez gene IDs to HGNC symbols and Ensembl gene IDs through the
# Ensembl BioMart web service (requires network access).

# NOTE(review): biocLite() has been retired in favour of
# BiocManager::install() (Bioconductor >= 3.8) -- confirm which installer the
# R version in use supports before running these lines.
source("http://bioconductor.org/biocLite.R")
options(BioC_mirror = "http://mirrors.ustc.edu.cn/bioc/")
biocLite("org.Hs.eg.db")

library(org.Hs.eg.db)
biocLite("biomaRt")
library(biomaRt)

ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")

# NOTE(review): newer Ensembl releases appear to expose this attribute/filter
# as "entrezgene_id"; check listAttributes(ensembl) if the query is rejected.
bm_attributes <- c("entrezgene", "hgnc_symbol", "ensembl_gene_id")

# Example: two hard-coded Entrez IDs. The query is sent once, then the result
# is both displayed and saved (the original sent the same remote query twice).
entrzID <- c("672", "1")
genesymbol <- getBM(
  attributes = bm_attributes,
  filters = "entrezgene",
  values = entrzID,
  mart = ensembl
)
genesymbol
# sep must be a real tab ("\t"); the original "t" had lost its backslash and
# would have separated the fields with the letter t.
write.table(genesymbol, file = "symbol.xls", sep = "\t", quote = FALSE)

# Batch conversion: geneid.txt holds the gene IDs to convert.
# assumes one ID per line in the first column -- TODO confirm file layout
tmp <- read.table("geneid.txt")
tmp2 <- getBM(
  attributes = bm_attributes,
  filters = "entrezgene",
  values = tmp[[1]], # pass the ID column as a vector, not the data frame
  mart = ensembl
)
write.table(tmp2, file = "xxx.xls", sep = "\t", quote = FALSE)

包的安装:
# Install (if missing) and load the Affymetrix HG-U95Av2 annotation package.
# NOTE(review): biocLite() has been retired in favour of
# BiocManager::install() (Bioconductor >= 3.8) -- confirm which installer the
# R version in use supports.
source("http://bioconductor.org/biocLite.R")
options(BioC_mirror = "http://mirrors.ustc.edu.cn/bioc/")
# The original loaded the package without ever installing it; install only
# when it is not already available so repeated runs stay cheap.
if (!requireNamespace("hgu95av2.db", quietly = TRUE)) {
  biocLite("hgu95av2.db")
}
library("hgu95av2.db")
ls("package:hgu95av2.db") # the 36 mapping objects shipped with the package