基因表达分析

上善若水 · 发表于 2014-10-25 18:12:40

本帖最后由上善若水于 2014-10-25 18:16 编辑

求大家来帮我解答下 R语言这样做的对吗

library(hgu133a.db);
oriInData<-read.table("D:/实验1/GSE15852_series_matrix.txt",header=TRUE,sep="\t")
for(i in 2:length(oriInData[1,])){
oriInData[,i]<-log2(oriInData[,i])
}
label<-read.table("D:/实验1/classlable15852.txt")
##############################################################
###############处理数据###########################3
###############################################################
x<-hgu133aENTREZID;
probeGene<-toTable(x);
#Normally, len should be equal to the number of rows of relationship inforinDataion.
locations<-match(as.character(oriInData[,1]),probeGene$probe_id);
probegene<-probeGene$gene_id[locations];
oriInData[,length(oriInData[1,])+1]<-probegene
oriInData<-na.omit(oriInData)
gene<-oriInData[,length(oriInData[1,])];
probe<-unique(gene)
inData<-matrix(0,nrow=length(probe), ncol=length(oriInData[1,])-2)
for(i in 1:length(probe)){
if(length(which(gene==probe))==1){
inData[i,]<-as.numeric(oriInData[which(gene==probe),2:(length(oriInData)-1)])
}
else if(length(which(gene==probe))>1){
inData[i,]<-as.numeric(colMeans(oriInData[which(gene==probe),2:(length(oriInData)-1)]))
}
}
inData<-as.data.frame(inData,row.names=probe)
names(inData)<-names(oriInData)[2:(length(oriInData[1,])-1)]
###############处理类标签######################################
b<-as.vector(label[1,])
b[which(b=="normal breast tissue")]<-rep(0,length(which(b=="normal breast tissue")))
b[which(b=="1breast tumor tissue")]<-rep(1,length(which(b=="breast tumor tissue")))
classLabel<-as.numeric(b)
#########################计算T-test函数######################################
calT <- function(inData, classLabel) {
m <- length(ind1 <- which(classLabel == 1))
n <- length(ind2 <- which(classLabel == 0))
inData1 <- inData[, ind1]
inData2 <- inData[, ind2]
rmean1 <- rowMeans(inData1)
rmean2 <- rowMeans(inData2)
ss1 <- rowSums((inData1 - rmean1)^2)
ss2 <- rowSums((inData2 - rmean2)^2)
tt <- (m + n - 2)^0.5 * (rmean2 - rmean1)/((1/m + 1/n) *
(ss1 + ss2))^0.5
return(list(T = tt, df = m + n - 2))
}
deGene<-calT(inData,classLabel)[[1]];
####################################################################

复制代码

		自动登录	找回密码
密码			立即注册