本帖最后由 上善若水 于 2014-10-25 18:16 编辑
求大家来帮我解答下 R语言这样做的对吗
- library(hgu133a.db);
- oriInData<-read.table("D:/实验1/GSE15852_series_matrix.txt",header=TRUE,sep="\t")
- for(i in 2:length(oriInData[1,])){
- oriInData[,i]<-log2(oriInData[,i])
- }
- label<-read.table("D:/实验1/classlable15852.txt")
- ##############################################################
- ###############处理数据###########################3
- ###############################################################
- x<-hgu133aENTREZID;
- probeGene<-toTable(x);
- #Normally, len should be equal to the number of rows of relationship inforinDataion.
- locations<-match(as.character(oriInData[,1]),probeGene$probe_id);
- probegene<-probeGene$gene_id[locations];
- oriInData[,length(oriInData[1,])+1]<-probegene
- oriInData<-na.omit(oriInData)
- gene<-oriInData[,length(oriInData[1,])];
- probe<-unique(gene)
- inData<-matrix(0,nrow=length(probe), ncol=length(oriInData[1,])-2)
- for(i in 1:length(probe)){
- if(length(which(gene==probe))==1){
- inData[i,]<-as.numeric(oriInData[which(gene==probe),2:(length(oriInData)-1)])
- }
- else if(length(which(gene==probe))>1){
- inData[i,]<-as.numeric(colMeans(oriInData[which(gene==probe),2:(length(oriInData)-1)]))
- }
- }
- inData<-as.data.frame(inData,row.names=probe)
- names(inData)<-names(oriInData)[2:(length(oriInData[1,])-1)]
- ###############处理类标签######################################
- b<-as.vector(label[1,])
- b[which(b=="normal breast tissue")]<-rep(0,length(which(b=="normal breast tissue")))
- b[which(b=="1breast tumor tissue")]<-rep(1,length(which(b=="breast tumor tissue")))
- classLabel<-as.numeric(b)
- #########################计算T-test函数######################################
- calT <- function(inData, classLabel) {
- m <- length(ind1 <- which(classLabel == 1))
- n <- length(ind2 <- which(classLabel == 0))
- inData1 <- inData[, ind1]
- inData2 <- inData[, ind2]
- rmean1 <- rowMeans(inData1)
- rmean2 <- rowMeans(inData2)
- ss1 <- rowSums((inData1 - rmean1)^2)
- ss2 <- rowSums((inData2 - rmean2)^2)
- tt <- (m + n - 2)^0.5 * (rmean2 - rmean1)/((1/m + 1/n) *
- (ss1 + ss2))^0.5
- return(list(T = tt, df = m + n - 2))
- }
- deGene<-calT(inData,classLabel)[[1]];
- ####################################################################
复制代码
|