library(affy) filtrations=function(x){ n=length(x) b=c(x=="P") if ((sum(b)/n)<0.25) FALSE else TRUE } ####naive mapping #mean probe set map = function(Y,ENS) { ens=unique(sort(ENS[,2])); prim=as.factor(rownames(Y)) ep=intersect(ens,prim); ENS=ENS[ENS[,2] %in% ep,] aa=tapply(as.character(ENS[,1]),as.character(ENS[,2]), function(x) length(unique(x))) ep1=ep[aa==1] prim1=prim[prim %in% ep1]; ENS1=ENS[ENS[,2] %in% ep1,] gene=tapply(as.character(ENS1[,1]),as.character(ENS1[,2]), function(x) as.character(unique(x))) gene=as.factor(as.character(gene)); geneId=levels(gene) YG1=as.matrix(Y[prim %in% prim1,]); n1=ncol(Y) YG=matrix(NA,length(geneId),n1) for(i in 1:n1) YG[,i]=tapply(YG1[,i],gene,mean) rownames(YG)=geneId; colnames(YG)=colnames(Y) YG } #best probe set map2=function(Y,ENS){ Y=Y[apply(Y,1,function(y) (length(gr1)-sum(is.na(y[gr1]))>1) & (length(gr2)-sum(is.na(y[gr2]))>1) ),] Y=Y[apply(Y,1, function(y) ((sd(y[gr1],na.rm=T)>0) & (sd(y[gr2],na.rm=T)>0)) ),] ENS=ENS[ENS[,2]!="",] ENS=ENS[ENS[,2]%in%rownames(Y),] id_l=tapply(as.character(ENS[,1]),as.character(ENS[,2]),function(z) length(unique(z))) ENS=ENS[ENS[,2]%in%names(id_l[id_l==1]),] ens_list=tapply(as.character(ENS[,2]),as.character(ENS[,1]),function(x) list(unique(x))) ens_list1=unlist(ens_list[sapply(ens_list,length)==1]) V1=Y[ens_list1,] rownames(V1)=names(ens_list1) ens_list2=ens_list[!names(ens_list)%in%names(ens_list1)] V2=t(sapply(ens_list2,function(x){ AA=Y[x,] aa=apply(AA,1,function(y) t.test(y[gr1],y[gr2])$p.val) as.numeric(AA[names(which(aa==min(aa)))[1],]) })) rownames(V2)=names(ens_list2) colnames(V2)=colnames(V1) V=rbind(V1,V2) V=V[sort(rownames(V)),] V } met=c("MAS5","PLIER","MBEImm","GCRMA","RMA","MBEI") X=NULL X[[1]]=log2(read.table("MAS5_macq.txt",header=T)) X[[2]]=read.table("PLIER_macq.txt",header=T) X[[3]]=log2(read.table("MBEIpmmm_macq.txt",header=T)) X[[4]]=read.table("GCRMA_macq.txt",header=T) X[[5]]=read.table("RMA_macq.txt",header=T) X[[6]]=log2(read.table("MBEIpm_macq.txt",header=T)) #samples selection GR=9:16 ## C and D TAB=read.delim("hgu133plus2_250809.txt",header=T) calls=read.table("calls_macq.txt",header=T) CT=read.delim("norm_MAQC_TAQ_1_POLR2A.txt",header=T,sep="\t") CT=as.matrix(CT[,c(2,seq(3,ncol(CT),by=2))]) CT[,1]=paste("Hs.",sub("Hs","",sub("Hs00","",sapply(CT[,1],function(x) strsplit(x,"_")[[1]][1]))),sep="") opis=read.delim("opis.txt",header=T) CT=CT[which(opis[,2]%in%TAB[,4]),] CT[,1]=as.vector(opis[which(opis[,2]%in%TAB[,4]),2]) row_names=CT[,1] CT=matrix(log2(as.numeric(CT[,-1])),nrow=nrow(CT)) rownames(CT)=row_names CT=CT[,GR] CT=CT[!rownames(CT)%in%rownames(CT[duplicated(rownames(CT)),]),] for(i in 1:length(X)) X[[i]]=X[[i]][,GR] calls=calls[,GR] gr1=1:4 gr2=5:8 ## Filtration b=apply(calls,1,filtration) Xf=lapply(1:length(X),function(i) X[[i]][b,]) names(Xf)=met ## Mapping Xm=lapply(1:length(X),function(i) map(X[[i]],TAB[,2:1])) Xfm=lapply(1:length(Xf),function(i) map(Xf[[i]],TAB[,2:1])) names(Xm)=met names(Xfm)=met Xm2=lapply(1:length(X),function(i) map2(X[[i]],TAB[,2:1])) Xfm2=lapply(1:length(Xf),function(i) map2(Xf[[i]],TAB[,2:1])) names(Xm2)=met names(Xfm2)=met X2=X Xf2=Xf ps=lapply(rownames(CT),function(x) unique(as.vector(TAB[TAB[,4]==x,1]))) ens=lapply(rownames(CT),function(x) unique(as.vector(TAB[TAB[,4]==x,2]))) psX=lapply(ps,function(x) x[x%in%rownames(X[[1]])]) psX2=lapply(ps,function(x) x[x%in%rownames(X2[[1]])]) psXf=lapply(ps,function(x) x[x%in%rownames(Xf[[1]])]) psXf2=lapply(ps,function(x) x[x%in%rownames(Xf2[[1]])]) ensXm=lapply(ens,function(x) x[x%in%rownames(Xm[[1]])]) ensXfm=lapply(ens,function(x) x[x%in%rownames(Xfm[[1]])]) ensXm2=lapply(ens,function(x) x[x%in%rownames(Xm2[[1]])]) ensXfm2=lapply(ens,function(x) x[x%in%rownames(Xfm2[[1]])]) sr=function(Exp,id){ lapply(Exp,function(Y){ t( sapply(id,function(x){ { if (length(x)==0) {rep(NA,ncol(Y))} else { AA=Y[x,] { if (nrow(AA)>0) {apply(AA,2,mean)} else {rep(NA,ncol(Y))} } } } }) ) }) } wyb=function(Exp,id){ lapply(Exp,function(Y){ t( sapply(id,function(x){ { if (length(x)==0) {rep(NA,ncol(Y))} else { AA=Y[x,] AA=AA[apply(AA,1,function(z) sum(is.na(z))==0),] { if (nrow(AA)>0) { aa=unlist(apply(AA,1,function(y) { { if (sd(y)!=0) t.test(y[gr1],y[gr2])$p.val else 1} })) { if (length(aa)>0) {as.numeric(AA[names(which(aa==min(aa))[1]),])} else {rep(NA,ncol(Y))} } } else {rep(NA,ncol(Y))} } } } }) ) }) } for(i in 1:length(Xm)) Xm[[i]]=data.frame(Xm[[i]]) for(i in 1:length(Xfm)) Xfm[[i]]=data.frame(Xfm[[i]]) for(i in 1:length(Xm2)) Xm2[[i]]=data.frame(Xm2[[i]]) for(i in 1:length(Xfm2)) Xfm2[[i]]=data.frame(Xfm2[[i]]) X=sr(X,psX) Xf=sr(Xf,psXf) Xm=sr(Xm,ensXm) Xfm=sr(Xfm,ensXfm) X2=wyb(X2,psX2) Xf2=wyb(Xf2,psXf2) Xm2=wyb(Xm2,ensXm2) Xfm2=wyb(Xfm2,ensXfm2) Kerr_cor=function(X,Y,e1,e2){ a1=apply(X[,e1],1,function(x) mean(x,na.rm=T)) a2=apply(X[,e2],1,function(x) mean(x,na.rm=T)) a=a1-a2 b1=apply(Y[,e1],1,function(x) mean(x,na.rm=T)) b2=apply(Y[,e2],1,function(x) mean(x,na.rm=T)) b=b1-b2 cor(a,-b,use="complete.obs") } Kerr_sp_corSD=function(X,Y,e1,e2){ a1m=apply(X[,e1],1,function(x) mean(x,na.rm=T)) a1s=apply(X[,e1],1,function(x) sd(x,na.rm=T)) a2m=apply(X[,e2],1,function(x) mean(x,na.rm=T)) a2s=apply(X[,e2],1,function(x) sd(x,na.rm=T)) a=(a1m-a2m)/sqrt( ( (a1s^2) / length(e1) )+ ( (a2s^2) / length(e2) ) ) a[c(which(a1s==0),which(a2s==0))]=NA b1m=apply(Y[,e1],1,function(x) mean(x,na.rm=T)) b1s=apply(Y[,e1],1,function(x) sd(x,na.rm=T)) b2m=apply(Y[,e2],1,function(x) mean(x,na.rm=T)) b2s=apply(Y[,e2],1,function(x) sd(x,na.rm=T)) b=(b1m-b2m)/sqrt( ( (b1s^2) / length(e1) )+ ( (b2s^2) / length(e2) ) ) b[unique(c(which(b1s==0),which(b2s==0)))]=NA cor(a,-b,use="complete.obs",method = "spearman") } Kp=rbind( sapply(1:length(X),function(i) Kerr_cor(X[[i]],CT,gr1,gr2)), sapply(1:length(Xf),function(i) Kerr_cor(Xf[[i]],CT,gr1,gr2)), sapply(1:length(Xm),function(i) Kerr_cor(Xm[[i]],CT,gr1,gr2)), sapply(1:length(Xfm),function(i) Kerr_cor(Xfm[[i]],CT,gr1,gr2)) ) rownames(Kp)=c("A","F","M","FM") colnames(Kp)=met Kp2=rbind( sapply(1:length(X2),function(i) Kerr_cor(X2[[i]],CT,gr1,gr2)), sapply(1:length(Xf2),function(i) Kerr_cor(Xf2[[i]],CT,gr1,gr2)), sapply(1:length(Xm2),function(i) Kerr_cor(Xm2[[i]],CT,gr1,gr2)), sapply(1:length(Xfm2),function(i) Kerr_cor(Xfm2[[i]],CT,gr1,gr2)) ) rownames(Kp2)=c("A2","F2","M2","FM2") colnames(Kp2)=met Ksds=rbind( sapply(1:length(X),function(i) Kerr_sp_corSD(X[[i]],CT,gr1,gr2)), sapply(1:length(Xf),function(i) Kerr_sp_corSD(Xf[[i]],CT,gr1,gr2)), sapply(1:length(Xm),function(i) Kerr_sp_corSD(Xm[[i]],CT,gr1,gr2)), sapply(1:length(Xfm),function(i) Kerr_sp_corSD(Xfm[[i]],CT,gr1,gr2)) ) rownames(Ksds)=c("A","F","M","FM") colnames(Ksds)=met Ksds2=rbind( sapply(1:length(X2),function(i) Kerr_sp_corSD(X2[[i]],CT,gr1,gr2)), sapply(1:length(Xf2),function(i) Kerr_sp_corSD(Xf2[[i]],CT,gr1,gr2)), sapply(1:length(Xm2),function(i) Kerr_sp_corSD(Xm2[[i]],CT,gr1,gr2)), sapply(1:length(Xfm2),function(i) Kerr_sp_corSD(Xfm2[[i]],CT,gr1,gr2)) ) rownames(Ksds2)=c("A2","F2","M2","FM2") colnames(Ksds2)=met ########################################################################################################################## ## CDF ## ########################################################################################################################## X=NULL X[[1]]=log2(read.table("Custom_CDF/MAS5_macq_CDF11.txt",header=T))[,9:16] X[[2]]=read.table("Custom_CDF/PLIER_macq_CDF11.txt",header=T)[,9:16] X[[3]]=log2(read.table("Custom_CDF/MBEIpmmm_macq_CDF11.txt",header=T))[,9:16] X[[4]]=read.table("Custom_CDF/GCRMA_macq_CDF11.txt",header=T)[,9:16] X[[5]]=read.table("Custom_CDF/RMA_macq_CDF11.txt",header=T)[,9:16] X[[6]]=log2(read.table("Custom_CDF/MBEIpm_macq_CDF11.txt",header=T))[,9:16] ## Filtration b=apply(calls,1,filtration) Xf=lapply(1:length(X),function(i) X[[i]][b,]) names(Xf)=met ent_names=sapply(rownames(X[[1]]),function(x) substr(x,1,nchar(x)-3)) ent_namesF=sapply(rownames(Xf[[1]]),function(x) substr(x,1,nchar(x)-3)) for(i in 1:length(X)) rownames(X[[i]])=ent_names for(i in 1:length(Xf)) rownames(Xf[[i]])=ent_namesF ens=lapply(rownames(CT),function(x) unique(as.vector(TAB[TAB[,4]==x,2]))) ensX=lapply(ens,function(x) x[x%in%rownames(X[[1]])]) ensXf=lapply(ens,function(x) x[x%in%rownames(Xf[[1]])]) X=lapply(X,function(Y){ t( sapply(ensX,function(x){ { if (length(x)==0) {rep(NA,ncol(Y))} else {as.numeric(Y[x,]) } } }) ) }) Xf=lapply(Xf,function(Y){ t( sapply(ensXf,function(x){ { if (length(x)==0) {rep(NA,ncol(Y))} else {as.numeric(Y[x,]) } } }) ) }) gr1=1:4 gr2=5:8 Kp=rbind( sapply(1:length(X),function(i) Kerr_cor(X[[i]],-CT,gr1,gr2)), sapply(1:length(Xf),function(i) Kerr_cor(Xf[[i]],-CT,gr1,gr2)) ) rownames(Kp)=c("D","FD") colnames(Kp)=met Ksds=rbind( sapply(1:length(X),function(i) Kerr_sp_corSD(X[[i]],-CT,gr1,gr2)), sapply(1:length(Xf),function(i) Kerr_sp_corSD(Xf[[i]],-CT,gr1,gr2)) ) rownames(Ksds)=c("D","FD") colnames(Ksds)=met