library(randomForest) source("para.from.perl") source("getdata.R") sigSNP<-6 MDL<-ifelse(MDLnum==1,"A","E") # Simulate data and attach the dataset pah.dat.nona <- getSNPdata(nobs=NOBS,mdl=MDL) out<-pah.dat.nona$disease pah.dat.nona<-as.data.frame(cbind(out,pah.dat.nona$dat)) ncase<-sum(out) ncontrol<-NOBS-ncase attach((pah.dat.nona)) if(simno==1) { # initialize matrices for storing results bs.pvalue<-chisq.pvalue<-matrix(0,nrow=ncol(pah.dat.nona)-1,ncol=num.sims) perm.mean.gini<-matrix(0,nrow=ncol(pah.dat.nona)-1,ncol=num.sims) perm.mean.overall<-matrix(0,nrow=ncol(pah.dat.nona)-1,ncol=num.sims) perm.mean.out0<-matrix(0,nrow=ncol(pah.dat.nona)-1,ncol=num.sims) perm.mean.out1<-matrix(0,nrow=ncol(pah.dat.nona)-1,ncol=num.sims) perm.results<-perm.results.gini<-matrix(0,nrow=ncol(pah.dat.nona)-1,ncol=num.bs.samples) perm.results.overall<-matrix(0,nrow=ncol(pah.dat.nona)-1,ncol=num.bs.samples) perm.results.out0<-matrix(0,nrow=ncol(pah.dat.nona)-1,ncol=num.bs.samples) perm.results.out1<-matrix(0,nrow=ncol(pah.dat.nona)-1,ncol=num.bs.samples) # rename casecontrol from original data for use in permutation oldcasecontrol<-pah.dat.nona[,1] # Simulate the null distribution for(bs.num in 1:num.bs.samples){ cat(paste(" ",bs.num,sep=" ")) bs.sample.SNP<-pah.dat.nona bs.case.control<-sample(oldcasecontrol,length(oldcasecontrol),replace=FALSE) pah.perm <- randomForest(as.factor(bs.case.control) ~ ., mtry=MTRY,data=bs.sample.SNP[,2:(nsnp+1)],keep.forest=F,ntree=nT,importance=TRUE) perm.results.gini[,bs.num]<-perm.results[,bs.num]<-importance(pah.perm)[,4] perm.results.overall[,bs.num]<-importance(pah.perm)[,3] perm.results.out0[,bs.num]<-importance(pah.perm)[,1] perm.results.out1[,bs.num]<-importance(pah.perm)[,2] } } # Perform univariate tests chi.keep<-apply(pah.dat.nona[,2:(nsnp+1)],2,chisq.test,pah.dat.nona[,1],correct=FALSE) for(ch in 1:nsnp) chisq.pvalue[ch,simno]<-chi.keep[[ch]]$p.value # Perform random forest for the simulated data pah.rf<- randomForest(as.factor(pah.dat.nona[,1]) ~ ., data=pah.dat.nona[,2:(nsnp+1)], mtry=MTRY,ntree=nT, keep.forest=F, importance=TRUE) # Store the results ind.perms<-importance(pah.rf)[,4]>perm.results perm.mean.gini[,simno]<- apply((importance(pah.rf)[,4]>perm.results.gini),1,mean) perm.mean.overall[,simno]<- apply((importance(pah.rf)[,3]>perm.results.overall),1,mean) perm.mean.out0[,simno]<- apply((importance(pah.rf)[,1]>perm.results.out0),1,mean) perm.mean.out1[,simno]<- apply((importance(pah.rf)[,2]>perm.results.out1),1,mean) bs.pvalue[,simno]<- apply(ind.perms,1,mean)