library(randomForest) source("getdata.R") source("initMatrices.R") source("ocCode.R") source("para.from.perl") sigSNP <- 6 MDL <- ifelse(MDLnum==1, "A", "E") ocInit() for (simno in 1:num.sims) { set.seed(simno) if(simno==1) { # Simulate data and attach the dataset pah.dat.nona <- getSNPdata(nobs=NOBS,mdl=MDL) pah.dat.nona <- as.data.frame(cbind(pah.dat.nona$disease,pah.dat.nona$dat)) attach((pah.dat.nona)) # initialize matrices for storing results chisq.pvalue <- matrix(0, nrow=ncol(pah.dat.nona)-1, ncol=num.sims) perm.mean <- initMatrices(ncol(pah.dat.nona)-1, num.sims) perm.results <- initMatrices(ncol(pah.dat.nona)-1, num.bs.samples) # rename casecontrol from original data for use in permutation oldcasecontrol <- pah.dat.nona[, 1] # Simulate the null distribution for(bs.num in 1:num.bs.samples) { if (ocOwn()) { set.seed(num.sims + bs.num) cat(paste(" ", bs.num, sep=" ")) bs.sample.SNP <- pah.dat.nona bs.case.control <- sample(oldcasecontrol, length(oldcasecontrol), replace=FALSE) pah.perm <- randomForest(as.factor(bs.case.control) ~ ., mtry=MTRY, data=bs.sample.SNP[, 2:(nsnp+1)], keep.forest=F, ntree=nT, importance=TRUE) perm.results$gini[, bs.num] <- importance(pah.perm)[, 4] perm.results$overall[, bs.num] <- importance(pah.perm)[, 3] perm.results$out0[, bs.num] <- importance(pah.perm)[, 1] perm.results$out1[, bs.num] <- importance(pah.perm)[, 2] } } perm.results = ocMergePermResults(perm.results) } if (ocOwn()) { if (simno != 1) { pah.dat.nona <- getSNPdata(nobs=NOBS, mdl=MDL) pah.dat.nona <- as.data.frame(cbind(pah.dat.nona$disease, pah.dat.nona$dat)) attach((pah.dat.nona)) } set.seed(num.sims + num.bs.samples + simno) # Perform univariate tests chi.keep <- apply(pah.dat.nona[, 2:(nsnp+1)], 2, chisq.test, pah.dat.nona[, 1], correct=FALSE) for(ch in 1:nsnp) chisq.pvalue[ch, simno] <- chi.keep[[ch]]$p.value # Perform random forest for the simulated data pah.rf <- randomForest(as.factor(pah.dat.nona[, 1]) ~ ., data=pah.dat.nona[, 2:(nsnp+1)], mtry=MTRY, ntree=nT, keep.forest=F, importance=TRUE) # Store the results perm.mean$gini[, simno] <- apply((importance(pah.rf)[, 4]>perm.results$gini), 1, mean) perm.mean$overall[, simno] <- apply((importance(pah.rf)[, 3]>perm.results$overall), 1, mean) perm.mean$out0[, simno] <- apply((importance(pah.rf)[, 1]>perm.results$out0), 1, mean) perm.mean$out1[, simno] <- apply((importance(pah.rf)[, 2]>perm.results$out1), 1, mean) } } # workers exit via this call. mpm = ocMergePermMean(perm.mean, chisq.pvalue) chisq.pvalue = mpm$chisq.pvalue; perm.mean = mpm$perm.mean # R bug -- cannot save these directly? fn = sprintf('ocOut_%d_%d_%d_%d_%d_%d_%d.Rdata', num.sims, nT, nsnp, NOBS, num.bs.samples, MDLnum, MTRY) save(chisq.pvalue, perm.results, perm.mean, num.sims, nT, nsnp, NOBS, num.bs.samples, MDLnum, MTRY, file=fn)