## ---- eval = F----------------------------------------------------------- # vt.data <- function(dataset, outcome.field, treatment.field, interactions = TRUE, ...){ # data <- formatRCTDataset(dataset, outcome.field, treatment.field, interactions = TRUE) # VT.object(data = data, ...) # } ## ------------------------------------------------------------------------ # load library VT library(aVirtualTwins) # load data sepsis data(sepsis) # initialize VT.object vt.o <- vt.data(sepsis, "survival", "THERAPY", TRUE) ## ------------------------------------------------------------------------ # Creation of categorical variable cat.x <- rep(1:5, (nrow(sepsis))/5) cat.x <- as.factor(cat.x) sepsis.tmp <- cbind(sepsis, cat.x) vt.o.tmp <- vt.data(sepsis.tmp, "survival", "THERAPY", TRUE) ## ---- echo = FALSE------------------------------------------------------- rm(vt.o.tmp, cat.x, sepsis.tmp) ## ------------------------------------------------------------------------ # use randomForest::randomForest() library(randomForest, verbose = F) # Reproducibility set.seed(123) # Fit rf model # default params # set interactions to TRUE if using interaction between T and X model.rf <- randomForest(x = vt.o$getX(interactions = T), y = vt.o$getY(), ntree = 500) # initialize VT.forest.one vt.f.rf <- vt.forest("one", vt.data = vt.o, model = model.rf, interactions = T) ### or you can use randomForest inside vt.forest() vt.f.rf <- vt.forest("one", vt.data = vt.o, interactions = T, ntree = 500) ## ------------------------------------------------------------------------ # # use randomForest::randomForest() # library(party, verbose = F) # # Reproducibility # set.seed(123) # # Fit cforest model # # default params # # set interactions to TRUE if using interaction between T and X # model.cf <- cforest(formula = vt.o$getFormula(), data = vt.o$getData(interactions = T)) # # initialize VT.forest.one # vt.f.cf <- vt.forest("one", vt.data = vt.o, model = model.cf) ## ------------------------------------------------------------------------ # Copy new object vt.o.tr <- vt.o$copy() # Change levels tmp <- ifelse(vt.o.tr$data$survival == 1, "y", "n") vt.o.tr$data$survival <- as.factor(tmp) rm(tmp) # Check new data to be sure formatRCTDataset(vt.o.tr$data, "survival", "THERAPY") # use caret::train() library(caret, verbose = F) # Reproducibility set.seed(123) # fit train model fitControl <- trainControl(classProbs = T, method = "none") model.tr <- train(x = vt.o.tr$getX(interactions = T), y = vt.o.tr$getY(), method = "rf", tuneGrid = data.frame(mtry = 5), trControl = fitControl) # initialize VT.forest.one vt.f.tr <- vt.forest("one", vt.o.tr, model = model.tr) ## ------------------------------------------------------------------------ # grow RF for T = 1 model.rf.trt1 <- randomForest(x = vt.o$getX(trt = 1), y = vt.o$getY(trt = 1)) # grow RF for T = 0 model.rf.trt0 <- randomForest(x = vt.o$getX(trt = 0), y = vt.o$getY(trt = 0)) # initialize VT.forest.double() vt.doublef.rf <- vt.forest("double", vt.data = vt.o, model_trt1 = model.rf.trt1, model_trt0 = model.rf.trt0) ### Or you can use randomForest() inside vt.doublef.rf <- vt.forest("double", vt.data = vt.o, ntree = 200) ## ---- cache=F------------------------------------------------------------ # initialize k-fold RF # you can use randomForest options model.fold <- vt.forest("fold", vt.data = vt.o, fold = 5, ratio = 1, interactions = T, ntree = 200) ## ------------------------------------------------------------------------ # you get twin1 and twin2 by your own method # here, i'll use random number between 0 and 1 : twin1_random <- runif(470) twin2_random <- runif(470) # then you can initialize VT.difft class : model.difft <- VT.difft(vt.o, twin1 = twin1_random, twin2 = twin2_random, "absolute") # compute difference of twins : model.difft$computeDifft() # See results head(model.difft$difft) # Graph : # hist(model.difft$difft) ## ------------------------------------------------------------------------ # initialize classification tree tr.class <- vt.tree("class", vt.difft = vt.f.rf, sens = ">", threshold = quantile(vt.f.rf$difft, seq(.5, .8, .1))) # tr.class is a list if threshold is a vectoor class(tr.class) # acce trees with treeXX class(tr.class$tree1) ## ------------------------------------------------------------------------ # initialize regression tree tr.reg <- vt.tree("reg", vt.difft = vt.f.rf, sens = ">", threshold = quantile(vt.f.rf$difft, seq(.5, .8, .1))) # tr.class is a list if threshold is a vectoor class(tr.reg) # acce trees with treeXX class(tr.reg$tree1)