commit 8e4f4136ef3a5329037079860c7c864e64c62d43 Author: prise6 Date: Sun May 31 21:09:04 2015 +0200 Initial commit - welcome diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..91114bf --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,2 @@ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..807ea25 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.Rproj.user +.Rhistory +.RData diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..33fb832 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,9 @@ +Package: VirtualTwins +Type: Package +Title: What the package does (short line) +Version: 1.0 +Date: 2015-05-31 +Author: Who wrote it +Maintainer: Who to complain to +Description: More about what it does (maybe more than one line) +License: What license is it under? diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..d75f824 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1 @@ +exportPattern("^[[:alpha:]]+") diff --git a/R/VirtualTwins.R b/R/VirtualTwins.R new file mode 100644 index 0000000..e69de29 diff --git a/R/difft.R b/R/difft.R new file mode 100644 index 0000000..66a1521 --- /dev/null +++ b/R/difft.R @@ -0,0 +1,33 @@ +# DIFFT ------------------------------------------------------------------- + + + +VT.difft <- setRefClass( + Class = "VT.difft", + + fields = list( + vt.object = "VT.object", + twin1 = "vector", + twin2 = "vector", + difft = "vector" + ), + + methods = list( + initialize = function(vt.object = VT.object(), ...){ + .self$vt.object <- vt.object + + .self$twin1 <- .self$twin2 <- rep(NA, nrow(vt.object$data)) + + .self$initFields(...) + }, + + computeDifft = function(){ + + if(sum(is.na(.self$twin1)) != 0 | sum(is.na(.self$twin2)) != 0 ) stop("Twins must be valid") + + .self$difft <- ifelse(.self$vt.object$data[, 2] == 1, .self$twin1 - .self$twin2, .self$twin2 - .self$twin1) + + return(invisible(.self$difft)) # To see later + } + ) +) \ No newline at end of file diff --git a/R/forest.R b/R/forest.R new file mode 100644 index 0000000..a2ab25f --- /dev/null +++ b/R/forest.R @@ -0,0 +1,52 @@ +# FORESTS ----------------------------------------------------------------- + +# Objet enfant de VT.difft +# Objet Parent de VT.forest.one & VT.forest.double, Abstract class : ne doit pas etre instanciée +# twin1 - Proba du "oui" de la réponse (modalité d'intéret codé par "o" ou 1 contre "n" ou 0 dans le cas binaire) +# sachant le vrai traitement +# twin2 - Proba du "oui" [...] sachant le traitement opposé +# difft - différence de twin1 - twin2 SI le vrai traitement == 1 SINON twin2 - twin1 +# +# $run() - lance le calcul des probas +VT.forest <- setRefClass( + Class = "VT.forest", + + contains = "VT.difft", + + methods = list( + run = function(){ + .self$computeTwin1() + + if(inherits(.self, "VT.forest.one")) .self$vt.object$switchTreatment() #if one forest + + .self$computeTwin2() + + if(inherits(.self, "VT.forest.one")) .self$vt.object$switchTreatment() #if one forest + + .self$computeDifft() + + .self$vt.object$computeDelta() # To see later + + return(invisible(.self)) + }, + + checkModel = function(model){ + if(!(inherits(model, "train") | inherits(model, "RandomForest") | inherits(model, "randomForest"))){ + stop("Model is not recognized. Must be : train, RandomForest, randomForest") + } + }, + + getFullData = function(){ + if(length(.self$twin1) != nrow(.self$vt.object$data)) stop("Twin1 must have same length as data") + if(length(.self$twin2) != nrow(.self$vt.object$data)) stop("Twin2 must have same length as data") + + if(length(.self$difft) != nrow(.self$vt.object$data)) stop("Difft must have same length as data") + + tmp <- cbind(.self$vt.object$data, .self$twin1, .self$twin2, .self$difft) + + colnames(tmp) <- c(colnames(.self$vt.object$data), "twin1", "twin2", "difft") + + return(tmp) + } + ) +) diff --git a/R/forest.double.R b/R/forest.double.R new file mode 100644 index 0000000..39a92c4 --- /dev/null +++ b/R/forest.double.R @@ -0,0 +1,44 @@ +# VT.FOREST.DOUBLE -------------------------------------------------------- +# IF RUNNING DOUBLE FOREST COMPUTATION + +VT.forest.double <- setRefClass( + Class = "VT.forest.double", + + contains = "VT.forest", + + fields = list( + model_trt1 = "ANY", + model_trt0 = "ANY" + ), + + methods = list( + initialize = function(vt.object, model_trt1, model_trt0, ...){ + .self$checkModel(model_trt1) + .self$checkModel(model_trt0) + + .self$model_trt1 <- model_trt1 + .self$model_trt0 <- model_trt0 + + callSuper(vt.object, ...) + }, + + computeTwin1 = function(){ + # Model with treatment (1) + .self$twin1[.self$vt.object$data[, 2] == 1] <- VT.predict(rfor = .self$model_trt1, type = .self$vt.object$type) + + # Model without treatment (0) + .self$twin1[vt.object$data[, 2] == 0] <- VT.predict(rfor = .self$model_trt0, type = .self$vt.object$type) + return(.self$twin1) + return(invisible(.self$twin1)) + }, + + computeTwin2 = function(){ + # Model with treatment (1) + .self$twin2[.self$vt.object$data[, 2] == 1] <- VT.predict(.self$model_trt0, newdata = .self$vt.object$getX(1, interactions = F), type = .self$vt.object$type) + + # Model without treatment (0) + .self$twin2[.self$vt.object$data[, 2] == 0] <- VT.predict(.self$model_trt1, newdata = .self$vt.object$getX(0, interactions = F), type = .self$vt.object$type) + return(invisible(.self$twin2)) + } + ) +) \ No newline at end of file diff --git a/R/forest.fold.R b/R/forest.fold.R new file mode 100644 index 0000000..d97f79c --- /dev/null +++ b/R/forest.fold.R @@ -0,0 +1,79 @@ +# VT.FOREST.FOLD ---------------------------------------------------------- + +VT.forest.fold <- setRefClass( + Class = "VT.forest.fold", + + contains = "VT.forest", + + fields = list( + interactions = "logical", + fold = "numeric", + ratio = "numeric", + groups = "vector" + ), + + methods = list( + initialize = function(vt.object, fold, ratio, interactions = T, ...){ + + .self$fold <- fold + + .self$ratio <- ratio + + .self$interactions <- interactions + + callSuper(vt.object, ...) + }, + + run = function(parallel = F, ...){ + + .self$groups <- sample(1:.self$fold, nrow(.self$vt.object$data), replace = T) + + for(g in 1:.self$fold){ + .self$runOneForest(g, ...) + } + + .self$computeDifft() + }, + + runOneForest = function(group, ...){ + data <- .self$vt.object$getX(interactions = .self$interactions) + X <- data[.self$groups != group, ] + Y <- .self$vt.object$data[.self$groups != group, 1] + Yeff <- table(Y) # 1 -> levels(Y)[1] & 2 -> levels(Y)[2] + sampmin <- min(Yeff[1], Yeff[2]) + + if(sampmin == Yeff[2]){ + samp2 <- sampmin + samp1 <- min(Yeff[1], round(.self$ratio*Yeff[1], digits = 0)) + }else{ + samp2 <- Yeff[2] + samp1 <- sampmin + } + rf <- randomForest(x = X, y = Y, sampsize = c(samp1, samp2), keep.forest = T, ...) + + .self$computeTwin1(rf, group) + .self$computeTwin2(rf, group) + }, + + computeTwin1 = function(rfor, group){ + + data <- .self$vt.object$data[.self$groups == group, -1] + + .self$twin1[.self$groups == group] <- VT.predict(rfor = rfor, newdata = data, type = .self$vt.object$type) + + return(invisible(.self$twin1)) + }, + + computeTwin2 = function(rfor, group){ + + .self$vt.object$switchTreatment() + data <- .self$vt.object$getX(interactions = .self$interactions) + data <- data[.self$groups == group, ] + + .self$twin2[.self$groups == group] <- VT.predict(rfor = rfor, newdata = data, type = .self$vt.object$type) + + .self$vt.object$switchTreatment() + return(invisible(.self$twin2)) + } + ) +) diff --git a/R/forest.one.R b/R/forest.one.R new file mode 100644 index 0000000..dc4c023 --- /dev/null +++ b/R/forest.one.R @@ -0,0 +1,37 @@ +# VT.FOREST.ONE ----------------------------------------------------------- +# IF RUNNING ONE FOREST COMPUTATION +# model - modèle de forêt aléatoire issus du package caret, randomForest ou party (cforest) +VT.forest.one <- setRefClass( + Class = "VT.forest.one", + + contains = "VT.forest", + + fields = list( + model = "ANY", + interactions = "logical" + ), + + methods = list( + initialize = function(vt.object, model, interactions = T, ...){ + .self$checkModel(model) + + .self$model <- model + + .self$interactions <- interactions + + callSuper(vt.object, ...) + }, + + computeTwin1 = function(){ + .self$twin1 <- as.vector(VT.predict(rfor = .self$model, type = .self$vt.object$type)) + return(invisible(.self$twin1)) + }, + + computeTwin2 = function(){ + .self$twin2 <- as.vector(VT.predict(.self$model, + newdata = .self$vt.object$getX(interactions = .self$interactions), + .self$vt.object$type)) + return(invisible(.self$twin2)) + } + ) +) \ No newline at end of file diff --git a/R/incidences.R b/R/incidences.R new file mode 100644 index 0000000..ce02e7d --- /dev/null +++ b/R/incidences.R @@ -0,0 +1,35 @@ +# INCIDENCE TABLES FUNCTION /!\ important + +setGeneric("VT.incidences", + function(vt.difft, select, rr.snd){ standardGeneric("VT.incidences") } +) + +setMethod( + f = "VT.incidences", + signature = c(vt.difft = "VT.difft", select = "character", rr.snd = "logical"), + function(vt.difft, select, rr.snd = F){ + vector.selected <- with(vt.difft$vt.object$data, ifelse(eval(parse(text = select)), 1, 0)) + + return(VT.incidences(vt.difft, select = vector.selected, rr.snd)) + } +) + +setMethod( + f = "VT.incidences", + signature = c(vt.difft = "VT.difft", select = "vector", rr.snd = "logical"), + function(vt.difft, select, rr.snd = F){ + + selected <- with(vt.difft$vt.object$data, vt.difft$vt.object$data[as.logical(select), c(1,2)]) + not.selected <- with(vt.difft$vt.object$data, vt.difft$vt.object$data[!as.logical(select), c(1,2)]) + + list.tmp <- list(vt.getIncidence(selected), vt.getIncidence(not.selected)) + names(list.tmp) <- c("table.selected", "table.not.selected") + + if(isTRUE(rr.snd)){ + list.tmp$table.selected$rr.snd <- vt.rr.snd(vt.difft, select) + list.tmp$table.not.selected$rr.snd <- vt.rr.snd(vt.difft, (1-select)) + } + + return(list.tmp) + } +) diff --git a/R/object.R b/R/object.R new file mode 100644 index 0000000..7780541 --- /dev/null +++ b/R/object.R @@ -0,0 +1,92 @@ +# VT.OBJECT --------------------------------------------------------------- + +# Permet de stocker les données +# alpha - paramètre inutile dans cette version +# screening & varimp permettent de construire des arbres sur les variables +# définies dans varimp si screening = True +# delta - différence d'incidence entre les deux "bras" +# type - type de réponse - binary ou continous - seul binary est disponible +# interactions - si TRUE getX() retourne (X,X*T,X*(1-T)) +# +# $getFormula() - utile pour retourner une formule pour rpart +# $getX(trt = c(0,1,NULL), interactions = c(TRUE, FALSE)) - si trt est non NULL +# getX() retourne les lignes pour le traitement passé paramètre (utile pour les doubles forests) +# $getY() - retourne la réponse / cible +# ... +VT.object <- setRefClass( + Class = "VT.object", + + fields = list( + data = "data.frame", + alpha = "numeric", + screening = "logical", + varimp = "character", + delta = "numeric", + type = "character" + ), + + methods = list( + initialize = function(screening = F, alpha = 1, type = "binary", ...){ + + .self$screening <- screening + + .self$type <- type + + .self$alpha <- alpha + + .self$initFields(...) + }, + + getFormula = function(){ + return(as.formula(paste(colnames(.self$data)[1], ".", sep = "~"))) + }, + + getX = function(interactions = T, trt = NULL){ + # retour les prédicteurs si trt n'est pas null + if(!is.null(trt)) return(.self$data[.self$data[,2] == trt, -c(1,2)]) + # retourne les predicteurs*traitement peut importe le traitement si interactions est à TRUE + if(interactions == T) return(.self$getXwithInt()) + # retourne les predicteurs + return(.self$data[, -1]) + }, + + getY = function(trt = NULL){ + if(is.null(trt)) return(.self$data[, 1]) + return(.self$data[.self$data[,2] == trt, 1]) + }, + + getXwithInt = function(){ + tmp <- .self$data[, -c(1,2)] + return(data.frame(cbind(.self$data[,-1], tmp*.self$data[, 2], tmp*(1 - .self$data[, 2])))) + }, + + switchTreatment = function(){ + cl <- class(.self$data[, 2]) + # Treatments must be numeric or integer and binary + .self$data[, 2] <- 1 - .self$data[, 2] + # keep original class for treatment + if(cl == "integer"){ + .self$data[, 2] <- as.integer(.self$data[, 2]) + }else{ + .self$data[, 2] <- as.numeric(.self$data[, 2]) + } + cat("witch \n") + return(TRUE) + }, + + computeDelta = function(){ + if(.self$type == "binary"){ + .self$delta <- sum((as.numeric(.self$data[, 1]) - 1)*(.self$data[, 2])) / sum(.self$data[, 2]) - + sum((as.numeric(.self$data[, 1]) - 1)*(1 - .self$data[, 2])) / sum(1 - .self$data[, 2]) + + return(.self$delta) + }else{ + stop("Error : type is not Binary") + } + }, + + getIncidences = function(){ + return(vt.getIncidence(.self$data)) + } + ) +) diff --git a/R/predict.R b/R/predict.R new file mode 100644 index 0000000..b0d4d1c --- /dev/null +++ b/R/predict.R @@ -0,0 +1,92 @@ +# PREDICTION -------------------------------------------------------------- + +# LES METHODES SUIVANTES PERMETTENT DE PREDIRE LA PROBA D'INTERET POUR +# LES TROIS CLASSES SUIVANTES : train, randomForest, RandomForest{party} + +setGeneric("VT.predict", + function(rfor, newdata, type){standardGeneric("VT.predict")} +) + +setMethod( + f = "VT.predict", + signature = c(rfor = "RandomForest", newdata = "missing", type = "character"), + function(rfor, type = "binary"){ + if(! type %in% c("binary", "continous")) stop("Type must be Binary or continous") + if(type == "binary"){ + tmp <- predict(rfor, OOB = T, type = "prob") + tmp <- unlist(tmp) + tmp <- tmp[seq(2, length(tmp), 2)] + }else{ + message("continous is not done yet") + tmp <- NULL + } + + return(tmp) + } +) + +setMethod( + f = "VT.predict", + signature = c(rfor = "RandomForest", newdata = "data.frame", type = "character"), + function(rfor, newdata, type = "binary"){ + if(! type %in% c("binary", "continous")) stop("Type must be Binary or continous") + if(type == "binary"){ + tmp <- predict(rfor, newdata = newdata, type = "prob") + tmp <- unlist(tmp) + tmp <- tmp[seq(2, length(tmp), 2)] + }else{ + message("continous is not done yet") + tmp <- NULL + } + + return(tmp) + } +) + +setMethod( + f = "VT.predict", + signature = c(rfor = "randomForest", newdata = "missing", type = "character"), + function(rfor, type = "binary"){ + if(! type %in% c("binary", "continous")) stop("Type must be Binary or continous") + if(type == "binary"){ + # no longer available in all version ?! + # tmp <- rfor$vote[, 2] # get the "o" prob + tmp <- predict(rfor, type = "prob")[, 2] # We want to get the "o" prob + }else{ + message("continous is not done yet") + tmp <- NULL + } + return(tmp) + } +) + +setMethod( + f = "VT.predict", + signature = c(rfor = "randomForest", newdata = "data.frame", type = "character"), + function(rfor, newdata, type = "binary"){ + if(! type %in% c("binary", "continous")) stop("Type must be Binary or continous") + if(type == "binary"){ + tmp <- predict(rfor, newdata = newdata, type = "prob")[, 2] # We want to get the "o" prob + }else{ + message("continous is not done yet") + tmp <- NULL + } + return(tmp) + } +) + +setMethod( + f = "VT.predict", + signature = c(rfor = "train", newdata = "ANY", type = "character"), + function(rfor, newdata, type = "binary"){ + return(VT.predict(rfor$finalModel, newdata, type)) + } +) + +setMethod( + f = "VT.predict", + signature = c(rfor = "train", newdata = "missing", type = "character"), + function(rfor, type = "binary"){ + return(VT.predict(rfor=rfor$finalModel, type=type)) + } +) \ No newline at end of file diff --git a/R/tools.R b/R/tools.R new file mode 100644 index 0000000..19e76e0 --- /dev/null +++ b/R/tools.R @@ -0,0 +1,43 @@ +vt.getQAOriginal <- function(response, trt, ahat){ + if(is.factor(response)) response = as.numeric(response) - 1 + + if(sum(ahat) == 0){ + tmp <- 0 + }else{ + tmp <- sum(response*ahat*trt)/sum(ahat*trt) - + sum(response*ahat*(1-trt))/sum(ahat*(1-trt)) - + (sum(response*trt)/sum(trt) - + sum(response*(1-trt))/sum(1-trt)) + } + return(tmp) +} + +vt.getTable <- function(table){ + if(is.list(table)) table <- table[[1]] + Incidence <- function(X) as.character(round(X[2] / X[3], digits = 3)) + t <- addmargins(table, margin = c(1,2), FUN = sum, quiet = T) + t <- addmargins(t, FUN = Incidence, margin = 1, quiet = T) + rr <- as.numeric(t["Incidence", "1"]) / as.numeric(t["Incidence", "0"]) + return(list(table = t, rr = rr)) +} + +vt.getIncidence <- function(df){ + if (nrow(df) == 0) table.res <- NULL + else{ + table.res <- vt.getTable(table(df[, 1], + df[, 2], + deparse.level = 2, + dnn = c("resp", "trt"))) + } + return(table.res) +} + + +vt.rr.snd <- function(vt.difft, selected){ + if(sum(selected) == 0){ + return(0) + }else{ + return((sum(vt.difft$twin1*selected*vt.difft$vt.object$data[, 2])/sum(selected*vt.difft$vt.object$data[, 2])) + /(sum(vt.difft$twin1*selected*(1-vt.difft$vt.object$data[, 2]))/sum(selected*(1-vt.difft$vt.object$data[, 2])))) + } +} \ No newline at end of file diff --git a/R/tree.R b/R/tree.R new file mode 100644 index 0000000..98aa97c --- /dev/null +++ b/R/tree.R @@ -0,0 +1,233 @@ +# TREES COMPUTATIONS ------------------------------------------------------ + +# outcome - variable à expliquer par l'arbre (continue ou binaire) +# threshold - seuil à dépasser par difft +# screening - override de vt.forest$screening sinon prend la valeur de vt.forest$screening +# name - nom de l'arbre +# tree - objet rpart (l'arbre en lui même) +# Ahat - indicatrice des observations appartement à Ahat (toujours en fonction du treshold) +# NE PAS OUBLIER QUE CELA DEPEND EGALEMENT DE LA FORET ASSOCIEE +# +# $getData() - les variables explicatives de l'arbre: soit les X, soit intersections de X et vt.forest$varimp +# $run(...) - lance l'arbre, en options les paramètre de rpart(...) +# $getInfos() - Résume threshold, delta, sizeof Ahat. +# $getRules() - Récupère les incidences, les règles, les stats de chaques noeuds (terminaux ou non, favorable ou non) +# $getIncidences() - Récupère un tableau d'incidence d'une rule +# $getAhatIncidence - Récupère le tableau d'incidence de Ahat +# $getAhatQualty - Récupère la qualité de Ahat (snd, resub) +VT.tree <- setRefClass( + Class = "VT.tree", + + fields = list( + vt.difft = "VT.difft", + outcome = "vector", + threshold = "numeric", + screening = "logical", + sens = "character", + name = "character", + tree = "rpart", + Ahat = "vector" + ), + + methods = list( + initialize = function(vt.difft = VT.difft(), threshold = 0.05, sens = ">", screening = NULL){ + .self$vt.difft <- vt.difft + + .self$threshold <- threshold + + .self$sens <- sens + + .self$screening <- ifelse(is.null(screening), vt.difft$vt.object$screening, screening) + + }, + + getData = function(){ + d <- .self$vt.difft$vt.object$data[, 3:ncol(.self$vt.difft$vt.object$data)] + + if(.self$screening == T){ + d.tmp <- d + d <- d.tmp[, colnames(d.tmp) %in% .self$vt.difft$vt.object$varimp] # To see later + } + + d <- data.frame(.self$outcome, d) + names(d) <- c(.self$name, colnames(d)[-1]) + + return(d) + }, + + computeNameOfTree = function(type){ + if(.self$threshold < 0 ){ + threshold.chr <- paste0("m", -.self$threshold) + }else{ + threshold.chr <- as.character(.self$threshold) + } + + tmp = strsplit(threshold.chr, "[.]")[[1]] + return(paste(type, tmp[1], tmp[2], sep = "")) + }, + + run = function(){ + if(length(.self$vt.difft$difft) == 0) stop("VT.difft::difft is an empty vector") + }, + + getInfos = function(){ + cat("\n") + cat(sprintf("Threshold = %0.4f", .self$threshold)) + cat("\n") + cat(sprintf("Delta = %0.4f", .self$vt.difft$vt.object$delta)) + cat("\n") + cat(sprintf("Sens : %s", .self$sens)) + cat("\n") + # cat(sprintf("Bounds = %0.4f", (.self$vt.difft$vt.object$delta + .self$threshold))) + # cat("\n") + cat(sprintf("Size of Ahat : %i", (sum(.self$Ahat)))) + + return(invisible(NULL)) + }, + + getRules = function(only.leaf = F, only.fav = F, tables = T, verbose = T){ + + # On supprime le root node, inutile pour les stats d'incidences et autres... + full.frame <- .self$tree$frame[-1, ] + + if (only.fav == T){ + if(inherits(.self, "VT.tree.reg")){ + if(.self$sens == ">"){ + frm.only.fav <- full.frame[full.frame$yval >= (.self$threshold), ] + } else { + frm.only.fav <- full.frame[full.frame$yval <= (.self$threshold), ] + } + }else if(inherits(.self, "VT.tree.class")){ + frm.only.fav <- full.frame[full.frame$yval == 2, ] + } + frm <- frm.only.fav + } + + if (only.leaf == T){ + if(inherits(.self, "VT.tree.reg")){ + frm.only.leaf <- full.frame[full.frame$var == "", ] + }else if(inherits(.self, "VT.tree.class")){ + frm.only.leaf <- full.frame[full.frame$var == "", ] + } + frm <- frm.only.leaf + } + + if (only.fav == T & only.leaf == T){ + frm <- frm.only.leaf[ intersect(rownames(frm.only.leaf), rownames(frm.only.fav)) , ] + }else if (only.fav == F & only.leaf == F){ + frm <- full.frame + } + + # Le cas où l'arbre est vide ou n'existe pas: + if (length(frm) == 0) stop("VT.tree : no tree"); + if (ncol(frm)==0) stop("VT.tree : no rules"); + + pth <- path.rpart(.self$tree, nodes = row.names(frm), print.it = F) + # Delete 'root' node des règles + pth <- lapply(pth, FUN = function(d) return(d[-1])) + + depth <- 0 + nodes <- names(pth) + rules <- data.frame(replicate(6, character(0), simplify = T), replicate(2, numeric(0), simplify = T), stringsAsFactors = F) + + colnames(rules) <- c("Subgroup", "Subgroup size", "Treatement event rate", "Control event rate", + "Treatment sample size", "Control sample size", "RR (resub)", "RR (snd)") + for(i in nodes){ + pth.text <- paste(pth[[i]], collapse = " & ") + incid <- .self$getIncidences(pth.text) + + rules[i, 1] <- pth.text + rules[i, 2] <- incid$table.selected$table[3, 3] #size subgroupg + rules[i, 3] <- incid$table.selected$table[4, 2] #treatment event rate + rules[i, 4] <- incid$table.selected$table[4, 1] #control event rate + rules[i, 5] <- incid$table.selected$table[3, 2] #treatment sample size + rules[i, 6] <- incid$table.selected$table[3, 1] #control sample size + rules[i, 7] <- round(incid$table.selected$rr, digits = 3) # rr (resub) + rules[i, 8] <- round(incid$table.selected$rr.snd, digits = 3) # rr (snd) + + if(isTRUE(verbose)){ + cat("----------------------------\n") + cat(sprintf("| Rule number %s : ", i)) + + + if(inherits(.self, "VT.tree.reg")){ + cat(sprintf("Y val = %0.3f \n", frm[i, ]$yval)) + }else{ + cat(sprintf("Y val = %i \n", frm[i, ]$yval)) + } + + cat("----------------------------\n") + + cat(sprintf("[n = %i", frm[i, ]$n)) + cat(sprintf(", loss = %s, prob = %0.2f", + frm[i, ]$dev, + frm[i, ]$yval2[, 5])) + + cat("] \n") + cat("\t\t") + cat(pth[[i]], sep="\n\t\t") + + if(isTRUE(tables)){ + cat("\n") + cat(sprintf("Incidence dans la selection \n")) + print(incid$table.selected$table) + cat("\n") + cat(sprintf("Risque relatif (resub) : %0.3f \n", incid$table.selected$rr)) + cat(sprintf("Risque relatif (snd) : %0.3f \n\n", incid$table.selected$rr.snd)) + + cat(sprintf("Incidence dans le complementaire\n")) + print(incid$table.not.selected$table) + cat("\n") + cat(sprintf("Risque relatif (resub) : %0.3f \n", incid$table.not.selected$rr)) + cat(sprintf("Risque relatif (snd) : %0.3f \n\n", incid$table.not.selected$rr.snd)) + } + + cat("\n\n") + } + } + + return(invisible(rules)) + }, + + getIncidences = function(rule, rr.snd = T){ + return(VT.incidences(.self$vt.difft, rule, rr.snd)) + }, + + getAhatIncidence = function(){ + if(sum(.self$Ahat)!=0){ + + table.inc <- VT.incidences(vt.object = .self$vt.difft$vt.object, select = .self$Ahat) + + table.A <- table.inc$table.selected + table.A.cmpl <- table.inc$table.not.selected + + cat(sprintf("Incidence dans le sous groupe A\n")) + print(table.A$table) + cat("\n") + cat(sprintf("Risque relatif : %0.3f \n\n", table.A$risque_relatif)) + + cat(sprintf("Incidence dans le sous groupe A complementaire\n")) + print(table.A.cmpl$table) + cat("\n") + cat(sprintf("Risque relatif : %0.3f \n\n", table.A.cmpl$risque_relatif)) + }else{ + return("Empty set") + } + }, + + getAhatQuality = function(){ + + resub <- vt.getQAOriginal(.self$Ahat, response = .self$vt.difft$vt.object$getY(), trt = .self$vt.difft$vt.object$data[, 2]) + + snd <- vt.getQAOriginal(.self$Ahat, response = .self$vt.difft$twin1, trt = .self$vt.difft$vt.object$data[, 2]) + + # on ajoute la taille de Ahat + size <- sum(.self$Ahat) + + res <- cbind(size, resub, snd) + names(res) <- c("size", "resub", "snd") + + return(res) + } + ) +) diff --git a/R/tree.class.R b/R/tree.class.R new file mode 100644 index 0000000..67ab9e3 --- /dev/null +++ b/R/tree.class.R @@ -0,0 +1,42 @@ +# VT.TREE.CLASS ----------------------------------------------------------- + +VT.tree.class <- setRefClass( + Class = "VT.tree.class", + + contains = "VT.tree", + + methods = list( + initialize = function(vt.difft, threshold = 0.05, sens = ">", screening = NULL){ + callSuper(vt.difft, threshold, sens, screening) + + .self$name <- .self$computeNameOfTree("class") + + if(.self$sens == ">"){ + .self$outcome <- ifelse(.self$vt.difft$difft >= .self$threshold, 1, 0) + } else { + .self$outcome <- ifelse(.self$vt.difft$difft <= .self$threshold, 1, 0) + } + }, + + run = function(...){ + callSuper() + + data <- .self$getData() + if(sum(data[,1]) != 0){ + .self$tree <- rpart(as.formula(paste(.self$name, ".", sep = "~")), data = data, method = "class", ...) + .self$Ahat <- as.numeric(predict(.self$tree, data, type = "class")) - 1 + }else{ + .self$Ahat <- .self$outcome + } + + return(invisible(tree)) + }, + + sumup = function(){ + cat("Classification Tree") + callSuper() + } + ) +) + +VT.tree.class$lock("threshold", "vt.difft") \ No newline at end of file diff --git a/R/tree.reg.R b/R/tree.reg.R new file mode 100644 index 0000000..13d5e96 --- /dev/null +++ b/R/tree.reg.R @@ -0,0 +1,44 @@ + + +# VT.TREE.REG ------------------------------------------------------------- + +VT.tree.reg <- setRefClass( + Class = "VT.tree.reg", + + contains = "VT.tree", + + methods = list( + initialize = function(vt.difft, threshold = 0.05, sens = ">", screening = NULL){ + callSuper(vt.difft, threshold, sens, screening) + + .self$name <- .self$computeNameOfTree("reg") + + .self$outcome <- .self$vt.difft$difft + }, + + run = function(...){ + callSuper() + + data <- .self$getData() + + .self$tree <- rpart(as.formula(paste(.self$name, ".", sep = "~")), data = data, ...) + + if(.self$sens == ">") + res <- ifelse(predict(.self$tree) >= (.self$threshold), 1, 0) + else + res <- ifelse(predict(.self$tree) <= (.self$threshold), 1, 0) + + .self$Ahat <- res + # if(sum(res) != 0) .self$vt.forest$addAhatColumn(name, res) + return(invisible(tree)) + }, + + sumup = function(){ + cat("Regression Tree") + callSuper() + } + ) +) + +VT.tree.reg$lock("threshold", "vt.difft") + diff --git a/Read-and-delete-me b/Read-and-delete-me new file mode 100644 index 0000000..dcaca43 --- /dev/null +++ b/Read-and-delete-me @@ -0,0 +1,8 @@ +* Edit the help file skeletons in 'man', possibly combining help files for multiple functions. +* Edit the exports in 'NAMESPACE', and add necessary imports. +* Put any C/C++/Fortran code in 'src'. +* If you have compiled code, add a useDynLib() directive to 'NAMESPACE'. +* Run R CMD build to build the package tarball. +* Run R CMD check to check the package tarball. + +Read "Writing R Extensions" for more information. diff --git a/VirtualTwins.Rproj b/VirtualTwins.Rproj new file mode 100644 index 0000000..3cecd82 --- /dev/null +++ b/VirtualTwins.Rproj @@ -0,0 +1,17 @@ +Version: 1.0 + +RestoreWorkspace: Yes +SaveWorkspace: Yes +AlwaysSaveHistory: Yes + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/man/VirtualTwins-package.Rd b/man/VirtualTwins-package.Rd new file mode 100644 index 0000000..2c85bfd --- /dev/null +++ b/man/VirtualTwins-package.Rd @@ -0,0 +1,34 @@ +\name{VirtualTwins-package} +\alias{VirtualTwins-package} +\alias{VirtualTwins} +\docType{package} +\title{ +\packageTitle{VirtualTwins} +} +\description{ +\packageDescription{VirtualTwins} +} +\details{ + +The DESCRIPTION file: +\packageDESCRIPTION{VirtualTwins} +\packageIndices{VirtualTwins} +~~ An overview of how to use the package, including the most important functions ~~ +} +\author{ +\packageAuthor{VirtualTwins} + +Maintainer: \packageMaintainer{VirtualTwins} +} +\references{ +~~ Literature or other references for background information ~~ +} +~~ Optionally other standard keywords, one per line, from file KEYWORDS in the R documentation directory ~~ +\keyword{ package } +\seealso{ +~~ Optional links to other man pages, e.g. ~~ +~~ \code{\link[:-package]{}} ~~ +} +\examples{ +~~ simple examples of the most important functions ~~ +}