mirror of
https://github.com/prise6/aVirtualTwins.git
synced 2024-05-22 22:52:12 +02:00
Initial commit - welcome
This commit is contained in:
commit
8e4f4136ef
2
.Rbuildignore
Normal file
2
.Rbuildignore
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
^.*\.Rproj$
|
||||||
|
^\.Rproj\.user$
|
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
.Rproj.user
|
||||||
|
.Rhistory
|
||||||
|
.RData
|
9
DESCRIPTION
Normal file
9
DESCRIPTION
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
Package: VirtualTwins
|
||||||
|
Type: Package
|
||||||
|
Title: What the package does (short line)
|
||||||
|
Version: 1.0
|
||||||
|
Date: 2015-05-31
|
||||||
|
Author: Who wrote it
|
||||||
|
Maintainer: Who to complain to <yourfault@somewhere.net>
|
||||||
|
Description: More about what it does (maybe more than one line)
|
||||||
|
License: What license is it under?
|
0
R/VirtualTwins.R
Normal file
0
R/VirtualTwins.R
Normal file
33
R/difft.R
Normal file
33
R/difft.R
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
# DIFFT -------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
VT.difft <- setRefClass(
|
||||||
|
Class = "VT.difft",
|
||||||
|
|
||||||
|
fields = list(
|
||||||
|
vt.object = "VT.object",
|
||||||
|
twin1 = "vector",
|
||||||
|
twin2 = "vector",
|
||||||
|
difft = "vector"
|
||||||
|
),
|
||||||
|
|
||||||
|
methods = list(
|
||||||
|
initialize = function(vt.object = VT.object(), ...){
|
||||||
|
.self$vt.object <- vt.object
|
||||||
|
|
||||||
|
.self$twin1 <- .self$twin2 <- rep(NA, nrow(vt.object$data))
|
||||||
|
|
||||||
|
.self$initFields(...)
|
||||||
|
},
|
||||||
|
|
||||||
|
computeDifft = function(){
|
||||||
|
|
||||||
|
if(sum(is.na(.self$twin1)) != 0 | sum(is.na(.self$twin2)) != 0 ) stop("Twins must be valid")
|
||||||
|
|
||||||
|
.self$difft <- ifelse(.self$vt.object$data[, 2] == 1, .self$twin1 - .self$twin2, .self$twin2 - .self$twin1)
|
||||||
|
|
||||||
|
return(invisible(.self$difft)) # To see later
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
52
R/forest.R
Normal file
52
R/forest.R
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
# FORESTS -----------------------------------------------------------------
|
||||||
|
|
||||||
|
# Objet enfant de VT.difft
|
||||||
|
# Objet Parent de VT.forest.one & VT.forest.double, Abstract class : ne doit pas etre instanciée
|
||||||
|
# twin1 - Proba du "oui" de la réponse (modalité d'intéret codé par "o" ou 1 contre "n" ou 0 dans le cas binaire)
|
||||||
|
# sachant le vrai traitement
|
||||||
|
# twin2 - Proba du "oui" [...] sachant le traitement opposé
|
||||||
|
# difft - différence de twin1 - twin2 SI le vrai traitement == 1 SINON twin2 - twin1
|
||||||
|
#
|
||||||
|
# $run() - lance le calcul des probas
|
||||||
|
VT.forest <- setRefClass(
|
||||||
|
Class = "VT.forest",
|
||||||
|
|
||||||
|
contains = "VT.difft",
|
||||||
|
|
||||||
|
methods = list(
|
||||||
|
run = function(){
|
||||||
|
.self$computeTwin1()
|
||||||
|
|
||||||
|
if(inherits(.self, "VT.forest.one")) .self$vt.object$switchTreatment() #if one forest
|
||||||
|
|
||||||
|
.self$computeTwin2()
|
||||||
|
|
||||||
|
if(inherits(.self, "VT.forest.one")) .self$vt.object$switchTreatment() #if one forest
|
||||||
|
|
||||||
|
.self$computeDifft()
|
||||||
|
|
||||||
|
.self$vt.object$computeDelta() # To see later
|
||||||
|
|
||||||
|
return(invisible(.self))
|
||||||
|
},
|
||||||
|
|
||||||
|
checkModel = function(model){
|
||||||
|
if(!(inherits(model, "train") | inherits(model, "RandomForest") | inherits(model, "randomForest"))){
|
||||||
|
stop("Model is not recognized. Must be : train, RandomForest, randomForest")
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
getFullData = function(){
|
||||||
|
if(length(.self$twin1) != nrow(.self$vt.object$data)) stop("Twin1 must have same length as data")
|
||||||
|
if(length(.self$twin2) != nrow(.self$vt.object$data)) stop("Twin2 must have same length as data")
|
||||||
|
|
||||||
|
if(length(.self$difft) != nrow(.self$vt.object$data)) stop("Difft must have same length as data")
|
||||||
|
|
||||||
|
tmp <- cbind(.self$vt.object$data, .self$twin1, .self$twin2, .self$difft)
|
||||||
|
|
||||||
|
colnames(tmp) <- c(colnames(.self$vt.object$data), "twin1", "twin2", "difft")
|
||||||
|
|
||||||
|
return(tmp)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
44
R/forest.double.R
Normal file
44
R/forest.double.R
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
# VT.FOREST.DOUBLE --------------------------------------------------------
|
||||||
|
# IF RUNNING DOUBLE FOREST COMPUTATION
|
||||||
|
|
||||||
|
VT.forest.double <- setRefClass(
|
||||||
|
Class = "VT.forest.double",
|
||||||
|
|
||||||
|
contains = "VT.forest",
|
||||||
|
|
||||||
|
fields = list(
|
||||||
|
model_trt1 = "ANY",
|
||||||
|
model_trt0 = "ANY"
|
||||||
|
),
|
||||||
|
|
||||||
|
methods = list(
|
||||||
|
initialize = function(vt.object, model_trt1, model_trt0, ...){
|
||||||
|
.self$checkModel(model_trt1)
|
||||||
|
.self$checkModel(model_trt0)
|
||||||
|
|
||||||
|
.self$model_trt1 <- model_trt1
|
||||||
|
.self$model_trt0 <- model_trt0
|
||||||
|
|
||||||
|
callSuper(vt.object, ...)
|
||||||
|
},
|
||||||
|
|
||||||
|
computeTwin1 = function(){
|
||||||
|
# Model with treatment (1)
|
||||||
|
.self$twin1[.self$vt.object$data[, 2] == 1] <- VT.predict(rfor = .self$model_trt1, type = .self$vt.object$type)
|
||||||
|
|
||||||
|
# Model without treatment (0)
|
||||||
|
.self$twin1[vt.object$data[, 2] == 0] <- VT.predict(rfor = .self$model_trt0, type = .self$vt.object$type)
|
||||||
|
return(.self$twin1)
|
||||||
|
return(invisible(.self$twin1))
|
||||||
|
},
|
||||||
|
|
||||||
|
computeTwin2 = function(){
|
||||||
|
# Model with treatment (1)
|
||||||
|
.self$twin2[.self$vt.object$data[, 2] == 1] <- VT.predict(.self$model_trt0, newdata = .self$vt.object$getX(1, interactions = F), type = .self$vt.object$type)
|
||||||
|
|
||||||
|
# Model without treatment (0)
|
||||||
|
.self$twin2[.self$vt.object$data[, 2] == 0] <- VT.predict(.self$model_trt1, newdata = .self$vt.object$getX(0, interactions = F), type = .self$vt.object$type)
|
||||||
|
return(invisible(.self$twin2))
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
79
R/forest.fold.R
Normal file
79
R/forest.fold.R
Normal file
|
@ -0,0 +1,79 @@
|
||||||
|
# VT.FOREST.FOLD ----------------------------------------------------------
|
||||||
|
|
||||||
|
VT.forest.fold <- setRefClass(
|
||||||
|
Class = "VT.forest.fold",
|
||||||
|
|
||||||
|
contains = "VT.forest",
|
||||||
|
|
||||||
|
fields = list(
|
||||||
|
interactions = "logical",
|
||||||
|
fold = "numeric",
|
||||||
|
ratio = "numeric",
|
||||||
|
groups = "vector"
|
||||||
|
),
|
||||||
|
|
||||||
|
methods = list(
|
||||||
|
initialize = function(vt.object, fold, ratio, interactions = T, ...){
|
||||||
|
|
||||||
|
.self$fold <- fold
|
||||||
|
|
||||||
|
.self$ratio <- ratio
|
||||||
|
|
||||||
|
.self$interactions <- interactions
|
||||||
|
|
||||||
|
callSuper(vt.object, ...)
|
||||||
|
},
|
||||||
|
|
||||||
|
run = function(parallel = F, ...){
|
||||||
|
|
||||||
|
.self$groups <- sample(1:.self$fold, nrow(.self$vt.object$data), replace = T)
|
||||||
|
|
||||||
|
for(g in 1:.self$fold){
|
||||||
|
.self$runOneForest(g, ...)
|
||||||
|
}
|
||||||
|
|
||||||
|
.self$computeDifft()
|
||||||
|
},
|
||||||
|
|
||||||
|
runOneForest = function(group, ...){
|
||||||
|
data <- .self$vt.object$getX(interactions = .self$interactions)
|
||||||
|
X <- data[.self$groups != group, ]
|
||||||
|
Y <- .self$vt.object$data[.self$groups != group, 1]
|
||||||
|
Yeff <- table(Y) # 1 -> levels(Y)[1] & 2 -> levels(Y)[2]
|
||||||
|
sampmin <- min(Yeff[1], Yeff[2])
|
||||||
|
|
||||||
|
if(sampmin == Yeff[2]){
|
||||||
|
samp2 <- sampmin
|
||||||
|
samp1 <- min(Yeff[1], round(.self$ratio*Yeff[1], digits = 0))
|
||||||
|
}else{
|
||||||
|
samp2 <- Yeff[2]
|
||||||
|
samp1 <- sampmin
|
||||||
|
}
|
||||||
|
rf <- randomForest(x = X, y = Y, sampsize = c(samp1, samp2), keep.forest = T, ...)
|
||||||
|
|
||||||
|
.self$computeTwin1(rf, group)
|
||||||
|
.self$computeTwin2(rf, group)
|
||||||
|
},
|
||||||
|
|
||||||
|
computeTwin1 = function(rfor, group){
|
||||||
|
|
||||||
|
data <- .self$vt.object$data[.self$groups == group, -1]
|
||||||
|
|
||||||
|
.self$twin1[.self$groups == group] <- VT.predict(rfor = rfor, newdata = data, type = .self$vt.object$type)
|
||||||
|
|
||||||
|
return(invisible(.self$twin1))
|
||||||
|
},
|
||||||
|
|
||||||
|
computeTwin2 = function(rfor, group){
|
||||||
|
|
||||||
|
.self$vt.object$switchTreatment()
|
||||||
|
data <- .self$vt.object$getX(interactions = .self$interactions)
|
||||||
|
data <- data[.self$groups == group, ]
|
||||||
|
|
||||||
|
.self$twin2[.self$groups == group] <- VT.predict(rfor = rfor, newdata = data, type = .self$vt.object$type)
|
||||||
|
|
||||||
|
.self$vt.object$switchTreatment()
|
||||||
|
return(invisible(.self$twin2))
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
37
R/forest.one.R
Normal file
37
R/forest.one.R
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
# VT.FOREST.ONE -----------------------------------------------------------
|
||||||
|
# IF RUNNING ONE FOREST COMPUTATION
|
||||||
|
# model - modèle de forêt aléatoire issus du package caret, randomForest ou party (cforest)
|
||||||
|
VT.forest.one <- setRefClass(
|
||||||
|
Class = "VT.forest.one",
|
||||||
|
|
||||||
|
contains = "VT.forest",
|
||||||
|
|
||||||
|
fields = list(
|
||||||
|
model = "ANY",
|
||||||
|
interactions = "logical"
|
||||||
|
),
|
||||||
|
|
||||||
|
methods = list(
|
||||||
|
initialize = function(vt.object, model, interactions = T, ...){
|
||||||
|
.self$checkModel(model)
|
||||||
|
|
||||||
|
.self$model <- model
|
||||||
|
|
||||||
|
.self$interactions <- interactions
|
||||||
|
|
||||||
|
callSuper(vt.object, ...)
|
||||||
|
},
|
||||||
|
|
||||||
|
computeTwin1 = function(){
|
||||||
|
.self$twin1 <- as.vector(VT.predict(rfor = .self$model, type = .self$vt.object$type))
|
||||||
|
return(invisible(.self$twin1))
|
||||||
|
},
|
||||||
|
|
||||||
|
computeTwin2 = function(){
|
||||||
|
.self$twin2 <- as.vector(VT.predict(.self$model,
|
||||||
|
newdata = .self$vt.object$getX(interactions = .self$interactions),
|
||||||
|
.self$vt.object$type))
|
||||||
|
return(invisible(.self$twin2))
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
35
R/incidences.R
Normal file
35
R/incidences.R
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
# INCIDENCE TABLES FUNCTION /!\ important
|
||||||
|
|
||||||
|
setGeneric("VT.incidences",
|
||||||
|
function(vt.difft, select, rr.snd){ standardGeneric("VT.incidences") }
|
||||||
|
)
|
||||||
|
|
||||||
|
setMethod(
|
||||||
|
f = "VT.incidences",
|
||||||
|
signature = c(vt.difft = "VT.difft", select = "character", rr.snd = "logical"),
|
||||||
|
function(vt.difft, select, rr.snd = F){
|
||||||
|
vector.selected <- with(vt.difft$vt.object$data, ifelse(eval(parse(text = select)), 1, 0))
|
||||||
|
|
||||||
|
return(VT.incidences(vt.difft, select = vector.selected, rr.snd))
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
setMethod(
|
||||||
|
f = "VT.incidences",
|
||||||
|
signature = c(vt.difft = "VT.difft", select = "vector", rr.snd = "logical"),
|
||||||
|
function(vt.difft, select, rr.snd = F){
|
||||||
|
|
||||||
|
selected <- with(vt.difft$vt.object$data, vt.difft$vt.object$data[as.logical(select), c(1,2)])
|
||||||
|
not.selected <- with(vt.difft$vt.object$data, vt.difft$vt.object$data[!as.logical(select), c(1,2)])
|
||||||
|
|
||||||
|
list.tmp <- list(vt.getIncidence(selected), vt.getIncidence(not.selected))
|
||||||
|
names(list.tmp) <- c("table.selected", "table.not.selected")
|
||||||
|
|
||||||
|
if(isTRUE(rr.snd)){
|
||||||
|
list.tmp$table.selected$rr.snd <- vt.rr.snd(vt.difft, select)
|
||||||
|
list.tmp$table.not.selected$rr.snd <- vt.rr.snd(vt.difft, (1-select))
|
||||||
|
}
|
||||||
|
|
||||||
|
return(list.tmp)
|
||||||
|
}
|
||||||
|
)
|
92
R/object.R
Normal file
92
R/object.R
Normal file
|
@ -0,0 +1,92 @@
|
||||||
|
# VT.OBJECT ---------------------------------------------------------------
|
||||||
|
|
||||||
|
# Permet de stocker les données
|
||||||
|
# alpha - paramètre inutile dans cette version
|
||||||
|
# screening & varimp permettent de construire des arbres sur les variables
|
||||||
|
# définies dans varimp si screening = True
|
||||||
|
# delta - différence d'incidence entre les deux "bras"
|
||||||
|
# type - type de réponse - binary ou continous - seul binary est disponible
|
||||||
|
# interactions - si TRUE getX() retourne (X,X*T,X*(1-T))
|
||||||
|
#
|
||||||
|
# $getFormula() - utile pour retourner une formule pour rpart
|
||||||
|
# $getX(trt = c(0,1,NULL), interactions = c(TRUE, FALSE)) - si trt est non NULL
|
||||||
|
# getX() retourne les lignes pour le traitement passé paramètre (utile pour les doubles forests)
|
||||||
|
# $getY() - retourne la réponse / cible
|
||||||
|
# ...
|
||||||
|
VT.object <- setRefClass(
|
||||||
|
Class = "VT.object",
|
||||||
|
|
||||||
|
fields = list(
|
||||||
|
data = "data.frame",
|
||||||
|
alpha = "numeric",
|
||||||
|
screening = "logical",
|
||||||
|
varimp = "character",
|
||||||
|
delta = "numeric",
|
||||||
|
type = "character"
|
||||||
|
),
|
||||||
|
|
||||||
|
methods = list(
|
||||||
|
initialize = function(screening = F, alpha = 1, type = "binary", ...){
|
||||||
|
|
||||||
|
.self$screening <- screening
|
||||||
|
|
||||||
|
.self$type <- type
|
||||||
|
|
||||||
|
.self$alpha <- alpha
|
||||||
|
|
||||||
|
.self$initFields(...)
|
||||||
|
},
|
||||||
|
|
||||||
|
getFormula = function(){
|
||||||
|
return(as.formula(paste(colnames(.self$data)[1], ".", sep = "~")))
|
||||||
|
},
|
||||||
|
|
||||||
|
getX = function(interactions = T, trt = NULL){
|
||||||
|
# retour les prédicteurs si trt n'est pas null
|
||||||
|
if(!is.null(trt)) return(.self$data[.self$data[,2] == trt, -c(1,2)])
|
||||||
|
# retourne les predicteurs*traitement peut importe le traitement si interactions est à TRUE
|
||||||
|
if(interactions == T) return(.self$getXwithInt())
|
||||||
|
# retourne les predicteurs
|
||||||
|
return(.self$data[, -1])
|
||||||
|
},
|
||||||
|
|
||||||
|
getY = function(trt = NULL){
|
||||||
|
if(is.null(trt)) return(.self$data[, 1])
|
||||||
|
return(.self$data[.self$data[,2] == trt, 1])
|
||||||
|
},
|
||||||
|
|
||||||
|
getXwithInt = function(){
|
||||||
|
tmp <- .self$data[, -c(1,2)]
|
||||||
|
return(data.frame(cbind(.self$data[,-1], tmp*.self$data[, 2], tmp*(1 - .self$data[, 2]))))
|
||||||
|
},
|
||||||
|
|
||||||
|
switchTreatment = function(){
|
||||||
|
cl <- class(.self$data[, 2])
|
||||||
|
# Treatments must be numeric or integer and binary
|
||||||
|
.self$data[, 2] <- 1 - .self$data[, 2]
|
||||||
|
# keep original class for treatment
|
||||||
|
if(cl == "integer"){
|
||||||
|
.self$data[, 2] <- as.integer(.self$data[, 2])
|
||||||
|
}else{
|
||||||
|
.self$data[, 2] <- as.numeric(.self$data[, 2])
|
||||||
|
}
|
||||||
|
cat("witch \n")
|
||||||
|
return(TRUE)
|
||||||
|
},
|
||||||
|
|
||||||
|
computeDelta = function(){
|
||||||
|
if(.self$type == "binary"){
|
||||||
|
.self$delta <- sum((as.numeric(.self$data[, 1]) - 1)*(.self$data[, 2])) / sum(.self$data[, 2]) -
|
||||||
|
sum((as.numeric(.self$data[, 1]) - 1)*(1 - .self$data[, 2])) / sum(1 - .self$data[, 2])
|
||||||
|
|
||||||
|
return(.self$delta)
|
||||||
|
}else{
|
||||||
|
stop("Error : type is not Binary")
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
getIncidences = function(){
|
||||||
|
return(vt.getIncidence(.self$data))
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
92
R/predict.R
Normal file
92
R/predict.R
Normal file
|
@ -0,0 +1,92 @@
|
||||||
|
# PREDICTION --------------------------------------------------------------
|
||||||
|
|
||||||
|
# LES METHODES SUIVANTES PERMETTENT DE PREDIRE LA PROBA D'INTERET POUR
|
||||||
|
# LES TROIS CLASSES SUIVANTES : train, randomForest, RandomForest{party}
|
||||||
|
|
||||||
|
setGeneric("VT.predict",
|
||||||
|
function(rfor, newdata, type){standardGeneric("VT.predict")}
|
||||||
|
)
|
||||||
|
|
||||||
|
setMethod(
|
||||||
|
f = "VT.predict",
|
||||||
|
signature = c(rfor = "RandomForest", newdata = "missing", type = "character"),
|
||||||
|
function(rfor, type = "binary"){
|
||||||
|
if(! type %in% c("binary", "continous")) stop("Type must be Binary or continous")
|
||||||
|
if(type == "binary"){
|
||||||
|
tmp <- predict(rfor, OOB = T, type = "prob")
|
||||||
|
tmp <- unlist(tmp)
|
||||||
|
tmp <- tmp[seq(2, length(tmp), 2)]
|
||||||
|
}else{
|
||||||
|
message("continous is not done yet")
|
||||||
|
tmp <- NULL
|
||||||
|
}
|
||||||
|
|
||||||
|
return(tmp)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
setMethod(
|
||||||
|
f = "VT.predict",
|
||||||
|
signature = c(rfor = "RandomForest", newdata = "data.frame", type = "character"),
|
||||||
|
function(rfor, newdata, type = "binary"){
|
||||||
|
if(! type %in% c("binary", "continous")) stop("Type must be Binary or continous")
|
||||||
|
if(type == "binary"){
|
||||||
|
tmp <- predict(rfor, newdata = newdata, type = "prob")
|
||||||
|
tmp <- unlist(tmp)
|
||||||
|
tmp <- tmp[seq(2, length(tmp), 2)]
|
||||||
|
}else{
|
||||||
|
message("continous is not done yet")
|
||||||
|
tmp <- NULL
|
||||||
|
}
|
||||||
|
|
||||||
|
return(tmp)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
setMethod(
|
||||||
|
f = "VT.predict",
|
||||||
|
signature = c(rfor = "randomForest", newdata = "missing", type = "character"),
|
||||||
|
function(rfor, type = "binary"){
|
||||||
|
if(! type %in% c("binary", "continous")) stop("Type must be Binary or continous")
|
||||||
|
if(type == "binary"){
|
||||||
|
# no longer available in all version ?!
|
||||||
|
# tmp <- rfor$vote[, 2] # get the "o" prob
|
||||||
|
tmp <- predict(rfor, type = "prob")[, 2] # We want to get the "o" prob
|
||||||
|
}else{
|
||||||
|
message("continous is not done yet")
|
||||||
|
tmp <- NULL
|
||||||
|
}
|
||||||
|
return(tmp)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
setMethod(
|
||||||
|
f = "VT.predict",
|
||||||
|
signature = c(rfor = "randomForest", newdata = "data.frame", type = "character"),
|
||||||
|
function(rfor, newdata, type = "binary"){
|
||||||
|
if(! type %in% c("binary", "continous")) stop("Type must be Binary or continous")
|
||||||
|
if(type == "binary"){
|
||||||
|
tmp <- predict(rfor, newdata = newdata, type = "prob")[, 2] # We want to get the "o" prob
|
||||||
|
}else{
|
||||||
|
message("continous is not done yet")
|
||||||
|
tmp <- NULL
|
||||||
|
}
|
||||||
|
return(tmp)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
setMethod(
|
||||||
|
f = "VT.predict",
|
||||||
|
signature = c(rfor = "train", newdata = "ANY", type = "character"),
|
||||||
|
function(rfor, newdata, type = "binary"){
|
||||||
|
return(VT.predict(rfor$finalModel, newdata, type))
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
setMethod(
|
||||||
|
f = "VT.predict",
|
||||||
|
signature = c(rfor = "train", newdata = "missing", type = "character"),
|
||||||
|
function(rfor, type = "binary"){
|
||||||
|
return(VT.predict(rfor=rfor$finalModel, type=type))
|
||||||
|
}
|
||||||
|
)
|
43
R/tools.R
Normal file
43
R/tools.R
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
vt.getQAOriginal <- function(response, trt, ahat){
|
||||||
|
if(is.factor(response)) response = as.numeric(response) - 1
|
||||||
|
|
||||||
|
if(sum(ahat) == 0){
|
||||||
|
tmp <- 0
|
||||||
|
}else{
|
||||||
|
tmp <- sum(response*ahat*trt)/sum(ahat*trt) -
|
||||||
|
sum(response*ahat*(1-trt))/sum(ahat*(1-trt)) -
|
||||||
|
(sum(response*trt)/sum(trt) -
|
||||||
|
sum(response*(1-trt))/sum(1-trt))
|
||||||
|
}
|
||||||
|
return(tmp)
|
||||||
|
}
|
||||||
|
|
||||||
|
vt.getTable <- function(table){
|
||||||
|
if(is.list(table)) table <- table[[1]]
|
||||||
|
Incidence <- function(X) as.character(round(X[2] / X[3], digits = 3))
|
||||||
|
t <- addmargins(table, margin = c(1,2), FUN = sum, quiet = T)
|
||||||
|
t <- addmargins(t, FUN = Incidence, margin = 1, quiet = T)
|
||||||
|
rr <- as.numeric(t["Incidence", "1"]) / as.numeric(t["Incidence", "0"])
|
||||||
|
return(list(table = t, rr = rr))
|
||||||
|
}
|
||||||
|
|
||||||
|
vt.getIncidence <- function(df){
|
||||||
|
if (nrow(df) == 0) table.res <- NULL
|
||||||
|
else{
|
||||||
|
table.res <- vt.getTable(table(df[, 1],
|
||||||
|
df[, 2],
|
||||||
|
deparse.level = 2,
|
||||||
|
dnn = c("resp", "trt")))
|
||||||
|
}
|
||||||
|
return(table.res)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
vt.rr.snd <- function(vt.difft, selected){
|
||||||
|
if(sum(selected) == 0){
|
||||||
|
return(0)
|
||||||
|
}else{
|
||||||
|
return((sum(vt.difft$twin1*selected*vt.difft$vt.object$data[, 2])/sum(selected*vt.difft$vt.object$data[, 2]))
|
||||||
|
/(sum(vt.difft$twin1*selected*(1-vt.difft$vt.object$data[, 2]))/sum(selected*(1-vt.difft$vt.object$data[, 2]))))
|
||||||
|
}
|
||||||
|
}
|
233
R/tree.R
Normal file
233
R/tree.R
Normal file
|
@ -0,0 +1,233 @@
|
||||||
|
# TREES COMPUTATIONS ------------------------------------------------------
|
||||||
|
|
||||||
|
# outcome - variable à expliquer par l'arbre (continue ou binaire)
|
||||||
|
# threshold - seuil à dépasser par difft
|
||||||
|
# screening - override de vt.forest$screening sinon prend la valeur de vt.forest$screening
|
||||||
|
# name - nom de l'arbre
|
||||||
|
# tree - objet rpart (l'arbre en lui même)
|
||||||
|
# Ahat - indicatrice des observations appartement à Ahat (toujours en fonction du treshold)
|
||||||
|
# NE PAS OUBLIER QUE CELA DEPEND EGALEMENT DE LA FORET ASSOCIEE
|
||||||
|
#
|
||||||
|
# $getData() - les variables explicatives de l'arbre: soit les X, soit intersections de X et vt.forest$varimp
|
||||||
|
# $run(...) - lance l'arbre, en options les paramètre de rpart(...)
|
||||||
|
# $getInfos() - Résume threshold, delta, sizeof Ahat.
|
||||||
|
# $getRules() - Récupère les incidences, les règles, les stats de chaques noeuds (terminaux ou non, favorable ou non)
|
||||||
|
# $getIncidences() - Récupère un tableau d'incidence d'une rule
|
||||||
|
# $getAhatIncidence - Récupère le tableau d'incidence de Ahat
|
||||||
|
# $getAhatQualty - Récupère la qualité de Ahat (snd, resub)
|
||||||
|
VT.tree <- setRefClass(
|
||||||
|
Class = "VT.tree",
|
||||||
|
|
||||||
|
fields = list(
|
||||||
|
vt.difft = "VT.difft",
|
||||||
|
outcome = "vector",
|
||||||
|
threshold = "numeric",
|
||||||
|
screening = "logical",
|
||||||
|
sens = "character",
|
||||||
|
name = "character",
|
||||||
|
tree = "rpart",
|
||||||
|
Ahat = "vector"
|
||||||
|
),
|
||||||
|
|
||||||
|
methods = list(
|
||||||
|
initialize = function(vt.difft = VT.difft(), threshold = 0.05, sens = ">", screening = NULL){
|
||||||
|
.self$vt.difft <- vt.difft
|
||||||
|
|
||||||
|
.self$threshold <- threshold
|
||||||
|
|
||||||
|
.self$sens <- sens
|
||||||
|
|
||||||
|
.self$screening <- ifelse(is.null(screening), vt.difft$vt.object$screening, screening)
|
||||||
|
|
||||||
|
},
|
||||||
|
|
||||||
|
getData = function(){
|
||||||
|
d <- .self$vt.difft$vt.object$data[, 3:ncol(.self$vt.difft$vt.object$data)]
|
||||||
|
|
||||||
|
if(.self$screening == T){
|
||||||
|
d.tmp <- d
|
||||||
|
d <- d.tmp[, colnames(d.tmp) %in% .self$vt.difft$vt.object$varimp] # To see later
|
||||||
|
}
|
||||||
|
|
||||||
|
d <- data.frame(.self$outcome, d)
|
||||||
|
names(d) <- c(.self$name, colnames(d)[-1])
|
||||||
|
|
||||||
|
return(d)
|
||||||
|
},
|
||||||
|
|
||||||
|
computeNameOfTree = function(type){
|
||||||
|
if(.self$threshold < 0 ){
|
||||||
|
threshold.chr <- paste0("m", -.self$threshold)
|
||||||
|
}else{
|
||||||
|
threshold.chr <- as.character(.self$threshold)
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp = strsplit(threshold.chr, "[.]")[[1]]
|
||||||
|
return(paste(type, tmp[1], tmp[2], sep = ""))
|
||||||
|
},
|
||||||
|
|
||||||
|
run = function(){
|
||||||
|
if(length(.self$vt.difft$difft) == 0) stop("VT.difft::difft is an empty vector")
|
||||||
|
},
|
||||||
|
|
||||||
|
getInfos = function(){
|
||||||
|
cat("\n")
|
||||||
|
cat(sprintf("Threshold = %0.4f", .self$threshold))
|
||||||
|
cat("\n")
|
||||||
|
cat(sprintf("Delta = %0.4f", .self$vt.difft$vt.object$delta))
|
||||||
|
cat("\n")
|
||||||
|
cat(sprintf("Sens : %s", .self$sens))
|
||||||
|
cat("\n")
|
||||||
|
# cat(sprintf("Bounds = %0.4f", (.self$vt.difft$vt.object$delta + .self$threshold)))
|
||||||
|
# cat("\n")
|
||||||
|
cat(sprintf("Size of Ahat : %i", (sum(.self$Ahat))))
|
||||||
|
|
||||||
|
return(invisible(NULL))
|
||||||
|
},
|
||||||
|
|
||||||
|
getRules = function(only.leaf = F, only.fav = F, tables = T, verbose = T){
|
||||||
|
|
||||||
|
# On supprime le root node, inutile pour les stats d'incidences et autres...
|
||||||
|
full.frame <- .self$tree$frame[-1, ]
|
||||||
|
|
||||||
|
if (only.fav == T){
|
||||||
|
if(inherits(.self, "VT.tree.reg")){
|
||||||
|
if(.self$sens == ">"){
|
||||||
|
frm.only.fav <- full.frame[full.frame$yval >= (.self$threshold), ]
|
||||||
|
} else {
|
||||||
|
frm.only.fav <- full.frame[full.frame$yval <= (.self$threshold), ]
|
||||||
|
}
|
||||||
|
}else if(inherits(.self, "VT.tree.class")){
|
||||||
|
frm.only.fav <- full.frame[full.frame$yval == 2, ]
|
||||||
|
}
|
||||||
|
frm <- frm.only.fav
|
||||||
|
}
|
||||||
|
|
||||||
|
if (only.leaf == T){
|
||||||
|
if(inherits(.self, "VT.tree.reg")){
|
||||||
|
frm.only.leaf <- full.frame[full.frame$var == "<leaf>", ]
|
||||||
|
}else if(inherits(.self, "VT.tree.class")){
|
||||||
|
frm.only.leaf <- full.frame[full.frame$var == "<leaf>", ]
|
||||||
|
}
|
||||||
|
frm <- frm.only.leaf
|
||||||
|
}
|
||||||
|
|
||||||
|
if (only.fav == T & only.leaf == T){
|
||||||
|
frm <- frm.only.leaf[ intersect(rownames(frm.only.leaf), rownames(frm.only.fav)) , ]
|
||||||
|
}else if (only.fav == F & only.leaf == F){
|
||||||
|
frm <- full.frame
|
||||||
|
}
|
||||||
|
|
||||||
|
# Le cas où l'arbre est vide ou n'existe pas:
|
||||||
|
if (length(frm) == 0) stop("VT.tree : no tree");
|
||||||
|
if (ncol(frm)==0) stop("VT.tree : no rules");
|
||||||
|
|
||||||
|
pth <- path.rpart(.self$tree, nodes = row.names(frm), print.it = F)
|
||||||
|
# Delete 'root' node des règles
|
||||||
|
pth <- lapply(pth, FUN = function(d) return(d[-1]))
|
||||||
|
|
||||||
|
depth <- 0
|
||||||
|
nodes <- names(pth)
|
||||||
|
rules <- data.frame(replicate(6, character(0), simplify = T), replicate(2, numeric(0), simplify = T), stringsAsFactors = F)
|
||||||
|
|
||||||
|
colnames(rules) <- c("Subgroup", "Subgroup size", "Treatement event rate", "Control event rate",
|
||||||
|
"Treatment sample size", "Control sample size", "RR (resub)", "RR (snd)")
|
||||||
|
for(i in nodes){
|
||||||
|
pth.text <- paste(pth[[i]], collapse = " & ")
|
||||||
|
incid <- .self$getIncidences(pth.text)
|
||||||
|
|
||||||
|
rules[i, 1] <- pth.text
|
||||||
|
rules[i, 2] <- incid$table.selected$table[3, 3] #size subgroupg
|
||||||
|
rules[i, 3] <- incid$table.selected$table[4, 2] #treatment event rate
|
||||||
|
rules[i, 4] <- incid$table.selected$table[4, 1] #control event rate
|
||||||
|
rules[i, 5] <- incid$table.selected$table[3, 2] #treatment sample size
|
||||||
|
rules[i, 6] <- incid$table.selected$table[3, 1] #control sample size
|
||||||
|
rules[i, 7] <- round(incid$table.selected$rr, digits = 3) # rr (resub)
|
||||||
|
rules[i, 8] <- round(incid$table.selected$rr.snd, digits = 3) # rr (snd)
|
||||||
|
|
||||||
|
if(isTRUE(verbose)){
|
||||||
|
cat("----------------------------\n")
|
||||||
|
cat(sprintf("| Rule number %s : ", i))
|
||||||
|
|
||||||
|
|
||||||
|
if(inherits(.self, "VT.tree.reg")){
|
||||||
|
cat(sprintf("Y val = %0.3f \n", frm[i, ]$yval))
|
||||||
|
}else{
|
||||||
|
cat(sprintf("Y val = %i \n", frm[i, ]$yval))
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("----------------------------\n")
|
||||||
|
|
||||||
|
cat(sprintf("[n = %i", frm[i, ]$n))
|
||||||
|
cat(sprintf(", loss = %s, prob = %0.2f",
|
||||||
|
frm[i, ]$dev,
|
||||||
|
frm[i, ]$yval2[, 5]))
|
||||||
|
|
||||||
|
cat("] \n")
|
||||||
|
cat("\t\t")
|
||||||
|
cat(pth[[i]], sep="\n\t\t")
|
||||||
|
|
||||||
|
if(isTRUE(tables)){
|
||||||
|
cat("\n")
|
||||||
|
cat(sprintf("Incidence dans la selection \n"))
|
||||||
|
print(incid$table.selected$table)
|
||||||
|
cat("\n")
|
||||||
|
cat(sprintf("Risque relatif (resub) : %0.3f \n", incid$table.selected$rr))
|
||||||
|
cat(sprintf("Risque relatif (snd) : %0.3f \n\n", incid$table.selected$rr.snd))
|
||||||
|
|
||||||
|
cat(sprintf("Incidence dans le complementaire\n"))
|
||||||
|
print(incid$table.not.selected$table)
|
||||||
|
cat("\n")
|
||||||
|
cat(sprintf("Risque relatif (resub) : %0.3f \n", incid$table.not.selected$rr))
|
||||||
|
cat(sprintf("Risque relatif (snd) : %0.3f \n\n", incid$table.not.selected$rr.snd))
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("\n\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return(invisible(rules))
|
||||||
|
},
|
||||||
|
|
||||||
|
getIncidences = function(rule, rr.snd = T){
|
||||||
|
return(VT.incidences(.self$vt.difft, rule, rr.snd))
|
||||||
|
},
|
||||||
|
|
||||||
|
getAhatIncidence = function(){
|
||||||
|
if(sum(.self$Ahat)!=0){
|
||||||
|
|
||||||
|
table.inc <- VT.incidences(vt.object = .self$vt.difft$vt.object, select = .self$Ahat)
|
||||||
|
|
||||||
|
table.A <- table.inc$table.selected
|
||||||
|
table.A.cmpl <- table.inc$table.not.selected
|
||||||
|
|
||||||
|
cat(sprintf("Incidence dans le sous groupe A\n"))
|
||||||
|
print(table.A$table)
|
||||||
|
cat("\n")
|
||||||
|
cat(sprintf("Risque relatif : %0.3f \n\n", table.A$risque_relatif))
|
||||||
|
|
||||||
|
cat(sprintf("Incidence dans le sous groupe A complementaire\n"))
|
||||||
|
print(table.A.cmpl$table)
|
||||||
|
cat("\n")
|
||||||
|
cat(sprintf("Risque relatif : %0.3f \n\n", table.A.cmpl$risque_relatif))
|
||||||
|
}else{
|
||||||
|
return("Empty set")
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
getAhatQuality = function(){
|
||||||
|
|
||||||
|
resub <- vt.getQAOriginal(.self$Ahat, response = .self$vt.difft$vt.object$getY(), trt = .self$vt.difft$vt.object$data[, 2])
|
||||||
|
|
||||||
|
snd <- vt.getQAOriginal(.self$Ahat, response = .self$vt.difft$twin1, trt = .self$vt.difft$vt.object$data[, 2])
|
||||||
|
|
||||||
|
# on ajoute la taille de Ahat
|
||||||
|
size <- sum(.self$Ahat)
|
||||||
|
|
||||||
|
res <- cbind(size, resub, snd)
|
||||||
|
names(res) <- c("size", "resub", "snd")
|
||||||
|
|
||||||
|
return(res)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
42
R/tree.class.R
Normal file
42
R/tree.class.R
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
# VT.TREE.CLASS -----------------------------------------------------------
|
||||||
|
|
||||||
|
VT.tree.class <- setRefClass(
|
||||||
|
Class = "VT.tree.class",
|
||||||
|
|
||||||
|
contains = "VT.tree",
|
||||||
|
|
||||||
|
methods = list(
|
||||||
|
initialize = function(vt.difft, threshold = 0.05, sens = ">", screening = NULL){
|
||||||
|
callSuper(vt.difft, threshold, sens, screening)
|
||||||
|
|
||||||
|
.self$name <- .self$computeNameOfTree("class")
|
||||||
|
|
||||||
|
if(.self$sens == ">"){
|
||||||
|
.self$outcome <- ifelse(.self$vt.difft$difft >= .self$threshold, 1, 0)
|
||||||
|
} else {
|
||||||
|
.self$outcome <- ifelse(.self$vt.difft$difft <= .self$threshold, 1, 0)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
run = function(...){
|
||||||
|
callSuper()
|
||||||
|
|
||||||
|
data <- .self$getData()
|
||||||
|
if(sum(data[,1]) != 0){
|
||||||
|
.self$tree <- rpart(as.formula(paste(.self$name, ".", sep = "~")), data = data, method = "class", ...)
|
||||||
|
.self$Ahat <- as.numeric(predict(.self$tree, data, type = "class")) - 1
|
||||||
|
}else{
|
||||||
|
.self$Ahat <- .self$outcome
|
||||||
|
}
|
||||||
|
|
||||||
|
return(invisible(tree))
|
||||||
|
},
|
||||||
|
|
||||||
|
sumup = function(){
|
||||||
|
cat("Classification Tree")
|
||||||
|
callSuper()
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
VT.tree.class$lock("threshold", "vt.difft")
|
44
R/tree.reg.R
Normal file
44
R/tree.reg.R
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
|
||||||
|
|
||||||
|
# VT.TREE.REG -------------------------------------------------------------
|
||||||
|
|
||||||
|
VT.tree.reg <- setRefClass(
|
||||||
|
Class = "VT.tree.reg",
|
||||||
|
|
||||||
|
contains = "VT.tree",
|
||||||
|
|
||||||
|
methods = list(
|
||||||
|
initialize = function(vt.difft, threshold = 0.05, sens = ">", screening = NULL){
|
||||||
|
callSuper(vt.difft, threshold, sens, screening)
|
||||||
|
|
||||||
|
.self$name <- .self$computeNameOfTree("reg")
|
||||||
|
|
||||||
|
.self$outcome <- .self$vt.difft$difft
|
||||||
|
},
|
||||||
|
|
||||||
|
run = function(...){
|
||||||
|
callSuper()
|
||||||
|
|
||||||
|
data <- .self$getData()
|
||||||
|
|
||||||
|
.self$tree <- rpart(as.formula(paste(.self$name, ".", sep = "~")), data = data, ...)
|
||||||
|
|
||||||
|
if(.self$sens == ">")
|
||||||
|
res <- ifelse(predict(.self$tree) >= (.self$threshold), 1, 0)
|
||||||
|
else
|
||||||
|
res <- ifelse(predict(.self$tree) <= (.self$threshold), 1, 0)
|
||||||
|
|
||||||
|
.self$Ahat <- res
|
||||||
|
# if(sum(res) != 0) .self$vt.forest$addAhatColumn(name, res)
|
||||||
|
return(invisible(tree))
|
||||||
|
},
|
||||||
|
|
||||||
|
sumup = function(){
|
||||||
|
cat("Regression Tree")
|
||||||
|
callSuper()
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
VT.tree.reg$lock("threshold", "vt.difft")
|
||||||
|
|
8
Read-and-delete-me
Normal file
8
Read-and-delete-me
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
* Edit the help file skeletons in 'man', possibly combining help files for multiple functions.
|
||||||
|
* Edit the exports in 'NAMESPACE', and add necessary imports.
|
||||||
|
* Put any C/C++/Fortran code in 'src'.
|
||||||
|
* If you have compiled code, add a useDynLib() directive to 'NAMESPACE'.
|
||||||
|
* Run R CMD build to build the package tarball.
|
||||||
|
* Run R CMD check to check the package tarball.
|
||||||
|
|
||||||
|
Read "Writing R Extensions" for more information.
|
17
VirtualTwins.Rproj
Normal file
17
VirtualTwins.Rproj
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
Version: 1.0
|
||||||
|
|
||||||
|
RestoreWorkspace: Yes
|
||||||
|
SaveWorkspace: Yes
|
||||||
|
AlwaysSaveHistory: Yes
|
||||||
|
|
||||||
|
EnableCodeIndexing: Yes
|
||||||
|
UseSpacesForTab: Yes
|
||||||
|
NumSpacesForTab: 2
|
||||||
|
Encoding: UTF-8
|
||||||
|
|
||||||
|
RnwWeave: Sweave
|
||||||
|
LaTeX: pdfLaTeX
|
||||||
|
|
||||||
|
BuildType: Package
|
||||||
|
PackageUseDevtools: Yes
|
||||||
|
PackageInstallArgs: --no-multiarch --with-keep.source
|
34
man/VirtualTwins-package.Rd
Normal file
34
man/VirtualTwins-package.Rd
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
\name{VirtualTwins-package}
|
||||||
|
\alias{VirtualTwins-package}
|
||||||
|
\alias{VirtualTwins}
|
||||||
|
\docType{package}
|
||||||
|
\title{
|
||||||
|
\packageTitle{VirtualTwins}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
\packageDescription{VirtualTwins}
|
||||||
|
}
|
||||||
|
\details{
|
||||||
|
|
||||||
|
The DESCRIPTION file:
|
||||||
|
\packageDESCRIPTION{VirtualTwins}
|
||||||
|
\packageIndices{VirtualTwins}
|
||||||
|
~~ An overview of how to use the package, including the most important functions ~~
|
||||||
|
}
|
||||||
|
\author{
|
||||||
|
\packageAuthor{VirtualTwins}
|
||||||
|
|
||||||
|
Maintainer: \packageMaintainer{VirtualTwins}
|
||||||
|
}
|
||||||
|
\references{
|
||||||
|
~~ Literature or other references for background information ~~
|
||||||
|
}
|
||||||
|
~~ Optionally other standard keywords, one per line, from file KEYWORDS in the R documentation directory ~~
|
||||||
|
\keyword{ package }
|
||||||
|
\seealso{
|
||||||
|
~~ Optional links to other man pages, e.g. ~~
|
||||||
|
~~ \code{\link[<pkg>:<pkg>-package]{<pkg>}} ~~
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
~~ simple examples of the most important functions ~~
|
||||||
|
}
|
Loading…
Reference in a new issue