mirror of
https://github.com/prise6/aVirtualTwins.git
synced 2024-05-22 22:52:12 +02:00
Initial commit - welcome
This commit is contained in:
commit
8e4f4136ef
2
.Rbuildignore
Normal file
2
.Rbuildignore
Normal file
|
@ -0,0 +1,2 @@
|
|||
^.*\.Rproj$
|
||||
^\.Rproj\.user$
|
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
.Rproj.user
|
||||
.Rhistory
|
||||
.RData
|
9
DESCRIPTION
Normal file
9
DESCRIPTION
Normal file
|
@ -0,0 +1,9 @@
|
|||
Package: VirtualTwins
|
||||
Type: Package
|
||||
Title: What the package does (short line)
|
||||
Version: 1.0
|
||||
Date: 2015-05-31
|
||||
Author: Who wrote it
|
||||
Maintainer: Who to complain to <yourfault@somewhere.net>
|
||||
Description: More about what it does (maybe more than one line)
|
||||
License: What license is it under?
|
0
R/VirtualTwins.R
Normal file
0
R/VirtualTwins.R
Normal file
33
R/difft.R
Normal file
33
R/difft.R
Normal file
|
@ -0,0 +1,33 @@
|
|||
# DIFFT -------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
VT.difft <- setRefClass(
|
||||
Class = "VT.difft",
|
||||
|
||||
fields = list(
|
||||
vt.object = "VT.object",
|
||||
twin1 = "vector",
|
||||
twin2 = "vector",
|
||||
difft = "vector"
|
||||
),
|
||||
|
||||
methods = list(
|
||||
initialize = function(vt.object = VT.object(), ...){
|
||||
.self$vt.object <- vt.object
|
||||
|
||||
.self$twin1 <- .self$twin2 <- rep(NA, nrow(vt.object$data))
|
||||
|
||||
.self$initFields(...)
|
||||
},
|
||||
|
||||
computeDifft = function(){
|
||||
|
||||
if(sum(is.na(.self$twin1)) != 0 | sum(is.na(.self$twin2)) != 0 ) stop("Twins must be valid")
|
||||
|
||||
.self$difft <- ifelse(.self$vt.object$data[, 2] == 1, .self$twin1 - .self$twin2, .self$twin2 - .self$twin1)
|
||||
|
||||
return(invisible(.self$difft)) # To see later
|
||||
}
|
||||
)
|
||||
)
|
52
R/forest.R
Normal file
52
R/forest.R
Normal file
|
@ -0,0 +1,52 @@
|
|||
# FORESTS -----------------------------------------------------------------
|
||||
|
||||
# Objet enfant de VT.difft
|
||||
# Objet Parent de VT.forest.one & VT.forest.double, Abstract class : ne doit pas etre instanciée
|
||||
# twin1 - Proba du "oui" de la réponse (modalité d'intéret codé par "o" ou 1 contre "n" ou 0 dans le cas binaire)
|
||||
# sachant le vrai traitement
|
||||
# twin2 - Proba du "oui" [...] sachant le traitement opposé
|
||||
# difft - différence de twin1 - twin2 SI le vrai traitement == 1 SINON twin2 - twin1
|
||||
#
|
||||
# $run() - lance le calcul des probas
|
||||
VT.forest <- setRefClass(
|
||||
Class = "VT.forest",
|
||||
|
||||
contains = "VT.difft",
|
||||
|
||||
methods = list(
|
||||
run = function(){
|
||||
.self$computeTwin1()
|
||||
|
||||
if(inherits(.self, "VT.forest.one")) .self$vt.object$switchTreatment() #if one forest
|
||||
|
||||
.self$computeTwin2()
|
||||
|
||||
if(inherits(.self, "VT.forest.one")) .self$vt.object$switchTreatment() #if one forest
|
||||
|
||||
.self$computeDifft()
|
||||
|
||||
.self$vt.object$computeDelta() # To see later
|
||||
|
||||
return(invisible(.self))
|
||||
},
|
||||
|
||||
checkModel = function(model){
|
||||
if(!(inherits(model, "train") | inherits(model, "RandomForest") | inherits(model, "randomForest"))){
|
||||
stop("Model is not recognized. Must be : train, RandomForest, randomForest")
|
||||
}
|
||||
},
|
||||
|
||||
getFullData = function(){
|
||||
if(length(.self$twin1) != nrow(.self$vt.object$data)) stop("Twin1 must have same length as data")
|
||||
if(length(.self$twin2) != nrow(.self$vt.object$data)) stop("Twin2 must have same length as data")
|
||||
|
||||
if(length(.self$difft) != nrow(.self$vt.object$data)) stop("Difft must have same length as data")
|
||||
|
||||
tmp <- cbind(.self$vt.object$data, .self$twin1, .self$twin2, .self$difft)
|
||||
|
||||
colnames(tmp) <- c(colnames(.self$vt.object$data), "twin1", "twin2", "difft")
|
||||
|
||||
return(tmp)
|
||||
}
|
||||
)
|
||||
)
|
44
R/forest.double.R
Normal file
44
R/forest.double.R
Normal file
|
@ -0,0 +1,44 @@
|
|||
# VT.FOREST.DOUBLE --------------------------------------------------------
|
||||
# IF RUNNING DOUBLE FOREST COMPUTATION
|
||||
|
||||
VT.forest.double <- setRefClass(
|
||||
Class = "VT.forest.double",
|
||||
|
||||
contains = "VT.forest",
|
||||
|
||||
fields = list(
|
||||
model_trt1 = "ANY",
|
||||
model_trt0 = "ANY"
|
||||
),
|
||||
|
||||
methods = list(
|
||||
initialize = function(vt.object, model_trt1, model_trt0, ...){
|
||||
.self$checkModel(model_trt1)
|
||||
.self$checkModel(model_trt0)
|
||||
|
||||
.self$model_trt1 <- model_trt1
|
||||
.self$model_trt0 <- model_trt0
|
||||
|
||||
callSuper(vt.object, ...)
|
||||
},
|
||||
|
||||
computeTwin1 = function(){
|
||||
# Model with treatment (1)
|
||||
.self$twin1[.self$vt.object$data[, 2] == 1] <- VT.predict(rfor = .self$model_trt1, type = .self$vt.object$type)
|
||||
|
||||
# Model without treatment (0)
|
||||
.self$twin1[vt.object$data[, 2] == 0] <- VT.predict(rfor = .self$model_trt0, type = .self$vt.object$type)
|
||||
return(.self$twin1)
|
||||
return(invisible(.self$twin1))
|
||||
},
|
||||
|
||||
computeTwin2 = function(){
|
||||
# Model with treatment (1)
|
||||
.self$twin2[.self$vt.object$data[, 2] == 1] <- VT.predict(.self$model_trt0, newdata = .self$vt.object$getX(1, interactions = F), type = .self$vt.object$type)
|
||||
|
||||
# Model without treatment (0)
|
||||
.self$twin2[.self$vt.object$data[, 2] == 0] <- VT.predict(.self$model_trt1, newdata = .self$vt.object$getX(0, interactions = F), type = .self$vt.object$type)
|
||||
return(invisible(.self$twin2))
|
||||
}
|
||||
)
|
||||
)
|
79
R/forest.fold.R
Normal file
79
R/forest.fold.R
Normal file
|
@ -0,0 +1,79 @@
|
|||
# VT.FOREST.FOLD ----------------------------------------------------------
|
||||
|
||||
VT.forest.fold <- setRefClass(
|
||||
Class = "VT.forest.fold",
|
||||
|
||||
contains = "VT.forest",
|
||||
|
||||
fields = list(
|
||||
interactions = "logical",
|
||||
fold = "numeric",
|
||||
ratio = "numeric",
|
||||
groups = "vector"
|
||||
),
|
||||
|
||||
methods = list(
|
||||
initialize = function(vt.object, fold, ratio, interactions = T, ...){
|
||||
|
||||
.self$fold <- fold
|
||||
|
||||
.self$ratio <- ratio
|
||||
|
||||
.self$interactions <- interactions
|
||||
|
||||
callSuper(vt.object, ...)
|
||||
},
|
||||
|
||||
run = function(parallel = F, ...){
|
||||
|
||||
.self$groups <- sample(1:.self$fold, nrow(.self$vt.object$data), replace = T)
|
||||
|
||||
for(g in 1:.self$fold){
|
||||
.self$runOneForest(g, ...)
|
||||
}
|
||||
|
||||
.self$computeDifft()
|
||||
},
|
||||
|
||||
runOneForest = function(group, ...){
|
||||
data <- .self$vt.object$getX(interactions = .self$interactions)
|
||||
X <- data[.self$groups != group, ]
|
||||
Y <- .self$vt.object$data[.self$groups != group, 1]
|
||||
Yeff <- table(Y) # 1 -> levels(Y)[1] & 2 -> levels(Y)[2]
|
||||
sampmin <- min(Yeff[1], Yeff[2])
|
||||
|
||||
if(sampmin == Yeff[2]){
|
||||
samp2 <- sampmin
|
||||
samp1 <- min(Yeff[1], round(.self$ratio*Yeff[1], digits = 0))
|
||||
}else{
|
||||
samp2 <- Yeff[2]
|
||||
samp1 <- sampmin
|
||||
}
|
||||
rf <- randomForest(x = X, y = Y, sampsize = c(samp1, samp2), keep.forest = T, ...)
|
||||
|
||||
.self$computeTwin1(rf, group)
|
||||
.self$computeTwin2(rf, group)
|
||||
},
|
||||
|
||||
computeTwin1 = function(rfor, group){
|
||||
|
||||
data <- .self$vt.object$data[.self$groups == group, -1]
|
||||
|
||||
.self$twin1[.self$groups == group] <- VT.predict(rfor = rfor, newdata = data, type = .self$vt.object$type)
|
||||
|
||||
return(invisible(.self$twin1))
|
||||
},
|
||||
|
||||
computeTwin2 = function(rfor, group){
|
||||
|
||||
.self$vt.object$switchTreatment()
|
||||
data <- .self$vt.object$getX(interactions = .self$interactions)
|
||||
data <- data[.self$groups == group, ]
|
||||
|
||||
.self$twin2[.self$groups == group] <- VT.predict(rfor = rfor, newdata = data, type = .self$vt.object$type)
|
||||
|
||||
.self$vt.object$switchTreatment()
|
||||
return(invisible(.self$twin2))
|
||||
}
|
||||
)
|
||||
)
|
37
R/forest.one.R
Normal file
37
R/forest.one.R
Normal file
|
@ -0,0 +1,37 @@
|
|||
# VT.FOREST.ONE -----------------------------------------------------------
|
||||
# IF RUNNING ONE FOREST COMPUTATION
|
||||
# model - modèle de forêt aléatoire issus du package caret, randomForest ou party (cforest)
|
||||
VT.forest.one <- setRefClass(
|
||||
Class = "VT.forest.one",
|
||||
|
||||
contains = "VT.forest",
|
||||
|
||||
fields = list(
|
||||
model = "ANY",
|
||||
interactions = "logical"
|
||||
),
|
||||
|
||||
methods = list(
|
||||
initialize = function(vt.object, model, interactions = T, ...){
|
||||
.self$checkModel(model)
|
||||
|
||||
.self$model <- model
|
||||
|
||||
.self$interactions <- interactions
|
||||
|
||||
callSuper(vt.object, ...)
|
||||
},
|
||||
|
||||
computeTwin1 = function(){
|
||||
.self$twin1 <- as.vector(VT.predict(rfor = .self$model, type = .self$vt.object$type))
|
||||
return(invisible(.self$twin1))
|
||||
},
|
||||
|
||||
computeTwin2 = function(){
|
||||
.self$twin2 <- as.vector(VT.predict(.self$model,
|
||||
newdata = .self$vt.object$getX(interactions = .self$interactions),
|
||||
.self$vt.object$type))
|
||||
return(invisible(.self$twin2))
|
||||
}
|
||||
)
|
||||
)
|
35
R/incidences.R
Normal file
35
R/incidences.R
Normal file
|
@ -0,0 +1,35 @@
|
|||
# INCIDENCE TABLES FUNCTION /!\ important
|
||||
|
||||
setGeneric("VT.incidences",
|
||||
function(vt.difft, select, rr.snd){ standardGeneric("VT.incidences") }
|
||||
)
|
||||
|
||||
setMethod(
|
||||
f = "VT.incidences",
|
||||
signature = c(vt.difft = "VT.difft", select = "character", rr.snd = "logical"),
|
||||
function(vt.difft, select, rr.snd = F){
|
||||
vector.selected <- with(vt.difft$vt.object$data, ifelse(eval(parse(text = select)), 1, 0))
|
||||
|
||||
return(VT.incidences(vt.difft, select = vector.selected, rr.snd))
|
||||
}
|
||||
)
|
||||
|
||||
setMethod(
|
||||
f = "VT.incidences",
|
||||
signature = c(vt.difft = "VT.difft", select = "vector", rr.snd = "logical"),
|
||||
function(vt.difft, select, rr.snd = F){
|
||||
|
||||
selected <- with(vt.difft$vt.object$data, vt.difft$vt.object$data[as.logical(select), c(1,2)])
|
||||
not.selected <- with(vt.difft$vt.object$data, vt.difft$vt.object$data[!as.logical(select), c(1,2)])
|
||||
|
||||
list.tmp <- list(vt.getIncidence(selected), vt.getIncidence(not.selected))
|
||||
names(list.tmp) <- c("table.selected", "table.not.selected")
|
||||
|
||||
if(isTRUE(rr.snd)){
|
||||
list.tmp$table.selected$rr.snd <- vt.rr.snd(vt.difft, select)
|
||||
list.tmp$table.not.selected$rr.snd <- vt.rr.snd(vt.difft, (1-select))
|
||||
}
|
||||
|
||||
return(list.tmp)
|
||||
}
|
||||
)
|
92
R/object.R
Normal file
92
R/object.R
Normal file
|
@ -0,0 +1,92 @@
|
|||
# VT.OBJECT ---------------------------------------------------------------
|
||||
|
||||
# Permet de stocker les données
|
||||
# alpha - paramètre inutile dans cette version
|
||||
# screening & varimp permettent de construire des arbres sur les variables
|
||||
# définies dans varimp si screening = True
|
||||
# delta - différence d'incidence entre les deux "bras"
|
||||
# type - type de réponse - binary ou continous - seul binary est disponible
|
||||
# interactions - si TRUE getX() retourne (X,X*T,X*(1-T))
|
||||
#
|
||||
# $getFormula() - utile pour retourner une formule pour rpart
|
||||
# $getX(trt = c(0,1,NULL), interactions = c(TRUE, FALSE)) - si trt est non NULL
|
||||
# getX() retourne les lignes pour le traitement passé paramètre (utile pour les doubles forests)
|
||||
# $getY() - retourne la réponse / cible
|
||||
# ...
|
||||
VT.object <- setRefClass(
|
||||
Class = "VT.object",
|
||||
|
||||
fields = list(
|
||||
data = "data.frame",
|
||||
alpha = "numeric",
|
||||
screening = "logical",
|
||||
varimp = "character",
|
||||
delta = "numeric",
|
||||
type = "character"
|
||||
),
|
||||
|
||||
methods = list(
|
||||
initialize = function(screening = F, alpha = 1, type = "binary", ...){
|
||||
|
||||
.self$screening <- screening
|
||||
|
||||
.self$type <- type
|
||||
|
||||
.self$alpha <- alpha
|
||||
|
||||
.self$initFields(...)
|
||||
},
|
||||
|
||||
getFormula = function(){
|
||||
return(as.formula(paste(colnames(.self$data)[1], ".", sep = "~")))
|
||||
},
|
||||
|
||||
getX = function(interactions = T, trt = NULL){
|
||||
# retour les prédicteurs si trt n'est pas null
|
||||
if(!is.null(trt)) return(.self$data[.self$data[,2] == trt, -c(1,2)])
|
||||
# retourne les predicteurs*traitement peut importe le traitement si interactions est à TRUE
|
||||
if(interactions == T) return(.self$getXwithInt())
|
||||
# retourne les predicteurs
|
||||
return(.self$data[, -1])
|
||||
},
|
||||
|
||||
getY = function(trt = NULL){
|
||||
if(is.null(trt)) return(.self$data[, 1])
|
||||
return(.self$data[.self$data[,2] == trt, 1])
|
||||
},
|
||||
|
||||
getXwithInt = function(){
|
||||
tmp <- .self$data[, -c(1,2)]
|
||||
return(data.frame(cbind(.self$data[,-1], tmp*.self$data[, 2], tmp*(1 - .self$data[, 2]))))
|
||||
},
|
||||
|
||||
switchTreatment = function(){
|
||||
cl <- class(.self$data[, 2])
|
||||
# Treatments must be numeric or integer and binary
|
||||
.self$data[, 2] <- 1 - .self$data[, 2]
|
||||
# keep original class for treatment
|
||||
if(cl == "integer"){
|
||||
.self$data[, 2] <- as.integer(.self$data[, 2])
|
||||
}else{
|
||||
.self$data[, 2] <- as.numeric(.self$data[, 2])
|
||||
}
|
||||
cat("witch \n")
|
||||
return(TRUE)
|
||||
},
|
||||
|
||||
computeDelta = function(){
|
||||
if(.self$type == "binary"){
|
||||
.self$delta <- sum((as.numeric(.self$data[, 1]) - 1)*(.self$data[, 2])) / sum(.self$data[, 2]) -
|
||||
sum((as.numeric(.self$data[, 1]) - 1)*(1 - .self$data[, 2])) / sum(1 - .self$data[, 2])
|
||||
|
||||
return(.self$delta)
|
||||
}else{
|
||||
stop("Error : type is not Binary")
|
||||
}
|
||||
},
|
||||
|
||||
getIncidences = function(){
|
||||
return(vt.getIncidence(.self$data))
|
||||
}
|
||||
)
|
||||
)
|
92
R/predict.R
Normal file
92
R/predict.R
Normal file
|
@ -0,0 +1,92 @@
|
|||
# PREDICTION --------------------------------------------------------------
|
||||
|
||||
# LES METHODES SUIVANTES PERMETTENT DE PREDIRE LA PROBA D'INTERET POUR
|
||||
# LES TROIS CLASSES SUIVANTES : train, randomForest, RandomForest{party}
|
||||
|
||||
setGeneric("VT.predict",
|
||||
function(rfor, newdata, type){standardGeneric("VT.predict")}
|
||||
)
|
||||
|
||||
setMethod(
|
||||
f = "VT.predict",
|
||||
signature = c(rfor = "RandomForest", newdata = "missing", type = "character"),
|
||||
function(rfor, type = "binary"){
|
||||
if(! type %in% c("binary", "continous")) stop("Type must be Binary or continous")
|
||||
if(type == "binary"){
|
||||
tmp <- predict(rfor, OOB = T, type = "prob")
|
||||
tmp <- unlist(tmp)
|
||||
tmp <- tmp[seq(2, length(tmp), 2)]
|
||||
}else{
|
||||
message("continous is not done yet")
|
||||
tmp <- NULL
|
||||
}
|
||||
|
||||
return(tmp)
|
||||
}
|
||||
)
|
||||
|
||||
setMethod(
|
||||
f = "VT.predict",
|
||||
signature = c(rfor = "RandomForest", newdata = "data.frame", type = "character"),
|
||||
function(rfor, newdata, type = "binary"){
|
||||
if(! type %in% c("binary", "continous")) stop("Type must be Binary or continous")
|
||||
if(type == "binary"){
|
||||
tmp <- predict(rfor, newdata = newdata, type = "prob")
|
||||
tmp <- unlist(tmp)
|
||||
tmp <- tmp[seq(2, length(tmp), 2)]
|
||||
}else{
|
||||
message("continous is not done yet")
|
||||
tmp <- NULL
|
||||
}
|
||||
|
||||
return(tmp)
|
||||
}
|
||||
)
|
||||
|
||||
setMethod(
|
||||
f = "VT.predict",
|
||||
signature = c(rfor = "randomForest", newdata = "missing", type = "character"),
|
||||
function(rfor, type = "binary"){
|
||||
if(! type %in% c("binary", "continous")) stop("Type must be Binary or continous")
|
||||
if(type == "binary"){
|
||||
# no longer available in all version ?!
|
||||
# tmp <- rfor$vote[, 2] # get the "o" prob
|
||||
tmp <- predict(rfor, type = "prob")[, 2] # We want to get the "o" prob
|
||||
}else{
|
||||
message("continous is not done yet")
|
||||
tmp <- NULL
|
||||
}
|
||||
return(tmp)
|
||||
}
|
||||
)
|
||||
|
||||
setMethod(
|
||||
f = "VT.predict",
|
||||
signature = c(rfor = "randomForest", newdata = "data.frame", type = "character"),
|
||||
function(rfor, newdata, type = "binary"){
|
||||
if(! type %in% c("binary", "continous")) stop("Type must be Binary or continous")
|
||||
if(type == "binary"){
|
||||
tmp <- predict(rfor, newdata = newdata, type = "prob")[, 2] # We want to get the "o" prob
|
||||
}else{
|
||||
message("continous is not done yet")
|
||||
tmp <- NULL
|
||||
}
|
||||
return(tmp)
|
||||
}
|
||||
)
|
||||
|
||||
setMethod(
|
||||
f = "VT.predict",
|
||||
signature = c(rfor = "train", newdata = "ANY", type = "character"),
|
||||
function(rfor, newdata, type = "binary"){
|
||||
return(VT.predict(rfor$finalModel, newdata, type))
|
||||
}
|
||||
)
|
||||
|
||||
setMethod(
|
||||
f = "VT.predict",
|
||||
signature = c(rfor = "train", newdata = "missing", type = "character"),
|
||||
function(rfor, type = "binary"){
|
||||
return(VT.predict(rfor=rfor$finalModel, type=type))
|
||||
}
|
||||
)
|
43
R/tools.R
Normal file
43
R/tools.R
Normal file
|
@ -0,0 +1,43 @@
|
|||
vt.getQAOriginal <- function(response, trt, ahat){
|
||||
if(is.factor(response)) response = as.numeric(response) - 1
|
||||
|
||||
if(sum(ahat) == 0){
|
||||
tmp <- 0
|
||||
}else{
|
||||
tmp <- sum(response*ahat*trt)/sum(ahat*trt) -
|
||||
sum(response*ahat*(1-trt))/sum(ahat*(1-trt)) -
|
||||
(sum(response*trt)/sum(trt) -
|
||||
sum(response*(1-trt))/sum(1-trt))
|
||||
}
|
||||
return(tmp)
|
||||
}
|
||||
|
||||
vt.getTable <- function(table){
|
||||
if(is.list(table)) table <- table[[1]]
|
||||
Incidence <- function(X) as.character(round(X[2] / X[3], digits = 3))
|
||||
t <- addmargins(table, margin = c(1,2), FUN = sum, quiet = T)
|
||||
t <- addmargins(t, FUN = Incidence, margin = 1, quiet = T)
|
||||
rr <- as.numeric(t["Incidence", "1"]) / as.numeric(t["Incidence", "0"])
|
||||
return(list(table = t, rr = rr))
|
||||
}
|
||||
|
||||
vt.getIncidence <- function(df){
|
||||
if (nrow(df) == 0) table.res <- NULL
|
||||
else{
|
||||
table.res <- vt.getTable(table(df[, 1],
|
||||
df[, 2],
|
||||
deparse.level = 2,
|
||||
dnn = c("resp", "trt")))
|
||||
}
|
||||
return(table.res)
|
||||
}
|
||||
|
||||
|
||||
vt.rr.snd <- function(vt.difft, selected){
|
||||
if(sum(selected) == 0){
|
||||
return(0)
|
||||
}else{
|
||||
return((sum(vt.difft$twin1*selected*vt.difft$vt.object$data[, 2])/sum(selected*vt.difft$vt.object$data[, 2]))
|
||||
/(sum(vt.difft$twin1*selected*(1-vt.difft$vt.object$data[, 2]))/sum(selected*(1-vt.difft$vt.object$data[, 2]))))
|
||||
}
|
||||
}
|
233
R/tree.R
Normal file
233
R/tree.R
Normal file
|
@ -0,0 +1,233 @@
|
|||
# TREES COMPUTATIONS ------------------------------------------------------
|
||||
|
||||
# outcome - variable à expliquer par l'arbre (continue ou binaire)
|
||||
# threshold - seuil à dépasser par difft
|
||||
# screening - override de vt.forest$screening sinon prend la valeur de vt.forest$screening
|
||||
# name - nom de l'arbre
|
||||
# tree - objet rpart (l'arbre en lui même)
|
||||
# Ahat - indicatrice des observations appartement à Ahat (toujours en fonction du treshold)
|
||||
# NE PAS OUBLIER QUE CELA DEPEND EGALEMENT DE LA FORET ASSOCIEE
|
||||
#
|
||||
# $getData() - les variables explicatives de l'arbre: soit les X, soit intersections de X et vt.forest$varimp
|
||||
# $run(...) - lance l'arbre, en options les paramètre de rpart(...)
|
||||
# $getInfos() - Résume threshold, delta, sizeof Ahat.
|
||||
# $getRules() - Récupère les incidences, les règles, les stats de chaques noeuds (terminaux ou non, favorable ou non)
|
||||
# $getIncidences() - Récupère un tableau d'incidence d'une rule
|
||||
# $getAhatIncidence - Récupère le tableau d'incidence de Ahat
|
||||
# $getAhatQualty - Récupère la qualité de Ahat (snd, resub)
|
||||
VT.tree <- setRefClass(
|
||||
Class = "VT.tree",
|
||||
|
||||
fields = list(
|
||||
vt.difft = "VT.difft",
|
||||
outcome = "vector",
|
||||
threshold = "numeric",
|
||||
screening = "logical",
|
||||
sens = "character",
|
||||
name = "character",
|
||||
tree = "rpart",
|
||||
Ahat = "vector"
|
||||
),
|
||||
|
||||
methods = list(
|
||||
initialize = function(vt.difft = VT.difft(), threshold = 0.05, sens = ">", screening = NULL){
|
||||
.self$vt.difft <- vt.difft
|
||||
|
||||
.self$threshold <- threshold
|
||||
|
||||
.self$sens <- sens
|
||||
|
||||
.self$screening <- ifelse(is.null(screening), vt.difft$vt.object$screening, screening)
|
||||
|
||||
},
|
||||
|
||||
getData = function(){
|
||||
d <- .self$vt.difft$vt.object$data[, 3:ncol(.self$vt.difft$vt.object$data)]
|
||||
|
||||
if(.self$screening == T){
|
||||
d.tmp <- d
|
||||
d <- d.tmp[, colnames(d.tmp) %in% .self$vt.difft$vt.object$varimp] # To see later
|
||||
}
|
||||
|
||||
d <- data.frame(.self$outcome, d)
|
||||
names(d) <- c(.self$name, colnames(d)[-1])
|
||||
|
||||
return(d)
|
||||
},
|
||||
|
||||
computeNameOfTree = function(type){
|
||||
if(.self$threshold < 0 ){
|
||||
threshold.chr <- paste0("m", -.self$threshold)
|
||||
}else{
|
||||
threshold.chr <- as.character(.self$threshold)
|
||||
}
|
||||
|
||||
tmp = strsplit(threshold.chr, "[.]")[[1]]
|
||||
return(paste(type, tmp[1], tmp[2], sep = ""))
|
||||
},
|
||||
|
||||
run = function(){
|
||||
if(length(.self$vt.difft$difft) == 0) stop("VT.difft::difft is an empty vector")
|
||||
},
|
||||
|
||||
getInfos = function(){
|
||||
cat("\n")
|
||||
cat(sprintf("Threshold = %0.4f", .self$threshold))
|
||||
cat("\n")
|
||||
cat(sprintf("Delta = %0.4f", .self$vt.difft$vt.object$delta))
|
||||
cat("\n")
|
||||
cat(sprintf("Sens : %s", .self$sens))
|
||||
cat("\n")
|
||||
# cat(sprintf("Bounds = %0.4f", (.self$vt.difft$vt.object$delta + .self$threshold)))
|
||||
# cat("\n")
|
||||
cat(sprintf("Size of Ahat : %i", (sum(.self$Ahat))))
|
||||
|
||||
return(invisible(NULL))
|
||||
},
|
||||
|
||||
getRules = function(only.leaf = F, only.fav = F, tables = T, verbose = T){
|
||||
|
||||
# On supprime le root node, inutile pour les stats d'incidences et autres...
|
||||
full.frame <- .self$tree$frame[-1, ]
|
||||
|
||||
if (only.fav == T){
|
||||
if(inherits(.self, "VT.tree.reg")){
|
||||
if(.self$sens == ">"){
|
||||
frm.only.fav <- full.frame[full.frame$yval >= (.self$threshold), ]
|
||||
} else {
|
||||
frm.only.fav <- full.frame[full.frame$yval <= (.self$threshold), ]
|
||||
}
|
||||
}else if(inherits(.self, "VT.tree.class")){
|
||||
frm.only.fav <- full.frame[full.frame$yval == 2, ]
|
||||
}
|
||||
frm <- frm.only.fav
|
||||
}
|
||||
|
||||
if (only.leaf == T){
|
||||
if(inherits(.self, "VT.tree.reg")){
|
||||
frm.only.leaf <- full.frame[full.frame$var == "<leaf>", ]
|
||||
}else if(inherits(.self, "VT.tree.class")){
|
||||
frm.only.leaf <- full.frame[full.frame$var == "<leaf>", ]
|
||||
}
|
||||
frm <- frm.only.leaf
|
||||
}
|
||||
|
||||
if (only.fav == T & only.leaf == T){
|
||||
frm <- frm.only.leaf[ intersect(rownames(frm.only.leaf), rownames(frm.only.fav)) , ]
|
||||
}else if (only.fav == F & only.leaf == F){
|
||||
frm <- full.frame
|
||||
}
|
||||
|
||||
# Le cas où l'arbre est vide ou n'existe pas:
|
||||
if (length(frm) == 0) stop("VT.tree : no tree");
|
||||
if (ncol(frm)==0) stop("VT.tree : no rules");
|
||||
|
||||
pth <- path.rpart(.self$tree, nodes = row.names(frm), print.it = F)
|
||||
# Delete 'root' node des règles
|
||||
pth <- lapply(pth, FUN = function(d) return(d[-1]))
|
||||
|
||||
depth <- 0
|
||||
nodes <- names(pth)
|
||||
rules <- data.frame(replicate(6, character(0), simplify = T), replicate(2, numeric(0), simplify = T), stringsAsFactors = F)
|
||||
|
||||
colnames(rules) <- c("Subgroup", "Subgroup size", "Treatement event rate", "Control event rate",
|
||||
"Treatment sample size", "Control sample size", "RR (resub)", "RR (snd)")
|
||||
for(i in nodes){
|
||||
pth.text <- paste(pth[[i]], collapse = " & ")
|
||||
incid <- .self$getIncidences(pth.text)
|
||||
|
||||
rules[i, 1] <- pth.text
|
||||
rules[i, 2] <- incid$table.selected$table[3, 3] #size subgroupg
|
||||
rules[i, 3] <- incid$table.selected$table[4, 2] #treatment event rate
|
||||
rules[i, 4] <- incid$table.selected$table[4, 1] #control event rate
|
||||
rules[i, 5] <- incid$table.selected$table[3, 2] #treatment sample size
|
||||
rules[i, 6] <- incid$table.selected$table[3, 1] #control sample size
|
||||
rules[i, 7] <- round(incid$table.selected$rr, digits = 3) # rr (resub)
|
||||
rules[i, 8] <- round(incid$table.selected$rr.snd, digits = 3) # rr (snd)
|
||||
|
||||
if(isTRUE(verbose)){
|
||||
cat("----------------------------\n")
|
||||
cat(sprintf("| Rule number %s : ", i))
|
||||
|
||||
|
||||
if(inherits(.self, "VT.tree.reg")){
|
||||
cat(sprintf("Y val = %0.3f \n", frm[i, ]$yval))
|
||||
}else{
|
||||
cat(sprintf("Y val = %i \n", frm[i, ]$yval))
|
||||
}
|
||||
|
||||
cat("----------------------------\n")
|
||||
|
||||
cat(sprintf("[n = %i", frm[i, ]$n))
|
||||
cat(sprintf(", loss = %s, prob = %0.2f",
|
||||
frm[i, ]$dev,
|
||||
frm[i, ]$yval2[, 5]))
|
||||
|
||||
cat("] \n")
|
||||
cat("\t\t")
|
||||
cat(pth[[i]], sep="\n\t\t")
|
||||
|
||||
if(isTRUE(tables)){
|
||||
cat("\n")
|
||||
cat(sprintf("Incidence dans la selection \n"))
|
||||
print(incid$table.selected$table)
|
||||
cat("\n")
|
||||
cat(sprintf("Risque relatif (resub) : %0.3f \n", incid$table.selected$rr))
|
||||
cat(sprintf("Risque relatif (snd) : %0.3f \n\n", incid$table.selected$rr.snd))
|
||||
|
||||
cat(sprintf("Incidence dans le complementaire\n"))
|
||||
print(incid$table.not.selected$table)
|
||||
cat("\n")
|
||||
cat(sprintf("Risque relatif (resub) : %0.3f \n", incid$table.not.selected$rr))
|
||||
cat(sprintf("Risque relatif (snd) : %0.3f \n\n", incid$table.not.selected$rr.snd))
|
||||
}
|
||||
|
||||
cat("\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
return(invisible(rules))
|
||||
},
|
||||
|
||||
getIncidences = function(rule, rr.snd = T){
|
||||
return(VT.incidences(.self$vt.difft, rule, rr.snd))
|
||||
},
|
||||
|
||||
getAhatIncidence = function(){
|
||||
if(sum(.self$Ahat)!=0){
|
||||
|
||||
table.inc <- VT.incidences(vt.object = .self$vt.difft$vt.object, select = .self$Ahat)
|
||||
|
||||
table.A <- table.inc$table.selected
|
||||
table.A.cmpl <- table.inc$table.not.selected
|
||||
|
||||
cat(sprintf("Incidence dans le sous groupe A\n"))
|
||||
print(table.A$table)
|
||||
cat("\n")
|
||||
cat(sprintf("Risque relatif : %0.3f \n\n", table.A$risque_relatif))
|
||||
|
||||
cat(sprintf("Incidence dans le sous groupe A complementaire\n"))
|
||||
print(table.A.cmpl$table)
|
||||
cat("\n")
|
||||
cat(sprintf("Risque relatif : %0.3f \n\n", table.A.cmpl$risque_relatif))
|
||||
}else{
|
||||
return("Empty set")
|
||||
}
|
||||
},
|
||||
|
||||
getAhatQuality = function(){
|
||||
|
||||
resub <- vt.getQAOriginal(.self$Ahat, response = .self$vt.difft$vt.object$getY(), trt = .self$vt.difft$vt.object$data[, 2])
|
||||
|
||||
snd <- vt.getQAOriginal(.self$Ahat, response = .self$vt.difft$twin1, trt = .self$vt.difft$vt.object$data[, 2])
|
||||
|
||||
# on ajoute la taille de Ahat
|
||||
size <- sum(.self$Ahat)
|
||||
|
||||
res <- cbind(size, resub, snd)
|
||||
names(res) <- c("size", "resub", "snd")
|
||||
|
||||
return(res)
|
||||
}
|
||||
)
|
||||
)
|
42
R/tree.class.R
Normal file
42
R/tree.class.R
Normal file
|
@ -0,0 +1,42 @@
|
|||
# VT.TREE.CLASS -----------------------------------------------------------
|
||||
|
||||
VT.tree.class <- setRefClass(
|
||||
Class = "VT.tree.class",
|
||||
|
||||
contains = "VT.tree",
|
||||
|
||||
methods = list(
|
||||
initialize = function(vt.difft, threshold = 0.05, sens = ">", screening = NULL){
|
||||
callSuper(vt.difft, threshold, sens, screening)
|
||||
|
||||
.self$name <- .self$computeNameOfTree("class")
|
||||
|
||||
if(.self$sens == ">"){
|
||||
.self$outcome <- ifelse(.self$vt.difft$difft >= .self$threshold, 1, 0)
|
||||
} else {
|
||||
.self$outcome <- ifelse(.self$vt.difft$difft <= .self$threshold, 1, 0)
|
||||
}
|
||||
},
|
||||
|
||||
run = function(...){
|
||||
callSuper()
|
||||
|
||||
data <- .self$getData()
|
||||
if(sum(data[,1]) != 0){
|
||||
.self$tree <- rpart(as.formula(paste(.self$name, ".", sep = "~")), data = data, method = "class", ...)
|
||||
.self$Ahat <- as.numeric(predict(.self$tree, data, type = "class")) - 1
|
||||
}else{
|
||||
.self$Ahat <- .self$outcome
|
||||
}
|
||||
|
||||
return(invisible(tree))
|
||||
},
|
||||
|
||||
sumup = function(){
|
||||
cat("Classification Tree")
|
||||
callSuper()
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
VT.tree.class$lock("threshold", "vt.difft")
|
44
R/tree.reg.R
Normal file
44
R/tree.reg.R
Normal file
|
@ -0,0 +1,44 @@
|
|||
|
||||
|
||||
# VT.TREE.REG -------------------------------------------------------------
|
||||
|
||||
VT.tree.reg <- setRefClass(
|
||||
Class = "VT.tree.reg",
|
||||
|
||||
contains = "VT.tree",
|
||||
|
||||
methods = list(
|
||||
initialize = function(vt.difft, threshold = 0.05, sens = ">", screening = NULL){
|
||||
callSuper(vt.difft, threshold, sens, screening)
|
||||
|
||||
.self$name <- .self$computeNameOfTree("reg")
|
||||
|
||||
.self$outcome <- .self$vt.difft$difft
|
||||
},
|
||||
|
||||
run = function(...){
|
||||
callSuper()
|
||||
|
||||
data <- .self$getData()
|
||||
|
||||
.self$tree <- rpart(as.formula(paste(.self$name, ".", sep = "~")), data = data, ...)
|
||||
|
||||
if(.self$sens == ">")
|
||||
res <- ifelse(predict(.self$tree) >= (.self$threshold), 1, 0)
|
||||
else
|
||||
res <- ifelse(predict(.self$tree) <= (.self$threshold), 1, 0)
|
||||
|
||||
.self$Ahat <- res
|
||||
# if(sum(res) != 0) .self$vt.forest$addAhatColumn(name, res)
|
||||
return(invisible(tree))
|
||||
},
|
||||
|
||||
sumup = function(){
|
||||
cat("Regression Tree")
|
||||
callSuper()
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
VT.tree.reg$lock("threshold", "vt.difft")
|
||||
|
8
Read-and-delete-me
Normal file
8
Read-and-delete-me
Normal file
|
@ -0,0 +1,8 @@
|
|||
* Edit the help file skeletons in 'man', possibly combining help files for multiple functions.
|
||||
* Edit the exports in 'NAMESPACE', and add necessary imports.
|
||||
* Put any C/C++/Fortran code in 'src'.
|
||||
* If you have compiled code, add a useDynLib() directive to 'NAMESPACE'.
|
||||
* Run R CMD build to build the package tarball.
|
||||
* Run R CMD check to check the package tarball.
|
||||
|
||||
Read "Writing R Extensions" for more information.
|
17
VirtualTwins.Rproj
Normal file
17
VirtualTwins.Rproj
Normal file
|
@ -0,0 +1,17 @@
|
|||
Version: 1.0
|
||||
|
||||
RestoreWorkspace: Yes
|
||||
SaveWorkspace: Yes
|
||||
AlwaysSaveHistory: Yes
|
||||
|
||||
EnableCodeIndexing: Yes
|
||||
UseSpacesForTab: Yes
|
||||
NumSpacesForTab: 2
|
||||
Encoding: UTF-8
|
||||
|
||||
RnwWeave: Sweave
|
||||
LaTeX: pdfLaTeX
|
||||
|
||||
BuildType: Package
|
||||
PackageUseDevtools: Yes
|
||||
PackageInstallArgs: --no-multiarch --with-keep.source
|
34
man/VirtualTwins-package.Rd
Normal file
34
man/VirtualTwins-package.Rd
Normal file
|
@ -0,0 +1,34 @@
|
|||
\name{VirtualTwins-package}
|
||||
\alias{VirtualTwins-package}
|
||||
\alias{VirtualTwins}
|
||||
\docType{package}
|
||||
\title{
|
||||
\packageTitle{VirtualTwins}
|
||||
}
|
||||
\description{
|
||||
\packageDescription{VirtualTwins}
|
||||
}
|
||||
\details{
|
||||
|
||||
The DESCRIPTION file:
|
||||
\packageDESCRIPTION{VirtualTwins}
|
||||
\packageIndices{VirtualTwins}
|
||||
~~ An overview of how to use the package, including the most important functions ~~
|
||||
}
|
||||
\author{
|
||||
\packageAuthor{VirtualTwins}
|
||||
|
||||
Maintainer: \packageMaintainer{VirtualTwins}
|
||||
}
|
||||
\references{
|
||||
~~ Literature or other references for background information ~~
|
||||
}
|
||||
~~ Optionally other standard keywords, one per line, from file KEYWORDS in the R documentation directory ~~
|
||||
\keyword{ package }
|
||||
\seealso{
|
||||
~~ Optional links to other man pages, e.g. ~~
|
||||
~~ \code{\link[<pkg>:<pkg>-package]{<pkg>}} ~~
|
||||
}
|
||||
\examples{
|
||||
~~ simple examples of the most important functions ~~
|
||||
}
|
Loading…
Reference in a new issue