1
0
Fork 0
mirror of https://github.com/prise6/aVirtualTwins.git synced 2024-04-25 19:00:27 +02:00

Writting wrapper of RefClass

This commit is contained in:
prise6 2015-07-25 02:10:28 +02:00
parent 7b5c7d4ec4
commit a4bd8fdbb7
20 changed files with 297 additions and 38 deletions

View file

@ -19,7 +19,7 @@ Depends:
R (>= 3.2.0),
methods
Collate:
'VirtualTwins.R'
'aVirtualTwins.R'
'data.R'
'object.R'
'difft.R'
@ -29,6 +29,7 @@ Collate:
'forest.double.R'
'forest.fold.R'
'forest.one.R'
'forest.wrapper.R'
'formatRCTDataset.R'
'incidences.R'
'object.wrapper.R'
@ -36,4 +37,5 @@ Collate:
'tree.R'
'tree.class.R'
'tree.reg.R'
'tree.wrapper.R'
VignetteBuilder: knitr

View file

@ -2,11 +2,15 @@
export(VT.difft)
export(VT.forest.double)
export(VT.forest.fold)
export(VT.forest.one)
export(VT.object)
export(VT.tree.class)
export(VT.tree.reg)
export(formatRCTDataset)
export(vt.data)
export(vt.forest)
export(vt.subgroups)
export(vt.tree)
import(methods)
importClassesFrom(party,RandomForest)

View file

@ -18,7 +18,7 @@
#' }
#'
#' @docType package
#' @name VirtualTwins
#' @aliases VirtualTwins-package
#' @name aVirtualTwins
#' @aliases aVirtualTwins-package
#'
NULL

View file

@ -25,6 +25,10 @@
#' \code{\link{VT.forest.one}}, \code{\link{VT.forest.double}}
#'
#' @import methods
#'
#' @export VT.forest.fold
#'
VT.forest.fold <- setRefClass(
Class = "VT.forest.fold",

68
R/forest.wrapper.R Normal file
View file

@ -0,0 +1,68 @@
#'
#' Create forest to compute difft
#'
#' \code{vt.forest} is a wrapper of \code{\link{VT.forest.one}},
#' \code{\link{VT.forest.double}} and \code{\link{VT.forest.fold}}.
#'
#' @param forest.type character one / double / fold
#' @param vt.data \code{\link{VT.data}} or return of \code{vt.data()} function
#' @param interactions logical. If running VirtualTwins with treatment's
#' interactions, set to TRUE (default value)
#' @param method character absolute / relative / logit
#' @param ... parameters of \code{\link{VT.difft}} or \code{\link{VT.forest}}
#'
#' @return \code{VT.difft}
#'
#' @include forest.R difft.R
#'
#' @name vt.forest
#'
#' @export vt.forest
vt.forest <- function(forest.type = "one", vt.data, interactions = T, method = "absolute", ...){
if(!inherits(vt.data, "VT.object"))
stop("vt.data must be VT.object class")
params <- list(...)
if (forest.type == "one"){
if(! "rf" %in% names(params) ){
rf <- randomForest(x = vt.data$getX(interactions = interactions, trt = NULL),
y = vt.data$getY(),
...)
} else{
rf <- params["rf"]
}
vt.difft <- VT.forest.one(vt.object = vt.data, model = rf, interactions = interactions, method = method)
} else if (forest.type == "double"){
if(! "model_trt1" %in% names(params) ){
rf_trt1 <- randomForest(x = vt.data$getX(trt = 1, interactions = interactions),
y = vt.data$getY(1),
...)
} else
rf_trt1 <- params["model_trt1"]
if(! "model_trt0" %in% names(params) ){
rf_trt0 <- randomForest(x = vt.data$getX(trt = 1, interactions = interactions),
y = vt.data$getY(1),
...)
} else
rf_trt0 <- params["rf_trt0"]
vt.difft <- VT.forest.double(vt.object = vt.data, model_trt1 = rf_trt1, model_trt0 = rf_trt0, method = method, ...)
} else if (forest.type == "fold"){
fold <- ifelse(! "fold" %in% names(params) , 5, params["fold"])
fold <- ifelse(! "ratio" %in% names(params) , 1, params["ratio"])
vt.difft <- aVirtualTwins:::VT.forest.fold(vt.object = vt.data, fold = fold, ratio = ratio,
interactions = interactions, method = method)
} else
stop("forest.type must be one, double or fold")
if(forest.type %in% c("one", "double"))
vt.difft$run()
else
vt.difft$run(...)
return(vt.difft)
}

View file

@ -1,6 +1,6 @@
# VT.OBJECT ---------------------------------------------------------------
#' VirtualTwins.object
#' VT.object
#'
#' A Reference Class to deal with RCT dataset
#'

View file

@ -13,7 +13,11 @@
#'
#' @return \code{VT.object}
#'
#' @export
#' @include object.R
#'
#' @name vt.data
#'
#' @export vt.data
vt.data <- function(dataset, outcome.field, treatment.field, interactions = TRUE, ...){
data <- formatRCTDataset(dataset, outcome.field, treatment.field, interactions = TRUE)

View file

@ -1,3 +1,30 @@
#' Visualize subgroups
#'
#' @param vt.trees \code{\link{VT.tree}} object (can be a list)
#' @param only.leaf logical select only leaf of trees
#' @param only.fav logical select only favorable subgroup (meaning with favorable label of the tree)
#' @param tables logical show tables of incidence
#' @param verbose print tables during computation
#'
#' @return data.frame of rules
#'
#' @export vt.subgroups
#'
#' @name vt.subgroups
#'
vt.subgroups <- function(vt.trees, only.leaf = T, only.fav = T, tables = F, verbose = F){
if(is.list(vt.trees)){
subgroups <- lapply(vt.trees, function(x)x$getRules(only.leaf = only.leaf, only.fav = only.fav, tables = tables, verbose = verbose))
unique(do.call(rbind, subgroups))
}
else{
subgroups <- vt.tree$getRules(only.leaf = only.leaf, only.fav = only.fav, tables = tables, verbose = verbose)
}
}
vt.getQAOriginal <- function(response, trt, ahat){
if(is.factor(response)) response = as.numeric(response) - 1
@ -43,4 +70,3 @@ vt.rr.snd <- function(vt.difft, selected){

43
R/tree.wrapper.R Normal file
View file

@ -0,0 +1,43 @@
#' Trees to find Subgroups
#'
#' A wrapper of class VT.tree.xxx
#'
#'
#' See \code{\link{VT.tree}}
#'
#' @param tree.type character "class" for classification tree, "reg" for regression tree
#' @param vt.difft \code{\link{VT.difft}} object
#' @param sens character c(">","<"). See details.
#' @param threshold numeric It can be a unique value or a vector
#'
#' @return \code{VT.tree} or a list of \code{VT.tree} depending on threshold dimension
#'
#' @include tree.R
#'
#' @name vt.tree
#'
#' @export vt.tree
vt.tree <- function(tree.type = "class", vt.difft, sens = ">", threshold = seq(.5, .8, .1), screening = NULL, ...){
if(!inherits(vt.difft, "VT.difft"))
stop("vt.difft parameter must be aVirtualTwins::VT.difft class")
if(is.numeric(threshold)){
if(length(threshold)>1){
res.name <- paste0("tree", 1:length(threshold))
res.list <- lapply(X = threshold, FUN = vt.tree, tree.type = tree.type, vt.difft = vt.difft, sens = sens, screening = screening, ...)
names(res.list) <- res.name
return(res.list)
}else{
if(tree.type == "class")
tree <- aVirtualTwins:::VT.tree.class(vt.difft = vt.difft, sens = sens, threshold = threshold, screening = screening)
else
tree <- aVirtualTwins:::VT.tree.reg(vt.difft = vt.difft, sens = sens, threshold = threshold, screening = screening)
tree$run(...)
return(tree)
}
}else
stop("threshold must be numeric")
}

View file

@ -6,6 +6,31 @@ VirtualTwins is a method of subgroup identification from randomized clinical tri
As an intern in a french pharmaceutical group, i worked on this method and develop a package based on Jared Foster and al method.
## (Very) Quick Preview
```r
# Load data
data(sepsis)
# Format data
vt.obj <- vt.data(sepsis, "survival", "THERAPY", T)
# First step : create random forest model
vt.for <- vt.forest("one", vt.obj, T, ntree = 500)
# Second step : find rules in data
vt.trees <- vt.tree("class", vt.for, threshold = quantile(vt.for$difft, seq(.5,.8,.1)), maxdepth = 2)
# Print results
(vt.subgroups <- lapply(vt.trees, function(x)x$getRules(only.fav = T, verbose = F)))
```
| |Subgroup |Subgroup size |Treatement event rate |Control event rate |Treatment sample size |Control sample size | RR (resub)| RR (snd)|
|:-------|:---------------------------|:-------------|:---------------------|:------------------|:---------------------|:-------------------|----------:|--------:|
|tree1 |PRAPACHE>=26.5 |157 |0.752 |0.327 |105 |52 | 2.300| 1.873|
|tree2 |PRAPACHE>=26.5 |157 |0.752 |0.327 |105 |52 | 2.300| 1.873|
|tree3.3 |PRAPACHE>=26.5 |157 |0.752 |0.327 |105 |52 | 2.300| 1.873|
|tree3.7 |PRAPACHE>=26.5 & AGE>=54.88 |111 |0.887 |0.325 |71 |40 | 2.729| 2.026|
|tree4.3 |PRAPACHE>=26.5 |157 |0.752 |0.327 |105 |52 | 2.300| 1.873|
|tree4.7 |PRAPACHE>=26.5 & AGE>=54.88 |111 |0.887 |0.325 |71 |40 | 2.729| 2.026|
## Infos
Currently this package works for RCT with two treatments groups and binary outcome.

View file

@ -2,7 +2,7 @@
# Create sepsis data ------------------------------------------------------
# Load some libraries
library(VirtualTwins)
library(aVirtualTwins)
library(randomForest)
# Sepsis is a csv file available in SIDES example to this address:

View file

@ -6,7 +6,7 @@
## ------------------------------------------------------------------------
# load library VT
library(VirtualTwins)
library(aVirtualTwins)
# load data sepsis
data(sepsis)
# initialize VT.object
@ -88,9 +88,10 @@ vt.doublef.rf <- VT.forest.double(vt.o, model.rf.trt1, model.rf.trt0)
# Then, use run() to compute probabilities
vt.doublef.rf$run()
## ---- cache=TRUE---------------------------------------------------------
## ---- cache=F------------------------------------------------------------
# initialize k-fold RF
model.fold <- VirtualTwins:::VT.forest.fold(vt.o, fold = 5, ratio = 1, interactions = T)
model.fold <- aVirtualTwins:::VT.forest.fold(vt.o, fold = 5, ratio = 1, interactions = T)
# grow RF with randomForest package options
# set do.trace option to see the 5 folds
model.fold$run(ntree = 500, do.trace = 500)

View file

@ -129,7 +129,7 @@ vt.data <- function(dataset, outcome.field, treatment.field, interactions = TRUE
__Example with Sepsis__
```{r}
# load library VT
library(VirtualTwins)
library(aVirtualTwins)
# load data sepsis
data(sepsis)
# initialize VT.object
@ -278,7 +278,7 @@ This idea is taken from *method 3* of Jared Foster paper :
> A modification of [previous methods] is to obtain $\hat{P_{1i}}$ and $\hat{P_{0i}}$ via cross-validation. In this méthod the specific data for subject $i$ is not used to obtain $\hat{P_{1i}}$ and $\hat{P_{0i}}$. Using k-fold cross-validation, we apply random forest regression approach to $\frac{k-1}{k}$ of the data and use the resulting predictor to obtain estimates of $P_{1i}$ and $P_{0i}$ for the remaining $\frac{1}{k}$ of the observations. This is repeated $k$ times.
To use this approach, type `VirtualTwins:::VT.forest.fold()`. This class takes in argument :
To use this approach, type `aVirtualTwins:::VT.forest.fold()`. This class takes in argument :
* `vt.object` : return of `vt.data()` function
* `fold` : number of fold (e.g. $5$)
@ -287,9 +287,10 @@ To use this approach, type `VirtualTwins:::VT.forest.fold()`. This class takes i
__NOTE:__ This function use only `randomForest` package.
```{r, cache=TRUE}
```{r, cache=F}
# initialize k-fold RF
model.fold <- VirtualTwins:::VT.forest.fold(vt.o, fold = 5, ratio = 1, interactions = T)
model.fold <- aVirtualTwins:::VT.forest.fold(vt.o, fold = 5, ratio = 1, interactions = T)
# grow RF with randomForest package options
# set do.trace option to see the 5 folds
model.fold$run(ntree = 500, do.trace = 500)

View file

@ -10,7 +10,7 @@
<meta name="author" content="Francois Vieille" />
<meta name="date" content="2015-07-24" />
<meta name="date" content="2015-07-25" />
<title>Virtual Twins Examples</title>
@ -54,7 +54,7 @@ code > span.er { color: #ff0000; font-weight: bold; }
<div id="header">
<h1 class="title">Virtual Twins Examples</h1>
<h4 class="author"><em>Francois Vieille</em></h4>
<h4 class="date"><em>2015-07-24</em></h4>
<h4 class="date"><em>2015-07-25</em></h4>
</div>
@ -150,7 +150,7 @@ code > span.er { color: #ff0000; font-weight: bold; }
}</code></pre>
<p><strong>Example with Sepsis</strong></p>
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># load library VT</span>
<span class="kw">library</span>(VirtualTwins)
<span class="kw">library</span>(aVirtualTwins)
<span class="co"># load data sepsis</span>
<span class="kw">data</span>(sepsis)
<span class="co"># initialize VT.object</span>
@ -189,10 +189,8 @@ vt.o.tmp &lt;-<span class="st"> </span><span class="kw">vt.data</span>(sepsis.tm
</ul>
<p><strong>with <code>randomForest</code></strong></p>
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># use randomForest::randomForest()</span>
<span class="kw">library</span>(randomForest, <span class="dt">verbose =</span> F)</code></pre>
<pre><code>## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.</code></pre>
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Reproducibility</span>
<span class="kw">library</span>(randomForest, <span class="dt">verbose =</span> F)
<span class="co"># Reproducibility</span>
<span class="kw">set.seed</span>(<span class="dv">123</span>)
<span class="co"># Fit rf model </span>
<span class="co"># default params</span>
@ -230,10 +228,8 @@ vt.o.tr$data$survival &lt;-<span class="st"> </span><span class="kw">as.factor</
<span class="kw">formatRCTDataset</span>(vt.o.tr$data, <span class="st">&quot;survival&quot;</span>, <span class="st">&quot;THERAPY&quot;</span>)</code></pre>
<pre><code>## &quot;y&quot; will be the favorable outcome</code></pre>
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># use caret::train()</span>
<span class="kw">library</span>(caret, <span class="dt">verbose =</span> F)</code></pre>
<pre><code>## Loading required package: lattice
## Loading required package: ggplot2</code></pre>
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Reproducibility</span>
<span class="kw">library</span>(caret, <span class="dt">verbose =</span> F)
<span class="co"># Reproducibility</span>
<span class="kw">set.seed</span>(<span class="dv">123</span>)
<span class="co"># fit train model</span>
fitControl &lt;-<span class="st"> </span><span class="kw">trainControl</span>(<span class="dt">classProbs =</span> T, <span class="dt">method =</span> <span class="st">&quot;none&quot;</span>)
@ -276,7 +272,7 @@ vt.doublef.rf$<span class="kw">run</span>()</code></pre>
<blockquote>
<p>A modification of [previous methods] is to obtain <span class="math">\(\hat{P_{1i}}\)</span> and <span class="math">\(\hat{P_{0i}}\)</span> via cross-validation. In this méthod the specific data for subject <span class="math">\(i\)</span> is not used to obtain <span class="math">\(\hat{P_{1i}}\)</span> and <span class="math">\(\hat{P_{0i}}\)</span>. Using k-fold cross-validation, we apply random forest regression approach to <span class="math">\(\frac{k-1}{k}\)</span> of the data and use the resulting predictor to obtain estimates of <span class="math">\(P_{1i}\)</span> and <span class="math">\(P_{0i}\)</span> for the remaining <span class="math">\(\frac{1}{k}\)</span> of the observations. This is repeated <span class="math">\(k\)</span> times.</p>
</blockquote>
<p>To use this approach, type <code>VirtualTwins:::VT.forest.fold()</code>. This class takes in argument :</p>
<p>To use this approach, type <code>aVirtualTwins:::VT.forest.fold()</code>. This class takes in argument :</p>
<ul>
<li><code>vt.object</code> : return of <code>vt.data()</code> function</li>
<li><code>fold</code> : number of fold (e.g. <span class="math">\(5\)</span>)</li>
@ -285,7 +281,7 @@ vt.doublef.rf$<span class="kw">run</span>()</code></pre>
</ul>
<p><strong>NOTE:</strong> This function use only <code>randomForest</code> package.</p>
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># initialize k-fold RF</span>
model.fold &lt;-<span class="st"> </span>VirtualTwins:::<span class="kw">VT.forest.fold</span>(vt.o, <span class="dt">fold =</span> <span class="dv">5</span>, <span class="dt">ratio =</span> <span class="dv">1</span>, <span class="dt">interactions =</span> T)
model.fold &lt;-<span class="st"> </span>aVirtualTwins:::<span class="kw">VT.forest.fold</span>(vt.o, <span class="dt">fold =</span> <span class="dv">5</span>, <span class="dt">ratio =</span> <span class="dv">1</span>, <span class="dt">interactions =</span> T)
<span class="co"># grow RF with randomForest package options</span>
<span class="co"># set do.trace option to see the 5 folds</span>
model.fold$<span class="kw">run</span>(<span class="dt">ntree =</span> <span class="dv">500</span>, <span class="dt">do.trace =</span> <span class="dv">500</span>)</code></pre>

View file

@ -3,7 +3,7 @@
\docType{class}
\name{VT.object}
\alias{VT.object}
\title{VirtualTwins.object}
\title{VT.object}
\description{
A Reference Class to deal with RCT dataset
}

View file

@ -1,21 +1,21 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/VirtualTwins.R
% Please edit documentation in R/aVirtualTwins.R
\docType{package}
\name{VirtualTwins}
\alias{VirtualTwins}
\alias{VirtualTwins-package}
\title{VirtualTwins : An adapation of VirtualTwins method created by Jared Foster.}
\name{aVirtualTwins}
\alias{aVirtualTwins}
\alias{aVirtualTwins-package}
\title{aVirtualTwins : An adapation of VirtualTwins method created by Jared Foster.}
\description{
VirtualTwins is written mainly with reference classes. Briefly, there is three kinds of class :
aVirtualTwins is written mainly with reference classes. Briefly, there is three kinds of class :
\itemize{
\item \code{\link{VT.object}} class to represent RCT dataset used by VirtualTwins. To format correctly RCT dataset, use \code{\link{formatRCTDataset}}.
\item \code{\link{VT.object}} class to represent RCT dataset used by aVirtualTwins. To format correctly RCT dataset, use \code{\link{formatRCTDataset}}.
\item \code{\link{VT.difft}} class to compute difference between twins. Family \code{\link{VT.forest}} extends it to compute twins by random forest.
\item \code{\link{VT.tree}} class to find subgroups from \code{difft} by CART trees. \code{\link{VT.tree.class}} and \code{\link{VT.tree.reg}} extend it.
}
}
\section{TODO LIST}{
\emph{last update : 11.06.2015}
\emph{last update : 24.07.2015}
\itemize{
\item More detailed documentation and vignettes
\item Write wrappers for classes

29
man/vt.forest.Rd Normal file
View file

@ -0,0 +1,29 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/forest.wrapper.R
\name{vt.forest}
\alias{vt.forest}
\title{Create forest to compute difft}
\usage{
vt.forest(forest.type = "one", vt.data, interactions = T,
method = "absolute", ...)
}
\arguments{
\item{forest.type}{character one / double / fold}
\item{vt.data}{\code{\link{VT.data}} or return of \code{vt.data()} function}
\item{interactions}{logical. If running VirtualTwins with treatment's
interactions, set to TRUE (default value)}
\item{method}{character absolute / relative / logit}
\item{...}{parameters of \code{\link{VT.difft}} or \code{\link{VT.forest}}}
}
\value{
\code{VT.difft}
}
\description{
\code{vt.forest} is a wrapper of \code{\link{VT.forest.one}},
\code{\link{VT.forest.double}} and \code{\link{VT.forest.fold}}.
}

27
man/vt.subgroups.Rd Normal file
View file

@ -0,0 +1,27 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/tools.R
\name{vt.subgroups}
\alias{vt.subgroups}
\title{Visualize subgroups}
\usage{
vt.subgroups(vt.trees, only.leaf = T, only.fav = T, tables = F,
verbose = F)
}
\arguments{
\item{vt.trees}{\code{\link{VT.tree}} object (can be a list)}
\item{only.leaf}{logical select only leaf of trees}
\item{only.fav}{logical select only favorable subgroup (meaning with favorable label of the tree)}
\item{tables}{logical show tables of incidence}
\item{verbose}{print tables during computation}
}
\value{
data.frame of rules
}
\description{
Visualize subgroups
}

28
man/vt.tree.Rd Normal file
View file

@ -0,0 +1,28 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/tree.wrapper.R
\name{vt.tree}
\alias{vt.tree}
\title{Trees to find Subgroups}
\usage{
vt.tree(tree.type = "class", vt.difft, sens = ">", threshold = seq(0.5,
0.8, 0.1), screening = NULL, ...)
}
\arguments{
\item{tree.type}{character "class" for classification tree, "reg" for regression tree}
\item{vt.difft}{\code{\link{VT.difft}} object}
\item{sens}{character c(">","<"). See details.}
\item{threshold}{numeric It can be a unique value or a vector}
}
\value{
\code{VT.tree} or a list of \code{VT.tree} depending on threshold dimension
}
\description{
A wrapper of class VT.tree.xxx
}
\details{
See \code{\link{VT.tree}}
}

View file

@ -129,7 +129,7 @@ vt.data <- function(dataset, outcome.field, treatment.field, interactions = TRUE
__Example with Sepsis__
```{r}
# load library VT
library(VirtualTwins)
library(aVirtualTwins)
# load data sepsis
data(sepsis)
# initialize VT.object
@ -287,7 +287,8 @@ To use this approach, type `aVirtualTwins:::VT.forest.fold()`. This class takes
__NOTE:__ This function use only `randomForest` package.
```{r, cache=TRUE}
```{r, cache=F}
# initialize k-fold RF
model.fold <- aVirtualTwins:::VT.forest.fold(vt.o, fold = 5, ratio = 1, interactions = T)
# grow RF with randomForest package options