mirror of
https://github.com/prise6/aVirtualTwins.git
synced 2024-06-19 03:14:59 +02:00
116 lines
4.4 KiB
R
116 lines
4.4 KiB
R
#'
|
|
#' RCT format for Virtual Twins
|
|
#'
|
|
#' \code{formatRCTDataset} returns dataset that Virtual Twins is able to
|
|
#' analyze.
|
|
#'
|
|
#' This function check these differents topic: Outcome must be binary and a
|
|
#' factor. If numeric with two distincts values, outcome becomes a factor where
|
|
#' the favorable reponse is the second level. Also, outcome is moved on the
|
|
#' first column of \code{dataset}.
|
|
#'
|
|
#' Treatment must have two distinct numeric values, 0 : no treatment, 1 :
|
|
#' treatment. Treatment is moved to the second column.
|
|
#'
|
|
#' Qualitatives variables must be factor. If it has more than two levels, if
|
|
#' running VirtualTwins with interaction, it creates dummy variables.
|
|
#'
|
|
#' @param dataset data.frame representing RCT's
|
|
#' @param outcome.field name of the outcome's field in \code{dataset}
|
|
#' @param treatment.field name of the treatment's field in \code{dataset}
|
|
#' @param interactions logical. If running VirtualTwins with treatment's
|
|
#' interactions, set to TRUE (default value)
|
|
#'
|
|
#' @return return data.frame with good format (explained in details section) to run VirtualTwins
|
|
#'
|
|
#' @examples
|
|
#' \dontrun{
|
|
#' data.format <- formatRCTDataset(data, "outcome", "treatment", TRUE)
|
|
#' }
|
|
#' data(sepsis)
|
|
#' data.format <- formatRCTDataset(sepsis, "survival", "THERAPY", T)
|
|
#'
|
|
#'
|
|
#'
|
|
#'
|
|
#' @export
|
|
formatRCTDataset <- function(dataset, outcome.field, treatment.field, interactions = TRUE){
|
|
|
|
if(!is.data.frame(dataset)) stop("Dataset parameter must be data.frame")
|
|
if(!is.character(outcome.field)) stop(sprintf("%s, outcome.field parameter must be a string", outcome.field))
|
|
if(!is.character(treatment.field)) stop(sprintf("%s, treatment.field parameter must be a string", treatment.field))
|
|
|
|
if(!outcome.field %in% colnames(dataset)) stop(sprintf("%s must be in data.frame colnames", outcome.field))
|
|
outcome.field.which <- which(outcome.field == colnames(dataset))
|
|
if(!treatment.field %in% colnames(dataset)) stop(sprintf("%s must be in data.frame colnames", treatment.field))
|
|
treatment.field.which <- which(treatment.field == colnames(dataset))
|
|
|
|
d <- dataset
|
|
|
|
outcome <- d[, outcome.field.which]
|
|
if(!is.factor(outcome)) outcome <- as.factor(outcome)
|
|
if(!length(levels(outcome)) == 2L) stop(sprintf("outcome %s must be binary", outcome.field))
|
|
cat(sprintf("\"%s\" will be the favorable outcome \n", levels(outcome)[2]))
|
|
d[, outcome.field.which] <- outcome
|
|
|
|
treatment <- d[, treatment.field.which]
|
|
if(!is.numeric(treatment) & !is.integer(treatment)) treatment <- as.numeric(treatment)
|
|
if(!( length(unique(treatment)) == 2L & all(c(0,1) %in% unique(treatment)) ))
|
|
stop(sprintf("%s, response must be numeric:\n 0 = no treatment \n 1 = treatment \n", treatment.field))
|
|
d[, treatment.field.which] <- treatment
|
|
|
|
predictors <- colnames(dataset)[-c(outcome.field.which, treatment.field.which)]
|
|
|
|
predictors.next <- vector()
|
|
iter = 1
|
|
for(i in predictors){
|
|
iter <- length(predictors.next)+1
|
|
|
|
var <- d[, i]
|
|
|
|
if(is.numeric(var) | is.integer(var)){
|
|
predictors.next[iter] <- i
|
|
}
|
|
|
|
if(is.character(var)){
|
|
var <- as.factor(var)
|
|
}
|
|
|
|
if(is.factor(var)){
|
|
if(length(levels(var))>2){
|
|
if(isTRUE(interactions)){
|
|
cat(sprintf("Creation of dummy variables for %s \n", i))
|
|
for(l in levels(var)){
|
|
n <- paste(i, l, sep = "_")
|
|
d[, n] <- ifelse(var == l, 1, 0)
|
|
predictors.next[iter] <- n
|
|
cat(sprintf("Dummy variable %s created \n", n))
|
|
iter <- iter + 1
|
|
}
|
|
# d <- d[, predictors != i]
|
|
} else {
|
|
if(length(levels(var))>32){
|
|
stop(cat(sprintf("%s has too many levels (superior to 32)", i)))
|
|
}else{
|
|
warning(sprintf("%s has more than 2 levels. Virtual Twins won't be able to run with interactions.", i))
|
|
predictors.next[iter] <- i
|
|
}
|
|
}
|
|
}else if(length(levels(var)) == 2){
|
|
cat(sprintf("%s is two-level factor. It has to be transformed into numeric value : \n", i))
|
|
cat(sprintf("%s becomes 0 \n", levels(var)[1]))
|
|
cat(sprintf("%s becomes 1 \n", levels(var)[2]))
|
|
d[, i] <- ifelse(var == levels(var)[1], 0, 1)
|
|
predictors.next[iter] <- i
|
|
}else{
|
|
cat(sprintf("%s is deleted because only one level", i))
|
|
}
|
|
}
|
|
}
|
|
|
|
colnames.order <- c(outcome.field, treatment.field, predictors.next)
|
|
d <- d[, colnames.order]
|
|
|
|
return(invisible(d))
|
|
}
|