mirror of
https://github.com/prise6/aVirtualTwins.git
synced 2024-05-01 19:52:43 +02:00
Add formatRCTDataset and explanations
This commit is contained in:
parent
88494d0472
commit
8ef0aa1d5c
109
R/formatRCTDataset.R
Normal file
109
R/formatRCTDataset.R
Normal file
|
@ -0,0 +1,109 @@
|
|||
#'
|
||||
#' RCT format for Virtual Twins
|
||||
#'
|
||||
#' \code{formatRCTDataset} returns dataset that Virtual Twins is able to
|
||||
#' analyze.
|
||||
#'
|
||||
#' This function check these differents topic: Outcome must be binary and a
|
||||
#' factor. If numeric with two distincts values, outcome becomes a factor where
|
||||
#' the favorable reponse is the second level. Also, outcome is moved on the
|
||||
#' first column of \code{dataset}.
|
||||
#'
|
||||
#' Treatment must have two distinct numeric values, 0 : no treatment, 1 :
|
||||
#' treatment. Treatment is moved to the second column.
|
||||
#'
|
||||
#' Qualitatives variables must be factor. If it has more than two levels, if
|
||||
#' running VirtualTwins with interaction, it creates dummy variables.
|
||||
#'
|
||||
#' @param dataset data.frame representing RCT's
|
||||
#' @param outcome.field name of the outcome's field in \code{dataset}
|
||||
#' @param treatment.field name of the treatment's field in \code{dataset}
|
||||
#' @param interactions logical. If running VirtualTwins with treatment's
|
||||
#' interactions, set to TRUE (default value)
|
||||
#'
|
||||
#' @return return data.frame with good format (explained in details section) to run VirtualTwins
|
||||
#'
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#' data.format <- formatRCTDataset(data, "outcome", "treatment", TRUE)
|
||||
#' }
|
||||
#'
|
||||
#' @export
|
||||
formatRCTDataset <- function(dataset, outcome.field, treatment.field, interactions = TRUE){
|
||||
|
||||
if(!is.data.frame(dataset)) stop("Dataset parameter must be data.frame")
|
||||
if(!is.character(outcome.field)) stop(sprintf("%s, outcome.field parameter must be a string", outcome.field))
|
||||
if(!is.character(treatment.field)) stop(sprintf("%s, treatment.field parameter must be a string", treatment.field))
|
||||
|
||||
if(!outcome.field %in% colnames(dataset)) stop(sprintf("%s must be in data.frame colnames", outcome.field))
|
||||
outcome.field.which <- which(outcome.field == colnames(dataset))
|
||||
if(!treatment.field %in% colnames(dataset)) stop(sprintf("%s must be in data.frame colnames", treatment.field))
|
||||
treatment.field.which <- which(treatment.field == colnames(dataset))
|
||||
|
||||
d <- dataset
|
||||
|
||||
outcome <- d[, outcome.field.which]
|
||||
if(!is.factor(outcome)) outcome <- as.factor(outcome)
|
||||
if(!length(levels(outcome)) == 2L) stop(sprintf("outcome %s must be binary", outcome.field))
|
||||
cat(sprintf("\"%s\" will be the favorable outcome \n", levels(outcome)[2]))
|
||||
|
||||
treatment <- d[, treatment.field.which]
|
||||
if(!is.numeric(treatment) & !is.integer(treatment)) treatment <- as.numeric(treatment)
|
||||
if(!( length(unique(treatment)) == 2L & all(c(0,1) %in% unique(treatment)) ))
|
||||
stop(sprintf("%s, response must be numeric:\n 0 = no treatment \n 1 = treatment \n", treatment.field))
|
||||
|
||||
predictors <- colnames(dataset)[-c(outcome.field.which, treatment.field.which)]
|
||||
|
||||
predictors.next <- vector()
|
||||
iter = 1
|
||||
for(i in predictors){
|
||||
iter <- length(predictors.next)+1
|
||||
|
||||
var <- d[, i]
|
||||
|
||||
if(is.numeric(var) | is.integer(var)){
|
||||
predictors.next[iter] <- i
|
||||
}
|
||||
|
||||
if(is.character(var)){
|
||||
var <- as.factor(var)
|
||||
}
|
||||
|
||||
if(is.factor(var)){
|
||||
if(length(levels(var))>2){
|
||||
if(isTRUE(interactions)){
|
||||
cat(sprintf("Creation of dummy variables for %s \n", i))
|
||||
for(l in levels(var)){
|
||||
n <- paste(i, l, sep = "_")
|
||||
d[, n] <- ifelse(var == l, 1, 0)
|
||||
predictors.next[iter] <- n
|
||||
cat(sprintf("Dummy variable %s created \n", n))
|
||||
iter <- iter + 1
|
||||
}
|
||||
# d <- d[, predictors != i]
|
||||
} else {
|
||||
if(length(levels(var))>32){
|
||||
stop(cat(sprintf("%s has too many levels (superior to 32)", i)))
|
||||
}else{
|
||||
warning(sprintf("%s has more than 2 levels. Virtual Twins won't be able to run with interactions.", i))
|
||||
predictors.next[iter] <- i
|
||||
}
|
||||
}
|
||||
}else if(length(levels(var)) == 2){
|
||||
cat(sprintf("%s is two-level factor. It has to be transformed into numeric value : \n", i))
|
||||
cat(srpintf("%s becomes 0 \n", levels(var)[1]))
|
||||
cat(srpintf("%s becomes 1 \n", levels(var)[2]))
|
||||
d[, i] <- ifelse(var == levels(var)[1], 0, 1)
|
||||
predictors.next[iter] <- i
|
||||
}else{
|
||||
cat(sprintf("%s is deleted because only one level", i))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
colnames.order <- c(outcome.field, treatment.field, predictors.next)
|
||||
cat(colnames.order)
|
||||
d <- d[, colnames.order]
|
||||
|
||||
return(invisible(d))
|
||||
}
|
Loading…
Reference in a new issue