mirror of
https://github.com/prise6/aVirtualTwins.git
synced 2024-05-22 22:52:12 +02:00
45 lines
1.4 KiB
R
45 lines
1.4 KiB
R
|
|
# Create sepsis data ------------------------------------------------------
|
|
|
|
# Load some libraries
|
|
library(aVirtualTwins)
|
|
library(randomForest)
|
|
|
|
# Sepsis is a csv file available in SIDES example to this address:
|
|
# http://biopharmnet.com/wiki/Software_for_subgroup_identification_and_analysis
|
|
# type ?sepsis to see details
|
|
# I downloaded zip file and extract the sepsis.csv in data-raw folder.
|
|
sepsis.csv <- read.csv(file = "data-raw/sepsis.csv", na.strings = ".")
|
|
|
|
# Check data
|
|
str(sepsis.csv)
|
|
|
|
# Count NA's
|
|
sum(is.na(sepsis.csv))
|
|
# No NA's in outcome
|
|
sum(is.na(sepsis.csv$survival))
|
|
|
|
# For futures computation i need to impute missing values
|
|
# I use random forest imputation with randomForest package with simple parameters
|
|
# I need to make survival field as factor
|
|
sepsis.csv$survival <- factor(sepsis.csv$survival, levels = 0:1)
|
|
# reproducibility:
|
|
set.seed(123)
|
|
sepsis.imp <- with(sepsis.csv, rfImpute(y = survival, x = sepsis.csv[, -1], iter = 5, ntree = 500))
|
|
str(sepsis.imp)
|
|
|
|
# Change THERAPY levels in 0/1 and type will be numeric
|
|
sepsis.imp$THERAPY <- with(sepsis.csv, ifelse(THERAPY == "control", 0, 1))
|
|
|
|
# Change survival into numeric values
|
|
sepsis.imp$survival <- as.numeric(as.character(sepsis.imp$survival))
|
|
|
|
# Check data
|
|
sepsis <- formatRCTDataset(sepsis.imp, "survival", "THERAPY", T)
|
|
|
|
# Save file into data folder
|
|
save(sepsis, file = "data/sepsis.rdata")
|
|
|
|
# End.
|
|
|