1
0
Fork 0
mirror of https://github.com/prise6/aVirtualTwins.git synced 2024-05-02 19:53:09 +02:00
aVirtualTwins/data-raw/sepsis.R

45 lines
1.3 KiB
R
Raw Permalink Normal View History

2015-06-15 00:20:25 +02:00
# Create sepsis data ------------------------------------------------------
# Load some libraries
2015-07-25 02:10:28 +02:00
library(aVirtualTwins)
2015-06-15 00:20:25 +02:00
library(randomForest)
# sepsis.csv is a csv file available in the SIDES example at this address:
2016-10-09 02:45:00 +02:00
# http://biopharmnet.com/subgroup-analysis-software/
2015-06-15 00:20:25 +02:00
# Type ?sepsis to see details.
# I downloaded the zip file and extracted sepsis.csv into the data-raw folder.
# Read the raw file; "." marks a missing value in the csv.
# stringsAsFactors = TRUE keeps text columns (e.g. THERAPY) as factors: since
# R 4.0.0 read.csv() returns them as character by default, and the random
# forest imputation applied to this data later in the script only accepts
# numeric or factor columns.
sepsis.csv <- read.csv(
  file = "data-raw/sepsis.csv",
  na.strings = ".",
  stringsAsFactors = TRUE
)
# Inspect the structure of the data
str(sepsis.csv)
# Count NA's over the whole data frame
sum(is.na(sepsis.csv))
# Count NA's in the outcome (expected: none)
sum(is.na(sepsis.csv$survival))
# For future computations I need to impute missing values.
# I use random forest imputation from the randomForest package with simple
# parameters.
# The survival field must be a factor for this.
2015-06-21 02:29:01 +02:00
# Recode the outcome column as a factor with levels 0 and 1
sepsis.csv[["survival"]] <- factor(
  sepsis.csv[["survival"]],
  levels = c(0L, 1L)
)
# Fix the RNG seed so the following steps are reproducible
set.seed(123)
2015-06-15 00:20:25 +02:00
# Impute missing values with rfImpute (5 iterations, 500 trees per forest).
# The predictors are every column of sepsis.csv except the first one
# (presumably the survival outcome -- confirm against the csv layout).
# NOTE(review): `y` is passed as a bare column name through with(); rfImpute
# appears to label the outcome column of its result after that expression,
# which the `sepsis.imp$survival` access below relies on -- confirm before
# rewriting this call.
sepsis.imp <- with(
  sepsis.csv,
  rfImpute(y = survival, x = sepsis.csv[, -1], iter = 5, ntree = 500)
)
str(sepsis.imp)
# Recode THERAPY as numeric: 0 for "control", 1 for any other value
sepsis.imp$THERAPY <- with(sepsis.csv, ifelse(THERAPY == "control", 0, 1))
# Convert survival from a factor back to its numeric values
sepsis.imp$survival <- as.numeric(as.character(sepsis.imp$survival))
# Build the final dataset, passing "survival" and "THERAPY" as the second and
# third arguments of formatRCTDataset.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
# NOTE(review): the fourth argument presumably toggles an option of
# formatRCTDataset -- see ?formatRCTDataset to confirm its meaning.
sepsis <- formatRCTDataset(sepsis.imp, "survival", "THERAPY", TRUE)
# Save the dataset into the data folder
save(sepsis, file = "data/sepsis.rdata")
# End.