| 1 |
#' Time-to-Event Analysis Dataset (ADTTE) |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Function for generating a random Time-to-Event Analysis Dataset for a given |
|
| 6 |
#' Subject-Level Analysis Dataset. |
|
| 7 |
#' |
|
| 8 |
#' @details |
|
| 9 |
#' |
|
| 10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD` |
|
| 11 |
#' |
|
| 12 |
#' @inheritParams argument_convention |
|
| 13 |
#' @inheritParams radaette |
|
| 14 |
#' @template param_cached |
|
| 15 |
#' @templateVar data adtte |
|
| 16 |
#' |
|
| 17 |
#' @return `data.frame` |
|
| 18 |
#' @export |
|
| 19 |
#' |
|
| 20 |
#' @examples |
|
| 21 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
| 22 |
#' |
|
| 23 |
#' adtte <- radtte(adsl, seed = 2) |
|
| 24 |
#' adtte |
|
| 25 |
radtte <- function(adsl, |
|
| 26 |
event_descr = NULL, |
|
| 27 |
censor_descr = NULL, |
|
| 28 |
lookup = NULL, |
|
| 29 |
seed = NULL, |
|
| 30 |
na_percentage = 0, |
|
| 31 |
na_vars = list(CNSR = c(NA, 0.1), AVAL = c(1234, 0.1), AVALU = c(1234, 0.1)), |
|
| 32 |
cached = FALSE) {
|
|
| 33 | 4x |
checkmate::assert_flag(cached) |
| 34 | 4x |
if (cached) {
|
| 35 | 1x |
return(get_cached_data("cadtte"))
|
| 36 |
} |
|
| 37 | ||
| 38 | 3x |
checkmate::assert_data_frame(adsl) |
| 39 | 3x |
checkmate::assert_character(censor_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE) |
| 40 | 3x |
checkmate::assert_character(event_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE) |
| 41 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 42 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
| 43 | 3x |
checkmate::assert_true(na_percentage < 1) |
| 44 | ||
| 45 | 3x |
if (!is.null(seed)) {
|
| 46 | 3x |
set.seed(seed) |
| 47 |
} |
|
| 48 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
| 49 | ||
| 50 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
| 51 | 3x |
lookup_tte <- if (!is.null(lookup)) {
|
| 52 | ! |
lookup |
| 53 |
} else {
|
|
| 54 | 3x |
tibble::tribble( |
| 55 | 3x |
~ARM, ~PARAMCD, ~PARAM, ~LAMBDA, ~CNSR_P, |
| 56 | 3x |
"ARM A", "EFS", "Event Free Survival", log(2) / 365, 0.4, |
| 57 | 3x |
"ARM B", "EFS", "Event Free Survival", log(2) / 305, 0.3, |
| 58 | 3x |
"ARM C", "EFS", "Event Free Survival", log(2) / 243, 0.2, |
| 59 | 3x |
"ARM A", "CRSD", "Duration of Confirmed Response", log(2) / 305, 0.4, |
| 60 | 3x |
"ARM B", "CRSD", "Duration of Confirmed Response", log(2) / 243, 0.3, |
| 61 | 3x |
"ARM C", "CRSD", "Duration of Confirmed Response", log(2) / 182, 0.2, |
| 62 | 3x |
"ARM A", "PFS", "Progression Free Survival", log(2) / 365, 0.4, |
| 63 | 3x |
"ARM B", "PFS", "Progression Free Survival", log(2) / 305, 0.3, |
| 64 | 3x |
"ARM C", "PFS", "Progression Free Survival", log(2) / 243, 0.2, |
| 65 | 3x |
"ARM A", "OS", "Overall Survival", log(2) / 610, 0.4, |
| 66 | 3x |
"ARM B", "OS", "Overall Survival", log(2) / 490, 0.3, |
| 67 | 3x |
"ARM C", "OS", "Overall Survival", log(2) / 365, 0.2, |
| 68 |
) |
|
| 69 |
} |
|
| 70 | ||
| 71 | 3x |
evntdescr_sel <- if (!is.null(event_descr)) {
|
| 72 | ! |
event_descr |
| 73 |
} else {
|
|
| 74 | 3x |
c( |
| 75 | 3x |
"Death", |
| 76 | 3x |
"Disease Progression", |
| 77 | 3x |
"Last Tumor Assessment", |
| 78 | 3x |
"Adverse Event", |
| 79 | 3x |
"Alive" |
| 80 |
) |
|
| 81 |
} |
|
| 82 | ||
| 83 | 3x |
cnsdtdscr_sel <- if (!is.null(censor_descr)) {
|
| 84 | ! |
censor_descr |
| 85 |
} else {
|
|
| 86 | 3x |
c( |
| 87 | 3x |
"Preferred Term", |
| 88 | 3x |
"Clinical Cut Off", |
| 89 | 3x |
"Completion or Discontinuation", |
| 90 | 3x |
"End of AE Reporting Period" |
| 91 |
) |
|
| 92 |
} |
|
| 93 | ||
| 94 | 3x |
adtte <- split(adsl, adsl$USUBJID) %>% |
| 95 | 3x |
lapply(FUN = function(pinfo) {
|
| 96 | 30x |
lookup_tte %>% |
| 97 | 30x |
dplyr::filter(ARM == as.character(pinfo$ACTARMCD)) %>% |
| 98 | 30x |
dplyr::rowwise() %>% |
| 99 | 30x |
dplyr::mutate( |
| 100 | 30x |
STUDYID = pinfo$STUDYID, |
| 101 | 30x |
SITEID = pinfo$SITEID, |
| 102 | 30x |
USUBJID = pinfo$USUBJID, |
| 103 | 30x |
AVALU = "DAYS" |
| 104 |
) %>% |
|
| 105 | 30x |
dplyr::select(-"LAMBDA", -"CNSR_P") |
| 106 |
}) %>% |
|
| 107 | 3x |
Reduce(rbind, .) %>% |
| 108 | 3x |
rcd_var_relabel( |
| 109 | 3x |
STUDYID = "Study Identifier", |
| 110 | 3x |
USUBJID = "Unique Subject Identifier" # ) |
| 111 |
) |
|
| 112 | ||
| 113 |
# Loop through each patient and randomly assign a value for EVNTDESC |
|
| 114 | 3x |
adtte_split <- split(adtte, adtte$USUBJID) |
| 115 | ||
| 116 |
# Add EVNTDESC column |
|
| 117 | 3x |
adtte_lst <- lapply(adtte_split, function(split_df) {
|
| 118 |
# First create an empty EVNTDESC variable to populate |
|
| 119 | 30x |
split_df$EVNTDESC <- NA |
| 120 | 30x |
for (i in 1:nrow(split_df)) { # nolint
|
| 121 |
# If this is the first row then create a random value from evntdescr_sel for EVNTDESC |
|
| 122 | 120x |
if (i == 1) {
|
| 123 | 30x |
split_df$EVNTDESC[i] <- sample(evntdescr_sel[c(1:4)], 1, prob = c(0.1, 0.3, 0.4, 0.2)) |
| 124 | 90x |
} else if (i != 1 & i != nrow(split_df)) {
|
| 125 |
# First check to see if "Death" has been entered in as a previous value |
|
| 126 |
# If so we need to make the rest of the EVNTDESC values "Death" to make sense |
|
| 127 |
# The patient cannot die and then come back to life |
|
| 128 | 60x |
if (any(grepl("Death", split_df$EVNTDESC))) { # If previous value has "Death" the following need to be "Death"
|
| 129 | 21x |
split_df$EVNTDESC[i] <- "Death" |
| 130 | 3x |
} else { # If there are no "Death" values randomly select another value
|
| 131 | 39x |
split_df$EVNTDESC[i] <- sample(evntdescr_sel[c(1:4)], 1) |
| 132 |
} |
|
| 133 | 3x |
} else { # This is for processing OS as this can only be "Death" or "Alive"
|
| 134 | 30x |
if (any(grepl("Death", split_df$EVNTDESC))) { # If previous value has "Death" the following need to be "Death"
|
| 135 | 21x |
split_df$EVNTDESC[i] <- "Death" |
| 136 | 3x |
} else { # If there are no "Death" values randomly select another value
|
| 137 | 9x |
split_df$EVNTDESC[i] <- "Alive" |
| 138 |
} |
|
| 139 |
} |
|
| 140 |
} |
|
| 141 | 30x |
split_df |
| 142 |
}) |
|
| 143 | ||
| 144 |
# Add CNSR column |
|
| 145 | 3x |
adtte_lst <- lapply(adtte_lst, function(split_df) {
|
| 146 |
# First create an empty CNSR variable to populate |
|
| 147 | 30x |
split_df$CNSR <- NA |
| 148 | 30x |
for (i in 1:nrow(split_df)) { # nolint
|
| 149 |
# If this is the first row then create a random value from evntdescr_sel for EVNTDESC |
|
| 150 | 120x |
if (split_df$EVNTDESC[i] == "Death" | split_df$EVNTDESC[i] == "Disease Progression") {
|
| 151 | 81x |
split_df$CNSR[i] <- 0 |
| 152 |
} else {
|
|
| 153 | 39x |
split_df$CNSR[i] <- 1 |
| 154 |
} |
|
| 155 |
} |
|
| 156 | 30x |
split_df |
| 157 |
}) |
|
| 158 | ||
| 159 |
# Add AVAL column |
|
| 160 | 3x |
adtte_lst <- lapply(adtte_lst, function(split_df) {
|
| 161 |
# First create an empty CNSR variable to populate |
|
| 162 | 30x |
split_df$AVAL <- NA |
| 163 | 30x |
for (i in 1:nrow(split_df)) { # nolint
|
| 164 | 120x |
if (i == 1) {
|
| 165 | 30x |
split_df$AVAL[i] <- stats::runif(1, 15, 100) |
| 166 | 90x |
} else if (i != 1 & any(grepl("Death", split_df[1:i - 1, "EVNTDESC"]))) {
|
| 167 |
# Check if there are any death values before the current row |
|
| 168 |
# Set the AVAL to the value of the row that has the "Death" value |
|
| 169 |
# as the patient cannot live longer than this value |
|
| 170 | 42x |
death_position <- match("Death", split_df[1:i - 1, "EVNTDESC"][[1]])
|
| 171 | 42x |
split_df$AVAL[i] <- split_df$AVAL[death_position] |
| 172 | 48x |
} else if (i == 2) {
|
| 173 | 24x |
split_df$AVAL[i] <- stats::runif(1, 100, 200) |
| 174 | 24x |
} else if (i == 3) {
|
| 175 | 15x |
split_df$AVAL[i] <- stats::runif(1, 200, 300) |
| 176 | 9x |
} else if (i == 4) {
|
| 177 | 9x |
split_df$AVAL[i] <- stats::runif(1, 300, 500) |
| 178 |
} |
|
| 179 |
} |
|
| 180 | 30x |
split_df |
| 181 |
}) |
|
| 182 | ||
| 183 |
# Add CNSDTDSC column |
|
| 184 | 3x |
adtte_lst <- lapply(adtte_lst, function(split_df) {
|
| 185 |
# First create an empty CNSDTDSC variable to populate |
|
| 186 | 30x |
split_df$CNSDTDSC <- NA |
| 187 | 30x |
for (i in 1:nrow(split_df)) { # nolint
|
| 188 | 120x |
if (split_df$CNSR[i] == 1 & split_df$EVNTDESC[i] == "Last Tumor Assessment") {
|
| 189 | 27x |
split_df$CNSDTDSC[i] <- "Completion or Discontinuation" |
| 190 | 93x |
} else if (split_df$CNSR[i] == 1 & split_df$EVNTDESC[i] == "Adverse Event") {
|
| 191 | 3x |
split_df$CNSDTDSC[i] <- "Preferred Term" |
| 192 | 90x |
} else if (split_df$CNSR[i] == 1 & split_df$EVNTDESC[i] == "Alive") {
|
| 193 | 9x |
split_df$CNSDTDSC[i] <- "Alive During Study" |
| 194 |
} else {
|
|
| 195 | 81x |
split_df$CNSDTDSC[i] <- "" |
| 196 |
} |
|
| 197 |
} |
|
| 198 | 30x |
split_df |
| 199 |
}) |
|
| 200 | ||
| 201 |
# Take the split df and combine them back together |
|
| 202 | 3x |
adtte <- do.call("rbind", adtte_lst)
|
| 203 | 3x |
rownames(adtte) <- NULL |
| 204 | ||
| 205 | 3x |
adtte <- rcd_var_relabel( |
| 206 | 3x |
adtte, |
| 207 | 3x |
STUDYID = "Study Identifier", |
| 208 | 3x |
USUBJID = "Unique Subject Identifier" |
| 209 |
) |
|
| 210 | ||
| 211 |
# merge ADSL to be able to add TTE date and study day variables |
|
| 212 | 3x |
adtte <- dplyr::inner_join( |
| 213 | 3x |
dplyr::select(adtte, -"SITEID", -"ARM"), |
| 214 | 3x |
adsl, |
| 215 | 3x |
by = c("STUDYID", "USUBJID")
|
| 216 |
) %>% |
|
| 217 | 3x |
dplyr::rowwise() %>% |
| 218 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
| 219 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
| 220 | 3x |
TRUE ~ TRTEDTM |
| 221 |
))) %>% |
|
| 222 | 3x |
dplyr::mutate(ADTM = sample( |
| 223 | 3x |
seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"), |
| 224 | 3x |
size = 1 |
| 225 |
)) %>% |
|
| 226 | 3x |
dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>% |
| 227 | 3x |
dplyr::select(-TRTENDT) %>% |
| 228 | 3x |
dplyr::ungroup() %>% |
| 229 | 3x |
dplyr::arrange(STUDYID, USUBJID, ADTM) |
| 230 | ||
| 231 | 3x |
adtte <- adtte %>% |
| 232 | 3x |
dplyr::group_by(USUBJID) %>% |
| 233 | 3x |
dplyr::mutate(TTESEQ = seq_len(dplyr::n())) %>% |
| 234 | 3x |
dplyr::mutate(ASEQ = TTESEQ) %>% |
| 235 | 3x |
dplyr::mutate(PARAM = as.factor(PARAM)) %>% |
| 236 | 3x |
dplyr::mutate(PARAMCD = as.factor(PARAMCD)) %>% |
| 237 | 3x |
dplyr::ungroup() %>% |
| 238 | 3x |
dplyr::arrange( |
| 239 | 3x |
STUDYID, |
| 240 | 3x |
USUBJID, |
| 241 | 3x |
PARAMCD, |
| 242 | 3x |
ADTM, |
| 243 | 3x |
TTESEQ |
| 244 |
) |
|
| 245 | ||
| 246 | 3x |
mod_before_adtte <- adtte |
| 247 | ||
| 248 |
# adding adverse event counts and log follow-up time |
|
| 249 | 3x |
adtte <- dplyr::bind_rows( |
| 250 | 3x |
adtte, |
| 251 | 3x |
data.frame( |
| 252 | 3x |
adtte %>% |
| 253 | 3x |
dplyr::group_by(USUBJID) %>% |
| 254 | 3x |
dplyr::slice_head(n = 1) %>% |
| 255 | 3x |
dplyr::mutate( |
| 256 | 3x |
PARAMCD = "TNE", |
| 257 | 3x |
PARAM = "Total Number of Exacerbations", |
| 258 | 3x |
AVAL = stats::rpois(1, 3), |
| 259 | 3x |
AVALU = "COUNT", |
| 260 | 3x |
lgTMATRSK = log(stats::rexp(1, rate = 3)), |
| 261 | 3x |
dplyr::across( |
| 262 | 3x |
c("ASEQ", "TTESEQ", "ADY", "ADTM", "EVNTDESC"),
|
| 263 | 3x |
~NA |
| 264 |
) |
|
| 265 |
) |
|
| 266 |
) |
|
| 267 |
) %>% |
|
| 268 | 3x |
dplyr::arrange( |
| 269 | 3x |
STUDYID, |
| 270 | 3x |
USUBJID, |
| 271 | 3x |
PARAMCD, |
| 272 | 3x |
ADTM, |
| 273 | 3x |
TTESEQ |
| 274 |
) |
|
| 275 | ||
| 276 | 3x |
mod_after_adtte <- adtte |
| 277 | ||
| 278 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) {
|
| 279 | ! |
adtte <- mutate_na(ds = adtte, na_vars = na_vars, na_percentage = na_percentage) |
| 280 |
} |
|
| 281 | ||
| 282 |
# apply metadata |
|
| 283 | 3x |
adtte <- apply_metadata(adtte, "metadata/ADTTE.yml") |
| 284 | ||
| 285 | 3x |
return(adtte) |
| 286 |
} |
| 1 |
#' ECG Analysis Dataset (ADEG) |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Function for generating random dataset from ECG Analysis Dataset for a given |
|
| 6 |
#' Subject-Level Analysis Dataset. |
|
| 7 |
#' |
|
| 8 |
#' @details One record per subject per parameter per analysis visit per analysis date. |
|
| 9 |
#' |
|
| 10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `BASETYPE`, `AVISITN`, `ATPTN`, `DTYPE`, `ADTM`, `EGSEQ`, `ASPID` |
|
| 11 |
#' |
|
| 12 |
#' @inheritParams argument_convention |
|
| 13 |
#' @param egcat (`character vector`)\cr EG category values. |
|
| 14 |
#' @param max_n_eg (`integer`)\cr Maximum number of EG results per patient. Defaults to 10. |
|
| 15 |
#' @template param_cached |
|
| 16 |
#' @templateVar data adeg |
|
| 17 |
#' |
|
| 18 |
#' @return `data.frame` |
|
| 19 |
#' @export |
|
| 20 |
#' |
|
| 21 |
#' @author tomlinsj, npaszty, Xuefeng Hou, dipietrc |
|
| 22 |
#' |
|
| 23 |
#' @examples |
|
| 24 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
| 25 |
#' |
|
| 26 |
#' adeg <- radeg(adsl, visit_format = "WEEK", n_assessments = 7L, seed = 2) |
|
| 27 |
#' adeg |
|
| 28 |
#' |
|
| 29 |
#' adeg <- radeg(adsl, visit_format = "CYCLE", n_assessments = 2L, seed = 2) |
|
| 30 |
#' adeg |
|
| 31 |
radeg <- function(adsl, |
|
| 32 |
egcat = c("INTERVAL", "INTERVAL", "MEASUREMENT", "FINDING"),
|
|
| 33 |
param = c( |
|
| 34 |
"QT Duration", |
|
| 35 |
"RR Duration", |
|
| 36 |
"Heart Rate", |
|
| 37 |
"ECG Interpretation" |
|
| 38 |
), |
|
| 39 |
paramcd = c("QT", "RR", "HR", "ECGINTP"),
|
|
| 40 |
paramu = c("msec", "msec", "beats/min", ""),
|
|
| 41 |
visit_format = "WEEK", |
|
| 42 |
n_assessments = 5L, |
|
| 43 |
n_days = 5L, |
|
| 44 |
max_n_eg = 10L, |
|
| 45 |
lookup = NULL, |
|
| 46 |
seed = NULL, |
|
| 47 |
na_percentage = 0, |
|
| 48 |
na_vars = list( |
|
| 49 |
ABLFL = c(1235, 0.1), BASE = c(NA, 0.1), BASEC = c(NA, 0.1), |
|
| 50 |
CHG = c(1234, 0.1), PCHG = c(1234, 0.1) |
|
| 51 |
), |
|
| 52 |
cached = FALSE) {
|
|
| 53 | 4x |
checkmate::assert_flag(cached) |
| 54 | 4x |
if (cached) {
|
| 55 | 1x |
return(get_cached_data("cadeg"))
|
| 56 |
} |
|
| 57 | ||
| 58 | 3x |
checkmate::assert_data_frame(adsl) |
| 59 | 3x |
checkmate::assert_character(egcat, min.len = 1, any.missing = FALSE) |
| 60 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
| 61 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
| 62 | 3x |
checkmate::assert_character(paramu, min.len = 1, any.missing = FALSE) |
| 63 | 3x |
checkmate::assert_string(visit_format) |
| 64 | 3x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
| 65 | 3x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
| 66 | 3x |
checkmate::assert_integer(max_n_eg, len = 1, any.missing = FALSE) |
| 67 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
| 68 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 69 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
| 70 | 3x |
checkmate::assert_true(na_percentage < 1) |
| 71 | ||
| 72 |
# validate and initialize related variables |
|
| 73 | 3x |
egcat_init_list <- relvar_init(param, egcat) |
| 74 | 3x |
param_init_list <- relvar_init(param, paramcd) |
| 75 | 3x |
unit_init_list <- relvar_init(param, paramu) |
| 76 | ||
| 77 | 3x |
if (!is.null(seed)) {
|
| 78 | 3x |
set.seed(seed) |
| 79 |
} |
|
| 80 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
| 81 | ||
| 82 | 3x |
adeg <- expand.grid( |
| 83 | 3x |
STUDYID = unique(adsl$STUDYID), |
| 84 | 3x |
USUBJID = adsl$USUBJID, |
| 85 | 3x |
PARAM = as.factor(param_init_list$relvar1), |
| 86 | 3x |
AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days), |
| 87 | 3x |
stringsAsFactors = FALSE |
| 88 |
) |
|
| 89 | ||
| 90 |
# assign related variable values: PARAMxEGCAT are related |
|
| 91 | 3x |
adeg <- adeg %>% rel_var( |
| 92 | 3x |
var_name = "EGCAT", |
| 93 | 3x |
related_var = "PARAM", |
| 94 | 3x |
var_values = egcat_init_list$relvar2 |
| 95 |
) |
|
| 96 | ||
| 97 |
# assign related variable values: PARAMxPARAMCD are related |
|
| 98 | 3x |
adeg <- adeg %>% rel_var( |
| 99 | 3x |
var_name = "PARAMCD", |
| 100 | 3x |
related_var = "PARAM", |
| 101 | 3x |
var_values = param_init_list$relvar2 |
| 102 |
) |
|
| 103 | ||
| 104 | 3x |
adeg <- adeg %>% dplyr::mutate(AVAL = dplyr::case_when( |
| 105 | 3x |
PARAMCD == "QT" ~ stats::rnorm(nrow(adeg), mean = 350, sd = 100), |
| 106 | 3x |
PARAMCD == "RR" ~ stats::rnorm(nrow(adeg), mean = 1050, sd = 300), |
| 107 | 3x |
PARAMCD == "HR" ~ stats::rnorm(nrow(adeg), mean = 70, sd = 20), |
| 108 | 3x |
PARAMCD == "ECGINTP" ~ NA_real_ |
| 109 |
)) |
|
| 110 | ||
| 111 | 3x |
adeg <- adeg %>% |
| 112 | 3x |
dplyr::mutate(EGTESTCD = PARAMCD) %>% |
| 113 | 3x |
dplyr::mutate(EGTEST = PARAM) |
| 114 | ||
| 115 | 3x |
adeg <- adeg %>% dplyr::mutate(AVISITN = dplyr::case_when( |
| 116 | 3x |
AVISIT == "SCREENING" ~ -1, |
| 117 | 3x |
AVISIT == "BASELINE" ~ 0, |
| 118 | 3x |
(grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
|
| 119 | 3x |
TRUE ~ NA_real_ |
| 120 |
)) |
|
| 121 | ||
| 122 | 3x |
adeg <- adeg %>% rel_var( |
| 123 | 3x |
var_name = "AVALU", |
| 124 | 3x |
related_var = "PARAM", |
| 125 | 3x |
var_values = unit_init_list$relvar2 |
| 126 |
) |
|
| 127 | ||
| 128 |
# order to prepare for change from screening and baseline values |
|
| 129 | 3x |
adeg <- adeg[order(adeg$STUDYID, adeg$USUBJID, adeg$PARAMCD, adeg$AVISITN), ] |
| 130 | ||
| 131 | 3x |
adeg <- Reduce(rbind, lapply(split(adeg, adeg$USUBJID), function(x) {
|
| 132 | 30x |
x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])] |
| 133 | 30x |
x$ABLFL <- ifelse(toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE", |
| 134 | 30x |
"Y", |
| 135 | 30x |
ifelse(toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1", "Y", "") |
| 136 |
) |
|
| 137 | 30x |
x |
| 138 |
})) |
|
| 139 | ||
| 140 | 3x |
adeg$BASE <- ifelse(adeg$AVISITN >= 0, retain(adeg, adeg$AVAL, adeg$ABLFL == "Y"), adeg$AVAL) |
| 141 | ||
| 142 | 3x |
adeg <- adeg %>% dplyr::mutate(ANRLO = dplyr::case_when( |
| 143 | 3x |
PARAMCD == "QT" ~ 200, |
| 144 | 3x |
PARAMCD == "RR" ~ 600, |
| 145 | 3x |
PARAMCD == "HR" ~ 40, |
| 146 | 3x |
PARAMCD == "ECGINTP" ~ NA_real_ |
| 147 |
)) |
|
| 148 | ||
| 149 | 3x |
adeg <- adeg %>% dplyr::mutate(ANRHI = dplyr::case_when( |
| 150 | 3x |
PARAMCD == "QT" ~ 500, |
| 151 | 3x |
PARAMCD == "RR" ~ 1500, |
| 152 | 3x |
PARAMCD == "HR" ~ 100, |
| 153 | 3x |
PARAMCD == "ECGINTP" ~ NA_real_ |
| 154 |
)) |
|
| 155 | ||
| 156 | 3x |
adeg <- adeg %>% dplyr::mutate(ANRIND = factor(dplyr::case_when( |
| 157 | 3x |
AVAL < ANRLO ~ "LOW", |
| 158 | 3x |
AVAL >= ANRLO & AVAL <= ANRHI ~ "NORMAL", |
| 159 | 3x |
AVAL > ANRHI ~ "HIGH" |
| 160 |
))) |
|
| 161 | ||
| 162 | 3x |
adeg <- adeg %>% |
| 163 | 3x |
dplyr::mutate(CHG = ifelse(AVISITN > 0, AVAL - BASE, NA)) %>% |
| 164 | 3x |
dplyr::mutate(PCHG = ifelse(AVISITN > 0, 100 * (CHG / BASE), NA)) %>% |
| 165 | 3x |
dplyr::mutate(BASETYPE = "LAST") %>% |
| 166 | 3x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
| 167 | 3x |
dplyr::mutate(BNRIND = ANRIND[ABLFL == "Y"]) %>% |
| 168 | 3x |
dplyr::ungroup() %>% |
| 169 | 3x |
dplyr::mutate(ATPTN = 1) %>% |
| 170 | 3x |
dplyr::mutate(DTYPE = NA) |
| 171 | ||
| 172 | 3x |
adeg$ANRIND <- factor(adeg$ANRIND, levels = c("LOW", "NORMAL", "HIGH"))
|
| 173 | 3x |
adeg$BNRIND <- factor(adeg$BNRIND, levels = c("LOW", "NORMAL", "HIGH"))
|
| 174 | ||
| 175 | 3x |
adeg <- rcd_var_relabel( |
| 176 | 3x |
adeg, |
| 177 | 3x |
STUDYID = "Study Identifier", |
| 178 | 3x |
USUBJID = "Unique Subject Identifier" |
| 179 |
) |
|
| 180 | ||
| 181 |
# merge ADSL to be able to add EG date and study day variables |
|
| 182 | 3x |
adeg <- dplyr::inner_join( |
| 183 | 3x |
adeg, |
| 184 | 3x |
adsl, |
| 185 | 3x |
by = c("STUDYID", "USUBJID")
|
| 186 |
) %>% |
|
| 187 | 3x |
dplyr::rowwise() %>% |
| 188 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
| 189 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
| 190 | 3x |
TRUE ~ TRTEDTM |
| 191 |
))) %>% |
|
| 192 | 3x |
dplyr::ungroup() |
| 193 | ||
| 194 | 3x |
adeg <- adeg %>% |
| 195 | 3x |
dplyr::group_by(USUBJID) %>% |
| 196 | 3x |
dplyr::arrange(USUBJID, AVISITN) %>% |
| 197 | 3x |
dplyr::mutate(ADTM = rep( |
| 198 | 3x |
sort(sample( |
| 199 | 3x |
seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"), |
| 200 | 3x |
size = nlevels(AVISIT) |
| 201 |
)), |
|
| 202 | 3x |
each = n() / nlevels(AVISIT) |
| 203 |
)) %>% |
|
| 204 | 3x |
dplyr::ungroup() %>% |
| 205 | 3x |
dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>% |
| 206 | 3x |
dplyr::select(-TRTENDT) %>% |
| 207 | 3x |
dplyr::arrange(STUDYID, USUBJID, ADTM) |
| 208 | ||
| 209 | 3x |
adeg <- adeg %>% |
| 210 | 3x |
dplyr::mutate(ASPID = sample(seq_len(dplyr::n()))) %>% |
| 211 | 3x |
dplyr::group_by(USUBJID) %>% |
| 212 | 3x |
dplyr::mutate(EGSEQ = seq_len(dplyr::n())) %>% |
| 213 | 3x |
dplyr::mutate(ASEQ = EGSEQ) %>% |
| 214 | 3x |
dplyr::ungroup() %>% |
| 215 | 3x |
dplyr::arrange( |
| 216 | 3x |
STUDYID, |
| 217 | 3x |
USUBJID, |
| 218 | 3x |
PARAMCD, |
| 219 | 3x |
BASETYPE, |
| 220 | 3x |
AVISITN, |
| 221 | 3x |
ATPTN, |
| 222 | 3x |
DTYPE, |
| 223 | 3x |
ADTM, |
| 224 | 3x |
EGSEQ, |
| 225 | 3x |
ASPID |
| 226 |
) |
|
| 227 | ||
| 228 | 3x |
adeg <- adeg %>% dplyr::mutate(ONTRTFL = factor(dplyr::case_when( |
| 229 | 3x |
!AVISIT %in% c("SCREENING", "BASELINE") ~ "Y",
|
| 230 | 3x |
TRUE ~ "" |
| 231 |
))) |
|
| 232 | ||
| 233 | 3x |
adeg <- adeg %>% dplyr::mutate(AVALC = ifelse( |
| 234 | 3x |
PARAMCD == "ECGINTP", |
| 235 | 3x |
as.character(sample_fct(c("ABNORMAL", "NORMAL"), nrow(adeg), prob = c(0.25, 0.75))),
|
| 236 | 3x |
as.character(AVAL) |
| 237 |
)) |
|
| 238 | ||
| 239 |
# Temporarily creating a row_check column to easily match newly created |
|
| 240 |
# observations with their row correct arrangement. |
|
| 241 | 3x |
adeg <- adeg %>% |
| 242 | 3x |
dplyr::mutate(row_check = seq_len(nrow(adeg))) |
| 243 | ||
| 244 |
# Created function to add in new observations for DTYPE, "MINIMUM" & "MAXIMUM" in this case. |
|
| 245 | 3x |
get_groups <- function(data, |
| 246 | 3x |
minimum) {
|
| 247 | 6x |
data <- data %>% |
| 248 | 6x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
| 249 | 6x |
dplyr::arrange(ADTM, ASPID, EGSEQ) %>% |
| 250 | 6x |
dplyr::filter( |
| 251 | 6x |
(AVISIT != "BASELINE" & AVISIT != "SCREENING") & |
| 252 | 6x |
(ONTRTFL == "Y" | ADTM <= TRTSDTM) |
| 253 |
) %>% |
|
| 254 |
{
|
|
| 255 | 6x |
if (minimum == TRUE) {
|
| 256 | 3x |
dplyr::filter(., AVAL == min(AVAL)) %>% |
| 257 | 3x |
dplyr::mutate(., DTYPE = "MINIMUM", AVISIT = "POST-BASELINE MINIMUM") |
| 258 |
} else {
|
|
| 259 | 3x |
dplyr::filter(., AVAL == max(AVAL)) %>% |
| 260 | 3x |
dplyr::mutate(., DTYPE = "MAXIMUM", AVISIT = "POST-BASELINE MAXIMUM") |
| 261 |
} |
|
| 262 |
} %>% |
|
| 263 | 6x |
dplyr::slice(1) %>% |
| 264 | 6x |
dplyr::ungroup() |
| 265 | ||
| 266 | 6x |
return(data) |
| 267 |
} |
|
| 268 | ||
| 269 |
# Binding the new observations to the dataset from the function above and rearranging in the correct order. |
|
| 270 | 3x |
adeg <- rbind(adeg, get_groups(adeg, TRUE), get_groups(adeg, FALSE)) %>% |
| 271 | 3x |
dplyr::arrange(row_check) %>% |
| 272 | 3x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
| 273 | 3x |
dplyr::arrange(AVISIT, .by_group = TRUE) %>% |
| 274 | 3x |
dplyr::ungroup() |
| 275 | ||
| 276 |
# Dropping the "row_check" column created above. |
|
| 277 | 3x |
adeg <- adeg[, -which(names(adeg) %in% c("row_check"))]
|
| 278 | ||
| 279 |
# Created function to easily match rows which comply to ONTRTFL derivation |
|
| 280 | 3x |
flag_variables <- function(data, worst_obs) {
|
| 281 | 6x |
data_compare <- data %>% |
| 282 | 6x |
dplyr::mutate(row_check = seq_len(nrow(data))) |
| 283 | ||
| 284 | 6x |
data <- data_compare %>% |
| 285 |
{
|
|
| 286 | 6x |
if (worst_obs == FALSE) {
|
| 287 | 3x |
dplyr::group_by(., USUBJID, PARAMCD, BASETYPE, AVISIT) %>% |
| 288 | 3x |
dplyr::arrange(., ADTM, ASPID, EGSEQ) |
| 289 |
} else {
|
|
| 290 | 3x |
dplyr::group_by(., USUBJID, PARAMCD, BASETYPE) |
| 291 |
} |
|
| 292 |
} %>% |
|
| 293 | 6x |
dplyr::filter( |
| 294 | 6x |
AVISITN > 0 & (ONTRTFL == "Y" | ADTM <= TRTSDTM) & |
| 295 | 6x |
is.na(DTYPE) |
| 296 |
) %>% |
|
| 297 |
{
|
|
| 298 | 6x |
if (worst_obs == TRUE) {
|
| 299 | 3x |
dplyr::arrange(., AVALC) %>% dplyr::filter(., ifelse( |
| 300 | 3x |
PARAMCD == "ECGINTP", |
| 301 | 3x |
ifelse(AVALC == "ABNORMAL", AVALC == "ABNORMAL", AVALC == "NORMAL"), |
| 302 | 3x |
AVAL == min(AVAL) |
| 303 |
)) |
|
| 304 |
} else {
|
|
| 305 | 3x |
dplyr::filter(., ifelse( |
| 306 | 3x |
PARAMCD == "ECGINTP", |
| 307 | 3x |
AVALC == "ABNORMAL" | AVALC == "NORMAL", |
| 308 | 3x |
AVAL == min(AVAL) |
| 309 |
)) |
|
| 310 |
} |
|
| 311 |
} %>% |
|
| 312 | 6x |
dplyr::slice(1) %>% |
| 313 |
{
|
|
| 314 | 6x |
if (worst_obs == TRUE) {
|
| 315 | 3x |
dplyr::mutate(., new_var = dplyr::case_when( |
| 316 | 3x |
(AVALC == "ABNORMAL" | AVALC == "NORMAL") ~ "Y", |
| 317 | 3x |
(!is.na(AVAL) & is.na(DTYPE)) ~ "Y", |
| 318 | 3x |
TRUE ~ "" |
| 319 |
)) |
|
| 320 |
} else {
|
|
| 321 | 3x |
dplyr::mutate(., new_var = dplyr::case_when( |
| 322 | 3x |
(AVALC == "ABNORMAL" | AVALC == "NORMAL") ~ "Y", |
| 323 | 3x |
(!is.na(AVAL) & is.na(DTYPE)) ~ "Y", |
| 324 | 3x |
TRUE ~ "" |
| 325 |
)) |
|
| 326 |
} |
|
| 327 |
} %>% |
|
| 328 | 6x |
dplyr::ungroup() |
| 329 | ||
| 330 | 6x |
data_compare$new_var <- ifelse(data_compare$row_check %in% data$row_check, "Y", "") |
| 331 | 6x |
data_compare <- data_compare[, -which(names(data_compare) %in% c("row_check"))]
|
| 332 | ||
| 333 | 6x |
return(data_compare) |
| 334 |
} |
|
| 335 | ||
| 336 | 3x |
adeg <- flag_variables(adeg, FALSE) %>% dplyr::rename(WORS01FL = "new_var") |
| 337 | 3x |
adeg <- flag_variables(adeg, TRUE) %>% dplyr::rename(WORS02FL = "new_var") |
| 338 | ||
| 339 | 3x |
adeg <- adeg %>% dplyr::mutate(ANL01FL = factor(ifelse( |
| 340 | 3x |
(ABLFL == "Y" | (is.na(DTYPE) & WORS01FL == "Y")) & |
| 341 | 3x |
(AVISIT != "SCREENING"), |
| 342 | 3x |
"Y", |
| 343 |
"" |
|
| 344 |
))) |
|
| 345 | ||
| 346 | 3x |
adeg <- adeg %>% |
| 347 | 3x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
| 348 | 3x |
dplyr::mutate(BASEC = ifelse( |
| 349 | 3x |
PARAMCD == "ECGINTP", |
| 350 | 3x |
AVALC[AVISIT == "BASELINE"], |
| 351 | 3x |
as.character(BASE) |
| 352 |
)) %>% |
|
| 353 | 3x |
dplyr::mutate(ANL03FL = dplyr::case_when( |
| 354 | 3x |
DTYPE == "MINIMUM" ~ "Y", |
| 355 | 3x |
ABLFL == "Y" & PARAMCD != "ECGINTP" ~ "Y", |
| 356 | 3x |
TRUE ~ "" |
| 357 |
)) %>% |
|
| 358 | 3x |
dplyr::mutate(ANL04FL = dplyr::case_when( |
| 359 | 3x |
DTYPE == "MAXIMUM" ~ "Y", |
| 360 | 3x |
ABLFL == "Y" & PARAMCD != "ECGINTP" ~ "Y", |
| 361 | 3x |
TRUE ~ "" |
| 362 |
)) %>% |
|
| 363 | 3x |
dplyr::ungroup() |
| 364 | ||
| 365 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) {
|
| 366 | ! |
adeg <- mutate_na(ds = adeg, na_vars = na_vars, na_percentage = na_percentage) |
| 367 |
} |
|
| 368 | ||
| 369 |
# apply metadata |
|
| 370 | 3x |
adeg <- apply_metadata(adeg, "metadata/ADEG.yml") |
| 371 | ||
| 372 | 3x |
return(adeg) |
| 373 |
} |
| 1 |
#' Time to Adverse Event Analysis Dataset (ADAETTE) |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Function to generate random Time-to-AE Dataset for a |
|
| 6 |
#' given Subject-Level Analysis Dataset. |
|
| 7 |
#' |
|
| 8 |
#' @details |
|
| 9 |
#' |
|
| 10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD` |
|
| 11 |
#' |
|
| 12 |
#' @inheritParams argument_convention |
|
| 13 |
#' @param event_descr (`character vector`)\cr Descriptions of events. Defaults to `NULL`. |
|
| 14 |
#' @param censor_descr (`character vector`)\cr Descriptions of censors. Defaults to `NULL`. |
|
| 15 |
#' @template param_cached |
|
| 16 |
#' @templateVar data adaette |
|
| 17 |
#' |
|
| 18 |
#' @return `data.frame` |
|
| 19 |
#' @export |
|
| 20 |
#' |
|
| 21 |
#' @author Xiuting Mi |
|
| 22 |
#' |
|
| 23 |
#' @examples |
|
| 24 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
| 25 |
#' |
|
| 26 |
#' adaette <- radaette(adsl, seed = 2) |
|
| 27 |
#' adaette |
|
| 28 |
radaette <- function(adsl, |
|
| 29 |
event_descr = NULL, |
|
| 30 |
censor_descr = NULL, |
|
| 31 |
lookup = NULL, |
|
| 32 |
seed = NULL, |
|
| 33 |
na_percentage = 0, |
|
| 34 |
na_vars = list(CNSR = c(NA, 0.1), AVAL = c(1234, 0.1)), |
|
| 35 |
cached = FALSE) {
|
|
| 36 | 6x |
checkmate::assert_flag(cached) |
| 37 | 6x |
if (cached) {
|
| 38 | 1x |
return(get_cached_data("cadaette"))
|
| 39 |
} |
|
| 40 | ||
| 41 | 5x |
checkmate::assert_data_frame(adsl) |
| 42 | 5x |
checkmate::assert_character(censor_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE) |
| 43 | 5x |
checkmate::assert_character(event_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE) |
| 44 | 5x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 45 | 5x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
| 46 | 5x |
checkmate::assert_true(na_percentage < 1) |
| 47 | ||
| 48 | 5x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
| 49 | 5x |
lookup_adaette <- if (!is.null(lookup)) {
|
| 50 | ! |
lookup |
| 51 |
} else {
|
|
| 52 | 5x |
tibble::tribble( |
| 53 | 5x |
~ARM, ~CATCD, ~CAT, ~LAMBDA, ~CNSR_P, |
| 54 | 5x |
"ARM A", "1", "any adverse event", 1 / 80, 0.4, |
| 55 | 5x |
"ARM B", "1", "any adverse event", 1 / 100, 0.2, |
| 56 | 5x |
"ARM C", "1", "any adverse event", 1 / 60, 0.42, |
| 57 | 5x |
"ARM A", "2", "any serious adverse event", 1 / 100, 0.3, |
| 58 | 5x |
"ARM B", "2", "any serious adverse event", 1 / 150, 0.1, |
| 59 | 5x |
"ARM C", "2", "any serious adverse event", 1 / 80, 0.32, |
| 60 | 5x |
"ARM A", "3", "a grade 3-5 adverse event", 1 / 80, 0.2, |
| 61 | 5x |
"ARM B", "3", "a grade 3-5 adverse event", 1 / 100, 0.08, |
| 62 | 5x |
"ARM C", "3", "a grade 3-5 adverse event", 1 / 60, 0.23 |
| 63 |
) |
|
| 64 |
} |
|
| 65 | ||
| 66 | 5x |
if (!is.null(seed)) {
|
| 67 | 5x |
set.seed(seed) |
| 68 |
} |
|
| 69 | 5x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
| 70 | ||
| 71 | 5x |
evntdescr_sel <- if (!is.null(event_descr)) {
|
| 72 | ! |
event_descr |
| 73 |
} else {
|
|
| 74 | 5x |
"Preferred Term" |
| 75 |
} |
|
| 76 | ||
| 77 | 5x |
cnsdtdscr_sel <- if (!is.null(censor_descr)) {
|
| 78 | ! |
censor_descr |
| 79 |
} else {
|
|
| 80 | 5x |
c( |
| 81 | 5x |
"Clinical Cut Off", |
| 82 | 5x |
"Completion or Discontinuation", |
| 83 | 5x |
"End of AE Reporting Period" |
| 84 |
) |
|
| 85 |
} |
|
| 86 | ||
| 87 | 5x |
random_patient_data <- function(patient_info) {
|
| 88 | 50x |
startdt <- lubridate::date(patient_info$TRTSDTM) |
| 89 | 50x |
trtedtm <- lubridate::floor_date(dplyr::case_when( |
| 90 | 50x |
is.na(patient_info$TRTEDTM) ~ lubridate::date(patient_info$TRTSDTM) + study_duration_secs, |
| 91 | 50x |
TRUE ~ lubridate::date(patient_info$TRTEDTM) |
| 92 | 50x |
), unit = "day") |
| 93 | 50x |
enddts <- c(patient_info$EOSDT, lubridate::date(trtedtm)) |
| 94 | 50x |
enddts_min_index <- which.min(enddts) |
| 95 | 50x |
adt <- enddts[enddts_min_index] |
| 96 | 50x |
adtm <- lubridate::as_datetime(adt) |
| 97 | 50x |
ady <- as.numeric(adt - startdt + 1) |
| 98 | 50x |
data.frame( |
| 99 | 50x |
ARM = patient_info$ARM, |
| 100 | 50x |
STUDYID = patient_info$STUDYID, |
| 101 | 50x |
SITEID = patient_info$SITEID, |
| 102 | 50x |
USUBJID = patient_info$USUBJID, |
| 103 | 50x |
PARAMCD = "AEREPTTE", |
| 104 | 50x |
PARAM = "Time to end of AE reporting period", |
| 105 | 50x |
CNSR = 0, |
| 106 | 50x |
AVAL = lubridate::days(ady) / lubridate::years(1), |
| 107 | 50x |
AVALU = "YEARS", |
| 108 | 50x |
EVNTDESC = ifelse(enddts_min_index == 1, "Completion or Discontinuation", "End of AE Reporting Period"), |
| 109 | 50x |
CNSDTDSC = NA, |
| 110 | 50x |
ADTM = adtm, |
| 111 | 50x |
ADY = ady, |
| 112 | 50x |
stringsAsFactors = FALSE |
| 113 |
) |
|
| 114 |
} |
|
| 115 | ||
| 116 |
# validate and initialize related variables for Hy's law |
|
| 117 | 5x |
paramcd_hy <- c("HYSTTEUL", "HYSTTEBL")
|
| 118 | 5x |
param_hy <- c("Time to Hy's Law Elevation in relation to ULN", "Time to Hy's Law Elevation in relation to Baseline")
|
| 119 | 5x |
param_init_list <- relvar_init(param_hy, paramcd_hy) |
| 120 | 5x |
adsl_hy <- dplyr::select(adsl, "STUDYID", "USUBJID", "TRTSDTM", "SITEID", "ARM") |
| 121 | ||
| 122 |
# create all combinations of unique values in STUDYID, USUBJID, PARAM, AVISIT |
|
| 123 | 5x |
adaette_hy <- expand.grid( |
| 124 | 5x |
STUDYID = unique(adsl$STUDYID), |
| 125 | 5x |
USUBJID = adsl$USUBJID, |
| 126 | 5x |
PARAM = as.factor(param_init_list$relvar1), |
| 127 | 5x |
stringsAsFactors = FALSE |
| 128 |
) |
|
| 129 | ||
| 130 |
# Add other variables to adaette_hy |
|
| 131 | 5x |
adaette_hy <- dplyr::left_join(adaette_hy, adsl_hy, by = c("STUDYID", "USUBJID")) %>%
|
| 132 | 5x |
rel_var( |
| 133 | 5x |
var_name = "PARAMCD", |
| 134 | 5x |
related_var = "PARAM", |
| 135 | 5x |
var_values = param_init_list$relvar2 |
| 136 |
) %>% |
|
| 137 | 5x |
dplyr::mutate( |
| 138 | 5x |
CNSR = sample(c(0, 1), prob = c(0.1, 0.9), size = dplyr::n(), replace = TRUE), |
| 139 | 5x |
EVNTDESC = dplyr::if_else( |
| 140 | 5x |
CNSR == 0, |
| 141 | 5x |
"First Post-Baseline Raised ALT or AST Elevation Result", |
| 142 | 5x |
NA_character_ |
| 143 |
), |
|
| 144 | 5x |
CNSDTDSC = dplyr::if_else(CNSR == 0, NA_character_, |
| 145 | 5x |
sample(c("Last Post-Baseline ALT or AST Result", "Treatment Start"),
|
| 146 | 5x |
prob = c(0.9, 0.1), |
| 147 | 5x |
size = dplyr::n(), replace = TRUE |
| 148 |
) |
|
| 149 |
) |
|
| 150 |
) %>% |
|
| 151 | 5x |
dplyr::rowwise() %>% |
| 152 | 5x |
dplyr::mutate(ADTM = dplyr::case_when( |
| 153 | 5x |
CNSDTDSC == "Treatment Start" ~ TRTSDTM, |
| 154 | 5x |
TRUE ~ TRTSDTM + sample(seq(0, study_duration_secs), size = dplyr::n(), replace = TRUE) |
| 155 |
)) %>% |
|
| 156 | 5x |
dplyr::mutate( |
| 157 | 5x |
ADY_int = lubridate::date(ADTM) - lubridate::date(TRTSDTM) + 1, |
| 158 | 5x |
ADY = as.numeric(ADY_int), |
| 159 | 5x |
AVAL = lubridate::days(ADY_int) / lubridate::weeks(1), |
| 160 | 5x |
AVALU = "WEEKS" |
| 161 |
) %>% |
|
| 162 | 5x |
dplyr::select(-TRTSDTM, -ADY_int) |
| 163 | ||
| 164 | 5x |
random_ae_data <- function(lookup_info, patient_info, patient_data) {
|
| 165 | 150x |
cnsr <- sample(c(0, 1), 1, prob = c(1 - lookup_info$CNSR_P, lookup_info$CNSR_P)) |
| 166 | 150x |
ae_rep_tte <- patient_data$AVAL[patient_data$PARAMCD == "AEREPTTE"] |
| 167 | 150x |
data.frame( |
| 168 | 150x |
ARM = rep(patient_data$ARM, 2), |
| 169 | 150x |
STUDYID = rep(patient_data$STUDYID, 2), |
| 170 | 150x |
SITEID = rep(patient_data$SITEID, 2), |
| 171 | 150x |
USUBJID = rep(patient_data$USUBJID, 2), |
| 172 | 150x |
PARAMCD = c( |
| 173 | 150x |
paste0("AETTE", lookup_info$CATCD),
|
| 174 | 150x |
paste0("AETOT", lookup_info$CATCD)
|
| 175 |
), |
|
| 176 | 150x |
PARAM = c( |
| 177 | 150x |
paste("Time to first occurrence of", lookup_info$CAT),
|
| 178 | 150x |
paste("Number of occurrences of", lookup_info$CAT)
|
| 179 |
), |
|
| 180 | 150x |
CNSR = c( |
| 181 | 150x |
cnsr, |
| 182 | 150x |
NA |
| 183 |
), |
|
| 184 | 150x |
AVAL = c( |
| 185 |
# We generate these values conditional on the censoring information. |
|
| 186 |
# If this time to event is censored, then there were no AEs reported and the time is set |
|
| 187 |
# to the AE reporting period time. Otherwise we draw from truncated distributions to make |
|
| 188 |
# sure that we are within the AE reporting time and above 0 AEs. |
|
| 189 | 150x |
ifelse(cnsr == 1, ae_rep_tte, rtexp(1, lookup_info$LAMBDA * 365.25, r = ae_rep_tte)), |
| 190 | 150x |
ifelse(cnsr == 1, 0, rtpois(1, lookup_info$LAMBDA * 365.25)) |
| 191 |
), |
|
| 192 | 150x |
AVALU = c( |
| 193 | 150x |
"YEARS", |
| 194 | 150x |
NA |
| 195 |
), |
|
| 196 | 150x |
EVNTDESC = c( |
| 197 | 150x |
ifelse(cnsr == 0, sample(evntdescr_sel, 1), ""), |
| 198 | 150x |
NA |
| 199 |
), |
|
| 200 | 150x |
CNSDTDSC = c( |
| 201 | 150x |
ifelse(cnsr == 1, sample(cnsdtdscr_sel, 1), ""), |
| 202 | 150x |
NA |
| 203 |
), |
|
| 204 | 150x |
stringsAsFactors = FALSE |
| 205 | 150x |
) %>% dplyr::mutate( |
| 206 | 150x |
ADY = dplyr::if_else(is.na(AVALU), NA_real_, ceiling(as.numeric(lubridate::dyears(AVAL), "days"))), |
| 207 | 150x |
ADTM = dplyr::if_else( |
| 208 | 150x |
is.na(AVALU), |
| 209 | 150x |
lubridate::as_datetime(NA), |
| 210 | 150x |
patient_info$TRTSDTM + lubridate::days(ADY) |
| 211 |
) |
|
| 212 |
) |
|
| 213 |
} |
|
| 214 | ||
| 215 | 5x |
adaette <- split(adsl, adsl$USUBJID) %>% |
| 216 | 5x |
lapply(function(patient_info) {
|
| 217 | 50x |
patient_data <- random_patient_data(patient_info) |
| 218 | 50x |
lookup_arm <- lookup_adaette %>% |
| 219 | 50x |
dplyr::filter(ARM == as.character(patient_info$ARMCD)) |
| 220 | 50x |
ae_data <- split(lookup_arm, lookup_arm$CATCD) %>% |
| 221 | 50x |
lapply(random_ae_data, patient_data = patient_data, patient_info = patient_info) %>% |
| 222 | 50x |
Reduce(rbind, .) |
| 223 | 50x |
dplyr::bind_rows(patient_data, ae_data) |
| 224 |
}) %>% |
|
| 225 | 5x |
Reduce(rbind, .) %>% |
| 226 | 5x |
rcd_var_relabel( |
| 227 | 5x |
STUDYID = "Study Identifier", |
| 228 | 5x |
USUBJID = "Unique Subject Identifier" |
| 229 |
) |
|
| 230 | ||
| 231 | 5x |
adaette <- rcd_var_relabel( |
| 232 | 5x |
adaette, |
| 233 | 5x |
STUDYID = "Study Identifier", |
| 234 | 5x |
USUBJID = "Unique Subject Identifier" |
| 235 |
) |
|
| 236 | ||
| 237 | 5x |
adaette <- rbind(adaette, adaette_hy) |
| 238 | ||
| 239 | 5x |
adaette <- dplyr::inner_join( |
| 240 | 5x |
dplyr::select(adaette, -"SITEID", -"ARM"), |
| 241 | 5x |
adsl, |
| 242 | 5x |
by = c("STUDYID", "USUBJID")
|
| 243 |
) %>% |
|
| 244 | 5x |
dplyr::group_by(USUBJID) %>% |
| 245 | 5x |
dplyr::arrange(ADTM) %>% |
| 246 | 5x |
dplyr::mutate(TTESEQ = seq_len(dplyr::n())) %>% |
| 247 | 5x |
dplyr::mutate(ASEQ = TTESEQ) %>% |
| 248 | 5x |
dplyr::mutate(PARAM = as.factor(PARAM)) %>% |
| 249 | 5x |
dplyr::mutate(PARAMCD = as.factor(PARAMCD)) %>% |
| 250 | 5x |
dplyr::ungroup() %>% |
| 251 | 5x |
dplyr::arrange( |
| 252 | 5x |
STUDYID, |
| 253 | 5x |
USUBJID, |
| 254 | 5x |
PARAMCD, |
| 255 | 5x |
ADTM, |
| 256 | 5x |
TTESEQ |
| 257 |
) |
|
| 258 | ||
| 259 | 5x |
if (length(na_vars) > 0 && na_percentage > 0) {
|
| 260 | ! |
adaette <- dplyr::mutate(ds = adaette, na_vars = na_vars, na_percentage = na_percentage) |
| 261 |
} |
|
| 262 | ||
| 263 |
# apply metadata |
|
| 264 | 5x |
adaette <- apply_metadata(adaette, "metadata/ADAETTE.yml") |
| 265 | ||
| 266 | 5x |
return(adaette) |
| 267 |
} |
| 1 |
#' Hy's Law Analysis Dataset (ADHY) |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Function for generating a random Hy's Law Analysis Dataset for a given |
|
| 6 |
#' Subject-Level Analysis Dataset. |
|
| 7 |
#' |
|
| 8 |
#' @details One record per subject per parameter per analysis visit per analysis date. |
|
| 9 |
#' |
|
| 10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `AVISITN`, `ADTM`, `SRCSEQ` |
|
| 11 |
# |
|
| 12 |
#' @inheritParams argument_convention |
|
| 13 |
#' @template param_cached |
|
| 14 |
#' @templateVar data adhy |
|
| 15 |
#' |
|
| 16 |
#' @return `data.frame` |
|
| 17 |
#' @export |
|
| 18 |
#' |
|
| 19 |
#' @author wojciakw |
|
| 20 |
#' |
|
| 21 |
#' @examples |
|
| 22 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
| 23 |
#' |
|
| 24 |
#' adhy <- radhy(adsl, seed = 2) |
|
| 25 |
#' adhy |
|
| 26 |
radhy <- function(adsl, |
|
| 27 |
param = c( |
|
| 28 |
"TBILI <= 2 times ULN and ALT value category", |
|
| 29 |
"TBILI > 2 times ULN and AST value category", |
|
| 30 |
"TBILI > 2 times ULN and ALT value category", |
|
| 31 |
"TBILI <= 2 times ULN and AST value category", |
|
| 32 |
"TBILI > 2 times ULN and ALKPH <= 2 times ULN and ALT value category", |
|
| 33 |
"TBILI > 2 times ULN and ALKPH <= 2 times ULN and AST value category", |
|
| 34 |
"TBILI > 2 times ULN and ALKPH <= 5 times ULN and ALT value category", |
|
| 35 |
"TBILI > 2 times ULN and ALKPH <= 5 times ULN and AST value category", |
|
| 36 |
"TBILI <= 2 times ULN and two consecutive elevations of ALT in relation to ULN", |
|
| 37 |
"TBILI > 2 times ULN and two consecutive elevations of AST in relation to ULN", |
|
| 38 |
"TBILI <= 2 times ULN and two consecutive elevations of AST in relation to ULN", |
|
| 39 |
"TBILI > 2 times ULN and two consecutive elevations of ALT in relation to ULN", |
|
| 40 |
"TBILI > 2 times ULN and two consecutive elevations of ALT in relation to Baseline", |
|
| 41 |
"TBILI <= 2 times ULN and two consecutive elevations of ALT in relation to Baseline", |
|
| 42 |
"TBILI > 2 times ULN and two consecutive elevations of AST in relation to Baseline", |
|
| 43 |
"TBILI <= 2 times ULN and two consecutive elevations of AST in relation to Baseline", |
|
| 44 |
"ALT > 3 times ULN by Period", |
|
| 45 |
"AST > 3 times ULN by Period", |
|
| 46 |
"ALT or AST > 3 times ULN by Period", |
|
| 47 |
"ALT > 3 times Baseline by Period", |
|
| 48 |
"AST > 3 times Baseline by Period", |
|
| 49 |
"ALT or AST > 3 times Baseline by Period" |
|
| 50 |
), |
|
| 51 |
paramcd = c( |
|
| 52 |
"BLAL", |
|
| 53 |
"BGAS", |
|
| 54 |
"BGAL", |
|
| 55 |
"BLAS", |
|
| 56 |
"BA2AL", |
|
| 57 |
"BA2AS", |
|
| 58 |
"BA5AL", |
|
| 59 |
"BA5AS", |
|
| 60 |
"BL2AL2CU", |
|
| 61 |
"BG2AS2CU", |
|
| 62 |
"BL2AS2CU", |
|
| 63 |
"BG2AL2CU", |
|
| 64 |
"BG2AL2CB", |
|
| 65 |
"BL2AL2CB", |
|
| 66 |
"BG2AS2CB", |
|
| 67 |
"BL2AS2CB", |
|
| 68 |
"ALTPULN", |
|
| 69 |
"ASTPULN", |
|
| 70 |
"ALTASTPU", |
|
| 71 |
"ALTPBASE", |
|
| 72 |
"ASTPBASE", |
|
| 73 |
"ALTASTPB" |
|
| 74 |
), |
|
| 75 |
seed = NULL, |
|
| 76 |
cached = FALSE) {
|
|
| 77 | 4x |
checkmate::assert_flag(cached) |
| 78 | ||
| 79 | 4x |
if (cached) {
|
| 80 | 1x |
return(get_cached_data("cadhy"))
|
| 81 |
} |
|
| 82 | ||
| 83 | 3x |
checkmate::assert_data_frame(adsl) |
| 84 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
| 85 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
| 86 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 87 | ||
| 88 |
# validate and initialize related variables |
|
| 89 | 3x |
param_init_list <- relvar_init(param, paramcd) |
| 90 | ||
| 91 | 3x |
if (!is.null(seed)) {
|
| 92 | 3x |
set.seed(seed) |
| 93 |
} |
|
| 94 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
| 95 | ||
| 96 |
# create all combinations of unique values in STUDYID, USUBJID, PARAM, AVISIT |
|
| 97 | 3x |
adhy <- expand.grid( |
| 98 | 3x |
STUDYID = unique(adsl$STUDYID), |
| 99 | 3x |
USUBJID = adsl$USUBJID, |
| 100 | 3x |
PARAM = as.factor(param_init_list$relvar1), |
| 101 | 3x |
AVISIT = as.factor(c("BASELINE", "POST-BASELINE")),
|
| 102 | 3x |
APERIODC = as.factor(c("PERIOD 1", "PERIOD 2")),
|
| 103 | 3x |
stringsAsFactors = FALSE |
| 104 |
) |
|
| 105 | ||
| 106 |
# remove records that are not needed and were created as a side product of expand.grid above |
|
| 107 | 3x |
adhy <- dplyr::filter(adhy, !(AVISIT == "BASELINE" & APERIODC == "PERIOD 2")) |
| 108 | ||
| 109 |
# define TBILI ALT/AST params, period dependent parameters and the parameters that will be assigned values "Y" or "N" |
|
| 110 | 3x |
paramcd_tbilialtast <- c("BLAL", "BGAS", "BGAL", "BLAS", "BA2AL", "BA2AS", "BA5AL", "BA5AS")
|
| 111 | 3x |
paramcd_by_period <- c("ALTPULN", "ASTPULN", "ALTASTPU", "ALTPBASE", "ASTPBASE", "ALTASTPB")
|
| 112 | 3x |
paramcd_yn <- c( |
| 113 | 3x |
"BL2AL2CU", "BG2AS2CU", "BL2AS2CU", "BG2AL2CU", "BG2AL2CB", "BL2AL2CB", "BG2AS2CB", "BL2AS2CB", |
| 114 | 3x |
paramcd_by_period |
| 115 |
) |
|
| 116 | ||
| 117 |
# add other variables to adhy |
|
| 118 | 3x |
adhy <- adhy %>% |
| 119 | 3x |
rel_var( |
| 120 | 3x |
var_name = "PARAMCD", |
| 121 | 3x |
related_var = "PARAM", |
| 122 | 3x |
var_values = param_init_list$relvar2 |
| 123 |
) %>% |
|
| 124 | 3x |
dplyr::mutate( |
| 125 | 3x |
AVALC = dplyr::case_when( |
| 126 | 3x |
PARAMCD %in% paramcd_tbilialtast ~ sample( |
| 127 | 3x |
x = c(">3-5ULN", ">5-10ULN", ">10-20ULN", ">20ULN", "Criteria not met"), size = dplyr::n(), replace = TRUE
|
| 128 |
), |
|
| 129 | 3x |
PARAMCD %in% paramcd_yn ~ sample( |
| 130 | 3x |
x = c("Y", "N"), prob = c(0.1, 0.9), size = dplyr::n(), replace = TRUE
|
| 131 |
) |
|
| 132 |
), |
|
| 133 | 3x |
AVAL = dplyr::case_when( |
| 134 | 3x |
AVALC == ">3-5ULN" ~ 1, |
| 135 | 3x |
AVALC == ">5-10ULN" ~ 2, |
| 136 | 3x |
AVALC == ">10-20ULN" ~ 3, |
| 137 | 3x |
AVALC == ">20ULN" ~ 4, |
| 138 | 3x |
AVALC == "Y" ~ 1, |
| 139 | 3x |
AVALC == "N" ~ 0, |
| 140 | 3x |
AVALC == "Criteria not met" ~ 0 |
| 141 |
), |
|
| 142 | 3x |
AVISITN = dplyr::case_when( |
| 143 | 3x |
AVISIT == "BASELINE" ~ 0L, |
| 144 | 3x |
AVISIT == "POST-BASELINE" ~ 9995L, |
| 145 | 3x |
TRUE ~ NA_integer_ |
| 146 |
), |
|
| 147 | 3x |
APERIOD = dplyr::case_when( |
| 148 | 3x |
APERIODC == "PERIOD 1" ~ 1L, |
| 149 | 3x |
APERIODC == "PERIOD 2" ~ 2L, |
| 150 | 3x |
TRUE ~ NA_integer_ |
| 151 |
), |
|
| 152 | 3x |
ABLFL = dplyr::if_else(AVISIT == "BASELINE", "Y", NA_character_), |
| 153 | 3x |
ONTRTFL = dplyr::if_else(AVISIT == "POST-BASELINE", "Y", NA_character_), |
| 154 | 3x |
ANL01FL = "Y", |
| 155 | 3x |
SRCSEQ = NA_integer_ |
| 156 |
) |
|
| 157 | ||
| 158 |
# remove records for parameters with period 2 and not in paramcd_by_period |
|
| 159 | 3x |
adhy <- dplyr::filter(adhy, PARAMCD %in% paramcd_by_period | APERIODC == "PERIOD 1") |
| 160 | ||
| 161 |
# add baseline variables |
|
| 162 | 3x |
adhy <- adhy %>% |
| 163 | 3x |
dplyr::group_by(USUBJID, PARAMCD) %>% |
| 164 | 3x |
dplyr::mutate( |
| 165 | 3x |
BASEC = AVALC[AVISIT == "BASELINE"], |
| 166 | 3x |
BASE = AVAL[AVISIT == "BASELINE"] |
| 167 |
) %>% |
|
| 168 | 3x |
dplyr::ungroup() |
| 169 | ||
| 170 | 3x |
adhy <- adhy %>% |
| 171 | 3x |
rcd_var_relabel( |
| 172 | 3x |
STUDYID = attr(adsl$STUDYID, "label"), |
| 173 | 3x |
USUBJID = attr(adsl$USUBJID, "label") |
| 174 |
) |
|
| 175 | ||
| 176 |
# merge ADSL to be able to add analysis datetime and analysis relative day variables |
|
| 177 | 3x |
adhy <- dplyr::inner_join(adhy, adsl, by = c("STUDYID", "USUBJID"))
|
| 178 | ||
| 179 |
# define a simple helper function to create ADY variable |
|
| 180 | 3x |
add_ady <- function(x, avisit) {
|
| 181 | 6x |
if (avisit == "BASELINE") {
|
| 182 | 3x |
dplyr::mutate( |
| 183 | 3x |
x, |
| 184 | 3x |
ADY = sample(x = -(1:14), size = dplyr::n(), replace = TRUE) |
| 185 |
) |
|
| 186 | 3x |
} else if (avisit == "POST-BASELINE") {
|
| 187 | 3x |
dplyr::rowwise(x) %>% |
| 188 | 3x |
dplyr::mutate(ADY = as.integer(sample( |
| 189 | 3x |
dplyr::if_else( |
| 190 | 3x |
!is.na(TRTEDTM), |
| 191 | 3x |
as.numeric(difftime(TRTEDTM, TRTSDTM, units = "days")), |
| 192 | 3x |
as.numeric(study_duration_secs, "days") |
| 193 |
), |
|
| 194 | 3x |
size = 1, |
| 195 | 3x |
replace = TRUE |
| 196 |
))) |
|
| 197 |
} else {
|
|
| 198 | ! |
dplyr::mutate(x, ADY = NA_integer_) |
| 199 |
} |
|
| 200 |
} |
|
| 201 | ||
| 202 |
# add ADY and ADTM variables |
|
| 203 | 3x |
adhy <- adhy %>% |
| 204 | 3x |
dplyr::group_by(AVISIT, .add = FALSE) %>% |
| 205 | 3x |
dplyr::group_modify(~ add_ady(.x, .y$AVISIT)) %>% |
| 206 | 3x |
dplyr::ungroup() %>% |
| 207 | 3x |
dplyr::mutate(ADTM = TRTSDTM + lubridate::days(ADY)) |
| 208 | ||
| 209 |
# order columns and arrange rows; column order follows ADaM_1.1 specification |
|
| 210 | 3x |
adhy <- |
| 211 | 3x |
adhy[, c( |
| 212 | 3x |
colnames(adsl), |
| 213 | 3x |
"PARAM", |
| 214 | 3x |
"PARAMCD", |
| 215 | 3x |
"AVAL", |
| 216 | 3x |
"AVALC", |
| 217 | 3x |
"BASE", |
| 218 | 3x |
"BASEC", |
| 219 | 3x |
"ABLFL", |
| 220 | 3x |
"ADTM", |
| 221 | 3x |
"ADY", |
| 222 | 3x |
"AVISIT", |
| 223 | 3x |
"AVISITN", |
| 224 | 3x |
"APERIOD", |
| 225 | 3x |
"APERIODC", |
| 226 | 3x |
"ONTRTFL", |
| 227 | 3x |
"SRCSEQ", |
| 228 | 3x |
"ANL01FL" |
| 229 |
)] |
|
| 230 | ||
| 231 | 3x |
adhy <- adhy %>% |
| 232 | 3x |
dplyr::arrange( |
| 233 | 3x |
STUDYID, |
| 234 | 3x |
USUBJID, |
| 235 | 3x |
PARAMCD, |
| 236 | 3x |
AVISITN, |
| 237 | 3x |
ADTM, |
| 238 | 3x |
SRCSEQ |
| 239 |
) |
|
| 240 | ||
| 241 |
# apply metadata |
|
| 242 | 3x |
adhy <- apply_metadata(adhy, "metadata/ADHY.yml") |
| 243 | ||
| 244 | 3x |
return(adhy) |
| 245 |
} |
| 1 |
#' EORTC QLQ-C30 V3 Analysis Dataset (ADQLQC) |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Function for generating a random EORTC QLQ-C30 V3 Analysis Dataset for a given |
|
| 6 |
#' Subject-Level Analysis Dataset. |
|
| 7 |
#' |
|
| 8 |
#' @details |
|
| 9 |
#' |
|
| 10 |
#' Keys: `STUDYID`, `USUBJID`, `PARCAT1N`, `PARAMCD`, `BASETYPE`, `AVISITN`, `ATPTN`, `ADTM`, `QSSEQ` |
|
| 11 |
#' |
|
| 12 |
#' @inheritParams argument_convention |
|
| 13 |
#' @param percent (`numeric`)\cr Completion - Completed at least y percent of questions, 1 record per visit |
|
| 14 |
#' @param number (`numeric`)\cr Completion - Completed at least x question(s), 1 record per visit |
|
| 15 |
#' @template param_cached |
|
| 16 |
#' @templateVar data adqlqc |
|
| 17 |
#' |
|
| 18 |
#' @return `data.frame` |
|
| 19 |
#' @export |
|
| 20 |
#' |
|
| 21 |
#' @examples |
|
| 22 |
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1) |
|
| 23 |
#' |
|
| 24 |
#' adqlqc <- radqlqc(adsl, seed = 1, percent = 80, number = 2) |
|
| 25 |
#' adqlqc |
|
| 26 |
radqlqc <- function(adsl, |
|
| 27 |
percent, |
|
| 28 |
number, |
|
| 29 |
seed = NULL, |
|
| 30 |
cached = FALSE) {
|
|
| 31 | 4x |
checkmate::assert_flag(cached) |
| 32 | 4x |
if (cached) {
|
| 33 | 1x |
return(get_cached_data("cadqlqc"))
|
| 34 |
} |
|
| 35 | ||
| 36 | 3x |
checkmate::assert_data_frame(adsl) |
| 37 | 3x |
checkmate::assert_number(percent, lower = 1, upper = 100) |
| 38 | 3x |
checkmate::assert_number(number, lower = 1) |
| 39 | ||
| 40 | 3x |
if (!is.null(seed)) {
|
| 41 | 3x |
set.seed(seed) |
| 42 |
} |
|
| 43 | ||
| 44 |
# ADQLQC data ------------------------------------------------------------- |
|
| 45 | 3x |
qs <- get_qs_data(adsl, n_assessments = 5L, seed = seed, na_percentage = 0.1) |
| 46 |
# prepare ADaM ADQLQC data |
|
| 47 | 3x |
adqlqc1 <- prep_adqlqc(df = qs) |
| 48 |
# derive AVAL and AVALC |
|
| 49 | 3x |
adqlqc1 <- mutate( |
| 50 | 3x |
adqlqc1, |
| 51 | 3x |
AVAL = as.numeric(QSSTRESC), |
| 52 | 3x |
AVALC = case_when( |
| 53 | 3x |
QSTESTCD == "QSALL" ~ QSREASND, |
| 54 | 3x |
TRUE ~ QSORRES |
| 55 |
), |
|
| 56 | 3x |
AVISIT = VISIT, |
| 57 | 3x |
AVISITN = VISITNUM, |
| 58 | 3x |
ADTM = QSDTC |
| 59 |
) |
|
| 60 |
# include scale calculation |
|
| 61 | 3x |
adqlqc_tmp <- calc_scales(adqlqc1) |
| 62 |
# order to prepare for change from screening and baseline values |
|
| 63 | 3x |
adqlqc_tmp <- adqlqc_tmp[order(adqlqc_tmp$STUDYID, adqlqc_tmp$USUBJID, adqlqc_tmp$PARAMCD, adqlqc_tmp$AVISITN), ] |
| 64 | ||
| 65 | 3x |
adqlqc_tmp <- Reduce( |
| 66 | 3x |
rbind, |
| 67 | 3x |
lapply( |
| 68 | 3x |
split(adqlqc_tmp, adqlqc_tmp$USUBJID), |
| 69 | 3x |
function(x) {
|
| 70 | 30x |
x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])] |
| 71 | 30x |
x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "") |
| 72 | 30x |
x$ABLFL <- ifelse( |
| 73 | 30x |
x$AVISIT == "BASELINE" & |
| 74 | 30x |
x$PARAMCD != "EX028", |
| 75 | 30x |
"Y", |
| 76 | 30x |
ifelse( |
| 77 | 30x |
x$AVISIT == "CYCLE 1 DAY 1" & |
| 78 | 30x |
x$PARAMCD != "EX028", |
| 79 | 30x |
"Y", |
| 80 |
"" |
|
| 81 |
) |
|
| 82 |
) |
|
| 83 | 30x |
x |
| 84 |
} |
|
| 85 |
) |
|
| 86 |
) |
|
| 87 | ||
| 88 | 3x |
adqlqc_tmp$BASE2 <- ifelse( |
| 89 | 3x |
str_detect(adqlqc_tmp$PARCAT2, "Completion", negate = TRUE), |
| 90 | 3x |
retain( |
| 91 | 3x |
df = adqlqc_tmp, |
| 92 | 3x |
value_var = adqlqc_tmp$AVAL, |
| 93 | 3x |
event = adqlqc_tmp$ABLFL2 == "Y" |
| 94 |
), |
|
| 95 | 3x |
NA |
| 96 |
) |
|
| 97 | ||
| 98 | 3x |
adqlqc_tmp$BASE <- ifelse( |
| 99 | 3x |
adqlqc_tmp$ABLFL2 != "Y" & |
| 100 | 3x |
str_detect(adqlqc_tmp$PARCAT2, "Completion", negate = TRUE), |
| 101 | 3x |
retain( |
| 102 | 3x |
adqlqc_tmp, |
| 103 | 3x |
adqlqc_tmp$AVAL, |
| 104 | 3x |
adqlqc_tmp$ABLFL == "Y" |
| 105 |
), |
|
| 106 | 3x |
NA |
| 107 |
) |
|
| 108 | ||
| 109 | 3x |
adqlqc_tmp <- adqlqc_tmp %>% |
| 110 | 3x |
dplyr::mutate(CHG2 = AVAL - BASE2) %>% |
| 111 | 3x |
dplyr::mutate(PCHG2 = 100 * (CHG2 / BASE2)) %>% |
| 112 | 3x |
dplyr::mutate(CHG = AVAL - BASE) %>% |
| 113 | 3x |
dplyr::mutate(PCHG = 100 * (CHG / BASE)) %>% |
| 114 | 3x |
rcd_var_relabel( |
| 115 | 3x |
STUDYID = attr(adsl$STUDYID, "label"), |
| 116 | 3x |
USUBJID = attr(adsl$USUBJID, "label") |
| 117 |
) |
|
| 118 |
# derive CHGCAT1 ---------------------------------------------------------- |
|
| 119 | 3x |
adqlqc_tmp <- derv_chgcat1(dataset = adqlqc_tmp) |
| 120 | ||
| 121 | 3x |
adqlqc_tmp <- rcd_var_relabel( |
| 122 | 3x |
adqlqc_tmp, |
| 123 | 3x |
STUDYID = "Study Identifier", |
| 124 | 3x |
USUBJID = "Unique Subject Identifier" |
| 125 |
) |
|
| 126 | ||
| 127 | 3x |
adqlqc_tmp <- arrange( |
| 128 | 3x |
adqlqc_tmp, |
| 129 | 3x |
USUBJID, |
| 130 | 3x |
AVISITN |
| 131 |
) |
|
| 132 |
# Merge ADSL -------------------------------------------------------------- |
|
| 133 |
# ADSL variables needed for ADQLQC |
|
| 134 | 3x |
adsl_vars <- c( |
| 135 | 3x |
"STUDYID", "USUBJID", "SUBJID", "SITEID", "REGION1", "COUNTRY", "ETHNIC", "AGE", |
| 136 | 3x |
"AGEU", "AAGE", "AAGEU", "AGEGR1", "AGEGR2", "AGEGR3", "STRATwNM", "STRATw", "STRATwV", |
| 137 | 3x |
"SEX", "RACE", "ITTFL", "SAFFL", "PPROTFL", "TRT01P", "TRT01A", |
| 138 | 3x |
"TRTSEQP", "TRTSEQA", "TRTSDTM", "TRTSDT", "TRTEDTM", "TRTEDT", "DCUTDT" |
| 139 |
) |
|
| 140 | 3x |
adsl <- select( |
| 141 | 3x |
adsl, |
| 142 | 3x |
any_of(adsl_vars) |
| 143 |
) |
|
| 144 | 3x |
adqlqc <- dplyr::inner_join( |
| 145 | 3x |
adqlqc_tmp, |
| 146 | 3x |
adsl, |
| 147 | 3x |
by = c("STUDYID", "USUBJID")
|
| 148 |
) %>% |
|
| 149 | 3x |
dplyr::mutate( |
| 150 | 3x |
ADY_der = ceiling(difftime(ADTM, TRTSDTM, units = "days")), |
| 151 | 3x |
ADY = case_when( |
| 152 | 3x |
ADY_der >= 0 ~ ADY_der + 1, |
| 153 | 3x |
TRUE ~ ADY_der |
| 154 |
) |
|
| 155 |
) %>% |
|
| 156 | 3x |
select(-ADY_der) |
| 157 | ||
| 158 |
# get compliance data --------------------------------------------------- |
|
| 159 | 3x |
compliance_data <- comp_derv( |
| 160 | 3x |
dataset = adqlqc, |
| 161 | 3x |
percent = percent, |
| 162 | 3x |
number = number |
| 163 |
) |
|
| 164 |
# add ADSL variables |
|
| 165 | 3x |
compliance_data <- left_join( |
| 166 | 3x |
compliance_data, |
| 167 | 3x |
adsl, |
| 168 | 3x |
by = c("STUDYID", "USUBJID")
|
| 169 |
) |
|
| 170 |
# add completion to ADQLQC |
|
| 171 | 3x |
adqlqc <- bind_rows( |
| 172 | 3x |
adqlqc, |
| 173 | 3x |
compliance_data |
| 174 |
) %>% |
|
| 175 | 3x |
arrange( |
| 176 | 3x |
USUBJID, |
| 177 | 3x |
AVISITN, |
| 178 | 3x |
QSTESTCD |
| 179 |
) |
|
| 180 |
# find first set of questionnaire observations |
|
| 181 | 3x |
adqlqc_x <- arrange( |
| 182 | 3x |
adqlqc, |
| 183 | 3x |
USUBJID, |
| 184 | 3x |
ADTM |
| 185 |
) %>% |
|
| 186 | 3x |
filter( |
| 187 | 3x |
PARAMCD != "QSALL" & |
| 188 | 3x |
!str_detect(AVISIT, "SCREENING|UNSCHEDULED") |
| 189 |
) %>% |
|
| 190 | 3x |
group_by( |
| 191 | 3x |
USUBJID, |
| 192 | 3x |
ADTM |
| 193 |
) %>% |
|
| 194 | 3x |
summarise(first_date = first(ADTM), .groups = "drop") |
| 195 | ||
| 196 | 3x |
adqlqc <- left_join( |
| 197 | 3x |
adqlqc, |
| 198 | 3x |
adqlqc_x, |
| 199 | 3x |
by = c("USUBJID", "ADTM")
|
| 200 |
) %>% |
|
| 201 | 3x |
mutate( |
| 202 | 3x |
ANL01FL = case_when( |
| 203 | 3x |
PARAMCD != "QSALL" & ABLFL == "Y" ~ "Y", |
| 204 | 3x |
PARAMCD != "QSALL" & |
| 205 | 3x |
!str_detect(AVISIT, "UNSCHEDULED") & |
| 206 | 3x |
!is.na(first_date) ~ "Y" |
| 207 |
) |
|
| 208 |
) %>% |
|
| 209 | 3x |
select(-first_date) |
| 210 | ||
| 211 |
# final dataset ----------------------------------------------------------- |
|
| 212 | 3x |
adqlqc_final <- adqlqc %>% |
| 213 | 3x |
dplyr::group_by(USUBJID) %>% |
| 214 | 3x |
dplyr::mutate(ASEQ = row_number()) %>% |
| 215 | 3x |
dplyr::ungroup() %>% |
| 216 | 3x |
dplyr::arrange( |
| 217 | 3x |
STUDYID, |
| 218 | 3x |
USUBJID, |
| 219 | 3x |
AVISITN |
| 220 |
) %>% |
|
| 221 | 3x |
select( |
| 222 | 3x |
-c("BASE2", "CHG2", "PCHG2", "ABLFL2")
|
| 223 |
) %>% |
|
| 224 | 3x |
ungroup() |
| 225 | ||
| 226 | 3x |
adam_vars <- c( |
| 227 | 3x |
adsl_vars, "QSSEQ", "QSCAT", "QSSCAT", "QSDTC", "QSSPID", "QSSTAT", "QSSTRESN", |
| 228 | 3x |
"QSSTRESC", "QSSTRESU", "QSORRES", "QSORRESU", "QSTEST", "QSTESTCD", "QSTPT", |
| 229 | 3x |
"QSTPTNUM", "QSTPTREF", "QSDY", "QSREASND", "QSTSTDTL", "QSEVAL", "VISIT", "VISITNUM", |
| 230 | 3x |
"PARAM", "PARAMCD", "PARCAT1", "PARCAT1N", "PARCAT2", "AVAL", "AVALC", "AREASND", |
| 231 | 3x |
"BASE", "BASETYPE", "ABLFL", "CHG", "PCHG", "CHGCAT1", "CRIT1", "CRIT1FL", "DTYPE", |
| 232 | 3x |
"ADTM", "ADT", "ADY", "ADTF", "ATMF", "ATPT", "ATPTN", "AVISIT", "AVISITN", "APHASE", |
| 233 | 3x |
"APHASEN", "APERIOD", "APERIODC", "APERIODC", "ASPER", "ASPERC", "PERADY", "TRTP", |
| 234 | 3x |
"TRTA", "ONTRTFL", "LAST02FL", "FIRS02FL", "ANL01FL", "ANL02FL", "ANL03FL", |
| 235 | 3x |
"ANL04FL", "CGCAT1NX" |
| 236 |
) |
|
| 237 |
# order variables in mapped qs by variables in adam_vars |
|
| 238 | 3x |
adqlqc_name_ordered <- names(adqlqc_final)[order(match(names(adqlqc_final), adam_vars))] |
| 239 |
# adqlqc with variables ordered per gdsr |
|
| 240 | 3x |
adqlqc_final <- adqlqc_final %>% |
| 241 | 3x |
select( |
| 242 | 3x |
any_of(adqlqc_name_ordered) |
| 243 |
) |
|
| 244 | ||
| 245 | 3x |
adqlqc_final <- relocate(adqlqc_final, "QSEVLINT", .after = "QSTESTCD") %>% |
| 246 | 3x |
arrange( |
| 247 | 3x |
USUBJID, |
| 248 | 3x |
AVISITN, |
| 249 | 3x |
ASEQ, |
| 250 | 3x |
QSTESTCD |
| 251 |
) |
|
| 252 |
# apply metadata |
|
| 253 | 3x |
adqlqc_final <- apply_metadata(adqlqc_final, "metadata/ADQLQC.yml") |
| 254 | 3x |
return(adqlqc_final) |
| 255 |
} |
|
| 256 | ||
| 257 |
#' Helper Functions for Constructing ADQLQC |
|
| 258 |
#' |
|
| 259 |
#' Internal functions used by `radqlqc`. |
|
| 260 |
#' |
|
| 261 |
#' @inheritParams argument_convention |
|
| 262 |
#' @inheritParams radqlqc |
|
| 263 |
#' |
|
| 264 |
#' @examples |
|
| 265 |
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1) |
|
| 266 |
#' adqlqc <- radqlqc(adsl, seed = 1, percent = 80, number = 2) |
|
| 267 |
#' |
|
| 268 |
#' @name h_adqlqc |
|
| 269 |
NULL |
|
| 270 | ||
| 271 |
#' @describeIn h_adqlqc Questionnaires EORTC QLQ-C30 V3.0 SDTM (QS) |
|
| 272 |
#' |
|
| 273 |
#' Function for generating random Questionnaires SDTM domain |
|
| 274 |
#' |
|
| 275 |
#' @return a dataframe with SDTM questionnaire data |
|
| 276 |
#' @keywords internal |
|
| 277 |
get_qs_data <- function(adsl, |
|
| 278 |
visit_format = "CYCLE", |
|
| 279 |
n_assessments = 5L, |
|
| 280 |
n_days = 1L, |
|
| 281 |
lookup = NULL, |
|
| 282 |
seed = NULL, |
|
| 283 |
na_percentage = 0, |
|
| 284 |
na_vars = list( |
|
| 285 |
QSORRES = c(1234, 0.2), |
|
| 286 |
QSSTRESC = c(1234, 0.2) |
|
| 287 |
)) {
|
|
| 288 | 3x |
load(system.file("sysdata.rda", package = "random.cdisc.data"))
|
| 289 | 3x |
checkmate::assert_string(visit_format) |
| 290 | 3x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
| 291 | 3x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
| 292 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 293 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1, na.ok = TRUE) |
| 294 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
| 295 | 3x |
checkmate::assert_true(na_percentage < 1) |
| 296 | ||
| 297 |
# get subjects for QS data from ADSL |
|
| 298 |
# get studyid, subject for QS generation |
|
| 299 | 3x |
qs <- select( |
| 300 | 3x |
adsl, |
| 301 | 3x |
STUDYID, |
| 302 | 3x |
USUBJID |
| 303 |
) %>% |
|
| 304 | 3x |
mutate( |
| 305 | 3x |
DOMAIN = "QS" |
| 306 |
) |
|
| 307 | ||
| 308 |
# QS prep ----------------------------------------------------------------- |
|
| 309 |
# get questionnaire function for QS |
|
| 310 |
# QSTESTCD: EOR0101 to EOR0130 |
|
| 311 | 3x |
eortc_qlq_c30_sub <- filter( |
| 312 | 3x |
eortc_qlq_c30, |
| 313 | 3x |
as.numeric(str_extract(QSTESTCD, "\\d+$")) >= 101 & |
| 314 | 3x |
as.numeric(str_extract(QSTESTCD, "\\d+$")) <= 130 |
| 315 |
) %>% |
|
| 316 | 3x |
select(-publication_name) |
| 317 | ||
| 318 |
# validate and initialize QSTEST vectors |
|
| 319 | 3x |
qstest_init_list <- relvar_init( |
| 320 | 3x |
unique(eortc_qlq_c30_sub$QSTEST), |
| 321 | 3x |
unique(eortc_qlq_c30_sub$QSTESTCD) |
| 322 |
) |
|
| 323 | ||
| 324 | 3x |
if (!is.null(seed)) {
|
| 325 | 3x |
set.seed(seed) |
| 326 |
} |
|
| 327 | ||
| 328 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
| 329 | ||
| 330 | 3x |
lookup_qs <- if (!is.null(lookup)) {
|
| 331 | ! |
lookup |
| 332 |
} else {
|
|
| 333 | 3x |
expand.grid( |
| 334 | 3x |
STUDYID = unique(qs$STUDYID), |
| 335 | 3x |
USUBJID = qs$USUBJID, |
| 336 | 3x |
QSTEST = qstest_init_list$relvar1, |
| 337 | 3x |
VISIT = visit_schedule( |
| 338 | 3x |
visit_format = visit_format, |
| 339 | 3x |
n_assessments = n_assessments, |
| 340 | 3x |
n_days = n_days |
| 341 |
), |
|
| 342 | 3x |
stringsAsFactors = FALSE |
| 343 |
) |
|
| 344 |
} |
|
| 345 | ||
| 346 |
# assign related variable values: QSTESTxQSTESTCD are related |
|
| 347 | 3x |
lookup_qs <- lookup_qs %>% rel_var( |
| 348 | 3x |
var_name = "QSTESTCD", |
| 349 | 3x |
related_var = "QSTEST", |
| 350 | 3x |
var_values = qstest_init_list$relvar2 |
| 351 |
) |
|
| 352 | ||
| 353 | 3x |
lookup_qs <- left_join( |
| 354 | 3x |
lookup_qs, |
| 355 | 3x |
eortc_qlq_c30_sub, |
| 356 | 3x |
by = c( |
| 357 | 3x |
"QSTEST", |
| 358 | 3x |
"QSTESTCD" |
| 359 |
), |
|
| 360 | 3x |
multiple = "all", |
| 361 | 3x |
relationship = "many-to-many" |
| 362 |
) |
|
| 363 | ||
| 364 | 3x |
lookup_qs <- dplyr::mutate( |
| 365 | 3x |
lookup_qs, |
| 366 | 3x |
VISITNUM = dplyr::case_when( |
| 367 | 3x |
VISIT == "SCREENING" ~ -1, |
| 368 | 3x |
VISIT == "BASELINE" ~ 0, |
| 369 | 3x |
(grepl("^WEEK", VISIT) | grepl("^CYCLE", VISIT)) ~ as.numeric(VISIT) - 2,
|
| 370 | 3x |
TRUE ~ NA_real_ |
| 371 |
) |
|
| 372 | 3x |
) %>% arrange(USUBJID) |
| 373 | ||
| 374 |
# # prep QSALL -------------------------------------------------------------- |
|
| 375 |
# get last subject and visit for QSALL |
|
| 376 | 3x |
last_subj_vis <- select(lookup_qs, USUBJID, VISIT) %>% |
| 377 | 3x |
distinct() %>% |
| 378 | 3x |
slice(n()) |
| 379 | 3x |
last_subj_vis_full <- filter( |
| 380 | 3x |
lookup_qs, |
| 381 | 3x |
USUBJID == last_subj_vis$USUBJID, |
| 382 | 3x |
VISIT == last_subj_vis$VISIT |
| 383 |
) |
|
| 384 | ||
| 385 | 3x |
qsall_data1 <- tibble::tibble( |
| 386 | 3x |
STUDYID = unique(last_subj_vis_full$STUDYID), |
| 387 | 3x |
USUBJID = unique(last_subj_vis_full$USUBJID), |
| 388 | 3x |
VISIT = unique(last_subj_vis_full$VISIT), |
| 389 | 3x |
VISITNUM = unique(last_subj_vis_full$VISITNUM), |
| 390 | 3x |
QSTESTCD = "QSALL", |
| 391 | 3x |
QSTEST = "Questionnaires", |
| 392 | 3x |
QSSTAT = "NOT DONE", |
| 393 | 3x |
QSREASND = "SUBJECT REFUSED" |
| 394 |
) |
|
| 395 | ||
| 396 |
# remove last subject and visit from main data |
|
| 397 | 3x |
lookup_qs_sub <- anti_join( |
| 398 | 3x |
lookup_qs, |
| 399 | 3x |
last_subj_vis_full, |
| 400 | 3x |
by = c("USUBJID", "VISIT")
|
| 401 |
) |
|
| 402 | ||
| 403 | 3x |
set.seed(seed) |
| 404 | 3x |
lookup_qs_sub_x <- lookup_qs_sub %>% |
| 405 | 3x |
group_by( |
| 406 | 3x |
USUBJID, |
| 407 | 3x |
QSTESTCD, |
| 408 | 3x |
VISIT |
| 409 |
) %>% |
|
| 410 | 3x |
slice_sample(n = 1) %>% |
| 411 | 3x |
ungroup() %>% |
| 412 | 3x |
as.data.frame() |
| 413 | ||
| 414 | 3x |
lookup_qs_sub_x <- arrange( |
| 415 | 3x |
lookup_qs_sub_x, |
| 416 | 3x |
USUBJID, |
| 417 | 3x |
VISITNUM |
| 418 |
) |
|
| 419 | ||
| 420 |
# add date: QSDTC --------------------------------------------------------- |
|
| 421 |
# get treatment dates from ADSL |
|
| 422 | 3x |
adsl_trt <- select( |
| 423 | 3x |
adsl, |
| 424 | 3x |
USUBJID, |
| 425 | 3x |
TRTSDTM, |
| 426 | 3x |
TRTEDTM |
| 427 |
) |
|
| 428 |
# use to derive QSDTC |
|
| 429 |
# if no treatment end date, create an arbituary one |
|
| 430 | 3x |
trt_end_date <- max(adsl_trt$TRTEDTM, na.rm = TRUE) |
| 431 | ||
| 432 | 3x |
lookup_qs_sub_x <- left_join( |
| 433 | 3x |
lookup_qs_sub_x, |
| 434 | 3x |
adsl_trt, |
| 435 | 3x |
by = "USUBJID" |
| 436 |
) %>% |
|
| 437 | 3x |
group_by( |
| 438 | 3x |
USUBJID |
| 439 |
) %>% |
|
| 440 | 3x |
mutate(QSDTC = get_random_dates_between( |
| 441 | 3x |
from = TRTSDTM, |
| 442 | 3x |
to = ifelse( |
| 443 | 3x |
is.na(TRTEDTM), |
| 444 | 3x |
trt_end_date, |
| 445 | 3x |
TRTEDTM |
| 446 |
), |
|
| 447 | 3x |
visit_id = VISITNUM |
| 448 |
)) %>% |
|
| 449 | 3x |
select(-c("TRTSDTM", "TRTEDTM"))
|
| 450 | ||
| 451 |
# filter out subjects with missing dates |
|
| 452 | 3x |
lookup_qs_sub_x1 <- filter( |
| 453 | 3x |
lookup_qs_sub_x, |
| 454 | 3x |
!is.na(QSDTC) |
| 455 |
) |
|
| 456 | ||
| 457 |
# subjects with missing dates |
|
| 458 | 3x |
lookup_qs_sub_x2 <- filter( |
| 459 | 3x |
lookup_qs_sub_x, |
| 460 | 3x |
is.na(QSDTC) |
| 461 |
) %>% |
|
| 462 | 3x |
select( |
| 463 | 3x |
STUDYID, |
| 464 | 3x |
USUBJID, |
| 465 | 3x |
VISIT, |
| 466 | 3x |
VISITNUM |
| 467 |
) %>% |
|
| 468 | 3x |
distinct() |
| 469 | ||
| 470 |
# generate QSALL for subjects with missing dates |
|
| 471 | 3x |
qsall_data2 <- mutate( |
| 472 | 3x |
lookup_qs_sub_x2, |
| 473 | 3x |
QSTESTCD = "QSALL", |
| 474 | 3x |
QSTEST = "Questionnaires", |
| 475 | 3x |
QSSTAT = "NOT DONE", |
| 476 | 3x |
QSREASND = "SUBJECT REFUSED" |
| 477 |
) |
|
| 478 | ||
| 479 |
# add qsall data to original item data |
|
| 480 | 3x |
lookup_qs_sub_all <- bind_rows( |
| 481 | 3x |
lookup_qs_sub_x1, |
| 482 | 3x |
qsall_data1, |
| 483 | 3x |
qsall_data2 |
| 484 |
) |
|
| 485 | ||
| 486 | 3x |
qs_all <- lookup_qs_sub_all %>% |
| 487 | 3x |
arrange( |
| 488 | 3x |
STUDYID, |
| 489 | 3x |
USUBJID, |
| 490 | 3x |
VISITNUM |
| 491 |
) %>% |
|
| 492 | 3x |
dplyr::group_by(USUBJID) %>% |
| 493 | 3x |
dplyr::ungroup() |
| 494 | ||
| 495 |
# get first and second subject ids |
|
| 496 | 3x |
first_second_subj <- select(qs_all, USUBJID) %>% |
| 497 | 3x |
distinct() %>% |
| 498 | 3x |
slice(1:2) |
| 499 | ||
| 500 | 3x |
qs1 <- filter( |
| 501 | 3x |
qs_all, |
| 502 | 3x |
USUBJID %in% first_second_subj$USUBJID |
| 503 |
) |
|
| 504 | ||
| 505 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) {
|
| 506 | 3x |
qs1 <- mutate_na(ds = qs1, na_vars = na_vars, na_percentage = na_percentage) |
| 507 |
} |
|
| 508 | ||
| 509 |
# QSSTAT = NOT DONE |
|
| 510 | 3x |
qs1 <- mutate( |
| 511 | 3x |
qs1, |
| 512 | 3x |
QSSTAT = case_when( |
| 513 | 3x |
is.na(QSORRES) & is.na(QSSTRESC) ~ "NOT DONE" |
| 514 |
) |
|
| 515 |
) |
|
| 516 | ||
| 517 |
# remove first and second subjects from main data |
|
| 518 | 3x |
qs2 <- anti_join( |
| 519 | 3x |
qs_all, |
| 520 | 3x |
qs1, |
| 521 | 3x |
by = c("USUBJID")
|
| 522 |
) |
|
| 523 | ||
| 524 | 3x |
final_qs <- rbind( |
| 525 | 3x |
qs1, |
| 526 | 3x |
qs2 |
| 527 |
) %>% |
|
| 528 | 3x |
group_by(USUBJID) %>% |
| 529 | 3x |
dplyr::mutate(QSSEQ = row_number()) %>% |
| 530 | 3x |
arrange( |
| 531 | 3x |
STUDYID, |
| 532 | 3x |
USUBJID, |
| 533 | 3x |
VISITNUM |
| 534 |
) %>% |
|
| 535 | 3x |
ungroup() |
| 536 | ||
| 537 |
# ordered variables as per gdsr |
|
| 538 | 3x |
final_qs <- select( |
| 539 | 3x |
final_qs, |
| 540 | 3x |
STUDYID, |
| 541 | 3x |
USUBJID, |
| 542 | 3x |
QSSEQ, |
| 543 | 3x |
QSTESTCD, |
| 544 | 3x |
QSTEST, |
| 545 | 3x |
QSCAT, |
| 546 | 3x |
QSSCAT, |
| 547 | 3x |
QSORRES, |
| 548 | 3x |
QSORRESU, |
| 549 | 3x |
QSSTRESC, |
| 550 | 3x |
QSSTRESU, |
| 551 | 3x |
QSSTAT, |
| 552 | 3x |
QSREASND, |
| 553 | 3x |
VISITNUM, |
| 554 | 3x |
VISIT, |
| 555 | 3x |
QSDTC, |
| 556 | 3x |
QSEVLINT |
| 557 |
) |
|
| 558 | 3x |
return(final_qs) |
| 559 |
} |
|
| 560 | ||
| 561 |
#' @describeIn h_adqlqc Function for generating random dates between 2 dates |
|
| 562 |
#' |
|
| 563 |
#' @param from (`datetime vector`)\cr Start date/times. |
|
| 564 |
#' @param to (`datetime vector`)\cr End date/times. |
|
| 565 |
#' @param visit_id (`vector`)\cr Visit identifiers. |
|
| 566 |
#' |
|
| 567 |
#' @return Data frame with new randomly generated dates variable. |
|
| 568 |
#' @keywords internal |
|
| 569 |
get_random_dates_between <- function(from, to, visit_id) {
|
|
| 570 | 30x |
min_date <- min(lubridate::as_datetime(from), na.rm = TRUE) |
| 571 | 30x |
max_date <- max(lubridate::as_datetime(to), na.rm = TRUE) |
| 572 | 30x |
date_seq <- seq(from = min_date + lubridate::days(1), to = max_date, by = "28 days") |
| 573 | ||
| 574 | 30x |
visit_ids <- unique(visit_id) |
| 575 | 30x |
out <- sapply(visit_ids, simplify = TRUE, USE.NAMES = TRUE, FUN = function(x) {
|
| 576 | 177x |
if (x == -1) {
|
| 577 | 30x |
random_days_to_subtract <- lubridate::days(sample(1:10, size = 1)) |
| 578 | 30x |
min_date - random_days_to_subtract |
| 579 | 147x |
} else if (x == 0) {
|
| 580 | 30x |
min_date |
| 581 | 117x |
} else if (x > 0) {
|
| 582 | 117x |
if (x %in% seq_along(date_seq)) {
|
| 583 | 117x |
date_seq[[x]] |
| 584 |
} else {
|
|
| 585 | 30x |
NA |
| 586 |
} |
|
| 587 |
} |
|
| 588 |
}) |
|
| 589 | 30x |
lubridate::as_datetime(out[match(visit_id, visit_ids)]) |
| 590 |
} |
|
| 591 | ||
| 592 |
#' @describeIn h_adqlqc Prepare ADaM ADQLQC data, adding PARAMCD to SDTM QS data |
|
| 593 |
#' |
|
| 594 |
#' @param df (`data.frame`)\cr SDTM QS dataset. |
|
| 595 |
#' |
|
| 596 |
#' @return `data.frame` |
|
| 597 |
#' @keywords internal |
|
| 598 |
prep_adqlqc <- function(df) {
|
|
| 599 |
# create PARAMCD from QSTESTCD |
|
| 600 | 3x |
adqlqc <- dplyr::mutate( |
| 601 | 3x |
df, |
| 602 | 3x |
PARAMCD = case_when( |
| 603 | 3x |
QSTESTCD == "EOR0101" ~ "QS02801", |
| 604 | 3x |
QSTESTCD == "EOR0102" ~ "QS02802", |
| 605 | 3x |
QSTESTCD == "EOR0103" ~ "QS02803", |
| 606 | 3x |
QSTESTCD == "EOR0104" ~ "QS02804", |
| 607 | 3x |
QSTESTCD == "EOR0105" ~ "QS02805", |
| 608 | 3x |
QSTESTCD == "EOR0106" ~ "QS02806", |
| 609 | 3x |
QSTESTCD == "EOR0107" ~ "QS02807", |
| 610 | 3x |
QSTESTCD == "EOR0108" ~ "QS02808", |
| 611 | 3x |
QSTESTCD == "EOR0109" ~ "QS02809", |
| 612 | 3x |
QSTESTCD == "EOR0110" ~ "QS02810", |
| 613 | 3x |
QSTESTCD == "EOR0111" ~ "QS02811", |
| 614 | 3x |
QSTESTCD == "EOR0112" ~ "QS02812", |
| 615 | 3x |
QSTESTCD == "EOR0113" ~ "QS02813", |
| 616 | 3x |
QSTESTCD == "EOR0114" ~ "QS02814", |
| 617 | 3x |
QSTESTCD == "EOR0115" ~ "QS02815", |
| 618 | 3x |
QSTESTCD == "EOR0116" ~ "QS02816", |
| 619 | 3x |
QSTESTCD == "EOR0117" ~ "QS02817", |
| 620 | 3x |
QSTESTCD == "EOR0118" ~ "QS02818", |
| 621 | 3x |
QSTESTCD == "EOR0119" ~ "QS02819", |
| 622 | 3x |
QSTESTCD == "EOR0120" ~ "QS02820", |
| 623 | 3x |
QSTESTCD == "EOR0121" ~ "QS02821", |
| 624 | 3x |
QSTESTCD == "EOR0122" ~ "QS02822", |
| 625 | 3x |
QSTESTCD == "EOR0123" ~ "QS02823", |
| 626 | 3x |
QSTESTCD == "EOR0124" ~ "QS02824", |
| 627 | 3x |
QSTESTCD == "EOR0125" ~ "QS02825", |
| 628 | 3x |
QSTESTCD == "EOR0126" ~ "QS02826", |
| 629 | 3x |
QSTESTCD == "EOR0127" ~ "QS02827", |
| 630 | 3x |
QSTESTCD == "EOR0128" ~ "QS02828", |
| 631 | 3x |
QSTESTCD == "EOR0129" ~ "QS02829", |
| 632 | 3x |
QSTESTCD == "EOR0130" ~ "QS02830", |
| 633 | 3x |
TRUE ~ QSTESTCD |
| 634 |
) |
|
| 635 |
) |
|
| 636 | 3x |
load(system.file("sysdata.rda", package = "random.cdisc.data"))
|
| 637 | 3x |
adqlqc1 <- dplyr::left_join( |
| 638 | 3x |
adqlqc, |
| 639 | 3x |
gdsr_param_adqlqc, |
| 640 | 3x |
by = "PARAMCD" |
| 641 |
) |
|
| 642 | 3x |
return(adqlqc1) |
| 643 |
} |
|
| 644 | ||
| 645 |
#' @describeIn h_adqlqc Scale calculation for ADQLQC data |
|
| 646 |
#' |
|
| 647 |
#' @param adqlqc1 (`data.frame`)\cr Prepared data generated from the [prep_adqlqc()] function. |
|
| 648 |
#' |
|
| 649 |
#' @return `data.frame` |
|
| 650 |
#' @keywords internal |
|
| 651 |
calc_scales <- function(adqlqc1) {
|
|
| 652 |
# Prep scale data --------------------------------------------------------- |
|
| 653 |
# parcat2 = scales or global health status |
|
| 654 |
# global health status/scales data |
|
| 655 |
# QSTESTCD: EOR0131 to EOR0145 (global health status and scales) |
|
| 656 | 3x |
load(system.file("sysdata.rda", package = "random.cdisc.data"))
|
| 657 | 3x |
eortc_qlq_c30_sub <- filter( |
| 658 | 3x |
eortc_qlq_c30, |
| 659 | 3x |
!(as.numeric(str_extract(QSTESTCD, "\\d+$")) >= 101 & as.numeric(str_extract(QSTESTCD, "\\d+$")) <= 130) |
| 660 |
) %>% |
|
| 661 | 3x |
mutate( |
| 662 | 3x |
PARAMCD = case_when( |
| 663 | 3x |
QSTESTCD == "EOR0131" ~ "QS028QL2", |
| 664 | 3x |
QSTESTCD == "EOR0132" ~ "QS028PF2", |
| 665 | 3x |
QSTESTCD == "EOR0133" ~ "QS028RF2", |
| 666 | 3x |
QSTESTCD == "EOR0134" ~ "QS028EF", |
| 667 | 3x |
QSTESTCD == "EOR0135" ~ "QS028CF", |
| 668 | 3x |
QSTESTCD == "EOR0136" ~ "QS028SF", |
| 669 | 3x |
QSTESTCD == "EOR0137" ~ "QS028FA", |
| 670 | 3x |
QSTESTCD == "EOR0138" ~ "QS028NV", |
| 671 | 3x |
QSTESTCD == "EOR0139" ~ "QS028PA", |
| 672 | 3x |
QSTESTCD == "EOR0140" ~ "QS028DY", |
| 673 | 3x |
QSTESTCD == "EOR0141" ~ "QS028SL", |
| 674 | 3x |
QSTESTCD == "EOR0142" ~ "QS028AP", |
| 675 | 3x |
QSTESTCD == "EOR0143" ~ "QS028CO", |
| 676 | 3x |
QSTESTCD == "EOR0144" ~ "QS028DI", |
| 677 | 3x |
QSTESTCD == "EOR0145" ~ "QS028FI", |
| 678 | 3x |
TRUE ~ QSTESTCD |
| 679 |
) |
|
| 680 |
) %>% |
|
| 681 | 3x |
select(-publication_name) |
| 682 | ||
| 683 |
# ADaM global health status and scales from gdsr |
|
| 684 | 3x |
gdsr_param_adqlqc <- gdsr_param_adqlqc %>% |
| 685 | 3x |
filter( |
| 686 | 3x |
!str_detect(PARCAT2, "Original Items|Completion") |
| 687 |
) |
|
| 688 | ||
| 689 | 3x |
ghs_scales <- left_join( |
| 690 | 3x |
eortc_qlq_c30_sub, |
| 691 | 3x |
gdsr_param_adqlqc, |
| 692 | 3x |
by = "PARAMCD" |
| 693 |
) |
|
| 694 |
# scale data |
|
| 695 | 3x |
df <- data.frame(index = seq_len(nrow(ghs_scales))) |
| 696 | 3x |
df$previous <- list( |
| 697 | 3x |
c("QS02826", "QS02827"),
|
| 698 | 3x |
c("QS02811"),
|
| 699 | 3x |
c("QS02810", "QS02812", "QS02818"),
|
| 700 | 3x |
c("QS02806", "QS02807"),
|
| 701 | 3x |
c("QS02814", "QS02815"),
|
| 702 | 3x |
c("QS02808"),
|
| 703 | 3x |
c("QS02817"),
|
| 704 | 3x |
c("QS02816"),
|
| 705 | 3x |
c("QS02821", "QS02822", "QS02823", "QS02824"),
|
| 706 | 3x |
c("QS02829", "QS02830"),
|
| 707 | 3x |
c("QS02813"),
|
| 708 | 3x |
c("QS02801", "QS02802", "QS02803", "QS02804", "QS02805"),
|
| 709 | 3x |
c("QS02809", "QS02819"),
|
| 710 | 3x |
c("QS02820", "QS02825"),
|
| 711 | 3x |
c("QS02828")
|
| 712 |
) |
|
| 713 | 3x |
df$newName <- list( |
| 714 | 3x |
"QS028SF", |
| 715 | 3x |
"QS028SL", |
| 716 | 3x |
"QS028FA", |
| 717 | 3x |
"QS028RF2", |
| 718 | 3x |
"QS028NV", |
| 719 | 3x |
"QS028DY", |
| 720 | 3x |
"QS028DI", |
| 721 | 3x |
"QS028CO", |
| 722 | 3x |
"QS028EF", |
| 723 | 3x |
"QS028QL2", |
| 724 | 3x |
"QS028AP", |
| 725 | 3x |
"QS028PF2", |
| 726 | 3x |
"QS028PA", |
| 727 | 3x |
"QS028CF", |
| 728 | 3x |
"QS028FI" |
| 729 |
) |
|
| 730 | 3x |
df$newNamelabel <- list( |
| 731 | 3x |
"EORTC QLQ-C30: Social functioning", |
| 732 | 3x |
"EORTC QLQ-C30: Insomnia", |
| 733 | 3x |
"EORTC QLQ-C30: Fatigue", |
| 734 | 3x |
"EORTC QLQ-C30: Role functioning (revised)", |
| 735 | 3x |
"EORTC QLQ-C30: Nausea and vomiting", |
| 736 | 3x |
"EORTC QLQ-C30: Dyspnoea", |
| 737 | 3x |
"EORTC QLQ-C30: Diarrhoea", |
| 738 | 3x |
"EORTC QLQ-C30: Constipation", |
| 739 | 3x |
"EORTC QLQ-C30: Emotional functioning", |
| 740 | 3x |
"EORTC QLQ-C30: Global health status/QoL (revised)", |
| 741 | 3x |
"EORTC QLQ-C30: Appetite loss", |
| 742 | 3x |
"EORTC QLQ-C30: Physical functioning (revised)", |
| 743 | 3x |
"EORTC QLQ-C30: Pain", |
| 744 | 3x |
"EORTC QLQ-C30: Cognitive functioning", |
| 745 | 3x |
"EORTC QLQ-C30: Financial difficulties" |
| 746 |
) |
|
| 747 | 3x |
df$newNameCategory <- list( |
| 748 | 3x |
"Functional Scales", |
| 749 | 3x |
"Symptom Scales", |
| 750 | 3x |
"Symptom Scales", |
| 751 | 3x |
"Functional Scales", |
| 752 | 3x |
"Symptom Scales", |
| 753 | 3x |
"Symptom Scales", |
| 754 | 3x |
"Symptom Scales", |
| 755 | 3x |
"Symptom Scales", |
| 756 | 3x |
"Functional Scales", |
| 757 | 3x |
"Global Health Status", |
| 758 | 3x |
"Symptom Scales", |
| 759 | 3x |
"Functional Scales", |
| 760 | 3x |
"Symptom Scales", |
| 761 | 3x |
"Functional Scales", |
| 762 | 3x |
"Symptom Scales" |
| 763 |
) |
|
| 764 | 3x |
df$num_param <- list( |
| 765 | 3x |
"1", |
| 766 | 3x |
"1", |
| 767 | 3x |
"2", |
| 768 | 3x |
"1", |
| 769 | 3x |
"1", |
| 770 | 3x |
"1", |
| 771 | 3x |
"1", |
| 772 | 3x |
"1", |
| 773 | 3x |
"2", |
| 774 | 3x |
"1", |
| 775 | 3x |
"1", |
| 776 | 3x |
"3", |
| 777 | 3x |
"1", |
| 778 | 3x |
"1", |
| 779 | 3x |
"1" |
| 780 |
) |
|
| 781 | 3x |
df$equation <- list( |
| 782 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
| 783 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
| 784 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
| 785 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
| 786 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
| 787 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
| 788 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
| 789 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
| 790 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
| 791 | 3x |
"new_value = ((temp_val/var_length-1)/6)*100.0", |
| 792 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
| 793 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
| 794 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0", |
| 795 | 3x |
"new_value = (1 - ((temp_val/var_length)-1)/3)*100.0", |
| 796 | 3x |
"new_value = ((temp_val/var_length-1)/3)*100.0" |
| 797 |
) |
|
| 798 | ||
| 799 | 3x |
expect_data <- data.frame( |
| 800 | 3x |
PARAM = expect$PARAM, |
| 801 | 3x |
PARAMCD = expect$PARAMCD, |
| 802 | 3x |
PARCAT2 = expect$PARCAT2, |
| 803 | 3x |
PARCAT1N = expect$PARCAT1N, |
| 804 | 3x |
AVAL = c(0, 1), |
| 805 | 3x |
AVALC = c( |
| 806 | 3x |
"Not expected to complete questionnaire", |
| 807 | 3x |
"Expected to complete questionnaire" |
| 808 |
) |
|
| 809 |
) |
|
| 810 | ||
| 811 | 3x |
df_saved <- data.frame() |
| 812 | ||
| 813 | 3x |
unique_id <- unique(adqlqc1$USUBJID) |
| 814 | ||
| 815 | 3x |
for (id in unique_id) {
|
| 816 | 30x |
id_data <- adqlqc1[adqlqc1$USUBJID == id, ] |
| 817 | 30x |
unique_avisit <- unique(id_data$AVISIT) |
| 818 | 30x |
for (visit in unique_avisit) {
|
| 819 | 180x |
if (is.na(visit)) {
|
| 820 | ! |
next |
| 821 |
} |
|
| 822 | 180x |
id_data_at_visit <- id_data[id_data$AVISIT == visit, ] |
| 823 | ||
| 824 | 180x |
if (any(id_data_at_visit$PARAMCD != "QSALL")) {
|
| 825 | 177x |
for (idx in seq_along(df$index)) {
|
| 826 | 2655x |
previous_names <- df$previous[idx] |
| 827 | 2655x |
current_name <- df$newName[idx] |
| 828 | 2655x |
current_name_label <- df$newNamelabel[idx] |
| 829 | 2655x |
current_name_category <- df$newNameCategory[idx] |
| 830 | 2655x |
eqn <- df$equation[idx] |
| 831 | 2655x |
temp_val <- 0 |
| 832 | 2655x |
var_length <- 0 |
| 833 | 2655x |
for (param_name in previous_names[[1]]) {
|
| 834 | 5310x |
if (param_name %in% id_data_at_visit$PARAMCD) { ####
|
| 835 | 5310x |
current_val <- as.numeric(as.character(id_data_at_visit$AVAL[id_data_at_visit$PARAMCD == param_name])) |
| 836 | 5310x |
if (!is.na(current_val)) {
|
| 837 | 5094x |
temp_val <- temp_val + current_val ### |
| 838 | 5094x |
var_length <- var_length + 1 |
| 839 |
} |
|
| 840 |
} # if |
|
| 841 |
} # param_name |
|
| 842 |
# eval |
|
| 843 | 2655x |
if (var_length >= as.numeric(df$num_param[idx])) {
|
| 844 | 2604x |
eval(parse(text = eqn)) ##### |
| 845 |
} else {
|
|
| 846 | 51x |
new_value <- NA |
| 847 |
} |
|
| 848 | ||
| 849 | 2655x |
new_data_row <- data.frame( |
| 850 | 2655x |
study = str_extract(id, "[A-Z]+[0-9]+"), |
| 851 | 2655x |
id, |
| 852 | 2655x |
visit, |
| 853 | 2655x |
id_data_at_visit$AVISITN[1], |
| 854 | 2655x |
id_data_at_visit$QSDTC[1], |
| 855 | 2655x |
current_name_category, |
| 856 | 2655x |
current_name_label, |
| 857 | 2655x |
current_name, |
| 858 | 2655x |
new_value, |
| 859 | 2655x |
NA, |
| 860 | 2655x |
stringsAsFactors = FALSE |
| 861 |
) |
|
| 862 | 2655x |
colnames(new_data_row) <- c( |
| 863 | 2655x |
"STUDYID", "USUBJID", "AVISIT", "AVISITN", |
| 864 | 2655x |
"ADTM", "PARCAT2", "PARAM", "PARAMCD", |
| 865 | 2655x |
"AVAL", "AVALC" |
| 866 |
) ### |
|
| 867 | 2655x |
df_saved <- rbind(df_saved, new_data_row) ##### |
| 868 |
} # idx |
|
| 869 |
} |
|
| 870 |
# add expect data |
|
| 871 | 180x |
expect_value <- sample(expect_data$AVAL, 1, prob = c(0.10, 0.90)) |
| 872 | 180x |
expect_valuec <- expect_data$AVALC[expect_data$AVAL == expect_value] |
| 873 | ||
| 874 | 180x |
new_data_row <- data.frame( |
| 875 | 180x |
study = str_extract(id, "[A-Z]+[0-9]+"), |
| 876 | 180x |
id, |
| 877 | 180x |
visit, |
| 878 | 180x |
id_data_at_visit$AVISITN[1], |
| 879 | 180x |
datetime = NA, |
| 880 | 180x |
expect_data$PARCAT2[1], |
| 881 | 180x |
expect_data$PARAM[1], |
| 882 | 180x |
expect_data$PARAMCD[1], |
| 883 | 180x |
expect_value, |
| 884 | 180x |
expect_valuec, |
| 885 | 180x |
stringsAsFactors = FALSE |
| 886 |
) |
|
| 887 | 180x |
colnames(new_data_row) <- c( |
| 888 | 180x |
"STUDYID", "USUBJID", "AVISIT", "AVISITN", |
| 889 | 180x |
"ADTM", "PARCAT2", "PARAM", "PARAMCD", "AVAL", |
| 890 | 180x |
"AVALC" |
| 891 |
) ### |
|
| 892 | 180x |
df_saved <- rbind(df_saved, new_data_row) |
| 893 |
} # visit |
|
| 894 |
} # id |
|
| 895 | ||
| 896 | 3x |
df_saved1 <- left_join( |
| 897 | 3x |
df_saved, |
| 898 | 3x |
ghs_scales, |
| 899 | 3x |
by = c( |
| 900 | 3x |
"PARAM", |
| 901 | 3x |
"PARAMCD", |
| 902 | 3x |
"PARCAT2" |
| 903 |
) |
|
| 904 |
) %>% |
|
| 905 | 3x |
mutate( |
| 906 | 3x |
AVALC = ifelse(is.na(AVALC), as.character(AVAL), AVALC), |
| 907 | 3x |
PARCAT1 = ifelse(PARAMCD == "EX028", expect$PARCAT1, PARCAT1), |
| 908 | 3x |
PARCAT1N = ifelse(PARAMCD == "EX028", expect$PARCAT1N, PARCAT1N) |
| 909 |
) |
|
| 910 | ||
| 911 | 3x |
adqlqc_tmp <- bind_rows(adqlqc1, df_saved1) %>% |
| 912 | 3x |
arrange( |
| 913 | 3x |
USUBJID, |
| 914 | 3x |
AVISITN, |
| 915 | 3x |
QSTESTCD |
| 916 |
) |
|
| 917 | 3x |
return(adqlqc_tmp) |
| 918 |
} |
|
| 919 | ||
| 920 |
#' @describeIn h_adqlqc Calculate Change from Baseline Category 1 |
|
| 921 |
#' |
|
| 922 |
#' @param dataset (`data.frame`)\cr ADaM dataset. |
|
| 923 |
#' |
|
| 924 |
#' @return `data.frame` |
|
| 925 |
#' @keywords internal |
|
| 926 |
derv_chgcat1 <- function(dataset) {
|
|
| 927 |
# derivation of CHGCAT1 |
|
| 928 | 3x |
check_vars <- c("PARCAT2", "CHG")
|
| 929 | ||
| 930 | 3x |
if (all(check_vars %in% names(dataset))) {
|
| 931 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 932 | 3x |
dataset$PARCAT2 == "Symptom Scales" & !is.na(dataset$CHG) & dataset$CHG <= -10, |
| 933 | 3x |
"Improved", "" |
| 934 |
) |
|
| 935 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 936 | 3x |
dataset$PARCAT2 == "Symptom Scales" & !is.na(dataset$CHG) & dataset$CHG >= 10, |
| 937 | 3x |
"Worsened", dataset$CHGCAT1 |
| 938 |
) |
|
| 939 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 940 | 3x |
dataset$PARCAT2 == "Symptom Scales" & |
| 941 | 3x |
!is.na(dataset$CHG) & dataset$CHG > -10 & |
| 942 | 3x |
dataset$CHG < 10, |
| 943 | 3x |
"No change", dataset$CHGCAT1 |
| 944 |
) |
|
| 945 | ||
| 946 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 947 | 3x |
dataset$PARCAT2 %in% c("Functional Scales", "Global Health Status") &
|
| 948 | 3x |
!is.na(dataset$CHG) & dataset$CHG >= 10, |
| 949 | 3x |
"Improved", dataset$CHGCAT1 |
| 950 |
) |
|
| 951 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 952 | 3x |
dataset$PARCAT2 %in% c("Functional Scales", "Global Health Status") &
|
| 953 | 3x |
!is.na(dataset$CHG) & dataset$CHG <= -10, |
| 954 | 3x |
"Worsened", dataset$CHGCAT1 |
| 955 |
) |
|
| 956 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 957 | 3x |
dataset$PARCAT2 %in% c("Functional Scales", "Global Health Status") &
|
| 958 | 3x |
!is.na(dataset$CHG) & |
| 959 | 3x |
dataset$CHG > -10 & dataset$CHG < 10, |
| 960 | 3x |
"No change", dataset$CHGCAT1 |
| 961 |
) |
|
| 962 | ||
| 963 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 964 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 6,
|
| 965 | 3x |
"Improved by six levels", dataset$CHGCAT1 |
| 966 |
) |
|
| 967 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 968 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 5,
|
| 969 | 3x |
"Improved by five levels", dataset$CHGCAT1 |
| 970 |
) |
|
| 971 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 972 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 4,
|
| 973 | 3x |
"Improved by four levels", dataset$CHGCAT1 |
| 974 |
) |
|
| 975 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 976 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 3,
|
| 977 | 3x |
"Improved by three levels", dataset$CHGCAT1 |
| 978 |
) |
|
| 979 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 980 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 2,
|
| 981 | 3x |
"Improved by two levels", dataset$CHGCAT1 |
| 982 |
) |
|
| 983 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 984 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 1,
|
| 985 | 3x |
"Improved by one level", dataset$CHGCAT1 |
| 986 |
) |
|
| 987 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 988 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == 0,
|
| 989 | 3x |
"No change", dataset$CHGCAT1 |
| 990 |
) |
|
| 991 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 992 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -1,
|
| 993 | 3x |
"Worsened by one level", dataset$CHGCAT1 |
| 994 |
) |
|
| 995 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 996 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -2,
|
| 997 | 3x |
"Worsened by two levels", dataset$CHGCAT1 |
| 998 |
) |
|
| 999 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1000 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -3,
|
| 1001 | 3x |
"Worsened by three levels", dataset$CHGCAT1 |
| 1002 |
) |
|
| 1003 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1004 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -4,
|
| 1005 | 3x |
"Worsened by four levels", dataset$CHGCAT1 |
| 1006 |
) |
|
| 1007 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1008 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -5,
|
| 1009 | 3x |
"Worsened by five levels", dataset$CHGCAT1 |
| 1010 |
) |
|
| 1011 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1012 | 3x |
dataset$PARAMCD %in% c("QS02829", "QS02830") & dataset$CHG == -6,
|
| 1013 | 3x |
"Worsened by six levels", dataset$CHGCAT1 |
| 1014 |
) |
|
| 1015 | ||
| 1016 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1017 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == -3,
|
| 1018 | 3x |
"Improved by three levels", dataset$CHGCAT1 |
| 1019 |
) |
|
| 1020 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1021 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == -2,
|
| 1022 | 3x |
"Improved by two levels", dataset$CHGCAT1 |
| 1023 |
) |
|
| 1024 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1025 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == -1,
|
| 1026 | 3x |
"Improved by one level", dataset$CHGCAT1 |
| 1027 |
) |
|
| 1028 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1029 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == 0,
|
| 1030 | 3x |
"No change", dataset$CHGCAT1 |
| 1031 |
) |
|
| 1032 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1033 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == 1,
|
| 1034 | 3x |
"Worsened by one level", dataset$CHGCAT1 |
| 1035 |
) |
|
| 1036 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1037 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == 2,
|
| 1038 | 3x |
"Worsened by two levels", dataset$CHGCAT1 |
| 1039 |
) |
|
| 1040 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1041 | 3x |
dataset$PARAMCD %in% c("QS02802", "QS02806") & dataset$CHG == 3,
|
| 1042 | 3x |
"Worsened by three levels", dataset$CHGCAT1 |
| 1043 |
) |
|
| 1044 | ||
| 1045 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1046 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == -3, |
| 1047 | 3x |
"Improved by three levels", dataset$CHGCAT1 |
| 1048 |
) |
|
| 1049 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1050 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == -2, |
| 1051 | 3x |
"Improved by two levels", dataset$CHGCAT1 |
| 1052 |
) |
|
| 1053 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1054 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == -1, |
| 1055 | 3x |
"Improved by one level", dataset$CHGCAT1 |
| 1056 |
) |
|
| 1057 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1058 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == 0, |
| 1059 | 3x |
"No changed", dataset$CHGCAT1 |
| 1060 |
) |
|
| 1061 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1062 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == 1, |
| 1063 | 3x |
"Worsened by one level", dataset$CHGCAT1 |
| 1064 |
) |
|
| 1065 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1066 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == 2, |
| 1067 | 3x |
"Worsened by two levels", dataset$CHGCAT1 |
| 1068 |
) |
|
| 1069 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1070 | 3x |
dataset$PARAMCD == "QS02801" & dataset$CHG == 3, |
| 1071 | 3x |
"Worsened by three levels", dataset$CHGCAT1 |
| 1072 |
) |
|
| 1073 | ||
| 1074 | 3x |
paramcd_vec <- c( |
| 1075 | 3x |
"QS02803", "QS02804", "QS02805", "QS02807", "QS02808", "QS02809", "QS02810", |
| 1076 | 3x |
"QS02811", "QS02812", "QS02813", "QS02814", "QS02815", "QS02816", "QS02817", |
| 1077 | 3x |
"QS02818", "QS02819", "QS02820", "QS02821", "QS02822", "QS02823", "QS02824", |
| 1078 | 3x |
"QS02825", "QS02826", "QS02827", "QS02828" |
| 1079 |
) |
|
| 1080 | ||
| 1081 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1082 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == -3, |
| 1083 | 3x |
"Improved by three levels", dataset$CHGCAT1 |
| 1084 |
) |
|
| 1085 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1086 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == -2, |
| 1087 | 3x |
"Improved by two levels", dataset$CHGCAT1 |
| 1088 |
) |
|
| 1089 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1090 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == -1, |
| 1091 | 3x |
"Improved by one level", dataset$CHGCAT1 |
| 1092 |
) |
|
| 1093 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1094 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == 0, |
| 1095 | 3x |
"No change", dataset$CHGCAT1 |
| 1096 |
) |
|
| 1097 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1098 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == 1, |
| 1099 | 3x |
"Worsened by one level", dataset$CHGCAT1 |
| 1100 |
) |
|
| 1101 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1102 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == 2, |
| 1103 | 3x |
"Worsened by two levels", dataset$CHGCAT1 |
| 1104 |
) |
|
| 1105 | 3x |
dataset$CHGCAT1 <- ifelse( |
| 1106 | 3x |
dataset$PARAMCD %in% paramcd_vec & dataset$CHG == 3, |
| 1107 | 3x |
"Worsened by three levels", dataset$CHGCAT1 |
| 1108 |
) |
|
| 1109 | ||
| 1110 | 3x |
return(dataset) |
| 1111 |
} else {
|
|
| 1112 | ! |
collapse_vars <- paste(check_vars, collapse = ", ") |
| 1113 | ! |
stop(sprintf( |
| 1114 | ! |
"%s: one or both variables is/are missing, needed for derivation", |
| 1115 | ! |
collapse_vars |
| 1116 |
)) |
|
| 1117 |
} |
|
| 1118 |
} |
|
| 1119 | ||
| 1120 |
#' @describeIn h_adqlqc Completion/Compliance Data Calculation |
|
| 1121 |
#' |
|
| 1122 |
#' @param dataset (`data.frame`)\cr Dataset. |
|
| 1123 |
#' |
|
| 1124 |
#' @return `data.frame` |
|
| 1125 |
#' @keywords internal |
|
| 1126 |
comp_derv <- function(dataset, percent, number) {
|
|
| 1127 |
# original items data |
|
| 1128 | 3x |
orig_data <- filter( |
| 1129 | 3x |
dataset, |
| 1130 | 3x |
PARCAT2 == "Original Items" |
| 1131 |
) |
|
| 1132 |
# total number of questionnaires |
|
| 1133 | 3x |
comp_count_all <- select( |
| 1134 | 3x |
orig_data, |
| 1135 | 3x |
PARAMCD |
| 1136 |
) %>% |
|
| 1137 | 3x |
distinct() %>% |
| 1138 | 3x |
count() |
| 1139 | 3x |
comp_count_all <- comp_count_all$n |
| 1140 |
# original items data count of questions answered |
|
| 1141 | 3x |
orig_data_summ <- group_by( |
| 1142 | 3x |
orig_data, |
| 1143 | 3x |
STUDYID, |
| 1144 | 3x |
USUBJID, |
| 1145 | 3x |
PARCAT1, |
| 1146 | 3x |
AVISIT, |
| 1147 | 3x |
AVISITN, |
| 1148 | 3x |
ADTM, |
| 1149 | 3x |
ADY |
| 1150 |
) %>% |
|
| 1151 | 3x |
summarise( |
| 1152 | 3x |
comp_count = sum(!is.na(AVAL)), |
| 1153 | 3x |
comp_count_all = comp_count_all, |
| 1154 | 3x |
.groups = "drop" |
| 1155 |
) %>% |
|
| 1156 | 3x |
mutate( |
| 1157 | 3x |
per_comp = trunc((comp_count / comp_count_all) * 100) |
| 1158 |
) |
|
| 1159 |
# expected data |
|
| 1160 | 3x |
ex028_data <- filter( |
| 1161 | 3x |
dataset, |
| 1162 | 3x |
PARAMCD == "EX028", |
| 1163 | 3x |
AVAL == 1 |
| 1164 |
) %>% |
|
| 1165 | 3x |
select( |
| 1166 | 3x |
STUDYID, |
| 1167 | 3x |
USUBJID, |
| 1168 | 3x |
PARCAT1, |
| 1169 | 3x |
AVISIT, |
| 1170 | 3x |
AVISITN, |
| 1171 | 3x |
ADTM, |
| 1172 | 3x |
ADY, |
| 1173 | 3x |
AVAL_ex028 = AVAL |
| 1174 |
) %>% |
|
| 1175 | 3x |
mutate( |
| 1176 | 3x |
comp_count_all = comp_count_all |
| 1177 |
) |
|
| 1178 | ||
| 1179 | 3x |
joined <- left_join( |
| 1180 | 3x |
ex028_data, |
| 1181 | 3x |
orig_data_summ, |
| 1182 | 3x |
by = c( |
| 1183 | 3x |
"STUDYID", |
| 1184 | 3x |
"USUBJID", |
| 1185 | 3x |
"PARCAT1", |
| 1186 | 3x |
"AVISIT", |
| 1187 | 3x |
"AVISITN", |
| 1188 | 3x |
"comp_count_all" |
| 1189 |
) |
|
| 1190 |
) %>% |
|
| 1191 | 3x |
select(-c("ADTM.x", "ADY.x"))
|
| 1192 | ||
| 1193 | 3x |
joined <- rename( |
| 1194 | 3x |
joined, |
| 1195 | 3x |
ADTM = ADTM.y, |
| 1196 | 3x |
ADY = ADY.y |
| 1197 |
) |
|
| 1198 |
# CO028ALL |
|
| 1199 | 3x |
co028all <- mutate( |
| 1200 | 3x |
joined, |
| 1201 | 3x |
PARAMCD = "CO028ALL", |
| 1202 | 3x |
PARAM = "EORTC QLQ-C30: Completion - Completed all questions", |
| 1203 | 3x |
PARCAT2 = "Completion", |
| 1204 | 3x |
AVAL = case_when( |
| 1205 | 3x |
AVAL_ex028 == 1 & comp_count == comp_count_all ~ 1, |
| 1206 | 3x |
AVAL_ex028 == 1 & (is.na(comp_count) | comp_count < comp_count_all) ~ 0 |
| 1207 |
), |
|
| 1208 | 3x |
AVALC = case_when( |
| 1209 | 3x |
AVAL == 1 ~ "Completed all questions", |
| 1210 | 3x |
AVAL == 0 ~ "Did not complete all questions" |
| 1211 |
) |
|
| 1212 |
) |
|
| 1213 |
# CO028<y>P |
|
| 1214 | 3x |
co028p <- mutate( |
| 1215 | 3x |
joined, |
| 1216 | 3x |
PARAMCD = paste0("CO028", as.character(percent), "P"),
|
| 1217 | 3x |
PARAM = sprintf( |
| 1218 | 3x |
"EORTC QLQ-C30: Completion - Completed at least %s%% of questions", |
| 1219 | 3x |
as.character(percent) |
| 1220 |
), |
|
| 1221 | 3x |
PARCAT2 = "Completion", |
| 1222 | 3x |
AVAL = case_when( |
| 1223 | 3x |
AVAL_ex028 == 1 & per_comp >= percent ~ 1, |
| 1224 | 3x |
AVAL_ex028 == 1 & (is.na(per_comp) | per_comp < percent) ~ 0 |
| 1225 |
), |
|
| 1226 | 3x |
AVALC = case_when( |
| 1227 | 3x |
AVAL == 1 ~ sprintf( |
| 1228 | 3x |
"Completed at least %s%% of questions", |
| 1229 | 3x |
as.character(percent) |
| 1230 |
), |
|
| 1231 | 3x |
AVAL == 0 ~ sprintf( |
| 1232 | 3x |
"Did not complete at least %s%% of questions", |
| 1233 | 3x |
as.character(percent) |
| 1234 |
) |
|
| 1235 |
) |
|
| 1236 |
) |
|
| 1237 |
# CO028<x>Q |
|
| 1238 | 3x |
co028q <- mutate( |
| 1239 | 3x |
joined, |
| 1240 | 3x |
PARAMCD = paste0("CO028", as.character(number), "Q"),
|
| 1241 | 3x |
PARAM = sprintf( |
| 1242 | 3x |
"EORTC QLQ-C30: Completion - Completed at least %s question(s)", |
| 1243 | 3x |
as.character(number) |
| 1244 |
), |
|
| 1245 | 3x |
PARCAT2 = "Completion", |
| 1246 | 3x |
AVAL = case_when( |
| 1247 | 3x |
AVAL_ex028 == 1 & comp_count >= number ~ 1, |
| 1248 | 3x |
AVAL_ex028 == 1 & (comp_count < number | is.na(comp_count)) ~ 0 |
| 1249 |
), |
|
| 1250 | 3x |
AVALC = case_when( |
| 1251 | 3x |
AVAL == 1 ~ sprintf( |
| 1252 | 3x |
"Completed at least %s questions", |
| 1253 | 3x |
as.character(number) |
| 1254 |
), |
|
| 1255 | 3x |
AVAL == 0 ~ sprintf( |
| 1256 | 3x |
"Did not complete at least %s question(s)", |
| 1257 | 3x |
as.character(number) |
| 1258 |
) |
|
| 1259 |
) |
|
| 1260 |
) |
|
| 1261 | ||
| 1262 | 3x |
co028_bind <- rbind( |
| 1263 | 3x |
co028all, |
| 1264 | 3x |
co028p, |
| 1265 | 3x |
co028q |
| 1266 |
) %>% |
|
| 1267 | 3x |
select( |
| 1268 | 3x |
-c("AVAL_ex028", "comp_count", "comp_count_all", "per_comp")
|
| 1269 |
) |
|
| 1270 | 3x |
return(co028_bind) |
| 1271 |
} |
| 1 |
#' Exposure Analysis Dataset (ADEX) |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Function for generating random Exposure Analysis Dataset for a given |
|
| 6 |
#' Subject-Level Analysis Dataset. |
|
| 7 |
#' |
|
| 8 |
#' @details One record per each record in the corresponding SDTM domain. |
|
| 9 |
#' |
|
| 10 |
#' Keys: `STUDYID`, `USUBJID`, `EXSEQ`, `PARAMCD`, `PARCAT1`, `ASTDTM`, `AENDTM`, `ASTDY`, `AENDY`, |
|
| 11 |
#' `AVISITN`, `EXDOSFRQ`, `EXROUTE`, `VISIT`, `VISITDY`, `EXSTDTC`, `EXENDTC`, `EXSTDY`, `EXENDY` |
|
| 12 |
#' |
|
| 13 |
#' @inheritParams argument_convention |
|
| 14 |
#' @param parcat1 (`character vector`)\cr Dose amount categories. Defaults to "Individual" and "Overall". |
|
| 15 |
#' @param parcat2 (`character vector`)\cr Types of drug received. Defaults to "Drug A" and "Drug B". |
|
| 16 |
#' @param max_n_exs (`integer`)\cr Maximum number of exposures per patient. Defaults to 6. |
|
| 17 |
#' @template param_cached |
|
| 18 |
#' @templateVar data adex |
|
| 19 |
#' |
|
| 20 |
#' @return `data.frame` |
|
| 21 |
#' @export |
|
| 22 |
#' |
|
| 23 |
#' @examples |
|
| 24 |
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1) |
|
| 25 |
#' |
|
| 26 |
#' adex <- radex(adsl, seed = 2) |
|
| 27 |
#' adex |
|
| 28 |
radex <- function(adsl, |
|
| 29 |
param = c( |
|
| 30 |
"Dose administered during constant dosing interval", |
|
| 31 |
"Number of doses administered during constant dosing interval", |
|
| 32 |
"Total dose administered", |
|
| 33 |
"Total number of doses administered" |
|
| 34 |
), |
|
| 35 |
paramcd = c("DOSE", "NDOSE", "TDOSE", "TNDOSE"),
|
|
| 36 |
paramu = c("mg", " ", "mg", " "),
|
|
| 37 |
parcat1 = c("INDIVIDUAL", "OVERALL"),
|
|
| 38 |
parcat2 = c("Drug A", "Drug B"),
|
|
| 39 |
visit_format = "WEEK", |
|
| 40 |
n_assessments = 5L, |
|
| 41 |
n_days = 5L, |
|
| 42 |
max_n_exs = 6L, |
|
| 43 |
lookup = NULL, |
|
| 44 |
seed = NULL, |
|
| 45 |
na_percentage = 0, |
|
| 46 |
na_vars = list(AVAL = c(NA, 0.1), AVALU = c(NA), 0.1), |
|
| 47 |
cached = FALSE) {
|
|
| 48 | 4x |
checkmate::assert_flag(cached) |
| 49 | 4x |
if (cached) {
|
| 50 | 1x |
return(get_cached_data("cadex"))
|
| 51 |
} |
|
| 52 | ||
| 53 | 3x |
checkmate::assert_data_frame(adsl) |
| 54 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
| 55 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
| 56 | 3x |
checkmate::assert_character(parcat1, min.len = 1, any.missing = FALSE) |
| 57 | 3x |
checkmate::assert_character(parcat2, min.len = 1, any.missing = FALSE) |
| 58 | 3x |
checkmate::assert_string(visit_format) |
| 59 | 3x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
| 60 | 3x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
| 61 | 3x |
checkmate::assert_integer(max_n_exs, len = 1, any.missing = FALSE) |
| 62 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
| 63 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 64 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
| 65 | 3x |
checkmate::assert_true(na_percentage < 1) |
| 66 | ||
| 67 |
# validate and initialize related variables |
|
| 68 | 3x |
param_init_list <- relvar_init(param, paramcd) |
| 69 | 3x |
unit_init_list <- relvar_init(param, paramu) |
| 70 | ||
| 71 | 3x |
if (!is.null(seed)) {
|
| 72 | 3x |
set.seed(seed) |
| 73 |
} |
|
| 74 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
| 75 | ||
| 76 | 3x |
adex <- expand.grid( |
| 77 | 3x |
STUDYID = unique(adsl$STUDYID), |
| 78 | 3x |
USUBJID = adsl$USUBJID, |
| 79 | 3x |
PARAM = c( |
| 80 | 3x |
rep( |
| 81 | 3x |
param_init_list$relvar1[1], |
| 82 | 3x |
length(levels(visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days))) |
| 83 |
), |
|
| 84 | 3x |
rep( |
| 85 | 3x |
param_init_list$relvar1[2], |
| 86 | 3x |
length(levels(visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days))) |
| 87 |
), |
|
| 88 | 3x |
param_init_list$relvar1[3:4] |
| 89 |
), |
|
| 90 | 3x |
stringsAsFactors = FALSE |
| 91 |
) |
|
| 92 | ||
| 93 |
# assign related variable values: PARAMxPARAMCD are related |
|
| 94 | 3x |
adex <- adex %>% rel_var( |
| 95 | 3x |
var_name = "PARAMCD", |
| 96 | 3x |
related_var = "PARAM", |
| 97 | 3x |
var_values = param_init_list$relvar2 |
| 98 |
) |
|
| 99 | ||
| 100 |
# assign related variable values: AVALUxPARAM are related |
|
| 101 | 3x |
adex <- adex %>% rel_var( |
| 102 | 3x |
var_name = "AVALU", |
| 103 | 3x |
related_var = "PARAM", |
| 104 | 3x |
var_values = unit_init_list$relvar2 |
| 105 |
) |
|
| 106 | ||
| 107 | 3x |
adex <- adex %>% |
| 108 | 3x |
dplyr::group_by(USUBJID) %>% |
| 109 | 3x |
dplyr::mutate(PARCAT_ind = sample(c(1, 2), size = 1)) %>% |
| 110 | 3x |
dplyr::mutate(PARCAT2 = ifelse(PARCAT_ind == 1, parcat2[1], parcat2[2])) %>% |
| 111 | 3x |
dplyr::select(-"PARCAT_ind") |
| 112 | ||
| 113 |
# Add in PARCAT1 |
|
| 114 | 3x |
adex <- adex %>% dplyr::mutate(PARCAT1 = dplyr::case_when( |
| 115 | 3x |
(PARAMCD == "TNDOSE" | PARAMCD == "TDOSE") ~ "OVERALL", |
| 116 | 3x |
PARAMCD == "DOSE" | PARAMCD == "NDOSE" ~ "INDIVIDUAL" |
| 117 |
)) |
|
| 118 | ||
| 119 | 3x |
adex_visit <- adex %>% |
| 120 | 3x |
dplyr::filter(PARAMCD == "DOSE" | PARAMCD == "NDOSE") %>% |
| 121 | 3x |
dplyr::mutate( |
| 122 | 3x |
AVISIT = rep(visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days), 2) |
| 123 |
) |
|
| 124 | ||
| 125 | 3x |
adex <- dplyr::left_join( |
| 126 | 3x |
adex %>% |
| 127 | 3x |
dplyr::group_by( |
| 128 | 3x |
USUBJID, |
| 129 | 3x |
STUDYID, |
| 130 | 3x |
PARAM, |
| 131 | 3x |
PARAMCD, |
| 132 | 3x |
AVALU, |
| 133 | 3x |
PARCAT1, |
| 134 | 3x |
PARCAT2 |
| 135 |
) %>% |
|
| 136 | 3x |
dplyr::mutate(id = dplyr::row_number()), |
| 137 | 3x |
adex_visit %>% |
| 138 | 3x |
dplyr::group_by( |
| 139 | 3x |
USUBJID, |
| 140 | 3x |
STUDYID, |
| 141 | 3x |
PARAM, |
| 142 | 3x |
PARAMCD, |
| 143 | 3x |
AVALU, |
| 144 | 3x |
PARCAT1, |
| 145 | 3x |
PARCAT2 |
| 146 |
) %>% |
|
| 147 | 3x |
dplyr::mutate(id = dplyr::row_number()), |
| 148 | 3x |
by = c("USUBJID", "STUDYID", "PARCAT1", "PARCAT2", "id", "PARAMCD", "PARAM", "AVALU")
|
| 149 |
) %>% |
|
| 150 | 3x |
dplyr::select(-"id") |
| 151 | ||
| 152 |
# Visit numbers |
|
| 153 | 3x |
adex <- adex %>% dplyr::mutate(AVISITN = dplyr::case_when( |
| 154 | 3x |
AVISIT == "SCREENING" ~ -1, |
| 155 | 3x |
AVISIT == "BASELINE" ~ 0, |
| 156 | 3x |
(grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
|
| 157 | 3x |
TRUE ~ 999000 |
| 158 |
)) |
|
| 159 | ||
| 160 | ||
| 161 | 3x |
adex2 <- split(adex, adex$USUBJID) %>% |
| 162 | 3x |
lapply(function(pinfo) {
|
| 163 | 30x |
pinfo %>% |
| 164 | 30x |
dplyr::filter(PARAMCD == "DOSE") %>% |
| 165 | 30x |
dplyr::group_by(USUBJID, PARCAT2, AVISIT) %>% |
| 166 | 30x |
dplyr::mutate(changeind = dplyr::case_when( |
| 167 | 30x |
AVISIT == "SCREENING" ~ 0, |
| 168 | 30x |
AVISIT != "SCREENING" ~ sample(c(-1, 0, 1), |
| 169 | 30x |
size = 1, |
| 170 | 30x |
prob = c(0.25, 0.5, 0.25), |
| 171 | 30x |
replace = TRUE |
| 172 |
) |
|
| 173 |
)) %>% |
|
| 174 | 30x |
dplyr::ungroup() %>% |
| 175 | 30x |
dplyr::group_by(USUBJID, PARCAT2) %>% |
| 176 | 30x |
dplyr::mutate( |
| 177 | 30x |
csum = cumsum(changeind), |
| 178 | 30x |
changeind = dplyr::case_when( |
| 179 | 30x |
csum <= -3 ~ sample(c(0, 1), size = 1, prob = c(0.5, 0.5)), |
| 180 | 30x |
csum >= 3 ~ sample(c(0, -1), size = 1, prob = c(0.5, 0.5)), |
| 181 | 30x |
TRUE ~ changeind |
| 182 |
) |
|
| 183 |
) %>% |
|
| 184 | 30x |
dplyr::mutate(csum = cumsum(changeind)) %>% |
| 185 | 30x |
dplyr::ungroup() %>% |
| 186 | 30x |
dplyr::group_by(USUBJID, PARCAT2, AVISIT) %>% |
| 187 | 30x |
dplyr::mutate(AVAL = dplyr::case_when( |
| 188 | 30x |
csum == -2 ~ 480, |
| 189 | 30x |
csum == -1 ~ 720, |
| 190 | 30x |
csum == 0 ~ 960, |
| 191 | 30x |
csum == 1 ~ 1200, |
| 192 | 30x |
csum == 2 ~ 1440 |
| 193 |
)) %>% |
|
| 194 | 30x |
dplyr::select(-c("csum", "changeind")) %>%
|
| 195 | 30x |
dplyr::ungroup() |
| 196 |
}) %>% |
|
| 197 | 3x |
Reduce(rbind, .) |
| 198 | ||
| 199 | 3x |
adex_tmp <- dplyr::full_join(adex2, adex, by = names(adex)) |
| 200 | 3x |
adex <- adex_tmp %>% |
| 201 | 3x |
dplyr::group_by(USUBJID) %>% |
| 202 | 3x |
dplyr::mutate(AVAL = ifelse(PARAMCD == "NDOSE", 1, AVAL)) %>% |
| 203 | 3x |
dplyr::mutate(AVAL = ifelse( |
| 204 | 3x |
PARAMCD == "TNDOSE", |
| 205 | 3x |
sum(AVAL[PARAMCD == "NDOSE"]), |
| 206 | 3x |
AVAL |
| 207 |
)) %>% |
|
| 208 | 3x |
dplyr::ungroup() %>% |
| 209 | 3x |
dplyr::group_by(USUBJID, STUDYID, PARCAT2) %>% |
| 210 | 3x |
dplyr::mutate(AVAL = ifelse( |
| 211 | 3x |
PARAMCD == "TDOSE", |
| 212 | 3x |
sum(AVAL[PARAMCD == "DOSE"]), |
| 213 | 3x |
AVAL |
| 214 |
)) |
|
| 215 | ||
| 216 | 3x |
adex <- rcd_var_relabel( |
| 217 | 3x |
adex, |
| 218 | 3x |
STUDYID = "Study Identifier", |
| 219 | 3x |
USUBJID = "Unique Subject Identifier" |
| 220 |
) |
|
| 221 | ||
| 222 |
# merge ADSL to be able to add ADEX date and study day variables |
|
| 223 | 3x |
adex <- dplyr::inner_join(adex, adsl, by = c("STUDYID", "USUBJID")) %>%
|
| 224 | 3x |
dplyr::rowwise() %>% |
| 225 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
| 226 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
| 227 | 3x |
TRUE ~ TRTEDTM |
| 228 |
))) %>% |
|
| 229 | 3x |
dplyr::mutate(ASTDTM = sample( |
| 230 | 3x |
seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"), |
| 231 | 3x |
size = 1 |
| 232 |
)) %>% |
|
| 233 |
# add 1 to end of range incase both values passed to sample() are the same |
|
| 234 | 3x |
dplyr::mutate(AENDTM = sample( |
| 235 | 3x |
seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"), |
| 236 | 3x |
size = 1 |
| 237 |
)) %>% |
|
| 238 | 3x |
dplyr::select(-TRTENDT) %>% |
| 239 | 3x |
dplyr::ungroup() %>% |
| 240 | 3x |
dplyr::arrange(STUDYID, USUBJID, ASTDTM) |
| 241 | ||
| 242 | ||
| 243 | 3x |
adex <- adex %>% |
| 244 | 3x |
dplyr::group_by(USUBJID) %>% |
| 245 | 3x |
dplyr::mutate(EXSEQ = seq_len(dplyr::n())) %>% |
| 246 | 3x |
dplyr::mutate(ASEQ = EXSEQ) %>% |
| 247 | 3x |
dplyr::ungroup() %>% |
| 248 | 3x |
dplyr::arrange( |
| 249 | 3x |
STUDYID, |
| 250 | 3x |
USUBJID, |
| 251 | 3x |
PARAMCD, |
| 252 | 3x |
ASTDTM, |
| 253 | 3x |
AVISITN, |
| 254 | 3x |
EXSEQ |
| 255 |
) |
|
| 256 | ||
| 257 |
# Adding EXDOSFRQ |
|
| 258 | 3x |
adex <- adex %>% |
| 259 | 3x |
dplyr::mutate(EXDOSFRQ = dplyr::case_when( |
| 260 | 3x |
PARCAT1 == "INDIVIDUAL" ~ "ONCE", |
| 261 | 3x |
TRUE ~ "" |
| 262 |
)) |
|
| 263 | ||
| 264 |
# Adding EXROUTE |
|
| 265 | 3x |
adex <- adex %>% |
| 266 | 3x |
dplyr::mutate(EXROUTE = dplyr::case_when( |
| 267 | 3x |
PARCAT1 == "INDIVIDUAL" ~ sample(c("INTRAVENOUS", "SUBCUTANEOUS"),
|
| 268 | 3x |
nrow(adex), |
| 269 | 3x |
replace = TRUE, |
| 270 | 3x |
prob = c(0.9, 0.1) |
| 271 |
), |
|
| 272 | 3x |
TRUE ~ "" |
| 273 |
)) |
|
| 274 | ||
| 275 |
# Fix VISIT according to AVISIT |
|
| 276 | 3x |
adex <- adex %>% |
| 277 | 3x |
dplyr::mutate(VISIT = AVISIT) |
| 278 | ||
| 279 |
# Hack for VISITDY - to fix in ADSL |
|
| 280 | 3x |
visit_levels <- str_extract(levels(adex$VISIT), pattern = "[0-9]+") |
| 281 | 3x |
vl_extracted <- vapply(visit_levels, function(x) as.numeric(x[2]), numeric(1)) |
| 282 | 3x |
vl_extracted <- c(-1, 1, vl_extracted[!is.na(vl_extracted)]) |
| 283 | ||
| 284 |
# Adding VISITDY |
|
| 285 | 3x |
adex <- adex %>% |
| 286 | 3x |
dplyr::mutate(VISITDY = as.numeric(as.character(factor(VISIT, labels = vl_extracted)))) |
| 287 | ||
| 288 |
# Exposure time stamps |
|
| 289 | 3x |
adex <- adex %>% |
| 290 | 3x |
dplyr::mutate( |
| 291 | 3x |
EXSTDTC = TRTSDTM + lubridate::days(VISITDY), |
| 292 | 3x |
EXENDTC = EXSTDTC + lubridate::hours(1), |
| 293 | 3x |
EXSTDY = VISITDY, |
| 294 | 3x |
EXENDY = VISITDY |
| 295 |
) |
|
| 296 | ||
| 297 |
# Correcting last exposure to treatment |
|
| 298 | 3x |
adex <- adex %>% |
| 299 | 3x |
dplyr::group_by(SUBJID) %>% |
| 300 | 3x |
dplyr::mutate(TRTEDTM = lubridate::as_datetime(max(EXENDTC, na.rm = TRUE))) %>% |
| 301 | 3x |
dplyr::ungroup() |
| 302 | ||
| 303 |
# Fixing Date - to add into ADSL |
|
| 304 | 3x |
adex <- adex %>% |
| 305 | 3x |
dplyr::mutate( |
| 306 | 3x |
TRTSDT = lubridate::date(TRTSDTM), |
| 307 | 3x |
TRTEDT = lubridate::date(TRTEDTM) |
| 308 |
) |
|
| 309 | ||
| 310 |
# Fixing analysis time stamps |
|
| 311 | 3x |
adex <- adex %>% |
| 312 | 3x |
dplyr::mutate( |
| 313 | 3x |
ASTDY = EXSTDY, |
| 314 | 3x |
AENDY = EXENDY, |
| 315 | 3x |
ASTDTM = EXSTDTC, |
| 316 | 3x |
AENDTM = EXENDTC |
| 317 |
) |
|
| 318 | ||
| 319 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) {
|
| 320 | ! |
adex <- mutate_na(ds = adex, na_vars = na_vars, na_percentage = na_percentage) |
| 321 |
} |
|
| 322 | ||
| 323 |
# apply metadata |
|
| 324 | 3x |
adex <- apply_metadata(adex, "metadata/ADEX.yml") |
| 325 |
} |
|
| 326 | ||
| 327 |
# Equivalent of stringr::str_extract_all() |
|
| 328 |
str_extract <- function(string, pattern) {
|
|
| 329 | 2850x |
regmatches(string, gregexpr(pattern, string)) |
| 330 |
} |
| 1 |
#' Laboratory Data Analysis Dataset (ADLB) |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Function for generating a random Laboratory Data Analysis Dataset for a given |
|
| 6 |
#' Subject-Level Analysis Dataset. |
|
| 7 |
#' |
|
| 8 |
#' @details One record per subject per parameter per analysis visit per analysis date. |
|
| 9 |
#' |
|
| 10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `BASETYPE`, `AVISITN`, `ATPTN`, `DTYPE`, `ADTM`, `LBSEQ`, `ASPID` |
|
| 11 |
# |
|
| 12 |
#' @inheritParams argument_convention |
|
| 13 |
#' @param lbcat (`character vector`)\cr LB category values. |
|
| 14 |
#' @param max_n_lbs (`integer`)\cr Maximum number of labs per patient. Defaults to 10. |
|
| 15 |
#' @template param_cached |
|
| 16 |
#' @templateVar data adlb |
|
| 17 |
#' |
|
| 18 |
#' @return `data.frame` |
|
| 19 |
#' @export |
|
| 20 |
#' |
|
| 21 |
#' @author tomlinsj, npaszty, Xuefeng Hou |
|
| 22 |
#' |
|
| 23 |
#' @examples |
|
| 24 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
| 25 |
#' |
|
| 26 |
#' adlb <- radlb(adsl, visit_format = "WEEK", n_assessments = 7L, seed = 2) |
|
| 27 |
#' adlb |
|
| 28 |
#' |
|
| 29 |
#' adlb <- radlb(adsl, visit_format = "CYCLE", n_assessments = 2L, seed = 2) |
|
| 30 |
#' adlb |
|
| 31 |
radlb <- function(adsl, |
|
| 32 |
lbcat = c("CHEMISTRY", "CHEMISTRY", "IMMUNOLOGY"),
|
|
| 33 |
param = c( |
|
| 34 |
"Alanine Aminotransferase Measurement", |
|
| 35 |
"C-Reactive Protein Measurement", |
|
| 36 |
"Immunoglobulin A Measurement" |
|
| 37 |
), |
|
| 38 |
paramcd = c("ALT", "CRP", "IGA"),
|
|
| 39 |
paramu = c("U/L", "mg/L", "g/L"),
|
|
| 40 |
aval_mean = c(18, 9, 2.9), |
|
| 41 |
visit_format = "WEEK", |
|
| 42 |
n_assessments = 5L, |
|
| 43 |
n_days = 5L, |
|
| 44 |
max_n_lbs = 10L, |
|
| 45 |
lookup = NULL, |
|
| 46 |
seed = NULL, |
|
| 47 |
na_percentage = 0, |
|
| 48 |
na_vars = list( |
|
| 49 |
LOQFL = c(NA, 0.1), ABLFL2 = c(1234, 0.1), ABLFL = c(1235, 0.1), |
|
| 50 |
BASE2 = c(NA, 0.1), BASE = c(NA, 0.1), |
|
| 51 |
CHG2 = c(1235, 0.1), PCHG2 = c(1235, 0.1), CHG = c(1234, 0.1), PCHG = c(1234, 0.1) |
|
| 52 |
), |
|
| 53 |
cached = FALSE) {
|
|
| 54 | 4x |
checkmate::assert_flag(cached) |
| 55 | 4x |
if (cached) {
|
| 56 | 1x |
return(get_cached_data("cadlb"))
|
| 57 |
} |
|
| 58 | ||
| 59 | 3x |
checkmate::assert_data_frame(adsl) |
| 60 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
| 61 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
| 62 | 3x |
checkmate::assert_character(paramu, min.len = 1, any.missing = FALSE) |
| 63 | 3x |
checkmate::assert_character(lbcat, min.len = 1, any.missing = FALSE) |
| 64 | 3x |
checkmate::assert_string(visit_format) |
| 65 | 3x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
| 66 | 3x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
| 67 | 3x |
checkmate::assert_integer(max_n_lbs, len = 1, any.missing = FALSE) |
| 68 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
| 69 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 70 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
| 71 | 3x |
checkmate::assert_true(na_percentage < 1) |
| 72 | ||
| 73 |
# validate and initialize related variables |
|
| 74 | 3x |
lbcat_init_list <- relvar_init(param, lbcat) |
| 75 | 3x |
param_init_list <- relvar_init(param, paramcd) |
| 76 | 3x |
unit_init_list <- relvar_init(param, paramu) |
| 77 | ||
| 78 | 3x |
if (!is.null(seed)) {
|
| 79 | 3x |
set.seed(seed) |
| 80 |
} |
|
| 81 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
| 82 | ||
| 83 | 3x |
adlb <- expand.grid( |
| 84 | 3x |
STUDYID = unique(adsl$STUDYID), |
| 85 | 3x |
USUBJID = adsl$USUBJID, |
| 86 | 3x |
PARAM = as.factor(param_init_list$relvar1), |
| 87 | 3x |
AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days), |
| 88 | 3x |
stringsAsFactors = FALSE |
| 89 |
) |
|
| 90 | ||
| 91 |
# assign AVAL based on different tests |
|
| 92 | 3x |
adlb <- adlb %>% mutate(AVAL = case_when( |
| 93 | 3x |
PARAM == param[1] ~ abs(stats::rnorm(nrow(adlb), mean = aval_mean[1], sd = 10)), |
| 94 | 3x |
PARAM == param[2] ~ abs(stats::rnorm(nrow(adlb), mean = aval_mean[2], sd = 1)), |
| 95 | 3x |
PARAM == param[3] ~ abs(stats::rnorm(nrow(adlb), mean = aval_mean[3], sd = 0.1)) |
| 96 |
)) |
|
| 97 | ||
| 98 |
# assign related variable values: PARAMxLBCAT are related |
|
| 99 | 3x |
adlb <- adlb %>% rel_var( |
| 100 | 3x |
var_name = "LBCAT", |
| 101 | 3x |
related_var = "PARAM", |
| 102 | 3x |
var_values = lbcat_init_list$relvar2 |
| 103 |
) |
|
| 104 | ||
| 105 |
# assign related variable values: PARAMxPARAMCD are related |
|
| 106 | 3x |
adlb <- adlb %>% rel_var( |
| 107 | 3x |
var_name = "PARAMCD", |
| 108 | 3x |
related_var = "PARAM", |
| 109 | 3x |
var_values = param_init_list$relvar2 |
| 110 |
) |
|
| 111 | ||
| 112 | 3x |
adlb <- adlb %>% |
| 113 | 3x |
dplyr::mutate(LBTESTCD = PARAMCD) %>% |
| 114 | 3x |
dplyr::mutate(LBTEST = PARAM) |
| 115 | ||
| 116 | 3x |
adlb <- adlb %>% dplyr::mutate(AVISITN = dplyr::case_when( |
| 117 | 3x |
AVISIT == "SCREENING" ~ -1, |
| 118 | 3x |
AVISIT == "BASELINE" ~ 0, |
| 119 | 3x |
(grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
|
| 120 | 3x |
TRUE ~ NA_real_ |
| 121 |
)) |
|
| 122 | ||
| 123 | 3x |
adlb <- adlb %>% rel_var( |
| 124 | 3x |
var_name = "AVALU", |
| 125 | 3x |
related_var = "PARAM", |
| 126 | 3x |
var_values = unit_init_list$relvar2 |
| 127 |
) |
|
| 128 | ||
| 129 | 3x |
adlb <- adlb %>% |
| 130 | 3x |
dplyr::mutate(AVISITN = dplyr::case_when( |
| 131 | 3x |
AVISIT == "SCREENING" ~ -1, |
| 132 | 3x |
AVISIT == "BASELINE" ~ 0, |
| 133 | 3x |
(grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
|
| 134 | 3x |
TRUE ~ NA_real_ |
| 135 |
)) |
|
| 136 | ||
| 137 |
# order to prepare for change from screening and baseline values |
|
| 138 | 3x |
adlb <- adlb[order(adlb$STUDYID, adlb$USUBJID, adlb$PARAMCD, adlb$AVISITN), ] |
| 139 | ||
| 140 | 3x |
adlb <- Reduce(rbind, lapply(split(adlb, adlb$USUBJID), function(x) {
|
| 141 | 30x |
x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])] |
| 142 | 30x |
x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "") |
| 143 | 30x |
x$ABLFL <- ifelse(toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE", |
| 144 | 30x |
"Y", |
| 145 | 30x |
ifelse(toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1", "Y", "") |
| 146 |
) |
|
| 147 | 30x |
x |
| 148 |
})) |
|
| 149 | ||
| 150 | 3x |
adlb$BASE2 <- retain(adlb, adlb$AVAL, adlb$ABLFL2 == "Y") |
| 151 | 3x |
adlb$BASE <- ifelse(adlb$ABLFL2 != "Y", retain(adlb, adlb$AVAL, adlb$ABLFL == "Y"), NA) |
| 152 | ||
| 153 | 3x |
adlb <- adlb %>% |
| 154 | 3x |
dplyr::mutate(CHG2 = AVAL - BASE2) %>% |
| 155 | 3x |
dplyr::mutate(PCHG2 = 100 * (CHG2 / BASE2)) %>% |
| 156 | 3x |
dplyr::mutate(CHG = AVAL - BASE) %>% |
| 157 | 3x |
dplyr::mutate(PCHG = 100 * (CHG / BASE)) %>% |
| 158 | 3x |
dplyr::mutate(BASETYPE = "LAST") %>% |
| 159 | 3x |
dplyr::mutate(ANRLO = dplyr::case_when( |
| 160 | 3x |
PARAMCD == "ALT" ~ 7, |
| 161 | 3x |
PARAMCD == "CRP" ~ 8, |
| 162 | 3x |
PARAMCD == "IGA" ~ 0.8 |
| 163 |
)) %>% |
|
| 164 | 3x |
dplyr::mutate(ANRHI = dplyr::case_when( |
| 165 | 3x |
PARAMCD == "ALT" ~ 55, |
| 166 | 3x |
PARAMCD == "CRP" ~ 10, |
| 167 | 3x |
PARAMCD == "IGA" ~ 3 |
| 168 |
)) %>% |
|
| 169 | 3x |
dplyr::mutate(ANRIND = factor(dplyr::case_when( |
| 170 | 3x |
AVAL < ANRLO ~ "LOW", |
| 171 | 3x |
AVAL > ANRHI ~ "HIGH", |
| 172 | 3x |
TRUE ~ "NORMAL" |
| 173 |
))) %>% |
|
| 174 | 3x |
dplyr::mutate(LBSTRESC = factor(dplyr::case_when( |
| 175 | 3x |
PARAMCD == "ALT" ~ "<7", |
| 176 | 3x |
PARAMCD == "CRP" ~ "<8", |
| 177 | 3x |
PARAMCD == "IGA" ~ ">3" |
| 178 |
))) %>% |
|
| 179 | 3x |
dplyr::rowwise() %>% |
| 180 | 3x |
dplyr::mutate(LOQFL = factor( |
| 181 | 3x |
ifelse(eval(parse(text = paste(AVAL, LBSTRESC))), "Y", "N") |
| 182 |
)) %>% |
|
| 183 | 3x |
dplyr::ungroup() %>% |
| 184 | 3x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
| 185 | 3x |
dplyr::mutate(BNRIND = ANRIND[ABLFL == "Y"]) %>% |
| 186 | 3x |
dplyr::ungroup() %>% |
| 187 | 3x |
dplyr::mutate(SHIFT1 = factor(ifelse( |
| 188 | 3x |
AVISITN > 0, |
| 189 | 3x |
paste( |
| 190 | 3x |
retain( |
| 191 | 3x |
adlb, as.character(BNRIND), |
| 192 | 3x |
AVISITN == 0 |
| 193 |
), |
|
| 194 | 3x |
ANRIND, |
| 195 | 3x |
sep = " to " |
| 196 |
), |
|
| 197 |
"" |
|
| 198 |
))) %>% |
|
| 199 | 3x |
dplyr::mutate(ATOXGR = factor(dplyr::case_when( |
| 200 | 3x |
ANRIND == "LOW" ~ sample( |
| 201 | 3x |
c("-1", "-2", "-3", "-4", "-5"),
|
| 202 | 3x |
nrow(adlb), |
| 203 | 3x |
replace = TRUE, |
| 204 | 3x |
prob = c(0.30, 0.25, 0.20, 0.15, 0) |
| 205 |
), |
|
| 206 | 3x |
ANRIND == "HIGH" ~ sample( |
| 207 | 3x |
c("1", "2", "3", "4", "5"),
|
| 208 | 3x |
nrow(adlb), |
| 209 | 3x |
replace = TRUE, |
| 210 | 3x |
prob = c(0.30, 0.25, 0.20, 0.15, 0) |
| 211 |
), |
|
| 212 | 3x |
ANRIND == "NORMAL" ~ "0" |
| 213 |
))) %>% |
|
| 214 | 3x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
| 215 | 3x |
dplyr::mutate(BTOXGR = ATOXGR[ABLFL == "Y"]) %>% |
| 216 | 3x |
dplyr::ungroup() %>% |
| 217 | 3x |
dplyr::mutate(ATPTN = 1) %>% |
| 218 | 3x |
dplyr::mutate(DTYPE = NA) %>% |
| 219 | 3x |
dplyr::mutate(BTOXGRL = factor(dplyr::case_when( |
| 220 | 3x |
BTOXGR == "0" ~ "0", |
| 221 | 3x |
BTOXGR == "-1" ~ "1", |
| 222 | 3x |
BTOXGR == "-2" ~ "2", |
| 223 | 3x |
BTOXGR == "-3" ~ "3", |
| 224 | 3x |
BTOXGR == "-4" ~ "4", |
| 225 | 3x |
BTOXGR == "1" ~ "<Missing>", |
| 226 | 3x |
BTOXGR == "2" ~ "<Missing>", |
| 227 | 3x |
BTOXGR == "3" ~ "<Missing>", |
| 228 | 3x |
BTOXGR == "4" ~ "<Missing>" |
| 229 |
))) %>% |
|
| 230 | 3x |
dplyr::mutate(BTOXGRH = factor(dplyr::case_when( |
| 231 | 3x |
BTOXGR == "0" ~ "0", |
| 232 | 3x |
BTOXGR == "1" ~ "1", |
| 233 | 3x |
BTOXGR == "2" ~ "2", |
| 234 | 3x |
BTOXGR == "3" ~ "3", |
| 235 | 3x |
BTOXGR == "4" ~ "4", |
| 236 | 3x |
BTOXGR == "-1" ~ "<Missing>", |
| 237 | 3x |
BTOXGR == "-2" ~ "<Missing>", |
| 238 | 3x |
BTOXGR == "-3" ~ "<Missing>", |
| 239 | 3x |
BTOXGR == "-4" ~ "<Missing>", |
| 240 |
))) %>% |
|
| 241 | 3x |
dplyr::mutate(ATOXGRL = factor(dplyr::case_when( |
| 242 | 3x |
ATOXGR == "0" ~ "0", |
| 243 | 3x |
ATOXGR == "-1" ~ "1", |
| 244 | 3x |
ATOXGR == "-2" ~ "2", |
| 245 | 3x |
ATOXGR == "-3" ~ "3", |
| 246 | 3x |
ATOXGR == "-4" ~ "4", |
| 247 | 3x |
ATOXGR == "1" ~ "<Missing>", |
| 248 | 3x |
ATOXGR == "2" ~ "<Missing>", |
| 249 | 3x |
ATOXGR == "3" ~ "<Missing>", |
| 250 | 3x |
ATOXGR == "4" ~ "<Missing>", |
| 251 |
))) %>% |
|
| 252 | 3x |
dplyr::mutate(ATOXGRH = factor(dplyr::case_when( |
| 253 | 3x |
ATOXGR == "0" ~ "0", |
| 254 | 3x |
ATOXGR == "1" ~ "1", |
| 255 | 3x |
ATOXGR == "2" ~ "2", |
| 256 | 3x |
ATOXGR == "3" ~ "3", |
| 257 | 3x |
ATOXGR == "4" ~ "4", |
| 258 | 3x |
ATOXGR == "-1" ~ "<Missing>", |
| 259 | 3x |
ATOXGR == "-2" ~ "<Missing>", |
| 260 | 3x |
ATOXGR == "-3" ~ "<Missing>", |
| 261 | 3x |
ATOXGR == "-4" ~ "<Missing>", |
| 262 |
))) %>% |
|
| 263 | 3x |
rcd_var_relabel( |
| 264 | 3x |
STUDYID = attr(adsl$STUDYID, "label"), |
| 265 | 3x |
USUBJID = attr(adsl$USUBJID, "label") |
| 266 |
) |
|
| 267 | ||
| 268 |
# High and low descriptions of the different PARAMCD values |
|
| 269 |
# This is currently hard coded as the GDSR does not have these descriptions yet |
|
| 270 | 3x |
grade_lookup <- tibble::tribble( |
| 271 | 3x |
~PARAMCD, ~ATOXDSCL, ~ATOXDSCH, |
| 272 | 3x |
"ALB", "Hypoalbuminemia", NA_character_, |
| 273 | 3x |
"ALKPH", NA_character_, "Alkaline phosphatase increased", |
| 274 | 3x |
"ALT", NA_character_, "Alanine aminotransferase increased", |
| 275 | 3x |
"AST", NA_character_, "Aspartate aminotransferase increased", |
| 276 | 3x |
"BILI", NA_character_, "Blood bilirubin increased", |
| 277 | 3x |
"CA", "Hypocalcemia", "Hypercalcemia", |
| 278 | 3x |
"CHOLES", NA_character_, "Cholesterol high", |
| 279 | 3x |
"CK", NA_character_, "CPK increased", |
| 280 | 3x |
"CREAT", NA_character_, "Creatinine increased", |
| 281 | 3x |
"CRP", NA_character_, "C reactive protein increased", |
| 282 | 3x |
"GGT", NA_character_, "GGT increased", |
| 283 | 3x |
"GLUC", "Hypoglycemia", "Hyperglycemia", |
| 284 | 3x |
"HGB", "Anemia", "Hemoglobin increased", |
| 285 | 3x |
"IGA", NA_character_, "Immunoglobulin A increased", |
| 286 | 3x |
"POTAS", "Hypokalemia", "Hyperkalemia", |
| 287 | 3x |
"LYMPH", "CD4 lymphocytes decreased", NA_character_, |
| 288 | 3x |
"PHOS", "Hypophosphatemia", NA_character_, |
| 289 | 3x |
"PLAT", "Platelet count decreased", NA_character_, |
| 290 | 3x |
"SODIUM", "Hyponatremia", "Hypernatremia", |
| 291 | 3x |
"WBC", "White blood cell decreased", "Leukocytosis", |
| 292 |
) |
|
| 293 | ||
| 294 |
# merge grade_lookup onto adlb |
|
| 295 | 3x |
adlb <- dplyr::left_join(adlb, grade_lookup, by = "PARAMCD") |
| 296 | ||
| 297 | 3x |
adlb <- rcd_var_relabel( |
| 298 | 3x |
adlb, |
| 299 | 3x |
STUDYID = "Study Identifier", |
| 300 | 3x |
USUBJID = "Unique Subject Identifier" |
| 301 |
) |
|
| 302 | ||
| 303 |
# merge ADSL to be able to add LB date and study day variables |
|
| 304 | 3x |
adlb <- dplyr::inner_join( |
| 305 | 3x |
adlb, |
| 306 | 3x |
adsl, |
| 307 | 3x |
by = c("STUDYID", "USUBJID")
|
| 308 |
) %>% |
|
| 309 | 3x |
dplyr::rowwise() %>% |
| 310 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
| 311 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
| 312 | 3x |
TRUE ~ TRTEDTM |
| 313 |
))) %>% |
|
| 314 | 3x |
dplyr::ungroup() |
| 315 | ||
| 316 | 3x |
adlb <- adlb %>% |
| 317 | 3x |
dplyr::group_by(USUBJID) %>% |
| 318 | 3x |
dplyr::arrange(USUBJID, AVISITN) %>% |
| 319 | 3x |
dplyr::mutate(ADTM = rep( |
| 320 | 3x |
sort(sample( |
| 321 | 3x |
seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"), |
| 322 | 3x |
size = nlevels(AVISIT) |
| 323 |
)), |
|
| 324 | 3x |
each = n() / nlevels(AVISIT) |
| 325 |
)) %>% |
|
| 326 | 3x |
dplyr::ungroup() %>% |
| 327 | 3x |
dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>% |
| 328 | 3x |
dplyr::select(-TRTENDT) %>% |
| 329 | 3x |
dplyr::arrange(STUDYID, USUBJID, ADTM) |
| 330 | ||
| 331 | 3x |
adlb <- adlb %>% |
| 332 | 3x |
dplyr::mutate(ASPID = sample(seq_len(dplyr::n()))) %>% |
| 333 | 3x |
dplyr::group_by(USUBJID) %>% |
| 334 | 3x |
dplyr::mutate(LBSEQ = seq_len(dplyr::n())) %>% |
| 335 | 3x |
dplyr::mutate(ASEQ = LBSEQ) %>% |
| 336 | 3x |
dplyr::ungroup() %>% |
| 337 | 3x |
dplyr::arrange( |
| 338 | 3x |
STUDYID, |
| 339 | 3x |
USUBJID, |
| 340 | 3x |
PARAMCD, |
| 341 | 3x |
BASETYPE, |
| 342 | 3x |
AVISITN, |
| 343 | 3x |
ATPTN, |
| 344 | 3x |
DTYPE, |
| 345 | 3x |
ADTM, |
| 346 | 3x |
LBSEQ, |
| 347 | 3x |
ASPID |
| 348 |
) |
|
| 349 | ||
| 350 | 3x |
adlb <- adlb %>% dplyr::mutate(ONTRTFL = factor(dplyr::case_when( |
| 351 | 3x |
!AVISIT %in% c("SCREENING", "BASELINE") ~ "Y",
|
| 352 | 3x |
TRUE ~ "" |
| 353 |
))) |
|
| 354 | ||
| 355 | 3x |
flag_variables <- function(data, |
| 356 | 3x |
apply_grouping, |
| 357 | 3x |
apply_filter, |
| 358 | 3x |
apply_mutate) {
|
| 359 | 15x |
data_compare <- data %>% |
| 360 | 15x |
dplyr::mutate(row_check = seq_len(nrow(data))) |
| 361 | ||
| 362 | 15x |
data <- data_compare %>% |
| 363 |
{
|
|
| 364 | 15x |
if (apply_grouping == TRUE) {
|
| 365 | 9x |
dplyr::group_by(., USUBJID, PARAMCD, BASETYPE, AVISIT) |
| 366 |
} else {
|
|
| 367 | 6x |
dplyr::group_by(., USUBJID, PARAMCD, BASETYPE) |
| 368 |
} |
|
| 369 |
} %>% |
|
| 370 | 15x |
dplyr::arrange(ADTM, ASPID, LBSEQ) %>% |
| 371 |
{
|
|
| 372 | 15x |
if (apply_filter == TRUE) {
|
| 373 | 6x |
dplyr::filter( |
| 374 |
., |
|
| 375 | 6x |
(AVISIT != "BASELINE" & AVISIT != "SCREENING") & |
| 376 | 6x |
(ONTRTFL == "Y" | ADTM <= TRTSDTM) |
| 377 |
) %>% |
|
| 378 | 6x |
dplyr::filter(ATOXGR == max(as.numeric(as.character(ATOXGR)))) |
| 379 | 9x |
} else if (apply_filter == FALSE) {
|
| 380 | 6x |
dplyr::filter( |
| 381 |
., |
|
| 382 | 6x |
(AVISIT != "BASELINE" & AVISIT != "SCREENING") & |
| 383 | 6x |
(ONTRTFL == "Y" | ADTM <= TRTSDTM) |
| 384 |
) %>% |
|
| 385 | 6x |
dplyr::filter(ATOXGR == min(as.numeric(as.character(ATOXGR)))) |
| 386 |
} else {
|
|
| 387 | 3x |
dplyr::filter( |
| 388 |
., |
|
| 389 | 3x |
AVAL == min(AVAL) & |
| 390 | 3x |
(AVISIT != "BASELINE" & AVISIT != "SCREENING") & |
| 391 | 3x |
(ONTRTFL == "Y" | ADTM <= TRTSDTM) |
| 392 |
) |
|
| 393 |
} |
|
| 394 |
} %>% |
|
| 395 | 15x |
dplyr::slice(1) %>% |
| 396 |
{
|
|
| 397 | 15x |
if (apply_mutate == TRUE) {
|
| 398 | 12x |
dplyr::mutate(., new_var = ifelse(is.na(DTYPE), "Y", "")) |
| 399 |
} else {
|
|
| 400 | 3x |
dplyr::mutate(., new_var = ifelse(is.na(AVAL) == FALSE & is.na(DTYPE), "Y", "")) |
| 401 |
} |
|
| 402 |
} %>% |
|
| 403 | 15x |
dplyr::ungroup() |
| 404 | ||
| 405 | 15x |
data_compare$new_var <- ifelse(data_compare$row_check %in% data$row_check, "Y", "") |
| 406 | ||
| 407 | 15x |
data_compare <- data_compare[, -which(names(data_compare) %in% c("row_check"))]
|
| 408 | ||
| 409 | 15x |
return(data_compare) |
| 410 |
} |
|
| 411 | ||
| 412 | 3x |
adlb <- flag_variables(adlb, TRUE, "ELSE", FALSE) %>% dplyr::rename(WORS01FL = "new_var") |
| 413 | 3x |
adlb <- flag_variables(adlb, FALSE, TRUE, TRUE) %>% dplyr::rename(WGRHIFL = "new_var") |
| 414 | 3x |
adlb <- flag_variables(adlb, FALSE, FALSE, TRUE) %>% dplyr::rename(WGRLOFL = "new_var") |
| 415 | 3x |
adlb <- flag_variables(adlb, TRUE, TRUE, TRUE) %>% dplyr::rename(WGRHIVFL = "new_var") |
| 416 | 3x |
adlb <- flag_variables(adlb, TRUE, FALSE, TRUE) %>% dplyr::rename(WGRLOVFL = "new_var") |
| 417 | ||
| 418 | 3x |
adlb <- adlb %>% dplyr::mutate(ANL01FL = ifelse( |
| 419 | 3x |
(ABLFL == "Y" | (WORS01FL == "Y" & is.na(DTYPE))) & |
| 420 | 3x |
(AVISIT != "SCREENING"), |
| 421 | 3x |
"Y", |
| 422 |
"" |
|
| 423 |
)) |
|
| 424 | ||
| 425 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) {
|
| 426 | ! |
adlb <- mutate_na(ds = adlb, na_vars = na_vars, na_percentage = na_percentage) |
| 427 |
} |
|
| 428 | ||
| 429 |
# apply metadata |
|
| 430 | ||
| 431 | 3x |
adlb <- apply_metadata(adlb, "metadata/ADLB.yml") |
| 432 | ||
| 433 | 3x |
return(adlb) |
| 434 |
} |
| 1 |
#' Medical History Analysis Dataset (ADMH) |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Function for generating a random Medical History Analysis Dataset for a given |
|
| 6 |
#' Subject-Level Analysis Dataset. |
|
| 7 |
#' |
|
| 8 |
#' @details One record per each record in the corresponding SDTM domain. |
|
| 9 |
#' |
|
| 10 |
#' Keys: `STUDYID`, `USUBJID`, `ASTDTM`, `MHSEQ` |
|
| 11 |
#' |
|
| 12 |
#' @inheritParams argument_convention |
|
| 13 |
#' @param max_n_mhs (`integer`)\cr Maximum number of MHs per patient. Defaults to 10. |
|
| 14 |
#' @template param_cached |
|
| 15 |
#' @templateVar data admh |
|
| 16 |
#' |
|
| 17 |
#' @return `data.frame` |
|
| 18 |
#' @export |
|
| 19 |
#' |
|
| 20 |
#' @examples |
|
| 21 |
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1) |
|
| 22 |
#' |
|
| 23 |
#' admh <- radmh(adsl, seed = 2) |
|
| 24 |
#' admh |
|
| 25 |
radmh <- function(adsl, |
|
| 26 |
max_n_mhs = 10L, |
|
| 27 |
lookup = NULL, |
|
| 28 |
seed = NULL, |
|
| 29 |
na_percentage = 0, |
|
| 30 |
na_vars = list(MHBODSYS = c(NA, 0.1), MHDECOD = c(1234, 0.1)), |
|
| 31 |
cached = FALSE) {
|
|
| 32 | 4x |
checkmate::assert_flag(cached) |
| 33 | 4x |
if (cached) {
|
| 34 | 1x |
return(get_cached_data("cadmh"))
|
| 35 |
} |
|
| 36 | ||
| 37 | 3x |
checkmate::assert_data_frame(adsl) |
| 38 | 3x |
checkmate::assert_integer(max_n_mhs, len = 1, any.missing = FALSE) |
| 39 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 40 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
| 41 | 3x |
checkmate::assert_true(na_percentage < 1) |
| 42 | ||
| 43 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
| 44 | 3x |
lookup_mh <- if (!is.null(lookup)) {
|
| 45 | ! |
lookup |
| 46 |
} else {
|
|
| 47 | 3x |
tibble::tribble( |
| 48 | 3x |
~MHBODSYS, ~MHDECOD, ~MHSOC, |
| 49 | 3x |
"cl A", "trm A_1/2", "cl A", |
| 50 | 3x |
"cl A", "trm A_2/2", "cl A", |
| 51 | 3x |
"cl B", "trm B_1/3", "cl B", |
| 52 | 3x |
"cl B", "trm B_2/3", "cl B", |
| 53 | 3x |
"cl B", "trm B_3/3", "cl B", |
| 54 | 3x |
"cl C", "trm C_1/2", "cl C", |
| 55 | 3x |
"cl C", "trm C_2/2", "cl C", |
| 56 | 3x |
"cl D", "trm D_1/3", "cl D", |
| 57 | 3x |
"cl D", "trm D_2/3", "cl D", |
| 58 | 3x |
"cl D", "trm D_3/3", "cl D" |
| 59 |
) |
|
| 60 |
} |
|
| 61 | ||
| 62 | 3x |
if (!is.null(seed)) {
|
| 63 | 3x |
set.seed(seed) |
| 64 |
} |
|
| 65 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
| 66 | ||
| 67 | 3x |
admh <- Map( |
| 68 | 3x |
function(id, sid) {
|
| 69 | 30x |
n_mhs <- sample(0:max_n_mhs, 1) |
| 70 | 30x |
i <- sample(seq_len(nrow(lookup_mh)), n_mhs, TRUE) |
| 71 | 30x |
dplyr::mutate( |
| 72 | 30x |
lookup_mh[i, ], |
| 73 | 30x |
USUBJID = id, |
| 74 | 30x |
STUDYID = sid |
| 75 |
) |
|
| 76 |
}, |
|
| 77 | 3x |
adsl$USUBJID, |
| 78 | 3x |
adsl$STUDYID |
| 79 |
) %>% |
|
| 80 | 3x |
Reduce(rbind, .) %>% |
| 81 | 3x |
`[`(c(4, 5, 1, 2, 3)) %>% |
| 82 | 3x |
dplyr::mutate(MHTERM = MHDECOD) |
| 83 | ||
| 84 | 3x |
admh <- rcd_var_relabel( |
| 85 | 3x |
admh, |
| 86 | 3x |
STUDYID = "Study Identifier", |
| 87 | 3x |
USUBJID = "Unique Subject Identifier" |
| 88 |
) |
|
| 89 | ||
| 90 |
# merge ADSL to be able to add MH date and study day variables |
|
| 91 | 3x |
admh <- dplyr::inner_join( |
| 92 | 3x |
admh, |
| 93 | 3x |
adsl, |
| 94 | 3x |
by = c("STUDYID", "USUBJID")
|
| 95 |
) %>% |
|
| 96 | 3x |
dplyr::rowwise() %>% |
| 97 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
| 98 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
| 99 | 3x |
TRUE ~ TRTEDTM |
| 100 |
))) %>% |
|
| 101 | 3x |
dplyr::mutate(ASTDTM = sample( |
| 102 | 3x |
seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"), |
| 103 | 3x |
size = 1 |
| 104 |
)) %>% |
|
| 105 | 3x |
dplyr::mutate(ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))) %>% |
| 106 |
# add 1 to end of range incase both values passed to sample() are the same |
|
| 107 | 3x |
dplyr::mutate(AENDTM = sample( |
| 108 | 3x |
seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"), |
| 109 | 3x |
size = 1 |
| 110 |
)) %>% |
|
| 111 | 3x |
dplyr::mutate(AENDY = ceiling(difftime(AENDTM, TRTSDTM, units = "days"))) %>% |
| 112 | 3x |
select(-TRTENDT) %>% |
| 113 | 3x |
dplyr::ungroup() %>% |
| 114 | 3x |
dplyr::arrange(STUDYID, USUBJID, ASTDTM, MHTERM) %>% |
| 115 | 3x |
dplyr::mutate(MHDISTAT = sample( |
| 116 | 3x |
x = c("Resolved", "Ongoing with treatment", "Ongoing without treatment"),
|
| 117 | 3x |
prob = c(0.6, 0.2, 0.2), |
| 118 | 3x |
size = dplyr::n(), |
| 119 | 3x |
replace = TRUE |
| 120 |
)) %>% |
|
| 121 | 3x |
dplyr::mutate(ATIREL = dplyr::case_when( |
| 122 | 3x |
(AENDTM < TRTSDTM | (is.na(AENDTM) & MHDISTAT == "Resolved")) ~ "PRIOR", |
| 123 | 3x |
(AENDTM >= TRTSDTM | (is.na(AENDTM) & grepl("Ongoing", MHDISTAT))) ~ "PRIOR_CONCOMITANT"
|
| 124 |
)) |
|
| 125 | ||
| 126 | 3x |
admh <- admh %>% |
| 127 | 3x |
dplyr::group_by(USUBJID) %>% |
| 128 | 3x |
dplyr::mutate(MHSEQ = seq_len(dplyr::n())) %>% |
| 129 | 3x |
dplyr::mutate(ASEQ = MHSEQ) %>% |
| 130 | 3x |
dplyr::ungroup() %>% |
| 131 | 3x |
dplyr::arrange(STUDYID, USUBJID, ASTDTM, MHSEQ) |
| 132 | ||
| 133 | 3x |
if (length(na_vars) > 0 && na_percentage > 0 && na_percentage <= 1) {
|
| 134 | ! |
admh <- mutate_na(ds = admh, na_vars = na_vars, na_percentage = na_percentage) |
| 135 |
} |
|
| 136 | ||
| 137 |
# apply metadata |
|
| 138 | 3x |
admh <- apply_metadata(admh, "metadata/ADMH.yml") |
| 139 | ||
| 140 | 3x |
return(admh) |
| 141 |
} |
| 1 |
#' Protocol Deviations Analysis Dataset (ADDV) |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Function for generating random Protocol Deviations Analysis Dataset for a given |
|
| 6 |
#' Subject-Level Analysis Dataset. |
|
| 7 |
#' |
|
| 8 |
#' @details One record per each record in the corresponding SDTM domain. |
|
| 9 |
#' |
|
| 10 |
#' Keys: `STUDYID`, `USUBJID`, `ASTDT`, `DVTERM`, `DVSEQ` |
|
| 11 |
#' |
|
| 12 |
#' @inheritParams argument_convention |
|
| 13 |
#' @param max_n_dv (`integer`)\cr Maximum number of deviations per patient. Defaults to 3. |
|
| 14 |
#' @param p_dv (`proportion`)\cr Probability of a patient having protocol deviations. |
|
| 15 |
#' @template param_cached |
|
| 16 |
#' @templateVar data addv |
|
| 17 |
#' |
|
| 18 |
#' @return `data.frame` |
|
| 19 |
#' @export |
|
| 20 |
#' |
|
| 21 |
#' @examples |
|
| 22 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
| 23 |
#' |
|
| 24 |
#' addv <- raddv(adsl, seed = 2) |
|
| 25 |
#' addv |
|
| 26 |
raddv <- function(adsl, |
|
| 27 |
max_n_dv = 3L, |
|
| 28 |
p_dv = 0.15, |
|
| 29 |
lookup = NULL, |
|
| 30 |
seed = NULL, |
|
| 31 |
na_percentage = 0, |
|
| 32 |
na_vars = list( |
|
| 33 |
"ASTDT" = c(seed = 1234, percentage = 0.1), |
|
| 34 |
"DVCAT" = c(seed = 1234, percentage = 0.1) |
|
| 35 |
), |
|
| 36 |
cached = FALSE) {
|
|
| 37 | 4x |
checkmate::assert_flag(cached) |
| 38 | 4x |
if (cached) {
|
| 39 | 1x |
return(get_cached_data("caddv"))
|
| 40 |
} |
|
| 41 | ||
| 42 | 3x |
checkmate::assert_data_frame(adsl) |
| 43 | 3x |
checkmate::assert_integer(max_n_dv, len = 1, lower = 1, any.missing = FALSE) |
| 44 | 3x |
checkmate::assert_number(p_dv, lower = .Machine$double.xmin, upper = 1) |
| 45 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 46 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
| 47 | 3x |
checkmate::assert_true(na_percentage < 1) |
| 48 | ||
| 49 | 3x |
if (!is.null(seed)) set.seed(seed) |
| 50 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
| 51 | ||
| 52 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
| 53 | 3x |
lookup_dv <- if (!is.null(lookup)) {
|
| 54 | ! |
lookup |
| 55 |
} else {
|
|
| 56 | 3x |
tibble::tribble( |
| 57 | 3x |
~DOMAIN, ~DVCAT, ~DVDECOD, ~DVTERM, ~DVREAS, ~DVEPRELI, |
| 58 | 3x |
"DV", "MAJOR", "EXCLUSION CRITERIA", "Received prior prohibited therapy or medication", "", "N", |
| 59 | 3x |
"DV", "MAJOR", "EXCLUSION CRITERIA", "Active or untreated or other excluded cns metastases", "", "N", |
| 60 | 3x |
"DV", "MAJOR", "EXCLUSION CRITERIA", "History of other malignancies within the last 5 years", "", "N", |
| 61 | 3x |
"DV", "MAJOR", "EXCLUSION CRITERIA", "Uncontrolled concurrent condition", "", "N", |
| 62 | 3x |
"DV", "MAJOR", "EXCLUSION CRITERIA", "Other exclusion criteria", "", "N", |
| 63 | 3x |
"DV", "MAJOR", "EXCLUSION CRITERIA", "Pregnancy criteria", "", "N", |
| 64 | 3x |
"DV", "MAJOR", "INCLUSION CRITERIA", "Does not meet prior therapy requirements", "", "N", |
| 65 | 3x |
"DV", "MAJOR", "INCLUSION CRITERIA", "Inclusion lab values outside allowed limits", "", "N", |
| 66 | 3x |
"DV", "MAJOR", "INCLUSION CRITERIA", "No signed ICF at study entry", "", "N", |
| 67 | 3x |
"DV", "MAJOR", "INCLUSION CRITERIA", "Inclusion-related test not done/out of window", "", "N", |
| 68 | 3x |
"DV", "MAJOR", "INCLUSION CRITERIA", "Ineligible cancer type or current cancer stage", "", "N", |
| 69 | 3x |
"DV", "MAJOR", "MEDICATION", "Dose missed or significantly out of window", |
| 70 | 3x |
"Site action due to epidemic/pandemic", "Y", |
| 71 | 3x |
"DV", "MAJOR", "MEDICATION", "Received incorrect study medication", "", "N", |
| 72 | 3x |
"DV", "MAJOR", "MEDICATION", "Received prohibited concomitant medication", "", "N", |
| 73 | 3x |
"DV", "MAJOR", "MEDICATION", "Discontinued study drug for unspecified reason", "", "N", |
| 74 | 3x |
"DV", "MAJOR", "MEDICATION", "Significant deviation from planned dose", |
| 75 | 3x |
"Site action due to epidemic/pandemic", "Y", |
| 76 | 3x |
"DV", "MAJOR", "PROCEDURAL", "Missed assessment affecting safety/study outcomes", "", "N", |
| 77 | 3x |
"DV", "MAJOR", "PROCEDURAL", "Eligibility-related test not done/out of window", "", "N", |
| 78 | 3x |
"DV", "MAJOR", "PROCEDURAL", "Failure to sign updated ICF within two visits", |
| 79 | 3x |
"Site action due to epidemic/pandemic", "Y", |
| 80 | 3x |
"DV", "MAJOR", "PROCEDURAL", "Omission of complete lab panel required by protocol", "", "N", |
| 81 | 3x |
"DV", "MAJOR", "PROCEDURAL", "Omission of screening tumor assessment", "", "N", |
| 82 | 3x |
"DV", "MAJOR", "PROCEDURAL", "Missed 2 or more efficacy assessments", |
| 83 | 3x |
"Site action due to epidemic/pandemic", "Y" |
| 84 |
) |
|
| 85 |
} |
|
| 86 | ||
| 87 | ||
| 88 | 3x |
addv <- Map( |
| 89 | 3x |
function(id, sid) {
|
| 90 | 30x |
n_dv <- stats::rbinom(1, 1, p_dv) * sample(c(1, seq_len(max_n_dv)), 1) |
| 91 | 30x |
i <- sample(seq_len(nrow(lookup_dv)), n_dv, TRUE) |
| 92 | 30x |
dplyr::mutate( |
| 93 | 30x |
lookup_dv[i, ], |
| 94 | 30x |
USUBJID = id, |
| 95 | 30x |
STUDYID = sid |
| 96 |
) |
|
| 97 |
}, |
|
| 98 | 3x |
adsl$USUBJID, |
| 99 | 3x |
adsl$STUDYID |
| 100 |
) %>% |
|
| 101 | 3x |
Reduce(rbind, .) %>% |
| 102 | 3x |
dplyr::mutate(DVSCAT = DVCAT) |
| 103 | ||
| 104 | 3x |
addv <- rcd_var_relabel( |
| 105 | 3x |
addv, |
| 106 | 3x |
STUDYID = "Study Identifier", |
| 107 | 3x |
USUBJID = "Unique Subject Identifier" |
| 108 |
) |
|
| 109 | ||
| 110 |
# merge ADSL to be able to add deviation date and study day variables |
|
| 111 | 3x |
addv <- dplyr::inner_join(addv, adsl, by = c("STUDYID", "USUBJID")) %>%
|
| 112 | 3x |
dplyr::rowwise() %>% |
| 113 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
| 114 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
| 115 | 3x |
TRUE ~ TRTEDTM |
| 116 |
))) %>% |
|
| 117 | 3x |
dplyr::mutate(ASTDTM = sample( |
| 118 | 3x |
seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"), |
| 119 | 3x |
size = 1 |
| 120 |
)) %>% |
|
| 121 | 3x |
dplyr::mutate(ASTDT = lubridate::date(ASTDTM)) %>% |
| 122 | 3x |
dplyr::mutate(ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))) %>% |
| 123 | 3x |
dplyr::select(-TRTENDT, -ASTDTM) %>% |
| 124 | 3x |
dplyr::ungroup() %>% |
| 125 | 3x |
dplyr::arrange(STUDYID, USUBJID, ASTDT, DVTERM) |
| 126 | ||
| 127 | 3x |
addv <- addv %>% |
| 128 | 3x |
dplyr::group_by(USUBJID) %>% |
| 129 | 3x |
dplyr::mutate(DVSEQ = seq_len(dplyr::n())) %>% |
| 130 | 3x |
dplyr::ungroup() %>% |
| 131 | 3x |
dplyr::arrange(STUDYID, USUBJID, ASTDT, DVTERM, DVSEQ) |
| 132 | ||
| 133 | 3x |
addv <- addv %>% |
| 134 | 3x |
dplyr::mutate(AEPRELFL = ifelse(DVEPRELI == "Y", DVEPRELI, "")) |
| 135 | ||
| 136 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) {
|
| 137 | ! |
addv <- mutate_na(ds = addv, na_vars = na_vars, na_percentage = na_percentage) |
| 138 |
} |
|
| 139 | ||
| 140 |
# apply metadata |
|
| 141 | 3x |
addv <- apply_metadata(addv, "metadata/ADDV.yml") |
| 142 | ||
| 143 | 3x |
return(addv) |
| 144 |
} |
| 1 |
#' Tumor Response Analysis Dataset (ADRS) |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Function for generating a random Tumor Response Analysis Dataset for a given |
|
| 6 |
#' Subject-Level Analysis Dataset. |
|
| 7 |
#' |
|
| 8 |
#' @details |
|
| 9 |
#' One record per subject per parameter per analysis visit per analysis date. |
|
| 10 |
#' SDTM variables are populated on new records coming from other single records. |
|
| 11 |
#' Otherwise, SDTM variables are left blank. |
|
| 12 |
#' |
|
| 13 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `AVISITN`, `ADT`, `RSSEQ` |
|
| 14 |
#' |
|
| 15 |
#' @inheritParams argument_convention |
|
| 16 |
#' @param avalc (`character vector`)\cr Analysis value categories. |
|
| 17 |
#' @template param_cached |
|
| 18 |
#' @templateVar data adrs |
|
| 19 |
#' |
|
| 20 |
#' @return `data.frame` |
|
| 21 |
#' @export |
|
| 22 |
#' |
|
| 23 |
#' @examples |
|
| 24 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
| 25 |
#' |
|
| 26 |
#' adrs <- radrs(adsl, seed = 2) |
|
| 27 |
#' adrs |
|
| 28 |
radrs <- function(adsl, |
|
| 29 |
avalc = NULL, |
|
| 30 |
lookup = NULL, |
|
| 31 |
seed = NULL, |
|
| 32 |
na_percentage = 0, |
|
| 33 |
na_vars = list(AVISIT = c(NA, 0.1), AVAL = c(1234, 0.1), AVALC = c(1234, 0.1)), |
|
| 34 |
cached = FALSE) {
|
|
| 35 | 7x |
checkmate::assert_flag(cached) |
| 36 | 7x |
if (cached) {
|
| 37 | 1x |
return(get_cached_data("cadrs"))
|
| 38 |
} |
|
| 39 | ||
| 40 | 6x |
checkmate::assert_data_frame(adsl) |
| 41 | 6x |
checkmate::assert_vector(avalc, null.ok = TRUE) |
| 42 | 6x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 43 | 6x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
| 44 | 6x |
checkmate::assert_true(na_percentage < 1) |
| 45 | ||
| 46 | 6x |
param_codes <- if (!is.null(avalc)) {
|
| 47 | ! |
avalc |
| 48 |
} else {
|
|
| 49 | 6x |
stats::setNames(1:5, c("CR", "PR", "SD", "PD", "NE"))
|
| 50 |
} |
|
| 51 | ||
| 52 | 6x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
| 53 | 6x |
lookup_ars <- if (!is.null(lookup)) {
|
| 54 | ! |
lookup |
| 55 |
} else {
|
|
| 56 | 6x |
expand.grid( |
| 57 | 6x |
ARM = c("A: Drug X", "B: Placebo", "C: Combination"),
|
| 58 | 6x |
AVALC = names(param_codes) |
| 59 | 6x |
) %>% dplyr::mutate( |
| 60 | 6x |
AVAL = param_codes[AVALC], |
| 61 | 6x |
p_scr = c(rep(0, 3), rep(0, 3), c(1, 1, 1), c(0, 0, 0), c(0, 0, 0)), |
| 62 | 6x |
p_bsl = c(rep(0, 3), rep(0, 3), c(1, 1, 1), c(0, 0, 0), c(0, 0, 0)), |
| 63 | 6x |
p_cycle = c(c(.4, .3, .5), c(.35, .25, .25), c(.1, .2, .08), c(.14, 0.15, 0.15), c(.01, 0.1, 0.02)), |
| 64 | 6x |
p_eoi = c(c(.4, .3, .5), c(.35, .25, .25), c(.1, .2, .08), c(.14, 0.15, 0.15), c(.01, 0.1, 0.02)), |
| 65 | 6x |
p_fu = c(c(.3, .2, .4), c(.2, .1, .3), c(.2, .2, .2), c(.3, .5, 0.1), rep(0, 3)) |
| 66 |
) |
|
| 67 |
} |
|
| 68 | ||
| 69 | 6x |
if (!is.null(seed)) {
|
| 70 | 6x |
set.seed(seed) |
| 71 |
} |
|
| 72 | 6x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
| 73 | ||
| 74 | 6x |
adrs <- split(adsl, adsl$USUBJID) %>% |
| 75 | 6x |
lapply(function(pinfo) {
|
| 76 | 60x |
probs <- dplyr::filter(lookup_ars, ARM == as.character(pinfo$ACTARM)) |
| 77 | ||
| 78 |
# screening |
|
| 79 | 60x |
rsp_screen <- sample(probs$AVALC, 1, prob = probs$p_scr) %>% as.character() |
| 80 | ||
| 81 |
# baseline |
|
| 82 | 60x |
rsp_bsl <- sample(probs$AVALC, 1, prob = probs$p_bsl) %>% as.character() |
| 83 | ||
| 84 |
# cycle |
|
| 85 | 60x |
rsp_c2d1 <- sample(probs$AVALC, 1, prob = probs$p_cycle) %>% as.character() |
| 86 | 60x |
rsp_c4d1 <- sample(probs$AVALC, 1, prob = probs$p_cycle) %>% as.character() |
| 87 | ||
| 88 |
# end of induction |
|
| 89 | 60x |
rsp_eoi <- sample(probs$AVALC, 1, prob = probs$p_eoi) %>% as.character() |
| 90 | ||
| 91 |
# follow up |
|
| 92 | 60x |
rsp_fu <- sample(probs$AVALC, 1, prob = probs$p_fu) %>% as.character() |
| 93 | ||
| 94 | 60x |
best_rsp <- min(param_codes[c(rsp_screen, rsp_bsl, rsp_eoi, rsp_fu, rsp_c2d1, rsp_c4d1)]) |
| 95 | 60x |
best_rsp_i <- which.min(param_codes[c(rsp_screen, rsp_bsl, rsp_eoi, rsp_fu, rsp_c2d1, rsp_c4d1)]) |
| 96 | ||
| 97 | 60x |
avisit <- c("SCREENING", "BASELINE", "CYCLE 2 DAY 1", "CYCLE 4 DAY 1", "END OF INDUCTION", "FOLLOW UP")
|
| 98 | ||
| 99 |
# meaningful date information |
|
| 100 | 60x |
trtstdt <- lubridate::date(pinfo$TRTSDTM) |
| 101 | 60x |
trtendt <- lubridate::date(dplyr::if_else( |
| 102 | 60x |
!is.na(pinfo$TRTEDTM), pinfo$TRTEDTM, |
| 103 | 60x |
lubridate::floor_date(trtstdt + study_duration_secs, unit = "day") |
| 104 |
)) |
|
| 105 | 60x |
scr_date <- trtstdt - lubridate::days(100) |
| 106 | 60x |
bs_date <- trtstdt |
| 107 | 60x |
flu_date <- sample(seq(lubridate::as_datetime(trtstdt), lubridate::as_datetime(trtendt), by = "day"), size = 1) |
| 108 | 60x |
eoi_date <- sample(seq(lubridate::as_datetime(trtstdt), lubridate::as_datetime(trtendt), by = "day"), size = 1) |
| 109 | 60x |
c2d1_date <- sample(seq(lubridate::as_datetime(trtstdt), lubridate::as_datetime(trtendt), by = "day"), size = 1) |
| 110 | 60x |
c4d1_date <- min(lubridate::date(c2d1_date + lubridate::days(60)), trtendt) |
| 111 | ||
| 112 | 60x |
tibble::tibble( |
| 113 | 60x |
STUDYID = pinfo$STUDYID, |
| 114 | 60x |
SITEID = pinfo$SITEID, |
| 115 | 60x |
USUBJID = pinfo$USUBJID, |
| 116 | 60x |
PARAMCD = as.factor(c(rep("OVRINV", 6), "BESRSPI", "INVET")),
|
| 117 | 60x |
PARAM = as.factor(dplyr::recode( |
| 118 | 60x |
PARAMCD, |
| 119 | 60x |
OVRINV = "Overall Response by Investigator - by visit", |
| 120 | 60x |
OVRSPI = "Best Overall Response by Investigator (no confirmation required)", |
| 121 | 60x |
BESRSPI = "Best Confirmed Overall Response by Investigator", |
| 122 | 60x |
INVET = "Investigator End Of Induction Response" |
| 123 |
)), |
|
| 124 | 60x |
AVALC = c( |
| 125 | 60x |
rsp_screen, rsp_bsl, rsp_c2d1, rsp_c4d1, rsp_eoi, rsp_fu, |
| 126 | 60x |
names(param_codes)[best_rsp], |
| 127 | 60x |
rsp_eoi |
| 128 |
), |
|
| 129 | 60x |
AVAL = param_codes[AVALC], |
| 130 | 60x |
AVISIT = factor(c(avisit, avisit[best_rsp_i], avisit[5]), levels = avisit) |
| 131 |
) %>% |
|
| 132 | 60x |
merge( |
| 133 | 60x |
tibble::tibble( |
| 134 | 60x |
AVISIT = avisit, |
| 135 | 60x |
ADTM = c(scr_date, bs_date, c2d1_date, c4d1_date, eoi_date, flu_date), |
| 136 | 60x |
AVISITN = c(-1, 0, 2, 4, 999, 999), |
| 137 | 60x |
TRTSDTM = pinfo$TRTSDTM |
| 138 |
) %>% |
|
| 139 | 60x |
dplyr::mutate( |
| 140 | 60x |
ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days")) |
| 141 |
) %>% |
|
| 142 | 60x |
dplyr::select(-"TRTSDTM"), |
| 143 | 60x |
by = "AVISIT" |
| 144 |
) |
|
| 145 |
}) %>% |
|
| 146 | 6x |
Reduce(rbind, .) %>% |
| 147 | 6x |
dplyr::mutate(AVALC = factor(AVALC, levels = names(param_codes))) %>% |
| 148 | 6x |
rcd_var_relabel( |
| 149 | 6x |
STUDYID = "Study Identifier", |
| 150 | 6x |
USUBJID = "Unique Subject Identifier" |
| 151 |
) |
|
| 152 | ||
| 153 | 6x |
adrs <- rcd_var_relabel( |
| 154 | 6x |
adrs, |
| 155 | 6x |
STUDYID = "Study Identifier", |
| 156 | 6x |
USUBJID = "Unique Subject Identifier" |
| 157 |
) |
|
| 158 | ||
| 159 |
# merge ADSL to be able to add RS date and study day variables |
|
| 160 | ||
| 161 | ||
| 162 | 6x |
adrs <- dplyr::inner_join( |
| 163 | 6x |
dplyr::select(adrs, -"SITEID"), |
| 164 | 6x |
adsl, |
| 165 | 6x |
by = c("STUDYID", "USUBJID")
|
| 166 |
) |
|
| 167 | ||
| 168 | 6x |
adrs <- adrs %>% |
| 169 | 6x |
dplyr::group_by(USUBJID) %>% |
| 170 | 6x |
dplyr::mutate(RSSEQ = seq_len(dplyr::n())) %>% |
| 171 | 6x |
dplyr::mutate(ASEQ = RSSEQ) %>% |
| 172 | 6x |
dplyr::ungroup() %>% |
| 173 | 6x |
dplyr::arrange( |
| 174 | 6x |
STUDYID, |
| 175 | 6x |
USUBJID, |
| 176 | 6x |
PARAMCD, |
| 177 | 6x |
AVISITN, |
| 178 | 6x |
ADTM, |
| 179 | 6x |
RSSEQ |
| 180 |
) |
|
| 181 | ||
| 182 | 6x |
if (length(na_vars) > 0 && na_percentage > 0) {
|
| 183 | ! |
adrs <- mutate_na(ds = adrs, na_vars = na_vars, na_percentage = na_percentage) |
| 184 |
} |
|
| 185 | ||
| 186 |
# apply metadata |
|
| 187 | 6x |
adrs <- apply_metadata(adrs, "metadata/ADRS.yml") |
| 188 | ||
| 189 | 6x |
return(adrs) |
| 190 |
} |
| 1 |
#' Generate Anthropometric Measurements for Males and Females. |
|
| 2 |
#' |
|
| 3 |
#' Anthropometric measurements are randomly generated using normal approximation. |
|
| 4 |
#' The default mean and standard deviation values used are based on US National Health |
|
| 5 |
#' Statistics for adults aged 20 years or over. The measurements are generated in same units |
|
| 6 |
#' as provided to the function. |
|
| 7 |
#' |
|
| 8 |
#' @details One record per subject. |
|
| 9 |
#' |
|
| 10 |
#' @inheritParams argument_convention |
|
| 11 |
#' @param df (`data.frame`)\cr Analysis dataset. |
|
| 12 |
#' @param id_var (`character`)\cr Patient identifier variable name. |
|
| 13 |
#' @param sex_var (`character`)\cr Name of variable representing sex of patient. |
|
| 14 |
#' @param sex_var_level_male (`character`)\cr Level of `sex_var` representing males. |
|
| 15 |
#' @param male_weight_in_kg (named `list`)\cr List of means and SDs of male weights in kilograms. |
|
| 16 |
#' @param female_weight_in_kg (named `list`)\cr List of means and SDs of female weights in kilograms. |
|
| 17 |
#' @param male_height_in_m (named `list`)\cr List of means and SDs of male heights in metres. |
|
| 18 |
#' @param female_height_in_m (named `list`)\cr list of means and SDs of female heights in metres. |
|
| 19 |
#' |
|
| 20 |
#' @return a dataframe with anthropometric measurements for each subject in analysis dataset. |
|
| 21 |
#' @keywords internal |
|
| 22 |
h_anthropometrics_by_sex <- function(df, |
|
| 23 |
seed = 1, |
|
| 24 |
id_var = "USUBJID", |
|
| 25 |
sex_var = "SEX", |
|
| 26 |
sex_var_level_male = "M", |
|
| 27 |
male_weight_in_kg = list(mean = 90.6, sd = 44.9), |
|
| 28 |
female_weight_in_kg = list(mean = 77.5, sd = 46.2), |
|
| 29 |
male_height_in_m = list(mean = 1.75, sd = 0.14), |
|
| 30 |
female_height_in_m = list(mean = 1.61, sd = 0.24)) {
|
|
| 31 | 3x |
checkmate::assert_data_frame(df) |
| 32 | 3x |
checkmate::assert_string(id_var) |
| 33 | 3x |
checkmate::assert_string(sex_var) |
| 34 | 3x |
checkmate::assert_string(sex_var_level_male) |
| 35 | 3x |
checkmate::assert_list(male_weight_in_kg, types = "numeric") |
| 36 | 3x |
checkmate::assert_subset(names(male_weight_in_kg), choices = c("mean", "sd"))
|
| 37 | 3x |
checkmate::assert_list(female_weight_in_kg, types = "numeric") |
| 38 | 3x |
checkmate::assert_subset(names(female_weight_in_kg), choices = c("mean", "sd"))
|
| 39 | 3x |
checkmate::assert_list(male_height_in_m, types = "numeric") |
| 40 | 3x |
checkmate::assert_subset(names(male_height_in_m), choices = c("mean", "sd"))
|
| 41 | 3x |
checkmate::assert_list(female_height_in_m, types = "numeric") |
| 42 | 3x |
checkmate::assert_subset(names(female_height_in_m), choices = c("mean", "sd"))
|
| 43 | ||
| 44 | ||
| 45 | 3x |
n <- length(unique(df[[id_var]])) |
| 46 | 3x |
set.seed(seed) |
| 47 | ||
| 48 | 3x |
df_by_sex <- unique(subset(df, select = c(id_var, sex_var))) |
| 49 | ||
| 50 | 3x |
df_with_measurements <- df_by_sex %>% |
| 51 | 3x |
dplyr::mutate( |
| 52 | 3x |
WEIGHT = ifelse( |
| 53 | 3x |
.data[[sex_var]] == sex_var_level_male, |
| 54 | 3x |
stats::rnorm(n = n, mean = male_weight_in_kg$mean, sd = male_weight_in_kg$sd), |
| 55 | 3x |
stats::rnorm(n = n, mean = female_weight_in_kg$mean, sd = female_weight_in_kg$sd) |
| 56 |
) |
|
| 57 |
) %>% |
|
| 58 | 3x |
dplyr::mutate( |
| 59 | 3x |
HEIGHT = ifelse( |
| 60 | 3x |
.data[[sex_var]] == sex_var_level_male, |
| 61 | 3x |
stats::rnorm(n = n, mean = male_height_in_m$mean, sd = male_height_in_m$sd), |
| 62 | 3x |
stats::rnorm(n = n, mean = female_height_in_m$mean, sd = female_height_in_m$sd) |
| 63 |
) |
|
| 64 |
) %>% |
|
| 65 | 3x |
dplyr::mutate( |
| 66 | 3x |
BMI = WEIGHT / ((HEIGHT)^2) |
| 67 |
) |
|
| 68 | ||
| 69 | 3x |
return(df_with_measurements) |
| 70 |
} |
|
| 71 | ||
| 72 |
#' Subcategory Analysis Dataset (ADSUB) |
|
| 73 |
#' |
|
| 74 |
#' @description `r lifecycle::badge("stable")`
|
|
| 75 |
#' |
|
| 76 |
#' Function for generating a random Subcategory Analysis Dataset for a given |
|
| 77 |
#' Subject-Level Analysis Dataset. |
|
| 78 |
#' |
|
| 79 |
#' @details One record per subject. |
|
| 80 |
#' |
|
| 81 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `AVISITN`, `ADTM`, `SRCSEQ` |
|
| 82 |
#' |
|
| 83 |
#' @inheritParams argument_convention |
|
| 84 |
#' @template param_cached |
|
| 85 |
#' @templateVar data adsub |
|
| 86 |
#' |
|
| 87 |
#' @return `data.frame` |
|
| 88 |
#' @export |
|
| 89 |
#' |
|
| 90 |
#' @author tomlinsj, npaszty, Xuefeng Hou, dipietrc |
|
| 91 |
#' |
|
| 92 |
#' @examples |
|
| 93 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
| 94 |
#' |
|
| 95 |
#' adsub <- radsub(adsl, seed = 2) |
|
| 96 |
#' adsub |
|
| 97 |
radsub <- function(adsl, |
|
| 98 |
param = c( |
|
| 99 |
"Baseline Weight", |
|
| 100 |
"Baseline Height", |
|
| 101 |
"Baseline BMI", |
|
| 102 |
"Baseline ECOG", |
|
| 103 |
"Baseline Biomarker Mutation" |
|
| 104 |
), |
|
| 105 |
paramcd = c("BWGHTSI", "BHGHTSI", "BBMISI", "BECOG", "BBMRKR1"),
|
|
| 106 |
seed = NULL, |
|
| 107 |
na_percentage = 0, |
|
| 108 |
na_vars = list(), |
|
| 109 |
cached = FALSE) {
|
|
| 110 | 4x |
checkmate::assert_flag(cached) |
| 111 | 4x |
if (cached) {
|
| 112 | 1x |
return(get_cached_data("cadsub"))
|
| 113 |
} |
|
| 114 | ||
| 115 | 3x |
checkmate::assert_data_frame(adsl) |
| 116 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
| 117 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
| 118 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 119 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
| 120 | 3x |
checkmate::assert_true(na_percentage < 1) |
| 121 | ||
| 122 |
# Validate and initialize related variables. |
|
| 123 | 3x |
param_init_list <- relvar_init(param, paramcd) |
| 124 | ||
| 125 | 3x |
if (!is.null(seed)) {
|
| 126 | 3x |
set.seed(seed) |
| 127 |
} |
|
| 128 | ||
| 129 | 3x |
adsub <- expand.grid( |
| 130 | 3x |
STUDYID = unique(adsl$STUDYID), |
| 131 | 3x |
USUBJID = adsl$USUBJID, |
| 132 | 3x |
PARAM = as.factor(param_init_list$relvar1), |
| 133 | 3x |
AVISIT = "BASELINE", |
| 134 | 3x |
stringsAsFactors = FALSE |
| 135 |
) |
|
| 136 | ||
| 137 |
# Assign related variable values: PARAM and PARAMCD are related. |
|
| 138 | 3x |
adsub <- adsub %>% rel_var( |
| 139 | 3x |
var_name = "PARAMCD", |
| 140 | 3x |
related_var = "PARAM", |
| 141 | 3x |
var_values = param_init_list$relvar2 |
| 142 |
) |
|
| 143 | ||
| 144 | 3x |
adsub <- adsub[order(adsub$STUDYID, adsub$USUBJID, adsub$PARAMCD), ] |
| 145 | ||
| 146 | 3x |
adsub <- rcd_var_relabel( |
| 147 | 3x |
adsub, |
| 148 | 3x |
STUDYID = "Study Identifier", |
| 149 | 3x |
USUBJID = "Unique Subject Identifier" |
| 150 |
) |
|
| 151 | ||
| 152 |
# Merge ADSL to be able to add EG date and study day variables. |
|
| 153 |
# Sample ADTM to be a few days before TRTSDTM. |
|
| 154 | 3x |
adsub <- dplyr::inner_join( |
| 155 | 3x |
adsub, |
| 156 | 3x |
adsl, |
| 157 | 3x |
by = c("STUDYID", "USUBJID")
|
| 158 |
) %>% |
|
| 159 | 3x |
dplyr::group_by(USUBJID) %>% |
| 160 | 3x |
dplyr::mutate(ADTM = rep( |
| 161 | 3x |
lubridate::date(TRTSDTM)[1] - lubridate::days(sample(1:10, size = 1)), |
| 162 | 3x |
each = n() |
| 163 |
)) %>% |
|
| 164 | 3x |
dplyr::ungroup() %>% |
| 165 | 3x |
dplyr::arrange(STUDYID, USUBJID, ADTM) |
| 166 | ||
| 167 |
# Generate a dataset with height, weight and BMI measurements for each subject. |
|
| 168 | 3x |
if (!is.null(seed)) {
|
| 169 | 3x |
df_with_measurements <- h_anthropometrics_by_sex(adsub, seed = seed) |
| 170 |
} else {
|
|
| 171 | ! |
df_with_measurements <- h_anthropometrics_by_sex(adsub) |
| 172 |
} |
|
| 173 | ||
| 174 |
# Add this to adsub and create other measurements. |
|
| 175 | 3x |
adsub <- adsub %>% |
| 176 | 3x |
dplyr::group_by(USUBJID) %>% |
| 177 | 3x |
dplyr::mutate( |
| 178 | 3x |
AVAL = dplyr::case_when( |
| 179 | 3x |
PARAMCD == |
| 180 | 3x |
"BWGHTSI" ~ df_with_measurements$WEIGHT[df_with_measurements$USUBJID == USUBJID], |
| 181 | 3x |
PARAMCD == |
| 182 | 3x |
"BHGHTSI" ~ df_with_measurements$HEIGHT[df_with_measurements$USUBJID == USUBJID], |
| 183 | 3x |
PARAMCD == |
| 184 | 3x |
"BBMISI" ~ df_with_measurements$BMI[df_with_measurements$USUBJID == USUBJID], |
| 185 | 3x |
PARAMCD == "BECOG" ~ sample(c(0, 1, 2, 3, 4, 5), 1), |
| 186 | 3x |
PARAMCD == "BBMRKR1" ~ sample(c(1, 2), prob = c(0.5, 0.5), 1) |
| 187 |
) |
|
| 188 |
) %>% |
|
| 189 | 3x |
dplyr::arrange(PARAMCD) %>% |
| 190 | 3x |
dplyr::ungroup() %>% |
| 191 | 3x |
dplyr::mutate(AVAL = dplyr::case_when( |
| 192 | 3x |
PARAMCD != "BBMRKR1" | PARAMCD != "BECOG" ~ round(AVAL, 1), |
| 193 | 3x |
TRUE ~ round(AVAL) |
| 194 |
)) |
|
| 195 | ||
| 196 | 3x |
adsub <- adsub %>% |
| 197 | 3x |
dplyr::mutate( |
| 198 | 3x |
AVALC = dplyr::case_when( |
| 199 | 3x |
PARAMCD == "BBMRKR1" ~ dplyr::case_when( |
| 200 | 3x |
AVAL == "1" ~ "WILD TYPE", |
| 201 | 3x |
AVAL == "2" ~ "MUTANT", |
| 202 | 3x |
TRUE ~ "" |
| 203 |
), |
|
| 204 | 3x |
TRUE ~ as.character(AVAL) |
| 205 |
), |
|
| 206 | 3x |
AVALU = dplyr::case_when( |
| 207 | 3x |
PARAMCD == "BWGHTSI" ~ "kg", |
| 208 | 3x |
PARAMCD == "BHGHTSI" ~ "m", |
| 209 | 3x |
PARAMCD == "BBMISI" ~ "kg/m2", |
| 210 | 3x |
TRUE ~ "" |
| 211 |
), |
|
| 212 | 3x |
AVALCAT1 = dplyr::case_when( |
| 213 | 3x |
PARAMCD == "BBMISI" ~ dplyr::case_when( |
| 214 | 3x |
AVAL < 18.5 ~ "<18.5", |
| 215 | 3x |
AVAL >= 18.5 & AVAL < 25 ~ "18.5 - 24.9", |
| 216 | 3x |
AVAL >= 25 & AVAL < 30 ~ "25 - 29.9", |
| 217 | 3x |
TRUE ~ ">30" |
| 218 |
), |
|
| 219 | 3x |
PARAMCD == "BECOG" ~ dplyr::case_when( |
| 220 | 3x |
AVAL <= 1 ~ "0-1", |
| 221 | 3x |
AVAL > 1 & AVAL <= 3 ~ "2-3", |
| 222 | 3x |
TRUE ~ "4-5" |
| 223 |
), |
|
| 224 | 3x |
TRUE ~ "" |
| 225 |
), |
|
| 226 | 3x |
AVISITN = "0", |
| 227 | 3x |
SRCSEQ = "1" |
| 228 |
) %>% |
|
| 229 | 3x |
dplyr::arrange( |
| 230 | 3x |
USUBJID, |
| 231 | 3x |
factor(PARAMCD, levels = c("BWGHTSI", "BHGHTSI", "BBMISI", "BECOG", "BBMRKR1"))
|
| 232 |
) |
|
| 233 | ||
| 234 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) {
|
| 235 | ! |
adsub <- mutate_na(ds = adsub, na_vars = na_vars, na_percentage = na_percentage) |
| 236 |
} |
|
| 237 | ||
| 238 |
# Apply metadata. |
|
| 239 | 3x |
adsub <- apply_metadata(adsub, "metadata/ADSUB.yml") |
| 240 | ||
| 241 | 3x |
return(adsub) |
| 242 |
} |
| 1 |
#' Vital Signs Analysis Dataset (ADVS) |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Function for generating a random Vital Signs Analysis Dataset for a given |
|
| 6 |
#' Subject-Level Analysis Dataset. |
|
| 7 |
#' |
|
| 8 |
#' @details One record per subject per parameter per analysis visit per analysis date. |
|
| 9 |
#' |
|
| 10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `BASETYPE`, `AVISITN`, `ATPTN`, `DTYPE`, `ADTM`, `VSSEQ`, `ASPID` |
|
| 11 |
#' |
|
| 12 |
#' @inheritParams argument_convention |
|
| 13 |
#' @template param_cached |
|
| 14 |
#' @templateVar data advs |
|
| 15 |
#' |
|
| 16 |
#' @return `data.frame` |
|
| 17 |
#' @export |
|
| 18 |
#' |
|
| 19 |
#' @author npaszty |
|
| 20 |
#' |
|
| 21 |
#' @examples |
|
| 22 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
| 23 |
#' |
|
| 24 |
#' advs <- radvs(adsl, visit_format = "WEEK", n_assessments = 7L, seed = 2) |
|
| 25 |
#' advs |
|
| 26 |
#' |
|
| 27 |
#' advs <- radvs(adsl, visit_format = "CYCLE", n_assessments = 3L, seed = 2) |
|
| 28 |
#' advs |
|
| 29 |
radvs <- function(adsl, |
|
| 30 |
param = c( |
|
| 31 |
"Diastolic Blood Pressure", |
|
| 32 |
"Pulse Rate", |
|
| 33 |
"Respiratory Rate", |
|
| 34 |
"Systolic Blood Pressure", |
|
| 35 |
"Temperature", "Weight" |
|
| 36 |
), |
|
| 37 |
paramcd = c("DIABP", "PULSE", "RESP", "SYSBP", "TEMP", "WEIGHT"),
|
|
| 38 |
paramu = c("Pa", "beats/min", "breaths/min", "Pa", "C", "Kg"),
|
|
| 39 |
visit_format = "WEEK", |
|
| 40 |
n_assessments = 5L, |
|
| 41 |
n_days = 5L, |
|
| 42 |
seed = NULL, |
|
| 43 |
na_percentage = 0, |
|
| 44 |
na_vars = list( |
|
| 45 |
CHG2 = c(1235, 0.1), PCHG2 = c(1235, 0.1), CHG = c(1234, 0.1), PCHG = c(1234, 0.1), |
|
| 46 |
AVAL = c(123, 0.1), AVALU = c(123, 0.1) |
|
| 47 |
), |
|
| 48 |
cached = FALSE) {
|
|
| 49 | 4x |
checkmate::assert_flag(cached) |
| 50 | 4x |
if (cached) {
|
| 51 | 1x |
return(get_cached_data("cadvs"))
|
| 52 |
} |
|
| 53 | ||
| 54 | 3x |
checkmate::assert_data_frame(adsl) |
| 55 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
| 56 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
| 57 | 3x |
checkmate::assert_character(paramu, min.len = 1, any.missing = FALSE) |
| 58 | 3x |
checkmate::assert_string(visit_format) |
| 59 | 3x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
| 60 | 3x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
| 61 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 62 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
| 63 | 3x |
checkmate::assert_true(na_percentage < 1) |
| 64 | ||
| 65 |
# validate and initialize param vectors |
|
| 66 | 3x |
param_init_list <- relvar_init(param, paramcd) |
| 67 | 3x |
unit_init_list <- relvar_init(param, paramu) |
| 68 | ||
| 69 | 3x |
if (!is.null(seed)) {
|
| 70 | 3x |
set.seed(seed) |
| 71 |
} |
|
| 72 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
| 73 | ||
| 74 | 3x |
advs <- expand.grid( |
| 75 | 3x |
STUDYID = unique(adsl$STUDYID), |
| 76 | 3x |
USUBJID = adsl$USUBJID, |
| 77 | 3x |
PARAM = as.factor(param_init_list$relvar1), |
| 78 | 3x |
AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments), |
| 79 | 3x |
stringsAsFactors = FALSE |
| 80 |
) |
|
| 81 | ||
| 82 | 3x |
advs <- dplyr::mutate( |
| 83 | 3x |
advs, |
| 84 | 3x |
AVISITN = dplyr::case_when( |
| 85 | 3x |
AVISIT == "SCREENING" ~ -1, |
| 86 | 3x |
AVISIT == "BASELINE" ~ 0, |
| 87 | 3x |
(grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
|
| 88 | 3x |
TRUE ~ NA_real_ |
| 89 |
) |
|
| 90 |
) |
|
| 91 | ||
| 92 | 3x |
advs$VSCAT <- "VITAL SIGNS" |
| 93 | ||
| 94 |
# assign related variable values: PARAMxPARAMCD are related |
|
| 95 | 3x |
advs <- advs %>% rel_var( |
| 96 | 3x |
var_name = "PARAMCD", |
| 97 | 3x |
related_var = "PARAM", |
| 98 | 3x |
var_values = param_init_list$relvar2 |
| 99 |
) |
|
| 100 | ||
| 101 |
# assign related variable values: PARAMxAVALU are related |
|
| 102 | 3x |
advs <- advs %>% rel_var( |
| 103 | 3x |
var_name = "AVALU", |
| 104 | 3x |
related_var = "PARAM", |
| 105 | 3x |
var_values = unit_init_list$relvar2 |
| 106 |
) |
|
| 107 | ||
| 108 | 3x |
advs <- advs %>% |
| 109 | 3x |
dplyr::mutate(VSTESTCD = PARAMCD) %>% |
| 110 | 3x |
dplyr::mutate(VSTEST = PARAM) |
| 111 | ||
| 112 | 3x |
advs <- advs %>% dplyr::mutate(AVAL = dplyr::case_when( |
| 113 | 3x |
PARAMCD == paramcd[1] ~ stats::rnorm(nrow(advs), mean = 100, sd = 20), |
| 114 | 3x |
PARAMCD == paramcd[2] ~ stats::rnorm(nrow(advs), mean = 80, sd = 15), |
| 115 | 3x |
PARAMCD == paramcd[3] ~ stats::rnorm(nrow(advs), mean = 16, sd = 5), |
| 116 | 3x |
PARAMCD == paramcd[4] ~ stats::rnorm(nrow(advs), mean = 150, sd = 30), |
| 117 | 3x |
PARAMCD == paramcd[5] ~ stats::rnorm(nrow(advs), mean = 36.65, sd = 1), |
| 118 | 3x |
PARAMCD == paramcd[6] ~ stats::rnorm(nrow(advs), mean = 70, sd = 20) |
| 119 |
)) |
|
| 120 | ||
| 121 |
# order to prepare for change from screening and baseline values |
|
| 122 | 3x |
advs <- advs[order(advs$STUDYID, advs$USUBJID, advs$PARAMCD, advs$AVISITN), ] |
| 123 | ||
| 124 | 3x |
advs <- Reduce(rbind, lapply(split(advs, advs$USUBJID), function(x) {
|
| 125 | 30x |
x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])] |
| 126 | 30x |
x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "") |
| 127 | 30x |
x$ABLFL <- ifelse( |
| 128 | 30x |
toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE", |
| 129 | 30x |
"Y", |
| 130 | 30x |
ifelse( |
| 131 | 30x |
toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1", |
| 132 | 30x |
"Y", |
| 133 |
"" |
|
| 134 |
) |
|
| 135 |
) |
|
| 136 | 30x |
x |
| 137 |
})) |
|
| 138 | ||
| 139 | 3x |
advs$BASE2 <- retain(advs, advs$AVAL, advs$ABLFL2 == "Y") |
| 140 | 3x |
advs$BASE <- ifelse(advs$ABLFL2 != "Y", retain(advs, advs$AVAL, advs$ABLFL == "Y"), NA) |
| 141 | ||
| 142 | 3x |
advs <- advs %>% |
| 143 | 3x |
dplyr::mutate(CHG2 = AVAL - BASE2) %>% |
| 144 | 3x |
dplyr::mutate(PCHG2 = 100 * (CHG2 / BASE2)) %>% |
| 145 | 3x |
dplyr::mutate(CHG = AVAL - BASE) %>% |
| 146 | 3x |
dplyr::mutate(PCHG = 100 * (CHG / BASE)) %>% |
| 147 | 3x |
dplyr::mutate(ANRLO = dplyr::case_when( |
| 148 | 3x |
PARAMCD == "DIABP" ~ 80, |
| 149 | 3x |
PARAMCD == "PULSE" ~ 60, |
| 150 | 3x |
PARAMCD == "RESP" ~ 12, |
| 151 | 3x |
PARAMCD == "SYSBP" ~ 120, |
| 152 | 3x |
PARAMCD == "TEMP" ~ 36.1, |
| 153 | 3x |
PARAMCD == "WEIGHT" ~ 40 |
| 154 |
)) %>% |
|
| 155 | 3x |
dplyr::mutate(ANRHI = dplyr::case_when( |
| 156 | 3x |
PARAMCD == "DIABP" ~ 120, |
| 157 | 3x |
PARAMCD == "PULSE" ~ 100, |
| 158 | 3x |
PARAMCD == "RESP" ~ 20, |
| 159 | 3x |
PARAMCD == "SYSBP" ~ 180, |
| 160 | 3x |
PARAMCD == "TEMP" ~ 37.2, |
| 161 | 3x |
PARAMCD == "WEIGHT" ~ 100 |
| 162 |
)) %>% |
|
| 163 | 3x |
dplyr::mutate(ANRIND = factor(dplyr::case_when( |
| 164 | 3x |
AVAL < ANRLO ~ "LOW", |
| 165 | 3x |
AVAL > ANRHI ~ "HIGH", |
| 166 | 3x |
TRUE ~ "NORMAL" |
| 167 |
))) %>% |
|
| 168 | 3x |
dplyr::mutate(VSSTRESC = dplyr::case_when( |
| 169 | 3x |
PARAMCD == "DIABP" ~ "<80", |
| 170 | 3x |
PARAMCD == "PULSE" ~ "<60", |
| 171 | 3x |
PARAMCD == "RESP" ~ ">20", |
| 172 | 3x |
PARAMCD == "SYSBP" ~ ">180", |
| 173 | 3x |
PARAMCD == "TEMP" ~ "<36.1", |
| 174 | 3x |
PARAMCD == "WEIGHT" ~ "<40" |
| 175 |
)) %>% |
|
| 176 | 3x |
dplyr::rowwise() %>% |
| 177 | 3x |
dplyr::mutate(LOQFL = factor( |
| 178 | 3x |
ifelse(eval(parse(text = paste(AVAL, VSSTRESC))), "Y", "N") |
| 179 |
)) %>% |
|
| 180 | 3x |
dplyr::ungroup() %>% |
| 181 | 3x |
dplyr::mutate(BASETYPE = "LAST") %>% |
| 182 | 3x |
dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>% |
| 183 | 3x |
dplyr::mutate(BNRIND = ANRIND[ABLFL == "Y"]) %>% |
| 184 | 3x |
dplyr::ungroup() %>% |
| 185 | 3x |
dplyr::mutate(ATPTN = 1) %>% |
| 186 | 3x |
dplyr::mutate(DTYPE = NA) %>% |
| 187 | 3x |
rcd_var_relabel( |
| 188 | 3x |
USUBJID = attr(adsl$USUBJID, "label"), |
| 189 | 3x |
STUDYID = attr(adsl$STUDYID, "label") |
| 190 |
) |
|
| 191 | ||
| 192 | 3x |
advs <- rcd_var_relabel( |
| 193 | 3x |
advs, |
| 194 | 3x |
STUDYID = "Study Identifier", |
| 195 | 3x |
USUBJID = "Unique Subject Identifier" |
| 196 |
) |
|
| 197 | ||
| 198 |
# merge ADSL to be able to add LB date and study day variables |
|
| 199 | 3x |
advs <- dplyr::inner_join( |
| 200 | 3x |
advs, |
| 201 | 3x |
adsl, |
| 202 | 3x |
by = c("STUDYID", "USUBJID")
|
| 203 |
) %>% |
|
| 204 | 3x |
dplyr::rowwise() %>% |
| 205 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
| 206 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
| 207 | 3x |
TRUE ~ TRTEDTM |
| 208 |
))) %>% |
|
| 209 | 3x |
dplyr::ungroup() |
| 210 | ||
| 211 | 3x |
advs <- advs %>% |
| 212 | 3x |
dplyr::group_by(USUBJID) %>% |
| 213 | 3x |
dplyr::arrange(USUBJID, AVISITN) %>% |
| 214 | 3x |
dplyr::mutate(ADTM = rep( |
| 215 | 3x |
sort(sample( |
| 216 | 3x |
seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"), |
| 217 | 3x |
size = nlevels(AVISIT) |
| 218 |
)), |
|
| 219 | 3x |
each = n() / nlevels(AVISIT) |
| 220 |
)) %>% |
|
| 221 | 3x |
dplyr::ungroup() %>% |
| 222 | 3x |
dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>% |
| 223 | 3x |
dplyr::select(-TRTENDT) %>% |
| 224 | 3x |
dplyr::arrange(STUDYID, USUBJID, ADTM) |
| 225 | ||
| 226 | 3x |
advs <- advs %>% dplyr::mutate(ONTRTFL = factor(dplyr::case_when( |
| 227 | 3x |
!AVISIT %in% c("SCREENING", "BASELINE") ~ "Y",
|
| 228 | 3x |
TRUE ~ "" |
| 229 |
))) |
|
| 230 | ||
| 231 | 3x |
advs <- advs %>% |
| 232 | 3x |
dplyr::mutate(ASPID = sample(seq_len(dplyr::n()))) %>% |
| 233 | 3x |
dplyr::group_by(USUBJID) %>% |
| 234 | 3x |
dplyr::mutate(VSSEQ = seq_len(dplyr::n())) %>% |
| 235 | 3x |
dplyr::mutate(ASEQ = VSSEQ) %>% |
| 236 | 3x |
dplyr::ungroup() %>% |
| 237 | 3x |
dplyr::arrange( |
| 238 | 3x |
STUDYID, |
| 239 | 3x |
USUBJID, |
| 240 | 3x |
PARAMCD, |
| 241 | 3x |
BASETYPE, |
| 242 | 3x |
AVISITN, |
| 243 | 3x |
ATPTN, |
| 244 | 3x |
DTYPE, |
| 245 | 3x |
ADTM, |
| 246 | 3x |
VSSEQ, |
| 247 | 3x |
ASPID |
| 248 |
) |
|
| 249 | ||
| 250 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) {
|
| 251 | ! |
advs <- mutate_na(ds = advs, na_vars = na_vars, na_percentage = na_percentage) |
| 252 |
} |
|
| 253 | ||
| 254 |
# apply metadata |
|
| 255 | 3x |
advs <- apply_metadata(advs, "metadata/ADVS.yml") |
| 256 | ||
| 257 | 3x |
return(advs) |
| 258 |
} |
| 1 |
#' Load Cached Data |
|
| 2 |
#' |
|
| 3 |
#' Return data attached to package. |
|
| 4 |
#' |
|
| 5 |
#' @keywords internal |
|
| 6 |
#' @noRd |
|
| 7 |
get_cached_data <- function(dataname) {
|
|
| 8 | 22x |
checkmate::assert_string(dataname) |
| 9 | 22x |
if (!("package:random.cdisc.data" %in% search())) {
|
| 10 | 1x |
stop("cached data can only be loaded if the random.cdisc.data package is attached.",
|
| 11 | 1x |
"Please run library(random.cdisc.data) before loading cached data.", |
| 12 | 1x |
call. = FALSE |
| 13 |
) |
|
| 14 |
} else {
|
|
| 15 | 21x |
get(dataname, envir = asNamespace("random.cdisc.data"))
|
| 16 |
} |
|
| 17 |
} |
|
| 18 | ||
| 19 |
#' Create a Factor with Random Elements of x |
|
| 20 |
#' |
|
| 21 |
#' Sample elements from `x` with replacement to build a factor. |
|
| 22 |
#' |
|
| 23 |
#' @param x (`character vector` or `factor`)\cr If character vector then it is also used |
|
| 24 |
#' as levels of the returned factor. If factor then the levels are used as the new levels. |
|
| 25 |
#' @param N (`numeric`)\cr Number of items to choose. |
|
| 26 |
#' @param ... Additional arguments to be passed to `sample`. |
|
| 27 |
#' |
|
| 28 |
#' @return A factor of length `N`. |
|
| 29 |
#' @export |
|
| 30 |
#' |
|
| 31 |
#' @examples |
|
| 32 |
#' sample_fct(letters[1:3], 10) |
|
| 33 |
#' sample_fct(iris$Species, 10) |
|
| 34 |
sample_fct <- function(x, N, ...) { # nolint
|
|
| 35 | 318x |
checkmate::assert_number(N) |
| 36 | ||
| 37 | 318x |
factor(sample(x, N, replace = TRUE, ...), levels = if (is.factor(x)) levels(x) else x) |
| 38 |
} |
|
| 39 | ||
| 40 |
#' Related Variables: Initialize |
|
| 41 |
#' |
|
| 42 |
#' Verify and initialize related variable values. |
|
| 43 |
#' For example, `relvar_init("Alanine Aminotransferase Measurement", "ALT")`.
|
|
| 44 |
#' |
|
| 45 |
#' @param relvar1 (`list` of `character`)\cr List of n elements. |
|
| 46 |
#' @param relvar2 (`list` of `character`)\cr List of n elements. |
|
| 47 |
#' |
|
| 48 |
#' @return A vector of n elements. |
|
| 49 |
#' |
|
| 50 |
#' @keywords internal |
|
| 51 |
relvar_init <- function(relvar1, relvar2) {
|
|
| 52 | 64x |
checkmate::assert_character(relvar1, min.len = 1, any.missing = FALSE) |
| 53 | 64x |
checkmate::assert_character(relvar2, min.len = 1, any.missing = FALSE) |
| 54 | ||
| 55 | 64x |
if (length(relvar1) != length(relvar2)) {
|
| 56 | 1x |
message(simpleError( |
| 57 | 1x |
"The argument value length of relvar1 and relvar2 differ. They must contain the same number of elements." |
| 58 |
)) |
|
| 59 | ! |
return(NA) |
| 60 |
} |
|
| 61 | 63x |
return(list("relvar1" = relvar1, "relvar2" = relvar2))
|
| 62 |
} |
|
| 63 | ||
| 64 |
#' Related Variables: Assign |
|
| 65 |
#' |
|
| 66 |
#' Assign values to a related variable within a domain. |
|
| 67 |
#' |
|
| 68 |
#' @param df (`data.frame`)\cr Data frame containing the related variables. |
|
| 69 |
#' @param var_name (`character`)\cr Name of variable related to `rel_var` to add to `df`. |
|
| 70 |
#' @param var_values (`any`)\cr Vector of values related to values of `related_var`. |
|
| 71 |
#' @param related_var (`character`)\cr Name of variable within `df` with values to which values |
|
| 72 |
#' of `var_name` must relate. |
|
| 73 |
#' |
|
| 74 |
#' @return `df` with added factor variable `var_name` containing `var_values` corresponding to `related_var`. |
|
| 75 |
#' @export |
|
| 76 |
#' |
|
| 77 |
#' @examples |
|
| 78 |
#' # Example with data.frame. |
|
| 79 |
#' params <- c("Level A", "Level B", "Level C")
|
|
| 80 |
#' adlb_df <- data.frame( |
|
| 81 |
#' ID = 1:9, |
|
| 82 |
#' PARAM = factor( |
|
| 83 |
#' rep(c("Level A", "Level B", "Level C"), 3),
|
|
| 84 |
#' levels = params |
|
| 85 |
#' ) |
|
| 86 |
#' ) |
|
| 87 |
#' rel_var( |
|
| 88 |
#' df = adlb_df, |
|
| 89 |
#' var_name = "PARAMCD", |
|
| 90 |
#' var_values = c("A", "B", "C"),
|
|
| 91 |
#' related_var = "PARAM" |
|
| 92 |
#' ) |
|
| 93 |
#' |
|
| 94 |
#' # Example with tibble. |
|
| 95 |
#' adlb_tbl <- tibble::tibble( |
|
| 96 |
#' ID = 1:9, |
|
| 97 |
#' PARAM = factor( |
|
| 98 |
#' rep(c("Level A", "Level B", "Level C"), 3),
|
|
| 99 |
#' levels = params |
|
| 100 |
#' ) |
|
| 101 |
#' ) |
|
| 102 |
#' rel_var( |
|
| 103 |
#' df = adlb_tbl, |
|
| 104 |
#' var_name = "PARAMCD", |
|
| 105 |
#' var_values = c("A", "B", "C"),
|
|
| 106 |
#' related_var = "PARAM" |
|
| 107 |
#' ) |
|
| 108 |
rel_var <- function(df, var_name, related_var, var_values = NULL) {
|
|
| 109 | 64x |
checkmate::assert_data_frame(df) |
| 110 | 64x |
checkmate::assert_string(var_name) |
| 111 | 64x |
checkmate::assert_string(related_var) |
| 112 | 64x |
n_relvar1 <- length(unique(df[, related_var, drop = TRUE])) |
| 113 | 64x |
checkmate::assert_vector(var_values, null.ok = TRUE, len = n_relvar1, any.missing = FALSE) |
| 114 | 1x |
if (is.null(var_values)) var_values <- rep(NA, n_relvar1) |
| 115 | ||
| 116 | 64x |
relvar1 <- unique(df[, related_var, drop = TRUE]) |
| 117 | 64x |
relvar2_values <- rep(NA, nrow(df)) |
| 118 | 64x |
for (r in seq_len(n_relvar1)) {
|
| 119 | 538x |
matched <- which(df[, related_var, drop = TRUE] == relvar1[r]) |
| 120 | 538x |
relvar2_values[matched] <- var_values[r] |
| 121 |
} |
|
| 122 | 64x |
df[[var_name]] <- factor(relvar2_values) |
| 123 | 64x |
return(df) |
| 124 |
} |
|
| 125 | ||
| 126 |
#' Create Visit Schedule |
|
| 127 |
#' |
|
| 128 |
#' Create a visit schedule as a factor. |
|
| 129 |
#' |
|
| 130 |
#' X number of visits, or X number of cycles and Y number of days. |
|
| 131 |
#' |
|
| 132 |
#' @inheritParams argument_convention |
|
| 133 |
#' |
|
| 134 |
#' @return A factor of length `n_assessments`. |
|
| 135 |
#' @export |
|
| 136 |
#' |
|
| 137 |
#' @examples |
|
| 138 |
#' visit_schedule(visit_format = "WEeK", n_assessments = 10L) |
|
| 139 |
#' visit_schedule(visit_format = "CyCLE", n_assessments = 5L, n_days = 2L) |
|
| 140 |
visit_schedule <- function(visit_format = "WEEK", |
|
| 141 |
n_assessments = 10L, |
|
| 142 |
n_days = 5L) {
|
|
| 143 | 56x |
checkmate::assert_string(visit_format, pattern = "^WEEK$|^CYCLE$", ignore.case = TRUE) |
| 144 | 56x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
| 145 | 56x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
| 146 | ||
| 147 | 56x |
if (toupper(visit_format) == "WEEK") {
|
| 148 |
# numeric vector of n assessments/cycles/days |
|
| 149 | 49x |
assessments <- 1:n_assessments |
| 150 |
# numeric vector for ordering including screening (-1) and baseline (0) place holders |
|
| 151 | 49x |
assessments_ord <- -1:n_assessments |
| 152 |
# character vector of nominal visit values |
|
| 153 | 49x |
visit_values <- c("SCREENING", "BASELINE", paste(toupper(visit_format), assessments, "DAY", (assessments * 7) + 1))
|
| 154 | 7x |
} else if (toupper(visit_format) == "CYCLE") {
|
| 155 | 7x |
cycles <- sort(rep(1:n_assessments, times = 1, each = n_days)) |
| 156 | 7x |
days <- rep(seq(1:n_days), times = n_assessments, each = 1) |
| 157 | 7x |
assessments_ord <- 0:(n_assessments * n_days) |
| 158 | 7x |
visit_values <- c("SCREENING", paste(toupper(visit_format), cycles, "DAY", days))
|
| 159 |
} |
|
| 160 | ||
| 161 |
# create and order factor variable to return from function |
|
| 162 | 56x |
visit_values <- stats::reorder(factor(visit_values), assessments_ord) |
| 163 |
} |
|
| 164 | ||
| 165 |
#' Primary Keys: Retain Values |
|
| 166 |
#' |
|
| 167 |
#' Retain values within primary keys. |
|
| 168 |
#' |
|
| 169 |
#' @param df (`data.frame`)\cr Data frame in which to apply the retain. |
|
| 170 |
#' @param value_var (`any`)\cr Variable in `df` containing the value to be retained. |
|
| 171 |
#' @param event (`expression`)\cr Expression returning a logical value to trigger the retain. |
|
| 172 |
#' @param outside (`any`)\cr Additional value to retain. Defaults to `NA`. |
|
| 173 |
#' @return A vector of values where expression is true. |
|
| 174 |
#' @keywords internal |
|
| 175 |
retain <- function(df, value_var, event, outside = NA) {
|
|
| 176 | 31x |
indices <- c(1, which(event == TRUE), nrow(df) + 1) |
| 177 | 31x |
values <- c(outside, value_var[event == TRUE]) |
| 178 | 31x |
rep(values, diff(indices)) |
| 179 |
} |
|
| 180 | ||
| 181 |
#' Primary Keys: Labels |
|
| 182 |
#' |
|
| 183 |
#' @description Shallow copy of `formatters::var_relabel()`. Used mainly internally to |
|
| 184 |
#' relabel a subset of variables in a data set. |
|
| 185 |
#' |
|
| 186 |
#' @param x (`data.frame`)\cr Data frame containing variables to which labels are applied. |
|
| 187 |
#' @param ... (`named character`)\cr Name-Value pairs, where name corresponds to a variable |
|
| 188 |
#' name in `x` and the value to the new variable label. |
|
| 189 |
#' @return x (`data.frame`)\cr Data frame with labels applied. |
|
| 190 |
#' |
|
| 191 |
#' @keywords internal |
|
| 192 |
rcd_var_relabel <- function(x, ...) {
|
|
| 193 | 79x |
stopifnot(is.data.frame(x)) |
| 194 | 79x |
if (missing(...)) {
|
| 195 | ! |
return(x) |
| 196 |
} |
|
| 197 | 79x |
dots <- list(...) |
| 198 | 79x |
varnames <- names(dots) |
| 199 | 79x |
if (is.null(varnames)) {
|
| 200 | 1x |
stop("missing variable declarations")
|
| 201 |
} |
|
| 202 | 78x |
map_varnames <- match(varnames, colnames(x)) |
| 203 | 78x |
if (any(is.na(map_varnames))) {
|
| 204 | ! |
stop("variables: ", paste(varnames[is.na(map_varnames)], collapse = ", "), " not found")
|
| 205 |
} |
|
| 206 | 78x |
if (any(vapply(dots, Negate(is.character), logical(1)))) {
|
| 207 | ! |
stop("all variable labels must be of type character")
|
| 208 |
} |
|
| 209 | 78x |
for (i in seq_along(map_varnames)) {
|
| 210 | 155x |
attr(x[[map_varnames[[i]]]], "label") <- dots[[i]] |
| 211 |
} |
|
| 212 | 78x |
x |
| 213 |
} |
|
| 214 | ||
| 215 |
#' Apply Metadata |
|
| 216 |
#' |
|
| 217 |
#' Apply label and variable ordering attributes to domains. |
|
| 218 |
#' |
|
| 219 |
#' @param df (`data.frame`)\cr Data frame to which metadata is applied. |
|
| 220 |
#' @param filename (`yaml`)\cr File containing domain metadata. |
|
| 221 |
#' @param add_adsl (`logical`)\cr Should ADSL data be merged to domain. |
|
| 222 |
#' @param adsl_filename (`yaml`)\cr File containing ADSL metadata. |
|
| 223 |
#' @return Data frame with metadata applied. |
|
| 224 |
#' |
|
| 225 |
#' @export |
|
| 226 |
#' @examples |
|
| 227 |
#' seed <- 1 |
|
| 228 |
#' adsl <- radsl(seed = seed) |
|
| 229 |
#' adsub <- radsub(adsl, seed = seed) |
|
| 230 |
#' yaml_path <- file.path(path.package("random.cdisc.data"), "inst", "metadata")
|
|
| 231 |
#' adsl <- apply_metadata(adsl, file.path(yaml_path, "ADSL.yml"), FALSE) |
|
| 232 |
#' adsub <- apply_metadata( |
|
| 233 |
#' adsub, file.path(yaml_path, "ADSUB.yml"), TRUE, |
|
| 234 |
#' file.path(yaml_path, "ADSL.yml") |
|
| 235 |
#' ) |
|
| 236 |
apply_metadata <- function(df, filename, add_adsl = TRUE, adsl_filename = "metadata/ADSL.yml") {
|
|
| 237 | 92x |
checkmate::assert_data_frame(df) |
| 238 | 92x |
checkmate::assert_string(filename) |
| 239 | 92x |
checkmate::assert_flag(add_adsl) |
| 240 | 92x |
checkmate::assert_string(adsl_filename) |
| 241 | ||
| 242 | 92x |
apply_type <- function(df, var, type) {
|
| 243 | 6096x |
if (is.null(type)) {
|
| 244 | ! |
return() |
| 245 |
} |
|
| 246 | ||
| 247 | 6096x |
if (type == "character" && !is.character(df[[var]])) {
|
| 248 | 12x |
df[[var]] <- as.character(df[[var]]) |
| 249 | 6084x |
} else if (type == "factor" && !is.factor(df[[var]])) {
|
| 250 | 752x |
df[[var]] <- as.factor(df[[var]]) |
| 251 | 5332x |
} else if (type == "integer" && !is.integer(df[[var]])) {
|
| 252 | 233x |
df[[var]] <- as.integer(df[[var]]) |
| 253 | 5099x |
} else if (type == "numeric" && !is.numeric(df[[var]])) {
|
| 254 | 3x |
df[[var]] <- as.numeric(df[[var]]) |
| 255 | 5096x |
} else if (type == "logical" && !is.logical(df[[var]])) {
|
| 256 | ! |
df[[var]] <- as.logical(df[[var]]) |
| 257 | 5096x |
} else if (type == "datetime" && !lubridate::is.POSIXct(df[[var]])) {
|
| 258 | 9x |
df[[var]] <- as.POSIXct(df[[var]]) |
| 259 | 5087x |
} else if (type == "date" && !lubridate::is.Date(df[[var]])) {
|
| 260 | ! |
df[[var]] <- as.Date(df[[var]]) |
| 261 |
} |
|
| 262 | 6096x |
return(df) |
| 263 |
} |
|
| 264 | ||
| 265 |
# remove existing attributes |
|
| 266 | 92x |
for (i in base::setdiff(names(attributes(df)), names(attributes(data.frame())))) {
|
| 267 | 3x |
attr(df, i) <- NULL |
| 268 |
} |
|
| 269 | ||
| 270 |
# get metadata |
|
| 271 | 92x |
metadata <- yaml::yaml.load_file(system.file(filename, package = "random.cdisc.data")) |
| 272 | 92x |
adsl_metadata <- if (add_adsl) {
|
| 273 | 64x |
yaml::yaml.load_file(system.file(adsl_filename, package = "random.cdisc.data")) |
| 274 |
} else {
|
|
| 275 | 28x |
NULL |
| 276 |
} |
|
| 277 | 92x |
metadata_variables <- append(adsl_metadata$variables, metadata$variables) |
| 278 | 92x |
metadata_varnames <- names(metadata_variables) |
| 279 | ||
| 280 |
# find variables that does not have labels and are not it metadata |
|
| 281 | 92x |
missing_vars_map <- vapply( |
| 282 | 92x |
names(df), |
| 283 | 92x |
function(x) {
|
| 284 | 6096x |
!(x %in% c("STUDYID", "USUBJID", metadata_varnames)) && is.null(attr(df[[x]], "label"))
|
| 285 |
}, |
|
| 286 | 92x |
logical(1) |
| 287 |
) |
|
| 288 | 92x |
missing_vars <- names(df)[missing_vars_map] |
| 289 | 92x |
if (length(missing_vars) > 0) {
|
| 290 | ! |
msg <- paste0( |
| 291 | ! |
"Following variables does not have label or are not found in ", |
| 292 | ! |
filename, |
| 293 |
": ", |
|
| 294 | ! |
paste0(missing_vars, collapse = ", ") |
| 295 |
) |
|
| 296 | ! |
warning(msg) |
| 297 |
} |
|
| 298 | ||
| 299 | 92x |
if (!all(metadata_varnames %in% names(df))) {
|
| 300 | 6x |
metadata_varnames <- metadata_varnames[metadata_varnames %in% names(df)] |
| 301 |
} |
|
| 302 | ||
| 303 |
# assign labels to variables |
|
| 304 | 92x |
for (var in metadata_varnames) {
|
| 305 | 6096x |
df <- apply_type(df, var, metadata_variables[[var]]$type) |
| 306 | 6096x |
attr(df[[var]], "label") <- metadata_variables[[var]]$label |
| 307 |
} |
|
| 308 | ||
| 309 |
# reorder data frame columns to expected BDS order |
|
| 310 | 92x |
df <- df[, unique(c("STUDYID", "USUBJID", metadata_varnames, names(df)))]
|
| 311 | ||
| 312 |
# assign label to data frame |
|
| 313 | 92x |
attr(df, "label") <- metadata$domain$label |
| 314 | ||
| 315 | 92x |
df |
| 316 |
} |
|
| 317 | ||
| 318 |
#' Replace Values in a Vector by NA |
|
| 319 |
#' |
|
| 320 |
#' @description `r lifecycle::badge("stable")`
|
|
| 321 |
#' |
|
| 322 |
#' Randomized replacement of values by `NA`. |
|
| 323 |
#' |
|
| 324 |
#' @inheritParams argument_convention |
|
| 325 |
#' @param v (`any`)\cr Vector of any type. |
|
| 326 |
#' @param percentage (`proportion`)\cr Value between 0 and 1 defining |
|
| 327 |
#' how much of the vector shall be replaced by `NA`. This number |
|
| 328 |
#' is randomized by +/- 5% to have full randomization. |
|
| 329 |
#' |
|
| 330 |
#' @return The input vector `v` where a certain number of values are replaced by `NA`. |
|
| 331 |
#' |
|
| 332 |
#' @export |
|
| 333 |
replace_na <- function(v, percentage = 0.05, seed = NULL) {
|
|
| 334 | 9x |
checkmate::assert_number(percentage, lower = 0, upper = 1) |
| 335 | ||
| 336 | 9x |
if (percentage == 0) {
|
| 337 | 1x |
return(v) |
| 338 |
} |
|
| 339 | ||
| 340 | 8x |
if (!is.null(seed) && !is.na(seed)) {
|
| 341 | 8x |
set.seed(seed) |
| 342 |
} |
|
| 343 | ||
| 344 |
# randomize the percentage |
|
| 345 | 8x |
ind <- sample(seq_along(v), round(length(v) * percentage)) |
| 346 | ||
| 347 | 8x |
v[ind] <- NA |
| 348 | ||
| 349 | 8x |
return(v) |
| 350 |
} |
|
| 351 | ||
| 352 |
#' Replace Values with NA |
|
| 353 |
#' |
|
| 354 |
#' @description `r lifecycle::badge("stable")`
|
|
| 355 |
#' |
|
| 356 |
#' Replace column values with `NA`s. |
|
| 357 |
#' |
|
| 358 |
#' @inheritParams argument_convention |
|
| 359 |
#' @param ds (`data.frame`)\cr Any data set. |
|
| 360 |
#' |
|
| 361 |
#' @return dataframe without `NA` values. |
|
| 362 |
#' |
|
| 363 |
#' @export |
|
| 364 |
mutate_na <- function(ds, na_vars = NULL, na_percentage = 0.05) {
|
|
| 365 | 5x |
if (!is.null(na_vars)) {
|
| 366 | 4x |
stopifnot(is.list(na_vars)) # any list is OK; as values can be left NA |
| 367 | 4x |
stopifnot(length(names(na_vars)) == length(na_vars)) # names for all elements |
| 368 |
} else {
|
|
| 369 | 1x |
na_vars <- names(ds) |
| 370 |
} |
|
| 371 | ||
| 372 | 5x |
stopifnot(is.numeric(na_percentage)) |
| 373 | 5x |
stopifnot(na_percentage >= 0 && na_percentage < 1) |
| 374 | ||
| 375 | 5x |
for (na_var in names(na_vars)) {
|
| 376 | 8x |
if (!is.na(na_var)) {
|
| 377 | 8x |
if (!na_var %in% names(ds)) {
|
| 378 | 1x |
warning(paste(na_var, "not in column names")) |
| 379 |
} else {
|
|
| 380 | 7x |
ds <- ds %>% |
| 381 | 7x |
ungroup_rowwise_df() %>% |
| 382 | 7x |
dplyr::mutate( |
| 383 | 7x |
!!na_var := ds[[na_var]] %>% |
| 384 | 7x |
replace_na( |
| 385 | 7x |
percentage = ifelse(is.na(na_vars[[na_var]][2]), na_percentage, na_vars[[na_var]][2]), |
| 386 | 7x |
seed = na_vars[[na_var]][1] |
| 387 |
) |
|
| 388 |
) |
|
| 389 |
} |
|
| 390 |
} |
|
| 391 |
} |
|
| 392 | 5x |
return(ds) |
| 393 |
} |
|
| 394 | ||
| 395 |
ungroup_rowwise_df <- function(x) {
|
|
| 396 | 7x |
class(x) <- c("tbl", "tbl_df", "data.frame")
|
| 397 | 7x |
return(x) |
| 398 |
} |
|
| 399 | ||
| 400 |
#' Zero-Truncated Poisson Distribution |
|
| 401 |
#' |
|
| 402 |
#' @description `r lifecycle::badge("stable")`
|
|
| 403 |
#' |
|
| 404 |
#' This generates random numbers from a zero-truncated Poisson distribution, |
|
| 405 |
#' i.e. from `X | X > 0` when `X ~ Poisson(lambda)`. The advantage here is that |
|
| 406 |
#' we guarantee to return exactly `n` numbers and without using a loop internally. |
|
| 407 |
#' This solution was provided in a post by |
|
| 408 |
#' [Peter Dalgaard](https://stat.ethz.ch/pipermail/r-help/2005-May/070680.html). |
|
| 409 |
#' |
|
| 410 |
#' @param n (`numeric`)\cr Number of random numbers. |
|
| 411 |
#' @param lambda (`numeric`)\cr Non-negative mean(s). |
|
| 412 |
#' |
|
| 413 |
#' @return The random numbers. |
|
| 414 |
#' @export |
|
| 415 |
#' |
|
| 416 |
#' @examples |
|
| 417 |
#' x <- rpois(1e6, lambda = 5) |
|
| 418 |
#' x <- x[x > 0] |
|
| 419 |
#' hist(x) |
|
| 420 |
#' |
|
| 421 |
#' y <- rtpois(1e6, lambda = 5) |
|
| 422 |
#' hist(y) |
|
| 423 |
rtpois <- function(n, lambda) {
|
|
| 424 | 121x |
stats::qpois(stats::runif(n, stats::dpois(0, lambda), 1), lambda) |
| 425 |
} |
|
| 426 | ||
| 427 |
#' Truncated Exponential Distribution |
|
| 428 |
#' |
|
| 429 |
#' @description `r lifecycle::badge("stable")`
|
|
| 430 |
#' |
|
| 431 |
#' This generates random numbers from a truncated Exponential distribution, |
|
| 432 |
#' i.e. from `X | X > l` or `X | X < r` when `X ~ Exp(rate)`. The advantage here is that |
|
| 433 |
#' we guarantee to return exactly `n` numbers and without using a loop internally. |
|
| 434 |
#' This can be derived from the quantile functions of the left- and right-truncated |
|
| 435 |
#' Exponential distributions. |
|
| 436 |
#' |
|
| 437 |
#' @param n (`numeric`)\cr Number of random numbers. |
|
| 438 |
#' @param rate (`numeric`)\cr Non-negative rate. |
|
| 439 |
#' @param l (`numeric`)\cr Positive left-hand truncation parameter. |
|
| 440 |
#' @param r (`numeric`)\cr Positive right-hand truncation parameter. |
|
| 441 |
#' |
|
| 442 |
#' @return The random numbers. If neither `l` nor `r` are provided then the usual Exponential |
|
| 443 |
#' distribution is used. |
|
| 444 |
#' @export |
|
| 445 |
#' |
|
| 446 |
#' @examples |
|
| 447 |
#' x <- stats::rexp(1e6, rate = 5) |
|
| 448 |
#' x <- x[x > 0.5] |
|
| 449 |
#' hist(x) |
|
| 450 |
#' |
|
| 451 |
#' y <- rtexp(1e6, rate = 5, l = 0.5) |
|
| 452 |
#' hist(y) |
|
| 453 |
#' |
|
| 454 |
#' z <- rtexp(1e6, rate = 5, r = 0.5) |
|
| 455 |
#' hist(z) |
|
| 456 |
rtexp <- function(n, rate, l = NULL, r = NULL) {
|
|
| 457 | 123x |
if (!is.null(l)) {
|
| 458 | 1x |
l - log(1 - stats::runif(n)) / rate |
| 459 | 122x |
} else if (!is.null(r)) {
|
| 460 | 121x |
-log(1 - stats::runif(n) * (1 - exp(-r * rate))) / rate |
| 461 |
} else {
|
|
| 462 | 1x |
stats::rexp(n, rate) |
| 463 |
} |
|
| 464 |
} |
| 1 |
#' Previous and Concomitant Medications Analysis Dataset (ADCM) |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Function for generating random Concomitant Medication Analysis Dataset for a given |
|
| 6 |
#' Subject-Level Analysis Dataset. |
|
| 7 |
#' |
|
| 8 |
#' @details One record per each record in the corresponding SDTM domain. |
|
| 9 |
#' |
|
| 10 |
#' Keys: `STUDYID`, `USUBJID`, `ASTDTM`, `CMSEQ` |
|
| 11 |
#' |
|
| 12 |
#' @inheritParams argument_convention |
|
| 13 |
#' @param max_n_cms (`integer`)\cr Maximum number of concomitant medications per patient. Defaults to 10. |
|
| 14 |
#' @param who_coding (`flag`)\cr Whether WHO coding (with multiple paths per medication) should be used. |
|
| 15 |
#' @template param_cached |
|
| 16 |
#' @templateVar data adcm |
|
| 17 |
#' |
|
| 18 |
#' @return `data.frame` |
|
| 19 |
#' @export |
|
| 20 |
#' |
|
| 21 |
#' @examples |
|
| 22 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
| 23 |
#' |
|
| 24 |
#' adcm <- radcm(adsl, seed = 2) |
|
| 25 |
#' adcm |
|
| 26 |
#' |
|
| 27 |
#' adcm_who <- radcm(adsl, seed = 2, who_coding = TRUE) |
|
| 28 |
#' adcm_who |
|
| 29 |
radcm <- function(adsl, |
|
| 30 |
max_n_cms = 10L, |
|
| 31 |
lookup = NULL, |
|
| 32 |
seed = NULL, |
|
| 33 |
na_percentage = 0, |
|
| 34 |
na_vars = list(CMCLAS = c(NA, 0.1), CMDECOD = c(1234, 0.1), ATIREL = c(1234, 0.1)), |
|
| 35 |
who_coding = FALSE, |
|
| 36 |
cached = FALSE) {
|
|
| 37 | 5x |
checkmate::assert_flag(cached) |
| 38 | 5x |
if (cached) {
|
| 39 | 1x |
return(get_cached_data("cadcm"))
|
| 40 |
} |
|
| 41 | ||
| 42 | 4x |
checkmate::assert_data_frame(adsl) |
| 43 | 4x |
checkmate::assert_integer(max_n_cms, len = 1, any.missing = FALSE) |
| 44 | 4x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 45 | 4x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
| 46 | 4x |
checkmate::assert_true(na_percentage < 1) |
| 47 | 4x |
checkmate::assert_flag(who_coding) |
| 48 | ||
| 49 | 4x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
| 50 | 4x |
lookup_cm <- if (!is.null(lookup)) {
|
| 51 | ! |
lookup |
| 52 |
} else {
|
|
| 53 | 4x |
tibble::tribble( |
| 54 | 4x |
~CMCLAS, ~CMDECOD, ~ATIREL, |
| 55 | 4x |
"medcl A", "medname A_1/3", "PRIOR", |
| 56 | 4x |
"medcl A", "medname A_2/3", "CONCOMITANT", |
| 57 | 4x |
"medcl A", "medname A_3/3", "CONCOMITANT", |
| 58 | 4x |
"medcl B", "medname B_1/4", "CONCOMITANT", |
| 59 | 4x |
"medcl B", "medname B_2/4", "PRIOR", |
| 60 | 4x |
"medcl B", "medname B_3/4", "PRIOR", |
| 61 | 4x |
"medcl B", "medname B_4/4", "CONCOMITANT", |
| 62 | 4x |
"medcl C", "medname C_1/2", "CONCOMITANT", |
| 63 | 4x |
"medcl C", "medname C_2/2", "CONCOMITANT" |
| 64 |
) |
|
| 65 |
} |
|
| 66 | ||
| 67 | 4x |
if (!is.null(seed)) {
|
| 68 | 3x |
set.seed(seed) |
| 69 |
} |
|
| 70 | 4x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
| 71 | ||
| 72 | 4x |
adcm <- Map(function(id, sid) {
|
| 73 | 430x |
n_cms <- sample(c(0, seq_len(max_n_cms)), 1) |
| 74 | 430x |
i <- sample(seq_len(nrow(lookup_cm)), n_cms, TRUE) |
| 75 | 430x |
dplyr::mutate( |
| 76 | 430x |
lookup_cm[i, ], |
| 77 | 430x |
USUBJID = id, |
| 78 | 430x |
STUDYID = sid |
| 79 |
) |
|
| 80 | 4x |
}, adsl$USUBJID, adsl$STUDYID) %>% |
| 81 | 4x |
Reduce(rbind, .) %>% |
| 82 | 4x |
`[`(c(4, 5, 1, 2, 3)) %>% |
| 83 | 4x |
dplyr::mutate(CMCAT = CMCLAS) |
| 84 | ||
| 85 | 4x |
adcm <- rcd_var_relabel( |
| 86 | 4x |
adcm, |
| 87 | 4x |
STUDYID = "Study Identifier", |
| 88 | 4x |
USUBJID = "Unique Subject Identifier" |
| 89 |
) |
|
| 90 | ||
| 91 |
# merge ADSL to be able to add CM date and study day variables |
|
| 92 | 4x |
adcm <- dplyr::inner_join( |
| 93 | 4x |
adcm, |
| 94 | 4x |
adsl, |
| 95 | 4x |
by = c("STUDYID", "USUBJID")
|
| 96 |
) %>% |
|
| 97 | 4x |
dplyr::rowwise() %>% |
| 98 | 4x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
| 99 | 4x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
| 100 | 4x |
TRUE ~ TRTEDTM |
| 101 |
))) %>% |
|
| 102 | 4x |
dplyr::mutate(ASTDTM = sample( |
| 103 | 4x |
seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"), |
| 104 | 4x |
size = 1 |
| 105 |
)) %>% |
|
| 106 | 4x |
dplyr::mutate(ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))) %>% |
| 107 |
# add 1 to end of range incase both values passed to sample() are the same |
|
| 108 | 4x |
dplyr::mutate(AENDTM = sample( |
| 109 | 4x |
seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"), |
| 110 | 4x |
size = 1 |
| 111 |
)) %>% |
|
| 112 | 4x |
dplyr::mutate(AENDY = ceiling(difftime(AENDTM, TRTSDTM, units = "days"))) %>% |
| 113 | 4x |
dplyr::select(-TRTENDT) %>% |
| 114 | 4x |
dplyr::ungroup() %>% |
| 115 | 4x |
dplyr::arrange(STUDYID, USUBJID, ASTDTM) |
| 116 | ||
| 117 | 4x |
adcm <- adcm %>% |
| 118 | 4x |
dplyr::group_by(USUBJID) %>% |
| 119 | 4x |
dplyr::mutate(CMSEQ = seq_len(dplyr::n())) %>% |
| 120 | 4x |
dplyr::mutate(ASEQ = CMSEQ) %>% |
| 121 | 4x |
dplyr::ungroup() %>% |
| 122 | 4x |
dplyr::arrange(STUDYID, USUBJID, ASTDTM, CMSEQ) %>% |
| 123 | 4x |
dplyr::mutate( |
| 124 | 4x |
ATC1 = paste("ATCCLAS1", substr(CMDECOD, 9, 9)),
|
| 125 | 4x |
ATC2 = paste("ATCCLAS2", substr(CMDECOD, 9, 9)),
|
| 126 | 4x |
ATC3 = paste("ATCCLAS3", substr(CMDECOD, 9, 9)),
|
| 127 | 4x |
ATC4 = paste("ATCCLAS4", substr(CMDECOD, 9, 9))
|
| 128 |
) %>% |
|
| 129 | 4x |
dplyr::mutate(CMINDC = sample(c( |
| 130 | 4x |
"Nausea", "Hypertension", "Urticaria", "Fever", |
| 131 | 4x |
"Asthma", "Infection", "Diabete", "Diarrhea", "Pneumonia" |
| 132 | 4x |
), dplyr::n(), replace = TRUE)) %>% |
| 133 | 4x |
dplyr::mutate(CMDOSE = sample(1:99, dplyr::n(), replace = TRUE)) %>% |
| 134 | 4x |
dplyr::mutate(CMTRT = substr(CMDECOD, 9, 13)) %>% |
| 135 | 4x |
dplyr::mutate(CMDOSU = sample(c( |
| 136 | 4x |
"ug/mL", "ug/kg/day", "%", "uL", "DROP", |
| 137 | 4x |
"umol/L", "mg", "mg/breath", "ug" |
| 138 | 4x |
), dplyr::n(), replace = TRUE)) %>% |
| 139 | 4x |
dplyr::mutate(CMROUTE = sample(c( |
| 140 | 4x |
"INTRAVENOUS", "ORAL", "NASAL", |
| 141 | 4x |
"INTRAMUSCULAR", "SUBCUTANEOUS", "INHALED", "RECTAL", "UNKNOWN" |
| 142 | 4x |
), dplyr::n(), replace = TRUE)) %>% |
| 143 | 4x |
dplyr::mutate(CMDOSFRQ = sample(c( |
| 144 | 4x |
"Q4W", "QN", "Q4H", "UNKNOWN", "TWICE", |
| 145 | 4x |
"Q4H", "QD", "TID", "4 TIMES PER MONTH" |
| 146 | 4x |
), dplyr::n(), replace = TRUE)) %>% |
| 147 | 4x |
dplyr::mutate( |
| 148 |
# use 1 year as reference time point |
|
| 149 | 4x |
CMSTRTPT = dplyr::case_when( |
| 150 | 4x |
ASTDY <= 365 ~ "BEFORE", |
| 151 | 4x |
ASTDY > 365 ~ "AFTER", |
| 152 | 4x |
is.na(ASTDY) ~ "U" |
| 153 |
), |
|
| 154 | 4x |
CMENRTPT = dplyr::case_when( |
| 155 | 4x |
EOSSTT %in% c("COMPLETED", "DISCONTINUED") ~ "BEFORE",
|
| 156 | 4x |
EOSSTT == "ONGOING" ~ "ONGOING", |
| 157 | 4x |
is.na(EOSSTT) ~ "U" |
| 158 |
), |
|
| 159 | 4x |
ADURN = as.numeric(difftime(ASTDTM, AENDTM, units = "days")), |
| 160 | 4x |
ADURU = "days" |
| 161 |
) |
|
| 162 | ||
| 163 | ||
| 164 |
# Optional WHO coding, which adds more `ATC` paths for randomly selected `CMDECOD`. |
|
| 165 | 4x |
if (who_coding) {
|
| 166 | 1x |
n_cmdecod_path2 <- ceiling(nrow(lookup_cm) / 2) |
| 167 | 1x |
cmdecod_path2 <- sample(lookup_cm$CMDECOD, n_cmdecod_path2) |
| 168 | 1x |
adcm_path2 <- adcm %>% |
| 169 | 1x |
dplyr::filter(CMDECOD %in% cmdecod_path2) %>% |
| 170 | 1x |
dplyr::mutate( |
| 171 | 1x |
ATC1 = paste(ATC1, "p2"), |
| 172 | 1x |
ATC2 = paste(ATC2, "p2"), |
| 173 | 1x |
ATC3 = paste(ATC3, "p2"), |
| 174 | 1x |
ATC4 = paste(ATC4, "p2") |
| 175 |
) |
|
| 176 | ||
| 177 | 1x |
n_cmdecod_path3 <- ceiling(length(cmdecod_path2) / 2) |
| 178 | 1x |
cmdecod_path3 <- sample(cmdecod_path2, n_cmdecod_path3) |
| 179 | 1x |
adcm_path3 <- adcm %>% |
| 180 | 1x |
dplyr::filter(CMDECOD %in% cmdecod_path3) %>% |
| 181 | 1x |
dplyr::mutate( |
| 182 | 1x |
ATC1 = paste(ATC1, "p3"), |
| 183 | 1x |
ATC2 = paste(ATC2, "p3"), |
| 184 | 1x |
ATC3 = paste(ATC3, "p3"), |
| 185 | 1x |
ATC4 = paste(ATC4, "p3") |
| 186 |
) |
|
| 187 | ||
| 188 | 1x |
adcm <- dplyr::bind_rows( |
| 189 | 1x |
adcm, |
| 190 | 1x |
adcm_path2, |
| 191 | 1x |
adcm_path3 |
| 192 |
) |
|
| 193 |
} |
|
| 194 | ||
| 195 | 4x |
adcm <- adcm %>% |
| 196 | 4x |
dplyr::mutate( |
| 197 | 4x |
ATC1CD = ATC1, |
| 198 | 4x |
ATC2CD = ATC2, |
| 199 | 4x |
ATC3CD = ATC3, |
| 200 | 4x |
ATC4CD = ATC4 |
| 201 |
) |
|
| 202 | ||
| 203 | 4x |
if (length(na_vars) > 0 && na_percentage > 0) {
|
| 204 | ! |
adcm <- mutate_na(ds = adcm, na_vars = na_vars, na_percentage = na_percentage) |
| 205 |
} |
|
| 206 | ||
| 207 |
# apply metadata |
|
| 208 | 4x |
adcm <- apply_metadata(adcm, "metadata/ADCM.yml") |
| 209 | ||
| 210 | 4x |
return(adcm) |
| 211 |
} |
| 1 |
#' Subject-Level Analysis Dataset (ADSL) |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' The Subject-Level Analysis Dataset (ADSL) is used to provide the variables |
|
| 6 |
#' that describe attributes of a subject. ADSL is a source for subject-level |
|
| 7 |
#' variables used in other analysis data sets, such as population flags and |
|
| 8 |
#' treatment variables. There is only one ADSL per study. ADSL and its related |
|
| 9 |
#' metadata are required in a CDISC-based submission of data from a clinical |
|
| 10 |
#' trial even if no other analysis data sets are submitted. |
|
| 11 |
#' |
|
| 12 |
#' @details One record per subject. |
|
| 13 |
#' |
|
| 14 |
#' Keys: `STUDYID`, `USUBJID` |
|
| 15 |
#' |
|
| 16 |
#' @inheritParams argument_convention |
|
| 17 |
#' @param N (`numeric`)\cr Number of patients. |
|
| 18 |
#' @param study_duration (`numeric`)\cr Duration of study in years. |
|
| 19 |
#' @param with_trt02 (`logical`)\cr Should period 2 be added. |
|
| 20 |
#' @param ae_withdrawal_prob (`proportion`)\cr Probability that there is at least one |
|
| 21 |
#' Adverse Event leading to the withdrawal of a study drug. |
|
| 22 |
#' @param female_prob (`proportion`)\cr Probability of a subject being female, male is calculated by `1-female_prob`. |
|
| 23 |
#' @template param_cached |
|
| 24 |
#' @templateVar data adsl |
|
| 25 |
#' |
|
| 26 |
#' @return `data.frame` |
|
| 27 |
#' @export |
|
| 28 |
# |
|
| 29 |
#' @examples |
|
| 30 |
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1) |
|
| 31 |
#' adsl |
|
| 32 |
#' |
|
| 33 |
#' adsl <- radsl( |
|
| 34 |
#' N = 10, seed = 1, |
|
| 35 |
#' na_percentage = 0.1, |
|
| 36 |
#' na_vars = list( |
|
| 37 |
#' DTHDT = c(seed = 1234, percentage = 0.1), |
|
| 38 |
#' LSTALVDT = c(seed = 1234, percentage = 0.1) |
|
| 39 |
#' ) |
|
| 40 |
#' ) |
|
| 41 |
#' adsl |
|
| 42 |
#' |
|
| 43 |
#' adsl <- radsl(N = 10, seed = 1, na_percentage = .1) |
|
| 44 |
#' adsl |
|
| 45 |
radsl <- function(N = 400, # nolint |
|
| 46 |
study_duration = 2, |
|
| 47 |
seed = NULL, |
|
| 48 |
with_trt02 = TRUE, |
|
| 49 |
na_percentage = 0, |
|
| 50 |
na_vars = list( |
|
| 51 |
"AGE" = NA, "SEX" = NA, "RACE" = NA, "STRATA1" = NA, "STRATA2" = NA, |
|
| 52 |
"BMRKR1" = c(seed = 1234, percentage = 0.1), "BMRKR2" = c(1234, 0.1), "BEP01FL" = NA |
|
| 53 |
), |
|
| 54 |
ae_withdrawal_prob = 0.05, |
|
| 55 |
female_prob = 0.52, |
|
| 56 |
cached = FALSE) {
|
|
| 57 | 30x |
checkmate::assert_flag(cached) |
| 58 | 30x |
if (cached) {
|
| 59 | 2x |
return(get_cached_data("cadsl"))
|
| 60 |
} |
|
| 61 | ||
| 62 | 28x |
checkmate::assert_number(N) |
| 63 | 28x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 64 | 28x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1, na.ok = TRUE) |
| 65 | 28x |
checkmate::assert_number(study_duration, lower = 1) |
| 66 | 28x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
| 67 | 28x |
checkmate::assert_true(na_percentage < 1) |
| 68 | 28x |
checkmate::assert_number(female_prob, lower = 0, upper = 1) |
| 69 | ||
| 70 | 28x |
if (!is.null(seed)) {
|
| 71 | 28x |
set.seed(seed) |
| 72 |
} |
|
| 73 | ||
| 74 | 28x |
study_duration_secs <- lubridate::seconds(lubridate::years(study_duration)) |
| 75 | 28x |
sys_dtm <- lubridate::fast_strptime("20/2/2019 11:16:16.683", "%d/%m/%Y %H:%M:%OS")
|
| 76 | 28x |
discons <- max(1, floor((N * .3))) |
| 77 | 28x |
country_site_prob <- c(.5, .121, .077, .077, .075, .052, .046, .025, .014, .003) |
| 78 | ||
| 79 | 28x |
adsl <- tibble::tibble( |
| 80 | 28x |
STUDYID = rep("AB12345", N),
|
| 81 | 28x |
COUNTRY = sample_fct( |
| 82 | 28x |
c("CHN", "USA", "BRA", "PAK", "NGA", "RUS", "JPN", "GBR", "CAN", "CHE"),
|
| 83 | 28x |
N, |
| 84 | 28x |
prob = country_site_prob |
| 85 |
), |
|
| 86 | 28x |
SITEID = sample_fct(1:20, N, prob = rep(country_site_prob, times = 2)), |
| 87 | 28x |
SUBJID = paste("id", seq_len(N), sep = "-"),
|
| 88 | 28x |
AGE = sapply(stats::rchisq(N, df = 5, ncp = 10), max, 0) + 20, |
| 89 | 28x |
AGEU = "YEARS", |
| 90 | 28x |
SEX = c("F", "M") %>% sample_fct(N, prob = c(female_prob, 1 - female_prob)),
|
| 91 | 28x |
ARMCD = c("ARM A", "ARM B", "ARM C") %>% sample_fct(N),
|
| 92 | 28x |
RACE = c( |
| 93 | 28x |
"ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE", "AMERICAN INDIAN OR ALASKA NATIVE", |
| 94 | 28x |
"MULTIPLE", "NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER", "OTHER", "UNKNOWN" |
| 95 |
) %>% |
|
| 96 | 28x |
sample_fct(N, prob = c(.55, .23, .16, .05, .004, .003, .002, .002)), |
| 97 | 28x |
TRTSDTM = sys_dtm + sample(seq(0, study_duration_secs), size = N, replace = TRUE), |
| 98 | 28x |
RANDDT = lubridate::date(TRTSDTM - lubridate::days(floor(stats::runif(N, min = 0, max = 5)))), |
| 99 | 28x |
TRTEDTM = TRTSDTM + study_duration_secs, |
| 100 | 28x |
STRATA1 = c("A", "B", "C") %>% sample_fct(N),
|
| 101 | 28x |
STRATA2 = c("S1", "S2") %>% sample_fct(N),
|
| 102 | 28x |
BMRKR1 = stats::rchisq(N, 6), |
| 103 | 28x |
BMRKR2 = sample_fct(c("LOW", "MEDIUM", "HIGH"), N),
|
| 104 | 28x |
BMEASIFL = sample_fct(c("Y", "N"), N),
|
| 105 | 28x |
BEP01FL = sample_fct(c("Y", "N"), N),
|
| 106 | 28x |
AEWITHFL = sample_fct(c("Y", "N"), N, prob = c(ae_withdrawal_prob, 1 - ae_withdrawal_prob))
|
| 107 |
) %>% |
|
| 108 | 28x |
dplyr::mutate(ARM = dplyr::recode( |
| 109 | 28x |
ARMCD, |
| 110 | 28x |
"ARM A" = "A: Drug X", "ARM B" = "B: Placebo", "ARM C" = "C: Combination" |
| 111 |
)) %>% |
|
| 112 | 28x |
dplyr::mutate(ACTARM = ARM) %>% |
| 113 | 28x |
dplyr::mutate(ACTARMCD = ARMCD) %>% |
| 114 | 28x |
dplyr::mutate(TRT01P = ARM) %>% |
| 115 | 28x |
dplyr::mutate(TRT01A = ACTARM) %>% |
| 116 | 28x |
dplyr::mutate(ITTFL = factor("Y")) %>%
|
| 117 | 28x |
dplyr::mutate(SAFFL = factor("Y")) %>%
|
| 118 | 28x |
dplyr::arrange(TRTSDTM) |
| 119 | ||
| 120 | 28x |
adds <- adsl[sample(nrow(adsl), discons), ] %>% |
| 121 | 28x |
dplyr::mutate(TRTEDTM_discon = sample( |
| 122 | 28x |
seq(from = max(TRTSDTM), to = sys_dtm + study_duration_secs, by = 1), |
| 123 | 28x |
size = discons, |
| 124 | 28x |
replace = TRUE |
| 125 |
)) %>% |
|
| 126 | 28x |
dplyr::select(SUBJID, TRTSDTM, TRTEDTM_discon) %>% |
| 127 | 28x |
dplyr::arrange(TRTSDTM) |
| 128 | ||
| 129 | 28x |
adsl <- dplyr::left_join(adsl, adds, by = c("SUBJID", "TRTSDTM")) %>%
|
| 130 | 28x |
dplyr::mutate(TRTEDTM = dplyr::case_when( |
| 131 | 28x |
!is.na(TRTEDTM_discon) ~ TRTEDTM_discon, |
| 132 | 28x |
TRTSDTM >= quantile(TRTSDTM)[2] & TRTSDTM <= quantile(TRTSDTM)[3] ~ lubridate::as_datetime(NA), |
| 133 | 28x |
TRUE ~ TRTEDTM |
| 134 |
)) %>% |
|
| 135 | 28x |
dplyr::select(-"TRTEDTM_discon") |
| 136 | ||
| 137 |
# add period 2 if needed |
|
| 138 | 28x |
if (with_trt02) {
|
| 139 | 28x |
with_trt02 <- lubridate::seconds(lubridate::years(1)) |
| 140 | 28x |
adsl <- adsl %>% |
| 141 | 28x |
dplyr::mutate(TRT02P = sample(ARM)) %>% |
| 142 | 28x |
dplyr::mutate(TRT02A = sample(ACTARM)) %>% |
| 143 | 28x |
dplyr::mutate( |
| 144 | 28x |
TRT01SDTM = TRTSDTM, |
| 145 | 28x |
AP01SDTM = TRT01SDTM, |
| 146 | 28x |
TRT01EDTM = TRTEDTM, |
| 147 | 28x |
AP01EDTM = TRT01EDTM, |
| 148 | 28x |
TRT02SDTM = TRTEDTM, |
| 149 | 28x |
AP02SDTM = TRT02SDTM, |
| 150 | 28x |
TRT02EDTM = TRT01EDTM + with_trt02, |
| 151 | 28x |
AP02EDTM = TRT02EDTM, |
| 152 | 28x |
TRTEDTM = TRT02EDTM |
| 153 |
) |
|
| 154 |
} |
|
| 155 | ||
| 156 | 28x |
adsl <- adsl %>% |
| 157 | 28x |
dplyr::mutate(EOSDT = lubridate::date(TRTEDTM)) %>% |
| 158 | 28x |
dplyr::mutate(EOSDY = ceiling(difftime(TRTEDTM, TRTSDTM))) %>% |
| 159 | 28x |
dplyr::mutate(EOSSTT = dplyr::case_when( |
| 160 | 28x |
EOSDY == max(EOSDY, na.rm = TRUE) ~ "COMPLETED", |
| 161 | 28x |
EOSDY < max(EOSDY, na.rm = TRUE) ~ "DISCONTINUED", |
| 162 | 28x |
is.na(TRTEDTM) ~ "ONGOING" |
| 163 |
)) %>% |
|
| 164 | 28x |
dplyr::mutate(EOTSTT = EOSSTT) |
| 165 | ||
| 166 |
# disposition related variables |
|
| 167 |
# using probability of 1 for the "DEATH" level to ensure at least one death record exists |
|
| 168 | 28x |
l_dcsreas <- list( |
| 169 | 28x |
choices = c( |
| 170 | 28x |
"ADVERSE EVENT", "DEATH", "LACK OF EFFICACY", "PHYSICIAN DECISION", |
| 171 | 28x |
"PROTOCOL VIOLATION", "WITHDRAWAL BY PARENT/GUARDIAN", "WITHDRAWAL BY SUBJECT" |
| 172 |
), |
|
| 173 | 28x |
prob = c(.2, 1, .1, .1, .2, .1, .1) |
| 174 |
) |
|
| 175 | 28x |
l_dthcat_other <- list( |
| 176 | 28x |
choices = c( |
| 177 | 28x |
"Post-study reporting of death", "LOST TO FOLLOW UP", "MISSING", "SUICIDE", "UNKNOWN" |
| 178 |
), |
|
| 179 | 28x |
prob = c(.1, .3, .3, .2, .1) |
| 180 |
) |
|
| 181 | ||
| 182 | 28x |
adsl <- adsl %>% |
| 183 | 28x |
dplyr::mutate( |
| 184 | 28x |
DCSREAS = ifelse( |
| 185 | 28x |
EOSSTT == "DISCONTINUED", |
| 186 | 28x |
sample(x = l_dcsreas$choices, size = N, replace = TRUE, prob = l_dcsreas$prob), |
| 187 | 28x |
as.character(NA) |
| 188 |
) |
|
| 189 |
) %>% |
|
| 190 | 28x |
dplyr::mutate(DTHFL = dplyr::case_when( |
| 191 | 28x |
DCSREAS == "DEATH" ~ "Y", |
| 192 | 28x |
TRUE ~ "N" |
| 193 |
)) %>% |
|
| 194 | 28x |
dplyr::mutate( |
| 195 | 28x |
DTHCAT = ifelse( |
| 196 | 28x |
DCSREAS == "DEATH", |
| 197 | 28x |
sample(x = c("ADVERSE EVENT", "PROGRESSIVE DISEASE", "OTHER"), size = N, replace = TRUE),
|
| 198 | 28x |
as.character(NA) |
| 199 |
) |
|
| 200 |
) %>% |
|
| 201 | 28x |
dplyr::mutate(DTHCAUS = dplyr::case_when( |
| 202 | 28x |
DTHCAT == "ADVERSE EVENT" ~ "ADVERSE EVENT", |
| 203 | 28x |
DTHCAT == "PROGRESSIVE DISEASE" ~ "DISEASE PROGRESSION", |
| 204 | 28x |
DTHCAT == "OTHER" ~ sample(x = l_dthcat_other$choices, size = N, replace = TRUE, prob = l_dthcat_other$prob), |
| 205 | 28x |
TRUE ~ as.character(NA) |
| 206 |
)) %>% |
|
| 207 | 28x |
dplyr::mutate(ADTHAUT = dplyr::case_when( |
| 208 | 28x |
DTHCAUS %in% c("ADVERSE EVENT", "DISEASE PROGRESSION") ~ "Yes",
|
| 209 | 28x |
DTHCAUS %in% c("UNKNOWN", "SUICIDE", "Post-study reporting of death") ~ sample(
|
| 210 | 28x |
x = c("Yes", "No"), size = N, replace = TRUE, prob = c(0.25, 0.75)
|
| 211 |
), |
|
| 212 | 28x |
TRUE ~ as.character(NA) |
| 213 |
)) %>% |
|
| 214 |
# adding some random number of days post last treatment date so that death days from last trt admin |
|
| 215 |
# supports the LDDTHGR1 derivation below |
|
| 216 | 28x |
dplyr::mutate(DTHDT = dplyr::case_when( |
| 217 | 28x |
DCSREAS == "DEATH" ~ lubridate::date(TRTEDTM + lubridate::days(sample(0:50, size = N, replace = TRUE))), |
| 218 | 28x |
TRUE ~ NA |
| 219 |
)) %>% |
|
| 220 | 28x |
dplyr::mutate(LDDTHELD = difftime(DTHDT, lubridate::date(TRTEDTM), units = "days")) %>% |
| 221 | 28x |
dplyr::mutate(LDDTHGR1 = dplyr::case_when( |
| 222 | 28x |
LDDTHELD <= 30 ~ "<=30", |
| 223 | 28x |
LDDTHELD > 30 ~ ">30", |
| 224 | 28x |
TRUE ~ as.character(NA) |
| 225 |
)) %>% |
|
| 226 | 28x |
dplyr::mutate(LSTALVDT = dplyr::case_when( |
| 227 | 28x |
DCSREAS == "DEATH" ~ DTHDT, |
| 228 | 28x |
TRUE ~ lubridate::date(TRTEDTM) + lubridate::days(floor(stats::runif(N, min = 10, max = 30))) |
| 229 |
)) |
|
| 230 | ||
| 231 |
# add random ETHNIC (Ethnicity) |
|
| 232 | 28x |
adsl <- adsl %>% |
| 233 | 28x |
dplyr::mutate(ETHNIC = sample( |
| 234 | 28x |
x = c("HISPANIC OR LATINO", "NOT HISPANIC OR LATINO", "NOT REPORTED", "UNKNOWN"),
|
| 235 | 28x |
size = N, replace = TRUE, prob = c(.1, .8, .06, .04) |
| 236 |
)) |
|
| 237 | ||
| 238 |
# associate DTHADY (Relative Day of Death) with Death date |
|
| 239 |
# Date of Death [adsl.DTHDT] - date part of Date of First Exposure to Treatment [adsl.TRTSDTM] |
|
| 240 | ||
| 241 | 28x |
adsl <- adsl %>% |
| 242 | 28x |
dplyr::mutate(DTHADY = difftime(DTHDT, TRTSDTM, units = "days")) |
| 243 | ||
| 244 | ||
| 245 |
# associate sites with countries and regions |
|
| 246 | 28x |
adsl <- adsl %>% |
| 247 | 28x |
dplyr::mutate(SITEID = paste0(COUNTRY, "-", SITEID)) %>% |
| 248 | 28x |
dplyr::mutate(REGION1 = dplyr::case_when( |
| 249 | 28x |
COUNTRY %in% c("NGA") ~ "Africa",
|
| 250 | 28x |
COUNTRY %in% c("CHN", "JPN", "PAK") ~ "Asia",
|
| 251 | 28x |
COUNTRY %in% c("RUS") ~ "Eurasia",
|
| 252 | 28x |
COUNTRY %in% c("GBR") ~ "Europe",
|
| 253 | 28x |
COUNTRY %in% c("CAN", "USA") ~ "North America",
|
| 254 | 28x |
COUNTRY %in% c("BRA") ~ "South America",
|
| 255 | 28x |
TRUE ~ as.character(NA) |
| 256 |
)) %>% |
|
| 257 | 28x |
dplyr::mutate(INVID = paste("INV ID", SITEID)) %>%
|
| 258 | 28x |
dplyr::mutate(INVNAM = paste("Dr.", SITEID, "Doe")) %>%
|
| 259 | 28x |
dplyr::mutate(USUBJID = paste(STUDYID, SITEID, SUBJID, sep = "-")) |
| 260 | ||
| 261 | ||
| 262 | 28x |
if (length(na_vars) > 0 && na_percentage > 0) {
|
| 263 | ! |
adsl <- mutate_na(ds = adsl, na_vars = na_vars, na_percentage = na_percentage) |
| 264 |
} |
|
| 265 | ||
| 266 |
# apply metadata |
|
| 267 | 28x |
adsl <- apply_metadata(adsl, "metadata/ADSL.yml", FALSE) |
| 268 | ||
| 269 | 28x |
attr(adsl, "study_duration_secs") <- as.numeric(study_duration_secs) |
| 270 | 28x |
return(adsl) |
| 271 |
} |
| 1 |
#' Questionnaires Analysis Dataset (ADQS) |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Function for generating a random Questionnaires Analysis Dataset for a given |
|
| 6 |
#' Subject-Level Analysis Dataset. |
|
| 7 |
#' |
|
| 8 |
#' @details One record per subject per parameter per analysis visit per analysis date. |
|
| 9 |
#' |
|
| 10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `AVISITN` |
|
| 11 |
#' |
|
| 12 |
#' @inheritParams argument_convention |
|
| 13 |
#' @template param_cached |
|
| 14 |
#' @templateVar data adqs |
|
| 15 |
#' |
|
| 16 |
#' @return `data.frame` |
|
| 17 |
#' @export |
|
| 18 |
#' |
|
| 19 |
#' @author npaszty |
|
| 20 |
#' |
|
| 21 |
#' @examples |
|
| 22 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
| 23 |
#' |
|
| 24 |
#' adqs <- radqs(adsl, visit_format = "WEEK", n_assessments = 7L, seed = 2) |
|
| 25 |
#' adqs |
|
| 26 |
#' |
|
| 27 |
#' adqs <- radqs(adsl, visit_format = "CYCLE", n_assessments = 3L, seed = 2) |
|
| 28 |
#' adqs |
|
| 29 |
radqs <- function(adsl, |
|
| 30 |
param = c( |
|
| 31 |
"BFI All Questions", |
|
| 32 |
"Fatigue Interference", |
|
| 33 |
"Function/Well-Being (GF1,GF3,GF7)", |
|
| 34 |
"Treatment Side Effects (GP2,C5,GP5)", |
|
| 35 |
"FKSI-19 All Questions" |
|
| 36 |
), |
|
| 37 |
paramcd = c("BFIALL", "FATIGI", "FKSI-FWB", "FKSI-TSE", "FKSIALL"),
|
|
| 38 |
visit_format = "WEEK", |
|
| 39 |
n_assessments = 5L, |
|
| 40 |
n_days = 5L, |
|
| 41 |
seed = NULL, |
|
| 42 |
na_percentage = 0, |
|
| 43 |
na_vars = list( |
|
| 44 |
LOQFL = c(NA, 0.1), ABLFL2 = c(1234, 0.1), ABLFL = c(1235, 0.1), |
|
| 45 |
CHG2 = c(1235, 0.1), PCHG2 = c(1235, 0.1), CHG = c(1234, 0.1), PCHG = c(1234, 0.1) |
|
| 46 |
), |
|
| 47 |
cached = FALSE) {
|
|
| 48 | 4x |
checkmate::assert_flag(cached) |
| 49 | 4x |
if (cached) {
|
| 50 | 1x |
return(get_cached_data("cadqs"))
|
| 51 |
} |
|
| 52 | ||
| 53 | 3x |
checkmate::assert_data_frame(adsl) |
| 54 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
| 55 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
| 56 | 3x |
checkmate::assert_string(visit_format) |
| 57 | 3x |
checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE) |
| 58 | 3x |
checkmate::assert_integer(n_days, len = 1, any.missing = FALSE) |
| 59 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 60 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
| 61 | 3x |
checkmate::assert_true(na_percentage < 1) |
| 62 | ||
| 63 |
# validate and initialize param vectors |
|
| 64 | 3x |
param_init_list <- relvar_init(param, paramcd) |
| 65 | ||
| 66 | 3x |
if (!is.null(seed)) {
|
| 67 | 3x |
set.seed(seed) |
| 68 |
} |
|
| 69 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
| 70 | ||
| 71 | 3x |
adqs <- expand.grid( |
| 72 | 3x |
STUDYID = unique(adsl$STUDYID), |
| 73 | 3x |
USUBJID = adsl$USUBJID, |
| 74 | 3x |
PARAM = param_init_list$relvar1, |
| 75 | 3x |
AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days), |
| 76 | 3x |
stringsAsFactors = FALSE |
| 77 |
) |
|
| 78 | ||
| 79 | 3x |
adqs <- dplyr::mutate( |
| 80 | 3x |
adqs, |
| 81 | 3x |
AVISITN = dplyr::case_when( |
| 82 | 3x |
AVISIT == "SCREENING" ~ -1, |
| 83 | 3x |
AVISIT == "BASELINE" ~ 0, |
| 84 | 3x |
(grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
|
| 85 | 3x |
TRUE ~ NA_real_ |
| 86 |
) |
|
| 87 |
) |
|
| 88 | ||
| 89 |
# assign related variable values: PARAMxPARAMCD are related |
|
| 90 | 3x |
adqs <- adqs %>% rel_var( |
| 91 | 3x |
var_name = "PARAMCD", |
| 92 | 3x |
related_var = "PARAM", |
| 93 | 3x |
var_values = param_init_list$relvar2 |
| 94 |
) |
|
| 95 | ||
| 96 | 3x |
adqs$AVAL <- stats::rnorm(nrow(adqs), mean = 50, sd = 8) + adqs$AVISITN * stats::rnorm(nrow(adqs), mean = 5, sd = 2) |
| 97 | ||
| 98 |
# order to prepare for change from screening and baseline values |
|
| 99 | 3x |
adqs <- adqs[order(adqs$STUDYID, adqs$USUBJID, adqs$PARAMCD, adqs$AVISITN), ] |
| 100 | ||
| 101 | 3x |
adqs <- Reduce( |
| 102 | 3x |
rbind, |
| 103 | 3x |
lapply( |
| 104 | 3x |
split(adqs, adqs$USUBJID), |
| 105 | 3x |
function(x) {
|
| 106 | 30x |
x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])] |
| 107 | 30x |
x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "") |
| 108 | 30x |
x$ABLFL <- ifelse( |
| 109 | 30x |
toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE", |
| 110 | 30x |
"Y", |
| 111 | 30x |
ifelse( |
| 112 | 30x |
toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1", |
| 113 | 30x |
"Y", |
| 114 |
"" |
|
| 115 |
) |
|
| 116 |
) |
|
| 117 | 30x |
x$LOQFL <- ifelse(x$AVAL < 32, "Y", "N") |
| 118 | 30x |
x |
| 119 |
} |
|
| 120 |
) |
|
| 121 |
) |
|
| 122 | ||
| 123 | 3x |
adqs$BASE2 <- retain(adqs, adqs$AVAL, adqs$ABLFL2 == "Y") |
| 124 | 3x |
adqs$BASE <- ifelse(adqs$ABLFL2 != "Y", retain(adqs, adqs$AVAL, adqs$ABLFL == "Y"), NA) |
| 125 | ||
| 126 | 3x |
adqs <- adqs %>% |
| 127 | 3x |
dplyr::mutate(CHG2 = AVAL - BASE2) %>% |
| 128 | 3x |
dplyr::mutate(PCHG2 = 100 * (CHG2 / BASE2)) %>% |
| 129 | 3x |
dplyr::mutate(CHG = AVAL - BASE) %>% |
| 130 | 3x |
dplyr::mutate(PCHG = 100 * (CHG / BASE)) %>% |
| 131 | 3x |
rcd_var_relabel( |
| 132 | 3x |
STUDYID = attr(adsl$STUDYID, "label"), |
| 133 | 3x |
USUBJID = attr(adsl$USUBJID, "label") |
| 134 |
) |
|
| 135 | ||
| 136 | 3x |
adqs <- rcd_var_relabel( |
| 137 | 3x |
adqs, |
| 138 | 3x |
STUDYID = "Study Identifier", |
| 139 | 3x |
USUBJID = "Unique Subject Identifier" |
| 140 |
) |
|
| 141 | ||
| 142 |
# merge ADSL to be able to add QS date and study day variables |
|
| 143 | 3x |
adqs <- dplyr::inner_join( |
| 144 | 3x |
adqs, |
| 145 | 3x |
adsl, |
| 146 | 3x |
by = c("STUDYID", "USUBJID")
|
| 147 |
) %>% |
|
| 148 | 3x |
dplyr::rowwise() %>% |
| 149 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
| 150 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
| 151 | 3x |
TRUE ~ TRTEDTM |
| 152 |
))) %>% |
|
| 153 | 3x |
ungroup() |
| 154 | ||
| 155 | 3x |
adqs <- adqs %>% |
| 156 | 3x |
group_by(USUBJID) %>% |
| 157 | 3x |
arrange(USUBJID, AVISITN) %>% |
| 158 | 3x |
dplyr::mutate(ADTM = rep( |
| 159 | 3x |
sort(sample( |
| 160 | 3x |
seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"), |
| 161 | 3x |
size = nlevels(AVISIT) |
| 162 |
)), |
|
| 163 | 3x |
each = n() / nlevels(AVISIT) |
| 164 |
)) %>% |
|
| 165 | 3x |
dplyr::ungroup() %>% |
| 166 | 3x |
dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>% |
| 167 | 3x |
dplyr::select(-TRTENDT) %>% |
| 168 | 3x |
dplyr::arrange(STUDYID, USUBJID, ADTM) |
| 169 | ||
| 170 | 3x |
adqs <- adqs %>% |
| 171 | 3x |
dplyr::group_by(USUBJID) %>% |
| 172 | 3x |
dplyr::mutate(QSSEQ = seq_len(dplyr::n())) %>% |
| 173 | 3x |
dplyr::mutate(ASEQ = QSSEQ) %>% |
| 174 | 3x |
dplyr::ungroup() %>% |
| 175 | 3x |
dplyr::arrange( |
| 176 | 3x |
STUDYID, |
| 177 | 3x |
USUBJID, |
| 178 | 3x |
PARAMCD, |
| 179 | 3x |
AVISITN, |
| 180 | 3x |
ADTM, |
| 181 | 3x |
QSSEQ |
| 182 |
) |
|
| 183 | ||
| 184 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) {
|
| 185 | ! |
adqs <- mutate_na(ds = adqs, na_vars = na_vars, na_percentage = na_percentage) |
| 186 |
} |
|
| 187 | ||
| 188 |
# apply metadata |
|
| 189 | 3x |
adqs <- apply_metadata(adqs, "metadata/ADQS.yml") |
| 190 | ||
| 191 | 3x |
return(adqs) |
| 192 |
} |
| 1 |
#' Adverse Event Analysis Dataset (ADAE) |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Function for generating random Adverse Event Analysis Dataset for a given |
|
| 6 |
#' Subject-Level Analysis Dataset. |
|
| 7 |
#' |
|
| 8 |
#' @details One record per each record in the corresponding SDTM domain. |
|
| 9 |
#' |
|
| 10 |
#' Keys: `STUDYID`, `USUBJID`, `ASTDTM`, `AETERM`, `AESEQ` |
|
| 11 |
#' |
|
| 12 |
#' @inheritParams argument_convention |
|
| 13 |
#' @param max_n_aes (`integer`)\cr Maximum number of AEs per patient. Defaults to 10. |
|
| 14 |
#' @template param_cached |
|
| 15 |
#' @templateVar data adae |
|
| 16 |
#' |
|
| 17 |
#' @return `data.frame` |
|
| 18 |
#' @export |
|
| 19 |
#' |
|
| 20 |
#' @examples |
|
| 21 |
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1) |
|
| 22 |
#' |
|
| 23 |
#' adae <- radae(adsl, seed = 2) |
|
| 24 |
#' adae |
|
| 25 |
#' |
|
| 26 |
#' # Add metadata. |
|
| 27 |
#' aag <- utils::read.table( |
|
| 28 |
#' sep = ",", header = TRUE, |
|
| 29 |
#' text = paste( |
|
| 30 |
#' "NAMVAR,SRCVAR,GRPTYPE,REFNAME,REFTERM,SCOPE", |
|
| 31 |
#' "CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd D.2.1.5.3,", |
|
| 32 |
#' "CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd A.1.1.1.1,", |
|
| 33 |
#' "SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd C.1.1.1.3,BROAD", |
|
| 34 |
#' "SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd B.2.2.3.1,BROAD", |
|
| 35 |
#' "SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Y.9.9.9.9,NARROW", |
|
| 36 |
#' "SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Z.9.9.9.9,NARROW", |
|
| 37 |
#' sep = "\n" |
|
| 38 |
#' ), stringsAsFactors = FALSE |
|
| 39 |
#' ) |
|
| 40 |
#' |
|
| 41 |
#' adae <- radae(adsl, lookup_aag = aag) |
|
| 42 |
#' |
|
| 43 |
#' with( |
|
| 44 |
#' adae, |
|
| 45 |
#' cbind( |
|
| 46 |
#' table(AEDECOD, SMQ01NAM), |
|
| 47 |
#' table(AEDECOD, CQ01NAM) |
|
| 48 |
#' ) |
|
| 49 |
#' ) |
|
| 50 |
radae <- function(adsl, |
|
| 51 |
max_n_aes = 10L, |
|
| 52 |
lookup = NULL, |
|
| 53 |
lookup_aag = NULL, |
|
| 54 |
seed = NULL, |
|
| 55 |
na_percentage = 0, |
|
| 56 |
na_vars = list( |
|
| 57 |
AEBODSYS = c(NA, 0.1), |
|
| 58 |
AEDECOD = c(1234, 0.1), |
|
| 59 |
AETOXGR = c(1234, 0.1) |
|
| 60 |
), |
|
| 61 |
cached = FALSE) {
|
|
| 62 | 4x |
checkmate::assert_flag(cached) |
| 63 | 4x |
if (cached) {
|
| 64 | 1x |
return(get_cached_data("cadae"))
|
| 65 |
} |
|
| 66 | ||
| 67 | 3x |
checkmate::assert_data_frame(adsl) |
| 68 | 3x |
checkmate::assert_integer(max_n_aes, len = 1, any.missing = FALSE) |
| 69 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 70 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
| 71 | 3x |
checkmate::assert_true(na_percentage < 1) |
| 72 | ||
| 73 |
# check lookup parameters |
|
| 74 | 3x |
checkmate::assert_data_frame(lookup, null.ok = TRUE) |
| 75 | 3x |
lookup_ae <- if (!is.null(lookup)) {
|
| 76 | ! |
lookup |
| 77 |
} else {
|
|
| 78 | 3x |
tibble::tribble( |
| 79 | 3x |
~AEBODSYS, ~AELLT, ~AEDECOD, ~AEHLT, ~AEHLGT, ~AETOXGR, ~AESOC, ~AESER, ~AEREL, |
| 80 | 3x |
"cl A.1", "llt A.1.1.1.1", "dcd A.1.1.1.1", "hlt A.1.1.1", "hlgt A.1.1", "1", "cl A", "N", "N", |
| 81 | 3x |
"cl A.1", "llt A.1.1.1.2", "dcd A.1.1.1.2", "hlt A.1.1.1", "hlgt A.1.1", "2", "cl A", "Y", "N", |
| 82 | 3x |
"cl B.1", "llt B.1.1.1.1", "dcd B.1.1.1.1", "hlt B.1.1.1", "hlgt B.1.1", "5", "cl B", "Y", "Y", |
| 83 | 3x |
"cl B.2", "llt B.2.1.2.1", "dcd B.2.1.2.1", "hlt B.2.1.2", "hlgt B.2.1", "3", "cl B", "N", "N", |
| 84 | 3x |
"cl B.2", "llt B.2.2.3.1", "dcd B.2.2.3.1", "hlt B.2.2.3", "hlgt B.2.2", "1", "cl B", "Y", "N", |
| 85 | 3x |
"cl C.1", "llt C.1.1.1.3", "dcd C.1.1.1.3", "hlt C.1.1.1", "hlgt C.1.1", "4", "cl C", "N", "Y", |
| 86 | 3x |
"cl C.2", "llt C.2.1.2.1", "dcd C.2.1.2.1", "hlt C.2.1.2", "hlgt C.2.1", "2", "cl C", "N", "Y", |
| 87 | 3x |
"cl D.1", "llt D.1.1.1.1", "dcd D.1.1.1.1", "hlt D.1.1.1", "hlgt D.1.1", "5", "cl D", "Y", "Y", |
| 88 | 3x |
"cl D.1", "llt D.1.1.4.2", "dcd D.1.1.4.2", "hlt D.1.1.4", "hlgt D.1.1", "3", "cl D", "N", "N", |
| 89 | 3x |
"cl D.2", "llt D.2.1.5.3", "dcd D.2.1.5.3", "hlt D.2.1.5", "hlgt D.2.1", "1", "cl D", "N", "Y" |
| 90 |
) |
|
| 91 |
} |
|
| 92 | ||
| 93 | 3x |
checkmate::assert_data_frame(lookup_aag, null.ok = TRUE) |
| 94 | 3x |
aag <- if (!is.null(lookup_aag)) {
|
| 95 | ! |
lookup_aag |
| 96 |
} else {
|
|
| 97 | 3x |
aag <- utils::read.table( |
| 98 | 3x |
sep = ",", header = TRUE, |
| 99 | 3x |
text = paste( |
| 100 | 3x |
"NAMVAR,SRCVAR,GRPTYPE,REFNAME,REFTERM,SCOPE", |
| 101 | 3x |
"CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd D.2.1.5.3,", |
| 102 | 3x |
"CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd A.1.1.1.1,", |
| 103 | 3x |
"SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd C.1.1.1.3,BROAD", |
| 104 | 3x |
"SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd B.2.2.3.1,BROAD", |
| 105 | 3x |
"SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Y.9.9.9.9,NARROW", |
| 106 | 3x |
"SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Z.9.9.9.9,NARROW", |
| 107 | 3x |
sep = "\n" |
| 108 | 3x |
), stringsAsFactors = FALSE |
| 109 |
) |
|
| 110 |
} |
|
| 111 | ||
| 112 | 3x |
if (!is.null(seed)) set.seed(seed) |
| 113 | 3x |
study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs")) |
| 114 | ||
| 115 | 3x |
adae <- Map( |
| 116 | 3x |
function(id, sid) {
|
| 117 | 30x |
n_aes <- sample(c(0, seq_len(max_n_aes)), 1) |
| 118 | 30x |
i <- sample(seq_len(nrow(lookup_ae)), n_aes, TRUE) |
| 119 | 30x |
dplyr::mutate( |
| 120 | 30x |
lookup_ae[i, ], |
| 121 | 30x |
USUBJID = id, |
| 122 | 30x |
STUDYID = sid |
| 123 |
) |
|
| 124 |
}, |
|
| 125 | 3x |
adsl$USUBJID, |
| 126 | 3x |
adsl$STUDYID |
| 127 |
) %>% |
|
| 128 | 3x |
Reduce(rbind, .) %>% |
| 129 | 3x |
`[`(c(10, 11, 1, 2, 3, 4, 5, 6, 7, 8, 9)) %>% |
| 130 | 3x |
dplyr::mutate(AETERM = gsub("dcd", "trm", AEDECOD)) %>%
|
| 131 | 3x |
dplyr::mutate(AESEV = dplyr::case_when( |
| 132 | 3x |
AETOXGR == 1 ~ "MILD", |
| 133 | 3x |
AETOXGR %in% c(2, 3) ~ "MODERATE", |
| 134 | 3x |
AETOXGR %in% c(4, 5) ~ "SEVERE" |
| 135 |
)) |
|
| 136 | ||
| 137 | 3x |
adae <- rcd_var_relabel( |
| 138 | 3x |
adae, |
| 139 | 3x |
STUDYID = "Study Identifier", |
| 140 | 3x |
USUBJID = "Unique Subject Identifier" |
| 141 |
) |
|
| 142 | ||
| 143 |
# merge adsl to be able to add AE date and study day variables |
|
| 144 | 3x |
adae <- dplyr::inner_join(adae, adsl, by = c("STUDYID", "USUBJID")) %>%
|
| 145 | 3x |
dplyr::rowwise() %>% |
| 146 | 3x |
dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when( |
| 147 | 3x |
is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"), |
| 148 | 3x |
TRUE ~ TRTEDTM |
| 149 |
))) %>% |
|
| 150 | 3x |
dplyr::mutate(ASTDTM = sample( |
| 151 | 3x |
seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"), |
| 152 | 3x |
size = 1 |
| 153 |
)) %>% |
|
| 154 | 3x |
dplyr::mutate(ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))) %>% |
| 155 |
# add 1 to end of range incase both values passed to sample() are the same |
|
| 156 | 3x |
dplyr::mutate(AENDTM = sample( |
| 157 | 3x |
seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"), |
| 158 | 3x |
size = 1 |
| 159 |
)) %>% |
|
| 160 | 3x |
dplyr::mutate(AENDY = ceiling(difftime(AENDTM, TRTSDTM, units = "days"))) %>% |
| 161 | 3x |
dplyr::mutate(LDOSEDTM = dplyr::case_when( |
| 162 | 3x |
TRTSDTM < ASTDTM ~ lubridate::as_datetime(stats::runif(1, TRTSDTM, ASTDTM)), |
| 163 | 3x |
TRUE ~ ASTDTM |
| 164 |
)) %>% |
|
| 165 | 3x |
dplyr::mutate(LDRELTM = as.numeric(difftime(ASTDTM, LDOSEDTM, units = "mins"))) %>% |
| 166 | 3x |
dplyr::select(-TRTENDT) %>% |
| 167 | 3x |
dplyr::ungroup() %>% |
| 168 | 3x |
dplyr::arrange(STUDYID, USUBJID, ASTDTM, AETERM) |
| 169 | ||
| 170 | 3x |
adae <- adae %>% |
| 171 | 3x |
dplyr::group_by(USUBJID) %>% |
| 172 | 3x |
dplyr::mutate(AESEQ = seq_len(dplyr::n())) %>% |
| 173 | 3x |
dplyr::mutate(ASEQ = AESEQ) %>% |
| 174 | 3x |
dplyr::ungroup() %>% |
| 175 | 3x |
dplyr::arrange( |
| 176 | 3x |
STUDYID, |
| 177 | 3x |
USUBJID, |
| 178 | 3x |
ASTDTM, |
| 179 | 3x |
AETERM, |
| 180 | 3x |
AESEQ |
| 181 |
) |
|
| 182 | ||
| 183 | 3x |
outcomes <- c( |
| 184 | 3x |
"UNKNOWN", |
| 185 | 3x |
"NOT RECOVERED/NOT RESOLVED", |
| 186 | 3x |
"RECOVERED/RESOLVED WITH SEQUELAE", |
| 187 | 3x |
"RECOVERING/RESOLVING", |
| 188 | 3x |
"RECOVERED/RESOLVED" |
| 189 |
) |
|
| 190 | ||
| 191 | 3x |
actions <- c( |
| 192 | 3x |
"DOSE RATE REDUCED", |
| 193 | 3x |
"UNKNOWN", |
| 194 | 3x |
"NOT APPLICABLE", |
| 195 | 3x |
"DRUG INTERRUPTED", |
| 196 | 3x |
"DRUG WITHDRAWN", |
| 197 | 3x |
"DOSE INCREASED", |
| 198 | 3x |
"DOSE NOT CHANGED", |
| 199 | 3x |
"DOSE REDUCED", |
| 200 | 3x |
"NOT EVALUABLE" |
| 201 |
) |
|
| 202 | ||
| 203 | 3x |
adae <- adae %>% |
| 204 | 3x |
dplyr::mutate(AEOUT = factor(ifelse( |
| 205 | 3x |
AETOXGR == "5", |
| 206 | 3x |
"FATAL", |
| 207 | 3x |
as.character(sample_fct(outcomes, nrow(adae), prob = c(0.1, 0.2, 0.1, 0.3, 0.3))) |
| 208 |
))) %>% |
|
| 209 | 3x |
dplyr::mutate(AEACN = factor(ifelse( |
| 210 | 3x |
AETOXGR == "5", |
| 211 | 3x |
"NOT EVALUABLE", |
| 212 | 3x |
as.character(sample_fct(actions, nrow(adae), prob = c(0.05, 0.05, 0.05, 0.01, 0.05, 0.1, 0.45, 0.1, 0.05))) |
| 213 |
))) %>% |
|
| 214 | 3x |
dplyr::mutate(AESDTH = dplyr::case_when( |
| 215 | 3x |
AEOUT == "FATAL" ~ "Y", |
| 216 | 3x |
TRUE ~ "N" |
| 217 |
)) %>% |
|
| 218 | 3x |
dplyr::mutate(TRTEMFL = ifelse(ASTDTM >= TRTSDTM, "Y", "")) %>% |
| 219 | 3x |
dplyr::mutate(AECONTRT = sample(c("Y", "N"), prob = c(0.4, 0.6), size = dplyr::n(), replace = TRUE)) %>%
|
| 220 | 3x |
dplyr::mutate( |
| 221 | 3x |
ANL01FL = ifelse(TRTEMFL == "Y" & ASTDTM <= TRTEDTM + lubridate::month(1), "Y", "") |
| 222 |
) %>% |
|
| 223 | 3x |
dplyr::mutate(ANL01FL = ifelse(is.na(ANL01FL), "", ANL01FL)) |
| 224 | ||
| 225 | 3x |
adae <- adae %>% |
| 226 | 3x |
dplyr::mutate(AERELNST = sample(c("Y", "N"), prob = c(0.4, 0.6), size = dplyr::n(), replace = TRUE)) %>%
|
| 227 | 3x |
dplyr::mutate(AEACNOTH = sample( |
| 228 | 3x |
x = c("MEDICATION", "PROCEDURE/SURGERY", "SUBJECT DISCONTINUED FROM STUDY", "NONE"),
|
| 229 | 3x |
prob = c(0.2, 0.4, 0.2, 0.2), |
| 230 | 3x |
size = dplyr::n(), |
| 231 | 3x |
replace = TRUE |
| 232 |
)) |
|
| 233 | ||
| 234 |
# Split metadata for AEs of special interest (AESI). |
|
| 235 | 3x |
l_aag <- split(aag, interaction(aag$NAMVAR, aag$SRCVAR, aag$GRPTYPE, drop = TRUE)) |
| 236 | ||
| 237 |
# Create AESI flags |
|
| 238 | 3x |
l_aesi <- lapply(l_aag, function(d_adag, d_adae) {
|
| 239 | 9x |
names(d_adag)[names(d_adag) == "REFTERM"] <- d_adag$SRCVAR[1] |
| 240 | 9x |
names(d_adag)[names(d_adag) == "REFNAME"] <- d_adag$NAMVAR[1] |
| 241 | ||
| 242 | 9x |
if (d_adag$GRPTYPE[1] == "CUSTOM") {
|
| 243 | 3x |
d_adag <- d_adag[-which(names(d_adag) == "SCOPE")] |
| 244 | 6x |
} else if (d_adag$GRPTYPE[1] == "SMQ") {
|
| 245 | 6x |
names(d_adag)[names(d_adag) == "SCOPE"] <- paste0(substr(d_adag$NAMVAR[1], 1, 5), "SC") |
| 246 |
} |
|
| 247 | ||
| 248 | 9x |
d_adag <- d_adag[-which(names(d_adag) %in% c("NAMVAR", "SRCVAR", "GRPTYPE"))]
|
| 249 | 9x |
d_new <- dplyr::left_join(x = d_adae, y = d_adag, by = intersect(names(d_adae), names(d_adag))) |
| 250 | 9x |
d_new[, dplyr::setdiff(names(d_new), names(d_adae)), drop = FALSE] |
| 251 | 3x |
}, adae) |
| 252 | ||
| 253 | 3x |
adae <- dplyr::bind_cols(adae, l_aesi) |
| 254 | ||
| 255 | 3x |
adae <- dplyr::mutate(adae, AERELNST = sample( |
| 256 | 3x |
x = c("CONCURRENT ILLNESS", "OTHER", "DISEASE UNDER STUDY", "NONE"),
|
| 257 | 3x |
prob = c(0.3, 0.3, 0.3, 0.1), |
| 258 | 3x |
size = dplyr::n(), |
| 259 | 3x |
replace = TRUE |
| 260 |
)) |
|
| 261 | ||
| 262 | ||
| 263 | 3x |
adae <- adae %>% |
| 264 | 3x |
dplyr::mutate(AES_FLAG = sample( |
| 265 | 3x |
x = c("AESLIFE", "AESHOSP", "AESDISAB", "AESCONG", "AESMIE"),
|
| 266 | 3x |
prob = c(0.1, 0.2, 0.2, 0.2, 0.3), |
| 267 | 3x |
size = dplyr::n(), |
| 268 | 3x |
replace = TRUE |
| 269 |
)) %>% |
|
| 270 | 3x |
dplyr::mutate(AES_FLAG = dplyr::case_when( |
| 271 | 3x |
AESDTH == "Y" ~ "AESDTH", |
| 272 | 3x |
TRUE ~ AES_FLAG |
| 273 |
)) %>% |
|
| 274 | 3x |
dplyr::mutate( |
| 275 | 3x |
AESCONG = ifelse(AES_FLAG == "AESCONG", "Y", "N"), |
| 276 | 3x |
AESDISAB = ifelse(AES_FLAG == "AESDISAB", "Y", "N"), |
| 277 | 3x |
AESHOSP = ifelse(AES_FLAG == "AESHOSP", "Y", "N"), |
| 278 | 3x |
AESLIFE = ifelse(AES_FLAG == "AESLIFE", "Y", "N"), |
| 279 | 3x |
AESMIE = ifelse(AES_FLAG == "AESMIE", "Y", "N") |
| 280 |
) %>% |
|
| 281 | 3x |
dplyr::select(-"AES_FLAG") |
| 282 | ||
| 283 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) {
|
| 284 | ! |
adae <- mutate_na(ds = adae, na_vars = na_vars, na_percentage = na_percentage) |
| 285 |
} |
|
| 286 | ||
| 287 |
# apply metadata |
|
| 288 | 3x |
adae <- apply_metadata(adae, "metadata/ADAE.yml") |
| 289 | ||
| 290 | 3x |
return(adae) |
| 291 |
} |
| 1 |
#' Tumor Response Analysis Dataset (ADTR) |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Function for generating a random Tumor Response Analysis Dataset for a given |
|
| 6 |
#' Subject-Level Analysis Dataset. |
|
| 7 |
#' |
|
| 8 |
#' @details One record per subject per parameter per analysis visit per analysis date. |
|
| 9 |
#' |
|
| 10 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `BASETYPE`, `AVISITN`, `DTYPE` |
|
| 11 |
#' |
|
| 12 |
#' @inheritParams argument_convention |
|
| 13 |
#' @param ... Additional arguments to be passed to `radrs`. |
|
| 14 |
#' @template param_cached |
|
| 15 |
#' @templateVar data adtr |
|
| 16 |
#' |
|
| 17 |
#' @return `data.frame` |
|
| 18 |
#' @export |
|
| 19 |
#' |
|
| 20 |
#' @author tomlinsj, npaszty, Xuefeng Hou, dipietrc |
|
| 21 |
#' |
|
| 22 |
#' @examples |
|
| 23 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
| 24 |
#' |
|
| 25 |
#' adtr <- radtr(adsl, seed = 2) |
|
| 26 |
#' adtr |
|
| 27 |
radtr <- function(adsl, |
|
| 28 |
param = c("Sum of Longest Diameter by Investigator"),
|
|
| 29 |
paramcd = c("SLDINV"),
|
|
| 30 |
seed = NULL, |
|
| 31 |
cached = FALSE, |
|
| 32 |
...) {
|
|
| 33 | 4x |
checkmate::assert_flag(cached) |
| 34 | 4x |
if (cached) {
|
| 35 | 1x |
return(get_cached_data("cadtr"))
|
| 36 |
} |
|
| 37 | 3x |
checkmate::assert_data_frame(adsl) |
| 38 | 3x |
checkmate::assert_character(param, min.len = 1, any.missing = FALSE) |
| 39 | 3x |
checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE) |
| 40 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 41 | 3x |
stopifnot(length(param) == length(paramcd)) |
| 42 |
# validate and initialize related variables |
|
| 43 | ||
| 44 | 3x |
if (!is.null(seed)) {
|
| 45 | 3x |
set.seed(seed) |
| 46 |
} |
|
| 47 | ||
| 48 |
# Make times consistent with ADRS at ADY and ADTM. |
|
| 49 | 3x |
adrs <- radrs(adsl, seed = seed, ...) %>% |
| 50 | 3x |
dplyr::filter(PARAMCD == "OVRINV") %>% |
| 51 | 3x |
dplyr::select( |
| 52 | 3x |
"STUDYID", |
| 53 | 3x |
"USUBJID", |
| 54 | 3x |
"AVISIT", |
| 55 | 3x |
"AVISITN", |
| 56 | 3x |
"ADTM", |
| 57 | 3x |
"ADY" |
| 58 |
) |
|
| 59 | ||
| 60 | 3x |
adtr <- Map(function(parcd, par) {
|
| 61 | 3x |
df <- adrs |
| 62 | 3x |
df$AVAL <- stats::rnorm(nrow(df), mean = 150, sd = 30) |
| 63 | 3x |
df$PARAMCD <- parcd |
| 64 | 3x |
df$PARAM <- par |
| 65 | 3x |
df |
| 66 | 3x |
}, paramcd, param) %>% |
| 67 | 3x |
Reduce(rbind, .) |
| 68 | ||
| 69 | 3x |
adtr_base <- adtr %>% |
| 70 | 3x |
dplyr::filter(AVISITN == 0) %>% |
| 71 | 3x |
dplyr::group_by(USUBJID, PARAMCD) %>% |
| 72 | 3x |
dplyr::mutate(BASE = AVAL) %>% |
| 73 | 3x |
dplyr::select("STUDYID", "USUBJID", "BASE", "PARAMCD")
|
| 74 | ||
| 75 | 3x |
adtr_postbase <- adtr %>% |
| 76 | 3x |
dplyr::filter(AVISITN > 0) %>% |
| 77 | 3x |
dplyr::filter(!is.na(AVAL)) %>% |
| 78 | 3x |
dplyr::group_by(USUBJID, PARAMCD) %>% |
| 79 | 3x |
dplyr::filter(AVAL == min(AVAL)) %>% |
| 80 | 3x |
dplyr::slice(1) %>% |
| 81 | 3x |
dplyr::mutate(AVISIT = "POST-BASELINE MINIMUM") %>% |
| 82 | 3x |
dplyr::mutate(DTYPE = "MINIMUM") %>% |
| 83 | 3x |
dplyr::ungroup() |
| 84 | ||
| 85 | 3x |
adtr_lastobs <- adtr %>% |
| 86 | 3x |
dplyr::filter(AVISITN > 0) %>% |
| 87 | 3x |
dplyr::filter(!is.na(AVAL)) %>% |
| 88 | 3x |
dplyr::group_by(USUBJID, PARAMCD) %>% |
| 89 | 3x |
dplyr::filter(ADTM == max(ADTM, na.rm = TRUE)) %>% |
| 90 | 3x |
dplyr::slice(1) %>% |
| 91 | 3x |
dplyr::mutate(LAST_VISIT = AVISIT) %>% |
| 92 | 3x |
dplyr::ungroup() %>% |
| 93 | 3x |
dplyr::select( |
| 94 | 3x |
"STUDYID", |
| 95 | 3x |
"USUBJID", |
| 96 | 3x |
"PARAMCD", |
| 97 | 3x |
"LAST_VISIT" |
| 98 |
) |
|
| 99 | ||
| 100 | 3x |
adtr <- rbind(adtr %>% dplyr::mutate(DTYPE = ""), adtr_postbase) |
| 101 | ||
| 102 | 3x |
adtr <- merge(adtr, adtr_base, by = c("STUDYID", "USUBJID", "PARAMCD")) %>%
|
| 103 | 3x |
dplyr::mutate( |
| 104 | 3x |
ABLFL = dplyr::case_when(AVISIT == "BASELINE" ~ "Y", TRUE ~ ""), |
| 105 | 3x |
AVAL = dplyr::case_when(AVISIT == "BASELINE" ~ NA_real_, TRUE ~ AVAL), |
| 106 | 3x |
CHG = dplyr::case_when(AVISITN > 0 ~ AVAL - BASE, TRUE ~ NA_real_), |
| 107 | 3x |
PCHG = dplyr::case_when(AVISITN > 0 ~ CHG / BASE * 100, TRUE ~ NA_real_), |
| 108 | 3x |
AVALC = as.character(AVAL), |
| 109 | 3x |
AVALU = "mm" |
| 110 |
) |
|
| 111 | ||
| 112 |
# ensure PCHG does not exceed 200%, nor go below -100% (double in size, or complete remission of tumor). |
|
| 113 | 3x |
adtr <- adtr %>% |
| 114 | 3x |
dplyr::mutate( |
| 115 | 3x |
PCHG_DUM = PCHG, |
| 116 | 3x |
PCHG = dplyr::case_when( |
| 117 | 3x |
PCHG_DUM > 200 ~ 200, |
| 118 | 3x |
PCHG_DUM < -100 ~ -100, |
| 119 | 3x |
TRUE ~ PCHG |
| 120 |
), |
|
| 121 | 3x |
AVAL = dplyr::case_when( |
| 122 | 3x |
PCHG_DUM > 200 ~ 3 * BASE, |
| 123 | 3x |
PCHG_DUM < -100 ~ 0, |
| 124 | 3x |
TRUE ~ AVAL |
| 125 |
), |
|
| 126 | 3x |
CHG = dplyr::case_when( |
| 127 | 3x |
PCHG_DUM > 200 ~ 2 * BASE, |
| 128 | 3x |
PCHG_DUM < -100 ~ -BASE, |
| 129 | 3x |
TRUE ~ CHG |
| 130 |
) |
|
| 131 |
) %>% |
|
| 132 | 3x |
dplyr::select(-"PCHG_DUM") |
| 133 | ||
| 134 | 3x |
adtr <- merge(adsl, adtr, by = c("STUDYID", "USUBJID")) %>%
|
| 135 | 3x |
dplyr::group_by(USUBJID, PARAMCD) %>% |
| 136 | 3x |
dplyr::mutate( |
| 137 | 3x |
ONTRTFL = factor(dplyr::case_when( |
| 138 | 3x |
!AVISIT %in% c("SCREENING", "BASELINE", "FOLLOW UP") ~ "Y",
|
| 139 | 3x |
TRUE ~ "" |
| 140 |
)), |
|
| 141 | 3x |
ANL01FL = dplyr::case_when( |
| 142 | 3x |
DTYPE == "" & AVISITN > 0 ~ "Y", |
| 143 | 3x |
TRUE ~ "" |
| 144 |
), |
|
| 145 | 3x |
ANL03FL = dplyr::case_when( |
| 146 | 3x |
DTYPE == "MINIMUM" ~ "Y", |
| 147 | 3x |
ABLFL == "Y" ~ "Y", |
| 148 | 3x |
TRUE ~ "" |
| 149 |
) |
|
| 150 |
) |
|
| 151 | 3x |
adtr <- merge(adtr, adtr_lastobs, by = c("STUDYID", "USUBJID", "PARAMCD")) %>%
|
| 152 | 3x |
dplyr::mutate( |
| 153 | 3x |
ANL02FL = dplyr::case_when( |
| 154 | 3x |
as.character(AVISIT) == as.character(LAST_VISIT) ~ "Y", |
| 155 | 3x |
ABLFL == "Y" ~ "Y", |
| 156 | 3x |
TRUE ~ "" |
| 157 |
) |
|
| 158 |
) %>% |
|
| 159 | 3x |
dplyr::select(-"LAST_VISIT") |
| 160 |
# Adding variables that are in ADTR osprey but not RCD. |
|
| 161 | 3x |
adtr <- adtr %>% |
| 162 | 3x |
dplyr::mutate( |
| 163 | 3x |
DCSREAS_GRP = ifelse(DCSREAS == "ADVERSE EVENT", "Safety", "Non-Safety"), |
| 164 | 3x |
TRTDURD = ifelse( |
| 165 | 3x |
is.na(TRTSDTM) | is.na(TRTEDTM), |
| 166 | 3x |
NA, |
| 167 | 3x |
TRTEDTM - (TRTSDTM + lubridate::days(1)) |
| 168 |
), |
|
| 169 | 3x |
AGEGR1 = ifelse(AGE < 65, "<65", ">=65") |
| 170 |
) |
|
| 171 | ||
| 172 |
# apply metadata |
|
| 173 | 3x |
adtr <- apply_metadata(adtr, "metadata/ADTR.yml") |
| 174 | 3x |
return(adtr) |
| 175 |
} |
| 1 |
#' Pharmacokinetics Parameters Dataset (ADPP) |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Function for generating a random Pharmacokinetics Parameters Dataset for a given |
|
| 6 |
#' Subject-Level Analysis Dataset. |
|
| 7 |
#' |
|
| 8 |
#' @details One record per study, subject, parameter category, parameter and visit. |
|
| 9 |
#' |
|
| 10 |
#' @inheritParams argument_convention |
|
| 11 |
#' @param ppcat (`character vector`)\cr Categories of parameters. |
|
| 12 |
#' @param ppspec (`character vector`)\cr Specimen material types. |
|
| 13 |
#' @template param_cached |
|
| 14 |
#' @templateVar data adpp |
|
| 15 |
#' |
|
| 16 |
#' @return `data.frame` |
|
| 17 |
#' @export |
|
| 18 |
#' |
|
| 19 |
#' @examples |
|
| 20 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
| 21 |
#' |
|
| 22 |
#' adpp <- radpp(adsl, seed = 2) |
|
| 23 |
#' adpp |
|
| 24 |
radpp <- function(adsl, |
|
| 25 |
ppcat = c("Plasma Drug X", "Plasma Drug Y", "Metabolite Drug X", "Metabolite Drug Y"),
|
|
| 26 |
ppspec = c( |
|
| 27 |
"Plasma", "Plasma", "Plasma", "Matrix of PD", "Matrix of PD", |
|
| 28 |
"Urine", "Urine", "Urine", "Urine" |
|
| 29 |
), |
|
| 30 |
paramcd = c( |
|
| 31 |
"AUCIFO", "CMAX", "CLO", "RMAX", "TON", |
|
| 32 |
"RENALCL", "RENALCLD", "RCAMINT", "RCPCINT" |
|
| 33 |
), |
|
| 34 |
param = c( |
|
| 35 |
"AUC Infinity Obs", "Max Conc", "Total CL Obs", "Time of Maximum Response", |
|
| 36 |
"Time to Onset", "Renal CL", "Renal CL Norm by Dose", |
|
| 37 |
"Amt Rec from T1 to T2", "Pct Rec from T1 to T2" |
|
| 38 |
), |
|
| 39 |
paramu = c("day*ug/mL", "ug/mL", "ml/day/kg", "hr", "hr", "L/hr", "L/hr/mg", "mg", "%"),
|
|
| 40 |
aval_mean = c(200, 30, 5, 10, 3, 0.05, 0.005, 1.5613, 15.65), |
|
| 41 |
visit_format = "CYCLE", |
|
| 42 |
n_days = 2L, |
|
| 43 |
seed = NULL, |
|
| 44 |
na_percentage = 0, |
|
| 45 |
na_vars = list( |
|
| 46 |
AVAL = c(NA, 0.1) |
|
| 47 |
), |
|
| 48 |
cached = FALSE) {
|
|
| 49 | 4x |
checkmate::assert_flag(cached) |
| 50 | 4x |
if (cached) {
|
| 51 | 1x |
return(get_cached_data("cadlb"))
|
| 52 |
} |
|
| 53 | ||
| 54 | 3x |
checkmate::assert_character(ppcat) |
| 55 | 3x |
checkmate::assert_character(ppspec) |
| 56 | 3x |
checkmate::assert_character(paramcd) |
| 57 | 3x |
checkmate::assert_character(param) |
| 58 | 3x |
checkmate::assert_character(paramu) |
| 59 | 3x |
checkmate::assert_vector(aval_mean) |
| 60 | 3x |
checkmate::assert_string(visit_format) |
| 61 | 3x |
checkmate::assert_integer(n_days) |
| 62 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 63 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
| 64 | 3x |
checkmate::assert_true(na_percentage < 1) |
| 65 | 3x |
checkmate::assert_list(na_vars) |
| 66 | ||
| 67 | 3x |
checkmate::assertTRUE(length(ppspec) == length(paramcd)) |
| 68 | 3x |
checkmate::assertTRUE(length(ppspec) == length(param)) |
| 69 | 3x |
checkmate::assertTRUE(length(ppspec) == length(paramu)) |
| 70 | 3x |
checkmate::assertTRUE(length(ppspec) == length(aval_mean)) |
| 71 | ||
| 72 | 3x |
if (!is.null(seed)) {
|
| 73 | 3x |
set.seed(seed) |
| 74 |
} |
|
| 75 | ||
| 76 |
# validate and initialize related variables |
|
| 77 | 3x |
ppspec_init_list <- relvar_init(param, ppspec) |
| 78 | 3x |
param_init_list <- relvar_init(param, paramcd) |
| 79 | 3x |
unit_init_list <- relvar_init(param, paramu) |
| 80 | ||
| 81 | 3x |
adpp <- expand.grid( |
| 82 | 3x |
STUDYID = unique(adsl$STUDYID), |
| 83 | 3x |
USUBJID = adsl$USUBJID, |
| 84 | 3x |
PPCAT = as.factor(ppcat), |
| 85 | 3x |
PARAM = as.factor(param_init_list$relvar1), |
| 86 | 3x |
AVISIT = visit_schedule(visit_format = visit_format, n_assessments = 1L, n_days = n_days), |
| 87 | 3x |
stringsAsFactors = FALSE |
| 88 |
) |
|
| 89 | 3x |
adpp <- adpp %>% |
| 90 | 3x |
dplyr::mutate(AVAL = stats::rnorm(nrow(adpp), mean = 1, sd = 0.2)) %>% |
| 91 | 3x |
dplyr::left_join(data.frame(PARAM = param, ADJUST = aval_mean), by = "PARAM") %>% |
| 92 | 3x |
dplyr::mutate(AVAL = AVAL * ADJUST) %>% |
| 93 | 3x |
dplyr::select(-"ADJUST") |
| 94 | ||
| 95 |
# assign related variable values: PARAMxPPSPEC are related |
|
| 96 | 3x |
adpp <- adpp %>% rel_var( |
| 97 | 3x |
var_name = "PPSPEC", |
| 98 | 3x |
related_var = "PARAM", |
| 99 | 3x |
var_values = ppspec_init_list$relvar2 |
| 100 |
) |
|
| 101 | ||
| 102 |
# assign related variable values: PARAMxPARAMCD are related |
|
| 103 | 3x |
adpp <- adpp %>% rel_var( |
| 104 | 3x |
var_name = "PARAMCD", |
| 105 | 3x |
related_var = "PARAM", |
| 106 | 3x |
var_values = param_init_list$relvar2 |
| 107 |
) |
|
| 108 | ||
| 109 |
# assign related variable values: PARAMxAVALU are related |
|
| 110 | 3x |
adpp <- adpp %>% rel_var( |
| 111 | 3x |
var_name = "AVALU", |
| 112 | 3x |
related_var = "PARAM", |
| 113 | 3x |
var_values = unit_init_list$relvar2 |
| 114 |
) |
|
| 115 | ||
| 116 |
# derive AVISITN based AVISIT and AVALC based on AVAL |
|
| 117 | 3x |
adpp <- adpp %>% |
| 118 | 3x |
dplyr::mutate(AVALC = as.character(AVAL)) %>% |
| 119 | 3x |
dplyr::mutate( |
| 120 | 3x |
AVISITN = dplyr::case_when( |
| 121 | 3x |
AVISIT == "SCREENING" ~ 0, |
| 122 | 3x |
(grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 1,
|
| 123 | 3x |
TRUE ~ NA_real_ |
| 124 |
) |
|
| 125 |
) |
|
| 126 | ||
| 127 |
# derive REGIMEN variable |
|
| 128 | 3x |
adpp <- adpp %>% dplyr::mutate(REGIMEN = "BID") |
| 129 | ||
| 130 |
# derive PPSTINT and PPENINT based on PARAMCD |
|
| 131 | 3x |
t1_t2 <- data.frame( |
| 132 | 3x |
PARAMCD = c("RCAMINT", "RCAMINT", "RCPCINT", "RCPCINT"),
|
| 133 | 3x |
PPSTINT = c("P0H", "P0H", "P0H", "P0H"),
|
| 134 | 3x |
PPENINT = c("P12H", "P24H", "P12H", "P24H")
|
| 135 |
) |
|
| 136 | 3x |
adpp <- adpp %>% |
| 137 | 3x |
dplyr::left_join(t1_t2, by = c("PARAMCD"), multiple = "all", relationship = "many-to-many")
|
| 138 | ||
| 139 | 3x |
adpp <- dplyr::inner_join(adpp, adsl, by = c("STUDYID", "USUBJID")) %>%
|
| 140 | 3x |
dplyr::filter( |
| 141 | 3x |
ACTARM != "B: Placebo", |
| 142 | 3x |
!(ACTARM == "A: Drug X" & (PPCAT == "Plasma Drug Y" | PPCAT == "Metabolite Drug Y")) |
| 143 |
) |
|
| 144 | ||
| 145 |
# derive PKARMCD column for creating more cohorts |
|
| 146 | 3x |
adpp <- adpp %>% |
| 147 | 3x |
dplyr::mutate(PKARMCD = factor(1 + (seq_len(nrow(adpp)) - 1) %/% (nrow(adpp) / 10), labels = c( |
| 148 | 3x |
"Drug A", "Drug B", "Drug C", "Drug D", "Drug E", "Drug F", "Drug G", "Drug H", |
| 149 | 3x |
"Drug I", "Drug J" |
| 150 |
))) |
|
| 151 | ||
| 152 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) {
|
| 153 | ! |
adpp <- mutate_na(ds = adpp, na_vars = na_vars, na_percentage = na_percentage) |
| 154 |
} |
|
| 155 | ||
| 156 | 3x |
adpp <- apply_metadata(adpp, "metadata/ADPP.yml") |
| 157 | 3x |
return(adpp) |
| 158 |
} |
| 1 |
#' Anti-Drug Antibody Analysis Dataset (ADAB) |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Function for generating a random Anti-Drug Antibody Analysis Dataset for a given |
|
| 6 |
#' Subject-Level Analysis Dataset and Pharmacokinetics Analysis Dataset. |
|
| 7 |
#' |
|
| 8 |
#' @inheritParams argument_convention |
|
| 9 |
#' @inheritParams radpc |
|
| 10 |
#' @param adpc (`data.frame`)\cr Pharmacokinetics Analysis Dataset. |
|
| 11 |
#' @template param_cached |
|
| 12 |
#' @templateVar data adab |
|
| 13 |
#' |
|
| 14 |
#' @return `data.frame` |
|
| 15 |
#' @export |
|
| 16 |
#' |
|
| 17 |
#' @details One record per study per subject per parameter per time point: "R1800000", "RESULT1", "R1800001", "RESULT2". |
|
| 18 |
#' |
|
| 19 |
#' @examples |
|
| 20 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
| 21 |
#' adpc <- radpc(adsl, seed = 2, duration = 9 * 7) |
|
| 22 |
#' |
|
| 23 |
#' adab <- radab(adsl, adpc, seed = 2) |
|
| 24 |
#' adab |
|
| 25 |
radab <- function(adsl, |
|
| 26 |
adpc, |
|
| 27 |
constants = c(D = 100, ka = 0.8, ke = 1), |
|
| 28 |
paramcd = c( |
|
| 29 |
"R1800000", "RESULT1", "R1800001", "RESULT2", "ADASTAT1", "INDUCD1", "ENHANC1", |
|
| 30 |
"TRUNAFF1", "EMERNEG1", "EMERPOS1", "PERSADA1", "TRANADA1", "BFLAG1", "TIMADA1", |
|
| 31 |
"ADADUR1", "ADASTAT2", "INDUCD2", "ENHANC2", "EMERNEG2", "EMERPOS2", "BFLAG2", |
|
| 32 |
"TRUNAFF2" |
|
| 33 |
), |
|
| 34 |
param = c( |
|
| 35 |
"Antibody titer units", "ADA interpreted per sample result", |
|
| 36 |
"Neutralizing Antibody titer units", "NAB interpreted per sample result", |
|
| 37 |
"ADA Status of a patient", "Treatment induced ADA", "Treatment enhanced ADA", |
|
| 38 |
"Treatment unaffected", "Treatment Emergent - Negative", |
|
| 39 |
"Treatment Emergent - Positive", "Persistent ADA", "Transient ADA", "Baseline", |
|
| 40 |
"Time to onset of ADA", "ADA Duration", "NAB Status of a patient", |
|
| 41 |
"Treatment induced ADA, Neutralizing Antibody", |
|
| 42 |
"Treatment enhanced ADA, Neutralizing Antibody", |
|
| 43 |
"Treatment Emergent - Negative, Neutralizing Antibody", |
|
| 44 |
"Treatment Emergent - Positive, Neutralizing Antibody", |
|
| 45 |
"Baseline, Neutralizing Antibody", |
|
| 46 |
"Treatment unaffected, Neutralizing Antibody" |
|
| 47 |
), |
|
| 48 |
avalu = c( |
|
| 49 |
"titer", "", "titer", "", "", "", "", "", "", "", "", "", "", "weeks", "weeks", |
|
| 50 |
"", "", "", "", "", "", "" |
|
| 51 |
), |
|
| 52 |
seed = NULL, |
|
| 53 |
na_percentage = 0, |
|
| 54 |
na_vars = list( |
|
| 55 |
AVAL = c(NA, 0.1) |
|
| 56 |
), |
|
| 57 |
cached = FALSE) {
|
|
| 58 | 4x |
checkmate::assert_flag(cached) |
| 59 | 4x |
if (cached) {
|
| 60 | 1x |
return(get_cached_data("cadab"))
|
| 61 |
} |
|
| 62 | ||
| 63 | 3x |
checkmate::assert_data_frame(adpc) |
| 64 | 3x |
checkmate::assert_subset(names(constants), c("D", "ka", "ke"))
|
| 65 | 3x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 66 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1, na.ok = TRUE) |
| 67 | 3x |
checkmate::assert_list(na_vars) |
| 68 | 3x |
checkmate::assert_character(paramcd) |
| 69 | 3x |
checkmate::assert_character(param, len = length(paramcd)) |
| 70 | 3x |
checkmate::assert_character(avalu, len = length(paramcd)) |
| 71 | 3x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
| 72 | 3x |
checkmate::assert_true(na_percentage < 1) |
| 73 | ||
| 74 | 3x |
if (!is.null(seed)) {
|
| 75 | 3x |
set.seed(seed) |
| 76 |
} |
|
| 77 | ||
| 78 |
# validate and initialize related variables |
|
| 79 | 3x |
param_init_list <- relvar_init(param, paramcd) |
| 80 | 3x |
unit_init_list <- relvar_init(param, avalu) |
| 81 | ||
| 82 | 3x |
adpc <- adpc %>% dplyr::filter(ASMED == "PLASMA") |
| 83 | 3x |
adab0 <- expand.grid( |
| 84 | 3x |
STUDYID = unique(adsl$STUDYID), |
| 85 | 3x |
USUBJID = unique(adsl$USUBJID), |
| 86 | 3x |
VISIT = unique(adpc$VISIT), |
| 87 | 3x |
PARAM = as.factor(param_init_list$relvar1[c(1:4)]), |
| 88 | 3x |
PARCAT1 = "A: Drug X Antibody", |
| 89 | 3x |
stringsAsFactors = FALSE |
| 90 |
) |
|
| 91 |
# Set random values for observations |
|
| 92 | 3x |
visit_lvl_params <- c( |
| 93 | 3x |
"Antibody titer units", "Neutralizing Antibody titer units", |
| 94 | 3x |
"ADA interpreted per sample result", "NAB interpreted per sample result" |
| 95 |
) |
|
| 96 | 3x |
aval_random <- stats::rnorm(nrow(unique(adab0 %>% dplyr::select(USUBJID, VISIT))), mean = 1, sd = 0.2) |
| 97 | 3x |
aval_random <- cbind(unique(adab0 %>% dplyr::select(USUBJID, VISIT)), AVAL1 = aval_random) |
| 98 | ||
| 99 | 3x |
adab_visit <- adab0 %>% dplyr::left_join(aval_random, by = c("USUBJID", "VISIT"))
|
| 100 | 3x |
adab_visit <- adab_visit %>% |
| 101 | 3x |
dplyr::mutate( |
| 102 | 3x |
AVAL2 = ifelse(AVAL1 >= 1, AVAL1, NA), |
| 103 | 3x |
AVALC = dplyr::case_when( |
| 104 | 3x |
!is.na(AVAL2) ~ "POSITIVE", |
| 105 | 3x |
is.na(AVAL2) ~ "NEGATIVE" |
| 106 |
), |
|
| 107 | 3x |
AVAL = dplyr::case_when( |
| 108 | 3x |
(PARAM %in% visit_lvl_params[3:4] & !is.na(AVAL2)) ~ 1, |
| 109 | 3x |
(PARAM %in% visit_lvl_params[3:4] & is.na(AVAL2)) ~ 0, |
| 110 | 3x |
(PARAM %in% visit_lvl_params[1:2] & !is.na(AVAL2)) ~ AVAL2, |
| 111 | 3x |
TRUE ~ as.numeric(NA) |
| 112 |
) |
|
| 113 |
) %>% |
|
| 114 | 3x |
dplyr::select(-c(AVAL1, AVAL2)) |
| 115 | ||
| 116 |
# retrieve other variables from adpc |
|
| 117 | 3x |
adab_visit <- adab_visit %>% |
| 118 | 3x |
dplyr::inner_join( |
| 119 | 3x |
adpc %>% |
| 120 | 3x |
dplyr::filter(PCTPT %in% c("Predose", "24H")) %>%
|
| 121 | 3x |
dplyr::select( |
| 122 | 3x |
STUDYID, |
| 123 | 3x |
USUBJID, |
| 124 | 3x |
VISIT, |
| 125 | 3x |
PCTPT, |
| 126 | 3x |
ARM, |
| 127 | 3x |
ACTARM, |
| 128 | 3x |
VISITDY, |
| 129 | 3x |
AFRLT, |
| 130 | 3x |
NFRLT, |
| 131 | 3x |
ARRLT, |
| 132 | 3x |
NRRLT, |
| 133 | 3x |
RELTMU |
| 134 |
) %>% |
|
| 135 | 3x |
unique(), |
| 136 | 3x |
by = c("STUDYID", "USUBJID", "VISIT")
|
| 137 |
) %>% |
|
| 138 | 3x |
rename(ISTPT = PCTPT) |
| 139 | ||
| 140 |
# mutate time from dose variables from adpc to convert into Days |
|
| 141 | 3x |
adab_visit <- adab_visit %>% dplyr::mutate_at(c("AFRLT", "NFRLT", "ARRLT", "NRRLT"), ~ . / 24)
|
| 142 | ||
| 143 | ||
| 144 | ||
| 145 |
# Set random values for subject level paramaters (Y/N) |
|
| 146 | ||
| 147 | 3x |
adab1 <- expand.grid( |
| 148 | 3x |
STUDYID = unique(adsl$STUDYID), |
| 149 | 3x |
USUBJID = unique(adpc$USUBJID), |
| 150 | 3x |
VISIT = NA, |
| 151 | 3x |
PARAM = as.factor(param_init_list$relvar1[c(5:13, 16:22)]), |
| 152 | 3x |
PARCAT1 = "A: Drug X Antibody", |
| 153 | 3x |
stringsAsFactors = FALSE |
| 154 |
) |
|
| 155 | ||
| 156 | 3x |
sub_lvl_params <- c( |
| 157 | 3x |
"ADA Status of a patient", "Treatment induced ADA", "Treatment enhanced ADA", |
| 158 | 3x |
"Treatment unaffected", "Treatment Emergent - Negative", |
| 159 | 3x |
"Treatment Emergent - Positive", "Persistent ADA", "Transient ADA", "Baseline", |
| 160 |
# "Time to onset of ADA", "ADA Duration", |
|
| 161 | 3x |
"NAB Status of a patient", |
| 162 | 3x |
"Treatment induced ADA, Neutralizing Antibody", |
| 163 | 3x |
"Treatment enhanced ADA, Neutralizing Antibody", |
| 164 | 3x |
"Treatment Emergent - Negative, Neutralizing Antibody", |
| 165 | 3x |
"Treatment Emergent - Positive, Neutralizing Antibody", |
| 166 | 3x |
"Baseline, Neutralizing Antibody", |
| 167 | 3x |
"Treatment unaffected, Neutralizing Antibody" |
| 168 |
) |
|
| 169 | ||
| 170 | 3x |
aval_random_sub <- stats::rbinom(nrow(unique(adab1 %>% dplyr::select(USUBJID))), 1, 0.5) |
| 171 | 3x |
aval_random_sub <- cbind(unique(adab1 %>% dplyr::select(USUBJID)), AVAL1 = aval_random_sub) |
| 172 | ||
| 173 | 3x |
adab_sub <- adab1 %>% dplyr::left_join(aval_random_sub, by = c("USUBJID"))
|
| 174 | 3x |
adab_sub <- adab_sub %>% |
| 175 | 3x |
dplyr::mutate( |
| 176 | 3x |
AVAL = AVAL1, |
| 177 | 3x |
AVALC = dplyr::case_when( |
| 178 | 3x |
PARAM %in% c("ADA Status of a patient", "NAB Status of a patient") & AVAL1 == 1 ~ "POSITIVE",
|
| 179 | 3x |
PARAM %in% c("ADA Status of a patient", "NAB Status of a patient") & AVAL1 == 0 ~ "NEGATIVE",
|
| 180 | 3x |
!(PARAM %in% c("ADA Status of a patient", "NAB Status of a patient")) & AVAL1 == 1 ~ "Y",
|
| 181 | 3x |
!(PARAM %in% c("ADA Status of a patient", "NAB Status of a patient")) & AVAL1 == 0 ~ "N"
|
| 182 |
) |
|
| 183 |
) %>% |
|
| 184 | 3x |
dplyr::select(-c(AVAL1)) |
| 185 | ||
| 186 |
# Set random values for subject level paramaters (numeric) |
|
| 187 | ||
| 188 | 3x |
adab2 <- expand.grid( |
| 189 | 3x |
STUDYID = unique(adsl$STUDYID), |
| 190 | 3x |
USUBJID = unique(adpc$USUBJID), |
| 191 | 3x |
VISIT = NA, |
| 192 | 3x |
PARAM = as.factor(param_init_list$relvar1[c(14, 15)]), |
| 193 | 3x |
PARCAT1 = "A: Drug X Antibody", |
| 194 | 3x |
stringsAsFactors = FALSE |
| 195 |
) |
|
| 196 | ||
| 197 | 3x |
sub_lvl_params_num <- c("Time to onset of ADA", "ADA Duration")
|
| 198 | ||
| 199 | 3x |
aval_random_sub_num <- stats::rnorm(nrow(unique(adab2 %>% dplyr::select(USUBJID))), mean = 1, sd = 1) |
| 200 | 3x |
aval_random_sub_num <- cbind(unique(adab2 %>% dplyr::select(USUBJID)), AVAL1 = aval_random_sub_num) |
| 201 | ||
| 202 | 3x |
adab_sub_num <- adab2 %>% dplyr::left_join(aval_random_sub_num, by = c("USUBJID"))
|
| 203 | 3x |
adab_sub_num <- adab_sub_num %>% |
| 204 | 3x |
dplyr::mutate( |
| 205 | 3x |
AVAL = ifelse(AVAL1 >= 1, round(AVAL1, 2), NA), |
| 206 | 3x |
AVALC = as.character(AVAL) |
| 207 |
) %>% |
|
| 208 | 3x |
dplyr::select(-c(AVAL1)) |
| 209 | ||
| 210 | ||
| 211 | 3x |
adab <- bind_rows(adab_visit, adab_sub, adab_sub_num) |
| 212 | ||
| 213 | ||
| 214 |
# assign related variable values: PARAMxPARAMCD are related |
|
| 215 | 3x |
adab <- adab %>% rel_var( |
| 216 | 3x |
var_name = "PARAMCD", |
| 217 | 3x |
related_var = "PARAM", |
| 218 | 3x |
var_values = param_init_list$relvar2 |
| 219 |
) |
|
| 220 | ||
| 221 |
# assign related variable values: PARAMxAVALU are related |
|
| 222 | 3x |
adab <- adab %>% rel_var( |
| 223 | 3x |
var_name = "AVALU", |
| 224 | 3x |
related_var = "PARAM", |
| 225 | 3x |
var_values = unit_init_list$relvar2 |
| 226 |
) |
|
| 227 | ||
| 228 | ||
| 229 | 3x |
adab <- adab %>% |
| 230 | 3x |
dplyr::mutate( |
| 231 | 3x |
RELTMU = "day", |
| 232 | 3x |
ABLFL = ifelse(!is.na(NFRLT) & NFRLT == 0, "Y", NA) # Baseline Record Flag |
| 233 |
, |
|
| 234 | 3x |
ADABLPFL = ifelse(PARAMCD == "RESULT1" & !is.na(NFRLT) & NFRLT == 0, "Y", NA) |
| 235 |
# Baseline ADA Eval. Param-Level Flag, only populate for ADA, not for NAB |
|
| 236 |
, |
|
| 237 | 3x |
ADPBLPFL = ifelse(PARAMCD == "RESULT1" & !is.na(NFRLT) & NFRLT > 0 & !is.na(AVAL), "Y", NA) |
| 238 |
# Post-Baseline ADA Eval. Param-Level Flag, only populate for ADA, not for NAB |
|
| 239 |
) %>% |
|
| 240 | 3x |
dplyr::group_by(USUBJID) %>% |
| 241 | 3x |
dplyr::ungroup() |
| 242 | ||
| 243 |
# create temporary flags to derive subject-level variables |
|
| 244 | 3x |
adab_subj <- adab %>% |
| 245 | 3x |
dplyr::group_by(USUBJID) %>% |
| 246 | 3x |
dplyr::mutate( |
| 247 | 3x |
pos_bl = any(PARAM == "ADA interpreted per sample result" & !is.na(ABLFL) & AVALC == "POSITIVE"), |
| 248 | 3x |
pos_bl_nab = any(PARAM == "NAB interpreted per sample result" & !is.na(ABLFL) & AVALC == "POSITIVE"), |
| 249 | 3x |
any_pos_postbl = any(PARAM == "ADA interpreted per sample result" & is.na(ABLFL) & AVALC == "POSITIVE"), |
| 250 | 3x |
any_pos_postbl_nab = any(PARAM == "NAB interpreted per sample result" & is.na(ABLFL) & AVALC == "POSITIVE"), |
| 251 | 3x |
pos_last_postbl = any(PARAM == "ADA interpreted per sample result" & NFRLT == max(NFRLT) & AVALC == "POSITIVE"), |
| 252 | 3x |
ada_bl = AVAL[PARAM == "Antibody titer units" & !is.na(ABLFL)], |
| 253 | 3x |
nab_bl = AVAL[PARAM == "Neutralizing Antibody titer units" & !is.na(ABLFL)] |
| 254 |
) |
|
| 255 | 3x |
pos_tots <- adab_subj %>% |
| 256 | 3x |
dplyr::summarise( |
| 257 | 3x |
n_pos = sum(PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE"), |
| 258 | 3x |
inc_postbl = sum(PARAM == "ADA interpreted per sample result" & is.na(ABLFL) & (AVAL - ada_bl) > 0.60), |
| 259 | 3x |
inc_postbl_nab = sum(PARAM == "NAB interpreted per sample result" & is.na(ABLFL) & (AVAL - nab_bl) > 0.60), |
| 260 | 3x |
onset_ada = if (any(PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE")) {
|
| 261 | 18x |
min(NFRLT[PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE"]) |
| 262 |
} else {
|
|
| 263 | 3x |
NA |
| 264 |
}, |
|
| 265 | 3x |
last_ada = if (any(PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE")) {
|
| 266 | 18x |
max(NFRLT[PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE"]) |
| 267 |
} else {
|
|
| 268 | 3x |
NA |
| 269 |
} |
|
| 270 |
) |
|
| 271 | 3x |
adab_subj <- adab_subj %>% |
| 272 | 3x |
dplyr::left_join(pos_tots, by = "USUBJID") %>% |
| 273 | 3x |
dplyr::select( |
| 274 | 3x |
USUBJID, |
| 275 | 3x |
NFRLT, |
| 276 | 3x |
pos_bl, |
| 277 | 3x |
pos_bl_nab, |
| 278 | 3x |
any_pos_postbl, |
| 279 | 3x |
any_pos_postbl_nab, |
| 280 | 3x |
inc_postbl, |
| 281 | 3x |
inc_postbl_nab, |
| 282 | 3x |
pos_last_postbl, |
| 283 | 3x |
n_pos, |
| 284 | 3x |
onset_ada, |
| 285 | 3x |
last_ada |
| 286 |
) %>% |
|
| 287 | 3x |
unique() |
| 288 | ||
| 289 |
# add flags to ADAB dataset |
|
| 290 | 3x |
adab <- adab %>% |
| 291 | 3x |
dplyr::left_join(adab_subj, by = c("USUBJID", "NFRLT"))
|
| 292 | ||
| 293 |
# derive subject-level variables |
|
| 294 | 3x |
adab[!(adab$PARAM %in% visit_lvl_params), ] <- adab %>% |
| 295 | 3x |
dplyr::filter(!(PARAM %in% visit_lvl_params)) %>% |
| 296 | 3x |
dplyr::mutate( |
| 297 |
# nolint start indentation_linter |
|
| 298 | 3x |
AVALC = dplyr::case_when( |
| 299 | 3x |
(PARAM == "ADA Status of a patient" & any_pos_postbl) ~ "POSITIVE", |
| 300 | 3x |
(PARAM == "ADA Status of a patient" & !any_pos_postbl) ~ "NEGATIVE", |
| 301 | 3x |
(PARAM == "Treatment induced ADA" & !pos_bl & any_pos_postbl) ~ "Y", |
| 302 | 3x |
(PARAM == "Treatment enhanced ADA" & pos_bl & inc_postbl > 0) ~ "Y", |
| 303 | 3x |
(PARAM == "Treatment unaffected" & pos_bl & (inc_postbl == 0 | !any_pos_postbl)) ~ "Y", |
| 304 | 3x |
(PARAM == "Treatment Emergent - Positive" & |
| 305 | 3x |
((!pos_bl & any_pos_postbl) | (pos_bl & inc_postbl > 0))) ~ "Y", |
| 306 | 3x |
(PARAM == "Treatment Emergent - Negative" & |
| 307 | 3x |
!((!pos_bl & any_pos_postbl) | (pos_bl & inc_postbl > 0))) ~ "Y", |
| 308 | 3x |
(PARAM == "Persistent ADA" & pos_last_postbl) ~ "Y", |
| 309 | 3x |
(PARAM == "Transient ADA" & |
| 310 | 3x |
(n_pos - pos_bl - pos_last_postbl == 1 | n_pos > 1)) ~ "Y", |
| 311 | 3x |
(PARAM == "Baseline" & pos_bl) ~ "POSITIVE", |
| 312 | 3x |
(PARAM == "Baseline" & !pos_bl) ~ "NEGATIVE", |
| 313 | 3x |
(PARAM == "Time to onset of ADA") ~ as.character(onset_ada / 7), |
| 314 | 3x |
(PARAM == "ADA Duration") ~ as.character((last_ada - onset_ada) / 7), |
| 315 | 3x |
(PARAM == "NAB Status of a patient" & any_pos_postbl_nab) ~ "POSITIVE", |
| 316 | 3x |
(PARAM == "NAB Status of a patient" & !any_pos_postbl_nab) ~ "NEGATIVE", |
| 317 | 3x |
(PARAM == "Treatment induced ADA, Neutralizing Antibody" & |
| 318 | 3x |
!pos_bl_nab & any_pos_postbl_nab) ~ "Y", |
| 319 | 3x |
(PARAM == "Treatment enhanced ADA, Neutralizing Antibody" & |
| 320 | 3x |
pos_bl_nab & inc_postbl_nab > 0) ~ "Y", |
| 321 | 3x |
(PARAM == "Baseline, Neutralizing Antibody" & pos_bl_nab) ~ "POSITIVE", |
| 322 | 3x |
(PARAM == "Baseline, Neutralizing Antibody" & !pos_bl_nab) ~ "NEGATIVE", |
| 323 | 3x |
(PARAM == "Treatment unaffected, Neutralizing Antibody" & pos_bl_nab & |
| 324 | 3x |
(inc_postbl_nab == 0 | !any_pos_postbl_nab)) ~ "Y", |
| 325 | 3x |
(PARAM == "Treatment Emergent - Positive, Neutralizing Antibody" & |
| 326 | 3x |
((!pos_bl_nab & any_pos_postbl_nab) | (pos_bl_nab & inc_postbl_nab > 0))) ~ "Y", |
| 327 | 3x |
(PARAM == "Treatment Emergent - Negative, Neutralizing Antibody" & |
| 328 | 3x |
!((!pos_bl_nab & any_pos_postbl_nab) | (pos_bl_nab & inc_postbl_nab > 0))) ~ "Y", |
| 329 | 3x |
TRUE ~ "N" |
| 330 |
), |
|
| 331 | 3x |
AVAL = dplyr::case_when( |
| 332 | 3x |
(PARAM == "ADA Status of a patient" & any_pos_postbl) ~ 1, |
| 333 | 3x |
(PARAM == "Treatment induced ADA" & !pos_bl & any_pos_postbl) ~ 1, |
| 334 | 3x |
(PARAM == "Treatment enhanced ADA" & pos_bl & inc_postbl > 0) ~ 1, |
| 335 | 3x |
(PARAM == "Treatment unaffected" & pos_bl & (inc_postbl == 0 | !any_pos_postbl)) ~ 1, |
| 336 | 3x |
(PARAM == "Treatment Emergent - Positive" & |
| 337 | 3x |
((!pos_bl & any_pos_postbl) | (pos_bl & inc_postbl > 0))) ~ 1, |
| 338 | 3x |
(PARAM == "Treatment Emergent - Negative" & |
| 339 | 3x |
!((!pos_bl & any_pos_postbl) | (pos_bl & inc_postbl > 0))) ~ 1, |
| 340 | 3x |
(PARAM == "Persistent ADA" & pos_last_postbl) ~ 1, |
| 341 | 3x |
(PARAM == "Transient ADA" & |
| 342 | 3x |
(n_pos - ifelse(pos_bl, 1, 0) - ifelse(pos_last_postbl, 1, 0) == 1 | n_pos > 1)) ~ 1, |
| 343 | 3x |
(PARAM == "Baseline" & pos_bl) ~ 1, |
| 344 | 3x |
(PARAM == "Time to onset of ADA") ~ onset_ada / 7, |
| 345 | 3x |
(PARAM == "ADA Duration") ~ (last_ada - onset_ada) / 7, |
| 346 | 3x |
(PARAM == "NAB Status of a patient" & any_pos_postbl_nab) ~ 1, |
| 347 | 3x |
(PARAM == "Treatment induced ADA, Neutralizing Antibody" & |
| 348 | 3x |
!pos_bl_nab & any_pos_postbl_nab) ~ 1, |
| 349 | 3x |
(PARAM == "Treatment enhanced ADA, Neutralizing Antibody" & |
| 350 | 3x |
pos_bl_nab & inc_postbl_nab > 0) ~ 1, |
| 351 | 3x |
(PARAM == "Baseline, Neutralizing Antibody" & pos_bl_nab) ~ 1, |
| 352 | 3x |
(PARAM == "Treatment unaffected, Neutralizing Antibody" & pos_bl_nab & |
| 353 | 3x |
(inc_postbl_nab == 0 | !any_pos_postbl_nab)) ~ 1, |
| 354 | 3x |
(PARAM == "Treatment Emergent - Positive, Neutralizing Antibody" & |
| 355 | 3x |
((!pos_bl_nab & any_pos_postbl_nab) | (pos_bl_nab & inc_postbl_nab > 0))) ~ 1, |
| 356 | 3x |
(PARAM == "Treatment Emergent - Negative, Neutralizing Antibody" & |
| 357 | 3x |
!((!pos_bl_nab & any_pos_postbl_nab) | (pos_bl_nab & inc_postbl_nab > 0))) ~ 1, |
| 358 | 3x |
TRUE ~ 0 |
| 359 |
), |
|
| 360 |
# nolint end indentation_linter |
|
| 361 | 3x |
PARCAT1 = dplyr::case_when( |
| 362 | 3x |
PARAM %in% c( |
| 363 | 3x |
"Neutralizing Antibody titer units", "NAB interpreted per sample result", |
| 364 | 3x |
"NAB Status of a patient", "Treatment induced ADA, Neutralizing Antibody", |
| 365 | 3x |
"Treatment enhanced ADA, Neutralizing Antibody", |
| 366 | 3x |
"Treatment Emergent - Negative, Neutralizing Antibody", |
| 367 | 3x |
"Treatment Emergent - Positive, Neutralizing Antibody", |
| 368 | 3x |
"Treatment unaffected, Neutralizing Antibody" |
| 369 | 3x |
) ~ "A: Drug X Neutralizing Antibody", |
| 370 | 3x |
TRUE ~ PARCAT1 |
| 371 |
) |
|
| 372 |
) |
|
| 373 | ||
| 374 |
# remove intermediate flag variables from adab |
|
| 375 | 3x |
adab <- adab %>% |
| 376 | 3x |
dplyr::select(-c( |
| 377 | 3x |
pos_bl, |
| 378 | 3x |
pos_bl_nab, |
| 379 | 3x |
any_pos_postbl, |
| 380 | 3x |
any_pos_postbl_nab, |
| 381 | 3x |
pos_last_postbl, |
| 382 | 3x |
inc_postbl, |
| 383 | 3x |
inc_postbl_nab, |
| 384 | 3x |
n_pos, |
| 385 | 3x |
onset_ada, |
| 386 | 3x |
last_ada |
| 387 |
)) |
|
| 388 | ||
| 389 |
# Carry over ARM and ACTARM for all records. |
|
| 390 | 3x |
arm <- adab %>% |
| 391 | 3x |
filter(!is.na(ARM), !is.na(ACTARM)) %>% |
| 392 | 3x |
select(USUBJID, ARM, ACTARM) %>% |
| 393 | 3x |
distinct(.) |
| 394 | 3x |
adab$ARM <- arm$ARM[match(adab$USUBJID, arm$USUBJID)] |
| 395 | 3x |
adab$ACTARM <- arm$ACTARM[match(adab$USUBJID, arm$USUBJID)] |
| 396 | ||
| 397 | 3x |
if (length(na_vars) > 0 && na_percentage > 0) {
|
| 398 | ! |
adab <- mutate_na(ds = adab, na_vars = na_vars, na_percentage = na_percentage) |
| 399 |
} |
|
| 400 | ||
| 401 | 3x |
adab <- apply_metadata(adab, "metadata/ADAB.yml") |
| 402 |
} |
| 1 |
#' Pharmacokinetics Analysis Dataset (ADPC) |
|
| 2 |
#' |
|
| 3 |
#' @description `r lifecycle::badge("stable")`
|
|
| 4 |
#' |
|
| 5 |
#' Function for generating a random Pharmacokinetics Analysis Dataset for a given |
|
| 6 |
#' Subject-Level Analysis Dataset. |
|
| 7 |
#' |
|
| 8 |
#' @details One record per study, subject, parameter, and time point. |
|
| 9 |
#' |
|
| 10 |
#' @inheritParams argument_convention |
|
| 11 |
#' @param avalu (`character`)\cr Analysis value units. |
|
| 12 |
#' @param constants (`character vector`)\cr Constant parameters to be used in formulas for creating analysis values. |
|
| 13 |
#' @param duration (`numeric`)\cr Duration in number of days. |
|
| 14 |
#' @template param_cached |
|
| 15 |
#' @templateVar data adpc |
|
| 16 |
#' |
|
| 17 |
#' @return `data.frame` |
|
| 18 |
#' @export |
|
| 19 |
#' |
|
| 20 |
#' @examples |
|
| 21 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
| 22 |
#' |
|
| 23 |
#' adpc <- radpc(adsl, seed = 2) |
|
| 24 |
#' adpc |
|
| 25 |
#' |
|
| 26 |
#' adpc <- radpc(adsl, seed = 2, duration = 3) |
|
| 27 |
#' adpc |
|
| 28 |
radpc <- function(adsl, |
|
| 29 |
avalu = "ug/mL", |
|
| 30 |
constants = c(D = 100, ka = 0.8, ke = 1), |
|
| 31 |
duration = 2, |
|
| 32 |
seed = NULL, |
|
| 33 |
na_percentage = 0, |
|
| 34 |
na_vars = list( |
|
| 35 |
AVAL = c(NA, 0.1) |
|
| 36 |
), |
|
| 37 |
cached = FALSE) {
|
|
| 38 | 5x |
checkmate::assert_flag(cached) |
| 39 | 5x |
if (cached) {
|
| 40 | 1x |
return(get_cached_data("cadpc"))
|
| 41 |
} |
|
| 42 | ||
| 43 | 4x |
checkmate::assert_data_frame(adsl) |
| 44 | 4x |
checkmate::assert_character(avalu, len = 1, any.missing = FALSE) |
| 45 | 4x |
checkmate::assert_subset(names(constants), c("D", "ka", "ke"))
|
| 46 | 4x |
checkmate::assert_numeric(x = duration, max.len = 1) |
| 47 | 4x |
checkmate::assert_number(seed, null.ok = TRUE) |
| 48 | 4x |
checkmate::assert_number(na_percentage, lower = 0, upper = 1) |
| 49 | 4x |
checkmate::assert_true(na_percentage < 1) |
| 50 | 4x |
checkmate::assert_list(na_vars) |
| 51 | ||
| 52 | 4x |
if (!is.null(seed)) {
|
| 53 | 4x |
set.seed(seed) |
| 54 |
} |
|
| 55 | ||
| 56 | 4x |
radpc_core <- function(day) {
|
| 57 | 8x |
adpc_day <- tidyr::expand_grid( |
| 58 | 8x |
data.frame( |
| 59 | 8x |
STUDYID = adsl$STUDYID, |
| 60 | 8x |
USUBJID = adsl$USUBJID, |
| 61 | 8x |
ARMCD = adsl$ARMCD, |
| 62 | 8x |
A0 = unname(constants["D"]), |
| 63 | 8x |
ka = unname(constants["ka"]) - stats::runif(length(adsl$USUBJID), -0.2, 0.2), |
| 64 | 8x |
ke = unname(constants["ke"]) - stats::runif(length(adsl$USUBJID), -0.2, 0.2) |
| 65 |
), |
|
| 66 | 8x |
PCTPTNUM = if (day == 1) c(0, 0.5, 1, 1.5, 2, 3, 4, 8, 12) else 24 * (day - 1), |
| 67 | 8x |
PARAM = factor(c("Plasma Drug X", "Urine Drug X", "Plasma Drug Y", "Urine Drug Y"))
|
| 68 |
) |
|
| 69 | 8x |
adpc_day <- adpc_day[!(grepl("Urine", adpc_day$PARAM) & adpc_day$PCTPTNUM %in% c(0.5, 1, 1.5, 2, 3)), ] %>%
|
| 70 | 8x |
dplyr::arrange(USUBJID, PARAM) %>% |
| 71 | 8x |
dplyr::mutate( |
| 72 | 8x |
VISITDY = day, |
| 73 | 8x |
VISIT = ifelse(day <= 7, paste("Day", VISITDY), paste("Week", (VISITDY - 1) / 7)),
|
| 74 | 8x |
PCVOLU = ifelse(grepl("Urine", PARAM), "mL", ""),
|
| 75 | 8x |
ASMED = ifelse(grepl("Urine", PARAM), "URINE", "PLASMA"),
|
| 76 | 8x |
PCTPT = factor(dplyr::case_when( |
| 77 | 8x |
PCTPTNUM == 0 ~ "Predose", |
| 78 | 8x |
(day == 1 & grepl("Urine", PARAM)) ~
|
| 79 | 8x |
paste0(lag(PCTPTNUM), "H - ", PCTPTNUM, "H"), |
| 80 | 8x |
(day != 1 & grepl("Urine", PARAM)) ~
|
| 81 | 8x |
paste0(as.numeric(PCTPTNUM) - 24, "H - ", PCTPTNUM, "H"), |
| 82 | 8x |
TRUE ~ paste0(PCTPTNUM, "H") |
| 83 |
)), |
|
| 84 | 8x |
ARELTM1 = PCTPTNUM, |
| 85 | 8x |
NRELTM1 = PCTPTNUM, |
| 86 | 8x |
ARELTM2 = ARELTM1 - (24 * (day - 1)), |
| 87 | 8x |
NRELTM2 = NRELTM1 - (24 * (day - 1)), |
| 88 | 8x |
A0 = ifelse(PARAM == "Plasma Drug Y", A0, A0 / 2), |
| 89 | 8x |
AVAL = round( |
| 90 | 8x |
(A0 * ka * ( |
| 91 | 8x |
exp(-ka * ARELTM1) - exp(-ke * ARELTM1) |
| 92 |
)) |
|
| 93 | 8x |
/ (ke - ka), |
| 94 | 8x |
digits = 3 |
| 95 |
) |
|
| 96 |
) %>% |
|
| 97 | 8x |
dplyr::mutate( |
| 98 | 8x |
PCVOL = ifelse( |
| 99 | 8x |
ASMED == "URINE", |
| 100 | 8x |
round(abs(((PCTPTNUM - 1) %% 24) * A0 * ka * exp(PCTPTNUM %% 1.8 / 10)), 2), |
| 101 | 8x |
NA |
| 102 |
), |
|
| 103 |
# PK Equation |
|
| 104 | 8x |
AVALC = ifelse(AVAL == 0, "BLQ", as.character(AVAL)), |
| 105 | 8x |
AVALU = avalu, |
| 106 | 8x |
RELTMU = "hr" |
| 107 |
) %>% |
|
| 108 | 8x |
dplyr::select(-c("A0", "ka", "ke"))
|
| 109 | ||
| 110 | 8x |
return(adpc_day) |
| 111 |
} |
|
| 112 | ||
| 113 | 4x |
adpc <- list() |
| 114 | ||
| 115 | 4x |
for (day in seq(duration)[seq(duration) <= 7 | ((seq(duration) - 1) %% 7 == 0)]) {
|
| 116 | 8x |
adpc[[day]] <- radpc_core(day = day) |
| 117 |
} |
|
| 118 | ||
| 119 | 4x |
adpc <- do.call(rbind, adpc) |
| 120 | ||
| 121 | 4x |
adpc <- dplyr::inner_join(adpc, adsl, by = c("STUDYID", "USUBJID", "ARMCD")) %>%
|
| 122 | 4x |
dplyr::filter(ACTARM != "B: Placebo", !(ACTARM == "A: Drug X" & PARAM == "Plasma Drug Y")) |
| 123 | ||
| 124 | 4x |
if (length(na_vars) > 0 && na_percentage > 0) {
|
| 125 | ! |
adpc <- mutate_na(ds = adpc, na_vars = na_vars, na_percentage = na_percentage) |
| 126 |
} |
|
| 127 | ||
| 128 | 4x |
adpc <- adpc %>% |
| 129 | 4x |
rename( |
| 130 | 4x |
AVALCAT1 = AVALC, |
| 131 | 4x |
NFRLT = NRELTM1, |
| 132 | 4x |
AFRLT = ARELTM1, |
| 133 | 4x |
NRRLT = NRELTM2, |
| 134 | 4x |
ARRLT = ARELTM2 |
| 135 |
) %>% |
|
| 136 | 4x |
mutate(ANL02FL = "Y") |
| 137 | ||
| 138 | 4x |
adpc <- apply_metadata(adpc, "metadata/ADPC.yml") |
| 139 |
} |
| 1 |
#' Time to Safety Event Analysis Dataset (ADSAFTTE) |
|
| 2 |
#' |
|
| 3 |
#' Function to generate random Time-to-Safety Event Dataset for a |
|
| 4 |
#' given Subject-Level Analysis Dataset. |
|
| 5 |
#' |
|
| 6 |
#' @details |
|
| 7 |
#' |
|
| 8 |
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD` |
|
| 9 |
#' |
|
| 10 |
#' @inheritParams radaette |
|
| 11 |
#' @param ... Additional arguments to be passed to `radaette` |
|
| 12 |
#' |
|
| 13 |
#' @return `data.frame` |
|
| 14 |
#' @export |
|
| 15 |
#' |
|
| 16 |
#' @examples |
|
| 17 |
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2) |
|
| 18 |
#' |
|
| 19 |
#' adsaftte <- radsaftte(adsl, seed = 2) |
|
| 20 |
#' adsaftte |
|
| 21 |
radsaftte <- function(adsl, |
|
| 22 |
...) {
|
|
| 23 | 2x |
radaette(adsl = adsl, ...) |
| 24 |
} |