random.cdisc.data coverage - 98.80%

Files
Source

#' ECG Analysis Dataset (ADEG)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating random dataset from ECG Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per subject per parameter per analysis visit per analysis date.
#'
#' In addition, derived `"POST-BASELINE MINIMUM"` / `"POST-BASELINE MAXIMUM"` records
#' (`DTYPE` of `"MINIMUM"` / `"MAXIMUM"`) are appended per `USUBJID`, `PARAMCD`, `BASETYPE`
#' group whenever a matching post-baseline record is found.
#'
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `BASETYPE`, `AVISITN`, `ATPTN`, `DTYPE`, `ADTM`, `EGSEQ`, `ASPID`
#'
#' @inheritParams argument_convention
#' @param egcat (`character vector`)\cr EG category values.
#' @param max_n_eg (`integer`)\cr Maximum number of EG results per patient. Defaults to 10.
#'   Note: in the current implementation this argument is only validated; it is not
#'   otherwise used when generating the data.
#' @template param_cached
#' @templateVar data adeg
#'
#' @return `data.frame`
#' @export
#'
#' @author tomlinsj, npaszty, Xuefeng Hou, dipietrc
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adeg <- radeg(adsl, visit_format = "WEEK", n_assessments = 7L, seed = 2)
#' adeg
#'
#' adeg <- radeg(adsl, visit_format = "CYCLE", n_assessments = 2L, seed = 2)
#' adeg
radeg <- function(adsl,
                  egcat = c("INTERVAL", "INTERVAL", "MEASUREMENT", "FINDING"),
                  param = c(
                    "QT Duration",
                    "RR Duration",
                    "Heart Rate",
                    "ECG Interpretation"
                  ),
                  paramcd = c("QT", "RR", "HR", "ECGINTP"),
                  paramu = c("msec", "msec", "beats/min", ""),
                  visit_format = "WEEK",
                  n_assessments = 5L,
                  n_days = 5L,
                  max_n_eg = 10L,
                  lookup = NULL,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(
                    ABLFL = c(1235, 0.1), BASE = c(NA, 0.1), BASEC = c(NA, 0.1),
                    CHG = c(1234, 0.1), PCHG = c(1234, 0.1)
                  ),
                  cached = FALSE) {
  # Short-circuit: when `cached` is TRUE the pre-built "cadeg" dataset is returned
  # and all other arguments are ignored.
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadeg"))
  }

  # Argument validation. `max_n_eg` and `lookup` are checked here but are not
  # referenced again in this function body.
  checkmate::assert_data_frame(adsl)
  checkmate::assert_character(egcat, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(param, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramu, min.len = 1, any.missing = FALSE)
  checkmate::assert_string(visit_format)
  checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE)
  checkmate::assert_integer(n_days, len = 1, any.missing = FALSE)
  checkmate::assert_integer(max_n_eg, len = 1, any.missing = FALSE)
  checkmate::assert_data_frame(lookup, null.ok = TRUE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  # validate and initialize related variables
  egcat_init_list <- relvar_init(param, egcat)
  param_init_list <- relvar_init(param, paramcd)
  unit_init_list <- relvar_init(param, paramu)

  # Seed the RNG only when requested so that results are reproducible on demand.
  if (!is.null(seed)) {
    set.seed(seed)
  }
  # Study duration is read from the "study_duration_secs" attribute attached to `adsl`.
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  # Skeleton: one row per combination of study, subject, parameter and visit.
  adeg <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = adsl$USUBJID,
    PARAM = as.factor(param_init_list$relvar1),
    AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days),
    stringsAsFactors = FALSE
  )

  # assign related variable values: PARAMxEGCAT are related
  adeg <- adeg %>% rel_var(
    var_name = "EGCAT",
    related_var = "PARAM",
    var_values = egcat_init_list$relvar2
  )

  # assign related variable values: PARAMxPARAMCD are related
  adeg <- adeg %>% rel_var(
    var_name = "PARAMCD",
    related_var = "PARAM",
    var_values = param_init_list$relvar2
  )

  # Simulate numeric results from a normal distribution per parameter code;
  # the ECG interpretation parameter has no numeric value (NA). Parameter codes
  # other than the four listed here fall through case_when and also get NA.
  adeg <- adeg %>% dplyr::mutate(AVAL = dplyr::case_when(
    PARAMCD == "QT" ~ stats::rnorm(nrow(adeg), mean = 350, sd = 100),
    PARAMCD == "RR" ~ stats::rnorm(nrow(adeg), mean = 1050, sd = 300),
    PARAMCD == "HR" ~ stats::rnorm(nrow(adeg), mean = 70, sd = 20),
    PARAMCD == "ECGINTP" ~ NA_real_
  ))

  # Test code / test name are copies of the parameter code / parameter name.
  adeg <- adeg %>%
    dplyr::mutate(EGTESTCD = PARAMCD) %>%
    dplyr::mutate(EGTEST = PARAM)

  # Numeric visit: SCREENING = -1, BASELINE = 0; WEEK/CYCLE visits use the integer
  # level code of the AVISIT factor minus 2.
  adeg <- adeg %>% dplyr::mutate(AVISITN = dplyr::case_when(
    AVISIT == "SCREENING" ~ -1,
    AVISIT == "BASELINE" ~ 0,
    (grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
    TRUE ~ NA_real_
  ))

  # assign related variable values: PARAMxAVALU (unit) are related
  adeg <- adeg %>% rel_var(
    var_name = "AVALU",
    related_var = "PARAM",
    var_values = unit_init_list$relvar2
  )

  # order to prepare for change from screening and baseline values
  adeg <- adeg[order(adeg$STUDYID, adeg$USUBJID, adeg$PARAMCD, adeg$AVISITN), ]

  # Per subject: take STUDYID from the matching ADSL row and flag the baseline
  # record ("BASELINE" for WEEK format, "CYCLE 1 DAY 1" for CYCLE format).
  adeg <- Reduce(rbind, lapply(split(adeg, adeg$USUBJID), function(x) {
    x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])]
    x$ABLFL <- ifelse(toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE",
      "Y",
      ifelse(toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1", "Y", "")
    )
    x
  }))

  # BASE: for rows with AVISITN >= 0 carry forward the AVAL of the most recent
  # baseline-flagged row (relies on the ordering above); other rows keep their own AVAL.
  adeg$BASE <- ifelse(adeg$AVISITN >= 0, retain(adeg, adeg$AVAL, adeg$ABLFL == "Y"), adeg$AVAL)

  # Lower limit of the reference range per parameter code.
  adeg <- adeg %>% dplyr::mutate(ANRLO = dplyr::case_when(
    PARAMCD == "QT" ~ 200,
    PARAMCD == "RR" ~ 600,
    PARAMCD == "HR" ~ 40,
    PARAMCD == "ECGINTP" ~ NA_real_
  ))

  # Upper limit of the reference range per parameter code.
  adeg <- adeg %>% dplyr::mutate(ANRHI = dplyr::case_when(
    PARAMCD == "QT" ~ 500,
    PARAMCD == "RR" ~ 1500,
    PARAMCD == "HR" ~ 100,
    PARAMCD == "ECGINTP" ~ NA_real_
  ))

  # Reference range indicator: compares AVAL against ANRLO / ANRHI
  # (NA when AVAL or the limits are NA).
  adeg <- adeg %>% dplyr::mutate(ANRIND = factor(dplyr::case_when(
    AVAL < ANRLO ~ "LOW",
    AVAL >= ANRLO & AVAL <= ANRHI ~ "NORMAL",
    AVAL > ANRHI ~ "HIGH"
  )))

  # Change / percent change from baseline are only populated for AVISITN > 0.
  # BNRIND picks the ANRIND of the baseline-flagged row within each
  # subject/parameter/baseline-type group.
  adeg <- adeg %>%
    dplyr::mutate(CHG = ifelse(AVISITN > 0, AVAL - BASE, NA)) %>%
    dplyr::mutate(PCHG = ifelse(AVISITN > 0, 100 * (CHG / BASE), NA)) %>%
    dplyr::mutate(BASETYPE = "LAST") %>%
    dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>%
    dplyr::mutate(BNRIND = ANRIND[ABLFL == "Y"]) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(ATPTN = 1) %>%
    dplyr::mutate(DTYPE = NA)

  # Fix the level order of both range indicators.
  adeg$ANRIND <- factor(adeg$ANRIND, levels = c("LOW", "NORMAL", "HIGH"))
  adeg$BNRIND <- factor(adeg$BNRIND, levels = c("LOW", "NORMAL", "HIGH"))

  adeg <- rcd_var_relabel(
    adeg,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # merge ADSL to be able to add EG date and study day variables
  # TRTENDT: treatment end date; when TRTEDTM is missing it is derived from the
  # treatment start date plus the study duration.
  adeg <- dplyr::inner_join(
    adeg,
    adsl,
    by = c("STUDYID", "USUBJID")
  ) %>%
    dplyr::rowwise() %>%
    dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
      is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
      TRUE ~ TRTEDTM
    ))) %>%
    dplyr::ungroup()

  # ADTM: per subject, sample nlevels(AVISIT) distinct days between the first
  # TRTSDTM and TRTENDT, sort them, and repeat each date n() / nlevels(AVISIT) times
  # so that rows (arranged by AVISITN) of the same visit share a date.
  # ADY is the day difference between ADTM and TRTSDTM, rounded up.
  adeg <- adeg %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::arrange(USUBJID, AVISITN) %>%
    dplyr::mutate(ADTM = rep(
      sort(sample(
        seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"),
        size = nlevels(AVISIT)
      )),
      each = n() / nlevels(AVISIT)
    )) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>%
    dplyr::select(-TRTENDT) %>%
    dplyr::arrange(STUDYID, USUBJID, ADTM)

  # ASPID: random permutation of row numbers over the whole dataset.
  # EGSEQ / ASEQ: running sequence number within subject; finally sort by the keys.
  adeg <- adeg %>%
    dplyr::mutate(ASPID = sample(seq_len(dplyr::n()))) %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(EGSEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = EGSEQ) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      PARAMCD,
      BASETYPE,
      AVISITN,
      ATPTN,
      DTYPE,
      ADTM,
      EGSEQ,
      ASPID
    )

  # On-treatment flag: every visit other than SCREENING and BASELINE.
  adeg <- adeg %>% dplyr::mutate(ONTRTFL = factor(dplyr::case_when(
    !AVISIT %in% c("SCREENING", "BASELINE") ~ "Y",
    TRUE ~ ""
  )))

  # Character result: random ABNORMAL/NORMAL (25% / 75%) for the ECG interpretation
  # parameter, otherwise the numeric AVAL converted to character.
  adeg <- adeg %>% dplyr::mutate(AVALC = ifelse(
    PARAMCD == "ECGINTP",
    as.character(sample_fct(c("ABNORMAL", "NORMAL"), nrow(adeg), prob = c(0.25, 0.75))),
    as.character(AVAL)
  ))

  # Temporarily creating a row_check column to easily match newly created
  # observations with their row correct arrangement.
  adeg <- adeg %>%
    dplyr::mutate(row_check = seq_len(nrow(adeg)))

  # Created function to add in new observations for DTYPE, "MINIMUM" & "MAXIMUM" in this case.
  # For each USUBJID/PARAMCD/BASETYPE group it keeps the first (by ADTM, ASPID, EGSEQ)
  # non-screening, non-baseline row whose AVAL equals the group minimum (`minimum = TRUE`)
  # or maximum (`minimum = FALSE`) and relabels its DTYPE and AVISIT.
  get_groups <- function(data,
                         minimum) {
    data <- data %>%
      dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>%
      dplyr::arrange(ADTM, ASPID, EGSEQ) %>%
      dplyr::filter(
        (AVISIT != "BASELINE" & AVISIT != "SCREENING") &
          (ONTRTFL == "Y" | ADTM <= TRTSDTM)
      ) %>%
      {
        if (minimum == TRUE) {
          dplyr::filter(., AVAL == min(AVAL)) %>%
            dplyr::mutate(., DTYPE = "MINIMUM", AVISIT = "POST-BASELINE MINIMUM")
        } else {
          dplyr::filter(., AVAL == max(AVAL)) %>%
            dplyr::mutate(., DTYPE = "MAXIMUM", AVISIT = "POST-BASELINE MAXIMUM")
        }
      } %>%
      dplyr::slice(1) %>%
      dplyr::ungroup()

    return(data)
  }

  # Binding the new observations to the dataset from the function above and rearranging in the correct order.
  adeg <- rbind(adeg, get_groups(adeg, TRUE), get_groups(adeg, FALSE)) %>%
    dplyr::arrange(row_check) %>%
    dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>%
    dplyr::arrange(AVISIT, .by_group = TRUE) %>%
    dplyr::ungroup()

  # Dropping the "row_check" column created above.
  adeg <- adeg[, -which(names(adeg) %in% c("row_check"))]

  # Created function to easily match rows which comply to ONTRTFL derivation
  # Returns `data` with an extra column `new_var` set to "Y" on the single row
  # selected per group and "" elsewhere. With `worst_obs = FALSE` the groups are
  # USUBJID/PARAMCD/BASETYPE/AVISIT; with `worst_obs = TRUE` they are
  # USUBJID/PARAMCD/BASETYPE. Only post-baseline (AVISITN > 0), non-derived
  # (DTYPE is NA) rows are eligible.
  flag_variables <- function(data, worst_obs) {
    # Row index used to map the selected rows back onto the full dataset.
    data_compare <- data %>%
      dplyr::mutate(row_check = seq_len(nrow(data)))

    data <- data_compare %>%
      {
        if (worst_obs == FALSE) {
          dplyr::group_by(., USUBJID, PARAMCD, BASETYPE, AVISIT) %>%
            dplyr::arrange(., ADTM, ASPID, EGSEQ)
        } else {
          dplyr::group_by(., USUBJID, PARAMCD, BASETYPE)
        }
      } %>%
      dplyr::filter(
        AVISITN > 0 & (ONTRTFL == "Y" | ADTM <= TRTSDTM) &
          is.na(DTYPE)
      ) %>%
      {
        if (worst_obs == TRUE) {
          dplyr::arrange(., AVALC) %>% dplyr::filter(., ifelse(
            PARAMCD == "ECGINTP",
            ifelse(AVALC == "ABNORMAL", AVALC == "ABNORMAL", AVALC == "NORMAL"),
            AVAL == min(AVAL)
          ))
        } else {
          dplyr::filter(., ifelse(
            PARAMCD == "ECGINTP",
            AVALC == "ABNORMAL" | AVALC == "NORMAL",
            AVAL == min(AVAL)
          ))
        }
      } %>%
      dplyr::slice(1) %>%
      {
        # Both branches below apply the same case_when.
        if (worst_obs == TRUE) {
          dplyr::mutate(., new_var = dplyr::case_when(
            (AVALC == "ABNORMAL" | AVALC == "NORMAL") ~ "Y",
            (!is.na(AVAL) & is.na(DTYPE)) ~ "Y",
            TRUE ~ ""
          ))
        } else {
          dplyr::mutate(., new_var = dplyr::case_when(
            (AVALC == "ABNORMAL" | AVALC == "NORMAL") ~ "Y",
            (!is.na(AVAL) & is.na(DTYPE)) ~ "Y",
            TRUE ~ ""
          ))
        }
      } %>%
      dplyr::ungroup()

    # The flag written back depends only on whether the row was selected above.
    data_compare$new_var <- ifelse(data_compare$row_check %in% data$row_check, "Y", "")
    data_compare <- data_compare[, -which(names(data_compare) %in% c("row_check"))]

    return(data_compare)
  }

  adeg <- flag_variables(adeg, FALSE) %>% dplyr::rename(WORS01FL = "new_var")
  adeg <- flag_variables(adeg, TRUE) %>% dplyr::rename(WORS02FL = "new_var")

  # ANL01FL: baseline rows, or non-derived rows flagged by WORS01FL, excluding SCREENING.
  adeg <- adeg %>% dplyr::mutate(ANL01FL = factor(ifelse(
    (ABLFL == "Y" | (is.na(DTYPE) & WORS01FL == "Y")) &
      (AVISIT != "SCREENING"),
    "Y",
    ""
  )))

  # BASEC: character baseline (AVALC of the "BASELINE" visit for ECGINTP, otherwise BASE
  # as character). ANL03FL / ANL04FL flag the derived MINIMUM / MAXIMUM rows plus the
  # baseline rows of the non-ECGINTP parameters.
  adeg <- adeg %>%
    dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>%
    dplyr::mutate(BASEC = ifelse(
      PARAMCD == "ECGINTP",
      AVALC[AVISIT == "BASELINE"],
      as.character(BASE)
    )) %>%
    dplyr::mutate(ANL03FL = dplyr::case_when(
      DTYPE == "MINIMUM" ~ "Y",
      ABLFL == "Y" & PARAMCD != "ECGINTP" ~ "Y",
      TRUE ~ ""
    )) %>%
    dplyr::mutate(ANL04FL = dplyr::case_when(
      DTYPE == "MAXIMUM" ~ "Y",
      ABLFL == "Y" & PARAMCD != "ECGINTP" ~ "Y",
      TRUE ~ ""
    )) %>%
    dplyr::ungroup()

  # Optionally blank out a share of values in the columns named in `na_vars`.
  if (length(na_vars) > 0 && na_percentage > 0) {
    adeg <- mutate_na(ds = adeg, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  adeg <- apply_metadata(adeg, "metadata/ADEG.yml")

  return(adeg)
}

#' Time to Adverse Event Analysis Dataset (ADAETTE)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function to generate random Time-to-AE Dataset for a
#' given Subject-Level Analysis Dataset.
#'
#' @details
#'
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`
#'
#' The result combines, per subject: one "time to end of AE reporting period" record
#' (`AEREPTTE`), a pair of records (`AETTE<n>` time to first occurrence, `AETOT<n>` number
#' of occurrences) for every category in the lookup table matching the subject's arm, and
#' two Hy's law time-to-event records (`HYSTTEUL`, `HYSTTEBL`).
#'
#' @inheritParams argument_convention
#' @param event_descr (`character vector`)\cr Descriptions of events. Defaults to `NULL`.
#' @param censor_descr (`character vector`)\cr Descriptions of censors. Defaults to `NULL`.
#' @template param_cached
#' @templateVar data adaette
#'
#' @return `data.frame`
#' @export
#'
#' @author Xiuting Mi
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adaette <- radaette(adsl, seed = 2)
#' adaette
radaette <- function(adsl,
                     event_descr = NULL,
                     censor_descr = NULL,
                     lookup = NULL,
                     seed = NULL,
                     na_percentage = 0,
                     na_vars = list(CNSR = c(NA, 0.1), AVAL = c(1234, 0.1)),
                     cached = FALSE) {
  # Short-circuit: when `cached` is TRUE the pre-built "cadaette" dataset is returned
  # and all other arguments are ignored.
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadaette"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_character(censor_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(event_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  # Lookup table: event rate (LAMBDA) and censoring probability (CNSR_P) per arm and
  # AE category. A user-supplied `lookup` replaces the default table entirely.
  checkmate::assert_data_frame(lookup, null.ok = TRUE)
  lookup_adaette <- if (!is.null(lookup)) {
    lookup
  } else {
    tibble::tribble(
      ~ARM, ~CATCD, ~CAT, ~LAMBDA, ~CNSR_P,
      "ARM A", "1", "any adverse event", 1 / 80, 0.4,
      "ARM B", "1", "any adverse event", 1 / 100, 0.2,
      "ARM C", "1", "any adverse event", 1 / 60, 0.42,
      "ARM A", "2", "any serious adverse event", 1 / 100, 0.3,
      "ARM B", "2", "any serious adverse event", 1 / 150, 0.1,
      "ARM C", "2", "any serious adverse event", 1 / 80, 0.32,
      "ARM A", "3", "a grade 3-5 adverse event", 1 / 80, 0.2,
      "ARM B", "3", "a grade 3-5 adverse event", 1 / 100, 0.08,
      "ARM C", "3", "a grade 3-5 adverse event", 1 / 60, 0.23
    )
  }

  # Seed the RNG only when requested so that results are reproducible on demand.
  if (!is.null(seed)) {
    set.seed(seed)
  }
  # Study duration is read from the "study_duration_secs" attribute attached to `adsl`.
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  # Pool of event descriptions to sample from for non-censored AE records.
  evntdescr_sel <- if (!is.null(event_descr)) {
    event_descr
  } else {
    "Preferred Term"
  }

  # Pool of censoring descriptions to sample from for censored AE records.
  cnsdtdscr_sel <- if (!is.null(censor_descr)) {
    censor_descr
  } else {
    c(
      "Clinical Cut Off",
      "Completion or Discontinuation",
      "End of AE Reporting Period"
    )
  }

  # Builds the single AEREPTTE record for one subject (one-row slice of `adsl`).
  # The end of the AE reporting period is the earlier of EOSDT and the treatment end
  # date (derived from TRTSDTM + study duration when TRTEDTM is missing).
  random_patient_data <- function(patient_info) {
    startdt <- lubridate::date(patient_info$TRTSDTM)
    trtedtm <- lubridate::floor_date(dplyr::case_when(
      is.na(patient_info$TRTEDTM) ~ lubridate::date(patient_info$TRTSDTM) + study_duration_secs,
      TRUE ~ lubridate::date(patient_info$TRTEDTM)
    ), unit = "day")
    enddts <- c(patient_info$EOSDT, lubridate::date(trtedtm))
    # Index 1 means EOSDT was the earlier date, index 2 the treatment end date.
    enddts_min_index <- which.min(enddts)
    adt <- enddts[enddts_min_index]
    adtm <- lubridate::as_datetime(adt)
    ady <- as.numeric(adt - startdt + 1)
    data.frame(
      ARM = patient_info$ARM,
      STUDYID = patient_info$STUDYID,
      SITEID = patient_info$SITEID,
      USUBJID = patient_info$USUBJID,
      PARAMCD = "AEREPTTE",
      PARAM = "Time to end of AE reporting period",
      CNSR = 0,
      AVAL = lubridate::days(ady) / lubridate::years(1),
      AVALU = "YEARS",
      EVNTDESC = ifelse(enddts_min_index == 1, "Completion or Discontinuation", "End of AE Reporting Period"),
      CNSDTDSC = NA,
      ADTM = adtm,
      ADY = ady,
      stringsAsFactors = FALSE
    )
  }

  # validate and initialize related variables for Hy's law
  paramcd_hy <- c("HYSTTEUL", "HYSTTEBL")
  param_hy <- c("Time to Hy's Law Elevation in relation to ULN", "Time to Hy's Law Elevation in relation to Baseline")
  param_init_list <- relvar_init(param_hy, paramcd_hy)
  adsl_hy <- dplyr::select(adsl, "STUDYID", "USUBJID", "TRTSDTM", "SITEID", "ARM")

  # create all combinations of unique values in STUDYID, USUBJID, PARAM
  adaette_hy <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = adsl$USUBJID,
    PARAM = as.factor(param_init_list$relvar1),
    stringsAsFactors = FALSE
  )

  # Add other variables to adaette_hy
  # CNSR is drawn with 10% events / 90% censored; event and censoring descriptions
  # are mutually exclusive. ADTM equals the treatment start for "Treatment Start"
  # censoring, otherwise a random offset within the study duration is added.
  adaette_hy <- dplyr::left_join(adaette_hy, adsl_hy, by = c("STUDYID", "USUBJID")) %>%
    rel_var(
      var_name = "PARAMCD",
      related_var = "PARAM",
      var_values = param_init_list$relvar2
    ) %>%
    dplyr::mutate(
      CNSR = sample(c(0, 1), prob = c(0.1, 0.9), size = dplyr::n(), replace = TRUE),
      EVNTDESC = dplyr::if_else(
        CNSR == 0,
        "First Post-Baseline Raised ALT or AST Elevation Result",
        NA_character_
      ),
      CNSDTDSC = dplyr::if_else(CNSR == 0, NA_character_,
        sample(c("Last Post-Baseline ALT or AST Result", "Treatment Start"),
          prob = c(0.9, 0.1),
          size = dplyr::n(), replace = TRUE
        )
      )
    ) %>%
    dplyr::rowwise() %>%
    dplyr::mutate(ADTM = dplyr::case_when(
      CNSDTDSC == "Treatment Start" ~ TRTSDTM,
      TRUE ~ TRTSDTM + sample(seq(0, study_duration_secs), size = dplyr::n(), replace = TRUE)
    )) %>%
    dplyr::mutate(
      ADY_int = lubridate::date(ADTM) - lubridate::date(TRTSDTM) + 1,
      ADY = as.numeric(ADY_int),
      AVAL = lubridate::days(ADY_int) / lubridate::weeks(1),
      AVALU = "WEEKS"
    ) %>%
    dplyr::select(-TRTSDTM, -ADY_int)

  # Builds the AETTE<CATCD> / AETOT<CATCD> pair of records for one subject and one
  # lookup row. `patient_data` is the subject's AEREPTTE record, `patient_info` the
  # subject's ADSL row.
  random_ae_data <- function(lookup_info, patient_info, patient_data) {
    cnsr <- sample(c(0, 1), 1, prob = c(1 - lookup_info$CNSR_P, lookup_info$CNSR_P))
    ae_rep_tte <- patient_data$AVAL[patient_data$PARAMCD == "AEREPTTE"]
    data.frame(
      ARM = rep(patient_data$ARM, 2),
      STUDYID = rep(patient_data$STUDYID, 2),
      SITEID = rep(patient_data$SITEID, 2),
      USUBJID = rep(patient_data$USUBJID, 2),
      PARAMCD = c(
        paste0("AETTE", lookup_info$CATCD),
        paste0("AETOT", lookup_info$CATCD)
      ),
      PARAM = c(
        paste("Time to first occurrence of", lookup_info$CAT),
        paste("Number of occurrences of", lookup_info$CAT)
      ),
      CNSR = c(
        cnsr,
        NA
      ),
      AVAL = c(
        # We generate these values conditional on the censoring information.
        # If this time to event is censored, then there were no AEs reported and the time is set
        # to the AE reporting period time. Otherwise we draw from truncated distributions to make
        # sure that we are within the AE reporting time and above 0 AEs.
        ifelse(cnsr == 1, ae_rep_tte, rtexp(1, lookup_info$LAMBDA * 365.25, r = ae_rep_tte)),
        ifelse(cnsr == 1, 0, rtpois(1, lookup_info$LAMBDA * 365.25))
      ),
      AVALU = c(
        "YEARS",
        NA
      ),
      EVNTDESC = c(
        ifelse(cnsr == 0, sample(evntdescr_sel, 1), ""),
        NA
      ),
      CNSDTDSC = c(
        ifelse(cnsr == 1, sample(cnsdtdscr_sel, 1), ""),
        NA
      ),
      stringsAsFactors = FALSE
    ) %>% dplyr::mutate(
      # The count record (AVALU is NA) has no analysis day / datetime.
      ADY = dplyr::if_else(is.na(AVALU), NA_real_, ceiling(as.numeric(lubridate::dyears(AVAL), "days"))),
      ADTM = dplyr::if_else(
        is.na(AVALU),
        lubridate::as_datetime(NA),
        patient_info$TRTSDTM + lubridate::days(ADY)
      )
    )
  }

  # Per subject: AEREPTTE record plus one AETTE/AETOT pair per lookup category.
  # NOTE(review): lookup rows are selected by comparing the lookup `ARM` column with
  # the subject's `ARMCD` - presumably both use values such as "ARM A"; confirm.
  adaette <- split(adsl, adsl$USUBJID) %>%
    lapply(function(patient_info) {
      patient_data <- random_patient_data(patient_info)
      lookup_arm <- lookup_adaette %>%
        dplyr::filter(ARM == as.character(patient_info$ARMCD))
      ae_data <- split(lookup_arm, lookup_arm$CATCD) %>%
        lapply(random_ae_data, patient_data = patient_data, patient_info = patient_info) %>%
        Reduce(rbind, .)
      dplyr::bind_rows(patient_data, ae_data)
    }) %>%
    Reduce(rbind, .) %>%
    rcd_var_relabel(
      STUDYID = "Study Identifier",
      USUBJID = "Unique Subject Identifier"
    )

  adaette <- rcd_var_relabel(
    adaette,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # Append the Hy's law records.
  adaette <- rbind(adaette, adaette_hy)

  # Merge ADSL (SITEID / ARM are dropped first so they come from ADSL), number the
  # records chronologically within subject and sort by the dataset keys.
  adaette <- dplyr::inner_join(
    dplyr::select(adaette, -"SITEID", -"ARM"),
    adsl,
    by = c("STUDYID", "USUBJID")
  ) %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::arrange(ADTM) %>%
    dplyr::mutate(TTESEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = TTESEQ) %>%
    dplyr::mutate(PARAM = as.factor(PARAM)) %>%
    dplyr::mutate(PARAMCD = as.factor(PARAMCD)) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      PARAMCD,
      ADTM,
      TTESEQ
    )

  # Optionally blank out a share of values in the columns named in `na_vars`.
  # This must go through mutate_na(); dplyr::mutate() has no `ds` / `na_vars`
  # interface and would fail here.
  if (length(na_vars) > 0 && na_percentage > 0) {
    adaette <- mutate_na(ds = adaette, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  adaette <- apply_metadata(adaette, "metadata/ADAETTE.yml")

  return(adaette)
}

#' Load Cached Data
#'
#' Return data attached to package.
#'
#' Looks up the object called `dataname` in the `random.cdisc.data` namespace. The
#' package must be attached (i.e. appear on the search path); otherwise an error
#' is raised.
#'
#' @param dataname (`string`)\cr Name of the cached dataset, e.g. `"cadeg"`.
#'
#' @return The object stored under `dataname` in the package namespace.
#'
#' @keywords internal
#' @noRd
get_cached_data <- function(dataname) {
  checkmate::assert_string(dataname)
  if (!("package:random.cdisc.data" %in% search())) {
    # stop() pastes its arguments together without a separator, so the first
    # sentence carries the trailing space that keeps the two sentences apart.
    stop("cached data can only be loaded if the random.cdisc.data package is attached. ",
      "Please run library(random.cdisc.data) before loading cached data.",
      call. = FALSE
    )
  } else {
    get(dataname, envir = asNamespace("random.cdisc.data"))
  }
}

#' Create a Factor with Random Elements of x
#'
#' Draw `N` elements from `x` with replacement and return them as a factor.
#'
#' @param x (`character vector` or `factor`)\cr Values to draw from. For a character
#'   vector the values themselves become the levels of the result; for a factor the
#'   existing levels are kept.
#' @param N (`numeric`)\cr Number of items to choose.
#' @param ... Additional arguments to be passed to `sample`.
#'
#' @return A factor of length `N`.
#' @export
#'
#' @examples
#' sample_fct(letters[1:3], 10)
#' sample_fct(iris$Species, 10)
sample_fct <- function(x, N, ...) { # nolint
  checkmate::assert_number(N)

  # Levels come from the input, not from the draw, so unsampled values stay as levels.
  result_levels <- if (is.factor(x)) {
    levels(x)
  } else {
    x
  }
  drawn <- sample(x, N, replace = TRUE, ...)

  factor(drawn, levels = result_levels)
}

#' Related Variables: Initialize
#'
#' Check that two related character vectors have the same length and bundle them.
#' For example, `relvar_init("Alanine Aminotransferase Measurement", "ALT")`.
#'
#' @param relvar1 (`character`)\cr Vector of n elements.
#' @param relvar2 (`character`)\cr Vector of n elements.
#'
#' @return A named list with elements `relvar1` and `relvar2` when both inputs have
#'   the same length. Otherwise the length-mismatch condition is signalled through
#'   `message()` and `NA` is returned.
#'
#' @keywords internal
relvar_init <- function(relvar1, relvar2) {
  checkmate::assert_character(relvar1, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(relvar2, min.len = 1, any.missing = FALSE)

  lengths_agree <- length(relvar1) == length(relvar2)
  if (lengths_agree) {
    return(list("relvar1" = relvar1, "relvar2" = relvar2))
  }

  # The mismatch is reported via message() on an error condition, not via stop().
  mismatch <- simpleError(
    "The argument value length of relvar1 and relvar2 differ. They must contain the same number of elements."
  )
  message(mismatch)
  NA
}

#' Related Variables: Assign
#'
#' Add a variable to a data frame whose values are determined by another variable.
#'
#' The distinct values of `related_var` (in order of first appearance) are paired
#' positionally with `var_values`; each row receives the value paired with its
#' `related_var` value.
#'
#' @param df (`data.frame`)\cr Data frame containing the related variables.
#' @param var_name (`character`)\cr Name of variable related to `rel_var` to add to `df`.
#' @param var_values (`any`)\cr Vector of values related to values of `related_var`.
#'   Must have one element per distinct value of `related_var`. If `NULL`, the new
#'   variable is filled with `NA`.
#' @param related_var (`character`)\cr Name of variable within `df` with values to which values
#' of `var_name` must relate.
#'
#' @return `df` with added factor variable `var_name` containing `var_values` corresponding to `related_var`.
#' @export
#'
#' @examples
#' # Example with data.frame.
#' params <- c("Level A", "Level B", "Level C")
#' adlb_df <- data.frame(
#'   ID = 1:9,
#'   PARAM = factor(
#'     rep(c("Level A", "Level B", "Level C"), 3),
#'     levels = params
#'   )
#' )
#' rel_var(
#'   df = adlb_df,
#'   var_name = "PARAMCD",
#'   var_values = c("A", "B", "C"),
#'   related_var = "PARAM"
#' )
#'
#' # Example with tibble.
#' adlb_tbl <- tibble::tibble(
#'   ID = 1:9,
#'   PARAM = factor(
#'     rep(c("Level A", "Level B", "Level C"), 3),
#'     levels = params
#'   )
#' )
#' rel_var(
#'   df = adlb_tbl,
#'   var_name = "PARAMCD",
#'   var_values = c("A", "B", "C"),
#'   related_var = "PARAM"
#' )
rel_var <- function(df, var_name, related_var, var_values = NULL) {
  checkmate::assert_data_frame(df)
  checkmate::assert_string(var_name)
  checkmate::assert_string(related_var)

  # drop = TRUE so that data.frame and tibble input both yield a plain vector.
  related_col <- df[, related_var, drop = TRUE]
  distinct_related <- unique(related_col)
  n_distinct <- length(distinct_related)

  checkmate::assert_vector(var_values, null.ok = TRUE, len = n_distinct, any.missing = FALSE)
  if (is.null(var_values)) {
    var_values <- rep(NA, n_distinct)
  }

  # Fill the new column one distinct related value at a time.
  assigned <- rep(NA, nrow(df))
  for (i in seq_len(n_distinct)) {
    hit_rows <- which(related_col == distinct_related[i])
    assigned[hit_rows] <- var_values[i]
  }

  df[[var_name]] <- factor(assigned)
  df
}

#' Create Visit Schedule
#'
#' Create a visit schedule as a factor.
#'
#' X number of visits, or X number of cycles and Y number of days.
#'
#' For `"WEEK"` the schedule is `SCREENING`, `BASELINE` and one `WEEK <i> DAY <7i + 1>`
#' visit per assessment. For `"CYCLE"` the schedule is `SCREENING` followed by
#' `CYCLE <c> DAY <d>` for every cycle/day combination. Factor levels are ordered
#' chronologically.
#'
#' @inheritParams argument_convention
#'
#' @return A factor of visit names with chronologically ordered levels:
#'   `n_assessments + 2` values for `"WEEK"`, `n_assessments * n_days + 1` values
#'   for `"CYCLE"`.
#' @export
#'
#' @examples
#' visit_schedule(visit_format = "WEeK", n_assessments = 10L)
#' visit_schedule(visit_format = "CyCLE", n_assessments = 5L, n_days = 2L)
visit_schedule <- function(visit_format = "WEEK",
                           n_assessments = 10L,
                           n_days = 5L) {
  checkmate::assert_string(visit_format, pattern = "^WEEK$|^CYCLE$", ignore.case = TRUE)
  checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE)
  checkmate::assert_integer(n_days, len = 1, any.missing = FALSE)

  # The format is matched case-insensitively but always emitted in upper case.
  visit_format <- toupper(visit_format)

  if (visit_format == "WEEK") {
    # numeric vector of n assessments
    assessments <- seq_len(n_assessments)
    # numeric vector for ordering including screening (-1) and baseline (0) place holders
    assessments_ord <- -1:n_assessments
    # character vector of nominal visit values
    visit_values <- c("SCREENING", "BASELINE", paste(visit_format, assessments, "DAY", (assessments * 7) + 1))
  } else if (visit_format == "CYCLE") {
    # every cycle is repeated once per day; days restart at 1 within each cycle
    cycles <- rep(seq_len(n_assessments), each = n_days)
    days <- rep(seq_len(n_days), times = n_assessments)
    # ordering scores: screening (0) followed by one slot per cycle/day combination
    assessments_ord <- 0:(n_assessments * n_days)
    visit_values <- c("SCREENING", paste(visit_format, cycles, "DAY", days))
  }

  # create and order factor variable; returned visibly (not as the value of an
  # assignment) so that calling the function at top level prints the schedule
  stats::reorder(factor(visit_values), assessments_ord)
}

#' Primary Keys: Retain Values
#'
#' Carry a value forward from each event row until the next event row.
#'
#' Rows before the first event receive `outside`; from each row where `event` is
#' `TRUE` onwards (up to the row before the next event) the value of `value_var` at
#' that event row is repeated.
#'
#' @param df (`data.frame`)\cr Data frame in which to apply the retain; only its number
#'   of rows is used.
#' @param value_var (`any`)\cr Vector containing the value to be retained.
#' @param event (`logical`)\cr Logical vector marking the rows that trigger the retain.
#' @param outside (`any`)\cr Value used for rows before the first event. Defaults to `NA`.
#' @return A vector with one element per row of `df`.
#' @keywords internal
retain <- function(df, value_var, event, outside = NA) {
  is_event <- event == TRUE
  # Segment boundaries: start of data, every event row, one past the last row.
  boundaries <- c(1, which(is_event), nrow(df) + 1)
  segment_lengths <- diff(boundaries)
  carried <- c(outside, value_var[is_event])
  rep(carried, segment_lengths)
}

#' Primary Keys: Labels
#'
#' @description Shallow copy of `formatters::var_relabel()`. Used mainly internally to
#'   relabel a subset of variables in a data set.
#'
#' @param x (`data.frame`)\cr Data frame containing variables to which labels are applied.
#' @param ... (`named character`)\cr Name-Value pairs, where name corresponds to a variable
#'   name in `x` and the value to the new variable label.
#' @return x (`data.frame`)\cr Data frame with the `"label"` attribute set on each named
#'   variable. Returned unchanged when no labels are supplied.
#'
#' @keywords internal
rcd_var_relabel <- function(x, ...) {
  stopifnot(is.data.frame(x))
  if (missing(...)) {
    return(x)
  }

  new_labels <- list(...)
  targets <- names(new_labels)
  if (is.null(targets)) {
    stop("missing variable declarations")
  }

  # Resolve each requested variable to its column position; NA marks unknown names.
  col_pos <- match(targets, colnames(x))
  unknown <- is.na(col_pos)
  if (any(unknown)) {
    stop("variables: ", paste(targets[unknown], collapse = ", "), " not found")
  }

  labels_are_character <- vapply(new_labels, is.character, logical(1))
  if (!all(labels_are_character)) {
    stop("all variable labels must be of type character")
  }

  for (k in seq_along(col_pos)) {
    attr(x[[col_pos[[k]]]], "label") <- new_labels[[k]]
  }
  x
}

#' Apply Metadata
#'
#' Apply label and variable ordering attributes to domains.
#'
#' Metadata is read from YAML files shipped with the `random.cdisc.data` package
#' (paths are resolved with `system.file()`). For every variable listed in the metadata
#' that is present in `df`, the column is coerced to the declared type (if any) and
#' its `"label"` attribute is set. Columns are then reordered to `STUDYID`, `USUBJID`,
#' the metadata variables, and finally any remaining columns. All non-standard
#' attributes of `df` are removed first.
#'
#' A warning is issued for columns that are neither covered by the metadata nor
#' already carry a `"label"` attribute.
#'
#' @param df (`data.frame`)\cr Data frame to which metadata is applied.
#' @param filename (`yaml`)\cr File containing domain metadata.
#' @param add_adsl (`logical`)\cr Should ADSL data be merged to domain.
#' @param adsl_filename (`yaml`)\cr File containing ADSL metadata.
#' @return Data frame with metadata applied.
#'
#' @export
#' @examples
#' seed <- 1
#' adsl <- radsl(seed = seed)
#' adsub <- radsub(adsl, seed = seed)
#' yaml_path <- file.path(path.package("random.cdisc.data"), "inst", "metadata")
#' adsl <- apply_metadata(adsl, file.path(yaml_path, "ADSL.yml"), FALSE)
#' adsub <- apply_metadata(
#'   adsub, file.path(yaml_path, "ADSUB.yml"), TRUE,
#'   file.path(yaml_path, "ADSL.yml")
#' )
apply_metadata <- function(df, filename, add_adsl = TRUE, adsl_filename = "metadata/ADSL.yml") {
  checkmate::assert_data_frame(df)
  checkmate::assert_string(filename)
  checkmate::assert_flag(add_adsl)
  checkmate::assert_string(adsl_filename)

  # Coerce column `var` of `df` to the metadata `type` when it does not already
  # have that type; always returns the (possibly modified) data frame.
  apply_type <- function(df, var, type) {
    if (is.null(type)) {
      # No type declared for this variable: leave the data untouched. Returning the
      # data frame (rather than NULL) matters because the caller assigns the result
      # back to `df`.
      return(df)
    }

    if (type == "character" && !is.character(df[[var]])) {
      df[[var]] <- as.character(df[[var]])
    } else if (type == "factor" && !is.factor(df[[var]])) {
      df[[var]] <- as.factor(df[[var]])
    } else if (type == "integer" && !is.integer(df[[var]])) {
      df[[var]] <- as.integer(df[[var]])
    } else if (type == "numeric" && !is.numeric(df[[var]])) {
      df[[var]] <- as.numeric(df[[var]])
    } else if (type == "logical" && !is.logical(df[[var]])) {
      df[[var]] <- as.logical(df[[var]])
    } else if (type == "datetime" && !lubridate::is.POSIXct(df[[var]])) {
      df[[var]] <- as.POSIXct(df[[var]])
    } else if (type == "date" && !lubridate::is.Date(df[[var]])) {
      df[[var]] <- as.Date(df[[var]])
    }
    return(df)
  }

  # remove existing attributes (everything a plain data.frame does not carry)
  for (i in base::setdiff(names(attributes(df)), names(attributes(data.frame())))) {
    attr(df, i) <- NULL
  }

  # get metadata
  metadata <- yaml::yaml.load_file(system.file(filename, package = "random.cdisc.data"))
  adsl_metadata <- if (add_adsl) {
    yaml::yaml.load_file(system.file(adsl_filename, package = "random.cdisc.data"))
  } else {
    NULL
  }
  # ADSL variables come first, followed by the domain-specific variables.
  metadata_variables <- append(adsl_metadata$variables, metadata$variables)
  metadata_varnames <- names(metadata_variables)

  # find variables that do not have labels and are not in metadata
  missing_vars_map <- vapply(
    names(df),
    function(x) {
      !(x %in% c("STUDYID", "USUBJID", metadata_varnames)) && is.null(attr(df[[x]], "label"))
    },
    logical(1)
  )
  missing_vars <- names(df)[missing_vars_map]
  if (length(missing_vars) > 0) {
    msg <- paste0(
      "Following variables does not have label or are not found in ",
      filename,
      ": ",
      paste0(missing_vars, collapse = ", ")
    )
    warning(msg)
  }

  # Only metadata variables that actually exist in `df` are processed below.
  if (!all(metadata_varnames %in% names(df))) {
    metadata_varnames <- metadata_varnames[metadata_varnames %in% names(df)]
  }

  # assign types and labels to variables
  for (var in metadata_varnames) {
    df <- apply_type(df, var, metadata_variables[[var]]$type)
    attr(df[[var]], "label") <- metadata_variables[[var]]$label
  }

  # reorder data frame columns to expected BDS order
  df <- df[, unique(c("STUDYID", "USUBJID", metadata_varnames, names(df)))]

  # assign label to data frame
  attr(df, "label") <- metadata$domain$label

  df
}

#' Replace Values in a Vector by NA
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Randomized replacement of values by `NA`.
#'
#' @inheritParams argument_convention
#' @param v (`any`)\cr Vector of any type.
#' @param percentage (`proportion`)\cr Value between 0 and 1 defining
#'   how much of the vector shall be replaced by `NA`. The number of
#'   replaced values is `round(length(v) * percentage)`; their positions
#'   are chosen at random.
#'
#' @return The input vector `v` where a certain number of values are replaced by `NA`.
#'
#' @export
replace_na <- function(v, percentage = 0.05, seed = NULL) {
  checkmate::assert_number(percentage, lower = 0, upper = 1)

  # a zero share means there is nothing to blank out
  if (percentage == 0) {
    return(v)
  }

  # reseed only when a usable (non-NULL, non-NA) seed was supplied
  has_seed <- !is.null(seed) && !is.na(seed)
  if (has_seed) {
    set.seed(seed)
  }

  # number of entries to blank out, then their randomly drawn positions
  n_missing <- round(length(v) * percentage)
  na_positions <- sample(seq_along(v), n_missing)

  replace(v, na_positions, NA)
}

#' Replace Values with NA
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Replace column values with `NA`s.
#'
#' @inheritParams argument_convention
#' @param ds (`data.frame`)\cr Any data set.
#'
#' @return `ds` with a random share of the values in the selected columns replaced by `NA`.
#'
#' @export
mutate_na <- function(ds, na_vars = NULL, na_percentage = 0.05) {
  # `na_vars` is a named list: one element per column of `ds` to modify.
  # Element [1] is the seed (NA = do not reseed), element [2] is the share
  # of values to replace (NA or absent = fall back to `na_percentage`).
  if (!is.null(na_vars)) {
    stopifnot(is.list(na_vars)) # any list is OK; as values can be left NA
    stopifnot(length(names(na_vars)) == length(na_vars)) # names for all elements
  } else {
    # Default to every column of `ds`, without a seed and with the default
    # percentage. This must be a *named* list: the loop below iterates over
    # names(na_vars), so a bare character vector would make it a no-op.
    na_vars <- stats::setNames(rep(list(c(NA, NA)), ncol(ds)), names(ds))
  }

  stopifnot(is.numeric(na_percentage))
  stopifnot(na_percentage >= 0 && na_percentage < 1)

  for (na_var in names(na_vars)) {
    if (!is.na(na_var)) {
      if (!na_var %in% names(ds)) {
        warning(paste(na_var, "not in column names"))
      } else {
        spec <- na_vars[[na_var]]
        # per-variable percentage wins over the global default when given
        pct <- if (length(spec) < 2 || is.na(spec[2])) na_percentage else spec[2]
        new_col <- replace_na(ds[[na_var]], percentage = pct, seed = spec[1])

        # the values are injected with `!!` so that columns of `ds` can never
        # shadow the local variables inside the data mask
        ds <- ds %>%
          ungroup_rowwise_df() %>%
          dplyr::mutate(!!na_var := !!new_col)
      }
    }
  }
  return(ds)
}

ungroup_rowwise_df <- function(x) {
  # Overwrite the class vector so that grouped/rowwise classes are dropped
  # and only the plain tibble/data.frame classes remain.
  plain_classes <- c("tbl", "tbl_df", "data.frame")
  attr(x, "class") <- plain_classes
  x
}

#' Zero-Truncated Poisson Distribution
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This generates random numbers from a zero-truncated Poisson distribution,
#' i.e. from `X | X > 0` when `X ~ Poisson(lambda)`. The advantage here is that
#' we guarantee to return exactly `n` numbers and without using a loop internally.
#' This solution was provided in a post by
#' [Peter Dalgaard](https://stat.ethz.ch/pipermail/r-help/2005-May/070680.html).
#'
#' @param n (`numeric`)\cr Number of random numbers.
#' @param lambda (`numeric`)\cr Non-negative mean(s).
#'
#' @return The random numbers.
#' @export
#'
#' @examples
#' x <- rpois(1e6, lambda = 5)
#' x <- x[x > 0]
#' hist(x)
#'
#' y <- rtpois(1e6, lambda = 5)
#' hist(y)
rtpois <- function(n, lambda) {
  # P(X = 0): the probability mass that has to be cut away
  p_zero <- stats::dpois(0, lambda)
  # uniform draws restricted to (P(X = 0), 1) ...
  u <- stats::runif(n, p_zero, 1)
  # ... pushed through the Poisson quantile function (inverse-CDF sampling)
  stats::qpois(u, lambda)
}

#' Truncated Exponential Distribution
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This generates random numbers from a truncated Exponential distribution,
#' i.e. from `X | X > l` or `X | X < r` when `X ~ Exp(rate)`. The advantage here is that
#' we guarantee to return exactly `n` numbers and without using a loop internally.
#' This can be derived from the quantile functions of the left- and right-truncated
#' Exponential distributions.
#'
#' @param n (`numeric`)\cr Number of random numbers.
#' @param rate (`numeric`)\cr Non-negative rate.
#' @param l (`numeric`)\cr Positive left-hand truncation parameter.
#' @param r (`numeric`)\cr Positive right-hand truncation parameter.
#'
#' @return The random numbers. If neither `l` nor `r` are provided then the usual Exponential
#'  distribution is used.
#' @export
#'
#' @examples
#' x <- stats::rexp(1e6, rate = 5)
#' x <- x[x > 0.5]
#' hist(x)
#'
#' y <- rtexp(1e6, rate = 5, l = 0.5)
#' hist(y)
#'
#' z <- rtexp(1e6, rate = 5, r = 0.5)
#' hist(z)
rtexp <- function(n, rate, l = NULL, r = NULL) {
  # no truncation requested: plain Exponential draws
  if (is.null(l) && is.null(r)) {
    return(stats::rexp(n, rate))
  }

  u <- stats::runif(n)

  # left truncation takes precedence when both bounds are supplied
  if (!is.null(l)) {
    return(l - log(1 - u) / rate)
  }

  # right truncation: inverse CDF of X | X < r
  -log(1 - u * (1 - exp(-r * rate))) / rate
}

#' Hy's Law Analysis Dataset (ADHY)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Hy's Law Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per subject per parameter per analysis visit per analysis date.
#'
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `AVISITN`, `ADTM`, `SRCSEQ`
#'
#' @inheritParams argument_convention
#' @template param_cached
#' @templateVar data adhy
#'
#' @return `data.frame`
#' @export
#'
#' @author wojciakw
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adhy <- radhy(adsl, seed = 2)
#' adhy
radhy <- function(adsl,
                  param = c(
                    "TBILI <= 2 times ULN and ALT value category",
                    "TBILI > 2 times ULN and AST value category",
                    "TBILI > 2 times ULN and ALT value category",
                    "TBILI <= 2 times ULN and AST value category",
                    "TBILI > 2 times ULN and ALKPH <= 2 times ULN and ALT value category",
                    "TBILI > 2 times ULN and ALKPH <= 2 times ULN and AST value category",
                    "TBILI > 2 times ULN and ALKPH <= 5 times ULN and ALT value category",
                    "TBILI > 2 times ULN and ALKPH <= 5 times ULN and AST value category",
                    "TBILI <= 2 times ULN and two consecutive elevations of ALT in relation to ULN",
                    "TBILI > 2 times ULN and two consecutive elevations of AST in relation to ULN",
                    "TBILI <= 2 times ULN and two consecutive elevations of AST in relation to ULN",
                    "TBILI > 2 times ULN and two consecutive elevations of ALT in relation to ULN",
                    "TBILI > 2 times ULN and two consecutive elevations of ALT in relation to Baseline",
                    "TBILI <= 2 times ULN and two consecutive elevations of ALT in relation to Baseline",
                    "TBILI > 2 times ULN and two consecutive elevations of AST in relation to Baseline",
                    "TBILI <= 2 times ULN and two consecutive elevations of AST in relation to Baseline",
                    "ALT > 3 times ULN by Period",
                    "AST > 3 times ULN by Period",
                    "ALT or AST > 3 times ULN by Period",
                    "ALT > 3 times Baseline by Period",
                    "AST > 3 times Baseline by Period",
                    "ALT or AST > 3 times Baseline by Period"
                  ),
                  paramcd = c(
                    "BLAL",
                    "BGAS",
                    "BGAL",
                    "BLAS",
                    "BA2AL",
                    "BA2AS",
                    "BA5AL",
                    "BA5AS",
                    "BL2AL2CU",
                    "BG2AS2CU",
                    "BL2AS2CU",
                    "BG2AL2CU",
                    "BG2AL2CB",
                    "BL2AL2CB",
                    "BG2AS2CB",
                    "BL2AS2CB",
                    "ALTPULN",
                    "ASTPULN",
                    "ALTASTPU",
                    "ALTPBASE",
                    "ASTPBASE",
                    "ALTASTPB"
                  ),
                  seed = NULL,
                  cached = FALSE) {
  # Builds a random ADHY dataset on top of `adsl`: one record per subject,
  # parameter, analysis visit and (for the "by Period" parameters) period.
  checkmate::assert_flag(cached)

  # short-circuit: hand back the pre-built dataset instead of generating one
  if (cached) {
    return(get_cached_data("cadhy"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_character(param, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)

  # validate and initialize related variables
  # (the result's `relvar1` / `relvar2` are used below for PARAM / PARAMCD)
  param_init_list <- relvar_init(param, paramcd)

  # only reseed when a seed was supplied, so an outer seed stays in effect otherwise
  if (!is.null(seed)) {
    set.seed(seed)
  }
  # study duration is read from an attribute of `adsl`; used as the ADY upper
  # bound for subjects without a treatment end date (see add_ady below)
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  # create all combinations of unique values in STUDYID, USUBJID, PARAM, AVISIT
  adhy <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = adsl$USUBJID,
    PARAM = as.factor(param_init_list$relvar1),
    AVISIT = as.factor(c("BASELINE", "POST-BASELINE")),
    APERIODC = as.factor(c("PERIOD 1", "PERIOD 2")),
    stringsAsFactors = FALSE
  )

  # remove records that are not needed and were created as a side product of expand.grid above
  # (a BASELINE record is kept for PERIOD 1 only)
  adhy <- dplyr::filter(adhy, !(AVISIT == "BASELINE" & APERIODC == "PERIOD 2"))

  # define TBILI ALT/AST params, period dependent parameters and the parameters that will be assigned values "Y" or "N"
  paramcd_tbilialtast <- c("BLAL", "BGAS", "BGAL", "BLAS", "BA2AL", "BA2AS", "BA5AL", "BA5AS")
  paramcd_by_period <- c("ALTPULN", "ASTPULN", "ALTASTPU", "ALTPBASE", "ASTPBASE", "ALTASTPB")
  paramcd_yn <- c(
    "BL2AL2CU", "BG2AS2CU", "BL2AS2CU", "BG2AL2CU", "BG2AL2CB", "BL2AL2CB", "BG2AS2CB", "BL2AS2CB",
    paramcd_by_period
  )

  # add other variables to adhy
  adhy <- adhy %>%
    rel_var(
      var_name = "PARAMCD",
      related_var = "PARAM",
      var_values = param_init_list$relvar2
    ) %>%
    dplyr::mutate(
      # category parameters get a random ULN category, Y/N parameters get "Y"
      # with probability 0.1; any PARAMCD outside both lists is left NA
      # because case_when() has no fallback branch here
      AVALC = dplyr::case_when(
        PARAMCD %in% paramcd_tbilialtast ~ sample(
          x = c(">3-5ULN", ">5-10ULN", ">10-20ULN", ">20ULN", "Criteria not met"), size = dplyr::n(), replace = TRUE
        ),
        PARAMCD %in% paramcd_yn ~ sample(
          x = c("Y", "N"), prob = c(0.1, 0.9), size = dplyr::n(), replace = TRUE
        )
      ),
      # numeric code for AVALC (NA when AVALC is NA)
      AVAL = dplyr::case_when(
        AVALC == ">3-5ULN" ~ 1,
        AVALC == ">5-10ULN" ~ 2,
        AVALC == ">10-20ULN" ~ 3,
        AVALC == ">20ULN" ~ 4,
        AVALC == "Y" ~ 1,
        AVALC == "N" ~ 0,
        AVALC == "Criteria not met" ~ 0
      ),
      AVISITN = dplyr::case_when(
        AVISIT == "BASELINE" ~ 0L,
        AVISIT == "POST-BASELINE" ~ 9995L,
        TRUE ~ NA_integer_
      ),
      APERIOD = dplyr::case_when(
        APERIODC == "PERIOD 1" ~ 1L,
        APERIODC == "PERIOD 2" ~ 2L,
        TRUE ~ NA_integer_
      ),
      # flags are "Y" or missing, never "N"
      ABLFL = dplyr::if_else(AVISIT == "BASELINE", "Y", NA_character_),
      ONTRTFL = dplyr::if_else(AVISIT == "POST-BASELINE", "Y", NA_character_),
      ANL01FL = "Y",
      SRCSEQ = NA_integer_
    )

  # remove records for parameters with period 2 and not in paramcd_by_period
  adhy <- dplyr::filter(adhy, PARAMCD %in% paramcd_by_period | APERIODC == "PERIOD 1")

  # add baseline variables
  # (each USUBJID/PARAMCD group holds a single BASELINE record, whose value is
  # recycled over the whole group)
  adhy <- adhy %>%
    dplyr::group_by(USUBJID, PARAMCD) %>%
    dplyr::mutate(
      BASEC = AVALC[AVISIT == "BASELINE"],
      BASE = AVAL[AVISIT == "BASELINE"]
    ) %>%
    dplyr::ungroup()

  # carry the STUDYID / USUBJID labels over from ADSL
  adhy <- adhy %>%
    rcd_var_relabel(
      STUDYID = attr(adsl$STUDYID, "label"),
      USUBJID = attr(adsl$USUBJID, "label")
    )

  # merge ADSL to be able to add analysis datetime and analysis relative day variables
  adhy <- dplyr::inner_join(adhy, adsl, by = c("STUDYID", "USUBJID"))

  # define a simple helper function to create ADY variable
  # `x` is the data of one AVISIT group, `avisit` the value of that group
  add_ady <- function(x, avisit) {
    if (avisit == "BASELINE") {
      # baseline assessments fall on a random day -1 ... -14
      dplyr::mutate(
        x,
        ADY = sample(x = -(1:14), size = dplyr::n(), replace = TRUE)
      )
    } else if (avisit == "POST-BASELINE") {
      # rowwise, so sample() receives a single number per record; per base R
      # semantics a single number x >= 1 means "draw from 1:x", i.e. a random
      # day within the treatment duration (or the study duration when TRTEDTM
      # is missing)
      dplyr::rowwise(x) %>%
        dplyr::mutate(ADY = as.integer(sample(
          dplyr::if_else(
            !is.na(TRTEDTM),
            as.numeric(difftime(TRTEDTM, TRTSDTM, units = "days")),
            as.numeric(study_duration_secs, "days")
          ),
          size = 1,
          replace = TRUE
        )))
    } else {
      dplyr::mutate(x, ADY = NA_integer_)
    }
  }

  # add ADY and ADTM variables
  # (ADTM is the treatment start shifted by ADY days)
  adhy <- adhy %>%
    dplyr::group_by(AVISIT, .add = FALSE) %>%
    dplyr::group_modify(~ add_ady(.x, .y$AVISIT)) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(ADTM = TRTSDTM + lubridate::days(ADY))

  # order columns and arrange rows; column order follows ADaM_1.1 specification
  adhy <-
    adhy[, c(
      colnames(adsl),
      "PARAM",
      "PARAMCD",
      "AVAL",
      "AVALC",
      "BASE",
      "BASEC",
      "ABLFL",
      "ADTM",
      "ADY",
      "AVISIT",
      "AVISITN",
      "APERIOD",
      "APERIODC",
      "ONTRTFL",
      "SRCSEQ",
      "ANL01FL"
    )]

  adhy <- adhy %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      PARAMCD,
      AVISITN,
      ADTM,
      SRCSEQ
    )

  # apply metadata
  adhy <- apply_metadata(adhy, "metadata/ADHY.yml")

  return(adhy)
}

#' EORTC QLQ-C30 V3 Analysis Dataset (ADQLQC)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random EORTC QLQ-C30 V3 Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details
#'
#' Keys: `STUDYID`, `USUBJID`, `PARCAT1N`, `PARAMCD`, `BASETYPE`, `AVISITN`, `ATPTN`, `ADTM`, `QSSEQ`
#'
#' @inheritParams argument_convention
#' @param percent (`numeric`)\cr Completion - Completed at least y percent of questions, 1 record per visit
#' @param number (`numeric`)\cr Completion - Completed at least x question(s), 1 record per visit
#' @template param_cached
#' @templateVar data adqlqc
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1)
#'
#' adqlqc <- radqlqc(adsl, seed = 1, percent = 80, number = 2)
#' adqlqc
radqlqc <- function(adsl,
                    percent,
                    number,
                    seed = NULL,
                    cached = FALSE) {
  # Builds a random ADQLQC dataset for the subjects in `adsl`: generate QS
  # item data, derive parameters, scales, baseline/change variables, merge
  # ADSL, append the completion records and finally order and label the result.
  checkmate::assert_flag(cached)
  # short-circuit: hand back the pre-built dataset instead of generating one
  if (cached) {
    return(get_cached_data("cadqlqc"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_number(percent, lower = 1, upper = 100)
  checkmate::assert_number(number, lower = 1)

  # only reseed when a seed was supplied
  if (!is.null(seed)) {
    set.seed(seed)
  }

  # ADQLQC data -------------------------------------------------------------
  # random SDTM QS records (5 assessments, 10% default missingness)
  qs <- get_qs_data(adsl, n_assessments = 5L, seed = seed, na_percentage = 0.1)
  # prepare ADaM ADQLQC data
  adqlqc1 <- prep_adqlqc(df = qs)
  # derive AVAL and AVALC
  # (AVALC holds the "not done" reason for QSALL records, the original result otherwise)
  adqlqc1 <- mutate(
    adqlqc1,
    AVAL = as.numeric(QSSTRESC),
    AVALC = case_when(
      QSTESTCD == "QSALL" ~ QSREASND,
      TRUE ~ QSORRES
    ),
    AVISIT = VISIT,
    AVISITN = VISITNUM,
    ADTM = QSDTC
  )
  # include scale calculation
  adqlqc_tmp <- calc_scales(adqlqc1)
  # order to prepare for change from screening and baseline values
  adqlqc_tmp <- adqlqc_tmp[order(adqlqc_tmp$STUDYID, adqlqc_tmp$USUBJID, adqlqc_tmp$PARAMCD, adqlqc_tmp$AVISITN), ]

  # per subject: fill STUDYID from ADSL and set the screening (ABLFL2) and
  # baseline (ABLFL) flags; the flags are "Y" or an empty string
  adqlqc_tmp <- Reduce(
    rbind,
    lapply(
      split(adqlqc_tmp, adqlqc_tmp$USUBJID),
      function(x) {
        x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])]
        x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "")
        # baseline is either the BASELINE or the CYCLE 1 DAY 1 visit,
        # never for PARAMCD "EX028"
        x$ABLFL <- ifelse(
          x$AVISIT == "BASELINE" &
            x$PARAMCD != "EX028",
          "Y",
          ifelse(
            x$AVISIT == "CYCLE 1 DAY 1" &
              x$PARAMCD != "EX028",
            "Y",
            ""
          )
        )
        x
      }
    )
  )

  # BASE2: value tied to the screening flag, missing for "Completion" parameters
  # NOTE(review): retain() is defined elsewhere; presumably it carries the value
  # of the flagged record forward - confirm against its definition
  adqlqc_tmp$BASE2 <- ifelse(
    str_detect(adqlqc_tmp$PARCAT2, "Completion", negate = TRUE),
    retain(
      df = adqlqc_tmp,
      value_var = adqlqc_tmp$AVAL,
      event = adqlqc_tmp$ABLFL2 == "Y"
    ),
    NA
  )

  # BASE: same idea for the baseline flag; additionally missing on screening records
  adqlqc_tmp$BASE <- ifelse(
    adqlqc_tmp$ABLFL2 != "Y" &
      str_detect(adqlqc_tmp$PARCAT2, "Completion", negate = TRUE),
    retain(
      adqlqc_tmp,
      adqlqc_tmp$AVAL,
      adqlqc_tmp$ABLFL == "Y"
    ),
    NA
  )

  # absolute and percent change from screening (CHG2/PCHG2) and baseline (CHG/PCHG)
  adqlqc_tmp <- adqlqc_tmp %>%
    dplyr::mutate(CHG2 = AVAL - BASE2) %>%
    dplyr::mutate(PCHG2 = 100 * (CHG2 / BASE2)) %>%
    dplyr::mutate(CHG = AVAL - BASE) %>%
    dplyr::mutate(PCHG = 100 * (CHG / BASE)) %>%
    rcd_var_relabel(
      STUDYID = attr(adsl$STUDYID, "label"),
      USUBJID = attr(adsl$USUBJID, "label")
    )
  # derive CHGCAT1 ----------------------------------------------------------
  adqlqc_tmp <- derv_chgcat1(dataset = adqlqc_tmp)

  # fixed identifier labels (replaces the labels taken from ADSL above)
  adqlqc_tmp <- rcd_var_relabel(
    adqlqc_tmp,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  adqlqc_tmp <- arrange(
    adqlqc_tmp,
    USUBJID,
    AVISITN
  )
  # Merge ADSL --------------------------------------------------------------
  # ADSL variables needed for ADQLQC
  adsl_vars <- c(
    "STUDYID", "USUBJID", "SUBJID", "SITEID", "REGION1", "COUNTRY", "ETHNIC", "AGE",
    "AGEU", "AAGE", "AAGEU", "AGEGR1", "AGEGR2", "AGEGR3", "STRATwNM", "STRATw", "STRATwV",
    "SEX", "RACE", "ITTFL", "SAFFL", "PPROTFL", "TRT01P", "TRT01A",
    "TRTSEQP", "TRTSEQA", "TRTSDTM", "TRTSDT", "TRTEDTM", "TRTEDT", "DCUTDT"
  )
  # any_of(): variables missing from `adsl` are skipped silently
  adsl <- select(
    adsl,
    any_of(adsl_vars)
  )
  # ADY: whole days relative to treatment start; days on or after the start
  # are shifted by one so that there is no day 0
  adqlqc <- dplyr::inner_join(
    adqlqc_tmp,
    adsl,
    by = c("STUDYID", "USUBJID")
  ) %>%
    dplyr::mutate(
      ADY_der = ceiling(difftime(ADTM, TRTSDTM, units = "days")),
      ADY = case_when(
        ADY_der >= 0 ~ ADY_der + 1,
        TRUE ~ ADY_der
      )
    ) %>%
    select(-ADY_der)

  # get compliance data ---------------------------------------------------
  compliance_data <- comp_derv(
    dataset = adqlqc,
    percent = percent,
    number = number
  )
  # add ADSL variables
  compliance_data <- left_join(
    compliance_data,
    adsl,
    by = c("STUDYID", "USUBJID")
  )
  # add completion to ADQLQC
  adqlqc <- bind_rows(
    adqlqc,
    compliance_data
  ) %>%
    arrange(
      USUBJID,
      AVISITN,
      QSTESTCD
    )
  # find first set of questionnaire observations
  # (one row per USUBJID/ADTM among non-QSALL, non-screening, non-unscheduled
  # records; `first_date` equals ADTM and only serves as a "was found" marker
  # after the join below)
  adqlqc_x <- arrange(
    adqlqc,
    USUBJID,
    ADTM
  ) %>%
    filter(
      PARAMCD != "QSALL" &
        !str_detect(AVISIT, "SCREENING|UNSCHEDULED")
    ) %>%
    group_by(
      USUBJID,
      ADTM
    ) %>%
    summarise(first_date = first(ADTM), .groups = "drop")

  # ANL01FL = "Y" for baseline records and for scheduled records with a
  # matching date from above; all other records are left NA
  adqlqc <- left_join(
    adqlqc,
    adqlqc_x,
    by = c("USUBJID", "ADTM")
  ) %>%
    mutate(
      ANL01FL = case_when(
        PARAMCD != "QSALL" & ABLFL == "Y" ~ "Y",
        PARAMCD != "QSALL" &
          !str_detect(AVISIT, "UNSCHEDULED") &
          !is.na(first_date) ~ "Y"
      )
    ) %>%
    select(-first_date)

  # final dataset -----------------------------------------------------------
  # ASEQ numbers the records within each subject; the screening helper
  # variables are dropped
  adqlqc_final <- adqlqc %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(ASEQ = row_number()) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      AVISITN
    ) %>%
    select(
      -c("BASE2", "CHG2", "PCHG2", "ABLFL2")
    ) %>%
    ungroup()

  # target variable order
  adam_vars <- c(
    adsl_vars, "QSSEQ", "QSCAT", "QSSCAT", "QSDTC", "QSSPID", "QSSTAT", "QSSTRESN",
    "QSSTRESC", "QSSTRESU", "QSORRES", "QSORRESU", "QSTEST", "QSTESTCD", "QSTPT",
    "QSTPTNUM", "QSTPTREF", "QSDY", "QSREASND", "QSTSTDTL", "QSEVAL", "VISIT", "VISITNUM",
    "PARAM", "PARAMCD", "PARCAT1", "PARCAT1N", "PARCAT2", "AVAL", "AVALC", "AREASND",
    "BASE", "BASETYPE", "ABLFL", "CHG", "PCHG", "CHGCAT1", "CRIT1", "CRIT1FL", "DTYPE",
    "ADTM", "ADT", "ADY", "ADTF", "ATMF", "ATPT", "ATPTN", "AVISIT", "AVISITN", "APHASE",
    "APHASEN", "APERIOD", "APERIODC", "APERIODC", "ASPER", "ASPERC", "PERADY", "TRTP",
    "TRTA", "ONTRTFL", "LAST02FL", "FIRS02FL", "ANL01FL", "ANL02FL", "ANL03FL",
    "ANL04FL", "CGCAT1NX"
  )
  # order variables in mapped qs by variables in adam_vars
  # (columns not listed in adam_vars give NA in match() and are sorted last)
  adqlqc_name_ordered <- names(adqlqc_final)[order(match(names(adqlqc_final), adam_vars))]
  # adqlqc with variables ordered per gdsr
  adqlqc_final <- adqlqc_final %>%
    select(
      any_of(adqlqc_name_ordered)
    )

  adqlqc_final <- relocate(adqlqc_final, "QSEVLINT", .after = "QSTESTCD") %>%
    arrange(
      USUBJID,
      AVISITN,
      ASEQ,
      QSTESTCD
    )
  # apply metadata
  adqlqc_final <- apply_metadata(adqlqc_final, "metadata/ADQLQC.yml")
  return(adqlqc_final)
}

#' Helper Functions for Constructing ADQLQC
#'
#' Internal functions used by `radqlqc`.
#'
#' @inheritParams argument_convention
#' @inheritParams radqlqc
#'
#' @examples
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1)
#' adqlqc <- radqlqc(adsl, seed = 1, percent = 80, number = 2)
#'
#' @name h_adqlqc
NULL

#' @describeIn h_adqlqc Questionnaires EORTC QLQ-C30 V3.0 SDTM (QS)
#'
#' Function for generating random Questionnaires SDTM domain
#'
#' @return a dataframe with SDTM questionnaire data
#' @keywords internal
get_qs_data <- function(adsl,
                        visit_format = "CYCLE",
                        n_assessments = 5L,
                        n_days = 1L,
                        lookup = NULL,
                        seed = NULL,
                        na_percentage = 0,
                        na_vars = list(
                          QSORRES = c(1234, 0.2),
                          QSSTRESC = c(1234, 0.2)
                        )) {
  # Generates random SDTM QS records (EORTC QLQ-C30 items EOR0101-EOR0130) for
  # the subjects in `adsl`: one randomly chosen response per subject, item and
  # visit, plus "QSALL / NOT DONE" records for visits without questionnaire data.

  # NOTE(review): presumably this brings `eortc_qlq_c30` (used below) into the
  # function environment - confirm against the contents of sysdata.rda
  load(system.file("sysdata.rda", package = "random.cdisc.data"))
  checkmate::assert_string(visit_format)
  checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE)
  checkmate::assert_integer(n_days, len = 1, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)
  # a single non-missing proportion in [0, 1) is required
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  # get subjects for QS data from ADSL
  # get studyid, subject for QS generation
  qs <- select(
    adsl,
    STUDYID,
    USUBJID
  ) %>%
    mutate(
      DOMAIN = "QS"
    )

  # QS prep -----------------------------------------------------------------
  # get questionnaire function for QS
  # QSTESTCD: EOR0101 to EOR0130
  eortc_qlq_c30_sub <- filter(
    eortc_qlq_c30,
    as.numeric(str_extract(QSTESTCD, "\\d+$")) >= 101 &
      as.numeric(str_extract(QSTESTCD, "\\d+$")) <= 130
  ) %>%
    select(-publication_name)

  # validate and initialize QSTEST vectors
  qstest_init_list <- relvar_init(
    unique(eortc_qlq_c30_sub$QSTEST),
    unique(eortc_qlq_c30_sub$QSTESTCD)
  )

  # only reseed when a seed was supplied
  if (!is.null(seed)) {
    set.seed(seed)
  }

  checkmate::assert_data_frame(lookup, null.ok = TRUE)

  # without a user-supplied lookup: every subject x item x scheduled visit
  lookup_qs <- if (!is.null(lookup)) {
    lookup
  } else {
    expand.grid(
      STUDYID = unique(qs$STUDYID),
      USUBJID = qs$USUBJID,
      QSTEST = qstest_init_list$relvar1,
      VISIT = visit_schedule(
        visit_format = visit_format,
        n_assessments = n_assessments,
        n_days = n_days
      ),
      stringsAsFactors = FALSE
    )
  }

  # assign related variable values: QSTESTxQSTESTCD are related
  lookup_qs <- lookup_qs %>% rel_var(
    var_name = "QSTESTCD",
    related_var = "QSTEST",
    var_values = qstest_init_list$relvar2
  )

  # attach every candidate response of each item (hence many-to-many)
  lookup_qs <- left_join(
    lookup_qs,
    eortc_qlq_c30_sub,
    by = c(
      "QSTEST",
      "QSTESTCD"
    ),
    multiple = "all",
    relationship = "many-to-many"
  )

  # NOTE(review): `as.numeric(VISIT) - 2` only yields 1, 2, ... if VISIT is a
  # factor whose first two levels are SCREENING and BASELINE - confirm that
  # visit_schedule() (or a custom `lookup`) provides VISIT in that form
  lookup_qs <- dplyr::mutate(
    lookup_qs,
    VISITNUM = dplyr::case_when(
      VISIT == "SCREENING" ~ -1,
      VISIT == "BASELINE" ~ 0,
      (grepl("^WEEK", VISIT) | grepl("^CYCLE", VISIT)) ~ as.numeric(VISIT) - 2,
      TRUE ~ NA_real_
    )
  ) %>% arrange(USUBJID)

  # # prep QSALL --------------------------------------------------------------
  # get last subject and visit for QSALL
  last_subj_vis <- select(lookup_qs, USUBJID, VISIT) %>%
    distinct() %>%
    slice(n())
  last_subj_vis_full <- filter(
    lookup_qs,
    USUBJID == last_subj_vis$USUBJID,
    VISIT == last_subj_vis$VISIT
  )

  # that subject/visit becomes a single "questionnaire not done" record
  qsall_data1 <- tibble::tibble(
    STUDYID = unique(last_subj_vis_full$STUDYID),
    USUBJID = unique(last_subj_vis_full$USUBJID),
    VISIT = unique(last_subj_vis_full$VISIT),
    VISITNUM = unique(last_subj_vis_full$VISITNUM),
    QSTESTCD = "QSALL",
    QSTEST = "Questionnaires",
    QSSTAT = "NOT DONE",
    QSREASND = "SUBJECT REFUSED"
  )

  # remove last subject and visit from main data
  lookup_qs_sub <- anti_join(
    lookup_qs,
    last_subj_vis_full,
    by = c("USUBJID", "VISIT")
  )

  # Re-apply the seed so the response sampling below is reproducible.
  # This must stay guarded: set.seed(NULL) would re-initialise the RNG and
  # silently discard a seed the caller has set globally.
  if (!is.null(seed)) {
    set.seed(seed)
  }
  # pick one random response per subject, item and visit
  lookup_qs_sub_x <- lookup_qs_sub %>%
    group_by(
      USUBJID,
      QSTESTCD,
      VISIT
    ) %>%
    slice_sample(n = 1) %>%
    ungroup() %>%
    as.data.frame()

  lookup_qs_sub_x <- arrange(
    lookup_qs_sub_x,
    USUBJID,
    VISITNUM
  )

  # add date: QSDTC ---------------------------------------------------------
  # get treatment dates from ADSL
  adsl_trt <- select(
    adsl,
    USUBJID,
    TRTSDTM,
    TRTEDTM
  )
  # use to derive QSDTC
  # if no treatment end date, create an arbitrary one
  trt_end_date <- max(adsl_trt$TRTEDTM, na.rm = TRUE)

  # ifelse() drops the POSIXct class, so `to` is passed on as a plain number;
  # get_random_dates_between() converts it back with lubridate::as_datetime().
  # The result stays grouped by USUBJID until the ungroup() further below.
  lookup_qs_sub_x <- left_join(
    lookup_qs_sub_x,
    adsl_trt,
    by = "USUBJID"
  ) %>%
    group_by(
      USUBJID
    ) %>%
    mutate(QSDTC = get_random_dates_between(
      from = TRTSDTM,
      to = ifelse(
        is.na(TRTEDTM),
        trt_end_date,
        TRTEDTM
      ),
      visit_id = VISITNUM
    )) %>%
    select(-c("TRTSDTM", "TRTEDTM"))

  # filter out subjects with missing dates
  lookup_qs_sub_x1 <- filter(
    lookup_qs_sub_x,
    !is.na(QSDTC)
  )

  # subjects with missing dates
  lookup_qs_sub_x2 <- filter(
    lookup_qs_sub_x,
    is.na(QSDTC)
  ) %>%
    select(
      STUDYID,
      USUBJID,
      VISIT,
      VISITNUM
    ) %>%
    distinct()

  # generate QSALL for subjects with missing dates
  qsall_data2 <- mutate(
    lookup_qs_sub_x2,
    QSTESTCD = "QSALL",
    QSTEST = "Questionnaires",
    QSSTAT = "NOT DONE",
    QSREASND = "SUBJECT REFUSED"
  )

  # add qsall data to original item data
  lookup_qs_sub_all <- bind_rows(
    lookup_qs_sub_x1,
    qsall_data1,
    qsall_data2
  )

  # the group_by()/ungroup() pair removes the per-subject grouping that is
  # still attached from the QSDTC derivation above
  qs_all <- lookup_qs_sub_all %>%
    arrange(
      STUDYID,
      USUBJID,
      VISITNUM
    ) %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::ungroup()

  # get first and second subject ids
  first_second_subj <- select(qs_all, USUBJID) %>%
    distinct() %>%
    slice(1:2)

  # missing values are only introduced for these two subjects
  qs1 <- filter(
    qs_all,
    USUBJID %in% first_second_subj$USUBJID
  )

  if (length(na_vars) > 0 && na_percentage > 0) {
    qs1 <- mutate_na(ds = qs1, na_vars = na_vars, na_percentage = na_percentage)
  }

  # QSSTAT = NOT DONE
  # (set when both result variables are missing, NA otherwise)
  qs1 <- mutate(
    qs1,
    QSSTAT = case_when(
      is.na(QSORRES) & is.na(QSSTRESC) ~ "NOT DONE"
    )
  )

  # remove first and second subjects from main data
  qs2 <- anti_join(
    qs_all,
    qs1,
    by = c("USUBJID")
  )

  # recombine and number the records within each subject
  final_qs <- rbind(
    qs1,
    qs2
  ) %>%
    group_by(USUBJID) %>%
    dplyr::mutate(QSSEQ = row_number()) %>%
    arrange(
      STUDYID,
      USUBJID,
      VISITNUM
    ) %>%
    ungroup()

  # ordered variables as per gdsr
  final_qs <- select(
    final_qs,
    STUDYID,
    USUBJID,
    QSSEQ,
    QSTESTCD,
    QSTEST,
    QSCAT,
    QSSCAT,
    QSORRES,
    QSORRESU,
    QSSTRESC,
    QSSTRESU,
    QSSTAT,
    QSREASND,
    VISITNUM,
    VISIT,
    QSDTC,
    QSEVLINT
  )
  return(final_qs)
}

#' @describeIn h_adqlqc Function for generating random dates between 2 dates
#'
#' @param from (`datetime vector`)\cr Start date/times.
#' @param to (`datetime vector`)\cr End date/times.
#' @param visit_id (`vector`)\cr Visit identifiers.
#'
#' @return Datetime vector with one entry per element of `visit_id` (`NA` where no date could be assigned).
#' @keywords internal
get_random_dates_between <- function(from, to, visit_id) {
  # Assigns one datetime to every distinct visit id and maps it back onto
  # `visit_id`:
  #   -1           -> 1 to 10 random days before the earliest `from`
  #    0           -> the earliest `from`
  #    k = 1, 2, … -> k-th point of a 28-day grid starting one day after the
  #                   earliest `from` and ending at the latest `to`
  #   anything else (beyond the grid, missing, other negative ids) -> NA
  min_date <- min(lubridate::as_datetime(from), na.rm = TRUE)
  max_date <- max(lubridate::as_datetime(to), na.rm = TRUE)
  date_seq <- seq(from = min_date + lubridate::days(1), to = max_date, by = "28 days")

  visit_ids <- unique(visit_id)
  # vapply() pins the result to one number (seconds since the epoch) per
  # visit id; sapply() would return a list as soon as one id matched no branch
  out <- vapply(
    visit_ids,
    FUN = function(x) {
      if (is.na(x)) {
        NA_real_
      } else if (x == -1) {
        random_days_to_subtract <- lubridate::days(sample(1:10, size = 1))
        as.numeric(min_date - random_days_to_subtract)
      } else if (x == 0) {
        as.numeric(min_date)
      } else if (x %in% seq_along(date_seq)) {
        as.numeric(date_seq[[x]])
      } else {
        NA_real_
      }
    },
    FUN.VALUE = numeric(1),
    USE.NAMES = TRUE
  )
  lubridate::as_datetime(out[match(visit_id, visit_ids)])
}

#' @describeIn h_adqlqc Prepare ADaM ADQLQC data, adding PARAMCD to SDTM QS data
#'
#' @param df (`data.frame`)\cr SDTM QS dataset.
#'
#' @return `data.frame`
#' @keywords internal
prep_adqlqc <- function(df) {
  # lookup table: SDTM item code (QSTESTCD) -> ADaM parameter code (PARAMCD)
  item_to_paramcd <- c(
    "EOR0101" = "QS02801",
    "EOR0102" = "QS02802",
    "EOR0103" = "QS02803",
    "EOR0104" = "QS02804",
    "EOR0105" = "QS02805",
    "EOR0106" = "QS02806",
    "EOR0107" = "QS02807",
    "EOR0108" = "QS02808",
    "EOR0109" = "QS02809",
    "EOR0110" = "QS02810",
    "EOR0111" = "QS02811",
    "EOR0112" = "QS02812",
    "EOR0113" = "QS02813",
    "EOR0114" = "QS02814",
    "EOR0115" = "QS02815",
    "EOR0116" = "QS02816",
    "EOR0117" = "QS02817",
    "EOR0118" = "QS02818",
    "EOR0119" = "QS02819",
    "EOR0120" = "QS02820",
    "EOR0121" = "QS02821",
    "EOR0122" = "QS02822",
    "EOR0123" = "QS02823",
    "EOR0124" = "QS02824",
    "EOR0125" = "QS02825",
    "EOR0126" = "QS02826",
    "EOR0127" = "QS02827",
    "EOR0128" = "QS02828",
    "EOR0129" = "QS02829",
    "EOR0130" = "QS02830"
  )

  # parameter metadata (`gdsr_param_adqlqc`) is loaded from the package's sysdata
  load(system.file("sysdata.rda", package = "random.cdisc.data"))

  # create PARAMCD from QSTESTCD; codes without a table entry are kept as they are
  with_paramcd <- dplyr::mutate(
    df,
    PARAMCD = dplyr::coalesce(
      unname(item_to_paramcd[as.character(QSTESTCD)]),
      as.character(QSTESTCD)
    )
  )

  # attach the parameter metadata
  dplyr::left_join(with_paramcd, gdsr_param_adqlqc, by = "PARAMCD")
}

#' @describeIn h_adqlqc Scale calculation for ADQLQC data
#'
#' @param adqlqc1 (`data.frame`)\cr Prepared data generated from the [prep_adqlqc()] function.
#'
#' @return `data.frame`
#' @keywords internal
calc_scales <- function(adqlqc1) {
  # Prep scale data ---------------------------------------------------------
  # parcat2 = scales or global health status
  # global health status/scales data
  # QSTESTCD: EOR0131 to EOR0145 (global health status and scales)
  load(system.file("sysdata.rda", package = "random.cdisc.data"))
  eortc_qlq_c30_sub <- filter(
    eortc_qlq_c30,
    !(as.numeric(str_extract(QSTESTCD, "\\d+$")) >= 101 & as.numeric(str_extract(QSTESTCD, "\\d+$")) <= 130)
  ) %>%
    mutate(
      PARAMCD = case_when(
        QSTESTCD == "EOR0131" ~ "QS028QL2",
        QSTESTCD == "EOR0132" ~ "QS028PF2",
        QSTESTCD == "EOR0133" ~ "QS028RF2",
        QSTESTCD == "EOR0134" ~ "QS028EF",
        QSTESTCD == "EOR0135" ~ "QS028CF",
        QSTESTCD == "EOR0136" ~ "QS028SF",
        QSTESTCD == "EOR0137" ~ "QS028FA",
        QSTESTCD == "EOR0138" ~ "QS028NV",
        QSTESTCD == "EOR0139" ~ "QS028PA",
        QSTESTCD == "EOR0140" ~ "QS028DY",
        QSTESTCD == "EOR0141" ~ "QS028SL",
        QSTESTCD == "EOR0142" ~ "QS028AP",
        QSTESTCD == "EOR0143" ~ "QS028CO",
        QSTESTCD == "EOR0144" ~ "QS028DI",
        QSTESTCD == "EOR0145" ~ "QS028FI",
        TRUE ~ QSTESTCD
      )
    ) %>%
    select(-publication_name)

  # ADaM global health status and scales from gdsr
  gdsr_param_adqlqc <- gdsr_param_adqlqc %>%
    filter(
      !str_detect(PARCAT2, "Original Items|Completion")
    )

  ghs_scales <- left_join(
    eortc_qlq_c30_sub,
    gdsr_param_adqlqc,
    by = "PARAMCD"
  )
  # scale data
  df <- data.frame(index = seq_len(nrow(ghs_scales)))
  df$previous <- list(
    c("QS02826", "QS02827"),
    c("QS02811"),
    c("QS02810", "QS02812", "QS02818"),
    c("QS02806", "QS02807"),
    c("QS02814", "QS02815"),
    c("QS02808"),
    c("QS02817"),
    c("QS02816"),
    c("QS02821", "QS02822", "QS02823", "QS02824"),
    c("QS02829", "QS02830"),
    c("QS02813"),
    c("QS02801", "QS02802", "QS02803", "QS02804", "QS02805"),
    c("QS02809", "QS02819"),
    c("QS02820", "QS02825"),
    c("QS02828")
  )
  df$newName <- list(
    "QS028SF",
    "QS028SL",
    "QS028FA",
    "QS028RF2",
    "QS028NV",
    "QS028DY",
    "QS028DI",
    "QS028CO",
    "QS028EF",
    "QS028QL2",
    "QS028AP",
    "QS028PF2",
    "QS028PA",
    "QS028CF",
    "QS028FI"
  )
  df$newNamelabel <- list(
    "EORTC QLQ-C30: Social functioning",
    "EORTC QLQ-C30: Insomnia",
    "EORTC QLQ-C30: Fatigue",
    "EORTC QLQ-C30: Role functioning (revised)",
    "EORTC QLQ-C30: Nausea and vomiting",
    "EORTC QLQ-C30: Dyspnoea",
    "EORTC QLQ-C30: Diarrhoea",
    "EORTC QLQ-C30: Constipation",
    "EORTC QLQ-C30: Emotional functioning",
    "EORTC QLQ-C30: Global health status/QoL (revised)",
    "EORTC QLQ-C30: Appetite loss",
    "EORTC QLQ-C30: Physical functioning (revised)",
    "EORTC QLQ-C30: Pain",
    "EORTC QLQ-C30: Cognitive functioning",
    "EORTC QLQ-C30: Financial difficulties"
  )
  df$newNameCategory <- list(
    "Functional Scales",
    "Symptom Scales",
    "Symptom Scales",
    "Functional Scales",
    "Symptom Scales",
    "Symptom Scales",
    "Symptom Scales",
    "Symptom Scales",
    "Functional Scales",
    "Global Health Status",
    "Symptom Scales",
    "Functional Scales",
    "Symptom Scales",
    "Functional Scales",
    "Symptom Scales"
  )
  df$num_param <- list(
    "1",
    "1",
    "2",
    "1",
    "1",
    "1",
    "1",
    "1",
    "2",
    "1",
    "1",
    "3",
    "1",
    "1",
    "1"
  )
  df$equation <- list(
    "new_value = (1 - ((temp_val/var_length)-1)/3)*100.0",
    "new_value = ((temp_val/var_length-1)/3)*100.0",
    "new_value = ((temp_val/var_length-1)/3)*100.0",
    "new_value = (1 - ((temp_val/var_length)-1)/3)*100.0",
    "new_value = ((temp_val/var_length-1)/3)*100.0",
    "new_value = ((temp_val/var_length-1)/3)*100.0",
    "new_value = ((temp_val/var_length-1)/3)*100.0",
    "new_value = ((temp_val/var_length-1)/3)*100.0",
    "new_value = (1 - ((temp_val/var_length)-1)/3)*100.0",
    "new_value = ((temp_val/var_length-1)/6)*100.0",
    "new_value = ((temp_val/var_length-1)/3)*100.0",
    "new_value = (1 - ((temp_val/var_length)-1)/3)*100.0",
    "new_value = ((temp_val/var_length-1)/3)*100.0",
    "new_value = (1 - ((temp_val/var_length)-1)/3)*100.0",
    "new_value = ((temp_val/var_length-1)/3)*100.0"
  )

  expect_data <- data.frame(
    PARAM = expect$PARAM,
    PARAMCD = expect$PARAMCD,
    PARCAT2 = expect$PARCAT2,
    PARCAT1N = expect$PARCAT1N,
    AVAL = c(0, 1),
    AVALC = c(
      "Not expected to complete questionnaire",
      "Expected to complete questionnaire"
    )
  )

  df_saved <- data.frame()

  unique_id <- unique(adqlqc1$USUBJID)

  for (id in unique_id) {
    id_data <- adqlqc1[adqlqc1$USUBJID == id, ]
    unique_avisit <- unique(id_data$AVISIT)
    for (visit in unique_avisit) {
      if (is.na(visit)) {
        next
      }
      id_data_at_visit <- id_data[id_data$AVISIT == visit, ]

      if (any(id_data_at_visit$PARAMCD != "QSALL")) {
        for (idx in seq_along(df$index)) {
          previous_names <- df$previous[idx]
          current_name <- df$newName[idx]
          current_name_label <- df$newNamelabel[idx]
          current_name_category <- df$newNameCategory[idx]
          eqn <- df$equation[idx]
          temp_val <- 0
          var_length <- 0
          for (param_name in previous_names[[1]]) {
            if (param_name %in% id_data_at_visit$PARAMCD) { ####
              current_val <- as.numeric(as.character(id_data_at_visit$AVAL[id_data_at_visit$PARAMCD == param_name]))
              if (!is.na(current_val)) {
                temp_val <- temp_val + current_val ###
                var_length <- var_length + 1
              }
            } # if
          } # param_name
          # eval
          if (var_length >= as.numeric(df$num_param[idx])) {
            eval(parse(text = eqn)) #####
          } else {
            new_value <- NA
          }

          new_data_row <- data.frame(
            study = str_extract(id, "[A-Z]+[0-9]+"),
            id,
            visit,
            id_data_at_visit$AVISITN[1],
            id_data_at_visit$QSDTC[1],
            current_name_category,
            current_name_label,
            current_name,
            new_value,
            NA,
            stringsAsFactors = FALSE
          )
          colnames(new_data_row) <- c(
            "STUDYID", "USUBJID", "AVISIT", "AVISITN",
            "ADTM", "PARCAT2", "PARAM", "PARAMCD",
            "AVAL", "AVALC"
          ) ###
          df_saved <- rbind(df_saved, new_data_row) #####
        } # idx
      }
      # add expect data
      expect_value <- sample(expect_data$AVAL, 1, prob = c(0.10, 0.90))
      expect_valuec <- expect_data$AVALC[expect_data$AVAL == expect_value]

      new_data_row <- data.frame(
        study = str_extract(id, "[A-Z]+[0-9]+"),
        id,
        visit,
        id_data_at_visit$AVISITN[1],
        datetime = NA,
        expect_data$PARCAT2[1],
        expect_data$PARAM[1],
        expect_data$PARAMCD[1],
        expect_value,
        expect_valuec,
        stringsAsFactors = FALSE
      )
      colnames(new_data_row) <- c(
        "STUDYID", "USUBJID", "AVISIT", "AVISITN",
        "ADTM", "PARCAT2", "PARAM", "PARAMCD", "AVAL",
        "AVALC"
      ) ###
      df_saved <- rbind(df_saved, new_data_row)
    } # visit
  } # id

  df_saved1 <- left_join(
    df_saved,
    ghs_scales,
    by = c(
      "PARAM",
      "PARAMCD",
      "PARCAT2"
    )
  ) %>%
    mutate(
      AVALC = ifelse(is.na(AVALC), as.character(AVAL), AVALC),
      PARCAT1 = ifelse(PARAMCD == "EX028", expect$PARCAT1, PARCAT1),
      PARCAT1N = ifelse(PARAMCD == "EX028", expect$PARCAT1N, PARCAT1N)
    )

  adqlqc_tmp <- bind_rows(adqlqc1, df_saved1) %>%
    arrange(
      USUBJID,
      AVISITN,
      QSTESTCD
    )
  return(adqlqc_tmp)
}

#' @describeIn h_adqlqc Calculate Change from Baseline Category 1
#'
#' @param dataset (`data.frame`)\cr ADaM dataset. `PARCAT2` and `CHG` are checked
#'   for presence; `PARAMCD` is also read to categorize item-level records.
#'
#' @return `data.frame`: `dataset` with the character column `CHGCAT1` added
#'   (or overwritten).
#' @keywords internal
derv_chgcat1 <- function(dataset) {
  # derivation of CHGCAT1
  check_vars <- c("PARCAT2", "CHG")

  if (!all(check_vars %in% names(dataset))) {
    stop(sprintf(
      "%s: one or both variables is/are missing, needed for derivation",
      paste(check_vars, collapse = ", ")
    ))
  }

  chg <- dataset$CHG
  has_chg <- !is.na(chg)

  # Rules are applied in sequence: rows where `cond` is TRUE get `label`, rows
  # where it is FALSE keep their current value, rows where it is NA become NA.
  set_where <- function(current, cond, label) {
    ifelse(cond, label, current)
  }

  # Scale-level records: a change of at least 10 points counts as a shift.
  # A decrease is an improvement for symptom scales, an increase is an
  # improvement for functional scales and global health status.
  is_symptom <- dataset$PARCAT2 == "Symptom Scales"
  is_functional <- dataset$PARCAT2 %in% c("Functional Scales", "Global Health Status")

  chgcat1 <- ifelse(is_symptom & has_chg & chg <= -10, "Improved", "")
  chgcat1 <- set_where(chgcat1, is_symptom & has_chg & chg >= 10, "Worsened")
  chgcat1 <- set_where(chgcat1, is_symptom & has_chg & chg > -10 & chg < 10, "No change")

  chgcat1 <- set_where(chgcat1, is_functional & has_chg & chg >= 10, "Improved")
  chgcat1 <- set_where(chgcat1, is_functional & has_chg & chg <= -10, "Worsened")
  chgcat1 <- set_where(chgcat1, is_functional & has_chg & chg > -10 & chg < 10, "No change")

  # Item-level records: the category spells out the number of levels changed.
  level_words <- c("one", "two", "three", "four", "five", "six")
  level_label <- function(direction, n) {
    paste0(direction, " by ", level_words[n], if (n == 1L) " level" else " levels")
  }
  # `improved_sign` is +1 when a positive change is an improvement and -1 when
  # a negative change is an improvement.
  apply_levels <- function(current, is_item, max_level, improved_sign) {
    current <- set_where(current, is_item & chg == 0, "No change")
    for (n in seq_len(max_level)) {
      current <- set_where(
        current, is_item & chg == improved_sign * n, level_label("Improved", n)
      )
      current <- set_where(
        current, is_item & chg == -improved_sign * n, level_label("Worsened", n)
      )
    }
    current
  }

  # QS02829 / QS02830: up to six levels, an increase is an improvement.
  is_global_item <- dataset$PARAMCD %in% c("QS02829", "QS02830")
  chgcat1 <- apply_levels(chgcat1, is_global_item, max_level = 6L, improved_sign = 1)

  # QS02801 .. QS02828: up to three levels, a decrease is an improvement.
  is_other_item <- dataset$PARAMCD %in% sprintf("QS028%02d", seq_len(28))
  chgcat1 <- apply_levels(chgcat1, is_other_item, max_level = 3L, improved_sign = -1)

  dataset$CHGCAT1 <- chgcat1
  dataset
}

#' @describeIn h_adqlqc Completion/Compliance Data Calculation
#'
#' @param dataset (`data.frame`)\cr Dataset.
#' @param percent (`numeric`)\cr Minimum percentage of questions answered for the
#'   `CO028<percent>P` completion parameter.
#' @param number (`numeric`)\cr Minimum number of questions answered for the
#'   `CO028<number>Q` completion parameter.
#'
#' @return `data.frame` with the `CO028ALL`, `CO028<percent>P` and
#'   `CO028<number>Q` records stacked in that order.
#' @keywords internal
comp_derv <- function(dataset, percent, number) {
  # item-level records only
  items <- dataset %>%
    filter(PARCAT2 == "Original Items")

  # number of distinct questions in the questionnaire
  comp_count_all <- count(distinct(select(items, PARAMCD)))$n

  # per subject and visit: how many questions have a non-missing answer, and
  # which (truncated) percentage of the questionnaire that represents
  answered <- items %>%
    group_by(STUDYID, USUBJID, PARCAT1, AVISIT, AVISITN, ADTM, ADY) %>%
    summarise(
      comp_count = sum(!is.na(AVAL)),
      comp_count_all = comp_count_all,
      .groups = "drop"
    ) %>%
    mutate(per_comp = trunc((comp_count / comp_count_all) * 100))

  # visits at which the questionnaire was expected (EX028 with AVAL of 1)
  expected <- dataset %>%
    filter(PARAMCD == "EX028", AVAL == 1) %>%
    select(
      STUDYID, USUBJID, PARCAT1, AVISIT, AVISITN, ADTM, ADY,
      AVAL_ex028 = AVAL
    ) %>%
    mutate(comp_count_all = comp_count_all)

  # keep every expected visit; take ADTM/ADY from the answered-items side
  merged <- expected %>%
    left_join(
      answered,
      by = c("STUDYID", "USUBJID", "PARCAT1", "AVISIT", "AVISITN", "comp_count_all")
    ) %>%
    select(-c("ADTM.x", "ADY.x")) %>%
    rename(ADTM = ADTM.y, ADY = ADY.y)

  # CO028ALL: every question answered
  completed_all <- merged %>%
    mutate(
      PARAMCD = "CO028ALL",
      PARAM = "EORTC QLQ-C30: Completion - Completed all questions",
      PARCAT2 = "Completion",
      AVAL = case_when(
        AVAL_ex028 == 1 & comp_count == comp_count_all ~ 1,
        AVAL_ex028 == 1 & (is.na(comp_count) | comp_count < comp_count_all) ~ 0
      ),
      AVALC = case_when(
        AVAL == 1 ~ "Completed all questions",
        AVAL == 0 ~ "Did not complete all questions"
      )
    )

  # CO028<y>P: at least `percent` percent of the questions answered
  completed_pct <- merged %>%
    mutate(
      PARAMCD = paste0("CO028", as.character(percent), "P"),
      PARAM = sprintf(
        "EORTC QLQ-C30: Completion - Completed at least %s%% of questions",
        as.character(percent)
      ),
      PARCAT2 = "Completion",
      AVAL = case_when(
        AVAL_ex028 == 1 & per_comp >= percent ~ 1,
        AVAL_ex028 == 1 & (is.na(per_comp) | per_comp < percent) ~ 0
      ),
      AVALC = case_when(
        AVAL == 1 ~ sprintf(
          "Completed at least %s%% of questions",
          as.character(percent)
        ),
        AVAL == 0 ~ sprintf(
          "Did not complete at least %s%% of questions",
          as.character(percent)
        )
      )
    )

  # CO028<x>Q: at least `number` questions answered
  completed_num <- merged %>%
    mutate(
      PARAMCD = paste0("CO028", as.character(number), "Q"),
      PARAM = sprintf(
        "EORTC QLQ-C30: Completion - Completed at least %s question(s)",
        as.character(number)
      ),
      PARCAT2 = "Completion",
      AVAL = case_when(
        AVAL_ex028 == 1 & comp_count >= number ~ 1,
        AVAL_ex028 == 1 & (comp_count < number | is.na(comp_count)) ~ 0
      ),
      AVALC = case_when(
        AVAL == 1 ~ sprintf(
          "Completed at least %s questions",
          as.character(number)
        ),
        AVAL == 0 ~ sprintf(
          "Did not complete at least %s question(s)",
          as.character(number)
        )
      )
    )

  # stack the three parameters and drop the working columns
  rbind(completed_all, completed_pct, completed_num) %>%
    select(-c("AVAL_ex028", "comp_count", "comp_count_all", "per_comp"))
}

#' Exposure Analysis Dataset (ADEX)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating random Exposure Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per each record in the corresponding SDTM domain.
#'
#' Keys: `STUDYID`, `USUBJID`, `EXSEQ`, `PARAMCD`, `PARCAT1`, `ASTDTM`, `AENDTM`, `ASTDY`, `AENDY`,
#' `AVISITN`, `EXDOSFRQ`, `EXROUTE`, `VISIT`, `VISITDY`, `EXSTDTC`, `EXENDTC`, `EXSTDY`, `EXENDY`
#'
#' @inheritParams argument_convention
#' @param parcat1 (`character vector`)\cr Dose amount categories. Defaults to "Individual" and "Overall".
#' @param parcat2 (`character vector`)\cr Types of drug received. Defaults to "Drug A" and "Drug B".
#' @param max_n_exs (`integer`)\cr Maximum number of exposures per patient. Defaults to 6.
#' @template param_cached
#' @templateVar data adex
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1)
#'
#' adex <- radex(adsl, seed = 2)
#' adex
radex <- function(adsl,
                  param = c(
                    "Dose administered during constant dosing interval",
                    "Number of doses administered during constant dosing interval",
                    "Total dose administered",
                    "Total number of doses administered"
                  ),
                  paramcd = c("DOSE", "NDOSE", "TDOSE", "TNDOSE"),
                  paramu = c("mg", " ", "mg", " "),
                  parcat1 = c("INDIVIDUAL", "OVERALL"),
                  parcat2 = c("Drug A", "Drug B"),
                  visit_format = "WEEK",
                  n_assessments = 5L,
                  n_days = 5L,
                  max_n_exs = 6L,
                  lookup = NULL,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(AVAL = c(NA, 0.1), AVALU = c(NA, 0.1)),
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadex"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_character(param, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(parcat1, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(parcat2, min.len = 1, any.missing = FALSE)
  checkmate::assert_string(visit_format)
  checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE)
  checkmate::assert_integer(n_days, len = 1, any.missing = FALSE)
  checkmate::assert_integer(max_n_exs, len = 1, any.missing = FALSE)
  checkmate::assert_data_frame(lookup, null.ok = TRUE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  # validate and initialize related variables
  param_init_list <- relvar_init(param, paramcd)
  unit_init_list <- relvar_init(param, paramu)

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  # Per subject: one row per scheduled visit for each of the first two
  # parameters, plus a single row for each of the third and fourth parameters.
  adex <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = adsl$USUBJID,
    PARAM = c(
      rep(
        param_init_list$relvar1[1],
        length(levels(visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days)))
      ),
      rep(
        param_init_list$relvar1[2],
        length(levels(visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days)))
      ),
      param_init_list$relvar1[3:4]
    ),
    stringsAsFactors = FALSE
  )

  # assign related variable values: PARAMxPARAMCD are related
  adex <- adex %>% rel_var(
    var_name = "PARAMCD",
    related_var = "PARAM",
    var_values = param_init_list$relvar2
  )

  # assign related variable values: AVALUxPARAM are related
  adex <- adex %>% rel_var(
    var_name = "AVALU",
    related_var = "PARAM",
    var_values = unit_init_list$relvar2
  )

  # Each subject is randomly assigned one of the first two `parcat2` values.
  adex <- adex %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(PARCAT_ind = sample(c(1, 2), size = 1)) %>%
    dplyr::mutate(PARCAT2 = ifelse(PARCAT_ind == 1, parcat2[1], parcat2[2])) %>%
    dplyr::select(-"PARCAT_ind")

  # Add in PARCAT1
  adex <- adex %>% dplyr::mutate(PARCAT1 = dplyr::case_when(
    (PARAMCD == "TNDOSE" | PARAMCD == "TDOSE") ~ "OVERALL",
    PARAMCD == "DOSE" | PARAMCD == "NDOSE" ~ "INDIVIDUAL"
  ))

  # Only DOSE and NDOSE records carry a visit; the schedule is repeated once
  # for each of the two parameters.
  adex_visit <- adex %>%
    dplyr::filter(PARAMCD == "DOSE" | PARAMCD == "NDOSE") %>%
    dplyr::mutate(
      AVISIT = rep(visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days), 2)
    )

  # Attach AVISIT back to the full data; `id` numbers the rows within each
  # key combination so that records are matched one-to-one.
  adex <- dplyr::left_join(
    adex %>%
      dplyr::group_by(
        USUBJID,
        STUDYID,
        PARAM,
        PARAMCD,
        AVALU,
        PARCAT1,
        PARCAT2
      ) %>%
      dplyr::mutate(id = dplyr::row_number()),
    adex_visit %>%
      dplyr::group_by(
        USUBJID,
        STUDYID,
        PARAM,
        PARAMCD,
        AVALU,
        PARCAT1,
        PARCAT2
      ) %>%
      dplyr::mutate(id = dplyr::row_number()),
    by = c("USUBJID", "STUDYID", "PARCAT1", "PARCAT2", "id", "PARAMCD", "PARAM", "AVALU")
  ) %>%
    dplyr::select(-"id")

  # Visit numbers; records without a matching visit fall through to 999000
  adex <- adex %>% dplyr::mutate(AVISITN = dplyr::case_when(
    AVISIT == "SCREENING" ~ -1,
    AVISIT == "BASELINE" ~ 0,
    (grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
    TRUE ~ 999000
  ))


  # Simulate the administered dose (DOSE records) per subject: a random step
  # of -1/0/+1 at every non-screening visit is accumulated and the running
  # total is mapped onto a dose level (-2 .. 2 -> 480 .. 1440).
  adex2 <- split(adex, adex$USUBJID) %>%
    lapply(function(pinfo) {
      pinfo %>%
        dplyr::filter(PARAMCD == "DOSE") %>%
        dplyr::group_by(USUBJID, PARCAT2, AVISIT) %>%
        dplyr::mutate(changeind = dplyr::case_when(
          AVISIT == "SCREENING" ~ 0,
          AVISIT != "SCREENING" ~ sample(c(-1, 0, 1),
            size = 1,
            prob = c(0.25, 0.5, 0.25),
            replace = TRUE
          )
        )) %>%
        dplyr::ungroup() %>%
        dplyr::group_by(USUBJID, PARCAT2) %>%
        dplyr::mutate(
          csum = cumsum(changeind),
          # redraw the step where the running total reaches +/-3
          changeind = dplyr::case_when(
            csum <= -3 ~ sample(c(0, 1), size = 1, prob = c(0.5, 0.5)),
            csum >= 3 ~ sample(c(0, -1), size = 1, prob = c(0.5, 0.5)),
            TRUE ~ changeind
          )
        ) %>%
        dplyr::mutate(csum = cumsum(changeind)) %>%
        dplyr::ungroup() %>%
        dplyr::group_by(USUBJID, PARCAT2, AVISIT) %>%
        dplyr::mutate(AVAL = dplyr::case_when(
          csum == -2 ~ 480,
          csum == -1 ~ 720,
          csum == 0 ~ 960,
          csum == 1 ~ 1200,
          csum == 2 ~ 1440
        )) %>%
        dplyr::select(-c("csum", "changeind")) %>%
        dplyr::ungroup()
    }) %>%
    Reduce(rbind, .)

  # NDOSE is 1 per record; TNDOSE and TDOSE are the per-subject sums of the
  # NDOSE and DOSE values respectively.
  adex_tmp <- dplyr::full_join(adex2, adex, by = names(adex))
  adex <- adex_tmp %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(AVAL = ifelse(PARAMCD == "NDOSE", 1, AVAL)) %>%
    dplyr::mutate(AVAL = ifelse(
      PARAMCD == "TNDOSE",
      sum(AVAL[PARAMCD == "NDOSE"]),
      AVAL
    )) %>%
    dplyr::ungroup() %>%
    dplyr::group_by(USUBJID, STUDYID, PARCAT2) %>%
    dplyr::mutate(AVAL = ifelse(
      PARAMCD == "TDOSE",
      sum(AVAL[PARAMCD == "DOSE"]),
      AVAL
    ))

  adex <- rcd_var_relabel(
    adex,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # merge ADSL to be able to add ADEX date and study day variables
  adex <- dplyr::inner_join(adex, adsl, by = c("STUDYID", "USUBJID")) %>%
    dplyr::rowwise() %>%
    dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
      is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
      TRUE ~ TRTEDTM
    ))) %>%
    dplyr::mutate(ASTDTM = sample(
      seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"),
      size = 1
    )) %>%
    # add 1 to end of range incase both values passed to sample() are the same
    dplyr::mutate(AENDTM = sample(
      seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"),
      size = 1
    )) %>%
    dplyr::select(-TRTENDT) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(STUDYID, USUBJID, ASTDTM)


  # Sequence numbers follow the per-subject ordering by ASTDTM set above
  adex <- adex %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(EXSEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = EXSEQ) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      PARAMCD,
      ASTDTM,
      AVISITN,
      EXSEQ
    )

  # Adding EXDOSFRQ
  adex <- adex %>%
    dplyr::mutate(EXDOSFRQ = dplyr::case_when(
      PARCAT1 == "INDIVIDUAL" ~ "ONCE",
      TRUE ~ ""
    ))

  # Adding EXROUTE
  adex <- adex %>%
    dplyr::mutate(EXROUTE = dplyr::case_when(
      PARCAT1 == "INDIVIDUAL" ~ sample(c("INTRAVENOUS", "SUBCUTANEOUS"),
        nrow(adex),
        replace = TRUE,
        prob = c(0.9, 0.1)
      ),
      TRUE ~ ""
    ))

  # Fix VISIT according to AVISIT
  adex <- adex %>%
    dplyr::mutate(VISIT = AVISIT)

  # Hack for VISITDY - to fix in ADSL
  # str_extract() is the file-local helper returning every match per element;
  # the second number of each visit label is used as the visit day, and -1 / 1
  # are prepended for the levels that contain no second number.
  # NOTE(review): this presumes exactly two such levels (SCREENING, BASELINE) -
  # confirm the label count still matches for visit_format = "CYCLE".
  visit_levels <- str_extract(levels(adex$VISIT), pattern = "[0-9]+")
  vl_extracted <- vapply(visit_levels, function(x) as.numeric(x[2]), numeric(1))
  vl_extracted <- c(-1, 1, vl_extracted[!is.na(vl_extracted)])

  # Adding VISITDY
  adex <- adex %>%
    dplyr::mutate(VISITDY = as.numeric(as.character(factor(VISIT, labels = vl_extracted))))

  # Exposure time stamps
  adex <- adex %>%
    dplyr::mutate(
      EXSTDTC = TRTSDTM + lubridate::days(VISITDY),
      EXENDTC = EXSTDTC + lubridate::hours(1),
      EXSTDY = VISITDY,
      EXENDY = VISITDY
    )

  # Correcting last exposure to treatment
  adex <- adex %>%
    dplyr::group_by(SUBJID) %>%
    dplyr::mutate(TRTEDTM = lubridate::as_datetime(max(EXENDTC, na.rm = TRUE))) %>%
    dplyr::ungroup()

  # Fixing Date - to add into ADSL
  adex <- adex %>%
    dplyr::mutate(
      TRTSDT = lubridate::date(TRTSDTM),
      TRTEDT = lubridate::date(TRTEDTM)
    )

  # Fixing analysis time stamps
  adex <- adex %>%
    dplyr::mutate(
      ASTDY = EXSTDY,
      AENDY = EXENDY,
      ASTDTM = EXSTDTC,
      AENDTM = EXENDTC
    )

  if (length(na_vars) > 0 && na_percentage > 0) {
    adex <- mutate_na(ds = adex, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata; kept as the last expression so the result is returned visibly
  apply_metadata(adex, "metadata/ADEX.yml")
}

# Base R stand-in for stringr::str_extract_all(): returns a list with one
# character vector per element of `string`, holding every match of `pattern`
# (an empty vector when there is no match).
str_extract <- function(string, pattern) {
  match_positions <- gregexpr(pattern, string)
  regmatches(x = string, m = match_positions)
}

#' Laboratory Data Analysis Dataset (ADLB)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Laboratory Data Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per subject per parameter per analysis visit per analysis date.
#'
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `BASETYPE`, `AVISITN`, `ATPTN`, `DTYPE`, `ADTM`, `LBSEQ`, `ASPID`
#'
#' @inheritParams argument_convention
#' @param lbcat (`character vector`)\cr LB category values.
#' @param max_n_lbs (`integer`)\cr Maximum number of labs per patient. Defaults to 10.
#' @template param_cached
#' @templateVar data adlb
#'
#' @return `data.frame`
#' @export
#'
#' @author tomlinsj, npaszty, Xuefeng Hou
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adlb <- radlb(adsl, visit_format = "WEEK", n_assessments = 7L, seed = 2)
#' adlb
#'
#' adlb <- radlb(adsl, visit_format = "CYCLE", n_assessments = 2L, seed = 2)
#' adlb
radlb <- function(adsl,
                  lbcat = c("CHEMISTRY", "CHEMISTRY", "IMMUNOLOGY"),
                  param = c(
                    "Alanine Aminotransferase Measurement",
                    "C-Reactive Protein Measurement",
                    "Immunoglobulin A Measurement"
                  ),
                  paramcd = c("ALT", "CRP", "IGA"),
                  paramu = c("U/L", "mg/L", "g/L"),
                  aval_mean = c(18, 9, 2.9),
                  visit_format = "WEEK",
                  n_assessments = 5L,
                  n_days = 5L,
                  max_n_lbs = 10L,
                  lookup = NULL,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(
                    LOQFL = c(NA, 0.1), ABLFL2 = c(1234, 0.1), ABLFL = c(1235, 0.1),
                    BASE2 = c(NA, 0.1), BASE = c(NA, 0.1),
                    CHG2 = c(1235, 0.1), PCHG2 = c(1235, 0.1), CHG = c(1234, 0.1), PCHG = c(1234, 0.1)
                  ),
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadlb"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_character(param, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramu, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(lbcat, min.len = 1, any.missing = FALSE)
  checkmate::assert_string(visit_format)
  checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE)
  checkmate::assert_integer(n_days, len = 1, any.missing = FALSE)
  checkmate::assert_integer(max_n_lbs, len = 1, any.missing = FALSE)
  checkmate::assert_data_frame(lookup, null.ok = TRUE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  # validate and initialize related variables
  lbcat_init_list <- relvar_init(param, lbcat)
  param_init_list <- relvar_init(param, paramcd)
  unit_init_list <- relvar_init(param, paramu)

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  adlb <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = adsl$USUBJID,
    PARAM = as.factor(param_init_list$relvar1),
    AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days),
    stringsAsFactors = FALSE
  )

  # assign AVAL based on different tests
  adlb <- adlb %>% mutate(AVAL = case_when(
    PARAM == param[1] ~ abs(stats::rnorm(nrow(adlb), mean = aval_mean[1], sd = 10)),
    PARAM == param[2] ~ abs(stats::rnorm(nrow(adlb), mean = aval_mean[2], sd = 1)),
    PARAM == param[3] ~ abs(stats::rnorm(nrow(adlb), mean = aval_mean[3], sd = 0.1))
  ))

  # assign related variable values: PARAMxLBCAT are related
  adlb <- adlb %>% rel_var(
    var_name = "LBCAT",
    related_var = "PARAM",
    var_values = lbcat_init_list$relvar2
  )

  # assign related variable values: PARAMxPARAMCD are related
  adlb <- adlb %>% rel_var(
    var_name = "PARAMCD",
    related_var = "PARAM",
    var_values = param_init_list$relvar2
  )

  adlb <- adlb %>%
    dplyr::mutate(LBTESTCD = PARAMCD) %>%
    dplyr::mutate(LBTEST = PARAM)

  adlb <- adlb %>% dplyr::mutate(AVISITN = dplyr::case_when(
    AVISIT == "SCREENING" ~ -1,
    AVISIT == "BASELINE" ~ 0,
    (grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
    TRUE ~ NA_real_
  ))

  adlb <- adlb %>% rel_var(
    var_name = "AVALU",
    related_var = "PARAM",
    var_values = unit_init_list$relvar2
  )

  adlb <- adlb %>%
    dplyr::mutate(AVISITN = dplyr::case_when(
      AVISIT == "SCREENING" ~ -1,
      AVISIT == "BASELINE" ~ 0,
      (grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
      TRUE ~ NA_real_
    ))

  # order to prepare for change from screening and baseline values
  adlb <- adlb[order(adlb$STUDYID, adlb$USUBJID, adlb$PARAMCD, adlb$AVISITN), ]

  adlb <- Reduce(rbind, lapply(split(adlb, adlb$USUBJID), function(x) {
    x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])]
    x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "")
    x$ABLFL <- ifelse(toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE",
      "Y",
      ifelse(toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1", "Y", "")
    )
    x
  }))

  adlb$BASE2 <- retain(adlb, adlb$AVAL, adlb$ABLFL2 == "Y")
  adlb$BASE <- ifelse(adlb$ABLFL2 != "Y", retain(adlb, adlb$AVAL, adlb$ABLFL == "Y"), NA)

  adlb <- adlb %>%
    dplyr::mutate(CHG2 = AVAL - BASE2) %>%
    dplyr::mutate(PCHG2 = 100 * (CHG2 / BASE2)) %>%
    dplyr::mutate(CHG = AVAL - BASE) %>%
    dplyr::mutate(PCHG = 100 * (CHG / BASE)) %>%
    dplyr::mutate(BASETYPE = "LAST") %>%
    dplyr::mutate(ANRLO = dplyr::case_when(
      PARAMCD == "ALT" ~ 7,
      PARAMCD == "CRP" ~ 8,
      PARAMCD == "IGA" ~ 0.8
    )) %>%
    dplyr::mutate(ANRHI = dplyr::case_when(
      PARAMCD == "ALT" ~ 55,
      PARAMCD == "CRP" ~ 10,
      PARAMCD == "IGA" ~ 3
    )) %>%
    dplyr::mutate(ANRIND = factor(dplyr::case_when(
      AVAL < ANRLO ~ "LOW",
      AVAL > ANRHI ~ "HIGH",
      TRUE ~ "NORMAL"
    ))) %>%
    dplyr::mutate(LBSTRESC = factor(dplyr::case_when(
      PARAMCD == "ALT" ~ "<7",
      PARAMCD == "CRP" ~ "<8",
      PARAMCD == "IGA" ~ ">3"
    ))) %>%
    dplyr::rowwise() %>%
    dplyr::mutate(LOQFL = factor(
      ifelse(eval(parse(text = paste(AVAL, LBSTRESC))), "Y", "N")
    )) %>%
    dplyr::ungroup() %>%
    dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>%
    dplyr::mutate(BNRIND = ANRIND[ABLFL == "Y"]) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(SHIFT1 = factor(ifelse(
      AVISITN > 0,
      paste(
        retain(
          adlb, as.character(BNRIND),
          AVISITN == 0
        ),
        ANRIND,
        sep = " to "
      ),
      ""
    ))) %>%
    dplyr::mutate(ATOXGR = factor(dplyr::case_when(
      ANRIND == "LOW" ~ sample(
        c("-1", "-2", "-3", "-4", "-5"),
        nrow(adlb),
        replace = TRUE,
        prob = c(0.30, 0.25, 0.20, 0.15, 0)
      ),
      ANRIND == "HIGH" ~ sample(
        c("1", "2", "3", "4", "5"),
        nrow(adlb),
        replace = TRUE,
        prob = c(0.30, 0.25, 0.20, 0.15, 0)
      ),
      ANRIND == "NORMAL" ~ "0"
    ))) %>%
    dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>%
    dplyr::mutate(BTOXGR = ATOXGR[ABLFL == "Y"]) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(ATPTN = 1) %>%
    dplyr::mutate(DTYPE = NA) %>%
    dplyr::mutate(BTOXGRL = factor(dplyr::case_when(
      BTOXGR == "0" ~ "0",
      BTOXGR == "-1" ~ "1",
      BTOXGR == "-2" ~ "2",
      BTOXGR == "-3" ~ "3",
      BTOXGR == "-4" ~ "4",
      BTOXGR == "1" ~ "<Missing>",
      BTOXGR == "2" ~ "<Missing>",
      BTOXGR == "3" ~ "<Missing>",
      BTOXGR == "4" ~ "<Missing>"
    ))) %>%
    dplyr::mutate(BTOXGRH = factor(dplyr::case_when(
      BTOXGR == "0" ~ "0",
      BTOXGR == "1" ~ "1",
      BTOXGR == "2" ~ "2",
      BTOXGR == "3" ~ "3",
      BTOXGR == "4" ~ "4",
      BTOXGR == "-1" ~ "<Missing>",
      BTOXGR == "-2" ~ "<Missing>",
      BTOXGR == "-3" ~ "<Missing>",
      BTOXGR == "-4" ~ "<Missing>",
    ))) %>%
    dplyr::mutate(ATOXGRL = factor(dplyr::case_when(
      ATOXGR == "0" ~ "0",
      ATOXGR == "-1" ~ "1",
      ATOXGR == "-2" ~ "2",
      ATOXGR == "-3" ~ "3",
      ATOXGR == "-4" ~ "4",
      ATOXGR == "1" ~ "<Missing>",
      ATOXGR == "2" ~ "<Missing>",
      ATOXGR == "3" ~ "<Missing>",
      ATOXGR == "4" ~ "<Missing>",
    ))) %>%
    dplyr::mutate(ATOXGRH = factor(dplyr::case_when(
      ATOXGR == "0" ~ "0",
      ATOXGR == "1" ~ "1",
      ATOXGR == "2" ~ "2",
      ATOXGR == "3" ~ "3",
      ATOXGR == "4" ~ "4",
      ATOXGR == "-1" ~ "<Missing>",
      ATOXGR == "-2" ~ "<Missing>",
      ATOXGR == "-3" ~ "<Missing>",
      ATOXGR == "-4" ~ "<Missing>",
    ))) %>%
    rcd_var_relabel(
      STUDYID = attr(adsl$STUDYID, "label"),
      USUBJID = attr(adsl$USUBJID, "label")
    )

  # High and low descriptions of the different PARAMCD values
  # This is currently hard coded as the GDSR does not have these descriptions yet
  grade_lookup <- tibble::tribble(
    ~PARAMCD, ~ATOXDSCL, ~ATOXDSCH,
    "ALB", "Hypoalbuminemia", NA_character_,
    "ALKPH", NA_character_, "Alkaline phosphatase increased",
    "ALT", NA_character_, "Alanine aminotransferase increased",
    "AST", NA_character_, "Aspartate aminotransferase increased",
    "BILI", NA_character_, "Blood bilirubin increased",
    "CA", "Hypocalcemia", "Hypercalcemia",
    "CHOLES", NA_character_, "Cholesterol high",
    "CK", NA_character_, "CPK increased",
    "CREAT", NA_character_, "Creatinine increased",
    "CRP", NA_character_, "C reactive protein increased",
    "GGT", NA_character_, "GGT increased",
    "GLUC", "Hypoglycemia", "Hyperglycemia",
    "HGB", "Anemia", "Hemoglobin increased",
    "IGA", NA_character_, "Immunoglobulin A increased",
    "POTAS", "Hypokalemia", "Hyperkalemia",
    "LYMPH", "CD4 lymphocytes decreased", NA_character_,
    "PHOS", "Hypophosphatemia", NA_character_,
    "PLAT", "Platelet count decreased", NA_character_,
    "SODIUM", "Hyponatremia", "Hypernatremia",
    "WBC", "White blood cell decreased", "Leukocytosis",
  )

  # merge grade_lookup onto adlb
  adlb <- dplyr::left_join(adlb, grade_lookup, by = "PARAMCD")

  adlb <- rcd_var_relabel(
    adlb,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # merge ADSL to be able to add LB date and study day variables
  adlb <- dplyr::inner_join(
    adlb,
    adsl,
    by = c("STUDYID", "USUBJID")
  ) %>%
    dplyr::rowwise() %>%
    dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
      is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
      TRUE ~ TRTEDTM
    ))) %>%
    dplyr::ungroup()

  adlb <- adlb %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::arrange(USUBJID, AVISITN) %>%
    dplyr::mutate(ADTM = rep(
      sort(sample(
        seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"),
        size = nlevels(AVISIT)
      )),
      each = n() / nlevels(AVISIT)
    )) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>%
    dplyr::select(-TRTENDT) %>%
    dplyr::arrange(STUDYID, USUBJID, ADTM)

  adlb <- adlb %>%
    dplyr::mutate(ASPID = sample(seq_len(dplyr::n()))) %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(LBSEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = LBSEQ) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      PARAMCD,
      BASETYPE,
      AVISITN,
      ATPTN,
      DTYPE,
      ADTM,
      LBSEQ,
      ASPID
    )

  adlb <- adlb %>% dplyr::mutate(ONTRTFL = factor(dplyr::case_when(
    !AVISIT %in% c("SCREENING", "BASELINE") ~ "Y",
    TRUE ~ ""
  )))

  flag_variables <- function(data,
                             apply_grouping,
                             apply_filter,
                             apply_mutate) {
    data_compare <- data %>%
      dplyr::mutate(row_check = seq_len(nrow(data)))

    data <- data_compare %>%
      {
        if (apply_grouping == TRUE) {
          dplyr::group_by(., USUBJID, PARAMCD, BASETYPE, AVISIT)
        } else {
          dplyr::group_by(., USUBJID, PARAMCD, BASETYPE)
        }
      } %>%
      dplyr::arrange(ADTM, ASPID, LBSEQ) %>%
      {
        if (apply_filter == TRUE) {
          dplyr::filter(
            .,
            (AVISIT != "BASELINE" & AVISIT != "SCREENING") &
              (ONTRTFL == "Y" | ADTM <= TRTSDTM)
          ) %>%
            dplyr::filter(ATOXGR == max(as.numeric(as.character(ATOXGR))))
        } else if (apply_filter == FALSE) {
          dplyr::filter(
            .,
            (AVISIT != "BASELINE" & AVISIT != "SCREENING") &
              (ONTRTFL == "Y" | ADTM <= TRTSDTM)
          ) %>%
            dplyr::filter(ATOXGR == min(as.numeric(as.character(ATOXGR))))
        } else {
          dplyr::filter(
            .,
            AVAL == min(AVAL) &
              (AVISIT != "BASELINE" & AVISIT != "SCREENING") &
              (ONTRTFL == "Y" | ADTM <= TRTSDTM)
          )
        }
      } %>%
      dplyr::slice(1) %>%
      {
        if (apply_mutate == TRUE) {
          dplyr::mutate(., new_var = ifelse(is.na(DTYPE), "Y", ""))
        } else {
          dplyr::mutate(., new_var = ifelse(is.na(AVAL) == FALSE & is.na(DTYPE), "Y", ""))
        }
      } %>%
      dplyr::ungroup()

    data_compare$new_var <- ifelse(data_compare$row_check %in% data$row_check, "Y", "")

    data_compare <- data_compare[, -which(names(data_compare) %in% c("row_check"))]

    return(data_compare)
  }

  adlb <- flag_variables(adlb, TRUE, "ELSE", FALSE) %>% dplyr::rename(WORS01FL = "new_var")
  adlb <- flag_variables(adlb, FALSE, TRUE, TRUE) %>% dplyr::rename(WGRHIFL = "new_var")
  adlb <- flag_variables(adlb, FALSE, FALSE, TRUE) %>% dplyr::rename(WGRLOFL = "new_var")
  adlb <- flag_variables(adlb, TRUE, TRUE, TRUE) %>% dplyr::rename(WGRHIVFL = "new_var")
  adlb <- flag_variables(adlb, TRUE, FALSE, TRUE) %>% dplyr::rename(WGRLOVFL = "new_var")

  adlb <- adlb %>% dplyr::mutate(ANL01FL = ifelse(
    (ABLFL == "Y" | (WORS01FL == "Y" & is.na(DTYPE))) &
      (AVISIT != "SCREENING"),
    "Y",
    ""
  ))

  if (length(na_vars) > 0 && na_percentage > 0) {
    adlb <- mutate_na(ds = adlb, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata

  adlb <- apply_metadata(adlb, "metadata/ADLB.yml")

  return(adlb)
}

#' Medical History Analysis Dataset (ADMH)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Medical History Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per each record in the corresponding SDTM domain.
#'
#' Keys: `STUDYID`, `USUBJID`, `ASTDTM`, `MHSEQ`
#'
#' @inheritParams argument_convention
#' @param max_n_mhs (`integer`)\cr Maximum number of MHs per patient. Defaults to 10.
#' @template param_cached
#' @templateVar data admh
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1)
#'
#' admh <- radmh(adsl, seed = 2)
#' admh
radmh <- function(adsl,
                  max_n_mhs = 10L,
                  lookup = NULL,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(MHBODSYS = c(NA, 0.1), MHDECOD = c(1234, 0.1)),
                  cached = FALSE) {
  # Return the pre-generated dataset instead of simulating a new one.
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadmh"))
  }

  checkmate::assert_data_frame(adsl)
  # `lower = 0`: a negative maximum would make `0:max_n_mhs` count downwards and
  # hand a negative size to `sample()` further below.
  checkmate::assert_integer(max_n_mhs, len = 1, lower = 0, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  # Medical history terms to draw from: the user-supplied lookup or the default one.
  checkmate::assert_data_frame(lookup, null.ok = TRUE)
  lookup_mh <- if (!is.null(lookup)) {
    lookup
  } else {
    tibble::tribble(
      ~MHBODSYS, ~MHDECOD, ~MHSOC,
      "cl A", "trm A_1/2", "cl A",
      "cl A", "trm A_2/2", "cl A",
      "cl B", "trm B_1/3", "cl B",
      "cl B", "trm B_2/3", "cl B",
      "cl B", "trm B_3/3", "cl B",
      "cl C", "trm C_1/2", "cl C",
      "cl C", "trm C_2/2", "cl C",
      "cl D", "trm D_1/3", "cl D",
      "cl D", "trm D_2/3", "cl D",
      "cl D", "trm D_3/3", "cl D"
    )
  }

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  # For every subject draw between 0 and `max_n_mhs` lookup rows (with replacement)
  # and tag them with the subject and study identifiers.
  admh <- Map(
    function(id, sid) {
      n_mhs <- sample(0:max_n_mhs, 1)
      i <- sample(seq_len(nrow(lookup_mh)), n_mhs, TRUE)
      dplyr::mutate(
        lookup_mh[i, ],
        USUBJID = id,
        STUDYID = sid
      )
    },
    adsl$USUBJID,
    adsl$STUDYID
  ) %>%
    Reduce(rbind, .) %>%
    # Put the identifiers first by name. Selecting by position only worked for a
    # lookup with exactly three columns and could drop STUDYID (needed by the join
    # below) for any other lookup shape.
    dplyr::select("USUBJID", "STUDYID", dplyr::everything()) %>%
    dplyr::mutate(MHTERM = MHDECOD)

  admh <- rcd_var_relabel(
    admh,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # merge ADSL to be able to add MH date and study day variables
  admh <- dplyr::inner_join(
    admh,
    adsl,
    by = c("STUDYID", "USUBJID")
  ) %>%
    dplyr::rowwise() %>%
    # Temporary end date: TRTEDTM when available, otherwise treatment start plus
    # the study duration (floored to the day).
    dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
      is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
      TRUE ~ TRTEDTM
    ))) %>%
    # Start date: one random day between treatment start and the end date.
    dplyr::mutate(ASTDTM = sample(
      seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"),
      size = 1
    )) %>%
    dplyr::mutate(ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))) %>%
    # add 1 to end of range in case both values passed to sample() are the same
    dplyr::mutate(AENDTM = sample(
      seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"),
      size = 1
    )) %>%
    dplyr::mutate(AENDY = ceiling(difftime(AENDTM, TRTSDTM, units = "days"))) %>%
    dplyr::select(-TRTENDT) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(STUDYID, USUBJID, ASTDTM, MHTERM) %>%
    dplyr::mutate(MHDISTAT = sample(
      x = c("Resolved", "Ongoing with treatment", "Ongoing without treatment"),
      prob = c(0.6, 0.2, 0.2),
      size = dplyr::n(),
      replace = TRUE
    )) %>%
    # Relation to treatment: ended before treatment start -> "PRIOR",
    # otherwise "PRIOR_CONCOMITANT"; a missing end date is resolved via MHDISTAT.
    dplyr::mutate(ATIREL = dplyr::case_when(
      (AENDTM < TRTSDTM | (is.na(AENDTM) & MHDISTAT == "Resolved")) ~ "PRIOR",
      (AENDTM >= TRTSDTM | (is.na(AENDTM) & grepl("Ongoing", MHDISTAT))) ~ "PRIOR_CONCOMITANT"
    ))

  # Sequence numbers within subject, in the current (date-sorted) row order.
  admh <- admh %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(MHSEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = MHSEQ) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(STUDYID, USUBJID, ASTDTM, MHSEQ)

  # `na_percentage < 1` is already guaranteed by the assertion above.
  if (length(na_vars) > 0 && na_percentage > 0) {
    admh <- mutate_na(ds = admh, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  admh <- apply_metadata(admh, "metadata/ADMH.yml")

  return(admh)
}

#' Time-to-Event Analysis Dataset (ADTTE)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Time-to-Event Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details
#'
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`
#'
#' @inheritParams argument_convention
#' @inheritParams radaette
#' @template param_cached
#' @templateVar data adtte
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adtte <- radtte(adsl, seed = 2)
#' adtte
radtte <- function(adsl,
                   event_descr = NULL,
                   censor_descr = NULL,
                   lookup = NULL,
                   seed = NULL,
                   na_percentage = 0,
                   na_vars = list(CNSR = c(NA, 0.1), AVAL = c(1234, 0.1), AVALU = c(1234, 0.1)),
                   cached = FALSE) {
  # Return the pre-generated dataset instead of simulating a new one.
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadtte"))
  }

  checkmate::assert_data_frame(adsl)
  # NOTE(review): `censor_descr` is validated but not used below; the CNSDTDSC
  # values are hard coded. Confirm whether it should drive them.
  checkmate::assert_character(censor_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(event_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  # Parameters per arm: the user-supplied lookup or the default one.
  checkmate::assert_data_frame(lookup, null.ok = TRUE)
  lookup_tte <- if (!is.null(lookup)) {
    lookup
  } else {
    tibble::tribble(
      ~ARM, ~PARAMCD, ~PARAM, ~LAMBDA, ~CNSR_P,
      "ARM A", "EFS", "Event Free Survival", log(2) / 365, 0.4,
      "ARM B", "EFS", "Event Free Survival", log(2) / 305, 0.3,
      "ARM C", "EFS", "Event Free Survival", log(2) / 243, 0.2,
      "ARM A", "CRSD", "Duration of Confirmed Response", log(2) / 305, 0.4,
      "ARM B", "CRSD", "Duration of Confirmed Response", log(2) / 243, 0.3,
      "ARM C", "CRSD", "Duration of Confirmed Response", log(2) / 182, 0.2,
      "ARM A", "PFS", "Progression Free Survival", log(2) / 365, 0.4,
      "ARM B", "PFS", "Progression Free Survival", log(2) / 305, 0.3,
      "ARM C", "PFS", "Progression Free Survival", log(2) / 243, 0.2,
      "ARM A", "OS", "Overall Survival", log(2) / 610, 0.4,
      "ARM B", "OS", "Overall Survival", log(2) / 490, 0.3,
      "ARM C", "OS", "Overall Survival", log(2) / 365, 0.2,
    )
  }

  evntdescr_sel <- if (!is.null(event_descr)) {
    event_descr
  } else {
    c(
      "Death",
      "Disease Progression",
      "Last Tumor Assessment",
      "Adverse Event",
      "Alive"
    )
  }

  # One row per subject per parameter of the subject's arm.
  adtte <- split(adsl, adsl$USUBJID) %>%
    lapply(FUN = function(pinfo) {
      lookup_tte %>%
        dplyr::filter(ARM == as.character(pinfo$ACTARMCD)) %>%
        dplyr::rowwise() %>%
        dplyr::mutate(
          STUDYID = pinfo$STUDYID,
          SITEID = pinfo$SITEID,
          USUBJID = pinfo$USUBJID,
          AVALU = "DAYS"
        ) %>%
        dplyr::select(-"LAMBDA", -"CNSR_P")
    }) %>%
    Reduce(rbind, .) %>%
    rcd_var_relabel(
      STUDYID = "Study Identifier",
      USUBJID = "Unique Subject Identifier"
    )

  # Process each patient separately so that events stay consistent within a patient
  adtte_split <- split(adtte, adtte$USUBJID)

  # Pass 1: add EVNTDESC (random) and CNSR (derived from EVNTDESC).
  # The random passes are kept separate so that all EVNTDESC draws happen before
  # any AVAL draw, which keeps seeded results reproducible.
  adtte_lst <- lapply(adtte_split, function(split_df) {
    # First create an empty EVNTDESC variable to populate
    split_df$EVNTDESC <- NA
    n_rows <- nrow(split_df)
    for (i in seq_len(n_rows)) {
      if (i == 1) {
        # First row: random value from the first four entries of evntdescr_sel
        split_df$EVNTDESC[i] <- sample(evntdescr_sel[1:4], 1, prob = c(0.1, 0.3, 0.4, 0.2))
      } else if (any(grepl("Death", split_df$EVNTDESC))) {
        # Once "Death" has been assigned every following row must be "Death" too:
        # the patient cannot die and then come back to life
        split_df$EVNTDESC[i] <- "Death"
      } else if (i != n_rows) {
        # Intermediate rows without a previous "Death": randomly select another value
        split_df$EVNTDESC[i] <- sample(evntdescr_sel[1:4], 1)
      } else {
        # Last row (OS with the default lookup) can only be "Death" or "Alive"
        split_df$EVNTDESC[i] <- "Alive"
      }
    }

    # "Death" and "Disease Progression" are events (0), everything else is censored (1)
    split_df$CNSR <- ifelse(split_df$EVNTDESC %in% c("Death", "Disease Progression"), 0, 1)
    split_df
  })

  # Pass 2: add AVAL (random) and CNSDTDSC (derived from CNSR and EVNTDESC).
  adtte_lst <- lapply(adtte_lst, function(split_df) {
    # First create an empty AVAL variable to populate
    split_df$AVAL <- NA
    for (i in seq_len(nrow(split_df))) {
      # Events of all rows before the current one. `seq_len(i - 1)` is empty for
      # the first row (the former `1:i - 1` evaluated to `0:(i - 1)`).
      earlier_events <- split_df$EVNTDESC[seq_len(i - 1)]
      if (i == 1) {
        split_df$AVAL[i] <- stats::runif(1, 15, 100)
      } else if (any(grepl("Death", earlier_events))) {
        # There is a death before the current row: reuse the AVAL of the row that
        # has the "Death" value as the patient cannot live longer than this value
        death_position <- match("Death", earlier_events)
        split_df$AVAL[i] <- split_df$AVAL[death_position]
      } else if (i == 2) {
        split_df$AVAL[i] <- stats::runif(1, 100, 200)
      } else if (i == 3) {
        split_df$AVAL[i] <- stats::runif(1, 200, 300)
      } else if (i == 4) {
        split_df$AVAL[i] <- stats::runif(1, 300, 500)
      }
    }

    # Censoring description for censored records; empty for everything else
    split_df$CNSDTDSC <- dplyr::case_when(
      split_df$CNSR == 1 & split_df$EVNTDESC == "Last Tumor Assessment" ~ "Completion or Discontinuation",
      split_df$CNSR == 1 & split_df$EVNTDESC == "Adverse Event" ~ "Preferred Term",
      split_df$CNSR == 1 & split_df$EVNTDESC == "Alive" ~ "Alive During Study",
      TRUE ~ ""
    )
    split_df
  })

  # Take the split df and combine them back together
  adtte <- do.call("rbind", adtte_lst)
  rownames(adtte) <- NULL

  adtte <- rcd_var_relabel(
    adtte,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # merge ADSL to be able to add TTE date and study day variables
  adtte <- dplyr::inner_join(
    dplyr::select(adtte, -"SITEID", -"ARM"),
    adsl,
    by = c("STUDYID", "USUBJID")
  ) %>%
    dplyr::rowwise() %>%
    # Temporary end date: TRTEDTM when available, otherwise treatment start plus
    # the study duration (floored to the day).
    dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
      is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
      TRUE ~ TRTEDTM
    ))) %>%
    dplyr::mutate(ADTM = sample(
      seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"),
      size = 1
    )) %>%
    dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>%
    dplyr::select(-TRTENDT) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(STUDYID, USUBJID, ADTM)

  # Sequence numbers within subject, in the current (date-sorted) row order.
  adtte <- adtte %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(TTESEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = TTESEQ) %>%
    dplyr::mutate(PARAM = as.factor(PARAM)) %>%
    dplyr::mutate(PARAMCD = as.factor(PARAMCD)) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      PARAMCD,
      ADTM,
      TTESEQ
    )

  # Append one "TNE" record per subject: a Poisson count as AVAL plus the log of an
  # exponential draw as lgTMATRSK; sequence, date and event columns are set to NA.
  adtte <- dplyr::bind_rows(
    adtte,
    data.frame(
      adtte %>%
        dplyr::group_by(USUBJID) %>%
        dplyr::slice_head(n = 1) %>%
        dplyr::mutate(
          PARAMCD = "TNE",
          PARAM = "Total Number of Exacerbations",
          AVAL = stats::rpois(1, 3),
          AVALU = "COUNT",
          lgTMATRSK = log(stats::rexp(1, rate = 3)),
          dplyr::across(
            c("ASEQ", "TTESEQ", "ADY", "ADTM", "EVNTDESC"),
            ~NA
          )
        )
    )
  ) %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      PARAMCD,
      ADTM,
      TTESEQ
    )

  if (length(na_vars) > 0 && na_percentage > 0) {
    adtte <- mutate_na(ds = adtte, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  adtte <- apply_metadata(adtte, "metadata/ADTTE.yml")

  return(adtte)
}

#' Protocol Deviations Analysis Dataset (ADDV)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Protocol Deviations Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per each record in the corresponding SDTM domain.
#'
#' Keys: `STUDYID`, `USUBJID`, `ASTDT`, `DVTERM`, `DVSEQ`
#'
#' @inheritParams argument_convention
#' @param max_n_dv (`integer`)\cr Maximum number of deviations per patient. Defaults to 3.
#' @param p_dv (`proportion`)\cr Probability of a patient having protocol deviations.
#' @template param_cached
#' @templateVar data addv
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' addv <- raddv(adsl, seed = 2)
#' addv
raddv <- function(adsl,
                  max_n_dv = 3L,
                  p_dv = 0.15,
                  lookup = NULL,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(
                    "ASTDT" = c(seed = 1234, percentage = 0.1),
                    "DVCAT" = c(seed = 1234, percentage = 0.1)
                  ),
                  cached = FALSE) {
  # Short-circuit: hand back the pre-generated dataset when requested.
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("caddv"))
  }

  # Argument checks.
  checkmate::assert_data_frame(adsl)
  checkmate::assert_integer(max_n_dv, len = 1, lower = 1, any.missing = FALSE)
  checkmate::assert_number(p_dv, lower = .Machine$double.xmin, upper = 1)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  # Catalogue of deviations to draw from: built-in table unless the caller supplies one.
  checkmate::assert_data_frame(lookup, null.ok = TRUE)
  dv_catalogue <- if (is.null(lookup)) {
    tibble::tribble(
      ~DOMAIN, ~DVCAT, ~DVDECOD, ~DVTERM, ~DVREAS, ~DVEPRELI,
      "DV", "MAJOR", "EXCLUSION CRITERIA", "Received prior prohibited therapy or medication", "", "N",
      "DV", "MAJOR", "EXCLUSION CRITERIA", "Active or untreated or other excluded cns metastases", "", "N",
      "DV", "MAJOR", "EXCLUSION CRITERIA", "History of other malignancies within the last 5 years", "", "N",
      "DV", "MAJOR", "EXCLUSION CRITERIA", "Uncontrolled concurrent condition", "", "N",
      "DV", "MAJOR", "EXCLUSION CRITERIA", "Other exclusion criteria", "", "N",
      "DV", "MAJOR", "EXCLUSION CRITERIA", "Pregnancy criteria", "", "N",
      "DV", "MAJOR", "INCLUSION CRITERIA", "Does not meet prior therapy requirements", "", "N",
      "DV", "MAJOR", "INCLUSION CRITERIA", "Inclusion lab values outside allowed limits", "", "N",
      "DV", "MAJOR", "INCLUSION CRITERIA", "No signed ICF at study entry", "", "N",
      "DV", "MAJOR", "INCLUSION CRITERIA", "Inclusion-related test not done/out of window", "", "N",
      "DV", "MAJOR", "INCLUSION CRITERIA", "Ineligible cancer type or current cancer stage", "", "N",
      "DV", "MAJOR", "MEDICATION", "Dose missed or significantly out of window",
      "Site action due to epidemic/pandemic", "Y",
      "DV", "MAJOR", "MEDICATION", "Received incorrect study medication", "", "N",
      "DV", "MAJOR", "MEDICATION", "Received prohibited concomitant medication", "", "N",
      "DV", "MAJOR", "MEDICATION", "Discontinued study drug for unspecified reason", "", "N",
      "DV", "MAJOR", "MEDICATION", "Significant deviation from planned dose",
      "Site action due to epidemic/pandemic", "Y",
      "DV", "MAJOR", "PROCEDURAL", "Missed assessment affecting safety/study outcomes", "", "N",
      "DV", "MAJOR", "PROCEDURAL", "Eligibility-related test not done/out of window", "", "N",
      "DV", "MAJOR", "PROCEDURAL", "Failure to sign updated ICF within two visits",
      "Site action due to epidemic/pandemic", "Y",
      "DV", "MAJOR", "PROCEDURAL", "Omission of complete lab panel required by protocol", "", "N",
      "DV", "MAJOR", "PROCEDURAL", "Omission of screening tumor assessment", "", "N",
      "DV", "MAJOR", "PROCEDURAL", "Missed 2 or more efficacy assessments",
      "Site action due to epidemic/pandemic", "Y"
    )
  } else {
    lookup
  }

  # Deviations of a single subject: a Bernoulli(p_dv) draw decides whether the
  # subject has any, a second draw from c(1, 1..max_n_dv) decides how many; the
  # rows are then sampled from the catalogue with replacement.
  draw_subject_dvs <- function(subject_id, study_id) {
    n_rows <- stats::rbinom(1, 1, p_dv) * sample(c(1, seq_len(max_n_dv)), 1)
    picked <- sample(seq_len(nrow(dv_catalogue)), n_rows, TRUE)
    dplyr::mutate(
      dv_catalogue[picked, ],
      USUBJID = subject_id,
      STUDYID = study_id
    )
  }

  per_subject <- Map(draw_subject_dvs, adsl$USUBJID, adsl$STUDYID)
  addv <- dplyr::mutate(Reduce(rbind, per_subject), DVSCAT = DVCAT)

  addv <- rcd_var_relabel(
    addv,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # Bring in ADSL so the deviation date and study day can be derived row by row.
  # TRTENDT is a temporary end date: TRTEDTM when present, otherwise treatment
  # start plus the study duration (floored to the day).
  addv <- addv %>%
    dplyr::inner_join(adsl, by = c("STUDYID", "USUBJID")) %>%
    dplyr::rowwise() %>%
    dplyr::mutate(
      TRTENDT = lubridate::date(dplyr::case_when(
        is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
        TRUE ~ TRTEDTM
      )),
      ASTDTM = sample(
        seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"),
        size = 1
      ),
      ASTDT = lubridate::date(ASTDTM),
      ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))
    ) %>%
    dplyr::ungroup() %>%
    dplyr::select(-TRTENDT, -ASTDTM) %>%
    dplyr::arrange(STUDYID, USUBJID, ASTDT, DVTERM)

  # Number the deviations within each subject, following the sort order above.
  addv <- addv %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(DVSEQ = seq_len(dplyr::n())) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(STUDYID, USUBJID, ASTDT, DVTERM, DVSEQ)

  # Flag column: carries "Y" over from DVEPRELI, blank otherwise.
  addv <- dplyr::mutate(addv, AEPRELFL = ifelse(DVEPRELI == "Y", DVEPRELI, ""))

  # Optionally blank out values in the requested variables.
  if (length(na_vars) > 0 && na_percentage > 0) {
    addv <- mutate_na(ds = addv, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  apply_metadata(addv, "metadata/ADDV.yml")
}

#' Tumor Response Analysis Dataset (ADRS)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Tumor Response Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details
#' One record per subject per parameter per analysis visit per analysis date.
#' SDTM variables are populated on new records coming from other single records.
#' Otherwise, SDTM variables are left blank.
#'
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `AVISITN`, `ADT`, `RSSEQ`
#'
#' @inheritParams argument_convention
#' @param avalc (`character vector`)\cr Analysis value categories.
#' @template param_cached
#' @templateVar data adrs
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adrs <- radrs(adsl, seed = 2)
#' adrs
radrs <- function(adsl,
                  avalc = NULL,
                  lookup = NULL,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(AVISIT = c(NA, 0.1), AVAL = c(1234, 0.1), AVALC = c(1234, 0.1)),
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadrs"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_vector(avalc, null.ok = TRUE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  param_codes <- if (!is.null(avalc)) {
    avalc
  } else {
    stats::setNames(1:5, c("CR", "PR", "SD", "PD", "NE"))
  }

  checkmate::assert_data_frame(lookup, null.ok = TRUE)
  lookup_ars <- if (!is.null(lookup)) {
    lookup
  } else {
    expand.grid(
      ARM = c("A: Drug X", "B: Placebo", "C: Combination"),
      AVALC = names(param_codes)
    ) %>% dplyr::mutate(
      AVAL = param_codes[AVALC],
      p_scr = c(rep(0, 3), rep(0, 3), c(1, 1, 1), c(0, 0, 0), c(0, 0, 0)),
      p_bsl = c(rep(0, 3), rep(0, 3), c(1, 1, 1), c(0, 0, 0), c(0, 0, 0)),
      p_cycle = c(c(.4, .3, .5), c(.35, .25, .25), c(.1, .2, .08), c(.14, 0.15, 0.15), c(.01, 0.1, 0.02)),
      p_eoi = c(c(.4, .3, .5), c(.35, .25, .25), c(.1, .2, .08), c(.14, 0.15, 0.15), c(.01, 0.1, 0.02)),
      p_fu = c(c(.3, .2, .4), c(.2, .1, .3), c(.2, .2, .2), c(.3, .5, 0.1), rep(0, 3))
    )
  }

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  adrs <- split(adsl, adsl$USUBJID) %>%
    lapply(function(pinfo) {
      probs <- dplyr::filter(lookup_ars, ARM == as.character(pinfo$ACTARM))

      # screening
      rsp_screen <- sample(probs$AVALC, 1, prob = probs$p_scr) %>% as.character()

      # baseline
      rsp_bsl <- sample(probs$AVALC, 1, prob = probs$p_bsl) %>% as.character()

      # cycle
      rsp_c2d1 <- sample(probs$AVALC, 1, prob = probs$p_cycle) %>% as.character()
      rsp_c4d1 <- sample(probs$AVALC, 1, prob = probs$p_cycle) %>% as.character()

      # end of induction
      rsp_eoi <- sample(probs$AVALC, 1, prob = probs$p_eoi) %>% as.character()

      # follow up
      rsp_fu <- sample(probs$AVALC, 1, prob = probs$p_fu) %>% as.character()

      best_rsp <- min(param_codes[c(rsp_screen, rsp_bsl, rsp_eoi, rsp_fu, rsp_c2d1, rsp_c4d1)])
      best_rsp_i <- which.min(param_codes[c(rsp_screen, rsp_bsl, rsp_eoi, rsp_fu, rsp_c2d1, rsp_c4d1)])

      avisit <- c("SCREENING", "BASELINE", "CYCLE 2 DAY 1", "CYCLE 4 DAY 1", "END OF INDUCTION", "FOLLOW UP")

      # meaningful date information
      trtstdt <- lubridate::date(pinfo$TRTSDTM)
      trtendt <- lubridate::date(dplyr::if_else(
        !is.na(pinfo$TRTEDTM), pinfo$TRTEDTM,
        lubridate::floor_date(trtstdt + study_duration_secs, unit = "day")
      ))
      scr_date <- trtstdt - lubridate::days(100)
      bs_date <- trtstdt
      flu_date <- sample(seq(lubridate::as_datetime(trtstdt), lubridate::as_datetime(trtendt), by = "day"), size = 1)
      eoi_date <- sample(seq(lubridate::as_datetime(trtstdt), lubridate::as_datetime(trtendt), by = "day"), size = 1)
      c2d1_date <- sample(seq(lubridate::as_datetime(trtstdt), lubridate::as_datetime(trtendt), by = "day"), size = 1)
      c4d1_date <- min(lubridate::date(c2d1_date + lubridate::days(60)), trtendt)

      tibble::tibble(
        STUDYID = pinfo$STUDYID,
        SITEID = pinfo$SITEID,
        USUBJID = pinfo$USUBJID,
        PARAMCD = as.factor(c(rep("OVRINV", 6), "BESRSPI", "INVET")),
        PARAM = as.factor(dplyr::recode(
          PARAMCD,
          OVRINV = "Overall Response by Investigator - by visit",
          OVRSPI = "Best Overall Response by Investigator (no confirmation required)",
          BESRSPI = "Best Confirmed Overall Response by Investigator",
          INVET = "Investigator End Of Induction Response"
        )),
        AVALC = c(
          rsp_screen, rsp_bsl, rsp_c2d1, rsp_c4d1, rsp_eoi, rsp_fu,
          names(param_codes)[best_rsp],
          rsp_eoi
        ),
        AVAL = param_codes[AVALC],
        AVISIT = factor(c(avisit, avisit[best_rsp_i], avisit[5]), levels = avisit)
      ) %>%
        merge(
          tibble::tibble(
            AVISIT = avisit,
            ADTM = c(scr_date, bs_date, c2d1_date, c4d1_date, eoi_date, flu_date),
            AVISITN = c(-1, 0, 2, 4, 999, 999),
            TRTSDTM = pinfo$TRTSDTM
          ) %>%
            dplyr::mutate(
              ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))
            ) %>%
            dplyr::select(-"TRTSDTM"),
          by = "AVISIT"
        )
    }) %>%
    Reduce(rbind, .) %>%
    dplyr::mutate(AVALC = factor(AVALC, levels = names(param_codes))) %>%
    rcd_var_relabel(
      STUDYID = "Study Identifier",
      USUBJID = "Unique Subject Identifier"
    )

  adrs <- rcd_var_relabel(
    adrs,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # merge ADSL to be able to add RS date and study day variables


  adrs <- dplyr::inner_join(
    dplyr::select(adrs, -"SITEID"),
    adsl,
    by = c("STUDYID", "USUBJID")
  )

  adrs <- adrs %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(RSSEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = RSSEQ) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      PARAMCD,
      AVISITN,
      ADTM,
      RSSEQ
    )

  if (length(na_vars) > 0 && na_percentage > 0) {
    adrs <- mutate_na(ds = adrs, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  adrs <- apply_metadata(adrs, "metadata/ADRS.yml")

  return(adrs)
}

#' Previous and Concomitant Medications Analysis Dataset (ADCM)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating random Concomitant Medication Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per each record in the corresponding SDTM domain.
#'
#' Keys: `STUDYID`, `USUBJID`, `ASTDTM`, `CMSEQ`
#'
#' @inheritParams argument_convention
#' @param max_n_cms (`integer`)\cr Maximum number of concomitant medications per patient. Defaults to 10.
#' @param who_coding (`flag`)\cr Whether WHO coding (with multiple paths per medication) should be used.
#' @template param_cached
#' @templateVar data adcm
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adcm <- radcm(adsl, seed = 2)
#' adcm
#'
#' adcm_who <- radcm(adsl, seed = 2, who_coding = TRUE)
#' adcm_who
radcm <- function(adsl,
                  max_n_cms = 10L,
                  lookup = NULL,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(CMCLAS = c(NA, 0.1), CMDECOD = c(1234, 0.1), ATIREL = c(1234, 0.1)),
                  who_coding = FALSE,
                  cached = FALSE) {
  # Simulates a concomitant-medication data set: every subject in `adsl` gets a
  # random number of medication records drawn from a lookup table, which are
  # then joined to `adsl` and enriched with random dates and dosing variables.
  checkmate::assert_flag(cached)
  if (cached) {
    # nothing is simulated; the result comes straight from get_cached_data()
    return(get_cached_data("cadcm"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_integer(max_n_cms, len = 1, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  # the upper bound above is inclusive, so 1 itself is rejected separately
  checkmate::assert_true(na_percentage < 1)
  checkmate::assert_flag(who_coding)

  # medication lookup: a user-supplied table wins, otherwise use the default
  # class / decoded name / relative-timing combinations
  checkmate::assert_data_frame(lookup, null.ok = TRUE)
  lookup_cm <- if (!is.null(lookup)) {
    lookup
  } else {
    tibble::tribble(
      ~CMCLAS, ~CMDECOD, ~ATIREL,
      "medcl A", "medname A_1/3", "PRIOR",
      "medcl A", "medname A_2/3", "CONCOMITANT",
      "medcl A", "medname A_3/3", "CONCOMITANT",
      "medcl B", "medname B_1/4", "CONCOMITANT",
      "medcl B", "medname B_2/4", "PRIOR",
      "medcl B", "medname B_3/4", "PRIOR",
      "medcl B", "medname B_4/4", "CONCOMITANT",
      "medcl C", "medname C_1/2", "CONCOMITANT",
      "medcl C", "medname C_2/2", "CONCOMITANT"
    )
  }

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  # per subject: draw between 0 and `max_n_cms` lookup rows (with replacement)
  adcm <- Map(function(id, sid) {
    n_cms <- sample(c(0, seq_len(max_n_cms)), 1)
    i <- sample(seq_len(nrow(lookup_cm)), n_cms, TRUE)
    dplyr::mutate(
      lookup_cm[i, ],
      USUBJID = id,
      STUDYID = sid
    )
  }, adsl$USUBJID, adsl$STUDYID) %>%
    Reduce(rbind, .) %>%
    # Move the identifier columns to the front by name (USUBJID, then STUDYID,
    # the same order the former positional index produced for a three-column
    # lookup) so that a `lookup` with another number of columns keeps STUDYID.
    dplyr::select("USUBJID", "STUDYID", dplyr::everything()) %>%
    dplyr::mutate(CMCAT = CMCLAS)

  adcm <- rcd_var_relabel(
    adcm,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # merge ADSL to be able to add CM date and study day variables
  adcm <- dplyr::inner_join(
    adcm,
    adsl,
    by = c("STUDYID", "USUBJID")
  ) %>%
    dplyr::rowwise() %>%
    # treatment end date; falls back to start date + study duration when missing
    dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
      is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
      TRUE ~ TRTEDTM
    ))) %>%
    # start: one random day between treatment start and treatment end
    dplyr::mutate(ASTDTM = sample(
      seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"),
      size = 1
    )) %>%
    dplyr::mutate(ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))) %>%
    # add 1 to end of range in case both values passed to sample() are the same
    dplyr::mutate(AENDTM = sample(
      seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"),
      size = 1
    )) %>%
    dplyr::mutate(AENDY = ceiling(difftime(AENDTM, TRTSDTM, units = "days"))) %>%
    dplyr::select(-TRTENDT) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(STUDYID, USUBJID, ASTDTM)

  adcm <- adcm %>%
    # sequence numbers follow the start-date ordering established above
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(CMSEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = CMSEQ) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(STUDYID, USUBJID, ASTDTM, CMSEQ) %>%
    # the 9th character of CMDECOD is the class letter in the default lookup
    # ("medname A_1/3" -> "A"); custom lookups are read at the same position
    dplyr::mutate(
      ATC1 = paste("ATCCLAS1", substr(CMDECOD, 9, 9)),
      ATC2 = paste("ATCCLAS2", substr(CMDECOD, 9, 9)),
      ATC3 = paste("ATCCLAS3", substr(CMDECOD, 9, 9)),
      ATC4 = paste("ATCCLAS4", substr(CMDECOD, 9, 9))
    ) %>%
    dplyr::mutate(CMINDC = sample(c(
      "Nausea", "Hypertension", "Urticaria", "Fever",
      "Asthma", "Infection", "Diabete", "Diarrhea", "Pneumonia"
    ), dplyr::n(), replace = TRUE)) %>%
    dplyr::mutate(CMDOSE = sample(1:99, dplyr::n(), replace = TRUE)) %>%
    dplyr::mutate(CMTRT = substr(CMDECOD, 9, 13)) %>%
    dplyr::mutate(CMDOSU = sample(c(
      "ug/mL", "ug/kg/day", "%", "uL", "DROP",
      "umol/L", "mg", "mg/breath", "ug"
    ), dplyr::n(), replace = TRUE)) %>%
    dplyr::mutate(CMROUTE = sample(c(
      "INTRAVENOUS", "ORAL", "NASAL",
      "INTRAMUSCULAR", "SUBCUTANEOUS", "INHALED", "RECTAL", "UNKNOWN"
    ), dplyr::n(), replace = TRUE)) %>%
    dplyr::mutate(CMDOSFRQ = sample(c(
      "Q4W", "QN", "Q4H", "UNKNOWN", "TWICE",
      "Q4H", "QD", "TID", "4 TIMES PER MONTH"
    ), dplyr::n(), replace = TRUE)) %>%
    dplyr::mutate(
      # use 1 year as reference time point
      CMSTRTPT = dplyr::case_when(
        ASTDY <= 365 ~ "BEFORE",
        ASTDY > 365 ~ "AFTER",
        is.na(ASTDY) ~ "U"
      ),
      CMENRTPT = dplyr::case_when(
        EOSSTT %in% c("COMPLETED", "DISCONTINUED") ~ "BEFORE",
        EOSSTT == "ONGOING" ~ "ONGOING",
        is.na(EOSSTT) ~ "U"
      ),
      # duration is end minus start; AENDTM is sampled at or after ASTDTM, so
      # the result is never negative
      ADURN = as.numeric(difftime(AENDTM, ASTDTM, units = "days")),
      ADURU = "days"
    )


  # Optional WHO coding, which adds more `ATC` paths for randomly selected `CMDECOD`.
  if (who_coding) {
    # half of the lookup medications (rounded up) get a second path ...
    n_cmdecod_path2 <- ceiling(nrow(lookup_cm) / 2)
    cmdecod_path2 <- sample(lookup_cm$CMDECOD, n_cmdecod_path2)
    adcm_path2 <- adcm %>%
      dplyr::filter(CMDECOD %in% cmdecod_path2) %>%
      dplyr::mutate(
        ATC1 = paste(ATC1, "p2"),
        ATC2 = paste(ATC2, "p2"),
        ATC3 = paste(ATC3, "p2"),
        ATC4 = paste(ATC4, "p2")
      )

    # ... and half of those (rounded up) additionally get a third path
    n_cmdecod_path3 <- ceiling(length(cmdecod_path2) / 2)
    cmdecod_path3 <- sample(cmdecod_path2, n_cmdecod_path3)
    adcm_path3 <- adcm %>%
      dplyr::filter(CMDECOD %in% cmdecod_path3) %>%
      dplyr::mutate(
        ATC1 = paste(ATC1, "p3"),
        ATC2 = paste(ATC2, "p3"),
        ATC3 = paste(ATC3, "p3"),
        ATC4 = paste(ATC4, "p3")
      )

    # the extra paths are appended as additional rows after the originals
    adcm <- dplyr::bind_rows(
      adcm,
      adcm_path2,
      adcm_path3
    )
  }

  # the code columns simply mirror the ATC text columns
  adcm <- adcm %>%
    dplyr::mutate(
      ATC1CD = ATC1,
      ATC2CD = ATC2,
      ATC3CD = ATC3,
      ATC4CD = ATC4
    )

  # missing values are only injected when both a variable list and a
  # positive percentage are given
  if (length(na_vars) > 0 && na_percentage > 0) {
    adcm <- mutate_na(ds = adcm, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  adcm <- apply_metadata(adcm, "metadata/ADCM.yml")

  return(adcm)
}

#' Questionnaires Analysis Dataset (ADQS)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Questionnaires Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per subject per parameter per analysis visit per analysis date.
#'
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `AVISITN`
#'
#' @inheritParams argument_convention
#' @template param_cached
#' @templateVar data adqs
#'
#' @return `data.frame`
#' @export
#'
#' @author npaszty
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adqs <- radqs(adsl, visit_format = "WEEK", n_assessments = 7L, seed = 2)
#' adqs
#'
#' adqs <- radqs(adsl, visit_format = "CYCLE", n_assessments = 3L, seed = 2)
#' adqs
radqs <- function(adsl,
                  param = c(
                    "BFI All Questions",
                    "Fatigue Interference",
                    "Function/Well-Being (GF1,GF3,GF7)",
                    "Treatment Side Effects (GP2,C5,GP5)",
                    "FKSI-19 All Questions"
                  ),
                  paramcd = c("BFIALL", "FATIGI", "FKSI-FWB", "FKSI-TSE", "FKSIALL"),
                  visit_format = "WEEK",
                  n_assessments = 5L,
                  n_days = 5L,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(
                    LOQFL = c(NA, 0.1), ABLFL2 = c(1234, 0.1), ABLFL = c(1235, 0.1),
                    CHG2 = c(1235, 0.1), PCHG2 = c(1235, 0.1), CHG = c(1234, 0.1), PCHG = c(1234, 0.1)
                  ),
                  cached = FALSE) {
  # Simulates a questionnaire data set: one row per subject, parameter and
  # visit, with random scores, screening/baseline flags, change-from-baseline
  # variables and random assessment dates, joined to `adsl`.
  checkmate::assert_flag(cached)
  if (cached) {
    # nothing is simulated; the result comes straight from get_cached_data()
    return(get_cached_data("cadqs"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_character(param, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE)
  checkmate::assert_string(visit_format)
  checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE)
  checkmate::assert_integer(n_days, len = 1, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  # the upper bound above is inclusive, so 1 itself is rejected separately
  checkmate::assert_true(na_percentage < 1)

  # validate and initialize param vectors
  param_init_list <- relvar_init(param, paramcd)

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  # full cross of subjects x parameters x scheduled visits
  adqs <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = adsl$USUBJID,
    PARAM = param_init_list$relvar1,
    AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days),
    stringsAsFactors = FALSE
  )

  # NOTE(review): as.numeric(AVISIT) here and nlevels(AVISIT) further down only
  # make sense if visit_schedule() returns a factor -- confirm against its
  # definition.
  adqs <- dplyr::mutate(
    adqs,
    AVISITN = dplyr::case_when(
      AVISIT == "SCREENING" ~ -1,
      AVISIT == "BASELINE" ~ 0,
      (grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
      TRUE ~ NA_real_
    )
  )

  # assign related variable values: PARAMxPARAMCD are related
  adqs <- adqs %>% rel_var(
    var_name = "PARAMCD",
    related_var = "PARAM",
    var_values = param_init_list$relvar2
  )

  # score = N(50, 8) noise plus a random per-row slope (N(5, 2)) times the visit number
  adqs$AVAL <- stats::rnorm(nrow(adqs), mean = 50, sd = 8) + adqs$AVISITN * stats::rnorm(nrow(adqs), mean = 5, sd = 2)

  # order to prepare for change from screening and baseline values
  adqs <- adqs[order(adqs$STUDYID, adqs$USUBJID, adqs$PARAMCD, adqs$AVISITN), ]

  # per subject: restore the subject's own STUDYID and derive the flags
  adqs <- Reduce(
    rbind,
    lapply(
      split(adqs, adqs$USUBJID),
      function(x) {
        x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])]
        # ABLFL2 marks the screening record
        x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "")
        # ABLFL marks the baseline record, whose visit name depends on the format
        x$ABLFL <- ifelse(
          toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE",
          "Y",
          ifelse(
            toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1",
            "Y",
            ""
          )
        )
        # scores below 32 are flagged
        x$LOQFL <- ifelse(x$AVAL < 32, "Y", "N")
        x
      }
    )
  )

  # BASE2 is derived from the screening-flagged rows, BASE from the
  # baseline-flagged rows; BASE is left NA on the screening rows themselves
  adqs$BASE2 <- retain(adqs, adqs$AVAL, adqs$ABLFL2 == "Y")
  adqs$BASE <- ifelse(adqs$ABLFL2 != "Y", retain(adqs, adqs$AVAL, adqs$ABLFL == "Y"), NA)

  # absolute and percent change relative to each reference value
  adqs <- adqs %>%
    dplyr::mutate(CHG2 = AVAL - BASE2) %>%
    dplyr::mutate(PCHG2 = 100 * (CHG2 / BASE2)) %>%
    dplyr::mutate(CHG = AVAL - BASE) %>%
    dplyr::mutate(PCHG = 100 * (CHG / BASE)) %>%
    rcd_var_relabel(
      STUDYID = attr(adsl$STUDYID, "label"),
      USUBJID = attr(adsl$USUBJID, "label")
    )

  adqs <- rcd_var_relabel(
    adqs,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # merge ADSL to be able to add QS date and study day variables
  adqs <- dplyr::inner_join(
    adqs,
    adsl,
    by = c("STUDYID", "USUBJID")
  ) %>%
    dplyr::rowwise() %>%
    # treatment end date; falls back to start date + study duration when missing
    dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
      is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
      TRUE ~ TRTEDTM
    ))) %>%
    dplyr::ungroup()

  # Per subject: draw one distinct day per visit level inside the treatment
  # window, sort them, and repeat each day across the rows of that visit
  # (rows are ordered by visit number first, so the blocks line up).
  adqs <- adqs %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::arrange(USUBJID, AVISITN) %>%
    dplyr::mutate(ADTM = rep(
      sort(sample(
        seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"),
        size = nlevels(AVISIT)
      )),
      each = dplyr::n() / nlevels(AVISIT)
    )) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>%
    dplyr::select(-TRTENDT) %>%
    dplyr::arrange(STUDYID, USUBJID, ADTM)

  # sequence numbers follow the date ordering established above
  adqs <- adqs %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(QSSEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = QSSEQ) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      PARAMCD,
      AVISITN,
      ADTM,
      QSSEQ
    )

  # missing values are only injected when both a variable list and a
  # positive percentage are given
  if (length(na_vars) > 0 && na_percentage > 0) {
    adqs <- mutate_na(ds = adqs, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  adqs <- apply_metadata(adqs, "metadata/ADQS.yml")

  return(adqs)
}

#' Vital Signs Analysis Dataset (ADVS)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Vital Signs Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per subject per parameter per analysis visit per analysis date.
#'
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `BASETYPE`, `AVISITN`, `ATPTN`, `DTYPE`, `ADTM`, `VSSEQ`, `ASPID`
#'
#' @inheritParams argument_convention
#' @template param_cached
#' @templateVar data advs
#'
#' @return `data.frame`
#' @export
#'
#' @author npaszty
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' advs <- radvs(adsl, visit_format = "WEEK", n_assessments = 7L, seed = 2)
#' advs
#'
#' advs <- radvs(adsl, visit_format = "CYCLE", n_assessments = 3L, seed = 2)
#' advs
radvs <- function(adsl,
                  param = c(
                    "Diastolic Blood Pressure",
                    "Pulse Rate",
                    "Respiratory Rate",
                    "Systolic Blood Pressure",
                    "Temperature", "Weight"
                  ),
                  paramcd = c("DIABP", "PULSE", "RESP", "SYSBP", "TEMP", "WEIGHT"),
                  paramu = c("Pa", "beats/min", "breaths/min", "Pa", "C", "Kg"),
                  visit_format = "WEEK",
                  n_assessments = 5L,
                  n_days = 5L,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(
                    CHG2 = c(1235, 0.1), PCHG2 = c(1235, 0.1), CHG = c(1234, 0.1), PCHG = c(1234, 0.1),
                    AVAL = c(123, 0.1), AVALU = c(123, 0.1)
                  ),
                  cached = FALSE) {
  # Simulates a vital-signs data set: one row per subject, parameter and visit,
  # with random measurements, reference ranges, screening/baseline flags,
  # change-from-baseline variables and random assessment dates, joined to `adsl`.
  checkmate::assert_flag(cached)
  if (cached) {
    # nothing is simulated; the result comes straight from get_cached_data()
    return(get_cached_data("cadvs"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_character(param, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramu, min.len = 1, any.missing = FALSE)
  checkmate::assert_string(visit_format)
  checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE)
  checkmate::assert_integer(n_days, len = 1, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  # the upper bound above is inclusive, so 1 itself is rejected separately
  checkmate::assert_true(na_percentage < 1)

  # validate and initialize param vectors
  param_init_list <- relvar_init(param, paramcd)
  unit_init_list <- relvar_init(param, paramu)

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  # full cross of subjects x parameters x scheduled visits; `n_days` is
  # forwarded so that the validated argument actually shapes the schedule
  advs <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = adsl$USUBJID,
    PARAM = as.factor(param_init_list$relvar1),
    AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days),
    stringsAsFactors = FALSE
  )

  # NOTE(review): as.numeric(AVISIT) here and nlevels(AVISIT) further down only
  # make sense if visit_schedule() returns a factor -- confirm against its
  # definition.
  advs <- dplyr::mutate(
    advs,
    AVISITN = dplyr::case_when(
      AVISIT == "SCREENING" ~ -1,
      AVISIT == "BASELINE" ~ 0,
      (grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
      TRUE ~ NA_real_
    )
  )

  advs$VSCAT <- "VITAL SIGNS"

  # assign related variable values: PARAMxPARAMCD are related
  advs <- advs %>% rel_var(
    var_name = "PARAMCD",
    related_var = "PARAM",
    var_values = param_init_list$relvar2
  )

  # assign related variable values: PARAMxAVALU are related
  advs <- advs %>% rel_var(
    var_name = "AVALU",
    related_var = "PARAM",
    var_values = unit_init_list$relvar2
  )

  advs <- advs %>%
    dplyr::mutate(VSTESTCD = PARAMCD) %>%
    dplyr::mutate(VSTEST = PARAM)

  # Each parameter position has its own normal distribution. Every branch
  # draws a full-length vector, so six rnorm() calls are consumed regardless
  # of how many rows match; rows matching none of the six codes get NA.
  advs <- advs %>% dplyr::mutate(AVAL = dplyr::case_when(
    PARAMCD == paramcd[1] ~ stats::rnorm(nrow(advs), mean = 100, sd = 20),
    PARAMCD == paramcd[2] ~ stats::rnorm(nrow(advs), mean = 80, sd = 15),
    PARAMCD == paramcd[3] ~ stats::rnorm(nrow(advs), mean = 16, sd = 5),
    PARAMCD == paramcd[4] ~ stats::rnorm(nrow(advs), mean = 150, sd = 30),
    PARAMCD == paramcd[5] ~ stats::rnorm(nrow(advs), mean = 36.65, sd = 1),
    PARAMCD == paramcd[6] ~ stats::rnorm(nrow(advs), mean = 70, sd = 20)
  ))

  # order to prepare for change from screening and baseline values
  advs <- advs[order(advs$STUDYID, advs$USUBJID, advs$PARAMCD, advs$AVISITN), ]

  # per subject: restore the subject's own STUDYID and derive the flags
  advs <- Reduce(rbind, lapply(split(advs, advs$USUBJID), function(x) {
    x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])]
    # ABLFL2 marks the screening record
    x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "")
    # ABLFL marks the baseline record, whose visit name depends on the format
    x$ABLFL <- ifelse(
      toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE",
      "Y",
      ifelse(
        toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1",
        "Y",
        ""
      )
    )
    x
  }))

  # BASE2 is derived from the screening-flagged rows, BASE from the
  # baseline-flagged rows; BASE is left NA on the screening rows themselves
  advs$BASE2 <- retain(advs, advs$AVAL, advs$ABLFL2 == "Y")
  advs$BASE <- ifelse(advs$ABLFL2 != "Y", retain(advs, advs$AVAL, advs$ABLFL == "Y"), NA)

  advs <- advs %>%
    # absolute and percent change relative to each reference value
    dplyr::mutate(CHG2 = AVAL - BASE2) %>%
    dplyr::mutate(PCHG2 = 100 * (CHG2 / BASE2)) %>%
    dplyr::mutate(CHG = AVAL - BASE) %>%
    dplyr::mutate(PCHG = 100 * (CHG / BASE)) %>%
    # reference range limits are keyed on the default parameter codes
    dplyr::mutate(ANRLO = dplyr::case_when(
      PARAMCD == "DIABP" ~ 80,
      PARAMCD == "PULSE" ~ 60,
      PARAMCD == "RESP" ~ 12,
      PARAMCD == "SYSBP" ~ 120,
      PARAMCD == "TEMP" ~ 36.1,
      PARAMCD == "WEIGHT" ~ 40
    )) %>%
    dplyr::mutate(ANRHI = dplyr::case_when(
      PARAMCD == "DIABP" ~ 120,
      PARAMCD == "PULSE" ~ 100,
      PARAMCD == "RESP" ~ 20,
      PARAMCD == "SYSBP" ~ 180,
      PARAMCD == "TEMP" ~ 37.2,
      PARAMCD == "WEIGHT" ~ 100
    )) %>%
    dplyr::mutate(ANRIND = factor(dplyr::case_when(
      AVAL < ANRLO ~ "LOW",
      AVAL > ANRHI ~ "HIGH",
      TRUE ~ "NORMAL"
    ))) %>%
    # VSSTRESC holds a comparison fragment ("<80", ">20", ...) per parameter
    dplyr::mutate(VSSTRESC = dplyr::case_when(
      PARAMCD == "DIABP" ~ "<80",
      PARAMCD == "PULSE" ~ "<60",
      PARAMCD == "RESP" ~ ">20",
      PARAMCD == "SYSBP" ~ ">180",
      PARAMCD == "TEMP" ~ "<36.1",
      PARAMCD == "WEIGHT" ~ "<40"
    )) %>%
    dplyr::rowwise() %>%
    # LOQFL is "Y" when the row's value satisfies its VSSTRESC comparison; the
    # text "<AVAL> <fragment>" is parsed and evaluated one row at a time
    dplyr::mutate(LOQFL = factor(
      ifelse(eval(parse(text = paste(AVAL, VSSTRESC))), "Y", "N")
    )) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(BASETYPE = "LAST") %>%
    # BNRIND copies the range indicator of the baseline-flagged row to every
    # row of the same subject / parameter / base type
    dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>%
    dplyr::mutate(BNRIND = ANRIND[ABLFL == "Y"]) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(ATPTN = 1) %>%
    dplyr::mutate(DTYPE = NA) %>%
    rcd_var_relabel(
      USUBJID = attr(adsl$USUBJID, "label"),
      STUDYID = attr(adsl$STUDYID, "label")
    )

  advs <- rcd_var_relabel(
    advs,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # merge ADSL to be able to add VS date and study day variables
  advs <- dplyr::inner_join(
    advs,
    adsl,
    by = c("STUDYID", "USUBJID")
  ) %>%
    dplyr::rowwise() %>%
    # treatment end date; falls back to start date + study duration when missing
    dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
      is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
      TRUE ~ TRTEDTM
    ))) %>%
    dplyr::ungroup()

  # Per subject: draw one distinct day per visit level inside the treatment
  # window, sort them, and repeat each day across the rows of that visit
  # (rows are ordered by visit number first, so the blocks line up).
  advs <- advs %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::arrange(USUBJID, AVISITN) %>%
    dplyr::mutate(ADTM = rep(
      sort(sample(
        seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"),
        size = nlevels(AVISIT)
      )),
      each = dplyr::n() / nlevels(AVISIT)
    )) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>%
    dplyr::select(-TRTENDT) %>%
    dplyr::arrange(STUDYID, USUBJID, ADTM)

  # every visit other than screening and baseline counts as on-treatment
  advs <- advs %>% dplyr::mutate(ONTRTFL = factor(dplyr::case_when(
    !AVISIT %in% c("SCREENING", "BASELINE") ~ "Y",
    TRUE ~ ""
  )))

  advs <- advs %>%
    # ASPID is a random permutation of the row numbers of the whole data set
    dplyr::mutate(ASPID = sample(seq_len(dplyr::n()))) %>%
    # sequence numbers follow the date ordering established above
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(VSSEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = VSSEQ) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      PARAMCD,
      BASETYPE,
      AVISITN,
      ATPTN,
      DTYPE,
      ADTM,
      VSSEQ,
      ASPID
    )

  # missing values are only injected when both a variable list and a
  # positive percentage are given
  if (length(na_vars) > 0 && na_percentage > 0) {
    advs <- mutate_na(ds = advs, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  advs <- apply_metadata(advs, "metadata/ADVS.yml")

  return(advs)
}

#' Adverse Event Analysis Dataset (ADAE)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating random Adverse Event Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per each record in the corresponding SDTM domain.
#'
#' Keys: `STUDYID`, `USUBJID`, `ASTDTM`, `AETERM`, `AESEQ`
#'
#' @inheritParams argument_convention
#' @param max_n_aes (`integer`)\cr Maximum number of AEs per patient. Defaults to 10.
#' @template param_cached
#' @templateVar data adae
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1)
#'
#' adae <- radae(adsl, seed = 2)
#' adae
#'
#' # Add metadata.
#' aag <- utils::read.table(
#'   sep = ",", header = TRUE,
#'   text = paste(
#'     "NAMVAR,SRCVAR,GRPTYPE,REFNAME,REFTERM,SCOPE",
#'     "CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd D.2.1.5.3,",
#'     "CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd A.1.1.1.1,",
#'     "SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd C.1.1.1.3,BROAD",
#'     "SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd B.2.2.3.1,BROAD",
#'     "SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Y.9.9.9.9,NARROW",
#'     "SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Z.9.9.9.9,NARROW",
#'     sep = "\n"
#'   ), stringsAsFactors = FALSE
#' )
#'
#' adae <- radae(adsl, lookup_aag = aag)
#'
#' with(
#'   adae,
#'   cbind(
#'     table(AEDECOD, SMQ01NAM),
#'     table(AEDECOD, CQ01NAM)
#'   )
#' )
radae <- function(adsl,
                  max_n_aes = 10L,
                  lookup = NULL,
                  lookup_aag = NULL,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(
                    AEBODSYS = c(NA, 0.1),
                    AEDECOD = c(1234, 0.1),
                    AETOXGR = c(1234, 0.1)
                  ),
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadae"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_integer(max_n_aes, len = 1, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  # check lookup parameters
  checkmate::assert_data_frame(lookup, null.ok = TRUE)
  lookup_ae <- if (!is.null(lookup)) {
    lookup
  } else {
    tibble::tribble(
      ~AEBODSYS, ~AELLT, ~AEDECOD, ~AEHLT, ~AEHLGT, ~AETOXGR, ~AESOC, ~AESER, ~AEREL,
      "cl A.1", "llt A.1.1.1.1", "dcd A.1.1.1.1", "hlt A.1.1.1", "hlgt A.1.1", "1", "cl A", "N", "N",
      "cl A.1", "llt A.1.1.1.2", "dcd A.1.1.1.2", "hlt A.1.1.1", "hlgt A.1.1", "2", "cl A", "Y", "N",
      "cl B.1", "llt B.1.1.1.1", "dcd B.1.1.1.1", "hlt B.1.1.1", "hlgt B.1.1", "5", "cl B", "Y", "Y",
      "cl B.2", "llt B.2.1.2.1", "dcd B.2.1.2.1", "hlt B.2.1.2", "hlgt B.2.1", "3", "cl B", "N", "N",
      "cl B.2", "llt B.2.2.3.1", "dcd B.2.2.3.1", "hlt B.2.2.3", "hlgt B.2.2", "1", "cl B", "Y", "N",
      "cl C.1", "llt C.1.1.1.3", "dcd C.1.1.1.3", "hlt C.1.1.1", "hlgt C.1.1", "4", "cl C", "N", "Y",
      "cl C.2", "llt C.2.1.2.1", "dcd C.2.1.2.1", "hlt C.2.1.2", "hlgt C.2.1", "2", "cl C", "N", "Y",
      "cl D.1", "llt D.1.1.1.1", "dcd D.1.1.1.1", "hlt D.1.1.1", "hlgt D.1.1", "5", "cl D", "Y", "Y",
      "cl D.1", "llt D.1.1.4.2", "dcd D.1.1.4.2", "hlt D.1.1.4", "hlgt D.1.1", "3", "cl D", "N", "N",
      "cl D.2", "llt D.2.1.5.3", "dcd D.2.1.5.3", "hlt D.2.1.5", "hlgt D.2.1", "1", "cl D", "N", "Y"
    )
  }

  checkmate::assert_data_frame(lookup_aag, null.ok = TRUE)
  aag <- if (!is.null(lookup_aag)) {
    lookup_aag
  } else {
    aag <- utils::read.table(
      sep = ",", header = TRUE,
      text = paste(
        "NAMVAR,SRCVAR,GRPTYPE,REFNAME,REFTERM,SCOPE",
        "CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd D.2.1.5.3,",
        "CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd A.1.1.1.1,",
        "SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd C.1.1.1.3,BROAD",
        "SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd B.2.2.3.1,BROAD",
        "SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Y.9.9.9.9,NARROW",
        "SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Z.9.9.9.9,NARROW",
        sep = "\n"
      ), stringsAsFactors = FALSE
    )
  }

  if (!is.null(seed)) set.seed(seed)
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  adae <- Map(
    function(id, sid) {
      n_aes <- sample(c(0, seq_len(max_n_aes)), 1)
      i <- sample(seq_len(nrow(lookup_ae)), n_aes, TRUE)
      dplyr::mutate(
        lookup_ae[i, ],
        USUBJID = id,
        STUDYID = sid
      )
    },
    adsl$USUBJID,
    adsl$STUDYID
  ) %>%
    Reduce(rbind, .) %>%
    `[`(c(10, 11, 1, 2, 3, 4, 5, 6, 7, 8, 9)) %>%
    dplyr::mutate(AETERM = gsub("dcd", "trm", AEDECOD)) %>%
    dplyr::mutate(AESEV = dplyr::case_when(
      AETOXGR == 1 ~ "MILD",
      AETOXGR %in% c(2, 3) ~ "MODERATE",
      AETOXGR %in% c(4, 5) ~ "SEVERE"
    ))

  adae <- rcd_var_relabel(
    adae,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # merge adsl to be able to add AE date and study day variables
  adae <- dplyr::inner_join(adae, adsl, by = c("STUDYID", "USUBJID")) %>%
    dplyr::rowwise() %>%
    dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
      is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
      TRUE ~ TRTEDTM
    ))) %>%
    dplyr::mutate(ASTDTM = sample(
      seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"),
      size = 1
    )) %>%
    dplyr::mutate(ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))) %>%
    # add 1 to end of range incase both values passed to sample() are the same
    dplyr::mutate(AENDTM = sample(
      seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"),
      size = 1
    )) %>%
    dplyr::mutate(AENDY = ceiling(difftime(AENDTM, TRTSDTM, units = "days"))) %>%
    dplyr::mutate(LDOSEDTM = dplyr::case_when(
      TRTSDTM < ASTDTM ~ lubridate::as_datetime(stats::runif(1, TRTSDTM, ASTDTM)),
      TRUE ~ ASTDTM
    )) %>%
    dplyr::mutate(LDRELTM = as.numeric(difftime(ASTDTM, LDOSEDTM, units = "mins"))) %>%
    dplyr::select(-TRTENDT) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(STUDYID, USUBJID, ASTDTM, AETERM)

  adae <- adae %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(AESEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = AESEQ) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      ASTDTM,
      AETERM,
      AESEQ
    )

  outcomes <- c(
    "UNKNOWN",
    "NOT RECOVERED/NOT RESOLVED",
    "RECOVERED/RESOLVED WITH SEQUELAE",
    "RECOVERING/RESOLVING",
    "RECOVERED/RESOLVED"
  )

  actions <- c(
    "DOSE RATE REDUCED",
    "UNKNOWN",
    "NOT APPLICABLE",
    "DRUG INTERRUPTED",
    "DRUG WITHDRAWN",
    "DOSE INCREASED",
    "DOSE NOT CHANGED",
    "DOSE REDUCED",
    "NOT EVALUABLE"
  )

  adae <- adae %>%
    dplyr::mutate(AEOUT = factor(ifelse(
      AETOXGR == "5",
      "FATAL",
      as.character(sample_fct(outcomes, nrow(adae), prob = c(0.1, 0.2, 0.1, 0.3, 0.3)))
    ))) %>%
    dplyr::mutate(AEACN = factor(ifelse(
      AETOXGR == "5",
      "NOT EVALUABLE",
      as.character(sample_fct(actions, nrow(adae), prob = c(0.05, 0.05, 0.05, 0.01, 0.05, 0.1, 0.45, 0.1, 0.05)))
    ))) %>%
    dplyr::mutate(AESDTH = dplyr::case_when(
      AEOUT == "FATAL" ~ "Y",
      TRUE ~ "N"
    )) %>%
    dplyr::mutate(TRTEMFL = ifelse(ASTDTM >= TRTSDTM, "Y", "")) %>%
    dplyr::mutate(AECONTRT = sample(c("Y", "N"), prob = c(0.4, 0.6), size = dplyr::n(), replace = TRUE)) %>%
    dplyr::mutate(
      ANL01FL = ifelse(TRTEMFL == "Y" & ASTDTM <= TRTEDTM + lubridate::month(1), "Y", "")
    ) %>%
    dplyr::mutate(ANL01FL = ifelse(is.na(ANL01FL), "", ANL01FL))

  adae <- adae %>%
    dplyr::mutate(AERELNST = sample(c("Y", "N"), prob = c(0.4, 0.6), size = dplyr::n(), replace = TRUE)) %>%
    dplyr::mutate(AEACNOTH = sample(
      x = c("MEDICATION", "PROCEDURE/SURGERY", "SUBJECT DISCONTINUED FROM STUDY", "NONE"),
      prob = c(0.2, 0.4, 0.2, 0.2),
      size = dplyr::n(),
      replace = TRUE
    ))

  # Split metadata for AEs of special interest (AESI).
  l_aag <- split(aag, interaction(aag$NAMVAR, aag$SRCVAR, aag$GRPTYPE, drop = TRUE))

  # Create AESI flags
  l_aesi <- lapply(l_aag, function(d_adag, d_adae) {
    names(d_adag)[names(d_adag) == "REFTERM"] <- d_adag$SRCVAR[1]
    names(d_adag)[names(d_adag) == "REFNAME"] <- d_adag$NAMVAR[1]

    if (d_adag$GRPTYPE[1] == "CUSTOM") {
      d_adag <- d_adag[-which(names(d_adag) == "SCOPE")]
    } else if (d_adag$GRPTYPE[1] == "SMQ") {
      names(d_adag)[names(d_adag) == "SCOPE"] <- paste0(substr(d_adag$NAMVAR[1], 1, 5), "SC")
    }

    d_adag <- d_adag[-which(names(d_adag) %in% c("NAMVAR", "SRCVAR", "GRPTYPE"))]
    d_new <- dplyr::left_join(x = d_adae, y = d_adag, by = intersect(names(d_adae), names(d_adag)))
    d_new[, dplyr::setdiff(names(d_new), names(d_adae)), drop = FALSE]
  }, adae)

  adae <- dplyr::bind_cols(adae, l_aesi)

  adae <- dplyr::mutate(adae, AERELNST = sample(
    x = c("CONCURRENT ILLNESS", "OTHER", "DISEASE UNDER STUDY", "NONE"),
    prob = c(0.3, 0.3, 0.3, 0.1),
    size = dplyr::n(),
    replace = TRUE
  ))


  adae <- adae %>%
    dplyr::mutate(AES_FLAG = sample(
      x = c("AESLIFE", "AESHOSP", "AESDISAB", "AESCONG", "AESMIE"),
      prob = c(0.1, 0.2, 0.2, 0.2, 0.3),
      size = dplyr::n(),
      replace = TRUE
    )) %>%
    dplyr::mutate(AES_FLAG = dplyr::case_when(
      AESDTH == "Y" ~ "AESDTH",
      TRUE ~ AES_FLAG
    )) %>%
    dplyr::mutate(
      AESCONG = ifelse(AES_FLAG == "AESCONG", "Y", "N"),
      AESDISAB = ifelse(AES_FLAG == "AESDISAB", "Y", "N"),
      AESHOSP = ifelse(AES_FLAG == "AESHOSP", "Y", "N"),
      AESLIFE = ifelse(AES_FLAG == "AESLIFE", "Y", "N"),
      AESMIE = ifelse(AES_FLAG == "AESMIE", "Y", "N")
    ) %>%
    dplyr::select(-"AES_FLAG")

  if (length(na_vars) > 0 && na_percentage > 0) {
    adae <- mutate_na(ds = adae, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  adae <- apply_metadata(adae, "metadata/ADAE.yml")

  return(adae)
}

#' Pharmacokinetics Parameters Dataset (ADPP)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Pharmacokinetics Parameters Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per study, subject, parameter category, parameter and visit.
#'
#' `ppspec`, `paramcd`, `param`, `paramu` and `aval_mean` are matched by position and must
#' therefore all have the same length. `AVAL` is drawn from a normal distribution with mean 1
#' and standard deviation 0.2 and then multiplied by the `aval_mean` entry of the record's
#' parameter.
#'
#' @inheritParams argument_convention
#' @param ppcat (`character vector`)\cr Categories of parameters.
#' @param ppspec (`character vector`)\cr Specimen material types.
#' @template param_cached
#' @templateVar data adpp
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adpp <- radpp(adsl, seed = 2)
#' adpp
radpp <- function(adsl,
                  ppcat = c("Plasma Drug X", "Plasma Drug Y", "Metabolite Drug X", "Metabolite Drug Y"),
                  ppspec = c(
                    "Plasma", "Plasma", "Plasma", "Matrix of PD", "Matrix of PD",
                    "Urine", "Urine", "Urine", "Urine"
                  ),
                  paramcd = c(
                    "AUCIFO", "CMAX", "CLO", "RMAX", "TON",
                    "RENALCL", "RENALCLD", "RCAMINT", "RCPCINT"
                  ),
                  param = c(
                    "AUC Infinity Obs", "Max Conc", "Total CL Obs", "Time of Maximum Response",
                    "Time to Onset", "Renal CL", "Renal CL Norm by Dose",
                    "Amt Rec from T1 to T2", "Pct Rec from T1 to T2"
                  ),
                  paramu = c("day*ug/mL", "ug/mL", "ml/day/kg", "hr", "hr", "L/hr", "L/hr/mg", "mg", "%"),
                  aval_mean = c(200, 30, 5, 10, 3, 0.05, 0.005, 1.5613, 15.65),
                  visit_format = "CYCLE",
                  n_days = 2L,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(
                    AVAL = c(NA, 0.1)
                  ),
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    # Return the cached ADPP dataset (previously the cached ADLB dataset was requested by mistake).
    return(get_cached_data("cadpp"))
  }

  checkmate::assert_character(ppcat)
  checkmate::assert_character(ppspec)
  checkmate::assert_character(paramcd)
  checkmate::assert_character(param)
  checkmate::assert_character(paramu)
  checkmate::assert_vector(aval_mean)
  checkmate::assert_string(visit_format)
  checkmate::assert_integer(n_days)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)
  checkmate::assert_list(na_vars)

  # the parameter-level vectors are matched by position, so their lengths must agree
  checkmate::assertTRUE(length(ppspec) == length(paramcd))
  checkmate::assertTRUE(length(ppspec) == length(param))
  checkmate::assertTRUE(length(ppspec) == length(paramu))
  checkmate::assertTRUE(length(ppspec) == length(aval_mean))

  if (!is.null(seed)) {
    set.seed(seed)
  }

  # validate and initialize related variables
  ppspec_init_list <- relvar_init(param, ppspec)
  param_init_list <- relvar_init(param, paramcd)
  unit_init_list <- relvar_init(param, paramu)

  # full cross of subjects, parameter categories, parameters and visits
  adpp <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = adsl$USUBJID,
    PPCAT = as.factor(ppcat),
    PARAM = as.factor(param_init_list$relvar1),
    AVISIT = visit_schedule(visit_format = visit_format, n_assessments = 1L, n_days = n_days),
    stringsAsFactors = FALSE
  )
  # draw a relative value around 1 and scale it by the parameter-specific mean
  adpp <- adpp %>%
    dplyr::mutate(AVAL = stats::rnorm(nrow(adpp), mean = 1, sd = 0.2)) %>%
    dplyr::left_join(data.frame(PARAM = param, ADJUST = aval_mean), by = "PARAM") %>%
    dplyr::mutate(AVAL = AVAL * ADJUST) %>%
    dplyr::select(-"ADJUST")

  # assign related variable values: PARAMxPPSPEC are related
  adpp <- adpp %>% rel_var(
    var_name = "PPSPEC",
    related_var = "PARAM",
    var_values = ppspec_init_list$relvar2
  )

  # assign related variable values: PARAMxPARAMCD are related
  adpp <- adpp %>% rel_var(
    var_name = "PARAMCD",
    related_var = "PARAM",
    var_values = param_init_list$relvar2
  )

  # assign related variable values: PARAMxAVALU are related
  adpp <- adpp %>% rel_var(
    var_name = "AVALU",
    related_var = "PARAM",
    var_values = unit_init_list$relvar2
  )

  # derive AVISITN based AVISIT and AVALC based on AVAL
  # NOTE(review): as.numeric(AVISIT) is only meaningful if visit_schedule() returns a factor
  # (the integer level code is used) - confirm against visit_schedule().
  adpp <- adpp %>%
    dplyr::mutate(AVALC = as.character(AVAL)) %>%
    dplyr::mutate(
      AVISITN = dplyr::case_when(
        AVISIT == "SCREENING" ~ 0,
        (grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 1,
        TRUE ~ NA_real_
      )
    )

  # derive REGIMEN variable
  adpp <- adpp %>% dplyr::mutate(REGIMEN = "BID")

  # derive PPSTINT and PPENINT based on PARAMCD
  # each RCAMINT/RCPCINT record is duplicated, once per collection interval (0-12H and 0-24H)
  t1_t2 <- data.frame(
    PARAMCD = c("RCAMINT", "RCAMINT", "RCPCINT", "RCPCINT"),
    PPSTINT = c("P0H", "P0H", "P0H", "P0H"),
    PPENINT = c("P12H", "P24H", "P12H", "P24H")
  )
  adpp <- adpp %>%
    dplyr::left_join(t1_t2, by = c("PARAMCD"), multiple = "all", relationship = "many-to-many")

  # add subject-level variables; drop placebo subjects and Drug Y categories for Drug X subjects
  adpp <- dplyr::inner_join(adpp, adsl, by = c("STUDYID", "USUBJID")) %>%
    dplyr::filter(
      ACTARM != "B: Placebo",
      !(ACTARM == "A: Drug X" & (PPCAT == "Plasma Drug Y" | PPCAT == "Metabolite Drug Y"))
    )

  # derive PKARMCD column for creating more cohorts
  # rows are cut, in their current order, into ten consecutive blocks of equal size
  adpp <- adpp %>%
    dplyr::mutate(PKARMCD = factor(1 + (seq_len(nrow(adpp)) - 1) %/% (nrow(adpp) / 10), labels = c(
      "Drug A", "Drug B", "Drug C", "Drug D", "Drug E", "Drug F", "Drug G", "Drug H",
      "Drug I", "Drug J"
    )))

  if (length(na_vars) > 0 && na_percentage > 0) {
    adpp <- mutate_na(ds = adpp, na_vars = na_vars, na_percentage = na_percentage)
  }

  adpp <- apply_metadata(adpp, "metadata/ADPP.yml")
  return(adpp)
}

#' Simulate Sex-Specific Weight, Height and BMI
#'
#' Draws a weight and a height for every distinct subject/sex combination found in `df`
#' from normal distributions whose mean and standard deviation depend on the subject's sex,
#' and derives BMI as weight divided by squared height. The defaults for the means and
#' standard deviations are expressed in kilograms and metres; values are generated in
#' whatever units the supplied means and standard deviations use.
#'
#' @details One record per subject. The random number generator is always re-seeded
#'   with `seed` before any value is drawn.
#'
#' @inheritParams argument_convention
#' @param df (`data.frame`)\cr Analysis dataset.
#' @param id_var (`character`)\cr Patient identifier variable name.
#' @param sex_var (`character`)\cr Name of variable representing sex of patient.
#' @param sex_var_level_male (`character`)\cr Level of `sex_var` representing males. Every
#'   other level is simulated with the female parameters.
#' @param male_weight_in_kg (named `list`)\cr List of means and SDs of male weights in kilograms.
#' @param female_weight_in_kg (named `list`)\cr List of means and SDs of female weights in kilograms.
#' @param male_height_in_m (named `list`)\cr List of means and SDs of male heights in metres.
#' @param female_height_in_m (named `list`)\cr List of means and SDs of female heights in metres.
#'
#' @return a dataframe holding `id_var`, `sex_var` and the simulated `WEIGHT`, `HEIGHT` and
#'   `BMI` columns.
#' @keywords internal
h_anthropometrics_by_sex <- function(df,
                                     seed = 1,
                                     id_var = "USUBJID",
                                     sex_var = "SEX",
                                     sex_var_level_male = "M",
                                     male_weight_in_kg = list(mean = 90.6, sd = 44.9),
                                     female_weight_in_kg = list(mean = 77.5, sd = 46.2),
                                     male_height_in_m = list(mean = 1.75, sd = 0.14),
                                     female_height_in_m = list(mean = 1.61, sd = 0.24)) {
  # Input validation: data, column names, then the four mean/sd specifications.
  checkmate::assert_data_frame(df)
  checkmate::assert_string(id_var)
  checkmate::assert_string(sex_var)
  checkmate::assert_string(sex_var_level_male)
  checkmate::assert_list(male_weight_in_kg, types = "numeric")
  checkmate::assert_subset(names(male_weight_in_kg), choices = c("mean", "sd"))
  checkmate::assert_list(female_weight_in_kg, types = "numeric")
  checkmate::assert_subset(names(female_weight_in_kg), choices = c("mean", "sd"))
  checkmate::assert_list(male_height_in_m, types = "numeric")
  checkmate::assert_subset(names(male_height_in_m), choices = c("mean", "sd"))
  checkmate::assert_list(female_height_in_m, types = "numeric")
  checkmate::assert_subset(names(female_height_in_m), choices = c("mean", "sd"))

  set.seed(seed)

  # One row per distinct id/sex pair; the number of draws is the number of distinct ids.
  subject_sex <- unique(subset(df, select = c(id_var, sex_var)))
  n_subjects <- length(unique(df[[id_var]]))

  # The columns are evaluated in order (WEIGHT, HEIGHT, BMI). ifelse() only evaluates a
  # branch that is actually needed, which determines how many random numbers are consumed.
  dplyr::mutate(
    subject_sex,
    WEIGHT = ifelse(
      .data[[sex_var]] == sex_var_level_male,
      stats::rnorm(n = n_subjects, mean = male_weight_in_kg$mean, sd = male_weight_in_kg$sd),
      stats::rnorm(n = n_subjects, mean = female_weight_in_kg$mean, sd = female_weight_in_kg$sd)
    ),
    HEIGHT = ifelse(
      .data[[sex_var]] == sex_var_level_male,
      stats::rnorm(n = n_subjects, mean = male_height_in_m$mean, sd = male_height_in_m$sd),
      stats::rnorm(n = n_subjects, mean = female_height_in_m$mean, sd = female_height_in_m$sd)
    ),
    BMI = WEIGHT / HEIGHT^2
  )
}

#' Subcategory Analysis Dataset (ADSUB)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Subcategory Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per subject per parameter, all at the `BASELINE` visit.
#'
#' Values are only generated for the parameter codes `BWGHTSI`, `BHGHTSI`, `BBMISI`, `BECOG`
#' and `BBMRKR1`; any other parameter code gets a missing `AVAL`.
#'
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `AVISITN`, `ADTM`, `SRCSEQ`
#'
#' @inheritParams argument_convention
#' @template param_cached
#' @templateVar data adsub
#'
#' @return `data.frame`
#' @export
#'
#' @author tomlinsj, npaszty, Xuefeng Hou, dipietrc
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adsub <- radsub(adsl, seed = 2)
#' adsub
radsub <- function(adsl,
                   param = c(
                     "Baseline Weight",
                     "Baseline Height",
                     "Baseline BMI",
                     "Baseline ECOG",
                     "Baseline Biomarker Mutation"
                   ),
                   paramcd = c("BWGHTSI", "BHGHTSI", "BBMISI", "BECOG", "BBMRKR1"),
                   seed = NULL,
                   na_percentage = 0,
                   na_vars = list(),
                   cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadsub"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_character(param, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  # Validate and initialize related variables.
  param_init_list <- relvar_init(param, paramcd)

  if (!is.null(seed)) {
    set.seed(seed)
  }

  # One row per subject and parameter, all at the baseline visit.
  adsub <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = adsl$USUBJID,
    PARAM = as.factor(param_init_list$relvar1),
    AVISIT = "BASELINE",
    stringsAsFactors = FALSE
  )

  # Assign related variable values: PARAM and PARAMCD are related.
  adsub <- adsub %>% rel_var(
    var_name = "PARAMCD",
    related_var = "PARAM",
    var_values = param_init_list$relvar2
  )

  adsub <- adsub[order(adsub$STUDYID, adsub$USUBJID, adsub$PARAMCD), ]

  adsub <- rcd_var_relabel(
    adsub,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # Merge ADSL to be able to derive the analysis date.
  # Sample ADTM to be 1 to 10 days before TRTSDTM; one draw per subject, shared by all of
  # the subject's records.
  adsub <- dplyr::inner_join(
    adsub,
    adsl,
    by = c("STUDYID", "USUBJID")
  ) %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(ADTM = rep(
      lubridate::date(TRTSDTM)[1] - lubridate::days(sample(1:10, size = 1)),
      each = dplyr::n()
    )) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(STUDYID, USUBJID, ADTM)

  # Generate a dataset with height, weight and BMI measurements for each subject.
  # The helper always calls set.seed(): with `seed` if one was supplied, otherwise with its
  # own default.
  if (!is.null(seed)) {
    df_with_measurements <- h_anthropometrics_by_sex(adsub, seed = seed)
  } else {
    df_with_measurements <- h_anthropometrics_by_sex(adsub)
  }

  # Add this to adsub and create other measurements.
  adsub <- adsub %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(
      AVAL = dplyr::case_when(
        PARAMCD ==
          "BWGHTSI" ~ df_with_measurements$WEIGHT[df_with_measurements$USUBJID == USUBJID],
        PARAMCD ==
          "BHGHTSI" ~ df_with_measurements$HEIGHT[df_with_measurements$USUBJID == USUBJID],
        PARAMCD ==
          "BBMISI" ~ df_with_measurements$BMI[df_with_measurements$USUBJID == USUBJID],
        PARAMCD == "BECOG" ~ sample(c(0, 1, 2, 3, 4, 5), 1),
        PARAMCD == "BBMRKR1" ~ sample(c(1, 2), prob = c(0.5, 0.5), 1)
      )
    ) %>%
    dplyr::arrange(PARAMCD) %>%
    dplyr::ungroup() %>%
    # Continuous measurements keep one decimal; the coded parameters (ECOG, biomarker) are
    # rounded to whole numbers. The two conditions must be combined with `&`: with `|` the
    # test is true for every record and the whole-number branch can never be reached.
    dplyr::mutate(AVAL = dplyr::case_when(
      PARAMCD != "BBMRKR1" & PARAMCD != "BECOG" ~ round(AVAL, 1),
      TRUE ~ round(AVAL)
    ))

  adsub <- adsub %>%
    dplyr::mutate(
      # Character result: decoded biomarker status, otherwise the numeric value as text.
      AVALC = dplyr::case_when(
        PARAMCD == "BBMRKR1" ~ dplyr::case_when(
          AVAL == "1" ~ "WILD TYPE",
          AVAL == "2" ~ "MUTANT",
          TRUE ~ ""
        ),
        TRUE ~ as.character(AVAL)
      ),
      AVALU = dplyr::case_when(
        PARAMCD == "BWGHTSI" ~ "kg",
        PARAMCD == "BHGHTSI" ~ "m",
        PARAMCD == "BBMISI" ~ "kg/m2",
        TRUE ~ ""
      ),
      # Analysis categories exist for BMI and ECOG only.
      AVALCAT1 = dplyr::case_when(
        PARAMCD == "BBMISI" ~ dplyr::case_when(
          AVAL < 18.5 ~ "<18.5",
          AVAL >= 18.5 & AVAL < 25 ~ "18.5 - 24.9",
          AVAL >= 25 & AVAL < 30 ~ "25 - 29.9",
          TRUE ~ ">30"
        ),
        PARAMCD == "BECOG" ~ dplyr::case_when(
          AVAL <= 1 ~ "0-1",
          AVAL > 1 & AVAL <= 3 ~ "2-3",
          TRUE ~ "4-5"
        ),
        TRUE ~ ""
      ),
      AVISITN = "0",
      SRCSEQ = "1"
    ) %>%
    dplyr::arrange(
      USUBJID,
      factor(PARAMCD, levels = c("BWGHTSI", "BHGHTSI", "BBMISI", "BECOG", "BBMRKR1"))
    )

  if (length(na_vars) > 0 && na_percentage > 0) {
    adsub <- mutate_na(ds = adsub, na_vars = na_vars, na_percentage = na_percentage)
  }

  # Apply metadata.
  adsub <- apply_metadata(adsub, "metadata/ADSUB.yml")

  return(adsub)
}

#' Anti-Drug Antibody Analysis Dataset (ADAB)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Anti-Drug Antibody Analysis Dataset for a given
#' Subject-Level Analysis Dataset and Pharmacokinetics Analysis Dataset.
#'
#' @inheritParams argument_convention
#' @inheritParams radpc
#' @param adpc (`data.frame`)\cr Pharmacokinetics Analysis Dataset.
#' @template param_cached
#' @templateVar data adab
#'
#' @return `data.frame`
#' @export
#'
#' @details One record per study per subject per parameter per time point: "R1800000", "RESULT1", "R1800001", "RESULT2".
#'
#' All remaining parameters are subject-level summaries: they are generated once per subject
#' with a missing `VISIT` and are derived from the visit-level results.
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#' adpc <- radpc(adsl, seed = 2, duration = 9 * 7)
#'
#' adab <- radab(adsl, adpc, seed = 2)
#' adab
radab <- function(adsl,
                  adpc,
                  constants = c(D = 100, ka = 0.8, ke = 1),
                  paramcd = c(
                    "R1800000", "RESULT1", "R1800001", "RESULT2", "ADASTAT1", "INDUCD1", "ENHANC1",
                    "TRUNAFF1", "EMERNEG1", "EMERPOS1", "PERSADA1", "TRANADA1", "BFLAG1", "TIMADA1",
                    "ADADUR1", "ADASTAT2", "INDUCD2", "ENHANC2", "EMERNEG2", "EMERPOS2", "BFLAG2",
                    "TRUNAFF2"
                  ),
                  param = c(
                    "Antibody titer units", "ADA interpreted per sample result",
                    "Neutralizing Antibody titer units", "NAB interpreted per sample result",
                    "ADA Status of a patient", "Treatment induced ADA", "Treatment enhanced ADA",
                    "Treatment unaffected", "Treatment Emergent - Negative",
                    "Treatment Emergent - Positive", "Persistent ADA", "Transient ADA", "Baseline",
                    "Time to onset of ADA", "ADA Duration", "NAB Status of a patient",
                    "Treatment induced ADA, Neutralizing Antibody",
                    "Treatment enhanced ADA, Neutralizing Antibody",
                    "Treatment Emergent - Negative, Neutralizing Antibody",
                    "Treatment Emergent - Positive, Neutralizing Antibody",
                    "Baseline, Neutralizing Antibody",
                    "Treatment unaffected, Neutralizing Antibody"
                  ),
                  avalu = c(
                    "titer", "", "titer", "", "", "", "", "", "", "", "", "", "", "weeks", "weeks",
                    "", "", "", "", "", "", ""
                  ),
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(
                    AVAL = c(NA, 0.1)
                  ),
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadab"))
  }

  checkmate::assert_data_frame(adpc)
  # only the names of `constants` are validated; its values are not used in this function
  checkmate::assert_subset(names(constants), c("D", "ka", "ke"))
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_list(na_vars)
  checkmate::assert_character(paramcd)
  checkmate::assert_character(param, len = length(paramcd))
  checkmate::assert_character(avalu, len = length(paramcd))
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  if (!is.null(seed)) {
    set.seed(seed)
  }

  # validate and initialize related variables
  param_init_list <- relvar_init(param, paramcd)
  unit_init_list <- relvar_init(param, avalu)

  # visit-level records: the first four parameters, for every subject and plasma PK visit
  adpc <- adpc %>% dplyr::filter(ASMED == "PLASMA")
  adab0 <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = unique(adsl$USUBJID),
    VISIT = unique(adpc$VISIT),
    PARAM = as.factor(param_init_list$relvar1[c(1:4)]),
    PARCAT1 = "A: Drug X Antibody",
    stringsAsFactors = FALSE
  )
  # Set random values for observations
  visit_lvl_params <- c(
    "Antibody titer units", "Neutralizing Antibody titer units",
    "ADA interpreted per sample result", "NAB interpreted per sample result"
  )
  # one random draw per subject and visit, shared by all four visit-level parameters
  aval_random <- stats::rnorm(nrow(unique(adab0 %>% dplyr::select(USUBJID, VISIT))), mean = 1, sd = 0.2)
  aval_random <- cbind(unique(adab0 %>% dplyr::select(USUBJID, VISIT)), AVAL1 = aval_random)

  adab_visit <- adab0 %>% dplyr::left_join(aval_random, by = c("USUBJID", "VISIT"))
  # a draw of at least 1 is a positive sample: titer parameters keep the draw as value,
  # interpreted-result parameters are coded 1 (positive) / 0 (negative)
  adab_visit <- adab_visit %>%
    dplyr::mutate(
      AVAL2 = ifelse(AVAL1 >= 1, AVAL1, NA),
      AVALC = dplyr::case_when(
        !is.na(AVAL2) ~ "POSITIVE",
        is.na(AVAL2) ~ "NEGATIVE"
      ),
      AVAL = dplyr::case_when(
        (PARAM %in% visit_lvl_params[3:4] & !is.na(AVAL2)) ~ 1,
        (PARAM %in% visit_lvl_params[3:4] & is.na(AVAL2)) ~ 0,
        (PARAM %in% visit_lvl_params[1:2] & !is.na(AVAL2)) ~ AVAL2,
        TRUE ~ as.numeric(NA)
      )
    ) %>%
    dplyr::select(-c(AVAL1, AVAL2))

  # retrieve other variables from adpc
  adab_visit <- adab_visit %>%
    dplyr::inner_join(
      adpc %>%
        dplyr::filter(PCTPT %in% c("Predose", "24H")) %>%
        dplyr::select(
          STUDYID,
          USUBJID,
          VISIT,
          PCTPT,
          ARM,
          ACTARM,
          VISITDY,
          AFRLT,
          NFRLT,
          ARRLT,
          NRRLT,
          RELTMU
        ) %>%
        unique(),
      by = c("STUDYID", "USUBJID", "VISIT")
    ) %>%
    dplyr::rename(ISTPT = PCTPT)

  # mutate time from dose variables from adpc to convert into Days
  adab_visit <- adab_visit %>% dplyr::mutate_at(c("AFRLT", "NFRLT", "ARRLT", "NRRLT"), ~ . / 24)



  # Set random values for subject level parameters (Y/N)
  # (parameters 5-13 and 16-22; 14 and 15 are the numeric ones handled further below)

  adab1 <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = unique(adpc$USUBJID),
    VISIT = NA,
    PARAM = as.factor(param_init_list$relvar1[c(5:13, 16:22)]),
    PARCAT1 = "A: Drug X Antibody",
    stringsAsFactors = FALSE
  )

  aval_random_sub <- stats::rbinom(nrow(unique(adab1 %>% dplyr::select(USUBJID))), 1, 0.5)
  aval_random_sub <- cbind(unique(adab1 %>% dplyr::select(USUBJID)), AVAL1 = aval_random_sub)

  # placeholder values; they are overwritten by the derivation of subject-level variables below
  adab_sub <- adab1 %>% dplyr::left_join(aval_random_sub, by = c("USUBJID"))
  adab_sub <- adab_sub %>%
    dplyr::mutate(
      AVAL = AVAL1,
      AVALC = dplyr::case_when(
        PARAM %in% c("ADA Status of a patient", "NAB Status of a patient") & AVAL1 == 1 ~ "POSITIVE",
        PARAM %in% c("ADA Status of a patient", "NAB Status of a patient") & AVAL1 == 0 ~ "NEGATIVE",
        !(PARAM %in% c("ADA Status of a patient", "NAB Status of a patient")) & AVAL1 == 1 ~ "Y",
        !(PARAM %in% c("ADA Status of a patient", "NAB Status of a patient")) & AVAL1 == 0 ~ "N"
      )
    ) %>%
    dplyr::select(-c(AVAL1))

  # Set random values for subject level parameters (numeric)
  # (parameters 14 and 15: time to onset and duration)

  adab2 <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = unique(adpc$USUBJID),
    VISIT = NA,
    PARAM = as.factor(param_init_list$relvar1[c(14, 15)]),
    PARCAT1 = "A: Drug X Antibody",
    stringsAsFactors = FALSE
  )

  aval_random_sub_num <- stats::rnorm(nrow(unique(adab2 %>% dplyr::select(USUBJID))), mean = 1, sd = 1)
  aval_random_sub_num <- cbind(unique(adab2 %>% dplyr::select(USUBJID)), AVAL1 = aval_random_sub_num)

  adab_sub_num <- adab2 %>% dplyr::left_join(aval_random_sub_num, by = c("USUBJID"))
  adab_sub_num <- adab_sub_num %>%
    dplyr::mutate(
      AVAL = ifelse(AVAL1 >= 1, round(AVAL1, 2), NA),
      AVALC = as.character(AVAL)
    ) %>%
    dplyr::select(-c(AVAL1))


  adab <- dplyr::bind_rows(adab_visit, adab_sub, adab_sub_num)


  # assign related variable values: PARAMxPARAMCD are related
  adab <- adab %>% rel_var(
    var_name = "PARAMCD",
    related_var = "PARAM",
    var_values = param_init_list$relvar2
  )

  # assign related variable values: PARAMxAVALU are related
  adab <- adab %>% rel_var(
    var_name = "AVALU",
    related_var = "PARAM",
    var_values = unit_init_list$relvar2
  )


  # NOTE(review): the trailing group_by()/ungroup() pair performs no grouped computation; it is
  # kept on purpose because it may be what converts `adab` to a tibble - confirm before removing.
  adab <- adab %>%
    dplyr::mutate(
      RELTMU = "day",
      ABLFL = ifelse(!is.na(NFRLT) & NFRLT == 0, "Y", NA) # Baseline Record Flag
      ,
      ADABLPFL = ifelse(PARAMCD == "RESULT1" & !is.na(NFRLT) & NFRLT == 0, "Y", NA)
      # Baseline ADA Eval. Param-Level Flag, only populate for ADA, not for NAB
      ,
      ADPBLPFL = ifelse(PARAMCD == "RESULT1" & !is.na(NFRLT) & NFRLT > 0 & !is.na(AVAL), "Y", NA)
      # Post-Baseline ADA Eval. Param-Level Flag, only populate for ADA, not for NAB
    ) %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::ungroup()

  # create temporary flags to derive subject-level variables
  adab_subj <- adab %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(
      pos_bl = any(PARAM == "ADA interpreted per sample result" & !is.na(ABLFL) & AVALC == "POSITIVE"),
      pos_bl_nab = any(PARAM == "NAB interpreted per sample result" & !is.na(ABLFL) & AVALC == "POSITIVE"),
      any_pos_postbl = any(PARAM == "ADA interpreted per sample result" & is.na(ABLFL) & AVALC == "POSITIVE"),
      any_pos_postbl_nab = any(PARAM == "NAB interpreted per sample result" & is.na(ABLFL) & AVALC == "POSITIVE"),
      pos_last_postbl = any(PARAM == "ADA interpreted per sample result" & NFRLT == max(NFRLT) & AVALC == "POSITIVE"),
      ada_bl = AVAL[PARAM == "Antibody titer units" & !is.na(ABLFL)],
      nab_bl = AVAL[PARAM == "Neutralizing Antibody titer units" & !is.na(ABLFL)]
    )
  # per-subject totals (adab_subj is still grouped by USUBJID, so summarise() yields one row each)
  pos_tots <- adab_subj %>%
    dplyr::summarise(
      n_pos = sum(PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE"),
      inc_postbl = sum(PARAM == "ADA interpreted per sample result" & is.na(ABLFL) & (AVAL - ada_bl) > 0.60),
      inc_postbl_nab = sum(PARAM == "NAB interpreted per sample result" & is.na(ABLFL) & (AVAL - nab_bl) > 0.60),
      onset_ada = if (any(PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE")) {
        min(NFRLT[PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE"])
      } else {
        NA
      },
      last_ada = if (any(PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE")) {
        max(NFRLT[PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE"])
      } else {
        NA
      }
    )
  adab_subj <- adab_subj %>%
    dplyr::left_join(pos_tots, by = "USUBJID") %>%
    dplyr::select(
      USUBJID,
      NFRLT,
      pos_bl,
      pos_bl_nab,
      any_pos_postbl,
      any_pos_postbl_nab,
      inc_postbl,
      inc_postbl_nab,
      pos_last_postbl,
      n_pos,
      onset_ada,
      last_ada
    ) %>%
    unique()

  # add flags to ADAB dataset
  adab <- adab %>%
    dplyr::left_join(adab_subj, by = c("USUBJID", "NFRLT"))

  # derive subject-level variables
  # (only the non-visit-level records are replaced; onset/duration are divided by 7 to
  # express the day-based NFRLT in weeks)
  adab[!(adab$PARAM %in% visit_lvl_params), ] <- adab %>%
    dplyr::filter(!(PARAM %in% visit_lvl_params)) %>%
    dplyr::mutate(
      # nolint start indentation_linter
      AVALC = dplyr::case_when(
        (PARAM == "ADA Status of a patient" & any_pos_postbl) ~ "POSITIVE",
        (PARAM == "ADA Status of a patient" & !any_pos_postbl) ~ "NEGATIVE",
        (PARAM == "Treatment induced ADA" & !pos_bl & any_pos_postbl) ~ "Y",
        (PARAM == "Treatment enhanced ADA" & pos_bl & inc_postbl > 0) ~ "Y",
        (PARAM == "Treatment unaffected" & pos_bl & (inc_postbl == 0 | !any_pos_postbl)) ~ "Y",
        (PARAM == "Treatment Emergent - Positive" &
          ((!pos_bl & any_pos_postbl) | (pos_bl & inc_postbl > 0))) ~ "Y",
        (PARAM == "Treatment Emergent - Negative" &
          !((!pos_bl & any_pos_postbl) | (pos_bl & inc_postbl > 0))) ~ "Y",
        (PARAM == "Persistent ADA" & pos_last_postbl) ~ "Y",
        (PARAM == "Transient ADA" &
          (n_pos - pos_bl - pos_last_postbl == 1 | n_pos > 1)) ~ "Y",
        (PARAM == "Baseline" & pos_bl) ~ "POSITIVE",
        (PARAM == "Baseline" & !pos_bl) ~ "NEGATIVE",
        (PARAM == "Time to onset of ADA") ~ as.character(onset_ada / 7),
        (PARAM == "ADA Duration") ~ as.character((last_ada - onset_ada) / 7),
        (PARAM == "NAB Status of a patient" & any_pos_postbl_nab) ~ "POSITIVE",
        (PARAM == "NAB Status of a patient" & !any_pos_postbl_nab) ~ "NEGATIVE",
        (PARAM == "Treatment induced ADA, Neutralizing Antibody" &
          !pos_bl_nab & any_pos_postbl_nab) ~ "Y",
        (PARAM == "Treatment enhanced ADA, Neutralizing Antibody" &
          pos_bl_nab & inc_postbl_nab > 0) ~ "Y",
        (PARAM == "Baseline, Neutralizing Antibody" & pos_bl_nab) ~ "POSITIVE",
        (PARAM == "Baseline, Neutralizing Antibody" & !pos_bl_nab) ~ "NEGATIVE",
        (PARAM == "Treatment unaffected, Neutralizing Antibody" & pos_bl_nab &
          (inc_postbl_nab == 0 | !any_pos_postbl_nab)) ~ "Y",
        (PARAM == "Treatment Emergent - Positive, Neutralizing Antibody" &
          ((!pos_bl_nab & any_pos_postbl_nab) | (pos_bl_nab & inc_postbl_nab > 0))) ~ "Y",
        (PARAM == "Treatment Emergent - Negative, Neutralizing Antibody" &
          !((!pos_bl_nab & any_pos_postbl_nab) | (pos_bl_nab & inc_postbl_nab > 0))) ~ "Y",
        TRUE ~ "N"
      ),
      AVAL = dplyr::case_when(
        (PARAM == "ADA Status of a patient" & any_pos_postbl) ~ 1,
        (PARAM == "Treatment induced ADA" & !pos_bl & any_pos_postbl) ~ 1,
        (PARAM == "Treatment enhanced ADA" & pos_bl & inc_postbl > 0) ~ 1,
        (PARAM == "Treatment unaffected" & pos_bl & (inc_postbl == 0 | !any_pos_postbl)) ~ 1,
        (PARAM == "Treatment Emergent - Positive" &
          ((!pos_bl & any_pos_postbl) | (pos_bl & inc_postbl > 0))) ~ 1,
        (PARAM == "Treatment Emergent - Negative" &
          !((!pos_bl & any_pos_postbl) | (pos_bl & inc_postbl > 0))) ~ 1,
        (PARAM == "Persistent ADA" & pos_last_postbl) ~ 1,
        (PARAM == "Transient ADA" &
          (n_pos - ifelse(pos_bl, 1, 0) - ifelse(pos_last_postbl, 1, 0) == 1 | n_pos > 1)) ~ 1,
        (PARAM == "Baseline" & pos_bl) ~ 1,
        (PARAM == "Time to onset of ADA") ~ onset_ada / 7,
        (PARAM == "ADA Duration") ~ (last_ada - onset_ada) / 7,
        (PARAM == "NAB Status of a patient" & any_pos_postbl_nab) ~ 1,
        (PARAM == "Treatment induced ADA, Neutralizing Antibody" &
          !pos_bl_nab & any_pos_postbl_nab) ~ 1,
        (PARAM == "Treatment enhanced ADA, Neutralizing Antibody" &
          pos_bl_nab & inc_postbl_nab > 0) ~ 1,
        (PARAM == "Baseline, Neutralizing Antibody" & pos_bl_nab) ~ 1,
        (PARAM == "Treatment unaffected, Neutralizing Antibody" & pos_bl_nab &
          (inc_postbl_nab == 0 | !any_pos_postbl_nab)) ~ 1,
        (PARAM == "Treatment Emergent - Positive, Neutralizing Antibody" &
          ((!pos_bl_nab & any_pos_postbl_nab) | (pos_bl_nab & inc_postbl_nab > 0))) ~ 1,
        (PARAM == "Treatment Emergent - Negative, Neutralizing Antibody" &
          !((!pos_bl_nab & any_pos_postbl_nab) | (pos_bl_nab & inc_postbl_nab > 0))) ~ 1,
        TRUE ~ 0
      ),
      # nolint end indentation_linter
      PARCAT1 = dplyr::case_when(
        PARAM %in% c(
          "Neutralizing Antibody titer units", "NAB interpreted per sample result",
          "NAB Status of a patient", "Treatment induced ADA, Neutralizing Antibody",
          "Treatment enhanced ADA, Neutralizing Antibody",
          "Treatment Emergent - Negative, Neutralizing Antibody",
          "Treatment Emergent - Positive, Neutralizing Antibody",
          "Treatment unaffected, Neutralizing Antibody"
        ) ~ "A: Drug X Neutralizing Antibody",
        TRUE ~ PARCAT1
      )
    )

  # remove intermediate flag variables from adab
  adab <- adab %>%
    dplyr::select(-c(
      pos_bl,
      pos_bl_nab,
      any_pos_postbl,
      any_pos_postbl_nab,
      pos_last_postbl,
      inc_postbl,
      inc_postbl_nab,
      n_pos,
      onset_ada,
      last_ada
    ))

  # Carry over ARM and ACTARM for all records.
  # (subject-level records have no adpc match, so their arms are looked up by subject)
  arm <- adab %>%
    dplyr::filter(!is.na(ARM), !is.na(ACTARM)) %>%
    dplyr::select(USUBJID, ARM, ACTARM) %>%
    dplyr::distinct()
  adab$ARM <- arm$ARM[match(adab$USUBJID, arm$USUBJID)]
  adab$ACTARM <- arm$ACTARM[match(adab$USUBJID, arm$USUBJID)]

  if (length(na_vars) > 0 && na_percentage > 0) {
    adab <- mutate_na(ds = adab, na_vars = na_vars, na_percentage = na_percentage)
  }

  adab <- apply_metadata(adab, "metadata/ADAB.yml")

  # Return explicitly: a function ending on an assignment returns its value invisibly,
  # so a bare call would print nothing.
  return(adab)
}

#' Subject-Level Analysis Dataset (ADSL)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The Subject-Level Analysis Dataset (ADSL) is used to provide the variables
#' that describe attributes of a subject. ADSL is a source for subject-level
#' variables used in other analysis data sets, such as population flags and
#' treatment variables. There is only one ADSL per study. ADSL and its related
#' metadata are required in a CDISC-based submission of data from a clinical
#' trial even if no other analysis data sets are submitted.
#'
#' @details One record per subject.
#'
#' Keys: `STUDYID`, `USUBJID`
#'
#' @inheritParams argument_convention
#' @param N (`numeric`)\cr Number of patients.
#' @param study_duration (`numeric`)\cr Duration of study in years.
#' @param with_trt02 (`logical`)\cr Should period 2 be added.
#' @param ae_withdrawal_prob (`proportion`)\cr Probability that there is at least one
#' Adverse Event leading to the withdrawal of a study drug.
#' @template param_cached
#' @templateVar data adsl
#'
#' @return `data.frame`. The study duration in seconds is attached as the
#'   `study_duration_secs` attribute.
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1)
#' adsl
#'
#' adsl <- radsl(
#'   N = 10, seed = 1,
#'   na_percentage = 0.1,
#'   na_vars = list(
#'     DTHDT = c(seed = 1234, percentage = 0.1),
#'     LSTALVDT = c(seed = 1234, percentage = 0.1)
#'   )
#' )
#' adsl
#'
#' adsl <- radsl(N = 10, seed = 1, na_percentage = .1)
#' adsl
radsl <- function(N = 400, # nolint
                  study_duration = 2,
                  seed = NULL,
                  with_trt02 = TRUE,
                  na_percentage = 0,
                  na_vars = list(
                    "AGE" = NA, "SEX" = NA, "RACE" = NA, "STRATA1" = NA, "STRATA2" = NA,
                    "BMRKR1" = c(seed = 1234, percentage = 0.1), "BMRKR2" = c(1234, 0.1), "BEP01FL" = NA
                  ),
                  ae_withdrawal_prob = 0.05,
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadsl"))
  }

  checkmate::assert_number(N)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(study_duration, lower = 1)
  # na_percentage must be a non-missing proportion strictly below 1
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)
  # used as a sampling probability (p, 1 - p) for AEWITHFL below
  checkmate::assert_number(ae_withdrawal_prob, lower = 0, upper = 1)

  if (!is.null(seed)) {
    set.seed(seed)
  }

  study_duration_secs <- lubridate::seconds(lubridate::years(study_duration))
  # fixed reference timestamp so that seeded runs are reproducible
  sys_dtm <- lubridate::fast_strptime("20/2/2019 11:16:16.683", "%d/%m/%Y %H:%M:%OS")
  # number of subjects that receive an early (discontinuation) treatment end
  discons <- max(1, floor((N * .3)))
  country_site_prob <- c(.5, .121, .077, .077, .075, .052, .046, .025, .014, .003)

  # NOTE: the order of the columns below fixes the order of the random draws;
  # reordering them changes the data generated for a given seed.
  adsl <- tibble::tibble(
    STUDYID = rep("AB12345", N),
    COUNTRY = sample_fct(
      c("CHN", "USA", "BRA", "PAK", "NGA", "RUS", "JPN", "GBR", "CAN", "CHE"),
      N,
      prob = country_site_prob
    ),
    SITEID = sample_fct(1:20, N, prob = rep(country_site_prob, times = 2)),
    SUBJID = paste("id", seq_len(N), sep = "-"),
    AGE = pmax(stats::rchisq(N, df = 5, ncp = 10), 0) + 20,
    AGEU = "YEARS",
    SEX = c("F", "M") %>% sample_fct(N, prob = c(.52, .48)),
    ARMCD = c("ARM A", "ARM B", "ARM C") %>% sample_fct(N),
    RACE = c(
      "ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE", "AMERICAN INDIAN OR ALASKA NATIVE",
      "MULTIPLE", "NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER", "OTHER", "UNKNOWN"
    ) %>%
      sample_fct(N, prob = c(.55, .23, .16, .05, .004, .003, .002, .002)),
    TRTSDTM = sys_dtm + sample(seq(0, study_duration_secs), size = N, replace = TRUE),
    RANDDT = lubridate::date(TRTSDTM - lubridate::days(floor(stats::runif(N, min = 0, max = 5)))),
    TRTEDTM = TRTSDTM + study_duration_secs,
    STRATA1 = c("A", "B", "C") %>% sample_fct(N),
    STRATA2 = c("S1", "S2") %>% sample_fct(N),
    BMRKR1 = stats::rchisq(N, 6),
    BMRKR2 = sample_fct(c("LOW", "MEDIUM", "HIGH"), N),
    BMEASIFL = sample_fct(c("Y", "N"), N),
    BEP01FL = sample_fct(c("Y", "N"), N),
    AEWITHFL = sample_fct(c("Y", "N"), N, prob = c(ae_withdrawal_prob, 1 - ae_withdrawal_prob))
  ) %>%
    dplyr::mutate(ARM = dplyr::recode(
      ARMCD,
      "ARM A" = "A: Drug X", "ARM B" = "B: Placebo", "ARM C" = "C: Combination"
    )) %>%
    dplyr::mutate(ACTARM = ARM) %>%
    dplyr::mutate(ACTARMCD = ARMCD) %>%
    dplyr::mutate(TRT01P = ARM) %>%
    dplyr::mutate(TRT01A = ACTARM) %>%
    dplyr::mutate(ITTFL = factor("Y")) %>%
    dplyr::mutate(SAFFL = factor("Y")) %>%
    dplyr::arrange(TRTSDTM)

  # randomly chosen subjects get an earlier treatment end date
  adds <- adsl[sample(nrow(adsl), discons), ] %>%
    dplyr::mutate(TRTEDTM_discon = sample(
      seq(from = max(TRTSDTM), to = sys_dtm + study_duration_secs, by = 1),
      size = discons,
      replace = TRUE
    )) %>%
    dplyr::select(SUBJID, TRTSDTM, TRTEDTM_discon) %>%
    dplyr::arrange(TRTSDTM)

  # subjects starting between the 1st and 2nd quartile of TRTSDTM (and not
  # discontinued) get a missing treatment end, i.e. they are still ongoing
  adsl <- dplyr::left_join(adsl, adds, by = c("SUBJID", "TRTSDTM")) %>%
    dplyr::mutate(TRTEDTM = dplyr::case_when(
      !is.na(TRTEDTM_discon) ~ TRTEDTM_discon,
      TRTSDTM >= stats::quantile(TRTSDTM)[2] &
        TRTSDTM <= stats::quantile(TRTSDTM)[3] ~ lubridate::as_datetime(NA),
      TRUE ~ TRTEDTM
    )) %>%
    dplyr::select(-"TRTEDTM_discon")

  # add period 2 if needed
  if (with_trt02) {
    # length of period 2; kept in its own variable so the `with_trt02` flag
    # is not overwritten
    period2_secs <- lubridate::seconds(lubridate::years(1))
    adsl <- adsl %>%
      dplyr::mutate(TRT02P = sample(ARM)) %>%
      dplyr::mutate(TRT02A = sample(ACTARM)) %>%
      dplyr::mutate(
        TRT01SDTM = TRTSDTM,
        AP01SDTM = TRT01SDTM,
        TRT01EDTM = TRTEDTM,
        AP01EDTM = TRT01EDTM,
        TRT02SDTM = TRTEDTM,
        AP02SDTM = TRT02SDTM,
        TRT02EDTM = TRT01EDTM + period2_secs,
        AP02EDTM = TRT02EDTM,
        TRTEDTM = TRT02EDTM
      )
  }

  adsl <- adsl %>%
    dplyr::mutate(EOSDT = lubridate::date(TRTEDTM)) %>%
    # units are fixed to days; automatic unit selection would depend on the
    # smallest difference present in the data
    dplyr::mutate(EOSDY = ceiling(difftime(TRTEDTM, TRTSDTM, units = "days"))) %>%
    dplyr::mutate(EOSSTT = dplyr::case_when(
      EOSDY == max(EOSDY, na.rm = TRUE) ~ "COMPLETED",
      EOSDY < max(EOSDY, na.rm = TRUE) ~ "DISCONTINUED",
      is.na(TRTEDTM) ~ "ONGOING"
    )) %>%
    dplyr::mutate(EOTSTT = EOSSTT)

  # disposition related variables
  # using probability of 1 for the "DEATH" level to ensure at least one death record exists
  l_dcsreas <- list(
    choices = c(
      "ADVERSE EVENT", "DEATH", "LACK OF EFFICACY", "PHYSICIAN DECISION",
      "PROTOCOL VIOLATION", "WITHDRAWAL BY PARENT/GUARDIAN", "WITHDRAWAL BY SUBJECT"
    ),
    prob = c(.2, 1, .1, .1, .2, .1, .1)
  )
  l_dthcat_other <- list(
    choices = c(
      "Post-study reporting of death", "LOST TO FOLLOW UP", "MISSING", "SUICIDE", "UNKNOWN"
    ),
    prob = c(.1, .3, .3, .2, .1)
  )

  adsl <- adsl %>%
    dplyr::mutate(
      DCSREAS = ifelse(
        EOSSTT == "DISCONTINUED",
        sample(x = l_dcsreas$choices, size = N, replace = TRUE, prob = l_dcsreas$prob),
        as.character(NA)
      )
    ) %>%
    dplyr::mutate(DTHFL = dplyr::case_when(
      DCSREAS == "DEATH" ~ "Y",
      TRUE ~ "N"
    )) %>%
    dplyr::mutate(
      DTHCAT = ifelse(
        DCSREAS == "DEATH",
        sample(x = c("ADVERSE EVENT", "PROGRESSIVE DISEASE", "OTHER"), size = N, replace = TRUE),
        as.character(NA)
      )
    ) %>%
    dplyr::mutate(DTHCAUS = dplyr::case_when(
      DTHCAT == "ADVERSE EVENT" ~ "ADVERSE EVENT",
      DTHCAT == "PROGRESSIVE DISEASE" ~ "DISEASE PROGRESSION",
      DTHCAT == "OTHER" ~ sample(
        x = l_dthcat_other$choices, size = N, replace = TRUE, prob = l_dthcat_other$prob
      ),
      TRUE ~ as.character(NA)
    )) %>%
    dplyr::mutate(ADTHAUT = dplyr::case_when(
      DTHCAUS %in% c("ADVERSE EVENT", "DISEASE PROGRESSION") ~ "Yes",
      DTHCAUS %in% c("UNKNOWN", "SUICIDE", "Post-study reporting of death") ~ sample(
        x = c("Yes", "No"), size = N, replace = TRUE, prob = c(0.25, 0.75)
      ),
      TRUE ~ as.character(NA)
    )) %>%
    # adding some random number of days post last treatment date so that death days from last trt admin
    # supports the LDDTHGR1 derivation below
    dplyr::mutate(DTHDT = dplyr::case_when(
      DCSREAS == "DEATH" ~ lubridate::date(
        TRTEDTM + lubridate::days(sample(0:50, size = N, replace = TRUE))
      ),
      TRUE ~ NA
    )) %>%
    dplyr::mutate(LDDTHELD = difftime(DTHDT, lubridate::date(TRTEDTM), units = "days")) %>%
    dplyr::mutate(LDDTHGR1 = dplyr::case_when(
      LDDTHELD <= 30 ~ "<=30",
      LDDTHELD > 30 ~ ">30",
      TRUE ~ as.character(NA)
    )) %>%
    dplyr::mutate(LSTALVDT = dplyr::case_when(
      DCSREAS == "DEATH" ~ DTHDT,
      TRUE ~ lubridate::date(TRTEDTM) + lubridate::days(floor(stats::runif(N, min = 10, max = 30)))
    ))

  # add random ETHNIC (Ethnicity)
  adsl <- adsl %>%
    dplyr::mutate(ETHNIC = sample(
      x = c("HISPANIC OR LATINO", "NOT HISPANIC OR LATINO", "NOT REPORTED", "UNKNOWN"),
      size = N, replace = TRUE, prob = c(.1, .8, .06, .04)
    ))

  # associate DTHADY (Relative Day of Death) with Death date:
  # difference in days between Date of Death [adsl.DTHDT] and
  # Date of First Exposure to Treatment [adsl.TRTSDTM]
  adsl <- adsl %>%
    dplyr::mutate(DTHADY = difftime(DTHDT, TRTSDTM, units = "days"))

  # associate sites with countries and regions
  adsl <- adsl %>%
    dplyr::mutate(SITEID = paste0(COUNTRY, "-", SITEID)) %>%
    dplyr::mutate(REGION1 = dplyr::case_when(
      COUNTRY %in% c("NGA") ~ "Africa",
      COUNTRY %in% c("CHN", "JPN", "PAK") ~ "Asia",
      COUNTRY %in% c("RUS") ~ "Eurasia",
      COUNTRY %in% c("GBR") ~ "Europe",
      COUNTRY %in% c("CAN", "USA") ~ "North America",
      COUNTRY %in% c("BRA") ~ "South America",
      TRUE ~ as.character(NA)
    )) %>%
    dplyr::mutate(INVID = paste("INV ID", SITEID)) %>%
    dplyr::mutate(INVNAM = paste("Dr.", SITEID, "Doe")) %>%
    dplyr::mutate(USUBJID = paste(STUDYID, SITEID, SUBJID, sep = "-"))

  # optionally blank out values in the requested variables
  if (length(na_vars) > 0 && na_percentage > 0) {
    adsl <- mutate_na(ds = adsl, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  adsl <- apply_metadata(adsl, "metadata/ADSL.yml", FALSE)

  # stored on the result so that it can be read back from the ADSL object
  attr(adsl, "study_duration_secs") <- as.numeric(study_duration_secs)
  return(adsl)
}

#' Pharmacokinetics Analysis Dataset (ADPC)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Pharmacokinetics Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per study, subject, parameter, and time point.
#'
#' Records are generated for every day up to day 7 and, beyond that, only for
#' days that start a new week (day 8, 15, ...). Placebo subjects
#' (`ACTARM == "B: Placebo"`) and "Plasma Drug Y" records of `"A: Drug X"`
#' subjects are removed from the result.
#'
#' @inheritParams argument_convention
#' @param avalu (`character`)\cr Analysis value units.
#' @param constants (`character vector`)\cr Constant parameters to be used in formulas for creating analysis values.
#' @param duration (`numeric`)\cr Duration in number of days. Must be a single finite number greater than 0.
#' @template param_cached
#' @templateVar data adpc
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adpc <- radpc(adsl, seed = 2)
#' adpc
#'
#' adpc <- radpc(adsl, seed = 2, duration = 3)
#' adpc
radpc <- function(adsl,
                  avalu = "ug/mL",
                  constants = c(D = 100, ka = 0.8, ke = 1),
                  duration = 2,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(
                    AVAL = c(NA, 0.1)
                  ),
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadpc"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_character(avalu, len = 1, any.missing = FALSE)
  checkmate::assert_subset(names(constants), c("D", "ka", "ke"))
  # a missing, empty, infinite or non-positive duration cannot produce a
  # valid sequence of days below, so reject it up front
  checkmate::assert_number(duration, finite = TRUE)
  checkmate::assert_true(duration > 0)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)
  checkmate::assert_list(na_vars)

  if (!is.null(seed)) {
    set.seed(seed)
  }

  # Generate the records of all subjects for one study day.
  radpc_core <- function(day) {
    # visit label is the same for every record of the day
    visit_label <- if (day <= 7) {
      paste("Day", day)
    } else {
      paste("Week", (day - 1) / 7)
    }

    adpc_day <- tidyr::expand_grid(
      data.frame(
        STUDYID = adsl$STUDYID,
        USUBJID = adsl$USUBJID,
        ARMCD = adsl$ARMCD,
        A0 = unname(constants["D"]),
        # per-subject random perturbation of the rate constants
        ka = unname(constants["ka"]) - stats::runif(length(adsl$USUBJID), -0.2, 0.2),
        ke = unname(constants["ke"]) - stats::runif(length(adsl$USUBJID), -0.2, 0.2)
      ),
      # day 1 has several time points; later days a single one
      PCTPTNUM = if (day == 1) c(0, 0.5, 1, 1.5, 2, 3, 4, 8, 12) else 24 * (day - 1),
      PARAM = factor(c("Plasma Drug X", "Urine Drug X", "Plasma Drug Y", "Urine Drug Y"))
    )

    # urine records do not exist for the early day-1 time points
    early_urine <- grepl("Urine", adpc_day$PARAM) & adpc_day$PCTPTNUM %in% c(0.5, 1, 1.5, 2, 3)

    adpc_day <- adpc_day[!early_urine, ] %>%
      dplyr::arrange(USUBJID, PARAM) %>%
      dplyr::mutate(
        VISITDY = day,
        VISIT = visit_label,
        PCVOLU = ifelse(grepl("Urine", PARAM), "mL", ""),
        ASMED = ifelse(grepl("Urine", PARAM), "URINE", "PLASMA"),
        PCTPT = factor(dplyr::case_when(
          PCTPTNUM == 0 ~ "Predose",
          # day-1 urine intervals run from the previous record's time point
          (day == 1 & grepl("Urine", PARAM)) ~
            paste0(dplyr::lag(PCTPTNUM), "H - ", PCTPTNUM, "H"),
          (day != 1 & grepl("Urine", PARAM)) ~
            paste0(as.numeric(PCTPTNUM) - 24, "H - ", PCTPTNUM, "H"),
          TRUE ~ paste0(PCTPTNUM, "H")
        )),
        ARELTM1 = PCTPTNUM,
        NRELTM1 = PCTPTNUM,
        ARELTM2 = ARELTM1 - (24 * (day - 1)),
        NRELTM2 = NRELTM1 - (24 * (day - 1)),
        A0 = ifelse(PARAM == "Plasma Drug Y", A0, A0 / 2),
        # PK Equation
        AVAL = round(
          (A0 * ka * (
            exp(-ka * ARELTM1) - exp(-ke * ARELTM1)
          ))
          / (ke - ka),
          digits = 3
        )
      ) %>%
      dplyr::mutate(
        PCVOL = ifelse(
          ASMED == "URINE",
          round(abs(((PCTPTNUM - 1) %% 24) * A0 * ka * exp(PCTPTNUM %% 1.8 / 10)), 2),
          NA
        ),
        AVALC = ifelse(AVAL == 0, "BLQ", as.character(AVAL)),
        AVALU = avalu,
        RELTMU = "hr"
      ) %>%
      dplyr::select(-c("A0", "ka", "ke"))

    return(adpc_day)
  }

  # every day of the first week, afterwards only the first day of each week
  all_days <- seq(duration)
  sampled_days <- all_days[all_days <= 7 | ((all_days - 1) %% 7 == 0)]

  adpc <- do.call(rbind, lapply(sampled_days, radpc_core))

  adpc <- dplyr::inner_join(adpc, adsl, by = c("STUDYID", "USUBJID", "ARMCD")) %>%
    dplyr::filter(ACTARM != "B: Placebo", !(ACTARM == "A: Drug X" & PARAM == "Plasma Drug Y"))

  if (length(na_vars) > 0 && na_percentage > 0) {
    adpc <- mutate_na(ds = adpc, na_vars = na_vars, na_percentage = na_percentage)
  }

  adpc <- adpc %>%
    dplyr::rename(
      AVALCAT1 = AVALC,
      NFRLT = NRELTM1,
      AFRLT = ARELTM1,
      NRRLT = NRELTM2,
      ARRLT = ARELTM2
    ) %>%
    dplyr::mutate(ANL02FL = "Y")

  # apply metadata
  adpc <- apply_metadata(adpc, "metadata/ADPC.yml")
  # explicit return so the result is visible, matching radsl() and radtr()
  return(adpc)
}

#' Tumor Response Analysis Dataset (ADTR)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Tumor Response Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per subject per parameter per analysis visit per analysis date.
#'
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `BASETYPE`, `AVISITN`, `DTYPE`
#'
#' Visit timing (`AVISIT`, `AVISITN`, `ADTM`, `ADY`) is taken from the
#' `"OVRINV"` records generated by `radrs()`. `PCHG` is limited to the range
#' from -100 to 200, with `AVAL` and `CHG` adjusted to match.
#'
#' @inheritParams argument_convention
#' @param ... Additional arguments to be passed to `radrs`.
#' @template param_cached
#' @templateVar data adtr
#'
#' @return `data.frame`
#' @export
#'
#' @author tomlinsj, npaszty, Xuefeng Hou, dipietrc
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adtr <- radtr(adsl, seed = 2)
#' adtr
radtr <- function(adsl,
                  param = c("Sum of Longest Diameter by Investigator"),
                  paramcd = c("SLDINV"),
                  seed = NULL,
                  cached = FALSE,
                  ...) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadtr"))
  }
  checkmate::assert_data_frame(adsl)
  checkmate::assert_character(param, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)
  # each parameter label needs exactly one parameter code
  stopifnot(length(param) == length(paramcd))

  if (!is.null(seed)) {
    set.seed(seed)
  }

  # Make times consistent with ADRS at ADY and ADTM.
  # The same seed is also handed to radrs().
  adrs <- radrs(adsl, seed = seed, ...) %>%
    dplyr::filter(PARAMCD == "OVRINV") %>%
    dplyr::select(
      "STUDYID",
      "USUBJID",
      "AVISIT",
      "AVISITN",
      "ADTM",
      "ADY"
    )

  # one copy of the visit skeleton per parameter, each with its own random AVAL
  adtr <- Map(function(parcd, par) {
    df <- adrs
    df$AVAL <- stats::rnorm(nrow(df), mean = 150, sd = 30)
    df$PARAMCD <- parcd
    df$PARAM <- par
    df
  }, paramcd, param) %>%
    Reduce(rbind, .)

  # baseline value per subject and parameter
  adtr_base <- adtr %>%
    dplyr::filter(AVISITN == 0) %>%
    dplyr::group_by(USUBJID, PARAMCD) %>%
    dplyr::mutate(BASE = AVAL) %>%
    dplyr::select("STUDYID", "USUBJID", "BASE", "PARAMCD")

  # derived record holding the smallest post-baseline value
  adtr_postbase <- adtr %>%
    dplyr::filter(AVISITN > 0) %>%
    dplyr::filter(!is.na(AVAL)) %>%
    dplyr::group_by(USUBJID, PARAMCD) %>%
    dplyr::filter(AVAL == min(AVAL)) %>%
    dplyr::slice(1) %>%
    dplyr::mutate(AVISIT = "POST-BASELINE MINIMUM") %>%
    dplyr::mutate(DTYPE = "MINIMUM") %>%
    dplyr::ungroup()

  # visit of the latest post-baseline observation, used for ANL02FL below
  adtr_lastobs <- adtr %>%
    dplyr::filter(AVISITN > 0) %>%
    dplyr::filter(!is.na(AVAL)) %>%
    dplyr::group_by(USUBJID, PARAMCD) %>%
    dplyr::filter(ADTM == max(ADTM, na.rm = TRUE)) %>%
    dplyr::slice(1) %>%
    dplyr::mutate(LAST_VISIT = AVISIT) %>%
    dplyr::ungroup() %>%
    dplyr::select(
      "STUDYID",
      "USUBJID",
      "PARAMCD",
      "LAST_VISIT"
    )

  adtr <- rbind(adtr %>% dplyr::mutate(DTYPE = ""), adtr_postbase)

  adtr <- merge(adtr, adtr_base, by = c("STUDYID", "USUBJID", "PARAMCD")) %>%
    dplyr::mutate(
      ABLFL = dplyr::case_when(AVISIT == "BASELINE" ~ "Y", TRUE ~ ""),
      # the baseline value is kept in BASE only
      AVAL = dplyr::case_when(AVISIT == "BASELINE" ~ NA_real_, TRUE ~ AVAL),
      CHG = dplyr::case_when(AVISITN > 0 ~ AVAL - BASE, TRUE ~ NA_real_),
      PCHG = dplyr::case_when(AVISITN > 0 ~ CHG / BASE * 100, TRUE ~ NA_real_),
      AVALC = as.character(AVAL),
      AVALU = "mm"
    )

  # ensure PCHG does not exceed 200%, nor go below -100% (double in size, or complete remission of tumor).
  adtr <- adtr %>%
    dplyr::mutate(
      # unclipped copy, so AVAL and CHG are adjusted from the original PCHG
      PCHG_DUM = PCHG,
      PCHG = dplyr::case_when(
        PCHG_DUM > 200 ~ 200,
        PCHG_DUM < -100 ~ -100,
        TRUE ~ PCHG
      ),
      AVAL = dplyr::case_when(
        PCHG_DUM > 200 ~ 3 * BASE,
        PCHG_DUM < -100 ~ 0,
        TRUE ~ AVAL
      ),
      CHG = dplyr::case_when(
        PCHG_DUM > 200 ~ 2 * BASE,
        PCHG_DUM < -100 ~ -BASE,
        TRUE ~ CHG
      )
    ) %>%
    dplyr::select(-"PCHG_DUM")

  adtr <- merge(adsl, adtr, by = c("STUDYID", "USUBJID")) %>%
    dplyr::group_by(USUBJID, PARAMCD) %>%
    dplyr::mutate(
      ONTRTFL = factor(dplyr::case_when(
        !AVISIT %in% c("SCREENING", "BASELINE", "FOLLOW UP") ~ "Y",
        TRUE ~ ""
      )),
      ANL01FL = dplyr::case_when(
        DTYPE == "" & AVISITN > 0 ~ "Y",
        TRUE ~ ""
      ),
      ANL03FL = dplyr::case_when(
        DTYPE == "MINIMUM" ~ "Y",
        ABLFL == "Y" ~ "Y",
        TRUE ~ ""
      )
    )
  adtr <- merge(adtr, adtr_lastobs, by = c("STUDYID", "USUBJID", "PARAMCD")) %>%
    dplyr::mutate(
      ANL02FL = dplyr::case_when(
        as.character(AVISIT) == as.character(LAST_VISIT) ~ "Y",
        ABLFL == "Y" ~ "Y",
        TRUE ~ ""
      )
    ) %>%
    dplyr::select(-"LAST_VISIT")
  # Adding variables that are in ADTR osprey but not RCD.
  adtr <- adtr %>%
    dplyr::mutate(
      DCSREAS_GRP = ifelse(DCSREAS == "ADVERSE EVENT", "Safety", "Non-Safety"),
      # Plain number of days; NA when either date is missing. The units are
      # fixed because automatic difftime units depend on the smallest
      # difference present in the data.
      TRTDURD = as.numeric(
        difftime(TRTEDTM, TRTSDTM + lubridate::days(1), units = "days")
      ),
      AGEGR1 = ifelse(AGE < 65, "<65", ">=65")
    )

  # apply metadata
  adtr <- apply_metadata(adtr, "metadata/ADTR.yml")
  return(adtr)
}

#' Time to Safety Event Analysis Dataset (ADSAFTTE)
#'
#' Function to generate random Time-to-Safety Event Dataset for a
#' given Subject-Level Analysis Dataset.
#'
#' @details
#' This function is a thin wrapper: `adsl` and every argument supplied in
#' `...` are forwarded unchanged to `radaette()`, and its result is returned.
#'
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`
#'
#' @inheritParams radaette
#' @param ... Additional arguments to be passed to `radaette`
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adsaftte <- radsaftte(adsl, seed = 2)
#' adsaftte
radsaftte <- function(adsl,
                      ...) {
  # delegate entirely to radaette(); no processing happens here
  radaette(adsl = adsl, ...)
}

1		#' ECG Analysis Dataset (ADEG)
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Function for generating random dataset from ECG Analysis Dataset for a given
6		#' Subject-Level Analysis Dataset.
7		#'
8		#' @details One record per subject per parameter per analysis visit per analysis date.
9		#'
10		#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `BASETYPE`, `AVISITN`, `ATPTN`, `DTYPE`, `ADTM`, `EGSEQ`, `ASPID`
11		#'
12		#' @inheritParams argument_convention
13		#' @param egcat (`character vector`)\cr EG category values.
14		#' @param max_n_eg (`integer`)\cr Maximum number of EG results per patient. Defaults to 10.
15		#' @template param_cached
16		#' @templateVar data adeg
17		#'
18		#' @return `data.frame`
19		#' @export
20		#'
21		#' @author tomlinsj, npaszty, Xuefeng Hou, dipietrc
22		#'
23		#' @examples
24		#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
25		#'
26		#' adeg <- radeg(adsl, visit_format = "WEEK", n_assessments = 7L, seed = 2)
27		#' adeg
28		#'
29		#' adeg <- radeg(adsl, visit_format = "CYCLE", n_assessments = 2L, seed = 2)
30		#' adeg
31		radeg <- function(adsl,
32		egcat = c("INTERVAL", "INTERVAL", "MEASUREMENT", "FINDING"),
33		param = c(
34		"QT Duration",
35		"RR Duration",
36		"Heart Rate",
37		"ECG Interpretation"
38		),
39		paramcd = c("QT", "RR", "HR", "ECGINTP"),
40		paramu = c("msec", "msec", "beats/min", ""),
41		visit_format = "WEEK",
42		n_assessments = 5L,
43		n_days = 5L,
44		max_n_eg = 10L,
45		lookup = NULL,
46		seed = NULL,
47		na_percentage = 0,
48		na_vars = list(
49		ABLFL = c(1235, 0.1), BASE = c(NA, 0.1), BASEC = c(NA, 0.1),
50		CHG = c(1234, 0.1), PCHG = c(1234, 0.1)
51		),
52		cached = FALSE) {
53	4x	checkmate::assert_flag(cached)
54	4x	if (cached) {
55	1x	return(get_cached_data("cadeg"))
56		}
57
58	3x	checkmate::assert_data_frame(adsl)
59	3x	checkmate::assert_character(egcat, min.len = 1, any.missing = FALSE)
60	3x	checkmate::assert_character(param, min.len = 1, any.missing = FALSE)
61	3x	checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE)
62	3x	checkmate::assert_character(paramu, min.len = 1, any.missing = FALSE)
63	3x	checkmate::assert_string(visit_format)
64	3x	checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE)
65	3x	checkmate::assert_integer(n_days, len = 1, any.missing = FALSE)
66	3x	checkmate::assert_integer(max_n_eg, len = 1, any.missing = FALSE)
67	3x	checkmate::assert_data_frame(lookup, null.ok = TRUE)
68	3x	checkmate::assert_number(seed, null.ok = TRUE)
69	3x	checkmate::assert_number(na_percentage, lower = 0, upper = 1)
70	3x	checkmate::assert_true(na_percentage < 1)
71
72		# validate and initialize related variables
73	3x	egcat_init_list <- relvar_init(param, egcat)
74	3x	param_init_list <- relvar_init(param, paramcd)
75	3x	unit_init_list <- relvar_init(param, paramu)
76
77	3x	if (!is.null(seed)) {
78	3x	set.seed(seed)
79		}
80	3x	study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))
81
82	3x	adeg <- expand.grid(
83	3x	STUDYID = unique(adsl$STUDYID),
84	3x	USUBJID = adsl$USUBJID,
85	3x	PARAM = as.factor(param_init_list$relvar1),
86	3x	AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days),
87	3x	stringsAsFactors = FALSE
88		)
89
90		# assign related variable values: PARAMxEGCAT are related
91	3x	adeg <- adeg %>% rel_var(
92	3x	var_name = "EGCAT",
93	3x	related_var = "PARAM",
94	3x	var_values = egcat_init_list$relvar2
95		)
96
97		# assign related variable values: PARAMxPARAMCD are related
98	3x	adeg <- adeg %>% rel_var(
99	3x	var_name = "PARAMCD",
100	3x	related_var = "PARAM",
101	3x	var_values = param_init_list$relvar2
102		)
103
104	3x	adeg <- adeg %>% dplyr::mutate(AVAL = dplyr::case_when(
105	3x	PARAMCD == "QT" ~ stats::rnorm(nrow(adeg), mean = 350, sd = 100),
106	3x	PARAMCD == "RR" ~ stats::rnorm(nrow(adeg), mean = 1050, sd = 300),
107	3x	PARAMCD == "HR" ~ stats::rnorm(nrow(adeg), mean = 70, sd = 20),
108	3x	PARAMCD == "ECGINTP" ~ NA_real_
109		))
110
111	3x	adeg <- adeg %>%
112	3x	dplyr::mutate(EGTESTCD = PARAMCD) %>%
113	3x	dplyr::mutate(EGTEST = PARAM)
114
115	3x	adeg <- adeg %>% dplyr::mutate(AVISITN = dplyr::case_when(
116	3x	AVISIT == "SCREENING" ~ -1,
117	3x	AVISIT == "BASELINE" ~ 0,
118	3x	(grepl("^WEEK", AVISIT) \| grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
119	3x	TRUE ~ NA_real_
120		))
121
122	3x	adeg <- adeg %>% rel_var(
123	3x	var_name = "AVALU",
124	3x	related_var = "PARAM",
125	3x	var_values = unit_init_list$relvar2
126		)
127
128		# order to prepare for change from screening and baseline values
129	3x	adeg <- adeg[order(adeg$STUDYID, adeg$USUBJID, adeg$PARAMCD, adeg$AVISITN), ]
130
131	3x	adeg <- Reduce(rbind, lapply(split(adeg, adeg$USUBJID), function(x) {
132	30x	x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])]
133	30x	x$ABLFL <- ifelse(toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE",
134	30x	"Y",
135	30x	ifelse(toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1", "Y", "")
136		)
137	30x	x
138		}))
139
140	3x	adeg$BASE <- ifelse(adeg$AVISITN >= 0, retain(adeg, adeg$AVAL, adeg$ABLFL == "Y"), adeg$AVAL)
141
142	3x	adeg <- adeg %>% dplyr::mutate(ANRLO = dplyr::case_when(
143	3x	PARAMCD == "QT" ~ 200,
144	3x	PARAMCD == "RR" ~ 600,
145	3x	PARAMCD == "HR" ~ 40,
146	3x	PARAMCD == "ECGINTP" ~ NA_real_
147		))
148
149	3x	adeg <- adeg %>% dplyr::mutate(ANRHI = dplyr::case_when(
150	3x	PARAMCD == "QT" ~ 500,
151	3x	PARAMCD == "RR" ~ 1500,
152	3x	PARAMCD == "HR" ~ 100,
153	3x	PARAMCD == "ECGINTP" ~ NA_real_
154		))
155
156	3x	adeg <- adeg %>% dplyr::mutate(ANRIND = factor(dplyr::case_when(
157	3x	AVAL < ANRLO ~ "LOW",
158	3x	AVAL >= ANRLO & AVAL <= ANRHI ~ "NORMAL",
159	3x	AVAL > ANRHI ~ "HIGH"
160		)))
161
162	3x	adeg <- adeg %>%
163	3x	dplyr::mutate(CHG = ifelse(AVISITN > 0, AVAL - BASE, NA)) %>%
164	3x	dplyr::mutate(PCHG = ifelse(AVISITN > 0, 100 * (CHG / BASE), NA)) %>%
165	3x	dplyr::mutate(BASETYPE = "LAST") %>%
166	3x	dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>%
167	3x	dplyr::mutate(BNRIND = ANRIND[ABLFL == "Y"]) %>%
168	3x	dplyr::ungroup() %>%
169	3x	dplyr::mutate(ATPTN = 1) %>%
170	3x	dplyr::mutate(DTYPE = NA)
171
172	3x	adeg$ANRIND <- factor(adeg$ANRIND, levels = c("LOW", "NORMAL", "HIGH"))
173	3x	adeg$BNRIND <- factor(adeg$BNRIND, levels = c("LOW", "NORMAL", "HIGH"))
174
175	3x	adeg <- rcd_var_relabel(
176	3x	adeg,
177	3x	STUDYID = "Study Identifier",
178	3x	USUBJID = "Unique Subject Identifier"
179		)
180
181		# merge ADSL to be able to add EG date and study day variables
182	3x	adeg <- dplyr::inner_join(
183	3x	adeg,
184	3x	adsl,
185	3x	by = c("STUDYID", "USUBJID")
186		) %>%
187	3x	dplyr::rowwise() %>%
188	3x	dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
189	3x	is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
190	3x	TRUE ~ TRTEDTM
191		))) %>%
192	3x	dplyr::ungroup()
193
194	3x	adeg <- adeg %>%
195	3x	dplyr::group_by(USUBJID) %>%
196	3x	dplyr::arrange(USUBJID, AVISITN) %>%
197	3x	dplyr::mutate(ADTM = rep(
198	3x	sort(sample(
199	3x	seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"),
200	3x	size = nlevels(AVISIT)
201		)),
202	3x	each = n() / nlevels(AVISIT)
203		)) %>%
204	3x	dplyr::ungroup() %>%
205	3x	dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>%
206	3x	dplyr::select(-TRTENDT) %>%
207	3x	dplyr::arrange(STUDYID, USUBJID, ADTM)
208
209	3x	adeg <- adeg %>%
210	3x	dplyr::mutate(ASPID = sample(seq_len(dplyr::n()))) %>%
211	3x	dplyr::group_by(USUBJID) %>%
212	3x	dplyr::mutate(EGSEQ = seq_len(dplyr::n())) %>%
213	3x	dplyr::mutate(ASEQ = EGSEQ) %>%
214	3x	dplyr::ungroup() %>%
215	3x	dplyr::arrange(
216	3x	STUDYID,
217	3x	USUBJID,
218	3x	PARAMCD,
219	3x	BASETYPE,
220	3x	AVISITN,
221	3x	ATPTN,
222	3x	DTYPE,
223	3x	ADTM,
224	3x	EGSEQ,
225	3x	ASPID
226		)
227
228	3x	adeg <- adeg %>% dplyr::mutate(ONTRTFL = factor(dplyr::case_when(
229	3x	!AVISIT %in% c("SCREENING", "BASELINE") ~ "Y",
230	3x	TRUE ~ ""
231		)))
232
233	3x	adeg <- adeg %>% dplyr::mutate(AVALC = ifelse(
234	3x	PARAMCD == "ECGINTP",
235	3x	as.character(sample_fct(c("ABNORMAL", "NORMAL"), nrow(adeg), prob = c(0.25, 0.75))),
236	3x	as.character(AVAL)
237		))
238
239		# Temporarily creating a row_check column to easily match newly created
240		# observations with their row correct arrangement.
241	3x	adeg <- adeg %>%
242	3x	dplyr::mutate(row_check = seq_len(nrow(adeg)))
243
244		# Created function to add in new observations for DTYPE, "MINIMUM" & "MAXIMUM" in this case.
245	3x	get_groups <- function(data,
246	3x	minimum) {
247	6x	data <- data %>%
248	6x	dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>%
249	6x	dplyr::arrange(ADTM, ASPID, EGSEQ) %>%
250	6x	dplyr::filter(
251	6x	(AVISIT != "BASELINE" & AVISIT != "SCREENING") &
252	6x	(ONTRTFL == "Y" \| ADTM <= TRTSDTM)
253		) %>%
254		{
255	6x	if (minimum == TRUE) {
256	3x	dplyr::filter(., AVAL == min(AVAL)) %>%
257	3x	dplyr::mutate(., DTYPE = "MINIMUM", AVISIT = "POST-BASELINE MINIMUM")
258		} else {
259	3x	dplyr::filter(., AVAL == max(AVAL)) %>%
260	3x	dplyr::mutate(., DTYPE = "MAXIMUM", AVISIT = "POST-BASELINE MAXIMUM")
261		}
262		} %>%
263	6x	dplyr::slice(1) %>%
264	6x	dplyr::ungroup()
265
266	6x	return(data)
267		}
268
269		# Binding the new observations to the dataset from the function above and rearranging in the correct order.
270	3x	adeg <- rbind(adeg, get_groups(adeg, TRUE), get_groups(adeg, FALSE)) %>%
271	3x	dplyr::arrange(row_check) %>%
272	3x	dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>%
273	3x	dplyr::arrange(AVISIT, .by_group = TRUE) %>%
274	3x	dplyr::ungroup()
275
276		# Dropping the "row_check" column created above.
277	3x	adeg <- adeg[, -which(names(adeg) %in% c("row_check"))]
278
279		# Created function to easily match rows which comply to ONTRTFL derivation
280	3x	flag_variables <- function(data, worst_obs) {
281	6x	data_compare <- data %>%
282	6x	dplyr::mutate(row_check = seq_len(nrow(data)))
283
284	6x	data <- data_compare %>%
285		{
286	6x	if (worst_obs == FALSE) {
287	3x	dplyr::group_by(., USUBJID, PARAMCD, BASETYPE, AVISIT) %>%
288	3x	dplyr::arrange(., ADTM, ASPID, EGSEQ)
289		} else {
290	3x	dplyr::group_by(., USUBJID, PARAMCD, BASETYPE)
291		}
292		} %>%
293	6x	dplyr::filter(
294	6x	AVISITN > 0 & (ONTRTFL == "Y" \| ADTM <= TRTSDTM) &
295	6x	is.na(DTYPE)
296		) %>%
297		{
298	6x	if (worst_obs == TRUE) {
299	3x	dplyr::arrange(., AVALC) %>% dplyr::filter(., ifelse(
300	3x	PARAMCD == "ECGINTP",
301	3x	ifelse(AVALC == "ABNORMAL", AVALC == "ABNORMAL", AVALC == "NORMAL"),
302	3x	AVAL == min(AVAL)
303		))
304		} else {
305	3x	dplyr::filter(., ifelse(
306	3x	PARAMCD == "ECGINTP",
307	3x	AVALC == "ABNORMAL" \| AVALC == "NORMAL",
308	3x	AVAL == min(AVAL)
309		))
310		}
311		} %>%
312	6x	dplyr::slice(1) %>%
313		{
314	6x	if (worst_obs == TRUE) {
315	3x	dplyr::mutate(., new_var = dplyr::case_when(
316	3x	(AVALC == "ABNORMAL" \| AVALC == "NORMAL") ~ "Y",
317	3x	(!is.na(AVAL) & is.na(DTYPE)) ~ "Y",
318	3x	TRUE ~ ""
319		))
320		} else {
321	3x	dplyr::mutate(., new_var = dplyr::case_when(
322	3x	(AVALC == "ABNORMAL" \| AVALC == "NORMAL") ~ "Y",
323	3x	(!is.na(AVAL) & is.na(DTYPE)) ~ "Y",
324	3x	TRUE ~ ""
325		))
326		}
327		} %>%
328	6x	dplyr::ungroup()
329
330	6x	data_compare$new_var <- ifelse(data_compare$row_check %in% data$row_check, "Y", "")
331	6x	data_compare <- data_compare[, -which(names(data_compare) %in% c("row_check"))]
332
333	6x	return(data_compare)
334		}
335
336	3x	adeg <- flag_variables(adeg, FALSE) %>% dplyr::rename(WORS01FL = "new_var")
337	3x	adeg <- flag_variables(adeg, TRUE) %>% dplyr::rename(WORS02FL = "new_var")
338
339	3x	adeg <- adeg %>% dplyr::mutate(ANL01FL = factor(ifelse(
340	3x	(ABLFL == "Y" \| (is.na(DTYPE) & WORS01FL == "Y")) &
341	3x	(AVISIT != "SCREENING"),
342	3x	"Y",
343		""
344		)))
345
346	3x	adeg <- adeg %>%
347	3x	dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>%
348	3x	dplyr::mutate(BASEC = ifelse(
349	3x	PARAMCD == "ECGINTP",
350	3x	AVALC[AVISIT == "BASELINE"],
351	3x	as.character(BASE)
352		)) %>%
353	3x	dplyr::mutate(ANL03FL = dplyr::case_when(
354	3x	DTYPE == "MINIMUM" ~ "Y",
355	3x	ABLFL == "Y" & PARAMCD != "ECGINTP" ~ "Y",
356	3x	TRUE ~ ""
357		)) %>%
358	3x	dplyr::mutate(ANL04FL = dplyr::case_when(
359	3x	DTYPE == "MAXIMUM" ~ "Y",
360	3x	ABLFL == "Y" & PARAMCD != "ECGINTP" ~ "Y",
361	3x	TRUE ~ ""
362		)) %>%
363	3x	dplyr::ungroup()
364
365	3x	if (length(na_vars) > 0 && na_percentage > 0) {
366	!	adeg <- mutate_na(ds = adeg, na_vars = na_vars, na_percentage = na_percentage)
367		}
368
369		# apply metadata
370	3x	adeg <- apply_metadata(adeg, "metadata/ADEG.yml")
371
372	3x	return(adeg)
373		}

1		#' Time to Adverse Event Analysis Dataset (ADAETTE)
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Function to generate random Time-to-AE Dataset for a
6		#' given Subject-Level Analysis Dataset.
7		#'
8		#' @details
9		#'
10		#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`
11		#'
12		#' @inheritParams argument_convention
13		#' @param event_descr (`character vector`)\cr Descriptions of events. Defaults to `NULL`.
14		#' @param censor_descr (`character vector`)\cr Descriptions of censors. Defaults to `NULL`.
15		#' @template param_cached
16		#' @templateVar data adaette
17		#'
18		#' @return `data.frame`
19		#' @export
20		#'
21		#' @author Xiuting Mi
22		#'
23		#' @examples
24		#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
25		#'
26		#' adaette <- radaette(adsl, seed = 2)
27		#' adaette
radaette <- function(adsl,
                     event_descr = NULL,
                     censor_descr = NULL,
                     lookup = NULL,
                     seed = NULL,
                     na_percentage = 0,
                     na_vars = list(CNSR = c(NA, 0.1), AVAL = c(1234, 0.1)),
                     cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadaette"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_character(censor_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(event_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  # Per-arm, per-category event rate (LAMBDA) and censoring probability (CNSR_P).
  # A user-supplied `lookup` replaces the built-in table entirely.
  checkmate::assert_data_frame(lookup, null.ok = TRUE)
  lookup_adaette <- if (!is.null(lookup)) {
    lookup
  } else {
    tibble::tribble(
      ~ARM, ~CATCD, ~CAT, ~LAMBDA, ~CNSR_P,
      "ARM A", "1", "any adverse event", 1 / 80, 0.4,
      "ARM B", "1", "any adverse event", 1 / 100, 0.2,
      "ARM C", "1", "any adverse event", 1 / 60, 0.42,
      "ARM A", "2", "any serious adverse event", 1 / 100, 0.3,
      "ARM B", "2", "any serious adverse event", 1 / 150, 0.1,
      "ARM C", "2", "any serious adverse event", 1 / 80, 0.32,
      "ARM A", "3", "a grade 3-5 adverse event", 1 / 80, 0.2,
      "ARM B", "3", "a grade 3-5 adverse event", 1 / 100, 0.08,
      "ARM C", "3", "a grade 3-5 adverse event", 1 / 60, 0.23
    )
  }

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  # Pools from which event / censoring descriptions are sampled.
  evntdescr_sel <- if (!is.null(event_descr)) {
    event_descr
  } else {
    "Preferred Term"
  }

  cnsdtdscr_sel <- if (!is.null(censor_descr)) {
    censor_descr
  } else {
    c(
      "Clinical Cut Off",
      "Completion or Discontinuation",
      "End of AE Reporting Period"
    )
  }

  # Builds the single "AEREPTTE" record for one patient: the reporting period ends at the
  # earlier of EOSDT and the treatment end date (TRTSDTM + study duration when TRTEDTM is NA).
  random_patient_data <- function(patient_info) {
    startdt <- lubridate::date(patient_info$TRTSDTM)
    trtedtm <- lubridate::floor_date(dplyr::case_when(
      is.na(patient_info$TRTEDTM) ~ lubridate::date(patient_info$TRTSDTM) + study_duration_secs,
      TRUE ~ lubridate::date(patient_info$TRTEDTM)
    ), unit = "day")
    enddts <- c(patient_info$EOSDT, lubridate::date(trtedtm))
    enddts_min_index <- which.min(enddts)
    adt <- enddts[enddts_min_index]
    adtm <- lubridate::as_datetime(adt)
    ady <- as.numeric(adt - startdt + 1)
    data.frame(
      ARM = patient_info$ARM,
      STUDYID = patient_info$STUDYID,
      SITEID = patient_info$SITEID,
      USUBJID = patient_info$USUBJID,
      PARAMCD = "AEREPTTE",
      PARAM = "Time to end of AE reporting period",
      CNSR = 0,
      AVAL = lubridate::days(ady) / lubridate::years(1),
      AVALU = "YEARS",
      # Index 1 of `enddts` is EOSDT, index 2 is the treatment end date.
      EVNTDESC = ifelse(enddts_min_index == 1, "Completion or Discontinuation", "End of AE Reporting Period"),
      CNSDTDSC = NA,
      ADTM = adtm,
      ADY = ady,
      stringsAsFactors = FALSE
    )
  }

  # validate and initialize related variables for Hy's law
  paramcd_hy <- c("HYSTTEUL", "HYSTTEBL")
  param_hy <- c("Time to Hy's Law Elevation in relation to ULN", "Time to Hy's Law Elevation in relation to Baseline")
  param_init_list <- relvar_init(param_hy, paramcd_hy)
  adsl_hy <- dplyr::select(adsl, "STUDYID", "USUBJID", "TRTSDTM", "SITEID", "ARM")

  # create all combinations of unique values in STUDYID, USUBJID, PARAM
  adaette_hy <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = adsl$USUBJID,
    PARAM = as.factor(param_init_list$relvar1),
    stringsAsFactors = FALSE
  )

  # Add other variables to adaette_hy
  adaette_hy <- dplyr::left_join(adaette_hy, adsl_hy, by = c("STUDYID", "USUBJID")) %>%
    rel_var(
      var_name = "PARAMCD",
      related_var = "PARAM",
      var_values = param_init_list$relvar2
    ) %>%
    dplyr::mutate(
      # CNSR is sampled as 0 with probability 0.1 and 1 with probability 0.9.
      CNSR = sample(c(0, 1), prob = c(0.1, 0.9), size = dplyr::n(), replace = TRUE),
      EVNTDESC = dplyr::if_else(
        CNSR == 0,
        "First Post-Baseline Raised ALT or AST Elevation Result",
        NA_character_
      ),
      CNSDTDSC = dplyr::if_else(CNSR == 0, NA_character_,
        sample(c("Last Post-Baseline ALT or AST Result", "Treatment Start"),
          prob = c(0.9, 0.1),
          size = dplyr::n(), replace = TRUE
        )
      )
    ) %>%
    # Row-wise so that each record draws its own random offset from TRTSDTM.
    dplyr::rowwise() %>%
    dplyr::mutate(ADTM = dplyr::case_when(
      CNSDTDSC == "Treatment Start" ~ TRTSDTM,
      TRUE ~ TRTSDTM + sample(seq(0, study_duration_secs), size = dplyr::n(), replace = TRUE)
    )) %>%
    dplyr::mutate(
      ADY_int = lubridate::date(ADTM) - lubridate::date(TRTSDTM) + 1,
      ADY = as.numeric(ADY_int),
      AVAL = lubridate::days(ADY_int) / lubridate::weeks(1),
      AVALU = "WEEKS"
    ) %>%
    dplyr::select(-TRTSDTM, -ADY_int)

  # Builds two records for one AE category of one patient: "AETTE<CATCD>" (time to first
  # occurrence) and "AETOT<CATCD>" (number of occurrences).
  random_ae_data <- function(lookup_info, patient_info, patient_data) {
    cnsr <- sample(c(0, 1), 1, prob = c(1 - lookup_info$CNSR_P, lookup_info$CNSR_P))
    ae_rep_tte <- patient_data$AVAL[patient_data$PARAMCD == "AEREPTTE"]
    data.frame(
      ARM = rep(patient_data$ARM, 2),
      STUDYID = rep(patient_data$STUDYID, 2),
      SITEID = rep(patient_data$SITEID, 2),
      USUBJID = rep(patient_data$USUBJID, 2),
      PARAMCD = c(
        paste0("AETTE", lookup_info$CATCD),
        paste0("AETOT", lookup_info$CATCD)
      ),
      PARAM = c(
        paste("Time to first occurrence of", lookup_info$CAT),
        paste("Number of occurrences of", lookup_info$CAT)
      ),
      CNSR = c(
        cnsr,
        NA
      ),
      AVAL = c(
        # We generate these values conditional on the censoring information.
        # If this time to event is censored, then there were no AEs reported and the time is set
        # to the AE reporting period time. Otherwise we draw from truncated distributions to make
        # sure that we are within the AE reporting time and above 0 AEs.
        ifelse(cnsr == 1, ae_rep_tte, rtexp(1, lookup_info$LAMBDA * 365.25, r = ae_rep_tte)),
        ifelse(cnsr == 1, 0, rtpois(1, lookup_info$LAMBDA * 365.25))
      ),
      AVALU = c(
        "YEARS",
        NA
      ),
      EVNTDESC = c(
        ifelse(cnsr == 0, sample(evntdescr_sel, 1), ""),
        NA
      ),
      CNSDTDSC = c(
        ifelse(cnsr == 1, sample(cnsdtdscr_sel, 1), ""),
        NA
      ),
      stringsAsFactors = FALSE
    ) %>% dplyr::mutate(
      # Only the time-to-event record (AVALU not NA) gets an analysis day / datetime.
      ADY = dplyr::if_else(is.na(AVALU), NA_real_, ceiling(as.numeric(lubridate::dyears(AVAL), "days"))),
      ADTM = dplyr::if_else(
        is.na(AVALU),
        lubridate::as_datetime(NA),
        patient_info$TRTSDTM + lubridate::days(ADY)
      )
    )
  }

  # One pass per patient: the AEREPTTE record plus two records per AE category whose
  # lookup ARM equals the patient's ARMCD.
  adaette <- split(adsl, adsl$USUBJID) %>%
    lapply(function(patient_info) {
      patient_data <- random_patient_data(patient_info)
      lookup_arm <- lookup_adaette %>%
        dplyr::filter(ARM == as.character(patient_info$ARMCD))
      ae_data <- split(lookup_arm, lookup_arm$CATCD) %>%
        lapply(random_ae_data, patient_data = patient_data, patient_info = patient_info) %>%
        Reduce(rbind, .)
      dplyr::bind_rows(patient_data, ae_data)
    }) %>%
    Reduce(rbind, .) %>%
    rcd_var_relabel(
      STUDYID = "Study Identifier",
      USUBJID = "Unique Subject Identifier"
    )

  adaette <- rbind(adaette, adaette_hy)

  # Replace the locally carried SITEID/ARM with the full set of ADSL columns, then
  # number the records per subject in ADTM order.
  adaette <- dplyr::inner_join(
    dplyr::select(adaette, -"SITEID", -"ARM"),
    adsl,
    by = c("STUDYID", "USUBJID")
  ) %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::arrange(ADTM) %>%
    dplyr::mutate(TTESEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = TTESEQ) %>%
    dplyr::mutate(PARAM = as.factor(PARAM)) %>%
    dplyr::mutate(PARAMCD = as.factor(PARAMCD)) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      PARAMCD,
      ADTM,
      TTESEQ
    )

  # Inject missing values with the package helper `mutate_na()`; `dplyr::mutate()` does not
  # understand the `ds` / `na_vars` / `na_percentage` arguments.
  if (length(na_vars) > 0 && na_percentage > 0) {
    adaette <- mutate_na(ds = adaette, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  adaette <- apply_metadata(adaette, "metadata/ADAETTE.yml")

  return(adaette)
}

1		#' Load Cached Data
2		#'
3		#' Return data attached to package.
4		#'
5		#' @keywords internal
6		#' @noRd
get_cached_data <- function(dataname) {
  checkmate::assert_string(dataname)

  # The lookup below goes through the package namespace, but the function insists that
  # the package is attached to the search path first.
  if (!("package:random.cdisc.data" %in% search())) {
    stop(
      # Trailing space keeps the two sentences apart; stop() pastes its arguments with no separator.
      "cached data can only be loaded if the random.cdisc.data package is attached. ",
      "Please run library(random.cdisc.data) before loading cached data.",
      call. = FALSE
    )
  }

  get(dataname, envir = asNamespace("random.cdisc.data"))
}
18
19		#' Create a Factor with Random Elements of x
20		#'
21		#' Sample elements from `x` with replacement to build a factor.
22		#'
23		#' @param x (`character vector` or `factor`)\cr If character vector then it is also used
24		#' as levels of the returned factor. If factor then the levels are used as the new levels.
25		#' @param N (`numeric`)\cr Number of items to choose.
26		#' @param ... Additional arguments to be passed to `sample`.
27		#'
28		#' @return A factor of length `N`.
29		#' @export
30		#'
31		#' @examples
32		#' sample_fct(letters[1:3], 10)
33		#' sample_fct(iris$Species, 10)
sample_fct <- function(x, N, ...) { # nolint
  checkmate::assert_number(N)

  # A factor input contributes its existing levels; any other input is used as the
  # level set itself.
  lvls <- if (is.factor(x)) levels(x) else x

  # Draw N elements with replacement; extra arguments (e.g. `prob`) go to sample().
  drawn <- sample(x, N, replace = TRUE, ...)

  factor(drawn, levels = lvls)
}
39
#' Related Variables: Initialize
#'
#' Verify and initialize related variable values.
#' For example, `relvar_init("Alanine Aminotransferase Measurement", "ALT")`.
#'
#' @param relvar1 (`character`)\cr Vector of n elements.
#' @param relvar2 (`character`)\cr Vector of n elements.
#'
#' @return A named list with elements `relvar1` and `relvar2`. If the two inputs differ
#'   in length, a length-mismatch condition is signalled and `NA` is returned instead.
#'
#' @keywords internal
relvar_init <- function(relvar1, relvar2) {
  checkmate::assert_character(relvar1, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(relvar2, min.len = 1, any.missing = FALSE)

  lengths_agree <- length(relvar1) == length(relvar2)
  if (lengths_agree) {
    return(list("relvar1" = relvar1, "relvar2" = relvar2))
  }

  # The error object is signalled through message(): a handler established by the caller
  # can intercept it, otherwise its text is printed and execution falls through to NA.
  message(simpleError(
    "The argument value length of relvar1 and relvar2 differ. They must contain the same number of elements."
  ))
  NA
}
63
64		#' Related Variables: Assign
65		#'
66		#' Assign values to a related variable within a domain.
67		#'
68		#' @param df (`data.frame`)\cr Data frame containing the related variables.
69		#' @param var_name (`character`)\cr Name of variable related to `related_var` to add to `df`.
70		#' @param var_values (`any`)\cr Vector of values related to values of `related_var`.
71		#' @param related_var (`character`)\cr Name of variable within `df` with values to which values
72		#' of `var_name` must relate.
73		#'
74		#' @return `df` with added factor variable `var_name` containing `var_values` corresponding to `related_var`.
75		#' @export
76		#'
77		#' @examples
78		#' # Example with data.frame.
79		#' params <- c("Level A", "Level B", "Level C")
80		#' adlb_df <- data.frame(
81		#' ID = 1:9,
82		#' PARAM = factor(
83		#' rep(c("Level A", "Level B", "Level C"), 3),
84		#' levels = params
85		#' )
86		#' )
87		#' rel_var(
88		#' df = adlb_df,
89		#' var_name = "PARAMCD",
90		#' var_values = c("A", "B", "C"),
91		#' related_var = "PARAM"
92		#' )
93		#'
94		#' # Example with tibble.
95		#' adlb_tbl <- tibble::tibble(
96		#' ID = 1:9,
97		#' PARAM = factor(
98		#' rep(c("Level A", "Level B", "Level C"), 3),
99		#' levels = params
100		#' )
101		#' )
102		#' rel_var(
103		#' df = adlb_tbl,
104		#' var_name = "PARAMCD",
105		#' var_values = c("A", "B", "C"),
106		#' related_var = "PARAM"
107		#' )
rel_var <- function(df, var_name, related_var, var_values = NULL) {
  checkmate::assert_data_frame(df)
  checkmate::assert_string(var_name)
  checkmate::assert_string(related_var)

  # Distinct values of the related column, in order of first appearance; `var_values`
  # must supply exactly one value per distinct key.
  related_col <- df[, related_var, drop = TRUE]
  keys <- unique(related_col)
  n_keys <- length(keys)
  checkmate::assert_vector(var_values, null.ok = TRUE, len = n_keys, any.missing = FALSE)
  if (is.null(var_values)) {
    var_values <- rep(NA, n_keys)
  }

  # Rows whose key never compares equal (e.g. NA keys) keep the initial NA.
  mapped <- rep(NA, nrow(df))
  for (k in seq_len(n_keys)) {
    hits <- which(related_col == keys[k])
    mapped[hits] <- var_values[k]
  }

  df[[var_name]] <- factor(mapped)
  df
}
125
126		#' Create Visit Schedule
127		#'
128		#' Create a visit schedule as a factor.
129		#'
130		#' X number of visits, or X number of cycles and Y number of days.
131		#'
132		#' @inheritParams argument_convention
133		#'
134		#' @return A factor of length `n_assessments`.
135		#' @export
136		#'
137		#' @examples
138		#' visit_schedule(visit_format = "WEeK", n_assessments = 10L)
139		#' visit_schedule(visit_format = "CyCLE", n_assessments = 5L, n_days = 2L)
visit_schedule <- function(visit_format = "WEEK",
                           n_assessments = 10L,
                           n_days = 5L) {
  checkmate::assert_string(visit_format, pattern = "^WEEK$|^CYCLE$", ignore.case = TRUE)
  # Fewer than one assessment cannot produce a consistent schedule: the label vector and
  # the ordering vector would differ in length. Reject it here with a clear message.
  checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE, lower = 1)
  checkmate::assert_integer(n_days, len = 1, any.missing = FALSE)

  if (toupper(visit_format) == "WEEK") {
    # numeric vector of n assessments
    assessments <- seq_len(n_assessments)
    # numeric vector for ordering including screening (-1) and baseline (0) place holders
    assessments_ord <- -1:n_assessments
    # character vector of nominal visit values
    visit_values <- c("SCREENING", "BASELINE", paste(toupper(visit_format), assessments, "DAY", (assessments * 7) + 1))
  } else if (toupper(visit_format) == "CYCLE") {
    # every cycle number repeated once per day, paired with the day number within the cycle
    cycles <- sort(rep(seq_len(n_assessments), times = 1, each = n_days))
    days <- rep(seq_len(n_days), times = n_assessments, each = 1)
    # ordering including a screening (0) place holder
    assessments_ord <- 0:(n_assessments * n_days)
    visit_values <- c("SCREENING", paste(toupper(visit_format), cycles, "DAY", days))
  }

  # create and order factor variable; returned visibly so that interactive calls and the
  # documented examples actually print the schedule
  stats::reorder(factor(visit_values), assessments_ord)
}
164
165		#' Primary Keys: Retain Values
166		#'
167		#' Retain values within primary keys.
168		#'
169		#' @param df (`data.frame`)\cr Data frame in which to apply the retain.
170		#' @param value_var (`any`)\cr Variable in `df` containing the value to be retained.
171		#' @param event (`expression`)\cr Expression returning a logical value to trigger the retain.
172		#' @param outside (`any`)\cr Additional value to retain. Defaults to `NA`.
173		#' @return A vector of values where expression is true.
174		#' @keywords internal
retain <- function(df, value_var, event, outside = NA) {
  # Positions where the trigger fires.
  fired <- event == TRUE

  # Run boundaries: start of the data, every trigger row, and one past the last row.
  boundaries <- c(1, which(fired), nrow(df) + 1)
  run_lengths <- diff(boundaries)

  # Rows before the first trigger get `outside`; each trigger's value is then carried
  # forward until the next trigger.
  carried <- c(outside, value_var[fired])
  rep(carried, run_lengths)
}
180
181		#' Primary Keys: Labels
182		#'
183		#' @description Shallow copy of `formatters::var_relabel()`. Used mainly internally to
184		#' relabel a subset of variables in a data set.
185		#'
186		#' @param x (`data.frame`)\cr Data frame containing variables to which labels are applied.
187		#' @param ... (`named character`)\cr Name-Value pairs, where name corresponds to a variable
188		#' name in `x` and the value to the new variable label.
189		#' @return x (`data.frame`)\cr Data frame with labels applied.
190		#'
191		#' @keywords internal
rcd_var_relabel <- function(x, ...) {
  stopifnot(is.data.frame(x))

  # Nothing to relabel: hand the data back unchanged.
  if (missing(...)) {
    return(x)
  }

  new_labels <- list(...)
  targets <- names(new_labels)
  if (is.null(targets)) {
    stop("missing variable declarations")
  }

  # Resolve every requested name to its column position in `x`.
  positions <- match(targets, colnames(x))
  unknown <- is.na(positions)
  if (any(unknown)) {
    stop("variables: ", paste(targets[unknown], collapse = ", "), " not found")
  }

  label_is_chr <- vapply(new_labels, is.character, logical(1))
  if (!all(label_is_chr)) {
    stop("all variable labels must be of type character")
  }

  # Attach each label as the "label" attribute of its column.
  for (k in seq_along(positions)) {
    attr(x[[positions[[k]]]], "label") <- new_labels[[k]]
  }
  x
}
214
215		#' Apply Metadata
216		#'
217		#' Apply label and variable ordering attributes to domains.
218		#'
219		#' @param df (`data.frame`)\cr Data frame to which metadata is applied.
220		#' @param filename (`yaml`)\cr File containing domain metadata.
221		#' @param add_adsl (`logical`)\cr Should ADSL data be merged to domain.
222		#' @param adsl_filename (`yaml`)\cr File containing ADSL metadata.
223		#' @return Data frame with metadata applied.
224		#'
225		#' @export
226		#' @examples
227		#' seed <- 1
228		#' adsl <- radsl(seed = seed)
229		#' adsub <- radsub(adsl, seed = seed)
230		#' yaml_path <- file.path(path.package("random.cdisc.data"), "inst", "metadata")
231		#' adsl <- apply_metadata(adsl, file.path(yaml_path, "ADSL.yml"), FALSE)
232		#' adsub <- apply_metadata(
233		#' adsub, file.path(yaml_path, "ADSUB.yml"), TRUE,
234		#' file.path(yaml_path, "ADSL.yml")
235		#' )
apply_metadata <- function(df, filename, add_adsl = TRUE, adsl_filename = "metadata/ADSL.yml") {
  checkmate::assert_data_frame(df)
  checkmate::assert_string(filename)
  checkmate::assert_flag(add_adsl)
  checkmate::assert_string(adsl_filename)

  # Coerce column `var` of `df` to the storage type named in the metadata, but only when
  # the column is not already of that type. Always returns the (possibly modified) data frame.
  apply_type <- function(df, var, type) {
    if (is.null(type)) {
      # No type declared for this variable: leave the data untouched. The result is
      # assigned back to `df` by the caller, so returning NULL here would discard the data.
      return(df)
    }

    if (type == "character" && !is.character(df[[var]])) {
      df[[var]] <- as.character(df[[var]])
    } else if (type == "factor" && !is.factor(df[[var]])) {
      df[[var]] <- as.factor(df[[var]])
    } else if (type == "integer" && !is.integer(df[[var]])) {
      df[[var]] <- as.integer(df[[var]])
    } else if (type == "numeric" && !is.numeric(df[[var]])) {
      df[[var]] <- as.numeric(df[[var]])
    } else if (type == "logical" && !is.logical(df[[var]])) {
      df[[var]] <- as.logical(df[[var]])
    } else if (type == "datetime" && !lubridate::is.POSIXct(df[[var]])) {
      df[[var]] <- as.POSIXct(df[[var]])
    } else if (type == "date" && !lubridate::is.Date(df[[var]])) {
      df[[var]] <- as.Date(df[[var]])
    }
    return(df)
  }

  # remove existing attributes (everything a plain data.frame() does not carry)
  for (i in base::setdiff(names(attributes(df)), names(attributes(data.frame())))) {
    attr(df, i) <- NULL
  }

  # get metadata; ADSL variables come first when they are merged in
  metadata <- yaml::yaml.load_file(system.file(filename, package = "random.cdisc.data"))
  adsl_metadata <- if (add_adsl) {
    yaml::yaml.load_file(system.file(adsl_filename, package = "random.cdisc.data"))
  } else {
    NULL
  }
  metadata_variables <- append(adsl_metadata$variables, metadata$variables)
  metadata_varnames <- names(metadata_variables)

  # find variables that do not have labels and are not in the metadata
  missing_vars_map <- vapply(
    names(df),
    function(x) {
      !(x %in% c("STUDYID", "USUBJID", metadata_varnames)) && is.null(attr(df[[x]], "label"))
    },
    logical(1)
  )
  missing_vars <- names(df)[missing_vars_map]
  if (length(missing_vars) > 0) {
    msg <- paste0(
      "Following variables does not have label or are not found in ",
      filename,
      ": ",
      paste0(missing_vars, collapse = ", ")
    )
    warning(msg)
  }

  # only metadata entries that actually exist in the data are applied
  if (!all(metadata_varnames %in% names(df))) {
    metadata_varnames <- metadata_varnames[metadata_varnames %in% names(df)]
  }

  # assign types and labels to variables
  for (var in metadata_varnames) {
    df <- apply_type(df, var, metadata_variables[[var]]$type)
    attr(df[[var]], "label") <- metadata_variables[[var]]$label
  }

  # reorder data frame columns to expected BDS order
  df <- df[, unique(c("STUDYID", "USUBJID", metadata_varnames, names(df)))]

  # assign label to data frame
  attr(df, "label") <- metadata$domain$label

  df
}
317
#' Replace Values in a Vector by NA
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Randomized replacement of values by `NA`.
#'
#' @details The number of replaced elements is `round(length(v) * percentage)`; which
#' positions are replaced is drawn at random. The random number generator is only
#' re-seeded when `seed` is neither `NULL` nor `NA`.
#'
#' @inheritParams argument_convention
#' @param v (`any`)\cr Vector of any type.
#' @param percentage (`proportion`)\cr Value between 0 and 1 defining
#'   how much of the vector shall be replaced by `NA`.
#'
#' @return The input vector `v` where a certain number of values are replaced by `NA`.
#'
#' @export
replace_na <- function(v, percentage = 0.05, seed = NULL) {
  checkmate::assert_number(percentage, lower = 0, upper = 1)

  if (percentage != 0) {
    if (!is.null(seed) && !is.na(seed)) {
      set.seed(seed)
    }

    # pick the positions to blank out
    n_blank <- round(length(v) * percentage)
    blank_at <- sample(seq_along(v), n_blank)
    v[blank_at] <- NA
  }

  v
}
351
#' Replace Values with NA
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Replace column values with `NA`s.
#'
#' @details Each element of `na_vars` is named after a column of `ds`. Its first entry is
#' passed to [replace_na()] as the seed and its second entry as the proportion to replace;
#' when the second entry is `NA`, `na_percentage` is used instead. Names that are not
#' columns of `ds` raise a warning and are skipped.
#'
#' @inheritParams argument_convention
#' @param ds (`data.frame`)\cr Any data set.
#'
#' @return `ds` with a random share of the values in the selected columns replaced by `NA`.
#'
#' @export
mutate_na <- function(ds, na_vars = NULL, na_percentage = 0.05) {
  if (is.null(na_vars)) {
    # NOTE(review): names(ds) is an unnamed character vector, so names(na_vars) below is
    # NULL and the loop does nothing in this case - confirm whether "all columns" was intended.
    na_vars <- names(ds)
  } else {
    stopifnot(is.list(na_vars)) # any list is OK; as values can be left NA
    stopifnot(length(names(na_vars)) == length(na_vars)) # names for all elements
  }

  stopifnot(is.numeric(na_percentage))
  stopifnot(na_percentage >= 0 && na_percentage < 1)

  for (na_var in names(na_vars)) {
    if (is.na(na_var)) {
      next
    }
    if (!na_var %in% names(ds)) {
      warning(paste(na_var, "not in column names"))
      next
    }

    # Strip grouped/rowwise classes first, then overwrite the column with its
    # partially blanked copy.
    ds <- ds %>%
      ungroup_rowwise_df() %>%
      dplyr::mutate(
        !!na_var := ds[[na_var]] %>%
          replace_na(
            percentage = ifelse(is.na(na_vars[[na_var]][2]), na_percentage, na_vars[[na_var]][2]),
            seed = na_vars[[na_var]][1]
          )
      )
  }

  ds
}
394
# Overwrite the class vector of `x` with a fixed tibble-style one, dropping any other
# classes (such as grouped or rowwise ones) the object carried. The data are untouched.
ungroup_rowwise_df <- function(x) {
  plain_classes <- c("tbl", "tbl_df", "data.frame")
  class(x) <- plain_classes
  x
}
399
400		#' Zero-Truncated Poisson Distribution
401		#'
402		#' @description `r lifecycle::badge("stable")`
403		#'
404		#' This generates random numbers from a zero-truncated Poisson distribution,
405		#' i.e. from `X \| X > 0` when `X ~ Poisson(lambda)`. The advantage here is that
406		#' we guarantee to return exactly `n` numbers and without using a loop internally.
407		#' This solution was provided in a post by
408		#' [Peter Dalgaard](https://stat.ethz.ch/pipermail/r-help/2005-May/070680.html).
409		#'
410		#' @param n (`numeric`)\cr Number of random numbers.
411		#' @param lambda (`numeric`)\cr Non-negative mean(s).
412		#'
413		#' @return The random numbers.
414		#' @export
415		#'
416		#' @examples
417		#' x <- rpois(1e6, lambda = 5)
418		#' x <- x[x > 0]
419		#' hist(x)
420		#'
421		#' y <- rtpois(1e6, lambda = 5)
422		#' hist(y)
rtpois <- function(n, lambda) {
  # Probability mass at zero; uniforms are drawn strictly above it so that the
  # Poisson quantile function can never return 0.
  p_zero <- stats::dpois(0, lambda)
  u <- stats::runif(n, p_zero, 1)
  stats::qpois(u, lambda)
}
426
427		#' Truncated Exponential Distribution
428		#'
429		#' @description `r lifecycle::badge("stable")`
430		#'
431		#' This generates random numbers from a truncated Exponential distribution,
432		#' i.e. from `X \| X > l` or `X \| X < r` when `X ~ Exp(rate)`. The advantage here is that
433		#' we guarantee to return exactly `n` numbers and without using a loop internally.
434		#' This can be derived from the quantile functions of the left- and right-truncated
435		#' Exponential distributions.
436		#'
437		#' @param n (`numeric`)\cr Number of random numbers.
438		#' @param rate (`numeric`)\cr Non-negative rate.
439		#' @param l (`numeric`)\cr Positive left-hand truncation parameter.
440		#' @param r (`numeric`)\cr Positive right-hand truncation parameter.
441		#'
442		#' @return The random numbers. If neither `l` nor `r` are provided then the usual Exponential
443		#' distribution is used.
444		#' @export
445		#'
446		#' @examples
447		#' x <- stats::rexp(1e6, rate = 5)
448		#' x <- x[x > 0.5]
449		#' hist(x)
450		#'
451		#' y <- rtexp(1e6, rate = 5, l = 0.5)
452		#' hist(y)
453		#'
454		#' z <- rtexp(1e6, rate = 5, r = 0.5)
455		#' hist(z)
rtexp <- function(n, rate, l = NULL, r = NULL) {
  # No truncation requested: plain Exponential draws.
  if (is.null(l) && is.null(r)) {
    return(stats::rexp(n, rate))
  }

  u <- stats::runif(n)

  # Left truncation takes precedence when both bounds are supplied.
  if (!is.null(l)) {
    return(l - log(1 - u) / rate)
  }

  # Right truncation: invert the distribution function restricted to [0, r].
  -log(1 - u * (1 - exp(-r * rate))) / rate
}

1		#' Hy's Law Analysis Dataset (ADHY)
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Function for generating a random Hy's Law Analysis Dataset for a given
6		#' Subject-Level Analysis Dataset.
7		#'
8		#' @details One record per subject per parameter per analysis visit per analysis date.
9		#'
10		#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `AVISITN`, `ADTM`, `SRCSEQ`
11		#'
12		#' @inheritParams argument_convention
13		#' @template param_cached
14		#' @templateVar data adhy
15		#'
16		#' @return `data.frame`
17		#' @export
18		#'
19		#' @author wojciakw
20		#'
21		#' @examples
22		#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
23		#'
24		#' adhy <- radhy(adsl, seed = 2)
25		#' adhy
26		radhy <- function(adsl,
27		param = c(
28		"TBILI <= 2 times ULN and ALT value category",
29		"TBILI > 2 times ULN and AST value category",
30		"TBILI > 2 times ULN and ALT value category",
31		"TBILI <= 2 times ULN and AST value category",
32		"TBILI > 2 times ULN and ALKPH <= 2 times ULN and ALT value category",
33		"TBILI > 2 times ULN and ALKPH <= 2 times ULN and AST value category",
34		"TBILI > 2 times ULN and ALKPH <= 5 times ULN and ALT value category",
35		"TBILI > 2 times ULN and ALKPH <= 5 times ULN and AST value category",
36		"TBILI <= 2 times ULN and two consecutive elevations of ALT in relation to ULN",
37		"TBILI > 2 times ULN and two consecutive elevations of AST in relation to ULN",
38		"TBILI <= 2 times ULN and two consecutive elevations of AST in relation to ULN",
39		"TBILI > 2 times ULN and two consecutive elevations of ALT in relation to ULN",
40		"TBILI > 2 times ULN and two consecutive elevations of ALT in relation to Baseline",
41		"TBILI <= 2 times ULN and two consecutive elevations of ALT in relation to Baseline",
42		"TBILI > 2 times ULN and two consecutive elevations of AST in relation to Baseline",
43		"TBILI <= 2 times ULN and two consecutive elevations of AST in relation to Baseline",
44		"ALT > 3 times ULN by Period",
45		"AST > 3 times ULN by Period",
46		"ALT or AST > 3 times ULN by Period",
47		"ALT > 3 times Baseline by Period",
48		"AST > 3 times Baseline by Period",
49		"ALT or AST > 3 times Baseline by Period"
50		),
51		paramcd = c(
52		"BLAL",
53		"BGAS",
54		"BGAL",
55		"BLAS",
56		"BA2AL",
57		"BA2AS",
58		"BA5AL",
59		"BA5AS",
60		"BL2AL2CU",
61		"BG2AS2CU",
62		"BL2AS2CU",
63		"BG2AL2CU",
64		"BG2AL2CB",
65		"BL2AL2CB",
66		"BG2AS2CB",
67		"BL2AS2CB",
68		"ALTPULN",
69		"ASTPULN",
70		"ALTASTPU",
71		"ALTPBASE",
72		"ASTPBASE",
73		"ALTASTPB"
74		),
75		seed = NULL,
76		cached = FALSE) {
77	4x	checkmate::assert_flag(cached)
78
79	4x	if (cached) {
80	1x	return(get_cached_data("cadhy"))
81		}
82
83	3x	checkmate::assert_data_frame(adsl)
84	3x	checkmate::assert_character(param, min.len = 1, any.missing = FALSE)
85	3x	checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE)
86	3x	checkmate::assert_number(seed, null.ok = TRUE)
87
88		# validate and initialize related variables
89	3x	param_init_list <- relvar_init(param, paramcd)
90
91	3x	if (!is.null(seed)) {
92	3x	set.seed(seed)
93		}
94	3x	study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))
95
96		# create all combinations of unique values in STUDYID, USUBJID, PARAM, AVISIT
97	3x	adhy <- expand.grid(
98	3x	STUDYID = unique(adsl$STUDYID),
99	3x	USUBJID = adsl$USUBJID,
100	3x	PARAM = as.factor(param_init_list$relvar1),
101	3x	AVISIT = as.factor(c("BASELINE", "POST-BASELINE")),
102	3x	APERIODC = as.factor(c("PERIOD 1", "PERIOD 2")),
103	3x	stringsAsFactors = FALSE
104		)
105
106		# remove records that are not needed and were created as a side product of expand.grid above
107	3x	adhy <- dplyr::filter(adhy, !(AVISIT == "BASELINE" & APERIODC == "PERIOD 2"))
108
109		# define TBILI ALT/AST params, period dependent parameters and the parameters that will be assigned values "Y" or "N"
110	3x	paramcd_tbilialtast <- c("BLAL", "BGAS", "BGAL", "BLAS", "BA2AL", "BA2AS", "BA5AL", "BA5AS")
111	3x	paramcd_by_period <- c("ALTPULN", "ASTPULN", "ALTASTPU", "ALTPBASE", "ASTPBASE", "ALTASTPB")
112	3x	paramcd_yn <- c(
113	3x	"BL2AL2CU", "BG2AS2CU", "BL2AS2CU", "BG2AL2CU", "BG2AL2CB", "BL2AL2CB", "BG2AS2CB", "BL2AS2CB",
114	3x	paramcd_by_period
115		)
116
117		# add other variables to adhy
118	3x	adhy <- adhy %>%
119	3x	rel_var(
120	3x	var_name = "PARAMCD",
121	3x	related_var = "PARAM",
122	3x	var_values = param_init_list$relvar2
123		) %>%
124	3x	dplyr::mutate(
125	3x	AVALC = dplyr::case_when(
126	3x	PARAMCD %in% paramcd_tbilialtast ~ sample(
127	3x	x = c(">3-5ULN", ">5-10ULN", ">10-20ULN", ">20ULN", "Criteria not met"), size = dplyr::n(), replace = TRUE
128		),
129	3x	PARAMCD %in% paramcd_yn ~ sample(
130	3x	x = c("Y", "N"), prob = c(0.1, 0.9), size = dplyr::n(), replace = TRUE
131		)
132		),
133	3x	AVAL = dplyr::case_when(
134	3x	AVALC == ">3-5ULN" ~ 1,
135	3x	AVALC == ">5-10ULN" ~ 2,
136	3x	AVALC == ">10-20ULN" ~ 3,
137	3x	AVALC == ">20ULN" ~ 4,
138	3x	AVALC == "Y" ~ 1,
139	3x	AVALC == "N" ~ 0,
140	3x	AVALC == "Criteria not met" ~ 0
141		),
142	3x	AVISITN = dplyr::case_when(
143	3x	AVISIT == "BASELINE" ~ 0L,
144	3x	AVISIT == "POST-BASELINE" ~ 9995L,
145	3x	TRUE ~ NA_integer_
146		),
147	3x	APERIOD = dplyr::case_when(
148	3x	APERIODC == "PERIOD 1" ~ 1L,
149	3x	APERIODC == "PERIOD 2" ~ 2L,
150	3x	TRUE ~ NA_integer_
151		),
152	3x	ABLFL = dplyr::if_else(AVISIT == "BASELINE", "Y", NA_character_),
153	3x	ONTRTFL = dplyr::if_else(AVISIT == "POST-BASELINE", "Y", NA_character_),
154	3x	ANL01FL = "Y",
155	3x	SRCSEQ = NA_integer_
156		)
157
158		# remove records for parameters with period 2 and not in paramcd_by_period
159	3x	adhy <- dplyr::filter(adhy, PARAMCD %in% paramcd_by_period \| APERIODC == "PERIOD 1")
160
161		# add baseline variables
162	3x	adhy <- adhy %>%
163	3x	dplyr::group_by(USUBJID, PARAMCD) %>%
164	3x	dplyr::mutate(
165	3x	BASEC = AVALC[AVISIT == "BASELINE"],
166	3x	BASE = AVAL[AVISIT == "BASELINE"]
167		) %>%
168	3x	dplyr::ungroup()
169
170	3x	adhy <- adhy %>%
171	3x	rcd_var_relabel(
172	3x	STUDYID = attr(adsl$STUDYID, "label"),
173	3x	USUBJID = attr(adsl$USUBJID, "label")
174		)
175
176		# merge ADSL to be able to add analysis datetime and analysis relative day variables
177	3x	adhy <- dplyr::inner_join(adhy, adsl, by = c("STUDYID", "USUBJID"))
178
179		# define a simple helper function to create ADY variable
180	3x	add_ady <- function(x, avisit) {
181	6x	if (avisit == "BASELINE") {
182	3x	dplyr::mutate(
183	3x	x,
184	3x	ADY = sample(x = -(1:14), size = dplyr::n(), replace = TRUE)
185		)
186	3x	} else if (avisit == "POST-BASELINE") {
187	3x	dplyr::rowwise(x) %>%
188	3x	dplyr::mutate(ADY = as.integer(sample(
189	3x	dplyr::if_else(
190	3x	!is.na(TRTEDTM),
191	3x	as.numeric(difftime(TRTEDTM, TRTSDTM, units = "days")),
192	3x	as.numeric(study_duration_secs, "days")
193		),
194	3x	size = 1,
195	3x	replace = TRUE
196		)))
197		} else {
198	!	dplyr::mutate(x, ADY = NA_integer_)
199		}
200		}
201
202		# add ADY and ADTM variables
203	3x	adhy <- adhy %>%
204	3x	dplyr::group_by(AVISIT, .add = FALSE) %>%
205	3x	dplyr::group_modify(~ add_ady(.x, .y$AVISIT)) %>%
206	3x	dplyr::ungroup() %>%
207	3x	dplyr::mutate(ADTM = TRTSDTM + lubridate::days(ADY))
208
209		# order columns and arrange rows; column order follows ADaM_1.1 specification
210	3x	adhy <-
211	3x	adhy[, c(
212	3x	colnames(adsl),
213	3x	"PARAM",
214	3x	"PARAMCD",
215	3x	"AVAL",
216	3x	"AVALC",
217	3x	"BASE",
218	3x	"BASEC",
219	3x	"ABLFL",
220	3x	"ADTM",
221	3x	"ADY",
222	3x	"AVISIT",
223	3x	"AVISITN",
224	3x	"APERIOD",
225	3x	"APERIODC",
226	3x	"ONTRTFL",
227	3x	"SRCSEQ",
228	3x	"ANL01FL"
229		)]
230
231	3x	adhy <- adhy %>%
232	3x	dplyr::arrange(
233	3x	STUDYID,
234	3x	USUBJID,
235	3x	PARAMCD,
236	3x	AVISITN,
237	3x	ADTM,
238	3x	SRCSEQ
239		)
240
241		# apply metadata
242	3x	adhy <- apply_metadata(adhy, "metadata/ADHY.yml")
243
244	3x	return(adhy)
245		}

#' Exposure Analysis Dataset (ADEX)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating random Exposure Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per each record in the corresponding SDTM domain.
#'
#' Keys: `STUDYID`, `USUBJID`, `EXSEQ`, `PARAMCD`, `PARCAT1`, `ASTDTM`, `AENDTM`, `ASTDY`, `AENDY`,
#' `AVISITN`, `EXDOSFRQ`, `EXROUTE`, `VISIT`, `VISITDY`, `EXSTDTC`, `EXENDTC`, `EXSTDY`, `EXENDY`
#'
#' @inheritParams argument_convention
#' @param parcat1 (`character vector`)\cr Dose amount categories. Defaults to "INDIVIDUAL" and "OVERALL".
#' @param parcat2 (`character vector`)\cr Types of drug received. Defaults to "Drug A" and "Drug B".
#' @param max_n_exs (`integer`)\cr Maximum number of exposures per patient. Defaults to 6.
#' @template param_cached
#' @templateVar data adex
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1)
#'
#' adex <- radex(adsl, seed = 2)
#' adex
radex <- function(adsl,
                  param = c(
                    "Dose administered during constant dosing interval",
                    "Number of doses administered during constant dosing interval",
                    "Total dose administered",
                    "Total number of doses administered"
                  ),
                  paramcd = c("DOSE", "NDOSE", "TDOSE", "TNDOSE"),
                  paramu = c("mg", " ", "mg", " "),
                  parcat1 = c("INDIVIDUAL", "OVERALL"),
                  parcat2 = c("Drug A", "Drug B"),
                  visit_format = "WEEK",
                  n_assessments = 5L,
                  n_days = 5L,
                  max_n_exs = 6L,
                  lookup = NULL,
                  seed = NULL,
                  na_percentage = 0,
                  # The closing parenthesis used to sit after `c(NA`, which left a stray
                  # unnamed `0.1` element in the list instead of pairing it with AVALU.
                  na_vars = list(AVAL = c(NA, 0.1), AVALU = c(NA, 0.1)),
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadex"))
  }

  # NOTE(review): `parcat1`, `max_n_exs` and `lookup` are validated below but are
  # not otherwise read in this function; PARCAT1 values are hard-coded further down.
  checkmate::assert_data_frame(adsl)
  checkmate::assert_character(param, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramu, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(parcat1, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(parcat2, min.len = 1, any.missing = FALSE)
  checkmate::assert_string(visit_format)
  checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE)
  checkmate::assert_integer(n_days, len = 1, any.missing = FALSE)
  checkmate::assert_integer(max_n_exs, len = 1, any.missing = FALSE)
  checkmate::assert_data_frame(lookup, null.ok = TRUE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  # validate and initialize related variables
  param_init_list <- relvar_init(param, paramcd)
  unit_init_list <- relvar_init(param, paramu)

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  # The visit schedule is needed several times with identical arguments, so it is
  # built once here (assumes visit_schedule() is deterministic for fixed arguments;
  # the original code already relied on repeated calls agreeing with each other).
  visits <- visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days)
  n_visits <- length(levels(visits))

  # The first two parameters get one row per visit level, the last two a single
  # row each, for every STUDYID x USUBJID combination.
  adex <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = adsl$USUBJID,
    PARAM = c(
      rep(param_init_list$relvar1[1], n_visits),
      rep(param_init_list$relvar1[2], n_visits),
      param_init_list$relvar1[3:4]
    ),
    stringsAsFactors = FALSE
  )

  # assign related variable values: PARAMxPARAMCD are related
  adex <- adex %>% rel_var(
    var_name = "PARAMCD",
    related_var = "PARAM",
    var_values = param_init_list$relvar2
  )

  # assign related variable values: AVALUxPARAM are related
  adex <- adex %>% rel_var(
    var_name = "AVALU",
    related_var = "PARAM",
    var_values = unit_init_list$relvar2
  )

  # Draw one drug per subject. The USUBJID grouping is deliberately NOT dropped
  # here: the AVISIT assignment below depends on it.
  adex <- adex %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(PARCAT_ind = sample(c(1, 2), size = 1)) %>%
    dplyr::mutate(PARCAT2 = ifelse(PARCAT_ind == 1, parcat2[1], parcat2[2])) %>%
    dplyr::select(-"PARCAT_ind")

  # Add in PARCAT1
  adex <- adex %>% dplyr::mutate(PARCAT1 = dplyr::case_when(
    (PARAMCD == "TNDOSE" | PARAMCD == "TDOSE") ~ "OVERALL",
    PARAMCD == "DOSE" | PARAMCD == "NDOSE" ~ "INDIVIDUAL"
  ))

  # Per subject (data is still grouped by USUBJID) the DOSE and NDOSE rows each
  # receive the full visit schedule, hence the schedule is repeated twice.
  adex_visit <- adex %>%
    dplyr::filter(PARAMCD == "DOSE" | PARAMCD == "NDOSE") %>%
    dplyr::mutate(AVISIT = rep(visits, 2))

  # Attach AVISIT back onto the full data; a within-group row counter (`id`)
  # serves as the join key so that the repeated rows line up one-to-one.
  adex <- dplyr::left_join(
    adex %>%
      dplyr::group_by(
        USUBJID,
        STUDYID,
        PARAM,
        PARAMCD,
        AVALU,
        PARCAT1,
        PARCAT2
      ) %>%
      dplyr::mutate(id = dplyr::row_number()),
    adex_visit %>%
      dplyr::group_by(
        USUBJID,
        STUDYID,
        PARAM,
        PARAMCD,
        AVALU,
        PARCAT1,
        PARCAT2
      ) %>%
      dplyr::mutate(id = dplyr::row_number()),
    by = c("USUBJID", "STUDYID", "PARCAT1", "PARCAT2", "id", "PARAMCD", "PARAM", "AVALU")
  ) %>%
    dplyr::select(-"id")

  # Visit numbers
  adex <- adex %>% dplyr::mutate(AVISITN = dplyr::case_when(
    AVISIT == "SCREENING" ~ -1,
    AVISIT == "BASELINE" ~ 0,
    (grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
    TRUE ~ 999000
  ))

  # Simulate the DOSE values per subject: each non-screening visit draws a step
  # of -1, 0 or +1, the running sum of the steps selects the dose level.
  adex2 <- split(adex, adex$USUBJID) %>%
    lapply(function(pinfo) {
      pinfo %>%
        dplyr::filter(PARAMCD == "DOSE") %>%
        dplyr::group_by(USUBJID, PARCAT2, AVISIT) %>%
        dplyr::mutate(changeind = dplyr::case_when(
          AVISIT == "SCREENING" ~ 0,
          AVISIT != "SCREENING" ~ sample(c(-1, 0, 1),
            size = 1,
            prob = c(0.25, 0.5, 0.25),
            replace = TRUE
          )
        )) %>%
        dplyr::ungroup() %>%
        dplyr::group_by(USUBJID, PARCAT2) %>%
        dplyr::mutate(
          csum = cumsum(changeind),
          # redraw the step where the running sum reached the +/-3 boundary
          changeind = dplyr::case_when(
            csum <= -3 ~ sample(c(0, 1), size = 1, prob = c(0.5, 0.5)),
            csum >= 3 ~ sample(c(0, -1), size = 1, prob = c(0.5, 0.5)),
            TRUE ~ changeind
          )
        ) %>%
        dplyr::mutate(csum = cumsum(changeind)) %>%
        dplyr::ungroup() %>%
        dplyr::group_by(USUBJID, PARCAT2, AVISIT) %>%
        dplyr::mutate(AVAL = dplyr::case_when(
          csum == -2 ~ 480,
          csum == -1 ~ 720,
          csum == 0 ~ 960,
          csum == 1 ~ 1200,
          csum == 2 ~ 1440
        )) %>%
        dplyr::select(-c("csum", "changeind")) %>%
        dplyr::ungroup()
    }) %>%
    Reduce(rbind, .)

  # Combine the simulated DOSE rows with the remaining parameters and derive
  # NDOSE (always 1), TNDOSE (sum of NDOSE) and TDOSE (sum of DOSE).
  adex_tmp <- dplyr::full_join(adex2, adex, by = names(adex))
  adex <- adex_tmp %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(AVAL = ifelse(PARAMCD == "NDOSE", 1, AVAL)) %>%
    dplyr::mutate(AVAL = ifelse(
      PARAMCD == "TNDOSE",
      sum(AVAL[PARAMCD == "NDOSE"]),
      AVAL
    )) %>%
    dplyr::ungroup() %>%
    dplyr::group_by(USUBJID, STUDYID, PARCAT2) %>%
    dplyr::mutate(AVAL = ifelse(
      PARAMCD == "TDOSE",
      sum(AVAL[PARAMCD == "DOSE"]),
      AVAL
    ))

  adex <- rcd_var_relabel(
    adex,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # merge ADSL to be able to add ADEX date and study day variables
  adex <- dplyr::inner_join(adex, adsl, by = c("STUDYID", "USUBJID")) %>%
    dplyr::rowwise() %>%
    dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
      is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
      TRUE ~ TRTEDTM
    ))) %>%
    dplyr::mutate(ASTDTM = sample(
      seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"),
      size = 1
    )) %>%
    # add 1 to end of range in case both values passed to sample() are the same
    dplyr::mutate(AENDTM = sample(
      seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"),
      size = 1
    )) %>%
    dplyr::select(-TRTENDT) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(STUDYID, USUBJID, ASTDTM)

  # sequence numbers follow the ASTDTM ordering within each subject
  adex <- adex %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(EXSEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = EXSEQ) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      PARAMCD,
      ASTDTM,
      AVISITN,
      EXSEQ
    )

  # Adding EXDOSFRQ
  adex <- adex %>%
    dplyr::mutate(EXDOSFRQ = dplyr::case_when(
      PARCAT1 == "INDIVIDUAL" ~ "ONCE",
      TRUE ~ ""
    ))

  # Adding EXROUTE
  adex <- adex %>%
    dplyr::mutate(EXROUTE = dplyr::case_when(
      PARCAT1 == "INDIVIDUAL" ~ sample(c("INTRAVENOUS", "SUBCUTANEOUS"),
        nrow(adex),
        replace = TRUE,
        prob = c(0.9, 0.1)
      ),
      TRUE ~ ""
    ))

  # Fix VISIT according to AVISIT
  adex <- adex %>%
    dplyr::mutate(VISIT = AVISIT)

  # Hack for VISITDY - to fix in ADSL
  # The second number found in each visit label is taken as the visit day.
  # NOTE(review): the two hard-coded leading values (-1, 1) presume exactly two
  # visit levels without a second number in their label - confirm this holds
  # for every supported `visit_format`.
  visit_levels <- str_extract(levels(adex$VISIT), pattern = "[0-9]+")
  vl_extracted <- vapply(visit_levels, function(x) as.numeric(x[2]), numeric(1))
  vl_extracted <- c(-1, 1, vl_extracted[!is.na(vl_extracted)])

  # Adding VISITDY
  adex <- adex %>%
    dplyr::mutate(VISITDY = as.numeric(as.character(factor(VISIT, labels = vl_extracted))))

  # Exposure time stamps
  adex <- adex %>%
    dplyr::mutate(
      EXSTDTC = TRTSDTM + lubridate::days(VISITDY),
      EXENDTC = EXSTDTC + lubridate::hours(1),
      EXSTDY = VISITDY,
      EXENDY = VISITDY
    )

  # Correcting last exposure to treatment
  # Grouped by USUBJID (the join key used everywhere else in this function)
  # rather than SUBJID, so the correction is always per unique subject.
  adex <- adex %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(TRTEDTM = lubridate::as_datetime(max(EXENDTC, na.rm = TRUE))) %>%
    dplyr::ungroup()

  # Fixing Date - to add into ADSL
  adex <- adex %>%
    dplyr::mutate(
      TRTSDT = lubridate::date(TRTSDTM),
      TRTEDT = lubridate::date(TRTEDTM)
    )

  # Fixing analysis time stamps
  adex <- adex %>%
    dplyr::mutate(
      ASTDY = EXSTDY,
      AENDY = EXENDY,
      ASTDTM = EXSTDTC,
      AENDTM = EXENDTC
    )

  if (length(na_vars) > 0 && na_percentage > 0) {
    adex <- mutate_na(ds = adex, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  adex <- apply_metadata(adex, "metadata/ADEX.yml")

  # Return visibly, like the other dataset generators in this file; ending on
  # the assignment above would return the value invisibly.
  return(adex)
}
326
# Base R stand-in for stringr::str_extract_all(): returns a list holding, for
# each element of `string`, a character vector with every match of `pattern`.
str_extract <- function(string, pattern) {
  match_positions <- gregexpr(pattern, string)
  regmatches(string, match_positions)
}

#' Laboratory Data Analysis Dataset (ADLB)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Laboratory Data Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per subject per parameter per analysis visit per analysis date.
#'
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `BASETYPE`, `AVISITN`, `ATPTN`, `DTYPE`, `ADTM`, `LBSEQ`, `ASPID`
#'
#' @inheritParams argument_convention
#' @param lbcat (`character vector`)\cr LB category values.
#' @param aval_mean (`numeric vector`)\cr Mean values used to simulate `AVAL` for the first three
#'   entries of `param`. Defaults to `c(18, 9, 2.9)`.
#' @param max_n_lbs (`integer`)\cr Maximum number of labs per patient. Defaults to 10.
#' @template param_cached
#' @templateVar data adlb
#'
#' @return `data.frame`
#' @export
#'
#' @author tomlinsj, npaszty, Xuefeng Hou
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adlb <- radlb(adsl, visit_format = "WEEK", n_assessments = 7L, seed = 2)
#' adlb
#'
#' adlb <- radlb(adsl, visit_format = "CYCLE", n_assessments = 2L, seed = 2)
#' adlb
radlb <- function(adsl,
                  lbcat = c("CHEMISTRY", "CHEMISTRY", "IMMUNOLOGY"),
                  param = c(
                    "Alanine Aminotransferase Measurement",
                    "C-Reactive Protein Measurement",
                    "Immunoglobulin A Measurement"
                  ),
                  paramcd = c("ALT", "CRP", "IGA"),
                  paramu = c("U/L", "mg/L", "g/L"),
                  aval_mean = c(18, 9, 2.9),
                  visit_format = "WEEK",
                  n_assessments = 5L,
                  n_days = 5L,
                  max_n_lbs = 10L,
                  lookup = NULL,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(
                    LOQFL = c(NA, 0.1), ABLFL2 = c(1234, 0.1), ABLFL = c(1235, 0.1),
                    BASE2 = c(NA, 0.1), BASE = c(NA, 0.1),
                    CHG2 = c(1235, 0.1), PCHG2 = c(1235, 0.1), CHG = c(1234, 0.1), PCHG = c(1234, 0.1)
                  ),
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadlb"))
  }

  # NOTE(review): `max_n_lbs` and `lookup` are validated below but are not
  # otherwise read in this function.
  checkmate::assert_data_frame(adsl)
  checkmate::assert_character(param, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramu, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(lbcat, min.len = 1, any.missing = FALSE)
  checkmate::assert_string(visit_format)
  checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE)
  checkmate::assert_integer(n_days, len = 1, any.missing = FALSE)
  checkmate::assert_integer(max_n_lbs, len = 1, any.missing = FALSE)
  checkmate::assert_data_frame(lookup, null.ok = TRUE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  # validate and initialize related variables
  lbcat_init_list <- relvar_init(param, lbcat)
  param_init_list <- relvar_init(param, paramcd)
  unit_init_list <- relvar_init(param, paramu)

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  # one row per STUDYID x USUBJID x PARAM x AVISIT combination
  adlb <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = adsl$USUBJID,
    PARAM = as.factor(param_init_list$relvar1),
    AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days),
    stringsAsFactors = FALSE
  )

  # assign AVAL based on different tests
  # Only the first three entries of `param` / `aval_mean` are used; rows of any
  # further parameter are left without a matching case.
  adlb <- adlb %>% dplyr::mutate(AVAL = dplyr::case_when(
    PARAM == param[1] ~ abs(stats::rnorm(nrow(adlb), mean = aval_mean[1], sd = 10)),
    PARAM == param[2] ~ abs(stats::rnorm(nrow(adlb), mean = aval_mean[2], sd = 1)),
    PARAM == param[3] ~ abs(stats::rnorm(nrow(adlb), mean = aval_mean[3], sd = 0.1))
  ))

  # assign related variable values: PARAMxLBCAT are related
  adlb <- adlb %>% rel_var(
    var_name = "LBCAT",
    related_var = "PARAM",
    var_values = lbcat_init_list$relvar2
  )

  # assign related variable values: PARAMxPARAMCD are related
  adlb <- adlb %>% rel_var(
    var_name = "PARAMCD",
    related_var = "PARAM",
    var_values = param_init_list$relvar2
  )

  adlb <- adlb %>%
    dplyr::mutate(LBTESTCD = PARAMCD) %>%
    dplyr::mutate(LBTEST = PARAM)

  # Visit numbers: SCREENING = -1, BASELINE = 0, scheduled visits are numbered
  # from the factor level position of AVISIT.
  # (This derivation used to be repeated verbatim after the AVALU assignment;
  # the identical second copy has been dropped.)
  adlb <- adlb %>% dplyr::mutate(AVISITN = dplyr::case_when(
    AVISIT == "SCREENING" ~ -1,
    AVISIT == "BASELINE" ~ 0,
    (grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
    TRUE ~ NA_real_
  ))

  # assign related variable values: AVALUxPARAM are related
  adlb <- adlb %>% rel_var(
    var_name = "AVALU",
    related_var = "PARAM",
    var_values = unit_init_list$relvar2
  )

  # order to prepare for change from screening and baseline values
  adlb <- adlb[order(adlb$STUDYID, adlb$USUBJID, adlb$PARAMCD, adlb$AVISITN), ]

  # per subject: take STUDYID from ADSL and flag the screening / baseline records
  adlb <- Reduce(rbind, lapply(split(adlb, adlb$USUBJID), function(x) {
    x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])]
    x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "")
    x$ABLFL <- ifelse(toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE",
      "Y",
      ifelse(toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1", "Y", "")
    )
    x
  }))

  adlb$BASE2 <- retain(adlb, adlb$AVAL, adlb$ABLFL2 == "Y")
  adlb$BASE <- ifelse(adlb$ABLFL2 != "Y", retain(adlb, adlb$AVAL, adlb$ABLFL == "Y"), NA)

  # NOTE: inside the pipeline below, `adlb` (in `retain(adlb, ...)` and
  # `nrow(adlb)`) refers to the data as it is before this assignment completes.
  adlb <- adlb %>%
    dplyr::mutate(
      CHG2 = AVAL - BASE2,
      PCHG2 = 100 * (CHG2 / BASE2),
      CHG = AVAL - BASE,
      PCHG = 100 * (CHG / BASE),
      BASETYPE = "LAST",
      ANRLO = dplyr::case_when(
        PARAMCD == "ALT" ~ 7,
        PARAMCD == "CRP" ~ 8,
        PARAMCD == "IGA" ~ 0.8
      ),
      ANRHI = dplyr::case_when(
        PARAMCD == "ALT" ~ 55,
        PARAMCD == "CRP" ~ 10,
        PARAMCD == "IGA" ~ 3
      ),
      ANRIND = factor(dplyr::case_when(
        AVAL < ANRLO ~ "LOW",
        AVAL > ANRHI ~ "HIGH",
        TRUE ~ "NORMAL"
      )),
      LBSTRESC = factor(dplyr::case_when(
        PARAMCD == "ALT" ~ "<7",
        PARAMCD == "CRP" ~ "<8",
        PARAMCD == "IGA" ~ ">3"
      ))
    ) %>%
    dplyr::rowwise() %>%
    # LOQFL: the comparison is built as text (e.g. "6.2 <7") and evaluated row
    # by row. NOTE(review): eval(parse()) only sees the hard-coded LBSTRESC
    # strings above; a PARAMCD outside ALT/CRP/IGA yields no valid expression.
    dplyr::mutate(LOQFL = factor(
      ifelse(eval(parse(text = paste(AVAL, LBSTRESC))), "Y", "N")
    )) %>%
    dplyr::ungroup() %>%
    dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>%
    dplyr::mutate(BNRIND = ANRIND[ABLFL == "Y"]) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(SHIFT1 = factor(ifelse(
      AVISITN > 0,
      paste(
        retain(
          adlb, as.character(BNRIND),
          AVISITN == 0
        ),
        ANRIND,
        sep = " to "
      ),
      ""
    ))) %>%
    # toxicity grade: negative grades for LOW, positive for HIGH, "0" for NORMAL;
    # the fifth grade has sampling probability 0
    dplyr::mutate(ATOXGR = factor(dplyr::case_when(
      ANRIND == "LOW" ~ sample(
        c("-1", "-2", "-3", "-4", "-5"),
        nrow(adlb),
        replace = TRUE,
        prob = c(0.30, 0.25, 0.20, 0.15, 0)
      ),
      ANRIND == "HIGH" ~ sample(
        c("1", "2", "3", "4", "5"),
        nrow(adlb),
        replace = TRUE,
        prob = c(0.30, 0.25, 0.20, 0.15, 0)
      ),
      ANRIND == "NORMAL" ~ "0"
    ))) %>%
    dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>%
    dplyr::mutate(BTOXGR = ATOXGR[ABLFL == "Y"]) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(ATPTN = 1) %>%
    dplyr::mutate(DTYPE = NA) %>%
    # low / high direction variants of the baseline and analysis toxicity grades
    dplyr::mutate(BTOXGRL = factor(dplyr::case_when(
      BTOXGR == "0" ~ "0",
      BTOXGR == "-1" ~ "1",
      BTOXGR == "-2" ~ "2",
      BTOXGR == "-3" ~ "3",
      BTOXGR == "-4" ~ "4",
      BTOXGR == "1" ~ "<Missing>",
      BTOXGR == "2" ~ "<Missing>",
      BTOXGR == "3" ~ "<Missing>",
      BTOXGR == "4" ~ "<Missing>"
    ))) %>%
    dplyr::mutate(BTOXGRH = factor(dplyr::case_when(
      BTOXGR == "0" ~ "0",
      BTOXGR == "1" ~ "1",
      BTOXGR == "2" ~ "2",
      BTOXGR == "3" ~ "3",
      BTOXGR == "4" ~ "4",
      BTOXGR == "-1" ~ "<Missing>",
      BTOXGR == "-2" ~ "<Missing>",
      BTOXGR == "-3" ~ "<Missing>",
      BTOXGR == "-4" ~ "<Missing>"
    ))) %>%
    dplyr::mutate(ATOXGRL = factor(dplyr::case_when(
      ATOXGR == "0" ~ "0",
      ATOXGR == "-1" ~ "1",
      ATOXGR == "-2" ~ "2",
      ATOXGR == "-3" ~ "3",
      ATOXGR == "-4" ~ "4",
      ATOXGR == "1" ~ "<Missing>",
      ATOXGR == "2" ~ "<Missing>",
      ATOXGR == "3" ~ "<Missing>",
      ATOXGR == "4" ~ "<Missing>"
    ))) %>%
    dplyr::mutate(ATOXGRH = factor(dplyr::case_when(
      ATOXGR == "0" ~ "0",
      ATOXGR == "1" ~ "1",
      ATOXGR == "2" ~ "2",
      ATOXGR == "3" ~ "3",
      ATOXGR == "4" ~ "4",
      ATOXGR == "-1" ~ "<Missing>",
      ATOXGR == "-2" ~ "<Missing>",
      ATOXGR == "-3" ~ "<Missing>",
      ATOXGR == "-4" ~ "<Missing>"
    ))) %>%
    rcd_var_relabel(
      STUDYID = attr(adsl$STUDYID, "label"),
      USUBJID = attr(adsl$USUBJID, "label")
    )

  # High and low descriptions of the different PARAMCD values
  # This is currently hard coded as the GDSR does not have these descriptions yet
  grade_lookup <- tibble::tribble(
    ~PARAMCD, ~ATOXDSCL, ~ATOXDSCH,
    "ALB", "Hypoalbuminemia", NA_character_,
    "ALKPH", NA_character_, "Alkaline phosphatase increased",
    "ALT", NA_character_, "Alanine aminotransferase increased",
    "AST", NA_character_, "Aspartate aminotransferase increased",
    "BILI", NA_character_, "Blood bilirubin increased",
    "CA", "Hypocalcemia", "Hypercalcemia",
    "CHOLES", NA_character_, "Cholesterol high",
    "CK", NA_character_, "CPK increased",
    "CREAT", NA_character_, "Creatinine increased",
    "CRP", NA_character_, "C reactive protein increased",
    "GGT", NA_character_, "GGT increased",
    "GLUC", "Hypoglycemia", "Hyperglycemia",
    "HGB", "Anemia", "Hemoglobin increased",
    "IGA", NA_character_, "Immunoglobulin A increased",
    "POTAS", "Hypokalemia", "Hyperkalemia",
    "LYMPH", "CD4 lymphocytes decreased", NA_character_,
    "PHOS", "Hypophosphatemia", NA_character_,
    "PLAT", "Platelet count decreased", NA_character_,
    "SODIUM", "Hyponatremia", "Hypernatremia",
    "WBC", "White blood cell decreased", "Leukocytosis"
  )

  # merge grade_lookup onto adlb
  adlb <- dplyr::left_join(adlb, grade_lookup, by = "PARAMCD")

  adlb <- rcd_var_relabel(
    adlb,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # merge ADSL to be able to add LB date and study day variables
  adlb <- dplyr::inner_join(
    adlb,
    adsl,
    by = c("STUDYID", "USUBJID")
  ) %>%
    dplyr::rowwise() %>%
    dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
      is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
      TRUE ~ TRTEDTM
    ))) %>%
    dplyr::ungroup()

  # Per subject, draw one sorted random date per visit level between treatment
  # start and end, and repeat each date across the rows sharing that visit.
  adlb <- adlb %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::arrange(USUBJID, AVISITN) %>%
    dplyr::mutate(ADTM = rep(
      sort(sample(
        seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"),
        size = nlevels(AVISIT)
      )),
      each = dplyr::n() / nlevels(AVISIT)
    )) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>%
    dplyr::select(-TRTENDT) %>%
    dplyr::arrange(STUDYID, USUBJID, ADTM)

  adlb <- adlb %>%
    dplyr::mutate(ASPID = sample(seq_len(dplyr::n()))) %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(LBSEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = LBSEQ) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      PARAMCD,
      BASETYPE,
      AVISITN,
      ATPTN,
      DTYPE,
      ADTM,
      LBSEQ,
      ASPID
    )

  adlb <- adlb %>% dplyr::mutate(ONTRTFL = factor(dplyr::case_when(
    !AVISIT %in% c("SCREENING", "BASELINE") ~ "Y",
    TRUE ~ ""
  )))

  # Helper adding a flag column `new_var` ("Y" / "") to `data`.
  #   apply_grouping: TRUE groups by USUBJID/PARAMCD/BASETYPE/AVISIT, otherwise
  #                   by USUBJID/PARAMCD/BASETYPE.
  #   apply_filter:   TRUE keeps post-baseline records with the maximum ATOXGR of
  #                   the group, FALSE those with the minimum ATOXGR, any other
  #                   value those with the minimum AVAL.
  #   apply_mutate:   TRUE flags the selected record when DTYPE is missing,
  #                   otherwise it additionally requires a non-missing AVAL.
  # Within each group the first record by ADTM, ASPID, LBSEQ is selected.
  flag_variables <- function(data,
                             apply_grouping,
                             apply_filter,
                             apply_mutate) {
    data_compare <- data %>%
      dplyr::mutate(row_check = seq_len(nrow(data)))

    data <- data_compare %>%
      {
        if (isTRUE(apply_grouping)) {
          dplyr::group_by(., USUBJID, PARAMCD, BASETYPE, AVISIT)
        } else {
          dplyr::group_by(., USUBJID, PARAMCD, BASETYPE)
        }
      } %>%
      dplyr::arrange(ADTM, ASPID, LBSEQ) %>%
      {
        if (isTRUE(apply_filter)) {
          dplyr::filter(
            .,
            (AVISIT != "BASELINE" & AVISIT != "SCREENING") &
              (ONTRTFL == "Y" | ADTM <= TRTSDTM)
          ) %>%
            dplyr::filter(ATOXGR == max(as.numeric(as.character(ATOXGR))))
        } else if (isFALSE(apply_filter)) {
          dplyr::filter(
            .,
            (AVISIT != "BASELINE" & AVISIT != "SCREENING") &
              (ONTRTFL == "Y" | ADTM <= TRTSDTM)
          ) %>%
            dplyr::filter(ATOXGR == min(as.numeric(as.character(ATOXGR))))
        } else {
          dplyr::filter(
            .,
            AVAL == min(AVAL) &
              (AVISIT != "BASELINE" & AVISIT != "SCREENING") &
              (ONTRTFL == "Y" | ADTM <= TRTSDTM)
          )
        }
      } %>%
      dplyr::slice(1) %>%
      {
        if (isTRUE(apply_mutate)) {
          dplyr::mutate(., new_var = ifelse(is.na(DTYPE), "Y", ""))
        } else {
          dplyr::mutate(., new_var = ifelse(!is.na(AVAL) & is.na(DTYPE), "Y", ""))
        }
      } %>%
      dplyr::ungroup()

    # Flag only the selected rows whose `new_var` condition holds. Previously the
    # condition computed above was discarded and every selected row was flagged.
    flagged_rows <- data$row_check[data$new_var == "Y"]
    data_compare$new_var <- ifelse(data_compare$row_check %in% flagged_rows, "Y", "")

    data_compare <- data_compare[, -which(names(data_compare) %in% c("row_check"))]

    return(data_compare)
  }

  adlb <- flag_variables(adlb, TRUE, "ELSE", FALSE) %>% dplyr::rename(WORS01FL = "new_var")
  adlb <- flag_variables(adlb, FALSE, TRUE, TRUE) %>% dplyr::rename(WGRHIFL = "new_var")
  adlb <- flag_variables(adlb, FALSE, FALSE, TRUE) %>% dplyr::rename(WGRLOFL = "new_var")
  adlb <- flag_variables(adlb, TRUE, TRUE, TRUE) %>% dplyr::rename(WGRHIVFL = "new_var")
  adlb <- flag_variables(adlb, TRUE, FALSE, TRUE) %>% dplyr::rename(WGRLOVFL = "new_var")

  adlb <- adlb %>% dplyr::mutate(ANL01FL = ifelse(
    (ABLFL == "Y" | (WORS01FL == "Y" & is.na(DTYPE))) &
      (AVISIT != "SCREENING"),
    "Y",
    ""
  ))

  if (length(na_vars) > 0 && na_percentage > 0) {
    adlb <- mutate_na(ds = adlb, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  adlb <- apply_metadata(adlb, "metadata/ADLB.yml")

  return(adlb)
}

#' Medical History Analysis Dataset (ADMH)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Medical History Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per each record in the corresponding SDTM domain.
#'
#' For every subject in `adsl` a random number (between 0 and `max_n_mhs`) of rows is drawn
#' with replacement from the lookup table. Start and end datetimes are then sampled on a
#' daily grid between the subject's treatment start and treatment end. When `TRTEDTM` is
#' missing, treatment end is derived from the `study_duration_secs` attribute of `adsl`.
#'
#' Keys: `STUDYID`, `USUBJID`, `ASTDTM`, `MHSEQ`
#'
#' @inheritParams argument_convention
#' @param max_n_mhs (`integer`)\cr Maximum number of MHs per patient. Defaults to 10.
#' @template param_cached
#' @templateVar data admh
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1)
#'
#' admh <- radmh(adsl, seed = 2)
#' admh
radmh <- function(adsl,
                  max_n_mhs = 10L,
                  lookup = NULL,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(MHBODSYS = c(NA, 0.1), MHDECOD = c(1234, 0.1)),
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadmh"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_integer(max_n_mhs, len = 1, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  checkmate::assert_data_frame(lookup, null.ok = TRUE)
  lookup_mh <- if (!is.null(lookup)) {
    lookup
  } else {
    tibble::tribble(
      ~MHBODSYS, ~MHDECOD, ~MHSOC,
      "cl A", "trm A_1/2", "cl A",
      "cl A", "trm A_2/2", "cl A",
      "cl B", "trm B_1/3", "cl B",
      "cl B", "trm B_2/3", "cl B",
      "cl B", "trm B_3/3", "cl B",
      "cl C", "trm C_1/2", "cl C",
      "cl C", "trm C_2/2", "cl C",
      "cl D", "trm D_1/3", "cl D",
      "cl D", "trm D_2/3", "cl D",
      "cl D", "trm D_3/3", "cl D"
    )
  }

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  # draw a random number of lookup rows (with replacement) for every subject
  admh <- Map(
    function(id, sid) {
      n_mhs <- sample(0:max_n_mhs, 1)
      i <- sample(seq_len(nrow(lookup_mh)), n_mhs, TRUE)
      dplyr::mutate(
        lookup_mh[i, ],
        USUBJID = id,
        STUDYID = sid
      )
    },
    adsl$USUBJID,
    adsl$STUDYID
  ) %>%
    Reduce(rbind, .) %>%
    # move the identifiers to the front by name so that a user-supplied lookup
    # with a different number of columns is handled correctly as well
    dplyr::select("USUBJID", "STUDYID", dplyr::everything()) %>%
    dplyr::mutate(MHTERM = MHDECOD)

  admh <- rcd_var_relabel(
    admh,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # merge ADSL to be able to add MH date and study day variables
  admh <- dplyr::inner_join(
    admh,
    adsl,
    by = c("STUDYID", "USUBJID")
  ) %>%
    dplyr::rowwise() %>%
    # temporary treatment end date; falls back to start + study duration when TRTEDTM is missing
    dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
      is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
      TRUE ~ TRTEDTM
    ))) %>%
    dplyr::mutate(ASTDTM = sample(
      seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"),
      size = 1
    )) %>%
    dplyr::mutate(ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))) %>%
    # add 1 to end of range in case both values passed to sample() are the same
    dplyr::mutate(AENDTM = sample(
      seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"),
      size = 1
    )) %>%
    dplyr::mutate(AENDY = ceiling(difftime(AENDTM, TRTSDTM, units = "days"))) %>%
    dplyr::select(-TRTENDT) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(STUDYID, USUBJID, ASTDTM, MHTERM) %>%
    dplyr::mutate(MHDISTAT = sample(
      x = c("Resolved", "Ongoing with treatment", "Ongoing without treatment"),
      prob = c(0.6, 0.2, 0.2),
      size = dplyr::n(),
      replace = TRUE
    )) %>%
    dplyr::mutate(ATIREL = dplyr::case_when(
      (AENDTM < TRTSDTM | (is.na(AENDTM) & MHDISTAT == "Resolved")) ~ "PRIOR",
      (AENDTM >= TRTSDTM | (is.na(AENDTM) & grepl("Ongoing", MHDISTAT))) ~ "PRIOR_CONCOMITANT"
    ))

  # sequence numbers within subject, following the ASTDTM / MHTERM ordering above
  admh <- admh %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(MHSEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = MHSEQ) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(STUDYID, USUBJID, ASTDTM, MHSEQ)

  # na_percentage is already guaranteed to lie in [0, 1) by the assertions above
  if (length(na_vars) > 0 && na_percentage > 0) {
    admh <- mutate_na(ds = admh, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  admh <- apply_metadata(admh, "metadata/ADMH.yml")

  return(admh)
}

#' Time-to-Event Analysis Dataset (ADTTE)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Time-to-Event Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per subject per parameter.
#'
#' For every subject the rows of the lookup table whose `ARM` equals the subject's `ACTARMCD`
#' are used as parameters. Event descriptions are drawn so that once a `"Death"` event has
#' occurred all later parameters of that subject are `"Death"` too; the last parameter is
#' either `"Death"` or `"Alive"`. One additional `"TNE"` (Total Number of Exacerbations)
#' record is appended per subject.
#'
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`
#'
#' @inheritParams argument_convention
#' @inheritParams radaette
#' @template param_cached
#' @templateVar data adtte
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adtte <- radtte(adsl, seed = 2)
#' adtte
radtte <- function(adsl,
                   event_descr = NULL,
                   censor_descr = NULL,
                   lookup = NULL,
                   seed = NULL,
                   na_percentage = 0,
                   na_vars = list(CNSR = c(NA, 0.1), AVAL = c(1234, 0.1), AVALU = c(1234, 0.1)),
                   cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadtte"))
  }

  checkmate::assert_data_frame(adsl)
  # NOTE(review): `censor_descr` is validated here but not used further down; the censoring
  # descriptions are derived from EVNTDESC with fixed strings.
  checkmate::assert_character(censor_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(event_descr, null.ok = TRUE, min.len = 1, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  checkmate::assert_data_frame(lookup, null.ok = TRUE)
  lookup_tte <- if (!is.null(lookup)) {
    lookup
  } else {
    tibble::tribble(
      ~ARM, ~PARAMCD, ~PARAM, ~LAMBDA, ~CNSR_P,
      "ARM A", "EFS", "Event Free Survival", log(2) / 365, 0.4,
      "ARM B", "EFS", "Event Free Survival", log(2) / 305, 0.3,
      "ARM C", "EFS", "Event Free Survival", log(2) / 243, 0.2,
      "ARM A", "CRSD", "Duration of Confirmed Response", log(2) / 305, 0.4,
      "ARM B", "CRSD", "Duration of Confirmed Response", log(2) / 243, 0.3,
      "ARM C", "CRSD", "Duration of Confirmed Response", log(2) / 182, 0.2,
      "ARM A", "PFS", "Progression Free Survival", log(2) / 365, 0.4,
      "ARM B", "PFS", "Progression Free Survival", log(2) / 305, 0.3,
      "ARM C", "PFS", "Progression Free Survival", log(2) / 243, 0.2,
      "ARM A", "OS", "Overall Survival", log(2) / 610, 0.4,
      "ARM B", "OS", "Overall Survival", log(2) / 490, 0.3,
      "ARM C", "OS", "Overall Survival", log(2) / 365, 0.2
    )
  }

  evntdescr_sel <- if (!is.null(event_descr)) {
    event_descr
  } else {
    c(
      "Death",
      "Disease Progression",
      "Last Tumor Assessment",
      "Adverse Event",
      "Alive"
    )
  }

  # one row per subject and parameter of the subject's arm
  adtte <- split(adsl, adsl$USUBJID) %>%
    lapply(FUN = function(pinfo) {
      lookup_tte %>%
        dplyr::filter(ARM == as.character(pinfo$ACTARMCD)) %>%
        dplyr::rowwise() %>%
        dplyr::mutate(
          STUDYID = pinfo$STUDYID,
          SITEID = pinfo$SITEID,
          USUBJID = pinfo$USUBJID,
          AVALU = "DAYS"
        ) %>%
        dplyr::select(-"LAMBDA", -"CNSR_P")
    }) %>%
    Reduce(rbind, .) %>%
    rcd_var_relabel(
      STUDYID = "Study Identifier",
      USUBJID = "Unique Subject Identifier"
    )

  # Loop through each patient and randomly assign a value for EVNTDESC
  adtte_split <- split(adtte, adtte$USUBJID)

  # Add EVNTDESC and CNSR columns.
  # All random event draws happen in this pass (before any AVAL draw) so that results for a
  # given seed do not depend on how the later derivations are organised.
  adtte_lst <- lapply(adtte_split, function(split_df) {
    n_rows <- nrow(split_df)
    split_df$EVNTDESC <- NA_character_
    for (i in seq_len(n_rows)) {
      # The patient cannot die and then come back to life: once "Death" has been assigned,
      # every following row must be "Death" as well.
      death_seen <- any(grepl("Death", split_df$EVNTDESC))
      split_df$EVNTDESC[i] <- if (i == 1) {
        sample(evntdescr_sel[1:4], 1, prob = c(0.1, 0.3, 0.4, 0.2))
      } else if (death_seen) {
        "Death"
      } else if (i != n_rows) {
        sample(evntdescr_sel[1:4], 1)
      } else {
        # last row (OS in the default lookup) can only be "Death" or "Alive"
        "Alive"
      }
    }

    # "Death" and "Disease Progression" are events (CNSR = 0), everything else is censored
    split_df$CNSR <- ifelse(
      split_df$EVNTDESC == "Death" | split_df$EVNTDESC == "Disease Progression",
      0,
      1
    )
    split_df
  })

  # Add AVAL and CNSDTDSC columns
  adtte_lst <- lapply(adtte_lst, function(split_df) {
    split_df$AVAL <- NA_real_
    for (i in seq_len(nrow(split_df))) {
      # positions of "Death" events strictly before the current row
      prior_deaths <- grep("Death", split_df$EVNTDESC[seq_len(i - 1)])
      split_df$AVAL[i] <- if (i == 1) {
        stats::runif(1, 15, 100)
      } else if (length(prior_deaths) > 0) {
        # the patient cannot live longer than the time of the first death record
        split_df$AVAL[prior_deaths[1]]
      } else if (i == 2) {
        stats::runif(1, 100, 200)
      } else if (i == 3) {
        stats::runif(1, 200, 300)
      } else if (i == 4) {
        stats::runif(1, 300, 500)
      } else {
        # no range is defined beyond the fourth parameter
        NA_real_
      }
    }

    # censoring description only applies to censored records
    split_df$CNSDTDSC <- dplyr::case_when(
      split_df$CNSR == 1 & split_df$EVNTDESC == "Last Tumor Assessment" ~ "Completion or Discontinuation",
      split_df$CNSR == 1 & split_df$EVNTDESC == "Adverse Event" ~ "Preferred Term",
      split_df$CNSR == 1 & split_df$EVNTDESC == "Alive" ~ "Alive During Study",
      TRUE ~ ""
    )
    split_df
  })

  # Take the split df and combine them back together
  adtte <- do.call("rbind", adtte_lst)
  rownames(adtte) <- NULL

  adtte <- rcd_var_relabel(
    adtte,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # merge ADSL to be able to add TTE date and study day variables
  adtte <- dplyr::inner_join(
    dplyr::select(adtte, -"SITEID", -"ARM"),
    adsl,
    by = c("STUDYID", "USUBJID")
  ) %>%
    dplyr::rowwise() %>%
    # temporary treatment end date; falls back to start + study duration when TRTEDTM is missing
    dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
      is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
      TRUE ~ TRTEDTM
    ))) %>%
    dplyr::mutate(ADTM = sample(
      seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"),
      size = 1
    )) %>%
    dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>%
    dplyr::select(-TRTENDT) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(STUDYID, USUBJID, ADTM)

  adtte <- adtte %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(TTESEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = TTESEQ) %>%
    dplyr::mutate(PARAM = as.factor(PARAM)) %>%
    dplyr::mutate(PARAMCD = as.factor(PARAMCD)) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      PARAMCD,
      ADTM,
      TTESEQ
    )

  # adding adverse event counts and log follow-up time:
  # one extra "TNE" record per subject, built from the subject's first record
  adtte <- dplyr::bind_rows(
    adtte,
    data.frame(
      adtte %>%
        dplyr::group_by(USUBJID) %>%
        dplyr::slice_head(n = 1) %>%
        dplyr::mutate(
          PARAMCD = "TNE",
          PARAM = "Total Number of Exacerbations",
          AVAL = stats::rpois(1, 3),
          AVALU = "COUNT",
          lgTMATRSK = log(stats::rexp(1, rate = 3)),
          dplyr::across(
            c("ASEQ", "TTESEQ", "ADY", "ADTM", "EVNTDESC"),
            ~NA
          )
        )
    )
  ) %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      PARAMCD,
      ADTM,
      TTESEQ
    )

  if (length(na_vars) > 0 && na_percentage > 0) {
    adtte <- mutate_na(ds = adtte, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  adtte <- apply_metadata(adtte, "metadata/ADTTE.yml")

  return(adtte)
}

#' Protocol Deviations Analysis Dataset (ADDV)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating random Protocol Deviations Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per each record in the corresponding SDTM domain.
#'
#' Keys: `STUDYID`, `USUBJID`, `ASTDT`, `DVTERM`, `DVSEQ`
#'
#' @inheritParams argument_convention
#' @param max_n_dv (`integer`)\cr Maximum number of deviations per patient. Defaults to 3.
#' @param p_dv (`proportion`)\cr Probability of a patient having protocol deviations.
#' @template param_cached
#' @templateVar data addv
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' addv <- raddv(adsl, seed = 2)
#' addv
raddv <- function(adsl,
                  max_n_dv = 3L,
                  p_dv = 0.15,
                  lookup = NULL,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(
                    "ASTDT" = c(seed = 1234, percentage = 0.1),
                    "DVCAT" = c(seed = 1234, percentage = 0.1)
                  ),
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("caddv"))
  }

  # argument validation
  checkmate::assert_data_frame(adsl)
  checkmate::assert_integer(max_n_dv, len = 1, lower = 1, any.missing = FALSE)
  checkmate::assert_number(p_dv, lower = .Machine$double.xmin, upper = 1)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  # deviation dictionary: user supplied, or the built-in default below
  checkmate::assert_data_frame(lookup, null.ok = TRUE)
  dv_lookup <- if (is.null(lookup)) {
    tibble::tribble(
      ~DOMAIN, ~DVCAT, ~DVDECOD, ~DVTERM, ~DVREAS, ~DVEPRELI,
      "DV", "MAJOR", "EXCLUSION CRITERIA", "Received prior prohibited therapy or medication", "", "N",
      "DV", "MAJOR", "EXCLUSION CRITERIA", "Active or untreated or other excluded cns metastases", "", "N",
      "DV", "MAJOR", "EXCLUSION CRITERIA", "History of other malignancies within the last 5 years", "", "N",
      "DV", "MAJOR", "EXCLUSION CRITERIA", "Uncontrolled concurrent condition", "", "N",
      "DV", "MAJOR", "EXCLUSION CRITERIA", "Other exclusion criteria", "", "N",
      "DV", "MAJOR", "EXCLUSION CRITERIA", "Pregnancy criteria", "", "N",
      "DV", "MAJOR", "INCLUSION CRITERIA", "Does not meet prior therapy requirements", "", "N",
      "DV", "MAJOR", "INCLUSION CRITERIA", "Inclusion lab values outside allowed limits", "", "N",
      "DV", "MAJOR", "INCLUSION CRITERIA", "No signed ICF at study entry", "", "N",
      "DV", "MAJOR", "INCLUSION CRITERIA", "Inclusion-related test not done/out of window", "", "N",
      "DV", "MAJOR", "INCLUSION CRITERIA", "Ineligible cancer type or current cancer stage", "", "N",
      "DV", "MAJOR", "MEDICATION", "Dose missed or significantly out of window",
      "Site action due to epidemic/pandemic", "Y",
      "DV", "MAJOR", "MEDICATION", "Received incorrect study medication", "", "N",
      "DV", "MAJOR", "MEDICATION", "Received prohibited concomitant medication", "", "N",
      "DV", "MAJOR", "MEDICATION", "Discontinued study drug for unspecified reason", "", "N",
      "DV", "MAJOR", "MEDICATION", "Significant deviation from planned dose",
      "Site action due to epidemic/pandemic", "Y",
      "DV", "MAJOR", "PROCEDURAL", "Missed assessment affecting safety/study outcomes", "", "N",
      "DV", "MAJOR", "PROCEDURAL", "Eligibility-related test not done/out of window", "", "N",
      "DV", "MAJOR", "PROCEDURAL", "Failure to sign updated ICF within two visits",
      "Site action due to epidemic/pandemic", "Y",
      "DV", "MAJOR", "PROCEDURAL", "Omission of complete lab panel required by protocol", "", "N",
      "DV", "MAJOR", "PROCEDURAL", "Omission of screening tumor assessment", "", "N",
      "DV", "MAJOR", "PROCEDURAL", "Missed 2 or more efficacy assessments",
      "Site action due to epidemic/pandemic", "Y"
    )
  } else {
    lookup
  }

  # Per subject: a Bernoulli(p_dv) draw decides whether the subject has deviations at all,
  # then a count is drawn from c(1, 1..max_n_dv) (the value 1 is listed twice in that set).
  draw_subject_dv <- function(usubjid, studyid) {
    has_dv <- stats::rbinom(1, 1, p_dv)
    n_drawn <- sample(c(1, seq_len(max_n_dv)), 1)
    picked <- sample(seq_len(nrow(dv_lookup)), has_dv * n_drawn, TRUE)
    dplyr::mutate(dv_lookup[picked, ], USUBJID = usubjid, STUDYID = studyid)
  }
  per_subject <- Map(draw_subject_dv, adsl$USUBJID, adsl$STUDYID)

  addv <- dplyr::mutate(Reduce(rbind, per_subject), DVSCAT = DVCAT)
  addv <- rcd_var_relabel(
    addv,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # merge ADSL to be able to add deviation date and study day variables
  addv <- addv %>%
    dplyr::inner_join(adsl, by = c("STUDYID", "USUBJID")) %>%
    dplyr::rowwise() %>%
    # temporary treatment end date; derived from the study duration when TRTEDTM is missing
    dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
      is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
      TRUE ~ TRTEDTM
    ))) %>%
    # deviation datetime: one random day between treatment start and end
    dplyr::mutate(ASTDTM = sample(
      seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"),
      size = 1
    )) %>%
    dplyr::mutate(
      ASTDT = lubridate::date(ASTDTM),
      ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))
    ) %>%
    dplyr::select(-TRTENDT, -ASTDTM) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(STUDYID, USUBJID, ASTDT, DVTERM)

  # sequence number within subject, then the epidemic/pandemic related flag
  addv <- addv %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(DVSEQ = seq_len(dplyr::n())) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(STUDYID, USUBJID, ASTDT, DVTERM, DVSEQ) %>%
    dplyr::mutate(AEPRELFL = ifelse(DVEPRELI == "Y", DVEPRELI, ""))

  if (length(na_vars) > 0 && na_percentage > 0) {
    addv <- mutate_na(ds = addv, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  addv <- apply_metadata(addv, "metadata/ADDV.yml")

  return(addv)
}

#' Tumor Response Analysis Dataset (ADRS)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Tumor Response Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details
#' One record per subject per parameter per analysis visit per analysis date.
#' SDTM variables are populated on new records coming from other single records.
#' Otherwise, SDTM variables are left blank.
#'
#' Per subject, six by-visit responses (`OVRINV`) are drawn from the arm-specific
#' probabilities in the lookup table. The best response record (`BESRSPI`) carries the
#' response with the smallest code in `avalc` together with the first visit at which it was
#' observed; the `INVET` record repeats the end-of-induction response.
#'
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `AVISITN`, `ADT`, `RSSEQ`
#'
#' @inheritParams argument_convention
#' @param avalc (`character vector`)\cr Analysis value categories.
#' @template param_cached
#' @templateVar data adrs
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adrs <- radrs(adsl, seed = 2)
#' adrs
radrs <- function(adsl,
                  avalc = NULL,
                  lookup = NULL,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(AVISIT = c(NA, 0.1), AVAL = c(1234, 0.1), AVALC = c(1234, 0.1)),
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadrs"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_vector(avalc, null.ok = TRUE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  # named vector: names are the response categories, values their numeric codes
  # (a smaller code is treated as a better response below)
  param_codes <- if (!is.null(avalc)) {
    avalc
  } else {
    stats::setNames(1:5, c("CR", "PR", "SD", "PD", "NE"))
  }

  checkmate::assert_data_frame(lookup, null.ok = TRUE)
  lookup_ars <- if (!is.null(lookup)) {
    lookup
  } else {
    # response probabilities per arm (rows vary fastest by ARM, then by AVALC)
    expand.grid(
      ARM = c("A: Drug X", "B: Placebo", "C: Combination"),
      AVALC = names(param_codes)
    ) %>% dplyr::mutate(
      AVAL = param_codes[AVALC],
      p_scr = c(rep(0, 3), rep(0, 3), c(1, 1, 1), c(0, 0, 0), c(0, 0, 0)),
      p_bsl = c(rep(0, 3), rep(0, 3), c(1, 1, 1), c(0, 0, 0), c(0, 0, 0)),
      p_cycle = c(c(.4, .3, .5), c(.35, .25, .25), c(.1, .2, .08), c(.14, 0.15, 0.15), c(.01, 0.1, 0.02)),
      p_eoi = c(c(.4, .3, .5), c(.35, .25, .25), c(.1, .2, .08), c(.14, 0.15, 0.15), c(.01, 0.1, 0.02)),
      p_fu = c(c(.3, .2, .4), c(.2, .1, .3), c(.2, .2, .2), c(.3, .5, 0.1), rep(0, 3))
    )
  }

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  adrs <- split(adsl, adsl$USUBJID) %>%
    lapply(function(pinfo) {
      probs <- dplyr::filter(lookup_ars, ARM == as.character(pinfo$ACTARM))

      # screening
      rsp_screen <- sample(probs$AVALC, 1, prob = probs$p_scr) %>% as.character()

      # baseline
      rsp_bsl <- sample(probs$AVALC, 1, prob = probs$p_bsl) %>% as.character()

      # cycle
      rsp_c2d1 <- sample(probs$AVALC, 1, prob = probs$p_cycle) %>% as.character()
      rsp_c4d1 <- sample(probs$AVALC, 1, prob = probs$p_cycle) %>% as.character()

      # end of induction
      rsp_eoi <- sample(probs$AVALC, 1, prob = probs$p_eoi) %>% as.character()

      # follow up
      rsp_fu <- sample(probs$AVALC, 1, prob = probs$p_fu) %>% as.character()

      avisit <- c("SCREENING", "BASELINE", "CYCLE 2 DAY 1", "CYCLE 4 DAY 1", "END OF INDUCTION", "FOLLOW UP")

      # Responses in the same order as `avisit`, so that the position of the best response
      # can be used directly to look up the visit at which it was observed.
      visit_rsp <- c(rsp_screen, rsp_bsl, rsp_c2d1, rsp_c4d1, rsp_eoi, rsp_fu)
      best_rsp_i <- which.min(param_codes[visit_rsp])
      # take the category from the responses themselves rather than indexing
      # names(param_codes) by the code value, which only works for codes 1..n
      best_rsp <- visit_rsp[best_rsp_i]

      # meaningful date information
      trtstdt <- lubridate::date(pinfo$TRTSDTM)
      trtendt <- lubridate::date(dplyr::if_else(
        !is.na(pinfo$TRTEDTM), pinfo$TRTEDTM,
        lubridate::floor_date(trtstdt + study_duration_secs, unit = "day")
      ))
      scr_date <- trtstdt - lubridate::days(100)
      bs_date <- trtstdt
      flu_date <- sample(seq(lubridate::as_datetime(trtstdt), lubridate::as_datetime(trtendt), by = "day"), size = 1)
      eoi_date <- sample(seq(lubridate::as_datetime(trtstdt), lubridate::as_datetime(trtendt), by = "day"), size = 1)
      c2d1_date <- sample(seq(lubridate::as_datetime(trtstdt), lubridate::as_datetime(trtendt), by = "day"), size = 1)
      # cycle 4 is 60 days after cycle 2 but never later than treatment end
      c4d1_date <- min(lubridate::date(c2d1_date + lubridate::days(60)), trtendt)

      # six by-visit records plus one best-response and one end-of-induction record
      tibble::tibble(
        STUDYID = pinfo$STUDYID,
        SITEID = pinfo$SITEID,
        USUBJID = pinfo$USUBJID,
        PARAMCD = as.factor(c(rep("OVRINV", 6), "BESRSPI", "INVET")),
        PARAM = as.factor(dplyr::recode(
          PARAMCD,
          OVRINV = "Overall Response by Investigator - by visit",
          OVRSPI = "Best Overall Response by Investigator (no confirmation required)",
          BESRSPI = "Best Confirmed Overall Response by Investigator",
          INVET = "Investigator End Of Induction Response"
        )),
        AVALC = c(
          visit_rsp,
          best_rsp,
          rsp_eoi
        ),
        AVAL = param_codes[AVALC],
        AVISIT = factor(c(avisit, avisit[best_rsp_i], avisit[5]), levels = avisit)
      ) %>%
        # attach visit-level date, visit number and study day
        merge(
          tibble::tibble(
            AVISIT = avisit,
            ADTM = c(scr_date, bs_date, c2d1_date, c4d1_date, eoi_date, flu_date),
            AVISITN = c(-1, 0, 2, 4, 999, 999),
            TRTSDTM = pinfo$TRTSDTM
          ) %>%
            dplyr::mutate(
              ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))
            ) %>%
            dplyr::select(-"TRTSDTM"),
          by = "AVISIT"
        )
    }) %>%
    Reduce(rbind, .) %>%
    dplyr::mutate(AVALC = factor(AVALC, levels = names(param_codes))) %>%
    rcd_var_relabel(
      STUDYID = "Study Identifier",
      USUBJID = "Unique Subject Identifier"
    )

  # merge ADSL to be able to add RS date and study day variables
  adrs <- dplyr::inner_join(
    dplyr::select(adrs, -"SITEID"),
    adsl,
    by = c("STUDYID", "USUBJID")
  )

  adrs <- adrs %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(RSSEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = RSSEQ) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      PARAMCD,
      AVISITN,
      ADTM,
      RSSEQ
    )

  if (length(na_vars) > 0 && na_percentage > 0) {
    adrs <- mutate_na(ds = adrs, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  adrs <- apply_metadata(adrs, "metadata/ADRS.yml")

  return(adrs)
}

#' Previous and Concomitant Medications Analysis Dataset (ADCM)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating random Concomitant Medication Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per each record in the corresponding SDTM domain.
#'
#' For every subject in `adsl` a random number (between 0 and `max_n_cms`) of rows is drawn
#' with replacement from the lookup table. With `who_coding = TRUE`, records of randomly
#' selected `CMDECOD` values are duplicated with alternative `ATC` paths (suffixes `p2`, `p3`).
#'
#' Keys: `STUDYID`, `USUBJID`, `ASTDTM`, `CMSEQ`
#'
#' @inheritParams argument_convention
#' @param max_n_cms (`integer`)\cr Maximum number of concomitant medications per patient. Defaults to 10.
#' @param who_coding (`flag`)\cr Whether WHO coding (with multiple paths per medication) should be used.
#' @template param_cached
#' @templateVar data adcm
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adcm <- radcm(adsl, seed = 2)
#' adcm
#'
#' adcm_who <- radcm(adsl, seed = 2, who_coding = TRUE)
#' adcm_who
radcm <- function(adsl,
                  max_n_cms = 10L,
                  lookup = NULL,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(CMCLAS = c(NA, 0.1), CMDECOD = c(1234, 0.1), ATIREL = c(1234, 0.1)),
                  who_coding = FALSE,
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadcm"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_integer(max_n_cms, len = 1, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)
  checkmate::assert_flag(who_coding)

  checkmate::assert_data_frame(lookup, null.ok = TRUE)
  lookup_cm <- if (!is.null(lookup)) {
    lookup
  } else {
    tibble::tribble(
      ~CMCLAS, ~CMDECOD, ~ATIREL,
      "medcl A", "medname A_1/3", "PRIOR",
      "medcl A", "medname A_2/3", "CONCOMITANT",
      "medcl A", "medname A_3/3", "CONCOMITANT",
      "medcl B", "medname B_1/4", "CONCOMITANT",
      "medcl B", "medname B_2/4", "PRIOR",
      "medcl B", "medname B_3/4", "PRIOR",
      "medcl B", "medname B_4/4", "CONCOMITANT",
      "medcl C", "medname C_1/2", "CONCOMITANT",
      "medcl C", "medname C_2/2", "CONCOMITANT"
    )
  }

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  # draw a random number of lookup rows (with replacement) for every subject
  adcm <- Map(function(id, sid) {
    n_cms <- sample(c(0, seq_len(max_n_cms)), 1)
    i <- sample(seq_len(nrow(lookup_cm)), n_cms, TRUE)
    dplyr::mutate(
      lookup_cm[i, ],
      USUBJID = id,
      STUDYID = sid
    )
  }, adsl$USUBJID, adsl$STUDYID) %>%
    Reduce(rbind, .) %>%
    # move the identifiers to the front by name so that a user-supplied lookup
    # with a different number of columns is handled correctly as well
    dplyr::select("USUBJID", "STUDYID", dplyr::everything()) %>%
    dplyr::mutate(CMCAT = CMCLAS)

  adcm <- rcd_var_relabel(
    adcm,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # merge ADSL to be able to add CM date and study day variables
  adcm <- dplyr::inner_join(
    adcm,
    adsl,
    by = c("STUDYID", "USUBJID")
  ) %>%
    dplyr::rowwise() %>%
    # temporary treatment end date; falls back to start + study duration when TRTEDTM is missing
    dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
      is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
      TRUE ~ TRTEDTM
    ))) %>%
    dplyr::mutate(ASTDTM = sample(
      seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"),
      size = 1
    )) %>%
    dplyr::mutate(ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))) %>%
    # add 1 to end of range in case both values passed to sample() are the same
    dplyr::mutate(AENDTM = sample(
      seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"),
      size = 1
    )) %>%
    dplyr::mutate(AENDY = ceiling(difftime(AENDTM, TRTSDTM, units = "days"))) %>%
    dplyr::select(-TRTENDT) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(STUDYID, USUBJID, ASTDTM)

  adcm <- adcm %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(CMSEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = CMSEQ) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(STUDYID, USUBJID, ASTDTM, CMSEQ) %>%
    # character 9 of CMDECOD is the class letter in the default lookup ("medname A_1/3")
    dplyr::mutate(
      ATC1 = paste("ATCCLAS1", substr(CMDECOD, 9, 9)),
      ATC2 = paste("ATCCLAS2", substr(CMDECOD, 9, 9)),
      ATC3 = paste("ATCCLAS3", substr(CMDECOD, 9, 9)),
      ATC4 = paste("ATCCLAS4", substr(CMDECOD, 9, 9))
    ) %>%
    dplyr::mutate(CMINDC = sample(c(
      "Nausea", "Hypertension", "Urticaria", "Fever",
      "Asthma", "Infection", "Diabete", "Diarrhea", "Pneumonia"
    ), dplyr::n(), replace = TRUE)) %>%
    dplyr::mutate(CMDOSE = sample(1:99, dplyr::n(), replace = TRUE)) %>%
    dplyr::mutate(CMTRT = substr(CMDECOD, 9, 13)) %>%
    dplyr::mutate(CMDOSU = sample(c(
      "ug/mL", "ug/kg/day", "%", "uL", "DROP",
      "umol/L", "mg", "mg/breath", "ug"
    ), dplyr::n(), replace = TRUE)) %>%
    dplyr::mutate(CMROUTE = sample(c(
      "INTRAVENOUS", "ORAL", "NASAL",
      "INTRAMUSCULAR", "SUBCUTANEOUS", "INHALED", "RECTAL", "UNKNOWN"
    ), dplyr::n(), replace = TRUE)) %>%
    dplyr::mutate(CMDOSFRQ = sample(c(
      "Q4W", "QN", "Q4H", "UNKNOWN", "TWICE",
      "Q4H", "QD", "TID", "4 TIMES PER MONTH"
    ), dplyr::n(), replace = TRUE)) %>%
    dplyr::mutate(
      # use 1 year as reference time point
      CMSTRTPT = dplyr::case_when(
        ASTDY <= 365 ~ "BEFORE",
        ASTDY > 365 ~ "AFTER",
        is.na(ASTDY) ~ "U"
      ),
      CMENRTPT = dplyr::case_when(
        EOSSTT %in% c("COMPLETED", "DISCONTINUED") ~ "BEFORE",
        EOSSTT == "ONGOING" ~ "ONGOING",
        is.na(EOSSTT) ~ "U"
      ),
      # duration is end minus start; AENDTM is sampled at or after ASTDTM, so this is >= 0
      ADURN = as.numeric(difftime(AENDTM, ASTDTM, units = "days")),
      ADURU = "days"
    )


  # Optional WHO coding, which adds more `ATC` paths for randomly selected `CMDECOD`.
  if (who_coding) {
    # half of the medications (rounded up) get a second path
    n_cmdecod_path2 <- ceiling(nrow(lookup_cm) / 2)
    cmdecod_path2 <- sample(lookup_cm$CMDECOD, n_cmdecod_path2)
    adcm_path2 <- adcm %>%
      dplyr::filter(CMDECOD %in% cmdecod_path2) %>%
      dplyr::mutate(
        ATC1 = paste(ATC1, "p2"),
        ATC2 = paste(ATC2, "p2"),
        ATC3 = paste(ATC3, "p2"),
        ATC4 = paste(ATC4, "p2")
      )

    # half of those (rounded up) additionally get a third path
    n_cmdecod_path3 <- ceiling(length(cmdecod_path2) / 2)
    cmdecod_path3 <- sample(cmdecod_path2, n_cmdecod_path3)
    adcm_path3 <- adcm %>%
      dplyr::filter(CMDECOD %in% cmdecod_path3) %>%
      dplyr::mutate(
        ATC1 = paste(ATC1, "p3"),
        ATC2 = paste(ATC2, "p3"),
        ATC3 = paste(ATC3, "p3"),
        ATC4 = paste(ATC4, "p3")
      )

    adcm <- dplyr::bind_rows(
      adcm,
      adcm_path2,
      adcm_path3
    )
  }

  # the code variables are copies of the corresponding ATC text variables
  adcm <- adcm %>%
    dplyr::mutate(
      ATC1CD = ATC1,
      ATC2CD = ATC2,
      ATC3CD = ATC3,
      ATC4CD = ATC4
    )

  if (length(na_vars) > 0 && na_percentage > 0) {
    adcm <- mutate_na(ds = adcm, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  adcm <- apply_metadata(adcm, "metadata/ADCM.yml")

  return(adcm)
}

#' Questionnaires Analysis Dataset (ADQS)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Questionnaires Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per subject per parameter per analysis visit per analysis date.
#'
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `AVISITN`
#'
#' @inheritParams argument_convention
#' @template param_cached
#' @templateVar data adqs
#'
#' @return `data.frame`
#' @export
#'
#' @author npaszty
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adqs <- radqs(adsl, visit_format = "WEEK", n_assessments = 7L, seed = 2)
#' adqs
#'
#' adqs <- radqs(adsl, visit_format = "CYCLE", n_assessments = 3L, seed = 2)
#' adqs
radqs <- function(adsl,
                  param = c(
                    "BFI All Questions",
                    "Fatigue Interference",
                    "Function/Well-Being (GF1,GF3,GF7)",
                    "Treatment Side Effects (GP2,C5,GP5)",
                    "FKSI-19 All Questions"
                  ),
                  paramcd = c("BFIALL", "FATIGI", "FKSI-FWB", "FKSI-TSE", "FKSIALL"),
                  visit_format = "WEEK",
                  n_assessments = 5L,
                  n_days = 5L,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(
                    LOQFL = c(NA, 0.1), ABLFL2 = c(1234, 0.1), ABLFL = c(1235, 0.1),
                    CHG2 = c(1235, 0.1), PCHG2 = c(1235, 0.1), CHG = c(1234, 0.1), PCHG = c(1234, 0.1)
                  ),
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadqs"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_character(param, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE)
  checkmate::assert_string(visit_format)
  checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE)
  checkmate::assert_integer(n_days, len = 1, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  # validate and initialize param vectors
  param_init_list <- relvar_init(param, paramcd)

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  # one row per study x subject x parameter x visit
  adqs <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = adsl$USUBJID,
    PARAM = param_init_list$relvar1,
    AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days),
    stringsAsFactors = FALSE
  )

  # AVISITN is derived from the position of the visit among the AVISIT levels
  adqs <- dplyr::mutate(
    adqs,
    AVISITN = dplyr::case_when(
      AVISIT == "SCREENING" ~ -1,
      AVISIT == "BASELINE" ~ 0,
      (grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
      TRUE ~ NA_real_
    )
  )

  # assign related variable values: PARAMxPARAMCD are related
  adqs <- adqs %>% rel_var(
    var_name = "PARAMCD",
    related_var = "PARAM",
    var_values = param_init_list$relvar2
  )

  # random level around 50 plus a random per-record slope scaled by the visit number
  adqs$AVAL <- stats::rnorm(nrow(adqs), mean = 50, sd = 8) +
    adqs$AVISITN * stats::rnorm(nrow(adqs), mean = 5, sd = 2)

  # order to prepare for change from screening and baseline values
  adqs <- adqs[order(adqs$STUDYID, adqs$USUBJID, adqs$PARAMCD, adqs$AVISITN), ]

  # per subject: take STUDYID from ADSL and flag screening / baseline / LOQ records
  adqs <- Reduce(
    rbind,
    lapply(
      split(adqs, adqs$USUBJID),
      function(x) {
        x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])]
        x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "")
        x$ABLFL <- ifelse(
          toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE",
          "Y",
          ifelse(
            toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1",
            "Y",
            ""
          )
        )
        x$LOQFL <- ifelse(x$AVAL < 32, "Y", "N")
        x
      }
    )
  )

  # BASE2 carries the screening value, BASE the baseline value (NA on screening rows)
  adqs$BASE2 <- retain(adqs, adqs$AVAL, adqs$ABLFL2 == "Y")
  adqs$BASE <- ifelse(adqs$ABLFL2 != "Y", retain(adqs, adqs$AVAL, adqs$ABLFL == "Y"), NA)

  adqs <- adqs %>%
    dplyr::mutate(CHG2 = AVAL - BASE2) %>%
    dplyr::mutate(PCHG2 = 100 * (CHG2 / BASE2)) %>%
    dplyr::mutate(CHG = AVAL - BASE) %>%
    dplyr::mutate(PCHG = 100 * (CHG / BASE)) %>%
    rcd_var_relabel(
      STUDYID = attr(adsl$STUDYID, "label"),
      USUBJID = attr(adsl$USUBJID, "label")
    )

  adqs <- rcd_var_relabel(
    adqs,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # merge ADSL to be able to add QS date and study day variables
  adqs <- dplyr::inner_join(
    adqs,
    adsl,
    by = c("STUDYID", "USUBJID")
  ) %>%
    dplyr::rowwise() %>%
    # when treatment end is missing, fall back to treatment start + study duration
    dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
      is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
      TRUE ~ TRTEDTM
    ))) %>%
    dplyr::ungroup()

  # draw one sorted date per visit level for each subject and repeat it over the parameters
  adqs <- adqs %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::arrange(USUBJID, AVISITN) %>%
    dplyr::mutate(ADTM = rep(
      sort(sample(
        seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"),
        size = nlevels(AVISIT)
      )),
      each = dplyr::n() / nlevels(AVISIT)
    )) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>%
    dplyr::select(-TRTENDT) %>%
    dplyr::arrange(STUDYID, USUBJID, ADTM)

  # sequence numbers within subject, then sort by the dataset keys
  adqs <- adqs %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(QSSEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = QSSEQ) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      PARAMCD,
      AVISITN,
      ADTM,
      QSSEQ
    )

  if (length(na_vars) > 0 && na_percentage > 0) {
    adqs <- mutate_na(ds = adqs, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  adqs <- apply_metadata(adqs, "metadata/ADQS.yml")

  return(adqs)
}

#' Vital Signs Analysis Dataset (ADVS)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Vital Signs Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per subject per parameter per analysis visit per analysis date.
#'
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `BASETYPE`, `AVISITN`, `ATPTN`, `DTYPE`, `ADTM`, `VSSEQ`, `ASPID`
#'
#' @inheritParams argument_convention
#' @template param_cached
#' @templateVar data advs
#'
#' @return `data.frame`
#' @export
#'
#' @author npaszty
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' advs <- radvs(adsl, visit_format = "WEEK", n_assessments = 7L, seed = 2)
#' advs
#'
#' advs <- radvs(adsl, visit_format = "CYCLE", n_assessments = 3L, seed = 2)
#' advs
radvs <- function(adsl,
                  param = c(
                    "Diastolic Blood Pressure",
                    "Pulse Rate",
                    "Respiratory Rate",
                    "Systolic Blood Pressure",
                    "Temperature", "Weight"
                  ),
                  paramcd = c("DIABP", "PULSE", "RESP", "SYSBP", "TEMP", "WEIGHT"),
                  paramu = c("Pa", "beats/min", "breaths/min", "Pa", "C", "Kg"),
                  visit_format = "WEEK",
                  n_assessments = 5L,
                  n_days = 5L,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(
                    CHG2 = c(1235, 0.1), PCHG2 = c(1235, 0.1), CHG = c(1234, 0.1), PCHG = c(1234, 0.1),
                    AVAL = c(123, 0.1), AVALU = c(123, 0.1)
                  ),
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadvs"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_character(param, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramu, min.len = 1, any.missing = FALSE)
  checkmate::assert_string(visit_format)
  checkmate::assert_integer(n_assessments, len = 1, any.missing = FALSE)
  checkmate::assert_integer(n_days, len = 1, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  # validate and initialize param vectors
  param_init_list <- relvar_init(param, paramcd)
  unit_init_list <- relvar_init(param, paramu)

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  # one row per study x subject x parameter x visit.
  # `n_days` is forwarded so the argument is actually honoured (it used to be
  # validated and then ignored).
  # NOTE(review): assumes the default of `visit_schedule(n_days =)` equals this
  # function's default, so default output is unchanged - confirm.
  advs <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = adsl$USUBJID,
    PARAM = as.factor(param_init_list$relvar1),
    AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days),
    stringsAsFactors = FALSE
  )

  # AVISITN is derived from the position of the visit among the AVISIT levels
  advs <- dplyr::mutate(
    advs,
    AVISITN = dplyr::case_when(
      AVISIT == "SCREENING" ~ -1,
      AVISIT == "BASELINE" ~ 0,
      (grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
      TRUE ~ NA_real_
    )
  )

  advs$VSCAT <- "VITAL SIGNS"

  # assign related variable values: PARAMxPARAMCD are related
  advs <- advs %>% rel_var(
    var_name = "PARAMCD",
    related_var = "PARAM",
    var_values = param_init_list$relvar2
  )

  # assign related variable values: PARAMxAVALU are related
  advs <- advs %>% rel_var(
    var_name = "AVALU",
    related_var = "PARAM",
    var_values = unit_init_list$relvar2
  )

  advs <- advs %>%
    dplyr::mutate(VSTESTCD = PARAMCD) %>%
    dplyr::mutate(VSTEST = PARAM)

  # parameter-specific distributions, matched by position in `paramcd`
  advs <- advs %>% dplyr::mutate(AVAL = dplyr::case_when(
    PARAMCD == paramcd[1] ~ stats::rnorm(nrow(advs), mean = 100, sd = 20),
    PARAMCD == paramcd[2] ~ stats::rnorm(nrow(advs), mean = 80, sd = 15),
    PARAMCD == paramcd[3] ~ stats::rnorm(nrow(advs), mean = 16, sd = 5),
    PARAMCD == paramcd[4] ~ stats::rnorm(nrow(advs), mean = 150, sd = 30),
    PARAMCD == paramcd[5] ~ stats::rnorm(nrow(advs), mean = 36.65, sd = 1),
    PARAMCD == paramcd[6] ~ stats::rnorm(nrow(advs), mean = 70, sd = 20)
  ))

  # order to prepare for change from screening and baseline values
  advs <- advs[order(advs$STUDYID, advs$USUBJID, advs$PARAMCD, advs$AVISITN), ]

  # per subject: take STUDYID from ADSL and flag screening / baseline records
  advs <- Reduce(rbind, lapply(split(advs, advs$USUBJID), function(x) {
    x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])]
    x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "")
    x$ABLFL <- ifelse(
      toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE",
      "Y",
      ifelse(
        toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1",
        "Y",
        ""
      )
    )
    x
  }))

  # BASE2 carries the screening value, BASE the baseline value (NA on screening rows)
  advs$BASE2 <- retain(advs, advs$AVAL, advs$ABLFL2 == "Y")
  advs$BASE <- ifelse(advs$ABLFL2 != "Y", retain(advs, advs$AVAL, advs$ABLFL == "Y"), NA)

  advs <- advs %>%
    dplyr::mutate(CHG2 = AVAL - BASE2) %>%
    dplyr::mutate(PCHG2 = 100 * (CHG2 / BASE2)) %>%
    dplyr::mutate(CHG = AVAL - BASE) %>%
    dplyr::mutate(PCHG = 100 * (CHG / BASE)) %>%
    dplyr::mutate(ANRLO = dplyr::case_when(
      PARAMCD == "DIABP" ~ 80,
      PARAMCD == "PULSE" ~ 60,
      PARAMCD == "RESP" ~ 12,
      PARAMCD == "SYSBP" ~ 120,
      PARAMCD == "TEMP" ~ 36.1,
      PARAMCD == "WEIGHT" ~ 40
    )) %>%
    dplyr::mutate(ANRHI = dplyr::case_when(
      PARAMCD == "DIABP" ~ 120,
      PARAMCD == "PULSE" ~ 100,
      PARAMCD == "RESP" ~ 20,
      PARAMCD == "SYSBP" ~ 180,
      PARAMCD == "TEMP" ~ 37.2,
      PARAMCD == "WEIGHT" ~ 100
    )) %>%
    dplyr::mutate(ANRIND = factor(dplyr::case_when(
      AVAL < ANRLO ~ "LOW",
      AVAL > ANRHI ~ "HIGH",
      TRUE ~ "NORMAL"
    ))) %>%
    dplyr::mutate(VSSTRESC = dplyr::case_when(
      PARAMCD == "DIABP" ~ "<80",
      PARAMCD == "PULSE" ~ "<60",
      PARAMCD == "RESP" ~ ">20",
      PARAMCD == "SYSBP" ~ ">180",
      PARAMCD == "TEMP" ~ "<36.1",
      PARAMCD == "WEIGHT" ~ "<40"
    )) %>%
    # LOQFL is "Y" when AVAL satisfies the "<limit" / ">limit" rule stored in VSSTRESC.
    # Vectorised replacement for a row-wise eval(parse(text = paste(AVAL, VSSTRESC))),
    # which failed to parse for parameters without a VSSTRESC rule; those now get NA.
    dplyr::mutate(LOQFL = factor(dplyr::case_when(
      startsWith(VSSTRESC, "<") ~ ifelse(AVAL < as.numeric(substring(VSSTRESC, 2)), "Y", "N"),
      startsWith(VSSTRESC, ">") ~ ifelse(AVAL > as.numeric(substring(VSSTRESC, 2)), "Y", "N")
    ))) %>%
    dplyr::mutate(BASETYPE = "LAST") %>%
    dplyr::group_by(USUBJID, PARAMCD, BASETYPE) %>%
    # baseline reference range indicator = ANRIND of the record flagged as baseline
    dplyr::mutate(BNRIND = ANRIND[ABLFL == "Y"]) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(ATPTN = 1) %>%
    dplyr::mutate(DTYPE = NA) %>%
    rcd_var_relabel(
      USUBJID = attr(adsl$USUBJID, "label"),
      STUDYID = attr(adsl$STUDYID, "label")
    )

  advs <- rcd_var_relabel(
    advs,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # merge ADSL to be able to add VS date and study day variables
  advs <- dplyr::inner_join(
    advs,
    adsl,
    by = c("STUDYID", "USUBJID")
  ) %>%
    dplyr::rowwise() %>%
    # when treatment end is missing, fall back to treatment start + study duration
    dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
      is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
      TRUE ~ TRTEDTM
    ))) %>%
    dplyr::ungroup()

  # draw one sorted date per visit level for each subject and repeat it over the parameters
  advs <- advs %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::arrange(USUBJID, AVISITN) %>%
    dplyr::mutate(ADTM = rep(
      sort(sample(
        seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"),
        size = nlevels(AVISIT)
      )),
      each = dplyr::n() / nlevels(AVISIT)
    )) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>%
    dplyr::select(-TRTENDT) %>%
    dplyr::arrange(STUDYID, USUBJID, ADTM)

  # on-treatment flag: every visit after screening / baseline
  advs <- advs %>% dplyr::mutate(ONTRTFL = factor(dplyr::case_when(
    !AVISIT %in% c("SCREENING", "BASELINE") ~ "Y",
    TRUE ~ ""
  )))

  # random ASPID, sequence numbers within subject, then sort by the dataset keys
  advs <- advs %>%
    dplyr::mutate(ASPID = sample(seq_len(dplyr::n()))) %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(VSSEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = VSSEQ) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      PARAMCD,
      BASETYPE,
      AVISITN,
      ATPTN,
      DTYPE,
      ADTM,
      VSSEQ,
      ASPID
    )

  if (length(na_vars) > 0 && na_percentage > 0) {
    advs <- mutate_na(ds = advs, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  advs <- apply_metadata(advs, "metadata/ADVS.yml")

  return(advs)
}

#' Adverse Event Analysis Dataset (ADAE)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Adverse Event Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per each record in the corresponding SDTM domain.
#'
#' Keys: `STUDYID`, `USUBJID`, `ASTDTM`, `AETERM`, `AESEQ`
#'
#' @inheritParams argument_convention
#' @param max_n_aes (`integer`)\cr Maximum number of AEs per patient. Defaults to 10.
#' @template param_cached
#' @templateVar data adae
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, study_duration = 2, seed = 1)
#'
#' adae <- radae(adsl, seed = 2)
#' adae
#'
#' # Add metadata.
#' aag <- utils::read.table(
#'   sep = ",", header = TRUE,
#'   text = paste(
#'     "NAMVAR,SRCVAR,GRPTYPE,REFNAME,REFTERM,SCOPE",
#'     "CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd D.2.1.5.3,",
#'     "CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd A.1.1.1.1,",
#'     "SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd C.1.1.1.3,BROAD",
#'     "SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd B.2.2.3.1,BROAD",
#'     "SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Y.9.9.9.9,NARROW",
#'     "SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Z.9.9.9.9,NARROW",
#'     sep = "\n"
#'   ), stringsAsFactors = FALSE
#' )
#'
#' adae <- radae(adsl, lookup_aag = aag)
#'
#' with(
#'   adae,
#'   cbind(
#'     table(AEDECOD, SMQ01NAM),
#'     table(AEDECOD, CQ01NAM)
#'   )
#' )
radae <- function(adsl,
                  max_n_aes = 10L,
                  lookup = NULL,
                  lookup_aag = NULL,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(
                    AEBODSYS = c(NA, 0.1),
                    AEDECOD = c(1234, 0.1),
                    AETOXGR = c(1234, 0.1)
                  ),
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadae"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_integer(max_n_aes, len = 1, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  # check lookup parameters
  checkmate::assert_data_frame(lookup, null.ok = TRUE)
  lookup_ae <- if (!is.null(lookup)) {
    lookup
  } else {
    tibble::tribble(
      ~AEBODSYS, ~AELLT, ~AEDECOD, ~AEHLT, ~AEHLGT, ~AETOXGR, ~AESOC, ~AESER, ~AEREL,
      "cl A.1", "llt A.1.1.1.1", "dcd A.1.1.1.1", "hlt A.1.1.1", "hlgt A.1.1", "1", "cl A", "N", "N",
      "cl A.1", "llt A.1.1.1.2", "dcd A.1.1.1.2", "hlt A.1.1.1", "hlgt A.1.1", "2", "cl A", "Y", "N",
      "cl B.1", "llt B.1.1.1.1", "dcd B.1.1.1.1", "hlt B.1.1.1", "hlgt B.1.1", "5", "cl B", "Y", "Y",
      "cl B.2", "llt B.2.1.2.1", "dcd B.2.1.2.1", "hlt B.2.1.2", "hlgt B.2.1", "3", "cl B", "N", "N",
      "cl B.2", "llt B.2.2.3.1", "dcd B.2.2.3.1", "hlt B.2.2.3", "hlgt B.2.2", "1", "cl B", "Y", "N",
      "cl C.1", "llt C.1.1.1.3", "dcd C.1.1.1.3", "hlt C.1.1.1", "hlgt C.1.1", "4", "cl C", "N", "Y",
      "cl C.2", "llt C.2.1.2.1", "dcd C.2.1.2.1", "hlt C.2.1.2", "hlgt C.2.1", "2", "cl C", "N", "Y",
      "cl D.1", "llt D.1.1.1.1", "dcd D.1.1.1.1", "hlt D.1.1.1", "hlgt D.1.1", "5", "cl D", "Y", "Y",
      "cl D.1", "llt D.1.1.4.2", "dcd D.1.1.4.2", "hlt D.1.1.4", "hlgt D.1.1", "3", "cl D", "N", "N",
      "cl D.2", "llt D.2.1.5.3", "dcd D.2.1.5.3", "hlt D.2.1.5", "hlgt D.2.1", "1", "cl D", "N", "Y"
    )
  }

  checkmate::assert_data_frame(lookup_aag, null.ok = TRUE)
  aag <- if (!is.null(lookup_aag)) {
    lookup_aag
  } else {
    utils::read.table(
      sep = ",", header = TRUE,
      text = paste(
        "NAMVAR,SRCVAR,GRPTYPE,REFNAME,REFTERM,SCOPE",
        "CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd D.2.1.5.3,",
        "CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 AESI,dcd A.1.1.1.1,",
        "SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd C.1.1.1.3,BROAD",
        "SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 AESI,dcd B.2.2.3.1,BROAD",
        "SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Y.9.9.9.9,NARROW",
        "SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 AESI,dcd Z.9.9.9.9,NARROW",
        sep = "\n"
      ), stringsAsFactors = FALSE
    )
  }

  if (!is.null(seed)) {
    set.seed(seed)
  }
  study_duration_secs <- lubridate::seconds(attr(adsl, "study_duration_secs"))

  # for each subject draw 0..max_n_aes lookup rows (with replacement)
  adae <- Map(
    function(id, sid) {
      n_aes <- sample(c(0, seq_len(max_n_aes)), 1)
      i <- sample(seq_len(nrow(lookup_ae)), n_aes, TRUE)
      dplyr::mutate(
        lookup_ae[i, ],
        USUBJID = id,
        STUDYID = sid
      )
    },
    adsl$USUBJID,
    adsl$STUDYID
  ) %>%
    dplyr::bind_rows() %>%
    # identifiers first; selecting by name keeps this correct for lookups that
    # do not have exactly nine columns
    dplyr::select("USUBJID", "STUDYID", dplyr::everything()) %>%
    dplyr::mutate(AETERM = gsub("dcd", "trm", AEDECOD)) %>%
    dplyr::mutate(AESEV = dplyr::case_when(
      AETOXGR == "1" ~ "MILD",
      AETOXGR %in% c("2", "3") ~ "MODERATE",
      AETOXGR %in% c("4", "5") ~ "SEVERE"
    ))

  adae <- rcd_var_relabel(
    adae,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # merge adsl to be able to add AE date and study day variables
  adae <- dplyr::inner_join(adae, adsl, by = c("STUDYID", "USUBJID")) %>%
    dplyr::rowwise() %>%
    # when treatment end is missing, fall back to treatment start + study duration
    dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
      is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
      TRUE ~ TRTEDTM
    ))) %>%
    dplyr::mutate(ASTDTM = sample(
      seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"),
      size = 1
    )) %>%
    dplyr::mutate(ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))) %>%
    # add 1 to end of range in case both values passed to sample() are the same
    dplyr::mutate(AENDTM = sample(
      seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"),
      size = 1
    )) %>%
    dplyr::mutate(AENDY = ceiling(difftime(AENDTM, TRTSDTM, units = "days"))) %>%
    # last dose: a random time between treatment start and AE start
    dplyr::mutate(LDOSEDTM = dplyr::case_when(
      TRTSDTM < ASTDTM ~ lubridate::as_datetime(stats::runif(1, TRTSDTM, ASTDTM)),
      TRUE ~ ASTDTM
    )) %>%
    dplyr::mutate(LDRELTM = as.numeric(difftime(ASTDTM, LDOSEDTM, units = "mins"))) %>%
    dplyr::select(-TRTENDT) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(STUDYID, USUBJID, ASTDTM, AETERM)

  # sequence numbers within subject, then sort by the dataset keys
  adae <- adae %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(AESEQ = seq_len(dplyr::n())) %>%
    dplyr::mutate(ASEQ = AESEQ) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(
      STUDYID,
      USUBJID,
      ASTDTM,
      AETERM,
      AESEQ
    )

  outcomes <- c(
    "UNKNOWN",
    "NOT RECOVERED/NOT RESOLVED",
    "RECOVERED/RESOLVED WITH SEQUELAE",
    "RECOVERING/RESOLVING",
    "RECOVERED/RESOLVED"
  )

  actions <- c(
    "DOSE RATE REDUCED",
    "UNKNOWN",
    "NOT APPLICABLE",
    "DRUG INTERRUPTED",
    "DRUG WITHDRAWN",
    "DOSE INCREASED",
    "DOSE NOT CHANGED",
    "DOSE REDUCED",
    "NOT EVALUABLE"
  )

  adae <- adae %>%
    # grade 5 events are always fatal; everything else gets a random outcome / action
    dplyr::mutate(AEOUT = factor(ifelse(
      AETOXGR == "5",
      "FATAL",
      as.character(sample_fct(outcomes, nrow(adae), prob = c(0.1, 0.2, 0.1, 0.3, 0.3)))
    ))) %>%
    dplyr::mutate(AEACN = factor(ifelse(
      AETOXGR == "5",
      "NOT EVALUABLE",
      as.character(sample_fct(actions, nrow(adae), prob = c(0.05, 0.05, 0.05, 0.01, 0.05, 0.1, 0.45, 0.1, 0.05)))
    ))) %>%
    dplyr::mutate(AESDTH = dplyr::case_when(
      AEOUT == "FATAL" ~ "Y",
      TRUE ~ "N"
    )) %>%
    dplyr::mutate(TRTEMFL = ifelse(ASTDTM >= TRTSDTM, "Y", "")) %>%
    dplyr::mutate(AECONTRT = sample(c("Y", "N"), prob = c(0.4, 0.6), size = dplyr::n(), replace = TRUE)) %>%
    dplyr::mutate(
      # treatment-emergent and starting no later than one month after treatment end.
      # The previous `TRTEDTM + lubridate::month(1)` added the number 1 (one second),
      # not a month; add_with_rollback() also avoids NA for month-end dates.
      ANL01FL = ifelse(
        TRTEMFL == "Y" &
          ASTDTM <= lubridate::add_with_rollback(TRTEDTM, lubridate::period(1, "month")),
        "Y",
        ""
      )
    ) %>%
    # a missing TRTEDTM gives NA above; report it as not flagged
    dplyr::mutate(ANL01FL = ifelse(is.na(ANL01FL), "", ANL01FL))

  adae <- adae %>%
    # NOTE: this AERELNST draw is overwritten further down; it is kept only so
    # the random number stream (and thus seeded output) stays unchanged.
    dplyr::mutate(AERELNST = sample(c("Y", "N"), prob = c(0.4, 0.6), size = dplyr::n(), replace = TRUE)) %>%
    dplyr::mutate(AEACNOTH = sample(
      x = c("MEDICATION", "PROCEDURE/SURGERY", "SUBJECT DISCONTINUED FROM STUDY", "NONE"),
      prob = c(0.2, 0.4, 0.2, 0.2),
      size = dplyr::n(),
      replace = TRUE
    ))

  # Split metadata for AEs of special interest (AESI).
  l_aag <- split(aag, interaction(aag$NAMVAR, aag$SRCVAR, aag$GRPTYPE, drop = TRUE))

  # Create AESI flags: one block of new columns per NAMVAR/SRCVAR/GRPTYPE group
  l_aesi <- lapply(l_aag, function(d_adag, d_adae) {
    names(d_adag)[names(d_adag) == "REFTERM"] <- d_adag$SRCVAR[1]
    names(d_adag)[names(d_adag) == "REFNAME"] <- d_adag$NAMVAR[1]

    # Columns are dropped with logical masks: `x[-which(cond)]` would drop ALL
    # columns whenever `cond` matches nothing.
    if (d_adag$GRPTYPE[1] == "CUSTOM") {
      d_adag <- d_adag[names(d_adag) != "SCOPE"]
    } else if (d_adag$GRPTYPE[1] == "SMQ") {
      names(d_adag)[names(d_adag) == "SCOPE"] <- paste0(substr(d_adag$NAMVAR[1], 1, 5), "SC")
    }

    d_adag <- d_adag[!names(d_adag) %in% c("NAMVAR", "SRCVAR", "GRPTYPE")]
    d_new <- dplyr::left_join(x = d_adae, y = d_adag, by = intersect(names(d_adae), names(d_adag)))
    # keep only the columns contributed by the AESI metadata
    d_new[, dplyr::setdiff(names(d_new), names(d_adae)), drop = FALSE]
  }, adae)

  adae <- dplyr::bind_cols(adae, l_aesi)

  adae <- dplyr::mutate(adae, AERELNST = sample(
    x = c("CONCURRENT ILLNESS", "OTHER", "DISEASE UNDER STUDY", "NONE"),
    prob = c(0.3, 0.3, 0.3, 0.1),
    size = dplyr::n(),
    replace = TRUE
  ))

  # assign exactly one seriousness criterion per record (death when AESDTH is "Y")
  adae <- adae %>%
    dplyr::mutate(AES_FLAG = sample(
      x = c("AESLIFE", "AESHOSP", "AESDISAB", "AESCONG", "AESMIE"),
      prob = c(0.1, 0.2, 0.2, 0.2, 0.3),
      size = dplyr::n(),
      replace = TRUE
    )) %>%
    dplyr::mutate(AES_FLAG = dplyr::case_when(
      AESDTH == "Y" ~ "AESDTH",
      TRUE ~ AES_FLAG
    )) %>%
    dplyr::mutate(
      AESCONG = ifelse(AES_FLAG == "AESCONG", "Y", "N"),
      AESDISAB = ifelse(AES_FLAG == "AESDISAB", "Y", "N"),
      AESHOSP = ifelse(AES_FLAG == "AESHOSP", "Y", "N"),
      AESLIFE = ifelse(AES_FLAG == "AESLIFE", "Y", "N"),
      AESMIE = ifelse(AES_FLAG == "AESMIE", "Y", "N")
    ) %>%
    dplyr::select(-"AES_FLAG")

  if (length(na_vars) > 0 && na_percentage > 0) {
    adae <- mutate_na(ds = adae, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  adae <- apply_metadata(adae, "metadata/ADAE.yml")

  return(adae)
}

#' Pharmacokinetics Parameters Dataset (ADPP)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Pharmacokinetics Parameters Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per study, subject, parameter category, parameter and visit.
#'
#' @inheritParams argument_convention
#' @param ppcat (`character vector`)\cr Categories of parameters.
#' @param ppspec (`character vector`)\cr Specimen material types.
#' @param aval_mean (`numeric vector`)\cr Mean analysis value for each parameter in `param`
#'   (same length and order). `AVAL` is drawn as `N(1, 0.2)` multiplied by this mean.
#' @template param_cached
#' @templateVar data adpp
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adpp <- radpp(adsl, seed = 2)
#' adpp
radpp <- function(adsl,
                  ppcat = c("Plasma Drug X", "Plasma Drug Y", "Metabolite Drug X", "Metabolite Drug Y"),
                  ppspec = c(
                    "Plasma", "Plasma", "Plasma", "Matrix of PD", "Matrix of PD",
                    "Urine", "Urine", "Urine", "Urine"
                  ),
                  paramcd = c(
                    "AUCIFO", "CMAX", "CLO", "RMAX", "TON",
                    "RENALCL", "RENALCLD", "RCAMINT", "RCPCINT"
                  ),
                  param = c(
                    "AUC Infinity Obs", "Max Conc", "Total CL Obs", "Time of Maximum Response",
                    "Time to Onset", "Renal CL", "Renal CL Norm by Dose",
                    "Amt Rec from T1 to T2", "Pct Rec from T1 to T2"
                  ),
                  paramu = c("day*ug/mL", "ug/mL", "ml/day/kg", "hr", "hr", "L/hr", "L/hr/mg", "mg", "%"),
                  aval_mean = c(200, 30, 5, 10, 3, 0.05, 0.005, 1.5613, 15.65),
                  visit_format = "CYCLE",
                  n_days = 2L,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(
                    AVAL = c(NA, 0.1)
                  ),
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    # return the cached ADPP dataset (this previously fetched "cadlb")
    return(get_cached_data("cadpp"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_character(ppcat)
  checkmate::assert_character(ppspec)
  checkmate::assert_character(paramcd)
  checkmate::assert_character(param)
  checkmate::assert_character(paramu)
  checkmate::assert_vector(aval_mean)
  checkmate::assert_string(visit_format)
  checkmate::assert_integer(n_days)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)
  checkmate::assert_list(na_vars)

  # all parameter-level vectors must line up one-to-one
  checkmate::assertTRUE(length(ppspec) == length(paramcd))
  checkmate::assertTRUE(length(ppspec) == length(param))
  checkmate::assertTRUE(length(ppspec) == length(paramu))
  checkmate::assertTRUE(length(ppspec) == length(aval_mean))

  if (!is.null(seed)) {
    set.seed(seed)
  }

  # validate and initialize related variables
  ppspec_init_list <- relvar_init(param, ppspec)
  param_init_list <- relvar_init(param, paramcd)
  unit_init_list <- relvar_init(param, paramu)

  # one row per study x subject x parameter category x parameter x visit
  adpp <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = adsl$USUBJID,
    PPCAT = as.factor(ppcat),
    PARAM = as.factor(param_init_list$relvar1),
    AVISIT = visit_schedule(visit_format = visit_format, n_assessments = 1L, n_days = n_days),
    stringsAsFactors = FALSE
  )

  # AVAL = N(1, 0.2) noise scaled by the parameter-specific mean
  adpp <- adpp %>%
    dplyr::mutate(AVAL = stats::rnorm(nrow(adpp), mean = 1, sd = 0.2)) %>%
    dplyr::left_join(data.frame(PARAM = param, ADJUST = aval_mean), by = "PARAM") %>%
    dplyr::mutate(AVAL = AVAL * ADJUST) %>%
    dplyr::select(-"ADJUST")

  # assign related variable values: PARAMxPPSPEC are related
  adpp <- adpp %>% rel_var(
    var_name = "PPSPEC",
    related_var = "PARAM",
    var_values = ppspec_init_list$relvar2
  )

  # assign related variable values: PARAMxPARAMCD are related
  adpp <- adpp %>% rel_var(
    var_name = "PARAMCD",
    related_var = "PARAM",
    var_values = param_init_list$relvar2
  )

  # assign related variable values: PARAMxAVALU are related
  adpp <- adpp %>% rel_var(
    var_name = "AVALU",
    related_var = "PARAM",
    var_values = unit_init_list$relvar2
  )

  # derive AVISITN based on AVISIT and AVALC based on AVAL
  adpp <- adpp %>%
    dplyr::mutate(AVALC = as.character(AVAL)) %>%
    dplyr::mutate(
      AVISITN = dplyr::case_when(
        AVISIT == "SCREENING" ~ 0,
        (grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 1,
        TRUE ~ NA_real_
      )
    )

  # derive REGIMEN variable
  adpp <- adpp %>% dplyr::mutate(REGIMEN = "BID")

  # derive PPSTINT and PPENINT based on PARAMCD;
  # each interval parameter is duplicated for the 0-12h and 0-24h windows
  t1_t2 <- data.frame(
    PARAMCD = c("RCAMINT", "RCAMINT", "RCPCINT", "RCPCINT"),
    PPSTINT = c("P0H", "P0H", "P0H", "P0H"),
    PPENINT = c("P12H", "P24H", "P12H", "P24H")
  )
  adpp <- adpp %>%
    dplyr::left_join(t1_t2, by = c("PARAMCD"), multiple = "all", relationship = "many-to-many")

  # merge ADSL; drop placebo subjects and Drug Y categories for the Drug X arm
  adpp <- dplyr::inner_join(adpp, adsl, by = c("STUDYID", "USUBJID")) %>%
    dplyr::filter(
      ACTARM != "B: Placebo",
      !(ACTARM == "A: Drug X" & (PPCAT == "Plasma Drug Y" | PPCAT == "Metabolite Drug Y"))
    )

  # derive PKARMCD column for creating more cohorts: rows are cut into (up to)
  # ten consecutive chunks. `levels` is fixed to 1:10 so the ten labels always
  # match, even when fewer than ten chunks occur (few or no rows).
  adpp <- adpp %>%
    dplyr::mutate(PKARMCD = factor(
      1 + (seq_len(nrow(adpp)) - 1) %/% (nrow(adpp) / 10),
      levels = 1:10,
      labels = c(
        "Drug A", "Drug B", "Drug C", "Drug D", "Drug E", "Drug F", "Drug G", "Drug H",
        "Drug I", "Drug J"
      )
    ))

  if (length(na_vars) > 0 && na_percentage > 0) {
    adpp <- mutate_na(ds = adpp, na_vars = na_vars, na_percentage = na_percentage)
  }

  # apply metadata
  adpp <- apply_metadata(adpp, "metadata/ADPP.yml")
  return(adpp)
}

#' Generate Anthropometric Measurements for Males and Females.
#'
#' Draws a random weight and height for every subject from normal distributions
#' whose mean and standard deviation depend on the subject's sex, then derives
#' BMI from the two. The default means and standard deviations are based on
#' US National Health Statistics for adults aged 20 years or over. Measurements
#' are generated in the same units as the supplied means and SDs.
#'
#' @details One record per subject.
#'
#' @inheritParams argument_convention
#' @param df (`data.frame`)\cr Analysis dataset.
#' @param id_var (`character`)\cr Patient identifier variable name.
#' @param sex_var (`character`)\cr Name of variable representing sex of patient.
#' @param sex_var_level_male (`character`)\cr Level of `sex_var` representing males.
#' @param male_weight_in_kg (named `list`)\cr List of means and SDs of male weights in kilograms.
#' @param female_weight_in_kg (named `list`)\cr List of means and SDs of female weights in kilograms.
#' @param male_height_in_m (named `list`)\cr List of means and SDs of male heights in metres.
#' @param female_height_in_m (named `list`)\cr List of means and SDs of female heights in metres.
#'
#' @return A data frame holding the distinct `id_var`/`sex_var` combinations of `df`
#'   together with the generated `WEIGHT`, `HEIGHT` and `BMI` columns.
#' @keywords internal
h_anthropometrics_by_sex <- function(df,
                                     seed = 1,
                                     id_var = "USUBJID",
                                     sex_var = "SEX",
                                     sex_var_level_male = "M",
                                     male_weight_in_kg = list(mean = 90.6, sd = 44.9),
                                     female_weight_in_kg = list(mean = 77.5, sd = 46.2),
                                     male_height_in_m = list(mean = 1.75, sd = 0.14),
                                     female_height_in_m = list(mean = 1.61, sd = 0.24)) {
  # Argument validation.
  checkmate::assert_data_frame(df)
  checkmate::assert_string(id_var)
  checkmate::assert_string(sex_var)
  checkmate::assert_string(sex_var_level_male)
  checkmate::assert_list(male_weight_in_kg, types = "numeric")
  checkmate::assert_subset(names(male_weight_in_kg), choices = c("mean", "sd"))
  checkmate::assert_list(female_weight_in_kg, types = "numeric")
  checkmate::assert_subset(names(female_weight_in_kg), choices = c("mean", "sd"))
  checkmate::assert_list(male_height_in_m, types = "numeric")
  checkmate::assert_subset(names(male_height_in_m), choices = c("mean", "sd"))
  checkmate::assert_list(female_height_in_m, types = "numeric")
  checkmate::assert_subset(names(female_height_in_m), choices = c("mean", "sd"))

  # Number of values drawn per distribution: one per distinct subject.
  n_subjects <- length(unique(df[[id_var]]))
  set.seed(seed)

  # One row per distinct (subject, sex) combination.
  subject_sex <- unique(subset(df, select = c(id_var, sex_var)))

  # `ifelse()` only evaluates the branch(es) it actually needs, so the order and
  # number of `rnorm()` draws (male before female, weight before height) is kept
  # exactly as is to leave the random stream untouched.
  subject_sex %>%
    dplyr::mutate(
      WEIGHT = ifelse(
        .data[[sex_var]] == sex_var_level_male,
        stats::rnorm(n = n_subjects, mean = male_weight_in_kg$mean, sd = male_weight_in_kg$sd),
        stats::rnorm(n = n_subjects, mean = female_weight_in_kg$mean, sd = female_weight_in_kg$sd)
      ),
      HEIGHT = ifelse(
        .data[[sex_var]] == sex_var_level_male,
        stats::rnorm(n = n_subjects, mean = male_height_in_m$mean, sd = male_height_in_m$sd),
        stats::rnorm(n = n_subjects, mean = female_height_in_m$mean, sd = female_height_in_m$sd)
      ),
      BMI = WEIGHT / HEIGHT^2
    )
}
71
#' Subcategory Analysis Dataset (ADSUB)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Subcategory Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per subject per parameter.
#'
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `AVISITN`, `ADTM`, `SRCSEQ`
#'
#' @inheritParams argument_convention
#' @template param_cached
#' @templateVar data adsub
#'
#' @return `data.frame`
#' @export
#'
#' @author tomlinsj, npaszty, Xuefeng Hou, dipietrc
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adsub <- radsub(adsl, seed = 2)
#' adsub
radsub <- function(adsl,
                   param = c(
                     "Baseline Weight",
                     "Baseline Height",
                     "Baseline BMI",
                     "Baseline ECOG",
                     "Baseline Biomarker Mutation"
                   ),
                   paramcd = c("BWGHTSI", "BHGHTSI", "BBMISI", "BECOG", "BBMRKR1"),
                   seed = NULL,
                   na_percentage = 0,
                   na_vars = list(),
                   cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadsub"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_character(param, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  # Validate and initialize related variables.
  param_init_list <- relvar_init(param, paramcd)

  if (!is.null(seed)) {
    set.seed(seed)
  }

  # One row per subject and parameter, all at the BASELINE visit.
  adsub <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = adsl$USUBJID,
    PARAM = as.factor(param_init_list$relvar1),
    AVISIT = "BASELINE",
    stringsAsFactors = FALSE
  )

  # Assign related variable values: PARAM and PARAMCD are related.
  adsub <- adsub %>% rel_var(
    var_name = "PARAMCD",
    related_var = "PARAM",
    var_values = param_init_list$relvar2
  )

  adsub <- adsub[order(adsub$STUDYID, adsub$USUBJID, adsub$PARAMCD), ]

  adsub <- rcd_var_relabel(
    adsub,
    STUDYID = "Study Identifier",
    USUBJID = "Unique Subject Identifier"
  )

  # Merge ADSL to be able to add the analysis date.
  # Sample ADTM to be a few days (1 to 10) before TRTSDTM, one draw per subject.
  adsub <- dplyr::inner_join(
    adsub,
    adsl,
    by = c("STUDYID", "USUBJID")
  ) %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(ADTM = rep(
      lubridate::date(TRTSDTM)[1] - lubridate::days(sample(1:10, size = 1)),
      each = dplyr::n()
    )) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(STUDYID, USUBJID, ADTM)

  # Generate a dataset with height, weight and BMI measurements for each subject.
  # Note: without a user seed the helper falls back to its own default seed.
  if (!is.null(seed)) {
    df_with_measurements <- h_anthropometrics_by_sex(adsub, seed = seed)
  } else {
    df_with_measurements <- h_anthropometrics_by_sex(adsub)
  }

  # Scalar lookup of one subject's measurement. `usubjid` is the (constant)
  # USUBJID vector of the current group; only its first element is used so the
  # result is always length one, regardless of how many subjects/parameters exist.
  measurement_for <- function(usubjid, column) {
    df_with_measurements[[column]][match(usubjid[1], df_with_measurements$USUBJID)]
  }

  # Add this to adsub and create other measurements.
  # ECOG and biomarker values are drawn once per subject (grouped mutate).
  adsub <- adsub %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(
      AVAL = dplyr::case_when(
        PARAMCD == "BWGHTSI" ~ measurement_for(USUBJID, "WEIGHT"),
        PARAMCD == "BHGHTSI" ~ measurement_for(USUBJID, "HEIGHT"),
        PARAMCD == "BBMISI" ~ measurement_for(USUBJID, "BMI"),
        PARAMCD == "BECOG" ~ sample(c(0, 1, 2, 3, 4, 5), 1),
        PARAMCD == "BBMRKR1" ~ sample(c(1, 2), prob = c(0.5, 0.5), 1)
      )
    ) %>%
    dplyr::arrange(PARAMCD) %>%
    dplyr::ungroup() %>%
    # Continuous measurements keep one decimal; coded values are whole numbers.
    dplyr::mutate(AVAL = dplyr::case_when(
      !(PARAMCD %in% c("BBMRKR1", "BECOG")) ~ round(AVAL, 1),
      TRUE ~ round(AVAL)
    ))

  adsub <- adsub %>%
    dplyr::mutate(
      AVALC = dplyr::case_when(
        PARAMCD == "BBMRKR1" ~ dplyr::case_when(
          AVAL == 1 ~ "WILD TYPE",
          AVAL == 2 ~ "MUTANT",
          TRUE ~ ""
        ),
        TRUE ~ as.character(AVAL)
      ),
      AVALU = dplyr::case_when(
        PARAMCD == "BWGHTSI" ~ "kg",
        PARAMCD == "BHGHTSI" ~ "m",
        PARAMCD == "BBMISI" ~ "kg/m2",
        TRUE ~ ""
      ),
      AVALCAT1 = dplyr::case_when(
        PARAMCD == "BBMISI" ~ dplyr::case_when(
          AVAL < 18.5 ~ "<18.5",
          AVAL >= 18.5 & AVAL < 25 ~ "18.5 - 24.9",
          AVAL >= 25 & AVAL < 30 ~ "25 - 29.9",
          TRUE ~ ">30"
        ),
        PARAMCD == "BECOG" ~ dplyr::case_when(
          AVAL <= 1 ~ "0-1",
          AVAL > 1 & AVAL <= 3 ~ "2-3",
          TRUE ~ "4-5"
        ),
        TRUE ~ ""
      ),
      AVISITN = "0",
      SRCSEQ = "1"
    ) %>%
    dplyr::arrange(
      USUBJID,
      factor(PARAMCD, levels = c("BWGHTSI", "BHGHTSI", "BBMISI", "BECOG", "BBMRKR1"))
    )

  if (length(na_vars) > 0 && na_percentage > 0) {
    adsub <- mutate_na(ds = adsub, na_vars = na_vars, na_percentage = na_percentage)
  }

  # Apply metadata.
  adsub <- apply_metadata(adsub, "metadata/ADSUB.yml")

  return(adsub)
}

#' Anti-Drug Antibody Analysis Dataset (ADAB)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Anti-Drug Antibody Analysis Dataset for a given
#' Subject-Level Analysis Dataset and Pharmacokinetics Analysis Dataset.
#'
#' @inheritParams argument_convention
#' @inheritParams radpc
#' @param adpc (`data.frame`)\cr Pharmacokinetics Analysis Dataset.
#' @template param_cached
#' @templateVar data adab
#'
#' @return `data.frame`
#' @export
#'
#' @details One record per study per subject per parameter per time point: "R1800000", "RESULT1", "R1800001", "RESULT2".
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#' adpc <- radpc(adsl, seed = 2, duration = 9 * 7)
#'
#' adab <- radab(adsl, adpc, seed = 2)
#' adab
radab <- function(adsl,
                  adpc,
                  constants = c(D = 100, ka = 0.8, ke = 1),
                  paramcd = c(
                    "R1800000", "RESULT1", "R1800001", "RESULT2", "ADASTAT1", "INDUCD1", "ENHANC1",
                    "TRUNAFF1", "EMERNEG1", "EMERPOS1", "PERSADA1", "TRANADA1", "BFLAG1", "TIMADA1",
                    "ADADUR1", "ADASTAT2", "INDUCD2", "ENHANC2", "EMERNEG2", "EMERPOS2", "BFLAG2",
                    "TRUNAFF2"
                  ),
                  param = c(
                    "Antibody titer units", "ADA interpreted per sample result",
                    "Neutralizing Antibody titer units", "NAB interpreted per sample result",
                    "ADA Status of a patient", "Treatment induced ADA", "Treatment enhanced ADA",
                    "Treatment unaffected", "Treatment Emergent - Negative",
                    "Treatment Emergent - Positive", "Persistent ADA", "Transient ADA", "Baseline",
                    "Time to onset of ADA", "ADA Duration", "NAB Status of a patient",
                    "Treatment induced ADA, Neutralizing Antibody",
                    "Treatment enhanced ADA, Neutralizing Antibody",
                    "Treatment Emergent - Negative, Neutralizing Antibody",
                    "Treatment Emergent - Positive, Neutralizing Antibody",
                    "Baseline, Neutralizing Antibody",
                    "Treatment unaffected, Neutralizing Antibody"
                  ),
                  avalu = c(
                    "titer", "", "titer", "", "", "", "", "", "", "", "", "", "", "weeks", "weeks",
                    "", "", "", "", "", "", ""
                  ),
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(
                    AVAL = c(NA, 0.1)
                  ),
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadab"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_data_frame(adpc)
  checkmate::assert_subset(names(constants), c("D", "ka", "ke"))
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_list(na_vars)
  checkmate::assert_character(paramcd)
  checkmate::assert_character(param, len = length(paramcd))
  checkmate::assert_character(avalu, len = length(paramcd))
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)

  if (!is.null(seed)) {
    set.seed(seed)
  }

  # validate and initialize related variables
  param_init_list <- relvar_init(param, paramcd)
  unit_init_list <- relvar_init(param, avalu)

  adpc <- adpc %>% dplyr::filter(ASMED == "PLASMA")

  # ---- Visit-level parameters (first four entries of `param`) ----
  adab0 <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = unique(adsl$USUBJID),
    VISIT = unique(adpc$VISIT),
    PARAM = as.factor(param_init_list$relvar1[c(1:4)]),
    PARCAT1 = "A: Drug X Antibody",
    stringsAsFactors = FALSE
  )

  # Set random values for observations: one draw per subject and visit,
  # shared by all visit-level parameters of that subject/visit.
  visit_lvl_params <- c(
    "Antibody titer units", "Neutralizing Antibody titer units",
    "ADA interpreted per sample result", "NAB interpreted per sample result"
  )
  visit_keys <- unique(adab0 %>% dplyr::select(USUBJID, VISIT))
  aval_random <- cbind(
    visit_keys,
    AVAL1 = stats::rnorm(nrow(visit_keys), mean = 1, sd = 0.2)
  )

  adab_visit <- adab0 %>% dplyr::left_join(aval_random, by = c("USUBJID", "VISIT"))
  adab_visit <- adab_visit %>%
    dplyr::mutate(
      # Draws below 1 are treated as "no titer" (negative sample).
      AVAL2 = ifelse(AVAL1 >= 1, AVAL1, NA),
      AVALC = dplyr::case_when(
        !is.na(AVAL2) ~ "POSITIVE",
        is.na(AVAL2) ~ "NEGATIVE"
      ),
      AVAL = dplyr::case_when(
        (PARAM %in% visit_lvl_params[3:4] & !is.na(AVAL2)) ~ 1,
        (PARAM %in% visit_lvl_params[3:4] & is.na(AVAL2)) ~ 0,
        (PARAM %in% visit_lvl_params[1:2] & !is.na(AVAL2)) ~ AVAL2,
        TRUE ~ as.numeric(NA)
      )
    ) %>%
    dplyr::select(-c(AVAL1, AVAL2))

  # retrieve other variables from adpc
  adab_visit <- adab_visit %>%
    dplyr::inner_join(
      adpc %>%
        dplyr::filter(PCTPT %in% c("Predose", "24H")) %>%
        dplyr::select(
          STUDYID,
          USUBJID,
          VISIT,
          PCTPT,
          ARM,
          ACTARM,
          VISITDY,
          AFRLT,
          NFRLT,
          ARRLT,
          NRRLT,
          RELTMU
        ) %>%
        unique(),
      by = c("STUDYID", "USUBJID", "VISIT")
    ) %>%
    dplyr::rename(ISTPT = PCTPT)

  # mutate time from dose variables from adpc to convert into Days
  adab_visit <- adab_visit %>%
    dplyr::mutate(dplyr::across(dplyr::all_of(c("AFRLT", "NFRLT", "ARRLT", "NRRLT")), ~ .x / 24))

  # ---- Subject-level parameters (Y/N) ----
  # Set random values for subject level parameters (Y/N). These placeholder values
  # are overwritten further below once the subject-level flags are derived, but
  # the draw is kept so the random stream stays unchanged.
  adab1 <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = unique(adpc$USUBJID),
    VISIT = NA,
    PARAM = as.factor(param_init_list$relvar1[c(5:13, 16:22)]),
    PARCAT1 = "A: Drug X Antibody",
    stringsAsFactors = FALSE
  )

  sub_keys <- unique(adab1 %>% dplyr::select(USUBJID))
  aval_random_sub <- cbind(sub_keys, AVAL1 = stats::rbinom(nrow(sub_keys), 1, 0.5))

  adab_sub <- adab1 %>% dplyr::left_join(aval_random_sub, by = c("USUBJID"))
  adab_sub <- adab_sub %>%
    dplyr::mutate(
      AVAL = AVAL1,
      AVALC = dplyr::case_when(
        PARAM %in% c("ADA Status of a patient", "NAB Status of a patient") & AVAL1 == 1 ~ "POSITIVE",
        PARAM %in% c("ADA Status of a patient", "NAB Status of a patient") & AVAL1 == 0 ~ "NEGATIVE",
        !(PARAM %in% c("ADA Status of a patient", "NAB Status of a patient")) & AVAL1 == 1 ~ "Y",
        !(PARAM %in% c("ADA Status of a patient", "NAB Status of a patient")) & AVAL1 == 0 ~ "N"
      )
    ) %>%
    dplyr::select(-c(AVAL1))

  # ---- Subject-level parameters (numeric) ----
  # Set random values for subject level parameters (numeric):
  # "Time to onset of ADA" and "ADA Duration" (entries 14 and 15 of `param`).
  adab2 <- expand.grid(
    STUDYID = unique(adsl$STUDYID),
    USUBJID = unique(adpc$USUBJID),
    VISIT = NA,
    PARAM = as.factor(param_init_list$relvar1[c(14, 15)]),
    PARCAT1 = "A: Drug X Antibody",
    stringsAsFactors = FALSE
  )

  sub_num_keys <- unique(adab2 %>% dplyr::select(USUBJID))
  aval_random_sub_num <- cbind(
    sub_num_keys,
    AVAL1 = stats::rnorm(nrow(sub_num_keys), mean = 1, sd = 1)
  )

  adab_sub_num <- adab2 %>% dplyr::left_join(aval_random_sub_num, by = c("USUBJID"))
  adab_sub_num <- adab_sub_num %>%
    dplyr::mutate(
      AVAL = ifelse(AVAL1 >= 1, round(AVAL1, 2), NA),
      AVALC = as.character(AVAL)
    ) %>%
    dplyr::select(-c(AVAL1))

  adab <- dplyr::bind_rows(adab_visit, adab_sub, adab_sub_num)

  # assign related variable values: PARAMxPARAMCD are related
  adab <- adab %>% rel_var(
    var_name = "PARAMCD",
    related_var = "PARAM",
    var_values = param_init_list$relvar2
  )

  # assign related variable values: PARAMxAVALU are related
  adab <- adab %>% rel_var(
    var_name = "AVALU",
    related_var = "PARAM",
    var_values = unit_init_list$relvar2
  )

  adab <- adab %>%
    dplyr::mutate(
      RELTMU = "day",
      # Baseline Record Flag
      ABLFL = ifelse(!is.na(NFRLT) & NFRLT == 0, "Y", NA),
      # Baseline ADA Eval. Param-Level Flag, only populate for ADA, not for NAB
      ADABLPFL = ifelse(PARAMCD == "RESULT1" & !is.na(NFRLT) & NFRLT == 0, "Y", NA),
      # Post-Baseline ADA Eval. Param-Level Flag, only populate for ADA, not for NAB
      ADPBLPFL = ifelse(PARAMCD == "RESULT1" & !is.na(NFRLT) & NFRLT > 0 & !is.na(AVAL), "Y", NA)
    )

  # create temporary flags to derive subject-level variables
  adab_subj <- adab %>%
    dplyr::group_by(USUBJID) %>%
    dplyr::mutate(
      pos_bl = any(PARAM == "ADA interpreted per sample result" & !is.na(ABLFL) & AVALC == "POSITIVE"),
      pos_bl_nab = any(PARAM == "NAB interpreted per sample result" & !is.na(ABLFL) & AVALC == "POSITIVE"),
      any_pos_postbl = any(PARAM == "ADA interpreted per sample result" & is.na(ABLFL) & AVALC == "POSITIVE"),
      any_pos_postbl_nab = any(PARAM == "NAB interpreted per sample result" & is.na(ABLFL) & AVALC == "POSITIVE"),
      # Subject-level rows carry NFRLT = NA, so the maximum must ignore missing
      # values; otherwise the comparison is NA for every row and the flag can
      # never become TRUE.
      pos_last_postbl = any(
        PARAM == "ADA interpreted per sample result" & NFRLT == max(NFRLT, na.rm = TRUE) & AVALC == "POSITIVE"
      ),
      ada_bl = AVAL[PARAM == "Antibody titer units" & !is.na(ABLFL)],
      nab_bl = AVAL[PARAM == "Neutralizing Antibody titer units" & !is.na(ABLFL)]
    )
  # NOTE(review): `inc_postbl`/`inc_postbl_nab` compare AVAL of the "interpreted per
  # sample result" rows (0/1) against the baseline titer; this looks like it was meant
  # to use the titer rows instead - confirm the intended derivation before changing.
  pos_tots <- adab_subj %>%
    dplyr::summarise(
      n_pos = sum(PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE"),
      inc_postbl = sum(PARAM == "ADA interpreted per sample result" & is.na(ABLFL) & (AVAL - ada_bl) > 0.60),
      inc_postbl_nab = sum(PARAM == "NAB interpreted per sample result" & is.na(ABLFL) & (AVAL - nab_bl) > 0.60),
      onset_ada = if (any(PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE")) {
        min(NFRLT[PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE"])
      } else {
        NA
      },
      last_ada = if (any(PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE")) {
        max(NFRLT[PARAM == "ADA interpreted per sample result" & AVALC == "POSITIVE"])
      } else {
        NA
      }
    )
  adab_subj <- adab_subj %>%
    dplyr::left_join(pos_tots, by = "USUBJID") %>%
    dplyr::select(
      USUBJID,
      NFRLT,
      pos_bl,
      pos_bl_nab,
      any_pos_postbl,
      any_pos_postbl_nab,
      inc_postbl,
      inc_postbl_nab,
      pos_last_postbl,
      n_pos,
      onset_ada,
      last_ada
    ) %>%
    unique()

  # add flags to ADAB dataset
  adab <- adab %>%
    dplyr::left_join(adab_subj, by = c("USUBJID", "NFRLT"))

  # derive subject-level variables (overwrites the placeholder AVAL/AVALC of all
  # rows that are not visit-level parameters)
  adab[!(adab$PARAM %in% visit_lvl_params), ] <- adab %>%
    dplyr::filter(!(PARAM %in% visit_lvl_params)) %>%
    dplyr::mutate(
      # nolint start indentation_linter
      AVALC = dplyr::case_when(
        (PARAM == "ADA Status of a patient" & any_pos_postbl) ~ "POSITIVE",
        (PARAM == "ADA Status of a patient" & !any_pos_postbl) ~ "NEGATIVE",
        (PARAM == "Treatment induced ADA" & !pos_bl & any_pos_postbl) ~ "Y",
        (PARAM == "Treatment enhanced ADA" & pos_bl & inc_postbl > 0) ~ "Y",
        (PARAM == "Treatment unaffected" & pos_bl & (inc_postbl == 0 | !any_pos_postbl)) ~ "Y",
        (PARAM == "Treatment Emergent - Positive" &
          ((!pos_bl & any_pos_postbl) | (pos_bl & inc_postbl > 0))) ~ "Y",
        (PARAM == "Treatment Emergent - Negative" &
          !((!pos_bl & any_pos_postbl) | (pos_bl & inc_postbl > 0))) ~ "Y",
        (PARAM == "Persistent ADA" & pos_last_postbl) ~ "Y",
        (PARAM == "Transient ADA" &
          (n_pos - pos_bl - pos_last_postbl == 1 | n_pos > 1)) ~ "Y",
        (PARAM == "Baseline" & pos_bl) ~ "POSITIVE",
        (PARAM == "Baseline" & !pos_bl) ~ "NEGATIVE",
        (PARAM == "Time to onset of ADA") ~ as.character(onset_ada / 7),
        (PARAM == "ADA Duration") ~ as.character((last_ada - onset_ada) / 7),
        (PARAM == "NAB Status of a patient" & any_pos_postbl_nab) ~ "POSITIVE",
        (PARAM == "NAB Status of a patient" & !any_pos_postbl_nab) ~ "NEGATIVE",
        (PARAM == "Treatment induced ADA, Neutralizing Antibody" &
          !pos_bl_nab & any_pos_postbl_nab) ~ "Y",
        (PARAM == "Treatment enhanced ADA, Neutralizing Antibody" &
          pos_bl_nab & inc_postbl_nab > 0) ~ "Y",
        (PARAM == "Baseline, Neutralizing Antibody" & pos_bl_nab) ~ "POSITIVE",
        (PARAM == "Baseline, Neutralizing Antibody" & !pos_bl_nab) ~ "NEGATIVE",
        (PARAM == "Treatment unaffected, Neutralizing Antibody" & pos_bl_nab &
          (inc_postbl_nab == 0 | !any_pos_postbl_nab)) ~ "Y",
        (PARAM == "Treatment Emergent - Positive, Neutralizing Antibody" &
          ((!pos_bl_nab & any_pos_postbl_nab) | (pos_bl_nab & inc_postbl_nab > 0))) ~ "Y",
        (PARAM == "Treatment Emergent - Negative, Neutralizing Antibody" &
          !((!pos_bl_nab & any_pos_postbl_nab) | (pos_bl_nab & inc_postbl_nab > 0))) ~ "Y",
        TRUE ~ "N"
      ),
      AVAL = dplyr::case_when(
        (PARAM == "ADA Status of a patient" & any_pos_postbl) ~ 1,
        (PARAM == "Treatment induced ADA" & !pos_bl & any_pos_postbl) ~ 1,
        (PARAM == "Treatment enhanced ADA" & pos_bl & inc_postbl > 0) ~ 1,
        (PARAM == "Treatment unaffected" & pos_bl & (inc_postbl == 0 | !any_pos_postbl)) ~ 1,
        (PARAM == "Treatment Emergent - Positive" &
          ((!pos_bl & any_pos_postbl) | (pos_bl & inc_postbl > 0))) ~ 1,
        (PARAM == "Treatment Emergent - Negative" &
          !((!pos_bl & any_pos_postbl) | (pos_bl & inc_postbl > 0))) ~ 1,
        (PARAM == "Persistent ADA" & pos_last_postbl) ~ 1,
        (PARAM == "Transient ADA" &
          (n_pos - ifelse(pos_bl, 1, 0) - ifelse(pos_last_postbl, 1, 0) == 1 | n_pos > 1)) ~ 1,
        (PARAM == "Baseline" & pos_bl) ~ 1,
        (PARAM == "Time to onset of ADA") ~ onset_ada / 7,
        (PARAM == "ADA Duration") ~ (last_ada - onset_ada) / 7,
        (PARAM == "NAB Status of a patient" & any_pos_postbl_nab) ~ 1,
        (PARAM == "Treatment induced ADA, Neutralizing Antibody" &
          !pos_bl_nab & any_pos_postbl_nab) ~ 1,
        (PARAM == "Treatment enhanced ADA, Neutralizing Antibody" &
          pos_bl_nab & inc_postbl_nab > 0) ~ 1,
        (PARAM == "Baseline, Neutralizing Antibody" & pos_bl_nab) ~ 1,
        (PARAM == "Treatment unaffected, Neutralizing Antibody" & pos_bl_nab &
          (inc_postbl_nab == 0 | !any_pos_postbl_nab)) ~ 1,
        (PARAM == "Treatment Emergent - Positive, Neutralizing Antibody" &
          ((!pos_bl_nab & any_pos_postbl_nab) | (pos_bl_nab & inc_postbl_nab > 0))) ~ 1,
        (PARAM == "Treatment Emergent - Negative, Neutralizing Antibody" &
          !((!pos_bl_nab & any_pos_postbl_nab) | (pos_bl_nab & inc_postbl_nab > 0))) ~ 1,
        TRUE ~ 0
      ),
      # nolint end indentation_linter
      PARCAT1 = dplyr::case_when(
        PARAM %in% c(
          "Neutralizing Antibody titer units", "NAB interpreted per sample result",
          "NAB Status of a patient", "Treatment induced ADA, Neutralizing Antibody",
          "Treatment enhanced ADA, Neutralizing Antibody",
          "Treatment Emergent - Negative, Neutralizing Antibody",
          "Treatment Emergent - Positive, Neutralizing Antibody",
          "Treatment unaffected, Neutralizing Antibody"
        ) ~ "A: Drug X Neutralizing Antibody",
        TRUE ~ PARCAT1
      )
    )

  # remove intermediate flag variables from adab
  adab <- adab %>%
    dplyr::select(-c(
      pos_bl,
      pos_bl_nab,
      any_pos_postbl,
      any_pos_postbl_nab,
      pos_last_postbl,
      inc_postbl,
      inc_postbl_nab,
      n_pos,
      onset_ada,
      last_ada
    ))

  # Carry over ARM and ACTARM for all records (subject-level rows have none yet).
  arm <- adab %>%
    dplyr::filter(!is.na(ARM), !is.na(ACTARM)) %>%
    dplyr::select(USUBJID, ARM, ACTARM) %>%
    dplyr::distinct()
  adab$ARM <- arm$ARM[match(adab$USUBJID, arm$USUBJID)]
  adab$ACTARM <- arm$ACTARM[match(adab$USUBJID, arm$USUBJID)]

  if (length(na_vars) > 0 && na_percentage > 0) {
    adab <- mutate_na(ds = adab, na_vars = na_vars, na_percentage = na_percentage)
  }

  adab <- apply_metadata(adab, "metadata/ADAB.yml")

  # Return visibly, like the other dataset generators.
  return(adab)
}

1		#' Subject-Level Analysis Dataset (ADSL)
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' The Subject-Level Analysis Dataset (ADSL) is used to provide the variables
6		#' that describe attributes of a subject. ADSL is a source for subject-level
7		#' variables used in other analysis data sets, such as population flags and
8		#' treatment variables. There is only one ADSL per study. ADSL and its related
9		#' metadata are required in a CDISC-based submission of data from a clinical
10		#' trial even if no other analysis data sets are submitted.
11		#'
12		#' @details One record per subject.
13		#'
14		#' Keys: `STUDYID`, `USUBJID`
15		#'
16		#' @inheritParams argument_convention
17		#' @param N (`numeric`)\cr Number of patients.
18		#' @param study_duration (`numeric`)\cr Duration of study in years.
19		#' @param with_trt02 (`logical`)\cr Should period 2 be added.
20		#' @param ae_withdrawal_prob (`proportion`)\cr Probability that there is at least one
21		#' Adverse Event leading to the withdrawal of a study drug.
22		#' @template param_cached
23		#' @templateVar data adsl
24		#'
25		#' @return `data.frame`
26		#' @export
27		#
28		#' @examples
29		#' adsl <- radsl(N = 10, study_duration = 2, seed = 1)
30		#' adsl
31		#'
32		#' adsl <- radsl(
33		#' N = 10, seed = 1,
34		#' na_percentage = 0.1,
35		#' na_vars = list(
36		#' DTHDT = c(seed = 1234, percentage = 0.1),
37		#' LSTALVDT = c(seed = 1234, percentage = 0.1)
38		#' )
39		#' )
40		#' adsl
41		#'
42		#' adsl <- radsl(N = 10, seed = 1, na_percentage = .1)
43		#' adsl
44		radsl <- function(N = 400, # nolint
45		study_duration = 2,
46		seed = NULL,
47		with_trt02 = TRUE,
48		na_percentage = 0,
49		na_vars = list(
50		"AGE" = NA, "SEX" = NA, "RACE" = NA, "STRATA1" = NA, "STRATA2" = NA,
51		"BMRKR1" = c(seed = 1234, percentage = 0.1), "BMRKR2" = c(1234, 0.1), "BEP01FL" = NA
52		),
53		ae_withdrawal_prob = 0.05,
54		cached = FALSE) {
55	28x	checkmate::assert_flag(cached)
56	28x	if (cached) {
57	2x	return(get_cached_data("cadsl"))
58		}
59
60	26x	checkmate::assert_number(N)
61	26x	checkmate::assert_number(seed, null.ok = TRUE)
62	26x	checkmate::assert_number(na_percentage, lower = 0, upper = 1, na.ok = TRUE)
63	26x	checkmate::assert_number(study_duration, lower = 1)
64	26x	checkmate::assert_number(na_percentage, lower = 0, upper = 1)
65	26x	checkmate::assert_true(na_percentage < 1)
66
67	26x	if (!is.null(seed)) {
68	26x	set.seed(seed)
69		}
70
71	26x	study_duration_secs <- lubridate::seconds(lubridate::years(study_duration))
72	26x	sys_dtm <- lubridate::fast_strptime("20/2/2019 11:16:16.683", "%d/%m/%Y %H:%M:%OS")
73	26x	discons <- max(1, floor((N * .3)))
74	26x	country_site_prob <- c(.5, .121, .077, .077, .075, .052, .046, .025, .014, .003)
75
76	26x	adsl <- tibble::tibble(
77	26x	STUDYID = rep("AB12345", N),
78	26x	COUNTRY = sample_fct(
79	26x	c("CHN", "USA", "BRA", "PAK", "NGA", "RUS", "JPN", "GBR", "CAN", "CHE"),
80	26x	N,
81	26x	prob = country_site_prob
82		),
83	26x	SITEID = sample_fct(1:20, N, prob = rep(country_site_prob, times = 2)),
84	26x	SUBJID = paste("id", seq_len(N), sep = "-"),
85	26x	AGE = sapply(stats::rchisq(N, df = 5, ncp = 10), max, 0) + 20,
86	26x	AGEU = "YEARS",
87	26x	SEX = c("F", "M") %>% sample_fct(N, prob = c(.52, .48)),
88	26x	ARMCD = c("ARM A", "ARM B", "ARM C") %>% sample_fct(N),
89	26x	RACE = c(
90	26x	"ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE", "AMERICAN INDIAN OR ALASKA NATIVE",
91	26x	"MULTIPLE", "NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER", "OTHER", "UNKNOWN"
92		) %>%
93	26x	sample_fct(N, prob = c(.55, .23, .16, .05, .004, .003, .002, .002)),
94	26x	TRTSDTM = sys_dtm + sample(seq(0, study_duration_secs), size = N, replace = TRUE),
95	26x	RANDDT = lubridate::date(TRTSDTM - lubridate::days(floor(stats::runif(N, min = 0, max = 5)))),
96	26x	TRTEDTM = TRTSDTM + study_duration_secs,
97	26x	STRATA1 = c("A", "B", "C") %>% sample_fct(N),
98	26x	STRATA2 = c("S1", "S2") %>% sample_fct(N),
99	26x	BMRKR1 = stats::rchisq(N, 6),
100	26x	BMRKR2 = sample_fct(c("LOW", "MEDIUM", "HIGH"), N),
101	26x	BMEASIFL = sample_fct(c("Y", "N"), N),
102	26x	BEP01FL = sample_fct(c("Y", "N"), N),
103	26x	AEWITHFL = sample_fct(c("Y", "N"), N, prob = c(ae_withdrawal_prob, 1 - ae_withdrawal_prob))
104		) %>%
105	26x	dplyr::mutate(ARM = dplyr::recode(
106	26x	ARMCD,
107	26x	"ARM A" = "A: Drug X", "ARM B" = "B: Placebo", "ARM C" = "C: Combination"
108		)) %>%
109	26x	dplyr::mutate(ACTARM = ARM) %>%
110	26x	dplyr::mutate(ACTARMCD = ARMCD) %>%
111	26x	dplyr::mutate(TRT01P = ARM) %>%
112	26x	dplyr::mutate(TRT01A = ACTARM) %>%
113	26x	dplyr::mutate(ITTFL = factor("Y")) %>%
114	26x	dplyr::mutate(SAFFL = factor("Y")) %>%
115	26x	dplyr::arrange(TRTSDTM)
116
117	26x	adds <- adsl[sample(nrow(adsl), discons), ] %>%
118	26x	dplyr::mutate(TRTEDTM_discon = sample(
119	26x	seq(from = max(TRTSDTM), to = sys_dtm + study_duration_secs, by = 1),
120	26x	size = discons,
121	26x	replace = TRUE
122		)) %>%
123	26x	dplyr::select(SUBJID, TRTSDTM, TRTEDTM_discon) %>%
124	26x	dplyr::arrange(TRTSDTM)
125
126	26x	adsl <- dplyr::left_join(adsl, adds, by = c("SUBJID", "TRTSDTM")) %>%
127	26x	dplyr::mutate(TRTEDTM = dplyr::case_when(
128	26x	!is.na(TRTEDTM_discon) ~ TRTEDTM_discon,
129	26x	TRTSDTM >= quantile(TRTSDTM)[2] & TRTSDTM <= quantile(TRTSDTM)[3] ~ lubridate::as_datetime(NA),
130	26x	TRUE ~ TRTEDTM
131		)) %>%
132	26x	dplyr::select(-"TRTEDTM_discon")
133
134		# add period 2 if needed
135	26x	if (with_trt02) {
136	26x	with_trt02 <- lubridate::seconds(lubridate::years(1))
137	26x	adsl <- adsl %>%
138	26x	dplyr::mutate(TRT02P = sample(ARM)) %>%
139	26x	dplyr::mutate(TRT02A = sample(ACTARM)) %>%
140	26x	dplyr::mutate(
141	26x	TRT01SDTM = TRTSDTM,
142	26x	AP01SDTM = TRT01SDTM,
143	26x	TRT01EDTM = TRTEDTM,
144	26x	AP01EDTM = TRT01EDTM,
145	26x	TRT02SDTM = TRTEDTM,
146	26x	AP02SDTM = TRT02SDTM,
147	26x	TRT02EDTM = TRT01EDTM + with_trt02,
148	26x	AP02EDTM = TRT02EDTM,
149	26x	TRTEDTM = TRT02EDTM
150		)
151		}
152
153	26x	adsl <- adsl %>%
154	26x	dplyr::mutate(EOSDT = lubridate::date(TRTEDTM)) %>%
155	26x	dplyr::mutate(EOSDY = ceiling(difftime(TRTEDTM, TRTSDTM))) %>%
156	26x	dplyr::mutate(EOSSTT = dplyr::case_when(
157	26x	EOSDY == max(EOSDY, na.rm = TRUE) ~ "COMPLETED",
158	26x	EOSDY < max(EOSDY, na.rm = TRUE) ~ "DISCONTINUED",
159	26x	is.na(TRTEDTM) ~ "ONGOING"
160		)) %>%
161	26x	dplyr::mutate(EOTSTT = EOSSTT)
162
163		# disposition related variables
164		# using probability of 1 for the "DEATH" level to ensure at least one death record exists
165	26x	l_dcsreas <- list(
166	26x	choices = c(
167	26x	"ADVERSE EVENT", "DEATH", "LACK OF EFFICACY", "PHYSICIAN DECISION",
168	26x	"PROTOCOL VIOLATION", "WITHDRAWAL BY PARENT/GUARDIAN", "WITHDRAWAL BY SUBJECT"
169		),
170	26x	prob = c(.2, 1, .1, .1, .2, .1, .1)
171		)
172	26x	l_dthcat_other <- list(
173	26x	choices = c(
174	26x	"Post-study reporting of death", "LOST TO FOLLOW UP", "MISSING", "SUICIDE", "UNKNOWN"
175		),
176	26x	prob = c(.1, .3, .3, .2, .1)
177		)
178
179	26x	adsl <- adsl %>%
180	26x	dplyr::mutate(
181	26x	DCSREAS = ifelse(
182	26x	EOSSTT == "DISCONTINUED",
183	26x	sample(x = l_dcsreas$choices, size = N, replace = TRUE, prob = l_dcsreas$prob),
184	26x	as.character(NA)
185		)
186		) %>%
187	26x	dplyr::mutate(DTHFL = dplyr::case_when(
188	26x	DCSREAS == "DEATH" ~ "Y",
189	26x	TRUE ~ "N"
190		)) %>%
191	26x	dplyr::mutate(
192	26x	DTHCAT = ifelse(
193	26x	DCSREAS == "DEATH",
194	26x	sample(x = c("ADVERSE EVENT", "PROGRESSIVE DISEASE", "OTHER"), size = N, replace = TRUE),
195	26x	as.character(NA)
196		)
197		) %>%
198	26x	dplyr::mutate(DTHCAUS = dplyr::case_when(
199	26x	DTHCAT == "ADVERSE EVENT" ~ "ADVERSE EVENT",
200	26x	DTHCAT == "PROGRESSIVE DISEASE" ~ "DISEASE PROGRESSION",
201	26x	DTHCAT == "OTHER" ~ sample(x = l_dthcat_other$choices, size = N, replace = TRUE, prob = l_dthcat_other$prob),
202	26x	TRUE ~ as.character(NA)
203		)) %>%
204	26x	dplyr::mutate(ADTHAUT = dplyr::case_when(
205	26x	DTHCAUS %in% c("ADVERSE EVENT", "DISEASE PROGRESSION") ~ "Yes",
206	26x	DTHCAUS %in% c("UNKNOWN", "SUICIDE", "Post-study reporting of death") ~ sample(
207	26x	x = c("Yes", "No"), size = N, replace = TRUE, prob = c(0.25, 0.75)
208		),
209	26x	TRUE ~ as.character(NA)
210		)) %>%
211		# adding some random number of days post last treatment date so that death days from last trt admin
212		# supports the LDDTHGR1 derivation below
213	26x	dplyr::mutate(DTHDT = dplyr::case_when(
214	26x	DCSREAS == "DEATH" ~ lubridate::date(TRTEDTM + lubridate::days(sample(0:50, size = N, replace = TRUE))),
215	26x	TRUE ~ NA
216		)) %>%
217	26x	dplyr::mutate(LDDTHELD = difftime(DTHDT, lubridate::date(TRTEDTM), units = "days")) %>%
218	26x	dplyr::mutate(LDDTHGR1 = dplyr::case_when(
219	26x	LDDTHELD <= 30 ~ "<=30",
220	26x	LDDTHELD > 30 ~ ">30",
221	26x	TRUE ~ as.character(NA)
222		)) %>%
223	26x	dplyr::mutate(LSTALVDT = dplyr::case_when(
224	26x	DCSREAS == "DEATH" ~ DTHDT,
225	26x	TRUE ~ lubridate::date(TRTEDTM) + lubridate::days(floor(stats::runif(N, min = 10, max = 30)))
226		))
227
228		# add random ETHNIC (Ethnicity)
229	26x	adsl <- adsl %>%
230	26x	dplyr::mutate(ETHNIC = sample(
231	26x	x = c("HISPANIC OR LATINO", "NOT HISPANIC OR LATINO", "NOT REPORTED", "UNKNOWN"),
232	26x	size = N, replace = TRUE, prob = c(.1, .8, .06, .04)
233		))
234
235		# associate DTHADY (Relative Day of Death) with Death date
236		# Date of Death [adsl.DTHDT] - date part of Date of First Exposure to Treatment [adsl.TRTSDTM]
237
238	26x	adsl <- adsl %>%
239	26x	dplyr::mutate(DTHADY = difftime(DTHDT, TRTSDTM, units = "days"))
240
241
242		# associate sites with countries and regions
243	26x	adsl <- adsl %>%
244	26x	dplyr::mutate(SITEID = paste0(COUNTRY, "-", SITEID)) %>%
245	26x	dplyr::mutate(REGION1 = dplyr::case_when(
246	26x	COUNTRY %in% c("NGA") ~ "Africa",
247	26x	COUNTRY %in% c("CHN", "JPN", "PAK") ~ "Asia",
248	26x	COUNTRY %in% c("RUS") ~ "Eurasia",
249	26x	COUNTRY %in% c("GBR") ~ "Europe",
250	26x	COUNTRY %in% c("CAN", "USA") ~ "North America",
251	26x	COUNTRY %in% c("BRA") ~ "South America",
252	26x	TRUE ~ as.character(NA)
253		)) %>%
254	26x	dplyr::mutate(INVID = paste("INV ID", SITEID)) %>%
255	26x	dplyr::mutate(INVNAM = paste("Dr.", SITEID, "Doe")) %>%
256	26x	dplyr::mutate(USUBJID = paste(STUDYID, SITEID, SUBJID, sep = "-"))
257
258
259	26x	if (length(na_vars) > 0 && na_percentage > 0) {
260	!	adsl <- mutate_na(ds = adsl, na_vars = na_vars, na_percentage = na_percentage)
261		}
262
263		# apply metadata
264	26x	adsl <- apply_metadata(adsl, "metadata/ADSL.yml", FALSE)
265
266	26x	attr(adsl, "study_duration_secs") <- as.numeric(study_duration_secs)
267	26x	return(adsl)
268		}

#' Pharmacokinetics Analysis Dataset (ADPC)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Pharmacokinetics Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per study, subject, parameter, and time point.
#'
#' Days 1 to 7 are always generated; beyond day 7 only days for which
#' `(day - 1)` is a multiple of 7 are generated. Records for subjects whose
#' `ACTARM` is `"B: Placebo"` are dropped, as are `"Plasma Drug Y"` records for
#' subjects whose `ACTARM` is `"A: Drug X"`.
#'
#' @inheritParams argument_convention
#' @param avalu (`character`)\cr Analysis value units.
#' @param constants (`named numeric vector`)\cr Constant parameters to be used in formulas for creating
#'   analysis values. Names must be a subset of `D`, `ka` and `ke`.
#' @param duration (`numeric`)\cr Duration in number of days.
#' @template param_cached
#' @templateVar data adpc
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adpc <- radpc(adsl, seed = 2)
#' adpc
#'
#' adpc <- radpc(adsl, seed = 2, duration = 3)
#' adpc
radpc <- function(adsl,
                  avalu = "ug/mL",
                  constants = c(D = 100, ka = 0.8, ke = 1),
                  duration = 2,
                  seed = NULL,
                  na_percentage = 0,
                  na_vars = list(
                    AVAL = c(NA, 0.1)
                  ),
                  cached = FALSE) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadpc"))
  }

  checkmate::assert_data_frame(adsl)
  checkmate::assert_character(avalu, len = 1, any.missing = FALSE)
  checkmate::assert_subset(names(constants), c("D", "ka", "ke"))
  checkmate::assert_numeric(x = duration, max.len = 1)
  checkmate::assert_number(seed, null.ok = TRUE)
  checkmate::assert_number(na_percentage, lower = 0, upper = 1)
  checkmate::assert_true(na_percentage < 1)
  checkmate::assert_list(na_vars)

  if (!is.null(seed)) {
    set.seed(seed)
  }

  # Builds the records of a single study day for every subject in `adsl`.
  radpc_core <- function(day) {
    adpc_day <- tidyr::expand_grid(
      data.frame(
        STUDYID = adsl$STUDYID,
        USUBJID = adsl$USUBJID,
        ARMCD = adsl$ARMCD,
        A0 = unname(constants["D"]),
        # per-subject jitter of the rate constants
        ka = unname(constants["ka"]) - stats::runif(length(adsl$USUBJID), -0.2, 0.2),
        ke = unname(constants["ke"]) - stats::runif(length(adsl$USUBJID), -0.2, 0.2)
      ),
      # day 1 has a dense sampling schedule; later days have a single time point
      PCTPTNUM = if (day == 1) c(0, 0.5, 1, 1.5, 2, 3, 4, 8, 12) else 24 * (day - 1),
      PARAM = factor(c("Plasma Drug X", "Urine Drug X", "Plasma Drug Y", "Urine Drug Y"))
    )
    # urine parameters have no records at the early time points
    adpc_day <- adpc_day[!(grepl("Urine", adpc_day$PARAM) & adpc_day$PCTPTNUM %in% c(0.5, 1, 1.5, 2, 3)), ] %>%
      dplyr::arrange(USUBJID, PARAM) %>%
      dplyr::mutate(
        VISITDY = day,
        # `day` is a scalar, so a plain if/else is used rather than ifelse()
        VISIT = if (day <= 7) paste("Day", VISITDY) else paste("Week", (VISITDY - 1) / 7),
        PCVOLU = ifelse(grepl("Urine", PARAM), "mL", ""),
        ASMED = ifelse(grepl("Urine", PARAM), "URINE", "PLASMA"),
        PCTPT = factor(dplyr::case_when(
          PCTPTNUM == 0 ~ "Predose",
          # urine records are labelled as an interval ending at the current time point;
          # dplyr::lag() is called explicitly so that stats::lag() (which does not shift
          # the values of a plain vector) can never be picked up instead
          (day == 1 & grepl("Urine", PARAM)) ~
            paste0(dplyr::lag(PCTPTNUM), "H - ", PCTPTNUM, "H"),
          (day != 1 & grepl("Urine", PARAM)) ~
            paste0(as.numeric(PCTPTNUM) - 24, "H - ", PCTPTNUM, "H"),
          TRUE ~ paste0(PCTPTNUM, "H")
        )),
        ARELTM1 = PCTPTNUM,
        NRELTM1 = PCTPTNUM,
        # time relative to the start of the current day
        ARELTM2 = ARELTM1 - (24 * (day - 1)),
        NRELTM2 = NRELTM1 - (24 * (day - 1)),
        # every parameter except "Plasma Drug Y" uses half of the constant D
        A0 = ifelse(PARAM == "Plasma Drug Y", A0, A0 / 2),
        # PK Equation
        AVAL = round(
          (A0 * ka * (
            exp(-ka * ARELTM1) - exp(-ke * ARELTM1)
          ))
          / (ke - ka),
          digits = 3
        )
      ) %>%
      dplyr::mutate(
        PCVOL = ifelse(
          ASMED == "URINE",
          round(abs(((PCTPTNUM - 1) %% 24) * A0 * ka * exp(PCTPTNUM %% 1.8 / 10)), 2),
          NA
        ),
        # values that round to exactly zero are reported as "BLQ"
        AVALC = ifelse(AVAL == 0, "BLQ", as.character(AVAL)),
        AVALU = avalu,
        RELTMU = "hr"
      ) %>%
      dplyr::select(-c("A0", "ka", "ke"))

    return(adpc_day)
  }

  adpc <- list()

  # days 1-7, then only days where (day - 1) is a multiple of 7
  all_days <- seq(duration)
  for (day in all_days[all_days <= 7 | ((all_days - 1) %% 7 == 0)]) {
    adpc[[day]] <- radpc_core(day = day)
  }

  adpc <- do.call(rbind, adpc)

  adpc <- dplyr::inner_join(adpc, adsl, by = c("STUDYID", "USUBJID", "ARMCD")) %>%
    dplyr::filter(ACTARM != "B: Placebo", !(ACTARM == "A: Drug X" & PARAM == "Plasma Drug Y"))

  if (length(na_vars) > 0 && na_percentage > 0) {
    adpc <- mutate_na(ds = adpc, na_vars = na_vars, na_percentage = na_percentage)
  }

  adpc <- adpc %>%
    dplyr::rename(
      AVALCAT1 = AVALC,
      NFRLT = NRELTM1,
      AFRLT = ARELTM1,
      NRRLT = NRELTM2,
      ARRLT = ARELTM2
    ) %>%
    dplyr::mutate(ANL02FL = "Y")

  adpc <- apply_metadata(adpc, "metadata/ADPC.yml")
  # explicit return so the result is visible (a trailing assignment returns invisibly)
  return(adpc)
}

#' Tumor Response Analysis Dataset (ADTR)
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for generating a random Tumor Response Analysis Dataset for a given
#' Subject-Level Analysis Dataset.
#'
#' @details One record per subject per parameter per analysis visit per analysis date.
#'
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`, `BASETYPE`, `AVISITN`, `DTYPE`
#'
#' Visit timing (`AVISIT`, `AVISITN`, `ADTM`, `ADY`) is taken from the `OVRINV`
#' records produced by `radrs`. `TRTDURD` is expressed in days.
#'
#' @inheritParams argument_convention
#' @param ... Additional arguments to be passed to `radrs`.
#' @template param_cached
#' @templateVar data adtr
#'
#' @return `data.frame`
#' @export
#'
#' @author tomlinsj, npaszty, Xuefeng Hou, dipietrc
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adtr <- radtr(adsl, seed = 2)
#' adtr
radtr <- function(adsl,
                  param = c("Sum of Longest Diameter by Investigator"),
                  paramcd = c("SLDINV"),
                  seed = NULL,
                  cached = FALSE,
                  ...) {
  checkmate::assert_flag(cached)
  if (cached) {
    return(get_cached_data("cadtr"))
  }
  checkmate::assert_data_frame(adsl)
  checkmate::assert_character(param, min.len = 1, any.missing = FALSE)
  checkmate::assert_character(paramcd, min.len = 1, any.missing = FALSE)
  checkmate::assert_number(seed, null.ok = TRUE)
  stopifnot(length(param) == length(paramcd))
  # validate and initialize related variables

  if (!is.null(seed)) {
    set.seed(seed)
  }

  # Make times consistent with ADRS at ADY and ADTM.
  adrs <- radrs(adsl, seed = seed, ...) %>%
    dplyr::filter(PARAMCD == "OVRINV") %>%
    dplyr::select(
      "STUDYID",
      "USUBJID",
      "AVISIT",
      "AVISITN",
      "ADTM",
      "ADY"
    )

  # one copy of the visit skeleton per parameter, each with its own random AVAL
  adtr <- Map(function(parcd, par) {
    df <- adrs
    df$AVAL <- stats::rnorm(nrow(df), mean = 150, sd = 30)
    df$PARAMCD <- parcd
    df$PARAM <- par
    df
  }, paramcd, param) %>%
    Reduce(rbind, .)

  # baseline value per subject and parameter (records with AVISITN == 0)
  adtr_base <- adtr %>%
    dplyr::filter(AVISITN == 0) %>%
    dplyr::group_by(USUBJID, PARAMCD) %>%
    dplyr::mutate(BASE = AVAL) %>%
    dplyr::select("STUDYID", "USUBJID", "BASE", "PARAMCD") %>%
    dplyr::ungroup()

  # derived record holding the smallest post-baseline value
  adtr_postbase <- adtr %>%
    dplyr::filter(AVISITN > 0) %>%
    dplyr::filter(!is.na(AVAL)) %>%
    dplyr::group_by(USUBJID, PARAMCD) %>%
    dplyr::filter(AVAL == min(AVAL)) %>%
    dplyr::slice(1) %>%
    dplyr::mutate(AVISIT = "POST-BASELINE MINIMUM") %>%
    dplyr::mutate(DTYPE = "MINIMUM") %>%
    dplyr::ungroup()

  # visit of the latest post-baseline record, used for ANL02FL below
  adtr_lastobs <- adtr %>%
    dplyr::filter(AVISITN > 0) %>%
    dplyr::filter(!is.na(AVAL)) %>%
    dplyr::group_by(USUBJID, PARAMCD) %>%
    dplyr::filter(ADTM == max(ADTM, na.rm = TRUE)) %>%
    dplyr::slice(1) %>%
    dplyr::mutate(LAST_VISIT = AVISIT) %>%
    dplyr::ungroup() %>%
    dplyr::select(
      "STUDYID",
      "USUBJID",
      "PARAMCD",
      "LAST_VISIT"
    )

  adtr <- rbind(adtr %>% dplyr::mutate(DTYPE = ""), adtr_postbase)

  adtr <- merge(adtr, adtr_base, by = c("STUDYID", "USUBJID", "PARAMCD")) %>%
    dplyr::mutate(
      ABLFL = dplyr::case_when(AVISIT == "BASELINE" ~ "Y", TRUE ~ ""),
      AVAL = dplyr::case_when(AVISIT == "BASELINE" ~ NA_real_, TRUE ~ AVAL),
      CHG = dplyr::case_when(AVISITN > 0 ~ AVAL - BASE, TRUE ~ NA_real_),
      PCHG = dplyr::case_when(AVISITN > 0 ~ CHG / BASE * 100, TRUE ~ NA_real_),
      AVALC = as.character(AVAL),
      AVALU = "mm"
    )

  # ensure PCHG does not exceed 200%, nor go below -100% (double in size, or complete remission of tumor).
  # AVAL and CHG are adjusted together with PCHG so the three stay consistent.
  adtr <- adtr %>%
    dplyr::mutate(
      PCHG_DUM = PCHG,
      PCHG = dplyr::case_when(
        PCHG_DUM > 200 ~ 200,
        PCHG_DUM < -100 ~ -100,
        TRUE ~ PCHG
      ),
      AVAL = dplyr::case_when(
        PCHG_DUM > 200 ~ 3 * BASE,
        PCHG_DUM < -100 ~ 0,
        TRUE ~ AVAL
      ),
      CHG = dplyr::case_when(
        PCHG_DUM > 200 ~ 2 * BASE,
        PCHG_DUM < -100 ~ -BASE,
        TRUE ~ CHG
      )
    ) %>%
    dplyr::select(-"PCHG_DUM")

  adtr <- merge(adsl, adtr, by = c("STUDYID", "USUBJID")) %>%
    dplyr::group_by(USUBJID, PARAMCD) %>%
    dplyr::mutate(
      ONTRTFL = factor(dplyr::case_when(
        !AVISIT %in% c("SCREENING", "BASELINE", "FOLLOW UP") ~ "Y",
        TRUE ~ ""
      )),
      ANL01FL = dplyr::case_when(
        DTYPE == "" & AVISITN > 0 ~ "Y",
        TRUE ~ ""
      ),
      ANL03FL = dplyr::case_when(
        DTYPE == "MINIMUM" ~ "Y",
        ABLFL == "Y" ~ "Y",
        TRUE ~ ""
      )
    ) %>%
    dplyr::ungroup()
  adtr <- merge(adtr, adtr_lastobs, by = c("STUDYID", "USUBJID", "PARAMCD")) %>%
    dplyr::mutate(
      ANL02FL = dplyr::case_when(
        as.character(AVISIT) == as.character(LAST_VISIT) ~ "Y",
        ABLFL == "Y" ~ "Y",
        TRUE ~ ""
      )
    ) %>%
    dplyr::select(-"LAST_VISIT")
  # Adding variables that are in ADTR osprey but not RCD.
  adtr <- adtr %>%
    dplyr::mutate(
      DCSREAS_GRP = ifelse(DCSREAS == "ADVERSE EVENT", "Safety", "Non-Safety"),
      # Units are fixed to days: plain POSIXct subtraction picks its unit from the
      # smallest interval in the data, so a single short interval would silently
      # turn the whole column into hours or minutes. Missing start/end dates
      # propagate to NA without an explicit check.
      TRTDURD = as.numeric(
        difftime(TRTEDTM, TRTSDTM + lubridate::days(1), units = "days")
      ),
      AGEGR1 = ifelse(AGE < 65, "<65", ">=65")
    )

  # apply metadata
  adtr <- apply_metadata(adtr, "metadata/ADTR.yml")
  return(adtr)
}

#' Time to Safety Event Analysis Dataset (ADSAFTTE)
#'
#' Generates a random Time-to-Safety Event Dataset for a given
#' Subject-Level Analysis Dataset. This is a thin wrapper that forwards
#' `adsl` and all further arguments to `radaette`.
#'
#' @details
#'
#' Keys: `STUDYID`, `USUBJID`, `PARAMCD`
#'
#' @inheritParams radaette
#' @param ... Additional arguments to be passed to `radaette`
#'
#' @return `data.frame`
#' @export
#'
#' @examples
#' adsl <- radsl(N = 10, seed = 1, study_duration = 2)
#'
#' adsaftte <- radsaftte(adsl, seed = 2)
#' adsaftte
radsaftte <- function(adsl, ...) {
  # delegate unchanged; the result of radaette() is returned as-is
  radaette(adsl = adsl, ...)
}