# tern coverage - 95.13%

# Files
# Source

#' Helper function to create a new SMQ variable in ADAE by stacking SMQ and/or CQ records.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to create a new SMQ variable in ADAE that consists of all adverse events belonging to
#' selected Standardized/Customized queries. The new dataset will only contain records of the adverse events
#' belonging to any of the selected baskets. Remember that `na_str` must match the needed pre-processing
#' done with [df_explicit_na()] to have the desired output.
#'
#' @inheritParams argument_convention
#' @param baskets (`character`)\cr variable names of the selected Standardized/Customized queries.
#' @param smq_varlabel (`string`)\cr a label for the new variable created.
#' @param keys (`character`)\cr names of the key variables to be returned along with the new variable created.
#' @param aag_summary (`data.frame`)\cr containing the SMQ baskets and the levels of interest for the final SMQ
#'   variable. This is useful when there are some levels of interest that are not observed in the `df` dataset.
#'   The two columns of this dataset should be named `basket` and `basket_name`.
#'
#' @return A `data.frame` with variables in `keys` taken from `df` and new variable SMQ containing
#'   records belonging to the baskets selected via the `baskets` argument.
#'
#' @examples
#' adae <- tern_ex_adae[1:20, ] %>% df_explicit_na()
#' h_stack_by_baskets(df = adae)
#'
#' aag <- data.frame(
#'   NAMVAR = c("CQ01NAM", "CQ02NAM", "SMQ01NAM", "SMQ02NAM"),
#'   REFNAME = c(
#'     "D.2.1.5.3/A.1.1.1.1 aesi", "X.9.9.9.9/Y.8.8.8.8 aesi",
#'     "C.1.1.1.3/B.2.2.3.1 aesi", "C.1.1.1.3/B.3.3.3.3 aesi"
#'   ),
#'   SCOPE = c("", "", "BROAD", "BROAD"),
#'   stringsAsFactors = FALSE
#' )
#'
#' basket_name <- character(nrow(aag))
#' cq_pos <- grep("^(CQ).+NAM$", aag$NAMVAR)
#' smq_pos <- grep("^(SMQ).+NAM$", aag$NAMVAR)
#' basket_name[cq_pos] <- aag$REFNAME[cq_pos]
#' basket_name[smq_pos] <- paste0(
#'   aag$REFNAME[smq_pos], "(", aag$SCOPE[smq_pos], ")"
#' )
#'
#' aag_summary <- data.frame(
#'   basket = aag$NAMVAR,
#'   basket_name = basket_name,
#'   stringsAsFactors = TRUE
#' )
#'
#' result <- h_stack_by_baskets(df = adae, aag_summary = aag_summary)
#' all(levels(aag_summary$basket_name) %in% levels(result$SMQ))
#'
#' h_stack_by_baskets(
#'   df = adae,
#'   aag_summary = NULL,
#'   keys = c("STUDYID", "USUBJID", "AEDECOD", "ARM"),
#'   baskets = "SMQ01NAM"
#' )
#'
#' @export
h_stack_by_baskets <- function(df,
                               baskets = grep("^(SMQ|CQ).+NAM$", names(df), value = TRUE),
                               smq_varlabel = "Standardized MedDRA Query",
                               keys = c("STUDYID", "USUBJID", "ASTDTM", "AEDECOD", "AESEQ"),
                               aag_summary = NULL,
                               na_str = "<Missing>") {
  # Validate `baskets` before deriving anything from it, so that malformed input fails with an
  # assertion message rather than with an error raised by `startsWith()`.
  checkmate::assert_character(baskets)
  checkmate::assert_string(smq_varlabel)
  checkmate::assert_data_frame(df)
  checkmate::assert_true(all(startsWith(baskets, "SMQ") | startsWith(baskets, "CQ")))
  checkmate::assert_true(all(endsWith(baskets, "NAM")))

  smq_nam <- baskets[startsWith(baskets, "SMQ")]
  # SC corresponding to NAM
  smq_sc <- gsub(pattern = "NAM", replacement = "SC", x = smq_nam, fixed = TRUE)
  smq <- stats::setNames(smq_sc, smq_nam)

  checkmate::assert_subset(baskets, names(df))
  checkmate::assert_subset(keys, names(df))
  checkmate::assert_subset(smq_sc, names(df))
  checkmate::assert_string(na_str)

  if (!is.null(aag_summary)) {
    assert_df_with_variables(
      df = aag_summary,
      variables = list(val = c("basket", "basket_name"))
    )
    # Warning in case there is no match between `aag_summary$basket` and `baskets` argument.
    # Honestly, I think those should completely match. Target baskets should be the same.
    if (length(intersect(baskets, unique(aag_summary$basket))) == 0) {
      warning("There are 0 baskets in common between aag_summary$basket and `baskets` argument.")
    }
  }

  # `drop = FALSE` keeps a data frame even when only one key is requested on a plain `data.frame`.
  var_labels <- c(formatters::var_labels(df[, keys, drop = FALSE]), "SMQ" = smq_varlabel)

  # convert `na_str` records from baskets to NA for the later loop and from wide to long steps
  basket_cols <- c(baskets, smq_sc)
  basket_vals <- df[, basket_cols, drop = FALSE]
  basket_vals[basket_vals == na_str] <- NA
  df[, basket_cols] <- basket_vals

  if (all(is.na(df[, baskets, drop = FALSE]))) { # in case there is no level for the target baskets
    # we just need an empty data frame keeping all factor levels
    df_long <- df[0, keys, drop = FALSE]
  } else {
    # Concatenate SMQxxxNAM with corresponding SMQxxxSC
    df_cnct <- df[, c(keys, baskets[startsWith(baskets, "CQ")]), drop = FALSE]

    for (nam in names(smq)) {
      sc <- smq[[nam]] # SMQxxxSC corresponding to SMQxxxNAM
      nam_notna <- !is.na(df[[nam]])
      new_colname <- paste(nam, sc, sep = "_")
      df_cnct[nam_notna, new_colname] <- paste0(df[[nam]], "(", df[[sc]], ")")[nam_notna]
    }

    df_cnct$unique_id <- seq_len(nrow(df_cnct))
    var_cols <- names(df_cnct)[!(names(df_cnct) %in% c(keys, "unique_id"))]
    # have to convert df_cnct from tibble to data frame
    # as it throws a warning otherwise about rownames.
    # tibble do not support rownames and reshape creates rownames

    df_long <- stats::reshape(
      data = as.data.frame(df_cnct),
      varying = var_cols,
      v.names = "SMQ",
      idvar = names(df_cnct)[names(df_cnct) %in% c(keys, "unique_id")],
      direction = "long",
      new.row.names = seq_len(length(var_cols) * nrow(df_cnct))
    )

    # Keep only rows that actually belong to a basket and drop the reshape bookkeeping columns.
    df_long <- df_long[
      !is.na(df_long[["SMQ"]]),
      !(names(df_long) %in% c("time", "unique_id")),
      drop = FALSE
    ]
    df_long$SMQ <- as.factor(df_long$SMQ)
  }

  smq_levels <- setdiff(levels(df_long[["SMQ"]]), na_str)

  if (!is.null(aag_summary)) {
    # A warning in case there is no match between df and aag_summary records
    if (length(intersect(smq_levels, unique(aag_summary$basket_name))) == 0) {
      warning("There are 0 basket levels in common between aag_summary$basket_name and df.")
    }
    df_long[["SMQ"]] <- factor(
      df_long[["SMQ"]],
      levels = sort(
        c(
          smq_levels,
          setdiff(unique(aag_summary$basket_name), smq_levels)
        )
      )
    )
  } else {
    # Column-wise check; `drop = FALSE` guarantees iteration over columns (not over the elements
    # of a single column) when only one basket is selected on a plain `data.frame`.
    all_na_basket_flag <- vapply(df[, baskets, drop = FALSE], function(x) {
      all(is.na(x))
    }, FUN.VALUE = logical(1))
    all_na_basket <- baskets[all_na_basket_flag]

    df_long[["SMQ"]] <- factor(
      df_long[["SMQ"]],
      levels = sort(c(smq_levels, all_na_basket))
    )
  }
  formatters::var_labels(df_long) <- var_labels
  tibble::tibble(df_long)
}

#' Control function for Cox regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Sets a list of parameters for Cox regression fit. Used internally.
#'
#' @inheritParams argument_convention
#' @param pval_method (`string`)\cr the method used for estimation of p-values; `wald` (default) or `likelihood`.
#' @param interaction (`flag`)\cr if `TRUE`, the model includes the interaction between the studied
#'   treatment and candidate covariate. Note that for univariate models without treatment arm, and
#'   multivariate models, no interaction can be used so that this needs to be `FALSE`.
#' @param ties (`string`)\cr among `exact` (equivalent to `DISCRETE` in SAS), `efron` and `breslow`,
#'   see [survival::coxph()]. Note: there is no equivalent of SAS `EXACT` method in R.
#'
#' @return A `list` of items with names corresponding to the arguments.
#'
#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()].
#'
#' @examples
#' control_coxreg()
#'
#' @export
control_coxreg <- function(pval_method = c("wald", "likelihood"),
                           ties = c("exact", "efron", "breslow"),
                           conf_level = 0.95,
                           interaction = FALSE) {
  # Resolve each enumerated argument to a single allowed choice (first value when left at default).
  chosen_pval_method <- match.arg(pval_method)
  chosen_ties <- match.arg(ties)

  # Validate the remaining scalar settings.
  checkmate::assert_flag(interaction)
  assert_proportion_value(conf_level)

  # Element names mirror the argument names.
  list(
    pval_method = chosen_pval_method,
    ties = chosen_ties,
    conf_level = conf_level,
    interaction = interaction
  )
}

#' Custom tidy methods for Cox regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @param x (`list`)\cr result of the Cox regression model fitted by [fit_coxreg_univar()] (for univariate models)
#'   or [fit_coxreg_multivar()] (for multivariate models).
#'
#' @return [broom::tidy()] returns:
#' * For `summary.coxph` objects, a `data.frame` with columns: `Pr(>|z|)`, `exp(coef)`, `exp(-coef)`, `lower .95`,
#'   `upper .95`, `level`, and `n`.
#' * For `coxreg.univar` objects, a `data.frame` with columns: `effect`, `term`, `term_label`, `level`, `n`, `hr`,
#'   `lcl`, `ucl`, `pval`, and `ci`.
#' * For `coxreg.multivar` objects, a `data.frame` with columns: `term`, `pval`, `term_label`, `hr`, `lcl`, `ucl`,
#'   `level`, and `ci`.
#'
#' @seealso [cox_regression]
#'
#' @name tidy_coxreg
NULL

#' @describeIn tidy_coxreg Custom tidy method for [survival::coxph()] summary results.
#'
#' Tidy the [survival::coxph()] results into a `data.frame` to extract model results.
#'
#' @method tidy summary.coxph
#'
#' @examples
#' library(survival)
#' library(broom)
#'
#' set.seed(1, kind = "Mersenne-Twister")
#'
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   data.frame(
#'     time = stop,
#'     status = event,
#'     armcd = as.factor(rx),
#'     covar1 = as.factor(enum),
#'     covar2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4, labels = c("F", "F", "M", "M")
#'     )
#'   )
#' )
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#'
#' formula <- "survival::Surv(time, status) ~ armcd + covar1"
#' msum <- summary(coxph(stats::as.formula(formula), data = dta_bladder))
#' tidy(msum)
#'
#' @export
tidy.summary.coxph <- function(x, # nolint
                               ...) {
  checkmate::assert_class(x, "summary.coxph")

  # Row names of the coefficient matrix identify the model terms; capture them before the
  # conversion to tibble.
  coef_mat <- x$coefficients
  term_names <- rownames(coef_mat)

  coef_tbl <- tibble::as_tibble(coef_mat)
  ci_tbl <- tibble::as_tibble(x$conf.int)

  # Keep only the p-value column(s) of the coefficient table, next to the confidence interval columns.
  is_pval_col <- grepl("Pr", names(coef_tbl))
  tidied <- cbind(coef_tbl[, is_pval_col], ci_tbl)

  tidied$level <- term_names
  tidied$n <- x[["n"]]
  tidied
}

#' @describeIn tidy_coxreg Custom tidy method for a univariate Cox regression.
#'
#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_univar()].
#'
#' @method tidy coxreg.univar
#'
#' @examples
#' ## Cox regression: arm + 1 covariate.
#' mod1 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = "covar1"
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91)
#' )
#'
#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates.
#' mod2 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91, interaction = TRUE)
#' )
#'
#' tidy(mod1)
#' tidy(mod2)
#'
#' @export
tidy.coxreg.univar <- function(x, # nolint
                               ...) {
  checkmate::assert_class(x, "coxreg.univar")
  fits <- x$mod
  model_vars <- c(x$vars$arm, x$vars$covariates)

  # Choose the per-model extractor once: covariate-only models, arm-by-covariate interaction
  # models, or plain arm + covariate models.
  extract_one <- if (!("arm" %in% names(x$vars))) {
    function(mod, covar) {
      h_coxreg_multivar_extract(
        var = covar,
        data = x$data,
        mod = mod,
        control = x$control
      )
    }
  } else if (x$control$interaction) {
    function(mod, covar) {
      h_coxreg_extract_interaction(
        effect = x$vars$arm, covar = covar, mod = mod, data = x$data,
        at = x$at, control = x$control
      )
    }
  } else {
    function(mod, covar) {
      h_coxreg_univar_extract(
        effect = x$vars$arm, covar = covar, data = x$data, mod = mod,
        control = x$control
      )
    }
  }

  # One extraction per fitted model, paired element-wise with the variable names, then stacked.
  per_model <- Map(mod = fits, covar = model_vars, f = extract_one)
  result <- do.call(rbind, per_model)

  # Pair the confidence limits into one element per row.
  result$ci <- Map(function(lcl, ucl) c(lcl, ucl), result$lcl, result$ucl)
  for (col in c("n", "ci", "hr")) {
    result[[col]] <- lapply(result[[col]], empty_vector_if_na)
  }
  if (x$control$interaction) {
    result$pval_inter <- lapply(result$pval_inter, empty_vector_if_na)
    # Remove interaction p-values due to change in specifications.
    result$pval[result$effect != "Treatment:"] <- NA
  }
  result$pval <- lapply(result$pval, empty_vector_if_na)
  attr(result, "conf_level") <- x$control$conf_level
  result
}

#' @describeIn tidy_coxreg Custom tidy method for a multivariate Cox regression.
#'
#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_multivar()].
#'
#' @method tidy coxreg.multivar
#'
#' @examples
#' multivar_model <- fit_coxreg_multivar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#' broom::tidy(multivar_model)
#'
#' @export
tidy.coxreg.multivar <- function(x, # nolint
                                 ...) {
  checkmate::assert_class(x, "coxreg.multivar")
  model_terms <- c(x$vars$arm, x$vars$covariates)

  # Extract the results of the single multivariate model once per model variable.
  extract_term <- function(vars) {
    h_coxreg_multivar_extract(
      var = vars, data = x$data,
      mod = x$mod, control = x$control
    )
  }
  result <- do.call(rbind, Map(vars = model_terms, f = extract_term))

  # Pair the confidence limits into one element per row.
  result$ci <- Map(function(lcl, ucl) c(lcl, ucl), result$lcl, result$ucl)
  for (col in c("ci", "hr", "pval")) {
    result[[col]] <- lapply(result[[col]], empty_vector_if_na)
  }

  # The `n` column is not part of the multivariate output.
  keep_cols <- names(result) != "n"
  result <- result[, keep_cols]
  attr(result, "conf_level") <- x$control$conf_level

  result
}

#' Fitting functions for Cox proportional hazards regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Fitting functions for univariate and multivariate Cox regression models.
#'
#' @param variables (named `list`)\cr the names of the variables found in `data`, passed as a named list and
#'   corresponding to the `time`, `event`, `arm`, `strata`, and `covariates` terms. If `arm` is missing from
#'   `variables`, then only Cox model(s) including the `covariates` will be fitted and the corresponding effect
#'   estimates will be tabulated later.
#' @param data (`data.frame`)\cr the dataset containing the variables to fit the models.
#' @param at (`list` of `numeric`)\cr when the candidate covariate is a `numeric`, use `at` to specify
#'   the value of the covariate at which the effect should be estimated.
#' @param control (`list`)\cr a list of parameters as returned by the helper function [control_coxreg()].
#'
#' @seealso [h_cox_regression] for relevant helper functions, [cox_regression].
#'
#' @examples
#' library(survival)
#'
#' set.seed(1, kind = "Mersenne-Twister")
#'
#' # Testing dataset [survival::bladder].
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   data.frame(
#'     time = stop,
#'     status = event,
#'     armcd = as.factor(rx),
#'     covar1 = as.factor(enum),
#'     covar2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4, labels = c("F", "F", "M", "M")
#'     )
#'   )
#' )
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#'
#' plot(
#'   survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder),
#'   lty = 2:4,
#'   xlab = "Months",
#'   col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
#' )
#'
#' @name fit_coxreg
NULL

#' @describeIn fit_coxreg Fit a series of univariate Cox regression models given the inputs.
#'
#' @return
#' * `fit_coxreg_univar()` returns a `coxreg.univar` class object which is a named `list`
#'   with 5 elements:
#'   * `mod`: Cox regression models fitted by [survival::coxph()].
#'   * `data`: The original data frame input.
#'   * `control`: The original control input.
#'   * `vars`: The variables used in the model.
#'   * `at`: Value of the covariate at which the effect should be estimated.
#'
#' @note When using `fit_coxreg_univar` there should be two study arms.
#'
#' @examples
#' # fit_coxreg_univar
#'
#' ## Cox regression: arm + 1 covariate.
#' mod1 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = "covar1"
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91)
#' )
#'
#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates.
#' mod2 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91, interaction = TRUE)
#' )
#'
#' ## Cox regression: arm + 1 covariate, stratified analysis.
#' mod3 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", strata = "covar2",
#'     covariates = c("covar1")
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91)
#' )
#'
#' ## Cox regression: no arm, only covariates.
#' mod4 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#'
#' @export
fit_coxreg_univar <- function(variables,
                              data,
                              at = list(),
                              control = control_coxreg()) {
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_df_with_variables(data, variables)
  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # The likelihood p-value method is not accepted together with strata.
  if (!is.null(variables$strata)) {
    checkmate::assert_disjunct(control$pval_method, "likelihood")
  }
  # The treatment arm must be a factor with exactly two levels.
  if (has_arm) {
    assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  }

  # Drop unused factor levels from every model variable.
  # Overwriting only the "levels" attribute would leave the integer codes untouched, so codes
  # would point at wrong (or missing) labels whenever a dropped level is not the last one.
  # Instead, recode with `droplevels()` and restore any attribute it discarded.
  vars <- unlist(variables[c(arm_name, "covariates", "strata")], use.names = FALSE)
  for (i in vars) {
    if (is.factor(data[[i]])) {
      original <- data[[i]]
      dropped <- droplevels(original)
      lost_attrs <- setdiff(names(attributes(original)), names(attributes(dropped)))
      for (a in lost_attrs) {
        attr(dropped, a) <- attr(original, a, exact = TRUE)
      }
      data[[i]] <- dropped
    }
  }

  # One formula, and therefore one fitted model, per element returned by the formula helper.
  forms <- h_coxreg_univar_formulas(variables, interaction = control$interaction)
  mod <- lapply(
    forms, function(x) {
      survival::coxph(formula = stats::as.formula(x), data = data, ties = control$ties)
    }
  )
  structure(
    list(
      mod = mod,
      data = data,
      control = control,
      vars = variables,
      at = at
    ),
    class = "coxreg.univar"
  )
}

#' @describeIn fit_coxreg Fit a multivariate Cox regression model.
#'
#' @return
#' * `fit_coxreg_multivar()` returns a `coxreg.multivar` class object which is a named list
#'   with 4 elements:
#'   * `mod`: Cox regression model fitted by [survival::coxph()].
#'   * `data`: The original data frame input.
#'   * `control`: The original control input.
#'   * `vars`: The variables used in the model.
#'
#' @examples
#' # fit_coxreg_multivar
#'
#' ## Cox regression: multivariate Cox regression.
#' multivar_model <- fit_coxreg_multivar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#'
#' # Example without treatment arm.
#' multivar_covs_model <- fit_coxreg_multivar(
#'   variables = list(
#'     time = "time", event = "status",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#'
#' @export
fit_coxreg_multivar <- function(variables,
                                data,
                                control = control_coxreg()) {
  checkmate::assert_list(variables, names = "named")
  # `arm` is optional; only validate it when it was supplied.
  arm_name <- if ("arm" %in% names(variables)) "arm" else NULL

  if (!is.null(variables$covariates)) {
    checkmate::assert_character(variables$covariates)
  }

  # Interactions are not accepted for the multivariate model.
  checkmate::assert_false(control$interaction)
  assert_df_with_variables(data, variables)
  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # The likelihood p-value method is not accepted together with strata.
  if (!is.null(variables$strata)) {
    checkmate::assert_disjunct(control$pval_method, "likelihood")
  }

  # Single model containing all requested terms.
  form <- h_coxreg_multivar_formula(variables)
  mod <- survival::coxph(
    formula = stats::as.formula(form),
    data = data,
    ties = control$ties
  )

  fit <- list(
    mod = mod,
    data = data,
    control = control,
    vars = variables
  )
  class(fit) <- "coxreg.multivar"
  fit
}

#' Muffled `car::Anova`
#'
#' Applied on survival models, [car::Anova()] signals that the `strata` terms are dropped from the model formula
#' when present; this function deliberately muffles this message.
#'
#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()].
#' @param test_statistic (`string`)\cr the method used for estimation of p-values; `wald` or `likelihood`.
#'
#' @return The output of [car::Anova()], with messages muffled. Errors are re-thrown with a hint about
#'   possible convergence problems.
#'
#' @keywords internal
muffled_car_anova <- function(mod, test_statistic) {
  # Silence every message signalled while the Anova table is computed.
  drop_message <- function(m) invokeRestart("muffleMessage")

  # Re-signal errors with a hint, appending the original error.
  rethrow_with_hint <- function(e) {
    stop(paste(
      "the model seems to have convergence problems, please try to change",
      "the configuration of covariates or strata variables, e.g.",
      "- original error:", e
    ))
  }

  withCallingHandlers(
    car::Anova(
      mod,
      test.statistic = test_statistic,
      type = "III"
    ),
    message = drop_message,
    error = rethrow_with_hint
  )
}

#' Multivariate logistic regression table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Layout-creating function which summarizes a logistic variable regression for binary outcome with
#' categorical/continuous covariates in model statement. For each covariate category (if categorical)
#' or specified values (if continuous), present degrees of freedom, regression parameter estimate and
#' standard error (SE) relative to reference group or category. Report odds ratios for each covariate
#' category or specified values and corresponding Wald confidence intervals as default but allow user
#' to specify other confidence levels. Report p-value for Wald chi-square test of the null hypothesis
#' that covariate has no effect on response in model containing all specified covariates.
#' Allow option to include one two-way interaction and present similar output for
#' each interaction degree of freedom.
#'
#' @inheritParams argument_convention
#' @param drop_and_remove_str (`string`)\cr string to be dropped and removed.
#'
#' @return A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
#'   Adding this function to an `rtable` layout will add a logistic regression variable summary to the table layout.
#'
#' @note For the formula, the variable names need to be standard `data.frame` column names without
#'   special characters.
#'
#' @examples
#' library(dplyr)
#' library(broom)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' df <- tidy(mod1, conf_level = 0.99)
#' df2 <- tidy(mod2, conf_level = 0.99)
#'
#' # flagging empty strings with "_"
#' df <- df_explicit_na(df, na_level = "_")
#' df2 <- df_explicit_na(df2, na_level = "_")
#'
#' result1 <- basic_table() %>%
#'   summarize_logistic(
#'     conf_level = 0.95,
#'     drop_and_remove_str = "_"
#'   ) %>%
#'   build_table(df = df)
#' result1
#'
#' result2 <- basic_table() %>%
#'   summarize_logistic(
#'     conf_level = 0.95,
#'     drop_and_remove_str = "_"
#'   ) %>%
#'   build_table(df = df2)
#' result2
#'
#' @export
#' @order 1
summarize_logistic <- function(lyt,
                               conf_level,
                               drop_and_remove_str = "",
                               .indent_mods = NULL) {
  # checks
  checkmate::assert_string(drop_and_remove_str)

  # Content functions, one per kind of summary row flagged in the tidied model data.
  add_variable_test <- logistic_summary_by_flag("is_variable_summary")
  add_term_estimates <- logistic_summary_by_flag("is_term_summary", .indent_mods = .indent_mods)
  add_odds_ratios <- logistic_summary_by_flag("is_reference_summary", .indent_mods = .indent_mods)
  drop_split <- drop_and_remove_levels(drop_and_remove_str)

  # Every row split uses `<var>_label` for its labels and the same split function.
  split_rows <- function(layout, var) {
    split_rows_by(
      layout,
      var = var,
      labels_var = paste0(var, "_label"),
      split_fun = drop_split
    )
  }

  lyt <- logistic_regression_cols(lyt, conf_level = conf_level)
  lyt <- add_variable_test(split_rows(lyt, "variable"))
  lyt <- add_term_estimates(split_rows(lyt, "term"))
  lyt <- split_rows(split_rows(lyt, "interaction"), "reference")
  lyt <- add_odds_ratios(lyt)
  lyt
}

#' Fit for logistic regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Fit a (conditional) logistic regression model.
#'
#' @inheritParams argument_convention
#' @param data (`data.frame`)\cr the data frame on which the model was fit.
#' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`.
#'   This will be used when fitting the (conditional) logistic regression model on the left hand
#'   side of the formula.
#'
#' @return A fitted logistic regression model.
#'
#' @section Model Specification:
#'
#' The `variables` list needs to include the following elements:
#'   * `arm`: Treatment arm variable name.
#'   * `response`: The response variable name. Usually this is a 0/1 variable.
#'   * `covariates`: This is either `NULL` (no covariates) or a character vector of covariate variable names.
#'   * `interaction`: This is either `NULL` (no interaction) or a string of a single covariate variable name already
#'     included in `covariates`. Then the interaction with the treatment arm is included in the model.
#'
#' @examples
#' library(dplyr)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' @export
fit_logistic <- function(data,
                         variables = list(
                           response = "Response",
                           arm = "ARMCD",
                           covariates = NULL,
                           interaction = NULL,
                           strata = NULL
                         ),
                         response_definition = "response") {
  assert_df_with_variables(data, variables)
  checkmate::assert_subset(names(variables), c("response", "arm", "covariates", "interaction", "strata"))
  checkmate::assert_string(response_definition)
  checkmate::assert_true(grepl("response", response_definition))

  # Substitute the actual response variable name for EVERY occurrence of the `response`
  # placeholder, so that definitions mentioning it more than once
  # (e.g. "response == 1 | response == 2") are fully resolved.
  response_definition <- gsub(
    pattern = "response",
    replacement = variables$response,
    x = response_definition,
    fixed = TRUE
  )

  # Build the model formula as a string: response ~ arm [+ covariates] [+ arm:interaction] [+ strata(...)].
  form <- paste0(response_definition, " ~ ", variables$arm)
  if (!is.null(variables$covariates)) {
    form <- paste0(form, " + ", paste(variables$covariates, collapse = " + "))
  }
  if (!is.null(variables$interaction)) {
    # The interaction covariate must be a single variable already present in `covariates`.
    checkmate::assert_string(variables$interaction)
    checkmate::assert_subset(variables$interaction, variables$covariates)
    form <- paste0(form, " + ", variables$arm, ":", variables$interaction)
  }
  if (!is.null(variables$strata)) {
    # Several strata variables are combined into a single stratification factor.
    strata_arg <- if (length(variables$strata) > 1) {
      paste0("I(interaction(", paste0(variables$strata, collapse = ", "), "))")
    } else {
      variables$strata
    }
    form <- paste0(form, " + strata(", strata_arg, ")")
  }
  formula <- stats::as.formula(form)

  # Without strata fit an ordinary binomial GLM; with strata delegate to the conditional
  # logistic regression wrapper.
  if (is.null(variables$strata)) {
    stats::glm(
      formula = formula,
      data = data,
      family = stats::binomial("logit")
    )
  } else {
    clogit_with_tryCatch(
      formula = formula,
      data = data,
      x = TRUE
    )
  }
}

#' Custom tidy method for binomial GLM results
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper method (for [broom::tidy()]) to prepare a data frame from a `glm` object
#' with `binomial` family.
#'
#' @inheritParams argument_convention
#' @param at (`numeric` or `NULL`)\cr optional values for the interaction variable. Otherwise the median is used.
#' @param x (`glm`)\cr logistic regression model fitted by [stats::glm()] with "binomial" family.
#'
#' @return A `data.frame` containing the tidied model.
#'
#' @method tidy glm
#'
#' @seealso [h_logistic_regression] for relevant helper functions.
#'
#' @examples
#' library(dplyr)
#' library(broom)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' df <- tidy(mod1, conf_level = 0.99)
#' df2 <- tidy(mod2, conf_level = 0.99)
#'
#' @export
tidy.glm <- function(x, # nolint
                     conf_level = 0.95,
                     at = NULL,
                     ...) {
  checkmate::assert_class(x, "glm")
  checkmate::assert_set_equal(x$family$family, "binomial")

  term_labels <- attr(stats::terms(x), "term.labels")
  data_classes <- attr(x$terms, "dataClasses")

  # Term labels without an entry in the model's data classes are treated as interaction terms.
  interaction_terms <- term_labels[!(term_labels %in% names(data_classes))]
  has_interaction <- length(interaction_terms) > 0

  df <- if (has_interaction) {
    h_logistic_inter_terms(
      x = term_labels,
      fit_glm = x,
      conf_level = conf_level,
      at = at
    )
  } else {
    h_logistic_simple_terms(
      x = term_labels,
      fit_glm = x,
      conf_level = conf_level
    )
  }

  # Convert the grouping columns to factors whose levels follow order of first appearance.
  for (col in c("variable", "term", "interaction", "reference")) {
    values <- df[[col]]
    df[[col]] <- factor(values, levels = unique(values))
  }
  df
}

#' Logistic regression multivariate column layout function
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Layout-creating function which creates a multivariate column layout summarizing logistic
#' regression results. This function is a wrapper for [rtables::split_cols_by_multivar()].
#'
#' @inheritParams argument_convention
#'
#' @return A layout object suitable for passing to further layouting functions. Adding this
#'   function to an `rtable` layout will split the table into columns corresponding to
#'   statistics `df`, `estimate`, `std_error`, `odds_ratio`, `ci`, and `pvalue`.
#'
#' @export
logistic_regression_cols <- function(lyt,
                                     conf_level = 0.95) {
  # Column labels keyed by the statistic each column shows; the names double as the
  # variables handed to the multivar column split.
  col_labels <- c(
    df = "Degrees of Freedom",
    estimate = "Parameter Estimate",
    std_error = "Standard Error",
    odds_ratio = "Odds Ratio",
    ci = paste("Wald", f_conf_level(conf_level)),
    pvalue = "p-value"
  )
  split_cols_by_multivar(lyt = lyt, vars = names(col_labels), varlabels = col_labels)
}

#' Logistic regression summary table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Constructor for content functions to be used in [`summarize_logistic()`] to summarize
#' logistic regression results. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @inheritParams argument_convention
#' @param flag_var (`string`)\cr variable name identifying which row should be used in this
#'   content function.
#'
#' @return A content function.
#'
#' @export
logistic_summary_by_flag <- function(flag_var, na_str = default_na_str(), .indent_mods = NULL) {
  checkmate::assert_string(flag_var)
  function(lyt) {
    # Content function for one statistic column, always keyed on the same flag variable.
    flagged_cfun <- function(stat, fmt) {
      cfun_by_flag(stat, flag_var, format = fmt, .indent_mods = .indent_mods)
    }
    summarize_row_groups(
      lyt = lyt,
      cfun = list(
        df = flagged_cfun("df", "xx."),
        estimate = flagged_cfun("estimate", "xx.xxx"),
        std_error = flagged_cfun("std_error", "xx.xxx"),
        odds_ratio = flagged_cfun("odds_ratio", ">999.99"),
        ci = flagged_cfun("ci", format_extreme_values_ci(2L)),
        pvalue = flagged_cfun("pvalue", "x.xxxx | (<0.0001)")
      ),
      na_str = na_str
    )
  }
}

# Utility functions to cooperate with {rtables} package

#' Convert table into matrix of strings
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to use mostly within tests. The `with_spaces` parameter allows
#' testing not only the content but also the indentation and table structure.
#' The `print_txt_to_copy` parameter instead facilitates test development by printing
#' well-formatted text that only needs to be copied and pasted into the expected output.
#'
#' @inheritParams formatters::toString
#' @param x (`VTableTree`)\cr `rtables` table object.
#' @param with_spaces (`flag`)\cr whether the tested table should keep the indentation and other relevant spaces.
#' @param print_txt_to_copy (`flag`)\cr utility to have a way to copy the input table directly
#'   into the expected variable instead of copying it too manually.
#'
#' @return A `matrix` of `string`s. If `print_txt_to_copy = TRUE` the well formatted printout of the
#'   table will be printed to console, ready to be copied as an expected value.
#'
#' @examples
#' tbl <- basic_table() %>%
#'   split_rows_by("SEX") %>%
#'   split_cols_by("ARM") %>%
#'   analyze("AGE") %>%
#'   build_table(tern_ex_adsl)
#'
#' to_string_matrix(tbl, widths = ceiling(propose_column_widths(tbl) / 2))
#'
#' @export
to_string_matrix <- function(x, widths = NULL, max_width = NULL,
                             hsep = formatters::default_hsep(),
                             with_spaces = TRUE, print_txt_to_copy = FALSE) {
  checkmate::assert_flag(with_spaces)
  checkmate::assert_flag(print_txt_to_copy)
  checkmate::assert_int(max_width, null.ok = TRUE)

  # An existing matrix print form is used as is; anything else is converted first.
  mpf <- if (inherits(x, "MatrixPrintForm")) x else matrix_form(x, TRUE)

  out <- if (with_spaces) {
    # Wrapping is switched on exactly when a maximum width is supplied.
    rendered <- toString(mpf, widths = widths, tf_wrap = !is.null(max_width), max_width = max_width, hsep = hsep)
    strsplit(rendered, "\n")[[1]]
  } else {
    mpf$strings
  }

  # Optionally print the result as R code (`c("...", ...)`) to paste into an expected value.
  if (print_txt_to_copy) {
    lines_to_print <- if (with_spaces) out else apply(out, 1, paste0, collapse = '", "')
    cat(paste0('c(\n  "', paste0(lines_to_print, collapse = '",\n  "'), '"\n)'))
  }

  out
}

#' Blank for missing input
#'
#' Helper function to use in tabulating model results.
#'
#' @param x (`vector`)\cr input for a cell.
#'
#' @return An empty `character` vector if all entries in `x` are missing (`NA`), otherwise
#'   the unlisted version of `x`.
#'
#' @keywords internal
unlist_and_blank_na <- function(x) {
  flat <- unlist(x)
  # Any non-missing entry means the flattened input is returned unchanged.
  if (!all(is.na(flat))) {
    return(flat)
  }
  # Everything is missing (or there is nothing at all): return an empty character vector.
  character()
}

#' Constructor for content functions given a data frame with flag input
#'
#' This can be useful for tabulating model results.
#'
#' @param analysis_var (`string`)\cr variable name for the column containing values to be returned by the
#'   content function.
#' @param flag_var (`string`)\cr variable name for the logical column identifying which row should be returned.
#' @param format (`string`)\cr `rtables` format to use.
#'
#' @return A content function which gives `df[[analysis_var]]` at the row(s) flagged by
#'   `df[[flag_var]]` in the given format (or an empty `character` value if all of them are `NA`).
#'
#' @keywords internal
cfun_by_flag <- function(analysis_var,
                         flag_var,
                         format = "xx",
                         .indent_mods = NULL) {
  checkmate::assert_string(analysis_var)
  checkmate::assert_string(flag_var)
  function(df, labelstr) {
    # Rows where the flag column is TRUE supply the cell value.
    flagged_rows <- which(df[[flag_var]])
    cell_value <- unlist_and_blank_na(df[[analysis_var]][flagged_rows])
    cell <- rcell(cell_value, format = format, indent_mod = .indent_mods)
    formatters::with_label(cell, labelstr)
  }
}

#' Content row function to add row total to labels
#'
#' This takes the label of the latest row split level and adds the row total from `df` in parentheses.
#' This function differs from [c_label_n_alt()] by taking row counts from `df` rather than
#' `alt_counts_df`, and is used by [add_rowcounts()] when `alt_counts` is set to `FALSE`.
#'
#' @inheritParams argument_convention
#'
#' @return A list with formatted [rtables::CellValue()] with the row count value and the correct label.
#'
#' @note It is important here to not use `df` but rather `.N_row` in the implementation, because
#'   the former is already split by columns and will refer to the first column of the data only.
#'
#' @seealso [c_label_n_alt()] which performs the same function but retrieves row counts from
#'   `alt_counts_df` instead of `df`.
#'
#' @keywords internal
c_label_n <- function(df,
                      labelstr,
                      .N_row) { # nolint
  # The count is kept as the cell value but rendered as an empty string; it only shows in the label.
  hide_value <- function(x, ...) ""
  counted_label <- paste0(labelstr, " (N=", .N_row, ")")
  count_cell <- formatters::with_label(c(.N_row, .N_row), counted_label)
  in_rows(
    .list = list(row_count = count_cell),
    .formats = c(row_count = hide_value)
  )
}

#' Content row function to add `alt_counts_df` row total to labels
#'
#' This takes the label of the latest row split level and adds the row total from `alt_counts_df`
#' in parentheses. This function differs from [c_label_n()] by taking row counts from `alt_counts_df`
#' rather than `df`, and is used by [add_rowcounts()] when `alt_counts` is set to `TRUE`.
#'
#' @inheritParams argument_convention
#'
#' @return A list with formatted [rtables::CellValue()] with the row count value and the correct label.
#'
#' @seealso [c_label_n()] which performs the same function but retrieves row counts from `df` instead
#'   of `alt_counts_df`.
#'
#' @keywords internal
c_label_n_alt <- function(df,
                          labelstr,
                          .alt_df_row) {
  # The row total is the number of rows of `.alt_df_row`.
  alt_row_total <- nrow(.alt_df_row)
  # The count is kept as the cell value but rendered as an empty string; it only shows in the label.
  hide_value <- function(x, ...) ""
  counted_label <- paste0(labelstr, " (N=", alt_row_total, ")")
  count_cell <- formatters::with_label(c(alt_row_total, alt_row_total), counted_label)
  in_rows(
    .list = list(row_count = count_cell),
    .formats = c(row_count = hide_value)
  )
}

#' Layout-creating function to add row total counts
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This works analogously to [rtables::add_colcounts()] but on the rows. This function
#'  is a wrapper for [rtables::summarize_row_groups()].
#'
#' @inheritParams argument_convention
#' @param alt_counts (`flag`)\cr whether row counts should be taken from `alt_counts_df` (`TRUE`)
#'   or from `df` (`FALSE`). Defaults to `FALSE`.
#'
#' @return A modified layout where the latest row split labels now have the row-wise
#'   total counts (i.e. without column-based subsetting) attached in parentheses.
#'
#' @note Row count values are contained in these row count rows but are not displayed
#'   so that they are not considered zero rows by default when pruning.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   split_rows_by("RACE", split_fun = drop_split_levels) %>%
#'   add_rowcounts() %>%
#'   analyze("AGE", afun = list_wrap_x(summary), format = "xx.xx") %>%
#'   build_table(DM)
#'
#' @export
add_rowcounts <- function(lyt, alt_counts = FALSE) {
  # Choose the content function according to where the row counts come from.
  count_cfun <- if (alt_counts) {
    c_label_n_alt
  } else {
    c_label_n
  }
  summarize_row_groups(lyt, cfun = count_cfun)
}

#' Obtain column indices
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to extract column indices from a `VTableTree` for a given
#' vector of column names.
#'
#' @param table_tree (`VTableTree`)\cr `rtables` table object to extract the indices from.
#' @param col_names (`character`)\cr vector of column names.
#'
#' @return A vector of column indices.
#'
#' @export
h_col_indices <- function(table_tree, col_names) {
  checkmate::assert_class(table_tree, "VTableNodeInfo")
  # Column names are read from the names of the `cextra_args` attribute of the column info.
  available_cols <- names(attr(col_info(table_tree), "cextra_args"))
  checkmate::assert_subset(col_names, available_cols, empty.ok = FALSE)
  match(col_names, available_cols)
}

#' Labels or names of list elements
#'
#' Helper function for working with nested statistic function results which typically
#' don't have labels but names that we can use.
#'
#' @param x (`list`)\cr a list.
#'
#' @return A `character` vector with the labels or names for the list elements.
#'
#' @examples
#' x <- data.frame(
#'   a = 1:10,
#'   b = rnorm(10)
#' )
#' labels_or_names(x)
#' var_labels(x) <- c(b = "Label for b", a = NA)
#' labels_or_names(x)
#'
#' @export
labels_or_names <- function(x) {
  checkmate::assert_multi_class(x, c("data.frame", "list"))
  element_labels <- sapply(x, obj_label)
  has_no_label <- sapply(element_labels, is.null)
  # Use the element name wherever no label is available.
  unlist(ifelse(has_no_label, rlang::names2(x), element_labels))
}

#' Convert to `rtable`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a new generic function to convert objects to `rtable` tables.
#'
#' @param x (`data.frame`)\cr the object which should be converted to an `rtable`.
#' @param ... additional arguments for methods.
#'
#' @return An `rtables` table object. Note that the concrete class will depend on the method used.
#'
#' @export
as.rtable <- function(x, ...) { # nolint
  # S3 generic: dispatches on the class of the first argument `x`.
  UseMethod("as.rtable")
}

#' @describeIn as.rtable Method for converting a `data.frame` that contains numeric columns to `rtable`.
#'
#' @param format (`string` or `function`)\cr the format which should be used for the columns.
#'
#' @method as.rtable data.frame
#'
#' @examples
#' x <- data.frame(
#'   a = 1:10,
#'   b = rnorm(10)
#' )
#' as.rtable(x)
#'
#' @export
as.rtable.data.frame <- function(x, format = "xx.xx", ...) {
  checkmate::assert_numeric(unlist(x))

  # Builds one `rrow` from the values of a single data frame row and its row name.
  build_row <- function(row, row_name) {
    do.call(rrow, c(as.list(unname(row)), row.name = row_name))
  }
  # Transposing makes every original row a column, so `Map` iterates over the rows.
  table_rows <- Map(build_row, row = as.data.frame(t(x)), row_name = rownames(x))
  table_spec <- list(header = labels_or_names(x), format = format)

  do.call(rtable, c(table_spec, table_rows))
}

#' Split parameters
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' Divides the data in the vector `param` into the groups defined by `f` based on the specified `value`. It is relevant
#' in `rtables` layers so as to distribute parameters `.stats` or `.formats` into lists with items corresponding to
#' specific analysis functions.
#'
#' @param param (`vector`)\cr the parameter to be split.
#' @param value (`vector`)\cr the value used to split.
#' @param f (`list`)\cr the reference to make the split.
#'
#' @return A named `list` with the same element names as `f`, each containing the elements specified in `.stats`.
#'
#' @examples
#' f <- list(
#'   surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci"),
#'   surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval")
#' )
#'
#' .stats <- c("pt_at_risk", "rate_diff")
#' h_split_param(.stats, .stats, f = f)
#'
#' # $surv
#' # [1] "pt_at_risk"
#' #
#' # $surv_diff
#' # [1] "rate_diff"
#'
#' .formats <- c("pt_at_risk" = "xx", "event_free_rate" = "xxx")
#' h_split_param(.formats, names(.formats), f = f)
#'
#' # $surv
#' # pt_at_risk event_free_rate
#' # "xx"           "xxx"
#' #
#' # $surv_diff
#' # NULL
#'
#' @export
h_split_param <- function(param,
                          value,
                          f) {
  lifecycle::deprecate_warn("0.9.8", "h_split_param()")

  # For each group in `f`, keep the entries of `param` whose `value` belongs to that group;
  # groups without any match are represented by `NULL`.
  lapply(f, function(group_values) {
    matched <- param[value %in% group_values]
    if (length(matched) == 0) NULL else matched
  })
}

#' Get selected statistics names
#'
#' Helper function to be used for creating `afun`.
#'
#' @param .stats (`vector` or `NULL`)\cr input to the layout creating function. Note that `NULL` means
#'   in this context that all default statistics should be used.
#' @param all_stats (`character`)\cr all statistics which can be selected here potentially.
#'
#' @return A `character` vector with the selected statistics.
#'
#' @keywords internal
afun_selected_stats <- function(.stats, all_stats) {
  checkmate::assert_character(.stats, null.ok = TRUE)
  checkmate::assert_character(all_stats)
  # An explicit selection is restricted to the statistics that are actually available.
  if (!is.null(.stats)) {
    return(intersect(.stats, all_stats))
  }
  # No selection given: every available statistic is used.
  all_stats
}

#' Add variable labels to top left corner in table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper layout-creating function to append the variable labels of a given variables vector
#' from a given dataset in the top left corner. If a variable label is not found then the
#' variable name itself is used instead. Multiple variable labels are concatenated with slashes.
#'
#' @inheritParams argument_convention
#' @param vars (`character`)\cr variable names of which the labels are to be looked up in `df`.
#' @param indent (`integer(1)`)\cr non-negative number of nested indent space, default to 0L which means no indent.
#'   1L means two spaces indent, 2L means four spaces indent and so on.
#'
#' @return A modified layout with the new variable label(s) added to the top-left material.
#'
#' @note This is not an optimal implementation of course, since we are using here the data set
#'   itself during the layout creation. When we have a more mature `rtables` implementation then
#'   this will also be improved or not necessary anymore.
#'
#' @examples
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   split_rows_by("SEX") %>%
#'   append_varlabels(DM, "SEX") %>%
#'   analyze("AGE", afun = mean) %>%
#'   append_varlabels(DM, "AGE", indent = 1)
#' build_table(lyt, DM)
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("SEX") %>%
#'   analyze("AGE", afun = mean) %>%
#'   append_varlabels(DM, c("SEX", "AGE"))
#' build_table(lyt, DM)
#'
#' @export
append_varlabels <- function(lyt, df, vars, indent = 0L) {
  # Logical `indent` values are still accepted, with a warning, and converted to 0L/1L.
  if (checkmate::test_flag(indent)) {
    warning("indent argument is now accepting integers. Boolean indent will be converted to integers.")
    indent <- as.integer(indent)
  }

  checkmate::assert_data_frame(df)
  checkmate::assert_character(vars)
  checkmate::assert_count(indent)

  var_labs <- formatters::var_labels(df[vars], fill = TRUE)
  # Each indent level contributes two leading spaces.
  padding <- strrep(" ", indent * 2)
  topleft_text <- paste0(padding, paste(var_labs, collapse = " / "))

  append_topleft(lyt, topleft_text)
}

#' Default string replacement for `NA` values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The default string used to represent `NA` values. This value is used as the default
#' value for the `na_str` argument throughout the `tern` package, and printed in place
#' of `NA` values in output tables. If not specified for each `tern` function by the user
#' via the `na_str` argument, or in the R environment options via [set_default_na_str()],
#' then `NA` is used.
#'
#' @param na_str (`string`)\cr single string value to set in the R environment options as
#'   the default value to replace `NA`s. Use `getOption("tern_default_na_str")` to check the
#'   current value set in the R environment (defaults to `NULL` if not set).
#'
#' @name default_na_str
NULL

#' @describeIn default_na_str Accessor for default `NA` value replacement string.
#'
#' @return
#' * `default_na_str` returns the current value if an R environment option has been set
#'   for `"tern_default_na_str"`, or `NA_character_` otherwise.
#'
#' @examples
#' # Default settings
#' default_na_str()
#' getOption("tern_default_na_str")
#'
#' # Set custom value
#' set_default_na_str("<Missing>")
#'
#' # Settings after value has been set
#' default_na_str()
#' getOption("tern_default_na_str")
#'
#' @export
default_na_str <- function() {
  na_opt <- getOption("tern_default_na_str")
  # An unset option comes back as `NULL`; fall back to a missing string in that case.
  if (is.null(na_opt)) NA_character_ else na_opt
}

#' @describeIn default_na_str Setter for default `NA` value replacement string. Sets the
#'   option `"tern_default_na_str"` within the R environment.
#'
#' @return
#' * `set_default_na_str` has no return value.
#'
#' @export
set_default_na_str <- function(na_str) {
  checkmate::assert_character(na_str, len = 1, null.ok = TRUE)
  # Stores the value under the "tern_default_na_str" option; `NULL` unsets the option.
  options(tern_default_na_str = na_str)
}


#' Utilities to handle extra arguments in analysis functions
#'
#' @description `r lifecycle::badge("stable")`
#' Important additional parameters, useful to modify behavior of analysis and summary
#' functions are listed in [rtables::additional_fun_params]. With these utility functions
#' we can retrieve a curated list of these parameters from the environment, and pass them
#' to the analysis functions with dedicated `...`; notice that the final `s_*` function
#' will get them through argument matching.
#'
#' @param extra_afun_params (`list`)\cr list of additional parameters (`character`) to be
#'   retrieved from the environment. Curated list is present in [rtables::additional_fun_params].
#' @param add_alt_df (`logical`)\cr if `TRUE`, the function will also add `.alt_df` and `.alt_df_row`
#'   parameters.
#'
#' @name util_handling_additional_fun_params
NULL

#' @describeIn util_handling_additional_fun_params Retrieve additional parameters from the environment.
#'
#' @return
#' * `retrieve_extra_afun_params` returns a list of the values of the parameters in the environment.
#'
#' @keywords internal
retrieve_extra_afun_params <- function(extra_afun_params) {
  # Capture the caller's frame here: inside the anonymous function below,
  # `parent.frame()` would no longer refer to the caller of this function.
  caller_env <- parent.frame()
  param_values <- lapply(extra_afun_params, function(param_name) {
    get(param_name, envir = caller_env)
  })
  setNames(param_values, extra_afun_params)
}

#' @describeIn util_handling_additional_fun_params Curated list of additional parameters for
#'   analysis functions. Please check [rtables::additional_fun_params] for precise descriptions.
#'
#' @return
#' * `get_additional_afun_params` returns a list of additional parameters.
#'
#' @keywords internal
get_additional_afun_params <- function(add_alt_df = FALSE) {
  # Empty placeholder of a fitting type for each additional parameter.
  out_list <- list(
    .N_col = integer(),
    .N_total = integer(),
    .N_row = integer(),
    .df_row = data.frame(),
    .var = character(),
    .ref_group = character(),
    .ref_full = vector(mode = "numeric"),
    .in_ref_col = logical(),
    .spl_context = data.frame(),
    .all_col_exprs = vector(mode = "expression"),
    .all_col_counts = vector(mode = "integer")
  )

  if (isTRUE(add_alt_df)) {
    # The placeholders must be wrapped in `list()`: `c()` treats a data frame as a list of its
    # columns, so concatenating zero-column data frames directly would append nothing at all.
    out_list <- c(
      out_list,
      list(
        .alt_df_row = data.frame(),
        .alt_df = data.frame()
      )
    )
  }

  out_list
}

#' Apply 1/3 or 1/2 imputation rule to data
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @param x_stats (named `list`)\cr a named list of statistics, typically the results of [s_summary()].
#' @param stat (`string`)\cr statistic to return the value/NA level of according to the imputation
#'   rule applied.
#' @param imp_rule (`string`)\cr imputation rule setting. Set to `"1/3"` to implement 1/3 imputation
#'   rule or `"1/2"` to implement 1/2 imputation rule.
#' @param post (`flag`)\cr whether the data corresponds to a post-dose time-point (defaults to `FALSE`).
#'   This parameter is only used when `imp_rule` is set to `"1/3"`.
#' @param avalcat_var (`string`)\cr name of variable that indicates whether a row in `df` corresponds
#'   to an analysis value in category `"BLQ"`, `"LTR"`, `"<PCLLOQ"`, or none of the above
#'   (defaults to `"AVALCAT1"`). Variable `avalcat_var` must be present in `df`.
#'
#' @return A `list` containing statistic value (`val`) and NA level (`na_str`) that should be displayed
#'   according to the specified imputation rule.
#'
#' @seealso [analyze_vars_in_cols()] where this function can be implemented by setting the `imp_rule`
#'   argument.
#'
#' @examples
#' set.seed(1)
#' df <- data.frame(
#'   AVAL = runif(50, 0, 1),
#'   AVALCAT1 = sample(c(1, "BLQ"), 50, replace = TRUE)
#' )
#' x_stats <- s_summary(df$AVAL)
#' imputation_rule(df, x_stats, "max", "1/3")
#' imputation_rule(df, x_stats, "geom_mean", "1/3")
#' imputation_rule(df, x_stats, "mean", "1/2")
#'
#' @export
imputation_rule <- function(df, x_stats, stat, imp_rule, post = FALSE, avalcat_var = "AVALCAT1") {
  checkmate::assert_choice(avalcat_var, names(df))
  checkmate::assert_choice(imp_rule, c("1/3", "1/2"))

  # Share of rows whose category matches one of the BLQ / LTR / "<digit" / "<PCLLOQ" patterns
  # (the denominator is at least 1 so an empty `df` gives a ratio of 0).
  is_flagged <- grepl("BLQ|LTR|<[1-9]|<PCLLOQ", df[[avalcat_var]])
  flagged_ratio <- sum(is_flagged) / max(1, nrow(df))

  # Unless a rule below applies, the statistic is reported as is, with "NE" as NA string.
  val <- x_stats[[stat]]
  na_str <- "NE"

  if (imp_rule == "1/3") {
    over_third <- flagged_ratio > 1 / 3
    # Statistics that keep their value once the ratio exceeds 1/3.
    kept_stats <- if (post) c("median", "max", "geom_mean") else c("median", "max")
    # Pre-dose geometric means are always blanked, regardless of the ratio.
    if ((!post && stat == "geom_mean") || (over_third && !stat %in% kept_stats)) val <- NA
    if (over_third && stat != "geom_mean") na_str <- "ND"
  } else if (imp_rule == "1/2" && flagged_ratio > 1 / 2 && stat != "max") {
    val <- NA
    na_str <- "ND"
  }

  list(val = val, na_str = na_str)
}

#' Control functions for Kaplan-Meier plot annotation tables
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Auxiliary functions for controlling arguments for formatting the annotation tables that can be added to plots
#' generated via [g_km()].
#'
#' @param x (`proportion`)\cr x-coordinate for center of annotation table.
#' @param y (`proportion`)\cr y-coordinate for center of annotation table.
#' @param w (`proportion`)\cr relative width of the annotation table.
#' @param h (`proportion`)\cr relative height of the annotation table.
#' @param fill (`flag` or `character`)\cr whether the annotation table should have a background fill color.
#'   Can also be a color code to use as the background fill color. If `TRUE`, color code defaults to `"#00000020"`.
#'
#' @return A list of components with the same names as the arguments.
#'
#' @seealso [g_km()]
#'
#' @name control_annot
NULL

#' @describeIn control_annot Control function for formatting the median survival time annotation table. This annotation
#'   table can be added in [g_km()] by setting `annot_surv_med=TRUE`, and can be configured using the
#'   `control_surv_med_annot()` function by setting it as the `control_annot_surv_med` argument.
#'
#' @examples
#' control_surv_med_annot()
#'
#' @export
control_surv_med_annot <- function(x = 0.8, y = 0.85, w = 0.32, h = 0.16, fill = TRUE) {
  # Position and size are each validated with `assert_proportion_value()`; `fill` is passed through unchecked.
  assert_proportion_value(x)
  assert_proportion_value(y)
  assert_proportion_value(w)
  assert_proportion_value(h)

  annot_control <- list(x = x, y = y, w = w, h = h, fill = fill)
  annot_control
}

#' @describeIn control_annot Control function for formatting the Cox-PH annotation table. This annotation table can be
#'   added in [g_km()] by setting `annot_coxph=TRUE`, and can be configured using the `control_coxph_annot()` function
#'   by setting it as the `control_annot_coxph` argument.
#'
#' @param ref_lbls (`flag`)\cr whether the reference group should be explicitly printed in labels for the
#'   annotation table. If `FALSE` (default), only comparison groups will be printed in the table labels.
#'
#' @examples
#' control_coxph_annot()
#'
#' @export
control_coxph_annot <- function(x = 0.29, y = 0.51, w = 0.4, h = 0.125, fill = TRUE, ref_lbls = FALSE) {
  checkmate::assert_logical(ref_lbls, any.missing = FALSE)

  # `fill` has to be forwarded explicitly; otherwise the value supplied by the user is
  # dropped and the returned list always carries the default `fill` of
  # `control_surv_med_annot()`.
  c(
    control_surv_med_annot(x = x, y = y, w = w, h = h, fill = fill),
    list(ref_lbls = ref_lbls)
  )
}

#' Helper function to calculate x-tick positions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Calculate the positions of ticks on the x-axis. However, if `xticks` already
#' exists it is kept as is. It is based on the same function `ggplot2` relies on,
#' and is required in the graphic and the patient-at-risk annotation table.
#'
#' @inheritParams g_km
#' @inheritParams h_ggkm
#'
#' @return A vector of positions to use for x-axis ticks on a `ggplot` object.
#'
#' @examples
#' library(dplyr)
#' library(survival)
#'
#' data <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_data_plot()
#'
#' h_xticks(data)
#' h_xticks(data, xticks = seq(0, 3000, 500))
#' h_xticks(data, xticks = 500)
#' h_xticks(data, xticks = 500, max_time = 6000)
#' h_xticks(data, xticks = c(0, 500), max_time = 300)
#' h_xticks(data, xticks = 500, max_time = 300)
#'
#' @export
h_xticks <- function(data, xticks = NULL, max_time = NULL) {
  # No ticks requested: let `labeling::extended()` pick about 5 ticks over the time range,
  # extended to `max_time` when that is larger.
  if (is.null(xticks)) {
    time_range <- range(data$time)
    upper <- if (is.null(max_time)) time_range[2] else max(time_range[2], max_time)
    return(labeling::extended(time_range[1], upper, m = 5))
  }

  # A single number is the interval between ticks, starting at 0.
  if (checkmate::test_number(xticks)) {
    upper <- if (is.null(max_time)) max(data$time) else max(data$time, max_time)
    return(seq(0, upper, xticks))
  }

  # A numeric vector already holds the tick positions.
  if (is.numeric(xticks)) {
    return(xticks)
  }

  stop(
    paste(
      "xticks should be either `NULL`",
      "or a single number (interval between x ticks)",
      "or a numeric vector (position of ticks on the x axis)"
    )
  )
}

#' Helper function for survival estimations
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Transform a survival fit to a table with groups in rows characterized by N, median and confidence interval.
#'
#' @inheritParams h_data_plot
#'
#' @return A summary table with statistics `N`, `Median`, and `XX% CI` (`XX` taken from `fit_km`).
#'
#' @examples
#' library(dplyr)
#' library(survival)
#'
#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "OS")
#' fit <- survfit(
#'   formula = Surv(AVAL, 1 - CNSR) ~ ARMCD,
#'   data = adtte
#' )
#' h_tbl_median_surv(fit_km = fit)
#'
#' @export
h_tbl_median_surv <- function(fit_km, armval = "All") {
  # The summary is needed for both the table and the confidence level, so compute it once.
  fit_summary <- summary(fit_km)

  y <- if (is.null(fit_km$strata)) {
    # Without strata the summary table is a plain vector: turn it into a one-row data frame.
    as.data.frame(t(fit_summary$table), row.names = armval)
  } else {
    tbl <- fit_summary$table
    # Row names have the form "<variable>=<level>": drop everything up to and including the
    # first "=". Unlike splitting on a substituted "equals" token, this also works when a
    # label itself contains the text "equals" or when the level is empty.
    rownames(tbl) <- sub("^[^=]*=", "", rownames(tbl))
    as.data.frame(tbl)
  }

  conf.int <- fit_summary$conf.int # nolint
  y$records <- round(y$records)
  y$median <- signif(y$median, 4)
  # The confidence limit columns are named after the confidence level, e.g. "0.95LCL" / "0.95UCL".
  y$`CI` <- paste0(
    "(", signif(y[[paste0(conf.int, "LCL")]], 4), ", ", signif(y[[paste0(conf.int, "UCL")]], 4), ")"
  )
  stats::setNames(
    y[c("records", "median", "CI")],
    c("N", "Median", f_conf_level(conf.int))
  )
}

#' Helper function for generating a pairwise Cox-PH table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Create a `data.frame` of pairwise stratified or unstratified Cox-PH analysis results.
#'
#' @inheritParams g_km
#' @param annot_coxph_ref_lbls (`flag`)\cr whether the reference group should be explicitly printed in labels for the
#'   `annot_coxph` table. If `FALSE` (default), only comparison groups will be printed in `annot_coxph` table labels.
#'
#' @return A `data.frame` containing statistics `HR`, `XX% CI` (`XX` taken from `control_coxph_pw`),
#'   and `p-value (log-rank)`.
#'
#' @examples
#' library(dplyr)
#'
#' adtte <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#'
#' h_tbl_coxph_pairwise(
#'   df = adtte,
#'   variables = list(tte = "AVAL", is_event = "is_event", arm = "ARM"),
#'   control_coxph_pw = control_coxph(conf_level = 0.9)
#' )
#'
#' @export
h_tbl_coxph_pairwise <- function(df,
                                 variables,
                                 ref_group_coxph = NULL,
                                 control_coxph_pw = control_coxph(),
                                 annot_coxph_ref_lbls = FALSE) {
  # Backward compatibility: accept the deprecated `strat` element and copy it to `strata`.
  if ("strat" %in% names(variables)) {
    warning(
      "Warning: the `strat` element name of the `variables` list argument to `h_tbl_coxph_pairwise() ",
      "was deprecated in tern 0.9.4.\n  ",
      "Please use the name `strata` instead of `strat` in the `variables` argument."
    )
    variables[["strata"]] <- variables[["strat"]]
  }

  assert_df_with_variables(df, variables)

  arm <- variables$arm
  # A non-factor arm column has no levels, so it must be converted before `ref_group_coxph`
  # is validated; otherwise any non-NULL reference group is rejected. Factor columns are
  # validated against their declared levels, exactly as before.
  if (!is.factor(df[[arm]])) {
    df[[arm]] <- factor(df[[arm]])
  }
  checkmate::assert_choice(ref_group_coxph, levels(df[[arm]]), null.ok = TRUE)
  checkmate::assert_flag(annot_coxph_ref_lbls)

  # Re-create the factor so that only levels present in the data remain.
  df[[arm]] <- factor(df[[arm]])

  # The reference group defaults to the first arm level; all other levels are compared against it.
  ref_group <- if (!is.null(ref_group_coxph)) ref_group_coxph else levels(df[[arm]])[1]
  comp_group <- setdiff(levels(df[[arm]]), ref_group)

  # One single-row data frame of formatted results per comparison group.
  results <- Map(function(comp) {
    res <- s_coxph_pairwise(
      df = df[df[[arm]] == comp, , drop = FALSE],
      .ref_group = df[df[[arm]] == ref_group, , drop = FALSE],
      .in_ref_col = FALSE,
      .var = variables$tte,
      is_event = variables$is_event,
      strata = variables$strata,
      control = control_coxph_pw
    )
    res_df <- data.frame(
      hr = format(round(res$hr, 2), nsmall = 2),
      hr_ci = paste0(
        "(", format(round(res$hr_ci[1], 2), nsmall = 2), ", ",
        format(round(res$hr_ci[2], 2), nsmall = 2), ")"
      ),
      # NOTE(review): the positional `4` binds to the `trim` argument of `format()`, not to
      # `nsmall` - confirm whether four fixed decimals were intended here.
      pvalue = if (res$pvalue < 0.0001) "<0.0001" else format(round(res$pvalue, 4), 4),
      stringsAsFactors = FALSE
    )
    # Column headers for the CI and p-value come from the labels of the corresponding statistics.
    colnames(res_df) <- c("HR", vapply(res[c("hr_ci", "pvalue")], obj_label, FUN.VALUE = "character"))
    row.names(res_df) <- comp
    res_df
  }, comp_group)
  # Optionally rename the list elements so the reference group appears in the labels.
  if (annot_coxph_ref_lbls) names(results) <- paste(comp_group, "vs.", ref_group)

  do.call(rbind, results)
}

#' Helper function to tidy survival fit data
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convert the survival fit data into a data frame designed for plotting
#' within `g_km`.
#'
#' This starts from the [broom::tidy()] result, and then:
#'   * Post-processes the `strata` column into a factor.
#'   * Extends each stratum by an additional first row with time 0 and probability 1 so that
#'     downstream plot lines start at those coordinates.
#'   * Adds a `censor` column.
#'   * Filters the rows before `max_time`.
#'
#' @inheritParams g_km
#' @param fit_km (`survfit`)\cr result of [survival::survfit()].
#' @param armval (`string`)\cr used as strata name when treatment arm variable only has one level. Default is `"All"`.
#'
#' @return A `tibble` with columns `time`, `n.risk`, `n.event`, `n.censor`, `estimate`, `std.error`, `conf.high`,
#'   `conf.low`, `strata`, and `censor`.
#'
#' @examples
#' library(dplyr)
#' library(survival)
#'
#' # Test with multiple arms
#' tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_data_plot()
#'
#' # Test with single arm
#' tern_ex_adtte %>%
#'   filter(PARAMCD == "OS", ARMCD == "ARM B") %>%
#'   survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_data_plot(armval = "ARM B")
#'
#' @export
h_data_plot <- function(fit_km,
                        armval = "All",
                        max_time = NULL) {
  # Keep only the part of a "<variable>=<level>" strata label that follows the first "=".
  extract_level <- function(labels) {
    pieces <- strsplit(sub("=", "equals", labels), "equals")
    vapply(pieces, FUN = function(piece) piece[2], FUN.VALUE = character(1))
  }

  # Prepend a synthetic row at time 0 (probability 1, no events, no censoring) to one stratum.
  add_origin_row <- function(tbl) {
    origin <- tbl[1L, ]
    origin$time <- 0
    # Everyone counted in the first observed row is still at risk at time 0;
    # this must be computed before the event/censor counts are zeroed.
    origin$n.risk <- sum(origin[, c("n.risk", "n.event", "n.censor")])
    origin$n.censor <- 0
    origin$n.event <- 0
    origin$conf.low <- 1
    origin$conf.high <- 1
    origin$estimate <- 1
    origin$std.error <- 0
    rbind(origin, tbl)
  }

  plot_data <- broom::tidy(fit_km)

  if (is.null(fit_km$strata)) {
    # Unstratified fit: label every row with the supplied arm value.
    plot_data$strata <- armval
  } else {
    # Factor levels follow the order of the strata stored in the fit.
    plot_data$strata <- factor(
      extract_level(plot_data$strata),
      levels = extract_level(names(fit_km$strata))
    )
  }

  plot_data <- do.call(
    rbind,
    lapply(split(plot_data, plot_data$strata), FUN = add_origin_row)
  )

  # `censor` holds the curve height at censoring times and NA elsewhere.
  plot_data$censor <- ifelse(plot_data$n.censor > 0, plot_data$estimate, NA)

  if (!is.null(max_time)) {
    plot_data <- plot_data[plot_data$time <= max(max_time), ]
  }
  plot_data
}

## Deprecated Functions ----

#' Helper function to create a KM plot
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' Draw the Kaplan-Meier plot using `ggplot2`.
#'
#' @inheritParams g_km
#' @param data (`data.frame`)\cr survival data as pre-processed by `h_data_plot`.
#'
#' @return A `ggplot` object.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks,
#'   xlab = "Days",
#'   yval = "Survival",
#'   ylab = "Survival Probability",
#'   title = "Survival"
#' )
#' gg
#' }
#'
#' @export
h_ggkm <- function(data,
                   xticks = NULL,
                   yval = "Survival",
                   censor_show,
                   xlab,
                   ylab,
                   ylim = NULL,
                   title,
                   footnotes = NULL,
                   max_time = NULL,
                   lwd = 1,
                   lty = NULL,
                   pch = 3,
                   size = 2,
                   col = NULL,
                   ci_ribbon = FALSE,
                   ggtheme = nestcolor::theme_nest()) {
  lifecycle::deprecate_warn(
    "0.9.4",
    "h_ggkm()",
    details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`."
  )
  checkmate::assert_numeric(lty, null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)

  # Derive the y-axis limits from the (possibly inverted) estimates when none are supplied.
  if (is.null(ylim)) {
    data_lims <- data
    if (yval == "Failure") data_lims[["estimate"]] <- 1 - data_lims[["estimate"]]
    if (!is.null(max_time)) {
      # Only estimates observed strictly before `max_time` contribute to the range.
      y_lwr <- min(data_lims[data_lims$time < max_time, ][["estimate"]])
      y_upr <- max(data_lims[data_lims$time < max_time, ][["estimate"]])
    } else {
      y_lwr <- min(data_lims[["estimate"]])
      y_upr <- max(data_lims[["estimate"]])
    }
    ylim <- c(y_lwr, y_upr)
  }
  checkmate::assert_numeric(ylim, finite = TRUE, any.missing = FALSE, len = 2, sorted = TRUE)

  # change estimates of survival to estimates of failure (1 - survival)
  if (yval == "Failure") {
    data$estimate <- 1 - data$estimate
    # The confidence bounds swap roles after inversion.
    data[c("conf.high", "conf.low")] <- list(1 - data$conf.low, 1 - data$conf.high)
    data$censor <- 1 - data$censor
  }

  gg <- {
    ggplot2::ggplot(
      data = data,
      mapping = ggplot2::aes(
        x = .data[["time"]],
        y = .data[["estimate"]],
        ymin = .data[["conf.low"]],
        ymax = .data[["conf.high"]],
        color = .data[["strata"]],
        fill = .data[["strata"]]
      )
    ) +
      ggplot2::geom_hline(yintercept = 0)
  }

  if (ci_ribbon) {
    gg <- gg + ggplot2::geom_ribbon(alpha = .3, lty = 0)
  }

  # Line type: default, one fixed type for all strata, or one type per stratum.
  gg <- if (is.null(lty)) {
    gg +
      ggplot2::geom_step(linewidth = lwd)
  } else if (checkmate::test_number(lty)) {
    gg +
      ggplot2::geom_step(linewidth = lwd, lty = lty)
  } else if (is.numeric(lty)) {
    gg +
      ggplot2::geom_step(mapping = ggplot2::aes(linetype = .data[["strata"]]), linewidth = lwd) +
      ggplot2::scale_linetype_manual(values = lty)
  }

  gg <- gg +
    ggplot2::coord_cartesian(ylim = ylim) +
    ggplot2::labs(x = xlab, y = ylab, title = title, caption = footnotes)

  if (!is.null(col)) {
    gg <- gg +
      ggplot2::scale_color_manual(values = col) +
      ggplot2::scale_fill_manual(values = col)
  }
  if (censor_show) {
    # Plot one marker per row that has at least one censored observation.
    dt <- data[data$n.censor != 0, ]
    dt$censor_lbl <- factor("Censored")

    gg <- gg + ggplot2::geom_point(
      data = dt,
      ggplot2::aes(
        x = .data[["time"]],
        y = .data[["censor"]],
        shape = .data[["censor_lbl"]]
      ),
      size = size,
      show.legend = TRUE,
      inherit.aes = TRUE
    ) +
      ggplot2::scale_shape_manual(name = NULL, values = pch) +
      ggplot2::guides(
        shape = ggplot2::guide_legend(override.aes = list(linetype = NA)),
        fill = ggplot2::guide_legend(override.aes = list(shape = NA))
      )
  }

  # x-axis breaks/limits depend on which of `xticks` and `max_time` were supplied.
  if (!is.null(max_time) && !is.null(xticks)) {
    gg <- gg + ggplot2::scale_x_continuous(breaks = xticks, limits = c(min(0, xticks), max(c(xticks, max_time))))
  } else if (!is.null(xticks)) {
    if (max(data$time) <= max(xticks)) {
      gg <- gg + ggplot2::scale_x_continuous(breaks = xticks, limits = c(min(0, min(xticks)), max(xticks)))
    } else {
      gg <- gg + ggplot2::scale_x_continuous(breaks = xticks)
    }
  } else if (!is.null(max_time)) {
    gg <- gg + ggplot2::scale_x_continuous(limits = c(0, max_time))
  }

  if (!is.null(ggtheme)) {
    gg <- gg + ggtheme
  }

  # `grid::unit()` is namespace-qualified so this does not depend on `grid` or `ggplot2` being attached.
  gg + ggplot2::theme(
    legend.position = "bottom",
    legend.title = ggplot2::element_blank(),
    legend.key.height = grid::unit(0.02, "npc"),
    panel.grid.major.x = ggplot2::element_line(linewidth = 2)
  )
}

#' `ggplot` decomposition
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' The elements composing the `ggplot` are extracted and organized in a `list`.
#'
#' @param gg (`ggplot`)\cr a graphic to decompose.
#'
#' @return A named `list` with elements:
#'   * `panel`: The panel.
#'   * `yaxis`: The y-axis.
#'   * `xaxis`: The x-axis.
#'   * `xlab`: The x-axis label.
#'   * `ylab`: The y-axis label.
#'   * `guide`: The legend.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   yval = "Survival",
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "tt",
#'   footnotes = "ff"
#' )
#'
#' g_el <- h_decompose_gg(gg)
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "red", fill = "gray85", lwd = 5))
#' grid::grid.draw(g_el$panel)
#'
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "royalblue", fill = "gray85", lwd = 5))
#' grid::grid.draw(with(g_el, cbind(ylab, yaxis)))
#' }
#'
#' @export
h_decompose_gg <- function(gg) {
  lifecycle::deprecate_warn(
    "0.9.4",
    "h_decompose_gg()",
    details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`."
  )
  # Names of the returned list elements, each paired with the pattern
  # passed to `gtable::gtable_filter()` to extract that element.
  element_patterns <- c(
    panel = "panel",
    yaxis = "axis-l",
    xaxis = "axis-b",
    xlab = "xlab-b",
    ylab = "ylab-l",
    guide = "guide"
  )
  plot_grob <- ggplot2::ggplotGrob(gg)
  lapply(
    X = element_patterns,
    FUN = function(pattern) {
      gtable::gtable_filter(plot_grob, pattern)
    }
  )
}

#' Helper function to prepare a KM layout
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' Prepares a (5 rows) x (2 cols) layout for the Kaplan-Meier curve.
#'
#' @inheritParams g_km
#' @inheritParams h_ggkm
#' @param g_el (`list` of `gtable`)\cr list as obtained by `h_decompose_gg()`.
#' @param annot_at_risk (`flag`)\cr compute and add the annotation table reporting the number of
#'   patients at risk matching the main grid of the Kaplan-Meier curve.
#'
#' @return A grid layout.
#'
#' @details The layout corresponds to a grid of two columns and five rows of unequal dimensions. Most of the
#'   dimensions are fixed; only the curve is flexible and adapts to the remaining free space.
#'   * The left column gets the annotation of the `ggplot` (y-axis) and the names of the strata for the patients
#'     at risk tabulation. The main constraint is the width of the column, which must be wide enough to fit
#'     the strata names.
#'   * The right column receives the `ggplot`, the legend, the x-axis and the patients at risk table.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "tt", footnotes = "ff", yval = "Survival"
#' )
#' g_el <- h_decompose_gg(gg)
#' lyt <- h_km_layout(data = data_plot, g_el = g_el, title = "t", footnotes = "f")
#' grid.show.layout(lyt)
#' }
#'
#' @export
h_km_layout <- function(data, g_el, title, footnotes, annot_at_risk = TRUE, annot_at_risk_title = TRUE) {
  lifecycle::deprecate_warn(
    "0.9.4",
    "h_km_layout()",
    details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`."
  )
  txtlines <- levels(as.factor(data$strata))
  nlines <- nlevels(as.factor(data$strata))

  # The left column must fit both the y-axis annotation and the widest strata label (plus 7pt).
  col_annot_width <- max(
    c(
      as.numeric(grid::convertX(g_el$yaxis$widths + g_el$ylab$widths, "pt")),
      as.numeric(
        grid::convertX(
          grid::stringWidth(txtlines) + grid::unit(7, "pt"), "pt"
        )
      )
    )
  )

  # Row heights are accumulated as parallel vectors: value (`ht_x`), unit (`ht_units`) and a
  # mask (`no_tbl_ind`) of the rows kept when the at-risk table is not drawn.
  ttl_row <- as.numeric(!is.null(title))
  foot_row <- as.numeric(!is.null(footnotes))
  no_tbl_ind <- c()
  ht_x <- c()
  ht_units <- c()

  # Optional title row.
  if (ttl_row == 1) {
    no_tbl_ind <- c(no_tbl_ind, TRUE)
    ht_x <- c(ht_x, 2)
    ht_units <- c(ht_units, "lines")
  }

  # Six core rows follow. The first three are always kept; the last three (the at-risk title
  # row, the at-risk table itself and the trailing "pt" row) belong to the at-risk annotation.
  # The mask must therefore carry three FALSE entries so it has the same length as `ht_x`;
  # a shorter mask would be silently recycled when used as a logical index below.
  no_tbl_ind <- c(no_tbl_ind, rep(TRUE, 3), rep(FALSE, 3))
  # NOTE(review): this relies on each unit term below having length 1 and on `c()` reducing
  # them to plain numbers in "pt" - confirm against the supported R/grid versions.
  ht_x <- c(
    ht_x,
    1,
    grid::convertX(with(g_el, xaxis$heights + ylab$widths), "pt") + grid::unit(5, "pt"),
    grid::convertX(g_el$guide$heights, "pt") + grid::unit(2, "pt"),
    1,
    nlines + 0.5,
    grid::convertX(with(g_el, xaxis$heights + ylab$widths), "pt")
  )
  ht_units <- c(
    ht_units,
    "null",
    "pt",
    "pt",
    "lines",
    "lines",
    "pt"
  )

  # Optional footnotes row.
  if (foot_row == 1) {
    no_tbl_ind <- c(no_tbl_ind, TRUE)
    ht_x <- c(ht_x, 1)
    ht_units <- c(ht_units, "lines")
  }

  if (annot_at_risk) {
    # Keep every row, optionally dropping only the at-risk title row (fourth core row).
    no_at_risk_tbl <- rep(TRUE, 6 + ttl_row + foot_row)
    if (!annot_at_risk_title) {
      no_at_risk_tbl[length(no_at_risk_tbl) - 2 - foot_row] <- FALSE
    }
  } else {
    no_at_risk_tbl <- no_tbl_ind
  }

  grid::grid.layout(
    nrow = sum(no_at_risk_tbl), ncol = 2,
    widths = grid::unit(c(col_annot_width, 1), c("pt", "null")),
    heights = grid::unit(
      x = ht_x[no_at_risk_tbl],
      units = ht_units[no_at_risk_tbl]
    )
  )
}

#' Helper function to create patient-at-risk grobs
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' Two graphical objects are obtained, one corresponding to row labeling and the second to the table of
#' numbers of patients at risk. If `title = TRUE`, a third object corresponding to the table title is
#' also obtained.
#'
#' @inheritParams g_km
#' @inheritParams h_ggkm
#' @param annot_tbl (`data.frame`)\cr annotation as prepared by [survival::summary.survfit()] which
#'   includes the number of patients at risk at given time points.
#' @param xlim (`numeric(1)`)\cr the maximum value on the x-axis (used to ensure the at risk table aligns with the KM
#'   graph).
#' @param title (`flag`)\cr whether the "Patients at Risk" title should be added above the `annot_at_risk`
#'   table. Has no effect if `annot_at_risk` is `FALSE`. Defaults to `TRUE`.
#'
#' @return A named `list` of two `gTree` objects if `title = FALSE`: `at_risk` and `label`, or three
#'   `gTree` objects if `title = TRUE`: `at_risk`, `label`, and `title`.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#'
#' data_plot <- h_data_plot(fit_km = fit_km)
#'
#' xticks <- h_xticks(data = data_plot)
#'
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "tt", footnotes = "ff", yval = "Survival"
#' )
#'
#' # The annotation table reports the patient at risk for a given strata and
#' # times (`xticks`).
#' annot_tbl <- summary(fit_km, times = xticks)
#' if (is.null(fit_km$strata)) {
#'   annot_tbl <- with(annot_tbl, data.frame(n.risk = n.risk, time = time, strata = "All"))
#' } else {
#'   strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals")
#'   levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2]
#'   annot_tbl <- data.frame(
#'     n.risk = annot_tbl$n.risk,
#'     time = annot_tbl$time,
#'     strata = annot_tbl$strata
#'   )
#' }
#'
#' # The annotation table is transformed into a grob.
#' tbl <- h_grob_tbl_at_risk(data = data_plot, annot_tbl = annot_tbl, xlim = max(xticks))
#'
#' # For the representation, the layout is estimated for which the decomposition
#' # of the graphic element is necessary.
#' g_el <- h_decompose_gg(gg)
#' lyt <- h_km_layout(data = data_plot, g_el = g_el, title = "t", footnotes = "f")
#'
#' grid::grid.newpage()
#' pushViewport(viewport(layout = lyt, height = .95, width = .95))
#' grid.rect(gp = grid::gpar(lty = 1, col = "purple", fill = "gray85", lwd = 1))
#' pushViewport(viewport(layout.pos.row = 3:4, layout.pos.col = 2))
#' grid.rect(gp = grid::gpar(lty = 1, col = "orange", fill = "gray85", lwd = 1))
#' grid::grid.draw(tbl$at_risk)
#' popViewport()
#' pushViewport(viewport(layout.pos.row = 3:4, layout.pos.col = 1))
#' grid.rect(gp = grid::gpar(lty = 1, col = "green3", fill = "gray85", lwd = 1))
#' grid::grid.draw(tbl$label)
#' }
#'
#' @export
h_grob_tbl_at_risk <- function(data, annot_tbl, xlim, title = TRUE) {
  lifecycle::deprecate_warn(
    "0.9.4",
    "h_grob_tbl_at_risk()",
    details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`."
  )
  # Strata labels and their count drive the number of table rows.
  txtlines <- levels(as.factor(data$strata))
  nlines <- nlevels(as.factor(data$strata))
  # Time step of the table, taken from the first two rows of `annot_tbl` only.
  # NOTE(review): assumes those two rows belong to the same stratum and that time points are
  # evenly spaced - confirm against callers.
  y_int <- annot_tbl$time[2] - annot_tbl$time[1]
  # Complete the table to every (time, strata) combination up to `xlim`; combinations that
  # are absent from `annot_tbl` come back as NA from the join and are then set to 0.
  annot_tbl <- expand.grid(
    time = seq(0, xlim, y_int),
    strata = unique(annot_tbl$strata)
  ) %>% dplyr::left_join(annot_tbl, by = c("time", "strata"))
  annot_tbl[is.na(annot_tbl)] <- 0
  # Numeric code of each row's stratum, used to place text vertically.
  y_str_unit <- as.numeric(annot_tbl$strata)
  vp_table <- grid::plotViewport(margins = grid::unit(c(0, 0, 0, 0), "lines"))
  # Optional bold "Patients at Risk:" heading.
  if (title) {
    gb_table_title <- grid::gList(
      grid::textGrob(
        label = "Patients at Risk:",
        x = 1,
        y = grid::unit(0.2, "native"),
        gp = grid::gpar(fontface = "bold", fontsize = 10)
      )
    )
  }
  # Left-hand annotation: alternating grey row backgrounds plus the strata names in italics.
  gb_table_left_annot <- grid::gList(
    grid::rectGrob(
      x = 0, y = grid::unit(c(1:nlines) - 1, "lines"),
      gp = grid::gpar(fill = c("gray95", "gray90"), alpha = 1, col = "white"),
      height = grid::unit(1, "lines"), just = "bottom", hjust = 0
    ),
    grid::textGrob(
      label = unique(annot_tbl$strata),
      x = 0.5,
      y = grid::unit(
        (max(unique(y_str_unit)) - unique(y_str_unit)) + 0.75,
        "native"
      ),
      gp = grid::gpar(fontface = "italic", fontsize = 10)
    )
  )
  # Table body: the same row backgrounds plus the at-risk counts positioned at their time value.
  gb_patient_at_risk <- grid::gList(
    grid::rectGrob(
      x = 0, y = grid::unit(c(1:nlines) - 1, "lines"),
      gp = grid::gpar(fill = c("gray95", "gray90"), alpha = 1, col = "white"),
      height = grid::unit(1, "lines"), just = "bottom", hjust = 0
    ),
    grid::textGrob(
      label = annot_tbl$n.risk,
      x = grid::unit(annot_tbl$time, "native"),
      y = grid::unit(
        (max(y_str_unit) - y_str_unit) + .5,
        "line"
      ) # maybe native
    )
  )

  # Wrap each grob list in a data viewport: the table uses an x-scale of [0, xlim] widened by
  # 5% of `xlim` on each side; the label uses a unit x-scale and is as wide as the widest
  # strata label.
  ret <- list(
    at_risk = grid::gList(
      grid::gTree(
        vp = vp_table,
        children = grid::gList(
          grid::gTree(
            vp = grid::dataViewport(
              xscale = c(0, xlim) + c(-0.05, 0.05) * xlim,
              yscale = c(0, nlines + 1),
              extension = c(0.05, 0)
            ),
            children = grid::gList(gb_patient_at_risk)
          )
        )
      )
    ),
    label = grid::gList(
      grid::gTree(
        vp = grid::viewport(width = max(grid::stringWidth(txtlines))),
        children = grid::gList(
          grid::gTree(
            vp = grid::dataViewport(
              xscale = 0:1,
              yscale = c(0, nlines + 1),
              extension = c(0.0, 0)
            ),
            children = grid::gList(gb_table_left_annot)
          )
        )
      )
    )
  )

  # The `title` element is only present in the result when requested.
  if (title) {
    ret[["title"]] <- grid::gList(
      grid::gTree(
        vp = grid::viewport(width = max(grid::stringWidth(txtlines))),
        children = grid::gList(
          grid::gTree(
            vp = grid::dataViewport(
              xscale = 0:1,
              yscale = c(0, 1),
              extension = c(0, 0)
            ),
            children = grid::gList(gb_table_title)
          )
        )
      )
    )
  }

  ret
}

#' Helper function to create survival estimation grobs
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' The survival fit is transformed in a grob containing a table with groups in
#' rows characterized by N, median and 95% confidence interval.
#'
#' @inheritParams g_km
#' @inheritParams h_data_plot
#' @param ttheme (`list`)\cr see [gridExtra::ttheme_default()].
#' @param x (`proportion`)\cr a value between 0 and 1 specifying x-location.
#' @param y (`proportion`)\cr a value between 0 and 1 specifying y-location.
#' @param width (`grid::unit`)\cr width (as a unit) to use when printing the grob.
#'
#' @return A `grob` of a table containing statistics `N`, `Median`, and `XX% CI` (`XX` taken from `fit_km`).
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "pink", fill = "gray85", lwd = 1))
#' tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_grob_median_surv() %>%
#'   grid::grid.draw()
#' }
#'
#' @export
h_grob_median_surv <- function(fit_km,
                               armval = "All",
                               x = 0.9,
                               y = 0.9,
                               width = grid::unit(0.3, "npc"),
                               ttheme = gridExtra::ttheme_default()) {
  lifecycle::deprecate_warn(
    "0.9.4",
    "h_grob_median_surv()",
    details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`."
  )
  data <- h_tbl_median_surv(fit_km, armval = armval)

  # Table size in inches: the height scales with the number of rows (plus one header row).
  width <- grid::convertUnit(grid::unit(as.numeric(width), grid::unitType(width)), "in")
  height <- width * (nrow(data) + 1) / 12

  # Widest entry of the row names and of each column (header included), padded with blanks;
  # their relative string widths determine the column widths.
  w <- paste(" ", c(
    rownames(data)[which.max(nchar(rownames(data)))],
    vapply(
      names(data),
      function(col_name) {
        entries <- c(col_name, data[[col_name]])
        entries[which.max(nchar(entries))]
      },
      character(1)
    )
  ))
  w_unit <- grid::convertWidth(grid::stringWidth(w), "in", valueOnly = TRUE)

  # The font-size search below changes `par(ps = )` on the active graphics device;
  # restore the caller's setting when this function exits.
  old_ps <- graphics::par("ps")
  on.exit(graphics::par(ps = old_ps), add = TRUE)

  # Largest point size (1-64) at which the 4th entry of `w` still fits its column width.
  w_txt <- vapply(1:64, function(ps) {
    graphics::par(ps = ps)
    graphics::strwidth(w[4], units = "in")
  }, numeric(1))
  f_size_w <- which.max(w_txt[w_txt < as.numeric((w_unit / sum(w_unit)) * width)[4]])

  # Largest point size (1-64) whose measured text height fits a quarter of the table height.
  h_txt <- vapply(1:64, function(ps) {
    graphics::par(ps = ps)
    graphics::strheight(grid::stringHeight("X"), units = "in")
  }, numeric(1))
  f_size_h <- which.max(h_txt[h_txt < as.numeric(grid::unit(as.numeric(height) / 4, grid::unitType(height)))])

  # Only override the font size when the theme still carries a core font size of 12.
  if (ttheme$core$fg_params$fontsize == 12) {
    ttheme$core$fg_params$fontsize <- min(f_size_w, f_size_h)
    ttheme$colhead$fg_params$fontsize <- min(f_size_w, f_size_h)
    ttheme$rowhead$fg_params$fontsize <- min(f_size_w, f_size_h)
  }

  gt <- gridExtra::tableGrob(
    d = data,
    theme = ttheme
  )
  gt$widths <- ((w_unit / sum(w_unit)) * width)
  gt$heights <- rep(grid::unit(as.numeric(height) / 4, grid::unitType(height)), nrow(gt))

  # Anchor the table by its top-right corner at (x, y), offset by 1 and 1.5 lines.
  vp <- grid::viewport(
    x = grid::unit(x, "npc") + grid::unit(1, "lines"),
    y = grid::unit(y, "npc") + grid::unit(1.5, "lines"),
    height = height,
    width = width,
    just = c("right", "top")
  )

  grid::gList(
    grid::gTree(
      vp = vp,
      children = grid::gList(gt)
    )
  )
}

#' Helper function to create grid object with y-axis annotation
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' Build the y-axis annotation from a decomposed `ggplot`.
#'
#' @param ylab (`gtable`)\cr the y-lab as a graphical object derived from a `ggplot`.
#' @param yaxis (`gtable`)\cr the y-axis as a graphical object derived from a `ggplot`.
#'
#' @return A `gTree` object containing the y-axis annotation from a `ggplot`.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(formula = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "title", footnotes = "footnotes", yval = "Survival"
#' )
#'
#' g_el <- h_decompose_gg(gg)
#'
#' grid::grid.newpage()
#' pvp <- grid::plotViewport(margins = c(5, 4, 2, 20))
#' pushViewport(pvp)
#' grid::grid.draw(h_grob_y_annot(ylab = g_el$ylab, yaxis = g_el$yaxis))
#' grid.rect(gp = grid::gpar(lty = 1, col = "gray35", fill = NA))
#' }
#'
#' @export
h_grob_y_annot <- function(ylab, yaxis) {
  lifecycle::deprecate_warn(
    "0.9.4",
    "h_grob_y_annot()",
    details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`."
  )
  # Right-aligned viewport whose width is the combined width of the axis and its label, in points.
  annot_width <- grid::convertX(yaxis$widths + ylab$widths, "pt")
  annot_vp <- grid::viewport(
    width = annot_width,
    x = grid::unit(1, "npc"),
    just = "right"
  )
  # The label is placed to the left of the axis.
  annot_tree <- grid::gTree(
    vp = annot_vp,
    children = grid::gList(cbind(ylab, yaxis))
  )
  grid::gList(annot_tree)
}

#' Helper function to create Cox-PH grobs
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' Grob of `rtable` output from [h_tbl_coxph_pairwise()]
#'
#' @inheritParams h_grob_median_surv
#' @param ... arguments to pass to [h_tbl_coxph_pairwise()].
#' @param x (`proportion`)\cr a value between 0 and 1 specifying x-location.
#' @param y (`proportion`)\cr a value between 0 and 1 specifying y-location.
#' @param width (`grid::unit`)\cr width (as a unit) to use when printing the grob.
#'
#' @return A `grob` of a table containing statistics `HR`, `XX% CI` (`XX` taken from `control_coxph_pw`),
#'   and `p-value (log-rank)`.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "pink", fill = "gray85", lwd = 1))
#' data <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#' tbl_grob <- h_grob_coxph(
#'   df = data,
#'   variables = list(tte = "AVAL", is_event = "is_event", arm = "ARMCD"),
#'   control_coxph_pw = control_coxph(conf_level = 0.9), x = 0.5, y = 0.5
#' )
#' grid::grid.draw(tbl_grob)
#' }
#'
#' @export
h_grob_coxph <- function(...,
                         x = 0,
                         y = 0,
                         width = grid::unit(0.4, "npc"),
                         ttheme = gridExtra::ttheme_default(
                           padding = grid::unit(c(1, .5), "lines"),
                           core = list(bg_params = list(fill = c("grey95", "grey90"), alpha = .5))
                         )) {
  lifecycle::deprecate_warn(
    "0.9.4",
    "h_grob_coxph()",
    details = "`g_km` now generates `ggplot` objects. This function is no longer used within `tern`."
  )

  # Fallback used whenever no table can be drawn: inform the user and return an empty grob tree.
  empty_table <- function() {
    message(paste(
      "Warning: Cox table will not be displayed as there is",
      "not any level to be compared in the arm variable."
    ))
    grid::gList(
      grid::gTree(
        vp = NULL,
        children = NULL
      )
    )
  }

  data <- h_tbl_coxph_pairwise(...)

  # `nrow(NULL)` is `NULL`, so the size computations below are not meaningful for a missing
  # table; short-circuit to the same fallback the error handler produces.
  if (is.null(data)) {
    return(empty_table())
  }

  # Table size in inches: the height scales with the number of rows (plus one header row).
  width <- grid::convertUnit(grid::unit(as.numeric(width), grid::unitType(width)), "in")
  height <- width * (nrow(data) + 1) / 12

  # Widest entry of the row names and of each column (header included), padded with blanks;
  # their relative string widths determine the column widths.
  w <- paste("    ", c(
    rownames(data)[which.max(nchar(rownames(data)))],
    vapply(
      names(data),
      function(col_name) {
        entries <- c(col_name, data[[col_name]])
        entries[which.max(nchar(entries))]
      },
      character(1)
    )
  ))
  w_unit <- grid::convertWidth(grid::stringWidth(w), "in", valueOnly = TRUE)

  # The font-size search below changes `par(ps = )` on the active graphics device;
  # restore the caller's setting when this function exits.
  old_ps <- graphics::par("ps")
  on.exit(graphics::par(ps = old_ps), add = TRUE)

  # Largest point size (1-64) at which the 4th entry of `w` still fits its column width.
  w_txt <- vapply(1:64, function(ps) {
    graphics::par(ps = ps)
    graphics::strwidth(w[4], units = "in")
  }, numeric(1))
  f_size_w <- which.max(w_txt[w_txt < as.numeric((w_unit / sum(w_unit)) * width)[4]])

  # Largest point size (1-64) whose measured text height fits a quarter of the table height.
  h_txt <- vapply(1:64, function(ps) {
    graphics::par(ps = ps)
    graphics::strheight(grid::stringHeight("X"), units = "in")
  }, numeric(1))
  f_size_h <- which.max(h_txt[h_txt < as.numeric(grid::unit(as.numeric(height) / 4, grid::unitType(height)))])

  # Only override the font size when the theme still carries a core font size of 12.
  if (ttheme$core$fg_params$fontsize == 12) {
    ttheme$core$fg_params$fontsize <- min(f_size_w, f_size_h)
    ttheme$colhead$fg_params$fontsize <- min(f_size_w, f_size_h)
    ttheme$rowhead$fg_params$fontsize <- min(f_size_w, f_size_h)
  }

  # Any error raised while building the table grob falls back to the empty grob tree.
  tryCatch(
    expr = {
      gt <- gridExtra::tableGrob(
        d = data,
        theme = ttheme
      )
      gt$widths <- ((w_unit / sum(w_unit)) * width)
      gt$heights <- rep(grid::unit(as.numeric(height) / 4, grid::unitType(height)), nrow(gt))
      # Anchor the table by its bottom-left corner at (x, y), offset by 1 and 1.5 lines.
      vp <- grid::viewport(
        x = grid::unit(x, "npc") + grid::unit(1, "lines"),
        y = grid::unit(y, "npc") + grid::unit(1.5, "lines"),
        height = height,
        width = width,
        just = c("left", "bottom")
      )
      grid::gList(
        grid::gTree(
          vp = vp,
          children = grid::gList(gt)
        )
      )
    },
    error = function(e) {
      empty_table()
    }
  )
}

#' Confidence interval for mean
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convenient function for calculating the mean confidence interval. It calculates the arithmetic as well as the
#' geometric mean. It can be used as a `ggplot` helper function for plotting.
#'
#' @inheritParams argument_convention
#' @param n_min (`numeric(1)`)\cr a minimum number of non-missing `x` to estimate the confidence interval for mean.
#' @param gg_helper (`flag`)\cr whether output should be aligned for use with `ggplot`s.
#' @param geom_mean (`flag`)\cr whether the geometric mean should be calculated.
#'
#' @return A named `vector` of values `mean_ci_lwr` and `mean_ci_upr`.
#'
#' @examples
#' stat_mean_ci(sample(10), gg_helper = FALSE)
#'
#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
#'   ggplot2::geom_point()
#'
#' p + ggplot2::stat_summary(
#'   fun.data = stat_mean_ci,
#'   geom = "errorbar"
#' )
#'
#' p + ggplot2::stat_summary(
#'   fun.data = stat_mean_ci,
#'   fun.args = list(conf_level = 0.5),
#'   geom = "errorbar"
#' )
#'
#' p + ggplot2::stat_summary(
#'   fun.data = stat_mean_ci,
#'   fun.args = list(conf_level = 0.5, geom_mean = TRUE),
#'   geom = "errorbar"
#' )
#'
#' @export
stat_mean_ci <- function(x,
                         conf_level = 0.95,
                         na.rm = TRUE, # nolint
                         n_min = 2,
                         gg_helper = TRUE,
                         geom_mean = FALSE) {
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  if (geom_mean) {
    # The geometric mean is only defined for strictly positive values; it is computed on the
    # log scale and back-transformed at the end.
    observed <- x[!is.na(x)]
    if (any(observed <= 0)) {
      m <- NA_real_
    } else {
      x <- log(x)
      m <- mean(x)
    }
  } else {
    m <- mean(x)
  }

  # Default to a missing interval; fill it in only with enough data and a defined mean.
  ci <- c(mean_ci_lwr = NA_real_, mean_ci_upr = NA_real_)
  if (!(n < n_min || is.na(m))) {
    # Half-width of the t-based interval.
    hci <- stats::qt((1 + conf_level) / 2, df = n - 1) * stats::sd(x) / sqrt(n)
    ci <- c(mean_ci_lwr = m - hci, mean_ci_upr = m + hci)
    if (geom_mean) {
      ci <- exp(ci)
    }
  }

  if (gg_helper) {
    # `ggplot2::stat_summary()` expects a data frame with `y`, `ymin` and `ymax`.
    centre <- if (is.na(m)) NA_real_ else m
    if (geom_mean) {
      centre <- exp(centre)
    }
    ci <- data.frame(y = centre, ymin = ci[[1]], ymax = ci[[2]])
  }

  ci
}

#' Confidence interval for median
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convenient function for calculating the median confidence interval. It can be used as a `ggplot` helper
#' function for plotting.
#'
#' @inheritParams argument_convention
#' @param gg_helper (`flag`)\cr whether output should be aligned for use with `ggplot`s.
#'
#' @details This function was adapted from `DescTools/versions/0.99.35/source`
#'
#' @return A named `vector` of values `median_ci_lwr` and `median_ci_upr`.
#'
#' @examples
#' stat_median_ci(sample(10), gg_helper = FALSE)
#'
#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
#'   ggplot2::geom_point()
#' p + ggplot2::stat_summary(
#'   fun.data = stat_median_ci,
#'   geom = "errorbar"
#' )
#'
#' @export
stat_median_ci <- function(x,
                           conf_level = 0.95,
                           na.rm = TRUE, # nolint
                           gg_helper = TRUE) {
  x <- unname(x)
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  n <- length(x)
  med <- stats::median(x)

  # Rank of the lower order statistic, from the Binomial(n, 0.5) lower-tail quantile.
  k <- stats::qbinom(p = (1 - conf_level) / 2, size = n, prob = 0.5, lower.tail = TRUE)

  # Start from a missing interval and only fill it in when it can be computed.
  ci <- c(median_ci_lwr = NA_real_, median_ci_upr = NA_real_)
  empir_conf_level <- NA_real_

  # k == 0 - for small samples (e.g. n <= 5) ci can be outside the observed range
  if (k != 0 && !is.na(med)) {
    ordered <- sort(x)
    ci <- c(median_ci_lwr = ordered[k], median_ci_upr = ordered[n - k + 1])
    # Coverage actually achieved by the chosen pair of order statistics.
    empir_conf_level <- 1 - 2 * stats::pbinom(k - 1, size = n, prob = 0.5)
  }

  if (gg_helper) {
    # `ggplot2::stat_summary()` expects a data frame with `y`, `ymin` and `ymax`.
    ci <- data.frame(y = med, ymin = ci[[1]], ymax = ci[[2]])
  }

  attr(ci, "conf_level") <- empir_conf_level
  ci
}

#' p-Value of the mean
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convenient function for calculating the two-sided p-value of the mean.
#'
#' @inheritParams argument_convention
#' @param n_min (`numeric(1)`)\cr a minimum number of non-missing `x` to estimate the p-value of the mean.
#' @param test_mean (`numeric(1)`)\cr mean value to test under the null hypothesis.
#'
#' @return A p-value.
#'
#' @examples
#' stat_mean_pval(sample(10))
#'
#' stat_mean_pval(rnorm(10), test_mean = 0.5)
#'
#' @export
stat_mean_pval <- function(x,
                           na.rm = TRUE, # nolint
                           n_min = 2,
                           test_mean = 0) {
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  x_mean <- mean(x)
  x_sd <- stats::sd(x)

  # Too few observations: the p-value is reported as missing.
  if (n < n_min) {
    return(c(p_value = NA_real_))
  }

  # Two-sided one-sample t-test of the mean against `test_mean`.
  t_stat <- (x_mean - test_mean) / (x_sd / sqrt(n))
  c(p_value = 2 * stats::pt(-abs(t_stat), df = n - 1))
}

#' Proportion difference and confidence interval
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for calculating the proportion (or risk) difference and confidence interval between arm
#' X (reference group) and arm Y. Risk difference is calculated by subtracting cumulative incidence
#' in arm Y from cumulative incidence in arm X.
#'
#' @inheritParams argument_convention
#' @param x (`list` of `integer`)\cr list of number of occurrences in arm X (reference group).
#' @param y (`list` of `integer`)\cr list of number of occurrences in arm Y. Must be of equal length to `x`.
#' @param N_x (`numeric(1)`)\cr total number of records in arm X.
#' @param N_y (`numeric(1)`)\cr total number of records in arm Y.
#' @param list_names (`character`)\cr names of each variable/level corresponding to pair of proportions in
#'   `x` and `y`. Must be of equal length to `x` and `y`.
#' @param pct (`flag`)\cr whether output should be returned as percentages. Defaults to `TRUE`.
#'
#' @return List of proportion differences and CIs corresponding to each pair of number of occurrences in `x` and
#'   `y`. Each list element consists of 3 statistics: proportion difference, CI lower bound, and CI upper bound.
#'
#' @seealso Split function [add_riskdiff()] which, when used as `split_fun` within [rtables::split_cols_by()]
#'   with the `riskdiff` argument set to `TRUE` in subsequent analyze functions, adds a column containing
#'   proportion (risk) difference to an `rtables` layout.
#'
#' @examples
#' stat_propdiff_ci(
#'   x = list(0.375), y = list(0.01), N_x = 5, N_y = 5, list_names = "x", conf_level = 0.9
#' )
#'
#' stat_propdiff_ci(
#'   x = list(0.5, 0.75, 1), y = list(0.25, 0.05, 0.5), N_x = 10, N_y = 20, pct = FALSE
#' )
#'
#' @export
stat_propdiff_ci <- function(x,
                             y,
                             N_x, # nolint
                             N_y, # nolint
                             list_names = NULL,
                             conf_level = 0.95,
                             pct = TRUE) {
  checkmate::assert_list(x, types = "numeric")
  checkmate::assert_list(y, types = "numeric", len = length(x))
  checkmate::assert_character(list_names, len = length(x), null.ok = TRUE)
  checkmate::assert_flag(pct)

  # The normal quantile and the output scale are the same for every pair, so compute them once.
  z_crit <- stats::qnorm((1 + conf_level) / 2)
  out_scale <- if (pct) 100 else 1

  rd_list <- lapply(seq_along(x), function(i) {
    p_x <- x[[i]] / N_x
    p_y <- y[[i]] / N_y
    rd <- p_x - p_y
    # Normal-approximation standard error of the difference between the two proportions.
    rd_se <- sqrt(p_x * (1 - p_x) / N_x + p_y * (1 - p_y) / N_y)
    # Element order: difference, lower limit, upper limit.
    c(rd, rd + c(-1, 1) * z_crit * rd_se) * out_scale
  })
  names(rd_list) <- list_names
  rd_list
}

#' Analyze numeric variables in columns
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' The layout-creating function [analyze_vars_in_cols()] creates a layout element to generate a column-wise
#' analysis table.
#'
#' This function sets the analysis methods as column labels and is a wrapper for [rtables::analyze_colvars()].
#' It was designed principally for PK tables.
#'
#' @inheritParams argument_convention
#' @inheritParams rtables::analyze_colvars
#' @param imp_rule (`string` or `NULL`)\cr imputation rule setting. Defaults to `NULL` for no imputation rule. Can
#'   also be `"1/3"` to implement 1/3 imputation rule or `"1/2"` to implement 1/2 imputation rule. In order
#'   to use an imputation rule, the `avalcat_var` argument must be specified. See [imputation_rule()]
#'   for more details on imputation.
#' @param avalcat_var (`string`)\cr if `imp_rule` is not `NULL`, name of variable that indicates whether a
#'   row in the data corresponds to an analysis value in category `"BLQ"`, `"LTR"`, `"<PCLLOQ"`, or none of
#'   the above (defaults to `"AVALCAT1"`). Variable must be present in the data and should match the variable
#'   used to calculate the `n_blq` statistic (if included in `.stats`).
#' @param cache (`flag`)\cr whether to store computed values in a temporary caching environment. This will
#'   speed up calculations in large tables, but should be set to `FALSE` if the same `rtable` layout is
#'   used for multiple tables with different data. Defaults to `FALSE`.
#' @param row_labels (`character`)\cr as this function works in columns space, usually `.labels`
#'   character vector applies on the column space. You can change the row labels by defining this
#'   parameter to a named character vector with names corresponding to the split values. It defaults
#'   to `NULL` and if it contains only one `string`, it will duplicate that as a row label.
#' @param do_summarize_row_groups (`flag`)\cr defaults to `FALSE` and applies the analysis to the current
#'   label rows. This is a wrapper of [rtables::summarize_row_groups()] and it can accept `labelstr`
#'   to define row labels. This behavior is not supported as we never need to overload row labels.
#' @param split_col_vars (`flag`)\cr defaults to `TRUE` and puts the analysis results onto the columns.
#'   This option allows you to add multiple instances of this function, also in a nested fashion,
#'   without adding more splits. This split must happen only one time on a single layout.
#'
#' @return
#' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
#' Adding this function to an `rtable` layout will summarize the given variables, arrange the output
#' in columns, and add it to the table layout.
#'
#' @note
#' * This is an experimental implementation of [rtables::summarize_row_groups()] and [rtables::analyze_colvars()]
#'   that may be subject to change as `rtables` extends its support to more complex analysis pipelines in the
#'   column space. We encourage users to read the examples carefully and file issues for different use cases.
#' * In this function, `labelstr` behaves atypically. If `labelstr = NULL` (the default), row labels are assigned
#'   automatically as the split values if `do_summarize_row_groups = FALSE` (the default), and as the group label
#'   if `do_summarize_row_groups = TRUE`.
#'
#' @seealso [analyze_vars()], [rtables::analyze_colvars()].
#'
#' @examples
#' library(dplyr)
#'
#' # Data preparation
#' adpp <- tern_ex_adpp %>% h_pkparam_sort()
#'
#' lyt <- basic_table() %>%
#'   split_rows_by(var = "STRATA1", label_pos = "topleft") %>%
#'   split_rows_by(
#'     var = "SEX",
#'     label_pos = "topleft",
#'     child_labels = "hidden"
#'   ) %>% # Removes duplicated labels
#'   analyze_vars_in_cols(vars = "AGE")
#' result <- build_table(lyt = lyt, df = adpp)
#' result
#'
#' # By selecting just some statistics and ad-hoc labels
#' lyt <- basic_table() %>%
#'   split_rows_by(var = "ARM", label_pos = "topleft") %>%
#'   split_rows_by(
#'     var = "SEX",
#'     label_pos = "topleft",
#'     child_labels = "hidden",
#'     split_fun = drop_split_levels
#'   ) %>%
#'   analyze_vars_in_cols(
#'     vars = "AGE",
#'     .stats = c("n", "cv", "geom_mean"),
#'     .labels = c(
#'       n = "aN",
#'       cv = "aCV",
#'       geom_mean = "aGeomMean"
#'     )
#'   )
#' result <- build_table(lyt = lyt, df = adpp)
#' result
#'
#' # Changing row labels
#' lyt <- basic_table() %>%
#'   analyze_vars_in_cols(
#'     vars = "AGE",
#'     row_labels = "some custom label"
#'   )
#' result <- build_table(lyt, df = adpp)
#' result
#'
#' # Pharmacokinetic parameters
#' lyt <- basic_table() %>%
#'   split_rows_by(
#'     var = "TLG_DISPLAY",
#'     split_label = "PK Parameter",
#'     label_pos = "topleft",
#'     child_labels = "hidden"
#'   ) %>%
#'   analyze_vars_in_cols(
#'     vars = "AVAL"
#'   )
#' result <- build_table(lyt, df = adpp)
#' result
#'
#' # Multiple calls (summarize label and analyze underneath)
#' lyt <- basic_table() %>%
#'   split_rows_by(
#'     var = "TLG_DISPLAY",
#'     split_label = "PK Parameter",
#'     label_pos = "topleft"
#'   ) %>%
#'   analyze_vars_in_cols(
#'     vars = "AVAL",
#'     do_summarize_row_groups = TRUE # does a summarize level
#'   ) %>%
#'   split_rows_by("SEX",
#'     child_labels = "hidden",
#'     label_pos = "topleft"
#'   ) %>%
#'   analyze_vars_in_cols(
#'     vars = "AVAL",
#'     split_col_vars = FALSE # avoids re-splitting the columns
#'   )
#' result <- build_table(lyt, df = adpp)
#' result
#'
#' @export
analyze_vars_in_cols <- function(lyt,
                                 vars,
                                 ...,
                                 .stats = c(
                                   "n",
                                   "mean",
                                   "sd",
                                   "se",
                                   "cv",
                                   "geom_cv"
                                 ),
                                 .labels = c(
                                   n = "n",
                                   mean = "Mean",
                                   sd = "SD",
                                   se = "SE",
                                   cv = "CV (%)",
                                   geom_cv = "CV % Geometric Mean"
                                 ),
                                 row_labels = NULL,
                                 do_summarize_row_groups = FALSE,
                                 split_col_vars = TRUE,
                                 imp_rule = NULL,
                                 avalcat_var = "AVALCAT1",
                                 cache = FALSE,
                                 .indent_mods = NULL,
                                 na_str = default_na_str(),
                                 nested = TRUE,
                                 .formats = NULL,
                                 .aligns = NULL) {
  # Additional arguments are collected here and handed to rtables as `extra_args` below.
  extra_args <- list(...)

  checkmate::assert_string(na_str, na.ok = TRUE, null.ok = TRUE)
  checkmate::assert_character(row_labels, null.ok = TRUE)
  checkmate::assert_int(.indent_mods, null.ok = TRUE)
  checkmate::assert_flag(nested)
  checkmate::assert_flag(split_col_vars)
  checkmate::assert_flag(do_summarize_row_groups)

  # Filtering: resolve the requested statistics, then their formats and column labels
  met_grps <- paste0("analyze_vars", c("_numeric", "_counts"))
  .stats <- get_stats(met_grps, stats_in = .stats)
  formats_v <- get_formats_from_stats(stats = .stats, formats_in = .formats)
  labels_v <- get_labels_from_stats(stats = .stats, labels_in = .labels) %>% .unlist_keep_nulls()
  if ("control" %in% names(extra_args)) labels_v <- labels_v %>% labels_use_control(extra_args[["control"]], .labels)

  # Check for vars in the case that one or more are used.
  # A single variable is recycled so that there is one analyzed variable per statistic.
  if (length(vars) == 1) {
    vars <- rep(vars, length(.stats))
  } else if (length(vars) != length(.stats)) {
    stop(
      "Analyzed variables (vars) does not have the same ",
      "number of elements of specified statistics (.stats)."
    )
  }

  if (split_col_vars) {
    # Checking there is not a previous identical column split
    clyt <- tail(clayout(lyt), 1)[[1]]

    # The same split is built on an empty layout so it can be compared with the existing ones
    dummy_lyt <- split_cols_by_multivar(
      lyt = basic_table(),
      vars = vars,
      varlabels = labels_v
    )

    if (any(sapply(clyt, identical, y = get_last_col_split(dummy_lyt)))) {
      stop(
        "Column split called again with the same values. ",
        "This can create many unwanted columns. Please consider adding ",
        "split_col_vars = FALSE to the last call of ",
        deparse(sys.calls()[[sys.nframe() - 1]]), "."
      )
    }

    # Main col split
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = vars,
      varlabels = labels_v
    )
  }

  # Caching environment: one environment per call, shared by all per-statistic functions below
  env <- new.env()

  if (do_summarize_row_groups) {
    if (length(unique(vars)) > 1) {
      stop("When using do_summarize_row_groups only one label level var should be inserted.")
    }

    # Function list for do_summarize_row_groups. Slightly different handling of labels
    cfun_list <- Map(
      function(stat, use_cache, cache_env) {
        function(u, .spl_context, labelstr, .df_row, ...) {
          # Statistic
          # Cache key: last current column split value (with `._[[n]]_.` markers removed)
          # followed by all row split values
          var_row_val <- paste(
            gsub("\\._\\[\\[[0-9]+\\]\\]_\\.", "", paste(tail(.spl_context$cur_col_split_val, 1)[[1]], collapse = "_")),
            paste(.spl_context$value, collapse = "_"),
            sep = "_"
          )
          if (use_cache) {
            if (is.null(cache_env[[var_row_val]])) {
              cache_env[[var_row_val]] <- s_summary(u, ...)
            }
            x_stats <- cache_env[[var_row_val]]
          } else {
            x_stats <- s_summary(u, ...)
          }

          # The imputation rule is only applied to the statistics listed here
          if (is.null(imp_rule) || !stat %in% c("mean", "sd", "cv", "geom_mean", "geom_cv", "median", "min", "max")) {
            res <- x_stats[[stat]]
          } else {
            # Numeric part of the innermost row split value
            timept <- as.numeric(gsub(".*?([0-9\\.]+).*", "\\1", tail(.spl_context$value, 1)))
            res_imp <- imputation_rule(
              .df_row, x_stats, stat,
              imp_rule = imp_rule,
              post = grepl("Predose", tail(.spl_context$value, 1)) || timept > 0,
              avalcat_var = avalcat_var
            )
            res <- res_imp[["val"]]
            # Local override: the imputation result supplies the NA string for this cell
            na_str <- res_imp[["na_str"]]
          }

          # Label check and replacement
          if (length(row_labels) > 1) {
            if (!(labelstr %in% names(row_labels))) {
              # stop() pastes its arguments without a separator, so the names are collapsed explicitly
              stop(
                "Replacing the labels in do_summarize_row_groups needs a named vector ",
                "that contains the split values. In the current split variable ",
                .spl_context$split[nrow(.spl_context)],
                " the labelstr value (split value by default) ", labelstr, " is not in",
                " row_labels names: ", paste(names(row_labels), collapse = ", ")
              )
            }
            lbl <- unlist(row_labels[labelstr])
          } else {
            lbl <- labelstr
          }

          # Cell creation
          rcell(res,
            label = lbl,
            format = formats_v[names(formats_v) == stat][[1]],
            format_na_str = na_str,
            indent_mod = ifelse(is.null(.indent_mods), 0L, .indent_mods),
            align = .aligns
          )
        }
      },
      stat = .stats,
      use_cache = cache,
      cache_env = replicate(length(.stats), env)
    )

    # Main call to rtables
    summarize_row_groups(
      lyt = lyt,
      var = unique(vars),
      cfun = cfun_list,
      na_str = na_str,
      extra_args = extra_args
    )
  } else {
    # Function list for analyze_colvars
    afun_list <- Map(
      function(stat, use_cache, cache_env) {
        function(u, .spl_context, .df_row, ...) {
          # Main statistics
          # Cache key: last current column split value (with `._[[n]]_.` markers removed)
          # followed by all row split values
          var_row_val <- paste(
            gsub("\\._\\[\\[[0-9]+\\]\\]_\\.", "", paste(tail(.spl_context$cur_col_split_val, 1)[[1]], collapse = "_")),
            paste(.spl_context$value, collapse = "_"),
            sep = "_"
          )
          if (use_cache) {
            if (is.null(cache_env[[var_row_val]])) cache_env[[var_row_val]] <- s_summary(u, ...)
            x_stats <- cache_env[[var_row_val]]
          } else {
            x_stats <- s_summary(u, ...)
          }

          # The imputation rule is only applied to the statistics listed here
          if (is.null(imp_rule) || !stat %in% c("mean", "sd", "cv", "geom_mean", "geom_cv", "median", "min", "max")) {
            res <- x_stats[[stat]]
          } else {
            # Numeric part of the innermost row split value
            timept <- as.numeric(gsub(".*?([0-9\\.]+).*", "\\1", tail(.spl_context$value, 1)))
            res_imp <- imputation_rule(
              .df_row, x_stats, stat,
              imp_rule = imp_rule,
              post = grepl("Predose", tail(.spl_context$value, 1)) || timept > 0,
              avalcat_var = avalcat_var
            )
            res <- res_imp[["val"]]
            # Local override: the imputation result supplies the NA string for this cell
            na_str <- res_imp[["na_str"]]
          }

          # A list result is only accepted when it holds a single element
          if (is.list(res)) {
            if (length(res) > 1) {
              stop("The analyzed column produced more than one category of results.")
            } else {
              res <- unlist(res)
            }
          }

          # Label from context
          label_from_context <- .spl_context$value[nrow(.spl_context)]

          # Label switcher
          if (is.null(row_labels)) {
            lbl <- label_from_context
          } else {
            if (length(row_labels) > 1) {
              if (!(label_from_context %in% names(row_labels))) {
                # stop() pastes its arguments without a separator, so the names are collapsed explicitly
                stop(
                  "Replacing the labels in do_summarize_row_groups needs a named vector ",
                  "that contains the split values. In the current split variable ",
                  .spl_context$split[nrow(.spl_context)],
                  " the split value ", label_from_context, " is not in",
                  " row_labels names: ", paste(names(row_labels), collapse = ", ")
                )
              }
              lbl <- unlist(row_labels[label_from_context])
            } else {
              # A single row label is used for every row
              lbl <- row_labels
            }
          }

          # Cell creation
          rcell(res,
            label = lbl,
            format = formats_v[names(formats_v) == stat][[1]],
            format_na_str = na_str,
            indent_mod = ifelse(is.null(.indent_mods), 0L, .indent_mods),
            align = .aligns
          )
        }
      },
      stat = .stats,
      use_cache = cache,
      cache_env = replicate(length(.stats), env)
    )

    # Main call to rtables
    analyze_colvars(lyt,
      afun = afun_list,
      na_str = na_str,
      nested = nested,
      extra_args = extra_args
    )
  }
}

# Helper function: returns the last element of the last element of `clayout(lyt)`
# (presumably the most recently added column split of the layout).
get_last_col_split <- function(lyt) {
  col_lyt <- clayout(lyt)
  last_split_vec <- tail(col_lyt, 1)[[1]]
  tail(last_split_vec, 1)[[1]]
}

#' Kaplan-Meier plot
#'
#' @description `r lifecycle::badge("stable")`
#'
#' From a survival model, a graphic is rendered along with tabulated annotation
#' including the number of patients at risk at given time and the median survival
#' per group.
#'
#' @inheritParams argument_convention
#' @param variables (named `list`)\cr variable names. Details are:
#'   * `tte` (`numeric`)\cr variable indicating time-to-event duration values.
#'   * `is_event` (`logical`)\cr event variable. `TRUE` if event, `FALSE` if time to event is censored.
#'   * `arm` (`factor`)\cr the treatment group variable.
#'   * `strata` (`character` or `NULL`)\cr variable names indicating stratification factors.
#' @param control_surv (`list`)\cr parameters for comparison details, specified by using
#'   the helper function [control_surv_timepoint()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for survival rate.
#'   * `conf_type` (`string`)\cr `"plain"` (default), `"log"`, `"log-log"` for confidence interval type,
#'     see more in [survival::survfit()]. Note that the option "none" is no longer supported.
#' @param col (`character`)\cr lines colors. Length of a vector should be equal
#'   to number of strata from [survival::survfit()].
#' @param lty (`numeric`)\cr line type. If a vector is given, its length should be equal to the number of strata from
#'   [survival::survfit()].
#' @param lwd (`numeric`)\cr line width. If a vector is given, its length should be equal to the number of strata from
#'   [survival::survfit()].
#' @param censor_show (`flag`)\cr whether to show censored observations.
#' @param pch (`string`)\cr name of symbol or character to use as point symbol to indicate censored cases.
#' @param size (`numeric(1)`)\cr size of censored point symbols.
#' @param max_time (`numeric(1)`)\cr maximum value to show on x-axis. Only data values less than or up to
#'   this threshold value will be plotted (defaults to `NULL`).
#' @param xticks (`numeric` or `NULL`)\cr numeric vector of tick positions or a single number with spacing
#'   between ticks on the x-axis. If `NULL` (default), [labeling::extended()] is used to determine
#'   optimal tick positions on the x-axis.
#' @param xlab (`string`)\cr x-axis label.
#' @param yval (`string`)\cr type of plot, to be plotted on the y-axis. Options are `Survival` (default) and `Failure`
#'   probability.
#' @param ylab (`string`)\cr y-axis label.
#' @param title (`string`)\cr plot title.
#' @param footnotes (`string`)\cr plot footnotes.
#' @param font_size (`numeric(1)`)\cr font size to use for all text.
#' @param ci_ribbon (`flag`)\cr whether the confidence interval should be drawn around the Kaplan-Meier curve.
#' @param annot_at_risk (`flag`)\cr compute and add the annotation table reporting the number of patients at risk
#'   matching the main grid of the Kaplan-Meier curve.
#' @param annot_at_risk_title (`flag`)\cr whether the "Patients at Risk" title should be added above the `annot_at_risk`
#'   table. Has no effect if `annot_at_risk` is `FALSE`. Defaults to `TRUE`.
#' @param annot_surv_med (`flag`)\cr compute and add the annotation table on the Kaplan-Meier curve estimating the
#'   median survival time per group.
#' @param annot_coxph (`flag`)\cr whether to add the annotation table from a [survival::coxph()] model.
#' @param annot_stats (`string` or `NULL`)\cr statistics annotations to add to the plot. Options are
#'   `median` (median survival follow-up time) and `min` (minimum survival follow-up time).
#' @param annot_stats_vlines (`flag`)\cr add vertical lines corresponding to each of the statistics
#'   specified by `annot_stats`. If `annot_stats` is `NULL` no lines will be added.
#' @param control_coxph_pw (`list`)\cr parameters for comparison details, specified using the helper function
#'   [control_coxph()]. Some possible parameter options are:
#'   * `pval_method` (`string`)\cr p-value method for testing hazard ratio = 1.
#'     Default method is `"log-rank"`, can also be set to `"wald"` or `"likelihood"`.
#'   * `ties` (`string`)\cr method for tie handling. Default is `"efron"`,
#'     can also be set to `"breslow"` or `"exact"`. See more in [survival::coxph()]
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for HR.
#' @param ref_group_coxph (`string` or `NULL`)\cr level of arm variable to use as reference group in calculations for
#'   `annot_coxph` table. If `NULL` (default), uses the first level of the arm variable.
#' @param control_annot_surv_med (`list`)\cr parameters to control the position and size of the annotation table added
#'   to the plot when `annot_surv_med = TRUE`, specified using the [control_surv_med_annot()] function. Parameter
#'   options are: `x`, `y`, `w`, `h`, and `fill`. See [control_surv_med_annot()] for details.
#' @param control_annot_coxph (`list`)\cr parameters to control the position and size of the annotation table added
#'   to the plot when `annot_coxph = TRUE`, specified using the [control_coxph_annot()] function. Parameter
#'   options are: `x`, `y`, `w`, `h`, `fill`, and `ref_lbls`. See [control_coxph_annot()] for details.
#' @param legend_pos (`numeric(2)` or `NULL`)\cr vector containing x- and y-coordinates, respectively, for the legend
#'   position relative to the KM plot area. If `NULL` (default), the legend is positioned in the bottom right corner of
#'   the plot, or the middle right of the plot if needed to prevent overlapping.
#' @param rel_height_plot (`proportion`)\cr proportion of total figure height to allocate to the Kaplan-Meier plot.
#'   Relative height of patients at risk table is then `1 - rel_height_plot`. If `annot_at_risk = FALSE` or
#'   `as_list = TRUE`, this parameter is ignored.
#' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to format the Kaplan-Meier plot.
#' @param as_list (`flag`)\cr whether the two `ggplot` objects should be returned as a list when `annot_at_risk = TRUE`.
#'   If `TRUE`, a named list with two elements, `plot` and `table`, will be returned. If `FALSE` (default) the patients
#'   at risk table is printed below the plot via [cowplot::plot_grid()].
#' @param draw `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects.
#' @param newpage `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects.
#' @param gp `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects.
#' @param vp `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects.
#' @param name `r lifecycle::badge("deprecated")` This function no longer generates `grob` objects.
#' @param annot_coxph_ref_lbls `r lifecycle::badge("deprecated")` Please use the `ref_lbls` element of
#'   `control_annot_coxph` instead.
#' @param position_coxph `r lifecycle::badge("deprecated")`  Please use the `x` and `y` elements of
#'   `control_annot_coxph` instead.
#' @param position_surv_med `r lifecycle::badge("deprecated")` Please use the `x` and `y` elements of
#'   `control_annot_surv_med` instead.
#' @param width_annots `r lifecycle::badge("deprecated")` Please use the `w` element of `control_annot_surv_med`
#'   (for `surv_med`) and `control_annot_coxph` (for `coxph`).
#'
#' @return A `ggplot` Kaplan-Meier plot and (optionally) summary table.
#'
#' @examples
#' library(dplyr)
#'
#' df <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#' variables <- list(tte = "AVAL", is_event = "is_event", arm = "ARMCD")
#'
#' # Basic examples
#' g_km(df = df, variables = variables)
#' g_km(df = df, variables = variables, yval = "Failure")
#'
#' # Examples with customization parameters applied
#' g_km(
#'   df = df,
#'   variables = variables,
#'   control_surv = control_surv_timepoint(conf_level = 0.9),
#'   col = c("grey25", "grey50", "grey75"),
#'   annot_at_risk_title = FALSE,
#'   lty = 1:3,
#'   font_size = 8
#' )
#' g_km(
#'   df = df,
#'   variables = variables,
#'   annot_stats = c("min", "median"),
#'   annot_stats_vlines = TRUE,
#'   max_time = 3000,
#'   ggtheme = ggplot2::theme_minimal()
#' )
#'
#' # Example with pairwise Cox-PH analysis annotation table, adjusted annotation tables
#' g_km(
#'   df = df, variables = variables,
#'   annot_coxph = TRUE,
#'   control_coxph_pw = control_coxph(pval_method = "wald", ties = "exact", conf_level = 0.99),
#'   control_annot_coxph = control_coxph_annot(x = 0.26, w = 0.35),
#'   control_annot_surv_med = control_surv_med_annot(x = 0.8, y = 0.9, w = 0.35)
#' )
#'
#' @aliases kaplan_meier
#' @export
g_km <- function(df,
                 variables,
                 control_surv = control_surv_timepoint(),
                 col = NULL,
                 lty = NULL,
                 lwd = 0.5,
                 censor_show = TRUE,
                 pch = 3,
                 size = 2,
                 max_time = NULL,
                 xticks = NULL,
                 xlab = "Days",
                 yval = c("Survival", "Failure"),
                 ylab = paste(yval, "Probability"),
                 ylim = NULL,
                 title = NULL,
                 footnotes = NULL,
                 font_size = 10,
                 ci_ribbon = FALSE,
                 annot_at_risk = TRUE,
                 annot_at_risk_title = TRUE,
                 annot_surv_med = TRUE,
                 annot_coxph = FALSE,
                 annot_stats = NULL,
                 annot_stats_vlines = FALSE,
                 control_coxph_pw = control_coxph(),
                 ref_group_coxph = NULL,
                 control_annot_surv_med = control_surv_med_annot(),
                 control_annot_coxph = control_coxph_annot(),
                 legend_pos = NULL,
                 rel_height_plot = 0.75,
                 ggtheme = NULL,
                 as_list = FALSE,
                 draw = lifecycle::deprecated(),
                 newpage = lifecycle::deprecated(),
                 gp = lifecycle::deprecated(),
                 vp = lifecycle::deprecated(),
                 name = lifecycle::deprecated(),
                 annot_coxph_ref_lbls = lifecycle::deprecated(),
                 position_coxph = lifecycle::deprecated(),
                 position_surv_med = lifecycle::deprecated(),
                 width_annots = lifecycle::deprecated()) {
  # Deprecated argument warnings
  if (lifecycle::is_present(draw)) {
    lifecycle::deprecate_warn(
      "0.9.4", "g_km(draw)",
      details = "This argument is no longer used since the plot is now generated as a `ggplot2` object."
    )
  }
  if (lifecycle::is_present(newpage)) {
    lifecycle::deprecate_warn(
      "0.9.4", "g_km(newpage)",
      details = "This argument is no longer used since the plot is now generated as a `ggplot2` object."
    )
  }
  if (lifecycle::is_present(gp)) {
    lifecycle::deprecate_warn(
      "0.9.4", "g_km(gp)",
      details = "This argument is no longer used since the plot is now generated as a `ggplot2` object."
    )
  }
  if (lifecycle::is_present(vp)) {
    lifecycle::deprecate_warn(
      "0.9.4", "g_km(vp)",
      details = "This argument is no longer used since the plot is now generated as a `ggplot2` object."
    )
  }
  if (lifecycle::is_present(name)) {
    lifecycle::deprecate_warn(
      "0.9.4", "g_km(name)",
      details = "This argument is no longer used since the plot is now generated as a `ggplot2` object."
    )
  }
  if (lifecycle::is_present(annot_coxph_ref_lbls)) {
    lifecycle::deprecate_warn(
      "0.9.4", "g_km(annot_coxph_ref_lbls)",
      details = "Please specify this setting using the 'ref_lbls' element of control_annot_coxph."
    )
    control_annot_coxph[["ref_lbls"]] <- annot_coxph_ref_lbls
  }
  if (lifecycle::is_present(position_coxph)) {
    lifecycle::deprecate_warn(
      "0.9.4", "g_km(position_coxph)",
      details = "Please specify this setting using the 'x' and 'y' elements of control_annot_coxph."
    )
    control_annot_coxph[["x"]] <- position_coxph[1]
    control_annot_coxph[["y"]] <- position_coxph[2]
  }
  if (lifecycle::is_present(position_surv_med)) {
    lifecycle::deprecate_warn(
      "0.9.4", "g_km(position_surv_med)",
      details = "Please specify this setting using the 'x' and 'y' elements of control_annot_surv_med."
    )
    control_annot_surv_med[["x"]] <- position_surv_med[1]
    control_annot_surv_med[["y"]] <- position_surv_med[2]
  }
  if (lifecycle::is_present(width_annots)) {
    lifecycle::deprecate_warn(
      "0.9.4", "g_km(width_annots)",
      details = paste(
        "Please specify widths of annotation tables relative to the plot area using the 'w' element of",
        "control_annot_surv_med (for surv_med) and control_annot_coxph (for coxph)."
      )
    )
    control_annot_surv_med[["w"]] <- as.numeric(width_annots[["surv_med"]])
    control_annot_coxph[["w"]] <- as.numeric(width_annots[["coxph"]])
  }

  checkmate::assert_list(variables)
  checkmate::assert_subset(c("tte", "arm", "is_event"), names(variables))
  checkmate::assert_logical(censor_show, len = 1)
  checkmate::assert_numeric(size, len = 1)
  checkmate::assert_numeric(max_time, len = 1, null.ok = TRUE)
  checkmate::assert_numeric(xticks, null.ok = TRUE)
  checkmate::assert_character(xlab, len = 1, null.ok = TRUE)
  checkmate::assert_character(yval)
  checkmate::assert_character(ylab, null.ok = TRUE)
  checkmate::assert_numeric(ylim, finite = TRUE, any.missing = FALSE, len = 2, sorted = TRUE, null.ok = TRUE)
  checkmate::assert_character(title, len = 1, null.ok = TRUE)
  checkmate::assert_character(footnotes, len = 1, null.ok = TRUE)
  checkmate::assert_numeric(font_size, len = 1)
  checkmate::assert_logical(ci_ribbon, len = 1)
  checkmate::assert_logical(annot_at_risk, len = 1)
  checkmate::assert_logical(annot_at_risk_title, len = 1)
  checkmate::assert_logical(annot_surv_med, len = 1)
  checkmate::assert_logical(annot_coxph, len = 1)
  checkmate::assert_subset(annot_stats, c("median", "min"))
  checkmate::assert_logical(annot_stats_vlines)
  checkmate::assert_list(control_coxph_pw)
  checkmate::assert_character(ref_group_coxph, len = 1, null.ok = TRUE)
  checkmate::assert_list(control_annot_surv_med)
  checkmate::assert_list(control_annot_coxph)
  checkmate::assert_numeric(legend_pos, finite = TRUE, any.missing = FALSE, len = 2, null.ok = TRUE)
  assert_proportion_value(rel_height_plot)
  checkmate::assert_logical(as_list)

  tte <- variables$tte
  is_event <- variables$is_event
  arm <- variables$arm
  assert_valid_factor(df[[arm]])
  armval <- as.character(unique(df[[arm]]))
  assert_df_with_variables(df, list(tte = tte, is_event = is_event, arm = arm))
  checkmate::assert_logical(df[[is_event]], min.len = 1)
  checkmate::assert_numeric(df[[tte]], min.len = 1)
  checkmate::assert_vector(col, len = length(armval), null.ok = TRUE)
  checkmate::assert_vector(lty, null.ok = TRUE)
  checkmate::assert_numeric(lwd, len = 1, null.ok = TRUE)

  if (annot_coxph && length(armval) < 2) {
    stop(paste(
      "When `annot_coxph` = TRUE, `df` must contain at least 2 levels of `variables$arm`",
      "in order to calculate the hazard ratio."
    ))
  }

  # process model
  yval <- match.arg(yval)
  formula <- stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", arm))
  fit_km <- survival::survfit(
    formula = formula,
    data = df,
    conf.int = control_surv$conf_level,
    conf.type = control_surv$conf_type
  )
  data <- h_data_plot(fit_km, armval = armval, max_time = max_time)

  # calculate x-ticks
  xticks <- h_xticks(data = data, xticks = xticks, max_time = max_time)

  # change estimates of survival to estimates of failure (1 - survival)
  if (yval == "Failure") {
    data[c("estimate", "conf.low", "conf.high", "censor")] <- list(
      1 - data$estimate, 1 - data$conf.low, 1 - data$conf.high, 1 - data$censor
    )
  }

  # derive y-axis limits
  if (is.null(ylim)) {
    if (!is.null(max_time)) {
      y_lwr <- min(data[data$time < max_time, ][["estimate"]])
      y_upr <- max(data[data$time < max_time, ][["estimate"]])
    } else {
      y_lwr <- min(data[["estimate"]])
      y_upr <- max(data[["estimate"]])
    }
    ylim <- c(y_lwr, y_upr)
  }

  # initialize ggplot
  gg_plt <- ggplot(
    data = data,
    mapping = aes(
      x = .data[["time"]],
      y = .data[["estimate"]],
      ymin = .data[["conf.low"]],
      ymax = .data[["conf.high"]],
      color = .data[["strata"]],
      fill = .data[["strata"]]
    )
  ) +
    theme_bw(base_size = font_size) +
    scale_y_continuous(limits = ylim, expand = c(0.025, 0)) +
    labs(title = title, x = xlab, y = ylab, caption = footnotes) +
    theme(
      axis.text = element_text(size = font_size),
      axis.title = element_text(size = font_size),
      legend.title = element_blank(),
      legend.text = element_text(size = font_size),
      legend.box.background = element_rect(fill = "white", linewidth = 0.5),
      legend.background = element_blank(),
      legend.position = "inside",
      legend.spacing.y = unit(-0.02, "npc"),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank()
    )

  # derive x-axis limits
  if (!is.null(max_time) && !is.null(xticks)) {
    gg_plt <- gg_plt + scale_x_continuous(
      breaks = xticks, limits = c(min(0, xticks), max(c(xticks, max_time))), expand = c(0.025, 0)
    )
  } else if (!is.null(xticks)) {
    if (max(data$time) <= max(xticks)) {
      gg_plt <- gg_plt + scale_x_continuous(
        breaks = xticks, limits = c(min(0, min(xticks)), max(xticks)), expand = c(0.025, 0)
      )
    } else {
      gg_plt <- gg_plt + scale_x_continuous(breaks = xticks, expand = c(0.025, 0))
    }
  } else if (!is.null(max_time)) {
    gg_plt <- gg_plt + scale_x_continuous(limits = c(0, max_time), expand = c(0.025, 0))
  }

  # set legend position
  if (!is.null(legend_pos)) {
    gg_plt <- gg_plt + theme(legend.position.inside = legend_pos)
  } else {
    max_time2 <- sort(
      data$time,
      partial = nrow(data) - length(armval) - 1
    )[nrow(data) - length(armval) - 1]

    y_rng <- ylim[2] - ylim[1]

    if (yval == "Survival" && all(data$estimate[data$time == max_time2] > ylim[1] + 0.09 * y_rng) &&
      all(data$estimate[data$time == max_time2] < ylim[1] + 0.5 * y_rng)) { # nolint
      gg_plt <- gg_plt +
        theme(
          legend.position.inside = c(1, 0.5),
          legend.justification = c(1.1, 0.6)
        )
    } else {
      gg_plt <- gg_plt +
        theme(
          legend.position.inside = c(1, 0),
          legend.justification = c(1.1, -0.4)
        )
    }
  }

  # add lines
  gg_plt <- if (is.null(lty)) {
    gg_plt + geom_step(linewidth = lwd, na.rm = TRUE)
  } else if (length(lty) == 1) {
    gg_plt + geom_step(linewidth = lwd, lty = lty, na.rm = TRUE)
  } else {
    gg_plt +
      geom_step(aes(lty = .data[["strata"]]), linewidth = lwd, na.rm = TRUE) +
      scale_linetype_manual(values = lty)
  }

  # add censor marks
  if (censor_show) {
    gg_plt <- gg_plt + geom_point(
      data = data[data$n.censor != 0, ],
      aes(x = .data[["time"]], y = .data[["censor"]], shape = "Censored"),
      size = size,
      na.rm = TRUE
    ) +
      scale_shape_manual(name = NULL, values = pch) +
      guides(fill = guide_legend(override.aes = list(shape = NA)))
  }

  # add ci ribbon
  if (ci_ribbon) gg_plt <- gg_plt + geom_ribbon(alpha = 0.3, lty = 0, na.rm = TRUE)

  # control aesthetics
  if (!is.null(col)) {
    gg_plt <- gg_plt +
      scale_color_manual(values = col) +
      scale_fill_manual(values = col)
  }
  if (!is.null(ggtheme)) gg_plt <- gg_plt + ggtheme

  # annotate with stats (text/vlines)
  if (!is.null(annot_stats)) {
    if ("median" %in% annot_stats) {
      fit_km_all <- survival::survfit(
        formula = stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", 1)),
        data = df,
        conf.int = control_surv$conf_level,
        conf.type = control_surv$conf_type
      )
      gg_plt <- gg_plt +
        annotate(
          "text",
          size = font_size / .pt, col = 1, lineheight = 0.95,
          x = stats::median(fit_km_all) + 0.07 * max(data$time),
          y = ifelse(yval == "Survival", 0.65, 0.35),
          label = paste("Median F/U:\n", round(stats::median(fit_km_all), 1), tolower(df$AVALU[1]))
        )
      if (annot_stats_vlines) {
        gg_plt <- gg_plt +
          annotate(
            "segment",
            x = stats::median(fit_km_all), xend = stats::median(fit_km_all), y = -Inf, yend = Inf,
            linetype = 2, col = "darkgray"
          )
      }
    }
    if ("min" %in% annot_stats) {
      min_fu <- min(df[[tte]])
      gg_plt <- gg_plt +
        annotate(
          "text",
          size = font_size / .pt, col = 1, lineheight = 0.95,
          x = min_fu + max(data$time) * 0.07,
          y = ifelse(yval == "Survival", 0.96, 0.05),
          label = paste("Min. F/U:\n", round(min_fu, 1), tolower(df$AVALU[1]))
        )
      if (annot_stats_vlines) {
        gg_plt <- gg_plt +
          annotate(
            "segment",
            linetype = 2, col = "darkgray",
            x = min_fu, xend = min_fu, y = Inf, yend = -Inf
          )
      }
    }
    gg_plt <- gg_plt + guides(fill = guide_legend(override.aes = list(shape = NA, label = "")))
  }

  # add at risk annotation table
  if (annot_at_risk) {
    annot_tbl <- summary(fit_km, times = xticks, extend = TRUE)
    annot_tbl <- if (is.null(fit_km$strata)) {
      data.frame(
        n.risk = annot_tbl$n.risk,
        time = annot_tbl$time,
        strata = armval
      )
    } else {
      strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals")
      levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2]
      data.frame(
        n.risk = annot_tbl$n.risk,
        time = annot_tbl$time,
        strata = annot_tbl$strata
      )
    }

    at_risk_tbl <- as.data.frame(tidyr::pivot_wider(annot_tbl, names_from = "time", values_from = "n.risk")[, -1])
    at_risk_tbl[is.na(at_risk_tbl)] <- 0
    rownames(at_risk_tbl) <- levels(annot_tbl$strata)

    gg_at_risk <- df2gg(
      at_risk_tbl,
      font_size = font_size, col_labels = FALSE, hline = FALSE,
      colwidths = rep(1, ncol(at_risk_tbl))
    ) +
      labs(title = if (annot_at_risk_title) "Patients at Risk:" else NULL, x = xlab) +
      theme_bw(base_size = font_size) +
      theme(
        plot.title = element_text(size = font_size, vjust = 3, face = "bold"),
        panel.border = element_blank(),
        panel.grid = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.y = element_text(size = font_size, face = "italic", hjust = 1),
        axis.text.x = element_text(size = font_size),
        axis.line.x = element_line()
      ) +
      coord_cartesian(clip = "off", ylim = c(0.5, nrow(at_risk_tbl)))
    gg_at_risk <- suppressMessages(
      gg_at_risk +
        scale_x_continuous(expand = c(0.025, 0), breaks = seq_along(at_risk_tbl) - 0.5, labels = xticks) +
        scale_y_continuous(labels = rev(levels(annot_tbl$strata)), breaks = seq_len(nrow(at_risk_tbl)))
    )

    if (!as_list) {
      gg_plt <- cowplot::plot_grid(
        gg_plt,
        gg_at_risk,
        align = "v",
        axis = "tblr",
        ncol = 1,
        rel_heights = c(rel_height_plot, 1 - rel_height_plot)
      )
    }
  }

  # add median survival time annotation table
  if (annot_surv_med) {
    surv_med_tbl <- h_tbl_median_surv(fit_km = fit_km, armval = armval)
    bg_fill <- if (isTRUE(control_annot_surv_med[["fill"]])) "#00000020" else control_annot_surv_med[["fill"]]

    gg_surv_med <- df2gg(surv_med_tbl, font_size = font_size, colwidths = c(1, 1, 2), bg_fill = bg_fill) +
      theme(
        axis.text.y = element_text(size = font_size, face = "italic", hjust = 1),
        plot.margin = margin(0, 2, 0, 5)
      ) +
      coord_cartesian(clip = "off", ylim = c(0.5, nrow(surv_med_tbl) + 1.5))
    gg_surv_med <- suppressMessages(
      gg_surv_med +
        scale_x_continuous(expand = c(0.025, 0)) +
        scale_y_continuous(labels = rev(rownames(surv_med_tbl)), breaks = seq_len(nrow(surv_med_tbl)))
    )

    gg_plt <- cowplot::ggdraw(gg_plt) +
      cowplot::draw_plot(
        gg_surv_med,
        control_annot_surv_med[["x"]],
        control_annot_surv_med[["y"]],
        width = control_annot_surv_med[["w"]],
        height = control_annot_surv_med[["h"]],
        vjust = 0.5,
        hjust = 0.5
      )
  }

  # add coxph annotation table
  if (annot_coxph) {
    coxph_tbl <- h_tbl_coxph_pairwise(
      df = df,
      variables = variables,
      ref_group_coxph = ref_group_coxph,
      control_coxph_pw = control_coxph_pw,
      annot_coxph_ref_lbls = control_annot_coxph[["ref_lbls"]]
    )
    bg_fill <- if (isTRUE(control_annot_coxph[["fill"]])) "#00000020" else control_annot_coxph[["fill"]]

    gg_coxph <- df2gg(coxph_tbl, font_size = font_size, colwidths = c(1.1, 1, 3), bg_fill = bg_fill) +
      theme(
        axis.text.y = element_text(size = font_size, face = "italic", hjust = 1),
        plot.margin = margin(0, 2, 0, 5)
      ) +
      coord_cartesian(clip = "off", ylim = c(0.5, nrow(coxph_tbl) + 1.5))
    gg_coxph <- suppressMessages(
      gg_coxph +
        scale_x_continuous(expand = c(0.025, 0)) +
        scale_y_continuous(labels = rev(rownames(coxph_tbl)), breaks = seq_len(nrow(coxph_tbl)))
    )

    gg_plt <- cowplot::ggdraw(gg_plt) +
      cowplot::draw_plot(
        gg_coxph,
        control_annot_coxph[["x"]],
        control_annot_coxph[["y"]],
        width = control_annot_coxph[["w"]],
        height = control_annot_coxph[["h"]],
        vjust = 0.5,
        hjust = 0.5
      )
  }

  if (as_list) {
    list(plot = gg_plt, table = gg_at_risk)
  } else {
    gg_plt
  }
}

#' Odds ratio estimation
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [estimate_odds_ratio()] creates a layout element to compare bivariate responses between
#' two groups by estimating an odds ratio and its confidence interval.
#'
#' The primary analysis variable specified by `vars` is the group variable. Additional variables can be included in the
#' analysis via the `variables` argument, which accepts `arm`, an arm variable, and `strata`, a stratification variable.
#' If more than two arm levels are present, they can be combined into two groups using the `groups_list` argument.
#'
#' @inheritParams split_cols_by_groups
#' @inheritParams argument_convention
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("estimate_odds_ratio"), type = "sh")``
#' @param method (`string`)\cr whether to use the correct (`"exact"`) calculation in the conditional likelihood or one
#'   of the approximations. See [survival::clogit()] for details.
#'
#' @note
#' * This function uses logistic regression for unstratified analyses, and conditional logistic regression for
#'   stratified analyses. The Wald confidence interval is calculated with the specified confidence level.
#' * For stratified analyses, there is currently no implementation for conditional likelihood confidence intervals,
#'   therefore the likelihood confidence interval is not available as an option.
#' * When `vars` contains only responders or non-responders no odds ratio estimation is possible so the returned
#'   values will be `NA`.
#'
#' @seealso Relevant helper function [h_odds_ratio()].
#'
#' @name odds_ratio
#' @order 1
NULL

#' @describeIn odds_ratio Statistics function which estimates the odds ratio
#'   between a treatment and a control. A `variables` list with `arm` and `strata`
#'   variable names must be passed if a stratified analysis is required.
#'
#' @return
#' * `s_odds_ratio()` returns a named list with the statistics `or_ci`
#'   (containing `est`, `lcl`, and `ucl`) and `n_tot`.
#'
#' @examples
#' # Unstratified analysis.
#' s_odds_ratio(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta
#' )
#'
#' # Stratified analysis.
#' s_odds_ratio(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta,
#'   variables = list(arm = "grp", strata = "strata")
#' )
#'
#' @export
s_odds_ratio <- function(df,
                         .var,
                         .ref_group,
                         .in_ref_col,
                         .df_row,
                         variables = list(arm = NULL, strata = NULL),
                         conf_level = 0.95,
                         groups_list = NULL,
                         method = "exact",
                         ...) {
  # Empty placeholders; these are what gets labelled and returned for the reference column.
  y <- list(or_ci = numeric(), n_tot = numeric())

  if (!.in_ref_col) {
    assert_proportion_value(conf_level)
    assert_df_with_variables(df, list(rsp = .var))
    assert_df_with_variables(.ref_group, list(rsp = .var))

    stratified <- !is.null(variables$strata)

    if (!stratified) {
      # Unstratified analysis: stack reference rows on top of comparison rows and fit a logistic regression.
      grp_labels <- c("ref", "Not-ref")
      data <- data.frame(
        rsp = c(.ref_group[[.var]], df[[.var]]),
        grp = factor(
          rep(grp_labels, times = c(nrow(.ref_group), nrow(df))),
          levels = grp_labels
        )
      )
      y <- or_glm(data, conf_level = conf_level)
    } else {
      assert_df_with_variables(.df_row, c(list(rsp = .var), variables))
      checkmate::assert_subset(method, c("exact", "approximate", "efron", "breslow"), empty.ok = FALSE)

      # Arm levels observed in the reference column and in the current (comparison) column.
      ref_levels <- as.character(unique(.ref_group[[variables$arm]]))
      trt_levels <- as.character(unique(df[[variables$arm]]))

      # The group variable prepared for clogit must be synchronised with combination groups definition.
      if (is.null(groups_list)) {
        ref_grp <- ref_levels
        trt_grp <- trt_levels
        grp <- stats::relevel(factor(.df_row[[variables$arm]]), ref = ref_grp)
      } else {
        # Name(s) of the combined group(s) in `groups_list` that contain every one of the given levels.
        find_group <- function(lvls) {
          contains_all <- vapply(groups_list, function(members) all(lvls %in% members), logical(1))
          names(groups_list)[contains_all]
        }
        ref_grp <- find_group(ref_levels)
        trt_grp <- find_group(trt_levels)

        # Collapse the original arm levels into the combined reference and comparison groups.
        grp <- combine_levels(.df_row[[variables$arm]], levels = ref_levels, new_level = ref_grp)
        grp <- combine_levels(grp, levels = trt_levels, new_level = trt_grp)
      }

      # The reference level in `grp` must be the same as in the `rtables` column split.
      data <- data.frame(
        rsp = .df_row[[.var]],
        grp = grp,
        strata = interaction(.df_row[variables$strata])
      )
      clogit_res <- or_clogit(data, conf_level = conf_level, method = method)

      # The model is fitted on all groups; keep only the comparison matching the current column.
      checkmate::assert_string(trt_grp)
      checkmate::assert_subset(trt_grp, names(clogit_res$or_ci))
      y$or_ci <- clogit_res$or_ci[[trt_grp]]
      y$n_tot <- clogit_res$n_tot
    }
  }

  # An estimate that exists but is NA means the fit did not produce a usable odds ratio.
  est_missing <- "est" %in% names(y$or_ci) && is.na(y$or_ci[["est"]])
  if (est_missing && method != "approximate") {
    warning(
      "Unable to compute the odds ratio estimate. Please try re-running the function with ",
      'parameter `method` set to "approximate".'
    )
  }

  y$or_ci <- formatters::with_label(
    x = y$or_ci,
    label = paste0("Odds Ratio (", 100 * conf_level, "% CI)")
  )
  y$n_tot <- formatters::with_label(x = y$n_tot, label = "Total n")

  y
}

#' @describeIn odds_ratio Formatted analysis function which is used as `afun` in `estimate_odds_ratio()`.
#'
#' @return
#' * `a_odds_ratio()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_odds_ratio(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta
#' )
#'
#' @export
a_odds_ratio <- function(df,
                         ...,
                         .stats = NULL,
                         .stat_names = NULL,
                         .formats = NULL,
                         .labels = NULL,
                         .indent_mods = NULL) {
  # Arguments received through `...`; the names listed under `.additional_fun_parameters` are
  # resolved into `extra_afun_params` and the entry itself is dropped before calling the statistics function.
  stat_fn_args <- list(...)
  extra_afun_params <- retrieve_extra_afun_params(names(stat_fn_args$.additional_fun_parameters))
  stat_fn_args$.additional_fun_parameters <- NULL

  # Separate the requested statistic names from any user-defined statistic functions
  split_stats <- .split_std_from_custom_stats(.stats)
  .stats <- split_stats$all_stats
  custom_stat_functions <- split_stats$custom_stats

  # Compute the statistics with `s_odds_ratio()` plus any custom functions
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_odds_ratio,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(list(df = df), extra_afun_params, stat_fn_args)
  )

  # Resolve the final set of statistics and keep only those
  .stats <- get_stats(
    "estimate_odds_ratio",
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions)
  )
  x_stats <- x_stats[.stats]

  # Formatting defaults; labels attached to the computed statistics take part in the label defaults
  .formats <- get_formats_from_stats(.stats, .formats)
  stat_labels <- lapply(x_stats, attr, "label")
  .labels <- get_labels_from_stats(
    .stats, .labels,
    tern_defaults = c(stat_labels, tern_default_labels)
  )
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  # The flattened labels are used both as row names and as row labels
  row_labels <- .unlist_keep_nulls(.labels)

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = row_labels,
    .stat_names = .stat_names,
    .labels = row_labels,
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}

#' @describeIn odds_ratio Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `estimate_odds_ratio()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_odds_ratio()` to the table layout.
#'
#' @examples
#' set.seed(12)
#' dta <- data.frame(
#'   rsp = sample(c(TRUE, FALSE), 100, TRUE),
#'   grp = factor(rep(c("A", "B"), each = 50), levels = c("A", "B")),
#'   strata = factor(sample(c("C", "D"), 100, TRUE))
#' )
#'
#' l <- basic_table() %>%
#'   split_cols_by(var = "grp", ref_group = "B") %>%
#'   estimate_odds_ratio(vars = "rsp")
#'
#' build_table(l, df = dta)
#'
#' @export
#' @order 2
estimate_odds_ratio <- function(lyt,
                                vars,
                                variables = list(arm = NULL, strata = NULL),
                                conf_level = 0.95,
                                groups_list = NULL,
                                method = "exact",
                                na_str = default_na_str(),
                                nested = TRUE,
                                ...,
                                table_names = vars,
                                show_labels = "hidden",
                                var_labels = vars,
                                .stats = "or_ci",
                                .stat_names = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Standard formatting arguments: `.stats` is always forwarded, the others only when supplied (non-NULL)
  optional_args <- list(
    ".stat_names" = .stat_names,
    ".formats" = .formats,
    ".labels" = .labels,
    ".indent_mods" = .indent_mods
  )
  is_supplied <- !vapply(optional_args, is.null, logical(1))
  extra_args <- c(list(".stats" = .stats), optional_args[is_supplied])

  # Arguments for the statistics function, followed by anything passed through `...`
  extra_args <- c(
    extra_args,
    list(variables = variables, conf_level = conf_level, groups_list = groups_list, method = method),
    ...
  )

  # Extend a local copy of the analysis function's formals with the additional layout parameters.
  # The local name `a_odds_ratio` is kept on purpose so the `afun` argument expression is unchanged.
  afun_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- afun_params
  formals(a_odds_ratio) <- c(formals(a_odds_ratio), afun_params)

  analyze(
    lyt = lyt,
    vars = vars,
    afun = a_odds_ratio,
    extra_args = extra_args,
    na_str = na_str,
    nested = nested,
    table_names = table_names,
    var_labels = var_labels,
    show_labels = show_labels
  )
}

#' Helper functions for odds ratio estimation
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions to calculate odds ratios in [estimate_odds_ratio()].
#'
#' @inheritParams odds_ratio
#' @inheritParams argument_convention
#' @param data (`data.frame`)\cr data frame containing at least the variables `rsp` and `grp`, and optionally
#'   `strata` for [or_clogit()].
#'
#' @return A named `list` of elements `or_ci` and `n_tot`.
#'
#' @seealso [odds_ratio]
#'
#' @name h_odds_ratio
NULL

#' @describeIn h_odds_ratio Estimates the odds ratio based on [stats::glm()]. Note that there must be
#'   exactly 2 groups in `data` as specified by the `grp` variable.
#'
#' @examples
#' # Data with 2 groups.
#' data <- data.frame(
#'   rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1)),
#'   grp = letters[c(1, 1, 1, 2, 2, 2, 1, 2)],
#'   strata = letters[c(1, 2, 1, 2, 2, 2, 1, 2)],
#'   stringsAsFactors = TRUE
#' )
#'
#' # Odds ratio based on glm.
#' or_glm(data, conf_level = 0.95)
#'
#' @export
or_glm <- function(data, conf_level) {
  # Input validation
  checkmate::assert_logical(data$rsp)
  assert_proportion_value(conf_level)
  assert_df_with_variables(data, list(rsp = "rsp", grp = "grp"))
  checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))

  # The grouping variable must be a factor with exactly two levels
  data$grp <- as_factor_keep_attributes(data$grp)
  assert_df_with_factors(data, list(val = "grp"), min.levels = 2, max.levels = 2)

  # Logistic regression of the response on the group
  or_formula <- stats::as.formula("rsp ~ grp")
  model_fit <- stats::glm(
    formula = or_formula,
    data = data,
    family = stats::binomial(link = "logit")
  )

  # Work on the log-odds scale first; the intercept (first coefficient / first CI row) is discarded.
  log_or <- stats::coef(model_fit)[-1]
  log_ci <- stats::confint.default(model_fit, level = conf_level)[-1, , drop = FALSE]

  # Back-transform estimate and confidence limits in one go
  values <- stats::setNames(exp(c(log_or, log_ci)), c("est", "lcl", "ucl"))

  list(
    or_ci = values,
    n_tot = c(n_tot = nrow(model_fit$model))
  )
}

#' @describeIn h_odds_ratio Estimates the odds ratio based on [survival::clogit()]. This is done for
#'   the whole data set including all groups, since the results are not the same as when doing
#'   pairwise comparisons between the groups.
#'
#' @examples
#' # Data with 3 groups.
#' data <- data.frame(
#'   rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0)),
#'   grp = letters[c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3)],
#'   strata = LETTERS[c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)],
#'   stringsAsFactors = TRUE
#' )
#'
#' # Odds ratio based on stratified estimation by conditional logistic regression.
#' or_clogit(data, conf_level = 0.95)
#'
#' @export
or_clogit <- function(data, conf_level, method = "exact") {
  # Input validation
  checkmate::assert_logical(data$rsp)
  assert_proportion_value(conf_level)
  assert_df_with_variables(data, list(rsp = "rsp", grp = "grp", strata = "strata"))
  checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))
  checkmate::assert_multi_class(data$strata, classes = c("factor", "character"))
  checkmate::assert_subset(method, c("exact", "approximate", "efron", "breslow"), empty.ok = FALSE)

  data$grp <- as_factor_keep_attributes(data$grp)
  data$strata <- as_factor_keep_attributes(data$strata)

  # Deviation from convention: `survival::strata` must be simply `strata`.
  clogit_formula <- stats::as.formula("rsp ~ grp + strata(strata)")
  model_fit <- clogit_with_tryCatch(formula = clogit_formula, data = data, method = method)

  # Coefficients and confidence limits on the log-odds scale
  log_or <- stats::coef(model_fit)
  log_ci <- stats::confint(model_fit, level = conf_level)

  # One entry per coefficient, i.e. per comparison of a group against the reference group.
  # Entries are keyed by the coefficient name with its leading "grp" removed.
  or_ci <- list()
  for (term in names(log_or)) {
    estimates <- exp(c(log_or[term], log_ci[term, , drop = TRUE]))
    names(estimates) <- c("est", "lcl", "ucl")
    or_ci[[gsub("^grp", "", x = term)]] <- estimates
  }

  list(or_ci = or_ci, n_tot = c(n_tot = model_fit$n))
}

#' Summarize change from baseline values or absolute baseline values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [summarize_change()] creates a layout element to summarize the change from baseline or absolute
#' baseline values. The primary analysis variable `vars` indicates the numerical change from baseline results.
#'
#' Required secondary analysis variables `value` and `baseline_flag` can be supplied to the function via
#' the `variables` argument. The `value` element should be the name of the analysis value variable, and the
#' `baseline_flag` element should be the name of the flag variable that indicates whether or not records contain
#' baseline values. Depending on the baseline flag given, either the absolute baseline values (at baseline)
#' or the change from baseline values (post-baseline) are then summarized.
#'
#' @inheritParams argument_convention
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("analyze_vars_numeric"), type = "sh")``
#'
#' @name summarize_change
#' @order 1
NULL

#' @describeIn summarize_change Statistics function that summarizes baseline or post-baseline visits.
#'
#' @return
#' * `s_change_from_baseline()` returns the same values returned by [s_summary.numeric()].
#'
#' @note The data in `df` must all be from either baseline or post-baseline visits. Otherwise
#'   an error will be thrown.
#'
#' @keywords internal
s_change_from_baseline <- function(df, ...) {
  # `.var` (the change variable) and `variables` are picked out of the dots
  dots <- list(...)
  .var <- dots[[".var"]]
  variables <- dots[["variables"]]

  # Validate column types; the baseline flag may take at most one distinct value within `df`
  checkmate::assert_numeric(df[[variables$value]])
  checkmate::assert_numeric(df[[.var]])
  checkmate::assert_logical(df[[variables$baseline_flag]])
  checkmate::assert_vector(unique(df[[variables$baseline_flag]]), max.len = 1)
  assert_df_with_variables(df, c(variables, list(chg = .var)))

  # Use the analysis value where the baseline flag is TRUE, the change value where it is FALSE
  is_baseline <- df[[variables$baseline_flag]]
  combined <- ifelse(is_baseline, df[[variables$value]], df[[.var]])

  # `ifelse()` on an empty flag vector yields `logical(0)`; hand a numeric vector to the summary instead
  if (is.logical(combined) && length(combined) == 0L) {
    combined <- numeric(0)
  }

  s_summary(combined, ...)
}

#' @describeIn summarize_change Formatted analysis function which is used as `afun` in `summarize_change()`.
#'
#' @return
#' * `a_change_from_baseline()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_change_from_baseline <- function(df,
                                   ...,
                                   .stats = NULL,
                                   .stat_names = NULL,
                                   .formats = NULL,
                                   .labels = NULL,
                                   .indent_mods = NULL) {
  # Arguments received through `...`; the names listed under `.additional_fun_parameters` are
  # resolved into `extra_afun_params` and the entry itself is dropped before calling the statistics function.
  stat_fn_args <- list(...)
  extra_afun_params <- retrieve_extra_afun_params(names(stat_fn_args$.additional_fun_parameters))
  stat_fn_args$.additional_fun_parameters <- NULL

  # Separate the requested statistic names from any user-defined statistic functions
  split_stats <- .split_std_from_custom_stats(.stats)
  .stats <- split_stats$all_stats
  custom_stat_functions <- split_stats$custom_stats

  # Compute the statistics with `s_change_from_baseline()` plus any custom functions
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_change_from_baseline,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(list(df = df), extra_afun_params, stat_fn_args)
  )

  # Resolve the final set of statistics and keep only those
  .stats <- get_stats(
    "analyze_vars_numeric",
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions)
  )
  x_stats <- x_stats[.stats]

  # Fill in with formatting defaults
  .formats <- get_formats_from_stats(.stats, .formats)
  .labels <- get_labels_from_stats(.stats, .labels)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  # Rows are named after the label names, while the displayed labels are the flattened label values
  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = names(.labels),
    .stat_names = .stat_names,
    .labels = .unlist_keep_nulls(.labels),
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}

#' @describeIn summarize_change Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `summarize_change()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_change_from_baseline()` to the table layout.
#'
#' @note To be used after a split on visits in the layout, such that each data subset only contains
#'   either baseline or post-baseline data.
#'
#' @examples
#' library(dplyr)
#'
#' # Fabricate dataset
#' dta_test <- data.frame(
#'   USUBJID = rep(1:6, each = 3),
#'   AVISIT = rep(paste0("V", 1:3), 6),
#'   ARM = rep(LETTERS[1:3], rep(6, 3)),
#'   AVAL = c(9:1, rep(NA, 9))
#' ) %>%
#'   mutate(ABLFLL = AVISIT == "V1") %>%
#'   group_by(USUBJID) %>%
#'   mutate(
#'     BLVAL = AVAL[ABLFLL],
#'     CHG = AVAL - BLVAL
#'   ) %>%
#'   ungroup()
#'
#' results <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   summarize_change("CHG", variables = list(value = "AVAL", baseline_flag = "ABLFLL")) %>%
#'   build_table(dta_test)
#'
#' results
#'
#' @export
#' @order 2
summarize_change <- function(lyt,
                             vars,
                             variables,
                             var_labels = vars,
                             na_str = default_na_str(),
                             na_rm = TRUE,
                             nested = TRUE,
                             show_labels = "default",
                             table_names = vars,
                             section_div = NA_character_,
                             ...,
                             .stats = c("n", "mean_sd", "median", "range"),
                             .stat_names = NULL,
                             .formats = c(
                               mean_sd = "xx.xx (xx.xx)",
                               mean_se = "xx.xx (xx.xx)",
                               median = "xx.xx",
                               range = "xx.xx - xx.xx",
                               mean_pval = "xx.xx"
                             ),
                             .labels = NULL,
                             .indent_mods = NULL) {
  # Standard formatting arguments: `.stats` is always forwarded, the others only when supplied (non-NULL)
  optional_args <- list(
    ".stat_names" = .stat_names,
    ".formats" = .formats,
    ".labels" = .labels,
    ".indent_mods" = .indent_mods
  )
  is_supplied <- !vapply(optional_args, is.null, logical(1))
  extra_args <- c(list(".stats" = .stats), optional_args[is_supplied])

  # Arguments for the statistics function, followed by anything passed through `...`
  extra_args <- c(
    extra_args,
    list(variables = variables),
    na_rm = na_rm,
    ...
  )

  # Extend a local copy of the analysis function's formals with the additional layout parameters.
  # The local name `a_change_from_baseline` is kept on purpose so the `afun` argument expression is unchanged.
  afun_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- afun_params
  formals(a_change_from_baseline) <- c(formals(a_change_from_baseline), afun_params)

  analyze(
    lyt = lyt,
    vars = vars,
    afun = a_change_from_baseline,
    extra_args = extra_args,
    na_str = na_str,
    inclNAs = !na_rm, # missing values are only passed to the analysis when they are not being removed
    nested = nested,
    table_names = table_names,
    var_labels = var_labels,
    show_labels = show_labels,
    section_div = section_div
  )
}

#' Proportion difference estimation
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analysis function [estimate_proportion_diff()] creates a layout element to estimate the difference in proportion
#' of responders within a studied population. The primary analysis variable, `vars`, is a logical variable indicating
#' whether a response has occurred for each record. See the `method` parameter for options of methods to use when
#' constructing the confidence interval of the proportion difference. A stratification variable can be supplied via the
#' `strata` element of the `variables` argument.
#'
#' @details The possible methods are:
#'
#' - `"waldcc"`: Wald confidence interval with continuity correction \insertCite{Agresti1998}{tern}.
#' - `"wald"`: Wald confidence interval without continuity correction \insertCite{Agresti1998}{tern}.
#' - `"cmh"`: Cochran-Mantel-Haenszel (CMH) confidence interval \insertCite{MantelHaenszel1959}{tern}.
#' - `"cmh_sato"`: CMH confidence interval with Sato variance estimator \insertCite{Sato1989}{tern}.
#' - `"cmh_mn"`: CMH confidence interval using the Miettinen and Nurminen method
#'      \insertCite{MiettinenNurminen1985}{tern}.
#' - `"ha"`: Anderson-Hauck confidence interval \insertCite{HauckAnderson1986}{tern}.
#' - `"newcombe"`: Newcombe confidence interval without continuity correction \insertCite{Newcombe1998}{tern}.
#' - `"newcombecc"`: Newcombe confidence interval with continuity correction \insertCite{Newcombe1998}{tern}.
#' - `"strat_newcombe"`: Stratified Newcombe confidence interval without continuity
#'     correction \insertCite{Yan2010-jt}{tern}.
#' - `"strat_newcombecc"`: Stratified Newcombe confidence interval with continuity
#'     correction \insertCite{Yan2010-jt}{tern}.
#'
#' @inheritParams prop_diff_strat_nc
#' @inheritParams argument_convention
#' @param method (`string`)\cr the method used for the confidence interval estimation.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("estimate_proportion_diff"), type = "sh")``
#'
#' @seealso [d_proportion_diff()]
#'
#' @references
#'   \insertAllCited{}
#'
#' @name prop_diff
#' @order 1
NULL

#' @describeIn prop_diff Statistics function estimating the difference
#'   in terms of responder proportion.
#'
#' @return
#' * `s_proportion_diff()` returns a named list of elements `diff` and `diff_ci`.
#'
#' @note When performing an unstratified analysis, methods `"cmh"`, `"cmh_sato"`, `"cmh_mn"`,
#'   `"strat_newcombe"`, and `"strat_newcombecc"` are not permitted.
#'
#' @examples
#' s_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   conf_level = 0.90,
#'   method = "ha"
#' )
#'
#' # CMH example with strata
#' s_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   variables = list(strata = c("f1", "f2")),
#'   conf_level = 0.90,
#'   method = "cmh"
#' )
#'
#' @export
s_proportion_diff <- function(df,
                              .var,
                              .ref_group,
                              .in_ref_col,
                              variables = list(strata = NULL),
                              conf_level = 0.95,
                              method = c(
                                "waldcc", "wald", "cmh", "cmh_sato", "cmh_mn",
                                "ha", "newcombe", "newcombecc",
                                "strat_newcombe", "strat_newcombecc"
                              ),
                              weights_method = "cmh",
                              ...) {
  method <- match.arg(method)

  # Stratified methods cannot be computed without stratification variables.
  if (
    is.null(variables$strata) &&
      checkmate::test_subset(method, c("cmh", "cmh_sato", "cmh_mn", "strat_newcombe", "strat_newcombecc"))
  ) {
    stop(paste(
      "When performing an unstratified analysis, methods",
      "'cmh', 'cmh_sato', 'cmh_mn', 'strat_newcombe', and 'strat_newcombecc' are not",
      "permitted. Please choose a different method."
    ))
  }

  # Empty placeholders; returned unchanged (apart from labels) for the reference column.
  y <- list(diff = numeric(), diff_ci = numeric())

  if (!.in_ref_col) {
    # Stack reference rows first, then the current column, with a matching two-level group factor.
    rsp <- c(.ref_group[[.var]], df[[.var]])
    grp <- factor(
      rep(
        c("ref", "Not-ref"),
        c(nrow(.ref_group), nrow(df))
      ),
      levels = c("ref", "Not-ref")
    )

    if (!is.null(variables$strata)) {
      strata_colnames <- variables$strata
      checkmate::assert_character(strata_colnames, null.ok = FALSE)
      strata_vars <- stats::setNames(as.list(strata_colnames), strata_colnames)

      assert_df_with_variables(df, strata_vars)
      assert_df_with_variables(.ref_group, strata_vars)

      # Merging interaction strata for reference group rows data and remaining
      # (same order as `rsp` and `grp` above).
      strata <- c(
        interaction(.ref_group[strata_colnames]),
        interaction(df[strata_colnames])
      )
      strata <- as.factor(strata)
    }

    # Weights for the stratified Newcombe methods. An entry in `variables` takes precedence
    # (kept for backward compatibility); otherwise the `weights_method` argument is honored
    # instead of being silently reset to "cmh".
    if (!is.null(variables$weights_method)) {
      weights_method <- variables$weights_method
    }

    y <- switch(method,
      "wald" = prop_diff_wald(rsp, grp, conf_level, correct = FALSE),
      "waldcc" = prop_diff_wald(rsp, grp, conf_level, correct = TRUE),
      "ha" = prop_diff_ha(rsp, grp, conf_level),
      "newcombe" = prop_diff_nc(rsp, grp, conf_level, correct = FALSE),
      "newcombecc" = prop_diff_nc(rsp, grp, conf_level, correct = TRUE),
      "strat_newcombe" = prop_diff_strat_nc(rsp,
        grp,
        strata,
        weights_method,
        conf_level,
        correct = FALSE
      ),
      "strat_newcombecc" = prop_diff_strat_nc(rsp,
        grp,
        strata,
        weights_method,
        conf_level,
        correct = TRUE
      ),
      # `prop_diff_cmh()` returns additional elements; only the difference and its CI are kept.
      "cmh" = prop_diff_cmh(rsp, grp, strata, conf_level, diff_se = "standard")[c("diff", "diff_ci")],
      "cmh_sato" = prop_diff_cmh(rsp, grp, strata, conf_level, diff_se = "sato")[c("diff", "diff_ci")],
      "cmh_mn" = prop_diff_cmh(rsp, grp, strata, conf_level, diff_se = "miettinen_nurminen")[c("diff", "diff_ci")]
    )

    # Report as percentages, with names that encode the method used.
    y$diff <- stats::setNames(y$diff * 100, paste0("diff_", method))
    y$diff_ci <- stats::setNames(y$diff_ci * 100, paste0("diff_ci_", method, c("_l", "_u")))
  }

  attr(y$diff, "label") <- "Difference in Response rate (%)"
  attr(y$diff_ci, "label") <- d_proportion_diff(
    conf_level, method,
    long = FALSE
  )

  y
}

#' @describeIn prop_diff Formatted analysis function which is used as `afun` in `estimate_proportion_diff()`.
#'
#' @return
#' * `a_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .stats = c("diff"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   conf_level = 0.90,
#'   method = "ha"
#' )
#'
#' @export
a_proportion_diff <- function(df,
                              ...,
                              .stats = NULL,
                              .stat_names = NULL,
                              .formats = NULL,
                              .labels = NULL,
                              .indent_mods = NULL) {
  # Everything passed through `...` is forwarded to the statistics function(s) below.
  dots_extra_args <- list(...)

  # Separate user-defined statistic functions from the names of the standard statistics:
  # `all_stats` replaces `.stats`, `custom_stats` holds the custom functions.
  default_and_custom_stats_list <- .split_std_from_custom_stats(.stats)
  .stats <- default_and_custom_stats_list$all_stats
  custom_stat_functions <- default_and_custom_stats_list$custom_stats

  # Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params).
  # Only the names of the requested parameters are passed on to the retrieval helper.
  extra_afun_params <- retrieve_extra_afun_params(
    names(dots_extra_args$.additional_fun_parameters)
  )
  dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore

  # Main statistical functions application: `s_proportion_diff()` plus any custom functions,
  # all called with the data, the layout-derived parameters and the remaining `...` arguments.
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_proportion_diff,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      df = list(df),
      extra_afun_params,
      dots_extra_args
    )
  )

  # Fill in with stats defaults if needed
  .stats <- get_stats("estimate_proportion_diff",
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions)
  )

  # Keep only the selected statistics, in the order given by `.stats`.
  x_stats <- x_stats[.stats]

  # Fill in formats/indents/labels with custom input and defaults
  .formats <- get_formats_from_stats(.stats, .formats)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)
  if (is.null(.labels)) {
    # No labels supplied: fall back to the "label" attribute attached to each statistic,
    # dropping entries that are empty strings, NULL or NA.
    .labels <- sapply(x_stats, attr, "label")
    .labels <- .labels[nzchar(.labels) & !sapply(.labels, is.null) & !is.na(.labels)]
  }
  .labels <- get_labels_from_stats(.stats, .labels)

  # Auto format handling
  .formats <- apply_auto_formatting(
    .formats,
    x_stats,
    extra_afun_params$.df_row,
    extra_afun_params$.var
  )

  # Get and check statistical names from defaults
  .stat_names <- get_stat_names(x_stats, .stat_names) # derived from `x_stats`, not from `.stats`

  # Assemble the formatted rows; row names are taken from the names of `.labels`.
  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = names(.labels),
    .stat_names = .stat_names,
    .labels = .labels %>% .unlist_keep_nulls(),
    .indent_mods = .indent_mods %>% .unlist_keep_nulls()
  )
}

#' @describeIn prop_diff Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `estimate_proportion_diff()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_proportion_diff()` to the table layout.
#'
#' @examples
#' ## Simulated data: random responses, groups and stratification factors.
#' nex <- 100 # Number of example rows
#' dta <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
#'   "grp" = sample(c("A", "B"), nex, TRUE),
#'   "f1" = sample(c("a1", "a2"), nex, TRUE),
#'   "f2" = sample(c("x", "y", "z"), nex, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' l <- basic_table() %>%
#'   split_cols_by(var = "grp", ref_group = "B") %>%
#'   estimate_proportion_diff(
#'     vars = "rsp",
#'     conf_level = 0.90,
#'     method = "ha"
#'   )
#'
#' build_table(l, df = dta)
#'
#' @export
#' @order 2
estimate_proportion_diff <- function(lyt,
                                     vars,
                                     variables = list(strata = NULL),
                                     conf_level = 0.95,
                                     method = c(
                                       "waldcc", "wald", "cmh", "cmh_sato", "cmh_mn",
                                       "ha", "newcombe", "newcombecc",
                                       "strat_newcombe", "strat_newcombecc"
                                     ),
                                     weights_method = "cmh",
                                     var_labels = vars,
                                     na_str = default_na_str(),
                                     nested = TRUE,
                                     show_labels = "hidden",
                                     table_names = vars,
                                     section_div = NA_character_,
                                     ...,
                                     na_rm = TRUE,
                                     .stats = c("diff", "diff_ci"),
                                     .stat_names = NULL,
                                     .formats = c(diff = "xx.x", diff_ci = "(xx.x, xx.x)"),
                                     .labels = NULL,
                                     .indent_mods = c(diff = 0L, diff_ci = 1L)) {
  # Arguments consumed by the statistics function, followed by anything passed through `...`.
  stat_fun_args <- list(
    "na_rm" = na_rm,
    "variables" = variables,
    "conf_level" = conf_level,
    "method" = method,
    "weights_method" = weights_method,
    ...
  )

  # Formatting-related arguments are only forwarded when they were supplied (non-NULL).
  format_args <- list(
    ".stats" = .stats,
    ".stat_names" = .stat_names,
    ".formats" = .formats,
    ".labels" = .labels,
    ".indent_mods" = .indent_mods
  )
  extra_args <- c(stat_fun_args, Filter(Negate(is.null), format_args))

  # Layout-derived parameters (see ?rtables::additional_fun_params) are stored in `extra_args`
  # and also appended to the formals of a local copy of the analysis function.
  layout_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- layout_params
  formals(a_proportion_diff) <- c(formals(a_proportion_diff), layout_params)

  # Main {rtables} structural call; missing values are included only when not removed.
  analyze(
    lyt = lyt,
    vars = vars,
    var_labels = var_labels,
    table_names = table_names,
    afun = a_proportion_diff,
    extra_args = extra_args,
    inclNAs = !na_rm,
    na_str = na_str,
    nested = nested,
    show_labels = show_labels,
    section_div = section_div
  )
}

#' Check proportion difference arguments
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Verifies that the arguments are valid for use in the estimation of the
#' difference in responder proportions.
#'
#' @inheritParams prop_diff
#' @inheritParams prop_diff_wald
#'
#' @examples
#' # Simulated data: random responses, groups and stratification factors.
#' nex <- 100 # Number of example rows
#' dta <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
#'   "grp" = sample(c("A", "B"), nex, TRUE),
#'   "f1" = sample(c("a1", "a2"), nex, TRUE),
#'   "f2" = sample(c("x", "y", "z"), nex, TRUE),
#'   stringsAsFactors = TRUE
#' )
#' check_diff_prop_ci(rsp = dta[["rsp"]], grp = dta[["grp"]], conf_level = 0.95)
#' @export
check_diff_prop_ci <- function(rsp,
                               grp,
                               strata = NULL,
                               conf_level,
                               correct = NULL) {
  n_obs <- length(rsp)

  # Response: logical vector without missing values.
  checkmate::assert_logical(rsp, any.missing = FALSE)
  # Group: complete factor with exactly two levels and one entry per response.
  checkmate::assert_factor(grp, len = n_obs, any.missing = FALSE, n.levels = 2)
  # Confidence level: a single number in [0, 1].
  checkmate::assert_number(conf_level, lower = 0, upper = 1)
  # Continuity correction: a single TRUE/FALSE, or NULL when not applicable.
  checkmate::assert_flag(correct, null.ok = TRUE)

  # Strata are optional; when given they must be a factor aligned with the responses.
  has_strata <- !is.null(strata)
  if (has_strata) {
    checkmate::assert_factor(strata, len = n_obs)
  }

  # Called for its assertions only.
  invisible(NULL)
}

#' Description of method used for proportion comparison
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function that describes the analysis in
#' [s_proportion_diff()].
#'
#' @inheritParams s_proportion_diff
#' @param long (`flag`)\cr whether a long (`TRUE`) or a short (`FALSE`, default) description is required.
#'
#' @return A `string` describing the analysis.
#'
#' @seealso [prop_diff]
#'
#' @export
d_proportion_diff <- function(conf_level,
                              method,
                              long = FALSE) {
  # Confidence level expressed as a percentage, e.g. "95% CI".
  label <- paste0(conf_level * 100, "% CI")

  if (long) {
    # `method` is a single string, so a plain `if`/`else` is used rather than the vectorized `ifelse()`.
    # CMH-based methods estimate a stratification-adjusted difference.
    difference_part <- if (method %in% c("cmh", "cmh_sato", "cmh_mn")) {
      "for adjusted difference"
    } else {
      "for difference"
    }
    label <- paste(label, difference_part)
  }

  # Human-readable method name; unknown methods are rejected.
  method_part <- switch(method,
    "cmh" = "CMH, without correction",
    "cmh_sato" = "CMH, Sato variance estimator",
    "cmh_mn" = "CMH, Miettinen and Nurminen",
    "waldcc" = "Wald, with correction",
    "wald" = "Wald, without correction",
    "ha" = "Anderson-Hauck",
    "newcombe" = "Newcombe, without correction",
    "newcombecc" = "Newcombe, with correction",
    "strat_newcombe" = "Stratified Newcombe, without correction",
    "strat_newcombecc" = "Stratified Newcombe, with correction",
    stop(paste(method, "does not have a description"))
  )

  paste0(label, " (", method_part, ")")
}

#' Helper functions to calculate proportion difference
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @inheritParams prop_diff
#' @param grp (`factor`)\cr vector assigning observations to one out of two groups
#'   (e.g. reference and treatment group).
#'
#' @return A named `list` of elements `diff` (proportion difference) and `diff_ci`
#'   (proportion difference confidence interval).
#'
#' @seealso [prop_diff] for implementation of these helper functions.
#'
#' @references
#'   \insertAllCited{}
#'
#' @name h_prop_diff
NULL

#' @describeIn h_prop_diff The Wald interval follows the usual textbook
#'   definition for a single proportion confidence interval using the normal
#'   approximation. It is possible to include a continuity correction for Wald's
#'   interval.
#'
#' @param correct (`flag`)\cr whether to include the continuity correction. For further
#'   information, see [stats::prop.test()].
#'
#' @examples
#' # Wald confidence interval
#' set.seed(2)
#' rsp <- sample(c(TRUE, FALSE), replace = TRUE, size = 20)
#' grp <- factor(c(rep("A", 10), rep("B", 10)))
#'
#' prop_diff_wald(rsp = rsp, grp = grp, conf_level = 0.95, correct = FALSE)
#'
#' @export
prop_diff_wald <- function(rsp,
                           grp,
                           conf_level = 0.95,
                           correct = FALSE) {
  # Continuity-corrected variant only when `correct` is exactly TRUE.
  mthd <- if (isTRUE(correct)) "waldcc" else "wald"

  grp <- as_factor_keep_attributes(grp)
  # Validates, among others, that `rsp` is a complete logical vector and that `grp` is a
  # complete two-level factor of the same length.
  check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level, correct = correct)

  # 2 x 2 counts: rows follow the levels of `grp`, columns are TRUE then FALSE, so in
  # column-major order the cells are (level 1, TRUE), (level 2, TRUE), (level 1, FALSE), (level 2, FALSE).
  counts <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
  resp_ref <- counts[1]
  resp_trt <- counts[2]
  size_ref <- sum(counts[1], counts[3])
  size_trt <- sum(counts[2], counts[4])

  # x1 and n1 are the non-reference group (second level of `grp`).
  wald_res <- desctools_binom(
    x1 = resp_trt, n1 = size_trt,
    x2 = resp_ref, n2 = size_ref,
    conf.level = conf_level,
    method = mthd
  )

  list(
    "diff" = unname(wald_res[, "est"]),
    "diff_ci" = unname(wald_res[, c("lwr.ci", "upr.ci")])
  )
}

#' @describeIn h_prop_diff Anderson-Hauck confidence interval \insertCite{HauckAnderson1986}{tern}.
#'
#' @examples
#' # Anderson-Hauck confidence interval
#' ## "Mid" case: 3/4 respond in group A, 1/2 respond in group B.
#' rsp <- c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE)
#' grp <- factor(c("A", "B", "A", "B", "A", "A"), levels = c("B", "A"))
#'
#' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.90)
#'
#' ## Edge case: Same proportion of response in A and B.
#' rsp <- c(TRUE, FALSE, TRUE, FALSE)
#' grp <- factor(c("A", "A", "B", "B"), levels = c("A", "B"))
#'
#' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.6)
#'
#' @export
prop_diff_ha <- function(rsp,
                         grp,
                         conf_level) {
  grp <- as_factor_keep_attributes(grp)
  check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level)

  # 2 x 2 counts: rows follow the levels of `grp`, columns are TRUE then FALSE, so in
  # column-major order the cells are (level 1, TRUE), (level 2, TRUE), (level 1, FALSE), (level 2, FALSE).
  counts <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
  resp_ref <- counts[1]
  resp_trt <- counts[2]
  size_ref <- sum(counts[1], counts[3])
  size_trt <- sum(counts[2], counts[4])

  # x1 and n1 are the non-reference group (second level of `grp`).
  ha_res <- desctools_binom(
    x1 = resp_trt, n1 = size_trt,
    x2 = resp_ref, n2 = size_ref,
    conf.level = conf_level,
    method = "ha"
  )

  list(
    "diff" = unname(ha_res[, "est"]),
    "diff_ci" = unname(ha_res[, c("lwr.ci", "upr.ci")])
  )
}

#' @describeIn h_prop_diff Newcombe confidence interval. It is based on
#'   the Wilson score confidence interval for a single binomial proportion \insertCite{Newcombe1998}{tern}.
#'
#' @examples
#' # Newcombe confidence interval
#'
#' set.seed(1)
#' rsp <- c(
#'   sample(c(TRUE, FALSE), size = 40, prob = c(3 / 4, 1 / 4), replace = TRUE),
#'   sample(c(TRUE, FALSE), size = 40, prob = c(1 / 2, 1 / 2), replace = TRUE)
#' )
#' grp <- factor(rep(c("A", "B"), each = 40), levels = c("B", "A"))
#' table(rsp, grp)
#'
#' prop_diff_nc(rsp = rsp, grp = grp, conf_level = 0.9)
#'
#' @export
prop_diff_nc <- function(rsp,
                         grp,
                         conf_level,
                         correct = FALSE) {
  # Score interval, continuity-corrected only when `correct` is exactly TRUE.
  if (isTRUE(correct)) {
    mthd <- "scorecc"
  } else {
    mthd <- "score"
  }
  grp <- as_factor_keep_attributes(grp)
  # `correct` is validated here as well, consistent with `prop_diff_wald()`.
  check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level, correct = correct)

  # 2 x 2 counts: rows follow the levels of `grp`, columns are TRUE then FALSE.
  # The point estimate is taken from `desctools_binom()`, so no separate per-group
  # proportions are computed here.
  tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
  ci <- desctools_binom(
    # x1 and n1 are non-reference groups.
    x1 = tbl[2], n1 = sum(tbl[2], tbl[4]),
    x2 = tbl[1], n2 = sum(tbl[1], tbl[3]),
    conf.level = conf_level,
    method = mthd
  )
  list(
    "diff" = unname(ci[, "est"]),
    "diff_ci" = unname(ci[, c("lwr.ci", "upr.ci")])
  )
}

#' @describeIn h_prop_diff Calculates the weighted difference. This is defined as the difference in
#'   response rates between the experimental treatment group and the control treatment group, adjusted
#'   for stratification factors by applying Cochran-Mantel-Haenszel (CMH) weights. For the CMH chi-squared
#'   test, use [stats::mantelhaen.test()].
#'
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
#' @param diff_se (`string`)\cr method to estimate the standard error for the difference, either
#'   `standard`, `sato` \insertCite{Sato1989}{tern} or
#'   `miettinen_nurminen` \insertCite{MiettinenNurminen1985}{tern}.
#'
#' @examples
#' # Cochran-Mantel-Haenszel confidence interval
#'
#' set.seed(2)
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
#' grp <- sample(c("Placebo", "Treatment"), 100, TRUE)
#' grp <- factor(grp, levels = c("Placebo", "Treatment"))
#' strata_data <- data.frame(
#'   "f1" = sample(c("a", "b"), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' prop_diff_cmh(
#'   rsp = rsp, grp = grp, strata = interaction(strata_data),
#'   conf_level = 0.90
#' )
#' prop_diff_cmh(
#'   rsp = rsp, grp = grp, strata = interaction(strata_data),
#'   conf_level = 0.90, diff_se = "sato"
#' )
#'
#' @export
prop_diff_cmh <- function(rsp,
                          grp,
                          strata,
                          conf_level = 0.95,
                          diff_se = c("standard", "sato", "miettinen_nurminen")) {
  grp <- as_factor_keep_attributes(grp)
  strata <- as_factor_keep_attributes(strata)
  diff_se <- match.arg(diff_se)
  check_diff_prop_ci(
    rsp = rsp, grp = grp, conf_level = conf_level, strata = strata
  )

  # `tapply()` yields NA for strata levels without observations; `na.rm = TRUE` keeps the
  # condition a proper TRUE/FALSE so that unused levels do not make `if` fail.
  if (any(tapply(rsp, strata, length) < 5, na.rm = TRUE)) {
    warning("Less than 5 observations in some strata.")
  }

  # first dimension: FALSE, TRUE
  # 2nd dimension: CONTROL, TX
  # 3rd dimension: levels of strata
  # rsp as factor rsp to handle edge case of no FALSE (or TRUE) rsp records
  t_tbl <- table(
    factor(rsp, levels = c("FALSE", "TRUE")),
    grp,
    strata
  )
  # Per-stratum sample sizes (n), responder counts (x) and proportions (p);
  # suffix 1 is the first level of `grp`, suffix 2 the second.
  n1 <- colSums(t_tbl[1:2, 1, ])
  n2 <- colSums(t_tbl[1:2, 2, ])
  x1 <- t_tbl[2, 1, ]
  p1 <- x1 / n1
  x2 <- t_tbl[2, 2, ]
  p2 <- x2 / n2

  # Only strata with observations in both groups contribute. All per-stratum vectors are
  # subset together (including `x1` and `x2`) so they stay aligned in the variance formulas.
  use_stratum <- (n1 > 0) & (n2 > 0)
  n1 <- n1[use_stratum]
  n2 <- n2[use_stratum]
  x1 <- x1[use_stratum]
  x2 <- x2[use_stratum]
  p1 <- p1[use_stratum]
  p2 <- p2[use_stratum]

  # CMH weights
  wt <- (n1 * n2 / (n1 + n2))
  wt_normalized <- wt / sum(wt)

  # Weighted proportion per group with normal-approximation confidence intervals.
  est1 <- sum(wt_normalized * p1)
  est2 <- sum(wt_normalized * p2)
  estimate <- c(est1, est2)
  names(estimate) <- levels(grp)
  se1 <- sqrt(sum(wt_normalized^2 * p1 * (1 - p1) / n1))
  se2 <- sqrt(sum(wt_normalized^2 * p2 * (1 - p2) / n2))
  z <- stats::qnorm((1 + conf_level) / 2)
  err1 <- z * se1
  err2 <- z * se2
  ci1 <- c((est1 - err1), (est1 + err1))
  ci2 <- c((est2 - err2), (est2 + err2))
  estimate_ci <- list(ci1, ci2)
  names(estimate_ci) <- levels(grp)

  # Weighted difference: second group minus first group.
  diff_est <- est2 - est1

  if (diff_se %in% c("standard", "sato")) {
    se_diff <- if (diff_se == "standard") {
      sqrt(sum(((p1 * (1 - p1) / n1) + (p2 * (1 - p2) / n2)) * wt_normalized^2))
    } else {
      # Sato variance estimator.
      p_terms <- (n2^2 * x1 - n1^2 * x2 + n1 * n2 * (n1 - n2) / 2) / (n1 + n2)^2
      q_terms <- (x1 * (n2 - x2) + x2 * (n1 - x1)) / (2 * (n1 + n2))
      num <- diff_est * sum(p_terms) + sum(q_terms)
      denom <- sum(wt)^2
      sqrt(num / denom)
    }
    diff_ci <- c(diff_est - z * se_diff, diff_est + z * se_diff)
  } else {
    # Miettinen and Nurminen method is used.
    z_stat_fun <- function(delta) {
      var_est <- h_miettinen_nurminen_var_est(
        n1 = n1, n2 = n2,
        x1 = x1, x2 = x2,
        diff_par = delta
      )$var_est
      num <- sum(wt * (p2 - p1 - delta))
      denom <- sqrt(sum(wt^2 * var_est))
      num / denom
    }
    # Find upper and lower confidence limits by root finding such that
    # z_stat_fun(limit) = +/- z quantile:
    root_lower <- function(delta) z_stat_fun(delta) - z
    root_upper <- function(delta) z_stat_fun(delta) + z
    diff_ci <- c(
      stats::uniroot(root_lower, interval = c(-0.99, diff_est))$root,
      stats::uniroot(root_upper, interval = c(diff_est, 0.99))$root
    )
    # Calculate the standard error separately.
    var_est <- h_miettinen_nurminen_var_est(
      n1 = n1, n2 = n2,
      x1 = x1, x2 = x2,
      diff_par = diff_est
    )$var_est
    se_diff <- sqrt(sum(wt_normalized^2 * var_est))
  }

  list(
    prop = estimate,
    prop_ci = estimate_ci,
    diff = diff_est,
    diff_ci = diff_ci,
    se_diff = se_diff,
    weights = wt_normalized,
    n1 = n1,
    n2 = n2
  )
}

#' Variance Estimates in Strata following Miettinen and Nurminen
#'
#' The variable names in this function follow the notation in the original
#' paper by \insertCite{MiettinenNurminen1985;textual}{tern}, cf. Appendix 1.
#'
#' @param n1 (`numeric`)\cr sample sizes in group 1.
#' @param n2 (`numeric`)\cr sample sizes in group 2.
#' @param x1 (`numeric`)\cr number of responders in group 1.
#' @param x2 (`numeric`)\cr number of responders in group 2.
#' @param diff_par (`numeric`)\cr assumed difference in true proportions
#'   (group 2 minus group 1).
#' @return A named `list` with elements:
#'
#' - `p1_hat`: estimated proportion in group 1
#' - `p2_hat`: estimated proportion in group 2
#' - `var_est`: variance estimate of the difference in proportions
#'
#' @keywords internal
#' @references
#'   \insertAllCited{}
h_miettinen_nurminen_var_est <- function(n1, n2, x1, x2, diff_par) {
  # nolint start
  # Translate to the notation in the paper.
  S0 <- n1
  S1 <- n2
  c0 <- x1
  c1 <- x2
  RD <- diff_par

  # Further definitions.
  S <- S0 + S1
  c <- c0 + c1

  # Coefficients of the third-degree polynomial.
  L3 <- S
  L2 <- (S1 + 2 * S0) * RD - S - c
  L1 <- (S0 * RD - S - 2 * c0) * RD + c
  L0 <- c0 * RD * (1 - RD)
  # nolint end

  # Solution for group 1 proportion.
  q <- L2^3 / (3 * L3)^3 - L1 * L2 / (6 * L3^2) + L0 / (2 * L3)
  # `sign(0)` is 0, which would force `p` to 0 and give 0 / 0 below. When `q` is 0 the
  # solution does not depend on the sign, so +1 is used.
  q_sign <- ifelse(q < 0, -1, 1)
  p <- q_sign * sqrt(L2^2 / (3 * L3)^2 - L1 / (3 * L3))
  acos_arg <- q / p^3
  # When `p` is 0 the cosine term below is multiplied by 0, so the angle is irrelevant.
  acos_arg[which(p == 0)] <- 0
  # Rounding can push the argument marginally outside [-1, 1], where `acos()` returns NaN;
  # such values are snapped back to the boundary. Larger excursions are left untouched.
  near_bound <- which(abs(acos_arg) > 1 & abs(acos_arg) < 1 + 1e-8)
  acos_arg[near_bound] <- sign(acos_arg[near_bound])
  a <- (1 / 3) * (base::pi + acos(acos_arg))
  p1_hat <- 2 * p * cos(a) - L2 / (3 * L3)

  # Estimated group 2 proportion.
  p2_hat <- p1_hat + RD

  # Variance estimate.
  var_est <- (p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2) *
    S / (S - 1)

  list(
    p1_hat = p1_hat,
    p2_hat = p2_hat,
    var_est = var_est
  )
}

#' @describeIn h_prop_diff Calculates the stratified Newcombe confidence interval and difference in response
#'   rates between the experimental treatment group and the control treatment group, adjusted for stratification
#'   factors. This implementation follows closely the one proposed by \insertCite{Yan2010-jt;textual}{tern}.
#'   Weights can be estimated from the heuristic proposed in [prop_strat_wilson()] or from CMH-derived weights
#'   (see [prop_diff_cmh()]).
#'
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
#' @param weights_method (`string`)\cr weights method. Can be either `"cmh"` or `"wilson_h"`
#'   and directs the way weights are estimated.
#'
#' @examples
#' # Stratified Newcombe confidence interval
#'
#' set.seed(2)
#' data_set <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), 100, TRUE),
#'   "f1" = sample(c("a", "b"), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   "grp" = sample(c("Placebo", "Treatment"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' prop_diff_strat_nc(
#'   rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]),
#'   weights_method = "cmh",
#'   conf_level = 0.90
#' )
#'
#' prop_diff_strat_nc(
#'   rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]),
#'   weights_method = "wilson_h",
#'   conf_level = 0.90
#' )
#'
#' @export
prop_diff_strat_nc <- function(rsp,
                               grp,
                               strata,
                               weights_method = c("cmh", "wilson_h"),
                               conf_level = 0.95,
                               correct = FALSE) {
  weights_method <- match.arg(weights_method)
  grp <- as_factor_keep_attributes(grp)
  strata <- as_factor_keep_attributes(strata)
  check_diff_prop_ci(
    rsp = rsp, grp = grp, conf_level = conf_level, strata = strata
  )
  checkmate::assert_number(conf_level, lower = 0, upper = 1)
  checkmate::assert_flag(correct)
  # `tapply()` yields NA for strata levels without observations; `na.rm = TRUE` keeps the
  # condition a proper TRUE/FALSE so that unused levels do not make `if` fail.
  if (any(tapply(rsp, strata, length) < 5, na.rm = TRUE)) {
    warning("Less than 5 observations in some strata.")
  }

  # Responses and strata split by group (first level of `grp` first).
  rsp_by_grp <- split(rsp, f = grp)
  strata_by_grp <- split(strata, f = grp)

  # Finding the weights
  weights <- if (identical(weights_method, "cmh")) {
    prop_diff_cmh(rsp = rsp, grp = grp, strata = strata)$weights
  } else if (identical(weights_method, "wilson_h")) {
    prop_strat_wilson(rsp, strata, conf_level = conf_level, correct = correct)$weights
  }
  # Strata levels that did not receive a weight are given weight zero (appended by name).
  weights[levels(strata)[!levels(strata) %in% names(weights)]] <- 0

  # Calculating lower (`l`) and upper (`u`) confidence bounds per group.
  strat_wilson_by_grp <- Map(
    prop_strat_wilson,
    rsp = rsp_by_grp,
    strata = strata_by_grp,
    weights = list(weights, weights),
    conf_level = conf_level,
    correct = correct
  )

  ci_ref <- strat_wilson_by_grp[[1]]
  ci_trt <- strat_wilson_by_grp[[2]]
  l_ref <- as.numeric(ci_ref$conf_int[1])
  u_ref <- as.numeric(ci_ref$conf_int[2])
  l_trt <- as.numeric(ci_trt$conf_int[1])
  u_trt <- as.numeric(ci_trt$conf_int[2])

  # Estimating the diff and n_ref, n_trt (it allows different weights to be used)
  # Table dimensions: response (FALSE, TRUE) x group x stratum.
  t_tbl <- table(
    factor(rsp, levels = c("FALSE", "TRUE")),
    grp,
    strata
  )
  n_ref <- colSums(t_tbl[1:2, 1, ])
  n_trt <- colSums(t_tbl[1:2, 2, ])
  # Only strata with observations in both groups contribute to the estimate.
  use_stratum <- (n_ref > 0) & (n_trt > 0)
  n_ref <- n_ref[use_stratum]
  n_trt <- n_trt[use_stratum]
  p_ref <- t_tbl[2, 1, use_stratum] / n_ref
  p_trt <- t_tbl[2, 2, use_stratum] / n_trt
  # NOTE(review): `weights` can be longer than `p_ref`/`n_ref` when strata were dropped above;
  # the products below then rely on positional recycling against the zero weights appended
  # earlier. Confirm the ordering of the weights returned for "wilson_h" before relying on this.
  est1 <- sum(weights * p_ref)
  est2 <- sum(weights * p_trt)
  diff_est <- est2 - est1

  lambda1 <- sum(weights^2 / n_ref)
  lambda2 <- sum(weights^2 / n_trt)
  z <- stats::qnorm((1 + conf_level) / 2)

  # Confidence bounds of the difference, combining the per-group lower and upper limits.
  lower <- diff_est - z * sqrt(lambda2 * l_trt * (1 - l_trt) + lambda1 * u_ref * (1 - u_ref))
  upper <- diff_est + z * sqrt(lambda1 * l_ref * (1 - l_ref) + lambda2 * u_trt * (1 - u_trt))

  list(
    "diff" = diff_est,
    "diff_ci" = c("lower" = lower, "upper" = upper)
  )
}

#' Count patients by most extreme post-baseline toxicity grade per direction of abnormality
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [count_abnormal_by_worst_grade()] creates a layout element to count patients by highest (worst)
#' analysis toxicity grade post-baseline for each direction, categorized by parameter value.
#'
#' This function analyzes primary analysis variable `var` which indicates toxicity grades. Additional
#' analysis variables that can be supplied as a list via the `variables` parameter are `id` (defaults to
#' `USUBJID`), a variable to indicate unique subject identifiers, `param` (defaults to `PARAM`), a variable
#' to indicate parameter values, and `grade_dir` (defaults to `GRADE_DIR`), a variable to indicate directions
#' (e.g. High or Low) for each toxicity grade supplied in `var`.
#'
#' For each combination of `param` and `grade_dir` levels, patient counts by worst
#' grade are calculated as follows:
#'   * `1` to `4`: The number of patients with worst grades 1-4, respectively.
#'   * `Any`: The number of patients with at least one abnormality (i.e. grade is not 0).
#'
#' Fractions are calculated by dividing the above counts by the number of patients with at least one
#' valid measurement recorded during treatment.
#'
#' Pre-processing is crucial when using this function and can be done automatically using the
#' [h_adlb_abnormal_by_worst_grade()] helper function. See the description of this function for details on the
#' necessary pre-processing steps.
#'
#' Prior to using this function in your table layout you must use [rtables::split_rows_by()] to create two row
#' splits, one on variable `param` and one on variable `grade_dir`.
#'
#' @inheritParams argument_convention
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("abnormal_by_worst_grade"), type = "sh")``
#'
#' @seealso [h_adlb_abnormal_by_worst_grade()] which pre-processes ADLB data frames to be used in
#'   [count_abnormal_by_worst_grade()].
#'
#' @name abnormal_by_worst_grade
#' @order 1
NULL

#' @describeIn abnormal_by_worst_grade Statistics function which counts patients by worst grade.
#'
#' @return
#' * `s_count_abnormal_by_worst_grade()` returns the single statistic `count_fraction` with grades 1 to 4 and
#'   "Any" results.
#'
#' @keywords internal
s_count_abnormal_by_worst_grade <- function(df,
                                            .var = "GRADE_ANL",
                                            .spl_context,
                                            variables = list(
                                              id = "USUBJID",
                                              param = "PARAM",
                                              grade_dir = "GRADE_DIR"
                                            ),
                                            ...) {
  # Counts unique subjects per worst grade level of `df[[.var]]` (every level except "0"),
  # plus an "Any" entry for subjects with any non-zero grade.
  #
  # Arguments:
  #   df           data subset for the current row/column split.
  #   .var         name of the grade variable (a factor).
  #   .spl_context split context; must contain a split on `variables$param` and one on
  #                `variables$grade_dir`.
  #   variables    list naming the `id`, `param` and `grade_dir` columns of `df`.
  #
  # Returns `list(count_fraction = <list>)` where each element is a labelled
  # `c(count = , fraction = )` vector, one per grade level and one for "Any".
  id_var <- variables[["id"]]
  param_var <- variables[["param"]]
  grade_dir_var <- variables[["grade_dir"]]

  checkmate::assert_string(.var)
  assert_valid_factor(df[[.var]])
  assert_valid_factor(df[[param_var]])
  assert_valid_factor(df[[grade_dir_var]])
  assert_df_with_variables(df, c(a = .var, variables))
  checkmate::assert_multi_class(df[[id_var]], classes = c("factor", "character"))

  # To verify that the `split_rows_by` are performed with correct variables.
  checkmate::assert_subset(c(param_var, grade_dir_var), .spl_context$split)
  first_row <- .spl_context[.spl_context$split == param_var, ]
  x_lvls <- c(setdiff(levels(df[[.var]]), "0"), "Any")
  result <- split(numeric(0), factor(x_lvls))

  # The denominator is taken at the `param` split level for the current column: some subjects
  # may have a record for high and low directions but should be counted only once.
  subj <- first_row$full_parent_df[[1]][[id_var]]
  subj_cur_col <- subj[first_row$cur_col_subset[[1]]]
  denom <- length(unique(subj_cur_col))

  grades <- df[[.var]]
  ids <- df[[id_var]]
  # Records with a missing grade must never be selected: a logical index containing `NA`
  # would otherwise yield an `NA` id that `unique()` counts as an additional subject.
  has_grade <- !is.na(grades)

  for (lvl in x_lvls) {
    in_lvl <- if (lvl != "Any") grades == lvl else grades != "0"
    num <- length(unique(ids[has_grade & in_lvl]))
    fraction <- if (denom == 0) 0 else num / denom
    result[[lvl]] <- formatters::with_label(c(count = num, fraction = fraction), lvl)
  }

  list(count_fraction = result)
}

#' @describeIn abnormal_by_worst_grade Formatted analysis function which is used as `afun`
#'   in `count_abnormal_by_worst_grade()`.
#'
#' @return
#' * `a_count_abnormal_by_worst_grade()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_count_abnormal_by_worst_grade <- function(df,
                                            ...,
                                            .stats = NULL,
                                            .stat_names = NULL,
                                            .formats = NULL,
                                            .labels = NULL,
                                            .indent_mods = NULL) {
  # Formatted analysis function: runs `s_count_abnormal_by_worst_grade()` (plus any custom
  # statistic functions supplied through `.stats`) and returns the results via `in_rows()`.

  # Separate the layout-supplied parameters from the arguments meant for the statistics function.
  # `retrieve_extra_afun_params()` is deliberately called straight from this function body.
  stat_fun_args <- list(...)
  layout_params <- retrieve_extra_afun_params(names(stat_fun_args$.additional_fun_parameters))
  stat_fun_args$.additional_fun_parameters <- NULL

  # Tell standard statistic names apart from user-defined statistic functions
  split_stats <- .split_std_from_custom_stats(.stats)
  .stats <- split_stats$all_stats
  custom_stat_functions <- split_stats$custom_stats

  # Compute all statistics
  computed <- .apply_stat_functions(
    default_stat_fnc = s_count_abnormal_by_worst_grade,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(df = list(df), layout_params, stat_fun_args)
  )

  # Resolve statistics, formats, labels and indentation for every level of every statistic
  .stats <- get_stats(
    "abnormal_by_worst_grade",
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions)
  )
  stat_levels <- lapply(computed, names)
  .formats <- get_formats_from_stats(.stats, .formats, stat_levels)
  .labels <- get_labels_from_stats(.stats, .labels, stat_levels)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, stat_levels)

  # Flatten the selected statistics to one entry per level, named like the formats
  computed <- setNames(.unlist_keep_nulls(computed[.stats]), names(.formats))

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, computed, layout_params$.df_row, layout_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(computed, .stat_names)

  # The flattened labels serve both as row names and as row labels
  flat_labels <- .unlist_keep_nulls(.labels)
  in_rows(
    .list = computed,
    .formats = .formats,
    .names = flat_labels,
    .stat_names = .stat_names,
    .labels = flat_labels,
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}

#' @describeIn abnormal_by_worst_grade Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_by_worst_grade()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal_by_worst_grade()` to the table layout.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#' adlb <- tern_ex_adlb
#'
#' # Data is modified in order to have some parameters with grades only in one direction
#' # and simulate the real data.
#' adlb$ATOXGR[adlb$PARAMCD == "ALT" & adlb$ATOXGR %in% c("1", "2", "3", "4")] <- "-1"
#' adlb$ANRIND[adlb$PARAMCD == "ALT" & adlb$ANRIND == "HIGH"] <- "LOW"
#' adlb$WGRHIFL[adlb$PARAMCD == "ALT"] <- ""
#'
#' adlb$ATOXGR[adlb$PARAMCD == "IGA" & adlb$ATOXGR %in% c("-1", "-2", "-3", "-4")] <- "1"
#' adlb$ANRIND[adlb$PARAMCD == "IGA" & adlb$ANRIND == "LOW"] <- "HIGH"
#' adlb$WGRLOFL[adlb$PARAMCD == "IGA"] <- ""
#'
#' # Pre-processing
#' adlb_f <- adlb %>% h_adlb_abnormal_by_worst_grade()
#'
#' # Map excludes records without abnormal grade since they should not be displayed
#' # in the table.
#' map <- unique(adlb_f[adlb_f$GRADE_DIR != "ZERO", c("PARAM", "GRADE_DIR", "GRADE_ANL")]) %>%
#'   lapply(as.character) %>%
#'   as.data.frame() %>%
#'   arrange(PARAM, desc(GRADE_DIR), GRADE_ANL)
#'
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   split_rows_by("PARAM") %>%
#'   split_rows_by("GRADE_DIR", split_fun = trim_levels_to_map(map)) %>%
#'   count_abnormal_by_worst_grade(
#'     var = "GRADE_ANL",
#'     variables = list(id = "USUBJID", param = "PARAM", grade_dir = "GRADE_DIR")
#'   ) %>%
#'   build_table(df = adlb_f)
#'
#' @export
#' @order 2
count_abnormal_by_worst_grade <- function(lyt,
                                          var,
                                          variables = list(
                                            id = "USUBJID",
                                            param = "PARAM",
                                            grade_dir = "GRADE_DIR"
                                          ),
                                          na_str = default_na_str(),
                                          nested = TRUE,
                                          ...,
                                          .stats = "count_fraction",
                                          .stat_names = NULL,
                                          .formats = list(count_fraction = format_count_fraction),
                                          .labels = NULL,
                                          .indent_mods = NULL) {
  # Standard formatting arguments: `.stats` is always forwarded, the others only when supplied
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  extra_args <- c(list(.stats = .stats), Filter(Negate(is.null), optional_args))

  # Arguments for the statistics function
  extra_args <- c(extra_args, list(variables = variables), ...)

  # Layout information requested by the analysis function: recorded in `extra_args` and appended
  # to the formals of a local copy of the analysis function
  afun_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- afun_params
  formals(a_count_abnormal_by_worst_grade) <- c(formals(a_count_abnormal_by_worst_grade), afun_params)

  analyze(
    lyt = lyt,
    vars = var,
    afun = a_count_abnormal_by_worst_grade,
    extra_args = extra_args,
    na_str = na_str,
    nested = nested,
    show_labels = "hidden"
  )
}

#' Helper function to prepare ADLB for `count_abnormal_by_worst_grade()`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to prepare an ADLB data frame to be used as input in
#' [count_abnormal_by_worst_grade()]. The following pre-processing steps are applied:
#'
#' 1. `adlb` is filtered on variable `avisit` to only include post-baseline visits.
#' 2. `adlb` is filtered on variables `worst_flag_low` and `worst_flag_high` so that only
#'    worst grades (in either direction) are included.
#' 3. From the standard lab grade variable `atoxgr`, the following two variables are derived
#'    and added to `adlb`:
#'   * A grade direction variable (e.g. `GRADE_DIR`). The variable takes value `"HIGH"` when
#'     `atoxgr > 0`, `"LOW"` when `atoxgr < 0`, `"ZERO"` when `atoxgr` is `"0"`, and `NA` otherwise.
#'   * A toxicity grade variable (e.g. `GRADE_ANL`) where all negative values from `atoxgr` are
#'     replaced by their absolute values.
#' 4. Unused factor levels are dropped from `adlb` via [droplevels()].
#'
#' @param adlb (`data.frame`)\cr ADLB data frame.
#' @param atoxgr (`string`)\cr name of the analysis toxicity grade variable. This must be a `factor`
#'   variable.
#' @param avisit (`string`)\cr name of the analysis visit variable.
#' @param worst_flag_low (`string`)\cr name of the worst low lab grade flag variable. This variable is
#'   set to `"Y"` when indicating records of worst low lab grades.
#' @param worst_flag_high (`string`)\cr name of the worst high lab grade flag variable. This variable is
#'   set to `"Y"` when indicating records of worst high lab grades.
#'
#' @return `h_adlb_abnormal_by_worst_grade()` returns the `adlb` data frame with two new
#'   variables: `GRADE_DIR` and `GRADE_ANL`.
#'
#' @seealso [abnormal_by_worst_grade]
#'
#' @examples
#' h_adlb_abnormal_by_worst_grade(tern_ex_adlb) %>%
#'   dplyr::select(ATOXGR, GRADE_DIR, GRADE_ANL) %>%
#'   head(10)
#'
#' @export
h_adlb_abnormal_by_worst_grade <- function(adlb,
                                           atoxgr = "ATOXGR",
                                           avisit = "AVISIT",
                                           worst_flag_low = "WGRLOFL",
                                           worst_flag_high = "WGRHIFL") {
  # Step 1: keep post-baseline records flagged as worst grade in either direction
  worst_post_baseline <- dplyr::filter(
    adlb,
    !.data[[avisit]] %in% c("SCREENING", "BASELINE"),
    .data[[worst_flag_low]] == "Y" | .data[[worst_flag_high]] == "Y"
  )

  # Step 2: derive the direction of each grade and its unsigned value
  with_grades <- dplyr::mutate(
    worst_post_baseline,
    # The sign of the grade gives the direction; grades outside -4..4 yield NA
    GRADE_DIR = factor(
      dplyr::case_when(
        .data[[atoxgr]] %in% c("1", "2", "3", "4") ~ "HIGH",
        .data[[atoxgr]] %in% c("-1", "-2", "-3", "-4") ~ "LOW",
        .data[[atoxgr]] == "0" ~ "ZERO"
      ),
      levels = c("LOW", "ZERO", "HIGH")
    ),
    # Negative grades are folded onto their absolute value, then levels are put in 0..4 order
    GRADE_ANL = forcats::fct_relevel(
      forcats::fct_recode(.data[[atoxgr]], `1` = "-1", `2` = "-2", `3` = "-3", `4` = "-4"),
      c("0", "1", "2", "3", "4")
    )
  )

  # Step 3: discard factor levels that no longer occur
  droplevels(with_grades)
}

#' Incidence rate estimation
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [estimate_incidence_rate()] creates a layout element to estimate an event rate adjusted for
#' person-years at risk, otherwise known as incidence rate. The primary analysis variable specified via `vars` is
#' the person-years at risk. In addition to this variable, the `n_events` variable for number of events observed (where
#' a value of 1 means an event was observed and 0 means that no event was observed) must also be specified.
#'
#' @inheritParams argument_convention
#' @param control (`list`)\cr parameters for estimation details, specified by using
#'   the helper function [control_incidence_rate()]. Possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level for the estimated incidence rate.
#'   * `conf_type` (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
#'     for confidence interval type.
#'   * `input_time_unit` (`string`)\cr `day`, `week`, `month`, or `year` (default)
#'     indicating time unit for data input.
#'   * `num_pt_year` (`numeric`)\cr time unit for desired output (in person-years).
#' @param n_events (`string`)\cr name of integer variable indicating whether an event has been observed (1) or not (0).
#' @param id_var (`string`)\cr name of variable used as patient identifier if `"n_unique"` is included in `.stats`.
#'   Defaults to `"USUBJID"`.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("estimate_incidence_rate"), type = "sh")``
#' @param summarize (`flag`)\cr whether the function should act as an analyze function (`summarize = FALSE`), or a
#'   summarize function (`summarize = TRUE`). Defaults to `FALSE`.
#' @param label_fmt (`string`)\cr how labels should be formatted after a row split occurs if `summarize = TRUE`. The
#'   string should use `"%s"` to represent row split levels, and `"%.labels"` to represent labels supplied to the
#'   `.labels` argument. Defaults to `"%s - %.labels"`.
#'
#' @seealso [control_incidence_rate()] and helper functions [h_incidence_rate].
#'
#' @examples
#' df <- data.frame(
#'   USUBJID = as.character(seq(6)),
#'   CNSR = c(0, 1, 1, 0, 0, 0),
#'   AVAL = c(10.1, 20.4, 15.3, 20.8, 18.7, 23.4),
#'   ARM = factor(c("A", "A", "A", "B", "B", "B")),
#'   STRATA1 = factor(c("X", "Y", "Y", "X", "X", "Y"))
#' )
#' df$n_events <- 1 - df$CNSR
#'
#' @name incidence_rate
#' @order 1
NULL

#' @describeIn incidence_rate Statistics function which estimates the incidence rate and the
#'   associated confidence interval.
#'
#' @return
#' * `s_incidence_rate()` returns the following statistics:
#'   - `person_years`: Total person-years at risk.
#'   - `n_events`: Total number of events observed.
#'   - `rate`: Estimated incidence rate.
#'   - `rate_ci`: Confidence interval for the incidence rate.
#'   - `n_unique`: Total number of patients with at least one event observed.
#'   - `n_rate`: Total number of events observed & estimated incidence rate.
#'
#' @keywords internal
s_incidence_rate <- function(df,
                             .var,
                             ...,
                             n_events,
                             is_event = lifecycle::deprecated(),
                             id_var = "USUBJID",
                             control = control_incidence_rate()) {
  # Estimates the incidence rate and related summaries.
  #
  # Arguments:
  #   df        data frame holding the analysis records.
  #   .var      name of the numeric time-at-risk variable, expressed in `control$input_time_unit`.
  #   n_events  name of the integer-valued event variable.
  #   is_event  deprecated; when supplied it replaces `n_events` and the column is coerced to numeric.
  #   id_var    name of the subject identifier variable, used for `n_unique`.
  #   control   list read for `input_time_unit`, `num_pt_year` and `conf_level`, and passed on to
  #             `h_incidence_rate()`.
  #
  # Returns a list of labelled statistics: `person_years`, `n_events`, `rate`, `rate_ci`,
  # `n_unique` and `n_rate`.
  if (lifecycle::is_present(is_event)) {
    checkmate::assert_string(is_event)
    lifecycle::deprecate_warn(
      "0.9.6", "s_incidence_rate(is_event)", "s_incidence_rate(n_events)"
    )
    n_events <- is_event
    df[[n_events]] <- as.numeric(df[[is_event]])
  }

  # Validate the variable names before they are used to index `df`
  checkmate::assert_string(.var)
  checkmate::assert_string(n_events)
  checkmate::assert_string(id_var)
  assert_df_with_variables(df, list(tte = .var, n_events = n_events))
  checkmate::assert_numeric(df[[.var]], any.missing = FALSE)
  checkmate::assert_integerish(df[[n_events]], any.missing = FALSE)

  event_counts <- df[[n_events]]
  # "At least one event": use `> 0` so that subjects whose record carries a count above 1 are
  # included as well (identical to `== 1` for 0/1 indicators).
  n_unique <- n_available(unique(df[[id_var]][event_counts > 0]))

  input_time_unit <- control$input_time_unit
  num_pt_year <- control$num_pt_year
  conf_level <- control$conf_level

  # Convert the total time at risk from the input unit into years
  person_years <- sum(df[[.var]], na.rm = TRUE) * (
    1 * (input_time_unit == "year") +
      1 / 12 * (input_time_unit == "month") +
      1 / 52.14 * (input_time_unit == "week") +
      1 / 365.24 * (input_time_unit == "day")
  )
  total_events <- sum(event_counts, na.rm = TRUE)

  result <- h_incidence_rate(
    person_years,
    total_events,
    control
  )
  list(
    person_years = formatters::with_label(person_years, "Total patient-years at risk"),
    n_events = formatters::with_label(total_events, "Number of adverse events observed"),
    rate = formatters::with_label(result$rate, paste("AE rate per", num_pt_year, "patient-years")),
    rate_ci = formatters::with_label(result$rate_ci, f_conf_level(conf_level)),
    n_unique = formatters::with_label(n_unique, "Total number of patients with at least one adverse event"),
    n_rate = formatters::with_label(
      c(total_events, result$rate),
      paste("Number of adverse events observed (AE rate per", num_pt_year, "patient-years)")
    )
  )
}

#' @describeIn incidence_rate Formatted analysis function which is used as `afun` in `estimate_incidence_rate()`.
#'
#' @return
#' * `a_incidence_rate()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_incidence_rate(
#'   df,
#'   .var = "AVAL",
#'   .df_row = df,
#'   n_events = "n_events"
#' )
#'
#' @export
a_incidence_rate <- function(df,
                             labelstr = "",
                             label_fmt = "%s - %.labels",
                             ...,
                             .stats = NULL,
                             .stat_names = NULL,
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL) {
  # Formatted analysis/content function: runs `s_incidence_rate()` (plus any custom statistic
  # functions supplied through `.stats`) and returns the results via `in_rows()`.
  # When `labelstr` is non-empty, every row label is rebuilt from `label_fmt`, where "%s" stands
  # for `labelstr` and "%.labels" for the statistic's own label.
  checkmate::assert_string(label_fmt)

  # Check for additional parameters to the statistics function
  dots_extra_args <- list(...)
  extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters))
  dots_extra_args$.additional_fun_parameters <- NULL

  # Check for user-defined functions
  default_and_custom_stats_list <- .split_std_from_custom_stats(.stats)
  .stats <- default_and_custom_stats_list$all_stats
  custom_stat_functions <- default_and_custom_stats_list$custom_stats

  # Main statistic calculations
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_incidence_rate,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      df = list(df),
      extra_afun_params,
      dots_extra_args
    )
  )

  # Fill in formatting defaults
  .stats <- get_stats("estimate_incidence_rate", stats_in = .stats, custom_stats_in = names(custom_stat_functions))
  x_stats <- x_stats[.stats]
  .formats <- get_formats_from_stats(.stats, .formats)
  .labels <- get_labels_from_stats(.stats, .labels, tern_defaults = lapply(x_stats, attr, "label"))
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  # Apply label format. Both placeholders are substituted literally (`fixed = TRUE`) so that
  # backslashes in `labelstr` or in a label are kept as is instead of being read as regex escapes.
  if (nzchar(labelstr)) {
    label_template <- gsub("%s", labelstr, label_fmt, fixed = TRUE)
    .labels <- vapply(
      .labels,
      function(x) gsub("%.labels", x, label_template, fixed = TRUE),
      FUN.VALUE = character(1)
    )
  }

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = names(.labels),
    .stat_names = .stat_names,
    .labels = .labels %>% .unlist_keep_nulls(),
    .indent_mods = .indent_mods %>% .unlist_keep_nulls()
  )
}

#' @describeIn incidence_rate Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `estimate_incidence_rate()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_incidence_rate()` to the table layout.
#'
#' @examples
#' basic_table(show_colcounts = TRUE) %>%
#'   split_cols_by("ARM") %>%
#'   estimate_incidence_rate(
#'     vars = "AVAL",
#'     n_events = "n_events",
#'     control = control_incidence_rate(
#'       input_time_unit = "month",
#'       num_pt_year = 100
#'     )
#'   ) %>%
#'   build_table(df)
#'
#' # summarize = TRUE
#' basic_table(show_colcounts = TRUE) %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("STRATA1", child_labels = "visible") %>%
#'   estimate_incidence_rate(
#'     vars = "AVAL",
#'     n_events = "n_events",
#'     .stats = c("n_unique", "n_rate"),
#'     summarize = TRUE,
#'     label_fmt = "%.labels"
#'   ) %>%
#'   build_table(df)
#'
#' @export
#' @order 2
estimate_incidence_rate <- function(lyt,
                                    vars,
                                    n_events,
                                    id_var = "USUBJID",
                                    control = control_incidence_rate(),
                                    na_str = default_na_str(),
                                    nested = TRUE,
                                    summarize = FALSE,
                                    label_fmt = "%s - %.labels",
                                    ...,
                                    show_labels = "hidden",
                                    table_names = vars,
                                    .stats = c("person_years", "n_events", "rate", "rate_ci"),
                                    .stat_names = NULL,
                                    .formats = list(rate = "xx.xx", rate_ci = "(xx.xx, xx.xx)"),
                                    .labels = NULL,
                                    .indent_mods = NULL) {
  # Standard formatting arguments: `.stats` is always forwarded, the others only when supplied
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  extra_args <- c(list(.stats = .stats), Filter(Negate(is.null), optional_args))

  # Arguments for the statistics function and the label formatting
  extra_args <- c(
    extra_args,
    n_events = n_events,
    id_var = id_var,
    control = list(control),
    label_fmt = label_fmt,
    ...
  )

  # Layout information requested by the analysis function: recorded in `extra_args` and appended
  # to the formals of a local copy of the analysis function
  afun_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- afun_params
  formals(a_incidence_rate) <- c(formals(a_incidence_rate), afun_params)

  if (summarize) {
    # Content rows for the current row split
    summarize_row_groups(
      lyt = lyt,
      var = vars,
      cfun = a_incidence_rate,
      na_str = na_str,
      extra_args = extra_args
    )
  } else {
    # Regular analysis rows
    analyze(
      lyt = lyt,
      vars = vars,
      afun = a_incidence_rate,
      na_str = na_str,
      nested = nested,
      extra_args = extra_args,
      show_labels = show_labels,
      table_names = table_names
    )
  }
}

#' Line plot with optional table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Line plot with optional table.
#'
#' @inheritParams argument_convention
#' @param alt_counts_df (`data.frame` or `NULL`)\cr data set that will be used (only)
#'   to count objects in groups for stratification.
#' @param variables (named `character`) vector of variable names in `df` which should include:
#'   * `x` (`string`)\cr name of x-axis variable.
#'   * `y` (`string`)\cr name of y-axis variable.
#'   * `group_var` (`string` or `NULL`)\cr name of grouping variable (or strata), i.e. treatment arm.
#'     Can be `NA` to indicate lack of groups.
#'   * `subject_var` (`string` or `NULL`)\cr name of subject variable. Only applies if `group_var` is
#'      not NULL.
#'   * `paramcd` (`string` or `NA`)\cr name of the variable for parameter's code. Used for y-axis label and plot's
#'     subtitle. Can be `NA` if `paramcd` is not to be added to the y-axis label or subtitle.
#'   * `y_unit` (`string` or `NA`)\cr name of variable with units of `y`. Used for y-axis label and plot's subtitle.
#'     Can be `NA` if y unit is not to be added to the y-axis label or subtitle.
#'   * `facet_var` (`string` or `NA`)\cr name of the secondary grouping variable used for plot faceting, i.e. treatment
#'     arm. Can be `NA` to indicate lack of groups.
#' @param mid (`character` or `NULL`)\cr names of the statistics that will be plotted as midpoints.
#'   All the statistics indicated in `mid` variable must be present in the object returned by `sfun`,
#'   and be of a `double` or `numeric` type vector of length one.
#' @param interval (`character` or `NULL`)\cr names of the statistics that will be plotted as intervals.
#'   All the statistics indicated in `interval` variable must be present in the object returned by `sfun`,
#'   and be of a `double` or `numeric` type vector of length two. Set `interval = NULL` if intervals should not be
#'   added to the plot.
#' @param whiskers (`character`)\cr names of the interval whiskers that will be plotted. Names must match names
#'   of the list element `interval` that will be returned by `sfun` (e.g. `mean_ci_lwr` element of
#'   `sfun(x)[["mean_ci"]]`). It is possible to specify one whisker only, or to suppress all whiskers by setting
#'   `interval = NULL`.
#' @param table (`character` or `NULL`)\cr names of the statistics that will be displayed in the table below the plot.
#'   All the statistics indicated in `table` variable must be present in the object returned by `sfun`.
#' @param sfun (`function`)\cr the function to compute the values of required statistics. It must return a named `list`
#'   with atomic vectors. The names of the `list` elements refer to the names of the statistics and are used by `mid`,
#'   `interval`, `table`. It must be able to accept as input a vector with data for which statistics are computed.
#' @param ... optional arguments to `sfun`.
#' @param mid_type (`string`)\cr controls the type of the `mid` plot, it can be point (`"p"`), line (`"l"`),
#'   or point and line (`"pl"`).
#' @param mid_point_size (`numeric(1)`)\cr font size of the `mid` plot points.
#' @param position (`character` or `call`)\cr geom element position adjustment, either as a string, or the result of
#'   a call to a position adjustment function.
#' @param legend_title (`string`)\cr legend title.
#' @param legend_position (`string`)\cr the position of the plot legend (`"none"`, `"left"`, `"right"`, `"bottom"`,
#'   `"top"`, or a two-element numeric vector).
#' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to control styling of the plot.
#' @param xticks (`numeric` or `NULL`)\cr numeric vector of tick positions or a single number with spacing
#'   between ticks on the x-axis, for use when `variables$x` is numeric. If `NULL` (default), [labeling::extended()] is
#'   used to determine optimal tick positions on the x-axis. If `variables$x` is not numeric, this argument is ignored.
#' @param x_lab (`string` or `NULL`)\cr x-axis label. If `NULL` then no label will be added.
#' @param y_lab (`string` or `NULL`)\cr y-axis label. If `NULL` then no label will be added.
#' @param y_lab_add_paramcd (`flag`)\cr whether `paramcd`, i.e. `unique(df[[variables["paramcd"]]])` should be added
#'   to the y-axis label (`y_lab`).
#' @param y_lab_add_unit (`flag`)\cr whether y-axis unit, i.e. `unique(df[[variables["y_unit"]]])` should be added
#'   to the y-axis label (`y_lab`).
#' @param title (`string`)\cr plot title.
#' @param subtitle (`string`)\cr plot subtitle.
#' @param subtitle_add_paramcd (`flag`)\cr whether `paramcd`, i.e. `unique(df[[variables["paramcd"]]])` should be
#'   added to the plot's subtitle (`subtitle`).
#' @param subtitle_add_unit (`flag`)\cr whether the y-axis unit, i.e. `unique(df[[variables["y_unit"]]])` should be
#'   added to the plot's subtitle (`subtitle`).
#' @param caption (`string`)\cr optional caption below the plot.
#' @param table_format (named `vector` or `NULL`)\cr custom formats for descriptive statistics used instead of defaults
#'   in the (optional) table appended to the plot. It is passed directly to the `h_format_row` function through
#'   the `format` parameter. Names of `table_format` must match the names of statistics returned by `sfun` function.
#'   Can be a character vector with values from [formatters::list_valid_format_labels()] or custom format functions.
#' @param table_labels (named `character` or `NULL`)\cr labels for descriptive statistics used in the (optional) table
#'   appended to the plot. Names of `table_labels` must match the names of statistics returned by `sfun` function.
#' @param table_font_size (`numeric(1)`)\cr font size of the text in the table.
#' @param newpage `r lifecycle::badge("deprecated")` not used.
#' @param col (`character`)\cr color(s). See `?ggplot2::aes_colour_fill_alpha` for example values.
#' @param linetype (`character`)\cr line type(s). See `?ggplot2::aes_linetype_size_shape` for example values.
#' @param errorbar_width (`numeric(1)`)\cr width of the error bars.
#' @param rel_height_plot (`proportion`)\cr proportion of total figure height to allocate to the line plot.
#'   Relative height of annotation table is then `1 - rel_height_plot`. If `table = NULL`, this parameter is ignored.
#' @param as_list (`flag`)\cr whether the two `ggplot` objects should be returned as a list when `table` is not `NULL`.
#'   If `TRUE`, a named list with two elements, `plot` and `table`, will be returned. If `FALSE` (default) the
#'   annotation table is printed below the plot via [cowplot::plot_grid()].
#'
#' @return A `ggplot` line plot (and statistics table if applicable).
#'
#' @examples
#'
#' adsl <- tern_ex_adsl
#' adlb <- tern_ex_adlb %>% dplyr::filter(ANL01FL == "Y", PARAMCD == "ALT", AVISIT != "SCREENING")
#' adlb$AVISIT <- droplevels(adlb$AVISIT)
#' adlb <- dplyr::mutate(adlb, AVISIT = forcats::fct_reorder(AVISIT, AVISITN, min))
#'
#' # Mean with CI
#' g_lineplot(adlb, adsl, subtitle = "Laboratory Test:")
#'
#' # Mean with CI, no stratification with group_var
#' g_lineplot(adlb, variables = control_lineplot_vars(group_var = NA))
#'
#' # Mean, upper whisker of CI, no group_var(strata) counts N
#' g_lineplot(
#'   adlb,
#'   whiskers = "mean_ci_upr",
#'   title = "Plot of Mean and Upper 95% Confidence Limit by Visit"
#' )
#'
#' # Median with CI
#' g_lineplot(
#'   adlb,
#'   adsl,
#'   mid = "median",
#'   interval = "median_ci",
#'   whiskers = c("median_ci_lwr", "median_ci_upr"),
#'   title = "Plot of Median and 95% Confidence Limits by Visit"
#' )
#'
#' # Mean, +/- SD
#' g_lineplot(adlb, adsl,
#'   interval = "mean_sdi",
#'   whiskers = c("mean_sdi_lwr", "mean_sdi_upr"),
#'   title = "Plot of Mean +/- SD by Visit"
#' )
#'
#' # Mean with CI plot with stats table
#' g_lineplot(adlb, adsl, table = c("n", "mean", "mean_ci"))
#'
#' # Mean with CI, table and customized confidence level
#' g_lineplot(
#'   adlb,
#'   adsl,
#'   table = c("n", "mean", "mean_ci"),
#'   control = control_analyze_vars(conf_level = 0.80),
#'   title = "Plot of Mean and 80% Confidence Limits by Visit"
#' )
#'
#' # Mean with CI, table with customized formats/labels
#' g_lineplot(
#'   adlb,
#'   adsl,
#'   table = c("n", "mean", "mean_ci"),
#'   table_format = list(
#'     mean = function(x, ...) {
#'       ifelse(x < 20, round_fmt(x, digits = 3), round_fmt(x, digits = 2))
#'     },
#'     mean_ci = "(xx.xxx, xx.xxx)"
#'   ),
#'   table_labels = list(
#'     mean = "mean",
#'     mean_ci = "95% CI"
#'   )
#' )
#'
#' # Mean with CI, table, filtered data
#' adlb_f <- dplyr::filter(adlb, ARMCD != "ARM A" | AVISIT == "BASELINE")
#' g_lineplot(adlb_f, table = c("n", "mean"))
#'
#' @export
g_lineplot <- function(df,
                       alt_counts_df = NULL,
                       variables = control_lineplot_vars(),
                       mid = "mean",
                       interval = "mean_ci",
                       whiskers = c("mean_ci_lwr", "mean_ci_upr"),
                       table = NULL,
                       sfun = s_summary,
                       ...,
                       mid_type = "pl",
                       mid_point_size = 2,
                       position = ggplot2::position_dodge(width = 0.4),
                       legend_title = NULL,
                       legend_position = "bottom",
                       ggtheme = nestcolor::theme_nest(),
                       xticks = NULL,
                       xlim = NULL,
                       ylim = NULL,
                       x_lab = obj_label(df[[variables[["x"]]]]),
                       y_lab = NULL,
                       y_lab_add_paramcd = TRUE,
                       y_lab_add_unit = TRUE,
                       title = "Plot of Mean and 95% Confidence Limits by Visit",
                       subtitle = "",
                       subtitle_add_paramcd = TRUE,
                       subtitle_add_unit = TRUE,
                       caption = NULL,
                       table_format = NULL,
                       table_labels = NULL,
                       table_font_size = 3,
                       errorbar_width = 0.45,
                       newpage = lifecycle::deprecated(),
                       col = NULL,
                       linetype = NULL,
                       rel_height_plot = 0.5,
                       as_list = FALSE) {
  # ---- Argument checks ----
  checkmate::assert_character(variables, any.missing = TRUE)
  checkmate::assert_character(mid, null.ok = TRUE)
  checkmate::assert_character(interval, null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)
  checkmate::assert_character(linetype, null.ok = TRUE)
  checkmate::assert_numeric(xticks, null.ok = TRUE)
  checkmate::assert_numeric(xlim, finite = TRUE, any.missing = FALSE, len = 2, sorted = TRUE, null.ok = TRUE)
  checkmate::assert_numeric(ylim, finite = TRUE, any.missing = FALSE, len = 2, sorted = TRUE, null.ok = TRUE)
  checkmate::assert_number(errorbar_width, lower = 0)
  checkmate::assert_string(title, null.ok = TRUE)
  checkmate::assert_string(subtitle, null.ok = TRUE)
  assert_proportion_value(rel_height_plot)
  checkmate::assert_logical(as_list)

  # Resolve formats and labels for the statistics shown in the optional table.
  if (!is.null(table)) {
    table_format <- get_formats_from_stats(table, formats_in = table_format)
    table_labels <- get_labels_from_stats(table, labels_in = table_labels) %>% .unlist_keep_nulls()
  }

  # If a `control` list was passed through `...` and the table labels are still the defaults,
  # let the control settings adjust the labels.
  extra_args <- list(...)
  if ("control" %in% names(extra_args)) {
    if (!is.null(table) && all(table_labels == .unlist_keep_nulls(get_labels_from_stats(table)))) {
      table_labels <- table_labels %>% labels_use_control(extra_args[["control"]])
    }
  }

  if (is.character(interval)) {
    checkmate::assert_vector(whiskers, min.len = 0, max.len = 2)
  }

  # A single whisker is drawn from `mid` to the whisker, so `mid` is then required.
  if (length(whiskers) == 1) {
    checkmate::assert_character(mid)
  }

  if (is.character(mid)) {
    checkmate::assert_scalar(mid_type)
    checkmate::assert_subset(mid_type, c("pl", "p", "l"))
  }

  # ---- Unpack variable names ----
  x <- variables[["x"]]
  y <- variables[["y"]]
  paramcd <- variables["paramcd"] # NA if paramcd == NA or it is not in variables
  y_unit <- variables["y_unit"] # NA if y_unit == NA or it is not in variables
  if (is.na(variables["group_var"])) {
    group_var <- NULL # NULL if group_var == NA or it is not in variables
  } else {
    group_var <- variables[["group_var"]]
    subject_var <- variables[["subject_var"]]
  }
  if (is.na(variables["facet_var"])) {
    facet_var <- NULL # NULL if facet_var == NA or it is not in variables
  } else {
    facet_var <- variables[["facet_var"]]
  }

  # The `*_add_*` flags may be NULL (allowed by the assertions below); `isTRUE()` treats
  # NULL as "do not add" instead of failing inside `&&` / `if`.
  checkmate::assert_flag(y_lab_add_paramcd, null.ok = TRUE)
  checkmate::assert_flag(subtitle_add_paramcd, null.ok = TRUE)
  if ((!is.null(y_lab) && isTRUE(y_lab_add_paramcd)) || (!is.null(subtitle) && isTRUE(subtitle_add_paramcd))) {
    checkmate::assert_false(is.na(paramcd))
    checkmate::assert_scalar(unique(df[[paramcd]]))
  }

  checkmate::assert_flag(y_lab_add_unit, null.ok = TRUE)
  checkmate::assert_flag(subtitle_add_unit, null.ok = TRUE)
  if ((!is.null(y_lab) && isTRUE(y_lab_add_unit)) || (!is.null(subtitle) && isTRUE(subtitle_add_unit))) {
    checkmate::assert_false(is.na(y_unit))
    checkmate::assert_scalar(unique(df[[y_unit]]))
  }

  # The alternative counts dataset must contain exactly the same groups as `df`.
  if (!is.null(group_var) && !is.null(alt_counts_df)) {
    checkmate::assert_set_equal(unique(alt_counts_df[[group_var]]), unique(df[[group_var]]))
  }

  ####################################### |
  # ---- Compute required statistics ----
  ####################################### |
  # Remove unused levels for x-axis
  if (is.factor(df[[x]])) {
    df[[x]] <- droplevels(df[[x]])
  }

  # Build the full grid of (facet, group, x) combinations so that combinations without
  # observations still get a row after the join below.
  if (!is.null(facet_var) && !is.null(group_var)) {
    df_grp <- tidyr::expand(df, .data[[facet_var]], .data[[group_var]], .data[[x]]) # expand based on levels of factors
  } else if (!is.null(group_var)) {
    df_grp <- tidyr::expand(df, .data[[group_var]], .data[[x]]) # expand based on levels of factors
  } else {
    df_grp <- tidyr::expand(df, NULL, .data[[x]])
  }

  df_grp <- df_grp %>%
    dplyr::full_join(y = df[, c(facet_var, group_var, x, y)], by = c(facet_var, group_var, x), multiple = "all") %>%
    dplyr::group_by_at(c(facet_var, group_var, x))

  # Statistics for the plot. `...` is forwarded to `sfun` so that user-supplied settings
  # (e.g. `control` with a custom `conf_level`) apply to the plotted interval exactly as
  # they apply to the table statistics computed further below.
  df_stats <- df_grp %>%
    dplyr::summarise(
      data.frame(t(do.call(c, unname(sfun(.data[[y]], ...)[c(mid, interval)])))),
      .groups = "drop"
    )

  # Drop combinations for which the mid statistic could not be computed.
  df_stats <- df_stats[!is.na(df_stats[[mid]]), ]

  # add number of objects N in group_var (strata)
  if (!is.null(group_var) && !is.null(alt_counts_df)) {
    strata_N <- paste0(group_var, "_N") # nolint

    # Number of unique subjects per group in the alternative counts dataset.
    df_N <- stats::aggregate(eval(parse(text = subject_var)) ~ eval(parse(text = group_var)), data = alt_counts_df, FUN = function(x) length(unique(x))) # nolint
    colnames(df_N) <- c(group_var, "N") # nolint
    df_N[[strata_N]] <- paste0(df_N[[group_var]], " (N = ", df_N$N, ")") # nolint

    # retain strata factor levels
    search_strings <- unique(df_N[[strata_N]])
    matches <- sapply(unique(df_N[[group_var]]), function(x) {
      # escape regex metacharacters in the group name before matching it as a prefix
      regex_pattern <- gsub("([][(){}^$.|*+?\\\\])", "\\\\\\1", x)
      search_strings[grepl(
        paste0("^", regex_pattern, "\\b"),
        search_strings
      )]
    })
    df_N[[paste0(group_var, "_N")]] <- factor(df_N[[group_var]]) # nolint
    levels(df_N[[paste0(group_var, "_N")]]) <- unlist(matches) # nolint

    # strata_N should not be in colnames(df_stats)
    checkmate::assert_disjunct(strata_N, colnames(df_stats))

    df_stats <- merge(x = df_stats, y = df_N[, c(group_var, strata_N)], by = group_var)
  } else if (!is.null(group_var)) {
    strata_N <- group_var # nolint
  } else {
    strata_N <- NULL # nolint
  }

  ############################################### |
  # ---- Prepare certain plot's properties. ----
  ############################################### |
  # legend title
  if (is.null(legend_title) && !is.null(group_var) && legend_position != "none") {
    legend_title <- attr(df[[group_var]], "label")
  }

  # y label
  if (!is.null(y_lab)) {
    if (isTRUE(y_lab_add_paramcd)) {
      y_lab <- paste(y_lab, unique(df[[paramcd]]))
    }

    if (isTRUE(y_lab_add_unit)) {
      y_lab <- paste0(y_lab, " (", unique(df[[y_unit]]), ")")
    }

    y_lab <- trimws(y_lab)
  }

  # subtitle
  if (!is.null(subtitle)) {
    if (isTRUE(subtitle_add_paramcd)) {
      subtitle <- paste(subtitle, unique(df[[paramcd]]))
    }

    if (isTRUE(subtitle_add_unit)) {
      subtitle <- paste0(subtitle, " (", unique(df[[y_unit]]), ")")
    }

    subtitle <- trimws(subtitle)
  }

  ############################### |
  # ---- Build plot object. ----
  ############################### |
  # Colour, shape, line type and grouping all follow the strata variable (if any).
  p <- ggplot2::ggplot(
    data = df_stats,
    mapping = ggplot2::aes(
      x = .data[[x]], y = .data[[mid]],
      color = if (is.null(strata_N)) NULL else .data[[strata_N]],
      shape = if (is.null(strata_N)) NULL else .data[[strata_N]],
      lty = if (is.null(strata_N)) NULL else .data[[strata_N]],
      group = if (is.null(strata_N)) NULL else .data[[strata_N]]
    )
  )

  # With more than 6 strata levels, supply the shape values explicitly.
  if (!is.null(group_var) && nlevels(df_stats[[strata_N]]) > 6) {
    p <- p +
      scale_shape_manual(values = seq(15, 15 + nlevels(df_stats[[strata_N]])))
  }

  if (!is.null(mid)) {
    # points
    if (grepl("p", mid_type, fixed = TRUE)) {
      p <- p + ggplot2::geom_point(position = position, size = mid_point_size, na.rm = TRUE)
    }

    # lines - plotted only if there is a strata grouping (group_var)
    if (grepl("l", mid_type, fixed = TRUE) && !is.null(strata_N)) {
      p <- p + ggplot2::geom_line(position = position, na.rm = TRUE)
    }
  }

  # interval
  if (!is.null(interval)) {
    # With a single whisker, ymin and ymax both map to that whisker.
    p <- p +
      ggplot2::geom_errorbar(
        ggplot2::aes(ymin = .data[[whiskers[1]]], ymax = .data[[whiskers[max(1, length(whiskers))]]]),
        width = errorbar_width,
        position = position
      )

    if (length(whiskers) == 1) { # lwr or upr only; mid is then required
      # workaround as geom_errorbar does not provide single-direction whiskers
      p <- p +
        ggplot2::geom_linerange(
          data = df_stats[!is.na(df_stats[[whiskers]]), ], # as na.rm =TRUE does not suppress warnings
          ggplot2::aes(ymin = .data[[mid]], ymax = .data[[whiskers]]),
          position = position,
          na.rm = TRUE,
          show.legend = FALSE
        )
    }
  }

  if (is.numeric(df_stats[[x]])) {
    # A single `xticks` value is interpreted as the step between consecutive ticks.
    if (length(xticks) == 1) xticks <- seq(from = min(df_stats[[x]]), to = max(df_stats[[x]]), by = xticks)
    p <- p + ggplot2::scale_x_continuous(breaks = if (!is.null(xticks)) xticks else waiver(), limits = xlim)
  }

  p <- p +
    ggplot2::scale_y_continuous(labels = scales::comma, limits = ylim) +
    ggplot2::labs(
      title = title,
      subtitle = subtitle,
      caption = caption,
      color = legend_title,
      lty = legend_title,
      shape = legend_title,
      x = x_lab,
      y = y_lab
    )

  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_color_manual(values = col)
  }
  if (!is.null(linetype)) {
    p <- p +
      ggplot2::scale_linetype_manual(values = linetype)
  }

  if (!is.null(facet_var)) {
    p <- p +
      facet_grid(cols = vars(df_stats[[facet_var]]))
  }

  # A user-supplied theme is applied as is; the fallback theme below is the only place
  # where `legend_position` is used to position the legend.
  if (!is.null(ggtheme)) {
    p <- p + ggtheme
  } else {
    p <- p +
      ggplot2::theme_bw() +
      ggplot2::theme(
        legend.key.width = grid::unit(1, "cm"),
        legend.position = legend_position,
        legend.direction = ifelse(
          legend_position %in% c("top", "bottom"),
          "horizontal",
          "vertical"
        )
      )
  }

  ############################################################# |
  # ---- Optionally, add table to the bottom of the plot. ----
  ############################################################# |
  if (!is.null(table)) {
    # One formatted row of statistics per (group, x) combination.
    df_stats_table <- df_grp %>%
      dplyr::summarise(
        h_format_row(
          x = sfun(.data[[y]], ...)[table],
          format = table_format,
          labels = table_labels
        ),
        .groups = "drop"
      )

    # Reversed so that the first statistic is drawn at the top of the table.
    stats_lev <- rev(setdiff(colnames(df_stats_table), c(group_var, x)))

    df_stats_table <- df_stats_table %>%
      tidyr::pivot_longer(
        cols = -dplyr::all_of(c(group_var, x)),
        names_to = "stat",
        values_to = "value",
        names_ptypes = list(stat = factor(levels = stats_lev))
      )

    tbl <- ggplot2::ggplot(
      df_stats_table,
      ggplot2::aes(x = .data[[x]], y = .data[["stat"]], label = .data[["value"]])
    ) +
      ggplot2::geom_text(size = table_font_size) +
      ggplot2::theme_bw() +
      ggplot2::theme(
        panel.border = ggplot2::element_blank(),
        panel.grid.major = ggplot2::element_blank(),
        panel.grid.minor = ggplot2::element_blank(),
        axis.ticks = ggplot2::element_blank(),
        axis.title = ggplot2::element_blank(),
        axis.text.x = ggplot2::element_blank(),
        axis.text.y = ggplot2::element_text(
          size = table_font_size * ggplot2::.pt,
          margin = ggplot2::margin(t = 0, r = 0, b = 0, l = 5)
        ),
        strip.text = ggplot2::element_text(hjust = 0),
        strip.text.x = ggplot2::element_text(
          size = table_font_size * ggplot2::.pt,
          margin = ggplot2::margin(1.5, 0, 1.5, 0, "pt")
        ),
        strip.background = ggplot2::element_rect(fill = "grey95", color = NA),
        legend.position = "none"
      )

    # One table panel per group, stacked vertically.
    if (!is.null(group_var)) {
      tbl <- tbl + ggplot2::facet_wrap(facets = group_var, ncol = 1)
    }

    if (!as_list) {
      # align plot and table
      cowplot::plot_grid(
        p,
        tbl,
        ncol = 1,
        align = "v",
        axis = "tblr",
        rel_heights = c(rel_height_plot, 1 - rel_height_plot)
      )
    } else {
      list(plot = p, table = tbl)
    }
  } else {
    p
  }
}

#' Helper function to format the optional `g_lineplot` table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param x (named `list`)\cr list of numerical values to be formatted and optionally labeled.
#'   Elements of `x` must be `numeric` vectors.
#' @param format (named `character` or `NULL`)\cr format patterns for `x`. Names of the `format` must
#'   match the names of `x`. This parameter is passed directly to the `rtables::format_rcell`
#'   function through the `format` parameter.
#' @param labels (named `character` or `NULL`)\cr optional labels for `x`. Names of the `labels` must
#'   match the names of `x`. When a label is not specified for an element of `x`,
#'   then this function tries to use `label` or `names` (in this order) attribute of that element
#'   (depending on which one exists and it is not `NULL` or `NA` or `NaN`). If none of these attributes
#'   are attached to a given element of `x`, then the label is automatically generated.
#'
#' @return A single row `data.frame` object.
#'
#' @examples
#' mean_ci <- c(48, 51)
#' x <- list(mean = 50, mean_ci = mean_ci)
#' format <- c(mean = "xx.x", mean_ci = "(xx.xx, xx.xx)")
#' labels <- c(mean = "My Mean")
#' h_format_row(x, format, labels)
#'
#' attr(mean_ci, "label") <- "Mean 95% CI"
#' x <- list(mean = 50, mean_ci = mean_ci)
#' h_format_row(x, format, labels)
#'
#' @export
h_format_row <- function(x, format, labels = NULL) {
  # TRUE when a candidate label is present and not missing.
  is_usable <- function(candidate) {
    !is.null(candidate) && !is.na(candidate)
  }

  # Format one statistic into a one-row data.frame. The column name is taken from,
  # in order of precedence: the supplied label, the "label" attribute of the value,
  # or the names of the value (only when the formatted result has the same length).
  build_cell <- function(value, fmt, lbl = NULL) {
    cell <- format_rcell(x = value, format = fmt)
    if (is.na(cell)) {
      cell <- "NA"
    }
    attr_lbl <- attr(value, "label")
    if (is_usable(lbl)) {
      names(cell) <- lbl
    } else if (is_usable(attr_lbl)) {
      names(cell) <- attr_lbl
    } else if (length(value) == length(cell)) {
      names(cell) <- names(value)
    }
    as.data.frame(t(cell))
  }

  # Format every named element of `x`, then bind the cells side by side into a single row.
  stat_names <- names(x)
  cells <- vector("list", length(stat_names))
  for (i in seq_along(stat_names)) {
    stat <- stat_names[i]
    cells[[i]] <- build_cell(x[[stat]], fmt = format[[stat]], lbl = labels[stat])
  }

  do.call(cbind, cells)
}

#' Control function for `g_lineplot()`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Default values for `variables` parameter in `g_lineplot` function.
#' A variable's default value can be overwritten for any variable.
#'
#' @param x (`string`)\cr x-variable name.
#' @param y (`string`)\cr y-variable name.
#' @param group_var (`string` or `NA`)\cr group variable name.
#' @param subject_var (`string` or `NA`)\cr subject variable name.
#' @param facet_var (`string` or `NA`)\cr faceting variable name.
#' @param paramcd (`string` or `NA`)\cr parameter code variable name.
#' @param y_unit (`string` or `NA`)\cr y-axis unit variable name.
#'
#' @return A named character vector of variable names.
#'
#' @examples
#' control_lineplot_vars()
#' control_lineplot_vars(group_var = NA)
#'
#' @export
control_lineplot_vars <- function(x = "AVISIT",
                                  y = "AVAL",
                                  group_var = "ARM",
                                  facet_var = NA,
                                  paramcd = "PARAMCD",
                                  y_unit = "AVALU",
                                  subject_var = "USUBJID") {
  # The x and y variables are mandatory and must be single strings.
  checkmate::assert_string(x)
  checkmate::assert_string(y)

  # Every other variable is optional: a single string, NA or NULL.
  optional_vars <- list(
    group_var = group_var,
    facet_var = facet_var,
    subject_var = subject_var,
    paramcd = paramcd,
    y_unit = y_unit
  )
  for (var_name in names(optional_vars)) {
    checkmate::assert_string(
      optional_vars[[var_name]],
      na.ok = TRUE,
      null.ok = TRUE,
      .var.name = var_name
    )
  }

  # Named character vector; arguments given as NULL are dropped by `c()`.
  c(
    x = x,
    y = y,
    group_var = group_var,
    paramcd = paramcd,
    y_unit = y_unit,
    subject_var = subject_var,
    facet_var = facet_var
  )
}

#' Convert `rtable` objects to `ggplot` objects
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Given a [rtables::rtable()] object, performs basic conversion to a [ggplot2::ggplot()] object built using
#' functions from the `ggplot2` package. Any table titles and/or footnotes are ignored.
#'
#' @param tbl (`VTableTree`)\cr `rtables` table object.
#' @param fontsize (`numeric(1)`)\cr font size.
#' @param colwidths (`numeric` or `NULL`)\cr a vector of column widths. Each element's position in
#'   `colwidths` corresponds to the column of `tbl` in the same position. If `NULL`, column widths
#'   are calculated according to maximum number of characters per column.
#' @param lbl_col_padding (`numeric`)\cr additional padding to use when calculating spacing between
#'   the first (label) column and the second column of `tbl`. If `colwidths` is specified,
#'   the width of the first column becomes `colwidths[1] + lbl_col_padding`. Defaults to 0.
#'
#' @return A `ggplot` object.
#'
#' @examples
#' dta <- data.frame(
#'   ARM     = rep(LETTERS[1:3], rep(6, 3)),
#'   AVISIT  = rep(paste0("V", 1:3), 6),
#'   AVAL    = c(9:1, rep(NA, 9))
#' )
#'
#' lyt <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   analyze_vars(vars = "AVAL")
#'
#' tbl <- build_table(lyt, df = dta)
#'
#' rtable2gg(tbl)
#'
#' rtable2gg(tbl, fontsize = 15, colwidths = c(2, 1, 1, 1))
#'
#' @export
rtable2gg <- function(tbl, fontsize = 12, colwidths = NULL, lbl_col_padding = 0) {
  # Draws the table as a ggplot by placing the text of every cell with `annotate()`.
  # Pull out the pieces of the matrix form used below: cell strings, cell alignments,
  # row indentation, per-cell display flags and the number of header lines.
  mat <- rtables::matrix_form(tbl, indent_rownames = TRUE)
  mat_strings <- formatters::mf_strings(mat)
  mat_aligns <- formatters::mf_aligns(mat)
  mat_indent <- formatters::mf_rinfo(mat)$indent
  mat_display <- formatters::mf_display(mat)
  nlines_hdr <- formatters::mf_nlheader(mat)
  # Rows with at least one cell flagged as not displayed. They are handled further below
  # as header rows whose labels span several columns.
  # NOTE(review): assumes such rows only occur within the header lines - confirm.
  shared_hdr_rows <- which(apply(mat_display, 1, function(x) (any(!x))))

  tbl_df <- data.frame(mat_strings)
  body_rows <- seq(nlines_hdr + 1, nrow(tbl_df))
  # Translate alignment keywords into `hjust` values: "left" = 0, "right" = 1, anything else = 0.5.
  mat_aligns <- apply(mat_aligns, 1:2, function(x) if (x == "left") 0 else if (x == "right") 1 else 0.5)

  # Apply indentation in first column: 4 spaces are prepended per indent level of the row
  tbl_df[body_rows, 1] <- sapply(body_rows, function(i) {
    ind_i <- mat_indent[i - nlines_hdr] * 4
    if (ind_i > 0) paste0(paste(rep(" ", ind_i), collapse = ""), tbl_df[i, 1]) else tbl_df[i, 1]
  })

  # Get column widths: by default the longest string in each column plus 1
  if (is.null(colwidths)) {
    colwidths <- apply(tbl_df, 2, function(x) max(nchar(x))) + 1
  }
  tot_width <- sum(colwidths) + lbl_col_padding

  # Spanning header rows are removed from the regular grid; they are drawn separately below.
  if (length(shared_hdr_rows) > 0) {
    tbl_df <- tbl_df[-shared_hdr_rows, ]
    mat_aligns <- mat_aligns[-shared_hdr_rows, ]
  }

  # Empty canvas sized to the full table, with a horizontal rule between header lines and body.
  res <- ggplot(data = tbl_df) +
    theme_void() +
    scale_x_continuous(limits = c(0, tot_width)) +
    scale_y_continuous(limits = c(0, nrow(mat_strings))) +
    annotate(
      "segment",
      x = 0, xend = tot_width,
      y = nrow(mat_strings) - nlines_hdr + 0.5, yend = nrow(mat_strings) - nlines_hdr + 0.5
    )

  # If header content spans multiple columns, center over these columns
  if (length(shared_hdr_rows) > 0) {
    mat_strings[shared_hdr_rows, ] <- trimws(mat_strings[shared_hdr_rows, ])
    for (hr in shared_hdr_rows) {
      # Non-empty header strings of rows 1..hr in the columns selected by the display flags
      # of row `hr`, reshaped so that each column holds one (nested) label combination.
      hdr_lbls <- mat_strings[1:hr, mat_display[hr, -1]]
      hdr_lbls <- matrix(hdr_lbls[nzchar(hdr_lbls)], nrow = hr)
      for (idx_hl in seq_len(ncol(hdr_lbls))) {
        # The label to print is the last (row `hr`) entry of the combination.
        cur_lbl <- tail(hdr_lbls[, idx_hl], 1)
        # Table columns covered by this label.
        which_cols <- if (hr == 1) {
          which(mat_strings[hr, ] == hdr_lbls[idx_hl])
        } else { # for >2 col splits, only print labels for each unique combo of nested columns
          which(
            apply(mat_strings[1:hr, ], 2, function(x) all(x == hdr_lbls[1:hr, idx_hl]))
          )
        }
        # Left and right x positions of the span, inset by 1 on each side.
        line_pos <- c(
          sum(colwidths[1:(which_cols[1] - 1)]) + 1 + lbl_col_padding,
          sum(colwidths[1:max(which_cols)]) - 1 + lbl_col_padding
        )

        # Label centered over the span, with a rule underneath it.
        res <- res +
          annotate(
            "text",
            x = mean(line_pos),
            y = nrow(mat_strings) + 1 - hr,
            label = cur_lbl,
            size = fontsize / .pt
          ) +
          annotate(
            "segment",
            x = line_pos[1],
            xend = line_pos[2],
            y = nrow(mat_strings) - hr + 0.5,
            yend = nrow(mat_strings) - hr + 0.5
          )
      }
    }
  }

  # Add table columns: the first column is anchored at x = 0, every other column at the
  # center of its width band (shifted by `lbl_col_padding`); rows are drawn top to bottom.
  for (i in seq_len(ncol(tbl_df))) {
    res <- res + annotate(
      "text",
      x = if (i == 1) 0 else sum(colwidths[1:i]) - 0.5 * colwidths[i] + lbl_col_padding,
      y = rev(seq_len(nrow(tbl_df))),
      label = tbl_df[, i],
      hjust = mat_aligns[, i],
      size = fontsize / .pt
    )
  }

  res
}

#' Convert `data.frame` object to `ggplot` object
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Given a `data.frame` object, performs basic conversion to a [ggplot2::ggplot()] object built using
#' functions from the `ggplot2` package.
#'
#' @param df (`data.frame`)\cr a data frame.
#' @param colwidths (`numeric` or `NULL`)\cr a vector of column widths. Each element's position in
#'   `colwidths` corresponds to the column of `df` in the same position. If `NULL`, column widths
#'   are calculated according to maximum number of characters per column.
#' @param font_size (`numeric(1)`)\cr font size.
#' @param col_labels (`flag`)\cr whether the column names (labels) of `df` should be used as the first row
#'   of the output table.
#' @param col_lab_fontface (`string`)\cr font face to apply to the first row (of column labels
#'   if `col_labels = TRUE`). Defaults to `"bold"`.
#' @param hline (`flag`)\cr whether a horizontal line should be printed below the first row of the table.
#' @param bg_fill (`string`)\cr table background fill color.
#'
#' @return A `ggplot` object.
#'
#' @examples
#' \dontrun{
#' df2gg(head(iris, 5))
#'
#' df2gg(head(iris, 5), font_size = 15, colwidths = c(1, 1, 1, 1, 1))
#' }
#' @keywords internal
df2gg <- function(df,
                  colwidths = NULL,
                  font_size = 10,
                  col_labels = TRUE,
                  col_lab_fontface = "bold",
                  hline = TRUE,
                  bg_fill = NULL) {
  # Turn every cell into text; missing values are shown as the literal string "NA".
  cell_to_text <- function(cell) {
    if (is.na(cell)) "NA" else as.character(cell)
  }
  df <- as.data.frame(apply(df, 1:2, cell_to_text))

  # Optionally put the column names on top as the first row of the table.
  if (col_labels) {
    body <- as.matrix(df)
    df <- rbind(colnames(body), body)
  }

  # Default column width: number of characters of the longest entry in each column.
  if (is.null(colwidths)) {
    colwidths <- apply(df, 2, function(column) max(nchar(column), na.rm = TRUE))
  }
  tot_width <- sum(colwidths)
  n_rows <- nrow(df)

  # Empty canvas sized to the table.
  res <- ggplot(data = df) +
    theme_void() +
    scale_x_continuous(limits = c(0, tot_width)) +
    scale_y_continuous(limits = c(1, n_rows))

  if (!is.null(bg_fill)) {
    res <- res + theme(plot.background = element_rect(fill = bg_fill))
  }

  # Horizontal rule just below the first row.
  if (hline) {
    rule_y <- n_rows - 0.5
    res <- res +
      annotate(
        "segment",
        x = 0 + 0.2 * colwidths[2], xend = tot_width - 0.1 * tail(colwidths, 1),
        y = rule_y, yend = rule_y
      )
  }

  # Row positions (first row on top) and font faces are the same for every column.
  row_y <- rev(seq_len(n_rows))
  row_faces <- if (col_labels) {
    c(col_lab_fontface, rep("plain", n_rows - 1))
  } else {
    rep("plain", n_rows)
  }

  # Draw each column's text centered between the column's left and right edges.
  for (i in seq_len(ncol(df))) {
    left_edge <- if (i == 1) 0 else sum(colwidths[1:(i - 1)])
    right_edge <- sum(colwidths[1:i])
    res <- res +
      annotate(
        "text",
        x = mean(c(left_edge, right_edge)),
        y = row_y,
        label = df[, i],
        size = font_size / .pt,
        fontface = row_faces
      )
  }

  res
}

#' Subgroup treatment effect pattern (STEP) fit for binary (response) outcome
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This fits the Subgroup Treatment Effect Pattern logistic regression models for a binary
#' (response) outcome. The treatment arm variable must have exactly 2 levels,
#' where the first one is taken as reference and the estimated odds ratios are
#' for the comparison of the second level vs. the first one.
#'
#' The (conditional) logistic regression model which is fit is:
#'
#' `response ~ arm * poly(biomarker, degree) + covariates + strata(strata)`
#'
#' where `degree` is specified by `control_step()`.
#'
#' @inheritParams argument_convention
#' @param variables (named `list` of `character`)\cr list of analysis variables:
#'   needs `response`, `arm`, `biomarker`, and optional `covariates` and `strata`.
#' @param control (named `list`)\cr combined control list from [control_step()]
#'   and [control_logistic()].
#'
#' @return A matrix of class `step`. The first part of the columns describe the
#'   subgroup intervals used for the biomarker variable, including where the
#'   center of the intervals are and their bounds. The second part of the
#'   columns contain the estimates for the treatment arm comparison.
#'
#' @note For the default degree 0 the `biomarker` variable is not included in the model.
#'
#' @seealso [control_step()] and [control_logistic()] for the available
#'   customization options.
#'
#' @examples
#' # Testing dataset with just two treatment arms.
#' library(survival)
#' library(dplyr)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(
#'     PARAMCD == "BESRSPI",
#'     ARM %in% c("B: Placebo", "A: Drug X")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to have Placebo as reference arm for Odds Ratio calculations.
#'     ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")),
#'     RSP = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     SEX = factor(SEX)
#'   )
#'
#' variables <- list(
#'   arm = "ARM",
#'   biomarker = "BMRKR1",
#'   covariates = "AGE",
#'   response = "RSP"
#' )
#'
#' # Fit default STEP models: Here a constant treatment effect is estimated in each subgroup.
#' # We use a large enough bandwidth to avoid too small subgroups and linear separation in those.
#' step_matrix <- fit_rsp_step(
#'   variables = variables,
#'   data = adrs_f,
#'   control = c(control_logistic(), control_step(bandwidth = 0.9))
#' )
#' dim(step_matrix)
#' head(step_matrix)
#'
#' # Specify different polynomial degree for the biomarker interaction to use more flexible local
#' # models. Or specify different logistic regression options, including confidence level.
#' step_matrix2 <- fit_rsp_step(
#'   variables = variables,
#'   data = adrs_f,
#'   control = c(control_logistic(conf_level = 0.9), control_step(bandwidth = NULL, degree = 1))
#' )
#'
#' # Use a global constant model. This is helpful as a reference for the subgroup models.
#' step_matrix3 <- fit_rsp_step(
#'   variables = variables,
#'   data = adrs_f,
#'   control = c(control_logistic(), control_step(bandwidth = NULL, num_points = 2L))
#' )
#'
#' # It is also possible to use strata, i.e. use conditional logistic regression models.
#' variables2 <- list(
#'   arm = "ARM",
#'   biomarker = "BMRKR1",
#'   covariates = "AGE",
#'   response = "RSP",
#'   strata = c("STRATA1", "STRATA2")
#' )
#'
#' step_matrix4 <- fit_rsp_step(
#'   variables = variables2,
#'   data = adrs_f,
#'   control = c(control_logistic(), control_step(bandwidth = NULL))
#' )
#'
#' @export
fit_rsp_step <- function(variables,
                         data,
                         control = c(control_step(), control_logistic())) {
  assert_df_with_variables(data, variables)
  checkmate::assert_list(control, names = "named")

  # Only rows with an observed biomarker value take part in the analysis.
  biomarker <- variables$biomarker
  data <- data[!is.na(data[[biomarker]]), ]

  # Subgroup windows over the biomarker, their centers, and the model formula.
  windows <- h_step_window(x = data[[biomarker]], control = control)
  centers <- windows$interval[, "Interval Center"]
  model_formula <- h_step_rsp_formula(variables = variables, control = control)

  if (is.null(control$bandwidth)) {
    # No bandwidth: a single fit on all data, evaluated at every interval center.
    estimates <- h_step_rsp_est(
      formula = model_formula,
      data = data,
      variables = variables,
      x = centers,
      control = control
    )
  } else {
    # Otherwise one fit per window, each restricted to the window's selection of rows.
    window_subsets <- as.list(as.data.frame(windows$sel))
    per_window <- mapply(
      FUN = h_step_rsp_est,
      x = centers,
      subset = window_subsets,
      MoreArgs = list(
        formula = model_formula,
        data = data,
        variables = variables,
        control = control
      )
    )
    # `mapply()` returns one column per window: name the statistics, then flip to one row per window.
    rownames(per_window) <- c("n", "logor", "se", "ci_lower", "ci_upper")
    estimates <- t(per_window)
  }

  # Interval description columns first, estimates second; inputs are kept as attributes.
  structure(
    cbind(windows$interval, estimates),
    class = c("step", "matrix"),
    variables = variables,
    control = control
  )
}

#' Estimate proportions of each level of a variable
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze & summarize function [estimate_multinomial_response()] creates a layout element to estimate the
#' proportion and proportion confidence interval for each level of a factor variable. The primary analysis variable,
#' `var`, should be a factor variable, the values of which will be used as labels within the output table.
#'
#' @inheritParams argument_convention
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("estimate_multinomial_response"), type = "sh")``
#'
#' @seealso Relevant description function [d_onco_rsp_label()].
#'
#' @name estimate_multinomial_rsp
#' @order 1
NULL

#' Description of standard oncology response
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Describe the oncology response in a standard way.
#'
#' @param x (`character`)\cr the standard oncology codes to be described.
#'
#' @return Response labels.
#'
#' @seealso [estimate_multinomial_rsp()]
#'
#' @examples
#' d_onco_rsp_label(
#'   c("CR", "PR", "SD", "NON CR/PD", "PD", "NE", "Missing", "<Missing>", "NE/Missing")
#' )
#'
#' # Adding some values not considered in d_onco_rsp_label
#'
#' d_onco_rsp_label(
#'   c("CR", "PR", "hello", "hi")
#' )
#'
#' @export
d_onco_rsp_label <- function(x) {
  # Lookup table from response code to its description, in the preferred level order.
  desc <- c(
    CR           = "Complete Response (CR)",
    PR           = "Partial Response (PR)",
    MR           = "Minimal/Minor Response (MR)",
    MRD          = "Minimal Residual Disease (MRD)",
    SD           = "Stable Disease (SD)",
    PD           = "Progressive Disease (PD)",
    `NON CR/PD`  = "Non-CR or Non-PD (NON CR/PD)",
    NE           = "Not Evaluable (NE)",
    `NE/Missing` = "Missing or unevaluable",
    Missing      = "Missing",
    `NA`         = "Not Applicable (NA)",
    ND           = "Not Done (ND)"
  )
  codes <- as.character(x)

  # Known codes are replaced by their description; any other value is passed through unchanged.
  describe_one <- function(code) {
    pos <- match(code, names(desc))
    if (is.na(pos)) code else desc[[pos]]
  }
  values_label <- vapply(codes, describe_one, FUN.VALUE = character(1))

  # Levels: observed standard descriptions in lookup order, then unknown values in order of appearance.
  known_levels <- intersect(desc, values_label)
  other_levels <- setdiff(values_label, desc)

  factor(values_label, levels = c(known_levels, other_levels))
}

#' @describeIn estimate_multinomial_rsp Statistics function which feeds the length of `x` as number
#'   of successes, and `.N_col` as total number of successes and failures into [s_proportion()].
#'
#' @return
#' * `s_length_proportion()` returns statistics from [s_proportion()].
#'
#' @examples
#' s_length_proportion(rep("CR", 10), .N_col = 100)
#' s_length_proportion(factor(character(0)), .N_col = 100)
#'
#' @export
s_length_proportion <- function(x,
                                ...,
                                .N_col) { # nolint
  # `x` must be a factor or character vector with at most one distinct value
  # and no more elements than the column total `.N_col`.
  checkmate::assert_multi_class(x, classes = c("factor", "character"))
  checkmate::assert_vector(x, min.len = 0, max.len = .N_col)
  checkmate::assert_vector(unique(x), min.len = 0, max.len = 1)

  # Expand the counts into a logical vector: one TRUE per element of `x`,
  # one FALSE for each of the remaining `.N_col - length(x)` subjects.
  n_success <- length(x)
  rsp <- rep(c(TRUE, FALSE), times = c(n_success, .N_col - n_success))
  s_proportion(df = rsp, ...)
}

#' @describeIn estimate_multinomial_rsp Formatted analysis function which is used as `afun`
#'   in `estimate_multinomial_response()`.
#'
#' @return
#' * `a_length_proportion()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_length_proportion(rep("CR", 10), .N_col = 100)
#' a_length_proportion(factor(character(0)), .N_col = 100)
#'
#' @export
a_length_proportion <- function(x,
                                ...,
                                .stats = NULL,
                                .stat_names = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Separate the layout-supplied parameters from the remaining `...` arguments.
  # `.additional_fun_parameters` only carries parameter names; the helper is handed
  # those names (presumably to look up the matching values - confirm in
  # `retrieve_extra_afun_params()`). The entry is then dropped so that it is not
  # forwarded to the statistics function.
  dots_extra_args <- list(...)
  extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters))
  dots_extra_args$.additional_fun_parameters <- NULL

  # Split `.stats` into the full set of requested statistics (`all_stats`) and
  # any user-defined statistic functions (`custom_stats`)
  default_and_custom_stats_list <- .split_std_from_custom_stats(.stats)
  .stats <- default_and_custom_stats_list$all_stats
  custom_stat_functions <- default_and_custom_stats_list$custom_stats

  # Apply the default statistics function and any custom ones to `x`, passing
  # along the layout parameters and the remaining `...` arguments
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_length_proportion,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      x = list(x),
      extra_afun_params,
      dots_extra_args
    )
  )

  # Resolve the final statistic names, keep only those statistics (in that order),
  # and fill in formats, labels and indentation for them
  .stats <- get_stats("estimate_multinomial_response",
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions)
  )
  x_stats <- x_stats[.stats]
  .formats <- get_formats_from_stats(.stats, .formats)
  # Label defaults: the `label` attribute of each computed statistic, followed by
  # the package-wide `tern_default_labels`
  .labels <- get_labels_from_stats(
    .stats, .labels,
    tern_defaults = c(lapply(x_stats, attr, "label"), tern_default_labels)
  )
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  # Auto format handling (uses the row data and variable name supplied by the layout)
  .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  # Build the formatted rows; the labels are used both as row names and as row labels
  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = .labels %>% .unlist_keep_nulls(),
    .stat_names = .stat_names,
    .labels = .labels %>% .unlist_keep_nulls(),
    .indent_mods = .indent_mods %>% .unlist_keep_nulls()
  )
}

#' @describeIn estimate_multinomial_rsp Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()] and
#'   [rtables::summarize_row_groups()].
#'
#' @return
#' * `estimate_multinomial_response()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_length_proportion()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' # Use of the layout creating function.
#' dta_test <- data.frame(
#'   USUBJID = paste0("S", 1:12),
#'   ARM     = factor(rep(LETTERS[1:3], each = 4)),
#'   AVAL    = c(A = c(1, 1, 1, 1), B = c(0, 0, 1, 1), C = c(0, 0, 0, 0))
#' ) %>% mutate(
#'   AVALC = factor(AVAL,
#'     levels = c(0, 1),
#'     labels = c("Complete Response (CR)", "Partial Response (PR)")
#'   )
#' )
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   estimate_multinomial_response(var = "AVALC")
#'
#' tbl <- build_table(lyt, dta_test)
#'
#' tbl
#'
#' @export
#' @order 2
estimate_multinomial_response <- function(lyt,
                                          var,
                                          na_str = default_na_str(),
                                          nested = TRUE,
                                          ...,
                                          show_labels = "hidden",
                                          table_names = var,
                                          .stats = "prop_ci",
                                          .stat_names = NULL,
                                          .formats = list(prop_ci = "(xx.xx, xx.xx)"),
                                          .labels = NULL,
                                          .indent_mods = NULL) {
  # Process standard extra arguments; optional ones are only forwarded when supplied
  extra_args <- list(".stats" = .stats)
  if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names
  if (!is.null(.formats)) extra_args[[".formats"]] <- .formats
  if (!is.null(.labels)) extra_args[[".labels"]] <- .labels
  if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods

  # Process additional arguments to the statistic function.
  # `...` is wrapped in list() so that every argument stays a single element:
  # c(extra_args, ...) would splice a vector- or list-valued argument into
  # several elements with altered names.
  extra_args <- c(extra_args, list(...))

  # Append additional info from layout to the analysis function: the extra
  # parameters are recorded in `extra_args` and added as formals to a local
  # copy of `a_length_proportion`
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(a_length_proportion) <- c(formals(a_length_proportion), extra_args[[".additional_fun_parameters"]])

  # One row group per level of `var`, each with a summary row, then the
  # analysis of `var` within each group
  lyt <- split_rows_by(lyt, var = var)
  lyt <- summarize_row_groups(lyt, na_str = na_str)

  analyze(
    lyt = lyt,
    vars = var,
    afun = a_length_proportion,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Difference test for two proportions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [test_proportion_diff()] creates a layout element to test the difference between two
#' proportions. The primary analysis variable, `vars`, indicates whether a response has occurred for each record. See
#' the `method` parameter for options of methods to use to calculate the p-value. The argument `alternative`
#' specifies the direction of the alternative hypothesis. Additionally, a stratification variable can be
#' supplied via the `strata` element of the `variables` argument.
#'
#' @inheritParams argument_convention
#' @param method (`string`)\cr one of `chisq`, `cmh`, `cmh_wh`, `fisher`, or `schouten`;
#'   specifies the test used to calculate the p-value.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("test_proportion_diff"), type = "sh")``
#'
#' @seealso [h_prop_diff_test]
#'
#' @name prop_diff_test
#' @order 1
NULL

#' @describeIn prop_diff_test Statistics function which tests the difference between two proportions.
#'
#' @return
#' * `s_test_proportion_diff()` returns a named `list` with a single item `pval` with an attribute `label`
#'   describing the method used. The p-value tests the null hypothesis that proportions in two groups are the same.
#'
#' @examples
#'
#' ## Simulated data: 100 rows with a randomly sampled response, group, and two additional factors.
#' nex <- 100 # Number of example rows
#' dta <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
#'   "grp" = sample(c("A", "B"), nex, TRUE),
#'   "f1" = sample(c("a1", "a2"), nex, TRUE),
#'   "f2" = sample(c("x", "y", "z"), nex, TRUE),
#'   stringsAsFactors = TRUE
#' )
#' s_test_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   variables = NULL,
#'   method = "chisq"
#' )
#'
#' @export
s_test_proportion_diff <- function(df,
                                   .var,
                                   .ref_group,
                                   .in_ref_col,
                                   variables = list(strata = NULL),
                                   method = c("chisq", "schouten", "fisher", "cmh", "cmh_wh"),
                                   alternative = c("two.sided", "less", "greater"),
                                   ...) {
  method <- match.arg(method)
  # Resolve `alternative` to a single string once, so that every helper below
  # and the label receive the same validated value rather than the full choice
  # vector.
  alternative <- match.arg(alternative)
  y <- list(pval = numeric())

  # The p-value is only computed for non-reference columns; in the reference
  # column `pval` stays an empty numeric.
  if (!.in_ref_col) {
    assert_df_with_variables(df, list(rsp = .var))
    assert_df_with_variables(.ref_group, list(rsp = .var))
    # Stack reference group first, then the current group
    rsp <- factor(
      c(.ref_group[[.var]], df[[.var]]),
      levels = c("TRUE", "FALSE")
    )
    grp <- factor(
      rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))),
      levels = c("ref", "Not-ref")
    )

    is_stratified_test <- method %in% c("cmh", "cmh_wh")
    if (!is.null(variables$strata) || is_stratified_test) {
      strata <- variables$strata
      # Stratified tests cannot run without stratification variables
      checkmate::assert_false(is.null(strata))
      strata_vars <- stats::setNames(as.list(strata), strata)
      assert_df_with_variables(df, strata_vars)
      assert_df_with_variables(.ref_group, strata_vars)
      # One combined stratum per record, in the same order as `rsp` and `grp`
      strata <- c(interaction(.ref_group[strata]), interaction(df[strata]))
    }

    # Stratified tests need a 2 x 2 x K array, all others a 2 x 2 table
    tbl <- if (is_stratified_test) {
      table(grp, rsp, strata)
    } else {
      table(grp, rsp)
    }

    y$pval <- switch(method,
      chisq = prop_chisq(tbl, alternative = alternative),
      cmh = prop_cmh(tbl, alternative = alternative),
      fisher = prop_fisher(tbl, alternative = alternative),
      schouten = prop_schouten(tbl, alternative = alternative),
      cmh_wh = prop_cmh(tbl, alternative = alternative, transform = "wilson_hilferty")
    )
  }

  # Attach a label describing the test and direction
  y$pval <- formatters::with_label(y$pval, d_test_proportion_diff(method, alternative = alternative))
  y
}

#' Description of the difference test between two proportions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function that describes the analysis in `s_test_proportion_diff`.
#'
#' @inheritParams s_test_proportion_diff
#'
#' @return A `string` describing the test from which the p-value is derived.
#'
#' @export
d_test_proportion_diff <- function(method, alternative = c("two.sided", "less", "greater")) {
  checkmate::assert_string(method)
  alternative <- match.arg(alternative)

  # Human-readable name of each supported test
  test_names <- c(
    schouten = "Chi-Squared Test with Schouten Correction",
    chisq = "Chi-Squared Test",
    cmh = "Cochran-Mantel-Haenszel Test",
    cmh_wh = "Cochran-Mantel-Haenszel Test with Wilson-Hilferty Transformation",
    fisher = "Fisher's Exact Test"
  )
  if (!(method %in% names(test_names))) {
    stop(paste(method, "does not have a description"))
  }

  # Suffix describing the direction; empty for the two-sided test
  side_suffix <- c(
    two.sided = "",
    less = ", 1-sided, direction less",
    greater = ", 1-sided, direction greater"
  )

  paste0("p-value (", test_names[[method]], side_suffix[[alternative]], ")")
}

#' @describeIn prop_diff_test Formatted analysis function which is used as `afun` in `test_proportion_diff()`.
#'
#' @return
#' * `a_test_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_test_proportion_diff <- function(df,
                                   ...,
                                   .stats = NULL,
                                   .stat_names = NULL,
                                   .formats = NULL,
                                   .labels = NULL,
                                   .indent_mods = NULL) {
  dots_extra_args <- list(...)

  # Split `.stats` into the full set of requested statistics (`all_stats`) and
  # any user-defined statistic functions (`custom_stats`)
  default_and_custom_stats_list <- .split_std_from_custom_stats(.stats)
  .stats <- default_and_custom_stats_list$all_stats
  custom_stat_functions <- default_and_custom_stats_list$custom_stats

  # Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params).
  # Only the parameter names are handed to the helper (presumably it looks up the
  # matching values - confirm in `retrieve_extra_afun_params()`).
  extra_afun_params <- retrieve_extra_afun_params(
    names(dots_extra_args$.additional_fun_parameters)
  )
  dots_extra_args$.additional_fun_parameters <- NULL # Dropped so it is not forwarded to the statistics function

  # Main statistical functions application: default and custom statistics are
  # computed on `df` with the layout parameters and the remaining `...` arguments
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_test_proportion_diff,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      df = list(df),
      extra_afun_params,
      dots_extra_args
    )
  )

  # Fill in with stats defaults if needed
  .stats <- get_stats("test_proportion_diff",
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions)
  )

  # Keep only the selected statistics, in the selected order
  x_stats <- x_stats[.stats]

  # Fill in formats/indents/labels with custom input and defaults
  .formats <- get_formats_from_stats(.stats, .formats)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)
  # Without user labels, fall back to the `label` attribute of each statistic
  # and drop entries that are empty strings or NA
  if (is.null(.labels)) {
    .labels <- sapply(x_stats, attr, "label")
    .labels <- .labels[nzchar(.labels) & !sapply(.labels, is.null) & !is.na(.labels)]
  }
  .labels <- get_labels_from_stats(.stats, .labels)

  # Auto format handling (uses the row data and variable name supplied by the layout)
  .formats <- apply_auto_formatting(
    .formats,
    x_stats,
    extra_afun_params$.df_row,
    extra_afun_params$.var
  )

  # Get and check statistical names from defaults
  .stat_names <- get_stat_names(x_stats, .stat_names) # derived from x_stats, not from .stats

  # Build the formatted rows; row names are the names of `.labels`, row labels its values
  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = names(.labels),
    .stat_names = .stat_names,
    .labels = .labels %>% .unlist_keep_nulls(),
    .indent_mods = .indent_mods %>% .unlist_keep_nulls()
  )
}

#' @describeIn prop_diff_test Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `test_proportion_diff()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_test_proportion_diff()` to the table layout.
#'
#' @examples
#' dta <- data.frame(
#'   rsp = sample(c(TRUE, FALSE), 100, TRUE),
#'   grp = factor(rep(c("A", "B"), each = 50)),
#'   strata = factor(rep(c("V", "W", "X", "Y", "Z"), each = 20))
#' )
#'
#' # With `rtables` pipelines.
#' l <- basic_table() %>%
#'   split_cols_by(var = "grp", ref_group = "B") %>%
#'   test_proportion_diff(
#'     vars = "rsp",
#'     method = "cmh", variables = list(strata = "strata")
#'   )
#'
#' build_table(l, df = dta)
#'
#' @export
#' @order 2
test_proportion_diff <- function(lyt,
                                 vars,
                                 variables = list(strata = NULL),
                                 method = c("chisq", "schouten", "fisher", "cmh", "cmh_wh"),
                                 alternative = c("two.sided", "less", "greater"),
                                 var_labels = vars,
                                 na_str = default_na_str(),
                                 nested = TRUE,
                                 show_labels = "hidden",
                                 table_names = vars,
                                 section_div = NA_character_,
                                 ...,
                                 na_rm = TRUE,
                                 .stats = c("pval"),
                                 .stat_names = NULL,
                                 .formats = c(pval = "x.xxxx | (<0.0001)"),
                                 .labels = NULL,
                                 .indent_mods = c(pval = 1L)) {
  # Arguments consumed by the statistics function, followed by anything in `...`
  stat_fun_args <- list(
    "na_rm" = na_rm,
    "variables" = variables,
    "method" = method,
    "alternative" = alternative,
    ...
  )

  # Formatting arguments are only forwarded when they were supplied (non-NULL)
  formatting_args <- list(
    ".stats" = .stats,
    ".stat_names" = .stat_names,
    ".formats" = .formats,
    ".labels" = .labels,
    ".indent_mods" = .indent_mods
  )
  extra_args <- c(stat_fun_args, Filter(Negate(is.null), formatting_args))

  # Extra layout information (see ?rtables::additional_fun_params) is recorded in
  # `extra_args` and added as formals to a local copy of the analysis function
  layout_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- layout_params
  formals(a_test_proportion_diff) <- c(formals(a_test_proportion_diff), layout_params)

  # Main {rtables} structural call
  analyze(
    lyt = lyt,
    vars = vars,
    var_labels = var_labels,
    afun = a_test_proportion_diff,
    na_str = na_str,
    inclNAs = !na_rm,
    nested = nested,
    extra_args = extra_args,
    show_labels = show_labels,
    table_names = table_names,
    section_div = section_div
  )
}

#' Helper functions to test proportion differences
#'
#' Helper functions to implement various tests on the difference between two proportions.
#'
#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns.
#' @inheritParams argument_convention
#'
#' @return A p-value.
#'
#' @seealso [prop_diff_test()] for implementation of these helper functions.
#'
#' @name h_prop_diff_test
NULL

#' @describeIn h_prop_diff_test Performs Chi-Squared test. Internally calls [stats::prop.test()].
#'
#' @keywords internal
prop_chisq <- function(tbl, alternative = c("two.sided", "less", "greater")) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)
  # Resolve and validate `alternative` up front, as the other `prop_*` helpers
  # do, so an invalid value is rejected even when the early return below fires.
  alternative <- match.arg(alternative)
  # Fix the column order: responders first, non-responders second
  tbl <- tbl[, c("TRUE", "FALSE")]
  # If all records are responders, or all are non-responders, there is nothing
  # to test and the p-value is set to 1
  if (any(colSums(tbl) == 0)) {
    return(1)
  }
  stats::prop.test(tbl, correct = FALSE, alternative = alternative)$p.value
}

#' @describeIn h_prop_diff_test Performs stratified Cochran-Mantel-Haenszel test,
#'   using [stats::mantelhaen.test()] internally.
#'   Note that if any stratum has fewer than five observations a warning is issued, and strata with
#'   fewer than two observations are then discarded.
#'
#' @param ary (`array`, 3 dimensions)\cr array with two groups in rows, the binary response
#'   (`TRUE`/`FALSE`) in columns, and the strata in the third dimension.
#' @param transform (`string`)\cr either `none` or `wilson_hilferty`; specifies whether to apply
#'   the Wilson-Hilferty transformation of the chi-squared statistic.
#'
#' @keywords internal
prop_cmh <- function(ary,
                     alternative = c("two.sided", "less", "greater"),
                     transform = c("none", "wilson_hilferty")) {
  # Input must be a 2 x 2 x K array
  checkmate::assert_array(ary)
  checkmate::assert_integer(c(ncol(ary), nrow(ary)), lower = 2, upper = 2)
  checkmate::assert_integer(length(dim(ary)), lower = 3, upper = 3)
  alternative <- match.arg(alternative)
  transform <- match.arg(transform)

  # Warn about sparse strata and keep only those with more than one observation
  n_per_stratum <- apply(ary, MARGIN = 3, sum)
  if (any(n_per_stratum < 5)) {
    warning("<5 data points in some strata. CMH test may be incorrect.")
    ary <- ary[, , n_per_stratum > 1]
  }

  fit <- stats::mantelhaen.test(ary, correct = FALSE, alternative = alternative)
  if (transform == "none") {
    return(fit$p.value)
  }

  # Wilson-Hilferty: cube-root transformation of the chi-squared statistic,
  # centred and scaled so that it can be referred to the standard normal
  stat <- unname(fit$statistic)
  dof <- unname(fit$parameter)
  z_wh <- ((stat / dof)^(1 / 3) - (1 - 2 / (9 * dof))) / sqrt(2 / (9 * dof))

  if (alternative == "two.sided") {
    return(2 * stats::pnorm(-abs(z_wh)))
  }
  stats::pnorm(z_wh, lower.tail = (alternative == "greater"))
}

#' @describeIn h_prop_diff_test Performs the Chi-Squared test with Schouten correction.
#'
#' @seealso Schouten correction is based upon \insertCite{Schouten1980-kd;textual}{tern}.
#'
#' @keywords internal
prop_schouten <- function(tbl, alternative = c("two.sided", "less", "greater")) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)
  alternative <- match.arg(alternative)
  # Fix the column order: responders first, non-responders second
  tbl <- tbl[, c("TRUE", "FALSE")]
  # If all records are responders, or all are non-responders, there is nothing
  # to test and the p-value is set to 1
  if (any(colSums(tbl) == 0)) {
    return(1)
  }

  # Margins are converted to double before they are multiplied. Counts coming
  # from table() are integers, and the product of the four margins exceeds
  # .Machine$integer.max already for a few hundred subjects per group, which
  # would turn the statistic into NA.
  n1 <- as.numeric(sum(tbl[1, ]))
  n2 <- as.numeric(sum(tbl[2, ]))
  n_rsp <- as.numeric(sum(tbl[, 1]))
  n_non_rsp <- as.numeric(sum(tbl[, 2]))
  n <- n1 + n2

  # Main diagonal (a, d) and anti-diagonal (c, b) cells of the 2 x 2 table
  ad <- diag(tbl)
  bc <- diag(apply(tbl, 2, rev))

  t_schouten <- (n - 1) *
    (abs(prod(ad) - prod(bc)) - 0.5 * min(n1, n2))^2 /
    (n1 * n2 * n_rsp * n_non_rsp)

  if (alternative == "two.sided") {
    stats::pchisq(t_schouten, df = 1, lower.tail = FALSE)
  } else {
    # This follows the logic in stats::prop.test for one-sided p-values:
    # the signed square root of the statistic is referred to the normal distribution.
    x1 <- tbl[1, 1]
    x2 <- tbl[2, 1]
    delta <- (x1 / n1) - (x2 / n2)
    z <- sign(delta) * sqrt(t_schouten)
    stats::pnorm(z, lower.tail = (alternative == "less"))
  }
}

#' @describeIn h_prop_diff_test Performs the Fisher's exact test. Internally calls [stats::fisher.test()].
#'
#' @keywords internal
prop_fisher <- function(tbl, alternative = c("two.sided", "less", "greater")) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)
  # The default choice vector has to be resolved to one string here, because
  # stats::fisher.test does not handle defaults.
  alternative <- match.arg(alternative)
  # Fix the column order: responders first, non-responders second
  ordered_tbl <- tbl[, c("TRUE", "FALSE")]
  fisher_res <- stats::fisher.test(ordered_tbl, alternative = alternative)
  fisher_res$p.value
}

#' Count patients with toxicity grades that have worsened from baseline by highest grade post-baseline
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [count_abnormal_lab_worsen_by_baseline()] creates a layout element to count patients with
#' analysis toxicity grades which have worsened from baseline, categorized by highest (worst) grade post-baseline.
#'
#' This function analyzes primary analysis variable `var` which indicates analysis toxicity grades. Additional
#' analysis variables that can be supplied as a list via the `variables` parameter are `id` (defaults to `USUBJID`),
#' a variable to indicate unique subject identifiers, `baseline_var` (defaults to `BTOXGR`), a variable to indicate
#' baseline toxicity grades, and `direction_var` (defaults to `GRADDR`), a variable to indicate toxicity grade
#' directions of interest to include (e.g. `"H"` (high), `"L"` (low), or `"B"` (both)).
#'
#' For the direction(s) specified in `direction_var`, patient counts by worst grade for patients who have
#' worsened from baseline are calculated as follows:
#'   * `1` to `4`: The number of patients who have worsened from their baseline grades with worst
#'     grades 1-4, respectively.
#'   * `Any`: The total number of patients who have worsened from their baseline grades.
#'
#' Fractions are calculated by dividing the above counts by the number of patients whose analysis toxicity grades
#' have worsened from baseline toxicity grades during treatment.
#'
#' Prior to using this function in your table layout you must use [rtables::split_rows_by()] to create a row
#' split on variable `direction_var`.
#'
#' @inheritParams argument_convention
#' @param variables (named `list` of `string`)\cr list of additional analysis variables including:
#'   * `id` (`string`)\cr subject variable name.
#'   * `baseline_var` (`string`)\cr name of the data column containing baseline toxicity variable.
#'   * `direction_var` (`string`)\cr name of the data column containing the toxicity grade direction of
#'     interest (e.g. `"H"`, `"L"`, or `"B"`).
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("abnormal_lab_worsen_by_baseline"), type = "sh")``
#'
#' @param table_names `r lifecycle::badge("deprecated")` this parameter has no effect.
#'
#' @seealso Relevant helper functions [h_adlb_worsen()] and [h_worsen_counter()] which are used within
#' [s_count_abnormal_lab_worsen_by_baseline()] to process input data.
#'
#' @name abnormal_lab_worsen_by_baseline
#' @order 1
NULL

#' @describeIn abnormal_lab_worsen_by_baseline Statistics function for patients whose worst post-baseline
#'   lab grades are worse than their baseline grades.
#'
#' @return
#' * `s_count_abnormal_lab_worsen_by_baseline()` returns the counts and fraction of patients whose worst
#'   post-baseline lab grades are worse than their baseline grades, for post-baseline worst grades
#'   "1", "2", "3", "4" and "Any".
#'
#' @keywords internal
s_count_abnormal_lab_worsen_by_baseline <- function(df,
                                                    .var = "ATOXGR",
                                                    variables = list(
                                                      id = "USUBJID",
                                                      baseline_var = "BTOXGR",
                                                      direction_var = "GRADDR"
                                                    ),
                                                    ...) {
  # `.var` and each entry of `variables` must be a single string, and `variables`
  # must contain exactly the entries `id`, `baseline_var` and `direction_var`
  checkmate::assert_string(.var)
  checkmate::assert_set_equal(names(variables), c("id", "baseline_var", "direction_var"))
  checkmate::assert_string(variables$id)
  checkmate::assert_string(variables$baseline_var)
  checkmate::assert_string(variables$direction_var)
  # Check `df` against the analysis variable and the three additional variables
  assert_df_with_variables(df, c(aval = .var, variables[1:3]))
  assert_list_of_variables(variables)

  # The counting itself is delegated to the helper; `...` is accepted but not forwarded
  h_worsen_counter(df, variables$id, .var, variables$baseline_var, variables$direction_var)
}

#' @describeIn abnormal_lab_worsen_by_baseline Formatted analysis function which is used as `afun`
#'   in `count_abnormal_lab_worsen_by_baseline()`.
#'
#' @return
#' * `a_count_abnormal_lab_worsen_by_baseline()` returns the corresponding list with
#'   formatted [rtables::CellValue()].
#'
#' @keywords internal
a_count_abnormal_lab_worsen_by_baseline <- function(df,
                                                    ...,
                                                    .stats = NULL,
                                                    .stat_names = NULL,
                                                    .formats = NULL,
                                                    .labels = NULL,
                                                    .indent_mods = NULL) {
  # Separate the layout-supplied parameters from the remaining `...` arguments.
  # Only the parameter names are handed to the helper (presumably it looks up the
  # matching values - confirm in `retrieve_extra_afun_params()`). The entry is
  # then dropped so that it is not forwarded to the statistics function.
  dots_extra_args <- list(...)
  extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters))
  dots_extra_args$.additional_fun_parameters <- NULL

  # Split `.stats` into the full set of requested statistics (`all_stats`) and
  # any user-defined statistic functions (`custom_stats`)
  default_and_custom_stats_list <- .split_std_from_custom_stats(.stats)
  .stats <- default_and_custom_stats_list$all_stats
  custom_stat_functions <- default_and_custom_stats_list$custom_stats

  # Apply the default statistics function and any custom ones to `df`
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_count_abnormal_lab_worsen_by_baseline,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      df = list(df),
      extra_afun_params,
      dots_extra_args
    )
  )

  # Fill in formatting defaults. Each statistic can hold several named values,
  # so formats, labels and indents are resolved per statistic and per level.
  .stats <- get_stats(
    "abnormal_lab_worsen_by_baseline",
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions)
  )
  levels_per_stats <- lapply(x_stats, names)
  .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats)
  .labels <- get_labels_from_stats(.stats, .labels, levels_per_stats)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats)

  # Keep the selected statistics, flatten them to one entry per level, and name
  # the entries after the resolved formats so both line up
  x_stats <- x_stats[.stats] %>%
    .unlist_keep_nulls() %>%
    setNames(names(.formats))

  # Auto format handling (uses the row data and variable name supplied by the layout)
  .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  # Build the formatted rows; the labels are used both as row names and as row labels
  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = .labels %>% .unlist_keep_nulls(),
    .stat_names = .stat_names,
    .labels = .labels %>% .unlist_keep_nulls(),
    .indent_mods = .indent_mods %>% .unlist_keep_nulls()
  )
}

#' @describeIn abnormal_lab_worsen_by_baseline Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_lab_worsen_by_baseline()` returns a layout object suitable for passing to further layouting
#'   functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted
#'   rows containing the statistics from `s_count_abnormal_lab_worsen_by_baseline()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' # The direction variable, GRADDR, is based on metadata
#' adlb <- tern_ex_adlb %>%
#'   mutate(
#'     GRADDR = case_when(
#'       PARAMCD == "ALT" ~ "B",
#'       PARAMCD == "CRP" ~ "L",
#'       PARAMCD == "IGA" ~ "H"
#'     )
#'   ) %>%
#'   filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
#'
#' df <- h_adlb_worsen(
#'   adlb,
#'   worst_flag_low = c("WGRLOFL" = "Y"),
#'   worst_flag_high = c("WGRHIFL" = "Y"),
#'   direction_var = "GRADDR"
#' )
#'
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   add_colcounts() %>%
#'   split_rows_by("PARAMCD") %>%
#'   split_rows_by("GRADDR") %>%
#'   count_abnormal_lab_worsen_by_baseline(
#'     var = "ATOXGR",
#'     variables = list(
#'       id = "USUBJID",
#'       baseline_var = "BTOXGR",
#'       direction_var = "GRADDR"
#'     )
#'   ) %>%
#'   append_topleft("Direction of Abnormality") %>%
#'   build_table(df = df, alt_counts_df = tern_ex_adsl)
#'
#' @export
#' @order 2
count_abnormal_lab_worsen_by_baseline <- function(lyt,
                                                  var,
                                                  variables = list(
                                                    id = "USUBJID",
                                                    baseline_var = "BTOXGR",
                                                    direction_var = "GRADDR"
                                                  ),
                                                  na_str = default_na_str(),
                                                  nested = TRUE,
                                                  ...,
                                                  table_names = lifecycle::deprecated(),
                                                  .stats = "fraction",
                                                  .stat_names = NULL,
                                                  .formats = list(fraction = format_fraction),
                                                  .labels = NULL,
                                                  .indent_mods = NULL) {
  checkmate::assert_string(var)

  # Deprecated argument warning
  if (lifecycle::is_present(table_names)) {
    lifecycle::deprecate_warn(
      "0.9.8", "count_abnormal_lab_worsen_by_baseline(table_names)",
      details = "The argument has no effect on the output."
    )
  }

  # Process standard extra arguments; optional ones are only forwarded when supplied
  extra_args <- list(".stats" = .stats)
  if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names
  if (!is.null(.formats)) extra_args[[".formats"]] <- .formats
  if (!is.null(.labels)) extra_args[[".labels"]] <- .labels
  if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods

  # Process additional arguments to the statistic function.
  # `variables` and `...` are each wrapped in a list so that every argument
  # stays a single element: passing `...` straight to c() would splice a
  # vector- or list-valued argument into several elements with altered names.
  extra_args <- c(extra_args, list("variables" = variables), list(...))

  # Append additional info from layout to the analysis function: the extra
  # parameters are recorded in `extra_args` and added as formals to a local
  # copy of the analysis function
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(a_count_abnormal_lab_worsen_by_baseline) <- c(
    formals(a_count_abnormal_lab_worsen_by_baseline), extra_args[[".additional_fun_parameters"]]
  )

  analyze(
    lyt = lyt,
    vars = var,
    afun = a_count_abnormal_lab_worsen_by_baseline,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    show_labels = "hidden"
  )
}

#' Helper function to prepare ADLB with worst labs
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to prepare a `df` for generating the patient count shift table.
#'
#' @param adlb (`data.frame`)\cr ADLB data frame.
#' @param worst_flag_low (named `vector`)\cr worst low post-baseline lab grade flag variable. See how this is
#'   implemented in the following examples.
#' @param worst_flag_high (named `vector`)\cr worst high post-baseline lab grade flag variable. See how this is
#'   implemented in the following examples.
#' @param direction_var (`string`)\cr name of the direction variable specifying the direction of the shift table of
#'   interest. Only lab records flagged by `L`, `H` or `B` are included in the shift table.
#'   * `L`: low direction only
#'   * `H`: high direction only
#'   * `B`: both low and high directions
#'
#' @return `h_adlb_worsen()` returns the `adlb` `data.frame` containing only the
#'   worst labs specified according to `worst_flag_low` or `worst_flag_high` for the
#'   direction specified according to `direction_var`. For instance, for a lab that is
#'   needed for the low direction only, only records flagged by `worst_flag_low` are
#'   selected. For a lab that is needed for both low and high directions, the worst
#'   low records are selected for the low direction, and the worst high records are selected
#'   for the high direction.
#'
#' @seealso [abnormal_lab_worsen_by_baseline]
#'
#' @examples
#' library(dplyr)
#'
#' # The direction variable, GRADDR, is based on metadata
#' adlb <- tern_ex_adlb %>%
#'   mutate(
#'     GRADDR = case_when(
#'       PARAMCD == "ALT" ~ "B",
#'       PARAMCD == "CRP" ~ "L",
#'       PARAMCD == "IGA" ~ "H"
#'     )
#'   ) %>%
#'   filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
#'
#' df <- h_adlb_worsen(
#'   adlb,
#'   worst_flag_low = c("WGRLOFL" = "Y"),
#'   worst_flag_high = c("WGRHIFL" = "Y"),
#'   direction_var = "GRADDR"
#' )
#'
#' @export
h_adlb_worsen <- function(adlb,
                          worst_flag_low = NULL,
                          worst_flag_high = NULL,
                          direction_var) {
  # Purpose: keep only the worst post-baseline lab records of `adlb` and recode the direction variable from the
  # codes "H"/"L"/"B" to the labels "High"/"Low". Records coded "B" are returned twice, once per direction.
  #
  # Arguments:
  #   adlb            data frame holding `direction_var` and the flag columns named by the worst-flag vectors.
  #   worst_flag_low  named vector of length one (`c(<flag column> = <value>)`) selecting the worst low records.
  #   worst_flag_high same as `worst_flag_low`, for the worst high records.
  #   direction_var   name of the column holding the direction codes "H", "L" or "B".
  #
  # Returns: the selected rows of `adlb`, stacked in the order H, B (as High), L, B (as Low), with the variable
  # labels of `adlb` re-applied. Errors if neither worst-flag is supplied.
  checkmate::assert_string(direction_var)
  checkmate::assert_subset(as.character(unique(adlb[[direction_var]])), c("B", "L", "H"))
  assert_df_with_variables(adlb, list("Col" = direction_var))

  # Without any worst-flag there is nothing to select. This case used to fail only at the very end with
  # "object 'out' not found"; fail early with an actionable message instead.
  if (is.null(worst_flag_low) && is.null(worst_flag_high)) {
    stop("at least one of `worst_flag_low` and `worst_flag_high` must be specified")
  }

  # A flag column is only required for the directions that are actually present in the data.
  directions <- unique(adlb[[direction_var]])
  if (any(directions == "H")) {
    assert_df_with_variables(adlb, list("High" = names(worst_flag_high)))
  }
  if (any(directions == "L")) {
    assert_df_with_variables(adlb, list("Low" = names(worst_flag_low)))
  }
  if (any(directions == "B")) {
    assert_df_with_variables(
      adlb,
      list(
        "Low" = names(worst_flag_low),
        "High" = names(worst_flag_high)
      )
    )
  }

  # extract records flagged as worst post-baseline lab, either low or high or both
  worst_flag <- c(worst_flag_low, worst_flag_high)
  flagged_rows <- Reduce(
    union,
    Map(
      function(col, value) which(adlb[[col]] == value),
      names(worst_flag),
      worst_flag
    )
  )
  adlb_f <- adlb[flagged_rows, ]

  # Select the flagged records with direction code `code` whose worst-flag column matches `flag`, relabelling
  # the direction as `label`. Returns `NULL` (ignored by `rbind()`) when that flag was not supplied.
  select_worst <- function(code, flag, label) {
    if (is.null(flag)) {
      return(NULL)
    }
    subset_dir <- adlb_f[which(adlb_f[[direction_var]] == code), ]
    subset_dir[[direction_var]] <- rep(label, nrow(subset_dir))
    subset_dir[which(subset_dir[[names(flag)]] == flag), ]
  }

  # for labs requiring both high and low ("B"), data is duplicated and stacked: once as "High", once as "Low"
  out <- rbind(
    select_worst("H", worst_flag_high, "High"),
    select_worst("B", worst_flag_high, "High"),
    select_worst("L", worst_flag_low, "Low"),
    select_worst("B", worst_flag_low, "Low")
  )

  # re-apply the variable labels of the input to the stacked result
  formatters::var_labels(out) <- formatters::var_labels(adlb_f, fill = FALSE)

  out
}

#' Helper function to analyze patients for `s_count_abnormal_lab_worsen_by_baseline()`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to count the number of patients and the fraction of patients according to
#' highest post-baseline lab grade variable `.var`, baseline lab grade variable `baseline_var`,
#' and the direction of interest specified in `direction_var`.
#'
#' @inheritParams argument_convention
#' @inheritParams h_adlb_worsen
#' @param baseline_var (`string`)\cr name of the baseline lab grade variable.
#'
#' @return The counts and fraction of patients
#'   whose worst post-baseline lab grades are worse than their baseline grades, for
#'   post-baseline worst grades "1", "2", "3", "4" and "Any".
#'
#' @seealso [abnormal_lab_worsen_by_baseline]
#'
#' @examples
#' library(dplyr)
#'
#' # The direction variable, GRADDR, is based on metadata
#' adlb <- tern_ex_adlb %>%
#'   mutate(
#'     GRADDR = case_when(
#'       PARAMCD == "ALT" ~ "B",
#'       PARAMCD == "CRP" ~ "L",
#'       PARAMCD == "IGA" ~ "H"
#'     )
#'   ) %>%
#'   filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
#'
#' df <- h_adlb_worsen(
#'   adlb,
#'   worst_flag_low = c("WGRLOFL" = "Y"),
#'   worst_flag_high = c("WGRHIFL" = "Y"),
#'   direction_var = "GRADDR"
#' )
#'
#' # `h_worsen_counter`
#' h_worsen_counter(
#'   df %>% filter(PARAMCD == "CRP" & GRADDR == "Low"),
#'   id = "USUBJID",
#'   .var = "ATOXGR",
#'   baseline_var = "BTOXGR",
#'   direction_var = "GRADDR"
#' )
#'
#' @export
h_worsen_counter <- function(df, id, .var, baseline_var, direction_var) {
  # Purpose: count, per post-baseline grade and overall ("Any"), the patients whose worst post-baseline grade
  # (`.var`) is worse than their baseline grade (`baseline_var`) in the single direction found in `direction_var`.
  #
  # Returns: `list(fraction = ...)` whose elements are `c(num = , denom = )` vectors, named by grade position
  # ("1", "2", ...) followed by "Any".
  checkmate::assert_string(id)
  checkmate::assert_string(.var)
  checkmate::assert_string(baseline_var)
  checkmate::assert_scalar(unique(df[[direction_var]]))
  checkmate::assert_subset(unique(df[[direction_var]]), c("High", "Low"))
  assert_df_with_variables(df, list(val = c(id, .var, baseline_var, direction_var)))

  # obtain directionality from the validated input, *before* any record is dropped: if all post-baseline values
  # are missing the filtered data has no rows and the direction could no longer be derived from it
  direction <- unique(df[[direction_var]])

  # remove post-baseline missing
  df <- df[df[[.var]] != "<Missing>", ]

  if (direction == "Low") {
    grade <- -1:-4
    worst_grade <- -4
  } else if (direction == "High") {
    grade <- 1:4
    worst_grade <- 4
  }

  if (nrow(df) > 0) {
    by_grade <- lapply(grade, function(i) {
      # keep records whose baseline value is less severe than i (in this direction) or <Missing>
      df_temp <- df[df[[baseline_var]] %in% c((i + sign(i) * -1):(-1 * worst_grade), "<Missing>"), ]
      # num: number of patients with post-baseline worst lab equal to i
      num <- length(unique(df_temp[df_temp[[.var]] %in% i, id, drop = TRUE]))
      # denom: number of patients with baseline values less than i or <Missing> and post-baseline in the same direction
      denom <- length(unique(df_temp[[id]]))
      c(num = num, denom = denom)
    })
  } else {
    # no post-baseline record left: a single all-zero entry
    by_grade <- list(c(num = 0, denom = 0))
  }

  names(by_grade) <- as.character(seq_along(by_grade))

  # baseline grade less than the worst grade, or missing
  df_temp <- df[!df[[baseline_var]] %in% worst_grade, ]

  # denom: number of patients with baseline values less than 4 or <Missing> and post-baseline in the same direction
  denom <- length(unique(df_temp[, id, drop = TRUE]))

  in_direction <- df_temp[[.var]] %in% grade
  baseline_missing <- df_temp[[baseline_var]] == "<Missing>"

  # condition 1: missing baseline and in the direction of abnormality
  con1 <- which(baseline_missing & in_direction)

  # condition 2: baseline present and post-baseline value worse than baseline.
  # `pos_nm` holds row positions *within df_temp*; the comparison is done on that subset only (so "<Missing>" is
  # never coerced to numeric) and the hits are mapped back through `pos_nm`, so that `con1` and `con2` index the
  # same data frame when they are combined below.
  pos_nm <- which(!baseline_missing & in_direction)
  post_nm <- as.numeric(as.character(df_temp[[.var]][pos_nm]))
  base_nm <- as.numeric(as.character(df_temp[[baseline_var]][pos_nm]))
  is_worse <- if (direction == "Low") post_nm < base_nm else post_nm > base_nm
  con2 <- pos_nm[which(is_worse)]

  # number of patients satisfying either condition 1 or 2
  num <- length(unique(df_temp[union(con1, con2), id, drop = TRUE]))

  list(fraction = c(by_grade, list("Any" = c(num = num, denom = denom))))
}

#' Count the number of patients with particular flags
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [count_patients_with_flags()] creates a layout element to calculate counts of patients for
#' which user-specified flags are present.
#'
#' This function analyzes primary analysis variable `var` which indicates unique subject identifiers. Flags
#' variables to analyze are specified by the user via the `flag_variables` argument, and must either take value
#' `TRUE` (flag present) or `FALSE` (flag absent) for each record.
#'
#' If there are multiple records with the same flag present for a patient, only one occurrence is counted.
#'
#' @inheritParams argument_convention
#' @param flag_variables (`character`)\cr a vector specifying the names of `logical` variables from analysis dataset
#'   used for counting the number of unique identifiers.
#' @param flag_labels (`character`)\cr vector of labels to use for flag variables. If any labels are also specified via
#'   the `.labels` parameter, the `.labels` values will take precedence and replace these labels.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("count_patients_with_flags"), type = "sh")``
#'
#' @seealso [count_patients_with_event]
#'
#' @name count_patients_with_flags
#' @order 1
NULL

#' @describeIn count_patients_with_flags Statistics function which counts the number of patients for which
#'   a particular flag variable is `TRUE`.
#'
#' @inheritParams analyze_variables
#' @param .var (`string`)\cr name of the column that contains the unique identifier.
#'
#' @note If `flag_labels` is not specified, variables labels will be extracted from `df`. If variables are not
#'   labeled, variable names will be used instead. Alternatively, a named `vector` can be supplied to
#'   `flag_variables` such that within each name-value pair the name corresponds to the variable name and the value is
#'   the label to use for this variable.
#'
#' @return
#' * `s_count_patients_with_flags()` returns the count and the fraction of unique identifiers with each particular
#'   flag as a list of statistics `n`, `count`, `count_fraction`, and `n_blq`, with one element per flag.
#'
#' @examples
#' # `s_count_patients_with_flags()`
#'
#' s_count_patients_with_flags(
#'   adae,
#'   "SUBJID",
#'   flag_variables = c("fl1", "fl2", "fl3", "fl4"),
#'   denom = "N_col",
#'   .N_col = 1000
#' )
#'
#' @export
s_count_patients_with_flags <- function(df,
                                        .var,
                                        .N_col = ncol(df), # nolint
                                        .N_row = nrow(df), # nolint
                                        ...,
                                        flag_variables,
                                        flag_labels = NULL,
                                        denom = c("n", "N_col", "N_row")) {
  checkmate::assert_character(flag_variables)

  # Resolve the display name of each flag: explicit `flag_labels` if given, otherwise the values of a named
  # `flag_variables` vector (whose names are then the column names), otherwise the variable labels found in `df`.
  if (!is.null(flag_labels)) {
    checkmate::assert_character(flag_labels, len = length(flag_variables), any.missing = FALSE)
    flag_names <- flag_labels
  } else if (!is.null(names(flag_variables))) {
    flag_names <- unname(flag_variables)
    flag_variables <- names(flag_variables)
  } else {
    flag_names <- formatters::var_labels(df[flag_variables], fill = TRUE)
  }
  checkmate::assert_subset(flag_variables, colnames(df))

  # Statistics for a single flag column: the identifiers with the flag set are counted among all identifiers.
  count_one_flag <- function(flag) {
    flagged_rows <- which(df[[flag]])
    flagged_ids <- as.character(unique(df[flagged_rows, ][[.var]]))
    s_count_values(
      x = as.character(unique(df[[.var]])),
      values = flagged_ids,
      denom = denom,
      .N_col = .N_col,
      .N_row = .N_row
    )
  }

  # One column per flag, then transposed so that each statistic becomes one list element.
  stats_by_flag <- sapply(flag_variables, count_one_flag)
  colnames(stats_by_flag) <- flag_names
  result <- as.list(data.frame(t(stats_by_flag)))

  # With a single flag, the first three statistics are named after that flag explicitly.
  if (length(flag_variables) == 1) {
    for (i in seq_len(3)) names(result[[i]]) <- flag_names[1]
  }
  result
}

#' @describeIn count_patients_with_flags Formatted analysis function which is used as `afun`
#'   in `count_patients_with_flags()`.
#'
#' @return
#' * `a_count_patients_with_flags()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_count_patients_with_flags(
#'   adae,
#'   .N_col = 10L,
#'   .N_row = 10L,
#'   .var = "USUBJID",
#'   flag_variables = c("fl1", "fl2", "fl3", "fl4")
#' )
#'
#' @export
a_count_patients_with_flags <- function(df,
                                        labelstr = "",
                                        ...,
                                        .stats = NULL,
                                        .stat_names = NULL,
                                        .formats = NULL,
                                        .labels = NULL,
                                        .indent_mods = NULL) {
  # Formatted analysis function: runs `s_count_patients_with_flags()` (plus any custom statistic functions) on
  # `df`, then resolves formats, labels and indentation per statistic and flag and returns the rows built by
  # `in_rows()`. `labelstr` is part of the signature but is not used in this body.

  # Check for additional parameters to the statistics function
  dots_extra_args <- list(...)
  # NOTE(review): `retrieve_extra_afun_params()` is defined elsewhere; presumably it collects the values of the
  # layout-supplied parameters named in `.additional_fun_parameters` — confirm before moving this call.
  extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters))
  # The names have been consumed above; drop the entry so it is not forwarded to the statistics function.
  dots_extra_args$.additional_fun_parameters <- NULL
  flag_variables <- dots_extra_args[["flag_variables"]]
  flag_labels <- dots_extra_args[["flag_labels"]]

  # Local copy only (the statistics function still receives the original via `dots_extra_args`): make sure
  # `flag_variables` is a named vector, looking the values up in the variable labels of `df` when unnamed.
  if (is.null(names(flag_variables))) flag_variables <- formatters::var_labels(df, fill = TRUE)[flag_variables]
  # Without explicit labels, the values of `flag_variables` serve as row labels.
  if (is.null(flag_labels)) flag_labels <- flag_variables

  # Check for user-defined functions
  default_and_custom_stats_list <- .split_std_from_custom_stats(.stats)
  .stats <- default_and_custom_stats_list$all_stats
  custom_stat_functions <- default_and_custom_stats_list$custom_stats

  # Apply statistics function
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_count_patients_with_flags,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      df = list(df),
      extra_afun_params,
      dots_extra_args
    )
  )

  # Fill in formatting defaults
  .stats <- get_stats("count_patients_with_flags", stats_in = .stats, custom_stats_in = names(custom_stat_functions))
  # Every selected statistic is reported once per flag; the names of `flag_variables` act as the levels.
  levels_per_stats <- rep(list(names(flag_variables)), length(.stats)) %>% stats::setNames(.stats)
  .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats)
  .labels <- get_labels_from_stats(
    .stats, .labels, levels_per_stats,
    tern_defaults = flag_labels %>% stats::setNames(names(flag_variables))
  )
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats)

  # Keep the selected statistics, flatten them, and rename the entries after the resolved formats.
  x_stats <- x_stats[.stats] %>%
    .unlist_keep_nulls() %>%
    setNames(names(.formats))

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = names(.labels),
    .stat_names = .stat_names,
    .labels = .labels %>% .unlist_keep_nulls(),
    .indent_mods = .indent_mods %>% .unlist_keep_nulls()
  )
}

#' @describeIn count_patients_with_flags Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_patients_with_flags()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_patients_with_flags()` to the table layout.
#'
#' @examples
#' # Add labelled flag variables to analysis dataset.
#' adae <- tern_ex_adae %>%
#'   dplyr::mutate(
#'     fl1 = TRUE %>% with_label("Total AEs"),
#'     fl2 = (TRTEMFL == "Y") %>%
#'       with_label("Total number of patients with at least one adverse event"),
#'     fl3 = (TRTEMFL == "Y" & AEOUT == "FATAL") %>%
#'       with_label("Total number of patients with fatal AEs"),
#'     fl4 = (TRTEMFL == "Y" & AEOUT == "FATAL" & AEREL == "Y") %>%
#'       with_label("Total number of patients with related fatal AEs")
#'   )
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_patients_with_flags(
#'     "SUBJID",
#'     flag_variables = c("fl1", "fl2", "fl3", "fl4"),
#'     denom = "N_col"
#'   )
#'
#' build_table(lyt, adae, alt_counts_df = tern_ex_adsl)
#'
#' @export
#' @order 2
count_patients_with_flags <- function(lyt,
                                      var,
                                      flag_variables,
                                      flag_labels = NULL,
                                      var_labels = var,
                                      show_labels = "hidden",
                                      riskdiff = FALSE,
                                      na_str = default_na_str(),
                                      nested = TRUE,
                                      ...,
                                      table_names = paste0("tbl_flags_", var),
                                      .stats = "count_fraction",
                                      .stat_names = NULL,
                                      .formats = list(count_fraction = format_count_fraction_fixed_dp),
                                      .indent_mods = NULL,
                                      .labels = NULL) {
  checkmate::assert_flag(riskdiff)

  # Risk-difference tables go through `afun_riskdiff`; otherwise the analysis function is used directly.
  afun <- if (riskdiff) afun_riskdiff else a_count_patients_with_flags

  # Standard extra arguments: `.stats` is always forwarded, the remaining ones only when they are set.
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  extra_args <- c(list(.stats = .stats), Filter(Negate(is.null), optional_args))

  # Arguments for the statistics function. With `riskdiff`, the analysis function to wrap is handed over too.
  riskdiff_args <- if (riskdiff) {
    list(afun = list("s_count_patients_with_flags" = a_count_patients_with_flags))
  }
  extra_args <- c(
    extra_args,
    flag_variables = list(flag_variables),
    flag_labels = list(flag_labels),
    riskdiff_args,
    ...
  )

  # Extend the formals of the (local copy of the) analysis function with the additional layout parameters.
  additional_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- additional_params
  formals(afun) <- c(formals(afun), additional_params)

  analyze(
    lyt = lyt,
    vars = var,
    afun = afun,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Count specific values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [count_values()] creates a layout element to calculate counts of specific values within a
#' variable of interest.
#'
#' This function analyzes one or more variables of interest supplied as a vector to `vars`. Values to
#' count for variable(s) in `vars` can be given as a vector via the `values` argument. One row of
#' counts will be generated for each variable.
#'
#' @inheritParams argument_convention
#' @param values (`character`)\cr specific values that should be counted.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("count_values"), type = "sh")``
#'
#' @note
#' * For `factor` variables, `s_count_values` converts `x` and `values` to `character` and forwards to the
#'   `character` method; `values` are not checked against the levels of `x`.
#' * For `count_values()`, variable labels are shown when there is more than one element in `vars`,
#'   otherwise they are hidden.
#'
#' @name count_values
#' @order 1
NULL

#' @describeIn count_values S3 generic function to count values.
#'
#' @inheritParams s_summary.logical
#'
#' @return
#' * `s_count_values()` returns output of [s_summary()] for specified values of a non-numeric variable.
#'
#' @export
s_count_values <- function(x,
                           values,
                           na.rm = TRUE, # nolint
                           denom = c("n", "N_col", "N_row"),
                           ...) {
  # S3 generic: the method is selected by the class of the first argument, `x`.
  UseMethod("s_count_values")
}

#' @describeIn count_values Method for `character` class.
#'
#' @method s_count_values character
#'
#' @examples
#' # `s_count_values.character`
#' s_count_values(x = c("a", "b", "a"), values = "a")
#' s_count_values(x = c("a", "b", "a", NA, NA), values = "b", na.rm = FALSE)
#'
#' @export
s_count_values.character <- function(x,
                                     values = "Y",
                                     na.rm = TRUE, # nolint
                                     ...) {
  checkmate::assert_character(values)

  # Optionally discard missing observations before matching.
  observed <- if (na.rm) x[!is.na(x)] else x

  # Logical indicator of membership in `values`; the summary of this indicator is the result.
  is_in_values <- observed %in% values
  s_summary(is_in_values, na_rm = na.rm, ...)
}

#' @describeIn count_values Method for `factor` class. This makes an automatic
#'   conversion to `character` and then forwards to the method for characters.
#'
#' @method s_count_values factor
#'
#' @examples
#' # `s_count_values.factor`
#' s_count_values(x = factor(c("a", "b", "a")), values = "a")
#'
#' @export
s_count_values.factor <- function(x,
                                  values = "Y",
                                  ...) {
  # Both the data and the values of interest are converted to character, then the generic is called again
  # so that the `character` method does the counting.
  x_chr <- as.character(x)
  values_chr <- as.character(values)
  s_count_values(x_chr, values = values_chr, ...)
}

#' @describeIn count_values Method for `logical` class.
#'
#' @method s_count_values logical
#'
#' @examples
#' # `s_count_values.logical`
#' s_count_values(x = c(TRUE, FALSE, TRUE))
#'
#' @export
s_count_values.logical <- function(x, values = TRUE, ...) {
  checkmate::assert_logical(values)

  # Compare on the character representation ("TRUE"/"FALSE") via the `character` method.
  x_chr <- as.character(x)
  values_chr <- as.character(values)
  s_count_values(x_chr, values = values_chr, ...)
}

#' @describeIn count_values Formatted analysis function which is used as `afun`
#'   in `count_values()`.
#'
#' @return
#' * `a_count_values()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # `a_count_values`
#' a_count_values(x = factor(c("a", "b", "a")), values = "a", .N_col = 10, .N_row = 10)
#'
#' @export
a_count_values <- function(x,
                           ...,
                           .stats = NULL,
                           .stat_names = NULL,
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = NULL) {
  # Formatted analysis function: runs `s_count_values()` (plus any custom statistic functions) on `x`, then
  # resolves formats, labels and indentation for the selected statistics and returns the rows built by `in_rows()`.

  # Check for additional parameters to the statistics function
  dots_extra_args <- list(...)
  # NOTE(review): `retrieve_extra_afun_params()` is defined elsewhere; presumably it collects the values of the
  # layout-supplied parameters named in `.additional_fun_parameters` — confirm before moving this call.
  extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters))
  # The names have been consumed above; drop the entry so it is not forwarded to the statistics function.
  dots_extra_args$.additional_fun_parameters <- NULL

  # Check for user-defined functions
  default_and_custom_stats_list <- .split_std_from_custom_stats(.stats)
  .stats <- default_and_custom_stats_list$all_stats
  custom_stat_functions <- default_and_custom_stats_list$custom_stats

  # Main statistic calculations
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_count_values,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      x = list(x),
      extra_afun_params,
      dots_extra_args
    )
  )

  # Fill in formatting defaults
  # The statistics are resolved against the "analyze_vars_counts" group of `get_stats()`.
  .stats <- get_stats("analyze_vars_counts", stats_in = .stats, custom_stats_in = names(custom_stat_functions))
  .formats <- get_formats_from_stats(.stats, .formats)
  .labels <- get_labels_from_stats(.stats, .labels)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  # Keep only the selected statistics, in the resolved order.
  x_stats <- x_stats[.stats]

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = names(.labels),
    .stat_names = .stat_names,
    .labels = .labels %>% .unlist_keep_nulls(),
    .indent_mods = .indent_mods %>% .unlist_keep_nulls()
  )
}

#' @describeIn count_values Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_values()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_values()` to the table layout.
#'
#' @examples
#' # `count_values`
#' basic_table() %>%
#'   count_values("Species", values = "setosa") %>%
#'   build_table(iris)
#'
#' @export
#' @order 2
count_values <- function(lyt,
                         vars,
                         values,
                         na_str = default_na_str(),
                         na_rm = TRUE,
                         nested = TRUE,
                         ...,
                         table_names = vars,
                         .stats = "count_fraction",
                         .stat_names = NULL,
                         .formats = c(count_fraction = "xx (xx.xx%)", count = "xx"),
                         .labels = c(count_fraction = paste(values, collapse = ", ")),
                         .indent_mods = NULL) {
  # Standard extra arguments: `.stats` is always forwarded, the remaining ones only when they are set.
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  extra_args <- c(list(.stats = .stats), Filter(Negate(is.null), optional_args))

  # Arguments for the statistics function.
  extra_args <- c(
    extra_args,
    na_rm = na_rm, values = list(values),
    ...
  )

  # Extend the formals of the (local copy of the) analysis function with the additional layout parameters.
  additional_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- additional_params
  formals(a_count_values) <- c(formals(a_count_values), additional_params)

  # Variable labels are only shown when several variables are analyzed.
  label_visibility <- if (length(vars) > 1) "visible" else "hidden"

  analyze(
    lyt = lyt,
    vars = vars,
    afun = a_count_values,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    show_labels = label_visibility,
    table_names = table_names
  )
}

#' Missing data
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Substitute missing data with a string or factor level.
#'
#' @param x (`factor` or `character`)\cr values for which any missing values should be substituted.
#' @param label (`string`)\cr string that missing data should be replaced with.
#' @param drop_na (`flag`)\cr if `TRUE` and `x` is a factor, any levels
#'   that are only `label` will be dropped.
#'
#' @return `x` with any `NA` values substituted by `label`.
#'
#' @examples
#' explicit_na(c(NA, "a", "b"))
#' is.na(explicit_na(c(NA, "a", "b")))
#'
#' explicit_na(factor(c(NA, "a", "b")))
#' is.na(explicit_na(factor(c(NA, "a", "b"))))
#'
#' explicit_na(sas_na(c("a", "")))
#'
#' explicit_na(factor(levels = c(NA, "a")))
#' explicit_na(factor(levels = c(NA, "a")), drop_na = TRUE) # previous default
#'
#' @export
explicit_na <- function(x, label = default_na_str(), drop_na = default_drop_na()) {
  checkmate::assert_string(label, na.ok = TRUE)
  checkmate::assert_flag(drop_na)

  # Factors: missing values become the level `label`; with `drop_na` that level is dropped again when unused.
  if (is.factor(x)) {
    out <- forcats::fct_na_value_to_level(x, label)
    if (drop_na) {
      out <- forcats::fct_drop(out, only = label)
    }
    return(out)
  }

  if (!is.character(x)) {
    stop("only factors and character vectors allowed")
  }

  # Character vectors: plain in-place substitution.
  x[is.na(x)] <- label
  x
}
#' @describeIn explicit_na should `NA` values without a dedicated level be dropped?
#'
#' @return
#' * `default_drop_na()` returns the (`flag`) used as default value for the `drop_na` argument of
#'   `explicit_na()`, as stored in the option `"tern_default_drop_na"`.
#'
#' @export
default_drop_na <- function() {
  # Value of the option "tern_default_drop_na"; falls back to `TRUE` when the option is not set.
  stored <- getOption("tern_default_drop_na")
  if (is.null(stored)) TRUE else stored
}

#' @describeIn explicit_na Setter for the default value of the `drop_na` argument. Sets the
#'   option `"tern_default_drop_na"` within the R environment.
#'
#' @return
#' * `set_default_drop_na()` is called for its side effect of setting the option `"tern_default_drop_na"`.
#'
#' @export
set_default_drop_na <- function(drop_na) {
  checkmate::assert_flag(drop_na, null.ok = TRUE)

  # Store the flag in the option read by `default_drop_na()`; a `NULL` value unsets the option.
  # `options()` hands back the previous setting, which is returned invisibly.
  previous <- options(tern_default_drop_na = drop_na)
  invisible(previous)
}

#' Convert strings to `NA`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' SAS imports missing data as empty strings or strings with whitespaces only. This helper function can be used to
#' convert these values to `NA`s.
#'
#' @inheritParams explicit_na
#' @param empty (`flag`)\cr if `TRUE`, empty strings get replaced by `NA`.
#' @param whitespaces (`flag`)\cr if `TRUE`, strings made from only whitespaces get replaced with `NA`.
#'
#' @return `x` with `""` and/or whitespace-only values substituted by `NA`, depending on the values of
#'   `empty` and `whitespaces`.
#'
#' @examples
#' sas_na(c("1", "", " ", "   ", "b"))
#' sas_na(factor(c("", " ", "b")))
#'
#' is.na(sas_na(c("1", "", " ", "   ", "b")))
#'
#' @export
sas_na <- function(x, empty = TRUE, whitespaces = TRUE) {
  checkmate::assert_flag(empty)
  checkmate::assert_flag(whitespaces)

  # Only factors and character vectors are supported.
  if (!is.factor(x) && !is.character(x)) {
    stop("only factors and character vectors allowed")
  }

  if (is.factor(x)) {
    # Work on the levels: a level set to `NA` is removed, which turns the affected values into `NA`.
    is_empty_level <- levels(x) == ""
    if (empty && any(is_empty_level)) {
      levels(x)[is_empty_level] <- NA
    }

    # Evaluated on the levels that remain after the previous step.
    is_ws_level <- grepl("^\\s+$", levels(x))
    if (whitespaces && any(is_ws_level)) {
      levels(x)[is_ws_level] <- NA
    }

    return(x)
  }

  # Character vectors: replace the matching elements directly.
  if (empty) {
    x[x == ""] <- NA_character_
  }
  if (whitespaces) {
    x[grepl("^\\s+$", x)] <- NA_character_
  }

  x
}

#' Helper functions for multivariate logistic regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions used in calculations for logistic regression.
#'
#' @inheritParams argument_convention
#' @param fit_glm (`glm`)\cr logistic regression model fitted by [stats::glm()] with "binomial" family.
#'   Limited functionality is also available for conditional logistic regression models fitted by
#'   [survival::clogit()], currently this is used only by [extract_rsp_biomarkers()].
#' @param x (`character`)\cr a variable or interaction term in `fit_glm` (depending on the helper function used).
#'
#' @examples
#' library(dplyr)
#' library(broom)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' @name h_logistic_regression
NULL

#' @describeIn h_logistic_regression Helper function to extract interaction variable names from a fitted
#'   model assuming only one interaction term.
#'
#' @return Vector of names of interaction variables.
#'
#' @export
h_get_interaction_vars <- function(fit_glm) {
  checkmate::assert_class(fit_glm, "glm")

  # Term labels and their interaction order come from the same terms object.
  model_terms <- stats::terms(fit_glm)
  is_two_way <- attr(model_terms, "order") == 2
  interaction_term <- attr(model_terms, "term.labels")[is_two_way]

  # Exactly one second-order term is expected; anything else fails here.
  checkmate::assert_string(interaction_term)

  # "A:B" -> c("A", "B")
  pieces <- strsplit(interaction_term, split = ":")
  pieces[[1]]
}

#' @describeIn h_logistic_regression Helper function to get the right coefficient name from the
#'   interaction variable names and the given levels. The main value here is that the order
#'   of first and second variable is checked in the `interaction_vars` input.
#'
#' @param interaction_vars (`character(2)`)\cr interaction variable names.
#' @param first_var_with_level (`character(2)`)\cr the first variable name with the interaction level.
#' @param second_var_with_level (`character(2)`)\cr the second variable name with the interaction level.
#'
#' @return Name of coefficient.
#'
#' @export
h_interaction_coef_name <- function(interaction_vars,
                                    first_var_with_level,
                                    second_var_with_level) {
  checkmate::assert_character(interaction_vars, len = 2, any.missing = FALSE)
  checkmate::assert_character(first_var_with_level, len = 2, any.missing = FALSE)
  checkmate::assert_character(second_var_with_level, len = 2, any.missing = FALSE)
  checkmate::assert_subset(c(first_var_with_level[1], second_var_with_level[1]), interaction_vars)

  # Both inputs naming the same variable cannot form a valid interaction coefficient.
  # Previously this either fell through both branches below (silently returning `NULL`)
  # or produced a meaningless name such as "Ax:Ay"; fail loudly instead.
  if (first_var_with_level[1] == second_var_with_level[1]) {
    stop(
      "`first_var_with_level` and `second_var_with_level` must refer to the two different ",
      "interaction variables"
    )
  }

  # Variable name and level are pasted without separator, e.g. c("ARMCD", "ARM B") -> "ARMCDARM B".
  first_name <- paste(first_var_with_level, collapse = "")
  second_name <- paste(second_var_with_level, collapse = "")

  # The part belonging to the first interaction variable always comes first in the coefficient name.
  if (first_var_with_level[1] == interaction_vars[1]) {
    paste(first_name, second_name, sep = ":")
  } else if (second_var_with_level[1] == interaction_vars[1]) {
    paste(second_name, first_name, sep = ":")
  } else {
    # Only reachable for degenerate `interaction_vars`; never return `NULL` silently.
    stop("none of the supplied variables matches the first interaction variable")
  }
}

#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates
#'   for the case when both the odds ratio and the interaction variable are categorical.
#'
#' @param odds_ratio_var (`string`)\cr the odds ratio variable.
#' @param interaction_var (`string`)\cr the interaction variable.
#'
#' @return Odds ratio.
#'
#' @export
h_or_cat_interaction <- function(odds_ratio_var,
                                 interaction_var,
                                 fit_glm,
                                 conf_level = 0.95) {
  interaction_vars <- h_get_interaction_vars(fit_glm)
  checkmate::assert_string(odds_ratio_var)
  checkmate::assert_string(interaction_var)
  checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars)
  checkmate::assert_vector(interaction_vars, len = 2)

  factor_levels <- fit_glm$xlevels
  betas <- stats::coef(fit_glm)
  cov_mat <- stats::vcov(fit_glm)

  # Odds ratio and Wald-type interval for the sum of the selected coefficients.
  # With a single coefficient the estimate keeps its coefficient name.
  or_with_ci <- function(selected) {
    if (length(selected) > 1) {
      weights <- matrix(c(1, 1), nrow = 1)
      log_or <- as.numeric(weights %*% betas[selected])
      std_err <- sqrt(as.numeric(weights %*% cov_mat[selected, selected] %*% t(weights)))
    } else {
      log_or <- betas[selected]
      std_err <- sqrt(as.numeric(cov_mat[selected, selected]))
    }
    list(
      or = exp(log_or),
      ci = exp(log_or + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * std_err)
    )
  }

  # Outer list: non-reference levels of the odds ratio variable.
  # Inner list: all levels of the interaction variable.
  result <- list()
  for (or_level in factor_levels[[odds_ratio_var]][-1]) {
    per_level <- list()
    for (inter_level in factor_levels[[interaction_var]]) {
      selected <- paste0(odds_ratio_var, or_level)
      # At the reference level of the interaction variable only the main effect applies.
      if (inter_level != factor_levels[[interaction_var]][1]) {
        selected <- c(
          selected,
          h_interaction_coef_name(
            interaction_vars,
            c(odds_ratio_var, or_level),
            c(interaction_var, inter_level)
          )
        )
      }
      per_level[[inter_level]] <- or_with_ci(selected)
    }
    result[[or_level]] <- per_level
  }
  result
}

#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates
#'   for the case when either the odds ratio or the interaction variable is continuous.
#'
#' @param at (`numeric` or `NULL`)\cr optional values for the interaction variable. Otherwise
#'   the median is used.
#'
#' @return Odds ratio.
#'
#' @note We don't provide a function for the case when both variables are continuous because
#'   this does not arise in this table, as the treatment arm variable will always be involved
#'   and categorical.
#'
#' @export
h_or_cont_interaction <- function(odds_ratio_var,
                                  interaction_var,
                                  fit_glm,
                                  at = NULL,
                                  conf_level = 0.95) {
  interaction_vars <- h_get_interaction_vars(fit_glm)
  checkmate::assert_string(odds_ratio_var)
  checkmate::assert_string(interaction_var)
  checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars)
  checkmate::assert_vector(interaction_vars, len = 2)
  checkmate::assert_numeric(at, min.len = 1, null.ok = TRUE, any.missing = FALSE)

  factor_levels <- fit_glm$xlevels
  betas <- stats::coef(fit_glm)
  cov_mat <- stats::vcov(fit_glm)
  var_classes <- attr(fit_glm$terms, "dataClasses")

  # `at` only makes sense when the interaction variable is the continuous one.
  if (!is.null(at)) {
    checkmate::assert_set_equal(var_classes[interaction_var], "numeric")
  }

  # Odds ratio and Wald-type interval for a linear combination of the selected coefficients.
  # `weights` is only used when two coefficients are combined.
  or_with_ci <- function(selected, weights) {
    if (length(selected) > 1) {
      xvec <- t(weights)
      log_or <- as.numeric(xvec %*% betas[selected])
      std_err <- sqrt(as.numeric(xvec %*% cov_mat[selected, selected] %*% t(xvec)))
    } else {
      log_or <- betas[selected]
      std_err <- sqrt(as.numeric(cov_mat[selected, selected]))
    }
    list(
      or = exp(log_or),
      ci = exp(log_or + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * std_err)
    )
  }

  result <- list()
  if (var_classes[interaction_var] == "numeric") {
    # Continuous interaction variable: odds ratios of each non-reference level of the
    # (categorical) odds ratio variable, evaluated at the requested values.
    if (is.null(at)) {
      at <- ceiling(stats::median(fit_glm$model[[interaction_var]]))
    }

    for (or_level in factor_levels[[odds_ratio_var]][-1]) {
      per_value <- list()
      for (value in at) {
        selected <- paste0(odds_ratio_var, or_level)
        # At a value of zero the interaction coefficient does not contribute.
        if (value != 0) {
          selected <- c(
            selected,
            h_interaction_coef_name(
              interaction_vars,
              c(odds_ratio_var, or_level),
              c(interaction_var, "")
            )
          )
        }
        per_value[[as.character(value)]] <- or_with_ci(selected, c(1, value))
      }
      result[[or_level]] <- per_value
    }
  } else {
    # Continuous odds ratio variable: one odds ratio per level of the (categorical)
    # interaction variable.
    checkmate::assert_set_equal(var_classes[odds_ratio_var], "numeric")
    checkmate::assert_set_equal(var_classes[interaction_var], "factor")
    for (inter_level in factor_levels[[interaction_var]]) {
      selected <- odds_ratio_var
      # At the reference level only the main effect applies.
      if (inter_level != factor_levels[[interaction_var]][1]) {
        selected <- c(
          selected,
          h_interaction_coef_name(
            interaction_vars,
            c(odds_ratio_var, ""),
            c(interaction_var, inter_level)
          )
        )
      }
      result[[inter_level]] <- or_with_ci(selected, c(1, 1))
    }
  }
  result
}

#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates
#'   in case of an interaction. This is a wrapper for [h_or_cont_interaction()] and
#'   [h_or_cat_interaction()].
#'
#' @return Odds ratio.
#'
#' @export
h_or_interaction <- function(odds_ratio_var,
                             interaction_var,
                             fit_glm,
                             at = NULL,
                             conf_level = 0.95) {
  # Data classes of the two variables involved, as recorded in the model terms.
  var_classes <- attr(fit_glm$terms, "dataClasses")[c(odds_ratio_var, interaction_var)]

  # At least one continuous variable: delegate to the continuous helper.
  if (any(var_classes == "numeric")) {
    return(
      h_or_cont_interaction(
        odds_ratio_var,
        interaction_var,
        fit_glm,
        at = at,
        conf_level = conf_level
      )
    )
  }

  # Both categorical: delegate to the categorical helper (`at` is not applicable).
  if (all(var_classes == "factor")) {
    return(
      h_or_cat_interaction(
        odds_ratio_var,
        interaction_var,
        fit_glm,
        conf_level = conf_level
      )
    )
  }

  stop("wrong interaction variable class, the interaction variable is not a numeric nor a factor")
}

#' @describeIn h_logistic_regression Helper function to construct term labels from simple terms and the table
#'   of numbers of patients.
#'
#' @param terms (`character`)\cr simple terms.
#' @param table (`table`)\cr table containing numbers for terms.
#'
#' @return Term labels containing numbers of patients.
#'
#' @export
h_simple_term_labels <- function(terms,
                                 table) {
  checkmate::assert_true(is.table(table))
  checkmate::assert_multi_class(terms, classes = c("factor", "character"))

  # Index the table by name, so factors are converted to their labels first.
  term_names <- as.character(terms)
  counts <- table[term_names]

  paste0(term_names, ", n = ", counts)
}

#' @describeIn h_logistic_regression Helper function to construct term labels from interaction terms and the table
#'   of numbers of patients.
#'
#' @param terms1 (`character`)\cr terms for first dimension (rows).
#' @param terms2 (`character`)\cr terms for second dimension (columns).
#' @param any (`flag`)\cr whether any of `terms1` and `terms2` can be fulfilled to count the
#'   number of patients. In that case they can only be scalar (strings).
#'
#' @return Term labels containing numbers of patients.
#'
#' @export
h_interaction_term_labels <- function(terms1,
                                      terms2,
                                      table,
                                      any = FALSE) {
  checkmate::assert_true(is.table(table))
  checkmate::assert_flag(any)
  checkmate::assert_multi_class(terms1, classes = c("factor", "character"))
  checkmate::assert_multi_class(terms2, classes = c("factor", "character"))
  terms1 <- as.character(terms1)
  terms2 <- as.character(terms2)

  if (!any) {
    # Pairwise lookup: one cell count per (terms1[i], terms2[i]) combination.
    cell_counts <- table[cbind(terms1, terms2)]
    return(paste0(terms1, " * ", terms2, ", n = ", cell_counts))
  }

  # "Either" count is only defined for one row term and one column term.
  checkmate::assert_scalar(terms1)
  checkmate::assert_scalar(terms2)

  # Row total plus column total counts the cell [terms1, terms2] twice, so remove it once.
  margins_total <- sum(c(table[terms1, ], table[, terms2]))
  n_either <- margins_total - table[terms1, terms2]
  paste0(terms1, " or ", terms2, ", n = ", n_either)
}

#' @describeIn h_logistic_regression Helper function to tabulate the main effect
#'   results of a (conditional) logistic regression model.
#'
#' @return Tabulated main effect results from a logistic regression model.
#'
#' @examples
#' h_glm_simple_term_extract("AGE", mod1)
#' h_glm_simple_term_extract("ARMCD", mod1)
#'
#' @export
h_glm_simple_term_extract <- function(x, fit_glm) {
  # Tabulates the coefficient statistics of a single main-effect variable `x` of `fit_glm`
  # into a data frame with a fixed set of columns (see the column selection at the end).
  checkmate::assert_multi_class(fit_glm, c("glm", "clogit"))
  checkmate::assert_string(x)

  xs_class <- attr(fit_glm$terms, "dataClasses")
  xs_level <- fit_glm$xlevels
  xs_coef <- summary(fit_glm)$coefficients
  # Column names of the coefficient summary differ between `glm` and `clogit` fits;
  # the names of this vector are the column names used in the result.
  stats <- if (inherits(fit_glm, "glm")) {
    c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)")
  } else {
    c("estimate" = "coef", "std_error" = "se(coef)", "pvalue" = "Pr(>|z|)")
  }
  # Make sure x is not an interaction term.
  checkmate::assert_subset(x, names(xs_class))
  # Coefficient rows to pick: the variable itself for numeric variables, otherwise the
  # variable name pasted with each non-reference level.
  x_sel <- if (xs_class[x] == "numeric") x else paste0(x, xs_level[[x]][-1])
  x_stats <- as.data.frame(xs_coef[x_sel, stats, drop = FALSE], stringsAsFactors = FALSE)
  colnames(x_stats) <- names(stats)
  # Statistics are stored as list columns so that they can be row-bound below with the
  # summary row, whose estimate and standard error are empty (`numeric(0)`).
  x_stats$estimate <- as.list(x_stats$estimate)
  x_stats$std_error <- as.list(x_stats$std_error)
  x_stats$pvalue <- as.list(x_stats$pvalue)
  x_stats$df <- as.list(1)
  if (xs_class[x] == "numeric") {
    # Numeric variable: a single term row, labelled with the variable label when the
    # data is available in the fit.
    x_stats$term <- x
    x_stats$term_label <- if (inherits(fit_glm, "glm")) {
      formatters::var_labels(fit_glm$data[x], fill = TRUE)
    } else {
      # We just fill in here with the `term` itself as we don't have the data available.
      x
    }
    x_stats$is_variable_summary <- FALSE
    x_stats$is_term_summary <- TRUE
  } else {
    checkmate::assert_class(fit_glm, "glm")
    # The reason is that we don't have the original data set in the `clogit` object
    # and therefore cannot determine the `x_numbers` here.
    x_numbers <- table(fit_glm$data[[x]])
    x_stats$term <- xs_level[[x]][-1]
    x_stats$term_label <- h_simple_term_labels(x_stats$term, x_numbers)
    x_stats$is_variable_summary <- FALSE
    x_stats$is_term_summary <- TRUE
    # Overall (type 3, Wald) test for the variable; reported on the reference-level row.
    main_effects <- car::Anova(fit_glm, type = 3, test.statistic = "Wald")
    x_main <- data.frame(
      pvalue = main_effects[x, "Pr(>Chisq)", drop = TRUE],
      term = xs_level[[x]][1],
      term_label = paste("Reference", h_simple_term_labels(xs_level[[x]][1], x_numbers)),
      df = main_effects[x, "Df", drop = TRUE],
      stringsAsFactors = FALSE
    )
    x_main$pvalue <- as.list(x_main$pvalue)
    x_main$df <- as.list(x_main$df)
    x_main$estimate <- list(numeric(0))
    x_main$std_error <- list(numeric(0))
    # With only one non-reference level the overall p-value and df are left empty on the
    # summary row (the single term row already carries a p-value).
    if (length(xs_level[[x]][-1]) == 1) {
      x_main$pvalue <- list(numeric(0))
      x_main$df <- list(numeric(0))
    }
    x_main$is_variable_summary <- TRUE
    x_main$is_term_summary <- FALSE
    # Summary (reference) row first, followed by one row per non-reference level.
    x_stats <- rbind(x_main, x_stats)
  }
  x_stats$variable <- x
  x_stats$variable_label <- if (inherits(fit_glm, "glm")) {
    formatters::var_labels(fit_glm$data[x], fill = TRUE)
  } else {
    x
  }
  # Interaction/reference columns are not applicable for main effects and stay empty.
  x_stats$interaction <- ""
  x_stats$interaction_label <- ""
  x_stats$reference <- ""
  x_stats$reference_label <- ""
  rownames(x_stats) <- NULL
  # Return the columns in a fixed order.
  x_stats[c(
    "variable",
    "variable_label",
    "term",
    "term_label",
    "interaction",
    "interaction_label",
    "reference",
    "reference_label",
    "estimate",
    "std_error",
    "df",
    "pvalue",
    "is_variable_summary",
    "is_term_summary"
  )]
}

#' @describeIn h_logistic_regression Helper function to tabulate the interaction term
#'   results of a logistic regression model.
#'
#' @return Tabulated interaction term results from a logistic regression model.
#'
#' @examples
#' h_glm_interaction_extract("ARMCD:AGE", mod2)
#'
#' @export
h_glm_interaction_extract <- function(x, fit_glm) {
  # Tabulates the statistics of the interaction term `x` of `fit_glm`: one summary row with
  # the overall test of the interaction, followed by one row per interaction coefficient.
  vars <- h_get_interaction_vars(fit_glm)
  xs_class <- attr(fit_glm$terms, "dataClasses")

  checkmate::assert_string(x)

  # Only take two-way interaction
  checkmate::assert_vector(vars, len = 2)

  # Only consider simple case: first variable in interaction is arm, a categorical variable
  checkmate::assert_disjunct(xs_class[vars[1]], "numeric")

  xs_level <- fit_glm$xlevels
  xs_coef <- summary(fit_glm)$coefficients
  # Overall (type 3, Wald) tests; the row named `x` is used for the summary row below.
  main_effects <- car::Anova(fit_glm, type = 3, test.statistic = "Wald")
  stats <- c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)")
  # Non-reference levels of the first interaction variable.
  v1_comp <- xs_level[[vars[1]]][-1]
  if (xs_class[vars[2]] == "numeric") {
    # Categorical x numeric: one interaction coefficient per non-reference level of the
    # first variable, named "<var1><level>:<var2>".
    x_stats <- as.data.frame(
      xs_coef[paste0(vars[1], v1_comp, ":", vars[2]), stats, drop = FALSE],
      stringsAsFactors = FALSE
    )
    colnames(x_stats) <- names(stats)
    x_stats$term <- v1_comp
    x_numbers <- table(fit_glm$data[[vars[1]]])
    x_stats$term_label <- h_simple_term_labels(v1_comp, x_numbers)
    v1_ref <- xs_level[[vars[1]]][1]
    term_main <- v1_ref
    ref_label <- h_simple_term_labels(v1_ref, x_numbers)
  } else if (xs_class[vars[2]] != "numeric") {
    # NOTE(review): this condition is the exact complement of the one above, so it behaves
    # like a plain `else`.
    # Categorical x categorical: one coefficient per combination of non-reference levels.
    v2_comp <- xs_level[[vars[2]]][-1]
    v1_v2_grid <- expand.grid(v1 = v1_comp, v2 = v2_comp)
    x_sel <- paste(
      paste0(vars[1], v1_v2_grid$v1),
      paste0(vars[2], v1_v2_grid$v2),
      sep = ":"
    )
    x_stats <- as.data.frame(xs_coef[x_sel, stats, drop = FALSE], stringsAsFactors = FALSE)
    colnames(x_stats) <- names(stats)
    x_stats$term <- paste(v1_v2_grid$v1, "*", v1_v2_grid$v2)
    # Two-way table of patient counts used for the "n = " part of the labels.
    x_numbers <- table(fit_glm$data[[vars[1]]], fit_glm$data[[vars[2]]])
    x_stats$term_label <- h_interaction_term_labels(v1_v2_grid$v1, v1_v2_grid$v2, x_numbers)
    v1_ref <- xs_level[[vars[1]]][1]
    v2_ref <- xs_level[[vars[2]]][1]
    term_main <- paste(vars[1], vars[2], sep = " * ")
    # The reference label counts patients at either of the two reference levels.
    ref_label <- h_interaction_term_labels(v1_ref, v2_ref, x_numbers, any = TRUE)
  }
  x_stats$df <- as.list(1)
  x_stats$pvalue <- as.list(x_stats$pvalue)
  x_stats$is_variable_summary <- FALSE
  x_stats$is_term_summary <- TRUE
  # Summary row carrying the overall interaction test; estimate and std. error are empty.
  x_main <- data.frame(
    pvalue = main_effects[x, "Pr(>Chisq)", drop = TRUE],
    term = term_main,
    term_label = paste("Reference", ref_label),
    df = main_effects[x, "Df", drop = TRUE],
    stringsAsFactors = FALSE
  )
  x_main$pvalue <- as.list(x_main$pvalue)
  x_main$df <- as.list(x_main$df)
  x_main$estimate <- list(numeric(0))
  x_main$std_error <- list(numeric(0))
  x_main$is_variable_summary <- TRUE
  x_main$is_term_summary <- FALSE

  # Summary row first, then the individual interaction coefficient rows.
  x_stats <- rbind(x_main, x_stats)
  x_stats$variable <- x
  x_stats$variable_label <- paste(
    "Interaction of",
    formatters::var_labels(fit_glm$data[vars[1]], fill = TRUE),
    "*",
    formatters::var_labels(fit_glm$data[vars[2]], fill = TRUE)
  )
  # These columns are left empty for the interaction term rows.
  x_stats$interaction <- ""
  x_stats$interaction_label <- ""
  x_stats$reference <- ""
  x_stats$reference_label <- ""
  rownames(x_stats) <- NULL
  # Return the columns in a fixed order.
  x_stats[c(
    "variable",
    "variable_label",
    "term",
    "term_label",
    "interaction",
    "interaction_label",
    "reference",
    "reference_label",
    "estimate",
    "std_error",
    "df",
    "pvalue",
    "is_variable_summary",
    "is_term_summary"
  )]
}

#' @describeIn h_logistic_regression Helper function to tabulate the interaction
#'   results of a logistic regression model. This basically is a wrapper for
#'   [h_or_interaction()] and [h_glm_simple_term_extract()] which puts the results
#'   in the right data frame format.
#'
#' @return A `data.frame` of tabulated interaction term results from a logistic regression model.
#'
#' @examples
#' h_glm_inter_term_extract("AGE", "ARMCD", mod2)
#'
#' @export
h_glm_inter_term_extract <- function(odds_ratio_var,
                                     interaction_var,
                                     fit_glm,
                                     ...) {
  # Combines the main-effect rows of `odds_ratio_var` with rows holding the odds ratios of
  # `odds_ratio_var` at the levels/values of `interaction_var`. `...` is forwarded to
  # `h_or_interaction()`.

  # First obtain the main effects.
  main_stats <- h_glm_simple_term_extract(odds_ratio_var, fit_glm)
  main_stats$is_reference_summary <- FALSE
  main_stats$odds_ratio <- NA
  main_stats$lcl <- NA
  main_stats$ucl <- NA

  # Then we get the odds ratio estimates and put into df form.
  or_numbers <- h_or_interaction(odds_ratio_var, interaction_var, fit_glm, ...)
  is_num_or_var <- attr(fit_glm$terms, "dataClasses")[odds_ratio_var] == "numeric"

  if (is_num_or_var) {
    # Numeric OR variable case.
    # Here `or_numbers` is a flat list with one `list(or, ci)` entry per reference.
    references <- names(or_numbers)
    # NOTE(review): `n_ref` is not used in this branch.
    n_ref <- length(references)

    # Pulls element `pos` of component `name` out of every entry.
    # NOTE(review): the parameter `l` is ignored; the body reads `or_numbers` from the
    # enclosing function instead.
    extract_from_list <- function(l, name, pos = 1) {
      unname(unlist(
        lapply(or_numbers, function(x) {
          x[[name]][pos]
        })
      ))
    }
    # One row per reference; estimate, std. error, df and p-value do not apply to these rows.
    or_stats <- data.frame(
      variable = odds_ratio_var,
      variable_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)),
      term = odds_ratio_var,
      term_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)),
      interaction = interaction_var,
      interaction_label = unname(formatters::var_labels(fit_glm$data[interaction_var], fill = TRUE)),
      reference = references,
      reference_label = references,
      estimate = NA,
      std_error = NA,
      odds_ratio = extract_from_list(or_numbers, "or"),
      lcl = extract_from_list(or_numbers, "ci", pos = "lcl"),
      ucl = extract_from_list(or_numbers, "ci", pos = "ucl"),
      df = NA,
      pvalue = NA,
      is_variable_summary = FALSE,
      is_term_summary = FALSE,
      is_reference_summary = TRUE
    )
  } else {
    # Categorical OR variable case.
    # Here `or_numbers` is nested: term level -> reference -> `list(or, ci)`.
    references <- names(or_numbers[[1]])
    n_ref <- length(references)

    # Same as above but flattening the two nesting levels.
    # NOTE(review): the parameter `l` is ignored here as well.
    extract_from_list <- function(l, name, pos = 1) {
      unname(unlist(
        lapply(or_numbers, function(x) {
          lapply(x, function(y) y[[name]][pos])
        })
      ))
    }
    # One row per (term level, reference) combination; each term level is repeated
    # `n_ref` times so that it lines up with the flattened references.
    or_stats <- data.frame(
      variable = odds_ratio_var,
      variable_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)),
      term = rep(names(or_numbers), each = n_ref),
      term_label = h_simple_term_labels(rep(names(or_numbers), each = n_ref), table(fit_glm$data[[odds_ratio_var]])),
      interaction = interaction_var,
      interaction_label = unname(formatters::var_labels(fit_glm$data[interaction_var], fill = TRUE)),
      reference = unlist(lapply(or_numbers, names)),
      reference_label = unlist(lapply(or_numbers, names)),
      estimate = NA,
      std_error = NA,
      odds_ratio = extract_from_list(or_numbers, "or"),
      lcl = extract_from_list(or_numbers, "ci", pos = "lcl"),
      ucl = extract_from_list(or_numbers, "ci", pos = "ucl"),
      df = NA,
      pvalue = NA,
      is_variable_summary = FALSE,
      is_term_summary = FALSE,
      is_reference_summary = TRUE
    )
  }

  # Align the main-effect columns with the odds ratio rows before stacking.
  df <- rbind(
    main_stats[, names(or_stats)],
    or_stats
  )
  # Variable summary rows first, then by term; within a term the term summary row precedes
  # the odds ratio rows, which are sorted by reference.
  df[order(-df$is_variable_summary, df$term, -df$is_term_summary, df$reference), ]
}

#' @describeIn h_logistic_regression Helper function to tabulate the results including
#'   odds ratios and confidence intervals of simple terms.
#'
#' @return Tabulated statistics for the given variable(s) from the logistic regression model.
#'
#' @examples
#' h_logistic_simple_terms("AGE", mod1)
#'
#' @export
h_logistic_simple_terms <- function(x, fit_glm, conf_level = 0.95) {
  checkmate::assert_multi_class(fit_glm, c("glm", "clogit"))
  if (inherits(fit_glm, "glm")) {
    checkmate::assert_set_equal(fit_glm$family$family, "binomial")
  }

  term_labels <- attr(stats::terms(fit_glm), "term.labels")
  var_classes <- attr(fit_glm$terms, "dataClasses")
  # Term labels that are not plain variables are interaction terms.
  interaction_terms <- term_labels[which(!term_labels %in% names(var_classes))]

  checkmate::assert_subset(x, term_labels)
  if (length(interaction_terms) != 0) {
    # Make sure any item in x is not part of interaction term
    interaction_members <- unlist(strsplit(interaction_terms, ":"))
    checkmate::assert_disjunct(x, interaction_members)
  }

  # One table per requested variable, stacked into a single data frame.
  per_term <- lapply(x, h_glm_simple_term_extract, fit_glm)
  result <- do.call(rbind, per_term)

  # Odds ratios and Wald-type confidence limits on the odds ratio scale.
  z_crit <- stats::qnorm((1 + conf_level) / 2)
  result$odds_ratio <- lapply(result$estimate, exp)
  result$lcl <- Map(
    function(odds, std_err) exp(log(odds) - z_crit * std_err),
    result$odds_ratio,
    result$std_error
  )
  result$ucl <- Map(
    function(odds, std_err) exp(log(odds) + z_crit * std_err),
    result$odds_ratio,
    result$std_error
  )
  result$ci <- Map(function(lower, upper) c(lower, upper), result$lcl, result$ucl)
  result
}

#' @describeIn h_logistic_regression Helper function to tabulate the results including
#'   odds ratios and confidence intervals of interaction terms.
#'
#' @return Tabulated statistics for the given variable(s) from the logistic regression model.
#'
#' @examples
#' h_logistic_inter_terms(c("RACE", "AGE", "ARMCD", "AGE:ARMCD"), mod2)
#'
#' @export
h_logistic_inter_terms <- function(x,
                                   fit_glm,
                                   conf_level = 0.95,
                                   at = NULL) {
  # Find out the interaction variables and interaction term.
  inter_vars <- h_get_interaction_vars(fit_glm)
  checkmate::assert_vector(inter_vars, len = 2)

  # The interaction term is the element of `x` whose `:`-separated components are exactly
  # the two interaction variables (in either order). Comparing the components exactly,
  # instead of regex/substring matching on the variable names, avoids picking up main
  # effects when one variable name is contained in the other (e.g. `AGE` and `AGEGR1`)
  # and avoids misinterpreting regex metacharacters in variable names.
  is_inter_term <- vapply(
    strsplit(as.character(x), split = ":", fixed = TRUE),
    function(parts) length(parts) == 2 && setequal(parts, inter_vars),
    logical(1)
  )
  inter_term <- x[is_inter_term]

  # For the non-interaction vars we need the standard stuff.
  normal_terms <- setdiff(x, union(inter_vars, inter_term))

  x_stats <- lapply(normal_terms, h_glm_simple_term_extract, fit_glm)
  x_stats <- do.call(rbind, x_stats)
  q_norm <- stats::qnorm((1 + conf_level) / 2)
  x_stats$odds_ratio <- lapply(x_stats$estimate, exp)
  x_stats$lcl <- Map(function(or, se) exp(log(or) - q_norm * se), x_stats$odds_ratio, x_stats$std_error)
  x_stats$ucl <- Map(function(or, se) exp(log(or) + q_norm * se), x_stats$odds_ratio, x_stats$std_error)
  normal_stats <- x_stats
  normal_stats$is_reference_summary <- FALSE

  # Now the interaction term itself.
  inter_term_stats <- h_glm_interaction_extract(inter_term, fit_glm)
  inter_term_stats$odds_ratio <- NA
  inter_term_stats$lcl <- NA
  inter_term_stats$ucl <- NA
  inter_term_stats$is_reference_summary <- FALSE

  is_intervar1_numeric <- attr(fit_glm$terms, "dataClasses")[inter_vars[1]] == "numeric"

  # Interaction stuff.
  # `at` is only handed to the call in which the first interaction variable acts as the
  # interaction variable when that variable is numeric, and to the other call otherwise.
  inter_stats_one <- h_glm_inter_term_extract(
    inter_vars[1],
    inter_vars[2],
    fit_glm,
    conf_level = conf_level,
    at = `if`(is_intervar1_numeric, NULL, at)
  )
  inter_stats_two <- h_glm_inter_term_extract(
    inter_vars[2],
    inter_vars[1],
    fit_glm,
    conf_level = conf_level,
    at = `if`(is_intervar1_numeric, at, NULL)
  )

  # Now just combine everything in one data frame.
  col_names <- c(
    "variable",
    "variable_label",
    "term",
    "term_label",
    "interaction",
    "interaction_label",
    "reference",
    "reference_label",
    "estimate",
    "std_error",
    "df",
    "pvalue",
    "odds_ratio",
    "lcl",
    "ucl",
    "is_variable_summary",
    "is_term_summary",
    "is_reference_summary"
  )
  df <- rbind(
    inter_stats_one[, col_names],
    inter_stats_two[, col_names],
    inter_term_stats[, col_names]
  )
  # Non-interaction terms, if any were requested, go on top.
  if (length(normal_terms) > 0) {
    df <- rbind(
      normal_stats[, col_names],
      df
    )
  }
  df$ci <- combine_vectors(df$lcl, df$ucl)
  df
}

#' Confidence intervals for a difference of binomials
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Several confidence intervals for the difference between proportions.
#'
#' @name desctools_binom
NULL

#' Recycle list of parameters
#'
#' This function recycles all supplied elements to the maximal dimension.
#'
#' @param ... (`any`)\cr elements to recycle.
#'
#' @return A `list`.
#'
#' @keywords internal
#' @noRd
h_recycle <- function(...) {
  inputs <- list(...)
  # Target length is the length of the longest supplied element.
  target_len <- max(lengths(inputs))
  # Every element is repeated (or truncated) to the target length; the target length is
  # attached to the result as attribute "maxdim".
  structure(
    lapply(inputs, function(el) rep(el, length.out = target_len)),
    maxdim = target_len
  )
}

#' @describeIn desctools_binom Several confidence intervals for the difference between proportions.
#'
#' @return A `matrix` of 3 values:
#'   * `est`: estimate of proportion difference.
#'   * `lwr.ci`: estimate of lower end of the confidence interval.
#'   * `upr.ci`: estimate of upper end of the confidence interval.
#'
#' @keywords internal
desctools_binom <- function(x1,
                            n1,
                            x2,
                            n2,
                            conf.level = 0.95, # nolint
                            sides = c("two.sided", "left", "right"),
                            method = c(
                              "ac", "wald", "waldcc", "score", "scorecc", "mn", "mee", "blj", "ha", "hal", "jp"
                            )) {
  if (missing(sides)) {
    sides <- match.arg(sides)
  }
  if (missing(method)) {
    method <- match.arg(method)
  }
  iBinomDiffCI <- function(x1, n1, x2, n2, conf.level, sides, method) { # nolint
    if (sides != "two.sided") {
      conf.level <- 1 - 2 * (1 - conf.level) # nolint
    }
    alpha <- 1 - conf.level
    kappa <- stats::qnorm(1 - alpha / 2)
    p1_hat <- x1 / n1
    p2_hat <- x2 / n2
    est <- p1_hat - p2_hat
    switch(method,
      wald = {
        vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2
        term2 <- kappa * sqrt(vd)
        ci_lwr <- max(-1, est - term2)
        ci_upr <- min(1, est + term2)
      },
      waldcc = {
        vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2
        term2 <- kappa * sqrt(vd)
        term2 <- term2 + 0.5 * (1 / n1 + 1 / n2)
        ci_lwr <- max(-1, est - term2)
        ci_upr <- min(1, est + term2)
      },
      ac = {
        n1 <- n1 + 2
        n2 <- n2 + 2
        x1 <- x1 + 1
        x2 <- x2 + 1
        p1_hat <- x1 / n1
        p2_hat <- x2 / n2
        est1 <- p1_hat - p2_hat
        vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2
        term2 <- kappa * sqrt(vd)
        ci_lwr <- max(-1, est1 - term2)
        ci_upr <- min(1, est1 + term2)
      },
      exact = {
        ci_lwr <- NA
        ci_upr <- NA
      },
      score = {
        w1 <- desctools_binomci(
          x = x1, n = n1, conf.level = conf.level,
          method = "wilson"
        )
        w2 <- desctools_binomci(
          x = x2, n = n2, conf.level = conf.level,
          method = "wilson"
        )
        l1 <- w1[2]
        u1 <- w1[3]
        l2 <- w2[2]
        u2 <- w2[3]
        ci_lwr <- est - kappa * sqrt(l1 * (1 - l1) / n1 + u2 * (1 - u2) / n2)
        ci_upr <- est + kappa * sqrt(u1 * (1 - u1) / n1 + l2 * (1 - l2) / n2)
      },
      scorecc = {
        w1 <- desctools_binomci(
          x = x1, n = n1, conf.level = conf.level,
          method = "wilsoncc"
        )
        w2 <- desctools_binomci(
          x = x2, n = n2, conf.level = conf.level,
          method = "wilsoncc"
        )
        l1 <- w1[2]
        u1 <- w1[3]
        l2 <- w2[2]
        u2 <- w2[3]
        ci_lwr <- max(-1, est - sqrt((p1_hat - l1)^2 + (u2 - p2_hat)^2))
        ci_upr <- min(1, est + sqrt((u1 - p1_hat)^2 + (p2_hat - l2)^2))
      },
      mee = {
        .score <- function(p1, n1, p2, n2, dif) {
          if (dif > 1) dif <- 1
          if (dif < -1) dif <- -1
          diff <- p1 - p2 - dif
          if (abs(diff) == 0) {
            res <- 0
          } else {
            t <- n2 / n1
            a <- 1 + t
            b <- -(1 + t + p1 + t * p2 + dif * (t + 2))
            c <- dif * dif + dif * (2 * p1 + t + 1) + p1 + t * p2
            d <- -p1 * dif * (1 + dif)
            v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2
            if (abs(v) < .Machine$double.eps) v <- 0
            s <- sqrt((b / a / 3)^2 - c / a / 3)
            u <- ifelse(v > 0, 1, -1) * s
            w <- (3.141592654 + acos(v / u^3)) / 3
            p1d <- 2 * u * cos(w) - b / a / 3
            p2d <- p1d - dif
            n <- n1 + n2
            res <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2)
          }
          return(sqrt(res))
        }
        pval <- function(delta) {
          z <- (est - delta) / .score(p1_hat, n1, p2_hat, n2, delta)
          2 * min(stats::pnorm(z), 1 - stats::pnorm(z))
        }
        ci_lwr <- max(-1, stats::uniroot(function(delta) {
          pval(delta) - alpha
        }, interval = c(-1 + 1e-06, est - 1e-06))$root)
        ci_upr <- min(1, stats::uniroot(function(delta) {
          pval(delta) - alpha
        }, interval = c(est + 1e-06, 1 - 1e-06))$root)
      },
      blj = {
        p1_dash <- (x1 + 0.5) / (n1 + 1)
        p2_dash <- (x2 + 0.5) / (n2 + 1)
        vd <- p1_dash * (1 - p1_dash) / n1 + p2_dash * (1 - p2_dash) / n2
        term2 <- kappa * sqrt(vd)
        est_dash <- p1_dash - p2_dash
        ci_lwr <- max(-1, est_dash - term2)
        ci_upr <- min(1, est_dash + term2)
      },
      ha = {
        term2 <- 1 /
          (2 * min(n1, n2)) + kappa * sqrt(p1_hat * (1 - p1_hat) / (n1 - 1) + p2_hat * (1 - p2_hat) / (n2 - 1))
        ci_lwr <- max(-1, est - term2)
        ci_upr <- min(1, est + term2)
      },
      mn = {
        .conf <- function(x1, n1, x2, n2, z, lower = FALSE) {
          p1 <- x1 / n1
          p2 <- x2 / n2
          p_hat <- p1 - p2
          dp <- 1 + ifelse(lower, 1, -1) * p_hat
          i <- 1
          while (i <= 50) {
            dp <- 0.5 * dp
            y <- p_hat + ifelse(lower, -1, 1) * dp
            score <- .score(p1, n1, p2, n2, y)
            if (score < z) {
              p_hat <- y
            }
            if ((dp < 1e-07) || (abs(z - score) < 1e-06)) {
              (break)()
            } else {
              i <- i + 1
            }
          }
          return(y)
        }
        .score <- function(p1, n1, p2, n2, dif) {
          diff <- p1 - p2 - dif
          if (abs(diff) == 0) {
            res <- 0
          } else {
            t <- n2 / n1
            a <- 1 + t
            b <- -(1 + t + p1 + t * p2 + dif * (t + 2))
            c <- dif * dif + dif * (2 * p1 + t + 1) + p1 + t * p2
            d <- -p1 * dif * (1 + dif)
            v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2
            s <- sqrt((b / a / 3)^2 - c / a / 3)
            u <- ifelse(v > 0, 1, -1) * s
            w <- (3.141592654 + acos(v / u^3)) / 3
            p1d <- 2 * u * cos(w) - b / a / 3
            p2d <- p1d - dif
            n <- n1 + n2
            var <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2) * n / (n - 1)
            res <- diff^2 / var
          }
          return(res)
        }
        z <- stats::qchisq(conf.level, 1)
        ci_lwr <- max(-1, .conf(x1, n1, x2, n2, z, TRUE))
        ci_upr <- min(1, .conf(x1, n1, x2, n2, z, FALSE))
      },
      beal = {
        a <- p1_hat + p2_hat
        b <- p1_hat - p2_hat
        u <- ((1 / n1) + (1 / n2)) / 4
        v <- ((1 / n1) - (1 / n2)) / 4
        V <- u * ((2 - a) * a - b^2) + 2 * v * (1 - a) * b # nolint
        z <- stats::qchisq(p = 1 - alpha / 2, df = 1)
        A <- sqrt(z * (V + z * u^2 * (2 - a) * a + z * v^2 * (1 - a)^2)) # nolint
        B <- (b + z * v * (1 - a)) / (1 + z * u) # nolint
        ci_lwr <- max(-1, B - A / (1 + z * u))
        ci_upr <- min(1, B + A / (1 + z * u))
      },
      hal = {
        psi <- (p1_hat + p2_hat) / 2
        u <- (1 / n1 + 1 / n2) / 4
        v <- (1 / n1 - 1 / n2) / 4
        z <- kappa
        theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 * psi)) / (1 + z^2 * u)
        w <- z / (1 + z^2 * u) * sqrt(u * (4 * psi * (1 - psi) - (p1_hat - p2_hat)^2) + 2 * v * (1 - 2 * psi) *
          (p1_hat - p2_hat) + 4 * z^2 * u^2 * (1 - psi) * psi + z^2 * v^2 * (1 - 2 * psi)^2) # nolint
        c(theta + w, theta - w)
        ci_lwr <- max(-1, theta - w)
        ci_upr <- min(1, theta + w)
      },
      jp = {
        psi <- 0.5 * ((x1 + 0.5) / (n1 + 1) + (x2 + 0.5) / (n2 + 1))
        u <- (1 / n1 + 1 / n2) / 4
        v <- (1 / n1 - 1 / n2) / 4
        z <- kappa
        theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 * psi)) / (1 + z^2 * u)
        w <- z / (1 + z^2 * u) * sqrt(u * (4 * psi * (1 - psi) - (p1_hat - p2_hat)^2) + 2 * v * (1 - 2 * psi) *
          (p1_hat - p2_hat) + 4 * z^2 * u^2 * (1 - psi) * psi + z^2 * v^2 * (1 - 2 * psi)^2) # nolint
        c(theta + w, theta - w)
        ci_lwr <- max(-1, theta - w)
        ci_upr <- min(1, theta + w)
      },
    )
    ci <- c(
      est = est, lwr.ci = min(ci_lwr, ci_upr),
      upr.ci = max(ci_lwr, ci_upr)
    )
    if (sides == "left") {
      ci[3] <- 1
    } else if (sides == "right") {
      ci[2] <- -1
    }
    return(ci)
  }
  method <- match.arg(arg = method, several.ok = TRUE)
  sides <- match.arg(arg = sides, several.ok = TRUE)
  lst <- h_recycle(
    x1 = x1, n1 = n1, x2 = x2, n2 = n2, conf.level = conf.level,
    sides = sides, method = method
  )
  res <- t(sapply(1:attr(lst, "maxdim"), function(i) {
    iBinomDiffCI(
      x1 = lst$x1[i],
      n1 = lst$n1[i], x2 = lst$x2[i], n2 = lst$n2[i], conf.level = lst$conf.level[i],
      sides = lst$sides[i], method = lst$method[i]
    )
  }))
  lgn <- h_recycle(x1 = if (is.null(names(x1))) {
    paste("x1", seq_along(x1), sep = ".")
  } else {
    names(x1)
  }, n1 = if (is.null(names(n1))) {
    paste("n1", seq_along(n1), sep = ".")
  } else {
    names(n1)
  }, x2 = if (is.null(names(x2))) {
    paste("x2", seq_along(x2), sep = ".")
  } else {
    names(x2)
  }, n2 = if (is.null(names(n2))) {
    paste("n2", seq_along(n2), sep = ".")
  } else {
    names(n2)
  }, conf.level = conf.level, sides = sides, method = method)
  xn <- apply(as.data.frame(lgn[sapply(lgn, function(x) {
    length(unique(x)) !=
      1
  })]), 1, paste, collapse = ":")
  rownames(res) <- xn
  return(res)
}

#' @describeIn desctools_binom Compute confidence intervals for binomial proportions.
#'
#' @param x (`integer(1)`)\cr number of successes.
#' @param n (`integer(1)`)\cr number of trials.
#' @param conf.level (`proportion`)\cr confidence level, defaults to 0.95.
#' @param sides (`string`)\cr side of the confidence interval to compute. Must be one of `"two.sided"` (default),
#'   `"left"`, or `"right"`.
#' @param method (`string`)\cr method to use. Can be one out of: `"wilson"` (default), `"wald"`, `"waldcc"`,
#' `"wilsoncc"`, `"agresti-coull"`, `"jeffreys"`, `"modified wilson"`, `"modified jeffreys"`, `"clopper-pearson"`,
#' `"arcsine"`, `"logit"`, `"witting"`, `"pratt"`, `"midp"`, `"lik"`, and `"blaker"`.
#'
#' @return A `matrix` with 3 columns containing:
#'   * `est`: estimate of the proportion.
#'   * `lwr.ci`: lower end of the confidence interval.
#'   * `upr.ci`: upper end of the confidence interval.
#'
#' @keywords internal
desctools_binomci <- function(x,
                              n,
                              conf.level = 0.95, # nolint
                              sides = c("two.sided", "left", "right"),
                              method = c(
                                "wilson", "wald", "waldcc", "agresti-coull",
                                "jeffreys", "modified wilson", "wilsoncc", "modified jeffreys",
                                "clopper-pearson", "arcsine", "logit", "witting", "pratt",
                                "midp", "lik", "blaker"
                              ),
                              rand = 123,
                              tol = 1e-05) {
  # Confidence intervals for binomial proportions.
  #
  # `x`, `n`, `conf.level`, `sides`, `method` and `rand` are recycled to a common length and one
  # interval is computed per element by the scalar worker `iBinomCI()` defined below. `tol` is
  # forwarded unchanged to the worker, where it is the step size of the Blaker search.
  # Returns a matrix with one row per recycled element and columns `est`, `lwr.ci`, `upr.ci`.

  # When not supplied, use a single default instead of recycling over every available choice.
  if (missing(method)) {
    method <- "wilson"
  }
  if (missing(sides)) {
    sides <- "two.sided"
  }
  # Scalar worker: computes one interval for a single (x, n, conf.level, sides, method) combination.
  iBinomCI <- function(x, n, conf.level = 0.95, sides = c("two.sided", "left", "right"), # nolint
                       method = c(
                         "wilson", "wilsoncc", "wald",
                         "waldcc", "agresti-coull", "jeffreys", "modified wilson",
                         "modified jeffreys", "clopper-pearson", "arcsine", "logit",
                         "witting", "pratt", "midp", "lik", "blaker"
                       ),
                       rand = 123,
                       tol = 1e-05) {
    if (length(x) != 1) {
      stop("'x' has to be of length 1 (number of successes)")
    }
    if (length(n) != 1) {
      stop("'n' has to be of length 1 (number of trials)")
    }
    if (length(conf.level) != 1) {
      stop("'conf.level' has to be of length 1 (confidence level)")
    }
    if (conf.level < 0.5 || conf.level > 1) {
      stop("'conf.level' has to be in [0.5, 1]")
    }
    sides <- match.arg(sides, choices = c(
      "two.sided", "left",
      "right"
    ), several.ok = FALSE)
    # A one-sided interval is obtained from a two-sided one with doubled error probability;
    # the unused bound is overwritten with 0 or 1 at the end of this function.
    if (sides != "two.sided") {
      conf.level <- 1 - 2 * (1 - conf.level) # nolint
    }
    alpha <- 1 - conf.level
    kappa <- stats::qnorm(1 - alpha / 2)
    p_hat <- x / n
    q_hat <- 1 - p_hat
    # Default point estimate; replaced by an adjusted estimate in the agresti-coull and arcsine branches.
    est <- p_hat
    switch(match.arg(arg = method, choices = c(
      "wilson",
      "wald", "waldcc", "wilsoncc", "agresti-coull", "jeffreys",
      "modified wilson", "modified jeffreys", "clopper-pearson",
      "arcsine", "logit", "witting", "pratt", "midp", "lik",
      "blaker"
    )),
    wald = {
      term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n)
      ci_lwr <- max(0, p_hat - term2)
      ci_upr <- min(1, p_hat + term2)
    },
    waldcc = {
      # Wald interval widened by the continuity correction 1 / (2 * n).
      term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n)
      term2 <- term2 + 1 / (2 * n)
      ci_lwr <- max(0, p_hat - term2)
      ci_upr <- min(1, p_hat + term2)
    },
    wilson = {
      term1 <- (x + kappa^2 / 2) / (n + kappa^2)
      term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat * q_hat + kappa^2 / (4 * n))
      ci_lwr <- max(0, term1 - term2)
      ci_upr <- min(1, term1 + term2)
    },
    wilsoncc = {
      lci <- (
        2 * x + kappa^2 - 1 - kappa * sqrt(kappa^2 - 2 - 1 / n + 4 * p_hat * (n * q_hat + 1))
      ) / (2 * (n + kappa^2))
      uci <- (
        2 * x + kappa^2 + 1 + kappa * sqrt(kappa^2 + 2 - 1 / n + 4 * p_hat * (n * q_hat - 1))
      ) / (2 * (n + kappa^2))
      # The bounds are pinned to 0 / 1 when the observed proportion sits on the boundary.
      ci_lwr <- max(0, ifelse(p_hat == 0, 0, lci))
      ci_upr <- min(1, ifelse(p_hat == 1, 1, uci))
    },
    `agresti-coull` = {
      x_tilde <- x + kappa^2 / 2
      n_tilde <- n + kappa^2
      p_tilde <- x_tilde / n_tilde
      q_tilde <- 1 - p_tilde
      est <- p_tilde
      term2 <- kappa * sqrt(p_tilde * q_tilde) / sqrt(n_tilde)
      ci_lwr <- max(0, p_tilde - term2)
      ci_upr <- min(1, p_tilde + term2)
    },
    jeffreys = {
      if (x == 0) {
        ci_lwr <- 0
      } else {
        ci_lwr <- stats::qbeta(
          alpha / 2,
          x + 0.5, n - x + 0.5
        )
      }
      if (x == n) {
        ci_upr <- 1
      } else {
        ci_upr <- stats::qbeta(1 - alpha / 2, x + 0.5, n - x + 0.5)
      }
    },
    `modified wilson` = {
      term1 <- (x + kappa^2 / 2) / (n + kappa^2)
      term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat * q_hat + kappa^2 / (4 * n))
      # For counts close to 0 (or close to n) the Wilson bound is replaced by a chi-square based bound.
      if ((n <= 50 & x %in% c(1, 2)) | (n >= 51 & x %in% c(1:3))) {
        ci_lwr <- 0.5 * stats::qchisq(alpha, 2 * x) / n
      } else {
        ci_lwr <- max(0, term1 - term2)
      }
      if ((n <= 50 & x %in% c(n - 1, n - 2)) | (n >= 51 & x %in% c(n - (1:3)))) {
        ci_upr <- 1 - 0.5 * stats::qchisq(
          alpha,
          2 * (n - x)
        ) / n
      } else {
        ci_upr <- min(1, term1 + term2)
      }
    },
    `modified jeffreys` = {
      if (x == n) {
        ci_lwr <- (alpha / 2)^(1 / n)
      } else {
        if (x <= 1) {
          ci_lwr <- 0
        } else {
          ci_lwr <- stats::qbeta(
            alpha / 2,
            x + 0.5, n - x + 0.5
          )
        }
      }
      if (x == 0) {
        ci_upr <- 1 - (alpha / 2)^(1 / n)
      } else {
        if (x >= n - 1) {
          ci_upr <- 1
        } else {
          ci_upr <- stats::qbeta(1 - alpha / 2, x + 0.5, n - x + 0.5)
        }
      }
    },
    `clopper-pearson` = {
      ci_lwr <- stats::qbeta(alpha / 2, x, n - x + 1)
      ci_upr <- stats::qbeta(1 - alpha / 2, x + 1, n - x)
    },
    arcsine = {
      p_tilde <- (x + 0.375) / (n + 0.75)
      est <- p_tilde
      ci_lwr <- sin(asin(sqrt(p_tilde)) - 0.5 * kappa / sqrt(n))^2
      ci_upr <- sin(asin(sqrt(p_tilde)) + 0.5 * kappa / sqrt(n))^2
    },
    logit = {
      lambda_hat <- log(x / (n - x))
      V_hat <- n / (x * (n - x)) # nolint
      lambda_lower <- lambda_hat - kappa * sqrt(V_hat)
      lambda_upper <- lambda_hat + kappa * sqrt(V_hat)
      ci_lwr <- exp(lambda_lower) / (1 + exp(lambda_lower))
      ci_upr <- exp(lambda_upper) / (1 + exp(lambda_upper))
    },
    witting = {
      # Randomized interval: `rand` seeds the random number generator so results are reproducible.
      # Note that `set.seed()` changes the global RNG state of the session.
      set.seed(rand)
      x_tilde <- x + stats::runif(1, min = 0, max = 1)
      pbinom_abscont <- function(q, size, prob) {
        v <- trunc(q)
        term1 <- stats::pbinom(v - 1, size = size, prob = prob)
        term2 <- (q - v) * stats::dbinom(v, size = size, prob = prob)
        return(term1 + term2)
      }
      qbinom_abscont <- function(p, size, x) {
        fun <- function(prob, size, x, p) {
          pbinom_abscont(x, size, prob) - p
        }
        stats::uniroot(fun,
          interval = c(0, 1), size = size,
          x = x, p = p
        )$root
      }
      ci_lwr <- qbinom_abscont(1 - alpha, size = n, x = x_tilde)
      ci_upr <- qbinom_abscont(alpha, size = n, x = x_tilde)
    },
    pratt = {
      # Closed-form bounds for the boundary counts, approximation formula otherwise.
      if (x == 0) {
        ci_lwr <- 0
        ci_upr <- 1 - alpha^(1 / n)
      } else if (x == 1) {
        ci_lwr <- 1 - (1 - alpha / 2)^(1 / n)
        ci_upr <- 1 - (alpha / 2)^(1 / n)
      } else if (x == (n - 1)) {
        ci_lwr <- (alpha / 2)^(1 / n)
        ci_upr <- (1 - alpha / 2)^(1 / n)
      } else if (x == n) {
        ci_lwr <- alpha^(1 / n)
        ci_upr <- 1
      } else {
        z <- stats::qnorm(1 - alpha / 2)
        A <- ((x + 1) / (n - x))^2 # nolint
        B <- 81 * (x + 1) * (n - x) - 9 * n - 8 # nolint
        C <- (0 - 3) * z * sqrt(9 * (x + 1) * (n - x) * (9 * n + 5 - z^2) + n + 1) # nolint
        D <- 81 * (x + 1)^2 - 9 * (x + 1) * (2 + z^2) + 1 # nolint
        E <- 1 + A * ((B + C) / D)^3 # nolint
        ci_upr <- 1 / E
        A <- (x / (n - x - 1))^2 # nolint
        B <- 81 * x * (n - x - 1) - 9 * n - 8 # nolint
        C <- 3 * z * sqrt(9 * x * (n - x - 1) * (9 * n + 5 - z^2) + n + 1) # nolint
        D <- 81 * x^2 - 9 * x * (2 + z^2) + 1 # nolint
        E <- 1 + A * ((B + C) / D)^3 # nolint
        ci_lwr <- 1 / E
      }
    },
    midp = {
      # Each bound is the root of a mid-p tail probability minus (1 - conf.level) / 2.
      f_low <- function(pi, x, n) {
        1 / 2 * stats::dbinom(x, size = n, prob = pi) + stats::pbinom(x,
          size = n, prob = pi, lower.tail = FALSE
        ) -
          (1 - conf.level) / 2
      }
      f_up <- function(pi, x, n) {
        1 / 2 * stats::dbinom(x, size = n, prob = pi) + stats::pbinom(x - 1, size = n, prob = pi) - (1 - conf.level) / 2
      }
      ci_lwr <- 0
      ci_upr <- 1
      if (x != 0) {
        ci_lwr <- stats::uniroot(f_low,
          interval = c(0, p_hat),
          x = x, n = n
        )$root
      }
      if (x != n) {
        ci_upr <- stats::uniroot(f_up, interval = c(
          p_hat,
          1
        ), x = x, n = n)$root
      }
    },
    lik = {
      ci_lwr <- 0
      ci_upr <- 1
      z <- stats::qnorm(1 - alpha * 0.5)
      # This branch uses its own tolerance: the `tol` argument is overwritten here.
      tol <- .Machine$double.eps^0.5
      # Signed square root of the binomial deviance between `y` and `mu`, shifted by `bound`.
      BinDev <- function(y, x, mu, wt, bound = 0, tol = .Machine$double.eps^0.5, # nolint
                         ...) {
        ll_y <- ifelse(y %in% c(0, 1), 0, stats::dbinom(x, wt,
          y,
          log = TRUE
        ))
        ll_mu <- ifelse(mu %in% c(0, 1), 0, stats::dbinom(x,
          wt, mu,
          log = TRUE
        ))
        res <- ifelse(abs(y - mu) < tol, 0, sign(y - mu) * sqrt(-2 * (ll_y - ll_mu)))
        return(res - bound)
      }
      if (x != 0 && tol < p_hat) {
        ci_lwr <- if (BinDev(
          tol, x, p_hat, n, -z,
          tol
        ) <= 0) {
          stats::uniroot(
            f = BinDev, interval = c(tol, if (p_hat < tol || p_hat == 1) {
              1 - tol
            } else {
              p_hat
            }), bound = -z,
            x = x, mu = p_hat, wt = n
          )$root
        }
      }
      if (x != n && p_hat < (1 - tol)) {
        ci_upr <- if (
          BinDev(y = 1 - tol, x = x, mu = ifelse(p_hat > 1 - tol, tol, p_hat), wt = n, bound = z, tol = tol) < 0) { # nolint
          ci_lwr <- if (BinDev(
            tol, x, if (p_hat < tol || p_hat == 1) {
              1 - tol
            } else {
              p_hat
            }, n,
            -z, tol
          ) <= 0) {
            stats::uniroot(
              f = BinDev, interval = c(tol, p_hat),
              bound = -z, x = x, mu = p_hat, wt = n
            )$root
          }
        } else {
          stats::uniroot(
            f = BinDev, interval = c(if (p_hat > 1 - tol) {
              tol
            } else {
              p_hat
            }, 1 - tol), bound = z,
            x = x, mu = p_hat, wt = n
          )$root
        }
      }
    },
    blaker = {
      acceptbin <- function(x, n, p) {
        p1 <- 1 - stats::pbinom(x - 1, n, p)
        p2 <- stats::pbinom(x, n, p)
        a1 <- p1 + stats::pbinom(stats::qbinom(p1, n, p) - 1, n, p)
        a2 <- p2 + 1 - stats::pbinom(
          stats::qbinom(1 - p2, n, p), n,
          p
        )
        return(min(a1, a2))
      }
      ci_lwr <- 0
      ci_upr <- 1
      # Start from the Clopper-Pearson bounds and move inwards in steps of `tol` while the
      # acceptability of the candidate bound stays below 1 - conf.level.
      if (x != 0) {
        ci_lwr <- stats::qbeta((1 - conf.level) / 2, x, n - x + 1)
        while (acceptbin(x, n, ci_lwr + tol) < (1 - conf.level)) {
          ci_lwr <- ci_lwr + tol
        }
      }
      if (x != n) {
        ci_upr <- stats::qbeta(1 - (1 - conf.level) / 2, x + 1, n - x)
        while (acceptbin(x, n, ci_upr - tol) < (1 - conf.level)) {
          ci_upr <- ci_upr - tol
        }
      }
    }
    )
    # Clamp to [0, 1]; for one-sided intervals the unused bound is set to the trivial limit.
    ci <- c(est = est, lwr.ci = max(0, ci_lwr), upr.ci = min(
      1,
      ci_upr
    ))
    if (sides == "left") {
      ci[3] <- 1
    } else if (sides == "right") {
      ci[2] <- 0
    }
    return(ci)
  }
  # Recycle all vectorized arguments to the longest one.
  lst <- list(
    x = x, n = n, conf.level = conf.level, sides = sides,
    method = method, rand = rand
  )
  maxdim <- max(unlist(lapply(lst, length)))
  lgp <- lapply(lst, rep, length.out = maxdim)
  # Row labels: built only from those arguments that actually vary across rows.
  lgn <- h_recycle(x = if (is.null(names(x))) {
    paste("x", seq_along(x), sep = ".")
  } else {
    names(x)
  }, n = if (is.null(names(n))) {
    paste("n", seq_along(n), sep = ".")
  } else {
    names(n)
  }, conf.level = conf.level, sides = sides, method = method)
  xn <- apply(as.data.frame(lgn[sapply(lgn, function(x) {
    length(unique(x)) !=
      1
  })]), 1, paste, collapse = ":")
  res <- t(sapply(1:maxdim, function(i) {
    iBinomCI(
      x = lgp$x[i],
      n = lgp$n[i], conf.level = lgp$conf.level[i], sides = lgp$sides[i],
      method = lgp$method[i], rand = lgp$rand[i],
      # forward the user-supplied tolerance; previously the worker always used its own default
      tol = tol
    )
  }))
  colnames(res)[1] <- c("est")
  rownames(res) <- xn
  return(res)
}

#' Additional assertions to use with `checkmate`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Additional assertion functions which can be used together with the `checkmate` package.
#'
#' @inheritParams checkmate::assert_factor
#' @param x (`any`)\cr object to test.
#' @param df (`data.frame`)\cr data set to test.
#' @param variables (named `list` of `character`)\cr list of variables to test.
#' @param include_boundaries (`flag`)\cr whether to include boundaries when testing
#'   for proportions.
#' @param na_level (`string`)\cr the string you have been using to represent NA or
#'   missing data. For `NA` values please consider using directly [is.na()] or
#'   similar approaches.
#'
#' @return Nothing if assertion passes, otherwise prints the error message.
#'
#' @name assertions
NULL

check_list_of_variables <- function(x) {
  # `NULL` entries are ignored: only the remaining elements are validated
  kept <- Filter(function(el) !is.null(el), x)

  # must be a named, non-empty list of character vectors without missing values
  list_check <- checkmate::check_list(
    kept,
    types = "character",
    any.missing = FALSE,
    min.len = 1,
    names = "named"
  )
  if (!isTRUE(list_check)) {
    return(list_check)
  }

  # the list itself is fine, so the outcome is decided by the no-empty-strings check
  checkmate::check_character(unlist(kept), min.chars = 1)
}
#' @describeIn assertions Checks whether `x` is a valid list of variable names, i.e. a named list with
#'   at least one element that contains only non-missing, non-empty character strings.
#'   `NULL` elements of the list `x` are dropped before the check is performed.
#'
#' @keywords internal
assert_list_of_variables <- checkmate::makeAssertionFunction(check_list_of_variables)

check_df_with_variables <- function(df, variables, na_level = NULL) {
  # Checks that every variable named in `variables` is a column of `df` and, when `na_level` is
  # given, that none of those columns contains that string.
  # Returns `TRUE` on success, otherwise a string describing the problem.
  checkmate::assert_data_frame(df)
  assert_list_of_variables(variables)

  var_names <- unlist(variables)

  # every requested variable has to be a column of `df`
  missing_vars <- setdiff(var_names, colnames(df))
  if (length(missing_vars) > 0) {
    return(paste(
      deparse(substitute(df)),
      "does not contain all specified variables as column names. Missing from data frame:",
      paste(missing_vars, collapse = ", ")
    ))
  }

  # checking if na_level is present and in which column
  if (!is.null(na_level)) {
    checkmate::assert_string(na_level)
    # `na.rm = TRUE`: real `NA` values in a column must not turn the flag into `NA`,
    # which would make the `if ()` below fail instead of reporting a result
    has_na_level <- vapply(
      as.list(df)[var_names],
      function(x) any(x == na_level, na.rm = TRUE),
      logical(1)
    )
    if (any(has_na_level)) {
      return(paste0(
        deparse(substitute(df)), " contains explicit na_level (", na_level,
        ") in the following columns: ", paste0(var_names[has_na_level],
          collapse = ", "
        )
      ))
    }
  }
  TRUE
}
#' @describeIn assertions Check whether `df` is a data frame that contains all the analysis `variables`
#'   as columns. An error is produced when at least one of the variables is not present in the
#'   data frame, while columns of `df` that are not listed in `variables` are allowed. If `na_level`
#'   is specified, an error is also produced when any of the selected columns contains that string.
#'
#' @examples
#' x <- data.frame(
#'   a = 1:10,
#'   b = rnorm(10)
#' )
#' assert_df_with_variables(x, variables = list(a = "a", b = "b"))
#'
#' x <- ex_adsl
#' assert_df_with_variables(x, list(a = "ARM", b = "USUBJID"))
#'
#' @export
assert_df_with_variables <- checkmate::makeAssertionFunction(check_df_with_variables)

check_valid_factor <- function(x,
                               min.levels = 1, # nolint
                               max.levels = NULL, # nolint
                               null.ok = TRUE, # nolint
                               any.missing = TRUE, # nolint
                               n.levels = NULL, # nolint
                               len = NULL) {
  # Checks that `x` is a factor with at least `min.levels` levels and no empty-string level.
  # All remaining arguments are forwarded to `checkmate::check_factor()`.
  # Returns `TRUE` on success, otherwise a string describing the problem.

  # checks on levels insertion
  checkmate::assert_int(min.levels, lower = 1)

  # main factor check
  res <- checkmate::check_factor(x,
    min.levels = min.levels,
    null.ok = null.ok,
    max.levels = max.levels,
    any.missing = any.missing,
    n.levels = n.levels,
    # `len` used to be accepted but silently ignored; it is now forwarded like the other arguments
    len = len
  )

  # no empty strings allowed
  if (isTRUE(res)) {
    res <- checkmate::check_character(levels(x), min.chars = 1)
  }

  res
}
#' @describeIn assertions Check whether `x` is a valid factor, i.e. a factor that has at least
#'   `min.levels` levels and no empty string among its levels. By default `NA` elements are
#'   allowed (`any.missing = TRUE`) and `null.ok = TRUE` is passed on to [checkmate::check_factor()].
#'
#' @keywords internal
assert_valid_factor <- checkmate::makeAssertionFunction(check_valid_factor)

check_df_with_factors <- function(df,
                                  variables,
                                  min.levels = 1, # nolint
                                  max.levels = NULL, # nolint
                                  any.missing = TRUE, # nolint
                                  na_level = NULL) {
  # Checks that `df` contains all `variables` (see `check_df_with_variables()`) and that each of
  # these columns is a valid factor (see `check_valid_factor()`).
  # Returns `TRUE` on success, otherwise a string describing the problem.
  res <- check_df_with_variables(df, variables, na_level)
  # checking if all the columns specified by variables are valid factors
  if (isTRUE(res)) {
    var_names <- unlist(variables)
    # searching the data.frame with selected columns (variables) as a list
    res <- lapply(
      X = as.list(df)[var_names],
      FUN = check_valid_factor,
      min.levels = min.levels,
      max.levels = max.levels,
      any.missing = any.missing
    )
    res_lo <- vapply(res, Negate(isTRUE), logical(1))
    if (any(res_lo)) {
      # one complete bullet per failing column, so that every entry carries the "Column `" prefix
      # (previously only the first entry did when several columns failed)
      bullets <- paste0(
        "\n* Column `", var_names[res_lo],
        "` of the data.frame -> ", unlist(res[res_lo]),
        collapse = ""
      )
      return(paste0(
        deparse(substitute(df)), " does not contain only factor variables among:",
        bullets
      ))
    } else {
      res <- TRUE
    }
  }
  res
}

#' @describeIn assertions Check whether `df` is a data frame where the analysis `variables`
#'   are all valid factors. Note that `factor()` excludes `NA` from the levels by default, so
#'   `NA` values in the data do not show up as a level of the resulting factor.
#'
#' @examples
#' x <- ex_adsl
#' assert_df_with_factors(x, list(a = "ARM"))
#'
#' @export
assert_df_with_factors <- checkmate::makeAssertionFunction(check_df_with_factors)

#' @describeIn assertions Check whether `x` is a proportion: number between 0 and 1.
#'
#' @examples
#' assert_proportion_value(0.95)
#' assert_proportion_value(1.0, include_boundaries = TRUE)
#'
#' @export
assert_proportion_value <- function(x, include_boundaries = FALSE) {
  # `x` has to be a single number inside the closed interval [0, 1]
  checkmate::assert_number(x, upper = 1, lower = 0)
  checkmate::assert_flag(include_boundaries)

  # boundaries are acceptable: nothing else to verify
  if (include_boundaries) {
    return(invisible(NULL))
  }

  # otherwise 0 and 1 themselves are rejected
  checkmate::assert_true(x > 0)
  checkmate::assert_true(x < 1)
}

#' Count occurrences by grade
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [count_occurrences_by_grade()] creates a layout element to calculate occurrence counts by grade.
#'
#' This function analyzes primary analysis variable `var` which indicates toxicity grades. The `id` variable
#' is used to indicate unique subject identifiers (defaults to `USUBJID`). The user can also supply a list of
#' custom groups of grades to analyze via the `grade_groups` parameter. The `remove_single`  argument will
#' remove single grades from the analysis so that *only* grade groups are analyzed.
#'
#' If there are multiple grades recorded for one patient only the highest grade level is counted.
#'
#' The summarize function [summarize_occurrences_by_grade()] performs the same function as
#' [count_occurrences_by_grade()] except it creates content rows, not data rows, to summarize the current table
#' row/column context and operates on the level of the latest row split or the root of the table if no row splits have
#' occurred.
#'
#' @inheritParams count_occurrences
#' @inheritParams argument_convention
#' @param grade_groups (named `list` of `character`)\cr list containing groupings of grades.
#' @param remove_single (`flag`)\cr `TRUE` to not include the elements of one-element grade groups
#'   in the output list; in this case only the grade groups names will be included in the output. If
#'   `only_grade_groups` is set to `TRUE` this argument is ignored.
#' @param only_grade_groups (`flag`)\cr whether only the specified grade groups should be
#'   included, with individual grade rows removed (`TRUE`), or all grades and grade groups
#'   should be displayed (`FALSE`).
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("count_occurrences_by_grade"), type = "sh")``
#'
#' @seealso Relevant helper function [h_append_grade_groups()].
#'
#' @name count_occurrences_by_grade
#' @order 1
NULL

#' Helper function for `s_count_occurrences_by_grade()`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function for [s_count_occurrences_by_grade()] to insert grade groupings into list with
#' individual grade frequencies. The order of the final result follows the order of `grade_groups`.
#' The elements under the any-grade group (if any), i.e. the grade group whose elements match the names of
#' `refs`, will be moved to the end. Grade group names must be unique.
#'
#' @inheritParams count_occurrences_by_grade
#' @param refs (named `list` of `numeric`)\cr named list where each name corresponds to a reference grade level
#'   and each entry represents a count.
#'
#' @return Formatted list of grade groupings.
#'
#' @examples
#' h_append_grade_groups(
#'   list(
#'     "Any Grade" = as.character(1:5),
#'     "Grade 1-2" = c("1", "2"),
#'     "Grade 3-4" = c("3", "4")
#'   ),
#'   list("1" = 10, "2" = 20, "3" = 30, "4" = 40, "5" = 50)
#' )
#'
#' h_append_grade_groups(
#'   list(
#'     "Any Grade" = as.character(5:1),
#'     "Grade A" = "5",
#'     "Grade B" = c("4", "3")
#'   ),
#'   list("1" = 10, "2" = 20, "3" = 30, "4" = 40, "5" = 50)
#' )
#'
#' h_append_grade_groups(
#'   list(
#'     "Any Grade" = as.character(1:5),
#'     "Grade 1-2" = c("1", "2"),
#'     "Grade 3-4" = c("3", "4")
#'   ),
#'   list("1" = 10, "2" = 5, "3" = 0)
#' )
#'
#' @export
h_append_grade_groups <- function(grade_groups, refs, remove_single = TRUE, only_grade_groups = FALSE) {
  checkmate::assert_list(grade_groups)
  checkmate::assert_list(refs)
  refs_orig <- refs
  elements <- unique(unlist(grade_groups))

  ### compute sums in groups
  grp_sum <- lapply(grade_groups, function(i) do.call(sum, refs[i]))
  # grades that appear in a group but not in `refs` are added with a zero count
  if (!checkmate::test_subset(elements, names(refs))) {
    padding_el <- setdiff(elements, names(refs))
    refs[padding_el] <- 0
  }
  result <- c(grp_sum, refs)

  ### order result while keeping grade_groups's ordering
  ordr <- grade_groups

  # elements of any-grade group (if any) will be moved to the end
  # (`vapply()` keeps the result a logical vector even when `grade_groups` is empty, where
  # `sapply()` would return a list that cannot be used as a subscript)
  is_any <- vapply(grade_groups, setequal, logical(1), y = names(refs))
  ordr[is_any] <- list(character(0)) # hide elements under any-grade group

  # groups-elements combined sequence
  ordr <- c(lapply(names(ordr), function(g) c(g, ordr[[g]])), recursive = TRUE, use.names = FALSE)
  ordr <- ordr[!duplicated(ordr)]

  # append remaining elements (if any)
  ordr <- union(ordr, unlist(grade_groups[is_any])) # from any-grade group
  ordr <- union(ordr, names(refs)) # from refs

  # remove elements of single-element groups, if any
  if (only_grade_groups) {
    ordr <- intersect(ordr, names(grade_groups))
  } else if (remove_single) {
    is_single <- lengths(grade_groups) == 1L
    ordr <- setdiff(ordr, unlist(grade_groups[is_single]))
  }

  # apply the order
  result <- result[ordr]

  # remove groups without any elements in the original refs
  # note: it's OK if groups have 0 value
  keep_grp <- vapply(grade_groups, function(x, rf) {
    any(x %in% rf)
  }, rf = names(refs_orig), logical(1))

  # padded grades (not in the original refs) are dropped again here
  keep_el <- names(result) %in% names(refs_orig) | names(result) %in% names(keep_grp)[keep_grp]
  result <- result[keep_el]

  result
}

#' @describeIn count_occurrences_by_grade Statistics function which counts the
#'  number of patients by highest grade.
#'
#' @return
#' * `s_count_occurrences_by_grade()` returns a list of counts and fractions with one element per grade level or
#'   grade level grouping.
#'
#' @examples
#' s_count_occurrences_by_grade(
#'   df,
#'   .N_col = 10L,
#'   .var = "AETOXGR",
#'   id = "USUBJID",
#'   grade_groups = list("ANY" = levels(df$AETOXGR))
#' )
#'
#' @export
s_count_occurrences_by_grade <- function(df,
                                         labelstr = "",
                                         .var,
                                         .N_row, # nolint
                                         .N_col, # nolint
                                         ...,
                                         id = "USUBJID",
                                         grade_groups = list(),
                                         remove_single = TRUE,
                                         only_grade_groups = FALSE,
                                         denom = c("N_col", "n", "N_row")) {
  assert_valid_factor(df[[.var]])
  assert_df_with_variables(df, list(grade = .var, id = id))

  # Denominator of the fractions: number of distinct ids, row count or column count.
  denom <- switch(match.arg(denom),
    n = nlevels(factor(df[[id]])),
    N_row = .N_row,
    N_col = .N_col
  )

  if (nrow(df) >= 1) {
    # Subject identifiers must be complete and non-empty.
    if (isTRUE(is.factor(df[[id]]))) {
      assert_valid_factor(df[[id]], any.missing = FALSE)
    } else {
      checkmate::assert_character(df[[id]], min.chars = 1, any.missing = FALSE)
    }
    checkmate::assert_count(.N_col)

    subj <- df[[id]]
    grd <- df[[.var]]

    # Unordered grades are turned into an ordered factor so that a maximum can be taken.
    if (!is.ordered(grd)) {
      grd_label <- obj_label(grd)
      lvl_keys <- levels(grd)
      is_digits <- grepl("^\\d+$", lvl_keys)
      if (all(is_digits) || !any(is_digits)) {
        # all-numeric or all-text levels: keep the existing level order
        sorted_lvls <- lvl_keys
      } else {
        # mixed levels: text levels are ranked just below the smallest numeric level
        lvl_keys[!is_digits] <- min(as.numeric(lvl_keys[is_digits])) - 1
        sorted_lvls <- levels(grd)[order(as.numeric(lvl_keys))]
      }
      grd <- formatters::with_label(factor(grd, levels = sorted_lvls, ordered = TRUE), grd_label)
    }

    # Levels containing "missing" (case-insensitive) are moved to the end of the level order.
    is_missing_lvl <- grepl("missing", tolower(levels(grd)))
    if (any(is_missing_lvl)) {
      grd <- factor(
        grd,
        levels = c(levels(grd)[!is_missing_lvl], levels(grd)[is_missing_lvl]),
        ordered = is.ordered(grd)
      )
    }

    # One row per subject carrying the highest grade, then tabulated per grade level.
    max_by_subj <- stats::aggregate(grd ~ subj, FUN = max, drop = FALSE)
    l_count <- as.list(table(max_by_subj$grd))
  } else {
    # No records: every grade level gets a zero count.
    empty_lvls <- levels(df[[.var]])
    l_count <- as.list(rep(0, length(empty_lvls)))
    names(l_count) <- empty_lvls
  }

  if (length(grade_groups) > 0) {
    l_count <- h_append_grade_groups(grade_groups, l_count, remove_single, only_grade_groups)
  }

  # Pair every count with its fraction; 0 / 0 is reported as 0 rather than NaN.
  as_count_fraction <- function(cnt) {
    if (cnt == 0 && denom == 0) {
      return(c(0, 0))
    }
    c(cnt, cnt / denom)
  }
  l_count_fraction <- lapply(l_count, as_count_fraction)

  list(
    count_fraction = l_count_fraction,
    count_fraction_fixed_dp = l_count_fraction
  )
}

#' @describeIn count_occurrences_by_grade Formatted analysis function which is used as `afun`
#'   in `count_occurrences_by_grade()`.
#'
#' @return
#' * `a_count_occurrences_by_grade()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_count_occurrences_by_grade(
#'   df,
#'   .N_col = 10L,
#'   .N_row = 10L,
#'   .var = "AETOXGR",
#'   id = "USUBJID",
#'   grade_groups = list("ANY" = levels(df$AETOXGR))
#' )
#'
#' @export
a_count_occurrences_by_grade <- function(df,
                                         labelstr = "",
                                         ...,
                                         .stats = NULL,
                                         .stat_names = NULL,
                                         .formats = NULL,
                                         .labels = NULL,
                                         .indent_mods = NULL) {
  # Separate the layout-provided parameters (listed in `.additional_fun_parameters`)
  # from the remaining arguments passed through `...`
  stat_fun_args <- list(...)
  layout_params <- retrieve_extra_afun_params(names(stat_fun_args$.additional_fun_parameters))
  stat_fun_args$.additional_fun_parameters <- NULL

  # Split the requested statistics into default names and user-defined functions
  stats_split <- .split_std_from_custom_stats(.stats)
  .stats <- stats_split$all_stats
  custom_funs <- stats_split$custom_stats

  # Run the default statistics function together with any custom ones
  all_args <- c(
    df = list(df),
    labelstr = list(labelstr),
    layout_params,
    stat_fun_args
  )
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_count_occurrences_by_grade,
    custom_stat_fnc_list = custom_funs,
    args_list = all_args
  )

  # Resolve the final set of statistics and their per-level formats, labels and indentation
  .stats <- get_stats("count_occurrences_by_grade", stats_in = .stats, custom_stats_in = names(custom_funs))
  x_stats <- x_stats[.stats]
  lvls_by_stat <- lapply(x_stats, names)
  .formats <- get_formats_from_stats(.stats, .formats, lvls_by_stat)
  .labels <- get_labels_from_stats(.stats, .labels, lvls_by_stat)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, lvls_by_stat)

  # Flatten the results to one entry per row, named consistently with the formats
  x_stats <- setNames(.unlist_keep_nulls(x_stats[.stats]), names(.formats))

  # Replace "auto" formats using the row data of the analyzed variable
  .formats <- apply_auto_formatting(.formats, x_stats, layout_params$.df_row, layout_params$.var)

  # Retrieve and validate the statistic names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  # The flattened labels serve both as row names and as row labels
  row_labels <- .unlist_keep_nulls(.labels)
  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = row_labels,
    .stat_names = .stat_names,
    .labels = row_labels,
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}

#' @describeIn count_occurrences_by_grade Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_occurrences_by_grade()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_occurrences_by_grade()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' df <- data.frame(
#'   USUBJID = as.character(c(1:6, 1)),
#'   ARM = factor(c("A", "A", "A", "B", "B", "B", "A"), levels = c("A", "B")),
#'   AETOXGR = factor(c(1, 2, 3, 4, 1, 2, 3), levels = c(1:5)),
#'   AESEV = factor(
#'     x = c("MILD", "MODERATE", "SEVERE", "MILD", "MILD", "MODERATE", "SEVERE"),
#'     levels = c("MILD", "MODERATE", "SEVERE")
#'   ),
#'   stringsAsFactors = FALSE
#' )
#'
#' df_adsl <- df %>%
#'   select(USUBJID, ARM) %>%
#'   unique()
#'
#' # Layout creating function with custom format.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_occurrences_by_grade(
#'     var = "AESEV",
#'     .formats = c("count_fraction" = "xx.xx (xx.xx%)")
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' # Define additional grade groupings.
#' grade_groups <- list(
#'   "-Any-" = c("1", "2", "3", "4", "5"),
#'   "Grade 1-2" = c("1", "2"),
#'   "Grade 3-5" = c("3", "4", "5")
#' )
#'
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_occurrences_by_grade(
#'     var = "AETOXGR",
#'     grade_groups = grade_groups,
#'     only_grade_groups = TRUE
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' @export
#' @order 2
count_occurrences_by_grade <- function(lyt,
                                       var,
                                       id = "USUBJID",
                                       grade_groups = list(),
                                       remove_single = TRUE,
                                       only_grade_groups = FALSE,
                                       var_labels = var,
                                       show_labels = "default",
                                       riskdiff = FALSE,
                                       na_str = default_na_str(),
                                       nested = TRUE,
                                       ...,
                                       table_names = var,
                                       .stats = "count_fraction",
                                       .stat_names = NULL,
                                       .formats = list(count_fraction = format_count_fraction_fixed_dp),
                                       .labels = NULL,
                                       .indent_mods = NULL) {
  checkmate::assert_flag(riskdiff)

  # Pick the analysis function: the risk difference wrapper when requested, the plain one otherwise
  use_riskdiff <- !isFALSE(riskdiff)
  afun <- if (use_riskdiff) afun_riskdiff else a_count_occurrences_by_grade

  # Standard formatting arguments: `.stats` is always forwarded, the others only when supplied
  extra_args <- list(".stats" = .stats)
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  extra_args <- c(extra_args, Filter(Negate(is.null), optional_args))

  # Arguments for the statistics function; with risk difference the plain analysis
  # function is additionally handed over under the name of its statistics function
  riskdiff_args <- if (use_riskdiff) {
    list(afun = list("s_count_occurrences_by_grade" = a_count_occurrences_by_grade))
  }
  extra_args <- c(
    extra_args,
    id = id,
    grade_groups = list(grade_groups),
    remove_single = remove_single,
    only_grade_groups = only_grade_groups,
    riskdiff_args,
    ...
  )

  # Expose the additional layout parameters both as an extra argument and as formals of `afun`
  layout_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- layout_params
  formals(afun) <- c(formals(afun), layout_params)

  analyze(
    lyt = lyt,
    vars = var,
    afun = afun,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names
  )
}

#' @describeIn count_occurrences_by_grade Layout-creating function which can take content function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @return
#' * `summarize_occurrences_by_grade()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows
#'   containing the statistics from `s_count_occurrences_by_grade()` to the table layout.
#'
#' @examples
#' # Layout creating function with custom format.
#' basic_table() %>%
#'   add_colcounts() %>%
#'   split_rows_by("ARM", child_labels = "visible", nested = TRUE) %>%
#'   summarize_occurrences_by_grade(
#'     var = "AESEV",
#'     .formats = c("count_fraction" = "xx.xx (xx.xx%)")
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' basic_table() %>%
#'   add_colcounts() %>%
#'   split_rows_by("ARM", child_labels = "visible", nested = TRUE) %>%
#'   summarize_occurrences_by_grade(
#'     var = "AETOXGR",
#'     grade_groups = grade_groups
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' @export
#' @order 3
summarize_occurrences_by_grade <- function(lyt,
                                           var,
                                           id = "USUBJID",
                                           grade_groups = list(),
                                           remove_single = TRUE,
                                           only_grade_groups = FALSE,
                                           riskdiff = FALSE,
                                           na_str = default_na_str(),
                                           ...,
                                           .stats = "count_fraction",
                                           .stat_names = NULL,
                                           .formats = list(count_fraction = format_count_fraction_fixed_dp),
                                           .labels = NULL,
                                           .indent_mods = 0L) {
  checkmate::assert_flag(riskdiff)

  # Pick the content function: the risk difference wrapper when requested, the plain one otherwise
  use_riskdiff <- !isFALSE(riskdiff)
  afun <- if (use_riskdiff) afun_riskdiff else a_count_occurrences_by_grade

  # Standard formatting arguments: `.stats` is always forwarded, the others only when supplied
  extra_args <- list(".stats" = .stats)
  optional_args <- list(.stat_names = .stat_names, .formats = .formats, .labels = .labels)
  extra_args <- c(extra_args, Filter(Negate(is.null), optional_args))

  # A single indent modifier is applied to the content rows as a whole through `indent_mod`;
  # a longer vector is forwarded to the content function instead
  indent_mod <- 0L
  if (length(.indent_mods) == 1) {
    indent_mod <- .indent_mods
  } else if (!is.null(.indent_mods)) {
    extra_args[[".indent_mods"]] <- .indent_mods
  }

  # Arguments for the statistics function; with risk difference the plain analysis
  # function is additionally handed over under the name of its statistics function
  riskdiff_args <- if (use_riskdiff) {
    list(afun = list("s_count_occurrences_by_grade" = a_count_occurrences_by_grade))
  }
  extra_args <- c(
    extra_args,
    id = id,
    grade_groups = list(grade_groups),
    remove_single = remove_single,
    only_grade_groups = only_grade_groups,
    riskdiff_args,
    ...
  )

  # Expose the additional layout parameters both as an extra argument and as formals of `afun`
  layout_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- layout_params
  formals(afun) <- c(formals(afun), layout_params)

  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = afun,
    na_str = na_str,
    extra_args = extra_args,
    indent_mod = indent_mod
  )
}

#' Tabulate biomarker effects on binary response by subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The [tabulate_rsp_biomarkers()] function creates a layout element to tabulate the estimated biomarker effects on a
#' binary response endpoint across subgroups, returning statistics including response rate and odds ratio for each
#' population subgroup. The table is created from `df`, a list of data frames returned by [extract_rsp_biomarkers()],
#' with the statistics to include specified via the `vars` parameter.
#'
#' A forest plot can be created from the resulting table using the [g_forest()] function.
#'
#' @inheritParams argument_convention
#' @param df (`data.frame`)\cr containing all analysis variables, as returned by
#'   [extract_rsp_biomarkers()].
#' @param vars (`character`)\cr the names of statistics to be reported among:
#'   * `n_tot`: Total number of patients per group.
#'   * `n_rsp`: Total number of responses per group.
#'   * `prop`: Total response proportion per group.
#'   * `or`: Odds ratio.
#'   * `ci`: Confidence interval of odds ratio.
#'   * `pval`: p-value of the effect.
#'   Note, the statistics `n_tot`, `or` and `ci` are required.
#'
#' @return An `rtables` table summarizing biomarker effects on binary response by subgroup.
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. The tables are then typically used as input for forest plots.
#'
#' @note In contrast to [tabulate_rsp_subgroups()] this tabulation function does
#'   not start from an input layout `lyt`. This is because internally the table is
#'   created by combining multiple subtables.
#'
#' @seealso [extract_rsp_biomarkers()]
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   mutate(rsp = AVALC == "CR")
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' df <- extract_rsp_biomarkers(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adrs_f
#' )
#'
#' \donttest{
#' ## Table with default columns.
#' tabulate_rsp_biomarkers(df)
#'
#' ## Table with a manually chosen set of columns: leave out "pval", reorder.
#' tab <- tabulate_rsp_biomarkers(
#'   df = df,
#'   vars = c("n_rsp", "ci", "n_tot", "prop", "or")
#' )
#'
#' ## Finally produce the forest plot.
#' g_forest(tab, xlim = c(0.7, 1.4))
#' }
#'
#' @export
#' @name response_biomarkers_subgroups
tabulate_rsp_biomarkers <- function(df,
                                    vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval"),
                                    na_str = default_na_str(),
                                    ...,
                                    .stat_names = NULL,
                                    .formats = NULL,
                                    .labels = NULL,
                                    .indent_mods = NULL) {
  checkmate::assert_data_frame(df)
  checkmate::assert_character(df$biomarker)
  checkmate::assert_character(df$biomarker_label)
  checkmate::assert_subset(vars, get_stats("tabulate_rsp_biomarkers"))

  # Standard formatting arguments: the selected statistics are always forwarded,
  # the others only when supplied
  extra_args <- list(".stats" = vars)
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  extra_args <- c(extra_args, Filter(Negate(is.null), optional_args))

  # Column variables and labels, based on the confidence level and p-value label in the first row
  colvars <- d_rsp_subgroups_colvars(
    vars,
    conf_level = df$conf_level[1],
    method = df$pval_label[1]
  )

  # Additional arguments for the analysis function
  extra_args <- c(extra_args, biomarker = TRUE, ...)

  # Expose the additional layout parameters as an extra argument and as formals of a local
  # copy of the analysis function; the table builder below picks up this local copy
  layout_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- layout_params
  formals(a_response_subgroups) <- c(formals(a_response_subgroups), layout_params)

  # Combine "lcl" and "ucl" into the "ci" column
  df$ci <- combine_vectors(df$lcl, df$ucl)

  # Builds the table for the rows of a single biomarker
  build_biomarker_table <- function(df_bm) {
    # Columns from the selected statistics, rows split by biomarker, then the "content" row
    lyt <- basic_table() %>%
      split_cols_by_multivar(
        vars = colvars$vars,
        varlabels = colvars$labels
      ) %>%
      split_rows_by(
        var = "biomarker_label",
        nested = FALSE
      ) %>%
      split_rows_by(
        var = "row_type",
        split_fun = keep_split_levels("content"),
        nested = TRUE,
        child_labels = "hidden"
      ) %>%
      analyze_colvars(
        afun = a_response_subgroups,
        na_str = na_str,
        extra_args = c(extra_args, overall = TRUE)
      )

    # Subgroup ("analysis") rows are only added when present for this biomarker
    if ("analysis" %in% df_bm$row_type) {
      lyt <- lyt %>%
        split_rows_by(
          var = "row_type",
          split_fun = keep_split_levels("analysis"),
          nested = TRUE,
          child_labels = "hidden"
        ) %>%
        split_rows_by(
          var = "var_label",
          nested = TRUE,
          indent_mod = 1L
        ) %>%
        analyze_colvars(
          afun = a_response_subgroups,
          na_str = na_str,
          inclNAs = TRUE,
          extra_args = extra_args
        )
    }

    build_table(lyt, df = df_bm)
  }

  # One table per biomarker, stacked into a single result
  tbls <- lapply(split(df, f = df$biomarker), build_biomarker_table)
  result <- do.call(rbind, tbls)

  # Attach the header text and the positions of the relevant columns within `vars` as attributes
  structure(
    result,
    forest_header = paste0(c("Lower", "Higher"), "\nBetter"),
    col_x = match("or", vars),
    col_ci = match("ci", vars),
    col_symbol_size = grep("n_tot", vars)
  )
}

#' Prepare response data estimates for multiple biomarkers in a single data frame
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares estimates for number of responses, patients and overall response rate,
#' as well as odds ratio estimates, confidence intervals and p-values,
#' for multiple biomarkers across population subgroups in a single data frame.
#' `variables` corresponds to the names of variables found in `data`, passed as a
#' named list and requires elements `rsp` and `biomarkers` (vector of continuous
#' biomarker variables) and optionally `covariates`, `subgroups` and `strata`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams response_subgroups
#' @param control (named `list`)\cr controls for the response definition and the
#'   confidence level produced by [control_logistic()].
#'
#' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_rsp`,
#'   `prop`, `or`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`,
#'   `var_label`, and `row_type`.
#'
#' @note You can also specify a continuous variable in `rsp` and then use the
#'   `response_definition` control to convert that internally to a logical
#'   variable reflecting binary response.
#'
#' @seealso [h_logistic_mult_cont_df()] which is used internally.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   mutate(rsp = AVALC == "CR")
#'
#' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`,
#' # in logistic regression models with one covariate `SEX`. The subgroups
#' # are defined by the levels of `BMRKR2`.
#' df <- extract_rsp_biomarkers(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adrs_f
#' )
#' df
#'
#' # Here we group the levels of `BMRKR2` manually, and we add a stratification
#' # variable `STRATA1`. We also here use a continuous variable `EOSDY`
#' # which is then binarized internally (response is defined as this variable
#' # being larger than 750).
#' df_grouped <- extract_rsp_biomarkers(
#'   variables = list(
#'     rsp = "EOSDY",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     subgroups = "BMRKR2",
#'     strata = "STRATA1"
#'   ),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   ),
#'   control = control_logistic(
#'     response_definition = "I(response > 750)"
#'   )
#' )
#' df_grouped
#'
#' @export
extract_rsp_biomarkers <- function(variables,
                                   data,
                                   groups_lists = list(),
                                   control = control_logistic(),
                                   label_all = "All Patients") {
  # Backward compatibility: the deprecated `strat` element is still accepted (with a warning)
  # and copied to `strata`
  if ("strat" %in% names(variables)) {
    warning(
      "Warning: the `strat` element name of the `variables` list argument to `extract_rsp_biomarkers() ",
      "was deprecated in tern 0.9.4.\n  ",
      "Please use the name `strata` instead of `strat` in the `variables` argument."
    )
    variables[["strata"]] <- variables[["strat"]]
  }

  assert_list_of_variables(variables)
  checkmate::assert_string(variables$rsp)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_string(label_all)

  # Estimates for all patients, labelled as the "content" rows
  overall <- h_logistic_mult_cont_df(
    variables = variables,
    data = data,
    control = control
  )
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  # Without subgroup variables the overall estimates are the complete result
  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # Estimates within one subgroup, with the subgroup labels repeated for each result row
  fit_subgroup <- function(grp) {
    result <- h_logistic_mult_cont_df(
      variables = variables,
      data = grp$df,
      control = control
    )
    result_labels <- grp$df_labels[rep(1, times = nrow(result)), ]
    cbind(result, result_labels)
  }

  subgroup_data <- h_split_by_subgroups(
    data,
    variables$subgroups,
    groups_lists = groups_lists
  )
  per_subgroup <- lapply(subgroup_data, fit_subgroup)
  by_subgroup <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  # Overall rows first, followed by the subgroup rows
  rbind(
    overall,
    by_subgroup
  )
}

#' Control function for descriptive statistics
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Sets a list of parameters for summaries of descriptive statistics. Typically used internally to specify
#' details for [s_summary()]. This function family is mainly used by [analyze_vars()].
#'
#' @inheritParams argument_convention
#' @param quantiles (`numeric(2)`)\cr vector of length two to specify the quantiles to calculate.
#' @param quantile_type (`numeric(1)`)\cr number between 1 and 9 selecting quantile algorithms to be used.
#'   Default is set to 2 as this matches the default quantile algorithm in SAS `proc univariate` set by `QNTLDEF=5`.
#'   This differs from R's default. See more about `type` in [stats::quantile()].
#' @param test_mean (`numeric(1)`)\cr number to test against the mean under the null hypothesis when calculating
#'   p-value.
#'
#' @return A list of components with the same names as the arguments.
#'
#' @export
control_analyze_vars <- function(conf_level = 0.95,
                                 quantiles = c(0.25, 0.75),
                                 quantile_type = 2,
                                 test_mean = 0) {
  # Validate the inputs: two quantiles, an integer quantile type in 1..9, a numeric test mean,
  # and proportion values for each quantile and for the confidence level
  checkmate::assert_vector(quantiles, len = 2)
  checkmate::assert_int(quantile_type, lower = 1, upper = 9)
  checkmate::assert_numeric(test_mean)
  for (quantile_prob in quantiles) {
    assert_proportion_value(quantile_prob)
  }
  assert_proportion_value(conf_level)

  # Return the settings under the names of the arguments
  list(
    conf_level = conf_level,
    quantiles = quantiles,
    quantile_type = quantile_type,
    test_mean = test_mean
  )
}

# Helper function to fix numeric or counts pval if necessary
.correct_num_or_counts_pval <- function(type, .stats) {
  # Every statistic whose name starts with "pval" is renamed to the p-value statistic
  # matching the variable type: "pval" for numeric, "pval_counts" for anything else
  pval_name <- if (type == "numeric") "pval" else "pval_counts" # tmp fix xxx

  if (!is.null(.stats)) {
    is_pval <- grepl("^pval", .stats)
    if (any(is_pval)) {
      .stats[is_pval] <- pval_name
    }
  }

  .stats
}

#' Analyze variables
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [analyze_vars()] creates a layout element to summarize one or more variables, using the S3
#' generic function [s_summary()] to calculate a list of summary statistics. A list of all available statistics for
#' numeric variables can be viewed by running `get_stats("analyze_vars_numeric")` and for non-numeric variables by
#' running `get_stats("analyze_vars_counts")`. Use the `.stats` parameter to specify the statistics to include in your
#' output summary table. Use `compare_with_ref_group = TRUE` to compare the variable with reference groups.
#'
#' @details
#' **Automatic digit formatting:** The number of digits to display can be automatically determined from the analyzed
#' variable(s) (`vars`) for certain statistics by setting the statistic format to `"auto"` in `.formats`.
#' This utilizes the [format_auto()] formatting function. Note that only data for the current row & variable (for all
#' columns) will be considered (`.df_row[[.var]]`, see [`rtables::additional_fun_params`]) and not the whole dataset.
#'
#' @inheritParams argument_convention
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options for numeric variables are: ``r shQuote(get_stats("analyze_vars_numeric"), type = "sh")``
#'
#'   Options for non-numeric variables are: ``r shQuote(get_stats("analyze_vars_counts"), type = "sh")``
#'
#' @name analyze_variables
#' @order 1
NULL

#' @describeIn analyze_variables S3 generic function to produce a variable summary.
#'
#' @return
#' * `s_summary()` returns different statistics depending on the class of `x`.
#'
#' @export
s_summary <- function(x, ...) {
  # Dispatch on the class of the first argument, `x`
  UseMethod("s_summary")
}

#' @describeIn analyze_variables Method for `numeric` class.
#'
#' @param control (`list`)\cr parameters for descriptive statistics details, specified by using
#'   the helper function [control_analyze_vars()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for mean and median.
#'   * `quantiles` (`numeric(2)`)\cr vector of length two to specify the quantiles.
#'   * `quantile_type` (`numeric(1)`)\cr between 1 and 9 selecting quantile algorithms to be used.
#'     See more about `type` in [stats::quantile()].
#'   * `test_mean` (`numeric(1)`)\cr value to test against the mean under the null hypothesis when calculating p-value.
#'
#' @return
#'   * If `x` is of class `numeric`, returns a `list` with the following named `numeric` items:
#'     * `n`: The [length()] of `x`.
#'     * `sum`: The [sum()] of `x`.
#'     * `mean`: The [mean()] of `x`.
#'     * `sd`: The [stats::sd()] of `x`.
#'     * `se`: The standard error of `x` mean, i.e.: (`sd(x) / sqrt(length(x))`).
#'     * `mean_sd`: The [mean()] and [stats::sd()] of `x`.
#'     * `mean_se`: The [mean()] of `x` and its standard error (see above).
#'     * `mean_ci`: The CI for the mean of `x` (from [stat_mean_ci()]).
#'     * `mean_sei`: The SE interval for the mean of `x`, i.e.: ([mean()] -/+ [stats::sd()] / [sqrt()]).
#'     * `mean_sdi`: The SD interval for the mean of `x`, i.e.: ([mean()] -/+ [stats::sd()]).
#'     * `mean_pval`: The two-sided p-value of the mean of `x` (from [stat_mean_pval()]).
#'     * `median`: The [stats::median()] of `x`.
#'     * `mad`: The median absolute deviation of `x`, i.e.: ([stats::median()] of `xc`,
#'       where `xc` = `x` - [stats::median()]).
#'     * `median_ci`: The CI for the median of `x` (from [stat_median_ci()]).
#'     * `quantiles`: Two sample quantiles of `x` (from [stats::quantile()]).
#'     * `iqr`: The [stats::IQR()] of `x`.
#'     * `range`: The [range_noinf()] of `x`.
#'     * `min`: The [min()] of `x`.
#'     * `max`: The [max()] of `x`.
#'     * `median_range`: The [median()] and [range_noinf()] of `x`.
#'     * `cv`: The coefficient of variation of `x`, i.e.: ([stats::sd()] / [mean()] * 100).
#'     * `geom_mean`: The geometric mean of `x`, i.e.: (`exp(mean(log(x)))`).
#'     * `geom_cv`: The geometric coefficient of variation of `x`, i.e.: (`sqrt(exp(sd(log(x)) ^ 2) - 1) * 100`).
#'
#' @note
#' * If `x` is an empty vector, `NA` is returned. This is the expected feature so as to return `rcell` content in
#'   `rtables` when the intersection of a column and a row delimits an empty data selection.
#' * When the `mean` function is applied to an empty vector, `NA` will be returned instead of `NaN`, the latter
#'   being standard behavior in R.
#'
#' @method s_summary numeric
#'
#' @examples
#' # `s_summary.numeric`
#'
#' ## Basic usage: empty numeric returns NA-filled items.
#' s_summary(numeric())
#'
#' ## Management of NA values.
#' x <- c(NA_real_, 1)
#' s_summary(x, na_rm = TRUE)
#' s_summary(x, na_rm = FALSE)
#'
#' x <- c(NA_real_, 1, 2)
#' s_summary(x)
#'
#' ## Benefits in `rtables` constructions:
#' dta_test <- data.frame(
#'   Group = rep(LETTERS[seq(3)], each = 2),
#'   sub_group = rep(letters[seq(2)], each = 3),
#'   x = seq(6)
#' )
#'
#' ## The summary obtained with `rtables`:
#' basic_table() %>%
#'   split_cols_by(var = "Group") %>%
#'   split_rows_by(var = "sub_group") %>%
#'   analyze(vars = "x", afun = s_summary) %>%
#'   build_table(df = dta_test)
#'
#' ## By comparison with `lapply`:
#' X <- split(dta_test, f = with(dta_test, interaction(Group, sub_group)))
#' lapply(X, function(x) s_summary(x$x))
#'
#' @export
s_summary.numeric <- function(x, control = control_analyze_vars(), ...) {
  checkmate::assert_numeric(x)

  # Optional settings are passed through `...`
  args_list <- list(...)
  na_rm <- args_list[["na_rm"]] %||% TRUE
  compare_with_ref_group <- args_list[["compare_with_ref_group"]]

  if (na_rm) {
    x <- x[!is.na(x)]
  } # no explicit NA because it should be numeric

  # Quantities reused by several statistics below
  is_empty <- length(x) == 0
  has_na <- anyNA(x)
  sd_x <- stats::sd(x, na.rm = FALSE)

  y <- list()

  y$n <- c("n" = length(x))

  # Sum and mean of an empty vector are reported as NA
  y$sum <- c("sum" = if (is_empty) NA_real_ else sum(x, na.rm = FALSE))

  y$mean <- c("mean" = if (is_empty) NA_real_ else mean(x, na.rm = FALSE))

  y$sd <- c("sd" = sd_x)

  y$se <- c("se" = sd_x / sqrt(length(stats::na.omit(x))))

  y$mean_sd <- c(y$mean, y$sd)

  y$mean_se <- c(y$mean, y$se)

  mean_ci <- stat_mean_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE)
  y$mean_ci <- formatters::with_label(mean_ci, paste("Mean", f_conf_level(control$conf_level)))

  mean_sei <- y$mean[[1]] + c(-1, 1) * sd_x / sqrt(y$n)
  names(mean_sei) <- c("mean_sei_lwr", "mean_sei_upr")
  y$mean_sei <- formatters::with_label(mean_sei, "Mean -/+ 1xSE")

  mean_sdi <- y$mean[[1]] + c(-1, 1) * sd_x
  names(mean_sdi) <- c("mean_sdi_lwr", "mean_sdi_upr")
  y$mean_sdi <- formatters::with_label(mean_sdi, "Mean -/+ 1xSD")

  # Mean followed by its confidence interval
  mean_ci_3d <- c(y$mean, y$mean_ci)
  y$mean_ci_3d <- formatters::with_label(mean_ci_3d, paste0("Mean (", f_conf_level(control$conf_level), ")"))

  mean_pval <- stat_mean_pval(x, test_mean = control$test_mean, na.rm = FALSE, n_min = 2)
  y$mean_pval <- formatters::with_label(mean_pval, paste("Mean", f_pval(control$test_mean)))

  y$median <- c("median" = stats::median(x, na.rm = FALSE))

  # Median of the deviations from the median (no absolute value is taken)
  y$mad <- c("mad" = stats::median(x - y$median, na.rm = FALSE))

  median_ci <- stat_median_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE)
  y$median_ci <- formatters::with_label(median_ci, paste("Median", f_conf_level(control$conf_level)))

  # Median followed by its confidence interval
  median_ci_3d <- c(y$median, median_ci)
  y$median_ci_3d <- formatters::with_label(median_ci_3d, paste0("Median (", f_conf_level(control$conf_level), ")"))

  # Quantiles and IQR are reported as NA as soon as `x` contains missing values
  q <- control$quantiles
  if (has_na) {
    qnts <- rep(NA_real_, length(q))
  } else {
    qnts <- stats::quantile(x, probs = q, type = control$quantile_type, na.rm = FALSE)
  }
  names(qnts) <- paste("quantile", q, sep = "_")
  y$quantiles <- formatters::with_label(qnts, paste0(paste(paste0(q * 100, "%"), collapse = " and "), "-ile"))

  y$iqr <- c("iqr" = if (has_na) NA_real_ else stats::IQR(x, na.rm = FALSE, type = control$quantile_type))

  y$range <- stats::setNames(range_noinf(x, na.rm = FALSE), c("min", "max"))
  y$min <- y$range[1]
  y$max <- y$range[2]

  y$median_range <- formatters::with_label(c(y$median, y$range), "Median (Min - Max)")

  y$cv <- c("cv" = unname(y$sd) / unname(y$mean) * 100)

  # Geometric Mean - Convert non-positive values to NA for log calculation.
  geom_verbose <- args_list[["geom_verbose"]] %||% FALSE # Additional info if requested
  checkmate::assert_flag(geom_verbose)
  x_no_negative_vals <- x
  # Any empty input (double or integer) is replaced by a single NA so that the geometric
  # statistics are NA rather than NaN
  if (length(x_no_negative_vals) == 0) {
    x_no_negative_vals <- NA
  }
  x_no_negative_vals[x_no_negative_vals <= 0] <- NA
  if (geom_verbose) {
    # `na.rm = TRUE` keeps the condition a valid TRUE/FALSE when `x` still contains NA values
    if (any(x <= 0, na.rm = TRUE)) {
      warning("Negative values were converted to NA for calculation of the geometric mean.")
    }
    if (all(is.na(x_no_negative_vals))) {
      warning("Since all values are negative or NA, the geometric mean is NA.")
    }
  }
  log_x <- log(x_no_negative_vals)
  y$geom_mean <- c("geom_mean" = exp(mean(log_x, na.rm = FALSE)))
  y$geom_sd <- c("geom_sd" = exp(stats::sd(log_x, na.rm = FALSE)))
  y$geom_mean_sd <- c(y$geom_mean, y$geom_sd)
  geom_mean_ci <- stat_mean_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE, geom_mean = TRUE)
  y$geom_mean_ci <- formatters::with_label(geom_mean_ci, paste("Geometric Mean", f_conf_level(control$conf_level)))

  y$geom_cv <- c("geom_cv" = sqrt(exp(stats::sd(log_x, na.rm = FALSE)^2) - 1) * 100)

  # Geometric mean followed by its confidence interval
  geom_mean_ci_3d <- c(y$geom_mean, y$geom_mean_ci)
  y$geom_mean_ci_3d <- formatters::with_label(
    geom_mean_ci_3d,
    paste0("Geometric Mean (", f_conf_level(control$conf_level), ")")
  )

  # Compare with reference group
  if (isTRUE(compare_with_ref_group)) {
    .ref_group <- args_list[[".ref_group"]]
    .in_ref_col <- args_list[[".in_ref_col"]]
    checkmate::assert_numeric(.ref_group)
    checkmate::assert_flag(.in_ref_col)

    # The p-value stays empty in the reference column or when either group has fewer than
    # two available values
    y$pval <- numeric()
    if (!.in_ref_col && n_available(x) > 1 && n_available(.ref_group) > 1) {
      y$pval <- stats::t.test(x, .ref_group)$p.value
    }
  }

  y
}

#' @describeIn analyze_variables Method for `factor` class.
#'
#' @return
#'   * If `x` is of class `factor` or converted from `character`, returns a `list` with named `numeric` items:
#'     * `n`: The [length()] of `x`.
#'     * `count`: A list with the number of cases for each level of the factor `x`.
#'     * `count_fraction`: Similar to `count` but also includes the proportion of cases for each level of the
#'       factor `x` relative to the denominator, or `0` if the denominator is zero.
#'
#' @note
#' * If `x` is an empty `factor`, a list is still returned for `count` with one element
#'   per factor level. If there are no levels in `x`, the function fails.
#' * If factor variables contain `NA`, these `NA` values are excluded by default. To include `NA` values
#'   set `na_rm = FALSE` and missing values will be displayed as an `NA` level. Alternatively, an explicit
#'   factor level can be defined for `NA` values during pre-processing via [df_explicit_na()] - the
#'   default `na_level` (`"<Missing>"`) will also be excluded when `na_rm` is set to `TRUE`.
#'
#' @method s_summary factor
#'
#' @examples
#' # `s_summary.factor`
#'
#' ## Basic usage:
#' s_summary(factor(c("a", "a", "b", "c", "a")))
#'
#' # Empty factor returns zero-filled items.
#' s_summary(factor(levels = c("a", "b", "c")))
#'
#' ## Management of NA values.
#' x <- factor(c(NA, "Female"))
#' x <- explicit_na(x)
#' s_summary(x, na_rm = TRUE)
#' s_summary(x, na_rm = FALSE)
#'
#' ## Different denominators.
#' x <- factor(c("a", "a", "b", "c", "a"))
#' s_summary(x, denom = "N_row", .N_row = 10L)
#' s_summary(x, denom = "N_col", .N_col = 20L)
#'
#' @export
s_summary.factor <- function(x, denom = c("n", "N_col", "N_row"), ...) {
  # Summarizes a factor: number of values, per-level counts, count/fraction
  # pairs and, when requested, a chi-squared p-value against a reference group.
  # All optional settings are looked up by name in `...`.
  assert_valid_factor(x)
  args_list <- list(...)
  n_row_total <- args_list[[".N_row"]]
  n_col_total <- args_list[[".N_col"]]
  na_rm <- args_list[["na_rm"]] %||% TRUE
  na_str <- args_list[["na_str"]] %||% "NA"
  na_str_drop <- args_list[["na_str_drop"]]
  verbose <- args_list[["verbose"]] %||% TRUE
  compare_with_ref_group <- args_list[["compare_with_ref_group"]]
  checkmate::assert_string(na_str_drop, null.ok = TRUE)

  # Either drop missing values (plus the optional extra "missing" level),
  # or turn them into an explicit level labelled `na_str`.
  if (na_rm) {
    x <- x[!is.na(x)]
    if (!is.null(na_str_drop)) {
      x <- fct_discard(x, na_str_drop)
    }
  } else {
    x <- explicit_na(x, label = na_str)
  }

  n_obs <- length(x)
  y <- list()

  # Every statistic is stored as a list of named vectors ("list of a list").
  y$n <- list("n" = c("n" = n_obs))

  level_counts <- as.list(table(x, useNA = "ifany"))
  y$count <- lapply(level_counts, setNames, nm = "count")

  # Resolve the denominator keyword into the actual number.
  denom_choice <- match.arg(denom)
  denom <- switch(denom_choice,
    n = n_obs,
    N_row = n_row_total,
    N_col = n_col_total
  )

  # The fraction is reported as 0 whenever the denominator is not positive.
  y$count_fraction <- lapply(
    y$count,
    function(cnt) c(cnt, "p" = ifelse(denom > 0, cnt / denom, 0))
  )
  y$count_fraction_fixed_dp <- y$count_fraction

  y$fraction <- lapply(
    y$count,
    function(cnt) c("num" = unname(cnt), "denom" = denom)
  )

  # Number of values whose text matches one of the below-limit patterns.
  blq_hits <- grepl("BLQ|LTR|<[1-9]|<PCLLOQ", x)
  y$n_blq <- list("n_blq" = c("n_blq" = sum(blq_hits)))

  if (isTRUE(compare_with_ref_group)) {
    .ref_group <- as_factor_keep_attributes(args_list[[".ref_group"]], verbose = verbose)
    .in_ref_col <- args_list[[".in_ref_col"]]
    checkmate::assert_flag(.in_ref_col)
    assert_valid_factor(x)
    assert_valid_factor(.ref_group)

    # Apply the same missing-value policy to both groups before comparing.
    if (na_rm) {
      x <- fct_discard(x[!is.na(x)], "<Missing>")
      .ref_group <- fct_discard(.ref_group[!is.na(.ref_group)], "<Missing>")
    } else {
      x <- explicit_na(x, label = na_str)
      .ref_group <- explicit_na(.ref_group, label = na_str)
    }

    if ("NA" %in% levels(x)) levels(.ref_group) <- c(levels(.ref_group), "NA")
    checkmate::assert_factor(x, levels = levels(.ref_group), min.levels = 2)

    # The p-value stays empty in the reference column or when a group is empty.
    can_test <- !.in_ref_col && length(x) > 0 && length(.ref_group) > 0
    y$pval_counts <- if (can_test) {
      suppressWarnings(stats::chisq.test(rbind(table(x), table(.ref_group))))$p.value
    } else {
      numeric()
    }
  }

  y
}

#' @describeIn analyze_variables Method for `character` class. This makes an automatic
#'   conversion to factor (with a warning) and then forwards to the method for factors.
#'
#' @note
#' * Automatic conversion of character to factor does not guarantee that the table
#'   can be generated correctly. In particular for sparse tables this very likely can fail.
#'   It is therefore better to always pre-process the dataset such that factors are manually
#'   created from character variables before passing the dataset to [rtables::build_table()].
#'
#' @method s_summary character
#'
#' @examples
#' # `s_summary.character`
#'
#' ## Basic usage:
#' s_summary(c("a", "a", "b", "c", "a"), verbose = FALSE)
#' s_summary(c("a", "a", "b", "c", "a", ""), .var = "x", na_rm = FALSE, verbose = FALSE)
#'
#' @export
s_summary.character <- function(x, denom = c("n", "N_col", "N_row"), ...) {
  # Converts the character vector to a factor and re-dispatches `s_summary()`
  # on the converted value, forwarding `denom` and `...` untouched.
  args_list <- list(...)
  na_rm <- args_list[["na_rm"]] %||% TRUE
  na_str <- args_list[["na_str"]] %||% "NA"
  verbose <- args_list[["verbose"]] %||% TRUE

  # `na_str` is only handed to the conversion when missing values are kept.
  x_as_factor <- if (na_rm) {
    as_factor_keep_attributes(x, verbose = verbose)
  } else {
    as_factor_keep_attributes(x, verbose = verbose, na_level = na_str)
  }

  s_summary(x = x_as_factor, denom = denom, ...)
}

#' @describeIn analyze_variables Method for `logical` class.
#'
#' @return
#'   * If `x` is of class `logical`, returns a `list` with named `numeric` items:
#'     * `n`: The [length()] of `x` (possibly after removing `NA`s).
#'     * `count`: Count of `TRUE` in `x`.
#'     * `count_fraction`: Count and proportion of `TRUE` in `x` relative to the denominator, or `0` if the
#'       denominator is zero. Note that `NA`s in `x` are never counted and never lead to `NA` here.
#'
#' @method s_summary logical
#'
#' @examples
#' # `s_summary.logical`
#'
#' ## Basic usage:
#' s_summary(c(TRUE, FALSE, TRUE, TRUE))
#'
#' # Empty logical vector returns zero-filled items.
#' s_summary(as.logical(c()))
#'
#' ## Management of NA values.
#' x <- c(NA, TRUE, FALSE)
#' s_summary(x, na_rm = TRUE)
#' s_summary(x, na_rm = FALSE)
#'
#' ## Different denominators.
#' x <- c(TRUE, FALSE, TRUE, TRUE)
#' s_summary(x, denom = "N_row", .N_row = 10L)
#' s_summary(x, denom = "N_col", .N_col = 20L)
#'
#' @export
s_summary.logical <- function(x, denom = c("n", "N_col", "N_row"), ...) {
  # Summarizes a logical vector: number of values, number of `TRUE`s and their
  # share of the chosen denominator; optionally a p-value vs a reference group.
  checkmate::assert_logical(x)
  args_list <- list(...)
  n_row_total <- args_list[[".N_row"]]
  n_col_total <- args_list[[".N_col"]]
  na_rm <- args_list[["na_rm"]] %||% TRUE
  compare_with_ref_group <- args_list[["compare_with_ref_group"]]

  # Missing values of a logical vector are themselves logical `NA`.
  if (na_rm) {
    x <- x[!is.na(x)]
  }

  n_obs <- length(x)
  denom_choice <- match.arg(denom)
  denom <- switch(denom_choice,
    n = n_obs,
    N_row = n_row_total,
    N_col = n_col_total
  )

  # `NA`s are never counted as `TRUE`; a non-positive denominator gives fraction 0.
  n_true <- c("count" = sum(x, na.rm = TRUE))
  count_and_fraction <- c(n_true, "fraction" = ifelse(denom > 0, n_true / denom, 0))

  y <- list(
    n = c("n" = n_obs),
    count = n_true,
    count_fraction = count_and_fraction,
    count_fraction_fixed_dp = count_and_fraction,
    fraction = c("num" = unname(n_true), "denom" = denom),
    n_blq = c("n_blq" = 0L)
  )

  if (isTRUE(compare_with_ref_group)) {
    .ref_group <- args_list[[".ref_group"]]
    .in_ref_col <- args_list[[".in_ref_col"]]
    checkmate::assert_flag(.in_ref_col)

    # Missing values are either removed from both groups or treated as `FALSE`.
    if (na_rm) {
      x <- stats::na.omit(x)
      .ref_group <- stats::na.omit(.ref_group)
    } else {
      x[is.na(x)] <- FALSE
      .ref_group[is.na(.ref_group)] <- FALSE
    }

    both_non_empty <- length(x) > 0 && length(.ref_group) > 0
    if (.in_ref_col || !both_non_empty) {
      # Nothing to compare: keep an empty p-value.
      y$pval_counts <- numeric()
    } else {
      x <- factor(x, levels = c(TRUE, FALSE))
      .ref_group <- factor(.ref_group, levels = c(TRUE, FALSE))
      tbl <- rbind(table(x), table(.ref_group))
      y$pval_counts <- suppressWarnings(prop_chisq(tbl))
    }
  }

  y
}

#' @describeIn analyze_variables Formatted analysis function which is used as `afun` in `analyze_vars()` and
#'   `compare_vars()` and as `cfun` in `summarize_colvars()`.
#'
#' @param compare_with_ref_group (`flag`)\cr whether comparison statistics should be analyzed instead of summary
#'   statistics (`compare_with_ref_group = TRUE` adds `pval` statistic comparing
#'   against reference group).
#'
#' @return
#' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @note
#' * To use for comparison (with additional p-value statistic), parameter
#'   `compare_with_ref_group` must be set to `TRUE`.
#' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is.
#'
#' @examples
#' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
#' a_summary(
#'   factor(c("a", "a", "b", "c", "a")),
#'   .ref_group = factor(c("a", "a", "b", "c")), compare_with_ref_group = TRUE, .in_ref_col = TRUE
#' )
#'
#' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
#' a_summary(
#'   c("A", "B", "A", "C"),
#'   .ref_group = c("B", "A", "C"), .var = "x", compare_with_ref_group = TRUE, verbose = FALSE,
#'   .in_ref_col = FALSE
#' )
#'
#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
#' a_summary(
#'   c(TRUE, FALSE, FALSE, TRUE, TRUE),
#'   .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare_with_ref_group = TRUE
#' )
#'
#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
#' a_summary(rnorm(10, 5, 1),
#'   .ref_group = rnorm(20, -5, 1), .var = "bla", compare_with_ref_group = TRUE,
#'   .in_ref_col = FALSE
#' )
#'
#' @export
a_summary <- function(x,
                      ...,
                      .stats = NULL,
                      .stat_names = NULL,
                      .formats = NULL,
                      .labels = NULL,
                      .indent_mods = NULL) {
  # Formatted analysis function: computes the statistics for `x` via
  # `s_summary()` (plus any user-supplied statistic functions), resolves
  # formats, labels and indentation for the selected statistics, and returns
  # the result of `in_rows()`.
  dots_extra_args <- list(...)

  # Check if there are user-defined functions
  default_and_custom_stats_list <- .split_std_from_custom_stats(.stats)
  .stats <- default_and_custom_stats_list$all_stats # just the labels of stats
  custom_stat_functions <- default_and_custom_stats_list$custom_stats

  # Correction of the pval indication if it is numeric or counts
  type <- ifelse(is.numeric(x), "numeric", "counts") # counts is "categorical"
  .stats <- .correct_num_or_counts_pval(type, .stats)

  # Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params)
  extra_afun_params <- retrieve_extra_afun_params(
    names(dots_extra_args$.additional_fun_parameters)
  )
  dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore

  # Check if compare_with_ref_group is TRUE but no ref col is set
  if (isTRUE(dots_extra_args$compare_with_ref_group) &&
    all(
      length(dots_extra_args[[".ref_group"]]) == 0, # only used for testing
      length(extra_afun_params[[".ref_group"]]) == 0
    )
  ) {
    stop(
      "For comparison (compare_with_ref_group = TRUE), the reference group must be specified.",
      "\nSee ref_group in split_cols_by()."
    )
  }

  # Main statistical functions application
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_summary,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      x = list(x),
      extra_afun_params,
      dots_extra_args
    )
  )

  # Fill in with stats defaults if needed
  met_grp <- paste0(c("analyze_vars", type), collapse = "_")
  .stats <- get_stats(
    met_grp,
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions),
    add_pval = dots_extra_args$compare_with_ref_group %||% FALSE
  )

  x_stats <- x_stats[.stats]

  is_char <- is.character(x) || is.factor(x)
  if (is_char) {
    # Only return non-empty stats (empty numeric stats are kept). `vapply()` is
    # used so the mask is always a logical vector, even when `x_stats` is empty
    # (`sapply()` would return `list()` there, which is not a valid subscript).
    keep_stat <- vapply(
      x_stats,
      \(x) length(x) > 0 || is.numeric(x),
      logical(1)
    )
    x_stats <- x_stats[keep_stat]
    levels_per_stats <- lapply(x_stats, names)
  } else {
    levels_per_stats <- names(x_stats) %>%
      as.list() %>%
      setNames(names(x_stats))
  }

  # Fill in formats/indents/labels with custom input and defaults
  .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats)
  lbls <- get_labels_from_stats(.stats, .labels, levels_per_stats)

  if (is_char) {
    # Keep pval_counts stat if present from comparisons and empty
    if ("pval_counts" %in% names(x_stats) && length(x_stats[["pval_counts"]]) == 0) {
      x_stats[["pval_counts"]] <- list(NULL) %>% setNames("pval_counts")
    }

    # Unlist stats
    x_stats <- x_stats %>%
      .unlist_keep_nulls() %>%
      setNames(names(.formats))
  }

  # Check for custom labels from control_analyze_vars
  .labels <- if ("control" %in% names(dots_extra_args)) {
    labels_use_control(lbls, dots_extra_args[["control"]], .labels)
  } else {
    lbls
  }

  # Auto format handling
  .formats <- apply_auto_formatting(
    .formats,
    x_stats,
    extra_afun_params$.df_row,
    extra_afun_params$.var
  )

  # Get and check statistical names from defaults
  .stat_names <- get_stat_names(x_stats, .stat_names)

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = names(.labels),
    .stat_names = .stat_names,
    .labels = .labels %>% .unlist_keep_nulls(),
    .indent_mods = .indent_mods %>% .unlist_keep_nulls()
  )
}

#' @describeIn analyze_variables Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param na_str_drop (`string`)\cr Additional `NA` string to be dropped from factor calculations. If `NULL`
#'   nothing will be removed beyond standard `NA` handling.
#' @param ... additional arguments passed to `s_summary()`, including:
#'   * `denom`: (`string`) See parameter description below.
#'   * `.N_row`: (`numeric(1)`) Row-wise N (row group count) for the group of observations being analyzed (i.e. with no
#'     column-based subsetting).
#'   * `.N_col`: (`numeric(1)`) Column-wise N (column count) for the full column being tabulated within.
#'   * `verbose`: (`flag`) Whether additional warnings and messages should be printed. Mainly used to print out
#'     information about factor casting. Defaults to `TRUE`. Used for `character`/`factor` variables only.
#' @param compare_with_ref_group (logical)\cr whether to compare the variable with a reference group.
#' @param .indent_mods (named `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#' @param formats_var (`NULL` or `string`)\cr Passed to [rtables::analyze()]. `.formats` must be `"default"` and
#' `format` must be `NULL` when this is non-NULL.
#' @param format (`NULL`, `list`, `string` or `function`)\cr Passed to [rtables::analyze()]. `.formats` must be
#' `"default"` and `formats_var` must be `NULL` when this is non-NULL.
#' @param na_strs_var (`string` or `NULL`)\cr Passed to `analyze`. `na_str` must be
#'   `NA` when this is non-NULL.
#'
#' @return
#' * `analyze_vars()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_summary()` to the table layout.
#'
#' @examples
#' ## Fabricated dataset.
#' dta_test <- data.frame(
#'   USUBJID = rep(1:6, each = 3),
#'   PARAMCD = rep("lab", 6 * 3),
#'   AVISIT  = rep(paste0("V", 1:3), 6),
#'   ARM     = rep(LETTERS[1:3], rep(6, 3)),
#'   AVAL    = c(9:1, rep(NA, 9))
#' )
#'
#' # `analyze_vars()` in `rtables` pipelines
#' ## Default output within a `rtables` pipeline.
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   analyze_vars(vars = "AVAL")
#'
#' build_table(l, df = dta_test)
#'
#' ## Select and format statistics output.
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   analyze_vars(
#'     vars = "AVAL",
#'     .stats = c("n", "mean_sd", "quantiles"),
#'     .formats = c("mean_sd" = "xx.x, xx.x"),
#'     .labels = c(n = "n", mean_sd = "Mean, SD", quantiles = c("Q1 - Q3"))
#'   )
#'
#' build_table(l, df = dta_test)
#'
#' ## Use arguments interpreted by `s_summary`.
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   analyze_vars(vars = "AVAL", na_rm = FALSE)
#'
#' build_table(l, df = dta_test)
#'
#' ## Handle `NA` levels first when summarizing factors.
#' dta_test$AVISIT <- NA_character_
#' dta_test <- df_explicit_na(dta_test)
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   analyze_vars(vars = "AVISIT", na_rm = FALSE)
#'
#' build_table(l, df = dta_test)
#'
#' # auto format
#' dt <- data.frame("VAR" = c(0.001, 0.2, 0.0011000, 3, 4))
#' basic_table() %>%
#'   analyze_vars(
#'     vars = "VAR",
#'     .stats = c("n", "mean", "mean_sd", "range"),
#'     .formats = c("mean_sd" = "auto", "range" = "auto")
#'   ) %>%
#'   build_table(dt)
#'
#' @export
#' @order 2
analyze_vars <- function(lyt,
                         vars,
                         var_labels = vars,
                         na_str = default_na_str(),
                         na_str_drop = "<Missing>",
                         nested = TRUE,
                         show_labels = "default",
                         table_names = vars,
                         section_div = NA_character_,
                         ...,
                         na_rm = TRUE,
                         compare_with_ref_group = FALSE,
                         .stats = c("n", "mean_sd", "median", "range", "count_fraction"),
                         .stat_names = NULL,
                         .formats = NULL,
                         .labels = NULL,
                         .indent_mods = NULL,
                         formats_var = NULL,
                         na_strs_var = NULL,
                         format = NULL) {
  # Layout-creating wrapper around `analyze()` that uses `a_summary()` as the
  # analysis function. Statistic-related arguments are collected in
  # `extra_args` and handed to `a_summary()` through `analyze()`.

  # Depending on main functions
  extra_args <- list(
    "na_rm" = na_rm,
    "na_str_drop" = na_str_drop,
    "compare_with_ref_group" = compare_with_ref_group,
    ...
  )

  ## handle na_str = NA (logical) for user convenience
  if (identical(na_str, NA)) {
    na_str <- NA_character_
  }

  # `formats_var` and `format` each require `.formats` to be exactly "default".
  if (!is.null(formats_var) && !identical(.formats, "default")) {
    stop(
      ".formats must be set to 'default' when specifying a formats variable ",
      "(got formats_var: ",
      formats_var,
      ")."
    )
  }

  if (!is.null(format) && !identical(.formats, "default")) {
    stop(
      ".formats must be set to 'default' when passing the format argument down ",
      "to analyze() (got format class:",
      paste(class(format), collapse = " - "),
      ")."
    )
  }

  # `na_strs_var` requires `na_str` to be NA.
  if (!is.null(na_strs_var) && !identical(na_str, NA_character_)) {
    stop(
      "na_str must be set to NA when specifying an na strings variable ",
      "(got na_strs_var: ",
      na_strs_var,
      ")."
    )
  }

  # Needed defaults
  if (!is.null(.stats)) extra_args[[".stats"]] <- .stats
  if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names
  if (!is.null(.formats)) extra_args[[".formats"]] <- .formats
  if (!is.null(.labels)) extra_args[[".labels"]] <- .labels
  if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods

  # Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params)
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  # Extends the formals of the local copy of `a_summary` only.
  formals(a_summary) <- c(
    formals(a_summary),
    extra_args[[".additional_fun_parameters"]]
  )

  # Main {rtables} structural call
  analyze(
    lyt = lyt,
    vars = vars,
    var_labels = var_labels,
    afun = a_summary,
    na_str = na_str,
    inclNAs = !na_rm,
    nested = nested,
    extra_args = extra_args,
    show_labels = show_labels,
    table_names = table_names,
    section_div = section_div,
    formats_var = formats_var,
    na_strs_var = na_strs_var,
    # Forward `format` as documented; previously it was validated above but
    # never reached `analyze()`.
    format = format
  )
}

#' Helper functions for tabulating biomarker effects on binary response by subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions which are documented here separately to not confuse the user
#' when reading about the user-facing functions.
#'
#' @inheritParams response_biomarkers_subgroups
#' @inheritParams extract_rsp_biomarkers
#' @inheritParams argument_convention
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   mutate(rsp = AVALC == "CR")
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' @name h_response_biomarkers_subgroups
NULL

#' @describeIn h_response_biomarkers_subgroups helps with converting the "response" function variable list
#'   to the "logistic regression" variable list. The reason is that currently there is an
#'   inconsistency between the variable names accepted by `extract_rsp_subgroups()` and `fit_logistic()`.
#'
#' @param biomarker (`string`)\cr the name of the biomarker variable.
#'
#' @return
#' * `h_rsp_to_logistic_variables()` returns a named `list` of elements `response`, `arm`, `covariates`, and `strata`.
#'
#' @examples
#' # This is how the variable list is converted internally.
#' h_rsp_to_logistic_variables(
#'   variables = list(
#'     rsp = "RSP",
#'     covariates = c("A", "B"),
#'     strata = "D"
#'   ),
#'   biomarker = "AGE"
#' )
#'
#' @export
h_rsp_to_logistic_variables <- function(variables, biomarker) {
  # Maps the "response" style variable list onto the element names
  # `response`, `arm`, `covariates` and `strata`, with `biomarker` used as `arm`.

  # The old element name `strat` is still accepted: warn and copy it to `strata`.
  has_deprecated_strat <- is.element("strat", names(variables))
  if (has_deprecated_strat) {
    warning(
      "Warning: the `strat` element name of the `variables` list argument to `h_rsp_to_logistic_variables() ",
      "was deprecated in tern 0.9.4.\n  ",
      "Please use the name `strata` instead of `strat` in the `variables` argument."
    )
    variables[["strata"]] <- variables[["strat"]]
  }

  checkmate::assert_list(variables)
  checkmate::assert_string(variables$rsp)
  checkmate::assert_string(biomarker)

  response_var <- variables$rsp
  covariate_vars <- variables$covariates
  strata_vars <- variables$strata

  list(
    response = response_var,
    arm = biomarker,
    covariates = covariate_vars,
    strata = strata_vars
  )
}

#' @describeIn h_response_biomarkers_subgroups prepares estimates for number of responses, patients and
#'   overall response rate, as well as odds ratio estimates, confidence intervals and p-values, for multiple
#'   biomarkers in a given single data set.
#'   `variables` corresponds to names of variables found in `data`, passed as a named list and requires elements
#'   `rsp` and `biomarkers` (vector of continuous biomarker variables) and optionally `covariates`
#'   and `strata`.
#'
#' @return
#' * `h_logistic_mult_cont_df()` returns a `data.frame` containing estimates and statistics for the selected biomarkers.
#'
#' @examples
#' # For a single population, estimate separately the effects
#' # of two biomarkers.
#' df <- h_logistic_mult_cont_df(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX"
#'   ),
#'   data = adrs_f
#' )
#' df
#'
#' # If the data set is empty, still the corresponding rows with missings are returned.
#' h_logistic_mult_cont_df(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     strata = "STRATA1"
#'   ),
#'   data = adrs_f[NULL, ]
#' )
#'
#' @export
h_logistic_mult_cont_df <- function(variables,
                                    data,
                                    control = control_logistic()) {
  # Fits one logistic model per biomarker in `variables$biomarkers` and returns
  # a data frame with one row per biomarker. When `data` has no rows, no model
  # is fitted and rows with missing estimates are returned instead.

  # The old element name `strat` is still accepted: warn and copy it to `strata`.
  if ("strat" %in% names(variables)) {
    warning(
      "Warning: the `strat` element name of the `variables` list argument to `h_logistic_mult_cont_df() ",
      "was deprecated in tern 0.9.4.\n  ",
      "Please use the name `strata` instead of `strat` in the `variables` argument."
    )
    variables[["strata"]] <- variables[["strat"]]
  }
  assert_df_with_variables(data, variables)

  checkmate::assert_character(variables$biomarkers, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  conf_level <- control[["conf_level"]]
  pval_label <- "p-value (Wald)"

  # If there is any data, run model, otherwise return empty results.
  if (nrow(data) > 0) {
    l_result <- lapply(variables$biomarkers, function(bm) {
      # The biomarker takes the place of the arm variable in the model.
      model_fit <- fit_logistic(
        variables = h_rsp_to_logistic_variables(variables, bm),
        data = data,
        response_definition = control$response_definition
      )
      result <- h_logistic_simple_terms(
        x = bm,
        fit_glm = model_fit,
        conf_level = control$conf_level
      )
      # The response is stored differently depending on the class of the fit.
      resp_vector <- if (inherits(model_fit, "glm")) {
        model_fit$model[[variables$rsp]]
      } else {
        as.logical(as.matrix(model_fit$y)[, "status"])
      }
      data.frame(
        # Dummy column needed downstream to create a nested header.
        biomarker = bm,
        biomarker_label = formatters::var_labels(data[bm], fill = TRUE),
        n_tot = length(resp_vector),
        n_rsp = sum(resp_vector),
        prop = mean(resp_vector),
        or = as.numeric(result[1L, "odds_ratio"]),
        lcl = as.numeric(result[1L, "lcl"]),
        ucl = as.numeric(result[1L, "ucl"]),
        conf_level = conf_level,
        pval = as.numeric(result[1L, "pvalue"]),
        pval_label = pval_label,
        stringsAsFactors = FALSE
      )
    })
    do.call(rbind, args = c(l_result, make.row.names = FALSE))
  } else {
    data.frame(
      biomarker = variables$biomarkers,
      biomarker_label = formatters::var_labels(data[variables$biomarkers], fill = TRUE),
      n_tot = 0L,
      n_rsp = 0L,
      prop = NA,
      or = NA,
      lcl = NA,
      ucl = NA,
      conf_level = conf_level,
      pval = NA,
      pval_label = pval_label,
      row.names = seq_along(variables$biomarkers),
      stringsAsFactors = FALSE
    )
  }
}

#' Formatting functions
#'
#' See below for the list of formatting functions created in `tern` to work with `rtables`.
#'
#' Other available formats can be listed via [`formatters::list_valid_format_labels()`]. Additional
#' custom formats can be created via the [`formatters::sprintf_format()`] function.
#'
#' @family formatting functions
#' @name formatting_functions
NULL

#' Format fraction and percentage
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a fraction together with ratio in percent.
#'
#' @param x (named `integer`)\cr vector with elements `num` and `denom`.
#' @param ... not used. Required for `rtables` interface.
#'
#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`.
#'
#' @examples
#' format_fraction(x = c(num = 2L, denom = 3L))
#' format_fraction(x = c(num = 0L, denom = 3L))
#'
#' @family formatting functions
#' @export
format_fraction <- function(x, ...) {
  # Renders `x["num"]` and `x["denom"]` as "num/denom", followed by the
  # percentage (rounded to one decimal) unless the numerator is zero.
  attr(x, "label") <- NULL

  checkmate::assert_vector(x)
  checkmate::assert_count(x["num"])
  checkmate::assert_count(x["denom"])

  numerator <- x["num"]
  denominator <- x["denom"]
  fraction_txt <- paste0(numerator, "/", denominator)

  if (numerator == 0) {
    return(fraction_txt)
  }

  percent <- round(numerator / denominator * 100, 1)
  paste0(fraction_txt, " (", percent, "%)")
}

#' Format fraction and percentage with fixed single decimal place
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a fraction together with ratio in percent with fixed single decimal place.
#' Includes trailing zero in case of whole number percentages to always keep one decimal place.
#'
#' @inheritParams format_fraction
#'
#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`.
#'
#' @examples
#' format_fraction_fixed_dp(x = c(num = 1L, denom = 2L))
#' format_fraction_fixed_dp(x = c(num = 1L, denom = 4L))
#' format_fraction_fixed_dp(x = c(num = 0L, denom = 3L))
#'
#' @family formatting functions
#' @export
format_fraction_fixed_dp <- function(x, ...) {
  # Renders `x["num"]` and `x["denom"]` as "num/denom", followed by the
  # percentage printed with exactly one decimal place unless the numerator is zero.
  attr(x, "label") <- NULL
  checkmate::assert_vector(x)
  checkmate::assert_count(x["num"])
  checkmate::assert_count(x["denom"])

  numerator <- x["num"]
  denominator <- x["denom"]
  fraction_txt <- paste0(numerator, "/", denominator)

  if (numerator == 0) {
    return(fraction_txt)
  }

  # "%.1f" keeps the trailing zero for whole-number percentages.
  percent_txt <- sprintf("%.1f", round(numerator / denominator * 100, 1))
  paste0(fraction_txt, " (", percent_txt, "%)")
}

#' Format count and fraction
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a count together with fraction with special consideration when count is `0`.
#'
#' @param x (`numeric(2)`)\cr vector of length 2 with count and fraction, respectively.
#' @param ... not used. Required for `rtables` interface.
#'
#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`.
#'
#' @examples
#' format_count_fraction(x = c(2, 0.6667))
#' format_count_fraction(x = c(0, 0))
#'
#' @family formatting functions
#' @export
format_count_fraction <- function(x, ...) {
  # Renders a (count, fraction) pair as "count (pct%)"; a zero count is shown
  # as "0" and any missing value as "NA".
  attr(x, "label") <- NULL

  # Missing input is reported before any validation runs.
  if (anyNA(x)) {
    return("NA")
  }

  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(x[2], include_boundaries = TRUE)

  if (x[1] == 0) {
    return("0")
  }

  percent <- round(x[2] * 100, 1)
  paste0(x[1], " (", percent, "%)")
}

#' Format count and percentage with fixed single decimal place
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Formats a count together with fraction with special consideration when count is `0`.
#'
#' @inheritParams format_count_fraction
#'
#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`.
#'
#' @examples
#' format_count_fraction_fixed_dp(x = c(2, 0.6667))
#' format_count_fraction_fixed_dp(x = c(2, 0.5))
#' format_count_fraction_fixed_dp(x = c(0, 0))
#'
#' @family formatting functions
#' @export
format_count_fraction_fixed_dp <- function(x, ...) {
  # Renders a (count, fraction) pair as "count (pct%)" with one decimal place;
  # a zero count is shown as "0", a fraction of one as "100%" without decimals,
  # and any missing value as "NA".
  attr(x, "label") <- NULL

  # Missing input is reported before any validation runs.
  if (anyNA(x)) {
    return("NA")
  }

  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(x[2], include_boundaries = TRUE)

  if (x[1] == 0) {
    return("0")
  }
  if (.is_equal_float(x[2], 1)) {
    return(sprintf("%d (100%%)", x[1]))
  }

  sprintf("%d (%.1f%%)", x[1], x[2] * 100)
}

#' Format count and fraction with special case for count < 10
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a count together with fraction with special consideration when count is less than 10.
#'
#' @inheritParams format_count_fraction
#'
#' @return A string in the format `count (fraction %)`. If `count` is less than 10, only `count` is printed.
#'
#' @examples
#' format_count_fraction_lt10(x = c(275, 0.9673))
#' format_count_fraction_lt10(x = c(2, 0.6667))
#' format_count_fraction_lt10(x = c(9, 1))
#'
#' @family formatting functions
#' @export
format_count_fraction_lt10 <- function(x, ...) {
  # Renders a (count, fraction) pair as "count (pct%)", but prints only the
  # count when it is below 10; any missing value is shown as "NA".
  attr(x, "label") <- NULL

  # Missing input is reported before any validation runs.
  if (anyNA(x)) {
    return("NA")
  }

  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(x[2], include_boundaries = TRUE)

  count_txt <- paste0(x[1])
  if (x[1] < 10) {
    return(count_txt)
  }

  percent <- round(x[2] * 100, 1)
  paste0(count_txt, " (", percent, "%)")
}

#' Format XX as a formatting function
#'
#' Translate a string where x and dots are interpreted as number place
#' holders, and others as formatting elements.
#'
#' @param str (`string`)\cr template.
#'
#' @return An `rtables` formatting function.
#'
#' @examples
#' test <- list(c(1.658, 0.5761), c(1e1, 785.6))
#'
#' z <- format_xx("xx (xx.x)")
#' sapply(test, z)
#'
#' z <- format_xx("xx.x - xx.x")
#' sapply(test, z)
#'
#' z <- format_xx("xx.x, incl. xx.x% NE")
#' sapply(test, z)
#'
#' @family formatting functions
#' @export
format_xx <- function(str) {
  # Locate every numeric placeholder in the template: runs of "x", optionally
  # with a "." followed by more "x" (e.g. "xx" or "xx.x").
  placeholder_hits <- gregexpr(pattern = "x+\\.x+|x+", text = str, perl = TRUE)
  placeholders <- regmatches(x = str, m = placeholder_hits)[[1]]

  # Build one rounding function per placeholder. The number of "x" after the
  # dot gives the number of decimals; no dot means rounding to whole numbers.
  rounders <- lapply(
    X = placeholders,
    function(placeholder) {
      parts <- strsplit(split = "\\.", x = placeholder)[[1]]
      n_decimals <- if (length(parts) > 1) nchar(parts[2]) else 0
      function(value) round(value, digits = n_decimals)
    }
  )

  # The returned formatter rounds each element of `x` with the matching rounder
  # and substitutes the results for the placeholders in a copy of the template.
  function(x, output) {
    rounded <- Map(function(value, rounder) rounder(value), x, rounders)
    filled <- str
    regmatches(x = filled, m = placeholder_hits)[[1]] <- rounded
    filled
  }
}

#' Format numeric values by significant figures
#'
#' Format numeric values to print with a specified number of significant figures.
#'
#' @param sigfig (`integer(1)`)\cr number of significant figures to display.
#' @param format (`string`)\cr the format label (string) to apply when printing the value. Decimal
#'   places in string are ignored in favor of formatting by significant figures. Format options are:
#'   `"xx"`, `"xx / xx"`, `"(xx, xx)"`, `"xx - xx"`, and `"xx (xx)"`.
#' @param num_fmt (`string`)\cr numeric format modifiers to apply to the value. Defaults to `"fg"` for
#'   standard significant figures formatting - fixed (non-scientific notation) format (`"f"`)
#'   and `sigfig` equal to number of significant figures instead of decimal places (`"g"`). See the
#'   [formatC()] `format` argument for more options.
#'
#' @return An `rtables` formatting function.
#'
#' @examples
#' fmt_3sf <- format_sigfig(3)
#' fmt_3sf(1.658)
#' fmt_3sf(1e1)
#'
#' fmt_5sf <- format_sigfig(5)
#' fmt_5sf(0.57)
#' fmt_5sf(0.000025645)
#'
#' @family formatting functions
#' @export
format_sigfig <- function(sigfig, format = "xx", num_fmt = "fg") {
  checkmate::assert_integerish(sigfig)

  # Decimal-place markers in the template are replaced by plain "xx" before the
  # template is validated against the supported layouts.
  format <- gsub("xx\\.|xx\\.x+", "xx", format)
  checkmate::assert_choice(format, c("xx", "xx / xx", "(xx, xx)", "xx - xx", "xx (xx)"))

  function(x, ...) {
    if (!is.numeric(x)) {
      stop("`format_sigfig` cannot be used for non-numeric values. Please choose another format.")
    }

    # Round to the requested significant figures, then render as text; the "#"
    # flag is passed to `formatC()` together with `num_fmt`.
    rounded <- signif(x, digits = sigfig)
    num_chr <- formatC(rounded, digits = sigfig, format = num_fmt, flag = "#")

    # Drop a dangling decimal point such as "10." -> "10".
    num_chr <- gsub("\\.$", "", num_chr)

    format_value(num_chr, format)
  }
}

#' Format fraction with lower threshold
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a fraction when the second element of the input `x` is the fraction. It applies
#' a lower threshold, below which it is just stated that the fraction is smaller than that.
#'
#' @param threshold (`proportion`)\cr lower threshold.
#'
#' @return An `rtables` formatting function that takes numeric input `x` where the second
#'   element is the fraction that is formatted. If the fraction is above or equal to the threshold,
#'   then it is displayed in percentage. If it is positive but below the threshold, it returns,
#'   e.g. "<1" if the threshold is `0.01`. If it is zero, then just "0" is returned.
#'
#' @examples
#' format_fun <- format_fraction_threshold(0.05)
#' format_fun(x = c(20, 0.1))
#' format_fun(x = c(2, 0.01))
#' format_fun(x = c(0, 0))
#'
#' @family formatting functions
#' @export
format_fraction_threshold <- function(threshold) {
  assert_proportion_value(threshold)

  # Label used for positive fractions that fall below the threshold, e.g. "<5".
  string_below_threshold <- paste0("<", round(threshold * 100))

  function(x, ...) {
    # Only the second element of `x` (the fraction) is formatted.
    assert_proportion_value(x[2], include_boundaries = TRUE)

    # Compare against the user-supplied `threshold` (previously a hard-coded 0.01,
    # which ignored the argument). Fractions at or above the threshold are shown
    # as a rounded percentage, matching the documented "above or equal" contract.
    ifelse(
      x[2] >= threshold,
      round(x[2] * 100),
      # Below the threshold: an exact zero prints as "0", anything else as "<threshold%".
      ifelse(
        x[2] == 0,
        "0",
        string_below_threshold
      )
    )
  }
}

#' Format extreme values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' `rtables` formatting functions that handle extreme values.
#'
#' @param digits (`integer(1)`)\cr number of decimal places to display.
#'
#' @details For each input, apply a format to the specified number of `digits`. If the value is
#'    below a threshold, it returns "<0.01" e.g. if the number of `digits` is 2. If the value is
#'    above a threshold, it returns ">999.99" e.g. if the number of `digits` is 2.
#'    If it is zero, then returns "0.00".
#'
#' @family formatting functions
#' @name extreme_format
NULL

#' @describeIn extreme_format Internal helper function to calculate the threshold and create formatted strings
#'  used in Formatting Functions. Returns a list with elements `threshold` and `format_string`.
#'
#' @return
#' * `h_get_format_threshold()` returns a `list` of 2 elements: `threshold`, with `low` and `high` thresholds,
#'   and `format_string`, with thresholds formatted as strings.
#'
#' @examples
#' h_get_format_threshold(2L)
#'
#' @export
h_get_format_threshold <- function(digits = 2L) {
  checkmate::assert_integerish(digits)

  # Smallest value representable with `digits` decimal places.
  step <- 1 / (10^digits)

  # Values below `lower` or above `upper` are reported as "<lower" / ">upper".
  lower <- step
  upper <- 1000 - step

  list(
    threshold = c(low = lower, high = upper),
    format_string = c(low = paste0("<", lower), high = paste0(">", upper))
  )
}

#' @describeIn extreme_format Internal helper function to apply a threshold format to a value.
#'   Creates a formatted string to be used in Formatting Functions.
#'
#' @param x (`numeric(1)`)\cr value to format.
#'
#' @return
#' * `h_format_threshold()` returns the given value, or if the value is not within the digit threshold the relation
#'   of the given value to the digit threshold, as a formatted string.
#'
#' @examples
#' h_format_threshold(0.001)
#' h_format_threshold(1000)
#'
#' @export
h_format_threshold <- function(x, digits = 2L) {
  # Missing values are handed back untouched, before any validation.
  if (is.na(x)) {
    return(x)
  }

  checkmate::assert_numeric(x, lower = 0)

  fmt <- h_get_format_threshold(digits)
  limits <- fmt$threshold
  limit_labels <- fmt$format_string

  # Positive but smaller than the low threshold -> "<low".
  if (x < limits["low"] && x > 0) {
    return(unname(limit_labels["low"]))
  }

  # Larger than the high threshold -> ">high".
  if (x > limits["high"]) {
    return(unname(limit_labels["high"]))
  }

  # Otherwise (including exact zero) print with a fixed number of decimals.
  unname(sprintf(fmt = paste0("%.", digits, "f"), x))
}

#' Format a single extreme value
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Create a formatting function for a single extreme value.
#'
#' @inheritParams extreme_format
#'
#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme value.
#'
#' @examples
#' format_fun <- format_extreme_values(2L)
#' format_fun(x = 0.127)
#' format_fun(x = Inf)
#' format_fun(x = 0)
#' format_fun(x = 0.009)
#'
#' @family formatting functions
#' @export
format_extreme_values <- function(digits = 2L) {
  # Formatter for one value: validates that `x` is a scalar (NA allowed) and
  # delegates the threshold logic to `h_format_threshold()`.
  single_value_formatter <- function(x, ...) {
    checkmate::assert_scalar(x, na.ok = TRUE)
    h_format_threshold(x = x, digits = digits)
  }

  single_value_formatter
}

#' Format extreme values part of a confidence interval
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formatting Function for extreme values part of a confidence interval. Values
#' are formatted as e.g. "(xx.xx, xx.xx)" if the number of `digits` is 2.
#'
#' @inheritParams extreme_format
#'
#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme
#'   values confidence interval.
#'
#' @examples
#' format_fun <- format_extreme_values_ci(2L)
#' format_fun(x = c(0.127, Inf))
#' format_fun(x = c(0, 0.009))
#'
#' @family formatting functions
#' @export
format_extreme_values_ci <- function(digits = 2L) {
  # Formatter for a pair of interval bounds: each bound goes through
  # `h_format_threshold()` separately and the two are wrapped as "(lower, upper)".
  ci_formatter <- function(x, ...) {
    checkmate::assert_vector(x, len = 2)

    lower_chr <- h_format_threshold(x = x[1], digits = digits)
    upper_chr <- h_format_threshold(x = x[2], digits = digits)

    paste0("(", lower_chr, ", ", upper_chr, ")")
  }

  ci_formatter
}

#' Format automatically using data significant digits
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formatting function for the majority of default methods used in [analyze_vars()].
#' For non-derived values, the significant digits of data is used (e.g. range), while derived
#' values have one more digit (measures of location and dispersion such as mean or standard deviation).
#' This function can be called internally with "auto" like, for example,
#' `.formats = c("mean" = "auto")`. See details to see how this works with the inner function.
#'
#' @param dt_var (`numeric`)\cr variable data the statistics were calculated from. Used only to
#'   find significant digits. In [analyze_vars] this comes from `.df_row` (see
#'   [rtables::additional_fun_params]), and it is the row data after the above row splits. No
#'   column split is considered.
#' @param x_stat (`string`)\cr string indicating the current statistical method used.
#'
#' @return A string that `rtables` prints in a table cell.
#'
#' @details
#' The internal function is needed to work with `rtables` default structure for
#' format functions, i.e. `function(x, ...)`, where `x` is the result of the statistical evaluation.
#' It can be more than one element (e.g. for `.stats = "mean_sd"`).
#'
#' @examples
#' x_todo <- c(0.001, 0.2, 0.0011000, 3, 4)
#' res <- c(mean(x_todo[1:3]), sd(x_todo[1:3]))
#'
#' # x is the result coming into the formatting function -> res!!
#' format_auto(dt_var = x_todo, x_stat = "mean_sd")(x = res)
#' format_auto(x_todo, "range")(x = range(x_todo))
#' no_sc_x <- c(0.0000001, 1)
#' format_auto(no_sc_x, "range")(x = no_sc_x)
#'
#' @family formatting functions
#' @export
format_auto <- function(dt_var, x_stat) {
  function(x = "", ...) {
    checkmate::assert_numeric(x, min.len = 1)
    checkmate::assert_numeric(dt_var, min.len = 1)

    # Statistics split into derived ones (shown with one extra decimal) and
    # non-derived ones (shown with the precision found in the data).
    der_stats <- c(
      "mean", "sd", "se", "median", "geom_mean", "quantiles", "iqr",
      "mean_sd", "mean_se", "mean_se", "mean_ci", "mean_sei", "mean_sdi",
      "median_ci"
    )
    nonder_stats <- c("n", "range", "min", "max")

    # Guard against a statistic accidentally being listed in both groups.
    stopifnot(length(intersect(der_stats, nonder_stats)) == 0) # nolint
    checkmate::assert_choice(x_stat, c(der_stats, nonder_stats))

    # Largest number of decimal places found among the data values.
    data_dig <- max(vapply(dt_var, count_decimalplaces, FUN.VALUE = numeric(1)))
    detect_dig <- if (x_stat %in% der_stats) data_dig + 1 else data_dig

    # Render the values and fetch the default layout for this statistic.
    str_vals <- formatC(x, digits = detect_dig, format = "f")
    def_fmt <- get_formats_from_stats(x_stat)[[x_stat]]
    str_fmt <- str_extract(def_fmt, invert = FALSE)[[1]]
    if (length(str_fmt) != length(str_vals)) {
      stop(
        "Number of inserted values as result (", length(str_vals),
        ") is not the same as there should be in the default tern formats for ",
        x_stat, " (-> ", def_fmt, " needs ", length(str_fmt), " values). ",
        "See tern_default_formats to check all of them."
      )
    }

    # Text pieces surrounding the placeholders: always one more than the values.
    inv_str_fmt <- str_extract(def_fmt, invert = TRUE)[[1]]
    stopifnot(length(inv_str_fmt) == length(str_vals) + 1) # nolint

    # Interleave text pieces and values (piece, value, piece, ..., piece).
    n_vals <- length(str_vals)
    paste0(
      c(rbind(inv_str_fmt[seq_len(n_vals)], str_vals), inv_str_fmt[n_vals + 1]),
      collapse = ""
    )
  }
}

# Returns, for each element of `string`, the substrings matching `pattern`
# (or, with `invert = TRUE`, the pieces of text between the matches).
str_extract <- function(string, pattern = "xx|xx\\.|xx\\.x+", invert = FALSE) {
  match_positions <- gregexpr(pattern, string)
  regmatches(string, match_positions, invert = invert)
}

# Counts the decimal places of a single number as rendered by `format()` in
# fixed (non-scientific) notation. Returns 0 for `NA`, for values that are whole
# numbers up to floating-point tolerance, and for values whose fractional part
# is not visible in the formatted string.
count_decimalplaces <- function(dec) {
  if (is.na(dec)) {
    return(0)
  }

  # Tolerance avoids treating floating-point noise around integers as decimals.
  if (abs(dec - round(dec)) <= .Machine$double.eps^0.5) {
    return(0)
  }

  parts <- strsplit(format(dec, scientific = FALSE, trim = FALSE), ".", fixed = TRUE)[[1]]

  # `format()` keeps a limited number of significant digits, so a tiny fractional
  # part (e.g. 1.0000001) can be rounded away entirely, leaving no "." to split on.
  # Previously this raised "subscript out of bounds"; report zero decimals instead.
  if (length(parts) < 2) {
    return(0)
  }

  nchar(parts[[2]])
}

#' Apply automatic formatting
#'
#' Checks if any of the listed formats in `.formats` are `"auto"`, and replaces `"auto"` with
#' the correct implementation of `format_auto` for the given statistics, data, and variable.
#'
#' @inheritParams argument_convention
#' @param x_stats (named `list`)\cr a named list of statistics where each element corresponds
#'   to an element in `.formats`, with matching names.
#'
#' @keywords internal
apply_auto_formatting <- function(.formats, x_stats, .df_row, .var) {
  # Flag the formats that are exactly the string "auto".
  wants_auto <- vapply(.formats, function(fmt) is.character(fmt) && fmt == "auto", logical(1))

  # Nothing to replace: hand the formats back unchanged.
  if (!any(wants_auto)) {
    return(.formats)
  }

  # Build one `format_auto()` formatter per flagged statistic, using the values
  # of `.var` in `.df_row` to detect the number of decimal places.
  auto_stat_names <- names(x_stats[wants_auto])
  dt_values <- .df_row[[.var]]
  .formats[wants_auto] <- lapply(auto_stat_names, format_auto, dt_var = dt_values)

  .formats
}

#' Get default statistical methods and their associated formats, labels, and indent modifiers
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Utility functions to get valid statistic methods for different method groups
#' (`.stats`) and their associated formats (`.formats`), labels (`.labels`), and indent modifiers
#' (`.indent_mods`). This utility is used across `tern`, but some of its working principles can be
#' seen in [analyze_vars()]. See notes to understand why this is experimental.
#'
#' @param stats (`character`)\cr statistical methods to return defaults for.
#' @param levels_per_stats (named `list` of `character` or `NULL`)\cr named list where the name of each element is a
#'   statistic from `stats` and each element is the levels of a `factor` or `character` variable (or variable name),
#'   each corresponding to a single row, for which the named statistic should be calculated for. If a statistic is only
#'   calculated once (one row), the element can be either `NULL` or the name of the statistic. Each list element will be
#'   flattened such that the names of the list elements returned by the function have the format `statistic.level` (or
#'   just `statistic` for statistics calculated for a single row). Defaults to `NULL`.
#' @param tern_defaults (`list` or `vector`)\cr defaults to use to fill in missing values if no user input is given.
#'   Must be of the same type as the values that are being filled in (e.g. indentation must be integers).
#'
#' @details
#' Current choices for `type` are `counts` and `numeric` for [analyze_vars()] and affect `get_stats()`.
#'
#' @note
#' These defaults are experimental because we use the names of functions to retrieve the default
#' statistics. This should be generalized in groups of methods according to more reasonable groupings.
#'
#' @name default_stats_formats_labels
NULL

#' @describeIn default_stats_formats_labels Get statistics available for a given method
#'   group (analyze function). To check available defaults see `tern::tern_default_stats` list.
#'
#' @param method_groups (`character`)\cr indicates the statistical method group (`tern` analyze function)
#'   to retrieve default statistics for. A character vector can be used to specify more than one statistical
#'   method group.
#' @param stats_in (`character`)\cr statistics to retrieve for the selected method group. If custom statistical
#'   functions are used, `stats_in` needs to have them in too.
#' @param custom_stats_in (`character`)\cr custom statistics to add to the default statistics.
#' @param add_pval (`flag`)\cr should `"pval"` (or `"pval_counts"` if `method_groups` contains
#'   `"analyze_vars_counts"`) be added to the statistical methods?
#'
#' @return
#' * `get_stats()` returns a `character` vector of statistical methods.
#'
#' @examples
#' # analyze_vars is numeric
#' num_stats <- get_stats("analyze_vars_numeric") # also the default
#'
#' # Other type
#' cnt_stats <- get_stats("analyze_vars_counts")
#'
#' # Weirdly taking the pval from count_occurrences
#' only_pval <- get_stats("count_occurrences", add_pval = TRUE, stats_in = "pval")
#'
#' # All count_occurrences
#' all_cnt_occ <- get_stats("count_occurrences")
#'
#' # Multiple
#' get_stats(c("count_occurrences", "analyze_vars_counts"))
#'
#' @export
get_stats <- function(method_groups = "analyze_vars_numeric",
                      stats_in = NULL, custom_stats_in = NULL, add_pval = FALSE) {
  checkmate::assert_character(method_groups)
  checkmate::assert_character(stats_in, null.ok = TRUE)
  checkmate::assert_character(custom_stats_in, null.ok = TRUE)
  checkmate::assert_flag(add_pval)

  # A bare "analyze_vars" is treated as the numeric variant.
  if (any(method_groups == "analyze_vars")) {
    method_groups[method_groups == "analyze_vars"] <- "analyze_vars_numeric"
  }

  # Whether any requested group mentions "counts" decides which p-value name applies.
  has_counts_group <- any(grepl("counts", method_groups))

  # Collect the defaults of every requested group, stopping at the first unknown one.
  out <- NULL
  for (mgi in method_groups) {
    if (!(mgi %in% names(tern_default_stats))) {
      stop("The selected method group (", mgi, ") has no default statistical method.")
    }
    out <- unique(c(out, tern_default_stats[[mgi]]))
  }

  # Custom statistics are appended after the defaults.
  out <- c(out, custom_stats_in)

  # A p-value named in `stats_in` implies `add_pval`, once it has been checked
  # against the type of the requested method groups.
  stats_in_pval_value <- stats_in[grepl("^pval", stats_in)]
  if (length(stats_in_pval_value) > 0) {
    # Must be a single value among the allowed choices.
    checkmate::assert_choice(stats_in_pval_value, c("pval", "pval_counts", "pvalue"))

    wrong_for_counts <- has_counts_group && stats_in_pval_value != "pval_counts"
    wrong_for_numeric <- any(grepl("numeric", method_groups)) && stats_in_pval_value != "pval"
    if (wrong_for_counts || wrong_for_numeric) {
      type_tmp <- if (any(grepl("counts$", method_groups))) "counts" else "numeric"
      stop(
        "Inserted p-value (", stats_in_pval_value, ") is not valid for type ",
        type_tmp, ". Use ", paste(ifelse(stats_in_pval_value == "pval", "pval_counts", "pval")),
        " instead."
      )
    }

    add_pval <- TRUE
  }

  # Append the p-value statistic; `unique()` keeps it from being duplicated.
  if (isTRUE(add_pval)) {
    pval_name <- if (has_counts_group) "pval_counts" else "pval"
    out <- unique(c(out, pval_name))
  }

  # Restrict to the requested statistics, in the order given by `stats_in`.
  if (!is.null(stats_in)) {
    out <- intersect(stats_in, out)
  }

  # Nothing left after filtering means the request cannot be satisfied.
  if (length(out) == 0) {
    stop(
      "The selected method group(s) (", paste0(method_groups, collapse = ", "), ")",
      " do not have the required default statistical methods:\n",
      paste0(stats_in, collapse = " ")
    )
  }

  out
}

#' @describeIn default_stats_formats_labels Get statistical *names* available for a given method
#'   group (analyze function). Please use the `s_*` functions to get the statistical names.
#' @param stat_results (`list`)\cr list of statistical results. It should be used close to the end of
#'   a statistical function. See examples for a structure with two statistical results and two groups.
#' @param stat_names_in (`character`)\cr custom modification of statistical values.
#'
#' @return
#' * `get_stat_names()` returns a named list of `character` vectors, indicating the names of
#'    statistical outputs.
#'
#' @examples
#' stat_results <- list(
#'   "n" = list("M" = 1, "F" = 2),
#'   "count_fraction" = list("M" = c(1, 0.2), "F" = c(2, 0.1))
#' )
#' get_stat_names(stat_results)
#' get_stat_names(stat_results, list("n" = "argh"))
#'
#' @export
get_stat_names <- function(stat_results, stat_names_in = NULL) {
  checkmate::assert_character(names(stat_results), min.len = 1)
  checkmate::assert_list(stat_names_in, null.ok = TRUE)

  # Names carried by each statistic's result; unnamed results yield one
  # `NA_character_` per element.
  stat_nms <- lapply(stat_results, function(res) {
    if (is.null(names(res))) rep(NA_character_, length(res)) else names(res)
  })

  # Without custom names there is nothing to override.
  if (is.null(stat_names_in)) {
    return(stat_nms)
  }

  # Only statistics present in `stat_results` can be overridden.
  overridden <- intersect(names(stat_nms), names(stat_names_in))
  stat_nms[overridden] <- stat_names_in[overridden]

  stat_nms
}

# Splits `stats_in` into default statistics (names) and custom statistics
# (user-defined functions), and also returns all statistic names in input order.
.split_std_from_custom_stats <- function(stats_in) {
  out <- list(default_stats = NULL, custom_stats = NULL, all_stats = NULL)

  # Non-list input contains no functions: everything is a default statistic.
  if (!is.list(stats_in)) {
    out[["all_stats"]] <- stats_in
    out[["default_stats"]] <- stats_in
    return(out)
  }

  # List input: function elements are the custom statistics and must be named.
  is_custom_fnc <- sapply(stats_in, is.function)
  checkmate::assert_list(stats_in[is_custom_fnc], types = "function", names = "named")

  default_stats <- unlist(stats_in[!is_custom_fnc])
  out[["custom_stats"]] <- stats_in[is_custom_fnc]
  out[["default_stats"]] <- default_stats

  # Start from the list names (to keep the order) and fill in the default
  # statistics at the positions of the non-function elements.
  all_stats <- names(stats_in)
  all_stats[!is_custom_fnc] <- default_stats
  out[["all_stats"]] <- all_stats

  out
}

# Runs the default statistical function and every custom statistical function
# with the same argument list, and returns the concatenated results.
.apply_stat_functions <- function(default_stat_fnc, custom_stat_fnc_list, args_list) {
  # Input checks
  checkmate::assert_function(default_stat_fnc)
  checkmate::assert_list(custom_stat_fnc_list, types = "function", null.ok = TRUE, names = "named")
  checkmate::assert_list(args_list)

  # Custom functions must share the first parameter of the default function and
  # must accept `...`.
  if (!is.null(custom_stat_fnc_list)) {
    fundamental_call_to_data <- names(formals(default_stat_fnc))[[1]]
    for (fnc in custom_stat_fnc_list) {
      fnc_first_arg <- names(formals(fnc))[[1]]
      if (!identical(fnc_first_arg, fundamental_call_to_data)) {
        stop(
          "The first parameter of a custom statistical function needs to be the same (it can be `df` or `x`) ",
          "as the default statistical function. In this case your custom function has ", names(formals(fnc))[[1]],
          " as first parameter, while the default function has ", fundamental_call_to_data, "."
        )
      }

      accepts_dots <- any(names(formals(fnc)) == "...")
      if (!accepts_dots) {
        stop(
          "The custom statistical function needs to have `...` as a parameter to accept additional arguments. ",
          "In this case your custom function does not have `...`."
        )
      }
    }
  }

  # Evaluate the default function first, then each custom function.
  out_default <- do.call(default_stat_fnc, args = args_list)
  out_custom <- lapply(custom_stat_fnc_list, function(fnc) do.call(fnc, args = args_list))

  # Default results come first, followed by the custom ones.
  c(out_default, out_custom)
}

#' @describeIn default_stats_formats_labels Get formats corresponding to a list of statistics.
#'   To check available defaults see list `tern::tern_default_formats`.
#'
#' @param formats_in (named `vector`)\cr custom formats to use instead of defaults. Can be a character vector with
#'   values from [formatters::list_valid_format_labels()] or custom format functions. Defaults to `NULL` for any rows
#'   where no value is provided. See Details.
#'
#' @details if `formats_in` is `"default"`, instead of populating the
#'   return value with tern defaults, the return value will specify
#'   the `"default"` format for each element. This is useful
#'   primarily when formatting behavior should be inherited from a
#'   format specified via the `format` or `formats_var` argument to
#'   `analyze`.
#'
#' @return
#' * `get_formats_from_stats()` returns a named list of formats as strings or functions.
#'
#' @note Formats in `tern` and `rtables` can be functions that take in the table cell value and
#'   return a string. This is well documented in `vignette("custom_appearance", package = "rtables")`.
#'
#' @examples
#' # Defaults formats
#' get_formats_from_stats(num_stats)
#' get_formats_from_stats(cnt_stats)
#' get_formats_from_stats(only_pval)
#' get_formats_from_stats(all_cnt_occ)
#'
#' # Addition of customs
#' get_formats_from_stats(all_cnt_occ, formats_in = c("fraction" = c("xx")))
#' get_formats_from_stats(all_cnt_occ, formats_in = list("fraction" = c("xx.xx", "xx")))
#'
#' @seealso [formatting_functions]
#'
#' @export
get_formats_from_stats <- function(stats,
                                   formats_in = NULL,
                                   levels_per_stats = NULL,
                                   tern_defaults = tern_default_formats) {
  checkmate::assert_character(stats, min.len = 1)
  # `formats_in` is either a list (it may hold format functions) or `NULL`;
  # anything else has to be a character vector.
  if (!checkmate::test_list(formats_in, null.ok = TRUE)) {
    checkmate::assert_character(formats_in, null.ok = TRUE)
  }
  checkmate::assert_list(levels_per_stats, null.ok = TRUE)

  # Unnamed formats with one entry per statistic are matched by position.
  positional_formats <- !is.null(formats_in) && length(formats_in) == length(stats) &&
    is.null(names(formats_in)) && is.null(levels_per_stats)
  if (positional_formats) {
    return(setNames(as.list(formats_in), stats))
  }

  # With `formats_in = "default"`, build the structure as if nothing had been
  # supplied and overwrite every entry with "default" afterwards.
  full_default <- identical(formats_in, "default")
  if (full_default) {
    formats_in <- NULL
  }

  # Without `levels_per_stats`, assume one row per statistic.
  if (is.null(levels_per_stats)) {
    levels_per_stats <- setNames(as.list(stats), stats)
  }

  # Resolve each row from the user input, then the tern defaults.
  out <- .fill_in_vals_by_stats(levels_per_stats, formats_in, tern_defaults)

  if (full_default) {
    return(setNames(rep("default", length(out)), names(out)))
  }

  # Rows that fell back to their own name or level have no format: set them to NULL.
  null_level_stats <- names(which(sapply(levels_per_stats, is.null)))
  levels_per_stats[null_level_stats] <- null_level_stats
  fell_back_to_level <- unlist(out, use.names = FALSE) == unlist(levels_per_stats, use.names = FALSE)
  out[names(out) == out | fell_back_to_level] <- list(NULL)

  out
}

#' @describeIn default_stats_formats_labels Get labels corresponding to a list of statistics.
#'   To check for available defaults see list `tern::tern_default_labels`.
#'
#' @param labels_in (named `character`)\cr custom labels to use instead of defaults. If no value is provided, the
#'   variable level (if rows correspond to levels of a variable) or statistic name will be used as label.
#' @param label_attr_from_stats (named `list`)\cr if `labels_in = NULL`, then this will be used instead. It is a list
#'   of values defined in statistical functions as default labels. Values are ignored if `labels_in` is provided or `""`
#'   values are provided.
#'
#' @return
#' * `get_labels_from_stats()` returns a named list of labels as strings.
#'
#' @examples
#' # Defaults labels
#' get_labels_from_stats(num_stats)
#' get_labels_from_stats(cnt_stats)
#' get_labels_from_stats(only_pval)
#' get_labels_from_stats(all_cnt_occ)
#'
#' # Addition of customs
#' get_labels_from_stats(all_cnt_occ, labels_in = c("fraction" = "Fraction"))
#' get_labels_from_stats(all_cnt_occ, labels_in = list("fraction" = c("Some more fractions")))
#'
#' @export
get_labels_from_stats <- function(stats,
                                  labels_in = NULL,
                                  levels_per_stats = NULL,
                                  label_attr_from_stats = NULL,
                                  tern_defaults = tern_default_labels) {
  checkmate::assert_character(stats, min.len = 1)

  # Without user labels, fall back to the labels supplied by the statistical
  # functions, keeping only entries that are non-empty, non-NULL and non-missing.
  if (is.null(labels_in)) {
    usable_label <- nzchar(label_attr_from_stats) &
      !sapply(label_attr_from_stats, is.null) &
      !is.na(label_attr_from_stats)
    labels_in <- label_attr_from_stats[usable_label]
  }

  # `labels_in` is either a list or `NULL`; anything else has to be a character vector.
  if (!checkmate::test_list(labels_in, null.ok = TRUE)) {
    checkmate::assert_character(labels_in, null.ok = TRUE)
  }
  checkmate::assert_list(levels_per_stats, null.ok = TRUE)

  # Unnamed labels with one entry per statistic are matched by position.
  positional_labels <- !is.null(labels_in) && length(labels_in) == length(stats) &&
    is.null(names(labels_in)) && is.null(levels_per_stats)
  if (positional_labels) {
    return(setNames(as.list(labels_in), stats))
  }

  # Without `levels_per_stats`, assume one row per statistic.
  if (is.null(levels_per_stats)) {
    levels_per_stats <- setNames(as.list(stats), stats)
  }

  # Resolve each row from the user input, then the tern defaults.
  .fill_in_vals_by_stats(levels_per_stats, labels_in, tern_defaults)
}

#' @describeIn default_stats_formats_labels Get row indent modifiers corresponding to a list of statistics/rows.
#'
#' @param indents_in (named `integer`)\cr custom row indent modifiers to use instead of defaults. Defaults to `0L` for
#'   all values.
#' @param row_nms `r lifecycle::badge("deprecated")` Deprecation cycle started. See the `levels_per_stats` parameter
#'   for details.
#'
#' @return
#' * `get_indents_from_stats()` returns a named list of indentation modifiers as integers.
#'
#' @examples
#' get_indents_from_stats(all_cnt_occ, indents_in = 3L)
#' get_indents_from_stats(all_cnt_occ, indents_in = list(count = 2L, count_fraction = 5L))
#' get_indents_from_stats(
#'   all_cnt_occ,
#'   indents_in = list(a = 2L, count.a = 1L, count.b = 5L)
#' )
#'
#' @export
get_indents_from_stats <- function(stats,
                                   indents_in = NULL,
                                   levels_per_stats = NULL,
                                   tern_defaults = as.list(rep(0L, length(stats))) %>% setNames(stats),
                                   row_nms = lifecycle::deprecated()) {
  checkmate::assert_character(stats, min.len = 1)
  # `indents_in` is either a list or `NULL`; anything else has to be integer-like.
  if (!checkmate::test_list(indents_in, null.ok = TRUE)) {
    checkmate::assert_integerish(indents_in, null.ok = TRUE)
  }
  checkmate::assert_list(levels_per_stats, null.ok = TRUE)

  # Without `levels_per_stats`, assume one row per statistic.
  if (is.null(levels_per_stats)) {
    levels_per_stats <- setNames(as.list(stats), stats)
  }

  # A single unnamed value is repeated once per row.
  single_indent <- is.null(names(indents_in)) && length(indents_in) == 1
  if (single_indent) {
    n_rows <- length(unlist(levels_per_stats))
    return(rep(indents_in, n_rows))
  }

  # Otherwise resolve each row from the user input, then the defaults.
  .fill_in_vals_by_stats(levels_per_stats, indents_in, tern_defaults)
}

# Resolves one value per output row (a statistic, or a statistic/level pair)
# by looking it up in the user input first and the tern defaults second.
.fill_in_vals_by_stats <- function(levels_per_stats, user_in, tern_defaults) {
  # Value for a statistic that occupies a single row.
  # Lookup order: user input -> tern defaults -> the statistic name itself.
  resolve_stat <- function(stat_nm) {
    if (stat_nm %in% names(user_in)) {
      return(user_in[[stat_nm]])
    }
    if (stat_nm %in% names(tern_defaults)) {
      return(tern_defaults[[stat_nm]])
    }
    stat_nm
  }

  # Value for one level of a multi-row statistic.
  # Lookup order: user input by "stat.level", by level, by statistic; then tern
  # defaults by level (only used for labels), by statistic; finally the level itself.
  resolve_level <- function(stat_nm, lvl, row_key) {
    if (row_key %in% names(user_in)) {
      return(user_in[[row_key]])
    }
    if (lvl %in% names(user_in)) {
      return(user_in[[lvl]])
    }
    if (stat_nm %in% names(user_in)) {
      return(user_in[[stat_nm]])
    }
    if (lvl %in% names(tern_defaults)) {
      return(tern_defaults[[lvl]])
    }
    if (stat_nm %in% names(tern_defaults)) {
      return(tern_defaults[[stat_nm]])
    }
    lvl
  }

  out <- list()

  for (stat_i in names(levels_per_stats)) {
    all_lvls <- levels_per_stats[[stat_i]]

    # One row per statistic: no levels, or a single level equal to the statistic name.
    single_row <- is.null(all_lvls) || (length(all_lvls) == 1 && all_lvls == stat_i)

    if (single_row) {
      out[[stat_i]] <- resolve_stat(stat_i)
    } else {
      # One row per combination of statistic and level, named "stat.level".
      for (lev_i in all_lvls) {
        row_nm <- paste(stat_i, lev_i, sep = ".")
        out[[row_nm]] <- resolve_level(stat_i, lev_i, row_nm)
      }
    }
  }

  out
}

# Unlists `lst` after swapping every NULL element for `null_placeholder`, so
# that NULL entries are not dropped from the result.
.unlist_keep_nulls <- function(lst, null_placeholder = "NULL", recursive = FALSE) {
  filled <- lapply(lst, function(x) if (is.null(x)) null_placeholder else x)
  unlist(filled, recursive = recursive)
}

#' Update labels according to control specifications
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Given a list of statistic labels and a list of control parameters, updates labels with a relevant
#' control specification. For example, if control has element `conf_level` set to `0.9`, the default
#' label for statistic `mean_ci` will be updated to `"Mean 90% CI"`. Any labels that are supplied
#' via `labels_custom` will not be updated regardless of `control`.
#'
#' @param labels_default (named `character`)\cr a named vector of statistic labels to modify
#'   according to the control specifications. Labels that are explicitly defined in `labels_custom` will
#'   not be affected.
#' @param labels_custom (named `character`)\cr named vector of labels that are customized by
#'   the user and should not be affected by `control`.
#' @param control (named `list`)\cr list of control parameters to apply to adjust default labels.
#'
#' @return A named character vector of labels with control specifications applied to relevant labels.
#'
#' @examples
#' control <- list(conf_level = 0.80, quantiles = c(0.1, 0.83), test_mean = 0.57)
#' get_labels_from_stats(c("mean_ci", "quantiles", "mean_pval")) %>%
#'   labels_use_control(control = control)
#'
#' @export
labels_use_control <- function(labels_default, control, labels_custom = NULL) {
  control_nms <- names(control)
  custom_nms <- names(labels_custom)

  # TRUE when `stat` has a default label that was not overridden by the user
  can_update <- function(stat) {
    stat %in% names(labels_default) && !stat %in% custom_nms
  }

  # Confidence level: rewrite the "<nn>% CI" fragment of every non-customized label
  if ("conf_level" %in% control_nms) {
    labels_default <- sapply(
      names(labels_default),
      function(stat) {
        if (stat %in% custom_nms) {
          return(labels_default[[stat]])
        }
        gsub("[0-9]+% CI", f_conf_level(control[["conf_level"]]), labels_default[[stat]])
      }
    )
  }

  # Quantiles: the first control value drives the lower label, the second one the upper label
  if ("quantiles" %in% control_nms) {
    quantile_pct <- function(i) control[["quantiles"]][i] * 100

    if (can_update("quantiles")) {
      labels_default["quantiles"] <- gsub(
        "[0-9]+% and [0-9]+",
        paste0(quantile_pct(1), "% and ", quantile_pct(2)),
        labels_default["quantiles"]
      )
    }
    if (can_update("quantiles_lower")) {
      labels_default["quantiles_lower"] <- gsub(
        "[0-9]+%-ile",
        paste0(quantile_pct(1), "%-ile"),
        labels_default["quantiles_lower"]
      )
    }
    if (can_update("quantiles_upper")) {
      labels_default["quantiles_upper"] <- gsub(
        "[0-9]+%-ile",
        paste0(quantile_pct(2), "%-ile"),
        labels_default["quantiles_upper"]
      )
    }
  }

  # Tested mean: replace the "p-value (H0: mean = <x>)" fragment of the `mean_pval` label
  if ("test_mean" %in% control_nms && can_update("mean_pval")) {
    labels_default["mean_pval"] <- gsub(
      "p-value \\(H0: mean = [0-9\\.]+\\)",
      f_pval(control[["test_mean"]]),
      labels_default["mean_pval"]
    )
  }

  labels_default
}

# tern_default_stats -----------------------------------------------------------
#' @describeIn default_stats_formats_labels Named list of available statistics by method group for `tern`.
#'
#' @format
#' * `tern_default_stats` is a named list of available statistics, with each element
#'   named for their corresponding statistical method group.
#'
#' @export
tern_default_stats <- list(
  # One entry per statistical method group; each value lists the statistic names of that group
  abnormal = "fraction",
  abnormal_by_baseline = "fraction",
  abnormal_by_marked = c("count_fraction", "count_fraction_fixed_dp"),
  abnormal_by_worst_grade = c("count_fraction", "count_fraction_fixed_dp"),
  abnormal_lab_worsen_by_baseline = "fraction",
  analyze_patients_exposure_in_cols = c("n_patients", "sum_exposure"),
  analyze_vars_counts = c(
    "n", "count", "count_fraction", "count_fraction_fixed_dp", "fraction", "n_blq"
  ),
  analyze_vars_numeric = c(
    "n", "sum", "mean", "sd", "se",
    "mean_sd", "mean_se", "mean_ci", "mean_sei", "mean_sdi", "mean_pval",
    "median", "mad", "median_ci", "quantiles", "iqr",
    "range", "min", "max", "median_range", "cv",
    "geom_mean", "geom_sd", "geom_mean_sd", "geom_mean_ci", "geom_cv",
    # Three-dimensional (estimate plus interval) variants
    "median_ci_3d", "mean_ci_3d", "geom_mean_ci_3d"
  ),
  count_cumulative = "count_fraction",
  count_missed_doses = c("n", "count_fraction"),
  count_occurrences = c("count", "count_fraction", "count_fraction_fixed_dp", "fraction"),
  count_occurrences_by_grade = c("count_fraction", "count_fraction_fixed_dp"),
  count_patients_with_event = c(
    "n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"
  ),
  count_patients_with_flags = c(
    "n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"
  ),
  count_values = c(
    "n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"
  ),
  coxph_pairwise = c("pvalue", "hr", "hr_ci", "n_tot", "n_tot_events"),
  estimate_incidence_rate = c(
    "person_years", "n_events", "rate", "rate_ci", "n_unique", "n_rate"
  ),
  estimate_multinomial_response = c("n_prop", "prop_ci"),
  estimate_odds_ratio = c("or_ci", "n_tot"),
  estimate_proportion = c("n_prop", "prop_ci"),
  estimate_proportion_diff = c("diff", "diff_ci"),
  summarize_ancova = c("n", "lsmean", "lsmean_diff", "lsmean_diff_ci", "pval"),
  summarize_coxreg = c("n", "hr", "ci", "pval", "pval_inter"),
  summarize_glm_count = c("n", "rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
  summarize_num_patients = c("unique", "nonunique", "unique_count"),
  summarize_patients_events_in_cols = c("unique", "all"),
  surv_time = c(
    "median", "median_ci", "median_ci_3d",
    "quantiles", "quantiles_lower", "quantiles_upper",
    "range_censor", "range_event", "range"
  ),
  surv_timepoint = c(
    "pt_at_risk", "event_free_rate", "rate_se", "rate_ci", "event_free_rate_3d"
  ),
  surv_timepoint_diff = c("rate_diff", "rate_diff_ci", "ztest_pval", "rate_diff_ci_3d"),
  tabulate_rsp_biomarkers = c("n_tot", "n_rsp", "prop", "or", "ci", "pval"),
  tabulate_rsp_subgroups = c(
    "n", "n_rsp", "prop", "n_tot", "or", "ci", "pval", "riskdiff"
  ),
  tabulate_survival_biomarkers = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"),
  tabulate_survival_subgroups = c(
    "n_tot_events", "n_events", "n_tot", "n", "median", "hr", "ci", "pval", "riskdiff"
  ),
  test_proportion_diff = "pval"
)

# tern_default_formats ---------------------------------------------------------
#' @describeIn default_stats_formats_labels Named vector of default formats for `tern`.
#'
#' @format
#' * `tern_default_formats` is a named vector of available default formats, with each element
#'   named for their corresponding statistic.
#'
#' @export
tern_default_formats <- c(
  # Entries are either format strings (e.g. "xx.x") or objects referenced by bare name / call
  # (e.g. `format_count_fraction`, `format_xx(...)`), which are presumably formatting functions
  # defined elsewhere in the package -- TODO confirm. Because non-character entries are combined
  # with `c()`, the resulting object is a named list rather than a character vector.
  # Entries wrapped in `list(...)` contribute exactly one element that takes the given name.
  ci = list(format_extreme_values_ci(2L)),
  count = "xx.",
  count_fraction = format_count_fraction,
  count_fraction_fixed_dp = format_count_fraction_fixed_dp,
  cv = "xx.x",
  event_free_rate = "xx.xx",
  fraction = format_fraction_fixed_dp,
  geom_cv = "xx.x",
  geom_mean = "xx.x",
  geom_mean_ci = "(xx.xx, xx.xx)",
  geom_mean_ci_3d = "xx.xx (xx.xx - xx.xx)",
  geom_mean_sd = "xx.x (xx.x)",
  geom_sd = "xx.x",
  hr = list(format_extreme_values(2L)),
  hr_ci = "(xx.xx, xx.xx)",
  hr_ci_3d = "xx.xx (xx.xx - xx.xx)",
  iqr = "xx.x",
  lsmean = "xx.xx",
  lsmean_diff = "xx.xx",
  lsmean_diff_ci = "(xx.xx, xx.xx)",
  mad = "xx.x",
  max = "xx.x",
  mean = "xx.x",
  mean_ci = "(xx.xx, xx.xx)",
  mean_ci_3d = "xx.xx (xx.xx - xx.xx)",
  mean_pval = "x.xxxx | (<0.0001)",
  mean_sd = "xx.x (xx.x)",
  mean_sdi = "(xx.xx, xx.xx)",
  mean_se = "xx.x (xx.x)",
  mean_sei = "(xx.xx, xx.xx)",
  median = "xx.x",
  median_ci = "(xx.xx, xx.xx)",
  median_ci_3d = "xx.xx (xx.xx - xx.xx)",
  median_range = "xx.x (xx.x - xx.x)",
  min = "xx.x",
  n = "xx.",
  n_blq = "xx.",
  n_events = "xx",
  n_patients = "xx (xx.x%)",
  n_prop = "xx (xx.x%)",
  n_rate = "xx (xx.x)",
  n_rsp = "xx",
  n_tot = "xx",
  n_tot_events = "xx",
  n_unique = "xx",
  nonunique = "xx",
  or = list(format_extreme_values(2L)),
  or_ci = "xx.xx (xx.xx - xx.xx)",
  person_years = "xx.x",
  prop = "xx.x%",
  prop_ci = "(xx.x, xx.x)",
  pt_at_risk = "xx",
  # Three spellings of the p-value statistic share the same format
  pval = "x.xxxx | (<0.0001)",
  pvalue = "x.xxxx | (<0.0001)",
  pval_counts = "x.xxxx | (<0.0001)",
  quantiles = "xx.x - xx.x",
  quantiles_lower = "xx.xx (xx.xx - xx.xx)",
  quantiles_upper = "xx.xx (xx.xx - xx.xx)",
  range = "xx.x - xx.x",
  range_censor = "xx.x to xx.x",
  range_event = "xx.x to xx.x",
  rate = "xx.xxxx",
  rate_ci = "(xx.xxxx, xx.xxxx)",
  rate_diff = "xx.xx",
  rate_diff_ci = "(xx.xx, xx.xx)",
  rate_diff_ci_3d = format_xx("xx.xx (xx.xx, xx.xx)"),
  rate_ratio = "xx.xxxx",
  rate_ratio_ci = "(xx.xxxx, xx.xxxx)",
  rate_se = "xx.xx",
  riskdiff = "xx.x (xx.x - xx.x)",
  sd = "xx.x",
  se = "xx.x",
  sum = "xx.x",
  sum_exposure = "xx",
  unique = format_count_fraction_fixed_dp,
  unique_count = "xx",
  ztest_pval = "x.xxxx | (<0.0001)"
)

# tern_default_labels ----------------------------------------------------------
#' @describeIn default_stats_formats_labels Named `character` vector of default labels for `tern`.
#'
#' @format
#' * `tern_default_labels` is a named `character` vector of available default labels, with each element
#'   named for their corresponding statistic.
#'
#' @export
tern_default_labels <- c(
  # NOTE: `labels_use_control()` rewrites parts of these labels with regular expressions, so the
  # wording of the matched fragments must stay in sync with the patterns used there:
  #   * "<nn>% CI"                   (confidence level, e.g. "95% CI")
  #   * "<nn>% and <nn>"             (`quantiles`)
  #   * "<nn>%-ile"                  (`quantiles_lower`, `quantiles_upper`)
  #   * "p-value (H0: mean = <x>)"   (`mean_pval`)
  cv = "CV (%)",
  iqr = "IQR",
  geom_cv = "CV % Geometric Mean",
  geom_mean = "Geometric Mean",
  geom_mean_sd = "Geometric Mean (SD)",
  geom_mean_ci = "Geometric Mean 95% CI",
  geom_mean_ci_3d = "Geometric Mean (95% CI)",
  geom_sd = "Geometric SD",
  mad = "Median Absolute Deviation",
  max = "Maximum",
  mean = "Mean",
  mean_ci = "Mean 95% CI",
  mean_ci_3d = "Mean (95% CI)",
  mean_pval = "Mean p-value (H0: mean = 0)",
  mean_sd = "Mean (SD)",
  mean_sdi = "Mean -/+ 1xSD",
  mean_se = "Mean (SE)",
  mean_sei = "Mean -/+ 1xSE",
  median = "Median",
  median_ci = "Median 95% CI",
  median_ci_3d = "Median (95% CI)",
  median_range = "Median (Min - Max)",
  min = "Minimum",
  n = "n",
  n_blq = "n_blq",
  nonunique = "Number of events",
  pval = "p-value (t-test)", # Default for numeric
  pval_counts = "p-value (chi-squared test)", # Default for counts
  quantiles = "25% and 75%-ile",
  quantiles_lower = "25%-ile (95% CI)",
  quantiles_upper = "75%-ile (95% CI)",
  range = "Min - Max",
  range_censor = "Range (censored)",
  range_event = "Range (event)",
  rate = "Adjusted Rate",
  rate_ratio = "Adjusted Rate Ratio",
  sd = "SD",
  se = "SE",
  sum = "Sum",
  unique = "Number of patients with at least one event"
)

#' @describeIn default_stats_formats_labels Quick function to retrieve default formats for summary statistics:
#'   [analyze_vars()] and [analyze_vars_in_cols()] principally.
#'
#' @param type (`string`)\cr `"numeric"` or `"counts"`.
#'
#' @return
#' * `summary_formats()` returns a named `vector` of default statistic formats for the given data type.
#'
#' @examples
#' summary_formats()
#' summary_formats(type = "counts", include_pval = TRUE)
#'
#' @export
summary_formats <- function(type = "numeric", include_pval = FALSE) {
  # The method group is "analyze_vars_<type>", e.g. "analyze_vars_numeric"
  method_group <- paste(c("analyze_vars", type), collapse = "_")
  stat_nms <- get_stats(method_group, add_pval = include_pval)

  get_formats_from_stats(stat_nms)
}

#' @describeIn default_stats_formats_labels Quick function to retrieve default labels for summary statistics.
#'   Returns labels of descriptive statistics which are understood by `rtables`. Similar to `summary_formats`.
#'
#' @param include_pval (`flag`)\cr same as the `add_pval` argument in [get_stats()].
#'
#' @details
#' The `summary_*` quick get functions for labels or formats use `get_stats()` together with
#' `get_labels_from_stats()` or `get_formats_from_stats()`, respectively, to retrieve relevant information.
#'
#' @return
#' * `summary_labels` returns a named `vector` of default statistic labels for the given data type.
#'
#' @examples
#' summary_labels()
#' summary_labels(type = "counts", include_pval = TRUE)
#'
#' @export
summary_labels <- function(type = "numeric", include_pval = FALSE) {
  # The method group is "analyze_vars_<type>", e.g. "analyze_vars_counts"
  method_group <- paste(c("analyze_vars", type), collapse = "_")
  stat_nms <- get_stats(method_group, add_pval = include_pval)

  get_labels_from_stats(stat_nms)
}

#' Subgroup treatment effect pattern (STEP) fit for survival outcome
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This fits the subgroup treatment effect pattern (STEP) models for a survival outcome. The treatment arm
#' variable must have exactly 2 levels, where the first one is taken as reference and the estimated
#' hazard ratios are for the comparison of the second level vs. the first one.
#'
#' The model which is fit is:
#'
#' `Surv(time, event) ~ arm * poly(biomarker, degree) + covariates + strata(strata)`
#'
#' where `degree` is specified by `control_step()`.
#'
#' @inheritParams argument_convention
#' @param variables (named `list` of `character`)\cr list of analysis variables: needs `time`, `event`,
#'   `arm`, `biomarker`, and optional `covariates` and `strata`.
#' @param control (named `list`)\cr combined control list from [control_step()] and [control_coxph()].
#'
#' @return A matrix of class `step`. The first part of the columns describe the subgroup intervals used
#'   for the biomarker variable, including where the center of the intervals are and their bounds. The
#'   second part of the columns contain the estimates for the treatment arm comparison.
#'
#' @note For the default degree 0 the `biomarker` variable is not included in the model.
#'
#' @seealso [control_step()] and [control_coxph()] for the available customization options.
#'
#' @examples
#' # Testing dataset with just two treatment arms.
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("ARM" = "Treatment Arm", "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' variables <- list(
#'   arm = "ARM",
#'   biomarker = "BMRKR1",
#'   covariates = c("AGE", "BMRKR2"),
#'   event = "is_event",
#'   time = "AVAL"
#' )
#'
#' # Fit default STEP models: Here a constant treatment effect is estimated in each subgroup.
#' step_matrix <- fit_survival_step(
#'   variables = variables,
#'   data = adtte_f
#' )
#' dim(step_matrix)
#' head(step_matrix)
#'
#' # Specify different polynomial degree for the biomarker interaction to use more flexible local
#' # models. Or specify different Cox regression options.
#' step_matrix2 <- fit_survival_step(
#'   variables = variables,
#'   data = adtte_f,
#'   control = c(control_coxph(conf_level = 0.9), control_step(degree = 2))
#' )
#'
#' # Use a global model with cubic interaction and only 5 points.
#' step_matrix3 <- fit_survival_step(
#'   variables = variables,
#'   data = adtte_f,
#'   control = c(control_coxph(), control_step(bandwidth = NULL, degree = 3, num_points = 5L))
#' )
#'
#' @export
fit_survival_step <- function(variables,
                              data,
                              control = c(control_step(), control_coxph())) {
  checkmate::assert_list(control)
  assert_df_with_variables(data, variables)

  # Only records with a non-missing biomarker value are used
  has_biomarker <- !is.na(data[[variables$biomarker]])
  data <- data[has_biomarker, ]

  windows <- h_step_window(x = data[[variables$biomarker]], control = control)
  centers <- windows$interval[, "Interval Center"]
  model_formula <- h_step_survival_formula(variables = variables, control = control)

  if (is.null(control$bandwidth)) {
    # No bandwidth: a single estimation call on the full data, evaluated at all interval centers
    estimates <- h_step_survival_est(
      formula = model_formula,
      data = data,
      variables = variables,
      x = centers,
      control = control
    )
  } else {
    # With a bandwidth: one estimation call per interval center, each restricted to the
    # corresponding column of the window selection matrix
    per_window <- mapply(
      FUN = h_step_survival_est,
      x = centers,
      subset = as.list(as.data.frame(windows$sel)),
      MoreArgs = list(
        formula = model_formula,
        data = data,
        variables = variables,
        control = control
      )
    )
    # mapply() returns one column per window; label the statistics and put windows in rows
    rownames(per_window) <- c("n", "events", "loghr", "se", "ci_lower", "ci_upper")
    estimates <- t(per_window)
  }

  structure(
    cbind(windows$interval, estimates),
    class = c("step", "matrix"),
    variables = variables,
    control = control
  )
}

# summarize_glm_count ----------------------------------------------------------
#' Summarize Poisson negative binomial regression
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Summarize results of a Poisson, Quasi-Poisson, or negative binomial regression.
#' This can be used to analyze count and/or frequency data using a linear model.
#' It is specifically useful for analyzing count data (using the Poisson or Negative
#' Binomial distribution) that is modeled with a generalized linear model of one (e.g. arm) or more
#' covariates.
#'
#' @inheritParams h_glm_count
#' @inheritParams argument_convention
#' @param rate_mean_method (`character(1)`)\cr method used to estimate the mean odds ratio. Defaults to `emmeans`.
#'   see details for more information.
#' @param scale (`numeric(1)`)\cr linear scaling factor for rate and confidence intervals. Defaults to `1`.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("summarize_glm_count"), type = "sh")``
#'
#' @details
#' `summarize_glm_count()` uses `s_glm_count()` to calculate the statistics for the table. This
#' analysis function uses [h_glm_count()] to estimate the GLM with [stats::glm()] for Poisson and Quasi-Poisson
#' distributions or [MASS::glm.nb()] for Negative Binomial distribution. All methods assume a
#' logarithmic link function.
#'
#' At this point, rates and confidence intervals are estimated from the model using
#' either [emmeans::emmeans()] when `rate_mean_method = "emmeans"` or [h_ppmeans()]
#' when `rate_mean_method = "ppmeans"`.
#'
#' For the reference group column (specified while building the table with `split_cols_by(ref_group)`),
#' no rate ratio or `p-value` is calculated. For all other columns, [emmeans::contrast()] is used to
#' calculate the rate ratio and `p-value` relative to the reference group. Contrasts are always estimated
#' with `method = "trt.vs.ctrl"` and `ref` set to the `arm` level of the reference group.
#'
#' @name summarize_glm_count
NULL

#' @describeIn summarize_glm_count Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `summarize_glm_count()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_glm_count()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' anl <- tern_ex_adtte %>% filter(PARAMCD == "TNE")
#' anl$AVAL_f <- as.factor(anl$AVAL)
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM", ref_group = "B: Placebo") %>%
#'   add_colcounts() %>%
#'   analyze_vars(
#'     "AVAL_f",
#'     var_labels = "Number of exacerbations per patient",
#'     .stats = c("count_fraction"),
#'     .formats = c("count_fraction" = "xx (xx.xx%)"),
#'     .labels = c("Number of exacerbations per patient")
#'   ) %>%
#'   summarize_glm_count(
#'     vars = "AVAL",
#'     variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = NULL),
#'     conf_level = 0.95,
#'     distribution = "poisson",
#'     rate_mean_method = "emmeans",
#'     var_labels = "Adjusted (P) exacerbation rate (per year)",
#'     table_names = "adjP",
#'     .stats = c("rate"),
#'     .labels = c(rate = "Rate")
#'   ) %>%
#'   summarize_glm_count(
#'     vars = "AVAL",
#'     variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = c("REGION1")),
#'     conf_level = 0.95,
#'     distribution = "quasipoisson",
#'     rate_mean_method = "ppmeans",
#'     var_labels = "Adjusted (QP) exacerbation rate (per year)",
#'     table_names = "adjQP",
#'     .stats = c("rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
#'     .labels = c(
#'       rate = "Rate", rate_ci = "Rate CI", rate_ratio = "Rate Ratio",
#'       rate_ratio_ci = "Rate Ratio CI", pval = "p value"
#'     )
#'   ) %>%
#'   summarize_glm_count(
#'     vars = "AVAL",
#'     variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = c("REGION1")),
#'     conf_level = 0.95,
#'     distribution = "negbin",
#'     rate_mean_method = "emmeans",
#'     var_labels = "Adjusted (NB) exacerbation rate (per year)",
#'     table_names = "adjNB",
#'     .stats = c("rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
#'     .labels = c(
#'       rate = "Rate", rate_ci = "Rate CI", rate_ratio = "Rate Ratio",
#'       rate_ratio_ci = "Rate Ratio CI", pval = "p value"
#'     )
#'   )
#'
#' build_table(lyt = lyt, df = anl)
#'
#' @export
summarize_glm_count <- function(lyt,
                                vars,
                                variables,
                                distribution,
                                conf_level,
                                rate_mean_method = c("emmeans", "ppmeans")[1],
                                weights = stats::weights,
                                scale = 1,
                                var_labels,
                                na_str = default_na_str(),
                                nested = TRUE,
                                ...,
                                show_labels = "visible",
                                table_names = vars,
                                .stats = c("n", "rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
                                .stat_names = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = list("rate_ci" = 1L, "rate_ratio_ci" = 1L, "pval" = 1L)) {
  checkmate::assert_choice(rate_mean_method, c("emmeans", "ppmeans"))

  # Standard formatting arguments: `.stats` is always forwarded, the others only when supplied
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  is_supplied <- !vapply(optional_args, is.null, logical(1))

  extra_args <- c(
    list(.stats = .stats),
    optional_args[is_supplied],
    # Arguments consumed by the statistics function
    list(
      variables = variables,
      distribution = distribution,
      conf_level = conf_level,
      rate_mean_method = rate_mean_method,
      weights = weights,
      scale = scale
    ),
    ...
  )

  # Append additional info from layout to the analysis function. The formals are extended on a
  # copy of `a_glm_count` that is local to this call.
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(a_glm_count) <- c(formals(a_glm_count), extra_args[[".additional_fun_parameters"]])

  analyze(
    lyt = lyt,
    vars = vars,
    afun = a_glm_count,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names
  )
}

#' @describeIn summarize_glm_count Statistics function that produces a named list of results
#'   of the investigated Poisson model.
#'
#' @return
#' * `s_glm_count()` returns a named `list` of 6 statistics:
#'   * `n`: Count of complete sample size for the group.
#'   * `rate`: Estimated event rate per follow-up time.
#'   * `rate_ci`: Confidence interval for the estimated rate per follow-up time.
#'   * `rate_ratio`: Ratio of event rates in each treatment arm to the reference arm.
#'   * `rate_ratio_ci`: Confidence interval for the rate ratio.
#'   * `pval`: p-value.
#'
#'   In the reference column, `rate_ratio`, `rate_ratio_ci` and `pval` are empty numeric vectors.
#'
#' @keywords internal
s_glm_count <- function(df,
                        .var,
                        .df_row,
                        .ref_group,
                        .in_ref_col,
                        variables,
                        distribution,
                        conf_level,
                        rate_mean_method,
                        weights,
                        scale = 1,
                        ...) {
  checkmate::assert_choice(rate_mean_method, c("emmeans", "ppmeans"))
  arm <- variables$arm

  y <- df[[.var]]
  smry_level <- as.character(unique(df[[arm]]))

  # ensure there is only 1 value
  checkmate::assert_scalar(smry_level)

  # The model is fit on the full row data (`.df_row`), not only on the current column (`df`)
  results <- h_glm_count(
    .var = .var,
    .df_row = .df_row,
    variables = variables,
    distribution = distribution,
    weights = weights
  )

  emmeans_smry <- if (rate_mean_method == "emmeans") {
    summary(results$emmeans_fit, level = conf_level)
  } else {
    h_ppmeans(results$glm_fit, .df_row, arm, conf_level)
  }

  # Keep only the estimates of the arm level displayed in the current column
  emmeans_smry_level <- emmeans_smry[emmeans_smry[[arm]] == smry_level, ]

  ci_label <- f_conf_level(conf_level)

  # Statistics that are available in every column
  rate_stats <- list(
    n = length(y[!is.na(y)]),
    rate = formatters::with_label(
      # The estimate column is read from `response` for "negbin" and from `rate` otherwise
      ifelse(distribution == "negbin",
        emmeans_smry_level$response * scale,
        emmeans_smry_level$rate * scale
      ),
      "Adjusted Rate"
    ),
    rate_ci = formatters::with_label(
      c(emmeans_smry_level$asymp.LCL * scale, emmeans_smry_level$asymp.UCL * scale),
      ci_label
    )
  )

  if (.in_ref_col) {
    # No comparison is made for the reference column itself
    ratio_stats <- list(
      rate_ratio = formatters::with_label(numeric(), "Adjusted Rate Ratio"),
      rate_ratio_ci = formatters::with_label(numeric(), ci_label),
      pval = formatters::with_label(numeric(), "p-value")
    )
  } else {
    # Arm levels are matched literally (`fixed = TRUE`) so that names containing regular
    # expression metacharacters, e.g. "Drug (10 mg)", are handled correctly.
    emmeans_contrasts <- emmeans::contrast(
      results$emmeans_fit,
      method = "trt.vs.ctrl",
      ref = grep(
        as.character(unique(.ref_group[[arm]])),
        as.data.frame(results$emmeans_fit)[[arm]],
        fixed = TRUE
      )
    )

    # `level` must be passed explicitly: otherwise the interval is computed at the default
    # level while being labelled with `conf_level`.
    contrasts_smry <- summary(
      emmeans_contrasts,
      infer = TRUE,
      level = conf_level,
      adjust = "none"
    )

    smry_contrasts_level <- contrasts_smry[grepl(smry_level, contrasts_smry$contrast, fixed = TRUE), ]

    ratio_stats <- list(
      rate_ratio = formatters::with_label(
        smry_contrasts_level$ratio,
        "Adjusted Rate Ratio"
      ),
      rate_ratio_ci = formatters::with_label(
        c(smry_contrasts_level$asymp.LCL, smry_contrasts_level$asymp.UCL),
        ci_label
      ),
      pval = formatters::with_label(
        smry_contrasts_level$p.value,
        "p-value"
      )
    )
  }

  c(rate_stats, ratio_stats)
}

#' @describeIn summarize_glm_count Formatted analysis function which is used as `afun` in `summarize_glm_count()`.
#'
#' @return
#' * `a_glm_count()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_glm_count <- function(df,
                        ...,
                        .stats = NULL,
                        .stat_names = NULL,
                        .formats = NULL,
                        .labels = NULL,
                        .indent_mods = NULL) {
  # Check for additional parameters to the statistics function
  dots_extra_args <- list(...)
  # Presumably retrieves the layout-provided values (such as `.df_row` and `.var`, which are read
  # from the result further below) for the names listed in `.additional_fun_parameters` -- TODO confirm
  extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters))
  # Drop the helper entry so that it is not forwarded to the statistics function
  dots_extra_args$.additional_fun_parameters <- NULL

  # Check for user-defined functions
  default_and_custom_stats_list <- .split_std_from_custom_stats(.stats)
  .stats <- default_and_custom_stats_list$all_stats
  custom_stat_functions <- default_and_custom_stats_list$custom_stats

  # Apply statistics function
  # (`s_glm_count()` plus any custom statistic functions, all called with the same argument list)
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_glm_count,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      df = list(df),
      extra_afun_params,
      dots_extra_args
    )
  )

  # Fill in formatting defaults
  .stats <- get_stats("summarize_glm_count",
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions)
  )
  .formats <- get_formats_from_stats(.stats, .formats)
  .labels <- get_labels_from_stats(.stats, .labels)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  # Keep only the requested statistics, in the order given by `.stats`
  x_stats <- x_stats[.stats]

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  # Row names and row labels are both taken from `.labels`; NULL entries are kept as placeholders
  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = .labels %>% .unlist_keep_nulls(),
    .stat_names = .stat_names,
    .labels = .labels %>% .unlist_keep_nulls(),
    .indent_mods = .indent_mods %>% .unlist_keep_nulls()
  )
}

# h_glm_count ------------------------------------------------------------------

#' Helper functions for Poisson models
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Helper functions that return the results of [stats::glm()] when Poisson or Quasi-Poisson
#' distributions are needed (see `family` parameter), or [MASS::glm.nb()] for Negative Binomial
#' distributions. Link function for the GLM is `log`.
#'
#' @inheritParams argument_convention
#'
#' @seealso [summarize_glm_count]
#'
#' @name h_glm_count
NULL

#' @describeIn h_glm_count Helper function to return the results of the
#'   selected model (Poisson, Quasi-Poisson, negative binomial).
#'
#' @param .df_row (`data.frame`)\cr dataset that includes all the variables that are called
#'   in `.var` and `variables`.
#' @param variables (named `list` of `string`)\cr list of additional analysis variables, with
#'   expected elements:
#'   * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple
#'     groups will be summarized. Specifically, the first level of `arm` variable is taken as the
#'     reference group.
#'   * `covariates` (`character`)\cr a vector that can contain single variable names (such as
#'     `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
#'   * `offset` (`string`)\cr name of the variable in `.df_row` whose values are used as the model offset.
#' @param distribution (`character`)\cr a character value specifying the distribution
#'   used in the regression (Poisson, Quasi-Poisson, negative binomial).
#' @param weights (`character`)\cr a character vector specifying weights used
#'   in averaging predictions. Number of weights must equal the number of levels included in the covariates.
#'   Weights option passed to [emmeans::emmeans()].
#'
#' @return
#' * `h_glm_count()` returns the results of the selected model.
#'
#' @keywords internal
h_glm_count <- function(.var,
                        .df_row,
                        variables,
                        distribution,
                        weights) {
  checkmate::assert_subset(distribution, c("poisson", "quasipoisson", "negbin"), empty.ok = FALSE)

  # Select the fitting helper that matches the requested distribution, then run it
  fit_model <- switch(distribution,
    poisson = h_glm_poisson,
    quasipoisson = h_glm_quasipoisson,
    negbin = h_glm_negbin
  )

  fit_model(.var, .df_row, variables, weights)
}

#' @describeIn h_glm_count Helper function to return results of a Poisson model.
#'
#' @return
#' * `h_glm_poisson()` returns the results of a Poisson model.
#'
#' @keywords internal
h_glm_poisson <- function(.var,
                          .df_row,
                          variables,
                          weights) {
  covariates <- variables$covariates
  arm <- variables$arm

  # Right-hand side of the model: all covariates followed by the arm variable
  rhs <- paste0(" + ", paste(covariates, collapse = " + "), " + ", arm)

  # The local names `formula` and `offset` are kept as-is: glm() stores its unevaluated call in
  # the fit, so these symbols are part of the returned object.
  formula <- stats::as.formula(paste0(.var, " ~ ", rhs))

  glm_fit <- if (is.null(variables$offset)) {
    stats::glm(
      formula = formula,
      data = .df_row,
      family = stats::poisson(link = "log")
    )
  } else {
    # The offset is given as a column name of `.df_row`
    offset <- .df_row[[variables$offset]]
    stats::glm(
      formula = formula,
      offset = offset,
      data = .df_row,
      family = stats::poisson(link = "log")
    )
  }

  # Estimated marginal means per arm level, on the response scale
  emmeans_fit <- emmeans::emmeans(
    glm_fit,
    specs = arm,
    data = .df_row,
    type = "response",
    offset = 0,
    weights = weights
  )

  list(glm_fit = glm_fit, emmeans_fit = emmeans_fit)
}

#' @describeIn h_glm_count Helper function to return results of a Quasi-Poisson model.
#'
#' @return
#' * `h_glm_quasipoisson()` returns the results of a Quasi-Poisson model.
#'
#' @keywords internal
h_glm_quasipoisson <- function(.var,
                               .df_row,
                               variables,
                               weights) {
  # Fits a log-link Quasi-Poisson GLM of `.var` on the covariates and the arm variable, then derives
  # estimated marginal means per arm with `emmeans::emmeans()`.
  arm <- variables$arm
  covariates <- variables$covariates

  # Formula text: "<.var> ~  + <covariates joined by ' + '> + <arm>" (covariates first, arm last)
  formula <- stats::as.formula(paste0(
    .var, " ~ ",
    " + ",
    paste(covariates, collapse = " + "),
    " + ",
    arm
  ))

  if (is.null(variables$offset)) {
    # No offset variable requested: plain Quasi-Poisson fit
    glm_fit <- stats::glm(
      formula = formula,
      data = .df_row,
      family = stats::quasipoisson(link = "log")
    )
  } else {
    # `variables$offset` names a column of `.df_row`; its values are handed to `glm()` as the offset
    offset <- .df_row[[variables$offset]]
    glm_fit <- stats::glm(
      formula = formula,
      offset = offset,
      data = .df_row,
      family = stats::quasipoisson(link = "log")
    )
  }
  # Marginal means by `arm` on the response scale; `weights` is forwarded to `emmeans::emmeans()`.
  # NOTE(review): `offset = 0` presumably fixes the offset at zero for the reported means - confirm
  # against the emmeans documentation.
  emmeans_fit <- emmeans::emmeans(
    glm_fit,
    specs = arm,
    data = .df_row,
    type = "response",
    offset = 0,
    weights = weights
  )

  # Return both the fitted model and the marginal means
  list(
    glm_fit = glm_fit,
    emmeans_fit = emmeans_fit
  )
}

#' @describeIn h_glm_count Helper function to return results of a negative binomial model.
#'
#' @return
#' * `h_glm_negbin()` returns the results of a negative binomial model.
#'
#' @keywords internal
h_glm_negbin <- function(.var,
                         .df_row,
                         variables,
                         weights) {
  # Fits a log-link negative binomial model of `.var` on the covariates and the arm variable with
  # `MASS::glm.nb()`, then derives estimated marginal means per arm with `emmeans::emmeans()`.
  arm <- variables$arm
  covariates <- variables$covariates

  # The formula is built exactly once, depending on whether an offset variable was requested
  if (is.null(variables$offset)) {
    # Formula text: "<.var> ~  + <covariates joined by ' + '> + <arm>" (covariates first, arm last)
    formula <- stats::as.formula(paste0(
      .var, " ~ ",
      " + ",
      paste(covariates, collapse = " + "),
      " + ",
      arm
    ))
  } else {
    # The offset enters the model as an `offset()` term that references the column by name.
    # Formula text: "<.var> ~ <arm> + <covariates joined by ' + '> + offset(<offset>)"
    offset <- variables$offset
    formula_txt <- sprintf(
      "%s ~ %s + %s + offset(%s)",
      .var,
      arm, paste0(covariates, collapse = " + "), offset
    )
    formula <- stats::as.formula(
      formula_txt
    )
  }

  glm_fit <- MASS::glm.nb(
    formula = formula,
    data = .df_row,
    link = "log"
  )

  # Marginal means by `arm` on the response scale; `weights` is forwarded to `emmeans::emmeans()`.
  # NOTE(review): `offset = 0` presumably fixes the offset at zero for the reported means - confirm
  # against the emmeans documentation.
  emmeans_fit <- emmeans::emmeans(
    glm_fit,
    specs = arm,
    data = .df_row,
    type = "response",
    offset = 0,
    weights = weights
  )

  # Return both the fitted model and the marginal means
  list(
    glm_fit = glm_fit,
    emmeans_fit = emmeans_fit
  )
}

# h_ppmeans --------------------------------------------------------------------
#' Function to return the estimated means using predicted probabilities
#'
#' @description
#' For each arm level, the predicted mean rate is calculated using the fitted model object, with `newdata`
#' set to the result of [stats::model.frame()] applied to a copy of `.df_row` in which all records are assigned
#' to that arm level. The confidence interval is derived using the `conf_level` parameter.
#'
#' @param obj (`glm.fit`)\cr fitted model object used to derive the mean rate estimates in each treatment arm.
#' @param .df_row (`data.frame`)\cr dataset that includes all the variables that are called in `.var` and `variables`.
#' @param arm (`string`)\cr group variable, for which the covariate adjusted means of multiple groups will be
#'   summarized. Specifically, the first level of `arm` variable is taken as the reference group.
#' @param conf_level (`proportion`)\cr value used to derive the confidence interval for the rate.
#'
#' @return
#' * `h_ppmeans()` returns the estimated means.
#'
#' @seealso [summarize_glm_count()].
#'
#' @export
h_ppmeans <- function(obj, .df_row, arm, conf_level) {
  # Upper tail probability of the two-sided interval, e.g. 0.975 for `conf_level = 0.95`
  upper_prob <- 1 - (1 - conf_level) / 2
  arm_levels <- levels(.df_row[[arm]])

  # Mean predicted rate and its confidence limits when every record is assigned to arm level `lev`
  estimate_for_level <- function(lev) {
    all_in_arm <- .df_row
    all_in_arm[[arm]] <- factor(lev, levels = arm_levels)

    model_frame <- stats::model.frame(obj$formula, data = all_in_arm)
    design <- stats::model.matrix(obj$formula, data = model_frame)

    predicted <- stats::predict(obj, newdata = model_frame, type = "response")
    rate_hat <- mean(predicted)

    # Standard error from the column means of the rate-scaled design matrix and the coefficient covariance
    weights_vec <- colMeans(predicted * design)
    se <- sqrt(as.numeric(t(weights_vec) %*% stats::vcov(obj) %*% weights_vec))

    # The limits are symmetric around `rate_hat` on the log scale
    log_half_width <- stats::qnorm(upper_prob) * se / rate_hat
    c(rate_hat, rate_hat * exp(-log_half_width), rate_hat * exp(log_half_width))
  }

  # One row per arm level, named after the level
  estimates <- do.call(rbind, stats::setNames(lapply(arm_levels, estimate_for_level), arm_levels))

  # The estimate column is called "response" for negative binomial fits and "rate" otherwise
  estimate_name <- if (inherits(obj, "negbin")) "response" else "rate"
  colnames(estimates) <- c(estimate_name, "asymp.LCL", "asymp.UCL")

  estimates <- as.data.frame(estimates)
  estimates[[arm]] <- rownames(estimates)
  estimates
}

#' Count number of patients and sum exposure across all patients in columns
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [analyze_patients_exposure_in_cols()] creates a layout element to count total numbers of
#' patients and sum an analysis value (i.e. exposure) across all patients in columns.
#'
#' The primary analysis variable `ex_var` is the exposure variable used to calculate the `sum_exposure` statistic. The
#' `id` variable is used to uniquely identify patients in the data such that only unique patients are counted in the
#' `n_patients` statistic, and the `var` variable is used to create a row split if needed. The percentage returned as
#' part of the `n_patients` statistic is the proportion of all records that correspond to a unique patient.
#'
#' The summarize function [summarize_patients_exposure_in_cols()] performs the same function as
#' [analyze_patients_exposure_in_cols()] except it creates content rows, not data rows, to summarize the current table
#' row/column context and operates on the level of the latest row split or the root of the table if no row splits have
#' occurred.
#'
#' If a column split has not yet been performed in the table, `col_split` must be set to `TRUE` for the first call of
#' [analyze_patients_exposure_in_cols()] or [summarize_patients_exposure_in_cols()].
#'
#' @inheritParams argument_convention
#' @param ex_var (`string`)\cr name of the variable in `df` containing exposure values.
#' @param custom_label (`string` or `NULL`)\cr if provided and `labelstr` is empty, this will be used as label.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("analyze_patients_exposure_in_cols"), type = "sh")``
#'
#' @name summarize_patients_exposure_in_cols
#' @order 1
NULL

#' @describeIn summarize_patients_exposure_in_cols Statistics function which counts numbers
#'   of patients and the sum of exposure across all patients.
#'
#' @return
#' * `s_count_patients_sum_exposure()` returns a named `list` with the statistics:
#'   * `n_patients`: Number of unique patients in `df`.
#'   * `sum_exposure`: Sum of `ex_var` across all patients in `df`.
#'
#' @keywords internal
s_count_patients_sum_exposure <- function(df,
                                          labelstr = "",
                                          .stats = c("n_patients", "sum_exposure"),
                                          .N_col, # nolint
                                          ...,
                                          ex_var = "AVAL",
                                          id = "USUBJID",
                                          custom_label = NULL,
                                          var_level = NULL) {
  # Input validation
  assert_df_with_variables(df, list(ex_var = ex_var, id = id))
  checkmate::assert_string(id)
  checkmate::assert_string(labelstr)
  checkmate::assert_string(custom_label, null.ok = TRUE)
  checkmate::assert_numeric(df[[ex_var]])
  checkmate::assert_true(all(.stats %in% c("n_patients", "sum_exposure")))

  # Row label precedence: non-empty `labelstr`, then `var_level`, then `custom_label`, then the default
  row_label <- "Total patients numbers/person time"
  if (labelstr != "") {
    row_label <- labelstr
  } else if (!is.null(var_level)) {
    row_label <- var_level
  } else if (!is.null(custom_label)) {
    row_label <- custom_label
  }

  # Only the requested statistics are computed; each one carries the row label
  stats_out <- list()

  if ("n_patients" %in% .stats) {
    patient_counts <- s_num_patients_content(
      df = df,
      .N_col = .N_col, # nolint
      .var = id,
      labelstr = ""
    )
    stats_out$n_patients <- formatters::with_label(patient_counts$unique, row_label)
  }

  if ("sum_exposure" %in% .stats) {
    total_exposure <- sum(df[[ex_var]])
    stats_out$sum_exposure <- formatters::with_label(total_exposure, row_label)
  }

  stats_out
}

#' @describeIn summarize_patients_exposure_in_cols Analysis function which is used as `afun` in
#'   [rtables::analyze_colvars()] within `analyze_patients_exposure_in_cols()` and as `cfun` in
#'   [rtables::summarize_row_groups()] within `summarize_patients_exposure_in_cols()`.
#'
#' @return
#' * `a_count_patients_sum_exposure()` returns formatted [rtables::CellValue()].
#'
#' @export
a_count_patients_sum_exposure <- function(df,
                                          labelstr = "",
                                          ...,
                                          .stats = NULL,
                                          .stat_names = NULL,
                                          .formats = NULL,
                                          .labels = NULL,
                                          .indent_mods = NULL) {
  # Exactly one statistic is processed per call
  # (presumably the one attached to the current column by the layout's column split - confirm against callers)
  checkmate::assert_character(.stats, len = 1)

  # Check for additional parameters to the statistics function
  dots_extra_args <- list(...)
  extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters))
  dots_extra_args$.additional_fun_parameters <- NULL

  add_total_level <- dots_extra_args$add_total_level
  checkmate::assert_flag(add_total_level)

  # Optional grouping variable: when given, one result is produced per level of `df[[var]]`
  var <- dots_extra_args$var
  if (!is.null(var)) {
    assert_df_with_variables(df, list(var = var))
    df[[var]] <- as.factor(df[[var]])
  }

  # Check for user-defined functions
  default_and_custom_stats_list <- .split_std_from_custom_stats(.stats)
  .stats <- default_and_custom_stats_list$all_stats
  custom_stat_functions <- default_and_custom_stats_list$custom_stats

  x_stats <- list()
  if (!is.null(var)) {
    for (lvl in levels(df[[var]])) {
      # Rows belonging to the current level (rows with a missing value never match).
      # Explicit logical indexing replaces `subset(df, get(var) == lvl)`: the non-standard evaluation
      # in `subset()` looks up `var` and `lvl` in `df` first, so a column carrying either name would
      # silently hijack the filter.
      in_level <- df[[var]] == lvl
      x_stats_i <- .apply_stat_functions(
        default_stat_fnc = s_count_patients_sum_exposure,
        custom_stat_fnc_list = custom_stat_functions,
        args_list = c(
          df = list(df[in_level & !is.na(in_level), , drop = FALSE]),
          labelstr = list(labelstr),
          var_level = lvl,
          extra_afun_params,
          dots_extra_args
        )
      )
      x_stats[[.stats]][[lvl]] <- x_stats_i[[.stats]]
    }
  }

  # A "Total" entry over all rows is added on request, and always when there is no grouping variable
  if (add_total_level || is.null(var)) {
    x_stats_total <- .apply_stat_functions(
      default_stat_fnc = s_count_patients_sum_exposure,
      custom_stat_fnc_list = custom_stat_functions,
      args_list = c(
        df = list(df),
        labelstr = list(labelstr),
        extra_afun_params,
        dots_extra_args
      )
    )
    x_stats[[.stats]][["Total"]] <- x_stats_total[[.stats]]
  }

  # Fill in formatting defaults
  .stats <- get_stats(
    "analyze_patients_exposure_in_cols",
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions)
  )
  x_stats <- x_stats[.stats]
  levels_per_stats <- lapply(x_stats, names)
  .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats)
  # The labels attached to the computed values serve as defaults ahead of the package-wide defaults
  .labels <- get_labels_from_stats(
    .stats, .labels, levels_per_stats,
    tern_defaults = c(lapply(x_stats[[1]], attr, "label"), tern_default_labels)
  )
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats)

  x_stats <- x_stats[.stats] %>%
    .unlist_keep_nulls() %>%
    setNames(names(.formats))

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = .labels %>% .unlist_keep_nulls(),
    .stat_names = .stat_names,
    .labels = .labels %>% .unlist_keep_nulls(),
    .indent_mods = .indent_mods %>% .unlist_keep_nulls()
  )
}

#' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics
#'   function arguments and additional format arguments. This function is a wrapper for
#'   [rtables::split_cols_by_multivar()] and [rtables::summarize_row_groups()].
#'
#' @return
#' * `summarize_patients_exposure_in_cols()` returns a layout object suitable for passing to further
#'   layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will
#'   add formatted content rows, with the statistics from `s_count_patients_sum_exposure()` arranged in
#'   columns, to the table layout.
#'
#' @examples
#' lyt5 <- basic_table() %>%
#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE)
#'
#' result5 <- build_table(lyt5, df = df, alt_counts_df = adsl)
#' result5
#'
#' lyt6 <- basic_table() %>%
#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE, .stats = "sum_exposure")
#'
#' result6 <- build_table(lyt6, df = df, alt_counts_df = adsl)
#' result6
#'
#' @export
#' @order 3
summarize_patients_exposure_in_cols <- function(lyt,
                                                var,
                                                ex_var = "AVAL",
                                                id = "USUBJID",
                                                add_total_level = FALSE,
                                                custom_label = NULL,
                                                col_split = TRUE,
                                                na_str = default_na_str(),
                                                ...,
                                                .stats = c("n_patients", "sum_exposure"),
                                                .stat_names = NULL,
                                                .formats = NULL,
                                                .labels = c(n_patients = "Patients", sum_exposure = "Person time"),
                                                .indent_mods = NULL) {
  # The labels of the requested statistics become column headers; they are read before being removed
  # from `.labels`, so that only the remaining labels are forwarded to the analysis function
  col_labels <- unlist(.labels[.stats])
  .labels <- .labels[!names(.labels) %in% c("n_patients", "sum_exposure")]

  # Standard extra arguments: keep only the ones that are not NULL
  std_args <- list(.stat_names = .stat_names, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods)
  extra_args <- std_args[!vapply(std_args, is.null, logical(1))]

  # Additional arguments to the statistic function
  extra_args <- c(
    extra_args,
    ex_var = ex_var, id = id, add_total_level = add_total_level, custom_label = custom_label,
    ...
  )

  # Extend a local copy of the analysis function with the additional layout parameters.
  # NOTE(review): the copy deliberately keeps the original name and is passed as a bare symbol below -
  # rtables may inspect the symbol it receives; confirm before renaming it.
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(a_count_patients_sum_exposure) <- c(
    formals(a_count_patients_sum_exposure), extra_args[[".additional_fun_parameters"]]
  )

  # One column per requested statistic, each of them based on `var`
  if (col_split) {
    lyt <- split_cols_by_multivar(
      lyt = lyt, vars = rep(var, length(.stats)), varlabels = col_labels, extra_args = list(.stats = .stats)
    )
  }

  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = a_count_patients_sum_exposure,
    na_str = na_str,
    extra_args = extra_args
  )
}

#' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics
#'   function arguments and additional format arguments. This function is a wrapper for
#'   [rtables::split_cols_by_multivar()] and [rtables::analyze_colvars()].
#'
#' @param col_split (`flag`)\cr whether the columns should be split. Set to `FALSE` when the required
#'   column split has been done already earlier in the layout pipe.
#'
#' @return
#' * `analyze_patients_exposure_in_cols()` returns a layout object suitable for passing to further
#'   layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will
#'   add formatted data rows, with the statistics from `s_count_patients_sum_exposure()` arranged in
#'   columns, to the table layout.
#'
#' @note As opposed to [summarize_patients_exposure_in_cols()] which generates content rows,
#'   `analyze_patients_exposure_in_cols()` generates data rows which will _not_ be repeated on multiple
#'   pages when pagination is used.
#'
#' @examples
#' set.seed(1)
#' df <- data.frame(
#'   USUBJID = c(paste("id", seq(1, 12), sep = "")),
#'   ARMCD = c(rep("ARM A", 6), rep("ARM B", 6)),
#'   SEX = c(rep("Female", 6), rep("Male", 6)),
#'   AVAL = as.numeric(sample(seq(1, 20), 12)),
#'   stringsAsFactors = TRUE
#' )
#' adsl <- data.frame(
#'   USUBJID = c(paste("id", seq(1, 12), sep = "")),
#'   ARMCD = c(rep("ARM A", 2), rep("ARM B", 2)),
#'   SEX = c(rep("Female", 2), rep("Male", 2)),
#'   stringsAsFactors = TRUE
#' )
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE) %>%
#'   analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE)
#' result <- build_table(lyt, df = df, alt_counts_df = adsl)
#' result
#'
#' lyt2 <- basic_table() %>%
#'   split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
#'   summarize_patients_exposure_in_cols(
#'     var = "AVAL", col_split = TRUE,
#'     .stats = "n_patients", custom_label = "some custom label"
#'   ) %>%
#'   analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE, ex_var = "AVAL")
#' result2 <- build_table(lyt2, df = df, alt_counts_df = adsl)
#' result2
#'
#' lyt3 <- basic_table() %>%
#'   analyze_patients_exposure_in_cols(var = "SEX", col_split = TRUE, ex_var = "AVAL")
#' result3 <- build_table(lyt3, df = df, alt_counts_df = adsl)
#' result3
#'
#' # Adding total levels and custom label
#' lyt4 <- basic_table(
#'   show_colcounts = TRUE
#' ) %>%
#'   analyze_patients_exposure_in_cols(
#'     var = "ARMCD",
#'     col_split = TRUE,
#'     add_total_level = TRUE,
#'     custom_label = "TOTAL"
#'   ) %>%
#'   append_topleft(c("", "Sex"))
#'
#' result4 <- build_table(lyt4, df = df, alt_counts_df = adsl)
#' result4
#'
#' @export
#' @order 2
analyze_patients_exposure_in_cols <- function(lyt,
                                              var = NULL,
                                              ex_var = "AVAL",
                                              id = "USUBJID",
                                              add_total_level = FALSE,
                                              custom_label = NULL,
                                              col_split = TRUE,
                                              na_str = default_na_str(),
                                              .stats = c("n_patients", "sum_exposure"),
                                              .stat_names = NULL,
                                              .formats = NULL,
                                              .labels = c(n_patients = "Patients", sum_exposure = "Person time"),
                                              .indent_mods = NULL,
                                              ...) {
  # The labels of the requested statistics become column headers; they are read before being removed
  # from `.labels`, so that only the remaining labels are forwarded to the analysis function
  col_labels <- unlist(.labels[.stats])
  .labels <- .labels[!names(.labels) %in% c("n_patients", "sum_exposure")]

  # Standard extra arguments: keep only the ones that are not NULL
  std_args <- list(.stat_names = .stat_names, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods)
  extra_args <- std_args[!vapply(std_args, is.null, logical(1))]

  # Additional arguments to the statistic function
  extra_args <- c(
    extra_args,
    var = var, ex_var = ex_var, id = id, add_total_level = add_total_level, custom_label = custom_label,
    ...
  )

  # Extend a local copy of the analysis function with the additional layout parameters.
  # NOTE(review): the copy deliberately keeps the original name and is passed as a bare symbol below -
  # rtables may inspect the symbol it receives as `afun`; confirm before renaming it.
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(a_count_patients_sum_exposure) <- c(
    formals(a_count_patients_sum_exposure), extra_args[[".additional_fun_parameters"]]
  )

  # One column per requested statistic, each of them based on the exposure variable
  if (col_split) {
    lyt <- split_cols_by_multivar(
      lyt = lyt, vars = rep(ex_var, length(.stats)), varlabels = col_labels, extra_args = list(.stats = .stats)
    )
  }

  analyze_colvars(
    lyt = lyt,
    afun = a_count_patients_sum_exposure,
    na_str = na_str,
    extra_args = extra_args
  )
}

#' Horizontal waterfall plot
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This basic waterfall plot visualizes a quantity `height` ordered by value with some markup.
#'
#' @param height (`numeric`)\cr vector containing values to be plotted as the waterfall bars.
#' @param id (`character`)\cr vector containing identifiers to use as the x-axis label for the waterfall bars.
#' @param col (`character`)\cr color(s).
#' @param col_var (`factor`, `character`, or `NULL`)\cr categorical variable for bar coloring. `NULL` by default.
#' @param xlab (`string`)\cr x label. Default is `"ID"`.
#' @param ylab (`string`)\cr y label. Default is `"Value"`.
#' @param title (`string`)\cr text to be displayed as plot title.
#' @param col_legend_title (`string`)\cr text to be displayed as legend title.
#'
#' @return A `ggplot` waterfall plot.
#'
#' @examples
#' library(dplyr)
#'
#' g_waterfall(height = c(3, 5, -1), id = letters[1:3])
#'
#' g_waterfall(
#'   height = c(3, 5, -1),
#'   id = letters[1:3],
#'   col_var = letters[1:3]
#' )
#'
#' adsl_f <- tern_ex_adsl %>%
#'   select(USUBJID, STUDYID, ARM, ARMCD, SEX)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "OVRINV") %>%
#'   mutate(pchg = rnorm(n(), 10, 50))
#'
#' adrs_f <- head(adrs_f, 30)
#' adrs_f <- adrs_f[!duplicated(adrs_f$USUBJID), ]
#' head(adrs_f)
#'
#' g_waterfall(
#'   height = adrs_f$pchg,
#'   id = adrs_f$USUBJID,
#'   col_var = adrs_f$AVALC
#' )
#'
#' g_waterfall(
#'   height = adrs_f$pchg,
#'   id = paste("asdfdsfdsfsd", adrs_f$USUBJID),
#'   col_var = adrs_f$SEX
#' )
#'
#' g_waterfall(
#'   height = adrs_f$pchg,
#'   id = paste("asdfdsfdsfsd", adrs_f$USUBJID),
#'   xlab = "ID",
#'   ylab = "Percentage Change",
#'   title = "Waterfall plot"
#' )
#'
#' @export
g_waterfall <- function(height,
                        id,
                        col_var = NULL,
                        col = getOption("ggplot2.discrete.colour"),
                        xlab = NULL,
                        ylab = NULL,
                        col_legend_title = NULL,
                        title = NULL) {
  if (!is.null(col_var)) {
    check_same_n(height = height, id = id, col_var = col_var)
  } else {
    check_same_n(height = height, id = id)
  }

  checkmate::assert_multi_class(col_var, c("character", "factor"), null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)

  # Default labels are the expressions supplied by the caller. `deparse()` splits long expressions
  # into several strings, so the pieces are collapsed to keep every label a single string.
  xlabel <- paste(trimws(deparse(substitute(id))), collapse = " ")
  ylabel <- paste(trimws(deparse(substitute(height))), collapse = " ")

  # No default legend title (NULL) when `col_var` was not supplied
  col_label <- if (!missing(col_var)) {
    paste(trimws(deparse(substitute(col_var))), collapse = " ")
  }

  # Explicit labels take precedence over the deparsed defaults
  xlab <- if (is.null(xlab)) xlabel else xlab
  ylab <- if (is.null(ylab)) ylabel else ylab
  col_legend_title <- if (is.null(col_legend_title)) col_label else col_legend_title

  plot_data <- data.frame(
    height = height,
    id = as.character(id),
    col_var = if (is.null(col_var)) "x" else to_n(col_var, length(height)),
    stringsAsFactors = FALSE
  )

  # Bars are drawn from the largest to the smallest value; the factor levels fix that order on the x-axis
  plot_data_ord <- plot_data[order(plot_data$height, decreasing = TRUE), ]

  p <- ggplot2::ggplot(plot_data_ord, ggplot2::aes(x = factor(id, levels = id), y = height)) +
    ggplot2::geom_col() +
    ggplot2::geom_text(
      label = format(plot_data_ord$height, digits = 2),
      # Value labels sit above non-negative bars and below negative bars
      vjust = ifelse(plot_data_ord$height >= 0, -0.5, 1.5)
    ) +
    ggplot2::xlab(xlab) +
    ggplot2::ylab(ylab) +
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, hjust = 0, vjust = .5))

  # Bar coloring and legend only when a coloring variable is given
  if (!is.null(col_var)) {
    p <- p +
      ggplot2::aes(fill = col_var) +
      ggplot2::labs(fill = col_legend_title) +
      ggplot2::theme(
        legend.position = "bottom",
        legend.background = ggplot2::element_blank(),
        legend.title = ggplot2::element_text(face = "bold"),
        legend.box.background = ggplot2::element_rect(colour = "black")
      )
  }

  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_fill_manual(values = col)
  }

  if (!is.null(title)) {
    p <- p +
      ggplot2::labs(title = title) +
      ggplot2::theme(plot.title = ggplot2::element_text(face = "bold"))
  }

  p
}

#' Individual patient plots
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Renders line plot(s) displaying trends in patients' parameter values over time.
#' Patients' individual baseline values can be added to the plot(s) as reference.
#'
#' @inheritParams argument_convention
#' @param xvar (`string`)\cr time point variable to be plotted on x-axis.
#' @param yvar (`string`)\cr continuous analysis variable to be plotted on y-axis.
#' @param xlab (`string`)\cr plot label for x-axis.
#' @param ylab (`string`)\cr plot label for y-axis.
#' @param id_var (`string`)\cr variable used as patient identifier.
#' @param title (`string`)\cr title for plot.
#' @param subtitle (`string`)\cr subtitle for plot.
#' @param add_baseline_hline (`flag`)\cr adds horizontal line at baseline y-value on
#'   plot when `TRUE`.
#' @param yvar_baseline (`string`)\cr variable with baseline values only.
#'   Ignored when `add_baseline_hline` is `FALSE`.
#' @param ggtheme (`theme`)\cr optional graphical theme function as provided
#'   by `ggplot2` to control outlook of plot. Use `ggplot2::theme()` to tweak the display.
#' @param plotting_choices (`string`)\cr specifies options for displaying
#'   plots. Must be one of `"all_in_one"`, `"split_by_max_obs"`, or `"separate_by_obs"`.
#' @param max_obs_per_plot (`integer(1)`)\cr number of observations to be plotted on one
#'   plot. Ignored if `plotting_choices` is not `"split_by_max_obs"`.
#' @param caption (`string`)\cr optional caption below the plot.
#' @param col (`character`)\cr line colors.
#'
#' @seealso Relevant helper function [h_g_ipp()].
#'
#' @name g_ipp
#' @aliases individual_patient_plot
NULL

#' Helper function to create simple line plot over time
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function that generates a simple line plot displaying parameter trends over time.
#'
#' @inheritParams argument_convention
#' @inheritParams g_ipp
#'
#' @return A `ggplot` line plot.
#'
#' @seealso [g_ipp()] which uses this function.
#'
#' @examples
#' library(dplyr)
#'
#' # Select a small sample of data to plot.
#' adlb <- tern_ex_adlb %>%
#'   filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>%
#'   slice(1:36)
#'
#' p <- h_g_ipp(
#'   df = adlb,
#'   xvar = "AVISIT",
#'   yvar = "AVAL",
#'   xlab = "Visit",
#'   id_var = "USUBJID",
#'   ylab = "SGOT/ALT (U/L)",
#'   add_baseline_hline = TRUE
#' )
#' p
#'
#' @export
h_g_ipp <- function(df,
                    xvar,
                    yvar,
                    xlab,
                    ylab,
                    id_var,
                    title = "Individual Patient Plots",
                    subtitle = "",
                    caption = NULL,
                    add_baseline_hline = FALSE,
                    yvar_baseline = "BASE",
                    ggtheme = nestcolor::theme_nest(),
                    col = NULL) {
  # `df` is validated first because its column names are used by the checks below
  checkmate::assert_data_frame(df)
  checkmate::assert_string(xvar)
  checkmate::assert_string(yvar)
  checkmate::assert_string(yvar_baseline)
  checkmate::assert_string(id_var)
  checkmate::assert_string(xlab)
  checkmate::assert_string(ylab)
  checkmate::assert_string(title)
  checkmate::assert_string(subtitle)
  checkmate::assert_flag(add_baseline_hline)
  checkmate::assert_character(col, null.ok = TRUE)

  # The baseline column is only used, and therefore only required, when the baseline line is drawn
  required_vars <- c(xvar, yvar, id_var)
  if (add_baseline_hline) {
    required_vars <- c(required_vars, yvar_baseline)
  }
  checkmate::assert_subset(required_vars, colnames(df))

  # One line with points per patient, colored by patient
  p <- ggplot2::ggplot(
    data = df,
    mapping = ggplot2::aes(
      x = .data[[xvar]],
      y = .data[[yvar]],
      group = .data[[id_var]],
      colour = .data[[id_var]]
    )
  ) +
    ggplot2::geom_line(linewidth = 0.4) +
    ggplot2::geom_point(size = 2) +
    ggplot2::labs(
      x = xlab,
      y = ylab,
      title = title,
      subtitle = subtitle,
      caption = caption
    ) +
    ggtheme

  if (add_baseline_hline) {
    # One row per distinct combination of patient and baseline value
    baseline_df <- df[, c(id_var, yvar_baseline)]
    baseline_df <- unique(baseline_df)

    # The patient label is shifted from its baseline line by 2.5% of the observed y-range
    y_values <- df[[yvar]]
    label_nudge <- 0.025 * (max(y_values, na.rm = TRUE) - min(y_values, na.rm = TRUE))

    p <- p +
      ggplot2::geom_hline(
        data = baseline_df,
        mapping = ggplot2::aes(
          yintercept = .data[[yvar_baseline]],
          colour = .data[[id_var]]
        ),
        linetype = "dotdash",
        linewidth = 0.4
      ) +
      ggplot2::geom_text(
        data = baseline_df,
        mapping = ggplot2::aes(
          x = 1,
          y = .data[[yvar_baseline]],
          label = .data[[id_var]],
          colour = .data[[id_var]]
        ),
        nudge_y = label_nudge,
        vjust = "right",
        size = 2
      )
  }

  # Custom line colors apply whether or not the baseline line is drawn
  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_color_manual(values = col)
  }

  p
}

#' @describeIn g_ipp Plotting function for individual patient plots which, depending on user
#'   preference, renders a single graphic or compiles a list of graphics that show trends in individual's parameter
#'   values over time.
#'
#' @return A `ggplot` object or a list of `ggplot` objects.
#'
#' @examples
#' library(dplyr)
#'
#' # Select a small sample of data to plot.
#' adlb <- tern_ex_adlb %>%
#'   filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>%
#'   slice(1:36)
#'
#' plot_list <- g_ipp(
#'   df = adlb,
#'   xvar = "AVISIT",
#'   yvar = "AVAL",
#'   xlab = "Visit",
#'   ylab = "SGOT/ALT (U/L)",
#'   title = "Individual Patient Plots",
#'   add_baseline_hline = TRUE,
#'   plotting_choices = "split_by_max_obs",
#'   max_obs_per_plot = 5
#' )
#' plot_list
#'
#' @export
g_ipp <- function(df,
                  xvar,
                  yvar,
                  xlab,
                  ylab,
                  id_var = "USUBJID",
                  title = "Individual Patient Plots",
                  subtitle = "",
                  caption = NULL,
                  add_baseline_hline = FALSE,
                  yvar_baseline = "BASE",
                  ggtheme = nestcolor::theme_nest(),
                  plotting_choices = c("all_in_one", "split_by_max_obs", "separate_by_obs"),
                  max_obs_per_plot = 4,
                  col = NULL) {
  checkmate::assert_count(max_obs_per_plot)
  checkmate::assert_subset(plotting_choices, c("all_in_one", "split_by_max_obs", "separate_by_obs"))
  checkmate::assert_character(col, null.ok = TRUE)

  plotting_choices <- match.arg(plotting_choices)

  # Every plot uses the same settings; only the plotted subset of `df` differs
  plot_subset <- function(data) {
    h_g_ipp(
      df = data,
      xvar = xvar,
      yvar = yvar,
      xlab = xlab,
      ylab = ylab,
      id_var = id_var,
      title = title,
      subtitle = subtitle,
      caption = caption,
      add_baseline_hline = add_baseline_hline,
      yvar_baseline = yvar_baseline,
      ggtheme = ggtheme,
      col = col
    )
  }

  # Single plot with all patients
  if (plotting_choices == "all_in_one") {
    return(plot_subset(df))
  }

  # Unnamed list of plots with at most `max_obs_per_plot` patients each
  if (plotting_choices == "split_by_max_obs") {
    # A group size of zero cannot be used to partition the patients
    checkmate::assert_count(max_obs_per_plot, positive = TRUE)

    id_vec <- unique(df[[id_var]])
    n_plots <- ceiling(length(id_vec) / max_obs_per_plot)
    # Consecutive patients share a group number: 1, ..., 1, 2, ..., 2, ...
    group_index <- rep(seq_len(n_plots), each = max_obs_per_plot, length.out = length(id_vec))
    id_list <- split(id_vec, group_index)

    plot_list <- lapply(
      unname(id_list),
      function(ids) plot_subset(df[df[[id_var]] %in% ids, ])
    )
    return(plot_list)
  }

  # "separate_by_obs": one plot per patient, named by patient identifier
  lapply(split(df, df[[id_var]]), plot_subset)
}

#' Helper functions for tabulating survival duration by subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions that tabulate in a data frame statistics such as median survival
#' time and hazard ratio for population subgroups.
#'
#' @inheritParams argument_convention
#' @inheritParams survival_coxph_pairwise
#' @inheritParams survival_duration_subgroups
#' @param arm (`factor`)\cr the treatment group variable.
#'
#' @details Main functionality is to prepare data for use in a layout-creating function.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X"),
#'     SEX %in% c("M", "F")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
#'     SEX = droplevels(SEX),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("ARM" = adtte_labels[["ARM"]], "SEX" = adtte_labels[["SEX"]], "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' @name h_survival_duration_subgroups
NULL

#' @describeIn h_survival_duration_subgroups Helper to prepare a data frame of median survival times by arm.
#'
#' @return
#' * `h_survtime_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, and `median`.
#'
#' @examples
#' # Extract median survival time for one group.
#' h_survtime_df(
#'   tte = adtte_f$AVAL,
#'   is_event = adtte_f$is_event,
#'   arm = adtte_f$ARM
#' )
#'
#' @export
h_survtime_df <- function(tte, is_event, arm) {
  checkmate::assert_numeric(tte)
  checkmate::assert_logical(is_event, len = length(tte))
  assert_valid_factor(arm, len = length(tte))

  # Keep only observations where both the time and the event flag are known.
  surv_data <- data.frame(tte = tte, is_event = is_event, stringsAsFactors = FALSE)
  keep <- stats::complete.cases(surv_data)
  surv_data <- surv_data[keep, ]
  arm <- arm[keep]

  # One-row summary for a single arm; arms without observations get missing statistics.
  summarize_arm <- function(arm_data, arm_name) {
    has_rows <- nrow(arm_data) > 0
    median_est <- if (has_rows) {
      surv_stats <- s_surv_time(arm_data, .var = "tte", is_event = "is_event")
      unname(as.numeric(surv_stats$median))
    } else {
      NA
    }
    n_events <- if (has_rows) sum(arm_data$is_event) else NA

    data.frame(
      arm = arm_name,
      n = nrow(arm_data),
      n_events = n_events,
      median = median_est,
      stringsAsFactors = FALSE
    )
  }

  by_arm <- split(surv_data, arm)
  per_arm <- Map(summarize_arm, by_arm, names(by_arm))

  # Stack the per-arm rows and restore the original level order of `arm`.
  result <- do.call(rbind, args = c(per_arm, make.row.names = FALSE))
  result$arm <- factor(result$arm, levels = levels(arm))
  result
}

#' @describeIn h_survival_duration_subgroups Summarizes median survival times by arm and across subgroups
#'    in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and
#'    requires elements `tte`, `is_event`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies
#'    groupings for `subgroups` variables.
#'
#' @return
#' * `h_survtime_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, `median`, `subgroup`,
#'   `var`, `var_label`, and `row_type`.
#'
#' @examples
#' # Extract median survival time for multiple groups.
#' h_survtime_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#'
#' # Define groupings for BMRKR2 levels.
#' h_survtime_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' @export
h_survtime_subgroups_df <- function(variables,
                                    data,
                                    groups_lists = list(),
                                    label_all = "All Patients") {
  checkmate::assert_character(variables$tte)
  checkmate::assert_character(variables$is_event)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)

  assert_df_with_variables(data, variables)

  checkmate::assert_string(label_all)

  # Median survival summary by arm for any subset of `data`.
  survtime_for <- function(df) {
    h_survtime_df(df[[variables$tte]], df[[variables$is_event]], df[[variables$arm]])
  }

  # Overall ("All Patients") rows come first.
  overall <- survtime_for(data)
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # One block of rows per subgroup level, each annotated with its subgroup labels.
  subsets <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  per_subgroup <- lapply(subsets, function(subset_i) {
    stats_i <- survtime_for(subset_i$df)
    labels_i <- subset_i$df_labels[rep(1, times = nrow(stats_i)), ]
    cbind(stats_i, labels_i)
  })
  by_subgroup <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(overall, by_subgroup)
}

#' @describeIn h_survival_duration_subgroups Helper to prepare a data frame with estimates of
#'   treatment hazard ratio.
#'
#' @param strata_data (`factor`, `data.frame`, or `NULL`)\cr required if stratified analysis is performed.
#'
#' @return
#' * `h_coxph_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`,
#'   `conf_level`, `pval` and `pval_label`.
#'
#' @examples
#' # Extract hazard ratio for one group.
#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM)
#'
#' # Extract hazard ratio for one group with stratification factor.
#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM, strata_data = adtte_f$STRATA1)
#'
#' @export
h_coxph_df <- function(tte, is_event, arm, strata_data = NULL, control = control_coxph()) {
  checkmate::assert_numeric(tte)
  checkmate::assert_logical(is_event, len = length(tte))
  assert_valid_factor(arm, n.levels = 2, len = length(tte))

  surv_data <- data.frame(tte = tte, is_event = is_event)
  strata_vars <- NULL

  # Attach the stratification factor(s), if any, as extra columns.
  if (!is.null(strata_data)) {
    if (is.data.frame(strata_data)) {
      strata_vars <- names(strata_data)
      checkmate::assert_data_frame(strata_data, nrows = nrow(surv_data))
      assert_df_with_factors(strata_data, as.list(stats::setNames(strata_vars, strata_vars)))
    } else {
      assert_valid_factor(strata_data, len = nrow(surv_data))
      strata_vars <- "strata_data"
    }
    surv_data[strata_vars] <- strata_data
  }

  # First level of `arm` is the reference arm, second level the comparison arm.
  by_arm <- split(surv_data, arm)
  ref_has_rows <- nrow(by_arm[[1]]) > 0
  trt_has_rows <- nrow(by_arm[[2]]) > 0

  if (ref_has_rows && trt_has_rows) {
    # Hazard ratio and CI.
    fit <- s_coxph_pairwise(
      df = by_arm[[2]],
      .ref_group = by_arm[[1]],
      .in_ref_col = FALSE,
      .var = "tte",
      is_event = "is_event",
      strata = strata_vars,
      control = control
    )

    return(
      data.frame(
        # Dummy column needed downstream to create a nested header.
        arm = " ",
        n_tot = unname(as.numeric(fit$n_tot)),
        n_tot_events = unname(as.numeric(fit$n_tot_events)),
        hr = unname(as.numeric(fit$hr)),
        lcl = unname(fit$hr_ci[1]),
        ucl = unname(fit$hr_ci[2]),
        conf_level = control[["conf_level"]],
        pval = as.numeric(fit$pvalue),
        pval_label = obj_label(fit$pvalue),
        stringsAsFactors = FALSE
      )
    )
  }

  # No comparison is possible: report counts only and leave the estimates missing.
  if (xor(ref_has_rows, trt_has_rows)) {
    # Exactly one arm has data: count the complete records.
    complete_data <- surv_data[stats::complete.cases(surv_data), ]
    n_tot <- nrow(complete_data)
    n_tot_events <- sum(complete_data$is_event)
  } else {
    # Neither arm has data.
    n_tot <- 0L
    n_tot_events <- 0L
  }

  data.frame(
    # Dummy column needed downstream to create a nested header.
    arm = " ",
    n_tot = n_tot,
    n_tot_events = n_tot_events,
    hr = NA,
    lcl = NA,
    ucl = NA,
    conf_level = control[["conf_level"]],
    pval = NA,
    pval_label = NA,
    stringsAsFactors = FALSE
  )
}

#' @describeIn h_survival_duration_subgroups Summarizes estimates of the treatment hazard ratio
#'   across subgroups in a data frame. `variables` corresponds to the names of variables found in
#'   `data`, passed as a named list and requires elements `tte`, `is_event`, `arm` and
#'   optionally `subgroups` and `strata`. `groups_lists` optionally specifies
#'   groupings for `subgroups` variables.
#'
#' @return
#' * `h_coxph_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`,
#'   `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @examples
#' # Extract hazard ratio for multiple groups.
#' h_coxph_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#'
#' # Define groupings of BMRKR2 levels.
#' h_coxph_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' # Extract hazard ratio for multiple groups with stratification factors.
#' h_coxph_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2"),
#'     strata = c("STRATA1", "STRATA2")
#'   ),
#'   data = adtte_f
#' )
#'
#' @export
h_coxph_subgroups_df <- function(variables,
                                 data,
                                 groups_lists = list(),
                                 control = control_coxph(),
                                 label_all = "All Patients") {
  # Backward compatibility: accept the deprecated `strat` name but steer users to `strata`.
  if ("strat" %in% names(variables)) {
    warning(
      "Warning: the `strat` element name of the `variables` list argument to `h_coxph_subgroups_df() ",
      "was deprecated in tern 0.9.4.\n  ",
      "Please use the name `strata` instead of `strat` in the `variables` argument."
    )
    variables[["strata"]] <- variables[["strat"]]
  }

  checkmate::assert_character(variables$tte)
  checkmate::assert_character(variables$is_event)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_character(variables$strata, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  # Hazard ratio estimates for any subset of `data`, stratified when `strata` is given.
  coxph_for <- function(df) {
    h_coxph_df(
      tte = df[[variables$tte]],
      is_event = df[[variables$is_event]],
      arm = df[[variables$arm]],
      strata_data = if (is.null(variables$strata)) NULL else df[variables$strata],
      control = control
    )
  }

  # Overall ("All Patients") row comes first.
  overall <- coxph_for(data)
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # One block of rows per subgroup level, each annotated with its subgroup labels.
  subsets <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  per_subgroup <- lapply(subsets, function(subset_i) {
    estimates_i <- coxph_for(subset_i$df)
    labels_i <- subset_i$df_labels[rep(1, times = nrow(estimates_i)), ]
    cbind(estimates_i, labels_i)
  })

  by_subgroup <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(overall, by_subgroup)
}

#' Split data frame by subgroups
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Split a data frame into a non-nested list of subsets.
#'
#' @inheritParams argument_convention
#' @inheritParams survival_duration_subgroups
#' @param data (`data.frame`)\cr dataset to split.
#' @param subgroups (`character`)\cr names of factor variables from `data` used to create subsets.
#'   Unused levels not present in `data` are dropped. Note that the order in this vector
#'   determines the order in the downstream table.
#'
#' @return A list with subset data (`df`) and metadata about the subset (`df_labels`).
#'
#' @details Main functionality is to prepare data for use in forest plot layouts.
#'
#' @examples
#' df <- data.frame(
#'   x = c(1:5),
#'   y = factor(c("A", "B", "A", "B", "A"), levels = c("A", "B", "C")),
#'   z = factor(c("C", "C", "D", "D", "D"), levels = c("D", "C"))
#' )
#' formatters::var_labels(df) <- paste("label for", names(df))
#'
#' h_split_by_subgroups(
#'   data = df,
#'   subgroups = c("y", "z")
#' )
#'
#' h_split_by_subgroups(
#'   data = df,
#'   subgroups = c("y", "z"),
#'   groups_lists = list(
#'     y = list("AB" = c("A", "B"), "C" = "C")
#'   )
#' )
#'
#' @export
h_split_by_subgroups <- function(data,
                                 subgroups,
                                 groups_lists = list()) {
  checkmate::assert_character(subgroups, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(groups_lists, names = "named")
  checkmate::assert_subset(names(groups_lists), subgroups)
  assert_df_with_factors(data, as.list(stats::setNames(subgroups, subgroups)))

  # Remember the variable labels so they can be re-applied to every subset.
  data_labels <- unname(formatters::var_labels(data))
  df_subgroups <- data[, subgroups, drop = FALSE]
  subgroup_labels <- formatters::var_labels(df_subgroups, fill = TRUE)

  # For each subgroup variable, list the subgroup levels (or user-defined groups)
  # that actually occur in the data.
  l_labels <- Map(function(grp_i, name_i) {
    existing_levels <- levels(droplevels(grp_i))
    grp_levels <- if (name_i %in% names(groups_lists)) {
      # For this variable groupings are defined. We check which groups are contained in the data.
      group_list_i <- groups_lists[[name_i]]
      group_has_levels <- vapply(group_list_i, function(lvls) any(lvls %in% existing_levels), TRUE)
      names(which(group_has_levels))
    } else {
      existing_levels
    }
    data.frame(
      subgroup = grp_levels,
      var = name_i,
      var_label = unname(subgroup_labels[name_i]),
      stringsAsFactors = FALSE # Rationale is that subgroups may not be unique.
    )
  }, df_subgroups, names(df_subgroups))

  # Create a data frame with one row per subgroup.
  df_labels <- do.call(rbind, args = c(l_labels, make.row.names = FALSE))
  row_label <- paste0(df_labels$var, ".", df_labels$subgroup)
  # Using the labels as their own levels keeps the subsets in the order of `subgroups`.
  row_split_var <- factor(row_label, levels = row_label)

  # Create a list of data subsets.
  lapply(split(df_labels, row_split_var), function(row_i) {
    which_row <- if (row_i$var %in% names(groups_lists)) {
      data[[row_i$var]] %in% groups_lists[[row_i$var]][[row_i$subgroup]]
    } else {
      # `%in%` never yields NA, so records with a missing subgroup value are left
      # out of the subset. With `==` such records compared to NA and logical
      # indexing then injected rows consisting entirely of NA into every subset.
      data[[row_i$var]] %in% row_i$subgroup
    }
    df <- data[which_row, ]
    rownames(df) <- NULL
    formatters::var_labels(df) <- data_labels

    list(
      df = df,
      df_labels = data.frame(row_i, row.names = NULL)
    )
  })
}

#' Helper functions for Cox proportional hazards regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions used in [fit_coxreg_univar()] and [fit_coxreg_multivar()].
#'
#' @inheritParams argument_convention
#' @inheritParams h_coxreg_univar_extract
#' @inheritParams cox_regression_inter
#' @inheritParams control_coxreg
#'
#' @seealso [cox_regression]
#'
#' @name h_cox_regression
NULL

#' @describeIn h_cox_regression Helper for Cox regression formula. Creates a list of formulas. It is used
#'   internally by [fit_coxreg_univar()] for the comparison of univariate Cox regression models.
#'
#' @return
#' * `h_coxreg_univar_formulas()` returns a `character` vector coercible into formulas (e.g [stats::as.formula()]).
#'
#' @examples
#' # `h_coxreg_univar_formulas`
#'
#' ## Simple formulas.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", covariates = c("X", "y")
#'   )
#' )
#'
#' ## Addition of an optional strata.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
#'     strata = "SITE"
#'   )
#' )
#'
#' ## Inclusion of the interaction term.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
#'     strata = "SITE"
#'   ),
#'   interaction = TRUE
#' )
#'
#' ## Only covariates fitted in separate models.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", covariates = c("X", "y")
#'   )
#' )
#'
#' @export
h_coxreg_univar_formulas <- function(variables,
                                     interaction = FALSE) {
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  checkmate::assert_flag(interaction)

  # An interaction needs both a treatment arm and at least one covariate.
  if (!has_arm || is.null(variables$covariates)) {
    checkmate::assert_false(interaction)
  }

  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # Pieces shared by every formula: the survival response and the optional strata term.
  surv_lhs <- paste0("survival::Surv(", variables$time, ", ", variables$event, ") ~ ")
  strata_term <- if (!is.null(variables$strata)) {
    paste0(" + strata(", paste0(variables$strata, collapse = ", "), ")")
  } else {
    ""
  }

  # One formula per covariate (vectorised over `variables$covariates`).
  covariate_forms <- NULL
  if (!is.null(variables$covariates)) {
    arm_term <- if (has_arm) variables$arm[1] else "1"
    operator <- if (interaction) " * " else " + "
    covariate_forms <- paste0(surv_lhs, arm_term, operator, variables$covariates, strata_term)
  }
  form_names <- variables$covariates

  # With a treatment arm, the arm-only reference model goes first.
  if (has_arm) {
    ref_form <- paste0(surv_lhs, variables$arm, strata_term)
    covariate_forms <- c(ref_form, covariate_forms)
    form_names <- c("ref", form_names)
  }

  stats::setNames(covariate_forms, form_names)
}

#' @describeIn h_cox_regression Helper for multivariate Cox regression formula. Creates a formula
#'   string. It is used internally by [fit_coxreg_multivar()] for the comparison of multivariate Cox
#'   regression models. Interactions will not be included in the multivariate Cox regression model.
#'
#' @return
#' * `h_coxreg_multivar_formula()` returns a `string` coercible into a formula (e.g [stats::as.formula()]).
#'
#' @examples
#' # `h_coxreg_multivar_formula`
#'
#' h_coxreg_multivar_formula(
#'   variables = list(
#'     time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE")
#'   )
#' )
#'
#' # Addition of an optional strata.
#' h_coxreg_multivar_formula(
#'   variables = list(
#'     time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE"),
#'     strata = "SITE"
#'   )
#' )
#'
#' # Example without treatment arm.
#' h_coxreg_multivar_formula(
#'   variables = list(
#'     time = "AVAL", event = "event", covariates = c("RACE", "AGE"),
#'     strata = "SITE"
#'   )
#' )
#'
#' @export
h_coxreg_multivar_formula <- function(variables) {
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # Response and treatment term (intercept-only placeholder when there is no arm).
  arm_term <- if (has_arm) variables$arm[1] else "1"
  formula_str <- paste0(
    "survival::Surv(", variables$time, ", ", variables$event, ") ~ ",
    arm_term
  )

  # All covariates enter the same model additively.
  covariates <- variables$covariates
  if (length(covariates) > 0) {
    formula_str <- paste0(formula_str, " + ", paste(covariates, collapse = " + "))
  }

  # Optional stratification term.
  strata <- variables$strata
  if (!is.null(strata)) {
    formula_str <- paste0(formula_str, " + strata(", paste0(strata, collapse = ", "), ")")
  }

  formula_str
}

#' @describeIn h_cox_regression Utility function to help tabulate the result of
#'   a univariate Cox regression model.
#'
#' @param effect (`string`)\cr the treatment variable.
#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()].
#'
#' @return
#' * `h_coxreg_univar_extract()` returns a `data.frame` with variables `effect`, `term`, `term_label`, `level`,
#'   `n`, `hr`, `lcl`, `ucl`, and `pval`.
#'
#' @examples
#' library(survival)
#'
#' dta_simple <- data.frame(
#'   time = c(5, 5, 10, 10, 5, 5, 10, 10),
#'   status = c(0, 0, 1, 0, 0, 1, 1, 1),
#'   armcd = factor(LETTERS[c(1, 1, 1, 1, 2, 2, 2, 2)], levels = c("A", "B")),
#'   var1 = c(45, 55, 65, 75, 55, 65, 85, 75),
#'   var2 = c("F", "M", "F", "M", "F", "M", "F", "U")
#' )
#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
#' result <- h_coxreg_univar_extract(
#'   effect = "armcd", covar = "armcd", mod = mod, data = dta_simple
#' )
#' result
#'
#' @export
h_coxreg_univar_extract <- function(effect,
                                    covar,
                                    data,
                                    mod,
                                    control = control_coxreg()) {
  checkmate::assert_string(covar)
  checkmate::assert_string(effect)
  checkmate::assert_class(mod, "coxph")
  # Map the `pval_method` control setting to the test statistic label passed to the ANOVA helper.
  test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]

  # ANOVA table for the model plus a tidied model summary using the requested confidence level.
  mod_aov <- muffled_car_anova(mod, test_statistic)
  msum <- summary(mod, conf.int = control$conf_level)
  sum_cox <- broom::tidy(msum)

  # Combine results together.
  # The p-value is read from the ANOVA row named after `effect`, in the column whose name contains "Pr".
  effect_aov <- mod_aov[effect, , drop = TRUE]
  pval <- effect_aov[[grep(pattern = "Pr", x = names(effect_aov)), drop = TRUE]]
  # NOTE(review): `effect` is used as a regular expression here, so any row whose `level`
  # merely contains `effect` is selected as well - confirm this cannot match other terms.
  sum_main <- sum_cox[grepl(effect, sum_cox$level), ]

  # Treatment rows are labelled "<2nd level> vs control (<1st level>)"; covariate rows use
  # whatever `labels_or_names()` returns for the covariate column.
  term_label <- if (effect == covar) {
    paste0(
      levels(data[[covar]])[2],
      " vs control (",
      levels(data[[covar]])[1],
      ")"
    )
  } else {
    unname(labels_or_names(data[covar]))
  }
  data.frame(
    effect = ifelse(covar == effect, "Treatment:", "Covariate:"),
    term = covar,
    term_label = term_label,
    # Second level of the `effect` variable.
    level = levels(data[[effect]])[2],
    n = mod[["n"]],
    hr = unname(sum_main["exp(coef)"]),
    # Confidence limits are located by column name pattern ("lower" / "upper").
    lcl = unname(sum_main[grep("lower", names(sum_main))]),
    ucl = unname(sum_main[grep("upper", names(sum_main))]),
    pval = pval,
    stringsAsFactors = FALSE
  )
}

#' @describeIn h_cox_regression Tabulation of multivariate Cox regressions. Utility function to help
#'   tabulate the result of a multivariate Cox regression model for a treatment/covariate variable.
#'
#' @return
#' * `h_coxreg_multivar_extract()` returns a `data.frame` with variables `pval`, `hr`, `lcl`, `ucl`, `level`,
#'   `n`, `term`, and `term_label`.
#'
#' @examples
#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
#' result <- h_coxreg_multivar_extract(
#'   var = "var1", mod = mod, data = dta_simple
#' )
#' result
#'
#' @export
h_coxreg_multivar_extract <- function(var,
                                      data,
                                      mod,
                                      control = control_coxreg()) {
  test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]
  mod_aov <- muffled_car_anova(mod, test_statistic)

  msum <- summary(mod, conf.int = control$conf_level)
  sum_anova <- broom::tidy(mod_aov)
  sum_cox <- broom::tidy(msum)

  # Overall (ANOVA) p-value of the term.
  anova_rows <- sum_anova[sum_anova$term == var, c("term", "p.value")]
  names(anova_rows)[2] <- "pval"

  # Coefficient rows of the term: factor levels are matched by prefix, other
  # variables by exact name. The inverse hazard ratio column is not reported.
  keep_cols <- !(names(sum_cox) %in% "exp(-coef)")
  keep_rows <- if (is.factor(data[[var]])) {
    startsWith(prefix = var, x = sum_cox$level)
  } else {
    var == sum_cox$level
  }
  cox_rows <- sum_cox[keep_rows, keep_cols]
  names(cox_rows)[1:4] <- c("pval", "hr", "lcl", "ucl")
  var_label <- unname(labels_or_names(data[var]))
  cox_rows$term <- var_label

  # Numeric covariates are reported as the coefficient row(s) alone.
  if (is.numeric(data[[var]])) {
    cox_rows$term_label <- cox_rows$term
    return(as.data.frame(cox_rows))
  }

  # Otherwise a header row naming the reference level precedes the per-level rows.
  var_levels <- levels(data[[var]])
  if (length(var_levels) <= 2) {
    # With at most two levels the header row carries no p-value of its own.
    anova_rows$pval <- NA
  }
  anova_rows$term_label <- paste0(var_label, " (reference = ", var_levels[1], ")")
  cox_rows$level <- gsub(var, "", cox_rows$level)
  cox_rows$term_label <- cox_rows$level

  as.data.frame(dplyr::bind_rows(anova_rows, cox_rows))
}

#' Univariate formula special term
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The special term `univariate` indicates that the model should be fitted individually for
#' every variable included in `univariate`.
#'
#' @param x (`character`)\cr a vector of variable names separated by commas.
#'
#' @return When used within a model formula, produces univariate models for each variable provided.
#'
#' @details
#' If provided alongside a pairwise specification, the model
#' `y ~ ARM + univariate(SEX, AGE, RACE)` leads to the study and comparison of the models
#' + `y ~ ARM`
#' + `y ~ ARM + SEX`
#' + `y ~ ARM + AGE`
#' + `y ~ ARM + RACE`
#'
#' @export
univariate <- function(x) {
  # Capture the argument exactly as the caller wrote it, then attach it as an attribute.
  varname <- deparse(substitute(x))
  structure(x, varname = varname)
}

# Extract the right-hand side of a formula as a character vector.
# The right-hand side is always the last element of the formula object,
# for one-sided and two-sided formulas alike.
rht <- function(x) {
  checkmate::assert_formula(x)
  as.character(x[[length(x)]])
}

#' Hazard ratio estimation in interactions
#'
#' This function estimates the hazard ratios between arms when an interaction variable is given with
#' specific values.
#'
#' @param variable,given (`character(2)`)\cr names of the two variables in the interaction. We seek the estimation of
#'   the levels of `variable` given the levels of `given`.
#' @param lvl_var,lvl_given (`character`)\cr corresponding levels given by [levels()].
#' @param mmat (named `numeric`) a vector filled with `0`s used as a template to obtain the design matrix.
#' @param coef (`numeric`)\cr vector of estimated coefficients.
#' @param vcov (`matrix`)\cr variance-covariance matrix of underlying model.
#' @param conf_level (`proportion`)\cr confidence level of estimate intervals.
#'
#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A)
#'   and Sex (F, M; reference Female). The model is abbreviated: y ~ Arm + Sex + Arm x Sex.
#'   The cox regression estimates the coefficients along with a variance-covariance matrix for:
#'
#'   - b1 (arm b), b2 (arm c)
#'   - b3 (sex m)
#'   - b4 (arm b: sex m), b5 (arm c: sex m)
#'
#'   Given that I want an estimation of the Hazard Ratio for arm C/sex M, the estimation
#'   will be given in reference to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5),
#'   therefore the interaction coefficient is given by b2 + b5 while its standard error is obtained
#'   as $sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5))$; the confidence limits on the log scale lie
#'   1.96 standard errors either side of the coefficient for a confidence level of 0.95.
#'
#' @return A list of matrices (one per level of variable) with rows corresponding to the combinations of
#'   `variable` and `given`, with columns:
#'   * `coef`: Estimation of the coefficient.
#'   * `se(coef)`: Standard error of the estimation.
#'   * `hr`: Hazard ratio.
#'   * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio.
#'
#' @seealso [s_cox_multivariate()].
#'
#' @examples
#' library(dplyr)
#' library(survival)
#'
#' ADSL <- tern_ex_adsl %>%
#'   filter(SEX %in% c("F", "M"))
#'
#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "PFS")
#' adtte$ARMCD <- droplevels(adtte$ARMCD)
#' adtte$SEX <- droplevels(adtte$SEX)
#'
#' mod <- coxph(
#'   formula = Surv(time = AVAL, event = 1 - CNSR) ~ (SEX + ARMCD)^2,
#'   data = adtte
#' )
#'
#' mmat <- stats::model.matrix(mod)[1, ]
#' mmat[!mmat == 0] <- 0
#'
#' @keywords internal
estimate_coef <- function(variable, given,
                          lvl_var, lvl_given,
                          coef,
                          mmat,
                          vcov,
                          conf_level = 0.95) {
  # Build term names by pasting the variable name and its levels together.
  var_lvl <- paste0(variable, lvl_var[-1]) # [-1]: reference level
  giv_lvl <- paste0(given, lvl_given)

  # One row per combination of a non-reference `variable` level and a `given` level.
  design_mat <- expand.grid(variable = var_lvl, given = giv_lvl)
  design_mat <- design_mat[order(design_mat$variable, design_mat$given), ]
  # Add the interaction term name in both possible orders ("a:b" and "b:a").
  design_mat <- within(
    data = design_mat,
    expr = {
      inter <- paste0(variable, ":", given)
      rev_inter <- paste0(given, ":", variable)
    }
  )

  # Keep the grouping variable and the row labels before `design_mat` is turned into a matrix.
  split_by_variable <- design_mat$variable
  interaction_names <- paste(design_mat$variable, design_mat$given, sep = "/")

  # For each combination, switch on (set to 1) the entries of the template `mmat` whose names match
  # the `variable` term or either spelling of the interaction term; the `given` main effect is
  # excluded. Each combination becomes one column of the resulting matrix.
  design_mat <- apply(
    X = design_mat, MARGIN = 1, FUN = function(x) {
      mmat[names(mmat) %in% x[-which(names(x) == "given")]] <- 1
      return(mmat)
    }
  )
  colnames(design_mat) <- interaction_names

  betas <- as.matrix(coef)

  # Sum of the switched-on coefficients for each combination.
  coef_hat <- t(design_mat) %*% betas
  dimnames(coef_hat)[2] <- "coef"

  # Standard error of that sum: square root of the sum of all entries of the corresponding
  # sub-matrix of `vcov` (variances plus twice the covariances).
  coef_se <- apply(design_mat, 2, function(x) {
    vcov_el <- as.logical(x)
    y <- vcov[vcov_el, vcov_el]
    y <- sum(y)
    y <- sqrt(y)
    return(y)
  })

  # Two-sided normal quantile for the requested confidence level.
  q_norm <- stats::qnorm((1 + conf_level) / 2)
  y <- cbind(coef_hat, `se(coef)` = coef_se)

  # Hazard ratio and its confidence limits, obtained by exponentiating on the coefficient scale.
  y <- apply(y, 1, function(x) {
    x["hr"] <- exp(x["coef"])
    x["lcl"] <- exp(x["coef"] - q_norm * x["se(coef)"])
    x["ucl"] <- exp(x["coef"] + q_norm * x["se(coef)"])

    return(x)
  })

  # `apply()` returned one column per combination; transpose back to one row per combination,
  # then split the rows into one matrix per `variable` level.
  y <- t(y)
  y <- by(y, split_by_variable, identity)
  y <- lapply(y, as.matrix)

  # Human-readable description of what the estimates are compared against.
  attr(y, "details") <- paste0(
    "Estimations of ", variable,
    " hazard ratio given the level of ", given, " compared to ",
    variable, " level ", lvl_var[1], "."
  )
  return(y)
}

#' `tryCatch` around `car::Anova`
#'
#' Captures warnings when executing [car::Anova].
#'
#' @inheritParams car::Anova
#'
#' @return A list with item `aov` for the result of the model and `warn_text` for the captured warnings.
#'
#' @examples
#' # `car::Anova` on a Cox regression model including strata and expecting
#' # a likelihood ratio test triggers a warning as only the Wald method is
#' # accepted.
#'
#' library(survival)
#'
#' mod <- coxph(
#'   formula = Surv(time = futime, event = fustat) ~ factor(rx) + strata(ecog.ps),
#'   data = ovarian
#' )
#'
#' @keywords internal
try_car_anova <- function(mod,
                          test.statistic) { # nolint
  # Holds the text of the most recent warning raised while running the ANOVA (NULL if none).
  warn_text <- c()

  # Record the warning, then resume the computation as if it had not been raised.
  keep_warning <- function(w) {
    warn_text <<- trimws(paste0("Warning in `try_car_anova`: ", w))
    invokeRestart("muffleWarning")
  }

  # `aov` is evaluated before `warn_text`, so any warning recorded during the
  # ANOVA is already stored by the time the second element is read.
  withCallingHandlers(
    list(
      aov = car::Anova(
        mod,
        test.statistic = test.statistic,
        type = "III"
      ),
      warn_text = warn_text
    ),
    warning = keep_warning
  )
}

#' Fit a Cox regression model and ANOVA
#'
#' The function derives the effect p-values using [car::Anova()] from [survival::coxph()] results.
#'
#' @inheritParams t_coxreg
#'
#' @return A list with items `mod` (results of [survival::coxph()]), `msum` (result of `summary`) and
#'   `aov` (result of [car::Anova()]).
#'
#' @noRd
fit_n_aov <- function(formula,
                      data = data,
                      conf_level = conf_level,
                      pval_method = c("wald", "likelihood"),
                      ...) {
  pval_method <- match.arg(pval_method)

  # Re-home the formula in this call frame so that symbols it references are
  # looked up here when the model is fitted.
  environment(formula) <- environment()

  # We expect some warnings due to coxph which fails strict programming, so
  # both the fit and its summary are computed with warnings silenced.
  mod <- suppressWarnings(survival::coxph(formula, data = data, ...))
  msum <- suppressWarnings(summary(mod, conf.int = conf_level))

  # Map the user-facing method name onto the `test.statistic` value passed on
  # to `car::Anova()` through `try_car_anova()`.
  anova_res <- try_car_anova(
    mod,
    test.statistic = switch(pval_method,
      "wald" = "Wald",
      "likelihood" = "LR"
    )
  )

  # Surface any captured warning as a message and keep it on the result.
  warn_attr <- anova_res$warn_text
  if (!is.null(warn_attr)) {
    message(warn_attr)
  }

  y <- list(mod = mod, msum = msum, aov = anova_res$aov)
  attr(y, "message") <- warn_attr

  y
}

# argument_checks
check_formula <- function(formula) {
  # Valid input: return NULL invisibly, nothing else to do.
  if (inherits(formula, "formula")) {
    return(invisible())
  }

  # Anything that is not a formula object is rejected.
  stop("Check `formula`. A formula should resemble `Surv(time = AVAL, event = 1 - CNSR) ~ study_arm(ARMCD)`.")
}

check_covariate_formulas <- function(covariates) {
  # One flag per element: is it a formula object?
  is_formula <- vapply(X = covariates, FUN = inherits, what = "formula", FUN.VALUE = TRUE)

  # `all()` of an empty vector is TRUE, so NULL input needs its own check.
  if (is.null(covariates) || !all(is_formula)) {
    stop("Check `covariates`, it should be a list of right-hand-term formulas, e.g. list(Age = ~AGE).")
  }

  invisible()
}

name_covariate_names <- function(covariates) {
  current_names <- names(covariates)

  # No names at all: derive every name with `rht()`.
  if (is.null(current_names)) {
    names(covariates) <- vapply(covariates, FUN = rht, FUN.VALUE = "name")
    return(covariates)
  }

  # Partially named: fill in only the empty names, keep the others as given.
  unnamed <- current_names == ""
  if (any(unnamed)) {
    names(covariates)[unnamed] <- vapply(covariates[unnamed], FUN = rht, FUN.VALUE = "name")
  }

  covariates
}

check_increments <- function(increments, covariates) {
  # Nothing to validate when no increments were supplied.
  if (is.null(increments)) {
    return(invisible())
  }

  # Names derived from the covariate formulas via `rht()`.
  covariate_vars <- vapply(covariates, FUN = rht, FUN.VALUE = "name")

  # Warn (but do not fail) for every increment whose name has no counterpart
  # among the covariates.
  warn_if_unmatched <- function(x) {
    if (!x %in% covariate_vars) {
      warning(
        paste(
          "Check `increments`, the `increment` for ", x,
          "doesn't match any names in investigated covariate(s)."
        )
      )
    }
  }
  lapply(X = names(increments), FUN = warn_if_unmatched)

  invisible()
}

#' Multivariate Cox model - summarized results
#'
#' Analyses based on multivariate Cox model are usually not performed for the Controlled Substance Reporting or
#' regulatory documents but serve exploratory purposes only (e.g., for publication). In practice, the model usually
#' includes only the main effects (without interaction terms). It produces the hazard ratio estimates for each of the
#' covariates included in the model.
#' The analysis follows the same principles (e.g., stratified vs. unstratified analysis and tie handling) as the
#' usual Cox model analysis. Since there is usually no pre-specified hypothesis testing for such analysis,
#' the p.values need to be interpreted with caution. (**Statistical Analysis of Clinical Trials Data with R**,
#' `NEST's bookdown`)
#'
#' @param formula (`formula`)\cr a formula corresponding to the investigated [survival::Surv()] survival model
#'   including covariates.
#' @param data (`data.frame`)\cr a data frame which includes the variable in formula and covariates.
#' @param conf_level (`proportion`)\cr the confidence level for the hazard ratio interval estimations. Default is 0.95.
#' @param pval_method (`string`)\cr the method used for the estimation of p-values, should be one of
#'   `"wald"` (default) or `"likelihood"`.
#' @param ... optional parameters passed to [survival::coxph()]. Can include `ties`, a character string specifying the
#'   method for tie handling, one of `exact` (default), `efron`, `breslow`.
#'
#' @return A `list` with elements `mod`, `msum`, `aov`, and `coef_inter`.
#'
#' @details The output is limited to single effect terms. Work is ongoing for estimation of interaction terms
#'   but is out of scope as defined by the Global Data Standards Repository
#'   (**`GDS_Standard_TLG_Specs_Tables_2.doc`**).
#'
#' @seealso [estimate_coef()].
#'
#' @examples
#' library(dplyr)
#'
#' adtte <- tern_ex_adtte
#' adtte_f <- subset(adtte, PARAMCD == "OS") # _f: filtered
#' adtte_f <- filter(
#'   adtte_f,
#'   PARAMCD == "OS" &
#'     SEX %in% c("F", "M") &
#'     RACE %in% c("ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE")
#' )
#' adtte_f$SEX <- droplevels(adtte_f$SEX)
#' adtte_f$RACE <- droplevels(adtte_f$RACE)
#'
#' @keywords internal
s_cox_multivariate <- function(formula, data,
                               conf_level = 0.95,
                               pval_method = c("wald", "likelihood"),
                               ...) {
  # Variables appearing in the formula, excluding the response (first row of
  # the factors matrix) and any `strata()` special.
  tf <- stats::terms(formula, specials = c("strata"))
  covariates <- rownames(attr(tf, "factors"))[-c(1, unlist(attr(tf, "specials")))]

  # Character covariates are converted to factors so that their levels can be
  # queried below.
  for (covariate in covariates) {
    if (is.character(data[[covariate]])) {
      data[[covariate]] <- as.factor(data[[covariate]])
    }
  }
  pval_method <- match.arg(pval_method)

  # Fit the Cox model and compute the ANOVA table.
  fit <- fit_n_aov(
    formula = formula,
    data = data,
    conf_level = conf_level,
    pval_method = pval_method,
    ...
  )
  mod <- fit$mod
  aov <- fit$aov
  msum <- fit$msum

  term_labels <- attr(mod$terms, "term.labels")
  is_interaction <- attr(mod$terms, "order") > 1

  # Interaction estimates are only computed when the model has terms of
  # order two or more; otherwise `coef_inter` stays NULL.
  coef_inter <- NULL
  if (any(is_interaction)) {
    for_inter <- term_labels[is_interaction]
    names(for_inter) <- for_inter

    # Template row of the design matrix with every entry set to zero.
    mmat <- stats::model.matrix(mod)[1, ]
    mmat[mmat != 0] <- 0
    mcoef <- stats::coef(mod)
    mvcov <- stats::vcov(mod)

    estimate_coef_local <- function(variable, given) {
      estimate_coef(
        variable, given,
        coef = mcoef, mmat = mmat, vcov = mvcov, conf_level = conf_level,
        lvl_var = levels(data[[variable]]), lvl_given = levels(data[[given]])
      )
    }

    coef_inter <- lapply(
      for_inter, function(inter_term) {
        # Variables involved in this interaction term.
        involved <- attr(mod$terms, "factors")[, inter_term]
        involved <- names(involved[involved > 0])
        # Estimate each variable given the other(s), in both directions.
        Map(estimate_coef_local, variable = involved, given = rev(involved))
      }
    )
  }

  list(mod = mod, msum = msum, aov = aov, coef_inter = coef_inter)
}

#' Helper functions for tabulating binary response by subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions that tabulate in a data frame statistics such as response rate
#' and odds ratio for population subgroups.
#'
#' @inheritParams argument_convention
#' @inheritParams response_subgroups
#' @param arm (`factor`)\cr the treatment group variable.
#'
#' @details Main functionality is to prepare data for use in a layout-creating function.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
#'   droplevels() %>%
#'   mutate(
#'     # Reorder levels of factor to make the placebo group the reference arm.
#'     ARM = fct_relevel(ARM, "B: Placebo"),
#'     rsp = AVALC == "CR"
#'   )
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' @name h_response_subgroups
NULL

#' @describeIn h_response_subgroups Helper to prepare a data frame of binary responses by arm.
#'
#' @return
#' * `h_proportion_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, and `prop`.
#'
#' @examples
#' h_proportion_df(
#'   c(TRUE, FALSE, FALSE),
#'   arm = factor(c("A", "A", "B"), levels = c("A", "B"))
#' )
#'
#' @export
h_proportion_df <- function(rsp, arm) {
  checkmate::assert_logical(rsp)
  assert_valid_factor(arm, len = length(rsp))

  # Records with a missing response are excluded from all counts.
  observed <- !is.na(rsp)
  rsp <- rsp[observed]
  arm <- arm[observed]

  rsp_by_arm <- split(rsp, arm)
  arm_names <- names(rsp_by_arm)

  # One single-row data frame per arm level, named like the split.
  arm_rows <- stats::setNames(vector("list", length(rsp_by_arm)), arm_names)
  for (i in seq_along(rsp_by_arm)) {
    arm_rsp <- rsp_by_arm[[i]]
    arm_rows[[i]] <- if (length(arm_rsp) == 0) {
      # Arm level without any record: zero count, statistics not available.
      data.frame(
        arm = arm_names[i],
        n = 0L,
        n_rsp = NA,
        prop = NA,
        stringsAsFactors = FALSE
      )
    } else {
      s_prop <- s_proportion(df = arm_rsp)
      data.frame(
        arm = arm_names[i],
        n = length(arm_rsp),
        n_rsp = unname(s_prop$n_prop[1]),
        prop = unname(s_prop$n_prop[2]),
        stringsAsFactors = FALSE
      )
    }
  }

  df <- do.call(rbind, args = c(arm_rows, make.row.names = FALSE))
  # Restore the factor with the level order of the input `arm`.
  df$arm <- factor(df$arm, levels = levels(arm))
  df
}

#' @describeIn h_response_subgroups Summarizes proportion of binary responses by arm and across subgroups
#'    in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and
#'    requires elements `rsp`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies
#'    groupings for `subgroups` variables.
#'
#' @return
#' * `h_proportion_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, `prop`, `subgroup`,
#'   `var`, `var_label`, and `row_type`.
#'
#' @examples
#' h_proportion_subgroups_df(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f
#' )
#'
#' # Define groupings for BMRKR2 levels.
#' h_proportion_subgroups_df(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' @export
h_proportion_subgroups_df <- function(variables,
                                      data,
                                      groups_lists = list(),
                                      label_all = "All Patients") {
  checkmate::assert_character(variables$rsp)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  # Overall ("All Patients") rows, computed on the full dataset.
  result_all <- h_proportion_df(data[[variables$rsp]], data[[variables$arm]])
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables the overall rows are the whole result.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  # One block of rows per subgroup level, each with its label columns attached.
  subgroup_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  subgroup_rows <- lapply(subgroup_data, function(grp) {
    props <- h_proportion_df(grp$df[[variables$rsp]], grp$df[[variables$arm]])
    # Repeat the single label row once per result row.
    labels <- grp$df_labels[rep(1, times = nrow(props)), ]
    cbind(props, labels)
  })
  result_subgroups <- do.call(rbind, args = c(subgroup_rows, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(
    result_all,
    result_subgroups
  )
}

#' @describeIn h_response_subgroups Helper to prepare a data frame with estimates of
#'   the odds ratio between a treatment and a control arm.
#'
#' @inheritParams response_subgroups
#' @param strata_data (`factor`, `data.frame`, or `NULL`)\cr required if stratified analysis is performed.
#'
#' @return
#' * `h_odds_ratio_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, `conf_level`, and
#'   optionally `pval` and `pval_label`.
#'
#' @examples
#' # Unstratified analysis.
#' h_odds_ratio_df(
#'   c(TRUE, FALSE, FALSE, TRUE),
#'   arm = factor(c("A", "A", "B", "B"), levels = c("A", "B"))
#' )
#'
#' # Include p-value.
#' h_odds_ratio_df(adrs_f$rsp, adrs_f$ARM, method = "chisq")
#'
#' # Stratified analysis.
#' h_odds_ratio_df(
#'   rsp = adrs_f$rsp,
#'   arm = adrs_f$ARM,
#'   strata_data = adrs_f[, c("STRATA1", "STRATA2")],
#'   method = "cmh"
#' )
#'
#' @export
h_odds_ratio_df <- function(rsp, arm, strata_data = NULL, conf_level = 0.95, method = NULL) {
  assert_valid_factor(arm, n.levels = 2, len = length(rsp))

  df_rsp <- data.frame(
    rsp = rsp,
    arm = arm
  )

  # For a stratified analysis, collapse the stratification variable(s) into a
  # single factor column; `strata_name` stays NULL otherwise.
  strata_name <- NULL
  if (!is.null(strata_data)) {
    strata_var <- interaction(strata_data, drop = TRUE)
    assert_valid_factor(strata_var, len = nrow(df_rsp))

    strata_name <- "strata"
    df_rsp[[strata_name]] <- strata_var
  }

  # First arm level is the reference, second is the comparison arm.
  by_arm <- split(df_rsp, arm)
  n_ref <- nrow(by_arm[[1]])
  n_trt <- nrow(by_arm[[2]])

  if (n_ref > 0 && n_trt > 0) {
    # Odds ratio and CI.
    or_res <- s_odds_ratio(
      df = by_arm[[2]],
      .var = "rsp",
      .ref_group = by_arm[[1]],
      .in_ref_col = FALSE,
      .df_row = df_rsp,
      variables = list(arm = "arm", strata = strata_name),
      conf_level = conf_level
    )

    df <- data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = unname(or_res$n_tot["n_tot"]),
      or = unname(or_res$or_ci["est"]),
      lcl = unname(or_res$or_ci["lcl"]),
      ucl = unname(or_res$or_ci["ucl"]),
      conf_level = conf_level,
      stringsAsFactors = FALSE
    )

    if (!is.null(method)) {
      # Test for difference.
      test_res <- s_test_proportion_diff(
        df = by_arm[[2]],
        .var = "rsp",
        .ref_group = by_arm[[1]],
        .in_ref_col = FALSE,
        variables = list(strata = strata_name),
        method = method
      )

      df$pval <- as.numeric(test_res$pval)
      df$pval_label <- obj_label(test_res$pval)
    }

    return(df)
  }

  # At least one arm is empty, so the model cannot be fitted. `n_tot` comes
  # from the data when exactly one arm is empty and is zero when both are.
  n_tot <- if (n_ref == 0 && n_trt == 0) {
    0L
  } else {
    sum(stats::complete.cases(df_rsp))
  }

  df <- data.frame(
    # Dummy column needed downstream to create a nested header.
    arm = " ",
    n_tot = n_tot,
    or = NA,
    lcl = NA,
    ucl = NA,
    conf_level = conf_level,
    stringsAsFactors = FALSE
  )

  if (!is.null(method)) {
    df$pval <- NA
    df$pval_label <- NA
  }

  df
}

#' @describeIn h_response_subgroups Summarizes estimates of the odds ratio between a treatment and a control
#'   arm across subgroups in a data frame. `variables` corresponds to the names of variables found in
#'   `data`, passed as a named list and requires elements `rsp`, `arm` and optionally `subgroups`
#'   and `strata`. `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @return
#' * `h_odds_ratio_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`,
#'   `conf_level`, `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @examples
#' # Unstratified analysis.
#' h_odds_ratio_subgroups_df(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f
#' )
#'
#' # Stratified analysis.
#' h_odds_ratio_subgroups_df(
#'   variables = list(
#'     rsp = "rsp",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2"),
#'     strata = c("STRATA1", "STRATA2")
#'   ),
#'   data = adrs_f
#' )
#'
#' # Define groupings of BMRKR2 levels.
#' h_odds_ratio_subgroups_df(
#'   variables = list(
#'     rsp = "rsp",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' @export
h_odds_ratio_subgroups_df <- function(variables,
                                      data,
                                      groups_lists = list(),
                                      conf_level = 0.95,
                                      method = NULL,
                                      label_all = "All Patients") {
  # Backward compatibility: accept the deprecated `strat` element name and
  # copy it to `strata`.
  if ("strat" %in% names(variables)) {
    warning(
      "Warning: the `strat` element name of the `variables` list argument to `h_odds_ratio_subgroups_df() ",
      "was deprecated in tern 0.9.4.\n  ",
      "Please use the name `strata` instead of `strat` in the `variables` argument."
    )
    variables[["strata"]] <- variables[["strat"]]
  }

  checkmate::assert_character(variables$rsp)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_character(variables$strata, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  # Odds ratio row for one dataset; the strata columns are passed along only
  # when a stratified analysis was requested.
  odds_ratio_for <- function(dataset) {
    strata_data <- if (is.null(variables$strata)) {
      NULL
    } else {
      dataset[, variables$strata, drop = FALSE]
    }

    h_odds_ratio_df(
      rsp = dataset[[variables$rsp]],
      arm = dataset[[variables$arm]],
      strata_data = strata_data,
      conf_level = conf_level,
      method = method
    )
  }

  # Overall ("All Patients") row, computed on the full dataset.
  result_all <- odds_ratio_for(data)
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables the overall row is the whole result.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  subgroup_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  subgroup_rows <- lapply(subgroup_data, function(grp) {
    estimates <- odds_ratio_for(grp$df)
    # Repeat the single label row once per result row.
    labels <- grp$df_labels[rep(1, times = nrow(estimates)), ]
    cbind(estimates, labels)
  })

  result_subgroups <- do.call(rbind, args = c(subgroup_rows, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(
    result_all,
    result_subgroups
  )
}

#' Count patients with marked laboratory abnormalities
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [count_abnormal_by_marked()] creates a layout element to count patients with marked laboratory
#' abnormalities for each direction of abnormality, categorized by parameter value.
#'
#' This function analyzes primary analysis variable `var` which indicates whether a single, replicated,
#' or last marked laboratory abnormality was observed. Levels of `var` to include for each marked lab
#' abnormality (`single` and `last_replicated`) can be supplied via the `category` parameter. Additional
#' analysis variables that can be supplied as a list via the `variables` parameter are `id` (defaults
#' to `USUBJID`), a variable to indicate unique subject identifiers, `param` (defaults to `PARAM`), a
#' variable to indicate parameter values, and `direction` (defaults to `abn_dir`), a variable to indicate
#' abnormality directions.
#'
#' For each combination of `param` and `direction` levels, marked lab abnormality counts are calculated
#' as follows:
#'   * `Single, not last` & `Last or replicated`: The number of patients with `Single, not last`
#'     and `Last or replicated` values, respectively.
#'   * `Any Abnormality`: The number of patients with either single or replicated marked abnormalities.
#'
#' Fractions are calculated by dividing the above counts by the number of patients with at least one
#' valid measurement recorded during the analysis.
#'
#' Prior to using this function in your table layout you must use [rtables::split_rows_by()] to create two
#' row splits, one on variable `param` and one on variable `direction`.
#'
#' @inheritParams argument_convention
#' @param category (`list`)\cr a list with different marked category names for single
#'   and last or replicated.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("abnormal_by_marked"), type = "sh")``
#'
#' @note `Single, not last` and `Last or replicated` levels are mutually exclusive. If a patient has
#'   abnormalities that meet both the `Single, not last` and `Last or replicated` criteria, then the
#'   patient will be counted only under the `Last or replicated` category.
#'
#' @name abnormal_by_marked
#' @order 1
NULL

#' @describeIn abnormal_by_marked Statistics function for patients with marked lab abnormalities.
#'
#' @return
#' * `s_count_abnormal_by_marked()` returns statistic `count_fraction` with `Single, not last`,
#'   `Last or replicated`, and `Any Abnormality` results.
#'
#' @keywords internal
s_count_abnormal_by_marked <- function(df,
                                       .var = "AVALCAT1",
                                       .spl_context,
                                       category = list(single = "SINGLE", last_replicated = c("LAST", "REPLICATED")),
                                       variables = list(id = "USUBJID", param = "PARAM", direction = "abn_dir"),
                                       ...) {
  checkmate::assert_string(.var)
  checkmate::assert_list(variables)
  checkmate::assert_list(category)
  checkmate::assert_subset(names(category), c("single", "last_replicated"))
  checkmate::assert_subset(names(variables), c("id", "param", "direction"))
  # `df` must hold at most one abnormality direction.
  checkmate::assert_vector(unique(df[[variables$direction]]), max.len = 1)

  assert_df_with_variables(df, c(aval = .var, variables))
  checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))

  # Denominator: unique subjects of the current column in the parent data of
  # the `param` split. Patients in the denominator have at least one
  # post-baseline visit; a subject with records for both directions is
  # counted only once.
  param_row <- .spl_context[.spl_context$split == variables[["param"]], ]
  parent_ids <- param_row$full_parent_df[[1]][[variables[["id"]]]]
  column_ids <- parent_ids[param_row$cur_col_subset[[1]]]
  denom <- length(unique(column_ids))

  # No subjects in the denominator: report zero counts and zero fractions.
  if (denom == 0) {
    return(list(count_fraction = list(
      "Single, not last" = c(0, 0),
      "Last or replicated" = c(0, 0),
      "Any Abnormality" = c(0, 0)
    )))
  }

  # Unique subject ids having at least one record in the given categories.
  ids_in_category <- function(lvls) {
    unique(df[df[[.var]] %in% lvls, variables$id, drop = TRUE])
  }
  ids_last_replicated <- ids_in_category(category[["last_replicated"]])
  # Subjects who have both single and last/replicated abnormalities are counted in only the last/replicated group.
  ids_single <- setdiff(ids_in_category(category[["single"]]), ids_last_replicated)

  n_single <- length(ids_single)
  n_last_replicated <- length(ids_last_replicated)
  n_any <- n_single + n_last_replicated

  # Pair a count with its fraction of the denominator.
  with_fraction <- function(n) c(n, n / denom)

  list(count_fraction = list(
    "Single, not last" = with_fraction(n_single),
    "Last or replicated" = with_fraction(n_last_replicated),
    "Any Abnormality" = with_fraction(n_any)
  ))
}

#' @describeIn abnormal_by_marked Formatted analysis function which is used as `afun`
#'   in `count_abnormal_by_marked()`.
#'
#' @return
#' * `a_count_abnormal_by_marked()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_count_abnormal_by_marked <- function(df,
                                       ...,
                                       .stats = NULL,
                                       .stat_names = NULL,
                                       .formats = NULL,
                                       .labels = NULL,
                                       .indent_mods = NULL) {
  # Formatted analysis function: computes the statistics for `df` (by default
  # with `s_count_abnormal_by_marked()`), resolves formats, labels and indents
  # for the selected statistics, and returns the rows built by `in_rows()`.
  #
  # Arguments:
  #   df            data passed as `df` to the statistics function(s).
  #   ...           extra arguments forwarded to the statistics function(s);
  #                 an element named `.additional_fun_parameters` is consumed
  #                 here and not forwarded.
  #   .stats        statistics to display; entries are split into standard and
  #                 custom statistics by `.split_std_from_custom_stats()`.
  #   .stat_names, .formats, .labels, .indent_mods
  #                 optional overrides merged with defaults by the `get_*()`
  #                 helpers below.

  # Check for additional parameters to the statistics function
  dots_extra_args <- list(...)
  # NOTE(review): `retrieve_extra_afun_params()` is defined elsewhere and only
  # receives parameter *names* here; presumably it looks their values up in
  # this function's frame, so this call should stay directly in this body --
  # confirm before refactoring.
  extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters))
  # Drop the helper entry so it is not passed to the statistics function.
  dots_extra_args$.additional_fun_parameters <- NULL

  # Check for user-defined functions
  default_and_custom_stats_list <- .split_std_from_custom_stats(.stats)
  .stats <- default_and_custom_stats_list$all_stats
  custom_stat_functions <- default_and_custom_stats_list$custom_stats

  # Apply statistics function
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_count_abnormal_by_marked,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      df = list(df),
      extra_afun_params,
      dots_extra_args
    )
  )

  # Fill in formatting defaults
  .stats <- get_stats("abnormal_by_marked", stats_in = .stats, custom_stats_in = names(custom_stat_functions))
  # Names of the levels found under each computed statistic.
  levels_per_stats <- lapply(x_stats, names)
  .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats)
  .labels <- get_labels_from_stats(.stats, .labels, levels_per_stats)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats)

  # Keep only the selected statistics, flatten them, and name the result after
  # the resolved formats.
  x_stats <- x_stats[.stats] %>%
    .unlist_keep_nulls() %>%
    setNames(names(.formats))

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  # The flattened labels serve both as row names and as row labels.
  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = .labels %>% .unlist_keep_nulls(),
    .stat_names = .stat_names,
    .labels = .labels %>% .unlist_keep_nulls(),
    .indent_mods = .indent_mods %>% .unlist_keep_nulls()
  )
}

#' @describeIn abnormal_by_marked Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_by_marked()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal_by_marked()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' df <- data.frame(
#'   USUBJID = as.character(c(rep(1, 5), rep(2, 5), rep(1, 5), rep(2, 5))),
#'   ARMCD = factor(c(rep("ARM A", 5), rep("ARM B", 5), rep("ARM A", 5), rep("ARM B", 5))),
#'   ANRIND = factor(c(
#'     "NORMAL", "HIGH", "HIGH", "HIGH HIGH", "HIGH",
#'     "HIGH", "HIGH", "HIGH HIGH", "NORMAL", "HIGH HIGH", "NORMAL", "LOW", "LOW", "LOW LOW", "LOW",
#'     "LOW", "LOW", "LOW LOW", "NORMAL", "LOW LOW"
#'   )),
#'   ONTRTFL = rep(c("", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"), 2),
#'   PARAMCD = factor(c(rep("CRP", 10), rep("ALT", 10))),
#'   AVALCAT1 = factor(rep(c("", "", "", "SINGLE", "REPLICATED", "", "", "LAST", "", "SINGLE"), 2)),
#'   stringsAsFactors = FALSE
#' )
#'
#' df <- df %>%
#'   mutate(abn_dir = factor(
#'     case_when(
#'       ANRIND == "LOW LOW" ~ "Low",
#'       ANRIND == "HIGH HIGH" ~ "High",
#'       TRUE ~ ""
#'     ),
#'     levels = c("Low", "High")
#'   ))
#'
#' # Select only post-baseline records.
#' df <- df %>% filter(ONTRTFL == "Y")
#' df_crp <- df %>%
#'   filter(PARAMCD == "CRP") %>%
#'   droplevels()
#' full_parent_df <- list(df_crp, "not_needed")
#' cur_col_subset <- list(rep(TRUE, nrow(df_crp)), "not_needed")
#' spl_context <- data.frame(
#'   split = c("PARAMCD", "GRADE_DIR"),
#'   full_parent_df = I(full_parent_df),
#'   cur_col_subset = I(cur_col_subset)
#' )
#'
#' map <- unique(
#'   df[df$abn_dir %in% c("Low", "High") & df$AVALCAT1 != "", c("PARAMCD", "abn_dir")]
#' ) %>%
#'   lapply(as.character) %>%
#'   as.data.frame() %>%
#'   arrange(PARAMCD, abn_dir)
#'
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   split_rows_by("PARAMCD") %>%
#'   summarize_num_patients(
#'     var = "USUBJID",
#'     .stats = "unique_count"
#'   ) %>%
#'   split_rows_by(
#'     "abn_dir",
#'     split_fun = trim_levels_to_map(map)
#'   ) %>%
#'   count_abnormal_by_marked(
#'     var = "AVALCAT1",
#'     variables = list(
#'       id = "USUBJID",
#'       param = "PARAMCD",
#'       direction = "abn_dir"
#'     )
#'   ) %>%
#'   build_table(df = df)
#'
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   split_rows_by("PARAMCD") %>%
#'   summarize_num_patients(
#'     var = "USUBJID",
#'     .stats = "unique_count"
#'   ) %>%
#'   split_rows_by(
#'     "abn_dir",
#'     split_fun = trim_levels_in_group("abn_dir")
#'   ) %>%
#'   count_abnormal_by_marked(
#'     var = "AVALCAT1",
#'     variables = list(
#'       id = "USUBJID",
#'       param = "PARAMCD",
#'       direction = "abn_dir"
#'     )
#'   ) %>%
#'   build_table(df = df)
#'
#' @export
#' @order 2
count_abnormal_by_marked <- function(lyt,
                                     var,
                                     category = list(single = "SINGLE", last_replicated = c("LAST", "REPLICATED")),
                                     variables = list(id = "USUBJID", param = "PARAM", direction = "abn_dir"),
                                     na_str = default_na_str(),
                                     nested = TRUE,
                                     ...,
                                     .stats = "count_fraction",
                                     .stat_names = NULL,
                                     .formats = list(count_fraction = format_count_fraction),
                                     .labels = NULL,
                                     .indent_mods = NULL) {
  checkmate::assert_string(var)

  # Standard extra arguments: `.stats` is always passed on, the remaining
  # formatting arguments only when they were actually supplied.
  stats_arg <- list(".stats" = .stats)
  optional_args <- list(
    ".stat_names" = .stat_names,
    ".formats" = .formats,
    ".labels" = .labels,
    ".indent_mods" = .indent_mods
  )
  supplied <- !vapply(optional_args, is.null, logical(1))

  # Followed by the arguments of the statistics function and anything in `...`.
  extra_args <- c(
    stats_arg,
    optional_args[supplied],
    "category" = list(category),
    "variables" = list(variables),
    ...
  )

  # Append additional info from layout to the analysis function: the extra
  # parameters are recorded in `extra_args` and added as formals to a local
  # copy of the analysis function.
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(a_count_abnormal_by_marked) <- c(
    formals(a_count_abnormal_by_marked),
    extra_args[[".additional_fun_parameters"]]
  )

  analyze(
    lyt = lyt,
    vars = var,
    afun = a_count_abnormal_by_marked,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    show_labels = "hidden"
  )
}

#' Survival time analysis
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [surv_time()] creates a layout element to analyze survival time by calculating survival time
#' median, median confidence interval, quantiles, and range (for all, censored, or event patients). The primary
#' analysis variable `vars` is the time variable and the secondary analysis variable `is_event` indicates whether or
#' not an event has occurred.
#'
#' @inheritParams argument_convention
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
#'   [control_surv_time()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for survival time.
#'   * `conf_type` (`string`)\cr confidence interval type. Options are "plain" (default), "log", or "log-log",
#'     see more in [survival::survfit()]. Note option "none" is not supported.
#'   * `quantiles` (`numeric`)\cr vector of length two to specify the quantiles of survival time.
#' @param ref_fn_censor (`flag`)\cr whether referential footnotes indicating censored observations should be printed
#'   when the `range` statistic is included.
#' @param .indent_mods (named `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("surv_time"), type = "sh")``
#'
#' @examples
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVAL = day2month(AVAL),
#'     is_event = CNSR == 0
#'   )
#' df <- adtte_f %>% filter(ARMCD == "ARM A")
#'
#' @name survival_time
#' @order 1
NULL

#' @describeIn survival_time Statistics function which analyzes survival times.
#'
#' @return
#' * `s_surv_time()` returns the statistics:
#'   * `median`: Median survival time.
#'   * `median_ci`: Confidence interval for median time.
#'   * `median_ci_3d`: Median with confidence interval for median time.
#'   * `quantiles`: Survival time for two specified quantiles.
#'   * `quantiles_lower`: quantile with confidence interval for the first specified quantile.
#'   * `quantiles_upper`: quantile with confidence interval for the second specified quantile.
#'   * `range_censor`: Survival time range for censored observations.
#'   * `range_event`: Survival time range for observations with events.
#'   * `range`: Survival time range for all observations.
#'
#' @keywords internal
s_surv_time <- function(df,
                        .var,
                        ...,
                        is_event,
                        control = control_surv_time()) {
  # Input checks: `.var` must name a complete numeric column of `df` and
  # `is_event` a complete logical column.
  checkmate::assert_string(.var)
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE)
  checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE)

  conf_level <- control$conf_level
  probs <- control$quantiles

  # Fit a single survival curve (intercept-only model) on the whole of `df`.
  km_formula <- stats::as.formula(
    paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1")
  )
  km_fit <- survival::survfit(
    formula = km_formula,
    data = df,
    conf.int = conf_level,
    conf.type = control$conf_type
  )
  km_table <- summary(km_fit, extend = TRUE)$table

  # Quantile point estimates, taken before the quantile table is flattened.
  qt_raw <- stats::quantile(km_fit, probs = probs)
  qt_point <- qt_raw$quantile

  # Observed time ranges: censored records, event records, all records.
  times <- df[[.var]]
  had_event <- df[[is_event]]
  rng_censored <- range_noinf(times[!had_event], na.rm = TRUE)
  rng_events <- range_noinf(times[had_event], na.rm = TRUE)
  rng_all <- range_noinf(times, na.rm = TRUE)

  # Estimate plus lower/upper limit for each requested quantile, looked up by
  # the "<component>.<percent>" names of the flattened quantile table.
  names(probs) <- as.character(100 * probs)
  qt_flat <- unlist(qt_raw)
  qt_with_ci <- lapply(probs, function(p) {
    pct <- as.character(100 * p)
    c(
      qt_flat[[paste0("quantile.", pct)]],
      qt_flat[[paste0("lower.", pct)]],
      qt_flat[[paste0("upper.", pct)]]
    )
  })

  # Median and its confidence limits are read from the summary table, whose
  # limit columns are prefixed with the confidence level stored in the fit.
  median_est <- unname(km_table["median"])
  median_limits <- unname(km_table[paste0(km_fit$conf.int, c("LCL", "UCL"))])
  ci_label <- f_conf_level(conf_level)

  list(
    median = formatters::with_label(median_est, "Median"),
    median_ci = formatters::with_label(median_limits, ci_label),
    quantiles = formatters::with_label(
      unname(qt_point),
      paste0(probs[1] * 100, "% and ", probs[2] * 100, "%-ile")
    ),
    range_censor = formatters::with_label(rng_censored, "Range (censored)"),
    range_event = formatters::with_label(rng_events, "Range (event)"),
    range = formatters::with_label(rng_all, "Range"),
    median_ci_3d = formatters::with_label(
      c(median_est, median_limits),
      paste0("Median (", ci_label, ")")
    ),
    quantiles_lower = formatters::with_label(
      unname(qt_with_ci[[1]]),
      paste0(probs[1] * 100, "%-ile (", ci_label, ")")
    ),
    quantiles_upper = formatters::with_label(
      unname(qt_with_ci[[2]]),
      paste0(probs[2] * 100, "%-ile (", ci_label, ")")
    )
  )
}

#' @describeIn survival_time Formatted analysis function which is used as `afun` in `surv_time()`.
#'
#' @return
#' * `a_surv_time()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_surv_time(
#'   df,
#'   .df_row = df,
#'   .var = "AVAL",
#'   is_event = "is_event"
#' )
#'
#' @export
a_surv_time <- function(df,
                        labelstr = "",
                        ...,
                        .stats = NULL,
                        .stat_names = NULL,
                        .formats = NULL,
                        .labels = NULL,
                        .indent_mods = NULL) {
  # Split the layout-supplied parameters off from the remaining arguments that
  # are handed on to the statistics function.
  stat_args <- list(...)
  layout_params <- retrieve_extra_afun_params(names(stat_args$.additional_fun_parameters))
  stat_args$.additional_fun_parameters <- NULL

  # Separate built-in statistic names from user-defined statistic functions.
  split_stats <- .split_std_from_custom_stats(.stats)
  .stats <- split_stats$all_stats
  custom_fns <- split_stats$custom_stats

  # Compute all statistics.
  results <- .apply_stat_functions(
    default_stat_fnc = s_surv_time,
    custom_stat_fnc_list = custom_fns,
    args_list = c(
      df = list(df),
      labelstr = list(labelstr),
      layout_params,
      stat_args
    )
  )

  # Keep the censored range bounds now: `range_censor` may be dropped below
  # when the results are restricted to the requested statistics.
  censor_range <- results[["range_censor"]]
  censor_min <- censor_range[1]
  censor_max <- censor_range[2]

  # Resolve statistics, formats, labels and indentation.
  .stats <- get_stats("surv_time", stats_in = .stats, custom_stats_in = names(custom_fns))
  .formats <- get_formats_from_stats(.stats, .formats)
  .labels <- labels_use_control(get_labels_from_stats(.stats, .labels), stat_args$control)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  results <- results[.stats]

  # Automatic format handling and statistic names.
  .formats <- apply_auto_formatting(.formats, results, layout_params$.df_row, layout_params$.var)
  .stat_names <- get_stat_names(results, .stat_names)

  # Referential footnote on the range row when one or both of its bounds
  # coincide with the bounds of the censored range.
  footnotes <- stats::setNames(vector("list", length = length(results)), .labels)
  add_range_fn <- "range" %in% names(results) &&
    "ref_fn_censor" %in% names(stat_args) &&
    stat_args$ref_fn_censor
  if (add_range_fn) {
    min_is_censored <- identical(results[["range"]][1], censor_min)
    max_is_censored <- identical(results[["range"]][2], censor_max)
    fn_text <- if (min_is_censored && max_is_censored) {
      "Censored observations: range minimum & maximum"
    } else if (min_is_censored) {
      "Censored observation: range minimum"
    } else if (max_is_censored) {
      "Censored observation: range maximum"
    }
    if (!is.null(fn_text)) {
      footnotes[[.labels[["range"]]]] <- fn_text
    }
  }

  in_rows(
    .list = results,
    .formats = .formats,
    .names = names(.labels),
    .stat_names = .stat_names,
    .labels = .unlist_keep_nulls(.labels),
    .indent_mods = .unlist_keep_nulls(.indent_mods),
    .cell_footnotes = footnotes
  )
}

#' @describeIn survival_time Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `surv_time()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_surv_time()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD") %>%
#'   add_colcounts() %>%
#'   surv_time(
#'     vars = "AVAL",
#'     var_labels = "Survival Time (Months)",
#'     is_event = "is_event",
#'     control = control_surv_time(conf_level = 0.9, conf_type = "log-log")
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' @export
#' @order 2
surv_time <- function(lyt,
                      vars,
                      is_event,
                      control = control_surv_time(),
                      ref_fn_censor = TRUE,
                      na_str = default_na_str(),
                      nested = TRUE,
                      ...,
                      var_labels = "Time to Event",
                      show_labels = "visible",
                      table_names = vars,
                      .stats = c("median", "median_ci", "quantiles", "range"),
                      .stat_names = NULL,
                      .formats = list(
                        median_ci = "(xx.x, xx.x)", quantiles = "xx.x, xx.x", range = "xx.x to xx.x",
                        quantiles_lower = "xx.x (xx.x - xx.x)", quantiles_upper = "xx.x (xx.x - xx.x)",
                        median_ci_3d = "xx.x (xx.x - xx.x)"
                      ),
                      .labels = list(median_ci = "95% CI", range = "Range"),
                      .indent_mods = list(median_ci = 1L)) {
  # Standard formatting arguments: `.stats` is always forwarded, the others
  # only when they are not NULL.
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  extra_args <- c(
    list(.stats = .stats),
    Filter(Negate(is.null), optional_args)
  )

  # Arguments for the statistics function, followed by anything passed via `...`.
  extra_args <- c(
    extra_args,
    is_event = is_event,
    control = list(control),
    ref_fn_censor = ref_fn_censor,
    ...
  )

  # Extend a local copy of the analysis function with the additional
  # layout parameters so that it can receive them.
  layout_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- layout_params
  formals(a_surv_time) <- c(formals(a_surv_time), layout_params)

  analyze(
    lyt = lyt,
    vars = vars,
    afun = a_surv_time,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args
  )
}

#' Encode categorical missing values in a data frame
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a helper function to encode missing entries across groups of categorical
#' variables in a data frame.
#'
#' @details Missing entries are those with `NA` or empty strings and will
#'   be replaced with a specified value. If factor variables include missing
#'   values, the missing value will be inserted as the last level.
#'   Similarly, in case character or logical variables should be converted to factors
#'   with the `char_as_factor` or `logical_as_factor` options, the missing values will
#'   be set as the last level.
#'
#' @param data (`data.frame`)\cr data set.
#' @param omit_columns (`character`)\cr names of variables from `data` that should
#'   not be modified by this function.
#' @param char_as_factor (`flag`)\cr whether to convert character variables
#'   in `data` to factors.
#' @param logical_as_factor (`flag`)\cr whether to convert logical variables
#'   in `data` to factors.
#' @param na_level (`string`)\cr string used to replace all `NA` or empty
#'   values inside non-`omit_columns` columns.
#'
#' @return A `data.frame` with the chosen modifications applied.
#'
#' @seealso [sas_na()] and [explicit_na()] for other missing data helper functions.
#'
#' @examples
#' my_data <- data.frame(
#'   u = c(TRUE, FALSE, NA, TRUE),
#'   v = factor(c("A", NA, NA, NA), levels = c("Z", "A")),
#'   w = c("A", "B", NA, "C"),
#'   x = c("D", "E", "F", NA),
#'   y = c("G", "H", "I", ""),
#'   z = c(1, 2, 3, 4),
#'   stringsAsFactors = FALSE
#' )
#'
#' # Example 1
#' # Encode missing values in all character or factor columns.
#' df_explicit_na(my_data)
#' # Also convert logical columns to factor columns.
#' df_explicit_na(my_data, logical_as_factor = TRUE)
#' # Encode missing values in a subset of columns.
#' df_explicit_na(my_data, omit_columns = c("x", "y"))
#'
#' # Example 2
#' # Here we purposefully convert all `M` values to `NA` in the `SEX` variable.
#' # After running `df_explicit_na` the `NA` values are encoded as `<Missing>` but they are not
#' # included when generating `rtables`.
#' adsl <- tern_ex_adsl
#' adsl$SEX[adsl$SEX == "M"] <- NA
#' adsl <- df_explicit_na(adsl)
#'
#' # If you want the `NA` values to be displayed in the table use the `na_level` argument.
#' adsl <- tern_ex_adsl
#' adsl$SEX[adsl$SEX == "M"] <- NA
#' adsl <- df_explicit_na(adsl, na_level = "Missing Values")
#'
#' # Example 3
#' # Numeric variables that have missing values are not altered. This means that any `NA` value in
#' # a numeric variable will not be included in the summary statistics, nor will they be included
#' # in the denominator value for calculating the percent values.
#' adsl <- tern_ex_adsl
#' adsl$AGE[adsl$AGE < 30] <- NA
#' adsl <- df_explicit_na(adsl)
#'
#' @export
df_explicit_na <- function(data,
                           omit_columns = NULL,
                           char_as_factor = TRUE,
                           logical_as_factor = FALSE,
                           na_level = "<Missing>") {
  checkmate::assert_character(omit_columns, null.ok = TRUE, min.len = 1, any.missing = FALSE)
  checkmate::assert_data_frame(data)
  checkmate::assert_flag(char_as_factor)
  checkmate::assert_flag(logical_as_factor)
  checkmate::assert_string(na_level)

  # Columns to process. Without `omit_columns` the names are taken as they
  # are, so duplicated column names are still present at this point.
  target_vars <- if (is.null(omit_columns)) {
    names(data)
  } else {
    setdiff(names(data), omit_columns)
  }
  if (length(target_vars) == 0) {
    return(data)
  }

  # Check the target columns against `data`, grouped by column name.
  vars_by_name <- split(target_vars, target_vars)
  assert_df_with_variables(data, vars_by_name)

  for (col in target_vars) {
    values <- data[[col]]
    label <- obj_label(values)

    # Decide on the original type whether a factor conversion is wanted.
    char_to_factor <- is.character(values) && char_as_factor
    lgl_to_factor <- is.logical(values) && logical_as_factor

    # Logical columns go through character so that missing values can be
    # replaced in the same way as for character columns.
    if (lgl_to_factor) {
      values <- as.character(values)
    }

    # Anything that is neither factor nor character is left untouched.
    if (!is.factor(values) && !is.character(values)) {
      next
    }

    # Replace empty strings and NA values by `na_level`.
    values <- explicit_na(sas_na(values), label = na_level)

    if (char_to_factor || lgl_to_factor) {
      # Sorted levels, with `na_level` moved to the end when it occurs.
      observed <- unique(values)
      sorted_levels <- setdiff(sort(observed), na_level)
      if (na_level %in% observed) {
        sorted_levels <- c(sorted_levels, na_level)
      }
      values <- factor(values, levels = sorted_levels)
    }

    # Write the column back with its original label.
    data[, col] <- formatters::with_label(values, label = label)
  }

  data
}

#' Helper functions for accessing information from `rtables`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' These are a couple of functions that help with accessing the data in `rtables` objects.
#' Currently these work for occurrence tables, which are defined as having a count as the first
#' element and a fraction as the second element in each cell.
#'
#' @seealso [prune_occurrences] for usage of these functions.
#'
#' @name rtables_access
NULL

#' @describeIn rtables_access Helper function to extract the first values from each content
#'   cell and from specified columns in a `TableRow`. Defaults to all columns.
#'
#' @param table_row (`TableRow`)\cr an analysis row in an occurrence table.
#' @param col_names (`character`)\cr the names of the columns to extract from.
#' @param col_indices (`integer`)\cr the indices of the columns to extract from. If `col_names` are provided,
#'   then these are inferred from the names of `table_row`. Note that this currently only works well with a single
#'   column split.
#'
#' @return
#' * `h_row_first_values()` returns a `vector` of numeric values.
#'
#' @examples
#' tbl <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("RACE") %>%
#'   analyze("AGE", function(x) {
#'     list(
#'       "mean (sd)" = rcell(c(mean(x), sd(x)), format = "xx.x (xx.x)"),
#'       "n" = length(x),
#'       "frac" = rcell(c(0.1, 0.1), format = "xx (xx)")
#'     )
#'   }) %>%
#'   build_table(tern_ex_adsl) %>%
#'   prune_table()
#' tree_row_elem <- collect_leaves(tbl[2, ])[[1]]
#' result <- max(h_row_first_values(tree_row_elem))
#' result
#'
#' @export
h_row_first_values <- function(table_row,
                               col_names = NULL,
                               col_indices = NULL) {
  # Resolve the column selection and make sure it lies within the row.
  col_indices <- check_names_indices(table_row, col_names, col_indices)
  checkmate::assert_integerish(col_indices)
  checkmate::assert_subset(col_indices, seq_len(ncol(table_row)))

  selected_cells <- row_values(table_row)[col_indices]

  # First value of a cell; an empty (NULL) cell yields NA.
  first_or_na <- function(cell) {
    if (is.null(cell)) NA_real_ else cell[1L]
  }

  vapply(selected_cells, first_or_na, FUN.VALUE = numeric(1))
}

#' @describeIn rtables_access Helper function that extracts row values and checks if they are
#'   convertible to integers (`integerish` values).
#'
#' @return
#' * `h_row_counts()` returns a `vector` of numeric values.
#'
#' @examples
#' # Row counts (integer values)
#' # h_row_counts(tree_row_elem) # Fails because there are no integers
#' # Using values with integers
#' tree_row_elem <- collect_leaves(tbl[3, ])[[1]]
#' result <- h_row_counts(tree_row_elem)
#' # result
#'
#' @export
h_row_counts <- function(table_row,
                         col_names = NULL,
                         col_indices = NULL) {
  # The counts are the first value of each selected cell.
  counts <- h_row_first_values(table_row, col_names, col_indices)
  # Fail if any extracted value is not a whole number.
  checkmate::assert_integerish(counts)
  counts
}

#' @describeIn rtables_access Helper function to extract fractions from specified columns in a `TableRow`.
#'   More specifically it extracts the second values from each content cell and checks it is a fraction.
#'
#' @return
#' * `h_row_fractions()` returns a `vector` of proportions.
#'
#' @examples
#' # Row fractions
#' tree_row_elem <- collect_leaves(tbl[4, ])[[1]]
#' h_row_fractions(tree_row_elem)
#'
#' @export
h_row_fractions <- function(table_row,
                            col_names = NULL,
                            col_indices = NULL) {
  col_indices <- check_names_indices(table_row, col_names, col_indices)
  row_vals <- row_values(table_row)[col_indices]

  # The fraction is the second value of each cell. `vapply()` is used instead
  # of `sapply()` so that the result is always a numeric vector: `sapply()`
  # returns a list for an empty selection or for cells of unequal length,
  # which only surfaced later as a confusing type error in the assertion.
  # A cell with a single value still yields NA, as before.
  fractions <- vapply(
    row_vals,
    function(cell) cell[2L],
    FUN.VALUE = numeric(1)
  )

  # Fractions must be proportions.
  checkmate::assert_numeric(fractions, lower = 0, upper = 1)
  fractions
}

#' @describeIn rtables_access Helper function to extract column counts from specified columns in a table.
#'
#' @param table (`VTableNodeInfo`)\cr an occurrence table or row.
#'
#' @return
#' * `h_col_counts()` returns a `vector` of column counts.
#'
#' @export
h_col_counts <- function(table,
                         col_names = NULL,
                         col_indices = NULL) {
  selected <- check_names_indices(table, col_names, col_indices)
  # Name the selected counts after `col_names`; when `col_names` is NULL the
  # result carries no names.
  stats::setNames(col_counts(table)[selected], col_names)
}

#' @describeIn rtables_access Helper function to get first row of content table of current table.
#'
#' @return
#' * `h_content_first_row()` returns a row from an `rtables` table.
#'
#' @export
h_content_first_row <- function(table) {
  # First child of the content table attached to `table`.
  tree_children(content_table(table))[[1]]
}

#' @describeIn rtables_access Helper function which says whether current table is a leaf in the tree.
#'
#' @return
#' * `is_leaf_table()` returns a `logical` value indicating whether current table is a leaf.
#'
#' @keywords internal
is_leaf_table <- function(table) {
  # A leaf table is one whose children are all of class `ElementaryTable`.
  classes_of_children <- sapply(tree_children(table), class)
  identical(unique(classes_of_children), "ElementaryTable")
}

#' @describeIn rtables_access Internal helper function that tests standard inputs for column indices.
#'
#' @return
#' * `check_names_indices` returns column indices.
#'
#' @keywords internal
check_names_indices <- function(table_row,
                                col_names = NULL,
                                col_indices = NULL) {
  # Names and indices are mutually exclusive ways of selecting columns.
  if (!is.null(col_names) && !is.null(col_indices)) {
    stop(
      "Inserted both col_names and col_indices when selecting row values. ",
      "Please choose one."
    )
  }

  # Names are translated into indices.
  if (!is.null(col_names)) {
    col_indices <- h_col_indices(table_row, col_names)
  }

  # Without any selection, all columns are used. Objects without a column
  # dimension fall back to their length.
  if (is.null(col_indices)) {
    n_cols <- ncol(table_row)
    if (is.null(n_cols)) {
      n_cols <- length(table_row)
    }
    col_indices <- seq_len(n_cols)
  }

  col_indices
}

#' Occurrence table pruning
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Family of constructor and condition functions to flexibly prune occurrence tables.
#' The condition functions always return whether the row result is greater than or equal to the threshold.
#' Since they are of class [CombinationFunction()] they can be logically combined with other condition
#' functions.
#'
#' @note Since most table specifications are worded positively, we name our constructor and condition
#'   functions positively, too. However, note that the result of [keep_rows()] says what
#'   should be pruned, to conform with the [rtables::prune_table()] interface.
#'
#' @examples
#' \donttest{
#' tab <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("RACE") %>%
#'   split_rows_by("STRATA1") %>%
#'   summarize_row_groups() %>%
#'   analyze_vars("COUNTRY", .stats = "count_fraction") %>%
#'   build_table(DM)
#' }
#'
#' @name prune_occurrences
NULL

#' @describeIn prune_occurrences Constructor for creating pruning functions based on
#'   a row condition function. This removes all analysis rows (`TableRow`) that should be
#'   pruned, i.e., don't fulfill the row condition. It removes the sub-tree if there are no
#'   children left.
#'
#' @param row_condition (`CombinationFunction`)\cr condition function which works on individual
#'   analysis rows and flags whether these should be kept in the pruned table.
#'
#' @return
#' * `keep_rows()` returns a pruning function that can be used with [rtables::prune_table()]
#'   to prune an `rtables` table.
#'
#' @examples
#' \donttest{
#' # `keep_rows`
#' is_non_empty <- !CombinationFunction(all_zero_or_na)
#' prune_table(tab, keep_rows(is_non_empty))
#' }
#'
#' @export
keep_rows <- function(row_condition) {
  checkmate::assert_function(row_condition)
  function(table_tree) {
    if (!inherits(table_tree, "TableRow")) {
      # Anything that is not a row is pruned only when it has no children.
      return(identical(length(tree_children(table_tree)), 0L))
    }
    # Rows are pruned when they do not fulfill the condition.
    !row_condition(table_tree)
  }
}

#' @describeIn prune_occurrences Constructor for creating pruning functions based on
#'   a condition for the (first) content row in leaf tables. This removes all leaf tables where
#'   the first content row does not fulfill the condition. It does not check individual rows.
#'   It then proceeds recursively by removing the sub tree if there are no children left.
#'
#' @param content_row_condition (`CombinationFunction`)\cr condition function which works on individual
#'   first content rows of leaf tables and flags whether these leaf tables should be kept in the pruned table.
#'
#' @return
#' * `keep_content_rows()` returns a pruning function that checks the condition on the first content
#'   row of leaf tables in the table.
#'
#' @examples
#' # `keep_content_rows`
#' \donttest{
#' more_than_twenty <- has_count_in_cols(atleast = 20L, col_names = names(tab))
#' prune_table(tab, keep_content_rows(more_than_twenty))
#' }
#'
#' @export
keep_content_rows <- function(content_row_condition) {
  checkmate::assert_function(content_row_condition)
  function(table_tree) {
    if (is_leaf_table(table_tree)) {
      # Leaf tables are pruned when their first content row fails the condition.
      !content_row_condition(h_content_first_row(table_tree))
    } else if (inherits(table_tree, "DataRow")) {
      # Individual data rows are never pruned here.
      FALSE
    } else {
      # Everything else is pruned only when it has no children.
      identical(length(tree_children(table_tree)), 0L)
    }
  }
}

#' @describeIn prune_occurrences Constructor for creating condition functions on total counts in the specified columns.
#'
#' @param atleast (`numeric(1)`)\cr threshold which should be met in order to keep the row.
#' @param ... arguments for row or column access, see [`rtables_access`]: either `col_names` (`character`) including
#'   the names of the columns which should be used, or alternatively `col_indices` (`integer`) giving the indices
#'   directly instead.
#'
#' @return
#' * `has_count_in_cols()` returns a condition function that sums the counts in the specified column.
#'
#' @examples
#' \donttest{
#' more_than_one <- has_count_in_cols(atleast = 1L, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_one))
#' }
#'
#' @export
has_count_in_cols <- function(atleast, ...) {
  checkmate::assert_count(atleast)
  # Condition: the sum of the counts over the selected columns reaches `atleast`.
  condition <- function(table_row) {
    sum(h_row_counts(table_row, ...)) >= atleast
  }
  CombinationFunction(condition)
}

#' @describeIn prune_occurrences Constructor for creating condition functions on any of the counts in
#'   the specified columns satisfying a threshold.
#'
#' @param atleast (`numeric(1)`)\cr threshold which should be met in order to keep the row.
#'
#' @return
#' * `has_count_in_any_col()` returns a condition function that compares the counts in the
#'   specified columns with the threshold.
#'
#' @examples
#' \donttest{
#' # `has_count_in_any_col`
#' any_more_than_one <- has_count_in_any_col(atleast = 1L, col_names = names(tab))
#' prune_table(tab, keep_rows(any_more_than_one))
#' }
#'
#' @export
has_count_in_any_col <- function(atleast, ...) {
  checkmate::assert_count(atleast)
  # Condition: the count in at least one selected column reaches `atleast`.
  condition <- function(table_row) {
    any(h_row_counts(table_row, ...) >= atleast)
  }
  CombinationFunction(condition)
}

#' @describeIn prune_occurrences Constructor for creating condition functions on total fraction in
#'   the specified columns.
#'
#' @return
#' * `has_fraction_in_cols()` returns a condition function that sums the counts in the
#'   specified column, and computes the fraction by dividing by the total column counts.
#'
#' @examples
#' \donttest{
#' # `has_fraction_in_cols`
#' more_than_five_percent <- has_fraction_in_cols(atleast = 0.05, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_five_percent))
#' }
#'
#' @export
has_fraction_in_cols <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)
  # Condition: summed row counts divided by summed column counts, over the
  # selected columns, reaches `atleast`.
  condition <- function(table_row) {
    numerator <- sum(h_row_counts(table_row, ...))
    denominator <- sum(h_col_counts(table_row, ...))
    numerator / denominator >= atleast
  }
  CombinationFunction(condition)
}

#' @describeIn prune_occurrences Constructor for creating condition functions on any fraction in
#'   the specified columns.
#'
#' @return
#' * `has_fraction_in_any_col()` returns a condition function that looks at the fractions
#'  in the specified columns and checks whether any of them fulfill the threshold.
#'
#' @examples
#' \donttest{
#' # `has_fraction_in_any_col`
#' any_atleast_five_percent <- has_fraction_in_any_col(atleast = 0.05, col_names = names(tab))
#' prune_table(tab, keep_rows(any_atleast_five_percent))
#' }
#'
#' @export
has_fraction_in_any_col <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)
  # Condition: the fraction in at least one selected column reaches `atleast`.
  condition <- function(table_row) {
    any(h_row_fractions(table_row, ...) >= atleast)
  }
  CombinationFunction(condition)
}

#' @describeIn prune_occurrences Constructor for creating condition function that checks the difference
#'   between the fractions reported in each specified column.
#'
#' @return
#' * `has_fractions_difference()` returns a condition function that extracts the fractions of each
#'   specified column, and computes the difference of the minimum and maximum.
#'
#' @examples
#' \donttest{
#' # `has_fractions_difference`
#' more_than_five_percent_diff <- has_fractions_difference(atleast = 0.05, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_five_percent_diff))
#' }
#'
#' @export
has_fractions_difference <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)
  # Condition: largest minus smallest fraction over the selected columns
  # reaches `atleast`.
  condition <- function(table_row) {
    diff(range(h_row_fractions(table_row, ...))) >= atleast
  }
  CombinationFunction(condition)
}

#' @describeIn prune_occurrences Constructor for creating condition function that checks the difference
#'   between the counts reported in each specified column.
#'
#' @return
#' * `has_counts_difference()` returns a condition function that extracts the counts of each
#'   specified column, and computes the difference of the minimum and maximum.
#'
#' @examples
#' \donttest{
#' more_than_one_diff <- has_counts_difference(atleast = 1L, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_one_diff))
#' }
#'
#' @export
has_counts_difference <- function(atleast, ...) {
  checkmate::assert_count(atleast)
  # Condition: largest minus smallest count over the selected columns
  # reaches `atleast`.
  condition <- function(table_row) {
    diff(range(h_row_counts(table_row, ...))) >= atleast
  }
  CombinationFunction(condition)
}

#' Count occurrences
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [count_occurrences()] creates a layout element to calculate occurrence counts for patients.
#'
#' This function analyzes the variable(s) supplied to `vars` and returns a table of occurrence counts for
#' each unique value (or level) of the variable(s). This variable (or variables) must be
#' non-numeric. The `id` variable is used to indicate unique subject identifiers (defaults to `USUBJID`).
#'
#' If there are multiple occurrences of the same value recorded for a patient, the value is only counted once.
#'
#' The summarize function [summarize_occurrences()] performs the same function as [count_occurrences()] except it
#' creates content rows, not data rows, to summarize the current table row/column context and operates on the level of
#' the latest row split or the root of the table if no row splits have occurred.
#'
#' @inheritParams argument_convention
#' @param drop (`flag`)\cr whether non-appearing occurrence levels should be dropped from the resulting table.
#'   Note that in that case the remaining occurrence levels in the table are sorted alphabetically.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("count_occurrences"), type = "sh")``
#'
#' @note By default, occurrences which don't appear in a given row split are dropped from the table and
#'   the occurrences in the table are sorted alphabetically per row split. Therefore, the corresponding layout
#'   needs to use `split_fun = drop_split_levels` in the `split_rows_by` calls. Use `drop = FALSE` if you would
#'   like to show all occurrences.
#'
#' @examples
#' library(dplyr)
#' df <- data.frame(
#'   USUBJID = as.character(c(
#'     1, 1, 2, 4, 4, 4,
#'     6, 6, 6, 7, 7, 8
#'   )),
#'   MHDECOD = c(
#'     "MH1", "MH2", "MH1", "MH1", "MH1", "MH3",
#'     "MH2", "MH2", "MH3", "MH1", "MH2", "MH4"
#'   ),
#'   ARM = rep(c("A", "B"), each = 6),
#'   SEX = c("F", "F", "M", "M", "M", "M", "F", "F", "F", "M", "M", "F")
#' )
#' df_adsl <- df %>%
#'   select(USUBJID, ARM) %>%
#'   unique()
#'
#' @name count_occurrences
#' @order 1
NULL

#' @describeIn count_occurrences Statistics function which counts number of patients that report an
#' occurrence.
#'
#' @param denom (`string`)\cr choice of denominator for proportion. Options are:
#'   * `N_col`: total number of patients in this column across rows.
#'   * `n`: number of patients with any occurrences.
#'   * `N_row`: total number of patients in this row across columns.
#'
#' @return
#' * `s_count_occurrences()` returns a list with:
#'   * `count`: list of counts with one element per occurrence.
#'   * `count_fraction`: list of counts and fractions with one element per occurrence.
#'   * `count_fraction_fixed_dp`: list with the same counts and fractions as `count_fraction`.
#'   * `fraction`: list of numerators and denominators with one element per occurrence.
#'
#' @examples
#' # Count unique occurrences per subject.
#' s_count_occurrences(
#'   df,
#'   .N_col = 4L,
#'   .N_row = 4L,
#'   .df_row = df,
#'   .var = "MHDECOD",
#'   id = "USUBJID"
#' )
#'
#' @export
s_count_occurrences <- function(df,
                                .var = "MHDECOD",
                                .N_col, # nolint
                                .N_row, # nolint
                                .df_row,
                                ...,
                                drop = TRUE,
                                id = "USUBJID",
                                denom = c("N_col", "n", "N_row")) {
  checkmate::assert_flag(drop)
  assert_df_with_variables(df, list(range = .var, id = id))
  checkmate::assert_count(.N_col)
  checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[id]], classes = c("factor", "character"))

  occurrences <- df[[.var]]
  if (drop) {
    # Restrict the levels to those observed in `.df_row`, sorted. The original
    # level order is not preserved: looking it up would take more time for
    # large level sets and is not possible for character input.
    occurrence_levels <- sort(unique(.df_row[[.var]]))
    if (length(occurrence_levels) == 0) {
      stop(
        "no empty `.df_row` input allowed when `drop = TRUE`,",
        " please use `split_fun = drop_split_levels` in the `rtables` `split_rows_by` calls"
      )
    }
    occurrences <- factor(occurrences, levels = occurrence_levels)
  }

  ids <- factor(df[[id]])

  # Resolve the denominator choice into a number.
  denom <- switch(match.arg(denom),
    n = nlevels(ids),
    N_row = .N_row,
    N_col = .N_col
  )

  # Number of distinct ids per occurrence: an id is counted once however
  # often the occurrence is recorded for it.
  seen_per_id <- table(occurrences, ids) > 0
  id_counts <- as.list(rowSums(seen_per_id))

  # Count with fraction; a zero count over a zero denominator gives 0.
  count_with_fraction <- lapply(id_counts, function(n_ids) {
    if (n_ids == 0 && denom == 0) {
      c(0, 0)
    } else {
      c(n_ids, n_ids / denom)
    }
  })

  list(
    count = id_counts,
    count_fraction = count_with_fraction,
    count_fraction_fixed_dp = count_with_fraction,
    fraction = lapply(id_counts, function(n_ids) c("num" = n_ids, "denom" = denom))
  )
}

#' @describeIn count_occurrences Formatted analysis function which is used as `afun`
#'   in `count_occurrences()`.
#'
#' @return
#' * `a_count_occurrences()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_count_occurrences(
#'   df,
#'   .N_col = 4L,
#'   .df_row = df,
#'   .var = "MHDECOD",
#'   id = "USUBJID"
#' )
#'
#' @export
a_count_occurrences <- function(df,
                                labelstr = "",
                                ...,
                                .stats = NULL,
                                .stat_names = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Split `...` into the layout-provided parameter names and the remaining extra arguments
  dots_extra_args <- list(...)
  extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters))
  dots_extra_args$.additional_fun_parameters <- NULL

  # Separate default statistics from user-defined statistic functions
  stats_split <- .split_std_from_custom_stats(.stats)
  .stats <- stats_split$all_stats
  custom_stat_functions <- stats_split$custom_stats

  # Compute all statistics in one call
  stat_fun_args <- c(
    df = list(df),
    extra_afun_params,
    dots_extra_args
  )
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_count_occurrences,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = stat_fun_args
  )

  # Nothing computed: return one NA row per requested statistic
  if (is.null(unlist(x_stats))) {
    na_rows <- stats::setNames(as.list(rep(NA, length(.stats))), .stats)
    return(in_rows(.list = na_rows))
  }

  # Resolve statistics, then formats, labels and indentation per statistic level
  .stats <- get_stats("count_occurrences", stats_in = .stats, custom_stats_in = names(custom_stat_functions))
  x_stats <- x_stats[.stats]
  levels_per_stats <- lapply(x_stats, names)
  .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats)
  .labels <- get_labels_from_stats(.stats, .labels, levels_per_stats)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats)

  # Flatten the nested statistics so that each level becomes one row, named like its format entry
  x_stats <- stats::setNames(.unlist_keep_nulls(x_stats[.stats]), names(.formats))

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  # The flattened labels serve both as row names and as row labels
  flat_labels <- .unlist_keep_nulls(.labels)

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = flat_labels,
    .stat_names = .stat_names,
    .labels = flat_labels,
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}

#' @describeIn count_occurrences Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_occurrences()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_occurrences()` to the table layout.
#'
#' @examples
#' # Create table layout
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_occurrences(vars = "MHDECOD", .stats = c("count_fraction"))
#'
#' # Apply table layout to data and produce `rtable` object
#' tbl <- lyt %>%
#'   build_table(df, alt_counts_df = df_adsl) %>%
#'   prune_table()
#'
#' tbl
#'
#' @export
#' @order 2
count_occurrences <- function(lyt,
                              vars,
                              id = "USUBJID",
                              drop = TRUE,
                              var_labels = vars,
                              show_labels = "hidden",
                              riskdiff = FALSE,
                              na_str = default_na_str(),
                              nested = TRUE,
                              ...,
                              table_names = vars,
                              .stats = "count_fraction_fixed_dp",
                              .stat_names = NULL,
                              .formats = NULL,
                              .labels = NULL,
                              .indent_mods = NULL) {
  checkmate::assert_flag(riskdiff)

  # With risk differences, `afun_riskdiff` is the analysis function and the regular
  # analysis function is handed to it through the `afun` extra argument.
  if (riskdiff) {
    afun <- afun_riskdiff
    riskdiff_args <- list(afun = list("s_count_occurrences" = a_count_occurrences))
  } else {
    afun <- a_count_occurrences
    riskdiff_args <- NULL
  }

  # Standard extra arguments: `.stats` is always passed on, the others only when supplied
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  extra_args <- c(
    list(".stats" = .stats),
    Filter(Negate(is.null), optional_args)
  )

  # Additional arguments for the statistics function
  extra_args <- c(
    extra_args,
    id = id, drop = drop,
    riskdiff_args,
    ...
  )

  # Extend the formals of the analysis function by the additional layout parameters
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]])

  analyze(
    lyt = lyt,
    vars = vars,
    afun = afun,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names
  )
}

#' @describeIn count_occurrences Layout-creating function which can take content function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @return
#' * `summarize_occurrences()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows
#'   containing the statistics from `s_count_occurrences()` to the table layout.
#'
#' @examples
#' # Layout creating function with custom format.
#' basic_table() %>%
#'   add_colcounts() %>%
#'   split_rows_by("SEX", child_labels = "visible") %>%
#'   summarize_occurrences(
#'     var = "MHDECOD",
#'     .formats = c("count_fraction" = "xx.xx (xx.xx%)")
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' @export
#' @order 3
summarize_occurrences <- function(lyt,
                                  var,
                                  id = "USUBJID",
                                  drop = TRUE,
                                  riskdiff = FALSE,
                                  na_str = default_na_str(),
                                  ...,
                                  .stats = "count_fraction_fixed_dp",
                                  .stat_names = NULL,
                                  .formats = NULL,
                                  .indent_mods = 0L,
                                  .labels = NULL) {
  checkmate::assert_flag(riskdiff)

  # With risk differences, `afun_riskdiff` is the content function and the regular
  # analysis function is handed to it through the `afun` extra argument.
  if (riskdiff) {
    afun <- afun_riskdiff
    riskdiff_args <- list(afun = list("s_count_occurrences" = a_count_occurrences))
  } else {
    afun <- a_count_occurrences
    riskdiff_args <- NULL
  }

  # Standard extra arguments: `.stats` is always passed on, the others only when supplied
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels
  )
  extra_args <- c(
    list(".stats" = .stats),
    Filter(Negate(is.null), optional_args)
  )

  # A single `.indent_mods` value indents the whole content row via `indent_mod`; any other
  # non-NULL value is forwarded to the analysis function and the row indent stays at 0.
  indent_mod <- 0L
  if (length(.indent_mods) == 1) {
    indent_mod <- .indent_mods
  } else if (!is.null(.indent_mods)) {
    extra_args[[".indent_mods"]] <- .indent_mods
  }

  # Additional arguments for the statistics function
  extra_args <- c(
    extra_args,
    id = id, drop = drop,
    riskdiff_args,
    ...
  )

  # Extend the formals of the content function by the additional layout parameters
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]])

  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = afun,
    na_str = na_str,
    extra_args = extra_args,
    indent_mod = indent_mod
  )
}

#' Re-implemented `range()` default S3 method for numerical objects
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This function returns `c(NA, NA)` instead of `c(-Inf, Inf)` for zero-length data
#' without any warnings.
#'
#' @param x (`numeric`)\cr a sequence of numbers for which the range is computed.
#' @param na.rm (`flag`)\cr flag indicating if `NA` should be omitted.
#' @param finite (`flag`)\cr flag indicating if non-finite elements should be removed.
#'
#' @return A 2-element vector of class `numeric`.
#'
#' @examples
#' x <- rnorm(20, 1)
#' range_noinf(x, na.rm = TRUE)
#' range_noinf(rep(NA, 20), na.rm = TRUE)
#' range(rep(NA, 20), na.rm = TRUE)
#'
#' @export
range_noinf <- function(x, na.rm = FALSE, finite = FALSE) { # nolint
  checkmate::assert_numeric(x)

  # `finite` takes precedence over `na.rm`, because `is.finite()` is FALSE for missing values too
  if (finite) {
    x <- x[is.finite(x)]
  } else if (na.rm) {
    x <- x[!is.na(x)]
  }

  if (length(x) > 0) {
    c(min(x), max(x))
  } else {
    # Nothing left to summarize: return missing bounds of the same type as the input
    rval <- c(NA, NA)
    mode(rval) <- typeof(x)
    rval
  }
}

#' Utility function to create label for confidence interval
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#'
#' @return A `string`.
#'
#' @export
f_conf_level <- function(conf_level) {
  assert_proportion_value(conf_level)
  # Express the level as a percentage, e.g. 0.95 becomes "95% CI"
  percent <- conf_level * 100
  paste0(percent, "% CI")
}

#' Utility function to create label for p-value
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param test_mean (`numeric(1)`)\cr mean value to test under the null hypothesis.
#'
#' @return A `string`.
#'
#' @export
f_pval <- function(test_mean) {
  checkmate::assert_numeric(test_mean, len = 1)
  # The label states the null hypothesis value next to the p-value
  null_hypothesis <- paste0("H0: mean = ", test_mean)
  paste0("p-value (", null_hypothesis, ")")
}

#' Utility function to return a named list of covariate names
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param covariates (`character`)\cr a vector that can contain single variable names (such as
#'   `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
#'
#' @return A named `list` of `character` vector.
#'
#' @examples
#' get_covariates(c("a * b", "c"))
#'
#' @export
get_covariates <- function(covariates) {
  checkmate::assert_character(covariates)

  # Break interaction terms such as "X1 * X2" into their single variables
  terms <- unlist(strsplit(covariates, "\\*"))
  cov_vars <- unique(trimws(terms))

  # Each variable is both the name and the value of its list element
  cov_list <- as.list(cov_vars)
  names(cov_list) <- cov_vars
  cov_list
}

#' Replicate entries of a vector if required
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Replicate entries of a vector if required.
#'
#' @inheritParams argument_convention
#' @param n (`integer(1)`)\cr number of entries that are needed.
#'
#' @return `x` if it has the required length already or is `NULL`,
#'   otherwise if it is scalar the replicated version of it with `n` entries.
#'
#' @note This function will fail if `x` is neither a scalar nor of length `n`.
#'
#' @export
to_n <- function(x, n) {
  # `NULL` is passed through untouched
  if (is.null(x)) {
    return(NULL)
  }

  len <- length(x)
  # A scalar is recycled to `n` entries; this check comes before the length check
  if (len == 1) {
    return(rep(x, n))
  }
  if (len != n) {
    stop("dimension mismatch")
  }
  x
}

#' Check element dimension
#'
#' Checks if the elements in `...` have the same dimension.
#'
#' @param ... (`data.frame` or `vector`)\cr any data frames or vectors.
#' @param omit_null (`flag`)\cr whether `NULL` elements in `...` should be omitted from the check.
#'
#' @return A `logical` value.
#'
#' @keywords internal
check_same_n <- function(..., omit_null = TRUE) {
  dots <- list(...)

  # Every element needs a label for the error messages. Unnamed elements fall back to their
  # position (e.g. "..2"), which also keeps `Map()` working when `...` has no names at all.
  labels <- names(dots)
  if (is.null(labels)) {
    labels <- rep("", length(dots))
  }
  unnamed <- !nzchar(labels)
  labels[unnamed] <- paste0("..", which(unnamed))

  # Size of each element: rows for data frames, length for atomic vectors, NA for omitted NULLs
  n_list <- Map(
    function(x, name) {
      if (is.null(x)) {
        if (omit_null) {
          NA_integer_
        } else {
          stop("arg ", name, " is not supposed to be NULL")
        }
      } else if (is.data.frame(x)) {
        nrow(x)
      } else if (is.atomic(x)) {
        length(x)
      } else {
        stop("data structure for ", name, " is currently not supported")
      }
    },
    dots, labels
  )
  names(n_list) <- labels

  # Omitted NULL elements (NA) do not take part in the comparison
  n <- stats::na.omit(unlist(n_list))

  if (length(unique(n)) > 1) {
    # Report every element whose size differs from that of the first element
    sel <- which(n != n[1])
    stop("Dimension mismatch: ", paste(names(n)[sel], collapse = ", "), " do not have N=", n[1])
  }

  TRUE
}

#' Utility function to check if a float value is equal to another float value
#'
#' Uses `.Machine$double.eps` as the tolerance for the comparison.
#'
#' @param x (`numeric(1)`)\cr a float number.
#' @param y (`numeric(1)`)\cr a float number.
#'
#' @return `TRUE` if the absolute difference between `x` and `y` is below the tolerance, otherwise `FALSE`.
#'
#' @keywords internal
.is_equal_float <- function(x, y) {
  checkmate::assert_number(x)
  checkmate::assert_number(y)

  # The two numbers count as equal when their absolute difference is below machine epsilon
  difference <- abs(x - y)
  difference < .Machine$double.eps
}

#' Make names without dots
#'
#' @param nams (`character`)\cr vector of original names.
#'
#' @return A `character` `vector` of proper names, which does not use dots in contrast to [make.names()].
#'
#' @keywords internal
make_names <- function(nams) {
  # `make.names()` replaces invalid characters by dots; these dots are then removed,
  # together with any dots that were already part of the input
  with_dots <- make.names(nams)
  gsub(".", "", with_dots, fixed = TRUE)
}

#' Conversion of months to days
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Conversion of months to days. This is an approximative calculation because it
#' considers each month as having an average of 30.4375 days.
#'
#' @param x (`numeric`)\cr time in months.
#'
#' @return A `numeric` vector with the time in days.
#'
#' @examples
#' x <- c(13.25, 8.15, 1, 2.834)
#' month2day(x)
#'
#' @export
month2day <- function(x) {
  checkmate::assert_numeric(x)
  # Average month length in days used for the conversion
  days_per_month <- 30.4375
  x * days_per_month
}

#' Conversion of days to months
#'
#' @param x (`numeric`)\cr time in days.
#'
#' @return A `numeric` vector with the time in months.
#'
#' @examples
#' x <- c(403, 248, 30, 86)
#' day2month(x)
#'
#' @export
day2month <- function(x) {
  checkmate::assert_numeric(x)
  # Average month length in days used for the conversion
  days_per_month <- 30.4375
  x / days_per_month
}

#' Return an empty numeric if all elements are `NA`.
#'
#' @param x (`numeric`)\cr vector.
#'
#' @return An empty `numeric` if all elements of `x` are `NA`, otherwise `x`.
#'
#' @examples
#' x <- c(NA, NA, NA)
#' # Internal function - empty_vector_if_na
#' @keywords internal
empty_vector_if_na <- function(x) {
  # Anything with at least one non-missing element is returned unchanged
  if (!all(is.na(x))) {
    return(x)
  }
  # All elements missing (this includes zero-length input): return an empty numeric
  numeric()
}

#' Element-wise combination of two vectors
#'
#' @param x (`vector`)\cr first vector to combine.
#' @param y (`vector`)\cr second vector to combine.
#'
#' @return A `list` where each element combines corresponding elements of `x` and `y`.
#'
#' @examples
#' combine_vectors(1:3, 4:6)
#'
#' @export
combine_vectors <- function(x, y) {
  checkmate::assert_vector(x)
  checkmate::assert_vector(y, len = length(x))

  # Stack both vectors as rows, so that every column holds one pair of corresponding elements
  stacked <- as.data.frame(rbind(x, y))
  pairs <- lapply(stacked, `c`)

  # The column names generated by the data frame conversion are dropped
  unname(pairs)
}

#' Extract elements by name
#'
#' This utility function extracts elements from a vector `x` by `names`.
#' Differences to the standard `[` function are:
#'
#' - If `x` is `NULL`, then still always `NULL` is returned (same as in base function).
#' - If `x` is not `NULL`, then the intersection of its names is made with `names` and those
#'   elements are returned. That is, `names` which don't appear in `x` are not returned as `NA`s.
#'
#' @param x (named `vector`)\cr where to extract named elements from.
#' @param names (`character`)\cr vector of names to extract.
#'
#' @return `NULL` if `x` is `NULL`, otherwise the extracted elements from `x`.
#'
#' @keywords internal
extract_by_name <- function(x, names) {
  if (is.null(x)) {
    return(NULL)
  }
  checkmate::assert_named(x)
  checkmate::assert_character(names)

  # Only names that exist in `x` are extracted, in the order in which they appear in `x`
  which_extract <- intersect(names(x), names)
  if (length(which_extract) == 0) {
    return(NULL)
  }
  x[which_extract]
}

#' Labels for adverse event baskets
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param aesi (`character`)\cr vector with standardized MedDRA query name (e.g. `SMQxxNAM`) or customized query
#'   name (e.g. `CQxxNAM`).
#' @param scope (`character`)\cr vector with scope of query (e.g. `SMQxxSC`).
#'
#' @return A `string` with the standard label for the AE basket.
#'
#' @examples
#' adae <- tern_ex_adae
#'
#' # Standardized query label includes scope.
#' aesi_label(adae$SMQ01NAM, scope = adae$SMQ01SC)
#'
#' # Customized query label.
#' aesi_label(adae$CQ01NAM)
#'
#' @export
aesi_label <- function(aesi, scope = NULL) {
  checkmate::assert_character(aesi)
  checkmate::assert_character(scope, null.ok = TRUE)

  # The variable label is read before the vector is transformed below
  fallback_label <- obj_label(aesi)

  # Reduce the basket names to the distinct non-missing values
  aesi <- unique(sas_na(aesi))
  aesi <- aesi[!is.na(aesi)]

  # Without exactly one basket name the variable label is used
  if (length(aesi) != 1) {
    return(fallback_label)
  }

  # A single basket name without scope is the label itself
  if (is.null(scope)) {
    return(aesi)
  }

  # With a scope, exactly one distinct non-missing scope value is required and appended in parentheses
  scope <- unique(sas_na(scope))
  scope <- scope[!is.na(scope)]
  checkmate::assert_string(scope)
  paste0(aesi, " (", scope, ")")
}

#' Indicate study arm variable in formula
#'
#' We use `study_arm` to indicate the study arm variable in `tern` formulas.
#'
#' @param x arm information
#'
#' @return `x`
#'
#' @keywords internal
study_arm <- function(x) {
  # Record the expression that was passed as `x` in the `varname` attribute
  arm_expression <- deparse(substitute(x))
  structure(x, varname = arm_expression)
}

#' Smooth function with optional grouping
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This produces `loess` smoothed estimates of `y` with Student confidence intervals.
#'
#' @param df (`data.frame`)\cr data set containing all analysis variables.
#' @param x (`string`)\cr x column name.
#' @param y (`string`)\cr y column name.
#' @param groups (`character` or `NULL`)\cr vector with optional grouping variables names.
#' @param level (`proportion`)\cr level of confidence interval to use (0.95 by default).
#'
#' @return A `data.frame` with original `x`, smoothed `y`, `ylow`, and `yhigh`, and
#'   optional `groups` variables formatted as `factor` type.
#'
#' @export
get_smooths <- function(df, x, y, groups = NULL, level = 0.95) {
  checkmate::assert_data_frame(df)
  df_cols <- colnames(df)
  checkmate::assert_string(x)
  checkmate::assert_subset(x, df_cols)
  checkmate::assert_numeric(df[[x]])
  checkmate::assert_string(y)
  checkmate::assert_subset(y, df_cols)
  checkmate::assert_numeric(df[[y]])

  if (!is.null(groups)) {
    checkmate::assert_character(groups)
    checkmate::assert_subset(groups, df_cols)
  }

  # Fits a loess curve of `y` on `x` and returns the fitted values with their standard errors
  smooths <- function(x, y) {
    stats::predict(stats::loess(y ~ x), se = TRUE)
  }

  # Smoothed estimates with a two-sided Student confidence band for one data subset.
  # A two-sided interval at confidence `level` needs the (1 + level) / 2 quantile.
  smooth_one <- function(d) {
    plx <- smooths(d[[x]], d[[y]])
    half_width <- stats::qt((1 + level) / 2, plx$df) * plx$se.fit
    data.frame(
      x = d[[x]],
      y = plx$fit,
      ylow = plx$fit - half_width,
      yhigh = plx$fit + half_width
    )
  }

  if (!is.null(groups)) {
    # Keep complete cases only and sort the rows by the grouping variables
    cc <- stats::complete.cases(df[c(x, y, groups)])
    df_c <- df[cc, c(x, y, groups)]
    # The ordering columns are passed unnamed so that a group name cannot match an `order()` argument
    row_sorting <- do.call("order", unname(as.list(df_c[, groups, drop = FALSE])))
    df_c_ordered <- df_c[row_sorting, , drop = FALSE]
    df_c_g <- data.frame(Map(as.factor, df_c_ordered[groups]))

    df_smooth_raw <- by(df_c_ordered, df_c_g, smooth_one)
    df_smooth <- do.call(rbind, df_smooth_raw)

    # `by()` stacks the group cells with the first grouping variable varying fastest, which differs
    # from the sorted row order as soon as there is more than one grouping variable. The same cell
    # structure is rebuilt on the row indices so that every smoothed row gets the labels of its own group.
    rows_by_cell <- tapply(seq_len(nrow(df_c_ordered)), df_c_g, function(i) i, simplify = FALSE)
    row_order <- unlist(rows_by_cell, use.names = FALSE)
    df_smooth[groups] <- df_c_g[row_order, , drop = FALSE]

    df_smooth
  } else {
    # No grouping: smooth all complete cases at once
    cc <- stats::complete.cases(df[c(x, y)])
    df_c <- df[cc, ]
    smooth_one(df_c)
  }
}

#' Number of available (non-missing entries) in a vector
#'
#' Small utility function for better readability.
#'
#' @param x (`vector`)\cr vector in which to count non-missing values.
#'
#' @return Number of non-missing values.
#'
#' @keywords internal
n_available <- function(x) {
  # Count the positions that hold a non-missing value
  is_available <- !is.na(x)
  length(which(is_available))
}

#' Reapply variable labels
#'
#' This is a helper function that is used in tests.
#'
#' @param x (`vector`)\cr vector of elements that needs new labels.
#' @param varlabels (`character`)\cr vector of labels for `x`.
#' @param ... further parameters to be added to the list.
#'
#' @return `x` with variable labels reapplied.
#'
#' @export
reapply_varlabels <- function(x, varlabels, ...) {
  # Labels given in `varlabels` and labels given via `...` are applied together
  new_labels <- c(as.list(varlabels), list(...))

  # Overwrite the labels of the named variables only
  current_labels <- formatters::var_labels(x)
  current_labels[names(new_labels)] <- as.character(new_labels)
  formatters::var_labels(x) <- current_labels
  x
}

#' Wrapper function of survival::clogit
#'
#' When model fitting failed, a more useful message would show.
#'
#' @param formula Model formula.
#' @param data data frame.
#' @param ... further parameters to be added to survival::clogit.
#'
#' @return When model fitting is successful, an object of class "clogit".\cr
#' When model fitting failed, an error message is shown.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' adrs_local <- tern_ex_adrs %>%
#'   dplyr::filter(ARMCD %in% c("ARM A", "ARM B")) %>%
#'   dplyr::mutate(
#'     RSP = dplyr::case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     ARMBIN = droplevels(ARMCD)
#'   )
#' dta <- adrs_local
#' dta <- dta[sample(nrow(dta)), ]
#' mod <- clogit_with_tryCatch(formula = RSP ~ ARMBIN * AGE + strata(STRATA1), data = dta)
#' }
#'
#' @export
clogit_with_tryCatch <- function(formula, data, ...) { # nolint
  tryCatch(
    survival::clogit(formula = formula, data = data, ...),
    error = function(e) {
      # Keep the established message as prefix and append the underlying cause,
      # so that the reason for the failed fit is not lost
      stop(
        "model not built successfully with survival::clogit: ",
        conditionMessage(e),
        call. = FALSE
      )
    }
  )
}

#' Helper functions for incidence rate
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param control (`list`)\cr parameters for estimation details, specified by using
#'   the helper function [control_incidence_rate()]. Possible parameter options are:
#'   * `conf_level`: (`proportion`)\cr confidence level for the estimated incidence rate.
#'   * `conf_type`: (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
#'     for confidence interval type.
#'   * `input_time_unit`: (`string`)\cr `day`, `week`, `month`, or `year` (default)
#'     indicating time unit for data input.
#'   * `num_pt_year`: (`numeric`)\cr time unit for desired output (in person-years).
#' @param person_years (`numeric(1)`)\cr total person-years at risk.
#' @param alpha (`numeric(1)`)\cr two-sided alpha-level for confidence interval.
#' @param n_events (`integer(1)`)\cr number of events observed.
#'
#' @return Estimated incidence rate, `rate`, and associated confidence interval, `rate_ci`.
#'
#' @seealso [incidence_rate]
#'
#' @name h_incidence_rate
NULL

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated confidence interval.
#'
#' @keywords internal
h_incidence_rate <- function(person_years,
                             n_events,
                             control = control_incidence_rate()) {
  conf_type <- control$conf_type
  alpha <- 1 - control$conf_level

  # Dispatch to the estimator that matches the requested confidence interval type
  est <- switch(conf_type,
    normal = h_incidence_rate_normal(person_years, n_events, alpha),
    normal_log = h_incidence_rate_normal_log(person_years, n_events, alpha),
    exact = h_incidence_rate_exact(person_years, n_events, alpha),
    byar = h_incidence_rate_byar(person_years, n_events, alpha)
  )

  # The estimators work per one person-year; rescale to the requested number of person-years
  scale <- control$num_pt_year
  list(
    rate = est$rate * scale,
    rate_ci = est$rate_ci * scale
  )
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated confidence interval based on the normal approximation for the
#'   incidence rate. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_normal(200, 2)
#'
#' @export
h_incidence_rate_normal <- function(person_years,
                                    n_events,
                                    alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  rate <- n_events / person_years
  rate_se <- sqrt(rate / person_years)

  # Symmetric interval around the estimate based on the normal quantile
  z <- stats::qnorm(1 - alpha / 2)
  list(
    rate = rate,
    rate_ci = rate + c(-1, 1) * z * rate_se
  )
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated confidence interval based on the normal approximation for the
#'   logarithm of the incidence rate. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_normal_log(200, 2)
#'
#' @export
h_incidence_rate_normal_log <- function(person_years,
                                        n_events,
                                        alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  rate <- n_events / person_years
  rate_se <- sqrt(rate / person_years)

  # The interval is built on the log scale and then transformed back
  log_rate <- log(rate)
  log_rate_se <- rate_se / rate
  z <- stats::qnorm(1 - alpha / 2)
  log_ci <- log_rate + c(-1, 1) * z * log_rate_se

  list(rate = rate, rate_ci = exp(log_ci))
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated exact confidence interval. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_exact(200, 2)
#'
#' @export
h_incidence_rate_exact <- function(person_years,
                                   n_events,
                                   alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  # Both limits are chi-square quantiles divided by twice the person-years
  lower_quantile <- stats::qchisq(p = (alpha) / 2, df = 2 * n_events)
  upper_quantile <- stats::qchisq(p = 1 - (alpha) / 2, df = 2 * n_events + 2)
  limits <- c(
    lower_quantile / (2 * person_years),
    upper_quantile / (2 * person_years)
  )

  list(rate = n_events / person_years, rate_ci = limits)
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated Byar's confidence interval. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_byar(200, 2)
#'
#' @export
h_incidence_rate_byar <- function(person_years,
                                  n_events,
                                  alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  # All three segments are based on the event count shifted by 0.5
  shifted_events <- n_events + 0.5
  center <- 1 - 1 / (9 * shifted_events)
  spread <- stats::qnorm(1 - alpha / 2) * sqrt(1 / shifted_events) / 3

  limits <- c(
    shifted_events * ((center - spread)^3) / person_years,
    shifted_events * ((center + spread)^3) / person_years
  )

  list(rate = n_events / person_years, rate_ci = limits)
}

#' Create a STEP graph
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Based on the STEP results, creates a `ggplot` graph showing the estimated HR or OR
#' along the continuous biomarker value subgroups.
#'
#' @param df (`tibble`)\cr result of [tidy.step()].
#' @param use_percentile (`flag`)\cr whether to use percentiles for the x axis or actual
#'   biomarker values.
#' @param est (named `list`)\cr `col` and `lty` settings for estimate line.
#' @param ci_ribbon (named `list` or `NULL`)\cr `fill` and `alpha` settings for the confidence interval
#'   ribbon area, or `NULL` to not plot a CI ribbon.
#' @param col (`character`)\cr color(s).
#'
#' @return A `ggplot` STEP graph.
#'
#' @seealso Custom tidy method [tidy.step()].
#'
#' @examples
#' library(survival)
#' lung$sex <- factor(lung$sex)
#'
#' # Survival example.
#' vars <- list(
#'   time = "time",
#'   event = "status",
#'   arm = "sex",
#'   biomarker = "age"
#' )
#'
#' step_matrix <- fit_survival_step(
#'   variables = vars,
#'   data = lung,
#'   control = c(control_coxph(), control_step(num_points = 10, degree = 2))
#' )
#' step_data <- broom::tidy(step_matrix)
#'
#' # Default plot.
#' g_step(step_data)
#'
#' # Add the reference 1 horizontal line.
#' library(ggplot2)
#' g_step(step_data) +
#'   ggplot2::geom_hline(ggplot2::aes(yintercept = 1), linetype = 2)
#'
#' # Use actual values instead of percentiles, different color for estimate and no CI,
#' # use log scale for y axis.
#' g_step(
#'   step_data,
#'   use_percentile = FALSE,
#'   est = list(col = "blue", lty = 1),
#'   ci_ribbon = NULL
#' ) + scale_y_log10()
#'
#' # Adding another curve based on additional column.
#' step_data$extra <- exp(step_data$`Percentile Center`)
#' g_step(step_data) +
#'   ggplot2::geom_line(ggplot2::aes(y = extra), linetype = 2, color = "green")
#'
#' # Response example.
#' vars <- list(
#'   response = "status",
#'   arm = "sex",
#'   biomarker = "age"
#' )
#'
#' step_matrix <- fit_rsp_step(
#'   variables = vars,
#'   data = lung,
#'   control = c(
#'     control_logistic(response_definition = "I(response == 2)"),
#'     control_step()
#'   )
#' )
#' step_data <- broom::tidy(step_matrix)
#' g_step(step_data)
#'
#' @export
g_step <- function(df,
                   use_percentile = "Percentile Center" %in% names(df),
                   est = list(col = "blue", lty = 1),
                   ci_ribbon = list(fill = getOption("ggplot2.discrete.colour")[1], alpha = 0.5),
                   col = getOption("ggplot2.discrete.colour")) {
  checkmate::assert_tibble(df)
  checkmate::assert_flag(use_percentile)
  checkmate::assert_character(col, null.ok = TRUE)
  checkmate::assert_list(est, names = "named")
  checkmate::assert_list(ci_ribbon, names = "named", null.ok = TRUE)

  # The x axis shows either the percentile centers or the actual biomarker interval centers
  x_var <- if (use_percentile) "Percentile Center" else "Interval Center"
  df$x <- df[[x_var]]
  # The name of the estimate column is read from the `estimate` attribute of `df`
  attrs <- attributes(df)
  df$y <- df[[attrs$estimate]]

  # Set legend names. To be modified also at call level
  legend_names <- c("Estimate", "CI 95%")

  # Color of the estimate line: taken from `est$col`, with "blue" as fallback when it is not set
  est_col <- if (is.null(est$col)) "blue" else est$col

  p <- ggplot2::ggplot(df, ggplot2::aes(x = .data[["x"]], y = .data[["y"]]))

  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_color_manual(values = col)
  }

  if (!is.null(ci_ribbon)) {
    if (is.null(ci_ribbon$fill)) {
      ci_ribbon$fill <- "lightblue"
    }
    p <- p + ggplot2::geom_ribbon(
      ggplot2::aes(
        ymin = .data[["ci_lower"]], ymax = .data[["ci_upper"]],
        fill = legend_names[2]
      ),
      alpha = ci_ribbon$alpha
    ) +
      ggplot2::scale_fill_manual(
        name = "", values = c("CI 95%" = ci_ribbon$fill)
      )
  }

  # Adding a second color scale replaces the one added above; the message that
  # ggplot2 may emit about an already present scale is suppressed
  suppressMessages(p <- p +
    ggplot2::geom_line(
      ggplot2::aes(y = .data[["y"]], color = legend_names[1]),
      linetype = est$lty
    ) +
    ggplot2::scale_colour_manual(
      name = "", values = c("Estimate" = est_col)
    ))

  p <- p + ggplot2::labs(x = attrs$biomarker, y = attrs$estimate)
  if (use_percentile) {
    p <- p + ggplot2::scale_x_continuous(labels = scales::percent)
  }
  p
}

#' Custom tidy method for STEP results
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tidy the STEP results into a `tibble` format ready for plotting.
#'
#' @param x (`matrix`)\cr results from [fit_survival_step()].
#' @param ... not used.
#'
#' @return A `tibble` with one row per STEP subgroup. The estimates and CIs are on the HR or OR scale,
#'   respectively. Additional attributes carry metadata also used for plotting.
#'
#' @seealso [g_step()] which consumes the result from this function.
#'
#' @method tidy step
#'
#' @examples
#' library(survival)
#' lung$sex <- factor(lung$sex)
#' vars <- list(
#'   time = "time",
#'   event = "status",
#'   arm = "sex",
#'   biomarker = "age"
#' )
#' step_matrix <- fit_survival_step(
#'   variables = vars,
#'   data = lung,
#'   control = c(control_coxph(), control_step(num_points = 10, degree = 2))
#' )
#' broom::tidy(step_matrix)
#'
#' @export
tidy.step <- function(x, ...) { # nolint
  checkmate::assert_class(x, "step")
  step_df <- as.data.frame(x)
  col_names <- names(step_df)

  # A `loghr` column marks a survival fit; otherwise the estimate column is taken to be `logor`.
  if ("loghr" %in% col_names) {
    log_est_col <- "loghr"
    ratio_label <- "Hazard Ratio"
  } else {
    log_est_col <- "logor"
    ratio_label <- "Odds Ratio"
  }

  # Rename the estimate column, then exponentiate estimate and CI bounds (log scale -> ratio scale).
  names(step_df)[match(log_est_col, col_names)] <- ratio_label
  ratio_cols <- c(ratio_label, c("ci_lower", "ci_upper"))
  step_df[, ratio_cols] <- exp(step_df[, ratio_cols])

  # Warn about values that will make the downstream plot hard to read.
  has_missing <- any(is.na(step_df[, ratio_cols]))
  has_huge <- any(abs(step_df[, ratio_cols]) > 1e10, na.rm = TRUE)
  if (has_missing) {
    warning(paste(
      "Missing values in the point estimate or CI columns,",
      "this will lead to holes in the `g_step()` plot"
    ))
  }
  if (has_huge) {
    warning(paste(
      "Very large absolute values in the point estimate or CI columns,",
      "consider adding `scale_y_log10()` to the `g_step()` result for plotting"
    ))
  }
  if (has_missing || has_huge) {
    warning("Consider using larger `bandwidth`, less `num_points` in `control_step()` settings for fitting")
  }

  # Attach the metadata attributes (estimate name, biomarker name, CI label) to the tibble.
  structure(
    tibble::as_tibble(step_df),
    estimate = ratio_label,
    biomarker = attr(x, "variables")$biomarker,
    ci = f_conf_level(attr(x, "control")$conf_level)
  )
}

#' Bland-Altman analysis
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Statistics function that uses the Bland-Altman method to assess the agreement between two numerical vectors
#' and calculates a variety of statistics.
#'
#' @inheritParams argument_convention
#' @param y (`numeric`)\cr vector of numbers we want to analyze, to be compared with `x`.
#'
#' @return
#' A named list of the following elements:
#'   * `df`
#'   * `difference_mean`
#'   * `ci_mean`
#'   * `difference_sd`
#'   * `difference_se`
#'   * `upper_agreement_limit`
#'   * `lower_agreement_limit`
#'   * `agreement_limit_se`
#'   * `upper_agreement_limit_ci`
#'   * `lower_agreement_limit_ci`
#'   * `t_value`
#'   * `n`
#'
#' @examples
#' x <- seq(1, 60, 5)
#' y <- seq(5, 50, 4)
#'
#' s_bland_altman(x, y, conf_level = 0.9)
#'
#' @export
s_bland_altman <- function(x, y, conf_level = 0.95) {
  checkmate::assert_numeric(x, min.len = 1, any.missing = TRUE)
  checkmate::assert_numeric(y, len = length(x), any.missing = TRUE)
  checkmate::assert_numeric(conf_level, lower = 0, upper = 1, any.missing = TRUE)

  alpha <- 1 - conf_level
  upper_prob <- 1 - alpha / 2 # upper-tail probability shared by the normal and t quantiles

  # Keep only the pairs in which both measurements are observed.
  is_paired <- complete.cases(x, y)
  x <- x[is_paired]
  y <- y[is_paired]
  n <- sum(is_paired)

  if (n == 0) {
    stop("there is no valid paired data")
  }

  # Per-pair difference and average, and the summary of the differences.
  difference <- x - y
  average <- (x + y) / 2
  difference_mean <- mean(difference)
  difference_sd <- sd(difference)

  # Limits of agreement: mean difference +/- normal quantile times the SD of the differences.
  half_width <- qnorm(upper_prob) * difference_sd
  upper_agreement_limit <- difference_mean + half_width
  lower_agreement_limit <- difference_mean - half_width

  # Standard errors and t-based margins for the mean difference and for the agreement limits.
  root_n <- sqrt(n)
  difference_se <- difference_sd / root_n
  al_se <- difference_sd * sqrt(3) / root_n
  tvalue <- qt(upper_prob, n - 1)
  mean_margin <- difference_se * tvalue
  limit_margin <- al_se * tvalue

  list(
    df = data.frame(average, difference),
    difference_mean = difference_mean,
    ci_mean = difference_mean + c(-1, 1) * mean_margin,
    difference_sd = difference_sd,
    difference_se = difference_se,
    upper_agreement_limit = upper_agreement_limit,
    lower_agreement_limit = lower_agreement_limit,
    agreement_limit_se = al_se,
    upper_agreement_limit_ci = c(upper_agreement_limit - limit_margin, upper_agreement_limit + limit_margin),
    lower_agreement_limit_ci = c(lower_agreement_limit - limit_margin, lower_agreement_limit + limit_margin),
    t_value = tvalue,
    n = n
  )
}

#' Bland-Altman plot
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Graphing function that produces a Bland-Altman plot.
#'
#' @inheritParams s_bland_altman
#'
#' @return A `ggplot` Bland-Altman plot.
#'
#' @examples
#' x <- seq(1, 60, 5)
#' y <- seq(5, 50, 4)
#'
#' g_bland_altman(x = x, y = y, conf_level = 0.9)
#'
#' @export
#' @aliases bland_altman
g_bland_altman <- function(x, y, conf_level = 0.95) {
  # Draws the Bland-Altman scatter plot of pairwise differences against pairwise averages,
  # with horizontal reference lines (mean difference, zero, lower/upper agreement limits) and
  # a text label plus a numeric value next to the mean and each limit.
  # All statistics come from `s_bland_altman()`; returns the ggplot object.
  result_tem <- s_bland_altman(x, y, conf_level = conf_level)

  # Horizontal position of all annotations: 90% of the way from the smallest to the largest average.
  xpos <- max(result_tem$df$average) * 0.9 + min(result_tem$df$average) * 0.1
  # Labels sit slightly above their line and values slightly below, relative to the y range.
  offset <- 0.03 * diff(range(result_tem$df$difference))

  # Builds one text annotation at `xpos`.
  text_at <- function(y, label, color) {
    annotate("text", x = xpos, y = y, label = label, color = color)
  }

  ggplot(result_tem$df) +
    geom_point(aes(x = average, y = difference), color = "blue") +
    geom_hline(yintercept = result_tem$difference_mean, color = "blue", linetype = 1) +
    geom_hline(yintercept = 0, color = "blue", linetype = 2) +
    geom_hline(yintercept = result_tem$lower_agreement_limit, color = "red", linetype = 2) +
    geom_hline(yintercept = result_tem$upper_agreement_limit, color = "red", linetype = 2) +
    text_at(result_tem$lower_agreement_limit + offset, "lower limits of agreement", "red") +
    text_at(result_tem$upper_agreement_limit + offset, "upper limits of agreement", "red") +
    text_at(result_tem$difference_mean + offset, "mean of difference between two measures", "blue") +
    text_at(
      result_tem$lower_agreement_limit - offset,
      sprintf("%.2f", result_tem$lower_agreement_limit),
      "red"
    ) +
    text_at(
      result_tem$upper_agreement_limit - offset,
      sprintf("%.2f", result_tem$upper_agreement_limit),
      "red"
    ) +
    # Use the `difference_mean` element: the previous name `difference_meanm` does not exist in
    # the result list, so the formatted label was empty.
    text_at(
      result_tem$difference_mean - offset,
      sprintf("%.2f", result_tem$difference_mean),
      "blue"
    ) +
    xlab("Average of two measures") +
    ylab("Difference between two measures")
}

#' Cumulative counts of numeric variable by thresholds
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [count_cumulative()] creates a layout element to calculate cumulative counts of values in a
#' numeric variable that are less than, less or equal to, greater than, or greater or equal to user-specified
#' threshold values.
#'
#' This function analyzes numeric variable `vars` against the threshold values supplied to the `thresholds`
#' argument as a numeric vector. Whether counts should include the threshold values, and whether to count
#' values lower or higher than the threshold values can be set via the `include_eq` and `lower_tail`
#' parameters, respectively.
#'
#' @inheritParams h_count_cumulative
#' @inheritParams argument_convention
#' @param thresholds (`numeric`)\cr vector of cutoff values for the counts.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("count_cumulative"), type = "sh")``
#'
#' @seealso Relevant helper function [h_count_cumulative()], and descriptive function [d_count_cumulative()].
#'
#' @name count_cumulative
#' @order 1
NULL

#' Helper function for `s_count_cumulative()`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to calculate count and fraction of `x` values in the lower or upper tail given a threshold.
#'
#' @inheritParams argument_convention
#' @param threshold (`numeric(1)`)\cr a cutoff value as threshold to count values of `x`.
#' @param lower_tail (`flag`)\cr whether to count lower tail, default is `TRUE`.
#' @param include_eq (`flag`)\cr whether to include value equal to the `threshold` in
#'   count, default is `TRUE`.
#'
#' @return A named vector with items:
#'   * `count`: the count of values less than, less or equal to, greater than, or greater or equal to a threshold
#'     of user specification.
#'   * `fraction`: the fraction of the count.
#'
#' @seealso [count_cumulative]
#'
#' @examples
#' set.seed(1, kind = "Mersenne-Twister")
#' x <- c(sample(1:10, 10), NA)
#' .N_col <- length(x)
#'
#' h_count_cumulative(x, 5, denom = .N_col)
#' h_count_cumulative(x, 5, lower_tail = FALSE, include_eq = FALSE, na_rm = FALSE, denom = .N_col)
#' h_count_cumulative(x, 0, lower_tail = FALSE, denom = .N_col)
#' h_count_cumulative(x, 100, lower_tail = FALSE, denom = .N_col)
#'
#' @export
h_count_cumulative <- function(x,
                               threshold,
                               lower_tail = TRUE,
                               include_eq = TRUE,
                               na_rm = TRUE,
                               denom) {
  checkmate::assert_numeric(x)
  checkmate::assert_numeric(threshold)
  checkmate::assert_numeric(denom)
  checkmate::assert_flag(lower_tail)
  checkmate::assert_flag(include_eq)
  checkmate::assert_flag(na_rm)

  # Choose the comparison operator from the tail direction and the inclusion of the threshold.
  compare <- if (lower_tail) {
    if (include_eq) `<=` else `<`
  } else {
    if (include_eq) `>=` else `>`
  }

  # With `na_rm = TRUE` missing values are masked out; otherwise every position stays eligible.
  eligible <- if (na_rm) !is.na(x) else rep(TRUE, length(x))
  count <- length(x[eligible & compare(x, threshold)])

  # A zero count over a zero denominator is reported as 0 rather than NaN.
  fraction <- if (count == 0 && denom == 0) 0 else count / denom

  c(count = count, fraction = fraction)
}

#' Description of cumulative count
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a helper function that describes the analysis in [s_count_cumulative()].
#'
#' @inheritParams h_count_cumulative
#'
#' @return Labels for [s_count_cumulative()].
#'
#' @export
d_count_cumulative <- function(threshold, lower_tail = TRUE, include_eq = TRUE) {
  checkmate::assert_numeric(threshold)
  # Comparison symbol: direction ("<" or ">") optionally followed by "=".
  direction <- if (lower_tail) "<" else ">"
  comparator <- if (include_eq) paste0(direction, "=") else direction
  paste(comparator, threshold)
}

#' @describeIn count_cumulative Statistics function that produces a named list given a numeric vector of thresholds.
#'
#' @return
#' * `s_count_cumulative()` returns a named list of `count_fraction`s: a list with each `thresholds` value as a
#'   component, each component containing a vector for the count and fraction.
#'
#' @keywords internal
s_count_cumulative <- function(x,
                               thresholds,
                               lower_tail = TRUE,
                               include_eq = TRUE,
                               denom = c("N_col", "n", "N_row"),
                               .N_col, # nolint
                               .N_row, # nolint
                               na_rm = TRUE,
                               ...) {
  checkmate::assert_numeric(thresholds, min.len = 1, any.missing = FALSE)

  # Resolve the denominator keyword into the number it stands for.
  denom <- switch(match.arg(denom),
    n = length(x),
    N_row = .N_row,
    N_col = .N_col
  )

  # One labelled count/fraction vector per threshold.
  count_fraction_list <- lapply(thresholds, function(cutoff) {
    stats_vec <- h_count_cumulative(x, cutoff, lower_tail, include_eq, na_rm = na_rm, denom = denom)
    stats_label <- d_count_cumulative(cutoff, lower_tail, include_eq)
    formatters::with_label(stats_vec, stats_label)
  })
  names(count_fraction_list) <- thresholds

  list(count_fraction = count_fraction_list)
}

#' @describeIn count_cumulative Formatted analysis function which is used as `afun`
#'   in `count_cumulative()`.
#'
#' @return
#' * `a_count_cumulative()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_count_cumulative <- function(x,
                               ...,
                               .stats = NULL,
                               .stat_names = NULL,
                               .formats = NULL,
                               .labels = NULL,
                               .indent_mods = NULL) {
  # Formatted analysis function: applies `s_count_cumulative()` (plus any custom statistic
  # functions found in `.stats`) to `x` and returns the selected statistics as rows built with
  # `in_rows()`. Everything passed through `...` is collected and forwarded to the statistics
  # functions, except the `.additional_fun_parameters` entry which is consumed here.
  dots_extra_args <- list(...)

  # Check if there are user-defined functions
  default_and_custom_stats_list <- .split_std_from_custom_stats(.stats)
  .stats <- default_and_custom_stats_list$all_stats
  custom_stat_functions <- default_and_custom_stats_list$custom_stats

  # Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params)
  # NOTE(review): `retrieve_extra_afun_params()` is given only the parameter *names*; presumably it
  # looks the values up in this function's frame -- confirm before moving this call elsewhere.
  extra_afun_params <- retrieve_extra_afun_params(
    names(dots_extra_args$.additional_fun_parameters)
  )
  dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore

  # Main statistical functions application
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_count_cumulative,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      x = list(x),
      extra_afun_params,
      dots_extra_args
    )
  )

  # Fill in with stats defaults if needed
  .stats <- get_stats("count_cumulative",
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions)
  )

  # Keep only the requested statistics and record the level names found under each of them
  x_stats <- x_stats[.stats]
  levels_per_stats <- lapply(x_stats, names)

  # Fill in formats/indents/labels with custom input and defaults
  .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats)
  # Labels attached to the individual statistic values (their "label" attribute) are offered
  # to the label helper
  .labels <- get_labels_from_stats(
    .stats, .labels, levels_per_stats,
    label_attr_from_stats = sapply(.unlist_keep_nulls(x_stats), attr, "label")
  )

  # Unlist stats
  # The flattened values are renamed after the entries of `.formats`
  x_stats <- x_stats %>%
    .unlist_keep_nulls() %>%
    setNames(names(.formats))

  # Auto format handling
  .formats <- apply_auto_formatting(
    .formats,
    x_stats,
    extra_afun_params$.df_row,
    extra_afun_params$.var
  )

  # Get and check statistical names from defaults
  .stat_names <- get_stat_names(x_stats, .stat_names) # note is x_stats

  # Assemble the output rows; row names are taken from the names of `.labels`
  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = names(.labels),
    .stat_names = .stat_names,
    .labels = .labels %>% .unlist_keep_nulls(),
    .indent_mods = .indent_mods %>% .unlist_keep_nulls()
  )
}

#' @describeIn count_cumulative Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_cumulative()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_cumulative()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_cumulative(
#'     vars = "AGE",
#'     thresholds = c(40, 60)
#'   ) %>%
#'   build_table(tern_ex_adsl)
#'
#' @export
#' @order 2
count_cumulative <- function(lyt,
                             vars,
                             thresholds,
                             lower_tail = TRUE,
                             include_eq = TRUE,
                             var_labels = vars,
                             show_labels = "visible",
                             na_str = default_na_str(),
                             nested = TRUE,
                             table_names = vars,
                             ...,
                             na_rm = TRUE,
                             .stats = c("count_fraction"),
                             .stat_names = NULL,
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL) {
  # Arguments consumed by the statistics function, followed by anything passed through `...`
  extra_args <- list(
    "na_rm" = na_rm,
    "thresholds" = thresholds,
    "lower_tail" = lower_tail,
    "include_eq" = include_eq,
    ...
  )

  # Formatting arguments are forwarded only when they were actually supplied (non-NULL)
  formatting_args <- list(
    .stats = .stats,
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  extra_args <- c(extra_args, Filter(Negate(is.null), formatting_args))

  # Layout-provided parameters (see ?rtables::additional_fun_params): recorded in `extra_args`
  # and appended to the formals of a local copy of the analysis function
  layout_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- layout_params
  formals(a_count_cumulative) <- c(formals(a_count_cumulative), layout_params)

  # Main {rtables} structural call
  analyze(
    lyt,
    vars,
    afun = a_count_cumulative,
    na_str = na_str,
    inclNAs = !na_rm,
    table_names = table_names,
    var_labels = var_labels,
    show_labels = show_labels,
    nested = nested,
    extra_args = extra_args
  )
}

#' Count patients with abnormal analysis range values by baseline status
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [count_abnormal_by_baseline()] creates a layout element to count patients with abnormal
#' analysis range values, categorized by baseline status.
#'
#' This function analyzes primary analysis variable `var` which indicates abnormal range results. Additional
#' analysis variables that can be supplied as a list via the `variables` parameter are `id` (defaults to
#' `USUBJID`), a variable to indicate unique subject identifiers, and `baseline` (defaults to `BNRIND`), a
#' variable to indicate baseline reference ranges.
#'
#' For each direction specified via the `abnormal` parameter (e.g. High or Low), we condition on baseline
#' range result and count patients in the numerator and denominator as follows for each of the following
#' categories:
#'   * `Not <abnormality>`
#'     * `num`:  The number of patients without abnormality at baseline (excluding those with missing baseline)
#'       and with at least one abnormality post-baseline.
#'     * `denom`: The number of patients without abnormality at baseline (excluding those with missing baseline).
#'   * `<Abnormality>`
#'     * `num`: The number of patients with abnormality at baseline and at least one abnormality post-baseline.
#'     * `denom`: The number of patients with abnormality at baseline.
#'   * `Total`
#'     * `num`: The number of patients with at least one post-baseline record and at least one abnormality
#'       post-baseline.
#'     * `denom`: The number of patients with at least one post-baseline record.
#'
#' This function assumes that `df` has been filtered to only include post-baseline records.
#'
#' @inheritParams argument_convention
#' @param abnormal (`character`)\cr values identifying the abnormal range level(s) in `.var`.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("abnormal_by_baseline"), type = "sh")``
#'
#' @note
#' * `df` should be filtered to include only post-baseline records.
#' * If the baseline variable or analysis variable contains `NA` records, it is expected that `df` has been
#'   pre-processed using [df_explicit_na()] or [explicit_na()].
#'
#' @seealso Relevant description function [d_count_abnormal_by_baseline()].
#'
#' @name abnormal_by_baseline
#' @order 1
NULL

#' @describeIn abnormal_by_baseline Statistics function for a single `abnormal` level.
#'
#' @param na_str (`string`)\cr the explicit `na_level` argument you used in the pre-processing steps (maybe with
#'   [df_explicit_na()]). The default is `"<Missing>"`.
#'
#' @return
#' * `s_count_abnormal_by_baseline()` returns statistic `fraction` which is a named list with 3 labeled elements:
#'   `not_abnormal`, `abnormal`, and `total`. Each element contains a vector with `num` and `denom` patient counts.
#'
#' @keywords internal
s_count_abnormal_by_baseline <- function(df,
                                         .var,
                                         abnormal,
                                         na_str = "<Missing>",
                                         variables = list(id = "USUBJID", baseline = "BNRIND"),
                                         ...) {
  checkmate::assert_string(.var)
  checkmate::assert_string(abnormal)
  checkmate::assert_string(na_str)
  assert_df_with_variables(df, c(range = .var, variables))
  checkmate::assert_subset(names(variables), c("id", "baseline"))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[variables$baseline]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))

  # Character inputs are converted to factors; missing values must already be explicit.
  df[[.var]] <- as_factor_keep_attributes(df[[.var]], na_level = na_str)
  df[[variables$baseline]] <- as_factor_keep_attributes(df[[variables$baseline]], na_level = na_str)

  assert_valid_factor(df[[.var]], any.missing = FALSE)
  assert_valid_factor(df[[variables$baseline]], any.missing = FALSE)

  # Drop records whose analysis value is the explicit missing level.
  df <- df[df[[.var]] != na_str, ]

  # Number of distinct patients among the given ids.
  n_patients <- function(ids) length(unique(ids))

  patient <- df[[variables$id]]
  post_value <- df[[.var]]
  base_value <- df[[variables$baseline]]

  # Total: all patients with a valid post-baseline record / those with at least one abnormality.
  total_denom <- n_patients(patient)
  total_num <- n_patients(patient[post_value == abnormal])

  # Records with missing baseline only contribute to the total row.
  has_baseline <- base_value != na_str
  patient <- patient[has_baseline]
  post_value <- post_value[has_baseline]
  base_value <- base_value[has_baseline]

  post_is_abn <- post_value == abnormal
  base_is_abn <- base_value == abnormal
  base_not_abn <- base_value != abnormal

  # Abnormal at baseline: denominator = such patients; numerator = those also abnormal post-baseline.
  abn_denom <- n_patients(patient[base_is_abn])
  abn_num <- n_patients(patient[base_is_abn & post_is_abn])

  # Not abnormal at baseline: same logic on the complementary baseline group.
  not_abn_denom <- n_patients(patient[base_not_abn])
  not_abn_num <- n_patients(patient[base_not_abn & post_is_abn])

  labels <- d_count_abnormal_by_baseline(abnormal)
  list(fraction = list(
    not_abnormal = formatters::with_label(c(num = not_abn_num, denom = not_abn_denom), labels$not_abnormal),
    abnormal = formatters::with_label(c(num = abn_num, denom = abn_denom), labels$abnormal),
    total = formatters::with_label(c(num = total_num, denom = total_denom), labels$total)
  ))
}

#' @describeIn abnormal_by_baseline Formatted analysis function which is used as `afun`
#'   in `count_abnormal_by_baseline()`.
#'
#' @return
#' * `a_count_abnormal_by_baseline()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_count_abnormal_by_baseline <- function(df,
                                         ...,
                                         .stats = NULL,
                                         .stat_names = NULL,
                                         .formats = NULL,
                                         .labels = NULL,
                                         .indent_mods = NULL) {
  # Formatted analysis function: applies `s_count_abnormal_by_baseline()` (plus any custom
  # statistic functions found in `.stats`) to `df` and returns the selected statistics as rows
  # built with `in_rows()`.

  # Check for additional parameters to the statistics function
  # NOTE(review): `retrieve_extra_afun_params()` is given only the parameter *names*; presumably it
  # looks the values up in this function's frame -- confirm before moving this call elsewhere.
  dots_extra_args <- list(...)
  extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters))
  dots_extra_args$.additional_fun_parameters <- NULL

  # Check for user-defined functions
  default_and_custom_stats_list <- .split_std_from_custom_stats(.stats)
  .stats <- default_and_custom_stats_list$all_stats
  custom_stat_functions <- default_and_custom_stats_list$custom_stats

  # Apply statistics function
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_count_abnormal_by_baseline,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      df = list(df),
      extra_afun_params,
      dots_extra_args
    )
  )

  # Fill in formatting defaults
  # Default labels come from `d_count_abnormal_by_baseline()` for the `abnormal` value passed via `...`
  .stats <- get_stats("abnormal_by_baseline", stats_in = .stats, custom_stats_in = names(custom_stat_functions))
  levels_per_stats <- lapply(x_stats, names)
  .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats)
  .labels <- get_labels_from_stats(
    .stats, .labels, levels_per_stats, d_count_abnormal_by_baseline(dots_extra_args$abnormal)
  )
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats)

  # Keep only the requested statistics, flatten them, and rename them after the entries of `.formats`
  x_stats <- x_stats[.stats] %>%
    .unlist_keep_nulls() %>%
    setNames(names(.formats))

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  # Assemble the output rows; here the flattened labels serve both as row names and as row labels
  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = .labels %>% .unlist_keep_nulls(),
    .stat_names = .stat_names,
    .labels = .labels %>% .unlist_keep_nulls(),
    .indent_mods = .indent_mods %>% .unlist_keep_nulls()
  )
}

#' @describeIn abnormal_by_baseline Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_by_baseline()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal_by_baseline()` to the table layout.
#'
#' @examples
#' df <- data.frame(
#'   USUBJID = as.character(c(1:6)),
#'   ANRIND = factor(c(rep("LOW", 4), "NORMAL", "HIGH")),
#'   BNRIND = factor(c("LOW", "NORMAL", "HIGH", NA, "LOW", "NORMAL"))
#' )
#' df <- df_explicit_na(df)
#'
#' # Layout creating function.
#' basic_table() %>%
#'   count_abnormal_by_baseline(var = "ANRIND", abnormal = c(High = "HIGH")) %>%
#'   build_table(df)
#'
#' # Passing of statistics function and formatting arguments.
#' df2 <- data.frame(
#'   ID = as.character(c(1, 2, 3, 4)),
#'   RANGE = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
#'   BLRANGE = factor(c("LOW", "HIGH", "HIGH", "NORMAL"))
#' )
#'
#' basic_table() %>%
#'   count_abnormal_by_baseline(
#'     var = "RANGE",
#'     abnormal = c(Low = "LOW"),
#'     variables = list(id = "ID", baseline = "BLRANGE"),
#'     .formats = c(fraction = "xx / xx"),
#'     .indent_mods = c(fraction = 2L)
#'   ) %>%
#'   build_table(df2)
#'
#' @export
#' @order 2
count_abnormal_by_baseline <- function(lyt,
                                       var,
                                       abnormal,
                                       variables = list(id = "USUBJID", baseline = "BNRIND"),
                                       na_str = "<Missing>",
                                       nested = TRUE,
                                       ...,
                                       table_names = abnormal,
                                       .stats = "fraction",
                                       .stat_names = NULL,
                                       .formats = list(fraction = format_fraction),
                                       .labels = NULL,
                                       .indent_mods = NULL) {
  checkmate::assert_character(abnormal, len = length(table_names), names = "named")
  checkmate::assert_string(var)

  # `.stats` is always forwarded; the remaining formatting arguments only when non-NULL
  extra_args <- list(".stats" = .stats)
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  extra_args <- c(extra_args, Filter(Negate(is.null), optional_args))

  # Arguments for the statistics function: `variables` plus anything passed through `...`
  extra_args <- c(extra_args, "variables" = list(variables), ...)

  # Layout-provided parameters: recorded in `extra_args` and appended to the formals of a local
  # copy of the analysis function
  layout_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- layout_params
  formals(a_count_abnormal_by_baseline) <- c(formals(a_count_abnormal_by_baseline), layout_params)

  # One labelled table section per entry of `abnormal`; the entry's name is the section label
  for (idx in seq_along(abnormal)) {
    section_args <- c(extra_args, list(abnormal = abnormal[idx]))

    lyt <- analyze(
      lyt = lyt,
      vars = var,
      afun = a_count_abnormal_by_baseline,
      var_labels = names(abnormal)[idx],
      na_str = na_str,
      nested = nested,
      extra_args = section_args,
      show_labels = "visible",
      table_names = table_names[idx]
    )
  }

  lyt
}

#' Description function for `s_count_abnormal_by_baseline()`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Description function that produces the labels for [s_count_abnormal_by_baseline()].
#'
#' @inheritParams abnormal_by_baseline
#'
#' @return Abnormal category labels for [s_count_abnormal_by_baseline()].
#'
#' @examples
#' d_count_abnormal_by_baseline("LOW")
#'
#' @export
d_count_abnormal_by_baseline <- function(abnormal) {
  # Capitalised form of the abnormal level: first character upper case, the rest lower case.
  first_letter <- toupper(substr(abnormal, 1, 1))
  remainder <- tolower(substring(abnormal, 2))

  list(
    not_abnormal = paste("Not", tolower(abnormal)),
    abnormal = paste0(first_letter, remainder),
    total = "Total"
  )
}

#' Helper functions for subgroup treatment effect pattern (STEP) calculations
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions that are used internally for the STEP calculations.
#'
#' @inheritParams argument_convention
#'
#' @name h_step
#' @include control_step.R
NULL

#' @describeIn h_step Creates the windows for STEP, based on the control settings
#'   provided.
#'
#' @param x (`numeric`)\cr biomarker value(s) to use (without `NA`).
#' @param control (named `list`)\cr output from `control_step()`.
#'
#' @return
#' * `h_step_window()` returns a list containing the window-selection matrix `sel`
#'   and the interval information matrix `interval`.
#'
#' @export
h_step_window <- function(x,
                          control = control_step()) {
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  n_points <- control$num_points
  bandwidth <- control$bandwidth
  point_idx <- seq_len(n_points)
  bound_names <- c("Center", "Lower", "Upper")

  # `sel[j, i]` flags whether observation j falls into window i.
  sel <- matrix(FALSE, length(x), n_points)
  interval <- matrix(0, n_points, 3)
  colnames(interval) <- paste("Interval", bound_names)

  if (control$use_percentile) {
    # Windows are defined on the percentile scale: columns 1-3 hold the percentile
    # center/bounds, columns 4-6 the corresponding biomarker values.
    interval <- cbind(interval, interval)
    colnames(interval)[1:3] <- paste("Percentile", bound_names)
    grid <- seq(0, 1, length.out = n_points + 2)[-1]
    for (i in point_idx) {
      # Percentile bounds are clipped to [0, 1].
      pct_bounds <- c(
        max(grid[i] - bandwidth, 0),
        min(grid[i] + bandwidth, 1)
      )
      value_bounds <- stats::quantile(x, pct_bounds)
      interval[i, 2:3] <- pct_bounds
      interval[i, 5:6] <- value_bounds
      sel[, i] <- x >= value_bounds[1] & x <= value_bounds[2]
    }
    # Centers are the grid points without the final one, and their biomarker quantiles.
    interval[, 1] <- grid[-n_points - 1]
    interval[, 4] <- stats::quantile(x, interval[, 1])
  } else {
    # Windows are defined directly on the biomarker scale, clipped to the observed range.
    x_range <- c(min(x), max(x))
    grid <- seq(x_range[1], x_range[2], length.out = n_points + 2)[-1]
    for (i in point_idx) {
      value_bounds <- c(
        max(grid[i] - bandwidth, x_range[1]),
        min(grid[i] + bandwidth, x_range[2])
      )
      interval[i, 2:3] <- value_bounds
      sel[, i] <- x >= value_bounds[1] & x <= value_bounds[2]
    }
    # Centers are the grid points without the final one.
    interval[, 1] <- grid[-n_points - 1]
  }

  list(sel = sel, interval = interval)
}

#' @describeIn h_step Calculates the treatment effect estimate
#'   on the linear predictor scale and corresponding standard error from a STEP `model` fitted
#'   on `data` given `variables` specification, for a single biomarker value `x`.
#'   This works for both `coxph` and `glm` models, i.e. for calculating log hazard ratio or log odds
#'   ratio estimates.
#'
#' @param model (`coxph` or `glm`)\cr the regression model object.
#'
#' @return
#' * `h_step_trt_effect()` returns a vector with elements `est` and `se`.
#'
#' @export
h_step_trt_effect <- function(data,
                              model,
                              variables,
                              x) {
  checkmate::assert_multi_class(model, c("coxph", "glm"))
  checkmate::assert_number(x)
  assert_df_with_variables(data, variables)
  checkmate::assert_factor(data[[variables$arm]], n.levels = 2)

  # Two copies of the first data row that differ only in the arm (one row per arm level),
  # both evaluated at biomarker value `x`.
  pred_data <- data[c(1, 1), ]
  pred_data[, variables$biomarker] <- x
  pred_data[, variables$arm] <- levels(data[[variables$arm]])

  # Design matrix of the fitted model (right-hand side only) for these two rows.
  rhs_terms <- stats::delete.response(stats::terms(model))
  pred_frame <- stats::model.frame(rhs_terms, data = pred_data, xlev = model$xlevels)
  design <- stats::model.matrix(rhs_terms, data = pred_frame, contrasts.arg = model$contrasts)

  beta <- stats::coef(model)
  # Note: It is important to use the coef subset from matrix, otherwise intercept and
  # strata are included for coxph() models.
  # Row difference of the design matrix = contrast between second and first arm level.
  contrast <- diff(design[, names(beta)])
  estimate <- contrast %*% beta
  std_err <- sqrt(contrast %*% stats::vcov(model) %*% t(contrast))
  c(
    est = estimate,
    se = std_err
  )
}

#' @describeIn h_step Builds the model formula used in survival STEP calculations.
#'
#' @return
#' * `h_step_survival_formula()` returns a model formula.
#'
#' @export
h_step_survival_formula <- function(variables,
                                    control = control_step()) {
  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_list_of_variables(variables[c("arm", "biomarker", "event", "time")])

  # Survival response and treatment arm main effect.
  formula_str <- paste0("Surv(", variables$time, ", ", variables$event, ") ~ ", variables$arm)

  # Arm by (raw) polynomial-of-biomarker interaction, only for a positive degree.
  if (control$degree > 0) {
    poly_term <- paste0("stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)")
    formula_str <- paste0(formula_str, " * ", poly_term)
  }

  # Optional additive covariates.
  covariates <- variables$covariates
  if (!is.null(covariates)) {
    formula_str <- paste(formula_str, "+", paste(covariates, collapse = "+"))
  }

  # Optional stratification: all strata variables go into a single `strata()` term.
  strata_vars <- variables$strata
  if (!is.null(strata_vars)) {
    formula_str <- paste0(formula_str, " + strata(", paste0(strata_vars, collapse = ", "), ")")
  }

  stats::as.formula(formula_str)
}

#' @describeIn h_step Estimates the model with `formula` built based on
#'   `variables` in `data` for a given `subset` and `control` parameters for the
#'   Cox regression.
#'
#' @param formula (`formula`)\cr the regression model formula.
#' @param subset (`logical`)\cr subset vector.
#'
#' @return
#' * `h_step_survival_est()` returns a matrix of number of observations `n`,
#'   `events`, log hazard ratio estimates `loghr`, standard error `se`,
#'   and Wald confidence interval bounds `ci_lower` and `ci_upper`. One row is
#'   included for each biomarker value in `x`.
#'
#' @export
h_step_survival_est <- function(formula,
                                data,
                                variables,
                                x,
                                subset = rep(TRUE, nrow(data)),
                                control = control_coxph()) {
  checkmate::assert_formula(formula)
  assert_df_with_variables(data, variables)
  checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  # Note: `subset` in `coxph` needs to be an expression referring to `data` variables.
  data$.subset <- subset

  # Warnings raised while fitting are collected and muffled, then summarised in one warning below.
  coxph_warnings <- NULL
  collect_warning <- function(w) {
    coxph_warnings <<- c(coxph_warnings, w)
    invokeRestart("muffleWarning")
  }
  fit <- withCallingHandlers(
    expr = survival::coxph(
      formula = formula,
      data = data,
      subset = .subset,
      ties = control$ties
    ),
    warning = collect_warning
  )
  if (!is.null(coxph_warnings)) {
    warning(paste(
      "Fit warnings occurred, please consider using a simpler model, or",
      "larger `bandwidth`, less `num_points` in `control_step()` settings"
    ))
  }

  # One column per `x` (rows `est` and `se`) from `vapply()`, transposed to one row per `x`.
  per_point <- vapply(
    X = x,
    FUN = h_step_trt_effect,
    FUN.VALUE = numeric(2),
    data = data,
    model = fit,
    variables = variables
  )
  estimates <- t(per_point)

  # Two-sided Wald interval on the log hazard ratio scale.
  q_norm <- stats::qnorm((1 + control$conf_level) / 2)
  half_width <- q_norm * estimates[, "se"]
  cbind(
    n = fit$n,
    events = fit$nevent,
    loghr = estimates[, "est"],
    se = estimates[, "se"],
    ci_lower = estimates[, "est"] - half_width,
    ci_upper = estimates[, "est"] + half_width
  )
}

#' @describeIn h_step Builds the model formula used in response STEP calculations.
#'
#' @return
#' * `h_step_rsp_formula()` returns a model formula.
#'
#' @export
h_step_rsp_formula <- function(variables,
                               control = c(control_step(), control_logistic())) {
  checkmate::assert_character(variables$covariates, null.ok = TRUE)
  assert_list_of_variables(variables[c("arm", "biomarker", "response")])

  # Left-hand side: the first literal "response" in the response definition is replaced
  # by the actual response variable name.
  lhs <- sub(
    pattern = "response",
    replacement = variables$response,
    x = control$response_definition,
    fixed = TRUE
  )
  formula_str <- paste0(lhs, " ~ ", variables$arm)

  # Arm by (raw) polynomial-of-biomarker interaction, only for a positive degree.
  if (control$degree > 0) {
    poly_term <- paste0("stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)")
    formula_str <- paste0(formula_str, " * ", poly_term)
  }

  # Optional additive covariates.
  covariates <- variables$covariates
  if (!is.null(covariates)) {
    formula_str <- paste(formula_str, "+", paste(covariates, collapse = "+"))
  }

  # Optional stratification: several strata variables are combined into one interaction factor.
  strata_vars <- variables$strata
  if (!is.null(strata_vars)) {
    strata_term <- if (length(strata_vars) > 1) {
      paste0("I(interaction(", paste0(strata_vars, collapse = ", "), "))")
    } else {
      strata_vars
    }
    formula_str <- paste0(formula_str, "+ strata(", strata_term, ")")
  }

  stats::as.formula(formula_str)
}

#' @describeIn h_step Estimates the model with `formula` built based on
#'   `variables` in `data` for a given `subset` and `control` parameters for the
#'   logistic regression.
#'
#' @param formula (`formula`)\cr the regression model formula.
#' @param subset (`logical`)\cr subset vector.
#'
#' @return
#' * `h_step_rsp_est()` returns a matrix of number of observations `n`, log odds
#'   ratio estimates `logor`, standard error `se`, and Wald confidence interval bounds
#'   `ci_lower` and `ci_upper`. One row is included for each biomarker value in `x`.
#'
#' @export
h_step_rsp_est <- function(formula,
                           data,
                           variables,
                           x,
                           subset = rep(TRUE, nrow(data)),
                           control = control_logistic()) {
  checkmate::assert_formula(formula)
  assert_df_with_variables(data, variables)
  checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  # Note: `subset` in `glm` needs to be an expression referring to `data` variables.
  data$.subset <- subset

  # Warnings raised while fitting are collected and muffled, then summarised in one warning below.
  fit_warnings <- NULL
  collect_warning <- function(w) {
    fit_warnings <<- c(fit_warnings, w)
    invokeRestart("muffleWarning")
  }
  fit <- withCallingHandlers(
    expr = if (is.null(variables$strata)) {
      # Unstratified: ordinary logistic regression.
      stats::glm(
        formula = formula,
        data = data,
        subset = .subset,
        family = stats::binomial("logit")
      )
    } else {
      # Stratified: conditional logistic regression.
      # clogit needs coxph and strata imported
      survival::clogit(
        formula = formula,
        data = data,
        subset = .subset
      )
    },
    warning = collect_warning
  )
  if (!is.null(fit_warnings)) {
    warning(paste(
      "Fit warnings occurred, please consider using a simpler model, or",
      "larger `bandwidth`, less `num_points` in `control_step()` settings"
    ))
  }

  # One column per `x` (rows `est` and `se`) from `vapply()`, transposed to one row per `x`.
  per_point <- vapply(
    X = x,
    FUN = h_step_trt_effect,
    FUN.VALUE = numeric(2),
    data = data,
    model = fit,
    variables = variables
  )
  estimates <- t(per_point)

  # Two-sided Wald interval on the log odds ratio scale.
  q_norm <- stats::qnorm((1 + control$conf_level) / 2)
  half_width <- q_norm * estimates[, "se"]
  cbind(
    n = length(fit$y),
    logor = estimates[, "est"],
    se = estimates[, "se"],
    ci_lower = estimates[, "est"] - half_width,
    ci_upper = estimates[, "est"] + half_width
  )
}

#' Cox proportional hazards regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Fits a Cox regression model and estimates hazard ratio to describe the effect size in a survival analysis.
#'
#' @inheritParams argument_convention
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("summarize_coxreg"), type = "sh")``
#'
#' @details Cox models are the most commonly used methods to estimate the magnitude of
#'   the effect in survival analysis. It assumes proportional hazards: the ratio
#'   of the hazards between groups (e.g., two arms) is constant over time.
#'   This ratio is referred to as the "hazard ratio" (HR) and is one of the
#'   most commonly reported metrics to describe the effect size in survival
#'   analysis (NEST Team, 2020).
#'
#' @seealso [fit_coxreg] for relevant fitting functions, [h_cox_regression] for relevant
#'   helper functions, and [tidy_coxreg] for custom tidy methods.
#'
#' @examples
#' library(survival)
#'
#' # Testing dataset [survival::bladder].
#' set.seed(1, kind = "Mersenne-Twister")
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   tibble::tibble(
#'     TIME = stop,
#'     STATUS = event,
#'     ARM = as.factor(rx),
#'     COVAR1 = as.factor(enum) %>% formatters::with_label("A Covariate Label"),
#'     COVAR2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4, labels = c("F", "F", "M", "M")
#'     ) %>% formatters::with_label("Sex (F/M)")
#'   )
#' )
#' dta_bladder$AGE <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#' dta_bladder$STUDYID <- factor("X")
#'
#' u1_variables <- list(
#'   time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
#' )
#'
#' u2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
#'
#' m1_variables <- list(
#'   time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
#' )
#'
#' m2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
#'
#' @name cox_regression
#' @order 1
NULL

#' @describeIn cox_regression Statistics function that transforms results tabulated
#'   from [fit_coxreg_univar()] or [fit_coxreg_multivar()] into a list.
#'
#' @param model_df (`data.frame`)\cr contains the resulting model fit from a [fit_coxreg]
#'   function with tidying applied via [broom::tidy()].
#' @param .stats (`character`)\cr the names of statistics to be reported among:
#'   * `n`: number of observations (univariate only)
#'   * `hr`: hazard ratio
#'   * `ci`: confidence interval
#'   * `pval`: p-value of the treatment effect
#'   * `pval_inter`: p-value of the interaction effect between the treatment and the covariate (univariate only)
#' @param .which_vars (`character`)\cr which rows should statistics be returned for from the given model.
#'   Defaults to `"all"`. Other options include `"var_main"` for main effects, `"inter"` for interaction effects,
#'   and `"multi_lvl"` for multivariate model covariate level rows. When `.which_vars` is `"all"`, specific
#'   variables can be selected by specifying `.var_nms`.
#' @param .var_nms (`character`)\cr the `term` value of rows in `model_df` for which `.stats` should be returned.
#'   Typically this is the name of a variable. If using variable labels, `.var_nms` should be a vector of both
#'   the desired variable name and the variable label in that order to see all `.stats` related to that variable.
#'   When `.which_vars` is `"var_main"`, `.var_nms` should be only the variable name.
#'
#' @return
#' * `s_coxreg()` returns the selected statistic from the Cox regression model for the selected variable(s).
#'
#' @examples
#' # s_coxreg
#'
#' # Univariate
#' univar_model <- fit_coxreg_univar(variables = u1_variables, data = dta_bladder)
#' df1 <- broom::tidy(univar_model)
#'
#' s_coxreg(model_df = df1, .stats = "hr")
#'
#' # Univariate with interactions
#' univar_model_inter <- fit_coxreg_univar(
#'   variables = u1_variables, control = control_coxreg(interaction = TRUE), data = dta_bladder
#' )
#' df1_inter <- broom::tidy(univar_model_inter)
#'
#' s_coxreg(model_df = df1_inter, .stats = "hr", .which_vars = "inter", .var_nms = "COVAR1")
#'
#' # Univariate without treatment arm - only "COVAR2" covariate effects
#' univar_covs_model <- fit_coxreg_univar(variables = u2_variables, data = dta_bladder)
#' df1_covs <- broom::tidy(univar_covs_model)
#'
#' s_coxreg(model_df = df1_covs, .stats = "hr", .var_nms = c("COVAR2", "Sex (F/M)"))
#'
#' # Multivariate.
#' multivar_model <- fit_coxreg_multivar(variables = m1_variables, data = dta_bladder)
#' df2 <- broom::tidy(multivar_model)
#'
#' s_coxreg(model_df = df2, .stats = "pval", .which_vars = "var_main", .var_nms = "COVAR1")
#' s_coxreg(
#'   model_df = df2, .stats = "pval", .which_vars = "multi_lvl",
#'   .var_nms = c("COVAR1", "A Covariate Label")
#' )
#'
#' # Multivariate without treatment arm - only "COVAR1" main effect
#' multivar_covs_model <- fit_coxreg_multivar(variables = m2_variables, data = dta_bladder)
#' df2_covs <- broom::tidy(multivar_covs_model)
#'
#' s_coxreg(model_df = df2_covs, .stats = "hr")
#'
#' @export
s_coxreg <- function(model_df, .stats, .which_vars = "all", .var_nms = NULL) {
  assert_df_with_variables(model_df, list(term = "term", stat = .stats))
  checkmate::assert_multi_class(model_df$term, classes = c("factor", "character"))
  model_df$term <- as.character(model_df$term)
  .var_nms <- .var_nms[!is.na(.var_nms)]

  # Keep only the requested terms (if any). For multivariate level rows all remaining rows are
  # relabelled with the last entry of `.var_nms`, so that they end up in a single group below.
  if (length(.var_nms) > 0) model_df <- model_df[model_df$term %in% .var_nms, ]
  if (.which_vars == "multi_lvl") model_df$term <- tail(.var_nms, 1)

  # We need a list with names corresponding to the stats to display of equal length to the list of stats.
  y <- split(model_df, f = model_df$term, drop = FALSE)
  y <- stats::setNames(y, nm = rep(.stats, length(y)))

  if (.which_vars == "var_main") {
    y <- lapply(y, function(x) x[1, ]) # only main effect
  } else if (.which_vars %in% c("inter", "multi_lvl")) {
    # Exclude the main effect row. The decision is made per group on the group's own row count:
    # a single-row group has no separate main effect row and is returned unchanged.
    y <- lapply(y, function(x) if (nrow(x) > 1) x[-1, ] else x)
  }

  # For each group, return the values of the requested statistic as a list named by `term_label`.
  lapply(
    X = y,
    FUN = function(x) {
      z <- as.list(x[[.stats]])
      stats::setNames(z, nm = x$term_label)
    }
  )
}

#' @describeIn cox_regression Analysis function which is used as `afun` in [rtables::analyze()]
#'   and `cfun` in [rtables::summarize_row_groups()] within `summarize_coxreg()`.
#'
#' @param eff (`flag`)\cr whether treatment effect should be calculated. Defaults to `FALSE`.
#' @param var_main (`flag`)\cr whether main effects should be calculated. Defaults to `FALSE`.
#' @param na_str (`string`)\cr custom string to replace all `NA` values with. Defaults to `""`.
#' @param cache_env (`environment`)\cr an environment object used to cache the regression model in order to
#'   avoid repeatedly fitting the same model for every row in the table. Defaults to `NULL` (no caching).
#' @param varlabels (`list`)\cr a named list corresponds to the names of variables found in data, passed
#'   as a named list and corresponding to time, event, arm, strata, and covariates terms. If arm is missing
#'   from variables, then only Cox model(s) including the covariates will be fitted and the corresponding
#'   effect estimates will be tabulated later.
#'
#' @return
#' * `a_coxreg()` returns formatted [rtables::CellValue()].
#'
#' @examples
#' a_coxreg(
#'   df = dta_bladder,
#'   labelstr = "Label 1",
#'   variables = u1_variables,
#'   .spl_context = list(value = "COVAR1"),
#'   .stats = "n",
#'   .formats = "xx"
#' )
#'
#' a_coxreg(
#'   df = dta_bladder,
#'   labelstr = "",
#'   variables = u1_variables,
#'   .spl_context = list(value = "COVAR2"),
#'   .stats = "pval",
#'   .formats = "xx.xxxx"
#' )
#'
#' @export
a_coxreg <- function(df,
                     labelstr,
                     eff = FALSE,
                     var_main = FALSE,
                     multivar = FALSE,
                     variables,
                     at = list(),
                     control = control_coxreg(),
                     .spl_context,
                     .stats,
                     .formats,
                     .indent_mods = NULL,
                     na_str = "",
                     cache_env = NULL) {
  # Overview: obtain the tidied Cox model for the current split value (fitted here or taken from
  # `cache_env`), pick the rows matching the kind of table row being produced (treatment effect,
  # covariate main effect, interaction or level rows) via `s_coxreg()`, and return the values of
  # `.stats` as `in_rows()`.
  cov_no_arm <- !multivar && !"arm" %in% names(variables) && control$interaction # special case: univar no arm
  cov <- tail(.spl_context$value, 1) # current variable/covariate
  var_lbl <- formatters::var_labels(df)[cov] # check for df labels
  # Resolve the row label: a `labelstr` with several elements is used as a lookup by covariate name
  # (falling back to the `df` variable label); a single `labelstr` equal to the covariate name is
  # replaced by the `df` variable label when one exists.
  if (length(labelstr) > 1) {
    labelstr <- if (cov %in% names(labelstr)) labelstr[[cov]] else var_lbl # use df labels if none
  } else if (!is.na(var_lbl) && labelstr == cov && cov %in% variables$covariates) {
    labelstr <- var_lbl
  }
  # Treatment effect rows, multivariate models and the no-arm special case are fitted without
  # interaction. Otherwise the model is restricted to the current covariate, and the interaction
  # is switched on when the main effect row is requested.
  if (eff || multivar || cov_no_arm) {
    control$interaction <- FALSE
  } else {
    variables$covariates <- cov
    if (var_main) control$interaction <- TRUE
  }

  # The tidied model is looked up in `cache_env` under the current split value and only fitted
  # when no entry exists yet. With the default `cache_env = NULL` the assignment below only
  # creates a local list, so nothing is kept between calls.
  if (is.null(cache_env[[cov]])) {
    if (!multivar) {
      model <- fit_coxreg_univar(variables = variables, data = df, at = at, control = control) %>% broom::tidy()
    } else {
      model <- fit_coxreg_multivar(variables = variables, data = df, control = control) %>% broom::tidy()
    }
    cache_env[[cov]] <- model
  } else {
    model <- cache_env[[cov]]
  }
  # Univariate rows other than the main effect row never report an interaction p-value.
  if (!multivar && !var_main) model[, "pval_inter"] <- NA_real_

  # From here on, univariate models without arm are handled through the multivariate code paths
  # (the model itself has already been fitted above). The second condition covers numeric
  # covariates in the no-arm case when `cov_no_arm` is FALSE; these also become main effect rows.
  if (cov_no_arm || (!cov_no_arm && !"arm" %in% names(variables) && is.numeric(df[[cov]]))) {
    multivar <- TRUE
    if (!cov_no_arm) var_main <- TRUE
  }

  # Choose which rows of the tidied model `s_coxreg()` should return (`.which_vars`, `.var_nms`).
  vars_coxreg <- list(which_vars = "all", var_nms = NULL)
  if (eff) {
    if (multivar && !var_main) { # multivar treatment level
      var_lbl_arm <- formatters::var_labels(df)[[variables$arm]]
      vars_coxreg[c("var_nms", "which_vars")] <- list(c(variables$arm, var_lbl_arm), "multi_lvl")
    } else { # treatment effect
      vars_coxreg["var_nms"] <- variables$arm
      if (var_main) vars_coxreg["which_vars"] <- "var_main"
    }
  } else {
    if (!multivar || (multivar && var_main && !is.numeric(df[[cov]]))) { # covariate effect/level
      vars_coxreg[c("var_nms", "which_vars")] <- list(cov, "var_main")
    } else if (multivar) { # multivar covariate level
      vars_coxreg[c("var_nms", "which_vars")] <- list(c(cov, var_lbl), "multi_lvl")
      # NOTE(review): this indexes `model` by row name `cov`; presumably it blanks the statistic on
      # the main effect row - confirm against the row names of the tidied model.
      if (var_main) model[cov, .stats] <- NA_real_
    }
    if (!multivar && !var_main && control$interaction) vars_coxreg["which_vars"] <- "inter" # interaction effect
  }
  # Only the first group returned by `s_coxreg()` is used.
  var_vals <- s_coxreg(model, .stats, .which_vars = vars_coxreg$which_vars, .var_nms = vars_coxreg$var_nms)[[1]]
  # Row names/labels: depending on the row type either a label derived from `labelstr`, the fixed
  # label "All", or the term labels carried as names of `var_vals`.
  var_names <- if (all(grepl("\\(reference = ", names(var_vals))) && labelstr != tail(.spl_context$value, 1)) {
    paste(c(labelstr, tail(strsplit(names(var_vals), " ")[[1]], 3)), collapse = " ") # "reference" main effect labels
  } else if ((!multivar && !eff && !(!var_main && control$interaction) && nchar(labelstr) > 0) ||
    (multivar && var_main && is.numeric(df[[cov]]))) { # nolint
    labelstr # other main effect labels
  } else if (multivar && !eff && !var_main && is.numeric(df[[cov]])) {
    "All" # multivar numeric covariate
  } else {
    names(var_vals)
  }
  # `.formats` and `na_str` are repeated so that every returned row gets the same format and NA string.
  in_rows(
    .list = var_vals, .names = var_names, .labels = var_names, .indent_mods = .indent_mods,
    .formats = stats::setNames(rep(.formats, length(var_names)), var_names),
    .format_na_strs = stats::setNames(rep(na_str, length(var_names)), var_names)
  )
}

#' @describeIn cox_regression Layout-creating function which creates a Cox regression summary table
#'   layout. This function is a wrapper for several `rtables` layouting functions, including
#'   [rtables::analyze_colvars()] and [rtables::summarize_row_groups()].
#'
#' @inheritParams fit_coxreg_univar
#' @param multivar (`flag`)\cr whether multivariate Cox regression should run (defaults to `FALSE`), otherwise
#'   univariate Cox regression will run.
#' @param common_var (`string`)\cr the name of a factor variable in the dataset which takes the same value
#'   for all rows. This should be created during pre-processing if no such variable currently exists.
#' @param .section_div (`string` or `NA`)\cr string which should be repeated as a section divider between sections.
#'   Defaults to `NA` for no section divider. If a vector of two strings are given, the first will be used between
#'   treatment and covariate sections and the second between different covariates.
#'
#' @return
#' * `summarize_coxreg()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add a Cox regression table
#'   containing the chosen statistics to the table layout.
#'
#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()] which also take the `variables`, `data`,
#'   `at` (univariate only), and `control` arguments but return unformatted univariate and multivariate
#'   Cox regression models, respectively.
#'
#' @examples
#' # summarize_coxreg
#'
#' result_univar <- basic_table() %>%
#'   summarize_coxreg(variables = u1_variables) %>%
#'   build_table(dta_bladder)
#' result_univar
#'
#' result_univar_covs <- basic_table() %>%
#'   summarize_coxreg(
#'     variables = u2_variables,
#'   ) %>%
#'   build_table(dta_bladder)
#' result_univar_covs
#'
#' result_multivar <- basic_table() %>%
#'   summarize_coxreg(
#'     variables = m1_variables,
#'     multivar = TRUE,
#'   ) %>%
#'   build_table(dta_bladder)
#' result_multivar
#'
#' result_multivar_covs <- basic_table() %>%
#'   summarize_coxreg(
#'     variables = m2_variables,
#'     multivar = TRUE,
#'     varlabels = c("Covariate 1", "Covariate 2") # custom labels
#'   ) %>%
#'   build_table(dta_bladder)
#' result_multivar_covs
#'
#' @export
#' @order 2
summarize_coxreg <- function(lyt,
                             variables,
                             control = control_coxreg(),
                             at = list(),
                             multivar = FALSE,
                             common_var = "STUDYID",
                             .stats = c("n", "hr", "ci", "pval", "pval_inter"),
                             .formats = c(
                               n = "xx", hr = "xx.xx", ci = "(xx.xx, xx.xx)",
                               pval = "x.xxxx | (<0.0001)", pval_inter = "x.xxxx | (<0.0001)"
                             ),
                             varlabels = NULL,
                             .indent_mods = NULL,
                             na_str = "",
                             .section_div = NA_character_) {
  # Overview: adds one column per statistic to `lyt`, then (if `arm` is given) a treatment section
  # and (if `covariates` are given) a covariate section. All rows are produced by `a_coxreg()`,
  # which shares one caching environment so that models can be reused across the statistic columns.

  # Interactions are only warned about for multivariate models, but are an error without `arm`.
  if (multivar && control$interaction) {
    warning(paste(
      "Interactions are not available for multivariate cox regression using summarize_coxreg.",
      "The model will be calculated without interaction effects."
    ))
  }
  if (control$interaction && !"arm" %in% names(variables)) {
    stop("To include interactions please specify 'arm' in variables.")
  }

  # `intersect()` keeps the order of the reference vector, not the order given in `.stats`.
  .stats <- if (!"arm" %in% names(variables) || multivar) { # only valid statistics
    intersect(c("hr", "ci", "pval"), .stats)
  } else if (control$interaction) {
    intersect(c("n", "hr", "ci", "pval", "pval_inter"), .stats)
  } else {
    intersect(c("n", "hr", "ci", "pval"), .stats)
  }
  stat_labels <- c(
    n = "n", hr = "Hazard Ratio", ci = paste0(control$conf_level * 100, "% CI"),
    pval = "p-value", pval_inter = "Interaction p-value"
  )
  stat_labels <- stat_labels[names(stat_labels) %in% .stats]
  .formats <- .formats[names(.formats) %in% .stats]
  env <- new.env() # create caching environment

  # One column per statistic, each split on `common_var`. The per-column extra arguments are
  # vectors/lists with one element per statistic; every column receives the same `env`.
  lyt <- lyt %>%
    split_cols_by_multivar(
      vars = rep(common_var, length(.stats)),
      varlabels = stat_labels,
      extra_args = list(
        .stats = .stats, .formats = .formats, .indent_mods = .indent_mods, na_str = rep(na_str, length(.stats)),
        cache_env = replicate(length(.stats), list(env))
      )
    )

  if ("arm" %in% names(variables)) { # treatment effect
    # Row split on `common_var` labelled "Treatment:"; only the first element of `.section_div`
    # is used for this section.
    lyt <- lyt %>%
      split_rows_by(
        common_var,
        split_label = "Treatment:",
        label_pos = "visible",
        child_labels = "hidden",
        section_div = head(.section_div, 1)
      )
    if (!multivar) {
      # Univariate: treatment effect rows only.
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          na_str = na_str,
          extra_args = list(
            variables = variables, control = control, multivar = multivar, eff = TRUE, var_main = multivar,
            labelstr = ""
          )
        )
    } else { # treatment level effects
      # Multivariate: a content row for the treatment main effect followed by the level rows.
      lyt <- lyt %>%
        summarize_row_groups(
          cfun = a_coxreg,
          na_str = na_str,
          extra_args = list(
            variables = variables, control = control, multivar = multivar, eff = TRUE, var_main = multivar
          )
        ) %>%
        analyze_colvars(
          afun = a_coxreg,
          na_str = na_str,
          extra_args = list(eff = TRUE, control = control, variables = variables, multivar = multivar, labelstr = "")
        )
    }
  }

  if ("covariates" %in% names(variables)) { # covariate main effects
    # One non-nested row split per covariate; only the last element of `.section_div` is used here.
    lyt <- lyt %>%
      split_rows_by_multivar(
        vars = variables$covariates,
        varlabels = varlabels,
        split_label = "Covariate:",
        nested = FALSE,
        child_labels = if (multivar || control$interaction || !"arm" %in% names(variables)) "default" else "hidden",
        section_div = tail(.section_div, 1)
      )
    if (multivar || control$interaction || !"arm" %in% names(variables)) {
      # Covariate main effect as a content row (`cfun`).
      lyt <- lyt %>%
        summarize_row_groups(
          cfun = a_coxreg,
          na_str = na_str,
          extra_args = list(
            variables = variables, at = at, control = control, multivar = multivar,
            var_main = if (multivar) multivar else control$interaction
          )
        )
    } else {
      # Univariate with arm and without interaction: the covariate effect is an analysis row;
      # custom `varlabels` are named by covariate so that `a_coxreg()` can look them up.
      if (!is.null(varlabels)) names(varlabels) <- variables$covariates
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          na_str = na_str,
          extra_args = list(
            variables = variables, at = at, control = control, multivar = multivar,
            var_main = if (multivar) multivar else control$interaction,
            labelstr = if (is.null(varlabels)) "" else varlabels
          )
        )
    }

    # This mutation must stay after the blocks above: they read the caller's original
    # `control$interaction`, while the level rows below rely on the modified value.
    if (!"arm" %in% names(variables)) control$interaction <- TRUE # special case: univar no arm
    if (multivar || control$interaction) { # covariate level effects
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          na_str = na_str,
          extra_args = list(variables = variables, at = at, control = control, multivar = multivar, labelstr = ""),
          indent_mod = if (!"arm" %in% names(variables) || multivar) 0L else -1L
        )
    }
  }

  lyt
}

#' Occurrence table sorting
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions to score occurrence table subtables and rows which can be used in the
#' sorting of occurrence tables.
#'
#' @name score_occurrences
NULL

#' @describeIn score_occurrences Scoring function which sums the counts across all
#'   columns. It will fail if anything else but counts are used.
#'
#' @inheritParams rtables_access
#'
#' @return
#' * `score_occurrences()` returns the sum of counts across all columns of a table row.
#'
#' @seealso [h_row_first_values()]
#'
#' @examples
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   analyze_num_patients(
#'     vars = "USUBJID",
#'     .stats = c("unique"),
#'     .labels = c("Total number of patients with at least one event")
#'   ) %>%
#'   split_rows_by("AEBODSYS", child_labels = "visible", nested = FALSE) %>%
#'   summarize_num_patients(
#'     var = "USUBJID",
#'     .stats = c("unique", "nonunique"),
#'     .labels = c(
#'       "Total number of patients with at least one event",
#'       "Total number of events"
#'     )
#'   ) %>%
#'   count_occurrences(vars = "AEDECOD")
#'
#' tbl <- build_table(lyt, tern_ex_adae, alt_counts_df = tern_ex_adsl) %>%
#'   prune_table()
#'
#' tbl_sorted <- tbl %>%
#'   sort_at_path(path = c("AEBODSYS", "*", "AEDECOD"), scorefun = score_occurrences)
#'
#' tbl_sorted
#'
#' @export
score_occurrences <- function(table_row) {
  # Score of a row = total of its counts over all columns.
  sum(h_row_counts(table_row))
}

#' @describeIn score_occurrences Scoring functions can be produced by this constructor to only include
#'   specific columns in the scoring. See [h_row_counts()] for further information.
#'
#' @inheritParams has_count_in_cols
#'
#' @return
#' * `score_occurrences_cols()` returns a function that sums counts across all specified columns
#'   of a table row.
#'
#' @seealso [h_row_counts()]
#'
#' @examples
#' score_cols_a_and_b <- score_occurrences_cols(col_names = c("A: Drug X", "B: Placebo"))
#'
#' # Note that this here just sorts the AEDECOD inside the AEBODSYS. The AEBODSYS are not sorted.
#' # That would require a second pass of `sort_at_path`.
#' tbl_sorted <- tbl %>%
#'   sort_at_path(path = c("AEBODSYS", "*", "AEDECOD"), scorefun = score_cols_a_and_b)
#'
#' tbl_sorted
#'
#' @export
score_occurrences_cols <- function(...) {
  # The returned scorer forwards the column selection captured in `...` to `h_row_counts()`
  # and totals the counts of the selected columns.
  function(table_row) {
    sum(h_row_counts(table_row, ...))
  }
}

#' @describeIn score_occurrences Scoring functions produced by this constructor can be used on
#'   subtables: They sum up all specified column counts in the subtable. This is useful when
#'   there is no available content row summing up these counts.
#'
#' @return
#' * `score_occurrences_subtable()` returns a function that sums counts in each subtable
#'   across all specified columns.
#'
#' @examples
#' score_subtable_all <- score_occurrences_subtable(col_names = names(tbl))
#'
#' # Note that this code just sorts the AEBODSYS, not the AEDECOD within AEBODSYS. That
#' # would require a second pass of `sort_at_path`.
#' tbl_sorted <- tbl %>%
#'   sort_at_path(path = c("AEBODSYS"), scorefun = score_subtable_all, decreasing = FALSE)
#'
#' tbl_sorted
#'
#' @export
score_occurrences_subtable <- function(...) {
  # Row-level scorer restricted to the columns selected via `...`.
  row_scorer <- score_occurrences_cols(...)
  function(table_tree) {
    # Score every leaf row of the subtable and add the scores up.
    leaf_rows <- collect_leaves(table_tree)
    sum(vapply(leaf_rows, row_scorer, numeric(1)))
  }
}

#' @describeIn score_occurrences Produces a score function for sorting table by summing the first content row in
#'   specified columns. Note that this is extending [rtables::cont_n_onecol()] and [rtables::cont_n_allcols()].
#'
#' @return
#' * `score_occurrences_cont_cols()` returns a function that sums counts in the first content row in
#'   specified columns.
#'
#' @export
score_occurrences_cont_cols <- function(...) {
  # Row-level scorer restricted to the columns selected via `...`.
  row_scorer <- score_occurrences_cols(...)
  function(table_tree) {
    if (!inherits(table_tree, "ContentRow")) {
      # Score the first content row of the (sub)table.
      row_scorer(h_content_first_row(table_tree))
    } else {
      # A content row passed in directly gets no score.
      NA
    }
  }
}

#' Tabulate survival duration by subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The [tabulate_survival_subgroups()] function creates a layout element to tabulate survival duration by subgroup,
#' returning statistics including median survival time and hazard ratio for each population subgroup. The table is
#' created from `df`, a list of data frames returned by [extract_survival_subgroups()], with the statistics to include
#' specified via the `vars` parameter.
#'
#' A forest plot can be created from the resulting table using the [g_forest()] function.
#'
#' @inheritParams argument_convention
#' @inheritParams survival_coxph_pairwise
#' @param df (`list`)\cr list of data frames containing all analysis variables. List should be
#'   created using [extract_survival_subgroups()].
#' @param vars (`character`)\cr the names of statistics to be reported among:
#'   * `n_tot_events`: Total number of events per group.
#'   * `n_events`: Number of events per group.
#'   * `n_tot`: Total number of observations per group.
#'   * `n`: Number of observations per group.
#'   * `median`: Median survival time.
#'   * `hr`: Hazard ratio.
#'   * `ci`: Confidence interval of hazard ratio.
#'   * `pval`: p-value of the effect.
#'   Note, at least one of the statistics `n_tot` and `n_tot_events` is required, as are both
#'   `hr` and `ci`.
#' @param time_unit (`string`)\cr label with unit of median survival time. Default `NULL` skips displaying unit.
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. The tables are then typically used as input for forest plots.
#'
#' @seealso [extract_survival_subgroups()]
#'
#' @examples
#' library(dplyr)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X"),
#'     SEX %in% c("M", "F")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")),
#'     SEX = droplevels(SEX),
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c(
#'   "ARM" = adtte_labels[["ARM"]],
#'   "SEX" = adtte_labels[["SEX"]],
#'   "AVALU" = adtte_labels[["AVALU"]],
#'   "is_event" = "Event Flag"
#' )
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' df <- extract_survival_subgroups(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM", subgroups = c("SEX", "BMRKR2")
#'   ),
#'   label_all = "Total Patients",
#'   data = adtte_f
#' )
#' df
#'
#' df_grouped <- extract_survival_subgroups(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM", subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#' df_grouped
#'
#' @name survival_duration_subgroups
#' @order 1
NULL

#' Prepare survival data for population subgroups in data frames
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares estimates of median survival times and treatment hazard ratios for population subgroups in
#' data frames. Simple wrapper for [h_survtime_subgroups_df()] and [h_coxph_subgroups_df()]. Result is a `list`
#' of two `data.frame`s: `survtime` and `hr`. `variables` corresponds to the names of variables found in `data`,
#' passed as a named `list` and requires elements `tte`, `is_event`, `arm` and optionally `subgroups` and `strata`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams survival_duration_subgroups
#' @inheritParams survival_coxph_pairwise
#'
#' @return A named `list` of two elements:
#'   * `survtime`: A `data.frame` containing columns `arm`, `n`, `n_events`, `median`, `subgroup`, `var`,
#'     `var_label`, and `row_type`.
#'   * `hr`: A `data.frame` containing columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`, `conf_level`,
#'     `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @seealso [survival_duration_subgroups]
#'
#' @export
extract_survival_subgroups <- function(variables,
                                       data,
                                       groups_lists = list(),
                                       control = control_coxph(),
                                       label_all = "All Patients") {
  # Backward compatibility: the deprecated `strat` element is still accepted and copied over to `strata`
  # (overwriting any `strata` element that was also supplied).
  if ("strat" %in% names(variables)) {
    warning(
      "Warning: the `strat` element name of the `variables` list argument to `extract_survival_subgroups()` ",
      "was deprecated in tern 0.9.4.\n  ",
      "Please use the name `strata` instead of `strat` in the `variables` argument."
    )
    variables[["strata"]] <- variables[["strat"]]
  }

  # Survival time summaries per subgroup
  df_survtime <- h_survtime_subgroups_df(
    variables,
    data,
    groups_lists = groups_lists,
    label_all = label_all
  )

  # Hazard ratio estimates per subgroup; only this helper receives the `control` settings
  df_hr <- h_coxph_subgroups_df(
    variables,
    data,
    groups_lists = groups_lists,
    control = control,
    label_all = label_all
  )

  list(survtime = df_survtime, hr = df_hr)
}

#' @describeIn survival_duration_subgroups Formatted analysis function which is used as
#'   `afun` in `tabulate_survival_subgroups()`.
#'
#' @return
#' * `a_survival_subgroups()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_survival_subgroups <- function(df,
                                 labelstr = "",
                                 ...,
                                 .stats = NULL,
                                 .stat_names = NULL,
                                 .formats = NULL,
                                 .labels = NULL,
                                 .indent_mods = NULL) {
  # Split the dots into the layout-provided parameters and the remaining extra arguments
  dot_args <- list(...)
  afun_params <- retrieve_extra_afun_params(names(dot_args$.additional_fun_parameters))
  dot_args$.additional_fun_parameters <- NULL
  # The statistic shown in the current column: the column variable if available, else `.stats`
  col_stat <- afun_params$.var %||% .stats

  # Build unique row identifiers
  is_biomarker_tbl <- "biomarker" %in% names(dot_args) && "biomarker" %in% names(df)
  row_ids <- if (!is_biomarker_tbl) {
    # data rows for non-biomarker tables - e.g. "Total Patients", "F", "M"
    make.unique(as.character(df$subgroup))
  } else if ("overall" %in% names(dot_args)) {
    # label rows for (nested) biomarker tables - e.g. "AGE", "BMRKR1"
    as.character(df$biomarker)
  } else {
    # data rows for (nested) biomarker tables - e.g. "AGE.LOW", "BMRKR1.Total Patients"
    paste(as.character(df$biomarker), as.character(df$subgroup), sep = ".")
  }

  # No data: return a single NA cell named after the current statistic
  if (nrow(df) == 0) {
    return(in_rows(.list = stats::setNames(list(NA), col_stat)))
  }

  # All statistics are read directly from the columns of df
  df_cols <- as.list(df)

  # Resolve statistics, formats, labels and indentation for every row of every statistic
  .stats <- get_stats("tabulate_survival_subgroups", stats_in = col_stat)
  levels_per_stats <- stats::setNames(rep(list(row_ids), length(.stats)), .stats)
  .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats)
  # default labels are pre-determined in extract_*() function
  default_labels <- stats::setNames(as.list(as.character(df$subgroup)), row_ids)
  .labels <- get_labels_from_stats(
    .stats, .labels, levels_per_stats,
    tern_defaults = default_labels
  )
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats)

  # One named value per row for each statistic, then flattened while keeping NULL entries
  stat_values <- lapply(
    .stats,
    function(stat) stats::setNames(df_cols[[stat]], row_ids)
  )
  stat_values <- .unlist_keep_nulls(stats::setNames(stat_values, .stats))

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, stat_values, afun_params$.df_row, afun_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(stat_values, .stat_names)

  # Row names are taken from `.labels` before it is flattened
  row_names <- names(.labels)
  in_rows(
    .list = stat_values,
    .formats = .formats,
    .names = row_names,
    .stat_names = .stat_names,
    .labels = .unlist_keep_nulls(.labels),
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}

#' @describeIn survival_duration_subgroups Table-creating function which creates a table
#'   summarizing survival by subgroup. This function is a wrapper for [rtables::analyze_colvars()]
#'   and [rtables::summarize_row_groups()].
#'
#' @param label_all `r lifecycle::badge("deprecated")`\cr please assign the `label_all` parameter within the
#'   [extract_survival_subgroups()] function when creating `df`.
#' @param riskdiff (`list`)\cr if a risk (proportion) difference column should be added, a list of settings to apply
#'   within the column. See [control_riskdiff()] for details. If `NULL`, no risk difference column will be added. If
#'   `riskdiff$arm_x` and `riskdiff$arm_y` are `NULL`, the first level of `df$survtime$arm` will be used as `arm_x`
#'   and the second level as `arm_y`.
#'
#' @return An `rtables` table summarizing survival by subgroup.
#'
#' @examples
#' ## Table with default columns.
#' basic_table() %>%
#'   tabulate_survival_subgroups(df, time_unit = adtte_f$AVALU[1])
#'
#' ## Table with a manually chosen set of columns: adding "pval".
#' basic_table() %>%
#'   tabulate_survival_subgroups(
#'     df = df,
#'     vars = c("n_tot_events", "n_events", "median", "hr", "ci", "pval"),
#'     time_unit = adtte_f$AVALU[1]
#'   )
#'
#' @export
#' @order 2
tabulate_survival_subgroups <- function(lyt,
                                        df,
                                        vars = c("n_tot_events", "n_events", "median", "hr", "ci"),
                                        groups_lists = list(),
                                        label_all = lifecycle::deprecated(),
                                        time_unit = NULL,
                                        riskdiff = NULL,
                                        na_str = default_na_str(),
                                        ...,
                                        .stat_names = NULL,
                                        .formats = NULL,
                                        .labels = NULL,
                                        .indent_mods = NULL) {
  # Input checks: at least one total-count statistic plus both "hr" and "ci" must be requested
  checkmate::assert_list(riskdiff, null.ok = TRUE)
  checkmate::assert_true(any(c("n_tot", "n_tot_events") %in% vars))
  checkmate::assert_true(all(c("hr", "ci") %in% vars))
  if ("pval" %in% vars && !"pval" %in% names(df$hr)) {
    # `extract_survival_subgroups()` has no "method" argument; its settings are passed via "control"
    warning(
      'The "pval" statistic has been selected but is not present in "df" so it will not be included in the output ',
      'table. To include the "pval" statistic, please generate "df" with `extract_survival_subgroups()`, where the ',
      'p-value test is set via the "control" argument (see `control_coxph()`).'
    )
  }

  # `label_all` is deprecated here and is not used below; it only triggers the deprecation warning
  if (lifecycle::is_present(label_all)) {
    lifecycle::deprecate_warn(
      "0.9.5", "tabulate_survival_subgroups(label_all)",
      details =
        "Please assign the `label_all` parameter within the `extract_survival_subgroups()` function when creating `df`."
    )
  }

  # Process standard extra arguments
  extra_args <- list(".stats" = vars)
  if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names
  if (!is.null(.formats)) extra_args[[".formats"]] <- .formats
  if (!is.null(.labels)) extra_args[[".labels"]] <- .labels
  if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods

  # Create "ci" column from "lcl" and "ucl"
  df$hr$ci <- combine_vectors(df$hr$lcl, df$hr$ucl)

  # Extract additional parameters from df
  conf_level <- df$hr$conf_level[1]
  method <- if ("pval_label" %in% names(df$hr)) df$hr$pval_label[1] else NULL
  colvars <- d_survival_subgroups_colvars(vars, conf_level = conf_level, method = method, time_unit = time_unit)
  # Per-arm statistics come from df$survtime, across-arm statistics from df$hr.
  # The hr variables are matched on the label names, so a statistic without a label is left out.
  survtime_vars <- intersect(colvars$vars, c("n", "n_events", "median"))
  hr_vars <- intersect(names(colvars$labels), c("n_tot", "n_tot_events", "hr", "ci", "pval"))
  colvars_survtime <- list(vars = survtime_vars, labels = colvars$labels[survtime_vars])
  colvars_hr <- list(vars = hr_vars, labels = colvars$labels[hr_vars])

  # Process additional arguments to the statistic function
  extra_args <- c(
    extra_args,
    groups_lists = list(groups_lists), conf_level = conf_level, method = method,
    ...
  )

  # Adding additional info from layout to analysis function
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(a_survival_subgroups) <- c(formals(a_survival_subgroups), extra_args[[".additional_fun_parameters"]])

  # Add risk difference column
  if (!is.null(riskdiff)) {
    # Default to the first two arm levels when the arms to compare are not given
    if (is.null(riskdiff$arm_x)) riskdiff$arm_x <- levels(df$survtime$arm)[1]
    if (is.null(riskdiff$arm_y)) riskdiff$arm_y <- levels(df$survtime$arm)[2]
    colvars_hr$vars <- c(colvars_hr$vars, "riskdiff")
    colvars_hr$labels <- c(colvars_hr$labels, riskdiff = riskdiff$col_label)
    # Names of the pivoted columns: events for x and y, then totals for x and y (arms are recycled)
    arm_cols <- paste(c("n_events", "n_events", "n", "n"), c(riskdiff$arm_x, riskdiff$arm_y), sep = "_")
    extra_args[[".formats"]] <- c(extra_args[[".formats"]], list(riskdiff = riskdiff$format))

    # One row per subgroup with the per-arm counts side by side, then the proportion difference
    df_prop_diff <- df$survtime %>%
      dplyr::select(-"median") %>%
      tidyr::pivot_wider(
        id_cols = c("subgroup", "var", "var_label", "row_type"),
        names_from = "arm",
        values_from = c("n", "n_events")
      ) %>%
      dplyr::rowwise() %>%
      dplyr::mutate(
        riskdiff = stat_propdiff_ci(
          x = as.list(.data[[arm_cols[1]]]),
          y = as.list(.data[[arm_cols[2]]]),
          N_x = .data[[arm_cols[3]]],
          N_y = .data[[arm_cols[4]]],
          pct = riskdiff$pct
        )
      ) %>%
      dplyr::select(-dplyr::all_of(arm_cols))

    df$hr <- df$hr %>%
      dplyr::left_join(
        df_prop_diff,
        by = c("subgroup", "var", "var_label", "row_type")
      )
  }

  # Add columns from table_survtime (optional)
  if (length(colvars_survtime$vars) > 0) {
    lyt_survtime <- split_cols_by(lyt = lyt, var = "arm")
    lyt_survtime <- split_cols_by_multivar(
      lyt = lyt_survtime,
      vars = colvars_survtime$vars,
      varlabels = colvars_survtime$labels
    )

    # Add "All Patients" row
    lyt_survtime <- split_rows_by(
      lyt = lyt_survtime,
      var = "row_type",
      split_fun = keep_split_levels("content"),
      nested = FALSE,
      child_labels = "hidden",
      parent_name = "All Patients"
    )
    lyt_survtime <- analyze_colvars(
      lyt = lyt_survtime,
      afun = a_survival_subgroups,
      na_str = na_str,
      extra_args = extra_args
    )

    # Add analysis rows
    if ("analysis" %in% df$survtime$row_type) {
      lyt_survtime <- split_rows_by(
        lyt = lyt_survtime,
        var = "row_type",
        split_fun = keep_split_levels("analysis"),
        nested = FALSE,
        child_labels = "hidden",
        parent_name = "analysis rows"
      )
      lyt_survtime <- split_rows_by(lyt = lyt_survtime, var = "var_label", nested = TRUE)
      lyt_survtime <- analyze_colvars(
        lyt = lyt_survtime,
        afun = a_survival_subgroups,
        na_str = na_str,
        inclNAs = TRUE,
        extra_args = extra_args
      )
    }

    table_survtime <- build_table(lyt_survtime, df = df$survtime)
  } else {
    table_survtime <- NULL
  }

  # Add columns from table_hr ("n_tot_events" or "n_tot", "hr" and "ci" required)
  lyt_hr <- split_cols_by(lyt = lyt, var = "arm")
  lyt_hr <- split_cols_by_multivar(
    lyt = lyt_hr,
    vars = colvars_hr$vars,
    varlabels = colvars_hr$labels
  )

  # Add "All Patients" row
  lyt_hr <- split_rows_by(
    lyt = lyt_hr,
    var = "row_type",
    split_fun = keep_split_levels("content"),
    nested = FALSE,
    child_labels = "hidden",
    parent_name = "All patient row"
  )
  lyt_hr <- analyze_colvars(
    lyt = lyt_hr,
    afun = a_survival_subgroups,
    na_str = na_str,
    extra_args = extra_args
  ) %>%
    append_topleft("Baseline Risk Factors")

  # Add analysis rows
  if ("analysis" %in% df$survtime$row_type) {
    lyt_hr <- split_rows_by(
      lyt = lyt_hr,
      var = "row_type",
      split_fun = keep_split_levels("analysis"),
      nested = FALSE,
      child_labels = "hidden",
      parent_name = "analysis rows"
    )
    lyt_hr <- split_rows_by(lyt = lyt_hr, var = "var_label", nested = TRUE)
    lyt_hr <- analyze_colvars(
      lyt = lyt_hr,
      afun = a_survival_subgroups,
      na_str = na_str,
      inclNAs = TRUE,
      extra_args = extra_args
    )
  }

  table_hr <- build_table(lyt_hr, df = df$hr)

  # Join tables, add forest plot attributes
  n_tot_ids <- grep("^n_tot", colvars_hr$vars)
  if (is.null(table_survtime)) {
    result <- table_hr
    hr_id <- match("hr", colvars_hr$vars)
    ci_id <- match("ci", colvars_hr$vars)
  } else {
    # Column order of the joined table: total counts, per-arm columns, remaining hr columns.
    # The "hr" and "ci" positions are shifted by the columns placed in front of them.
    result <- cbind_rtables(table_hr[, n_tot_ids], table_survtime, table_hr[, -n_tot_ids])
    hr_id <- length(n_tot_ids) + ncol(table_survtime) + match("hr", colvars_hr$vars[-n_tot_ids])
    ci_id <- length(n_tot_ids) + ncol(table_survtime) + match("ci", colvars_hr$vars[-n_tot_ids])
    n_tot_ids <- seq_along(n_tot_ids)
  }
  structure(
    result,
    forest_header = paste0(rev(levels(df$survtime$arm)), "\nBetter"),
    col_x = hr_id,
    col_ci = ci_id,
    col_symbol_size = n_tot_ids[1] # for scaling the symbol sizes in forest plots
  )
}

#' Labels for column variables in survival duration by subgroup table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to check variables included in [tabulate_survival_subgroups()] and create column labels.
#'
#' @inheritParams tabulate_survival_subgroups
#' @inheritParams argument_convention
#' @param method (`string`)\cr p-value method for testing hazard ratio = 1.
#'
#' @return A `list` of variables and their labels to tabulate.
#'
#' @note At least one of `n_tot` and `n_tot_events` must be provided in `vars`.
#'
#' @export
d_survival_subgroups_colvars <- function(vars,
                                         conf_level,
                                         method,
                                         time_unit = NULL) {
  # Argument checks: "hr" and "ci" are mandatory, at least one total-count statistic is needed,
  # and only the known statistic names are allowed.
  checkmate::assert_character(vars)
  checkmate::assert_string(time_unit, null.ok = TRUE)
  checkmate::assert_subset(c("hr", "ci"), vars)
  checkmate::assert_true(any(c("n_tot", "n_tot_events") %in% vars))
  known_stats <- c("n", "n_events", "median", "n_tot", "n_tot_events", "hr", "ci", "pval")
  checkmate::assert_subset(vars, known_stats)

  # The time unit, when given, is appended to the median label in parentheses
  median_label <- if (is.null(time_unit)) {
    "Median"
  } else {
    paste0("Median (", time_unit, ")")
  }
  ci_label <- paste0(100 * conf_level, "% Wald CI")

  # Labels for all known statistics; a `NULL` method contributes no "pval" entry
  all_labels <- c(
    n = "n",
    n_events = "Events",
    median = median_label,
    n_tot = "Total n",
    n_tot_events = "Total Events",
    hr = "Hazard Ratio",
    ci = ci_label,
    pval = method
  )

  list(vars = vars, labels = all_labels[vars])
}

#' Cox regression helper function for interactions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Test and estimate the effect of a treatment in interaction with a covariate.
#' The effect is estimated as the HR of the tested treatment for a given level
#' of the covariate, in comparison to the treatment control.
#'
#' @inheritParams argument_convention
#' @param x (`numeric` or `factor`)\cr the values of the covariate to be tested.
#' @param effect (`string`)\cr the name of the effect to be tested and estimated.
#' @param covar (`string`)\cr the name of the covariate in the model.
#' @param mod (`coxph`)\cr the Cox regression model.
#' @param label (`string`)\cr the label to be returned as `term_label`.
#' @param control (`list`)\cr a list of controls as returned by [control_coxreg()].
#' @param ... see methods.
#'
#' @examples
#' library(survival)
#'
#' set.seed(1, kind = "Mersenne-Twister")
#'
#' # Testing dataset [survival::bladder].
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   data.frame(
#'     time = stop,
#'     status = event,
#'     armcd = as.factor(rx),
#'     covar1 = as.factor(enum),
#'     covar2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4,
#'       labels = c("F", "F", "M", "M")
#'     )
#'   )
#' )
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#'
#' plot(
#'   survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder),
#'   lty = 2:4,
#'   xlab = "Months",
#'   col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
#' )
#'
#' @name cox_regression_inter
NULL

#' @describeIn cox_regression_inter S3 generic helper function to determine interaction effect.
#'
#' @return
#' * `h_coxreg_inter_effect()` returns a `data.frame` of covariate interaction effects consisting of the following
#'   variables: `effect`, `term`, `term_label`, `level`, `n`, `hr`, `lcl`, `ucl`, `pval`, and `pval_inter`.
#'
#' @export
h_coxreg_inter_effect <- function(x,
                                  effect,
                                  covar,
                                  mod,
                                  label,
                                  control,
                                  ...) {
  # S3 generic: dispatch happens on the class of the first argument, `x`
  UseMethod("h_coxreg_inter_effect")
}

#' @describeIn cox_regression_inter Method for `numeric` class. Estimates the interaction with a `numeric` covariate.
#'
#' @method h_coxreg_inter_effect numeric
#'
#' @param at (`list`)\cr a list with items named after the covariate, every
#'   item is a vector of levels at which the interaction should be estimated.
#'
#' @export
h_coxreg_inter_effect.numeric <- function(x,
                                          effect,
                                          covar,
                                          mod,
                                          label,
                                          control,
                                          at,
                                          ...) {
  all_coefs <- stats::coef(mod)

  # Positions of the model terms (strata terms excluded) whose label matches `effect`;
  # exactly two matches are required.
  model_terms <- attr(stats::terms(mod), "term.labels")
  effect_pos <- grep(pattern = effect, x = model_terms[!grepl("strata\\(", model_terms)])
  checkmate::assert_vector(effect_pos, len = 2)

  # Estimates, variances and covariance of the two selected coefficients
  est <- all_coefs[effect_pos]
  mod_vcov <- stats::vcov(mod)
  est_var <- diag(mod_vcov)[effect_pos]
  est_cov <- mod_vcov[effect_pos[1], effect_pos[2]]

  # Covariate values at which the effect is evaluated: user-supplied via `at`, else the median of `x`
  at_values <- at[[covar]]
  xval <- if (!is.null(at_values)) at_values else stats::median(x)

  # The coefficient whose name does not contain `covar` is combined with `xval` times the other one
  no_covar <- !grepl(covar, names(est))
  log_hr <- est[no_covar] + xval * est[!no_covar]
  log_hr_se <- sqrt(
    est_var[no_covar] +
      xval ^ 2 * est_var[!no_covar] + # styler: off
      2 * xval * est_cov
  )

  # Normal quantile for the two-sided confidence interval
  z <- stats::qnorm((1 + control$conf_level) / 2)

  data.frame(
    effect = "Covariate:",
    term = rep(covar, length(xval)),
    term_label = paste0("  ", xval),
    level = as.character(xval),
    n = NA,
    hr = exp(log_hr),
    lcl = exp(log_hr - z * log_hr_se),
    ucl = exp(log_hr + z * log_hr_se),
    pval = NA,
    pval_inter = NA,
    stringsAsFactors = FALSE
  )
}

#' @describeIn cox_regression_inter Method for `factor` class. Estimate the interaction with a `factor` covariate.
#'
#' @method h_coxreg_inter_effect factor
#'
#' @param data (`data.frame`)\cr the data frame on which the model was fit.
#'
#' @export
h_coxreg_inter_effect.factor <- function(x,
                                         effect,
                                         covar,
                                         mod,
                                         label,
                                         control,
                                         data,
                                         ...) {
  # Use the confidence level requested in `control` (as the numeric method does); fall back to
  # 0.95 when `control` is not supplied or carries no `conf_level`.
  conf_level <- if (missing(control) || is.null(control$conf_level)) {
    0.95
  } else {
    control$conf_level
  }

  lvl_given <- levels(x)
  # Only the first element of the estimation list is used
  y <- h_coxreg_inter_estimations(
    variable = effect, given = covar,
    lvl_var = levels(data[[effect]]),
    lvl_given = lvl_given,
    mod = mod,
    conf_level = conf_level
  )[[1]]

  # One row per level of the covariate
  data.frame(
    effect = "Covariate:",
    term = rep(covar, nrow(y)),
    term_label = paste0("  ", lvl_given),
    level = lvl_given,
    n = NA,
    hr = y[, "hr"],
    lcl = y[, "lcl"],
    ucl = y[, "ucl"],
    pval = NA,
    pval_inter = NA,
    stringsAsFactors = FALSE
  )
}

#' @describeIn cox_regression_inter Method for `character` class. Estimate the interaction with a `character` covariate.
#'   This makes an automatic conversion to `factor` and then forwards to the method for factors.
#'
#' @method h_coxreg_inter_effect character
#'
#' @note
#' * Automatic conversion of character to factor does not guarantee results can be generated correctly. It is
#'   therefore better to always pre-process the dataset such that factors are manually created from character
#'   variables before passing the dataset to [rtables::build_table()].
#'
#' @export
h_coxreg_inter_effect.character <- function(x,
                                            effect,
                                            covar,
                                            mod,
                                            label,
                                            control,
                                            data,
                                            ...) {
  # Convert to factor and re-dispatch through the generic; all other arguments are forwarded unchanged
  h_coxreg_inter_effect(
    x = as.factor(x),
    effect = effect,
    covar = covar,
    mod = mod,
    label = label,
    control = control,
    data = data,
    ...
  )
}

#' @describeIn cox_regression_inter A higher level function to get
#'   the results of the interaction test and the estimated values.
#'
#' @return
#' * `h_coxreg_extract_interaction()` returns the result of an interaction test and the estimated values. If
#'   no interaction, [h_coxreg_univar_extract()] is applied instead.
#'
#' @examples
#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder)
#' h_coxreg_extract_interaction(
#'   mod = mod, effect = "armcd", covar = "covar1", data = dta_bladder,
#'   control = control_coxreg()
#' )
#'
#' @export
h_coxreg_extract_interaction <- function(effect,
                                         covar,
                                         mod,
                                         data,
                                         at,
                                         control) {
  # Without any second-order term in the model, fall back to the univariate extraction
  has_interaction <- any(attr(stats::terms(mod), "order") == 2)
  if (!has_interaction) {
    univar <- h_coxreg_univar_extract(
      effect = effect, covar = covar, mod = mod, data = data, control = control
    )
    univar$pval_inter <- NA
    return(univar)
  }

  # Map the requested p-value method to the test statistic name passed to the anova helper
  test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]
  anova_tbl <- broom::tidy(muffled_car_anova(mod, test_statistic))

  # p-value of the main treatment effect
  pval_effect <- anova_tbl[anova_tbl$term == effect, ][["p.value"]]
  # p-value of the interaction: rows whose term contains ":"
  pval_interaction <- anova_tbl[grep(":", anova_tbl$term), ][["p.value"]]

  covar_label <- unname(labels_or_names(data[covar]))

  # Header row carrying the tests
  test_row <- data.frame(
    effect = "Covariate:",
    term = covar,
    term_label = covar_label,
    level = "",
    n = mod$n, hr = NA, lcl = NA, ucl = NA, pval = pval_effect,
    pval_inter = pval_interaction,
    stringsAsFactors = FALSE
  )

  # Estimate rows; the method is selected by the class of the covariate column
  estimate_rows <- h_coxreg_inter_effect(
    data[[covar]],
    covar = covar,
    effect = effect,
    mod = mod,
    label = covar_label,
    at = at,
    control = control,
    data = data
  )

  rbind(test_row, estimate_rows)
}

#' @describeIn cox_regression_inter Hazard ratio estimation in interactions.
#'
#' @param variable,given (`string`)\cr the name of variables in interaction. We seek the estimation
#'   of the levels of `variable` given the levels of `given`.
#' @param lvl_var,lvl_given (`character`)\cr corresponding levels as given by [levels()].
#' @param mod (`coxph`)\cr a fitted Cox regression model (see [survival::coxph()]).
#'
#' @details Given the Cox regression investigating the effect of Arm (A, B, C; reference A)
#'   and Sex (F, M; reference F) and the model being abbreviated: y ~ Arm + Sex + Arm:Sex.
#'   The Cox regression estimates the coefficients along with a variance-covariance matrix for:
#'
#'   - b1 (arm b), b2 (arm c)
#'   - b3 (sex m)
#'   - b4 (arm b: sex m), b5 (arm c: sex m)
#'
#'   The estimation of the Hazard Ratio for arm C/sex M is given in reference
#'   to arm A/sex M by exp(b2 + b3 + b5) / exp(b3) = exp(b2 + b5).
#'   The interaction coefficient is deduced by b2 + b5 while the standard error
#'   is obtained as `sqrt(Var(b2) + Var(b5) + 2 * Cov(b2, b5))`.
#'
#' @return
#' * `h_coxreg_inter_estimations()` returns a list of matrices (one per level of variable) with rows corresponding
#'   to the combinations of `variable` and `given`, with columns:
#'   * `coef_hat`: Estimation of the coefficient.
#'   * `coef_se`: Standard error of the estimation.
#'   * `hr`: Hazard ratio.
#'   * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio.
#'
#' @examples
#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder)
#' result <- h_coxreg_inter_estimations(
#'   variable = "armcd", given = "covar1",
#'   lvl_var = levels(dta_bladder$armcd),
#'   lvl_given = levels(dta_bladder$covar1),
#'   mod = mod, conf_level = .95
#' )
#' result
#'
#' @export
h_coxreg_inter_estimations <- function(variable,
                                       given,
                                       lvl_var,
                                       lvl_given,
                                       mod,
                                       conf_level = 0.95) {
  # Coefficient-style names: variable name pasted to each level
  var_terms <- paste0(variable, lvl_var[-1]) # [-1]: reference level
  given_terms <- paste0(given, lvl_given)

  # One row per (variable level, given level) combination, ordered by variable then given
  combos <- expand.grid(variable = var_terms, given = given_terms)
  combos <- combos[order(combos$variable, combos$given), ]
  # Both orderings of the interaction term name are generated
  combos$inter <- paste0(combos$variable, ":", combos$given)
  combos$rev_inter <- paste0(combos$given, ":", combos$variable)

  group_by_variable <- combos$variable
  combo_names <- paste(combos$variable, combos$given, sep = "/")

  # All-zero template with one entry per column of the model matrix
  template <- stats::model.matrix(mod)[1, ]
  template[template != 0] <- 0

  # For each combination, flag the coefficients named after the variable level or after either
  # spelling of the interaction; the `given` term itself is never flagged.
  contrast <- apply(
    X = combos, MARGIN = 1, FUN = function(combo) {
      flagged <- combo[names(combo) != "given"]
      template[names(template) %in% flagged] <- 1
      template
    }
  )
  colnames(contrast) <- combo_names

  mod_coef <- stats::coef(mod)
  mod_vcov <- stats::vcov(mod)

  # Point estimate: sum of the flagged coefficients
  log_hr <- t(contrast) %*% as.matrix(mod_coef)
  dimnames(log_hr)[2] <- "coef"

  # Standard error: square root of the sum over the flagged block of the variance-covariance matrix
  log_hr_se <- apply(
    contrast, 2,
    function(flags) {
      keep <- as.logical(flags)
      sqrt(sum(mod_vcov[keep, keep]))
    }
  )

  # Hazard ratio and its confidence limits on the exponential scale
  z <- stats::qnorm((1 + conf_level) / 2)
  est <- cbind(log_hr, `se(coef)` = log_hr_se)
  est <- apply(est, 1, function(row) {
    row["hr"] <- exp(row["coef"])
    row["lcl"] <- exp(row["coef"] - z * row["se(coef)"])
    row["ucl"] <- exp(row["coef"] + z * row["se(coef)"])
    row
  })
  est <- t(est)

  # Split the rows by level of `variable`, one matrix per level
  res <- by(est, group_by_variable, identity)
  res <- lapply(res, as.matrix)
  attr(res, "details") <- paste0(
    "Estimations of ", variable,
    " hazard ratio given the level of ", given, " compared to ",
    variable, " level ", lvl_var[1], "."
  )
  res
}

#' Tabulate biomarker effects on survival by subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The [tabulate_survival_biomarkers()] function creates a layout element to tabulate the estimated effects of multiple
#' continuous biomarker variables on survival across subgroups, returning statistics including median survival time and
#' hazard ratio for each population subgroup. The table is created from `df`, a list of data frames returned by
#' [extract_survival_biomarkers()], with the statistics to include specified via the `vars` parameter.
#'
#' A forest plot can be created from the resulting table using the [g_forest()] function.
#'
#' @inheritParams fit_coxreg_multivar
#' @inheritParams survival_duration_subgroups
#' @inheritParams argument_convention
#' @param df (`data.frame`)\cr containing all analysis variables, as returned by
#'   [extract_survival_biomarkers()].
#' @param vars (`character`)\cr the names of statistics to be reported among:
#'   * `n_tot_events`: Total number of events per group.
#'   * `n_tot`: Total number of observations per group.
#'   * `median`: Median survival time.
#'   * `hr`: Hazard ratio.
#'   * `ci`: Confidence interval of hazard ratio.
#'   * `pval`: p-value of the effect.
#'   Note, one of the statistics `n_tot` and `n_tot_events`, as well as both `hr` and `ci` are required.
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. The tables are then typically used as input for forest plots.
#'
#' @examples
#' library(dplyr)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`,
#' # in multiple regression models containing one covariate `SEX`,
#' # as well as one stratification variable `STRATA1`. The subgroups
#' # are defined by the levels of `BMRKR2`.
#'
#' df <- extract_survival_biomarkers(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     strata = "STRATA1",
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   label_all = "Total Patients",
#'   data = adtte_f
#' )
#' df
#'
#' # Here we group the levels of `BMRKR2` manually.
#' df_grouped <- extract_survival_biomarkers(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     strata = "STRATA1",
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#' df_grouped
#'
#' @name survival_biomarkers_subgroups
#' @order 1
NULL

#' Prepare survival data estimates for multiple biomarkers in a single data frame
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares estimates for number of events, patients and median survival times, as well as hazard ratio estimates,
#' confidence intervals and p-values, for multiple biomarkers across population subgroups in a single data frame.
#' `variables` corresponds to the names of variables found in `data`, passed as a named `list` and requires elements
#' `tte`, `is_event`, `biomarkers` (vector of continuous biomarker variables), and optionally `subgroups` and `strata`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams fit_coxreg_multivar
#' @inheritParams survival_duration_subgroups
#'
#' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_tot_events`,
#'   `median`, `hr`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`,
#'   `var_label`, and `row_type`.
#'
#' @seealso [h_coxreg_mult_cont_df()] which is used internally, [tabulate_survival_biomarkers()].
#'
#' @export
extract_survival_biomarkers <- function(variables,
                                        data,
                                        groups_lists = list(),
                                        control = control_coxreg(),
                                        label_all = "All Patients") {
  # Backward compatibility: the deprecated `strat` element is still accepted and copied over to `strata`.
  if ("strat" %in% names(variables)) {
    warning(
      "Warning: the `strat` element name of the `variables` list argument to `extract_survival_biomarkers() ",
      "was deprecated in tern 0.9.4.\n  ",
      "Please use the name `strata` instead of `strat` in the `variables` argument."
    )
    variables[["strata"]] <- variables[["strat"]]
  }

  checkmate::assert_list(variables)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_string(label_all)

  # Estimates on the full dataset, labelled with `label_all` and flagged as the "content" row.
  overall <- h_coxreg_mult_cont_df(variables = variables, data = data, control = control)
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  # Without subgroup variables the overall estimates are the complete result.
  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # One model fit per subgroup dataset; every estimate row gets the subgroup's label columns attached.
  subgroup_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  per_subgroup <- lapply(subgroup_data, function(grp) {
    estimates <- h_coxreg_mult_cont_df(variables = variables, data = grp$df, control = control)
    # Repeat the first row of the label data once per estimate row.
    result_labels <- grp$df_labels[rep(1, times = nrow(estimates)), ]
    cbind(estimates, result_labels)
  })

  subgroups <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  subgroups$row_type <- "analysis"

  rbind(overall, subgroups)
}

#' @describeIn survival_biomarkers_subgroups Table-creating function which creates a table
#'   summarizing biomarker effects on survival by subgroup.
#'
#' @param label_all `r lifecycle::badge("deprecated")`\cr please assign the `label_all` parameter within the
#'   [extract_survival_biomarkers()] function when creating `df`.
#'
#' @return An `rtables` table summarizing biomarker effects on survival by subgroup.
#'
#' @note In contrast to [tabulate_survival_subgroups()] this tabulation function does
#'   not start from an input layout `lyt`. This is because internally the table is
#'   created by combining multiple subtables.
#'
#' @seealso [extract_survival_biomarkers()]
#'
#' @examples
#' ## Table with default columns.
#' tabulate_survival_biomarkers(df)
#'
#' ## Table with a manually chosen set of columns: leave out "pval", reorder.
#' tab <- tabulate_survival_biomarkers(
#'   df = df,
#'   vars = c("n_tot_events", "ci", "n_tot", "median", "hr"),
#'   time_unit = as.character(adtte_f$AVALU[1])
#' )
#'
#' ## Finally produce the forest plot.
#' \donttest{
#' g_forest(tab, xlim = c(0.8, 1.2))
#' }
#'
#' @export
#' @order 2
tabulate_survival_biomarkers <- function(df,
                                         vars = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"),
                                         groups_lists = list(),
                                         control = control_coxreg(),
                                         label_all = lifecycle::deprecated(),
                                         time_unit = NULL,
                                         na_str = default_na_str(),
                                         ...,
                                         .stat_names = NULL,
                                         .formats = NULL,
                                         .labels = NULL,
                                         .indent_mods = NULL) {
  # `label_all` is only checked for presence so that a deprecation warning can be raised.
  if (lifecycle::is_present(label_all)) {
    lifecycle::deprecate_warn(
      "0.9.5", "tabulate_survival_biomarkers(label_all)",
      details = paste(
        "Please assign the `label_all` parameter within the",
        "`extract_survival_biomarkers()` function when creating `df`."
      )
    )
  }

  checkmate::assert_data_frame(df)
  checkmate::assert_character(df$biomarker)
  checkmate::assert_character(df$biomarker_label)
  checkmate::assert_subset(vars, get_stats("tabulate_survival_biomarkers"))

  # Standard formatting arguments: `.stats` is always passed on, the others only when supplied.
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  extra_args <- c(list(.stats = vars), Filter(Negate(is.null), optional_args))

  # Column variables and labels; confidence level and p-value label are read from the first row of `df`.
  colvars <- d_survival_subgroups_colvars(
    vars,
    conf_level = df$conf_level[1],
    method = df$pval_label[1],
    time_unit = time_unit
  )

  # Arguments forwarded to the analysis function, followed by anything passed through `...`.
  extra_args <- c(
    extra_args,
    list(groups_lists = groups_lists, control = control, biomarker = TRUE),
    ...
  )

  # Extend a local copy of the analysis function with the additional layout parameters.
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(a_survival_subgroups) <- c(formals(a_survival_subgroups), extra_args[[".additional_fun_parameters"]])

  # The "ci" column combines "lcl" and "ucl".
  df$ci <- combine_vectors(df$lcl, df$ucl)

  # Builds the table for the rows of a single biomarker.
  build_biomarker_table <- function(df_bm) {
    # Columns per statistic, one row split per biomarker label, then the hidden "content" (overall) row.
    lyt <- basic_table() %>%
      split_cols_by_multivar(vars = colvars$vars, varlabels = colvars$labels) %>%
      split_rows_by(var = "biomarker_label", nested = FALSE) %>%
      split_rows_by(
        var = "row_type",
        split_fun = keep_split_levels("content"),
        nested = TRUE,
        child_labels = "hidden"
      ) %>%
      analyze_colvars(
        afun = a_survival_subgroups,
        na_str = na_str,
        extra_args = c(extra_args, overall = TRUE)
      )

    # Subgroup rows are only added when the data holds "analysis" rows.
    if ("analysis" %in% df_bm$row_type) {
      lyt <- lyt %>%
        split_rows_by(
          var = "row_type",
          split_fun = keep_split_levels("analysis"),
          nested = TRUE,
          child_labels = "hidden"
        ) %>%
        split_rows_by(var = "var_label", nested = TRUE, indent_mod = 1L) %>%
        analyze_colvars(
          afun = a_survival_subgroups,
          na_str = na_str,
          inclNAs = TRUE,
          extra_args = extra_args
        )
    }

    build_table(lyt, df = df_bm)
  }

  # One sub-table per biomarker, stacked into a single table.
  tbls <- lapply(split(df, f = df$biomarker), build_biomarker_table)
  stacked <- do.call(rbind, tbls)

  # Attributes giving the column positions of the estimate, the interval and the first `n_tot*` statistic.
  structure(
    stacked,
    forest_header = paste0(c("Higher", "Lower"), "\nBetter"),
    col_x = match("hr", vars),
    col_ci = match("ci", vars),
    col_symbol_size = grep("^n_tot", vars)[1]
  )
}

#' Proportion estimation
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [estimate_proportion()] creates a layout element to estimate the proportion of responders
#' within a studied population. The primary analysis variable, `vars`, indicates whether a response has occurred for
#' each record. See the `method` parameter for options of methods to use when constructing the confidence interval of
#' the proportion. Additionally, a stratification variable can be supplied via the `strata` element of the `variables`
#' argument.
#'
#' @inheritParams prop_strat_wilson
#' @inheritParams argument_convention
#' @param method (`string`)\cr the method used to construct the confidence interval
#'   for proportion of successful outcomes; one of `waldcc`, `wald`, `clopper-pearson`,
#'   `wilson`, `wilsonc`, `strat_wilson`, `strat_wilsonc`, `agresti-coull` or `jeffreys`.
#' @param long (`flag`)\cr whether a long description is required.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("estimate_proportion"), type = "sh")``
#'
#' @seealso [h_proportions]
#'
#' @name estimate_proportion
#' @order 1
NULL

#' @describeIn estimate_proportion Statistics function estimating a
#'   proportion along with its confidence interval.
#'
#' @param df (`logical` or `data.frame`)\cr if only a logical vector is used,
#'   it indicates whether each subject is a responder or not. `TRUE` represents
#'   a successful outcome. If a `data.frame` is provided, also the `strata` variable
#'   names must be provided in `variables` as a list element with the strata strings.
#'   In the case of `data.frame`, the logical vector of responses must be indicated as a
#'   variable name in `.var`.
#'
#' @return
#' * `s_proportion()` returns statistics `n_prop` (`n` and proportion) and `prop_ci` (proportion CI) for a
#'   given variable.
#'
#' @examples
#' # Case with only logical vector.
#' rsp_v <- c(1, 0, 1, 0, 1, 1, 0, 0)
#' s_proportion(rsp_v)
#'
#' # Example for Stratified Wilson CI
#' nex <- 100 # Number of example rows
#' dta <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
#'   "grp" = sample(c("A", "B"), nex, TRUE),
#'   "f1" = sample(c("a1", "a2"), nex, TRUE),
#'   "f2" = sample(c("x", "y", "z"), nex, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' s_proportion(
#'   df = dta,
#'   .var = "rsp",
#'   variables = list(strata = c("f1", "f2")),
#'   conf_level = 0.90,
#'   method = "strat_wilson"
#' )
#'
#' @export
s_proportion <- function(df,
                         .var,
                         conf_level = 0.95,
                         method = c(
                           "waldcc", "wald", "clopper-pearson",
                           "wilson", "wilsonc", "strat_wilson", "strat_wilsonc",
                           "agresti-coull", "jeffreys"
                         ),
                         weights = NULL,
                         max_iterations = 50,
                         variables = list(strata = NULL),
                         long = FALSE,
                         denom = c("n", "N_col", "N_row"),
                         ...) {
  method <- match.arg(method)
  checkmate::assert_flag(long)
  assert_proportion_value(conf_level)

  # Row/column totals are taken from `...` when present; they are `NULL` otherwise.
  dots <- list(...)
  .N_row <- dots[[".N_row"]] # nolint
  .N_col <- dots[[".N_col"]] # nolint

  stratified_methods <- c("strat_wilson", "strat_wilsonc")

  if (!is.null(variables$strata)) {
    # Strata were requested: `df` must be a data frame containing all strata columns.
    if (missing(df)) stop("When doing stratified analysis a data.frame with specific columns is needed.")
    strata_colnames <- variables$strata
    checkmate::assert_character(strata_colnames, null.ok = FALSE)
    assert_df_with_variables(df, stats::setNames(as.list(strata_colnames), strata_colnames))

    # A single factor holding the combination of all strata variables.
    # Further checks are left to prop_strat_wilson().
    strata <- as.factor(interaction(df[strata_colnames]))
  } else if (method %in% stratified_methods) {
    stop("To use stratified methods you need to specify the strata variables.")
  }

  # Responder flags: `df` itself when it is an atomic vector, otherwise its `.var` column.
  rsp <- as.logical(if (checkmate::test_atomic_vector(df)) df else df[[.var]])

  # Stratified intervals are only defined for the observed sample size.
  if (method %in% stratified_methods && denom[1] != "n") {
    stop(
      "Stratified methods only support 'n' as the denominator (denom). ",
      "Consider adding negative responders directly to the dataset."
    )
  }

  # Resolve the denominator choice into a count.
  denom_choice <- match.arg(denom)
  n_denom <- switch(denom_choice,
    n = length(rsp),
    N_row = .N_row,
    N_col = .N_col
  )
  n_rsp <- sum(rsp)
  p_hat <- ifelse(n_denom > 0, n_rsp / n_denom, 0)

  prop_ci <- switch(method,
    "waldcc" = prop_wald(rsp, n = n_denom, conf_level, correct = TRUE),
    "wald" = prop_wald(rsp, n = n_denom, conf_level),
    "clopper-pearson" = prop_clopper_pearson(rsp, n = n_denom, conf_level),
    "wilson" = prop_wilson(rsp, n = n_denom, conf_level),
    "wilsonc" = prop_wilson(rsp, n = n_denom, conf_level, correct = TRUE),
    "strat_wilson" = prop_strat_wilson(rsp, strata, weights, conf_level, max_iterations, correct = FALSE)$conf_int,
    "strat_wilsonc" = prop_strat_wilson(rsp, strata, weights, conf_level, max_iterations, correct = TRUE)$conf_int,
    "agresti-coull" = prop_agresti_coull(rsp, n = n_denom, conf_level),
    "jeffreys" = prop_jeffreys(rsp, n = n_denom, conf_level)
  )

  # The interval is reported in percent; its label describes confidence level and method.
  ci_label <- d_proportion(conf_level, method, long = long)
  list(
    "n_prop" = formatters::with_label(c(n_rsp, p_hat), "Responders"),
    "prop_ci" = formatters::with_label(x = 100 * prop_ci, label = ci_label)
  )
}

#' @describeIn estimate_proportion Formatted analysis function which is used as `afun`
#'   in `estimate_proportion()`.
#'
#' @return
#' * `a_proportion()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @export
a_proportion <- function(df,
                         ...,
                         .stats = NULL,
                         .stat_names = NULL,
                         .formats = NULL,
                         .labels = NULL,
                         .indent_mods = NULL) {
  # Separate the additional layout parameters from the arguments meant for the statistics function.
  stat_fun_args <- list(...)
  layout_params <- retrieve_extra_afun_params(names(stat_fun_args$.additional_fun_parameters))
  stat_fun_args$.additional_fun_parameters <- NULL

  # Requested statistics may mix default statistic names with user-defined functions.
  stats_split <- .split_std_from_custom_stats(.stats)
  custom_stat_functions <- stats_split$custom_stats

  # Compute all statistics with s_proportion() plus any custom functions.
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_proportion,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      df = list(df),
      layout_params,
      stat_fun_args
    )
  )

  # Resolve the final statistic names, then the formats, labels and indentation for each of them.
  .stats <- get_stats(
    "estimate_proportion",
    stats_in = stats_split$all_stats,
    custom_stats_in = names(custom_stat_functions)
  )
  x_stats <- x_stats[.stats]
  .formats <- get_formats_from_stats(.stats, .formats)
  default_labels <- c(lapply(x_stats, attr, "label"), tern_default_labels)
  .labels <- get_labels_from_stats(.stats, .labels, tern_defaults = default_labels)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, layout_params$.df_row, layout_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  # The labels serve as both row names and row labels.
  row_labels <- .unlist_keep_nulls(.labels)
  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = row_labels,
    .stat_names = .stat_names,
    .labels = row_labels,
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}

#' @describeIn estimate_proportion Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `estimate_proportion()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_proportion()` to the table layout.
#'
#' @examples
#' dta_test <- data.frame(
#'   USUBJID = paste0("S", 1:12),
#'   ARM = rep(LETTERS[1:3], each = 4),
#'   AVAL = rep(LETTERS[1:3], each = 4)
#' ) %>%
#'   dplyr::mutate(is_rsp = AVAL == "A")
#'
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   estimate_proportion(vars = "is_rsp") %>%
#'   build_table(df = dta_test)
#'
#' @export
#' @order 2
estimate_proportion <- function(lyt,
                                vars,
                                conf_level = 0.95,
                                method = c(
                                  "waldcc", "wald", "clopper-pearson",
                                  "wilson", "wilsonc", "strat_wilson", "strat_wilsonc",
                                  "agresti-coull", "jeffreys"
                                ),
                                weights = NULL,
                                max_iterations = 50,
                                variables = list(strata = NULL),
                                long = FALSE,
                                na_str = default_na_str(),
                                nested = TRUE,
                                ...,
                                show_labels = "hidden",
                                table_names = vars,
                                .stats = c("n_prop", "prop_ci"),
                                .stat_names = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Standard formatting arguments: `.stats` is always passed on, the others only when supplied.
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  extra_args <- c(list(.stats = .stats), Filter(Negate(is.null), optional_args))

  # Arguments for the statistics function, followed by anything passed through `...`.
  stat_fun_args <- list(
    conf_level = conf_level,
    method = method,
    weights = weights,
    max_iterations = max_iterations,
    variables = variables,
    long = long
  )
  extra_args <- c(extra_args, stat_fun_args, ...)

  # Extend a local copy of the analysis function with the additional layout parameters.
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(a_proportion) <- c(formals(a_proportion), extra_args[[".additional_fun_parameters"]])

  analyze(
    lyt = lyt,
    vars = vars,
    afun = a_proportion,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Helper functions for calculating proportion confidence intervals
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions to calculate different proportion confidence intervals for use in [estimate_proportion()].
#'
#' @inheritParams argument_convention
#' @inheritParams estimate_proportion
#'
#' @return Confidence interval of a proportion.
#'
#' @seealso [estimate_proportion], descriptive function [d_proportion()],
#'  and helper functions [strata_normal_quantile()] and [update_weights_strat_wilson()].
#'
#' @name h_proportions
NULL

#' @describeIn h_proportions Calculates the Wilson interval by calling [stats::prop.test()].
#'  Also referred to as Wilson score interval.
#'
#' @examples
#' rsp <- c(
#'   TRUE, TRUE, TRUE, TRUE, TRUE,
#'   FALSE, FALSE, FALSE, FALSE, FALSE
#' )
#' prop_wilson(rsp, conf_level = 0.9)
#'
#' @export
prop_wilson <- function(rsp, n = length(rsp), conf_level, correct = FALSE) {
  # Number of responders out of `n`; the interval is the one computed by stats::prop.test().
  n_rsp <- sum(rsp)
  score_test <- stats::prop.test(n_rsp, n, conf.level = conf_level, correct = correct)

  # Plain numeric vector (lower, upper) without the attributes of the test result.
  as.numeric(score_test$conf.int)
}

#' @describeIn h_proportions Calculates the stratified Wilson confidence
#'   interval for unequal proportions as described in \insertCite{Yan2010-jt;textual}{tern}
#'
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
#' @param weights (`numeric` or `NULL`)\cr weights for each level of the strata. If `NULL`, they are
#'   estimated using the iterative algorithm proposed in \insertCite{Yan2010-jt;textual}{tern} that
#'   minimizes the weighted squared length of the confidence interval.
#' @param max_iterations (`count`)\cr maximum number of iterations for the iterative procedure used
#'   to find estimates of optimal weights.
#' @param correct (`flag`)\cr whether to include the continuity correction. For further information, see for example
#'   [stats::prop.test()].
#'
#' @references
#' \insertRef{Yan2010-jt}{tern}
#'
#' @examples
#' # Stratified Wilson confidence interval with unequal probabilities
#'
#' set.seed(1)
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
#' strata_data <- data.frame(
#'   "f1" = sample(c("a", "b"), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' )
#' strata <- interaction(strata_data)
#' n_strata <- ncol(table(rsp, strata)) # Number of strata
#'
#' prop_strat_wilson(
#'   rsp = rsp, strata = strata,
#'   conf_level = 0.90
#' )
#'
#' # Not automatic setting of weights
#' prop_strat_wilson(
#'   rsp = rsp, strata = strata,
#'   weights = rep(1 / n_strata, n_strata),
#'   conf_level = 0.90
#' )
#'
#' @export
prop_strat_wilson <- function(rsp,
                              strata,
                              weights = NULL,
                              conf_level = 0.95,
                              max_iterations = NULL,
                              correct = FALSE) {
  checkmate::assert_logical(rsp, any.missing = FALSE)
  checkmate::assert_factor(strata, len = length(rsp))
  assert_proportion_value(conf_level)

  # Cross-tabulation of response by stratum, and the number of strata actually observed.
  counts <- table(rsp, strata)
  n_strata <- length(unique(strata))

  # Weights are estimated iteratively only when the caller did not provide them.
  estimate_weights <- is.null(weights)
  if (estimate_weights) {
    # Equal weights are the starting point of the iterative procedure.
    weights <- rep(1 / n_strata, n_strata)

    if (is.null(max_iterations)) max_iterations <- 10
    checkmate::assert_int(max_iterations, na.ok = FALSE, null.ok = FALSE, lower = 1)
  }
  checkmate::assert_numeric(weights, lower = 0, upper = 1, any.missing = FALSE, len = n_strata)
  sum_weights <- checkmate::assert_int(sum(weights))
  if (as.integer(sum_weights + 0.5) != 1L) stop("Sum of weights must be 1L.")

  # Responders and sample sizes per stratum, restricted to strata with at least one observation.
  n_rsp_all <- counts["TRUE", ]
  n_all <- colSums(counts)
  nonempty <- n_all > 0
  ns <- n_all[nonempty]
  xs <- n_rsp_all[nonempty]

  # Per-stratum proportion estimates and their variances.
  ests <- xs / ns
  vars <- ests * (1 - ests) / ns

  strata_qnorm <- strata_normal_quantile(vars, weights, conf_level)

  # Final weights: estimated iteratively, or the user-supplied ones as they are.
  final_weights <- if (estimate_weights) {
    update_weights_strat_wilson(vars, strata_qnorm, weights, ns, max_iterations, conf_level)$weights
  } else {
    weights
  }

  # Confidence level used within each stratum, derived from the stratified quantile.
  strata_conf_level <- 2 * stats::pnorm(strata_qnorm) - 1

  # Wilson interval for each stratum at that level.
  ci_by_strata <- Map(
    function(x, n) {
      suppressWarnings(stats::prop.test(x, n, correct = correct, conf.level = strata_conf_level)$conf.int)
    },
    x = xs,
    n = ns
  )

  # The overall bounds are the weighted sums of the per-stratum bounds.
  lower <- sum(final_weights * vapply(ci_by_strata, "[", numeric(1), 1L))
  upper <- sum(final_weights * vapply(ci_by_strata, "[", numeric(1), 2L))

  # The estimated weights are only part of the result when they were computed here.
  out <- list(conf_int = c(lower = lower, upper = upper))
  if (estimate_weights) {
    out$weights <- final_weights
  }
  out
}

#' @describeIn h_proportions Calculates the Clopper-Pearson interval by calling [stats::binom.test()].
#'   Also referred to as the `exact` method.
#'
#' @param n (`count`)\cr number of participants (if `denom = "N_col"`) or the number of responders
#'   (if `denom = "n"`, the default).
#'
#' @examples
#' prop_clopper_pearson(rsp, conf_level = .95)
#'
#' @export
prop_clopper_pearson <- function(rsp, n = length(rsp), conf_level) {
  # The interval is the one reported by the exact binomial test for `sum(rsp)` successes out of `n`.
  n_rsp <- sum(rsp)
  exact_test <- stats::binom.test(x = n_rsp, n = n, conf.level = conf_level)

  # Plain numeric vector (lower, upper) without the attributes of the test result.
  as.numeric(exact_test$conf.int)
}

#' @describeIn h_proportions Calculates the Wald interval by following the usual textbook definition
#'   for a single proportion confidence interval using the normal approximation.
#'
#' @param correct (`flag`)\cr whether to apply continuity correction.
#'
#' @examples
#' prop_wald(rsp, conf_level = 0.95)
#' prop_wald(rsp, conf_level = 0.95, correct = TRUE)
#'
#' @export
prop_wald <- function(rsp, n = length(rsp), conf_level, correct = FALSE) {
  # Observed proportion; set to 0 when `n` is not positive.
  prop <- ifelse(n > 0, sum(rsp) / n, 0)
  crit <- stats::qnorm((1 + conf_level) / 2)

  # The continuity correction widens the interval by 1 / (2n) on each side.
  cc <- if (correct) 1 / (2 * n) else 0
  half_width <- crit * sqrt(prop * (1 - prop)) / sqrt(n) + cc

  # Bounds are truncated to the [0, 1] range.
  c(max(0, prop - half_width), min(1, prop + half_width))
}

#' @describeIn h_proportions Calculates the Agresti-Coull interval. Constructed (for 95% CI) by adding two successes
#'   and two failures to the data and then using the Wald formula to construct a CI.
#'
#' @examples
#' prop_agresti_coull(rsp, conf_level = 0.95)
#'
#' @export
prop_agresti_coull <- function(rsp, n = length(rsp), conf_level) {
  crit <- stats::qnorm((1 + conf_level) / 2)

  # Adjusted counts: crit^2 / 2 pseudo-successes and the same number of pseudo-failures are added.
  adj_successes <- sum(rsp) + crit^2 / 2
  adj_n <- n + crit^2

  # Wald-type interval computed on the adjusted proportion.
  adj_prop <- adj_successes / adj_n
  half_width <- crit * sqrt(adj_prop * (1 - adj_prop)) / sqrt(adj_n)

  # Bounds are truncated to the [0, 1] range.
  c(max(0, adj_prop - half_width), min(1, adj_prop + half_width))
}

#' @describeIn h_proportions Calculates the Jeffreys interval, an equal-tailed interval based on the
#'   non-informative Jeffreys prior for a binomial proportion.
#'
#' @examples
#' prop_jeffreys(rsp, conf_level = 0.95)
#'
#' @export
prop_jeffreys <- function(rsp, n = length(rsp), conf_level) {
  n_rsp <- sum(rsp)
  tail_prob <- (1 - conf_level) / 2

  # Both bounds are quantiles of a Beta(n_rsp + 0.5, n - n_rsp + 0.5) distribution.
  shape1 <- n_rsp + 0.5
  shape2 <- n - n_rsp + 0.5

  # The lower bound is fixed at 0 when there are no responders ...
  lower <- ifelse(n_rsp == 0, 0, stats::qbeta(tail_prob, shape1, shape2))
  # ... and the upper bound at 1 when every one of the `n` is a responder.
  upper <- ifelse(n_rsp == n, 1, stats::qbeta(1 - tail_prob, shape1, shape2))

  c(lower, upper)
}

#' Description of the proportion summary
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a helper function that describes the analysis in [s_proportion()].
#'
#' @inheritParams s_proportion
#' @param long (`flag`)\cr whether a long or a short (default) description is required.
#'
#' @return String describing the analysis.
#'
#' @export
d_proportion <- function(conf_level,
                         method,
                         long = FALSE) {
  # First part of the label: the confidence level in percent, optionally with the long wording.
  ci_part <- paste0(conf_level * 100, "% CI")
  if (long) {
    ci_part <- paste(ci_part, "for Response Rates")
  }

  # Display name of the method; an unknown method is an error.
  method_name <- switch(method,
    "wald" = "Wald, without correction",
    "waldcc" = "Wald, with correction",
    "wilson" = "Wilson, without correction",
    "wilsonc" = "Wilson, with correction",
    "strat_wilson" = "Stratified Wilson, without correction",
    "strat_wilsonc" = "Stratified Wilson, with correction",
    "clopper-pearson" = "Clopper-Pearson",
    "agresti-coull" = "Agresti-Coull",
    "jeffreys" = "Jeffreys",
    stop(paste(method, "does not have a description"))
  )

  paste0(ci_part, " (", method_name, ")")
}

#' Helper function for the estimation of stratified quantiles
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This function wraps the estimation of stratified percentiles when we assume
#' the approximation for large numbers. This is necessary only in the case
#' proportions for each strata are unequal.
#'
#' @inheritParams argument_convention
#' @inheritParams prop_strat_wilson
#'
#' @return Stratified quantile.
#'
#' @seealso [prop_strat_wilson()]
#'
#' @examples
#' strata_data <- table(data.frame(
#'   "f1" = sample(c(TRUE, FALSE), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' ))
#' ns <- colSums(strata_data)
#' ests <- strata_data["TRUE", ] / ns
#' vars <- ests * (1 - ests) / ns
#' weights <- rep(1 / length(ns), length(ns))
#'
#' strata_normal_quantile(vars, weights, 0.95)
#'
#' @export
strata_normal_quantile <- function(vars, weights, conf_level) {
  # Per-stratum terms: squared weight times variance.
  weighted_vars <- weights^2 * vars

  # Ratio of the root of the summed terms to the sum of their roots; it rescales the normal quantile.
  scaling <- sqrt(sum(weighted_vars)) / sum(sqrt(weighted_vars))
  scaling * stats::qnorm((1 + conf_level) / 2)
}

#' Helper function for the estimation of weights for `prop_strat_wilson()`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This function wraps the iteration procedure that allows you to estimate
#' the weights for each proportional strata. The procedure aims to minimize the
#' weighted squared length of the confidence interval.
#'
#' @inheritParams prop_strat_wilson
#' @param vars (`numeric`)\cr normalized proportions for each strata.
#' @param strata_qnorm (`numeric(1)`)\cr initial estimation with identical weights of the quantiles.
#' @param initial_weights (`numeric`)\cr initial weights used to calculate `strata_qnorm`. This can
#'   be optimized in the future if we need to estimate better initial weights.
#' @param n_per_strata (`numeric`)\cr number of elements in each strata.
#' @param max_iterations (`integer(1)`)\cr maximum number of iterations to be tried. Convergence is always checked.
#'   If no iteration is run (`max_iterations < 1`), `initial_weights` are returned unchanged.
#' @param tol (`numeric(1)`)\cr tolerance threshold for convergence.
#'
#' @return A `list` of 3 elements: `n_it` (number of iterations run), `weights` (the final weights), and
#'   `diff_v` (the sum of absolute changes in the weights at each iteration). A warning is issued when the
#'   change in weights did not fall below `tol` within `max_iterations` iterations.
#'
#' @seealso For references and details see [prop_strat_wilson()].
#'
#' @examples
#' vs <- c(0.011, 0.013, 0.012, 0.014, 0.017, 0.018)
#' sq <- 0.674
#' ws <- rep(1 / length(vs), length(vs))
#' ns <- c(22, 18, 17, 17, 14, 12)
#'
#' update_weights_strat_wilson(vs, sq, ws, ns, 100, 0.95, 0.001)
#'
#' @export
update_weights_strat_wilson <- function(vars,
                                        strata_qnorm,
                                        initial_weights,
                                        n_per_strata,
                                        max_iterations = 50,
                                        conf_level = 0.95,
                                        tol = 0.001) {
  it <- 0
  diff_v <- NULL
  # Convergence is tracked explicitly so that converging on the very last allowed
  # iteration is not reported as a failure.
  converged <- FALSE
  # Starting point, also the result when the loop below does not run at all.
  weights_new <- initial_weights

  while (it < max_iterations) {
    it <- it + 1

    # Unnormalized weights from the current stratified quantile, then scaled to sum to 1.
    weights_new_t <- (1 + strata_qnorm^2 / n_per_strata)^2
    weights_new_b <- (vars + strata_qnorm^2 / (4 * n_per_strata^2))
    weights_new <- weights_new_t / weights_new_b
    weights_new <- weights_new / sum(weights_new)

    # The quantile is re-estimated with the updated weights for the next iteration.
    strata_qnorm <- strata_normal_quantile(vars, weights_new, conf_level)

    # Total absolute change in the weights compared to the previous iteration.
    diff_v <- c(diff_v, sum(abs(weights_new - initial_weights)))
    if (diff_v[length(diff_v)] < tol) {
      converged <- TRUE
      break
    }
    initial_weights <- weights_new
  }

  if (!converged) {
    warning("The heuristic to find weights did not converge with max_iterations = ", max_iterations)
  }

  list(
    "n_it" = it,
    "weights" = weights_new,
    "diff_v" = diff_v
  )
}

#' Create a forest plot from an `rtable`
#'
#' Given a [rtables::rtable()] object with at least one column with a single value and one column with 2
#' values, converts table to a [ggplot2::ggplot()] object and generates an accompanying forest plot. The
#' table and forest plot are printed side-by-side.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams rtable2gg
#' @inheritParams argument_convention
#' @param tbl (`VTableTree`)\cr `rtables` table with at least one column with a single value and one column with 2
#'   values.
#' @param col_x (`integer(1)` or `NULL`)\cr column index with estimator. By default tries to get this from
#'   `tbl` attribute `col_x`, otherwise needs to be manually specified. If `NULL`, points will be excluded
#'   from forest plot.
#' @param col_ci (`integer(1)` or `NULL`)\cr column index with confidence intervals. By default tries to get this from
#'   `tbl` attribute `col_ci`, otherwise needs to be manually specified. If `NULL`, lines will be excluded
#'   from forest plot.
#' @param vline (`numeric(1)` or `NULL`)\cr x coordinate for vertical line, if `NULL` then the line is omitted.
#' @param forest_header (`character(2)`)\cr text displayed to the left and right of `vline`, respectively.
#'   If `vline = NULL` then `forest_header` is not printed. By default tries to get this from `tbl` attribute
#'   `forest_header`. If `NULL`, defaults will be extracted from the table if possible, and set to
#'   `"Comparison\nBetter"` and `"Treatment\nBetter"` if not.
#' @param xlim (`numeric(2)`)\cr limits for x axis.
#' @param logx (`flag`)\cr show the x-values on logarithm scale.
#' @param x_at (`numeric`)\cr x-tick locations, if `NULL`, `x_at` is set to `vline` and both `xlim` values.
#' @param width_row_names `r lifecycle::badge("deprecated")` Please use the `lbl_col_padding` argument instead.
#' @param width_columns (`numeric`)\cr a vector of column widths. Each element's position in
#'   `width_columns` corresponds to the column of `tbl` in the same position. If `NULL`, column widths are calculated
#'   according to maximum number of characters per column.
#' @param width_forest `r lifecycle::badge("deprecated")` Please use the `rel_width_forest` argument instead.
#' @param rel_width_forest (`proportion`)\cr proportion of total width to allocate to the forest plot. Relative
#'   width of table is then `1 - rel_width_forest`. If `as_list = TRUE`, this parameter is ignored.
#' @param font_size (`numeric(1)`)\cr font size.
#' @param col_symbol_size (`numeric` or `NULL`)\cr column index from `tbl` containing data to be used
#'   to determine relative size for estimator plot symbol. Typically, the symbol size is proportional
#'   to the sample size used to calculate the estimator. If `NULL`, the same symbol size is used for all subgroups.
#'   By default tries to get this from `tbl` attribute `col_symbol_size`, otherwise needs to be manually specified.
#' @param col (`character`)\cr color(s).
#' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to control styling of the plot.
#' @param as_list (`flag`)\cr whether the two `ggplot` objects should be returned as a list. If `TRUE`, a named list
#'   with two elements, `table` and `plot`, will be returned. If `FALSE` (default) the table and forest plot are
#'   printed side-by-side via [cowplot::plot_grid()].
#' @param gp `r lifecycle::badge("deprecated")` `g_forest` is now generated as a `ggplot` object. This argument
#'   is no longer used.
#' @param draw `r lifecycle::badge("deprecated")` `g_forest` is now generated as a `ggplot` object. This argument
#'   is no longer used.
#' @param newpage `r lifecycle::badge("deprecated")` `g_forest` is now generated as a `ggplot` object. This argument
#'   is no longer used.
#'
#' @return `ggplot` forest plot and table.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' n_records <- 20
#' adrs_labels <- formatters::var_labels(adrs, fill = TRUE)
#' adrs <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
#'   slice(seq_len(n_records)) %>%
#'   droplevels() %>%
#'   mutate(
#'     # Reorder levels of factor to make the placebo group the reference arm.
#'     ARM = fct_relevel(ARM, "B: Placebo"),
#'     rsp = AVALC == "CR"
#'   )
#' formatters::var_labels(adrs) <- c(adrs_labels, "Response")
#' df <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "STRATA2")),
#'   data = adrs
#' )
#' # Full commonly used response table.
#'
#' tbl <- basic_table() %>%
#'   tabulate_rsp_subgroups(df)
#' g_forest(tbl)
#'
#' # Odds ratio only table.
#'
#' tbl_or <- basic_table() %>%
#'   tabulate_rsp_subgroups(df, vars = c("n_tot", "or", "ci"))
#' g_forest(
#'   tbl_or,
#'   forest_header = c("Comparison\nBetter", "Treatment\nBetter")
#' )
#'
#' # Survival forest plot example.
#' adtte <- tern_ex_adtte
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte, fill = TRUE)
#' adtte_f <- adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X"),
#'     SEX %in% c("M", "F")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
#'     SEX = droplevels(SEX),
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- list(
#'   "ARM" = adtte_labels["ARM"],
#'   "SEX" = adtte_labels["SEX"],
#'   "AVALU" = adtte_labels["AVALU"],
#'   "is_event" = "Event Flag"
#' )
#' formatters::var_labels(adtte_f)[names(labels)] <- as.character(labels)
#' df <- extract_survival_subgroups(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM", subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#' table_hr <- basic_table() %>%
#'   tabulate_survival_subgroups(df, time_unit = adtte_f$AVALU[1])
#' g_forest(table_hr)
#'
#' # Works with any `rtable`.
#' tbl <- rtable(
#'   header = c("E", "CI", "N"),
#'   rrow("", 1, c(.8, 1.2), 200),
#'   rrow("", 1.2, c(1.1, 1.4), 50)
#' )
#' g_forest(
#'   tbl = tbl,
#'   col_x = 1,
#'   col_ci = 2,
#'   xlim = c(0.5, 2),
#'   x_at = c(0.5, 1, 2),
#'   col_symbol_size = 3
#' )
#'
#' tbl <- rtable(
#'   header = rheader(
#'     rrow("", rcell("A", colspan = 2)),
#'     rrow("", "c1", "c2")
#'   ),
#'   rrow("row 1", 1, c(.8, 1.2)),
#'   rrow("row 2", 1.2, c(1.1, 1.4))
#' )
#' g_forest(
#'   tbl = tbl,
#'   col_x = 1,
#'   col_ci = 2,
#'   xlim = c(0.5, 2),
#'   x_at = c(0.5, 1, 2),
#'   vline = 1,
#'   forest_header = c("Hello", "World")
#' )
#'
#' @export
g_forest <- function(tbl,
                     col_x = attr(tbl, "col_x"),
                     col_ci = attr(tbl, "col_ci"),
                     vline = 1,
                     forest_header = attr(tbl, "forest_header"),
                     xlim = c(0.1, 10),
                     logx = TRUE,
                     x_at = c(0.1, 1, 10),
                     width_row_names = lifecycle::deprecated(),
                     width_columns = NULL,
                     width_forest = lifecycle::deprecated(),
                     lbl_col_padding = 0,
                     rel_width_forest = 0.25,
                     font_size = 12,
                     col_symbol_size = attr(tbl, "col_symbol_size"),
                     col = getOption("ggplot2.discrete.colour")[1],
                     ggtheme = NULL,
                     as_list = FALSE,
                     gp = lifecycle::deprecated(),
                     draw = lifecycle::deprecated(),
                     newpage = lifecycle::deprecated()) {
  # Deprecated argument warnings: each deprecated argument only triggers a warning when supplied;
  # its value is not used anywhere below.
  if (lifecycle::is_present(width_row_names)) {
    lifecycle::deprecate_warn(
      "0.9.4", "g_forest(width_row_names)", "g_forest(lbl_col_padding)",
      details = "The width of the row label column can be adjusted via the `lbl_col_padding` parameter."
    )
  }
  if (lifecycle::is_present(width_forest)) {
    lifecycle::deprecate_warn(
      "0.9.4", "g_forest(width_forest)", "g_forest(rel_width_forest)",
      details = "Relative width of the forest plot (as a proportion) can be set via the `rel_width_forest` parameter."
    )
  }
  if (lifecycle::is_present(gp)) {
    lifecycle::deprecate_warn(
      "0.9.4", "g_forest(gp)", "g_forest(ggtheme)",
      details = paste(
        "`g_forest` is now generated as a `ggplot` object.",
        "Additional display settings should be supplied via the `ggtheme` parameter."
      )
    )
  }
  if (lifecycle::is_present(draw)) {
    lifecycle::deprecate_warn(
      "0.9.4", "g_forest(draw)",
      details = "`g_forest` now generates `ggplot` objects. This parameter has no effect."
    )
  }
  if (lifecycle::is_present(newpage)) {
    lifecycle::deprecate_warn(
      "0.9.4", "g_forest(newpage)",
      details = "`g_forest` now generates `ggplot` objects. This parameter has no effect."
    )
  }

  # Validate inputs. Column indices are checked against the number of columns of `tbl`.
  # NOTE(review): `lower = 0` lets an index of 0 pass; after the `+ 1` shift applied further down this
  # would select the `row_num` column of `tbl_df` - confirm whether `lower = 1` was intended.
  checkmate::assert_class(tbl, "VTableTree")
  checkmate::assert_number(col_x, lower = 0, upper = ncol(tbl), null.ok = TRUE)
  checkmate::assert_number(col_ci, lower = 0, upper = ncol(tbl), null.ok = TRUE)
  checkmate::assert_number(col_symbol_size, lower = 0, upper = ncol(tbl), null.ok = TRUE)
  checkmate::assert_number(font_size, lower = 0)
  checkmate::assert_character(col, null.ok = TRUE)
  # `col` must be missing, a single color, or one color per table row.
  checkmate::assert_true(is.null(col) | length(col) == 1 | length(col) == nrow(tbl))

  # Extract info from table: the string matrix of the printed table, the number of header lines and
  # the number of body lines.
  mat <- matrix_form(tbl, indent_rownames = TRUE)
  mat_strings <- formatters::mf_strings(mat)
  nlines_hdr <- formatters::mf_nlheader(mat)
  nrows_body <- nrow(mat_strings) - nlines_hdr
  # Labels from the last header line, without the first column of the string matrix
  # (presumably the row label column - TODO confirm).
  tbl_stats <- mat_strings[nlines_hdr, -1]

  # Generate and modify table as ggplot object
  gg_table <- rtable2gg(tbl, fontsize = font_size, colwidths = width_columns, lbl_col_padding = lbl_col_padding) +
    theme(plot.margin = margin(0, 0, 0, 0.025, "npc"))
  # Adjust the first two scales of the table plot directly (presumably x and y - TODO confirm against
  # `rtable2gg()`). The upper limit `nrow(mat_strings) + 1` is the same as the y limit of the forest plot below.
  gg_table$scales$scales[[1]]$expand <- c(0.01, 0.01)
  gg_table$scales$scales[[2]]$limits[2] <- nrow(mat_strings) + 1
  if (nlines_hdr == 2) {
    gg_table$scales$scales[[2]]$expand <- c(0, 0)
    # With a two-line header, the distinct non-blank entries of the first header line are kept as `arms`;
    # they are used for the default forest header below.
    arms <- unique(mat_strings[1, ][nzchar(trimws(mat_strings[1, ]))])
  } else {
    arms <- NULL
  }

  # Result data frame: keep `row_num` plus every column located after `node_class`, and name the
  # data columns after the header labels extracted above.
  tbl_df <- as_result_df(tbl)
  dat_cols <- seq(which(names(tbl_df) == "node_class") + 1, ncol(tbl_df))
  tbl_df <- tbl_df[, c(which(names(tbl_df) == "row_num"), dat_cols)]
  names(tbl_df) <- c("row_num", tbl_stats)

  # Check table data columns
  # Indices into `tbl_df` are shifted by one because its first column is `row_num`.
  if (!is.null(col_ci)) {
    ci_col <- col_ci + 1
  } else {
    # No CI column requested: use an all-missing placeholder column.
    tbl_df[["empty_ci"]] <- rep(list(c(NA_real_, NA_real_)), nrow(tbl_df))
    ci_col <- which(names(tbl_df) == "empty_ci")
  }
  # Only the first entry of the CI column is inspected here.
  if (length(tbl_df[, ci_col][[1]]) != 2) stop("CI column must have two elements (lower and upper limits).")

  if (!is.null(col_x)) {
    x_col <- col_x + 1
  } else {
    # No estimator column requested: use an all-missing placeholder column.
    tbl_df[["empty_x"]] <- NA_real_
    x_col <- which(names(tbl_df) == "empty_x")
  }
  if (!is.null(col_symbol_size)) {
    sym_size <- unlist(tbl_df[, col_symbol_size + 1])
  } else {
    # Same symbol size for every row.
    sym_size <- rep(1, nrow(tbl_df))
  }

  # Split the CI pairs into separate lower/upper columns and pull out plain vectors for plotting.
  tbl_df[, c("ci_lwr", "ci_upr")] <- t(sapply(tbl_df[, ci_col], unlist))
  x <- unlist(tbl_df[, x_col])
  lwr <- unlist(tbl_df[["ci_lwr"]])
  upr <- unlist(tbl_df[["ci_upr"]])
  # y position of each row: counted down from the total number of matrix lines, with one extra step
  # down when the header has two lines.
  row_num <- nrow(mat_strings) - tbl_df[["row_num"]] - as.numeric(nlines_hdr == 2)

  # Defaults for color and tick positions; a single color is repeated for every row of `tbl_df`.
  if (is.null(col)) col <- "#343cff"
  if (length(col) == 1) col <- rep(col, nrow(tbl_df))
  if (is.null(x_at)) x_at <- union(xlim, vline)
  x_labels <- x_at

  # Apply log transformation
  # The `*_t` values are used for the point positions and for deciding whether a CI exceeds the limits.
  if (logx) {
    x_t <- log(x)
    lwr_t <- log(lwr)
    upr_t <- log(upr)
    xlim_t <- log(xlim)
  } else {
    x_t <- x
    lwr_t <- lwr
    upr_t <- upr
    xlim_t <- xlim
  }

  # Set up plot area
  # Blank theme with only an x axis line; the x scale is log-transformed when `logx` is set and the
  # y scale spans the same range as the table plot.
  gg_plt <- ggplot(data = tbl_df) +
    theme(
      panel.background = element_rect(fill = "transparent", color = NA_character_),
      plot.background = element_rect(fill = "transparent", color = NA_character_),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      axis.line.x = element_line(),
      axis.text = element_text(size = font_size),
      legend.position = "none",
      plot.margin = margin(0, 0.1, 0.05, 0, "npc")
    ) +
    scale_x_continuous(
      transform = ifelse(logx, "log", "identity"),
      limits = xlim,
      breaks = x_at,
      labels = x_labels,
      expand = c(0.01, 0)
    ) +
    scale_y_continuous(
      limits = c(0, nrow(mat_strings) + 1),
      breaks = NULL,
      expand = c(0, 0)
    ) +
    coord_cartesian(clip = "off")

  # Without a custom theme, draw a grey background rectangle behind the body rows.
  if (is.null(ggtheme)) {
    gg_plt <- gg_plt + annotate(
      "rect",
      xmin = xlim[1],
      xmax = xlim[2],
      ymin = 0,
      ymax = nrows_body + 0.5,
      fill = "grey92"
    )
  }

  if (!is.null(vline)) {
    # Set default forest header
    # Uses the two arm labels from the table header when exactly two were found, generic labels otherwise.
    if (is.null(forest_header)) {
      forest_header <- c(
        paste(if (length(arms) == 2) arms[1] else "Comparison", "Better", sep = "\n"),
        paste(if (length(arms) == 2) arms[2] else "Treatment", "Better", sep = "\n")
      )
    }

    # Add vline and forest header labels
    # Header labels are centered between each x limit and `vline`: geometric midpoints on the log scale,
    # arithmetic midpoints otherwise.
    mid_pts <- if (logx) {
      c(exp(mean(log(c(xlim[1], vline)))), exp(mean(log(c(vline, xlim[2])))))
    } else {
      c(mean(c(xlim[1], vline)), mean(c(vline, xlim[2])))
    }
    gg_plt <- gg_plt +
      annotate(
        "segment",
        x = vline, xend = vline, y = 0, yend = nrows_body + 0.5
      ) +
      annotate(
        "text",
        x = mid_pts[1], y = nrows_body + 1.25,
        label = forest_header[1],
        size = font_size / .pt,
        lineheight = 0.9
      ) +
      annotate(
        "text",
        x = mid_pts[2], y = nrows_body + 1.25,
        label = forest_header[2],
        size = font_size / .pt,
        lineheight = 0.9
      )
  }

  # Add points to plot
  # Estimates outside `xlim` are set to missing so that no point is drawn for them.
  if (any(!is.na(x_t))) {
    x_t[x < xlim[1] | x > xlim[2]] <- NA
    gg_plt <- gg_plt + geom_point(
      x = x_t,
      y = row_num,
      color = col,
      aes(size = sym_size),
      na.rm = TRUE
    )
  }

  # One CI segment per row of `tbl_df`.
  for (i in seq_len(nrow(tbl_df))) {
    # Determine which arrow(s) to add to CI lines
    # An arrow is needed on each side where the (transformed) CI limit lies outside the x limits.
    which_arrow <- c(lwr_t[i] < xlim_t[1], upr_t[i] > xlim_t[2])
    which_arrow <- dplyr::case_when(
      all(which_arrow) ~ "both",
      which_arrow[1] ~ "first",
      which_arrow[2] ~ "last",
      TRUE ~ NA_character_
    )

    # Add CI lines
    # Segments that exceed the limits are cut at `xlim` and get an arrow head on the cut side(s).
    gg_plt <- gg_plt +
      if (!is.na(which_arrow)) {
        annotate(
          "segment",
          x = if (!which_arrow %in% c("first", "both")) lwr[i] else xlim[1],
          xend = if (!which_arrow %in% c("last", "both")) upr[i] else xlim[2],
          y = row_num[i], yend = row_num[i],
          color = if (length(col) == 1) col else col[i],
          arrow = arrow(length = unit(0.05, "npc"), ends = which_arrow),
          na.rm = TRUE
        )
      } else {
        annotate(
          "segment",
          x = lwr[i], xend = upr[i],
          y = row_num[i], yend = row_num[i],
          color = if (length(col) == 1) col else col[i],
          na.rm = TRUE
        )
      }
  }

  # Apply custom ggtheme to plot
  if (!is.null(ggtheme)) gg_plt <- gg_plt + ggtheme

  # Return either both plots as a named list, or a single side-by-side arrangement in which the
  # forest plot takes `rel_width_forest` of the total width.
  if (as_list) {
    list(
      table = gg_table,
      plot = gg_plt
    )
  } else {
    cowplot::plot_grid(
      gg_table,
      gg_plt,
      align = "h",
      axis = "tblr",
      rel_widths = c(1 - rel_width_forest, rel_width_forest)
    )
  }
}

#' Forest plot grob
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' @inheritParams g_forest
#' @param tbl (`VTableTree`)\cr `rtables` table object.
#' @param x (`numeric`)\cr coordinate of point.
#' @param lower,upper (`numeric`)\cr lower/upper bound of the confidence interval.
#' @param symbol_size (`numeric`)\cr vector with relative size for plot symbol.
#'   If `NULL`, the same symbol size is used.
#'
#' @details
#' The heights get automatically determined.
#'
#' @examples
#' tbl <- rtable(
#'   header = rheader(
#'     rrow("", "E", rcell("CI", colspan = 2), "N"),
#'     rrow("", "A", "B", "C", "D")
#'   ),
#'   rrow("row 1", 1, 0.8, 1.1, 16),
#'   rrow("row 2", 1.4, 0.8, 1.6, 25),
#'   rrow("row 3", 1.2, 0.8, 1.6, 36)
#' )
#'
#' x <- c(1, 1.4, 1.2)
#' lower <- c(0.8, 0.8, 0.8)
#' upper <- c(1.1, 1.6, 1.6)
#' # numeric vector with multiplication factor to scale each circle radius
#' # default radius is 1/3.5 lines
#' symbol_scale <- c(1, 1.25, 1.5)
#'
#' # Internal function - forest_grob
#' \donttest{
#' p <- forest_grob(tbl, x, lower, upper,
#'   vline = 1, forest_header = c("A", "B"),
#'   x_at = c(.1, 1, 10), xlim = c(0.1, 10), logx = TRUE, symbol_size = symbol_scale,
#'   vp = grid::plotViewport(margins = c(1, 1, 1, 1))
#' )
#'
#' draw_grob(p)
#' }
#'
#' @noRd
#' @keywords internal
forest_grob <- function(tbl,
                        x,
                        lower,
                        upper,
                        vline,
                        forest_header,
                        xlim = NULL,
                        logx = FALSE,
                        x_at = NULL,
                        width_row_names = NULL,
                        width_columns = NULL,
                        width_forest = grid::unit(1, "null"),
                        symbol_size = NULL,
                        col = "blue",
                        name = NULL,
                        gp = NULL,
                        vp = NULL) {
  lifecycle::deprecate_warn(
    "0.9.4", "forest_grob()",
    details = "`g_forest` now generates `ggplot` objects. This function is no longer used within `tern`."
  )

  n_rows <- nrow(tbl)

  # A forest header is only accepted together with a reference line.
  if (!is.null(vline)) {
    checkmate::assert_number(vline)
    checkmate::assert_character(forest_header, len = 2, null.ok = TRUE)
  } else {
    checkmate::assert_true(is.null(forest_header))
  }

  # Estimates, limits and symbol sizes must provide one value per table row.
  checkmate::assert_numeric(x, len = n_rows)
  checkmate::assert_numeric(lower, len = n_rows)
  checkmate::assert_numeric(upper, len = n_rows)
  checkmate::assert_numeric(symbol_size, len = n_rows, null.ok = TRUE)
  checkmate::assert_character(col)

  # Default: identical symbol size for all rows.
  if (is.null(symbol_size)) symbol_size <- rep(1, n_rows)

  # Default x limits: observed range, widened on both sides by 5% of its width.
  if (is.null(xlim)) {
    value_range <- range(c(x, lower, upper), na.rm = TRUE)
    xlim <- value_range + c(-0.05, 0.05) * diff(value_range)
  }

  if (!logx) {
    x_labels <- TRUE
  } else {
    # On the log scale, tick positions are log values while tick labels stay on the original scale.
    if (!is.null(x_at)) {
      x_labels <- x_at
      x_at <- log(x_at)
    } else {
      x_at <- pretty(log(stats::na.omit(c(x, lower, upper))))
      x_labels <- exp(x_at)
    }
    xlim <- log(xlim)
    x <- log(x)
    lower <- log(lower)
    upper <- log(upper)
    if (!is.null(vline)) vline <- log(vline)
  }

  data_forest_vp <- grid::dataViewport(xlim, c(0, 1))

  # Matrix form of the table content.
  mf <- matrix_form(tbl)
  n_header <- attr(mf, "nrow_header")

  # Prefix row names with four spaces per indent level (header rows are not indented).
  # Use `rtables` indent_string eventually.
  indent_levels <- c(rep(0, n_header), mf$row_info$indent)
  mf$strings[, 1] <- paste0(strrep("    ", indent_levels), mf$strings[, 1])

  if (any(!mf$display[, 1])) stop("row names need to be always displayed")

  # Collect, for each header or body row, the argument list passed on to `cell_in_rows()`.
  build_row_args <- function(part = c("body", "header"),
                             underline_colspan = FALSE) {
    part <- match.arg(part)
    is_body <- part == "body"
    # Body rows follow the header rows in the matrix; their row index restarts at 1.
    mat_rows <- if (is_body) seq_len(nrow(tbl)) + n_header else seq_len(n_header)
    index_shift <- if (is_body) -n_header else 0

    lapply(mat_rows, function(i) {
      shown <- mf$display[i, -1]
      list(
        row_name = mf$strings[i, 1],
        cells = mf$strings[i, -1][shown],
        cell_spans = mf$spans[i, -1][shown],
        row_index = i + index_shift,
        underline_colspan = underline_colspan
      )
    })
  }

  args_header <- build_row_args("header", underline_colspan = TRUE)
  args_body <- build_row_args("body", underline_colspan = FALSE)

  # Layout column holding the forest plot: table columns plus row names plus one.
  forest_col <- ncol(tbl) + 2

  # Text cells of one table part (header or body), placed in the matching layout viewport.
  table_part <- function(row_args, vp_name) {
    grid::gTree(
      children = do.call(grid::gList, lapply(row_args, do.call, what = cell_in_rows)),
      vp = grid::vpPath("vp_table_layout", vp_name)
    )
  }

  # The two forest header labels, placed one line to the left and right of the reference line.
  header_labels <- function() {
    grid::gTree(
      children = grid::gList(
        grid::gTree(
          children = grid::gList(
            grid::textGrob(
              forest_header[1],
              x = grid::unit(vline, "native") - grid::unit(1, "lines"),
              just = c("right", "center")
            ),
            grid::textGrob(
              forest_header[2],
              x = grid::unit(vline, "native") + grid::unit(1, "lines"),
              just = c("left", "center")
            )
          ),
          vp = grid::vpStack(grid::viewport(layout.pos.col = forest_col), data_forest_vp)
        )
      ),
      vp = grid::vpPath("vp_table_layout", "vp_header")
    )
  }

  # Grey background, optional reference line and x axis of the forest area.
  axis_panel <- function() {
    grid::gTree(
      children = grid::gList(
        grid::gTree(
          children = grid::gList(
            grid::rectGrob(gp = grid::gpar(col = "gray90", fill = "gray90")),
            if (!is.null(vline)) {
              grid::linesGrob(
                x = grid::unit(rep(vline, 2), "native"),
                y = grid::unit(c(0, 1), "npc"),
                gp = grid::gpar(lwd = 2),
                vp = data_forest_vp
              )
            } else {
              NULL
            },
            grid::xaxisGrob(at = x_at, label = x_labels, vp = data_forest_vp)
          ),
          vp = grid::viewport(layout.pos.col = forest_col)
        )
      ),
      vp = grid::vpPath("vp_table_layout", "vp_body")
    )
  }

  # One dot-and-line grob per row.
  dot_lines <- function() {
    grid::gTree(
      children = do.call(
        grid::gList,
        Map(
          function(pt, lo, hi, idx, sz, colour) {
            forest_dot_line(
              pt,
              lo,
              hi,
              idx,
              xlim,
              symbol_size = sz,
              col = colour,
              datavp = data_forest_vp
            )
          },
          x,
          lower,
          upper,
          seq_along(x),
          symbol_size,
          col,
          USE.NAMES = FALSE
        )
      ),
      vp = grid::vpPath("vp_table_layout", "vp_body")
    )
  }

  # The helpers are called inside the argument list so that the children are still created lazily,
  # in the same order, when `grid::gTree()` evaluates them.
  grid::gTree(
    name = name,
    children = grid::gList(
      table_part(args_header, "vp_header"),
      table_part(args_body, "vp_body"),
      # Separator line between header and body.
      grid::linesGrob(
        grid::unit(c(0, 1), "npc"),
        y = grid::unit(c(.5, .5), "npc"),
        vp = grid::vpPath("vp_table_layout", "vp_spacer")
      ),
      # forest part
      if (!is.null(vline)) header_labels() else NULL,
      axis_panel(),
      dot_lines()
    ),
    childrenvp = forest_viewport(tbl, width_row_names, width_columns, width_forest),
    vp = vp,
    gp = gp
  )
}

#' Graphic objects for one table row
#'
#' Builds the text `grob`s for a single table row: the row name (if not empty) and one `grob` per
#' non-empty cell. Cells spanning several columns are placed in a viewport joining these columns and
#' can optionally be underlined.
#'
#' @param row_name (`string`)\cr row name, `""` for no row name.
#' @param cells (`character`)\cr cell contents of the row.
#' @param cell_spans (`numeric`)\cr number of columns spanned by each element of `cells`.
#' @param row_index (`number`)\cr index of the row, used to build the names of the `grob`s and viewports.
#' @param underline_colspan (`flag`)\cr whether non-blank cells spanning more than one column are underlined.
#'
#' @return A `gList` with the row name `grob` (if any) followed by the cell `grob`s.
#'
#' @noRd
#' @keywords internal
cell_in_rows <- function(row_name,
                         cells,
                         cell_spans,
                         row_index,
                         underline_colspan = FALSE) {
  checkmate::assert_string(row_name)
  checkmate::assert_character(cells, min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(cell_spans, len = length(cells), any.missing = FALSE)
  checkmate::assert_number(row_index)
  checkmate::assert_flag(underline_colspan)

  # Row name grob and its viewport share the same name.
  vp_name_rn <- paste0("rowname-", row_index)
  g_rowname <- if (nzchar(row_name)) {
    grid::textGrob(
      name = vp_name_rn,
      label = row_name,
      x = grid::unit(0, "npc"),
      just = c("left", "center"),
      vp = grid::vpPath(vp_name_rn)
    )
  } else {
    NULL
  }

  # First table column occupied by each cell: 1 plus the spans of all preceding cells.
  # Computed up front instead of advancing a shared counter from inside the `lapply()` closure.
  first_cols <- cumsum(c(1, cell_spans))[seq_along(cells)]

  gl_cols <- lapply(seq_along(cells), function(k) {
    cell_ascii <- cells[[k]]
    cs <- cell_spans[[k]]
    j <- first_cols[[k]] # column index of cell

    # Defensive only (the assertions above exclude both cases); `is.null()` has to be tested first
    # because `is.na(NULL)` has length zero.
    if (is.null(cell_ascii) || is.na(cell_ascii)) {
      cell_ascii <- "NA"
    }

    cell_name <- paste0("g-cell-", row_index, "-", j)

    if (identical(cell_ascii, "")) {
      # Empty cells produce no grob.
      NULL
    } else if (cs == 1) {
      grid::textGrob(
        label = cell_ascii,
        name = cell_name,
        vp = grid::vpPath(paste0("cell-", row_index, "-", j))
      )
    } else {
      # +1 because of rowname
      vp_joined_cols <- grid::viewport(layout.pos.row = row_index, layout.pos.col = seq(j + 1, j + cs))

      lab <- grid::textGrob(
        label = cell_ascii,
        name = cell_name,
        vp = vp_joined_cols
      )

      if (!underline_colspan || grepl("^[[:space:]]*$", cell_ascii)) {
        lab
      } else {
        # Underline the spanning label, leaving a small gap at both ends.
        grid::gList(
          lab,
          grid::linesGrob(
            x = grid::unit.c(grid::unit(.2, "lines"), grid::unit(1, "npc") - grid::unit(.2, "lines")),
            y = grid::unit(c(0, 0), "npc"),
            vp = vp_joined_cols
          )
        )
      }
    }
  })

  grid::gList(
    g_rowname,
    do.call(grid::gList, gl_cols)
  )
}

#' Graphic object: forest dot line
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' Builds the `grob` for one row of the forest plot: the confidence interval line, with arrow heads on the
#' side(s) where the interval exceeds `xlim`, and the circle marking the point estimate. Returns `NULL` when
#' the estimate and both interval limits are missing.
#'
#' @noRd
#' @keywords internal
forest_dot_line <- function(x,
                            lower,
                            upper,
                            row_index,
                            xlim,
                            symbol_size = 1,
                            col = "blue",
                            datavp) {
  lifecycle::deprecate_warn(
    "0.9.4", "forest_dot_line()",
    details = "`g_forest` now generates `ggplot` objects. This function is no longer used within `tern`."
  )

  ci <- c(lower, upper)

  # Nothing to draw if neither the estimate nor any interval limit is available.
  if (all(is.na(c(x, ci)))) {
    return(NULL)
  }

  # Line and circle are vertically centered in the row.
  y_mid <- grid::unit(c(0.5, 0.5), "npc")

  # Interval line with arrow head(s) at the given end(s).
  arrow_line <- function(x_ends, arrow_at) {
    grid::linesGrob(
      x = grid::unit(x_ends, "native"),
      y = y_mid,
      arrow = grid::arrow(angle = 30, length = grid::unit(0.5, "lines"), ends = arrow_at)
    )
  }

  # The interval is drawn only if both limits are known and it overlaps the plotting range.
  ci_visible <- all(!is.na(ci)) && ci[2] > xlim[1] && ci[1] < xlim[2]

  g_line <- if (!ci_visible) {
    NULL
  } else {
    cut_left <- ci[1] < xlim[1]
    cut_right <- ci[2] > xlim[2]

    if (!cut_left && !cut_right) {
      # -
      grid::linesGrob(x = grid::unit(c(ci[1], ci[2]), "native"), y = y_mid)
    } else if (cut_left && cut_right) {
      # <->
      arrow_line(xlim, "both")
    } else if (cut_left) {
      # <-
      arrow_line(c(xlim[1], ci[2]), "first")
    } else {
      # ->
      arrow_line(c(ci[1], xlim[2]), "last")
    }
  }

  # The point estimate is drawn only when it lies within the plotting range.
  estimate_visible <- !is.na(x) && x >= xlim[1] && x <= xlim[2]

  g_circle <- if (estimate_visible) {
    grid::circleGrob(
      x = grid::unit(x, "native"),
      y = y_mid,
      r = grid::unit(1 / 3.5 * symbol_size, "lines"),
      name = "point"
    )
  } else {
    NULL
  }

  # Outer tree: viewport of the row; inner tree: data viewport and color settings.
  grid::gTree(
    children = grid::gList(
      grid::gTree(
        children = grid::gList(
          grid::gList(
            g_line,
            g_circle
          )
        ),
        vp = datavp,
        gp = grid::gpar(col = col, fill = col)
      )
    ),
    vp = grid::vpPath(paste0("forest-", row_index))
  )
}

#' Create a viewport tree for the forest plot
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' @param tbl (`VTableTree`)\cr `rtables` table object.
#' @param width_row_names (`grid::unit`)\cr width of row names.
#' @param width_columns (`grid::unit`)\cr width of column spans.
#' @param width_forest (`grid::unit`)\cr width of the forest plot.
#' @param gap_column (`grid::unit`)\cr gap width between the columns.
#' @param gap_header (`grid::unit`)\cr gap width between the header.
#' @param mat_form (`MatrixPrintForm`)\cr matrix print form of the table.
#'
#' @return A viewport tree.
#'
#' @examples
#' library(grid)
#'
#' tbl <- rtable(
#'   header = rheader(
#'     rrow("", "E", rcell("CI", colspan = 2)),
#'     rrow("", "A", "B", "C")
#'   ),
#'   rrow("row 1", 1, 0.8, 1.1),
#'   rrow("row 2", 1.4, 0.8, 1.6),
#'   rrow("row 3", 1.2, 0.8, 1.2)
#' )
#'
#' \donttest{
#' v <- forest_viewport(tbl)
#'
#' grid::grid.newpage()
#' showViewport(v)
#' }
#'
#' @export
forest_viewport <- function(tbl,
                            width_row_names = NULL,
                            width_columns = NULL,
                            width_forest = grid::unit(1, "null"),
                            gap_column = grid::unit(1, "lines"),
                            gap_header = grid::unit(1, "lines"),
                            mat_form = NULL) {
  lifecycle::deprecate_warn(
    "0.9.4",
    "forest_viewport()",
    details = "`g_forest` now generates `ggplot` objects. This function is no longer used within `tern`."
  )

  checkmate::assert_class(tbl, "VTableTree")
  checkmate::assert_true(grid::is.unit(width_forest))
  if (!is.null(width_row_names)) {
    checkmate::assert_true(grid::is.unit(width_row_names))
  }
  if (!is.null(width_columns)) {
    checkmate::assert_true(grid::is.unit(width_columns))
  }

  if (is.null(mat_form)) mat_form <- matrix_form(tbl)

  # Cells flagged as not displayed are blanked so they do not influence widths or line counts.
  mat_form$strings[!mat_form$display] <- ""

  nr <- nrow(tbl)
  nc <- ncol(tbl)
  nr_h <- attr(mat_form, "nrow_header")

  # Missing widths are derived from the proposed column widths (in characters).
  if (is.null(width_row_names) || is.null(width_columns)) {
    tbl_widths <- formatters::propose_column_widths(mat_form)
    strs_with_width <- strrep("x", tbl_widths) # that works for mono spaced fonts
    if (is.null(width_row_names)) width_row_names <- grid::stringWidth(strs_with_width[1])
    if (is.null(width_columns)) width_columns <- grid::stringWidth(strs_with_width[-1])
  }

  # Widths for row name, cols, forest.
  widths <- grid::unit.c(
    width_row_names + gap_column,
    width_columns + gap_column,
    width_forest
  )

  # Number of text lines needed by each matrix row: the largest number of line breaks found in any
  # of its cells, plus one. Matches are counted per cell (`gregexpr()` yields -1 when there is none),
  # so cells containing two or more line breaks are handled as well.
  n_lines_per_row <- apply(
    X = mat_form$strings,
    MARGIN = 1,
    FUN = function(row) {
      n_breaks <- vapply(
        gregexpr("\n", row, fixed = TRUE),
        function(hits) sum(hits > 0),
        numeric(1)
      )
      max(c(n_breaks + 1, 1))
    }
  )

  i_header <- seq_len(nr_h)

  # Each text line is given a height of 1.2 lines.
  height_body_rows <- grid::unit(n_lines_per_row[-i_header] * 1.2, "lines")
  height_header_rows <- grid::unit(n_lines_per_row[i_header] * 1.2, "lines")

  height_body <- grid::unit(sum(n_lines_per_row[-i_header]) * 1.2, "lines")
  height_header <- grid::unit(sum(n_lines_per_row[i_header]) * 1.2, "lines")

  nc_g <- nc + 2 # number of columns incl. row names and forest

  # Three stacked layout rows: header, spacer, body.
  vp_tbl <- grid::vpTree(
    parent = grid::viewport(
      name = "vp_table_layout",
      layout = grid::grid.layout(
        nrow = 3, ncol = 1,
        heights = grid::unit.c(height_header, gap_header, height_body)
      )
    ),
    children = grid::vpList(
      vp_forest_table_part(nr_h, nc_g, 1, 1, widths, height_header_rows, "vp_header"),
      vp_forest_table_part(nr, nc_g, 3, 1, widths, height_body_rows, "vp_body"),
      grid::viewport(name = "vp_spacer", layout.pos.row = 2, layout.pos.col = 1)
    )
  )
  vp_tbl
}

#' Viewport forest plot: table part
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' Prepares a viewport for the table included in the forest plot.
#'
#' @noRd
#' @keywords internal
vp_forest_table_part <- function(nrow,
                                 ncol,
                                 l_row,
                                 l_col,
                                 widths,
                                 heights,
                                 name) {
  lifecycle::deprecate_warn(
    "0.9.4", "vp_forest_table_part()",
    details = "`g_forest` now generates `ggplot` objects. This function is no longer used within `tern`."
  )

  # Parent viewport: sits in cell (l_row, l_col) of the enclosing layout and carries its own
  # nrow x ncol layout for the children below.
  table_vp <- grid::viewport(
    name = name,
    layout.pos.row = l_row,
    layout.pos.col = l_col,
    layout = grid::grid.layout(nrow = nrow, ncol = ncol, widths = widths, heights = heights)
  )

  # First layout column: one viewport per row for the row names.
  rowname_vps <- lapply(seq_len(nrow), function(i) {
    grid::viewport(layout.pos.row = i, layout.pos.col = 1, name = paste0("rowname-", i))
  })

  # Layout columns 2 .. (ncol - 1): one viewport per table cell. Rows vary fastest.
  cell_index <- expand.grid(seq_len(nrow), seq_len(ncol - 2))
  cell_vps <- apply(cell_index, 1, function(pos) {
    cell_row <- pos[1]
    cell_col <- pos[2]
    grid::viewport(
      layout.pos.row = cell_row,
      layout.pos.col = cell_col + 1,
      name = paste0("cell-", cell_row, "-", cell_col)
    )
  })

  # Last layout column: one viewport per row for the forest part.
  forest_vps <- lapply(seq_len(nrow), function(i) {
    grid::viewport(layout.pos.row = i, layout.pos.col = ncol, name = paste0("forest-", i))
  })

  grid::vpTree(
    parent = table_vp,
    children = grid::vpList(
      do.call(grid::vpList, rowname_vps),
      do.call(grid::vpList, cell_vps),
      do.call(grid::vpList, forest_vps)
    )
  )
}

#' Forest rendering
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' Renders the forest grob.
#'
#' @noRd
#' @keywords internal
grid.forest <- function(...) { # nolint
  lifecycle::deprecate_warn(
    "0.9.4", "grid.forest()",
    details = "`g_forest` now generates `ggplot` objects. This function is no longer used within `tern`."
  )

  # Build the grob from the forwarded arguments, then draw it on the current device.
  forest <- forest_grob(...)
  grid::grid.draw(forest)
}

#' Summarize analysis of covariance (ANCOVA) results
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [summarize_ancova()] creates a layout element to summarize ANCOVA results.
#'
#' This function can be used to analyze multiple endpoints and/or multiple timepoints within the response variable(s)
#' specified as `vars`.
#'
#' Additional variables for the analysis, namely an arm (grouping) variable and covariate variables, can be defined
#' via the `variables` argument. See below for more details on how to specify `variables`. An interaction term can
#' be implemented in the model if needed. The interaction variable that should interact with the arm variable is
#' specified via the `interaction_term` parameter, and the specific value of `interaction_term` for which to extract
#' the ANCOVA results via the `interaction_y` parameter.
#'
#' @inheritParams h_ancova
#' @inheritParams argument_convention
#' @param interaction_y (`string` or `flag`)\cr a selected item inside of the `interaction_item` variable which will be
#'   used to select the specific ANCOVA results. If the interaction is not needed, the default option is `FALSE`.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("summarize_ancova"), type = "sh")``
#'
#' @name summarize_ancova
#' @order 1
NULL

#' Helper function to return results of a linear model
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called in `.var` and `variables`.
#' @param variables (named `list` of `string`)\cr list of additional analysis variables, with expected elements:
#'   * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple groups will be
#'     summarized. Specifically, the first level of `arm` variable is taken as the reference group.
#'   * `covariates` (`character`)\cr a vector that can contain single variable names (such as `"X1"`), and/or
#'     interaction terms indicated by `"X1 * X2"`.
#' @param interaction_item (`string` or `NULL`)\cr name of the variable that should have interactions
#'   with arm. If the interaction is not needed, the default option is `NULL`.
#' @param weights_emmeans (`string` or `NULL`)\cr argument from [emmeans::emmeans()]
#'
#' @return The summary of a linear model.
#'
#' @examples
#' h_ancova(
#'   .var = "Sepal.Length",
#'   .df_row = iris,
#'   variables = list(arm = "Species", covariates = c("Petal.Length * Petal.Width", "Sepal.Width"))
#' )
#'
#' @export
h_ancova <- function(.var,
                     .df_row,
                     variables,
                     interaction_item = NULL,
                     weights_emmeans = NULL) {
  checkmate::assert_string(.var)
  checkmate::assert_list(variables)
  checkmate::assert_subset(names(variables), c("arm", "covariates"))
  assert_df_with_variables(.df_row, list(rsp = .var))

  arm <- variables$arm
  covariates <- variables$covariates

  # Every variable referenced by a covariate term has to be available in the data.
  if (length(covariates) > 0) {
    assert_df_with_variables(.df_row, get_covariates(covariates))
  }

  # Model: response ~ [covariate terms +] arm
  covariates_part <- paste(covariates, collapse = " + ")
  rhs <- if (covariates_part != "") paste0(covariates_part, " + ", arm) else arm
  formula <- stats::as.formula(paste0(.var, " ~ ", rhs))

  # Marginal means are requested over the arm and, when given, the interaction variable.
  specs <- if (is.null(interaction_item)) arm else c(arm, interaction_item)

  lm_fit <- stats::lm(formula = formula, data = .df_row)

  emmeans::emmeans(
    lm_fit,
    specs = specs,
    # The data is handed over again so that the factor levels of the arm variable can be inferred.
    data = .df_row,
    weights = weights_emmeans
  )
}

#' @describeIn summarize_ancova Statistics function that produces a named list of results
#'   of the investigated linear model.
#'
#' @return
#' * `s_ancova()` returns a named list of 5 statistics:
#'   * `n`: Count of complete sample size for the group.
#'   * `lsmean`: Estimated marginal means in the group.
#'   * `lsmean_diff`: Difference in estimated marginal means in comparison to the reference group.
#'     If working with the reference group, this will be empty.
#'   * `lsmean_diff_ci`: Confidence interval for difference in estimated marginal means in comparison
#'     to the reference group.
#'   * `pval`: p-value (not adjusted for multiple comparisons).
#'
#' @keywords internal
s_ancova <- function(df,
                     .var,
                     .df_row,
                     .ref_group,
                     .in_ref_col,
                     variables,
                     conf_level,
                     interaction_y = FALSE,
                     interaction_item = NULL,
                     weights_emmeans = NULL,
                     ...) {
  # Fit the linear model on `.df_row` and obtain the estimated marginal means object.
  emmeans_fit <- h_ancova(
    .var = .var,
    variables = variables,
    .df_row = .df_row,
    interaction_item = interaction_item,
    weights_emmeans = weights_emmeans
  )

  # Tabulate the marginal means at the requested confidence level.
  sum_fit <- summary(
    emmeans_fit,
    level = conf_level
  )

  arm <- variables$arm

  # Arm level(s) present in the data of the current column.
  sum_level <- as.character(unique(df[[arm]]))

  # Ensure that there is only one element in sum_level.
  checkmate::assert_scalar(sum_level)

  # Rows of the marginal means summary that belong to this arm level.
  sum_fit_level <- sum_fit[sum_fit[[arm]] == sum_level, ]

  # Select the response values `y` and the index of the ref arm (`ref_key`).
  if (interaction_y != FALSE) {
    # Response values restricted to the rows where the interaction variable equals `interaction_y`.
    y <- unlist(df[(df[[interaction_item]] == interaction_y), .var])
    # convert characters selected in interaction_y into the numeric order
    # (from here on `interaction_y` holds a row position within `sum_fit_level`, not the original value)
    interaction_y <- which(sum_fit_level[[interaction_item]] == interaction_y)
    sum_fit_level <- sum_fit_level[interaction_y, ]
    # if interaction is called, reset the index
    # NOTE(review): `seq()` followed by `tail(n = 1)` appears to yield the position of the
    # reference arm among the arm levels - confirm.
    ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])])
    ref_key <- tail(ref_key, n = 1)
    # Shift the index by one block of arms per preceding interaction level.
    ref_key <- (interaction_y - 1) * length(unique(.df_row[[arm]])) + ref_key
  } else {
    y <- df[[.var]]
    # Get the index of the ref arm when interaction is not called
    ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])])
    ref_key <- tail(ref_key, n = 1)
  }

  if (.in_ref_col) {
    # Reference column: only `n` and the adjusted mean are filled; the comparison statistics
    # are returned as empty numerics.
    list(
      n = length(y[!is.na(y)]),
      lsmean = formatters::with_label(sum_fit_level$emmean, "Adjusted Mean"),
      lsmean_diff = formatters::with_label(numeric(), "Difference in Adjusted Means"),
      lsmean_diff_ci = formatters::with_label(numeric(), f_conf_level(conf_level)),
      pval = formatters::with_label(numeric(), "p-value")
    )
  } else {
    # Estimate the differences between the marginal means.
    emmeans_contrasts <- emmeans::contrast(
      emmeans_fit,
      # Compare all arms versus the control arm.
      method = "trt.vs.ctrl",
      # Take the arm factor from .ref_group as the control arm.
      ref = ref_key,
      level = conf_level
    )
    sum_contrasts <- summary(
      emmeans_contrasts,
      # Derive confidence intervals, t-tests and p-values.
      infer = TRUE,
      # Do not adjust the p-values for multiplicity.
      adjust = "none"
    )

    # Reduce each contrast label to its left-hand side: drop everything from " - <reference>"
    # onwards, then strip a leading "(" and a trailing ")".
    # NOTE(review): the reference level is inserted into the pattern unescaped - confirm that
    # arm labels never contain regular expression metacharacters.
    contrast_lvls <- gsub(
      "^\\(|\\)$", "", gsub(paste0(" - \\(*", .ref_group[[arm]][1], ".*"), "", sum_contrasts$contrast)
    )
    # With an interaction variable the label holds more than the arm level, hence the substring
    # match; without it the label must equal the arm level exactly.
    if (!is.null(interaction_item)) {
      sum_contrasts_level <- sum_contrasts[grepl(sum_level, contrast_lvls, fixed = TRUE), ]
    } else {
      sum_contrasts_level <- sum_contrasts[sum_level == contrast_lvls, ]
    }
    # `interaction_y` is either still `FALSE` or the row position computed above.
    if (interaction_y != FALSE) {
      sum_contrasts_level <- sum_contrasts_level[interaction_y, ]
    }

    list(
      n = length(y[!is.na(y)]),
      lsmean = formatters::with_label(sum_fit_level$emmean, "Adjusted Mean"),
      lsmean_diff = formatters::with_label(sum_contrasts_level$estimate, "Difference in Adjusted Means"),
      lsmean_diff_ci = formatters::with_label(
        c(sum_contrasts_level$lower.CL, sum_contrasts_level$upper.CL),
        f_conf_level(conf_level)
      ),
      pval = formatters::with_label(sum_contrasts_level$p.value, "p-value")
    )
  }
}

#' @describeIn summarize_ancova Formatted analysis function which is used as `afun` in `summarize_ancova()`.
#'
#' @return
#' * `a_ancova()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_ancova <- function(df,
                     ...,
                     .stats = NULL,
                     .stat_names = NULL,
                     .formats = NULL,
                     .labels = NULL,
                     .indent_mods = NULL) {
  # Separate the layout-provided parameters from the arguments meant for the statistics function
  stat_fn_args <- list(...)
  layout_params <- retrieve_extra_afun_params(names(stat_fn_args$.additional_fun_parameters))
  stat_fn_args$.additional_fun_parameters <- NULL

  # Tell the built-in statistic names apart from user-defined functions
  split_stats <- .split_std_from_custom_stats(.stats)
  .stats <- split_stats$all_stats
  custom_fns <- split_stats$custom_stats

  # Compute the statistics
  stat_values <- .apply_stat_functions(
    default_stat_fnc = s_ancova,
    custom_stat_fnc_list = custom_fns,
    args_list = c(df = list(df), layout_params, stat_fn_args)
  )

  # Resolve which statistics to show, then their formats, labels and indentation
  .stats <- get_stats(
    "summarize_ancova",
    stats_in = .stats,
    custom_stats_in = names(custom_fns)
  )
  stat_values <- stat_values[.stats]
  .formats <- get_formats_from_stats(.stats, .formats)

  # Labels attached to the computed values (all but "n") take part in the label defaults
  value_labels <- lapply(stat_values[names(stat_values) != "n"], attr, "label")
  .labels <- get_labels_from_stats(
    .stats, .labels,
    tern_defaults = c(value_labels, tern_default_labels)
  )
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, stat_values, layout_params$.df_row, layout_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(stat_values, .stat_names)

  # The flattened labels serve both as row names and as row labels
  row_labels <- .unlist_keep_nulls(.labels)

  in_rows(
    .list = stat_values,
    .formats = .formats,
    .names = row_labels,
    .stat_names = .stat_names,
    .labels = row_labels,
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}

#' @describeIn summarize_ancova Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `summarize_ancova()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_ancova()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("Species", ref_group = "setosa") %>%
#'   add_colcounts() %>%
#'   summarize_ancova(
#'     vars = "Petal.Length",
#'     variables = list(arm = "Species", covariates = NULL),
#'     table_names = "unadj",
#'     conf_level = 0.95, var_labels = "Unadjusted comparison",
#'     .labels = c(lsmean = "Mean", lsmean_diff = "Difference in Means")
#'   ) %>%
#'   summarize_ancova(
#'     vars = "Petal.Length",
#'     variables = list(arm = "Species", covariates = c("Sepal.Length", "Sepal.Width")),
#'     table_names = "adj",
#'     conf_level = 0.95, var_labels = "Adjusted comparison (covariates: Sepal.Length and Sepal.Width)"
#'   ) %>%
#'   build_table(iris)
#'
#' @export
#' @order 2
summarize_ancova <- function(lyt,
                             vars,
                             variables,
                             conf_level,
                             interaction_y = FALSE,
                             interaction_item = NULL,
                             weights_emmeans = NULL,
                             var_labels,
                             na_str = default_na_str(),
                             nested = TRUE,
                             ...,
                             show_labels = "visible",
                             table_names = vars,
                             .stats = c("n", "lsmean", "lsmean_diff", "lsmean_diff_ci", "pval"),
                             .stat_names = NULL,
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = list("lsmean_diff_ci" = 1L, "pval" = 1L)) {
  # Process standard extra arguments. Only the formatting arguments that were actually
  # supplied are forwarded to the analysis function.
  extra_args <- list(".stats" = .stats)
  if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names
  if (!is.null(.formats)) extra_args[[".formats"]] <- .formats
  if (!is.null(.labels)) extra_args[[".labels"]] <- .labels
  if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods

  # Process additional arguments to the statistic function.
  # Every value is wrapped in `list()` so that `c()` appends it as ONE named element. Without
  # the wrapper a `weights_emmeans` value of length > 1 would be spread over several elements
  # (`weights_emmeans1`, `weights_emmeans2`, ...) and never reach the statistics function.
  extra_args <- c(
    extra_args,
    variables = list(variables), conf_level = list(conf_level), interaction_y = list(interaction_y),
    interaction_item = list(interaction_item),
    weights_emmeans = list(weights_emmeans),
    ...
  )

  # Append additional info from layout to the analysis function. The formals are extended on a
  # function-local copy of `a_ancova`, which is the one handed to `analyze()` below.
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(a_ancova) <- c(formals(a_ancova), extra_args[[".additional_fun_parameters"]])

  analyze(
    lyt = lyt,
    vars = vars,
    afun = a_ancova,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Add titles, footnotes, page number, and a bounding box to a grid grob
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This function is useful to label grid grobs (also `ggplot2`, and `lattice` plots)
#' with title, footnote, and page numbers.
#'
#' @inheritParams grid::grob
#' @param grob (`grob`)\cr a grid grob object, optionally `NULL` if only a `grob` with the decoration should be shown.
#' @param titles (`character`)\cr titles given as a vector of strings that are each separated by a newline and wrapped
#'   according to the page width.
#' @param footnotes (`character`)\cr footnotes. Uses the same formatting rules as `titles`.
#' @param page (`string` or `NULL`)\cr page numeration. If `NULL` then no page number is displayed.
#' @param width_titles (`grid::unit`)\cr width of titles. Usually defined as all the available space
#'   `grid::unit(1, "npc")`, it is affected by the parameter `outer_margins`. Right margins (`outer_margins[4]`)
#'   need to be subtracted from the allowed width.
#' @param width_footnotes (`grid::unit`)\cr width of footnotes. Same default and margin correction as `width_titles`.
#' @param border (`flag`)\cr whether a border should be drawn around the plot or not.
#' @param padding (`grid::unit`)\cr padding. A unit object of length 4. Innermost margin between the plot (`grob`)
#'   and, possibly, the border of the plot. Usually expressed in 4 identical values (usually `"lines"`). It defaults
#'   to `grid::unit(rep(1, 4), "lines")`.
#' @param margins (`grid::unit`)\cr margins. A unit object of length 4. Margins between the plot and the other
#'   elements in the list (e.g. titles, plot, and footers). This is usually expressed in 4 `"lines"`, where the
#'   lateral ones are 0s, while top and bottom are 1s. It defaults to `grid::unit(c(1, 0, 1, 0), "lines")`.
#' @param outer_margins (`grid::unit`)\cr outer margins. A unit object of length 4. It defines the general margin of
#'   the plot, considering also decorations like titles, footnotes, and page numbers. It defaults to
#'   `grid::unit(c(2, 1.5, 3, 1.5), "cm")`.
#' @param gp_titles (`gpar`)\cr a `gpar` object. Mainly used to set different `"fontsize"`.
#' @param gp_footnotes (`gpar`)\cr a `gpar` object. Mainly used to set different `"fontsize"`.
#'
#' @return A grid grob (`gTree`).
#'
#' @details The titles and footnotes will be ragged, i.e. each title will be wrapped individually.
#'
#' @examples
#' library(grid)
#'
#' titles <- c(
#'   "Edgar Anderson's Iris Data",
#'   paste(
#'     "This famous (Fisher's or Anderson's) iris data set gives the measurements",
#'     "in centimeters of the variables sepal length and width and petal length",
#'     "and width, respectively, for 50 flowers from each of 3 species of iris."
#'   )
#' )
#'
#' footnotes <- c(
#'   "The species are Iris setosa, versicolor, and virginica.",
#'   paste(
#'     "iris is a data frame with 150 cases (rows) and 5 variables (columns) named",
#'     "Sepal.Length, Sepal.Width, Petal.Length, Petal.Width, and Species."
#'   )
#' )
#'
#' ## empty plot
#' grid.newpage()
#'
#' grid.draw(
#'   decorate_grob(
#'     NULL,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 4 of 10"
#'   )
#' )
#'
#' # grid
#' p <- gTree(
#'   children = gList(
#'     rectGrob(),
#'     xaxisGrob(),
#'     yaxisGrob(),
#'     textGrob("Sepal.Length", y = unit(-4, "lines")),
#'     textGrob("Petal.Length", x = unit(-3.5, "lines"), rot = 90),
#'     pointsGrob(iris$Sepal.Length, iris$Petal.Length, gp = gpar(col = iris$Species), pch = 16)
#'   ),
#'   vp = vpStack(plotViewport(), dataViewport(xData = iris$Sepal.Length, yData = iris$Petal.Length))
#' )
#' grid.newpage()
#' grid.draw(p)
#'
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     grob = p,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 6 of 129"
#'   )
#' )
#'
#' ## with ggplot2
#' library(ggplot2)
#'
#' p_gg <- ggplot2::ggplot(iris, aes(Sepal.Length, Sepal.Width, col = Species)) +
#'   ggplot2::geom_point()
#' p_gg
#' p <- ggplotGrob(p_gg)
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     grob = p,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 6 of 129"
#'   )
#' )
#'
#' ## with lattice
#' library(lattice)
#'
#' xyplot(Sepal.Length ~ Petal.Length, data = iris, col = iris$Species)
#' p <- grid.grab()
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     grob = p,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 6 of 129"
#'   )
#' )
#'
#' # with gridExtra - no borders
#' library(gridExtra)
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     tableGrob(
#'       head(mtcars)
#'     ),
#'     titles = "title",
#'     footnotes = "footnote",
#'     border = FALSE
#'   )
#' )
#'
#' @export
decorate_grob <- function(grob,
                          titles,
                          footnotes,
                          page = "",
                          width_titles = grid::unit(1, "npc"),
                          width_footnotes = grid::unit(1, "npc"),
                          border = TRUE,
                          padding = grid::unit(rep(1, 4), "lines"),
                          margins = grid::unit(c(1, 0, 1, 0), "lines"),
                          outer_margins = grid::unit(c(2, 1.5, 3, 1.5), "cm"),
                          gp_titles = grid::gpar(),
                          gp_footnotes = grid::gpar(fontsize = 8),
                          name = NULL,
                          gp = grid::gpar(),
                          vp = NULL) {
  # Titles and footnotes are wrapped ahead of drawing, based on the viewport width alone, so
  # the outer margin has to be taken off their widths here.
  has_outer_margins <- any(as.numeric(outer_margins) > 0)
  if (has_outer_margins) {
    width_titles <- width_titles - outer_margins[4]
    width_footnotes <- width_footnotes - outer_margins[4]
  }

  # The decoration uses a 4 x 1 layout: 1 = titles, 2 = plot, 3 = footnotes, 4 = page number.
  layout_row <- function(row) {
    grid::viewport(layout.pos.row = row, layout.pos.col = 1)
  }

  titles_grob <- split_text_grob(
    titles,
    x = 0, y = 1,
    just = c("left", "top"),
    width = width_titles,
    vp = layout_row(1),
    gp = gp_titles
  )

  footnotes_grob <- split_text_grob(
    footnotes,
    x = 0, y = 1,
    just = c("left", "top"),
    width = width_footnotes,
    vp = layout_row(3),
    gp = gp_footnotes
  )

  # Page label, anchored bottom-right and drawn with the footnote graphical parameters.
  page_grob <- grid::textGrob(
    paste("\n", page),
    x = 1, y = 0,
    just = c("right", "bottom"),
    vp = layout_row(4),
    gp = gp_footnotes
  )

  # The plot itself: `padding` separates the grob from the optional border rectangle,
  # `margins` separates the bordered plot from the text above and below.
  padded_grob <- grid::gTree(
    children = grid::gList(grob),
    vp = grid::plotViewport(margins = padding)
  )
  bordered_plot <- grid::gTree(
    children = grid::gList(
      if (border) grid::rectGrob(),
      padded_grob
    ),
    vp = grid::vpStack(
      layout_row(2),
      grid::plotViewport(margins = margins)
    )
  )

  # Row heights follow the text grobs; the plot row takes whatever space is left.
  decoration_layout <- grid::grid.layout(
    nrow = 4, ncol = 1,
    heights = grid::unit.c(
      grid::grobHeight(titles_grob),
      grid::unit(1, "null"),
      grid::grobHeight(footnotes_grob),
      grid::grobHeight(page_grob)
    )
  )

  decorated <- grid::gTree(
    children = grid::gList(titles_grob, bordered_plot, footnotes_grob, page_grob),
    childrenvp = NULL,
    name = "titles_grob_footnotes",
    vp = grid::vpStack(
      grid::plotViewport(margins = outer_margins), # outermost margins
      grid::viewport(layout = decoration_layout)
    )
  )

  # The inputs are kept as fields on the returned gTree.
  grid::gTree(
    grob = grob,
    titles = titles,
    footnotes = footnotes,
    page = page,
    width_titles = width_titles,
    width_footnotes = width_footnotes,
    outer_margins = outer_margins,
    gp_titles = gp_titles,
    gp_footnotes = gp_footnotes,
    children = grid::gList(decorated),
    name = name,
    gp = gp,
    vp = vp,
    cl = "decoratedGrob"
  )
}

# nocov start
#' @importFrom grid validDetails
#' @noRd
validDetails.decoratedGrob <- function(x) {
  # Validity check for "decoratedGrob" objects; returns `x` unchanged when all checks pass.
  checkmate::assert_character(x$titles)
  checkmate::assert_character(x$footnotes)

  if (!is.null(x$grob)) {
    checkmate::assert_true(grid::is.grob(x$grob))
  }
  if (length(x$page) == 1) {
    checkmate::assert_character(x$page)
  }

  # Margin-like fields that are not `unit` objects must have four entries. A field that is not
  # stored on the object at all (NULL) is skipped: `decorate_grob()` keeps `outer_margins` but
  # neither `margins` nor `padding`, and a length check on NULL would reject every such object.
  for (field in c("outer_margins", "margins", "padding")) {
    value <- x[[field]]
    if (!is.null(value) && !grid::is.unit(value)) {
      checkmate::assert_vector(value, len = 4, .var.name = paste0("x$", field))
    }
  }

  x
}

#' @importFrom grid widthDetails
#' @noRd
widthDetails.decoratedGrob <- function(x) {
  # The reported width is always one "null" unit, whatever `x` contains.
  grid::unit(x = 1, units = "null")
}

#' @importFrom grid heightDetails
#' @noRd
heightDetails.decoratedGrob <- function(x) {
  # The reported height is always one "null" unit, whatever `x` contains.
  grid::unit(x = 1, units = "null")
}

#' Split text according to available text width
#'
#' Dynamically wrap text.
#'
#' @inheritParams grid::grid.text
#' @param text (`string`)\cr the text to wrap.
#' @param width (`grid::unit`)\cr a unit object specifying maximum width of text.
#'
#' @return A text `grob`.
#'
#' @details This code is taken from `R Graphics by Paul Murrell, 2nd edition`
#'
#' @keywords internal
split_text_grob <- function(text,
                            x = grid::unit(0.5, "npc"),
                            y = grid::unit(0.5, "npc"),
                            width = grid::unit(1, "npc"),
                            just = "centre",
                            hjust = NULL,
                            vjust = NULL,
                            default.units = "npc", # nolint
                            name = NULL,
                            gp = grid::gpar(),
                            vp = NULL) {
  # Literal backslash-n sequences are turned into real line breaks so both spellings behave alike
  text <- gsub("\\\\n", "\n", text)

  # Bare numbers become units in `default.units`; compound units are converted to `default.units`
  as_unit <- function(value) {
    if (grid::is.unit(value)) value else grid::unit(value, default.units)
  }
  flatten_unit <- function(value) {
    if (grid::unitType(value) %in% c("sum", "min", "max")) {
      grid::convertUnit(value, default.units)
    } else {
      value
    }
  }
  x <- as_unit(x)
  y <- as_unit(y)
  width <- as_unit(width)
  x <- flatten_unit(x)
  y <- flatten_unit(y)
  width <- flatten_unit(width)

  # Graphical parameters may change how wide the text is drawn: scale the available width by the
  # ratio of the text width without `gp` to the text width with `gp`
  if (length(gp) > 0) {
    joined_text <- paste0(text, collapse = "\n")
    npc_width <- function(grob) {
      grid::convertWidth(grid::grobWidth(grob), "npc", valueOnly = TRUE)
    }
    width_plain <- npc_width(grid::textGrob(joined_text))
    width_styled <- npc_width(grid::textGrob(joined_text, gp = gp))

    width <- width * width_plain / width_styled
  }

  # For a width given in a fixed unit the wrapped text is stored as an attribute of `text`
  # NOTE(review): nothing further down in this function reads the attribute - confirm it is still needed
  fixed_units <- c("cm", "inches", "mm", "points", "picas", "bigpts", "dida", "cicero", "scaledpts")
  if (!inherits(width, "unit.arithmetic") && !is.null(attr(width, "unit")) &&
    attr(width, "unit") %in% fixed_units) {
    wrapped_fixed <- vapply(text, split_string, character(1), width = width)
    attr(text, "fixed_text") <- paste(wrapped_fixed, collapse = "\n")
  }

  # One element per line: joining first makes embedded line breaks split points as well
  text_lines <- unlist(strsplit(paste0(text, collapse = "\n"), "\n"))
  wrapped_label <- split_string(text_lines, width)

  # Text grob with the wrapped label; it is returned, not drawn
  grid::grid.text(
    label = wrapped_label,
    x = x, y = y,
    just = just,
    hjust = hjust,
    vjust = vjust,
    rot = 0,
    check.overlap = FALSE,
    name = name,
    gp = gp,
    vp = vp,
    draw = FALSE
  )
}

#' @importFrom grid validDetails
#' @noRd
validDetails.dynamicSplitText <- function(x) {
  # Validity check for "dynamicSplitText" objects: `text` must be character and `width` must be
  # a unit with exactly one element. Returns `x` unchanged when all checks pass.
  checkmate::assert_character(x$text)
  checkmate::assert_true(grid::is.unit(x$width))
  checkmate::assert_vector(x$width, len = 1)
  x
}

#' @importFrom grid heightDetails
#' @noRd
heightDetails.dynamicSplitText <- function(x) {
  # Use the wrapped text stored on `x$text` when there is one; otherwise wrap every element of
  # the text to `x$width` and join the results with line breaks.
  label <- attr(x$text, "fixed_text")
  if (is.null(label)) {
    wrapped <- vapply(x$text, split_string, character(1), width = x$width)
    label <- paste(wrapped, collapse = "\n")
  }
  grid::stringHeight(label)
}

#' @importFrom grid widthDetails
#' @noRd
widthDetails.dynamicSplitText <- function(x) {
  # The reported width is the `width` stored on the object.
  stored_width <- x$width
  stored_width
}

#' @importFrom grid drawDetails
#' @noRd
drawDetails.dynamicSplitText <- function(x, recording) {
  # Use the wrapped text stored on `x$text` when there is one; otherwise wrap every element of
  # the text to `x$width` and join the results with line breaks.
  label <- attr(x$text, "fixed_text")
  if (is.null(label)) {
    wrapped <- vapply(x$text, split_string, character(1), width = x$width)
    label <- paste(wrapped, collapse = "\n")
  }

  # Drop the wrapping fields, set the label, and replace the first class with "text" before drawing.
  x$width <- NULL
  x$label <- label
  x$text <- NULL
  class(x) <- c("text", class(x)[-1])

  grid::grid.draw(x)
}
# nocov end

# Adapted from Paul Murrell R Graphics 2nd Edition
# https://www.stat.auckland.ac.nz/~paul/RG2e/interactgrid-splittext.R
#
# Wraps text to a maximum width. Every element of `text` is split at single spaces and
# re-assembled word by word: a word is appended with a space while the running line width stays
# below `width`, otherwise it is appended after a line break. The wrapped elements are joined
# with line breaks.
#
# text:  character vector, one element per input line.
# width: `grid::unit` giving the maximum line width. Widths are compared in the unit type of
#        `width`, converted within the current viewport.
#
# Returns a single string. Zero-length `text` gives "".
split_string <- function(text, width) {
  strings <- strsplit(text, " ")
  # One slot per input element. For zero-length input this leaves an empty vector, so the result
  # is "" instead of the literal string "NA".
  out_string <- character(length(strings))

  # These do not depend on the element being processed.
  gapwidth <- grid::stringWidth(" ")
  availwidth <- as.numeric(width)

  for (string_i in seq_along(strings)) {
    words <- strings[[string_i]]
    # strsplit() returns character(0) for an empty string
    if (length(words) == 0) words <- ""

    # The first word always opens the line.
    out_string[string_i] <- words[[1]]
    linewidth <- grid::stringWidth(out_string[string_i])

    for (word in words[-1]) {
      width_i <- grid::stringWidth(word)
      # Main conversion of allowed text width -> npc units are 0<npc<1. External viewport is used for conversion
      candidate <- grid::convertWidth(linewidth + gapwidth + width_i, grid::unitType(width), valueOnly = TRUE)
      if (candidate < availwidth) {
        sep <- " "
        linewidth <- linewidth + gapwidth + width_i
      } else {
        sep <- "\n"
        linewidth <- width_i
      }
      out_string[string_i] <- paste(out_string[string_i], word, sep = sep)
    }
  }
  paste(out_string, collapse = "\n")
}

#' Update page number
#'
#' Automatically updates page number.
#'
#' @param npages (`numeric(1)`)\cr total number of pages.
#' @param ... arguments passed on to [decorate_grob()].
#'
#' @return Closure that increments the page number.
#'
#' @keywords internal
decorate_grob_factory <- function(npages, ...) {
  # Counter kept in the closure; it advances by one on every call of the returned function.
  page_idx <- 0
  function(grob) {
    page_idx <<- page_idx + 1
    if (page_idx > npages) {
      stop(paste("current page is", page_idx, "but max.", npages, "specified."))
    }
    page_label <- paste("Page", page_idx, "of", npages)
    decorate_grob(grob = grob, page = page_label, ...)
  }
}

#' Decorate set of `grob`s and add page numbering
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Note that this uses the [decorate_grob_factory()] function.
#'
#' @param grobs (`list` of `grob`)\cr a list of grid grobs.
#' @param ... arguments passed on to [decorate_grob()].
#'
#' @return A decorated grob.
#'
#' @examples
#' library(ggplot2)
#' library(grid)
#' g <- with(data = iris, {
#'   list(
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Length, Sepal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Length, Petal.Length, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Length, Petal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Width, Petal.Length, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Width, Petal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Petal.Length, Petal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     )
#'   )
#' })
#' lg <- decorate_grob_set(grobs = g, titles = "Hello\nOne\nTwo\nThree", footnotes = "")
#'
#' draw_grob(lg[[1]])
#' draw_grob(lg[[2]])
#' draw_grob(lg[[6]])
#'
#' @export
decorate_grob_set <- function(grobs, ...) {
  # The total page count equals the number of grobs; the factory's closure
  # numbers the pages in the order in which `lapply()` visits the list.
  add_page_decoration <- decorate_grob_factory(npages = length(grobs), ...)
  lapply(grobs, add_page_decoration)
}

#' Custom split functions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Collection of useful functions that are expanding on the core list of functions
#' provided by `rtables`. See [rtables::custom_split_funs] and [rtables::make_split_fun()]
#' for more information on how to make a custom split function. All these functions
#' work with [rtables::split_rows_by()] argument `split_fun` to modify the way the split
#' happens. For other split functions, consider consulting [`rtables::split_funcs`].
#'
#' @seealso [rtables::make_split_fun()]
#'
#' @name utils_split_funs
NULL

#' @describeIn utils_split_funs Split function to place reference group facet at a specific position
#'  during post-processing stage.
#'
#' @param position (`string` or `integer`)\cr position to use for the reference group facet. Can be `"first"`,
#'   `"last"`, or a specific position.
#'
#' @return
#' * `ref_group_position()` returns a utility function that puts the reference group
#'   first, last, or at a certain position, and needs to be assigned to `split_fun`.
#'
#' @examples
#' library(dplyr)
#'
#' dat <- data.frame(
#'   x = factor(letters[1:5], levels = letters[5:1]),
#'   y = 1:5
#' )
#'
#' # With rtables layout functions
#' basic_table() %>%
#'   split_cols_by("x", ref_group = "c", split_fun = ref_group_position("last")) %>%
#'   analyze("y") %>%
#'   build_table(dat)
#'
#' # With tern layout functions
#' adtte_f <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVAL = day2month(AVAL),
#'     is_event = CNSR == 0
#'   )
#'
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position("first")) %>%
#'   add_colcounts() %>%
#'   surv_time(
#'     vars = "AVAL",
#'     var_labels = "Survival Time (Months)",
#'     is_event = "is_event",
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position(2)) %>%
#'   add_colcounts() %>%
#'   surv_time(
#'     vars = "AVAL",
#'     var_labels = "Survival Time (Months)",
#'     is_event = "is_event",
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' @export
ref_group_position <- function(position = "first") {
  # Post-processing step of the split: re-orders the facets so that the
  # reference group ends up at the requested position.
  move_ref_group <- function(splret, spl, fulldf) {
    if (!"ref_group_value" %in% methods::slotNames(spl)) {
      stop("Reference group is undefined.")
    }

    split_var <- rtables:::spl_payload(spl)
    fulldf[[split_var]] <- factor(fulldf[[split_var]])
    all_lvls <- levels(fulldf[[split_var]])
    facet_names <- names(splret$values)

    # Every facet must correspond to a level of the split variable.
    if (!all(facet_names %in% all_lvls)) {
      stop("This split function does not work with combination facets.")
    }

    ref_idx <- which(all_lvls == rtables:::spl_ref_group(spl))

    # Translate `position` into the `after` index used by `append()` below.
    is_keyword <- checkmate::test_choice(position, c("first", "last"))
    insert_after <- if (is_keyword && position == "first") {
      0
    } else if (is_keyword && position == "last") {
      length(all_lvls)
    } else if (checkmate::test_int(position, lower = 1, upper = length(all_lvls))) {
      position - 1
    } else {
      stop("Wrong input for ref group position. It must be 'first', 'last', or a integer.")
    }

    # Take the reference level out and put it back in at the requested spot.
    new_lvls <- append(all_lvls[-ref_idx], all_lvls[ref_idx], after = insert_after)
    new_order <- match(new_lvls, facet_names)

    make_split_result(
      splret$values[new_order],
      splret$datasplit[new_order],
      splret$labels[new_order]
    )
  }

  make_split_fun(post = list(move_ref_group))
}

#' @describeIn utils_split_funs Split function to change level order based on an `integer`
#'   vector or a `character` vector that represent the split variable's factor levels.
#'
#' @param order (`character` or `numeric`)\cr vector of ordering indices for the split facets.
#'
#' @return
#' * `level_order()` returns a utility function that changes the original levels' order,
#'   depending on input `order` and split levels.
#'
#' @examples
#' # level_order --------
#' # Even if default would bring ref_group first, the original order puts it last
#' basic_table() %>%
#'   split_cols_by("Species", split_fun = level_order(c(1, 3, 2))) %>%
#'   analyze("Sepal.Length") %>%
#'   build_table(iris)
#'
#' # character vector
#' new_order <- level_order(levels(iris$Species)[c(1, 3, 2)])
#' basic_table() %>%
#'   split_cols_by("Species", ref_group = "virginica", split_fun = new_order) %>%
#'   analyze("Sepal.Length") %>%
#'   build_table(iris)
#'
#' @export
level_order <- function(order) {
  # Post-processing step of the split: permutes the facets according to `order`,
  # given either as positions or as facet names.
  reorder_facets <- function(splret, spl, fulldf) {
    n_facets <- length(splret$values)
    if (checkmate::test_integerish(order)) {
      # Positions are used as they are, after a range check.
      checkmate::assert_integerish(order, lower = 1, upper = n_facets)
      idx <- order
    } else {
      # Names must be exactly the set of facet names and are mapped to positions.
      facet_names <- names(splret$values)
      checkmate::assert_character(order, len = n_facets)
      checkmate::assert_set_equal(order, facet_names, ordered = FALSE)
      idx <- match(order, facet_names)
    }
    make_split_result(
      splret$values[idx],
      splret$datasplit[idx],
      splret$labels[idx]
    )
  }

  make_split_fun(post = list(reorder_facets))
}

#' Summarize variables in columns
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [summarize_colvars()] uses the statistics function [s_summary()] to analyze variables that are
#' arranged in columns. The variables to analyze should be specified in the table layout via column splits (see
#' [rtables::split_cols_by()] and [rtables::split_cols_by_multivar()]) prior to using [summarize_colvars()].
#'
#' The function is a minimal wrapper for [rtables::analyze_colvars()], a function typically used to apply different
#' analysis methods in rows for each column variable. To use the analysis methods as column labels, please refer to
#' the [analyze_vars_in_cols()] function.
#'
#' @inheritParams argument_convention
#' @param ... arguments passed to [s_summary()].
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return
#' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
#' Adding this function to an `rtable` layout will summarize the given variables, arrange the output
#' in columns, and add it to the table layout.
#'
#' @seealso [rtables::split_cols_by_multivar()] and [`analyze_colvars_functions`].
#'
#' @examples
#' dta_test <- data.frame(
#'   USUBJID = rep(1:6, each = 3),
#'   PARAMCD = rep("lab", 6 * 3),
#'   AVISIT = rep(paste0("V", 1:3), 6),
#'   ARM = rep(LETTERS[1:3], rep(6, 3)),
#'   AVAL = c(9:1, rep(NA, 9)),
#'   CHG = c(1:9, rep(NA, 9))
#' )
#'
#' ## Default output within a `rtables` pipeline.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
#'   summarize_colvars() %>%
#'   build_table(dta_test)
#'
#' ## Selection of statistics, formats and labels also work.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
#'   summarize_colvars(
#'     .stats = c("n", "mean_sd"),
#'     .formats = c("mean_sd" = "xx.x, xx.x"),
#'     .labels = c(n = "n", mean_sd = "Mean, SD")
#'   ) %>%
#'   build_table(dta_test)
#'
#' ## Use arguments interpreted by `s_summary`.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
#'   summarize_colvars(na.rm = FALSE) %>%
#'   build_table(dta_test)
#'
#' @export
summarize_colvars <- function(lyt,
                              na_str = default_na_str(),
                              ...,
                              .stats = c("n", "mean_sd", "median", "range", "count_fraction"),
                              .stat_names = NULL,
                              .formats = NULL,
                              .labels = NULL,
                              .indent_mods = NULL) {
  # Process standard extra arguments; the optional ones are only forwarded when supplied
  extra_args <- list(".stats" = .stats)
  if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names
  if (!is.null(.formats)) extra_args[[".formats"]] <- .formats
  if (!is.null(.labels)) extra_args[[".labels"]] <- .labels
  if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods

  # Process additional arguments to the statistic function. Without this step
  # everything supplied through `...` (e.g. `na.rm = FALSE`) would be silently dropped.
  extra_args <- c(extra_args, ...)

  # Adding additional info from layout to analysis function. The assignment to
  # `formals()` creates a modified local copy of `a_summary`, which is the one passed on.
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(a_summary) <- c(formals(a_summary), extra_args[[".additional_fun_parameters"]])

  analyze_colvars(
    lyt,
    afun = a_summary,
    na_str = na_str,
    extra_args = extra_args
  )
}

#' Count patient events in columns
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The summarize function [summarize_patients_events_in_cols()] creates a layout element to summarize patient
#' event counts in columns.
#'
#' This function analyzes the elements (events) supplied via the `filters_list` parameter and returns a row
#' with counts of number of patients for each event as well as the total numbers of patients and events.
#' The `id` variable is used to indicate unique subject identifiers (defaults to `USUBJID`).
#'
#' If there are multiple occurrences of the same event recorded for a patient, the event is only counted once.
#'
#' @inheritParams argument_convention
#' @param filters_list (named `list` of `character`)\cr list where each element in this list describes one
#'   type of event described by filters, in the same format as [s_count_patients_with_event()].
#'   If it has a label, then this will be used for the column title.
#' @param empty_stats (`character`)\cr optional names of the statistics that should be returned empty such
#'   that corresponding table cells will stay blank.
#' @param custom_label (`string` or `NULL`)\cr if provided and `labelstr` is empty then this will
#'   be used as label.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   In addition to any statistics added using `filters_list`, statistic options are:
#'   ``r shQuote(get_stats("summarize_patients_events_in_cols"), type = "sh")``
#'
#' @name count_patients_events_in_cols
#' @order 1
NULL

#' @describeIn count_patients_events_in_cols Statistics function which counts numbers of patients and multiple
#'   events defined by filters. Used as analysis function `afun` in `summarize_patients_events_in_cols()`.
#'
#' @return
#' * `s_count_patients_and_multiple_events()` returns a list with the statistics:
#'   - `unique`: number of unique patients in `df`.
#'   - `all`: number of rows in `df`.
#'   - one element with the same name as in `filters_list`: number of rows in `df`,
#'     i.e. events, fulfilling the filter condition.
#'
#' @keywords internal
s_count_patients_and_multiple_events <- function(df,
                                                 id,
                                                 filters_list,
                                                 empty_stats = character(),
                                                 labelstr = "",
                                                 custom_label = NULL) {
  checkmate::assert_list(filters_list, names = "named")
  checkmate::assert_data_frame(df)
  checkmate::assert_string(id)
  checkmate::assert_disjunct(c("unique", "all"), names(filters_list))
  checkmate::assert_character(empty_stats)
  checkmate::assert_string(labelstr)
  checkmate::assert_string(custom_label, null.ok = TRUE)

  # Helper key that is unique per row: counting "patients" on this column
  # counts every row of `df` exactly once.
  df$.row_index <- as.character(seq_len(nrow(df)))

  # Label precedence: `labelstr`, then `custom_label`, then a generic fallback.
  row_label <- "counts"
  if (labelstr != "") {
    row_label <- labelstr
  } else if (!is.null(custom_label)) {
    row_label <- custom_label
  }

  n_patients <- s_num_patients_content(df = df, .N_col = 1, .var = id, required = NULL)$unique[1L]

  # Number of rows fulfilling one set of filters.
  count_events <- function(filters) {
    n_events <- s_count_patients_with_event(
      df = df, .var = ".row_index", filters = filters, .N_col = 1, .N_row = 1
    )$count
    formatters::with_label(n_events, row_label)
  }

  stats <- c(
    list(
      unique = formatters::with_label(n_patients, row_label),
      all = formatters::with_label(nrow(df), row_label)
    ),
    lapply(filters_list, count_events)
  )

  # Blank out the requested statistics while keeping their labels. The loop is a
  # no-op when `empty_stats` is empty or names no computed statistic.
  for (stat in intersect(names(stats), empty_stats)) {
    stats[[stat]] <- formatters::with_label(character(), obj_label(stats[[stat]]))
  }
  stats
}

#' @describeIn count_patients_events_in_cols Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @param col_split (`flag`)\cr whether the columns should be split.
#'   Set to `FALSE` when the required column split has been done already earlier in the layout pipe.
#'
#' @return
#' * `summarize_patients_events_in_cols()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows
#'   containing the statistics from `s_count_patients_and_multiple_events()` to the table layout.
#'
#' @examples
#' df <- data.frame(
#'   USUBJID = rep(c("id1", "id2", "id3", "id4"), c(2, 3, 1, 1)),
#'   ARM = c("A", "A", "B", "B", "B", "B", "A"),
#'   AESER = rep("Y", 7),
#'   AESDTH = c("Y", "Y", "N", "Y", "Y", "N", "N"),
#'   AEREL = c("Y", "Y", "N", "Y", "Y", "N", "Y"),
#'   AEDECOD = c("A", "A", "A", "B", "B", "C", "D"),
#'   AEBODSYS = rep(c("SOC1", "SOC2", "SOC3"), c(3, 3, 1))
#' )
#'
#' # `summarize_patients_events_in_cols()`
#' basic_table() %>%
#'   summarize_patients_events_in_cols(
#'     filters_list = list(
#'       related = formatters::with_label(c(AEREL = "Y"), "Events (Related)"),
#'       fatal = c(AESDTH = "Y"),
#'       fatal_related = c(AEREL = "Y", AESDTH = "Y")
#'     ),
#'     custom_label = "%s Total number of patients and events"
#'   ) %>%
#'   build_table(df)
#'
#' @export
#' @order 2
summarize_patients_events_in_cols <- function(lyt,
                                              id = "USUBJID",
                                              filters_list = list(),
                                              empty_stats = character(),
                                              na_str = default_na_str(),
                                              ...,
                                              .stats = c(
                                                "unique",
                                                "all",
                                                names(filters_list)
                                              ),
                                              .labels = c(
                                                unique = "Patients (All)",
                                                all = "Events (All)",
                                                labels_or_names(filters_list)
                                              ),
                                              col_split = TRUE) {
  # Arguments handed to the statistics function, including anything in `...`.
  extra_args <- list(id = id, filters_list = filters_list, empty_stats = empty_stats, ...)

  # One content function per requested statistic; `Map()` names the resulting
  # list by the entries of `.stats`.
  single_stat_cfun <- function(stat) {
    make_afun(s_count_patients_and_multiple_events, .stats = stat, .formats = "xx.")
  }
  cfun_list <- Map(single_stat_cfun, stat = .stats)

  # Optionally create one column per statistic, all based on the `id` variable.
  if (col_split) {
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = rep(id, length(.stats)),
      varlabels = .labels[.stats]
    )
  }

  summarize_row_groups(lyt = lyt, cfun = cfun_list, na_str = na_str, extra_args = extra_args)
}

#' Analyze a pairwise Cox-PH model
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [coxph_pairwise()] creates a layout element to analyze a pairwise Cox-PH model.
#'
#' This function can return statistics including p-value, hazard ratio (HR), and HR confidence intervals from both
#' stratified and unstratified Cox-PH models. The variable(s) to be analyzed is specified via the `vars` argument and
#' any stratification factors via the `strata` argument.
#'
#' @inheritParams argument_convention
#' @inheritParams s_surv_time
#' @param strata (`character` or `NULL`)\cr variable names indicating stratification factors.
#' @param strat `r lifecycle::badge("deprecated")` Please use the `strata` argument instead.
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
#'   [control_coxph()]. Some possible parameter options are:
#'   * `pval_method` (`string`)\cr p-value method for testing the null hypothesis that hazard ratio = 1. Default
#'     method is `"log-rank"` which comes from [survival::survdiff()], can also be set to `"wald"` or `"likelihood"`
#'     (from [survival::coxph()]).
#'   * `ties` (`string`)\cr specifying the method for tie handling. Default is `"efron"`,
#'     can also be set to `"breslow"` or `"exact"`. See more in [survival::coxph()].
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for HR.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("coxph_pairwise"), type = "sh")``
#'
#' @name survival_coxph_pairwise
#' @order 1
NULL

#' @describeIn survival_coxph_pairwise Statistics function which analyzes HR, CIs of HR, and p-value of a Cox-PH model.
#'
#' @return
#' * `s_coxph_pairwise()` returns the statistics:
#'   * `pvalue`: p-value to test the null hypothesis that hazard ratio = 1.
#'   * `hr`: Hazard ratio.
#'   * `hr_ci`: Confidence interval for hazard ratio.
#'   * `n_tot`: Total number of observations.
#'   * `n_tot_events`: Total number of events.
#'
#' @keywords internal
s_coxph_pairwise <- function(df,
                             .ref_group,
                             .in_ref_col,
                             .var,
                             is_event,
                             strata = NULL,
                             strat = lifecycle::deprecated(),
                             control = control_coxph(),
                             ...) {
  # `strat` is the deprecated spelling of `strata`; when supplied it overrides `strata`.
  if (lifecycle::is_present(strat)) {
    lifecycle::deprecate_warn("0.9.4", "s_coxph_pairwise(strat)", "s_coxph_pairwise(strata)")
    strata <- strat
  }

  checkmate::assert_string(.var)
  checkmate::assert_numeric(df[[.var]])
  checkmate::assert_logical(df[[is_event]])
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  pval_method <- control$pval_method
  ties <- control$ties
  conf_level <- control$conf_level

  # In the reference column there is nothing to compare against: return empty
  # statistics that still carry their labels.
  if (.in_ref_col) {
    return(
      list(
        pvalue = formatters::with_label(numeric(), paste0("p-value (", pval_method, ")")),
        hr = formatters::with_label(numeric(), "Hazard Ratio"),
        hr_ci = formatters::with_label(numeric(), f_conf_level(conf_level)),
        hr_ci_3d = formatters::with_label(numeric(), paste0("Hazard Ratio (", f_conf_level(conf_level), ")")),
        n_tot = formatters::with_label(numeric(), "Total n"),
        n_tot_events = formatters::with_label(numeric(), "Total events")
      )
    )
  }

  # Stack reference and comparison data; "ref" is the first factor level of `arm`.
  data <- rbind(.ref_group, df)
  group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "x"))

  df_cox <- data.frame(
    tte = data[[.var]],
    is_event = data[[is_event]],
    arm = group
  )
  if (is.null(strata)) {
    formula_cox <- survival::Surv(tte, is_event) ~ arm
  } else {
    # All stratification variables go into a single `strata()` term.
    formula_cox <- stats::as.formula(
      paste0(
        "survival::Surv(tte, is_event) ~ arm + strata(",
        paste(strata, collapse = ","),
        ")"
      )
    )
    df_cox <- cbind(df_cox, data[strata])
  }
  cox_fit <- survival::coxph(
    formula = formula_cox,
    data = df_cox,
    ties = ties
  )
  sum_cox <- summary(cox_fit, conf.int = conf_level, extend = TRUE)

  # Log-rank test on the same formula and data. The upper tail is requested
  # directly instead of `1 - pchisq(...)`, which rounds to exactly 0 for large
  # test statistics.
  logrank_fit <- survival::survdiff(
    formula_cox,
    data = df_cox
  )
  log_rank_pvalue <- stats::pchisq(
    logrank_fit$chisq,
    df = length(logrank_fit$n) - 1,
    lower.tail = FALSE
  )

  pval <- switch(pval_method,
    "wald" = sum_cox$waldtest["pvalue"],
    "log-rank" = log_rank_pvalue, # pvalue from original log-rank test survival::survdiff()
    "likelihood" = sum_cox$logtest["pvalue"]
  )
  # Row 1 of `conf.int` is used; columns 1, 3 and 4 are read as the hazard
  # ratio and the two confidence limits.
  list(
    pvalue = formatters::with_label(unname(pval), paste0("p-value (", pval_method, ")")),
    hr = formatters::with_label(sum_cox$conf.int[1, 1], "Hazard Ratio"),
    hr_ci = formatters::with_label(unname(sum_cox$conf.int[1, 3:4]), f_conf_level(conf_level)),
    hr_ci_3d = formatters::with_label(
      c(sum_cox$conf.int[1, 1], unname(sum_cox$conf.int[1, 3:4])),
      paste0("Hazard Ratio (", f_conf_level(conf_level), ")")
    ),
    n_tot = formatters::with_label(sum_cox$n, "Total n"),
    n_tot_events = formatters::with_label(sum_cox$nevent, "Total events")
  )
}

#' @describeIn survival_coxph_pairwise Formatted analysis function which is used as `afun` in `coxph_pairwise()`.
#'
#' @return
#' * `a_coxph_pairwise()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_coxph_pairwise <- function(df,
                             ...,
                             .stats = NULL,
                             .stat_names = NULL,
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL) {
  # Separate the layout-provided parameters from the arguments meant for the
  # statistics function; the bookkeeping entry itself is not passed on.
  stat_fun_args <- list(...)
  layout_params <- retrieve_extra_afun_params(names(stat_fun_args$.additional_fun_parameters))
  stat_fun_args$.additional_fun_parameters <- NULL

  # Tell built-in statistic names apart from user-defined statistic functions
  stats_split <- .split_std_from_custom_stats(.stats)
  .stats <- stats_split$all_stats
  custom_funs <- stats_split$custom_stats

  # Compute the statistics
  computed <- .apply_stat_functions(
    default_stat_fnc = s_coxph_pairwise,
    custom_stat_fnc_list = custom_funs,
    args_list = c(df = list(df), layout_params, stat_fun_args)
  )

  # Resolve the statistics to show and their formatting defaults; labels
  # attached to the computed statistics take part in the label defaults.
  .stats <- get_stats("coxph_pairwise", stats_in = .stats, custom_stats_in = names(custom_funs))
  computed <- computed[.stats]
  .formats <- get_formats_from_stats(.stats, .formats)
  stat_labels <- lapply(computed, attr, "label")
  .labels <- get_labels_from_stats(.stats, .labels, tern_defaults = c(stat_labels, tern_default_labels))
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, computed, layout_params$.df_row, layout_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(computed, .stat_names)

  in_rows(
    .list = computed,
    .formats = .formats,
    .names = .unlist_keep_nulls(.labels),
    .stat_names = .stat_names,
    .labels = .unlist_keep_nulls(.labels),
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}

#' @describeIn survival_coxph_pairwise Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `coxph_pairwise()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_coxph_pairwise()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#'
#' df <- adtte_f %>% filter(ARMCD == "ARM A")
#' df_ref_group <- adtte_f %>% filter(ARMCD == "ARM B")
#'
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   coxph_pairwise(
#'     vars = "AVAL",
#'     is_event = "is_event",
#'     var_labels = "Unstratified Analysis"
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   coxph_pairwise(
#'     vars = "AVAL",
#'     is_event = "is_event",
#'     var_labels = "Stratified Analysis",
#'     strata = "SEX",
#'     control = control_coxph(pval_method = "wald")
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' @export
#' @order 2
coxph_pairwise <- function(lyt,
                           vars,
                           strata = NULL,
                           control = control_coxph(),
                           na_str = default_na_str(),
                           nested = TRUE,
                           ...,
                           var_labels = "CoxPH",
                           show_labels = "visible",
                           table_names = vars,
                           .stats = c("pvalue", "hr", "hr_ci"),
                           .stat_names = NULL,
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = NULL) {
  # Assemble everything handed to the analysis function: `.stats` always, the
  # other standard arguments only when supplied, then the arguments of the
  # statistics function (including anything in `...`).
  extra_args <- c(
    list(.stats = .stats),
    Filter(
      Negate(is.null),
      list(
        .stat_names = .stat_names,
        .formats = .formats,
        .labels = .labels,
        .indent_mods = .indent_mods
      )
    ),
    strata = list(strata),
    control = list(control),
    ...
  )

  # Append additional info from layout to the analysis function. The assignment
  # to `formals()` creates a modified local copy of `a_coxph_pairwise`.
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(a_coxph_pairwise) <- c(formals(a_coxph_pairwise), extra_args[[".additional_fun_parameters"]])

  analyze(
    lyt = lyt,
    vars = vars,
    afun = a_coxph_pairwise,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Factor utilities
#'
#' @description `r lifecycle::badge("stable")`
#'
#' A collection of utility functions for factors.
#'
#' @param x (`factor`)\cr factor variable or object to convert (for `as_factor_keep_attributes`).
#'
#' @seealso [cut_quantile_bins()] for splitting numeric vectors into quantile bins.
#'
#' @name factor_utils
NULL

#' @describeIn factor_utils Combine specified old factor levels into a single new level.
#'
#' @param levels (`character`)\cr level names to be combined.
#' @param new_level (`string`)\cr name of new level.
#'
#' @return
#' * `combine_levels`: A `factor` with the new levels.
#'
#' @examples
#' x <- factor(letters[1:5], levels = letters[5:1])
#' combine_levels(x, levels = c("a", "b"))
#'
#' combine_levels(x, c("e", "b"))
#'
#' @export
combine_levels <- function(x, levels, new_level = paste(levels, collapse = "/")) {
  checkmate::assert_factor(x)
  checkmate::assert_subset(levels, levels(x))

  # Rename every selected level to `new_level`; assigning duplicated level
  # names merges the corresponding levels into one.
  current_lvls <- levels(x)
  levels(x) <- replace(current_lvls, current_lvls %in% levels, new_level)
  x
}

#' Conversion of a vector to a factor
#'
#' @describeIn factor_utils Converts `x` to a factor and keeps its attributes. Warns appropriately such that the user
#' can decide whether they prefer converting to factor manually (e.g. for full control of
#' factor levels).
#'
#' @param x_name (`string`)\cr name of `x`.
#' @param na_level (`string`)\cr the explicit missing level which should be used when converting a character vector.
#' @param verbose (`flag`)\cr defaults to `TRUE`. It prints out warnings and messages.
#'
#' @return
#' * `as_factor_keep_attributes`: A `factor` with same attributes (except class) as `x`.
#'   Does not modify `x` if already a `factor`.
#'
#' @examples
#' a_chr_with_labels <- c("a", "b", NA)
#' attr(a_chr_with_labels, "label") <- "A character vector with labels"
#' as_factor_keep_attributes(a_chr_with_labels)
#'
#' @export
as_factor_keep_attributes <- function(x,
                                      x_name = paste(trimws(deparse(substitute(x))), collapse = " "),
                                      na_level = "<Missing>",
                                      verbose = TRUE) {
  # Note on the `x_name` default: `deparse()` returns several strings when the
  # expression passed as `x` is long, which would fail the `assert_string()`
  # check below, so the pieces are collapsed into a single string.
  checkmate::assert_atomic(x)
  checkmate::assert_string(x_name)
  checkmate::assert_string(na_level)
  checkmate::assert_flag(verbose)

  # Factors are returned unchanged.
  if (is.factor(x)) {
    return(x)
  }
  x_class <- class(x)[1]
  if (verbose) {
    warning(paste(
      "automatically converting", x_class, "variable", x_name,
      "to factor, better manually convert to factor to avoid failures"
    ))
  }
  # This warning is raised irrespective of `verbose`.
  if (identical(length(x), 0L)) {
    warning(paste(
      x_name, "has length 0, this can lead to tabulation failures, better convert to factor"
    ))
  }

  converted <- if (is.character(x)) {
    x_no_na <- explicit_na(sas_na(x), label = na_level)
    if (any(na_level %in% x_no_na)) {
      # Place the explicit missing level after all other levels.
      forcats::fct_relevel(x_no_na, na_level, after = Inf)
    } else {
      as.factor(x)
    }
  } else {
    as.factor(x)
  }

  # Re-attach the attributes of the input to the new factor.
  do.call(structure, c(list(.Data = converted), attributes(x)))
}

#' Labels for bins in percent
#'
#' This creates labels for quantile based bins in percent. This assumes the right-closed
#' intervals as produced by [cut_quantile_bins()].
#'
#' @param probs (`numeric`)\cr the probabilities identifying the quantiles.
#'   This is a sorted vector of unique `proportion` values, i.e. between 0 and 1, where
#'   the boundaries 0 and 1 must not be included.
#' @param digits (`integer(1)`)\cr number of decimal places to round the percent numbers.
#'
#' @return A `character` vector with labels in the format `[0%,20%]`, `(20%,50%]`, etc.
#'
#' @keywords internal
bins_percent_labels <- function(probs,
                                digits = 0) {
  # Add the outer boundaries 0 and 1 when they are not part of `probs` yet.
  if (!(0 %in% probs)) probs <- c(0, probs)
  if (!(1 %in% probs)) probs <- c(probs, 1)
  checkmate::assert_numeric(probs, lower = 0, upper = 1, unique = TRUE, sorted = TRUE)

  pct <- paste0(round(probs * 100, digits = digits), "%")
  n_bounds <- length(pct)
  lower <- pct[-n_bounds]
  upper <- pct[-1]

  # Only the first interval is closed on the left; all others are left-open.
  opening <- c("[", rep("(", n_bounds - 2))
  paste0(opening, lower, ",", upper, "]")
}

#' Cut numeric vector into empirical quantile bins
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This cuts a numeric vector into sample quantile bins.
#'
#' @inheritParams bins_percent_labels
#' @param x (`numeric`)\cr the continuous variable values which should be cut into
#'   quantile bins. This may contain `NA` values, which are then
#'   not used for the quantile calculations, but included in the return vector.
#' @param labels (`character`)\cr the unique labels for the quantile bins. When there are `n`
#'   probabilities in `probs`, then this must be `n + 1` long.
#' @param type (`integer(1)`)\cr type of quantiles to use, see [stats::quantile()] for details.
#' @param ordered (`flag`)\cr should the result be an ordered factor.
#'
#' @return
#' * `cut_quantile_bins`: A `factor` variable with appropriately-labeled bins as levels.
#'
#' @note Intervals are closed on the right side. That is, the first bin is the interval
#'   `[-Inf, q1]` where `q1` is the first quantile, the second bin is then `(q1, q2]`, etc.,
#'   and the last bin is `(qn, +Inf]` where `qn` is the last quantile.
#'
#' @examples
#' # Default is to cut into quartile bins.
#' cut_quantile_bins(cars$speed)
#'
#' # Use custom quantiles.
#' cut_quantile_bins(cars$speed, probs = c(0.1, 0.2, 0.6, 0.88))
#'
#' # Use custom labels.
#' cut_quantile_bins(cars$speed, labels = paste0("Q", 1:4))
#'
#' # NAs are preserved in result factor.
#' ozone_binned <- cut_quantile_bins(airquality$Ozone)
#' which(is.na(ozone_binned))
#' # So you might want to make these explicit.
#' explicit_na(ozone_binned)
#'
#' @export
cut_quantile_bins <- function(x,
                              probs = c(0.25, 0.5, 0.75),
                              labels = NULL,
                              type = 7,
                              ordered = TRUE) {
  checkmate::assert_flag(ordered)
  checkmate::assert_numeric(x)

  # Always include the outer boundaries so that every observation falls into a bin.
  if (!(0 %in% probs)) {
    probs <- c(0, probs)
  }
  if (!(1 %in% probs)) {
    probs <- c(probs, 1)
  }
  checkmate::assert_numeric(probs, lower = 0, upper = 1, unique = TRUE, sorted = TRUE)

  # Fall back to percent-based labels when none were supplied.
  if (is.null(labels)) {
    labels <- bins_percent_labels(probs)
  }
  checkmate::assert_character(labels, len = length(probs) - 1, any.missing = FALSE, unique = TRUE)

  if (all(is.na(x))) {
    # Nothing to compute quantiles from: return an all-NA factor carrying the bin levels.
    factor(x, ordered = ordered, levels = labels)
  } else {
    quantiles <- stats::quantile(x, probs = probs, type = type, na.rm = TRUE)

    # Tied quantiles would give duplicated breaks, which `cut()` cannot handle.
    checkmate::assert_numeric(quantiles, unique = TRUE)

    cut(
      x,
      breaks = quantiles,
      labels = labels,
      ordered_result = ordered,
      include.lowest = TRUE,
      right = TRUE
    )
  }
}

#' @describeIn factor_utils This discards the observations as well as the levels specified from a factor.
#'
#' @param discard (`character`)\cr levels to discard.
#'
#' @return
#' * `fct_discard`: A modified `factor` with observations as well as levels from `discard` dropped.
#'
#' @examples
#' fct_discard(factor(c("a", "b", "c")), "c")
#'
#' @export
fct_discard <- function(x, discard) {
  checkmate::assert_factor(x)
  checkmate::assert_character(discard, any.missing = FALSE)

  # Keep only observations whose value is not among the discarded levels
  # (existing `NA` observations never match and are therefore kept).
  is_kept <- is.na(match(x, discard))
  remaining_levels <- setdiff(levels(x), discard)

  factor(x[is_kept], levels = remaining_levels)
}

#' @describeIn factor_utils This inserts explicit missing values in a factor based on a condition. Additionally,
#' existing `NA` values will be explicitly converted to given `na_level`.
#'
#' @param condition (`logical`)\cr positions at which to insert missing values.
#' @param na_level (`string`)\cr which level to use for missing values.
#'
#' @return
#' * `fct_explicit_na_if`: A modified `factor` with inserted and existing `NA` converted to `na_level`.
#'
#' @seealso [forcats::fct_na_value_to_level()] which is used internally.
#'
#' @examples
#' fct_explicit_na_if(factor(c("a", "b", NA)), c(TRUE, FALSE, FALSE))
#'
#' @export
fct_explicit_na_if <- function(x, condition, na_level = "<Missing>") {
  checkmate::assert_factor(x, len = length(condition))
  checkmate::assert_logical(condition)

  # Blank out the flagged positions so they are handled together with pre-existing NAs.
  x[condition] <- NA

  # Turn every NA into the explicit level, then drop that level again if it ended up unused.
  forcats::fct_drop(
    forcats::fct_na_value_to_level(x, level = na_level),
    only = na_level
  )
}

#' @describeIn factor_utils This collapses levels and only keeps those new group levels, in the order provided.
#' The returned factor has levels in the order given, with the possible missing level last (this will
#' only be included if there are missing values).
#'
#' @param .f (`factor` or `character`)\cr original vector.
#' @param ... (named `character`)\cr levels in each vector provided will be collapsed into
#'   the new level given by the respective name.
#' @param .na_level (`string`)\cr which level to use for other levels, which should be missing in the
#'   new factor. Note that this level must not be contained in the new levels specified in `...`.
#'
#' @return
#' * `fct_collapse_only`: A modified `factor` with collapsed levels. Values and levels which are not included
#'   in the given `character` vector input will be set to the missing level `.na_level`.
#'
#' @note Any existing `NA`s in the input vector will not be replaced by the missing level. If needed,
#'   [explicit_na()] can be called separately on the result.
#'
#' @seealso [forcats::fct_collapse()], [forcats::fct_relevel()] which are used internally.
#'
#' @examples
#' fct_collapse_only(factor(c("a", "b", "c", "d")), TRT = "b", CTRL = c("c", "d"))
#'
#' @export
fct_collapse_only <- function(.f, ..., .na_level = "<Missing>") {
  # The names of the `...` arguments are the new (collapsed) levels, in the requested order.
  target_levels <- names(list(...))

  na_level_clashes <- checkmate::test_subset(.na_level, target_levels)
  if (na_level_clashes) {
    stop(paste0(".na_level currently set to '", .na_level, "' must not be contained in the new levels"))
  }

  # Everything not named in `...` is lumped into `.na_level`.
  collapsed <- forcats::fct_collapse(.f, ..., other_level = .na_level)

  # Move the new levels to the front in the order given; any other level follows.
  relevel_args <- c(list(.f = collapsed), as.list(target_levels))
  do.call(forcats::fct_relevel, args = relevel_args)
}

#' Tabulate binary response by subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The [tabulate_rsp_subgroups()] function creates a layout element to tabulate binary response by subgroup, returning
#' statistics including response rate and odds ratio for each population subgroup. The table is created from `df`, a
#' list of data frames returned by [extract_rsp_subgroups()], with the statistics to include specified via the `vars`
#' parameter.
#'
#' A forest plot can be created from the resulting table using the [g_forest()] function.
#'
#' @inheritParams extract_rsp_subgroups
#' @inheritParams argument_convention
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. The resulting tables are typically used as part of a forest plot.
#'
#' @seealso [extract_rsp_subgroups()]
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
#'   droplevels() %>%
#'   mutate(
#'     # Reorder levels of factor to make the placebo group the reference arm.
#'     ARM = fct_relevel(ARM, "B: Placebo"),
#'     rsp = AVALC == "CR"
#'   )
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' # Unstratified analysis.
#' df <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f
#' )
#' df
#'
#' # Stratified analysis.
#' df_strat <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2"), strata = "STRATA1"),
#'   data = adrs_f
#' )
#' df_strat
#'
#' # Grouping of the BMRKR2 levels.
#' df_grouped <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#' df_grouped
#'
#' @name response_subgroups
#' @order 1
NULL

#' Prepare response data for population subgroups in data frames
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares response rates and odds ratios for population subgroups in data frames. Simple wrapper
#' for [h_odds_ratio_subgroups_df()] and [h_proportion_subgroups_df()]. Result is a list of two
#' `data.frames`: `prop` and `or`. `variables` corresponds to the names of variables found in `data`,
#' passed as a named `list` and requires elements `rsp`, `arm` and optionally `subgroups` and `strata`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams response_subgroups
#' @param label_all (`string`)\cr label for the total population analysis.
#'
#' @return A named list of two elements:
#'   * `prop`: A `data.frame` containing columns `arm`, `n`, `n_rsp`, `prop`, `subgroup`, `var`,
#'     `var_label`, and `row_type`.
#'   * `or`: A `data.frame` containing columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, `conf_level`,
#'     `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @seealso [response_subgroups]
#'
#' @export
extract_rsp_subgroups <- function(variables,
                                  data,
                                  groups_lists = list(),
                                  conf_level = 0.95,
                                  method = NULL,
                                  label_all = "All Patients") {
  # Backward compatibility: the deprecated `strat` element is still accepted, but the user is
  # warned and its value is copied into `strata`, which is the element name used from here on.
  if ("strat" %in% names(variables)) {
    warning(
      "Warning: the `strat` element name of the `variables` list argument to `extract_rsp_subgroups()` ",
      "was deprecated in tern 0.9.4.\n  ",
      "Please use the name `strata` instead of `strat` in the `variables` argument."
    )
    variables[["strata"]] <- variables[["strat"]]
  }

  # Response proportions per arm and subgroup.
  df_prop <- h_proportion_subgroups_df(
    variables,
    data,
    groups_lists = groups_lists,
    label_all = label_all
  )

  # Odds ratios (with confidence limits at `conf_level`, and the test given by `method`) per subgroup.
  df_or <- h_odds_ratio_subgroups_df(
    variables,
    data,
    groups_lists = groups_lists,
    conf_level = conf_level,
    method = method,
    label_all = label_all
  )

  list(prop = df_prop, or = df_or)
}

#' @describeIn response_subgroups Formatted analysis function which is used as `afun` in `tabulate_rsp_subgroups()`.
#'
#' @return
#' * `a_response_subgroups()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_response_subgroups <- function(df,
                                 labelstr = "",
                                 ...,
                                 .stats = NULL,
                                 .stat_names = NULL,
                                 .formats = NULL,
                                 .labels = NULL,
                                 .indent_mods = NULL) {
  # Separate the layout-supplied parameters from the remaining extra arguments.
  dots_extra_args <- list(...)
  extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters))
  dots_extra_args$.additional_fun_parameters <- NULL

  # The statistic shown in the current column: the column variable if available, else `.stats`.
  cur_col_stat <- extra_afun_params$.var %||% .stats

  is_biomarker_call <- "biomarker" %in% names(dots_extra_args)
  subgroup_chr <- as.character(df$subgroup)

  # Row identifiers:
  # * biomarker tables, label rows ("overall" supplied): biomarker name, e.g. "AGE", "BMRKR1"
  # * biomarker tables, data rows: "<biomarker>.<subgroup>", e.g. "AGE.LOW"
  # * all other tables: subgroup name made unique, e.g. "Total Patients", "F", "M"
  var_lvls <- if (is_biomarker_call && "biomarker" %in% names(df)) {
    biomarker_chr <- as.character(df$biomarker)
    if ("overall" %in% names(dots_extra_args)) {
      biomarker_chr
    } else {
      paste(biomarker_chr, subgroup_chr, sep = ".")
    }
  } else {
    make.unique(subgroup_chr)
  }

  # Nothing to show for this cell: return a single NA row named after the current statistic.
  if (nrow(df) == 0) {
    return(in_rows(.list = stats::setNames(list(NA), cur_col_stat)))
  }

  # The statistics are simply the columns of `df`.
  raw_stats <- as.list(df)

  # Resolve statistics, formats, labels and indentation per statistic and row level.
  .stats <- get_stats("tabulate_rsp_subgroups", stats_in = cur_col_stat)
  levels_per_stats <- stats::setNames(rep(list(var_lvls), length(.stats)), .stats)
  .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats)
  .labels <- get_labels_from_stats(
    .stats, .labels, levels_per_stats,
    # default labels are the subgroup names already present in `df`
    tern_defaults = stats::setNames(as.list(subgroup_chr), var_lvls)
  )
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats)

  # Pick the requested columns, name their values by row level, and flatten to one entry per row.
  named_stats <- lapply(
    .stats,
    function(stat) stats::setNames(raw_stats[[stat]], var_lvls)
  )
  x_stats <- .unlist_keep_nulls(stats::setNames(named_stats, .stats))

  .nms <- if (is_biomarker_call) var_lvls else names(.labels)

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = .nms,
    .stat_names = .stat_names,
    .labels = .unlist_keep_nulls(.labels),
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}

#' @describeIn response_subgroups Table-creating function which creates a table
#'   summarizing binary response by subgroup. This function is a wrapper for [rtables::analyze_colvars()]
#'   and [rtables::summarize_row_groups()].
#'
#' @param df (`list`)\cr a list of data frames containing all analysis variables. List should be
#'   created using [extract_rsp_subgroups()].
#' @param vars (`character`)\cr the names of statistics to be reported among:
#'   * `n`: Total number of observations per group.
#'   * `n_rsp`: Number of responders per group.
#'   * `prop`: Proportion of responders.
#'   * `n_tot`: Total number of observations.
#'   * `or`: Odds ratio.
#'   * `ci`: Confidence interval of odds ratio.
#'   * `pval`: p-value of the effect.
#'   Note, the statistics `n_tot`, `or`, and `ci` are required.
#' @param riskdiff (`list`)\cr if a risk (proportion) difference column should be added, a list of settings to apply
#'   within the column. See [control_riskdiff()] for details. If `NULL`, no risk difference column will be added. If
#'   `riskdiff$arm_x` and `riskdiff$arm_y` are `NULL`, the first level of `df$prop$arm` will be used as `arm_x` and
#'   the second level as `arm_y`.
#'
#' @return An `rtables` table summarizing binary response by subgroup.
#'
#' @examples
#' # Table with default columns
#' basic_table() %>%
#'   tabulate_rsp_subgroups(df)
#'
#' # Table with selected columns
#' basic_table() %>%
#'   tabulate_rsp_subgroups(
#'     df = df,
#'     vars = c("n_tot", "n", "n_rsp", "prop", "or", "ci")
#'   )
#'
#' # Table with risk difference column added
#' basic_table() %>%
#'   tabulate_rsp_subgroups(
#'     df,
#'     riskdiff = control_riskdiff(
#'       arm_x = levels(df$prop$arm)[1],
#'       arm_y = levels(df$prop$arm)[2]
#'     )
#'   )
#'
#' @export
#' @order 2
tabulate_rsp_subgroups <- function(lyt,
                                   df,
                                   vars = c("n_tot", "n", "prop", "or", "ci"),
                                   groups_lists = list(),
                                   label_all = lifecycle::deprecated(),
                                   riskdiff = NULL,
                                   na_str = default_na_str(),
                                   ...,
                                   .stat_names = NULL,
                                   .formats = NULL,
                                   .labels = NULL,
                                   .indent_mods = NULL) {
  # Overview: two tables are built from the same starting layout `lyt` - one from `df$prop`
  # (only when proportion columns are requested) and one from `df$or` - and then bound
  # column-wise. The result carries attributes naming the odds ratio, CI and total-n column
  # positions, together with a `forest_header`.

  # Input checks: `riskdiff` is a list or NULL, and the three mandatory statistics are requested.
  checkmate::assert_list(riskdiff, null.ok = TRUE)
  checkmate::assert_true(all(c("n_tot", "or", "ci") %in% vars))
  # "pval" can only be shown if `df$or` already contains it; otherwise warn (no error is raised).
  if ("pval" %in% vars && !"pval" %in% names(df$or)) {
    warning(
      'The "pval" statistic has been selected but is not present in "df" so it will not be included in the output ',
      'table. To include the "pval" statistic, please specify a p-value test when generating "df" via ',
      'the "method" argument to `extract_rsp_subgroups()`. If method = "cmh", strata must also be specified via the ',
      '"variables" argument to `extract_rsp_subgroups()`.'
    )
  }

  # `label_all` is deprecated here: supplying it only triggers a deprecation warning,
  # the value itself is not used anywhere below.
  if (lifecycle::is_present(label_all)) {
    lifecycle::deprecate_warn(
      "0.9.8", "tabulate_rsp_subgroups(label_all)",
      details =
        "Please assign the `label_all` parameter within the `extract_rsp_subgroups()` function when creating `df`."
    )
  }

  # Process standard extra arguments
  # (only non-NULL formatting arguments are forwarded to the analysis function)
  extra_args <- list(".stats" = vars)
  if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names
  if (!is.null(.formats)) extra_args[[".formats"]] <- .formats
  if (!is.null(.labels)) extra_args[[".labels"]] <- .labels
  if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods

  # Create "ci" column from "lcl" and "ucl"
  df$or$ci <- combine_vectors(df$or$lcl, df$or$ucl)

  # Extract additional parameters from df
  # (taken from the first row of `df$or`; `method` stays NULL when there is no `pval_label` column)
  conf_level <- df$or$conf_level[1]
  method <- if ("pval_label" %in% names(df$or)) df$or$pval_label[1] else NULL
  colvars <- d_rsp_subgroups_colvars(vars, conf_level = conf_level, method = method)
  # Split the requested columns into those read from `df$prop` and those read from `df$or`.
  prop_vars <- intersect(colvars$vars, c("n", "prop", "n_rsp"))
  # NOTE(review): the odds ratio columns are selected via the label names rather than
  # `colvars$vars` - presumably so that "pval" is left out when no label was generated for it
  # (i.e. `method` is NULL); confirm against d_rsp_subgroups_colvars().
  or_vars <- intersect(names(colvars$labels), c("n_tot", "or", "ci", "pval"))
  colvars_prop <- list(vars = prop_vars, labels = colvars$labels[prop_vars])
  colvars_or <- list(vars = or_vars, labels = colvars$labels[or_vars])

  # Process additional arguments to the statistic function
  extra_args <- c(
    extra_args,
    groups_lists = list(groups_lists), conf_level = conf_level, method = method,
    ...
  )

  # Adding additional info from layout to analysis function
  # (the assignment below modifies a copy of `a_response_subgroups` local to this call,
  # appending the layout-provided parameters to its formals)
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(a_response_subgroups) <- c(formals(a_response_subgroups), extra_args[[".additional_fun_parameters"]])

  # Add risk difference column
  if (!is.null(riskdiff)) {
    # Default the compared arms to the first and second level of `df$prop$arm`.
    if (is.null(riskdiff$arm_x)) riskdiff$arm_x <- levels(df$prop$arm)[1]
    if (is.null(riskdiff$arm_y)) riskdiff$arm_y <- levels(df$prop$arm)[2]
    colvars_or$vars <- c(colvars_or$vars, "riskdiff")
    colvars_or$labels <- c(colvars_or$labels, riskdiff = riskdiff$col_label)
    # Names of the pivoted columns, in the order: n_rsp_<arm_x>, n_rsp_<arm_y>, n_<arm_x>, n_<arm_y>.
    arm_cols <- paste(rep(c("n_rsp", "n_rsp", "n", "n")), c(riskdiff$arm_x, riskdiff$arm_y), sep = "_")
    extra_args[[".formats"]] <- c(extra_args[[".formats"]], list(riskdiff = riskdiff$format))

    # One row per subgroup with responder counts and totals of both arms side by side,
    # from which the risk difference statistic is computed row by row.
    df_prop_diff <- df$prop %>%
      dplyr::select(-"prop") %>%
      tidyr::pivot_wider(
        id_cols = c("subgroup", "var", "var_label", "row_type"),
        names_from = "arm",
        values_from = c("n", "n_rsp")
      ) %>%
      dplyr::rowwise() %>%
      dplyr::mutate(
        riskdiff = stat_propdiff_ci(
          x = as.list(.data[[arm_cols[1]]]),
          y = as.list(.data[[arm_cols[2]]]),
          N_x = .data[[arm_cols[3]]],
          N_y = .data[[arm_cols[4]]],
          pct = riskdiff$pct
        )
      ) %>%
      dplyr::select(-dplyr::all_of(arm_cols))

    # Attach the new `riskdiff` column to the odds ratio data, matched on the subgroup identifiers.
    df$or <- df$or %>%
      dplyr::left_join(
        df_prop_diff,
        by = c("subgroup", "var", "var_label", "row_type")
      )
  }

  # Add columns from table_prop (optional)
  if (length(colvars_prop$vars) > 0) {
    # Columns: one group per arm, and within each arm one column per requested proportion statistic.
    lyt_prop <- split_cols_by(lyt = lyt, var = "arm")
    lyt_prop <- split_cols_by_multivar(
      lyt = lyt_prop,
      vars = colvars_prop$vars,
      varlabels = colvars_prop$labels
    )

    # Add "All Patients" row
    # (rows of `df$prop` whose `row_type` is "content")
    lyt_prop <- split_rows_by(
      lyt = lyt_prop,
      var = "row_type",
      split_fun = keep_split_levels("content"),
      nested = FALSE,
      child_labels = "hidden",
      parent_name = "All Patients"
    )
    lyt_prop <- analyze_colvars(
      lyt = lyt_prop,
      afun = a_response_subgroups,
      na_str = na_str,
      extra_args = extra_args
    )

    # Add analysis rows
    # (only when subgroup rows exist; they are grouped by `var_label`)
    if ("analysis" %in% df$prop$row_type) {
      lyt_prop <- split_rows_by(
        lyt = lyt_prop,
        var = "row_type",
        split_fun = keep_split_levels("analysis"),
        nested = FALSE,
        child_labels = "hidden",
        parent_name = "analysis rows"
      )
      lyt_prop <- split_rows_by(lyt = lyt_prop, var = "var_label", nested = TRUE)
      lyt_prop <- analyze_colvars(
        lyt = lyt_prop,
        afun = a_response_subgroups,
        na_str = na_str,
        inclNAs = TRUE,
        extra_args = extra_args
      )
    }

    table_prop <- build_table(lyt_prop, df = df$prop)
  } else {
    # No proportion statistics requested: the result consists of the odds ratio table only.
    table_prop <- NULL
  }

  # Add columns from table_or ("n_tot", "or", and "ci" required)
  lyt_or <- split_cols_by(lyt = lyt, var = "arm")
  lyt_or <- split_cols_by_multivar(
    lyt = lyt_or,
    vars = colvars_or$vars,
    varlabels = colvars_or$labels
  )

  # Add "All Patients" row
  lyt_or <- split_rows_by(
    lyt = lyt_or,
    var = "row_type",
    split_fun = keep_split_levels("content"),
    nested = FALSE,
    child_labels = "hidden",
    parent_name = "All Patients"
  )
  lyt_or <- analyze_colvars(
    lyt = lyt_or,
    afun = a_response_subgroups,
    na_str = na_str,
    extra_args = extra_args
  ) %>%
    append_topleft("Baseline Risk Factors")

  # Add analysis rows
  if ("analysis" %in% df$or$row_type) {
    lyt_or <- split_rows_by(
      lyt = lyt_or,
      var = "row_type",
      split_fun = keep_split_levels("analysis"),
      nested = FALSE,
      child_labels = "hidden",
      parent_name = "analysis rows"
    )
    lyt_or <- split_rows_by(lyt = lyt_or, var = "var_label", nested = TRUE)
    lyt_or <- analyze_colvars(
      lyt = lyt_or,
      afun = a_response_subgroups,
      na_str = na_str,
      inclNAs = TRUE,
      extra_args = extra_args
    )
  }

  table_or <- build_table(lyt_or, df = df$or)

  # Join tables, add forest plot attributes
  # Column positions of "or", "ci" and "n_tot" are tracked so they can be stored as attributes.
  n_tot_id <- match("n_tot", colvars_or$vars)
  if (is.null(table_prop)) {
    result <- table_or
    or_id <- match("or", colvars_or$vars)
    ci_id <- match("ci", colvars_or$vars)
  } else {
    # Final column order: total n first, then the proportion columns, then the remaining
    # odds ratio columns - hence the offset of 1 + ncol(table_prop) for "or" and "ci".
    result <- cbind_rtables(table_or[, n_tot_id], table_prop, table_or[, -n_tot_id])
    or_id <- 1L + ncol(table_prop) + match("or", colvars_or$vars[-n_tot_id])
    ci_id <- 1L + ncol(table_prop) + match("ci", colvars_or$vars[-n_tot_id])
    n_tot_id <- 1L
  }
  structure(
    result,
    forest_header = paste0(levels(df$prop$arm), "\nBetter"),
    col_x = or_id,
    col_ci = ci_id,
    col_symbol_size = n_tot_id
  )
}

#' Labels for column variables in binary response by subgroup table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Internal function to check variables included in [tabulate_rsp_subgroups()] and create column labels.
#'
#' @inheritParams argument_convention
#' @inheritParams tabulate_rsp_subgroups
#'
#' @return A `list` of variables to tabulate and their labels.
#'
#' @export
d_rsp_subgroups_colvars <- function(vars,
                                    conf_level = NULL,
                                    method = NULL) {
  # `vars` must contain the three mandatory statistics and nothing outside the supported set.
  checkmate::assert_character(vars)
  checkmate::assert_subset(c("n_tot", "or", "ci"), vars)
  checkmate::assert_subset(
    vars,
    c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval")
  )

  # The CI label depends on the confidence level, which therefore has to be supplied.
  ci_label <- if ("ci" %in% vars) {
    checkmate::assert_false(is.null(conf_level))
    c(ci = paste0(100 * conf_level, "% CI"))
  }

  # The p-value column is labelled with the test method; no label is added when `method` is NULL.
  pval_label <- if ("pval" %in% vars) c(pval = method)

  varlabels <- c(
    n = "n",
    n_rsp = "Responders",
    prop = "Response (%)",
    n_tot = "Total n",
    or = "Odds Ratio",
    ci_label,
    pval_label
  )

  # Labels are returned in the order of the requested variables.
  list(
    vars = vars,
    labels = varlabels[vars]
  )
}

#' Count number of patients
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [analyze_num_patients()] creates a layout element to count total numbers of unique or
#' non-unique patients. The primary analysis variable `vars` is used to uniquely identify patients.
#'
#' The `count_by` variable can be used to identify non-unique patients such that the number of patients with a unique
#' combination of values in `vars` and `count_by` will be returned instead as the `nonunique` statistic. The `required`
#' variable can be used to specify a variable required to be non-missing for the record to be included in the counts.
#'
#' The summarize function [summarize_num_patients()] performs the same function as [analyze_num_patients()] except it
#' creates content rows, not data rows, to summarize the current table row/column context and operates on the level of
#' the latest row split or the root of the table if no row splits have occurred.
#'
#' @inheritParams argument_convention
#' @param required (`character` or `NULL`)\cr name of a variable that is required to be non-missing.
#' @param count_by (`character` or `NULL`)\cr name of a variable to be combined with `vars` when counting
#'   `nonunique` records.
#' @param unique_count_suffix (`flag`)\cr whether the `"(n)"` suffix should be added to `unique_count` labels.
#'   Defaults to `TRUE`.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("summarize_num_patients"), type = "sh")``
#'
#' @name summarize_num_patients
#' @order 1
NULL

#' @describeIn summarize_num_patients Statistics function which counts the number of
#'   unique patients, the corresponding percentage taken with respect to the
#'   total number of patients, and the number of non-unique patients.
#'
#' @param x (`character` or `factor`)\cr vector of patient IDs.
#'
#' @return
#' * `s_num_patients()` returns a named `list` of 3 statistics:
#'   * `unique`: Vector of counts and percentages.
#'   * `nonunique`: Vector of counts.
#'   * `unique_count`: Counts.
#'
#' @examples
#' # Use the statistics function to count number of unique and nonunique patients.
#' s_num_patients(x = as.character(c(1, 1, 1, 2, 4, NA)), labelstr = "", .N_col = 6L)
#' s_num_patients(
#'   x = as.character(c(1, 1, 1, 2, 4, NA)),
#'   labelstr = "",
#'   .N_col = 6L,
#'   count_by = c(1, 1, 2, 1, 1, 1)
#' )
#'
#' @export
s_num_patients <- function(x,
                           labelstr,
                           .N_col, # nolint
                           ...,
                           count_by = NULL,
                           unique_count_suffix = TRUE) {
  checkmate::assert_string(labelstr)
  checkmate::assert_count(.N_col)
  checkmate::assert_multi_class(x, classes = c("factor", "character"))
  checkmate::assert_flag(unique_count_suffix)

  # Number of distinct patient IDs.
  count1 <- n_available(unique(x))

  # Number of records, or - when `count_by` is given - of distinct ID/`count_by` combinations.
  count2 <- if (is.null(count_by)) {
    n_available(x)
  } else {
    checkmate::assert_vector(count_by, len = length(x))
    n_available(unique(interaction(x, count_by)))
  }

  # Report a fraction of 0 instead of 0 / 0 when the column is empty.
  fraction <- if (count1 == 0 && .N_col == 0) {
    0
  } else {
    count1 / .N_col
  }

  # Label of the plain count, optionally suffixed with "(n)" (space-separated from a non-empty label).
  unique_count_label <- if (unique_count_suffix) {
    paste0(labelstr, if (nzchar(labelstr)) " ", "(n)")
  } else {
    labelstr
  }

  list(
    unique = formatters::with_label(c(count1, fraction), labelstr),
    nonunique = formatters::with_label(count2, labelstr),
    unique_count = formatters::with_label(count1, unique_count_label)
  )
}

#' @describeIn summarize_num_patients Statistics function which counts the number of unique patients
#'   in a column (variable), the corresponding percentage taken with respect to the total number of
#'   patients, and the number of non-unique patients in the column.
#'
#' @return
#' * `s_num_patients_content()` returns the same values as `s_num_patients()`.
#'
#' @examples
#' # Count number of unique and non-unique patients.
#'
#' df <- data.frame(
#'   USUBJID = as.character(c(1, 2, 1, 4, NA)),
#'   EVENT = as.character(c(10, 15, 10, 17, 8))
#' )
#' s_num_patients_content(df, .N_col = 5, .var = "USUBJID")
#'
#' df_by_event <- data.frame(
#'   USUBJID = as.character(c(1, 2, 1, 4, NA)),
#'   EVENT = c(10, 15, 10, 17, 8)
#' )
#' s_num_patients_content(df_by_event, .N_col = 5, .var = "USUBJID", count_by = "EVENT")
#'
#' @export
s_num_patients_content <- function(df,
                                   labelstr = "",
                                   .N_col, # nolint
                                   .var,
                                   ...,
                                   required = NULL,
                                   count_by = NULL,
                                   unique_count_suffix = TRUE) {
  checkmate::assert_string(.var)
  checkmate::assert_data_frame(df)

  # The ID column must exist in `df`, and so must `count_by` when it is used.
  vars_to_check <- if (is.null(count_by)) {
    list(id = .var)
  } else {
    list(id = .var, count_by = count_by)
  }
  assert_df_with_variables(df, vars_to_check)

  # Restrict to records where the `required` variable is observed.
  if (!is.null(required)) {
    checkmate::assert_string(required)
    assert_df_with_variables(df, list(required = required))
    has_required <- !is.na(df[[required]])
    df <- df[has_required, , drop = FALSE]
  }

  # Delegate the counting to the vector-based statistics function.
  s_num_patients(
    x = df[[.var]],
    labelstr = labelstr,
    .N_col = .N_col,
    count_by = if (is.null(count_by)) NULL else df[[count_by]],
    unique_count_suffix = unique_count_suffix
  )
}

#' @describeIn summarize_num_patients Formatted analysis function which is used as `afun`
#'   in `analyze_num_patients()` and as `cfun` in `summarize_num_patients()`.
#'
#' @return
#' * `a_num_patients()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_num_patients <- function(df,
                           labelstr = "",
                           ...,
                           .stats = NULL,
                           .stat_names = NULL,
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = NULL) {
  # Separate the layout-supplied parameters from the remaining extra arguments.
  dots_extra_args <- list(...)
  extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters))
  dots_extra_args$.additional_fun_parameters <- NULL

  # Split the requested statistics into standard ones and user-defined functions.
  default_and_custom_stats_list <- .split_std_from_custom_stats(.stats)
  .stats <- default_and_custom_stats_list$all_stats
  custom_stat_functions <- default_and_custom_stats_list$custom_stats

  # Compute the statistics, forwarding the data, row label and all extra arguments.
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_num_patients_content,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      list(df = df, labelstr = labelstr),
      extra_afun_params,
      dots_extra_args
    )
  )

  # Fill in formatting defaults
  .stats <- get_stats("summarize_num_patients", stats_in = .stats, custom_stats_in = names(custom_stat_functions))
  .formats <- get_formats_from_stats(.stats, .formats)

  # Non-empty "label" attributes of the computed statistics take precedence over the package defaults.
  stat_labels <- lapply(x_stats, attr, "label")
  .labels <- get_labels_from_stats(
    .stats, .labels,
    tern_defaults = c(stat_labels[nchar(stat_labels) > 0], tern_default_labels)
  )
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  # Keep only the selected statistics, in the selected order.
  x_stats <- x_stats[.stats]

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  # The flattened labels serve both as row names and as row labels.
  row_labels <- .unlist_keep_nulls(.labels)

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = row_labels,
    .stat_names = .stat_names,
    .labels = row_labels,
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}

#' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @return
#' * `summarize_num_patients()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_num_patients_content()` to the table layout.
#'
#' @examples
#' # summarize_num_patients
#' tbl <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("SEX") %>%
#'   summarize_num_patients("USUBJID", .stats = "unique_count") %>%
#'   build_table(df)
#'
#' tbl
#'
#' @export
#' @order 3
summarize_num_patients <- function(lyt,
                                   var,
                                   required = NULL,
                                   count_by = NULL,
                                   unique_count_suffix = TRUE,
                                   na_str = default_na_str(),
                                   riskdiff = FALSE,
                                   ...,
                                   .stats = c("unique", "nonunique", "unique_count"),
                                   .stat_names = NULL,
                                   .formats = NULL,
                                   .labels = list(
                                     unique = "Number of patients with at least one event",
                                     nonunique = "Number of events"
                                   ),
                                   .indent_mods = 0L) {
  checkmate::assert_flag(riskdiff)
  use_riskdiff <- !isFALSE(riskdiff)

  # With a risk difference column the generic riskdiff wrapper becomes the content function.
  afun <- if (use_riskdiff) afun_riskdiff else a_num_patients

  # Process standard extra arguments: forward only the formatting arguments that were supplied.
  optional_args <- list(.stat_names = .stat_names, .formats = .formats, .labels = .labels)
  extra_args <- c(
    list(".stats" = .stats),
    Filter(Negate(is.null), optional_args)
  )

  # A single indent modifier applies to the whole content row group; a longer vector is
  # passed on to the analysis function instead and the group itself is not indented.
  indent_mod <- 0L
  if (length(.indent_mods) == 1) {
    indent_mod <- .indent_mods
  } else if (!is.null(.indent_mods)) {
    extra_args[[".indent_mods"]] <- .indent_mods
  }

  # Process additional arguments to the statistic function
  extra_args <- c(
    extra_args,
    required = required, count_by = count_by, unique_count_suffix = unique_count_suffix,
    if (use_riskdiff) list(afun = list("s_num_patients_content" = a_num_patients)),
    ...
  )

  # Append additional info from layout to the analysis function
  layout_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- layout_params
  formals(afun) <- c(formals(afun), layout_params)

  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = afun,
    na_str = na_str,
    extra_args = extra_args,
    indent_mod = indent_mod
  )
}

#' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `analyze_num_patients()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_num_patients_content()` to the table layout.
#'
#' @details In general, functions that start with `analyze*` are expected to
#'   work like [rtables::analyze()], while functions that start with `summarize*`
#'   are based upon [rtables::summarize_row_groups()]. The latter provides a
#'   value for each dividing split in the row and column space but, because it
#'   is bound to the fundamental splits, it is repeated by design on every page
#'   when pagination is involved.
#'
#' @note As opposed to [summarize_num_patients()], this function does not repeat the produced rows.
#'
#' @examples
#' df <- data.frame(
#'   USUBJID = as.character(c(1, 2, 1, 4, NA, 6, 6, 8, 9)),
#'   ARM = c("A", "A", "A", "A", "A", "B", "B", "B", "B"),
#'   AGE = c(10, 15, 10, 17, 8, 11, 11, 19, 17),
#'   SEX = c("M", "M", "M", "F", "F", "F", "M", "F", "M")
#' )
#'
#' # analyze_num_patients
#' tbl <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   analyze_num_patients("USUBJID", .stats = c("unique")) %>%
#'   build_table(df)
#'
#' tbl
#'
#' @export
#' @order 2
analyze_num_patients <- function(lyt,
                                 vars,
                                 required = NULL,
                                 count_by = NULL,
                                 unique_count_suffix = TRUE,
                                 na_str = default_na_str(),
                                 nested = TRUE,
                                 show_labels = c("default", "visible", "hidden"),
                                 riskdiff = FALSE,
                                 ...,
                                 .stats = c("unique", "nonunique", "unique_count"),
                                 .stat_names = NULL,
                                 .formats = NULL,
                                 .labels = list(
                                   unique = "Number of patients with at least one event",
                                   nonunique = "Number of events"
                                 ),
                                 .indent_mods = NULL) {
  checkmate::assert_flag(riskdiff)
  with_riskdiff <- !isFALSE(riskdiff)
  afun <- if (with_riskdiff) afun_riskdiff else a_num_patients

  # `.stats` is always forwarded; the other standard arguments only when supplied
  std_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Standard arguments first, then those meant for the statistics function
  # (NULL values are dropped by `c()`)
  extra_args <- c(
    list(.stats = .stats),
    Filter(Negate(is.null), std_args),
    required = required,
    count_by = count_by,
    unique_count_suffix = unique_count_suffix,
    if (with_riskdiff) list(afun = list("s_num_patients_content" = a_num_patients)),
    ...
  )

  # Expose the additional layout parameters as formals of the analysis function
  layout_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- layout_params
  formals(afun) <- c(formals(afun), layout_params)

  analyze(
    lyt = lyt,
    vars = vars,
    afun = afun,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    show_labels = show_labels
  )
}

#' Count the number of patients with a particular event
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [count_patients_with_event()] creates a layout element to calculate patient counts for a
#' user-specified set of events.
#'
#' This function analyzes primary analysis variable `vars` which indicates unique subject identifiers. Events
#' are defined by the user as a named vector via the `filters` argument, where each name corresponds to a
#' variable and each value is the value(s) that that variable takes for the event.
#'
#' If there are multiple records with the same event recorded for a patient, only one occurrence is counted.
#'
#' @inheritParams argument_convention
#' @param filters (`character`)\cr a character vector specifying the column names and flag variables
#'   to be used for counting the number of unique identifiers satisfying such conditions.
#'   Multiple column names and flags are accepted in this format
#'   `c("column_name1" = "flag1", "column_name2" = "flag2")`.
#'   Note that only equality is being accepted as condition.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("count_patients_with_event"), type = "sh")``
#'
#' @seealso [count_patients_with_flags()]
#'
#' @name count_patients_with_event
#' @order 1
NULL

#' @describeIn count_patients_with_event Statistics function which counts the number of patients for which
#'   the defined event has occurred.
#'
#' @inheritParams analyze_variables
#' @param .var (`string`)\cr name of the column that contains the unique identifier.
#'
#' @return
#' * `s_count_patients_with_event()` returns the count and fraction of unique identifiers with the defined event.
#'
#' @examples
#' s_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y"),
#' )
#'
#' s_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL")
#' )
#'
#' s_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL"),
#'   denom = "N_col",
#'   .N_col = 456
#' )
#'
#' @export
s_count_patients_with_event <- function(df,
                                        .var,
                                        .N_col = ncol(df), # nolint
                                        .N_row = nrow(df), # nolint
                                        ...,
                                        filters,
                                        denom = c("n", "N_col", "N_row")) {
  # Every filter name must be a column of `df`
  filter_cols <- names(filters)
  checkmate::assert_subset(filter_cols, colnames(df))

  # Row positions matching each individual `column == value` condition ...
  rows_per_filter <- Map(
    function(column, value) which(df[[column]] == value),
    filter_cols,
    filters
  )
  # ... and the positions that satisfy all of the conditions at once
  rows_all_filters <- Reduce(intersect, rows_per_filter)

  # Unique identifiers among the matching rows, counted against all unique identifiers in `df`
  ids_with_event <- as.character(unique(df[rows_all_filters, ][[.var]]))
  ids_all <- as.character(unique(df[[.var]]))

  s_count_values(
    ids_all,
    ids_with_event,
    denom = denom,
    .N_col = .N_col,
    .N_row = .N_row
  )
}

#' @describeIn count_patients_with_event Formatted analysis function which is used as `afun`
#'   in `count_patients_with_event()`.
#'
#' @return
#' * `a_count_patients_with_event()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y"),
#'   .N_col = 100,
#'   .N_row = 100
#' )
#'
#' @export
a_count_patients_with_event <- function(df,
                                        labelstr = "",
                                        ...,
                                        .stats = NULL,
                                        .stat_names = NULL,
                                        .formats = NULL,
                                        .labels = NULL,
                                        .indent_mods = NULL) {
  # Split the dots into layout-provided parameters and statistics-function arguments.
  # NOTE: `retrieve_extra_afun_params()` must be called directly from this body.
  stat_args <- list(...)
  layout_params <- retrieve_extra_afun_params(names(stat_args$.additional_fun_parameters))
  stat_args$.additional_fun_parameters <- NULL

  # Separate built-in statistic names from user-defined statistic functions
  stats_split <- .split_std_from_custom_stats(.stats)
  .stats <- stats_split$all_stats
  custom_fns <- stats_split$custom_stats

  # Compute the statistics (default function plus any custom ones)
  computed <- .apply_stat_functions(
    default_stat_fnc = s_count_patients_with_event,
    custom_stat_fnc_list = custom_fns,
    args_list = c(list(df = df), layout_params, stat_args)
  )

  # Resolve the statistics to show and their formats, labels and indentation
  .stats <- get_stats("count_patients_with_event", stats_in = .stats, custom_stats_in = names(custom_fns))
  .formats <- get_formats_from_stats(.stats, .formats)
  .labels <- get_labels_from_stats(.stats, .labels)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  cell_values <- computed[.stats]

  # Resolve "auto" formats and the statistic names
  .formats <- apply_auto_formatting(.formats, cell_values, layout_params$.df_row, layout_params$.var)
  .stat_names <- get_stat_names(cell_values, .stat_names)

  in_rows(
    .list = cell_values,
    .formats = .formats,
    .names = names(.labels),
    .stat_names = .stat_names,
    .labels = .unlist_keep_nulls(.labels),
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}

#' @describeIn count_patients_with_event Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_patients_with_event()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_patients_with_event()` to the table layout.
#'
#' @examples
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_values(
#'     "STUDYID",
#'     values = "AB12345",
#'     .stats = "count",
#'     .labels = c(count = "Total AEs")
#'   ) %>%
#'   count_patients_with_event(
#'     "SUBJID",
#'     filters = c("TRTEMFL" = "Y"),
#'     .labels = c(count_fraction = "Total number of patients with at least one adverse event"),
#'     table_names = "tbl_all"
#'   ) %>%
#'   count_patients_with_event(
#'     "SUBJID",
#'     filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL"),
#'     .labels = c(count_fraction = "Total number of patients with fatal AEs"),
#'     table_names = "tbl_fatal"
#'   ) %>%
#'   count_patients_with_event(
#'     "SUBJID",
#'     filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL", "AEREL" = "Y"),
#'     .labels = c(count_fraction = "Total number of patients with related fatal AEs"),
#'     .indent_mods = c(count_fraction = 2L),
#'     table_names = "tbl_rel_fatal"
#'   )
#'
#' build_table(lyt, tern_ex_adae, alt_counts_df = tern_ex_adsl)
#'
#' @export
#' @order 2
count_patients_with_event <- function(lyt,
                                      vars,
                                      filters,
                                      riskdiff = FALSE,
                                      na_str = default_na_str(),
                                      nested = TRUE,
                                      show_labels = ifelse(length(vars) > 1, "visible", "hidden"),
                                      ...,
                                      table_names = vars,
                                      .stats = "count_fraction",
                                      .stat_names = NULL,
                                      .formats = list(count_fraction = format_count_fraction_fixed_dp),
                                      .labels = NULL,
                                      .indent_mods = NULL) {
  checkmate::assert_flag(riskdiff)
  with_riskdiff <- !isFALSE(riskdiff)
  afun <- if (with_riskdiff) afun_riskdiff else a_count_patients_with_event

  # `.stats` is always forwarded; the other standard arguments only when supplied
  std_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Standard arguments first, then those meant for the statistics function.
  # `filters` is wrapped in a list so it travels as a single argument.
  extra_args <- c(
    list(.stats = .stats),
    Filter(Negate(is.null), std_args),
    list(filters = filters),
    if (with_riskdiff) list(afun = list("s_count_patients_with_event" = a_count_patients_with_event)),
    ...
  )

  # Expose the additional layout parameters as formals of the analysis function
  layout_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- layout_params
  formals(afun) <- c(formals(afun), layout_params)

  analyze(
    lyt = lyt,
    vars = vars,
    afun = afun,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Count patients with abnormal range values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [count_abnormal()] creates a layout element to count patients with abnormal analysis range
#' values in each direction.
#'
#' This function analyzes primary analysis variable `var` which indicates abnormal range results.
#' Additional analysis variables that can be supplied as a list via the `variables` parameter are
#' `id` (defaults to `USUBJID`), a variable to indicate unique subject identifiers, and `baseline`
#' (defaults to `BNRIND`), a variable to indicate baseline reference ranges.
#'
#' For each direction specified via the `abnormal` parameter (e.g. High or Low), a fraction of
#' patient counts is returned, with numerator and denominator calculated as follows:
#'   * `num`: The number of patients with this abnormality recorded while on treatment.
#'   * `denom`: The total number of patients with at least one post-baseline assessment.
#'
#' This function assumes that `df` has been filtered to only include post-baseline records.
#'
#' @inheritParams argument_convention
#' @param abnormal (named `list`)\cr list identifying the abnormal range level(s) in `var`. Defaults to
#'   `list(Low = "LOW", High = "HIGH")` but you can also group different levels into the named list,
#'   for example, `abnormal = list(Low = c("LOW", "LOW LOW"), High = c("HIGH", "HIGH HIGH"))`.
#' @param exclude_base_abn (`flag`)\cr whether to exclude subjects with baseline abnormality
#'   from numerator and denominator.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("abnormal"), type = "sh")``
#'
#' @note
#' * `count_abnormal()` only considers a single variable that contains multiple abnormal levels.
#' * `df` should be filtered to only include post-baseline records.
#' * The denominator includes patients that may have other abnormal levels at baseline,
#'   and patients missing baseline records. Patients with these abnormalities at
#'   baseline can be optionally excluded from numerator and denominator via the
#'   `exclude_base_abn` parameter.
#'
#' @name abnormal
#' @include formatting_functions.R
#' @order 1
NULL

#' @describeIn abnormal Statistics function which counts patients with abnormal range values
#'   for a single `abnormal` level.
#'
#' @return
#' * `s_count_abnormal()` returns the statistic `fraction` which is a vector with `num` and `denom` counts of patients.
#'
#' @keywords internal
s_count_abnormal <- function(df,
                             .var,
                             abnormal = list(Low = "LOW", High = "HIGH"),
                             variables = list(id = "USUBJID", baseline = "BNRIND"),
                             exclude_base_abn = FALSE,
                             ...) {
  # Validate argument structure first, then the columns, and only then the factor levels:
  # inspecting `levels(df[[.var]])` before confirming that the column exists and is a factor
  # would hide the real problem behind an uninformative "Must be TRUE" failure.
  checkmate::assert_list(abnormal, types = "character", names = "named", len = 2, any.missing = FALSE)
  checkmate::assert_flag(exclude_base_abn)
  assert_df_with_variables(df, c(range = .var, variables))
  checkmate::assert_factor(df[[.var]])
  checkmate::assert_multi_class(df[[variables$baseline]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))
  # At least one of the requested abnormal levels must be a level of the analysis variable
  checkmate::assert_true(any(unlist(abnormal) %in% levels(df[[.var]])))

  # Counts numerator and denominator patients for one abnormal direction.
  # `abn_name` is the direction label, `abn` the levels of `.var` belonging to it.
  count_abnormal_single <- function(abn_name, abn) {
    # Patients in the denominator fulfill:
    # - have at least one post-baseline visit
    # - their baseline must not be abnormal if `exclude_base_abn`.
    if (exclude_base_abn) {
      denom_select <- !(df[[variables$baseline]] %in% abn)
    } else {
      denom_select <- TRUE
    }
    denom <- length(unique(df[denom_select, variables$id, drop = TRUE]))

    # Patients in the numerator fulfill:
    # - have at least one post-baseline visit with the required abnormality level
    # - are part of the denominator patients.
    num_select <- (df[[.var]] %in% abn) & denom_select
    num <- length(unique(df[num_select, variables$id, drop = TRUE]))

    formatters::with_label(c(num = num, denom = denom), abn_name)
  }

  # This will define the abnormal levels theoretically possible for a specific lab parameter
  # within a split level of a layout. Directions without any matching level are dropped.
  abnormal_lev <- lapply(abnormal, intersect, levels(df[[.var]]))
  abnormal_lev <- abnormal_lev[vapply(abnormal_lev, function(x) length(x) > 0, logical(1))]

  # One labelled `c(num, denom)` vector per remaining direction, named by direction
  result <- sapply(names(abnormal_lev), function(i) count_abnormal_single(i, abnormal_lev[[i]]), simplify = FALSE)
  list(fraction = result)
}

#' @describeIn abnormal Formatted analysis function which is used as `afun` in `count_abnormal()`.
#'
#' @return
#' * `a_count_abnormal()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_count_abnormal <- function(df,
                             ...,
                             .stats = NULL,
                             .stat_names = NULL,
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL) {
  # Split the dots into layout-provided parameters and statistics-function arguments.
  # NOTE: `retrieve_extra_afun_params()` must be called directly from this body.
  stat_args <- list(...)
  layout_params <- retrieve_extra_afun_params(names(stat_args$.additional_fun_parameters))
  stat_args$.additional_fun_parameters <- NULL

  # Separate built-in statistic names from user-defined statistic functions
  stats_split <- .split_std_from_custom_stats(.stats)
  .stats <- stats_split$all_stats
  custom_fns <- stats_split$custom_stats

  # Compute the statistics (default function plus any custom ones)
  computed <- .apply_stat_functions(
    default_stat_fnc = s_count_abnormal,
    custom_stat_fnc_list = custom_fns,
    args_list = c(list(df = df), layout_params, stat_args)
  )

  # Resolve formats, labels and indentation per statistic and per level within it
  .stats <- get_stats("abnormal", stats_in = .stats, custom_stats_in = names(custom_fns))
  stat_levels <- lapply(computed, names)
  .formats <- get_formats_from_stats(.stats, .formats, stat_levels)
  .labels <- get_labels_from_stats(.stats, .labels, stat_levels)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, stat_levels)

  # Flatten the selected statistics to one entry per row, named like the formats
  cell_values <- setNames(.unlist_keep_nulls(computed[.stats]), names(.formats))

  # Resolve "auto" formats and the statistic names
  .formats <- apply_auto_formatting(.formats, cell_values, layout_params$.df_row, layout_params$.var)
  .stat_names <- get_stat_names(cell_values, .stat_names)

  # The flattened labels serve as both row names and row labels
  row_labels <- .unlist_keep_nulls(.labels)
  in_rows(
    .list = cell_values,
    .formats = .formats,
    .names = row_labels,
    .stat_names = .stat_names,
    .labels = row_labels,
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}

#' @describeIn abnormal Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' df <- data.frame(
#'   USUBJID = as.character(c(1, 1, 2, 2)),
#'   ANRIND = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
#'   BNRIND = factor(c("NORMAL", "NORMAL", "HIGH", "HIGH")),
#'   ONTRTFL = c("", "Y", "", "Y"),
#'   stringsAsFactors = FALSE
#' )
#'
#' # Select only post-baseline records.
#' df <- df %>%
#'   filter(ONTRTFL == "Y")
#'
#' # Layout creating function.
#' basic_table() %>%
#'   count_abnormal(var = "ANRIND", abnormal = list(high = "HIGH", low = "LOW")) %>%
#'   build_table(df)
#'
#' # Passing of statistics function and formatting arguments.
#' df2 <- data.frame(
#'   ID = as.character(c(1, 1, 2, 2)),
#'   RANGE = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
#'   BL_RANGE = factor(c("NORMAL", "NORMAL", "HIGH", "HIGH")),
#'   ONTRTFL = c("", "Y", "", "Y"),
#'   stringsAsFactors = FALSE
#' )
#'
#' # Select only post-baseline records.
#' df2 <- df2 %>%
#'   filter(ONTRTFL == "Y")
#'
#' basic_table() %>%
#'   count_abnormal(
#'     var = "RANGE",
#'     abnormal = list(low = "LOW", high = "HIGH"),
#'     variables = list(id = "ID", baseline = "BL_RANGE")
#'   ) %>%
#'   build_table(df2)
#'
#' @export
#' @order 2
count_abnormal <- function(lyt,
                           var,
                           abnormal = list(Low = "LOW", High = "HIGH"),
                           variables = list(id = "USUBJID", baseline = "BNRIND"),
                           exclude_base_abn = FALSE,
                           na_str = default_na_str(),
                           nested = TRUE,
                           ...,
                           table_names = var,
                           .stats = "fraction",
                           .stat_names = NULL,
                           .formats = list(fraction = format_fraction),
                           .labels = NULL,
                           .indent_mods = NULL) {
  # `.stats` is always forwarded; the other standard arguments only when supplied
  std_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Standard arguments first, then those meant for the statistics function.
  # `abnormal` and `variables` are wrapped in lists so each travels as a single argument.
  extra_args <- c(
    list(.stats = .stats),
    Filter(Negate(is.null), std_args),
    list(abnormal = abnormal),
    list(variables = variables),
    exclude_base_abn = exclude_base_abn,
    ...
  )

  # Expose the additional layout parameters as formals of a local copy of the analysis function
  layout_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- layout_params
  formals(a_count_abnormal) <- c(formals(a_count_abnormal), layout_params)

  analyze(
    lyt = lyt,
    vars = var,
    afun = a_count_abnormal,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    show_labels = "hidden",
    table_names = table_names
  )
}

#' Helper function to create a map data frame for `trim_levels_to_map()`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to create a map data frame from the input dataset, which can be used as an argument in the
#' `trim_levels_to_map` split function. The map is constructed differently depending on the chosen `method`.
#'
#' @inheritParams argument_convention
#' @param abnormal (named `list`)\cr identifying the abnormal range level(s) in `df`. Based on the levels of
#'   abnormality of the input dataset, it can be something like `list(Low = "LOW LOW", High = "HIGH HIGH")` or
#'   `list(Low = "LOW", High = "HIGH")`.
#' @param method (`string`)\cr indicates how the returned map will be constructed. Can be `"default"` or `"range"`.
#'
#' @return A map `data.frame`.
#'
#' @note If method is `"default"`, the returned map will only contain the abnormal directions that are observed in
#'   `df`, and records with only normal values will be excluded to avoid errors when creating the layout. If method
#'   is `"range"`, the returned map follows the rule that the low direction requires at least one observation with
#'   low range > 0, and the high direction requires at least one observation with a non-missing high range.
#'
#' @examples
#' adlb <- df_explicit_na(tern_ex_adlb)
#'
#' h_map_for_count_abnormal(
#'   df = adlb,
#'   variables = list(anl = "ANRIND", split_rows = c("LBCAT", "PARAM")),
#'   abnormal = list(low = c("LOW"), high = c("HIGH")),
#'   method = "default",
#'   na_str = "<Missing>"
#' )
#'
#' df <- data.frame(
#'   USUBJID = c(rep("1", 4), rep("2", 4), rep("3", 4)),
#'   AVISIT = c(
#'     rep("WEEK 1", 2),
#'     rep("WEEK 2", 2),
#'     rep("WEEK 1", 2),
#'     rep("WEEK 2", 2),
#'     rep("WEEK 1", 2),
#'     rep("WEEK 2", 2)
#'   ),
#'   PARAM = rep(c("ALT", "CPR"), 6),
#'   ANRIND = c(
#'     "NORMAL", "NORMAL", "LOW",
#'     "HIGH", "LOW", "LOW", "HIGH", "HIGH", rep("NORMAL", 4)
#'   ),
#'   ANRLO = rep(5, 12),
#'   ANRHI = rep(20, 12)
#' )
#' df$ANRIND <- factor(df$ANRIND, levels = c("LOW", "HIGH", "NORMAL"))
#' h_map_for_count_abnormal(
#'   df = df,
#'   variables = list(
#'     anl = "ANRIND",
#'     split_rows = c("PARAM"),
#'     range_low = "ANRLO",
#'     range_high = "ANRHI"
#'   ),
#'   abnormal = list(low = c("LOW"), high = c("HIGH")),
#'   method = "range",
#'   na_str = "<Missing>"
#' )
#'
#' @export
h_map_for_count_abnormal <- function(df,
                                     variables = list(
                                       anl = "ANRIND",
                                       split_rows = c("PARAM"),
                                       range_low = "ANRLO",
                                       range_high = "ANRHI"
                                     ),
                                     abnormal = list(low = c("LOW", "LOW LOW"), high = c("HIGH", "HIGH HIGH")),
                                     method = c("default", "range"),
                                     na_str = "<Missing>") {
  method <- match.arg(method)
  checkmate::assert_subset(c("anl", "split_rows"), names(variables))
  checkmate::assert_false(anyNA(df[variables$split_rows]))
  assert_df_with_variables(df,
    variables = list(anl = variables$anl, split_rows = variables$split_rows),
    na_level = na_str
  )
  assert_df_with_factors(df, list(val = variables$anl))
  assert_valid_factor(df[[variables$anl]], any.missing = FALSE)
  assert_list_of_variables(variables)
  checkmate::assert_list(abnormal, types = "character", len = 2)

  # Drop unused levels from df as they are not supposed to be in the final map
  df <- droplevels(df)

  normal_value <- setdiff(levels(df[[variables$anl]]), unlist(abnormal))

  # Based on the understanding of clinical data, there should only be one level of normal which is "NORMAL"
  checkmate::assert_vector(normal_value, len = 1)

  # Default method will only have what is observed in the df, and records with all normal values will be excluded to
  # avoid error in layout building.
  if (method == "default") {
    df_abnormal <- df[df[[variables$anl]] %in% unlist(abnormal), , drop = FALSE]
    map <- unique(df_abnormal[c(variables$split_rows, variables$anl)])
    # Every split combination with an observed abnormality also gets a row for the normal level
    map_normal <- unique(map[variables$split_rows])
    map_normal[[variables$anl]] <- normal_value
    map <- rbind(map, map_normal)
  } else if (method == "range") {
    # range method follows the rule that at least one observation with ANRLO > 0 for low
    # direction and at least one observation with ANRHI is not missing for high direction.
    checkmate::assert_subset(c("range_low", "range_high"), names(variables))
    checkmate::assert_subset(c("LOW", "HIGH"), toupper(names(abnormal)))

    assert_df_with_variables(df,
      variables = list(
        range_low = variables$range_low,
        range_high = variables$range_high
      )
    )

    # Builds the map for one direction ("LOW" or "HIGH"): each unique combination of the
    # `split_rows` variables found in `rows` is paired with every abnormal level of that direction.
    expand_direction <- function(rows, direction) {
      split_map <- unique(rows[variables$split_rows])
      rownames(split_map) <- NULL # Just to avoid strange row index in case upstream functions changed
      dir_levels <- unname(unlist(abnormal[toupper(names(abnormal)) == direction]))
      expanded <- split_map[rep(seq_len(nrow(split_map)), each = length(dir_levels)), , drop = FALSE]
      expanded[[variables$anl]] <- rep(dir_levels, times = nrow(split_map))
      expanded
    }

    # Define low direction of map: rows with a low range limit above zero
    # (`which()` drops rows where the comparison is NA)
    low_range <- df[[variables$range_low]]
    map_low <- expand_direction(df[which(low_range > 0), , drop = FALSE], "LOW")

    # Define high direction of map: rows whose high range limit is neither NA nor the explicit
    # missing level `na_str`. Both conditions must hold; combining them with `|` would keep
    # rows equal to `na_str`.
    high_range <- df[[variables$range_high]]
    map_high <- expand_direction(
      df[which(!is.na(high_range) & high_range != na_str), , drop = FALSE],
      "HIGH"
    )

    # Define normal of map
    map_normal <- unique(rbind(map_low, map_high)[variables$split_rows])
    map_normal[variables$anl] <- normal_value

    map <- rbind(map_low, map_high, map_normal)
  }

  # map should be all characters
  map <- data.frame(lapply(map, as.character), stringsAsFactors = FALSE)

  # sort the map final output by split_rows variables (the leading columns of `map`);
  # sorting from the last key to the first relies on `order()` leaving ties in place
  for (i in rev(seq_along(variables$split_rows))) {
    map <- map[order(map[[i]]), ]
  }
  map
}

## Deprecated ------------------------------------------------------------

#' Helper functions for tabulation of a single biomarker result
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' @inheritParams argument_convention
#' @inheritParams survival_duration_subgroups
#' @param df (`data.frame`)\cr results for a single biomarker. For `h_tab_rsp_one_biomarker()`, the results returned by
#'   [extract_rsp_biomarkers()]. For `h_tab_surv_one_biomarker()`, the results returned by
#'   [extract_survival_biomarkers()].
#' @param afuns (named `list` of `function`)\cr analysis functions.
#' @param colvars (named `list`)\cr named list with elements `vars` (variables to tabulate) and `labels` (their labels).
#'
#' @return An `rtables` table object with statistics in columns.
#'
#' @name h_biomarkers_subgroups
NULL

#' @describeIn h_biomarkers_subgroups Helper function to calculate statistics in columns for one biomarker.
#'
#' @export
h_tab_one_biomarker <- function(df,
                                afuns,
                                colvars,
                                na_str = default_na_str(),
                                ...,
                                .stats = NULL,
                                .stat_names = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  lifecycle::deprecate_warn(
    "0.9.8", "h_tab_one_biomarker()",
    details = "This function is no longer used within `tern`."
  )

  # Standard extra arguments: `.stats` is always forwarded (even when `NULL`),
  # the remaining ones only when they were actually supplied.
  extra_args <- list(".stats" = .stats)
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  extra_args <- c(extra_args, Filter(Negate(is.null), optional_args))

  # Arguments destined for the statistic function itself
  extra_args <- c(extra_args, biomarker = TRUE, ...)

  # Extend the formals of the analysis function with the additional layout parameters
  # and forward the same parameters through `extra_args`.
  additional_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- additional_params
  formals(afuns) <- c(formals(afuns), additional_params)

  # The "ci" column is derived from the "lcl" and "ucl" columns
  df$ci <- combine_vectors(df$lcl, df$ucl)

  # Only tabulate the requested variables that exist in `df`
  colvars$vars <- intersect(colvars$vars, names(df))
  colvars$labels <- colvars$labels[colvars$vars]

  # Local helper: restrict the rows to one level of `row_type` and hide the level label.
  keep_row_type <- function(lyt, type) {
    split_rows_by(
      lyt = lyt,
      var = "row_type",
      split_fun = keep_split_levels(type),
      nested = TRUE,
      child_labels = "hidden"
    )
  }

  # One column per variable to tabulate
  lyt <- split_cols_by_multivar(lyt = basic_table(), vars = colvars$vars, varlabels = colvars$labels)

  # "All Patients" (content) row
  lyt <- keep_row_type(lyt, "content")
  lyt <- analyze_colvars(lyt = lyt, afun = afuns, na_str = na_str, extra_args = extra_args)

  # Analysis rows, nested by variable label, only when present in the data
  if ("analysis" %in% df$row_type) {
    lyt <- keep_row_type(lyt, "analysis")
    lyt <- split_rows_by(lyt = lyt, var = "var_label", nested = TRUE, indent_mod = 1L)
    lyt <- analyze_colvars(lyt = lyt, afun = afuns, na_str = na_str, inclNAs = TRUE, extra_args = extra_args)
  }

  build_table(lyt, df = df)
}

#' @describeIn h_biomarkers_subgroups Helper function that prepares a single response sub-table given the results for a
#'   single biomarker.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   mutate(rsp = AVALC == "CR")
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' # For a single population, separately estimate the effects of two biomarkers.
#' df <- h_logistic_mult_cont_df(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX"
#'   ),
#'   data = adrs_f
#' )
#'
#' # Starting from above `df`, zoom in on one biomarker and add required columns.
#' df1 <- df[1, ]
#' df1$subgroup <- "All patients"
#' df1$row_type <- "content"
#' df1$var <- "ALL"
#' df1$var_label <- "All patients"
#'
#' h_tab_rsp_one_biomarker(
#'   df1,
#'   vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval")
#' )
#'
#' @export
h_tab_rsp_one_biomarker <- function(df,
                                    vars,
                                    na_str = default_na_str(),
                                    .indent_mods = 0L,
                                    ...) {
  lifecycle::deprecate_warn(
    "0.9.8", "h_tab_rsp_one_biomarker()",
    details = "This function is no longer used within `tern`."
  )

  # Confidence level and p-value label are read from the first row of `df`
  conf_level <- df$conf_level[1]
  pval_method <- df$pval_label[1]
  colvars <- d_rsp_subgroups_colvars(vars, conf_level = conf_level, method = pval_method)

  # Delegate the tabulation to the generic single-biomarker helper
  h_tab_one_biomarker(
    df = df, afuns = a_response_subgroups, colvars = colvars,
    na_str = na_str, .indent_mods = .indent_mods, ...
  )
}

#' @describeIn h_biomarkers_subgroups Helper function that prepares a single survival sub-table given the results for a
#'   single biomarker.
#'
#' @examples
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte, fill = FALSE)
#'
#' adtte_f <- adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' # For a single population, separately estimate the effects of two biomarkers.
#' df <- h_coxreg_mult_cont_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     strata = c("STRATA1", "STRATA2")
#'   ),
#'   data = adtte_f
#' )
#'
#' # Starting from above `df`, zoom in on one biomarker and add required columns.
#' df1 <- df[1, ]
#' df1$subgroup <- "All patients"
#' df1$row_type <- "content"
#' df1$var <- "ALL"
#' df1$var_label <- "All patients"
#' h_tab_surv_one_biomarker(
#'   df1,
#'   vars = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"),
#'   time_unit = "days"
#' )
#'
#' @export
h_tab_surv_one_biomarker <- function(df,
                                     vars,
                                     time_unit,
                                     na_str = default_na_str(),
                                     .indent_mods = 0L,
                                     ...) {
  lifecycle::deprecate_warn(
    "0.9.8", "h_tab_surv_one_biomarker()",
    details = "This function is no longer used within `tern`."
  )

  # Confidence level and p-value label are read from the first row of `df`
  conf_level <- df$conf_level[1]
  pval_method <- df$pval_label[1]
  colvars <- d_survival_subgroups_colvars(
    vars,
    conf_level = conf_level, method = pval_method, time_unit = time_unit
  )

  # Delegate the tabulation to the generic single-biomarker helper
  h_tab_one_biomarker(
    df = df, afuns = a_survival_subgroups, colvars = colvars,
    na_str = na_str, .indent_mods = .indent_mods, ...
  )
}

#' Split function to configure risk difference column
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Wrapper function for [rtables::add_combo_levels()] which configures settings for the risk difference
#' column to be added to an `rtables` object. To add a risk difference column to a table, this function
#' should be used as `split_fun` in calls to [rtables::split_cols_by()], followed by setting argument
#' `riskdiff` to `TRUE` in all following analyze function calls.
#'
#' @param arm_x (`string`)\cr name of reference arm to use in risk difference calculations.
#' @param arm_y (`character`)\cr names of one or more arms to compare to reference arm in risk difference
#'   calculations. A new column will be added for each value of `arm_y`.
#' @param col_label (`character`)\cr labels to use when rendering the risk difference column within the table.
#'   If more than one comparison arm is specified in `arm_y`, default labels will specify which two arms are
#'   being compared (reference arm vs. comparison arm).
#' @param pct (`flag`)\cr whether output should be returned as percentages. Defaults to `TRUE`.
#'
#' @return A closure suitable for use as a split function (`split_fun`) within [rtables::split_cols_by()]
#'   when creating a table layout.
#'
#' @seealso [stat_propdiff_ci()] for details on risk difference calculation.
#'
#' @examples
#' adae <- tern_ex_adae
#' adae$AESEV <- factor(adae$AESEV)
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", split_fun = add_riskdiff(arm_x = "ARM A", arm_y = c("ARM B", "ARM C"))) %>%
#'   count_occurrences_by_grade(
#'     var = "AESEV",
#'     riskdiff = TRUE
#'   )
#'
#' tbl <- build_table(lyt, df = adae)
#' tbl
#'
#' @export
add_riskdiff <- function(arm_x,
                         arm_y,
                         col_label = paste0(
                           "Risk Difference (%) (95% CI)", if (length(arm_y) > 1) paste0("\n", arm_x, " vs. ", arm_y)
                         ),
                         pct = TRUE) {
  checkmate::assert_character(arm_x, len = 1)
  checkmate::assert_character(arm_y, min.len = 1)
  # One label is required per comparison arm
  checkmate::assert_character(col_label, len = length(arm_y))
  # `pct` is used as an `if ()` condition below, so it must be a single non-missing logical;
  # this is the same validation that `control_riskdiff()` applies to its `pct` argument.
  checkmate::assert_flag(pct)

  # Build one combination level per comparison arm. Each level is named
  # "riskdiff_<arm_x>_<arm_y>" and combines the reference arm with that comparison arm.
  combodf <- tibble::tribble(~valname, ~label, ~levelcombo, ~exargs)
  for (i in seq_along(arm_y)) {
    combodf <- rbind(
      combodf,
      tibble::tribble(
        ~valname, ~label, ~levelcombo, ~exargs,
        paste("riskdiff", arm_x, arm_y[i], sep = "_"), col_label[i], c(arm_x, arm_y[i]), list()
      )
    )
  }

  # The "_pct" suffix on the level name records that percentages were requested
  if (pct) combodf$valname <- paste0(combodf$valname, "_pct")
  add_combo_levels(combodf)
}

#' Analysis function to calculate risk difference column values
#'
#' In the risk difference column, this function uses the statistics function associated with `afun` to
#' calculate risk difference values from arm X (reference group) and arm Y. These arms are specified
#' when configuring the risk difference column which is done using the [add_riskdiff()] split function in
#' the previous call to [rtables::split_cols_by()]. For all other columns, applies `afun` as usual. This
#' function utilizes the [stat_propdiff_ci()] function to perform risk difference calculations.
#'
#' @inheritParams argument_convention
#' @param afun (named `list`)\cr a named list containing one name-value pair where the name corresponds to
#'   the name of the statistics function that should be used in calculations and the value is the corresponding
#'   analysis function.
#'
#' @return A list of formatted [rtables::CellValue()].
#'
#' @seealso
#' * [stat_propdiff_ci()] for details on risk difference calculation.
#' * Split function [add_riskdiff()] which, when used as `split_fun` within [rtables::split_cols_by()] with
#'   `riskdiff` argument set to `TRUE` in subsequent analyze functions calls, adds a risk difference column
#'   to a table layout.
#'
#' @keywords internal
afun_riskdiff <- function(df,
                          labelstr = "",
                          afun,
                          ...,
                          .stats = NULL,
                          .stat_names = NULL,
                          .formats = NULL,
                          .labels = NULL,
                          .indent_mods = NULL) {
  # NOTE(review): `.spl_context`, `.var`, `.df_row`, `.N_col`, `.N_row` and `.all_col_counts` are used below but
  # are not formal arguments of this function - presumably they are added to its formals by the caller; confirm.

  # A risk difference column must have been configured: some name in the split context has to contain "riskdiff".
  if (!any(grepl("riskdiff", names(.spl_context)))) {
    stop(
      "Please set up levels to use in risk difference calculations using the `add_riskdiff` ",
      "split function within `split_cols_by`. See ?add_riskdiff for details."
    )
  }
  # `afun` must be a named list holding exactly one function
  checkmate::assert_list(afun, len = 1, types = "function")
  checkmate::assert_named(afun)

  # The name of the single `afun` element is the name of the statistics function (looked up by name below)
  sfun <- names(afun)
  # Keep only those `...` arguments that the statistics function declares as formals
  dots_extra_args <- list(...)[intersect(names(list(...)), names(formals(sfun)))]
  # NOTE(review): `.stat_names` is accepted by this function but is not part of `extra_args`,
  # so it is never forwarded - confirm this is intended.
  extra_args <- list(
    .var = .var, .df_row = .df_row, .N_col = .N_col, .N_row = .N_row, .stats = .stats, .formats = .formats,
    .labels = .labels, .indent_mods = .indent_mods
  )
  # Value of the innermost (last) column split for the current column
  cur_split <- tail(.spl_context$cur_col_split_val[[1]], 1)

  if (!grepl("^riskdiff", cur_split)) {
    # Apply basic afun (no risk difference) in all other columns
    do.call(afun[[1]], args = c(list(df = df, labelstr = labelstr), extra_args, dots_extra_args))
  } else {
    # The split value is parsed as "riskdiff_<arm_x>_<arm_y>[_pct]" (the naming used by `add_riskdiff()`).
    # NOTE(review): arm names that themselves contain "_" would be split incorrectly here.
    arm_x <- strsplit(cur_split, "_")[[1]][2]
    arm_y <- strsplit(cur_split, "_")[[1]][3]
    if (length(.spl_context$cur_col_split[[1]]) > 1) { # Different split name for nested column splits
      # Rebuild the names used to look up column counts from pieces 1+2 (arm X) and 1+3 (arm Y)
      # of the current column id, with "riskdiff" removed.
      arm_spl_x <- gsub("riskdiff", "", paste0(strsplit(.spl_context$cur_col_id[1], "_")[[1]][c(1, 2)], collapse = ""))
      arm_spl_y <- gsub("riskdiff", "", paste0(strsplit(.spl_context$cur_col_id[1], "_")[[1]][c(1, 3)], collapse = ""))
    } else {
      arm_spl_x <- arm_x
      arm_spl_y <- arm_y
    }
    # Column counts of the two arms, used as denominators
    N_col_x <- .all_col_counts[[arm_spl_x]] # nolint
    N_col_y <- .all_col_counts[[arm_spl_y]] # nolint
    # Name of the innermost column split, used to subset `df` by arm
    cur_var <- tail(.spl_context$cur_col_split[[1]], 1)

    # Apply statistics function to arm X and arm Y data
    # (`.N_col` is excluded from the shared arguments because it is supplied per arm)
    s_args <- c(dots_extra_args, extra_args[intersect(setdiff(names(extra_args), ".N_col"), names(formals(sfun)))])
    s_x <- do.call(sfun, args = c(list(df = df[df[[cur_var]] == arm_x, ], .N_col = N_col_x), s_args))
    s_y <- do.call(sfun, args = c(list(df = df[df[[cur_var]] == arm_y, ], .N_col = N_col_y), s_args))

    # Get statistic name and row names
    # ("count_fraction" is used when the statistics function returns it, otherwise "unique")
    stat <- ifelse("count_fraction" %in% names(s_x), "count_fraction", "unique")
    if ("flag_variables" %in% names(s_args)) {
      var_nms <- s_args$flag_variables
    } else if (is.list(s_x[[stat]]) && !is.null(names(s_x[[stat]]))) {
      var_nms <- names(s_x[[stat]])
    } else {
      # Single unnamed statistic: wrap it in a list so that it is handled like the multi-row case
      var_nms <- ""
      s_x[[stat]] <- list(s_x[[stat]])
      s_y[[stat]] <- list(s_y[[stat]])
    }

    # Calculate risk difference for each row, repeated if multiple statistics in table
    # (`pct` is TRUE when the split value ends in "_pct"; only the first element of each statistic is used)
    pct <- tail(strsplit(cur_split, "_")[[1]], 1) == "pct"
    rd_ci <- rep(stat_propdiff_ci(
      lapply(s_x[[stat]], `[`, 1), lapply(s_y[[stat]], `[`, 1),
      N_col_x, N_col_y,
      list_names = var_nms,
      pct = pct
    ), max(1, length(.stats)))

    # The risk difference cells always use this fixed format; `.formats` is not applied here
    in_rows(.list = rd_ci, .formats = "xx.x (xx.x - xx.x)", .indent_mods = .indent_mods)
  }
}

#' Control function for risk difference column
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Sets a list of parameters to use when generating a risk (proportion) difference column. Used as input to the
#' `riskdiff` parameter of [tabulate_rsp_subgroups()] and [tabulate_survival_subgroups()].
#'
#' @inheritParams add_riskdiff
#' @param format (`string` or `function`)\cr the format label (string) or formatting function to apply to the risk
#'   difference statistic. See the `3d` string options in [formatters::list_valid_format_labels()] for possible format
#'   strings. Defaults to `"xx.x (xx.x - xx.x)"`.
#'
#' @return A `list` of items with names corresponding to the arguments.
#'
#' @seealso [add_riskdiff()], [tabulate_rsp_subgroups()], and [tabulate_survival_subgroups()].
#'
#' @examples
#' control_riskdiff()
#' control_riskdiff(arm_x = "ARM A", arm_y = "ARM B")
#'
#' @export
control_riskdiff <- function(arm_x = NULL,
                             arm_y = NULL,
                             format = "xx.x (xx.x - xx.x)",
                             col_label = "Risk Difference (%) (95% CI)",
                             pct = TRUE) {
  # Both arms may be left unset (`NULL`); when given, `arm_x` is a single name and `arm_y` one or more names
  checkmate::assert_character(arm_x, len = 1, null.ok = TRUE)
  checkmate::assert_character(arm_y, min.len = 1, null.ok = TRUE)
  # Only a single format string passes this check
  checkmate::assert_character(format, len = 1)
  checkmate::assert_character(col_label)
  checkmate::assert_flag(pct)

  # Return the validated settings under the names of the arguments
  settings <- list(
    arm_x = arm_x,
    arm_y = arm_y,
    format = format,
    col_label = col_label,
    pct = pct
  )
  settings
}

#' Convert list of groups to a data frame
#'
#' This converts a list of group levels into a data frame format which is expected by [rtables::add_combo_levels()].
#'
#' @param groups_list (named `list` of `character`)\cr specifies the new group levels via the names and the
#'   levels that belong to it in the character vectors that are elements of the list.
#'
#' @return A `tibble` in the required format.
#'
#' @examples
#' grade_groups <- list(
#'   "Any Grade (%)" = c("1", "2", "3", "4", "5"),
#'   "Grade 3-4 (%)" = c("3", "4"),
#'   "Grade 5 (%)" = "5"
#' )
#' groups_list_to_df(grade_groups)
#'
#' @export
groups_list_to_df <- function(groups_list) {
  # Input must be a named list whose elements are all character vectors
  checkmate::assert_list(groups_list, names = "named")
  lapply(groups_list, checkmate::assert_character)

  group_labels <- names(groups_list)
  n_groups <- length(groups_list)

  # One row per group: value name derived from the label, the label itself,
  # the combined levels, and an empty list of extra arguments.
  tibble::tibble(
    valname = make_names(group_labels),
    label = group_labels,
    levelcombo = unname(groups_list),
    exargs = replicate(n_groups, list())
  )
}

#' Reference and treatment group combination
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Facilitate the re-combination of groups divided as reference and treatment groups; it helps in arranging groups of
#' columns in the `rtables` framework and teal modules.
#'
#' @param fct (`factor`)\cr the variable with levels which needs to be grouped.
#' @param ref (`character`)\cr the reference level(s).
#' @param collapse (`string`)\cr a character string to separate `fct` and `ref`.
#'
#' @return A `list` with first item `ref` (reference) and second item `trt` (treatment).
#'
#' @examples
#' groups <- combine_groups(
#'   fct = DM$ARM,
#'   ref = c("B: Placebo")
#' )
#'
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   analyze_vars("AGE") %>%
#'   build_table(DM)
#'
#' @export
combine_groups <- function(fct,
                           ref = NULL,
                           collapse = "/") {
  checkmate::assert_string(collapse)
  checkmate::assert_character(ref, min.chars = 1, any.missing = FALSE, null.ok = TRUE)
  checkmate::assert_multi_class(fct, classes = c("factor", "character"))

  group_levels <- levels(as_factor_keep_attributes(fct))

  # Given reference levels must exist; without any, the first level is the reference
  if (!is.null(ref)) {
    checkmate::assert_subset(ref, group_levels)
  } else {
    ref <- group_levels[1]
  }

  # Partition the levels (in their original order) into reference and treatment
  is_ref <- group_levels %in% ref
  groups <- list(
    ref = group_levels[is_ref],
    trt = group_levels[!is_ref]
  )

  # Each group is named by its levels pasted together with `collapse`
  names(groups) <- lapply(groups, paste, collapse = collapse)
  groups
}

#' Split columns by groups of levels
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @inheritParams groups_list_to_df
#' @param ... additional arguments to [rtables::split_cols_by()] in order. For instance, to
#'   control formats (`format`), add a joint column for all groups (`incl_all`).
#'
#' @return A layout object suitable for passing to further layouting functions. Adding
#'   this function to an `rtable` layout will add a column split including the given
#'   groups to the table layout.
#'
#' @seealso [rtables::split_cols_by()]
#'
#' @examples
#' # 1 - Basic use
#'
#' # Without group combination `split_cols_by_groups` is
#' # equivalent to [rtables::split_cols_by()].
#' basic_table() %>%
#'   split_cols_by_groups("ARM") %>%
#'   add_colcounts() %>%
#'   analyze("AGE") %>%
#'   build_table(DM)
#'
#' # Add a reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", ref_group = "B: Placebo") %>%
#'   add_colcounts() %>%
#'   analyze(
#'     "AGE",
#'     afun = function(x, .ref_group, .in_ref_col) {
#'       if (.in_ref_col) {
#'         in_rows("Diff Mean" = rcell(NULL))
#'       } else {
#'         in_rows("Diff Mean" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
#'       }
#'     }
#'   ) %>%
#'   build_table(DM)
#'
#' # 2 - Adding group specification
#'
#' # Manual preparation of the groups.
#' groups <- list(
#'   "Arms A+B" = c("A: Drug X", "B: Placebo"),
#'   "Arms A+C" = c("A: Drug X", "C: Combination")
#' )
#'
#' # Use of split_cols_by_groups without reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   analyze("AGE") %>%
#'   build_table(DM)
#'
#' # Including differentiated output in the reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups_list = groups, ref_group = "Arms A+B") %>%
#'   analyze(
#'     "AGE",
#'     afun = function(x, .ref_group, .in_ref_col) {
#'       if (.in_ref_col) {
#'         in_rows("Diff. of Averages" = rcell(NULL))
#'       } else {
#'         in_rows("Diff. of Averages" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
#'       }
#'     }
#'   ) %>%
#'   build_table(DM)
#'
#' # 3 - Binary list dividing factor levels into reference and treatment
#'
#' # `combine_groups` defines reference and treatment.
#' groups <- combine_groups(
#'   fct = DM$ARM,
#'   ref = c("A: Drug X", "B: Placebo")
#' )
#' groups
#'
#' # Use group definition without reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups_list = groups) %>%
#'   add_colcounts() %>%
#'   analyze("AGE") %>%
#'   build_table(DM)
#'
#' # Use group definition with reference column (first item of groups).
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups, ref_group = names(groups)[1]) %>%
#'   add_colcounts() %>%
#'   analyze(
#'     "AGE",
#'     afun = function(x, .ref_group, .in_ref_col) {
#'       if (.in_ref_col) {
#'         in_rows("Diff Mean" = rcell(NULL))
#'       } else {
#'         in_rows("Diff Mean" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
#'       }
#'     }
#'   ) %>%
#'   build_table(DM)
#'
#' @export
split_cols_by_groups <- function(lyt,
                                 var,
                                 groups_list = NULL,
                                 ref_group = NULL,
                                 ...) {
  # Without a group definition this is a plain column split
  if (is.null(groups_list)) {
    return(split_cols_by(lyt = lyt, var = var, ref_group = ref_group, ...))
  }

  groups_df <- groups_list_to_df(groups_list)

  # The reference group is given by its label; translate it to the matching value name
  if (!is.null(ref_group)) {
    ref_group <- groups_df$valname[groups_df$label == ref_group]
  }

  # Split on the combined levels only
  combo_split <- add_combo_levels(groups_df, keep_levels = groups_df$valname)
  split_cols_by(lyt = lyt, var = var, split_fun = combo_split, ref_group = ref_group, ...)
}

#' Combine counts
#'
#' Simplifies the estimation of column counts, especially when group combination is required.
#'
#' @inheritParams combine_groups
#' @inheritParams groups_list_to_df
#'
#' @return A `vector` of column counts.
#'
#' @seealso [combine_groups()]
#'
#' @examples
#' ref <- c("A: Drug X", "B: Placebo")
#' groups <- combine_groups(fct = DM$ARM, ref = ref)
#'
#' col_counts <- combine_counts(
#'   fct = DM$ARM,
#'   groups_list = groups
#' )
#'
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   analyze_vars("AGE") %>%
#'   build_table(DM, col_counts = col_counts)
#'
#' ref <- "A: Drug X"
#' groups <- combine_groups(fct = DM$ARM, ref = ref)
#' col_counts <- combine_counts(
#'   fct = DM$ARM,
#'   groups_list = groups
#' )
#'
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   analyze_vars("AGE") %>%
#'   build_table(DM, col_counts = col_counts)
#'
#' @export
combine_counts <- function(fct, groups_list = NULL) {
  checkmate::assert_multi_class(fct, classes = c("factor", "character"))

  fct <- as_factor_keep_attributes(fct)

  # Tabulate the levels once; the same counts serve every group
  # (previously the table was rebuilt for each element of `groups_list`).
  level_counts <- table(fct)

  if (is.null(groups_list)) {
    # No grouping: one count per factor level, named by level
    stats::setNames(as.numeric(level_counts), nm = dimnames(level_counts)[[1]])
  } else {
    # Grouping: sum the counts of the levels belonging to each group.
    # A level name that is absent from `fct` indexes to `NA`, which makes that group's sum `NA`.
    vapply(
      X = groups_list,
      FUN = function(x) sum(level_counts[x]),
      FUN.VALUE = 1
    )
  }
}

#' Control function for incidence rate
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for the incidence rate, used
#' internally to specify details in `s_incidence_rate()`.
#'
#' @inheritParams argument_convention
#' @param conf_type (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
#'   for confidence interval type.
#' @param input_time_unit (`string`)\cr `day`, `week`, `month`, or `year` (default)
#'   indicating time unit for data input.
#' @param num_pt_year (`numeric(1)`)\cr number of patient-years to use when calculating adverse event rates.
#'
#' @return A list of components with the same names as the arguments.
#'
#' @seealso [incidence_rate]
#'
#' @examples
#' control_incidence_rate(0.9, "exact", "month", 100)
#'
#' @export
control_incidence_rate <- function(conf_level = 0.95,
                                   conf_type = c("normal", "normal_log", "exact", "byar"),
                                   input_time_unit = c("year", "day", "week", "month"),
                                   num_pt_year = 100) {
  # Resolve the enumerated choices (the first choice is the default)
  conf_type <- match.arg(conf_type)
  input_time_unit <- match.arg(input_time_unit)

  # Validate the numeric settings
  checkmate::assert_number(num_pt_year)
  assert_proportion_value(conf_level)

  # Return the settings under the names of the arguments
  control <- list(
    conf_level = conf_level, conf_type = conf_type,
    input_time_unit = input_time_unit, num_pt_year = num_pt_year
  )
  control
}

#' Helper functions for tabulating biomarker effects on survival by subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions which are documented here separately to not confuse the user
#' when reading about the user-facing functions.
#'
#' @inheritParams survival_biomarkers_subgroups
#' @inheritParams argument_convention
#' @inheritParams fit_coxreg_multivar
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte, fill = FALSE)
#'
#' adtte_f <- adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' @name h_survival_biomarkers_subgroups
NULL

#' @describeIn h_survival_biomarkers_subgroups Helps with converting the "survival" function variable list
#'   to the "Cox regression" variable list. The reason is that currently there is an inconsistency between the variable
#'   names accepted by `extract_survival_subgroups()` and `fit_coxreg_multivar()`.
#'
#' @param biomarker (`string`)\cr the name of the biomarker variable.
#'
#' @return
#' * `h_surv_to_coxreg_variables()` returns a named `list` of elements `time`, `event`, `arm`,
#'   `covariates`, and `strata`.
#'
#' @examples
#' # This is how the variable list is converted internally.
#' h_surv_to_coxreg_variables(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "EVNT",
#'     covariates = c("A", "B"),
#'     strata = "D"
#'   ),
#'   biomarker = "AGE"
#' )
#'
#' @export
h_surv_to_coxreg_variables <- function(variables, biomarker) {
  # Time-to-event, event flag and biomarker must each be a single string
  checkmate::assert_list(variables)
  checkmate::assert_string(variables$tte)
  checkmate::assert_string(variables$is_event)
  checkmate::assert_string(biomarker)

  # Rename the elements: `tte` -> `time`, `is_event` -> `event`; the biomarker takes the `arm` slot
  coxreg_vars <- list(time = variables$tte, event = variables$is_event, arm = biomarker)

  # `covariates` and `strata` are passed through; wrapping in `list()` keeps the element even when it is `NULL`
  coxreg_vars["covariates"] <- list(variables$covariates)
  coxreg_vars["strata"] <- list(variables$strata)
  coxreg_vars
}

#' @describeIn h_survival_biomarkers_subgroups Prepares estimates for number of events, patients and median survival
#'   times, as well as hazard ratio estimates, confidence intervals and p-values, for multiple biomarkers
#'   in a given single data set.
#'   `variables` corresponds to names of variables found in `data`, passed as a named list and requires elements
#'   `tte`, `is_event`, `biomarkers` (vector of continuous biomarker variables) and optionally `subgroups` and `strata`.
#'
#' @return
#' * `h_coxreg_mult_cont_df()` returns a `data.frame` containing estimates and statistics for the selected biomarkers.
#'
#' @examples
#' # For a single population, estimate separately the effects
#' # of two biomarkers.
#' df <- h_coxreg_mult_cont_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     strata = c("STRATA1", "STRATA2")
#'   ),
#'   data = adtte_f
#' )
#' df
#'
#' # If the data set is empty, still the corresponding rows with missings are returned.
#' h_coxreg_mult_cont_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "REGION1",
#'     strata = c("STRATA1", "STRATA2")
#'   ),
#'   data = adtte_f[NULL, ]
#' )
#'
#' @export
h_coxreg_mult_cont_df <- function(variables,
                                  data,
                                  control = control_coxreg()) {
  # Backward compatibility: the deprecated `strat` element is still accepted and copied to `strata`
  if ("strat" %in% names(variables)) {
    warning(
      "Warning: the `strat` element name of the `variables` list argument to `h_coxreg_mult_cont_df()` ",
      "was deprecated in tern 0.9.4.\n  ",
      "Please use the name `strata` instead of `strat` in the `variables` argument."
    )
    variables[["strata"]] <- variables[["strat"]]
  }

  assert_df_with_variables(data, variables)
  checkmate::assert_list(control, names = "named")
  checkmate::assert_character(variables$biomarkers, min.len = 1, any.missing = FALSE)
  conf_level <- control[["conf_level"]]
  pval_label <- paste0(
    # the regex capitalizes the first letter of the string / sentence.
    "p-value (", gsub("(^[a-z])", "\\U\\1", trimws(control[["pval_method"]]), perl = TRUE), ")"
  )
  # If there is any data, run model, otherwise return empty results.
  if (nrow(data) > 0) {
    # Fit one Cox model per biomarker, with the biomarker taking the place of the arm variable
    l_result <- lapply(variables$biomarkers, function(bm) {
      coxreg_list <- fit_coxreg_multivar(
        variables = h_surv_to_coxreg_variables(variables, bm),
        data = data,
        control = control
      )
      result <- do.call(
        h_coxreg_multivar_extract,
        c(list(var = bm), coxreg_list[c("mod", "data", "control")])
      )
      # Median survival time is computed from the response actually used in the fitted model
      data_fit <- as.data.frame(as.matrix(coxreg_list$mod$y))
      data_fit$status <- as.logical(data_fit$status)
      median <- s_surv_time(
        df = data_fit,
        .var = "time",
        is_event = "status"
      )$median
      data.frame(
        # Dummy column needed downstream to create a nested header.
        biomarker = bm,
        biomarker_label = formatters::var_labels(data[bm], fill = TRUE),
        n_tot = coxreg_list$mod$n,
        n_tot_events = coxreg_list$mod$nevent,
        median = as.numeric(median),
        # Only the first extracted row is used
        result[1L, c("hr", "lcl", "ucl")],
        conf_level = conf_level,
        pval = result[1L, "pval"],
        pval_label = pval_label,
        stringsAsFactors = FALSE
      )
    })
    do.call(rbind, args = c(l_result, make.row.names = FALSE))
  } else {
    # Empty data: still return one row per biomarker, with zero counts and missing estimates
    data.frame(
      biomarker = variables$biomarkers,
      biomarker_label = formatters::var_labels(data[variables$biomarkers], fill = TRUE),
      n_tot = 0L,
      n_tot_events = 0L,
      median = NA,
      hr = NA,
      lcl = NA,
      ucl = NA,
      conf_level = conf_level,
      pval = NA,
      pval_label = pval_label,
      row.names = seq_along(variables$biomarkers),
      stringsAsFactors = FALSE
    )
  }
}

#' Stack multiple grobs
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' Stack grobs as a new grob with 1 column and multiple rows layout.
#'
#' @param ... grobs.
#' @param grobs (`list` of `grob`)\cr a list of grobs.
#' @param padding (`grid::unit`)\cr unit of length 1, space between each grob.
#' @param vp (`viewport` or `NULL`)\cr a [viewport()] object (or `NULL`).
#' @param name (`string`)\cr a character identifier for the grob.
#' @param gp (`gpar`)\cr a [gpar()] object.
#'
#' @return A `grob`.
#'
#' @examples
#' library(grid)
#'
#' g1 <- circleGrob(gp = gpar(col = "blue"))
#' g2 <- circleGrob(gp = gpar(col = "red"))
#' g3 <- textGrob("TEST TEXT")
#' grid.newpage()
#' grid.draw(stack_grobs(g1, g2, g3))
#'
#' showViewport()
#'
#' grid.newpage()
#' pushViewport(viewport(layout = grid.layout(1, 2)))
#' vp1 <- viewport(layout.pos.row = 1, layout.pos.col = 2)
#' grid.draw(stack_grobs(g1, g2, g3, vp = vp1, name = "test"))
#'
#' showViewport()
#' grid.ls(grobs = TRUE, viewports = TRUE, print = FALSE)
#'
#' @export
stack_grobs <- function(...,
                        grobs = list(...),
                        padding = grid::unit(2, "line"),
                        vp = NULL,
                        gp = NULL,
                        name = NULL) {
  lifecycle::deprecate_warn(
    "0.9.4",
    "stack_grobs()",
    details = "`tern` plotting functions no longer generate `grob` objects."
  )

  # Every element of `grobs` must be a grob
  checkmate::assert_true(
    all(vapply(grobs, grid::is.grob, logical(1)))
  )

  # A single grob needs no stacking and is returned unchanged
  n_grobs <- length(grobs)
  if (n_grobs == 1) {
    return(grobs[[1]])
  }

  # Layout rows alternate between grobs (odd rows, equal share of the free space)
  # and padding (even rows).
  n_layout <- 2 * n_grobs - 1
  row_heights <- lapply(
    seq(1, n_layout),
    function(row) if (row %% 2 == 0) padding else grid::unit(1, "null")
  )
  row_heights <- do.call(grid::unit.c, row_heights)

  main_vp <- grid::viewport(
    layout = grid::grid.layout(nrow = n_layout, ncol = 1, heights = row_heights)
  )

  # Wrap each grob in its own gTree positioned in the corresponding odd layout row
  place_in_row <- function(g, i) {
    grid::gTree(
      children = grid::gList(g),
      vp = grid::viewport(layout.pos.row = i, layout.pos.col = 1)
    )
  }
  nested_grobs <- Map(place_in_row, grobs, seq_along(grobs) * 2 - 1)

  # Collect the positioned grobs under the layout viewport
  grobs_mainvp <- grid::gTree(
    children = do.call(grid::gList, nested_grobs),
    vp = main_vp
  )

  # The outer gTree carries the user-supplied viewport, graphical parameters and name
  grid::gTree(children = grid::gList(grobs_mainvp), vp = vp, gp = gp, name = name)
}

#' Arrange multiple grobs
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' Arrange grobs as a new grob with `n * m (rows * cols)` layout.
#'
#' @inheritParams stack_grobs
#' @param ncol (`integer(1)`)\cr number of columns in layout.
#' @param nrow (`integer(1)`)\cr number of rows in layout.
#' @param padding_ht (`grid::unit`)\cr unit of length 1, vertical space between each grob.
#' @param padding_wt (`grid::unit`)\cr unit of length 1, horizontal space between each grob.
#'
#' @return A `grob`.
#'
#' @examples
#' library(grid)
#'
#' \donttest{
#' num <- lapply(1:9, textGrob)
#' grid::grid.newpage()
#' grid.draw(arrange_grobs(grobs = num, ncol = 2))
#'
#' showViewport()
#'
#' g1 <- circleGrob(gp = gpar(col = "blue"))
#' g2 <- circleGrob(gp = gpar(col = "red"))
#' g3 <- textGrob("TEST TEXT")
#' grid::grid.newpage()
#' grid.draw(arrange_grobs(g1, g2, g3, nrow = 2))
#'
#' showViewport()
#'
#' grid::grid.newpage()
#' grid.draw(arrange_grobs(g1, g2, g3, ncol = 3))
#'
#' grid::grid.newpage()
#' grid::pushViewport(grid::viewport(layout = grid::grid.layout(1, 2)))
#' vp1 <- grid::viewport(layout.pos.row = 1, layout.pos.col = 2)
#' grid.draw(arrange_grobs(g1, g2, g3, ncol = 2, vp = vp1))
#'
#' showViewport()
#' }
#' @export
arrange_grobs <- function(...,
                          grobs = list(...),
                          ncol = NULL, nrow = NULL,
                          padding_ht = grid::unit(2, "line"),
                          padding_wt = grid::unit(2, "line"),
                          vp = NULL,
                          gp = NULL,
                          name = NULL) {
  # Signal that this function is deprecated as of version 0.9.4.
  lifecycle::deprecate_warn(
    "0.9.4",
    "arrange_grobs()",
    details = "`tern` plotting functions no longer generate `grob` objects."
  )

  checkmate::assert_true(
    all(vapply(grobs, grid::is.grob, logical(1)))
  )

  # A single grob needs no layout and is returned as is (`vp`, `gp` and `name` are not applied).
  if (length(grobs) == 1) {
    return(grobs[[1]])
  }

  # Derive whichever grid dimension was not supplied; with neither given, use a single column.
  if (is.null(ncol) && is.null(nrow)) {
    ncol <- 1
  }
  if (is.null(nrow)) {
    nrow <- ceiling(length(grobs) / ncol)
  }
  if (is.null(ncol)) {
    ncol <- ceiling(length(grobs) / nrow)
  }

  if (ncol * nrow < length(grobs)) {
    stop("specified ncol and nrow are not enough for arranging the grobs")
  }

  # A one-column arrangement is exactly a vertical stack.
  if (ncol == 1) {
    return(stack_grobs(grobs = grobs, padding = padding_ht, vp = vp, gp = gp, name = name))
  }

  # Track sizes for one layout dimension: odd positions hold content (1 "null" unit each),
  # even positions are spacers of size `padding`.
  alternate_tracks <- function(n_layout, padding) {
    tracks <- lapply(seq(1, n_layout), function(pos) {
      if (pos %% 2 != 0) {
        grid::unit(1, "null")
      } else {
        padding
      }
    })
    do.call(grid::unit.c, tracks)
  }

  n_col <- 2 * ncol - 1
  n_row <- 2 * nrow - 1
  hts <- alternate_tracks(n_row, padding_ht)
  wts <- alternate_tracks(n_col, padding_wt)

  main_vp <- grid::viewport(
    layout = grid::grid.layout(nrow = n_row, ncol = n_col, widths = wts, heights = hts)
  )

  # Grobs fill the grid row by row: the k-th grob sits in grid row (k - 1) %/% ncol + 1 and
  # grid column (k - 1) %% ncol + 1. Content cells are the odd layout rows/columns, hence
  # the `2 * index + 1` mapping. Positions are computed once instead of growing a list in a loop.
  cell_index <- seq_along(grobs) - 1
  nested_grobs <- Map(
    function(g, i, j) {
      grid::gTree(
        children = grid::gList(g),
        vp = grid::viewport(layout.pos.row = i, layout.pos.col = j)
      )
    },
    unname(grobs),
    2 * (cell_index %/% ncol) + 1,
    2 * (cell_index %% ncol) + 1
  )

  grobs_mainvp <- grid::gTree(
    children = do.call(grid::gList, nested_grobs),
    vp = main_vp
  )

  # The outer gTree carries the caller-supplied viewport, graphical parameters and name.
  grid::gTree(
    children = grid::gList(grobs_mainvp),
    vp = vp,
    gp = gp,
    name = name
  )
}

#' Draw `grob`
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' Draw grob on device page.
#'
#' @param grob (`grob`)\cr grid object.
#' @param newpage (`flag`)\cr draw on a new page.
#' @param vp (`viewport` or `NULL`)\cr a [viewport()] object (or `NULL`).
#'
#' @return A `grob`.
#'
#' @examples
#' library(dplyr)
#' library(grid)
#'
#' \donttest{
#' rect <- rectGrob(width = grid::unit(0.5, "npc"), height = grid::unit(0.5, "npc"))
#' rect %>% draw_grob(vp = grid::viewport(angle = 45))
#'
#' num <- lapply(1:10, textGrob)
#' num %>%
#'   arrange_grobs(grobs = .) %>%
#'   draw_grob()
#' showViewport()
#' }
#'
#' @export
draw_grob <- function(grob, newpage = TRUE, vp = NULL) {
  # Signal that this function is deprecated as of version 0.9.4.
  lifecycle::deprecate_warn(
    "0.9.4",
    "draw_grob()",
    details = "`tern` plotting functions no longer generate `grob` objects."
  )

  # Optionally start a new grid page before drawing.
  if (newpage) {
    grid::grid.newpage()
  }

  # Optionally draw inside the supplied viewport; it is pushed here and not popped afterwards.
  use_viewport <- !is.null(vp)
  if (use_viewport) {
    grid::pushViewport(vp)
  }

  # The value of this call is also the function's return value.
  grid::grid.draw(grob)
}

tern_grob <- function(x) {
  # Put "ternGrob" first in the class vector; `union()` drops duplicates, so an object
  # that already carries the class ends up with exactly one copy of it.
  class(x) <- union("ternGrob", class(x))
  x
}

#' @keywords internal
print.ternGrob <- function(x, ...) {
  # Print method for objects of class "ternGrob": start a new grid page and draw `x` on it.
  # Arguments passed via `...` are accepted but not used.
  grid::grid.newpage()
  grid::grid.draw(x)
}

#' Survival time point analysis
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [surv_timepoint()] creates a layout element to analyze patient survival rates and difference
#' of survival rates between groups at a given time point. The primary analysis variable `vars` is the time variable.
#' Other required inputs are `time_point`, the numeric time point of interest, and `is_event`, a variable that
#' indicates whether or not an event has occurred. The `method` argument is used to specify whether you want to analyze
#' survival estimations (`"surv"`), difference in survival with the control (`"surv_diff"`), or both of these
#' (`"both"`).
#'
#' @inheritParams argument_convention
#' @inheritParams s_surv_time
#' @param time_point (`numeric(1)`)\cr survival time point of interest.
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
#'   [control_surv_timepoint()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for survival rate.
#'   * `conf_type` (`string`)\cr confidence interval type. Options are "plain" (default), "log", "log-log",
#'     see more in [survival::survfit()]. Note option "none" is no longer supported.
#' @param method (`string`)\cr `"surv"` (survival estimations), `"surv_diff"` (difference in survival with the
#'   control), or `"both"`.
#' @param table_names_suffix (`string`)\cr optional suffix for the `table_names` used for the `rtables` to
#'   avoid warnings from duplicate table names.
#' @param .indent_mods (named `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("surv_timepoint"), type = "sh")``
#'
#' @name survival_timepoint
#' @order 1
NULL

#' @describeIn survival_timepoint Statistics function which analyzes survival rate.
#'
#' @return
#' * `s_surv_timepoint()` returns the statistics:
#'   * `pt_at_risk`: Patients remaining at risk.
#'   * `event_free_rate`: Event-free rate (%).
#'   * `rate_se`: Standard error of event free rate.
#'   * `rate_ci`: Confidence interval for event free rate.
#'   * `event_free_rate_3d`: Event-free rate (%) with Confidence interval.
#'
#' @examples
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVAL = day2month(AVAL),
#'     is_event = CNSR == 0
#'   )
#'
#' s_surv_timepoint(
#'   df = subset(adtte_f, ARMCD == "ARM A"),
#'   .var = "AVAL",
#'   is_event = "is_event",
#'   time_point = c(10),
#'   control = control_surv_timepoint()
#' )
#'
#' @export
s_surv_timepoint <- function(df,
                             .var,
                             time_point,
                             is_event,
                             control = control_surv_timepoint(),
                             ...) {
  # Input checks: `.var` names a complete numeric column, `is_event` a complete logical
  # column, and `time_point` is a single number.
  checkmate::assert_string(.var)
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE)
  checkmate::assert_number(time_point)
  checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE)

  conf_level <- control$conf_level

  # Intercept-only survival fit over all rows of `df`.
  km_formula <- stats::as.formula(
    paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1")
  )
  km_fit <- survival::survfit(
    formula = km_formula,
    data = df,
    conf.int = conf_level,
    conf.type = control$conf_type
  )

  # Summarise the fit at `time_point` (with `extend = TRUE`) and keep the needed columns.
  fit_at_tp <- summary(km_fit, times = time_point, extend = TRUE)
  est <- as.data.frame(fit_at_tp[c("time", "n.risk", "surv", "lower", "upper", "std.err")])

  if (est[["n.risk"]] == 0) {
    # Nobody is at risk at `time_point`: every statistic is reported as missing.
    pt_at_risk <- NA_real_
    event_free_rate <- NA_real_
    rate_se <- NA_real_
    rate_ci <- rep(NA_real_, 2)
  } else {
    pt_at_risk <- est[["n.risk"]]
    event_free_rate <- est[["surv"]]
    rate_se <- est[["std.err"]]
    rate_ci <- c(est[["lower"]], est[["upper"]])
  }

  # Rates, standard error and interval bounds are returned multiplied by 100.
  ci_label <- f_conf_level(conf_level)
  list(
    pt_at_risk = formatters::with_label(pt_at_risk, "Patients remaining at risk"),
    event_free_rate = formatters::with_label(event_free_rate * 100, "Event Free Rate (%)"),
    rate_se = formatters::with_label(rate_se * 100, "Standard Error of Event Free Rate"),
    rate_ci = formatters::with_label(rate_ci * 100, ci_label),
    event_free_rate_3d = formatters::with_label(
      c(event_free_rate, rate_ci) * 100,
      paste0("Event Free Rate (", ci_label, ")")
    )
  )
}

#' @describeIn survival_timepoint Statistics function which analyzes difference between two survival rates.
#'
#' @return
#' * `s_surv_timepoint_diff()` returns the statistics:
#'   * `rate_diff`: Event-free rate difference between two groups.
#'   * `rate_diff_ci`: Confidence interval for the difference.
#'   * `rate_diff_ci_3d`: Event-free rate difference and confidence interval between two groups.
#'   * `ztest_pval`: p-value to test the difference is 0.
#'
#' @keywords internal
s_surv_timepoint_diff <- function(df,
                                  .var,
                                  .ref_group,
                                  .in_ref_col,
                                  time_point,
                                  control = control_surv_timepoint(),
                                  ...) {
  # Attach the row labels to the four statistics. The combined label is written as
  # "Difference in Event Free Rate (<CI label>)", matching the "Event Free Rate (<CI label>)"
  # form used by `s_surv_timepoint()`; previously the CI label was appended without any
  # separator or parentheses.
  labelled_stats <- function(rate_diff, rate_diff_ci, rate_diff_ci_3d, ztest_pval) {
    ci_label <- f_conf_level(control$conf_level)
    list(
      rate_diff = formatters::with_label(rate_diff, "Difference in Event Free Rate"),
      rate_diff_ci = formatters::with_label(rate_diff_ci, ci_label),
      rate_diff_ci_3d = formatters::with_label(
        rate_diff_ci_3d, paste0("Difference in Event Free Rate (", ci_label, ")")
      ),
      ztest_pval = formatters::with_label(ztest_pval, "p-value (Z-test)")
    )
  }

  # In the reference column there is nothing to compare against: return empty statistics.
  if (.in_ref_col) {
    return(labelled_stats(numeric(), numeric(), numeric(), numeric()))
  }

  # Compute the single-group statistics separately for the reference group and for `df`.
  # Further arguments of `s_surv_timepoint()` (e.g. `is_event`) are forwarded through `...`.
  data <- rbind(.ref_group, df)
  group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "x"))
  res_per_group <- lapply(split(data, group), function(x) {
    s_surv_timepoint(df = x, .var = .var, time_point = time_point, control = control, ...)
  })
  res_ref <- res_per_group[["ref"]]
  res_x <- res_per_group[["x"]]

  # Difference of the two rates, with the standard errors combined in quadrature.
  rate_diff <- res_x$event_free_rate - res_ref$event_free_rate
  se_diff <- sqrt(res_x$rate_se^2 + res_ref$rate_se^2)

  # Normal-approximation interval at `control$conf_level`.
  qs <- c(-1, 1) * stats::qnorm(1 - (1 - control$conf_level) / 2)
  rate_diff_ci <- rate_diff + qs * se_diff

  # Two-sided Z-test p-value; missing when the difference itself is missing.
  ztest_pval <- if (is.na(rate_diff)) {
    NA
  } else {
    2 * (1 - stats::pnorm(abs(rate_diff) / se_diff))
  }

  labelled_stats(
    rate_diff = rate_diff,
    rate_diff_ci = rate_diff_ci,
    rate_diff_ci_3d = c(rate_diff, rate_diff_ci),
    ztest_pval = ztest_pval
  )
}

#' @describeIn survival_timepoint Formatted analysis function which is used as `afun` in `surv_timepoint()`.
#'
#' @return
#' * `a_surv_timepoint()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_surv_timepoint <- function(df,
                             ...,
                             .stats = NULL,
                             .stat_names = NULL,
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL) {
  # Arguments forwarded through `...`; the layout-supplied parameters are looked up by name.
  # NOTE(review): `retrieve_extra_afun_params()` is kept as a direct call in this body in
  # case it inspects its caller's frame - confirm before moving it into a helper.
  stat_args <- list(...)
  layout_params <- retrieve_extra_afun_params(names(stat_args$.additional_fun_parameters))
  stat_args$.additional_fun_parameters <- NULL
  method <- stat_args$method

  # Separate built-in statistic names from user-defined statistic functions.
  split_stats <- .split_std_from_custom_stats(.stats)
  .stats <- split_stats$all_stats
  custom_stat_functions <- split_stats$custom_stats

  # "surv" selects the survival-rate statistics; any other value selects the difference ones.
  use_surv <- method == "surv"
  stat_fnc <- if (use_surv) s_surv_timepoint else s_surv_timepoint_diff
  stats_group <- if (use_surv) "surv_timepoint" else "surv_timepoint_diff"

  # Compute the statistics.
  x_stats <- .apply_stat_functions(
    default_stat_fnc = stat_fnc,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      df = list(df),
      layout_params,
      stat_args
    )
  )

  # Resolve the statistics to show, then their formats, labels and indentation.
  .stats <- get_stats(
    stats_group,
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions)
  )
  x_stats <- x_stats[.stats]
  .formats <- get_formats_from_stats(.stats, .formats)
  .labels <- get_labels_from_stats(
    .stats, .labels,
    tern_defaults = c(lapply(x_stats, attr, "label"), tern_default_labels)
  )
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, layout_params$.df_row, layout_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  # The flattened labels serve both as row names and as row labels.
  flat_labels <- .unlist_keep_nulls(.labels)
  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = flat_labels,
    .stat_names = .stat_names,
    .labels = flat_labels,
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}

#' @describeIn survival_timepoint Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `surv_timepoint()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_surv_timepoint()` and/or `s_surv_timepoint_diff()` to the table layout depending on
#'   the value of `method`.
#'
#' @examples
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVAL = day2month(AVAL),
#'     is_event = CNSR == 0
#'   )
#'
#' # Survival at given time points.
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   surv_timepoint(
#'     vars = "AVAL",
#'     var_labels = "Months",
#'     is_event = "is_event",
#'     time_point = 7
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' # Difference in survival at given time points.
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   surv_timepoint(
#'     vars = "AVAL",
#'     var_labels = "Months",
#'     is_event = "is_event",
#'     time_point = 9,
#'     method = "surv_diff",
#'     .indent_mods = c("rate_diff" = 0L, "rate_diff_ci" = 2L, "ztest_pval" = 2L)
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' # Survival and difference in survival at given time points.
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   surv_timepoint(
#'     vars = "AVAL",
#'     var_labels = "Months",
#'     is_event = "is_event",
#'     time_point = 9,
#'     method = "both"
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' @export
#' @order 2
surv_timepoint <- function(lyt,
                           vars,
                           time_point,
                           is_event,
                           control = control_surv_timepoint(),
                           method = c("surv", "surv_diff", "both"),
                           na_str = default_na_str(),
                           nested = TRUE,
                           ...,
                           table_names_suffix = "",
                           var_labels = "Time",
                           show_labels = "visible",
                           .stats = c(
                             "pt_at_risk", "event_free_rate", "rate_ci",
                             "rate_diff", "rate_diff_ci", "ztest_pval"
                           ),
                           .stat_names = NULL,
                           .formats = list(rate_ci = "(xx.xx, xx.xx)"),
                           .labels = NULL,
                           .indent_mods = if (method == "both") {
                             c(rate_diff = 1L, rate_diff_ci = 2L, ztest_pval = 2L)
                           } else {
                             c(rate_diff_ci = 1L, ztest_pval = 1L)
                           }) {
  # `method` is resolved first: the default of `.indent_mods` refers to it and is only
  # evaluated once `.indent_mods` is first used below.
  method <- match.arg(method)
  checkmate::assert_string(table_names_suffix)

  # Standard extra arguments: `.stats` is always passed on, the others only when not NULL.
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  extra_args <- c(list(".stats" = .stats), Filter(Negate(is.null), optional_args))

  # Arguments for the statistics function, plus anything passed through `...`.
  extra_args <- c(
    extra_args,
    time_point = list(time_point), is_event = is_event, control = list(control),
    ...
  )

  # Append additional info from layout to the analysis function
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(a_surv_timepoint) <- c(formals(a_surv_timepoint), extra_args[[".additional_fun_parameters"]])

  # Add one analysis for a single time point `tp` and a single statistics method
  # ("surv" or "surv_diff"); the method name is also the prefix of the table name.
  add_timepoint_analysis <- function(layout, tp, stat_method, label_visibility) {
    args_tp <- extra_args
    args_tp[["time_point"]] <- tp
    args_tp[["method"]] <- stat_method

    analyze(
      lyt = layout,
      vars = vars,
      afun = a_surv_timepoint,
      na_str = na_str,
      nested = nested,
      extra_args = args_tp,
      var_labels = paste(tp, var_labels),
      show_labels = label_visibility,
      table_names = paste0(stat_method, "_", tp, table_names_suffix)
    )
  }

  want_surv <- method %in% c("surv", "both")
  want_diff <- method %in% c("surv_diff", "both")

  for (i in seq_along(time_point)) {
    if (want_surv) {
      lyt <- add_timepoint_analysis(lyt, time_point[i], "surv", show_labels)
    }
    if (want_diff) {
      # With "both", the difference rows follow the survival rows, so their label is hidden.
      lyt <- add_timepoint_analysis(
        lyt, time_point[i], "surv_diff",
        ifelse(method == "both", "hidden", show_labels)
      )
    }
  }

  lyt
}

#' Control function for subgroup treatment effect pattern (STEP) calculations
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for STEP calculations.
#'
#' @param biomarker (`numeric` or `NULL`)\cr optional provision of the numeric biomarker variable, which
#'   could be used to infer `bandwidth`, see below.
#' @param use_percentile (`flag`)\cr if `TRUE`, the running windows are created according to
#'   quantiles rather than actual values, i.e. the bandwidth refers to the percentage of data
#'   covered in each window. Suggest `TRUE` if the biomarker variable is not uniformly
#'   distributed.
#' @param bandwidth (`numeric(1)` or `NULL`)\cr indicating the bandwidth of each window.
#'   Depending on the argument `use_percentile`, it can be either the length of actual-value
#'   windows on the real biomarker scale, or percentage windows.
#'   If `use_percentile = TRUE`, it should be a number between 0 and 1.
#'   If `NULL`, treat the bandwidth to be infinity, which means only one global model will be fitted.
#'   By default, `0.25` is used for percentage windows and one quarter of the range of the `biomarker`
#'   variable for actual-value windows.
#' @param degree (`integer(1)`)\cr the degree of polynomial function of the biomarker as an interaction term
#'   with the treatment arm fitted at each window. If 0 (default), then the biomarker variable
#'   is not included in the model fitted in each biomarker window.
#' @param num_points (`integer(1)`)\cr the number of points at which the hazard ratios are estimated. The
#'   smallest number is 2.
#'
#' @return A list of components with the same names as the arguments, except `biomarker` which is
#'   just used to calculate the `bandwidth` in case that actual biomarker windows are requested.
#'
#' @examples
#' # Provide biomarker values and request actual values to be used,
#' # so that bandwidth is chosen from range.
#' control_step(biomarker = 1:10, use_percentile = FALSE)
#'
#' # Use a global model with quadratic biomarker interaction term.
#' control_step(bandwidth = NULL, degree = 2)
#'
#' # Reduce number of points to be used.
#' control_step(num_points = 10)
#'
#' @export
control_step <- function(biomarker = NULL,
                         use_percentile = TRUE,
                         bandwidth,
                         degree = 0L,
                         num_points = 39L) {
  checkmate::assert_numeric(biomarker, null.ok = TRUE)
  checkmate::assert_flag(use_percentile)
  checkmate::assert_int(num_points, lower = 2)
  checkmate::assert_count(degree)

  if (missing(bandwidth)) {
    # No bandwidth supplied: 0.25 for percentile windows, a quarter of the observed
    # biomarker range for actual-value windows, or NULL when no biomarker is given.
    if (use_percentile) {
      bandwidth <- 0.25
    } else if (is.null(biomarker)) {
      bandwidth <- NULL
    } else {
      bandwidth <- diff(range(biomarker, na.rm = TRUE)) / 4
    }
  } else if (!is.null(bandwidth)) {
    # A supplied, non-NULL bandwidth is validated; an explicit NULL is passed through unchecked.
    if (use_percentile) {
      assert_proportion_value(bandwidth)
    } else {
      checkmate::assert_scalar(bandwidth)
      checkmate::assert_true(bandwidth > 0)
    }
  }

  # `biomarker` is only used to infer the bandwidth and is not part of the result.
  list(
    use_percentile = use_percentile,
    bandwidth = bandwidth,
    degree = as.integer(degree),
    num_points = as.integer(num_points)
  )
}

#' Sort pharmacokinetic data by `PARAM` variable
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param pk_data (`data.frame`)\cr pharmacokinetic data frame.
#' @param key_var (`string`)\cr key variable used to merge pk_data and metadata created by [d_pkparam()].
#'
#' @return A pharmacokinetic `data.frame` sorted by a `PARAM` variable.
#'
#' @examples
#' library(dplyr)
#'
#' adpp <- tern_ex_adpp %>% mutate(PKPARAM = factor(paste0(PARAM, " (", AVALU, ")")))
#' pk_ordered_data <- h_pkparam_sort(adpp)
#'
#' @export
h_pkparam_sort <- function(pk_data, key_var = "PARAMCD") {
  assert_df_with_variables(pk_data, list(key_var = key_var))

  # The merge below is always done on `PARAMCD`, so copy the key variable into that column.
  pk_data$PARAMCD <- pk_data[[key_var]]

  ordered_pk_data <- d_pkparam()

  # Add the metadata columns from `ordered_pk_data` to `pk_data`. Metadata columns whose
  # names clash with columns of `pk_data` receive the ".y" suffix.
  joined_data <- merge(pk_data, ordered_pk_data, by = "PARAMCD", suffixes = c("", ".y"))

  # Drop the suffixed metadata copies so the `pk_data` versions win.
  # The dot is escaped so that only a literal ".y" suffix matches (an unescaped "." would
  # also match e.g. "therapy"), and logical indexing is used because a negated empty index
  # from `-grep()` would select zero columns when nothing matches.
  is_suffixed_copy <- grepl("\\.y$", colnames(joined_data))
  joined_data <- joined_data[, !is_suffixed_copy, drop = FALSE]

  joined_data$TLG_ORDER <- as.numeric(joined_data$TLG_ORDER)

  # Order the levels of PARAM and TLG_DISPLAY by their first appearance when the rows
  # are sorted by TLG_ORDER.
  rows_by_order <- order(joined_data$TLG_ORDER)

  joined_data$PARAM <- factor(joined_data$PARAM,
    levels = unique(joined_data$PARAM[rows_by_order]),
    ordered = TRUE
  )

  joined_data$TLG_DISPLAY <- factor(joined_data$TLG_DISPLAY,
    levels = unique(joined_data$TLG_DISPLAY[rows_by_order]),
    ordered = TRUE
  )

  joined_data
}

#' Count number of patients with missed doses by thresholds
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function creates a layout element to calculate cumulative counts of patients with number of missed
#' doses at least equal to user-specified threshold values.
#'
#' This function analyzes numeric variable `vars`, a variable with numbers of missed doses,
#' against the threshold values supplied to the `thresholds` argument as a numeric vector. This function
#' assumes that every row of the given data frame corresponds to a unique patient.
#'
#' @inheritParams s_count_cumulative
#' @inheritParams argument_convention
#' @param thresholds (`numeric`)\cr minimum number of missed doses the patients had.
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'   Options are: ``r shQuote(get_stats("count_missed_doses"), type = "sh")``
#'
#' @seealso
#' * Relevant description function [d_count_missed_doses()] which generates labels for [count_missed_doses()].
#' * Similar analyze function [count_cumulative()] which more generally counts cumulative values and has more
#'   options for threshold handling, but uses different labels.
#'
#' @name count_missed_doses
#' @order 1
NULL

#' Description function that calculates labels for `s_count_missed_doses()`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams s_count_missed_doses
#'
#' @return [d_count_missed_doses()] returns a `character` vector with the labels, one per threshold.
#'
#' @seealso [s_count_missed_doses()]
#'
#' @export
d_count_missed_doses <- function(thresholds) {
  # "dose" is pluralised only for thresholds strictly greater than 1.
  plural_suffix <- ifelse(thresholds > 1, "s", "")
  paste0("At least ", thresholds, " missed dose", plural_suffix)
}

#' @describeIn count_missed_doses Statistics function to count patients with missed doses.
#'
#' @return
#' * `s_count_missed_doses()` returns the statistics `n` and `count_fraction` with one element for each threshold.
#'
#' @keywords internal
s_count_missed_doses <- function(x,
                                 thresholds,
                                 .N_col, # nolint
                                 .N_row, # nolint
                                 denom = c("N_col", "n", "N_row"),
                                 ...) {
  # Cumulative counts of values at or above each threshold
  # (`lower_tail = FALSE`, `include_eq = TRUE`).
  cumulative <- s_count_cumulative(
    x = x,
    thresholds = thresholds,
    lower_tail = FALSE,
    include_eq = TRUE,
    .N_col = .N_col,
    .N_row = .N_row,
    denom = denom,
    ...
  )

  # Attach the "At least <k> missed dose(s)" label to each count/fraction element.
  threshold_labels <- d_count_missed_doses(thresholds)
  for (idx in seq_along(cumulative$count_fraction)) {
    labelled <- formatters::with_label(
      cumulative$count_fraction[[idx]],
      label = threshold_labels[idx]
    )
    cumulative$count_fraction[[idx]] <- labelled
  }

  # Prepend `n`, computed from `x` by `n_available()`, to the cumulative statistics.
  n_stat <- list(n = n_available(x))
  c(n_stat, cumulative)
}

#' @describeIn count_missed_doses Formatted analysis function which is used as `afun`
#'   in `count_missed_doses()`.
#'
#' @return
#' * `a_count_missed_doses()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_count_missed_doses <- function(x,
                                 ...,
                                 .stats = NULL,
                                 .stat_names = NULL,
                                 .formats = NULL,
                                 .labels = NULL,
                                 .indent_mods = NULL) {
  stat_args <- list(...)

  # Separate built-in statistic names from user-defined statistic functions.
  split_stats <- .split_std_from_custom_stats(.stats)
  .stats <- split_stats$all_stats
  custom_stat_functions <- split_stats$custom_stats

  # Layout-supplied parameters (see ?rtables::additional_fun_params), looked up by name.
  # NOTE(review): `retrieve_extra_afun_params()` is kept as a direct call in this body in
  # case it inspects its caller's frame - confirm before moving it into a helper.
  layout_params <- retrieve_extra_afun_params(
    names(stat_args$.additional_fun_parameters)
  )
  stat_args$.additional_fun_parameters <- NULL # no longer needed once extracted

  # Compute the statistics.
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_count_missed_doses,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      x = list(x),
      layout_params,
      stat_args
    )
  )

  # Resolve the statistics to show and keep only those.
  .stats <- get_stats(
    "count_missed_doses",
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions)
  )
  x_stats <- x_stats[.stats]
  levels_per_stats <- lapply(x_stats, names)

  # Flattened statistics, used for the label attributes and as the rows to display.
  flat_stats <- .unlist_keep_nulls(x_stats)

  # Formats, indentation and labels from custom input and defaults.
  .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats)
  .labels <- get_labels_from_stats(
    .stats, .labels, levels_per_stats,
    label_attr_from_stats = sapply(flat_stats, attr, "label")
  )

  # The flattened statistics take their names from the resolved formats.
  x_stats <- setNames(flat_stats, names(.formats))

  # Auto format handling
  .formats <- apply_auto_formatting(
    .formats,
    x_stats,
    layout_params$.df_row,
    layout_params$.var
  )

  # Statistic names are derived from the flattened `x_stats`.
  .stat_names <- get_stat_names(x_stats, .stat_names)

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = names(.labels),
    .stat_names = .stat_names,
    .labels = .unlist_keep_nulls(.labels),
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}

#' @describeIn count_missed_doses Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_missed_doses()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_missed_doses()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' anl <- tern_ex_adsl %>%
#'   distinct(STUDYID, USUBJID, ARM) %>%
#'   mutate(
#'     PARAMCD = "TNDOSMIS",
#'     PARAM = "Total number of missed doses during study",
#'     AVAL = sample(0:20, size = nrow(tern_ex_adsl), replace = TRUE),
#'     AVALC = ""
#'   )
#'
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_missed_doses("AVAL", thresholds = c(1, 5, 10, 15), var_labels = "Missed Doses") %>%
#'   build_table(anl, alt_counts_df = tern_ex_adsl)
#'
#' @export
#' @order 2
count_missed_doses <- function(lyt,
                               vars,
                               thresholds,
                               var_labels = vars,
                               show_labels = "visible",
                               na_str = default_na_str(),
                               nested = TRUE,
                               table_names = vars,
                               ...,
                               na_rm = TRUE,
                               .stats = c("n", "count_fraction"),
                               .stat_names = NULL,
                               .formats = NULL,
                               .labels = NULL,
                               .indent_mods = NULL) {
  # Arguments for the statistics function, plus anything passed through `...`.
  stat_fun_args <- list(
    "na_rm" = na_rm,
    "thresholds" = thresholds,
    ...
  )

  # Formatting arguments are passed on only when they are not NULL.
  formatting_args <- list(
    .stats = .stats,
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  extra_args <- c(stat_fun_args, Filter(Negate(is.null), formatting_args))

  # Extend the analysis function's formals with the layout-supplied parameters
  # (see ?rtables::additional_fun_params) and record them in `extra_args`.
  additional_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- additional_params
  formals(a_count_missed_doses) <- c(
    formals(a_count_missed_doses),
    extra_args[[".additional_fun_parameters"]]
  )

  # Add the analysis to the layout; NAs are included in the analysis only when `na_rm` is FALSE.
  analyze(
    lyt,
    vars,
    afun = a_count_missed_doses,
    na_str = na_str,
    inclNAs = !na_rm,
    table_names = table_names,
    var_labels = var_labels,
    show_labels = show_labels,
    nested = nested,
    extra_args = extra_args
  )
}

#' Helper function for deriving analysis datasets for select laboratory tables
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function that merges ADSL and ADLB datasets so that missing lab test records are inserted in the
#' output dataset. Remember that `na_level` must match the needed pre-processing
#' done with [df_explicit_na()] to have the desired output.
#'
#' @param adsl (`data.frame`)\cr ADSL data frame.
#' @param adlb (`data.frame`)\cr ADLB data frame.
#' @param worst_flag (named `character`)\cr worst post-baseline lab flag variable. The names are the flag
#'   variables in `adlb` and the values are the flag values to keep; when several flags are given, a record must
#'   satisfy all of them. An error is raised if the vector is not fully named or if a flag variable is not a
#'   column of `adlb`. See how this is implemented in the following examples.
#' @param by_visit (`flag`)\cr defaults to `FALSE` to generate worst grade per patient.
#'   If worst grade per patient per visit is specified for `worst_flag`, then
#'   `by_visit` should be `TRUE` to generate worst grade patient per visit.
#' @param no_fillin_visits (`character`)\cr visits that are not considered for post-baseline worst toxicity
#'   grade. Defaults to `c("SCREENING", "BASELINE")`.
#'
#' @return `df` containing variables shared between `adlb` and `adsl` along with variables `PARAM`, `PARAMCD`,
#'   `ATOXGR`, and `BTOXGR` relevant for analysis. Optionally, `AVISIT` and `AVISITN` are included when
#'   `by_visit = TRUE` and `no_fillin_visits = c("SCREENING", "BASELINE")`.
#'
#' @details In the result data missing records will be created for the following situations:
#'   * Patients who are present in `adsl` but have no lab data in `adlb` (both baseline and post-baseline).
#'   * Patients who do not have any post-baseline lab values.
#'   * Patients without any post-baseline values flagged as the worst.
#'
#' @examples
#' # `h_adsl_adlb_merge_using_worst_flag`
#' adlb_out <- h_adsl_adlb_merge_using_worst_flag(
#'   tern_ex_adsl,
#'   tern_ex_adlb,
#'   worst_flag = c("WGRHIFL" = "Y")
#' )
#'
#' # `h_adsl_adlb_merge_using_worst_flag` by visit example
#' adlb_out_by_visit <- h_adsl_adlb_merge_using_worst_flag(
#'   tern_ex_adsl,
#'   tern_ex_adlb,
#'   worst_flag = c("WGRLOVFL" = "Y"),
#'   by_visit = TRUE
#' )
#'
#' @export
h_adsl_adlb_merge_using_worst_flag <- function(adsl,
                                               adlb,
                                               worst_flag = c("WGRHIFL" = "Y"),
                                               by_visit = FALSE,
                                               no_fillin_visits = c("SCREENING", "BASELINE")) {
  col_names <- names(worst_flag)

  # Fail early: an unnamed flag or a flag variable absent from `adlb` would otherwise match no records
  # (or fail with an unrelated message further down).
  if (is.null(col_names) || anyNA(col_names) || !all(nzchar(col_names))) {
    stop("`worst_flag` must be a named character vector, e.g. c(\"WGRHIFL\" = \"Y\").", call. = FALSE)
  }
  missing_flags <- setdiff(col_names, colnames(adlb))
  if (length(missing_flags) > 0) {
    stop(
      "`worst_flag` variable(s) not found in `adlb`: ", paste(missing_flags, collapse = ", "),
      call. = FALSE
    )
  }

  # Row positions of `adlb` that satisfy every flag condition; `which()` discards `NA` comparisons.
  flag_positions <- Map(
    function(x, y) which(adlb[[x]] == y),
    col_names,
    worst_flag
  )
  position_satisfy_filters <- Reduce(intersect, flag_positions)

  adsl_adlb_common_columns <- intersect(colnames(adsl), colnames(adlb))
  columns_from_adlb <- c("USUBJID", "PARAM", "PARAMCD", "AVISIT", "AVISITN", "ATOXGR", "BTOXGR")

  # Flagged records outside the excluded visits, restricted to the analysis columns
  adlb_f <- adlb[position_satisfy_filters, ] %>%
    dplyr::filter(!.data[["AVISIT"]] %in% no_fillin_visits)
  adlb_f <- adlb_f[, columns_from_adlb]

  # Full grid of expected records: patient x parameter, additionally x visit when `by_visit = TRUE`
  if (by_visit) {
    avisits_grid <- adlb %>%
      dplyr::filter(!.data[["AVISIT"]] %in% no_fillin_visits) %>%
      dplyr::pull(.data[["AVISIT"]]) %>%
      unique()

    adsl_lb <- expand.grid(
      USUBJID = unique(adsl$USUBJID),
      AVISIT = avisits_grid,
      PARAMCD = unique(adlb$PARAMCD)
    )
    adsl_lb <- adsl_lb %>%
      dplyr::left_join(unique(adlb[c("AVISIT", "AVISITN")]), by = "AVISIT")

    by_variables_from_adlb <- c("USUBJID", "AVISIT", "AVISITN", "PARAMCD", "PARAM")
  } else {
    adsl_lb <- expand.grid(
      USUBJID = unique(adsl$USUBJID),
      PARAMCD = unique(adlb$PARAMCD)
    )

    by_variables_from_adlb <- c("USUBJID", "PARAMCD", "PARAM")
  }

  adsl_lb <- adsl_lb %>%
    dplyr::left_join(unique(adlb[c("PARAM", "PARAMCD")]), by = "PARAMCD")

  # `drop = FALSE` keeps a data frame even when `USUBJID` is the only shared column, so the merge key survives.
  adsl1 <- adsl[, adsl_adlb_common_columns, drop = FALSE]
  adsl_lb <- merge(adsl1, adsl_lb, by = "USUBJID")

  # Full outer merge: grid rows without a flagged record get missing values in the `adlb`-only columns.
  adlb_out <- merge(
    adlb_f,
    adsl_lb,
    by = by_variables_from_adlb,
    all = TRUE,
    sort = FALSE
  )

  if (by_visit) {
    # Overwrite `BTOXGR` on every row (including filled-in visit rows) with the value found in `adlb`.
    # NOTE(review): assumes a single `BTOXGR` per patient and parameter; several distinct values would
    # duplicate rows in this join - confirm against the data.
    adlb_btoxgr <- adlb %>%
      dplyr::select(c("USUBJID", "PARAMCD", "BTOXGR")) %>%
      unique() %>%
      dplyr::rename("BTOXGR_MAP" = "BTOXGR")

    adlb_out <- adlb_out %>%
      dplyr::left_join(adlb_btoxgr, by = c("USUBJID", "PARAMCD")) %>%
      dplyr::mutate(BTOXGR = .data$BTOXGR_MAP) %>%
      dplyr::select(-"BTOXGR_MAP")
  }

  # Labels follow the column order produced by `merge()`: key columns, remaining `adlb` columns, `adsl` columns.
  adlb_var_labels <- c(
    formatters::var_labels(adlb[by_variables_from_adlb]),
    formatters::var_labels(adlb[columns_from_adlb[!columns_from_adlb %in% by_variables_from_adlb]]),
    formatters::var_labels(adsl[adsl_adlb_common_columns[adsl_adlb_common_columns != "USUBJID"]])
  )

  adlb_out$ATOXGR <- as.factor(adlb_out$ATOXGR)
  adlb_out$BTOXGR <- as.factor(adlb_out$BTOXGR)

  formatters::var_labels(adlb_out) <- adlb_var_labels

  adlb_out
}

#' Generate PK reference dataset
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @return A `data.frame` of PK parameters.
#'
#' @examples
#' pk_reference_dataset <- d_pkparam()
#'
#' @export
d_pkparam <- function() {
  pk_dataset <- as.data.frame(matrix(
    c(
      "TMAX", "Time of CMAX", "Tmax", "Plasma/Blood/Serum", "1",
      "CMAX", "Max Conc", "Cmax", "Plasma/Blood/Serum", "2",
      "CMAXD", "Max Conc Norm by Dose", "Cmax/D", "Plasma/Blood/Serum", "3",
      "AUCIFO", "AUC Infinity Obs", "AUCinf obs", "Plasma/Blood/Serum", "4",
      "AUCIFP", "AUC Infinity Pred", "AUCinf pred", "Plasma/Blood/Serum", "5",
      "AUCIFOD", "AUC Infinity Obs Norm by Dose", "AUCinf/D obs", "Plasma/Blood/Serum", "6",
      "AUCIFD", "AUC Infinity Pred Norm by Dose", "AUCinf/D pred", "Plasma/Blood/Serum", "7",
      "AUCPEO", "AUC %Extrapolation Obs", "AUCinf extrap obs", "Plasma/Blood/Serum", "8",
      "AUCPEP", "AUC %Extrapolation Pred", "AUCinf extrap pred", "Plasma/Blood/Serum", "9",
      "AUCINT", "AUC from T1 to T2", "AUCupper-lower ", "Plasma/Blood/Serum", "10",
      "AUCTAU", "AUC Over Dosing Interval", "AUCtau", "Plasma/Blood/Serum", "11",
      "AUCLST", "AUC to Last Nonzero Conc", "AUClast", "Plasma/Blood/Serum", "12",
      "AUCALL", "AUC All", "AUCall", "Plasma/Blood/Serum", "13",
      "AUMCIFO", "AUMC Infinity Obs", "AUMCinf obs", "Plasma/Blood/Serum", "14",
      "AUMCIFP", "AUMC Infinity Pred", "AUMCinf pred", "Plasma/Blood/Serum", "15",
      "AUMCPEO", "AUMC % Extrapolation Obs", "AUMC extrap obs", "Plasma/Blood/Serum", "16",
      "AUMCPEP", "AUMC % Extrapolation Pred", "AUMC extrap pred", "Plasma/Blood/Serum", "17",
      "AUMCTAU", "AUMC Over Dosing Interval", "AUMCtau", "Plasma/Blood/Serum", "18",
      "AUMCLST", "AUMC to Last Nonzero Conc", "AUMClast", "Plasma/Blood/Serum", "19",
      "AURCIFO", "AURC Infinity Obs", "AURCinf obs", "Plasma/Blood/Serum", "20",
      "AURCIFP", "AURC Infinity Pred", "AURCinf pred", "Plasma/Blood/Serum", "21",
      "AURCPEO", "AURC % Extrapolation Obs", "AURC extrap obs", "Plasma/Blood/Serum", "22",
      "AURCPEP", "AURC % Extrapolation Pred", "AURC extrap pred", "Plasma/Blood/Serum", "23",
      "AURCLST", "AURC Dosing to Last Conc", "AURClast", "Plasma/Blood/Serum", "24",
      "AURCALL", "AURC All", "AURCall", "Plasma/Blood/Serum", "25",
      "TLST", "Time of Last Nonzero Conc", "Tlast", "Plasma/Blood/Serum", "26",
      "CO", "Initial Conc", "CO", "Plasma/Blood/Serum", "27",
      "C0", "Initial Conc", "C0", "Plasma/Blood/Serum", "28",
      "CAVG", "Average Conc", "Cavg", "Plasma/Blood/Serum", "29",
      "CLST", "Last Nonzero Conc", "Clast", "Plasma/Blood/Serum", "30",
      "CMIN", "Min Conc", "Cmin", "Plasma/Blood/Serum", "31",
      "LAMZHL", "Half-Life Lambda z", "t1/2", "Plasma/Blood/Serum", "32",
      "CLFO", "Total CL Obs by F", "CL/F obs", "Plasma/Blood/Serum", "33",
      "CLFP", "Total CL Pred by F", "CL/F pred", "Plasma/Blood/Serum", "34",
      "CLO", "Total CL Obs", "CL obs", "Plasma/Blood/Serum", "35",
      "CLP", "Total CL Pred", "CL pred", "Plasma/Blood/Serum", "36",
      "CLSS", "Total CL Steady State Pred", "CLss", "Plasma/Blood/Serum", "37",
      "CLSSF", "Total CL Steady State Pred by F", "CLss/F", "Plasma/Blood/Serum", "38",
      "VZFO", "Vz Obs by F", "Vz/F obs", "Plasma/Blood/Serum", "39",
      "VZFP", "Vz Pred by F", "Vz/F pred", "Plasma/Blood/Serum", "40",
      "VZO", "Vz Obs", "Vz obs", "Plasma/Blood/Serum", "41",
      "VZP", "Vz Pred", "Vz pred", "Plasma/Blood/Serum", "42",
      "VSSO", "Vol Dist Steady State Obs", "Vss obs", "Plasma/Blood/Serum", "43",
      "VSSP", "Vol Dist Steady State Pred", "Vss pred", "Plasma/Blood/Serum", "44",
      "LAMZ", "Lambda z", "Lambda z", "Plasma/Blood/Serum", "45",
      "LAMZLL", "Lambda z Lower Limit", "Lambda z lower", "Plasma/Blood/Serum", "46",
      "LAMZUL", "Lambda z Upper Limit", "Lambda z upper", "Plasma/Blood/Serum", "47",
      "LAMZNPT", "Number of Points for Lambda z", "No points Lambda z", "Plasma/Blood/Serum", "48",
      "MRTIFO", "MRT Infinity Obs", "MRTinf obs", "Plasma/Blood/Serum", "49",
      "MRTIFP", "MRT Infinity Pred", "MRTinf pred", "Plasma/Blood/Serum", "50",
      "MRTLST", "MRT to Last Nonzero Conc", "MRTlast", "Plasma/Blood/Serum", "51",
      "R2", "R Squared", "Rsq", "Plasma/Blood/Serum", "52",
      "R2ADJ", "R Squared Adjusted", "Rsq adjusted", "Plasma/Blood/Serum", "53",
      "TLAG", "Time Until First Nonzero Conc", "TIag", "Plasma/Blood/Serum", "54",
      "TMIN", "Time of CMIN Observation", "Tmin", "Plasma/Blood/Serum", "55",
      "ACCI", "Accumulation Index", "Accumulation Index", "Plasma/Blood/Serum/Urine", "56",
      "FLUCP", "Fluctuation%", "Fluctuation", "Plasma/Blood/Serum", "57",
      "CORRXY", "Correlation Between TimeX and Log ConcY", "Corr xy", "Plasma/Blood/Serum", "58",
      "RCAMINT", "Amt Rec from T1 to T2", "Ae", "Urine", "59",
      "RCPCINT", "Pct Rec from T1 to T2", "Fe", "Urine", "60",
      "VOLPK", "Sum of Urine Vol", "Urine volume", "Urine", "61",
      "RENALCL", "Renal CL", "CLR", "Plasma/Blood/Serum/Urine", "62",
      "ERTMAX", "Time of Max Excretion Rate", "Tmax Rate", "Urine", "63",
      "RMAX", "Time of Maximum Response", "Rmax", "Matrix of PD", "64",
      "RMIN", "Time of Minimum Response", "Rmin", "Matrix of PD", "65",
      "ERMAX", "Max Excretion Rate", "Max excretion rate", "Urine", "66",
      "MIDPTLST", "Midpoint of Collection Interval", "Midpoint last", "Urine", "67",
      "ERLST", "Last Meas Excretion Rate", "Rate last", "Urine", "68",
      "TON", "Time to Onset", "Tonset", "Matrix of PD", "69",
      "TOFF", "Time to Offset", "Toffset", "Matrix of PD", "70",
      "TBBLP", "Time Below Baseline %", "Time %Below Baseline", "Matrix of PD", "71",
      "TBTP", "Time Below Threshold %", "Time %Below Threshold", "Matrix of PD", "72",
      "TABL", "Time Above Baseline", "Time Above Baseline", "Matrix of PD", "73",
      "TAT", "Time Above Threshold", "Time Above Threshold", "Matrix of PD", "74",
      "TBT", "Time Below Threshold", "Time Below Threshold", "Matrix of PD", "75",
      "TBLT", "Time Between Baseline and Threshold", "Time Between Baseline Threshold", "Matrix of PD", "76",
      "BLRSP", "Baseline Response", "Baseline", "Matrix of PD", "77",
      "TSHDRSP", "Response Threshold", "Threshold", "Matrix of PD", "78",
      "AUCABL", "AUC Above Baseline", "AUC above baseline", "Matrix of PD", "79",
      "AUCAT", "AUC Above Threshold", "AUC above threshold", "Matrix of PD", "80",
      "AUCBBL", "AUC Below Baseline", "AUC below baseline", "Matrix of PD", "81",
      "AUCBT", "AUC Below Threshold", "AUC below threshold", "Matrix of PD", "82",
      "AUCBLDIF", "Diff AUC Above Base and AUC Below Base", "AUC diff baseline", "Matrix of PD", "83",
      "AUCTDIF", "Diff AUC Above Thr and AUC Below Thr", "AUCnet threshold", "Matrix of PD", "84",
      "TDIFF", "Diff Time to Offset and Time to Onset", "Diff toffset-tonset", "Matrix of PD", "85",
      "AUCPBEO", "AUC %Back Extrapolation Obs", "AUC%Back extrap obs", "Plasma/Blood/Serum", "86",
      "AUCPBEP", "AUC %Back Extrapolation Pred", "AUC%Back extrap pred", "Plasma/Blood/Serum", "87",
      "TSLP1L", "Lower Time Limit Slope 1st", "Slope1 lower", "Matrix of PD", "88",
      "TSLP1U", "Upper Time Limit Slope 1st Segment", "Slope1 upper", "Matrix of PD", "89",
      "TSLP2L", "Lower Time Limit Slope 2nd Segment", "Slope2 lower", "Matrix of PD", "90",
      "TSLP2U", "Upper Time Limit Slope 2nd Segment", "Slope2 upper", "Matrix of PD", "91",
      "SLP1", "Slope, 1st Segment", "Slope1", "Matrix of PD", "92",
      "SLP2", "Slope, 2nd Segment", "Slope2", "Matrix of PD", "93",
      "SLP1PT", "Number of Points for Slope 1st Segment", "No points slope1", "Matrix of PD", "94",
      "SLP2PT", "Number of Points for Slope 2nd Segment", "No points slope2", "Matrix of PD", "95",
      "R2ADJS1", "R-Squared Adjusted Slope, 1st Segment", "Rsq adjusted slope1", "Matrix of PD", "96",
      "R2ADJS2", "R-Squared Adjusted Slope, 2nd Segment", "Rsq adjusted slope2", "Matrix of PD", "97",
      "R2SLP1", "R Squared, Slope, 1st Segment", "Rsq slope1", "Matrix of PD", "98",
      "R2SLP2", "R Squared, Slope, 2nd Segment", "Rsq slope2", "Matrix of PD", "99",
      "CORRXYS1", "Corr Btw TimeX and Log ConcY, Slope 1st", "Corr xy slope1", "Plasma/Blood/Serum", "100",
      "CORRXYS2", "Corr Btw TimeX and Log ConcY, Slope 1st Slope 2nd", "Corr xy slope2", "Plasma/Blood/Serum", "101",
      "AILAMZ", "Accumulation Index using Lambda z", "AILAMZ", "Plasma/Blood/Serum", "102",
      "ARAUC", "Accumulation Ratio AUCTAU", "ARAUC", "Plasma/Blood/Serum", "103",
      "ARAUCD", "Accum Ratio AUCTAU norm by dose", "ARAUCD", "Plasma/Blood/Serum", "104",
      "ARAUCIFO", "Accum Ratio AUC Infinity Obs", "ARAUCIFO", "Plasma/Blood/Serum", "105",
      "ARAUCIFP", "Accum Ratio AUC Infinity Pred", "ARAUCIFP", "Plasma/Blood/Serum", "106",
      "ARAUCIND", "Accum Ratio AUC T1 to T2 norm by dose", "ARAUCIND_T1_T2_UNIT", "Plasma/Blood/Serum", "107",
      "ARAUCINT", "Accumulation Ratio AUC from T1 to T2", "ARAUCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "108",
      "ARAUCIOD", "Accum Ratio AUCIFO Norm by Dose", "ARAUCIOD", "Plasma/Blood/Serum", "109",
      "ARAUCIPD", "Accum Ratio AUCIFP Norm by Dose", "ARAUCIPD", "Plasma/Blood/Serum", "110",
      "ARAUCLST", "Accum Ratio AUC to Last Nonzero Conc", "ARAUCLST", "Plasma/Blood/Serum", "111",
      "ARCMAX", "Accumulation Ratio Cmax", "ARCMAX", "Plasma/Blood/Serum", "112",
      "ARCMAXD", "Accum Ratio Cmax norm by dose", "ARCMAXD", "Plasma/Blood/Serum", "113",
      "ARCMIN", "Accumulation Ratio Cmin", "ARCMIN", "Plasma/Blood/Serum", "114",
      "ARCMIND", "Accum Ratio Cmin norm by dose", "ARCMIND", "Plasma/Blood/Serum", "115",
      "ARCTROUD", "Accum Ratio Ctrough norm by dose", "ARCTROUD", "Plasma/Blood/Serum", "116",
      "ARCTROUG", "Accumulation Ratio Ctrough", "ARCTROUG", "Plasma/Blood/Serum", "117",
      "AUCALLB", "AUC All Norm by BMI", "AUCall_B", "Plasma/Blood/Serum", "118",
      "AUCALLD", "AUC All Norm by Dose", "AUCall_D", "Plasma/Blood/Serum", "119",
      "AUCALLS", "AUC All Norm by SA", "AUCall_S", "Plasma/Blood/Serum", "120",
      "AUCALLW", "AUC All Norm by WT", "AUCall_W", "Plasma/Blood/Serum", "121",
      "AUCIFOB", "AUC Infinity Obs Norm by BMI", "AUCINF_obs_B", "Plasma/Blood/Serum", "122",
      "AUCIFOLN", "AUC Infinity Obs LN Transformed", "AUCIFOLN", "Plasma/Blood/Serum", "123",
      "AUCIFOS", "AUC Infinity Obs Norm by SA", "AUCINF_obs_S", "Plasma/Blood/Serum", "124",
      "AUCIFOUB", "AUC Infinity Obs, Unbound Drug", "AUCIFOUB", "Plasma/Blood/Serum", "125",
      "AUCIFOW", "AUC Infinity Obs Norm by WT", "AUCINF_obs_W", "Plasma/Blood/Serum", "126",
      "AUCIFPB", "AUC Infinity Pred Norm by BMI", "AUCINF_pred_B", "Plasma/Blood/Serum", "127",
      "AUCIFPD", "AUC Infinity Pred Norm by Dose", "AUCINF_pred_D", "Plasma/Blood/Serum", "128",
      "AUCIFPS", "AUC Infinity Pred Norm by SA", "AUCINF_pred_S", "Plasma/Blood/Serum", "129",
      "AUCIFPUB", "AUC Infinity Pred, Unbound Drug", "AUCIFPUB", "Plasma/Blood/Serum", "130",
      "AUCIFPW", "AUC Infinity Pred Norm by WT", "AUCINF_pred_W", "Plasma/Blood/Serum", "131",
      "AUCINTB", "AUC from T1 to T2 Norm by BMI", "AUC_B_T1_T2_UNIT", "Plasma/Blood/Serum", "132",
      "AUCINTD", "AUC from T1 to T2 Norm by Dose", "AUC_D_T1_T2_UNIT", "Plasma/Blood/Serum", "133",
      "AUCINTS", "AUC from T1 to T2 Norm by SA", "AUC_S_T1_T2_UNIT", "Plasma/Blood/Serum", "134",
      "AUCINTW", "AUC from T1 to T2 Norm by WT", "AUC_W_T1_T2_UNIT", "Plasma/Blood/Serum", "135",
      "AUCLSTB", "AUC to Last Nonzero Conc Norm by BMI", "AUClast_B", "Plasma/Blood/Serum", "136",
      "AUCLSTD", "AUC to Last Nonzero Conc Norm by Dose", "AUClast_D", "Plasma/Blood/Serum", "137",
      "AUCLSTLN", "AUC to Last Nonzero Conc LN Transformed", "AUCLSTLN", "Plasma/Blood/Serum", "138",
      "AUCLSTS", "AUC to Last Nonzero Conc Norm by SA", "AUClast_S", "Plasma/Blood/Serum", "139",
      "AUCLSTUB", "AUC to Last Nonzero Conc, Unbound Drug", "AUCLSTUB", "Plasma/Blood/Serum", "140",
      "AUCLSTW", "AUC to Last Nonzero Conc Norm by WT", "AUClast_W", "Plasma/Blood/Serum", "141",
      "AUCTAUB", "AUC Over Dosing Interval Norm by BMI", "AUC_TAU_B", "Plasma/Blood/Serum", "142",
      "AUCTAUD", "AUC Over Dosing Interval Norm by Dose", "AUC_TAU_D", "Plasma/Blood/Serum", "143",
      "AUCTAUS", "AUC Over Dosing Interval Norm by SA", "AUC_TAU_S", "Plasma/Blood/Serum", "144",
      "AUCTAUW", "AUC Over Dosing Interval Norm by WT", "AUC_TAU_W", "Plasma/Blood/Serum", "145",
      "AUMCIFOB", "AUMC Infinity Obs Norm by BMI", "AUMCINF_obs_B", "Plasma/Blood/Serum", "146",
      "AUMCIFOD", "AUMC Infinity Obs Norm by Dose", "AUMCINF_obs_D", "Plasma/Blood/Serum", "147",
      "AUMCIFOS", "AUMC Infinity Obs Norm by SA", "AUMCINF_obs_S", "Plasma/Blood/Serum", "148",
      "AUMCIFOW", "AUMC Infinity Obs Norm by WT", "AUMCINF_obs_W", "Plasma/Blood/Serum", "149",
      "AUMCIFPB", "AUMC Infinity Pred Norm by BMI", "AUMCINF_pred_B", "Plasma/Blood/Serum", "150",
      "AUMCIFPD", "AUMC Infinity Pred Norm by Dose", "AUMCINF_pred_D", "Plasma/Blood/Serum", "151",
      "AUMCIFPS", "AUMC Infinity Pred Norm by SA", "AUMCINF_pred_S", "Plasma/Blood/Serum", "152",
      "AUMCIFPW", "AUMC Infinity Pred Norm by WT", "AUMCINF_pred_W", "Plasma/Blood/Serum", "153",
      "AUMCLSTB", "AUMC to Last Nonzero Conc Norm by BMI", "AUMClast_B", "Plasma/Blood/Serum", "154",
      "AUMCLSTD", "AUMC to Last Nonzero Conc Norm by Dose", "AUMClast_D", "Plasma/Blood/Serum", "155",
      "AUMCLSTS", "AUMC to Last Nonzero Conc Norm by SA", "AUMClast_S", "Plasma/Blood/Serum", "156",
      "AUMCLSTW", "AUMC to Last Nonzero Conc Norm by WT", "AUMClast_W", "Plasma/Blood/Serum", "157",
      "AUMCTAUB", "AUMC Over Dosing Interval Norm by BMI", "AUMCTAUB", "Plasma/Blood/Serum", "158",
      "AUMCTAUD", "AUMC Over Dosing Interval Norm by Dose", "AUMCTAUD", "Plasma/Blood/Serum", "159",
      "AUMCTAUS", "AUMC Over Dosing Interval Norm by SA", "AUMCTAUS", "Plasma/Blood/Serum", "160",
      "AUMCTAUW", "AUMC Over Dosing Interval Norm by WT", "AUMCTAUW", "Plasma/Blood/Serum", "161",
      "AURCALLB", "AURC All Norm by BMI", "AURCALLB", "Plasma/Blood/Serum", "162",
      "AURCALLD", "AURC All Norm by Dose", "AURCALLD", "Plasma/Blood/Serum", "163",
      "AURCALLS", "AURC All Norm by SA", "AURCALLS", "Plasma/Blood/Serum", "164",
      "AURCALLW", "AURC All Norm by WT", "AURCALLW", "Plasma/Blood/Serum", "165",
      "AURCIFOB", "AURC Infinity Obs Norm by BMI", "AURCIFOB", "Plasma/Blood/Serum", "166",
      "AURCIFOD", "AURC Infinity Obs Norm by Dose", "AURCIFOD", "Plasma/Blood/Serum", "167",
      "AURCIFOS", "AURC Infinity Obs Norm by SA", "AURCIFOS", "Plasma/Blood/Serum", "168",
      "AURCIFOW", "AURC Infinity Obs Norm by WT", "AURCIFOW", "Plasma/Blood/Serum", "169",
      "AURCIFPB", "AURC Infinity Pred Norm by BMI", "AURCIFPB", "Plasma/Blood/Serum", "170",
      "AURCIFPD", "AURC Infinity Pred Norm by Dose", "AURCIFPD", "Plasma/Blood/Serum", "171",
      "AURCIFPS", "AURC Infinity Pred Norm by SA", "AURCIFPS", "Plasma/Blood/Serum", "172",
      "AURCIFPW", "AURC Infinity Pred Norm by WT", "AURCIFPW", "Plasma/Blood/Serum", "173",
      "AURCINT", "AURC from T1 to T2", "AURCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "174",
      "AURCINTB", "AURC from T1 to T2 Norm by BMI", "AURCINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "175",
      "AURCINTD", "AURC from T1 to T2 Norm by Dose", "AURCINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "176",
      "AURCINTS", "AURC from T1 to T2 Norm by SA", "AURCINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "177",
      "AURCINTW", "AURC from T1 to T2 Norm by WT", "AURCINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "178",
      "AURCLSTB", "AURC to Last Nonzero Rate Norm by BMI", "AURCLSTB", "Plasma/Blood/Serum", "179",
      "AURCLSTD", "AURC to Last Nonzero Rate Norm by Dose", "AURCLSTD", "Plasma/Blood/Serum", "180",
      "AURCLSTS", "AURC to Last Nonzero Rate Norm by SA", "AURCLSTS", "Plasma/Blood/Serum", "181",
      "AURCLSTW", "AURC to Last Nonzero Rate Norm by WT", "AURCLSTW", "Plasma/Blood/Serum", "182",
      "C0B", "Initial Conc Norm by BMI", "C0B", "Plasma/Blood/Serum", "183",
      "C0D", "Initial Conc Norm by Dose", "C0D", "Plasma/Blood/Serum", "184",
      "C0S", "Initial Conc Norm by SA", "C0S", "Plasma/Blood/Serum", "185",
      "C0W", "Initial Conc Norm by WT", "C0W", "Plasma/Blood/Serum", "186",
      "CAVGB", "Average Conc Norm by BMI", "CAVGB", "Plasma/Blood/Serum", "187",
      "CAVGD", "Average Conc Norm by Dose", "CAVGD", "Plasma/Blood/Serum", "188",
      "CAVGINT", "Average Conc from T1 to T2", "CAVGINT_T1_T2_UNIT", "Plasma/Blood/Serum", "189",
      "CAVGINTB", "Average Conc from T1 to T2 Norm by BMI", "CAVGINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "190",
      "CAVGINTD", "Average Conc from T1 to T2 Norm by Dose", "CAVGINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "191",
      "CAVGINTS", "Average Conc from T1 to T2 Norm by SA", "CAVGINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "192",
      "CAVGINTW", "Average Conc from T1 to T2 Norm by WT", "CAVGINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "193",
      "CAVGS", "Average Conc Norm by SA", "CAVGS", "Plasma/Blood/Serum", "194",
      "CAVGW", "Average Conc Norm by WT", "CAVGW", "Plasma/Blood/Serum", "195",
      "CHTMAX", "Concentration at Half Tmax", "CHTMAX", "Plasma/Blood/Serum", "196",
      "CLFOB", "Total CL Obs by F Norm by BMI", "CLFOB", "Plasma/Blood/Serum", "197",
      "CLFOD", "Total CL Obs by F Norm by Dose", "CLFOD", "Plasma/Blood/Serum", "198",
      "CLFOS", "Total CL Obs by F Norm by SA", "CLFOS", "Plasma/Blood/Serum", "199",
      "CLFOW", "Total CL Obs by F Norm by WT", "CLFOW", "Plasma/Blood/Serum", "200",
      "CLFPB", "Total CL Pred by F Norm by BMI", "CLFPB", "Plasma/Blood/Serum", "201",
      "CLFPD", "Total CL Pred by F Norm by Dose", "CLFPD", "Plasma/Blood/Serum", "202",
      "CLFPS", "Total CL Pred by F Norm by SA", "CLFPS", "Plasma/Blood/Serum", "203",
      "CLFPW", "Total CL Pred by F Norm by WT", "CLFPW", "Plasma/Blood/Serum", "204",
      "CLFTAU", "Total CL by F for Dose Int", "CLFTAU", "Plasma/Blood/Serum", "205",
      "CLFTAUB", "Total CL by F for Dose Int Norm by BMI", "CLFTAUB", "Plasma/Blood/Serum", "206",
      "CLFTAUD", "Total CL by F for Dose Int Norm by Dose", "CLFTAUD", "Plasma/Blood/Serum", "207",
      "CLFTAUS", "Total CL by F for Dose Int Norm by SA", "CLFTAUS", "Plasma/Blood/Serum", "208",
      "CLFTAUW", "Total CL by F for Dose Int Norm by WT", "CLFTAUW", "Plasma/Blood/Serum", "209",
      "CLFUB", "Apparent CL for Unbound Drug", "CLFUB", "Plasma/Blood/Serum", "210",
      "CLOB", "Total CL Obs Norm by BMI", "CLOB", "Plasma/Blood/Serum", "211",
      "CLOD", "Total CL Obs Norm by Dose", "CLOD", "Plasma/Blood/Serum", "212",
      "CLOS", "Total CL Obs Norm by SA", "CLOS", "Plasma/Blood/Serum", "213",
      "CLOUB", "Total CL Obs for Unbound Drug", "CLOUB", "Plasma/Blood/Serum", "214",
      "CLOW", "Total CL Obs Norm by WT", "CLOW", "Plasma/Blood/Serum", "215",
      "CLPB", "Total CL Pred Norm by BMI", "CLPB", "Plasma/Blood/Serum", "216",
      "CLPD", "Total CL Pred Norm by Dose", "CLPD", "Plasma/Blood/Serum", "217",
      "CLPS", "Total CL Pred Norm by SA", "CLPS", "Plasma/Blood/Serum", "218",
      "CLPUB", "Total CL Pred for Unbound Drug", "CLPUB", "Plasma/Blood/Serum", "219",
      "CLPW", "Total CL Pred Norm by WT", "CLPW", "Plasma/Blood/Serum", "220",
      "CLRPCLEV", "Renal CL as Pct CL EV", "CLRPCLEV", "Urine", "221",
      "CLRPCLIV", "Renal CL as Pct CL IV", "CLRPCLIV", "Urine", "222",
      "CLSTB", "Last Nonzero Conc Norm by BMI", "CLSTB", "Plasma/Blood/Serum", "223",
      "CLSTD", "Last Nonzero Conc Norm by Dose", "CLSTD", "Plasma/Blood/Serum", "224",
      "CLSTS", "Last Nonzero Conc Norm by SA", "CLSTS", "Plasma/Blood/Serum", "225",
      "CLSTW", "Last Nonzero Conc Norm by WT", "CLSTW", "Plasma/Blood/Serum", "226",
      "CLTAU", "Total CL for Dose Int", "CLTAU", "Plasma/Blood/Serum", "227",
      "CLTAUB", "Total CL for Dose Int Norm by BMI", "CLTAUB", "Plasma/Blood/Serum", "228",
      "CLTAUD", "Total CL for Dose Int Norm by Dose", "CLTAUD", "Plasma/Blood/Serum", "229",
      "CLTAUS", "Total CL for Dose Int Norm by SA", "CLTAUS", "Plasma/Blood/Serum", "230",
      "CLTAUW", "Total CL for Dose Int Norm by WT", "CLTAUW", "Plasma/Blood/Serum", "231",
      "CMAXB", "Max Conc Norm by BMI", "CMAX_B", "Plasma/Blood/Serum", "232",
      "CMAXLN", "Max Conc LN Transformed", "CMAXLN", "Plasma/Blood/Serum", "233",
      "CMAXS", "Max Conc Norm by SA", "CMAXS", "Plasma/Blood/Serum", "234",
      "CMAXUB", "Max Conc, Unbound Drug", "CMAXUB", "Plasma/Blood/Serum", "235",
      "CMAXW", "Max Conc Norm by WT", "CMAXW", "Plasma/Blood/Serum", "236",
      "CMINB", "Min Conc Norm by BMI", "CMINB", "Plasma/Blood/Serum", "237",
      "CMIND", "Min Conc Norm by Dose", "CMIND", "Plasma/Blood/Serum", "238",
      "CMINS", "Min Conc Norm by SA", "CMINS", "Plasma/Blood/Serum", "239",
      "CMINW", "Min Conc Norm by WT", "CMINW", "Plasma/Blood/Serum", "240",
      "CONC", "Concentration", "CONC", "Plasma/Blood/Serum", "241",
      "CONCB", "Conc by BMI", "CONCB", "Plasma/Blood/Serum", "242",
      "CONCD", "Conc by Dose", "CONCD", "Plasma/Blood/Serum", "243",
      "CONCS", "Conc by SA", "CONCS", "Plasma/Blood/Serum", "244",
      "CONCW", "Conc by WT", "CONCW", "Plasma/Blood/Serum", "245",
      "CTROUGH", "Conc Trough", "CTROUGH", "Plasma/Blood/Serum", "246",
      "CTROUGHB", "Conc Trough by BMI", "CTROUGHB", "Plasma/Blood/Serum", "247",
      "CTROUGHD", "Conc Trough by Dose", "CTROUGHD", "Plasma/Blood/Serum", "248",
      "CTROUGHS", "Conc Trough by SA", "CTROUGHS", "Plasma/Blood/Serum", "249",
      "CTROUGHW", "Conc Trough by WT", "CTROUGHW", "Plasma/Blood/Serum", "250",
      "EFFHL", "Effective Half-Life", "EFFHL", "Plasma/Blood/Serum", "251",
      "ERINT", "Excret Rate from T1 to T2", "ERINT_T1_T2_UNIT", "Plasma/Blood/Serum", "252",
      "ERINTB", "Excret Rate from T1 to T2 Norm by BMI", "ERINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "253",
      "ERINTD", "Excret Rate from T1 to T2 Norm by Dose", "ERINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "254",
      "ERINTS", "Excret Rate from T1 to T2 Norm by SA", "ERINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "255",
      "ERINTW", "Excret Rate from T1 to T2 Norm by WT", "ERINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "256",
      "ERLSTB", "Last Meas Excretion Rate Norm by BMI", "ERLSTB", "Plasma/Blood/Serum", "257",
      "ERLSTD", "Last Meas Excretion Rate Norm by Dose", "ERLSTD", "Plasma/Blood/Serum", "258",
      "ERLSTS", "Last Meas Excretion Rate Norm by SA", "ERLSTS", "Plasma/Blood/Serum", "259",
      "ERLSTW", "Last Meas Excretion Rate Norm by WT", "ERLSTW", "Plasma/Blood/Serum", "260",
      "ERMAXB", "Max Excretion Rate Norm by BMI", "ERMAXB", "Plasma/Blood/Serum", "261",
      "ERMAXD", "Max Excretion Rate Norm by Dose", "ERMAXD", "Plasma/Blood/Serum", "262",
      "ERMAXS", "Max Excretion Rate Norm by SA", "ERMAXS", "Plasma/Blood/Serum", "263",
      "ERMAXW", "Max Excretion Rate Norm by WT", "ERMAXW", "Plasma/Blood/Serum", "264",
      "ERTLST", "Midpoint of Interval of Last Nonzero ER", "ERTLST", "Plasma/Blood/Serum", "265",
      "FABS", "Absolute Bioavailability", "FABS", "Plasma/Blood/Serum", "266",
      "FB", "Fraction Bound", "FB", "Plasma/Blood/Serum", "267",
      "FREL", "Relative Bioavailability", "FREL", "Plasma/Blood/Serum", "268",
      "FREXINT", "Fract Excr from T1 to T2", "FREXINT_T1_T2_UNIT", "Plasma/Blood/Serum", "269",
      "FU", "Fraction Unbound", "FU", "Plasma/Blood/Serum", "270",
      "HDCL", "Hemodialysis Clearance", "HDCL", "Plasma/Blood/Serum", "271",
      "HDER", "Hemodialysis Extraction Ratio", "HDER", "Plasma/Blood/Serum", "272",
      "HTMAX", "Half Tmax", "HTMAX", "Plasma/Blood/Serum", "273",
      "LAMZLTAU", "Lambda z Lower Limit TAU", "LAMZLTAU", "Plasma/Blood/Serum", "274",
      "LAMZNTAU", "Number of Points for Lambda z TAU", "LAMZNTAU", "Plasma/Blood/Serum", "275",
      "LAMZSPN", "Lambda z Span", "LAMZSPN", "Plasma/Blood/Serum", "276",
      "LAMZTAU", "Lambda z TAU", "LAMZTAU", "Plasma/Blood/Serum", "277",
      "LAMZUTAU", "Lambda z Upper Limit TAU", "LAMZUTAU", "Plasma/Blood/Serum", "278",
      "MAT", "Mean Absorption Time", "MAT", "Plasma/Blood/Serum", "279",
      "MRAUCIFO", "Metabolite Ratio for AUC Infinity Obs", "MRAUCIFO", "Plasma/Blood/Serum", "280",
      "MRAUCIFP", "Metabolite Ratio for AUC Infinity Pred", "MRAUCIFP", "Plasma/Blood/Serum", "281",
      "MRAUCINT", "Metabolite Ratio AUC from T1 to T2", "MRAUCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "282",
      "MRAUCLST", "Metabolite Ratio AUC Last Nonzero Conc", "MRAUCLST", "Plasma/Blood/Serum", "283",
      "MRAUCTAU", "Metabolite Ratio for AUC Dosing Interval", "MRAUCTAU", "Plasma/Blood/Serum", "284",
      "MRCMAX", "Metabolite Ratio for Max Conc", "MRCMAX", "Plasma/Blood/Serum", "285",
      "MRTEVIFO", "MRT Extravasc Infinity Obs", "MRTEVIFO", "Plasma/Blood/Serum", "286",
      "MRTEVIFP", "MRT Extravasc Infinity Pred", "MRTEVIFP", "Plasma/Blood/Serum", "287",
      "MRTEVLST", "MRT Extravasc to Last Nonzero Conc", "MRTEVLST", "Plasma/Blood/Serum", "288",
      "MRTIVIFO", "MRT Intravasc Infinity Obs", "MRTIVIFO", "Plasma/Blood/Serum", "289",
      "MRTIVIFP", "MRT Intravasc Infinity Pred", "MRTIVIFP", "Plasma/Blood/Serum", "290",
      "MRTIVLST", "MRT Intravasc to Last Nonzero Conc", "MRTIVLST", "Plasma/Blood/Serum", "291",
      "NRENALCL", "Nonrenal CL", "NRENALCL", "Urine", "292",
      "NRENLCLB", "Nonrenal CL Norm by BMI", "NRENLCLB", "Urine", "293",
      "NRENLCLD", "Nonrenal CL Norm by Dose", "NRENLCLD", "Urine", "294",
      "NRENLCLS", "Nonrenal CL Norm by SA", "NRENLCLS", "Urine", "295",
      "NRENLCLW", "Nonrenal CL Norm by WT", "NRENLCLW", "Urine", "296",
      "PTROUGHR", "Peak Trough Ratio", "PTROUGHR", "Plasma/Blood/Serum", "297",
      "RAAUC", "Ratio AUC", "RAAUC", "Plasma/Blood/Serum", "298",
      "RAAUCIFO", "Ratio AUC Infinity Obs", "RAAUCIFO", "Plasma/Blood/Serum", "299",
      "RAAUCIFP", "Ratio AUC Infinity Pred", "RAAUCIFP", "Plasma/Blood/Serum", "300",
      "RACMAX", "Ratio CMAX", "RACMAX", "Plasma/Blood/Serum", "301",
      "RAMAXMIN", "Ratio of CMAX to CMIN", "RAMAXMIN", "Plasma/Blood/Serum", "302",
      "RCAMIFO", "Amt Rec Infinity Obs", "RCAMIFO", "Plasma/Blood/Serum", "303",
      "RCAMIFOB", "Amt Rec Infinity Obs Norm by BMI", "RCAMIFOB", "Plasma/Blood/Serum", "304",
      "RCAMIFOS", "Amt Rec Infinity Obs Norm by SA", "RCAMIFOS", "Plasma/Blood/Serum", "305",
      "RCAMIFOW", "Amt Rec Infinity Obs Norm by WT", "RCAMIFOW", "Plasma/Blood/Serum", "306",
      "RCAMIFP", "Amt Rec Infinity Pred", "RCAMIFP", "Plasma/Blood/Serum", "307",
      "RCAMIFPB", "Amt Rec Infinity Pred Norm by BMI", "RCAMIFPB", "Plasma/Blood/Serum", "308",
      "RCAMIFPS", "Amt Rec Infinity Pred Norm by SA", "RCAMIFPS", "Plasma/Blood/Serum", "309",
      "RCAMIFPW", "Amt Rec Infinity Pred Norm by WT", "RCAMIFPW", "Plasma/Blood/Serum", "310",
      "RCAMINTB", "Amt Rec from T1 to T2 Norm by BMI", "RCAMINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "311",
      "RCAMINTS", "Amt Rec from T1 to T2 Norm by SA", "RCAMINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "312",
      "RCAMINTW", "Amt Rec from T1 to T2 Norm by WT", "RCAMINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "313",
      "RCAMTAU", "Amt Rec Over Dosing Interval", "RCAMTAU", "Plasma/Blood/Serum", "314",
      "RCAMTAUB", "Amt Rec Over Dosing Interval Norm by BMI", "RCAMTAUB", "Plasma/Blood/Serum", "315",
      "RCAMTAUS", "Amt Rec Over Dosing Interval Norm by SA", "RCAMTAUS", "Plasma/Blood/Serum", "316",
      "RCAMTAUW", "Amt Rec Over Dosing Interval Norm by WT", "RCAMTAUW", "Plasma/Blood/Serum", "317",
      "RCPCIFO", "Pct Rec Infinity Obs", "RCPCIFO", "Plasma/Blood/Serum", "318",
      "RCPCIFOB", "Pct Rec Infinity Obs Norm by BMI", "RCPCIFOB", "Plasma/Blood/Serum", "319",
      "RCPCIFOS", "Pct Rec Infinity Obs Norm by SA", "RCPCIFOS", "Plasma/Blood/Serum", "320",
      "RCPCIFOW", "Pct Rec Infinity Obs Norm by WT", "RCPCIFOW", "Plasma/Blood/Serum", "321",
      "RCPCIFP", "Pct Rec Infinity Pred", "RCPCIFP", "Plasma/Blood/Serum", "322",
      "RCPCIFPB", "Pct Rec Infinity Pred Norm by BMI", "RCPCIFPB", "Plasma/Blood/Serum", "323",
      "RCPCIFPS", "Pct Rec Infinity Pred Norm by SA", "RCPCIFPS", "Plasma/Blood/Serum", "324",
      "RCPCIFPW", "Pct Rec Infinity Pred Norm by WT", "RCPCIFPW", "Plasma/Blood/Serum", "325",
      "RCPCINTB", "Pct Rec from T1 to T2 Norm by BMI", "RCPCINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "326",
      "RCPCINTS", "Pct Rec from T1 to T2 Norm by SA", "RCPCINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "327",
      "RCPCINTW", "Pct Rec from T1 to T2 Norm by WT", "RCPCINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "328",
      "RCPCLST", "Pct Rec to Last Nonzero Conc", "RCPCLST", "Plasma/Blood/Serum", "329",
      "RCPCTAU", "Pct Rec Over Dosing Interval", "RCPCTAU", "Plasma/Blood/Serum", "330",
      "RCPCTAUB", "Pct Rec Over Dosing Interval Norm by BMI", "RCPCTAUB", "Plasma/Blood/Serum", "331",
      "RCPCTAUS", "Pct Rec Over Dosing Interval Norm by SA", "RCPCTAUS", "Plasma/Blood/Serum", "332",
      "RCPCTAUW", "Pct Rec Over Dosing Interval Norm by WT", "RCPCTAUW", "Plasma/Blood/Serum", "333",
      "RENALCLB", "Renal CL Norm by BMI", "RENALCLB", "Urine", "334",
      "RENALCLD", "Renal CL Norm by Dose", "RENALCLD", "Urine", "335",
      "RENALCLS", "Renal CL Norm by SA", "RENALCLS", "Urine", "336",
      "RENALCLW", "Renal CL Norm by WT", "RENALCLW", "Urine", "337",
      "RENCLTAU", "Renal CL for Dose Int", "RENCLTAU", "Urine", "338",
      "RNCLINT", "Renal CL from T1 to T2", "RNCLINT_T1_T2_UNIT", "Urine", "339",
      "RNCLINTB", "Renal CL from T1 to T2 Norm by BMI", "RNCLINTB_T1_T2_UNIT", "Urine", "340",
      "RNCLINTD", "Renal CL from T1 to T2 Norm by Dose", "RNCLINTD_T1_T2_UNIT", "Urine", "341",
      "RNCLINTS", "Renal CL from T1 to T2 Norm by SA", "RNCLINTS_T1_T2_UNIT", "Urine", "342",
      "RNCLINTW", "Renal CL from T1 to T2 Norm by WT", "RNCLINTW_T1_T2_UNIT", "Urine", "343",
      "RNCLTAUB", "Renal CL for Dose Int Norm by BMI", "RNCLTAUB", "Urine", "344",
      "RNCLTAUD", "Renal CL for Dose Int Norm by Dose", "RNCLTAUD", "Urine", "345",
      "RNCLTAUS", "Renal CL for Dose Int Norm by SA", "RNCLTAUS", "Urine", "346",
      "RNCLTAUW", "Renal CL for Dose Int Norm by WT", "RNCLTAUW", "Urine", "347",
      "RNCLUB", "Renal CL for Unbound Drug", "RNCLUB", "Urine", "348",
      "SRAUC", "Stationarity Ratio AUC", "SRAUC", "Plasma/Blood/Serum", "349",
      "SWING", "Swing", "SWING", "Plasma/Blood/Serum", "350",
      "TAUHL", "Half-Life TAU", "TAUHL", "Plasma/Blood/Serum", "351",
      "TBBL", "Time Below Baseline", "Time_Below_B", "Plasma/Blood/Serum", "352",
      "TROUGHPR", "Trough Peak Ratio", "TROUGHPR", "Plasma/Blood/Serum", "353",
      "V0", "Vol Dist Initial", "V0", "Plasma/Blood/Serum", "354",
      "V0B", "Vol Dist Initial Norm by BMI", "V0B", "Plasma/Blood/Serum", "355",
      "V0D", "Vol Dist Initial Norm by Dose", "V0D", "Plasma/Blood/Serum", "356",
      "V0S", "Vol Dist Initial Norm by SA", "V0S", "Plasma/Blood/Serum", "357",
      "V0W", "Vol Dist Initial Norm by WT", "V0W", "Plasma/Blood/Serum", "358",
      "VSSOB", "Vol Dist Steady State Obs Norm by BMI", "VSSOB", "Plasma/Blood/Serum", "359",
      "VSSOBD", "Vol Dist Steady State Obs by B", "VSSOBD", "Plasma/Blood/Serum", "360",
      "VSSOD", "Vol Dist Steady State Obs Norm by Dose", "VSSOD", "Plasma/Blood/Serum", "361",
      "VSSOF", "Vol Dist Steady State Obs by F", "VSSOF", "Plasma/Blood/Serum", "362",
      "VSSOS", "Vol Dist Steady State Obs Norm by SA", "VSSOS", "Plasma/Blood/Serum", "363",
      "VSSOUB", "Vol Dist Steady State Obs by UB", "VSSOUB", "Plasma/Blood/Serum", "364",
      "VSSOW", "Vol Dist Steady State Obs Norm by WT", "VSSOW", "Plasma/Blood/Serum", "365",
      "VSSPB", "Vol Dist Steady State Pred Norm by BMI", "VSSPB", "Plasma/Blood/Serum", "366",
      "VSSPBD", "Vol Dist Steady State Pred by B", "VSSPBD", "Plasma/Blood/Serum", "367",
      "VSSPD", "Vol Dist Steady State Pred Norm by Dose", "VSSPD", "Plasma/Blood/Serum", "368",
      "VSSPF", "Vol Dist Steady State Pred by F", "VSSPF", "Plasma/Blood/Serum", "369",
      "VSSPS", "Vol Dist Steady State Pred Norm by SA", "VSSPS", "Plasma/Blood/Serum", "370",
      "VSSPUB", "Vol Dist Steady State Pred by UB", "VSSPUB", "Plasma/Blood/Serum", "371",
      "VSSPW", "Vol Dist Steady State Pred Norm by WT", "VSSPW", "Plasma/Blood/Serum", "372",
      "VZ", "Vol Z", "Vz", "Plasma/Blood/Serum", "373",
      "VZF", "Vol Z by F", "Vz_F", "Plasma/Blood/Serum", "374",
      "VZFOB", "Vz Obs by F Norm by BMI", "VZFOB", "Plasma/Blood/Serum", "375",
      "VZFOD", "Vz Obs by F Norm by Dose", "VZFOD", "Plasma/Blood/Serum", "376",
      "VZFOS", "Vz Obs by F Norm by SA", "VZFOS", "Plasma/Blood/Serum", "377",
      "VZFOUB", "Vz Obs by F for UB", "VZFOUB", "Plasma/Blood/Serum", "378",
      "VZFOW", "Vz Obs by F Norm by WT", "VZFOW", "Plasma/Blood/Serum", "379",
      "VZFPB", "Vz Pred by F Norm by BMI", "VZFPB", "Plasma/Blood/Serum", "380",
      "VZFPD", "Vz Pred by F Norm by Dose", "VZFPD", "Plasma/Blood/Serum", "381",
      "VZFPS", "Vz Pred by F Norm by SA", "VZFPS", "Plasma/Blood/Serum", "382",
      "VZFPUB", "Vz Pred by F for UB", "VZFPUB", "Plasma/Blood/Serum", "383",
      "VZFPW", "Vz Pred by F Norm by WT", "VZFPW", "Plasma/Blood/Serum", "384",
      "VZFTAU", "Vz for Dose Int by F", "VZFTAU", "Plasma/Blood/Serum", "385",
      "VZFTAUB", "Vz for Dose Int by F Norm by BMI", "VZFTAUB", "Plasma/Blood/Serum", "386",
      "VZFTAUD", "Vz for Dose Int by F Norm by Dose", "VZFTAUD", "Plasma/Blood/Serum", "387",
      "VZFTAUS", "Vz for Dose Int by F Norm by SA", "VZFTAUS", "Plasma/Blood/Serum", "388",
      "VZFTAUW", "Vz for Dose Int by F Norm by WT", "VZFTAUW", "Plasma/Blood/Serum", "389",
      "VZOB", "Vz Obs Norm by BMI", "VZOB", "Plasma/Blood/Serum", "390",
      "VZOD", "Vz Obs Norm by Dose", "VZOD", "Plasma/Blood/Serum", "391",
      "VZOS", "Vz Obs Norm by SA", "VZOS", "Plasma/Blood/Serum", "392",
      "VZOUB", "Vz Obs for UB", "VZOUB", "Plasma/Blood/Serum", "393",
      "VZOW", "Vz Obs Norm by WT", "VZOW", "Plasma/Blood/Serum", "394",
      "VZPB", "Vz Pred Norm by BMI", "VZPB", "Plasma/Blood/Serum", "395",
      "VZPD", "Vz Pred Norm by Dose", "VZPD", "Plasma/Blood/Serum", "396",
      "VZPS", "Vz Pred Norm by SA", "VZPS", "Plasma/Blood/Serum", "397",
      "VZPUB", "Vz Pred for UB", "VZPUB", "Plasma/Blood/Serum", "398"
    ),
    ncol = 5,
    byrow = TRUE
  ))
  colnames(pk_dataset) <- c("PARAMCD", "PARAM", "TLG_DISPLAY", "MATRIX", "TLG_ORDER")
  pk_dataset
}

#' Control function for logistic regression model fitting
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for logistic regression models.
#' `conf_level` refers to the confidence level used for the Odds Ratio CIs.
#'
#' @inheritParams argument_convention
#' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`.
#'   This will be used when fitting the logistic regression model on the left hand side of the formula.
#'   Note that the evaluated expression should result in either a logical vector or a factor with 2
#'   levels. By default this is just `"response"` such that the original response variable is used
#'   and not modified further.
#'
#' @return A list of components with the same names as the arguments.
#'
#' @examples
#' # Standard options.
#' control_logistic()
#'
#' # Modify confidence level.
#' control_logistic(conf_level = 0.9)
#'
#' # Use a different response definition.
#' control_logistic(response_definition = "I(response %in% c('CR', 'PR'))")
#'
#' @export
control_logistic <- function(response_definition = "response",
                             conf_level = 0.95) {
  # Check the type first: `grepl()` yields one value per element (and none for `NULL`),
  # so a non-string input would otherwise only surface as an opaque "must be TRUE" failure.
  checkmate::assert_string(response_definition)
  # The definition must mention `response` literally, hence the fixed (non-regex) match.
  checkmate::assert_true(grepl("response", response_definition, fixed = TRUE))
  assert_proportion_value(conf_level)
  # Return the validated settings under the same names as the arguments.
  list(
    response_definition = response_definition,
    conf_level = conf_level
  )
}

#' Compare variables between groups
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [compare_vars()] creates a layout element to summarize and compare one or more variables, using
#' the S3 generic function [s_summary()] to calculate a list of summary statistics. A list of all available statistics
#' for numeric variables can be viewed by running `get_stats("analyze_vars_numeric", add_pval = TRUE)` and for
#' non-numeric variables by running `get_stats("analyze_vars_counts", add_pval = TRUE)`. Use the `.stats` parameter to
#' specify the statistics to include in your output summary table.
#'
#' Prior to using this function in your table layout you must use [rtables::split_cols_by()] to create a column
#' split on the variable to be used in comparisons, and specify a reference group via the `ref_group` parameter.
#' Comparisons can be performed for each group (column) against the specified reference group by including the p-value
#' statistic.
#'
#' @inheritParams argument_convention
#' @param .stats (`character`)\cr statistics to select for the table.
#'
#'  Options for numeric variables are: ``r shQuote(get_stats("analyze_vars_numeric", add_pval = TRUE), type = "sh")``
#'
#'  Options for non-numeric variables are: ``r shQuote(get_stats("analyze_vars_counts", add_pval = TRUE), type = "sh")``
#'
#' @note
#' * For factor variables, `denom` for factor proportions can only be `n` since the purpose is to compare proportions
#'   between columns, therefore a row-based proportion would not make sense. Proportion based on `N_col` would
#'   be difficult since we use counts for the chi-squared test statistic, therefore missing values should be accounted
#'   for as explicit factor levels.
#' * If factor variables contain `NA`, these `NA` values are excluded by default. To include `NA` values
#'   set `na.rm = FALSE` and missing values will be displayed as an `NA` level. Alternatively, an explicit
#'   factor level can be defined for `NA` values during pre-processing via [df_explicit_na()].
#' * For character variables, automatic conversion to factor does not guarantee that the table
#'   will be generated correctly. In particular, this is likely to fail for sparse tables.
#'   Therefore it is always better to manually convert character variables to factors during pre-processing.
#' * For `compare_vars()`, the column split must define a reference group via `ref_group` so that the comparison
#'   is well defined.
#'
#' @seealso [s_summary()] which is used internally to compute a summary within `s_compare()`, and [a_summary()]
#'   which is used (with `compare = TRUE`) as the analysis function for `compare_vars()`.
#'
#' @name compare_variables
#' @include analyze_variables.R
#' @order 1
NULL

#' @describeIn compare_variables S3 generic function to produce a comparison summary.
#'
#' @return
#' * `s_compare()` returns output of [s_summary()] and comparisons versus the reference group in the form of p-values.
#'
#' @export
s_compare <- function(x, ...) {
  # S3 generic: pick the method from the class of `x`; everything in `...` is
  # handed on to the selected method unchanged.
  UseMethod("s_compare", x)
}

#' @describeIn compare_variables Method for `numeric` class. This uses the standard t-test
#'   to calculate the p-value.
#'
#' @method s_compare numeric
#'
#' @examples
#' # `s_compare.numeric`
#'
#' ## Usual case where both this and the reference group vector have more than 1 value.
#' s_compare(rnorm(10, 5, 1), .ref_group = rnorm(5, -5, 1), .in_ref_col = FALSE)
#'
#' ## If one group has not more than 1 value, then p-value is not calculated.
#' s_compare(rnorm(10, 5, 1), .ref_group = 1, .in_ref_col = FALSE)
#'
#' ## Empty numeric does not fail, it returns NA-filled items and no p-value.
#' s_compare(numeric(), .ref_group = numeric(), .in_ref_col = FALSE)
#'
#' @export
s_compare.numeric <- function(x, ...) {
  # Delegate to the numeric summary method with the reference-group comparison switched on.
  s_summary.numeric(
    x = x,
    compare_with_ref_group = TRUE,
    ...
  )
}

#' @describeIn compare_variables Method for `factor` class. This uses the chi-squared test
#'   to calculate the p-value.
#'
#' @method s_compare factor
#'
#' @examples
#' # `s_compare.factor`
#'
#' ## Basic usage:
#' x <- factor(c("a", "a", "b", "c", "a"))
#' y <- factor(c("a", "b", "c"))
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE)
#'
#' ## Management of NA values.
#' x <- explicit_na(factor(c("a", "a", "b", "c", "a", NA, NA)))
#' y <- explicit_na(factor(c("a", "b", "c", NA)))
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na_rm = TRUE)
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na_rm = FALSE)
#'
#' @export
s_compare.factor <- function(x, ...) {
  # Delegate to the factor summary method with the reference-group comparison switched on.
  s_summary.factor(x = x, compare_with_ref_group = TRUE, ...)
}

#' @describeIn compare_variables Method for `character` class. This makes an automatic
#'   conversion to `factor` (with a warning) and then forwards to the method for factors.
#'
#' @method s_compare character
#'
#' @examples
#' # `s_compare.character`
#'
#' ## Basic usage:
#' x <- c("a", "a", "b", "c", "a")
#' y <- c("a", "b", "c")
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE)
#'
#' ## Note that missing values handling can make a large difference:
#' x <- c("a", "a", "b", "c", "a", NA)
#' y <- c("a", "b", "c", rep(NA, 20))
#' s_compare(x,
#'   .ref_group = y, .in_ref_col = FALSE,
#'   .var = "x", verbose = FALSE
#' )
#' s_compare(x,
#'   .ref_group = y, .in_ref_col = FALSE, .var = "x",
#'   na.rm = FALSE, verbose = FALSE
#' )
#'
#' @export
s_compare.character <- function(x, ...) {
  # Delegate to the character summary method with the reference-group comparison switched on.
  # `x` is deliberately passed by position, as in the other call sites of this method.
  s_summary.character(x, compare_with_ref_group = TRUE, ...)
}

#' @describeIn compare_variables Method for `logical` class. A chi-squared test
#'   is used. If missing values are not removed, then they are counted as `FALSE`.
#'
#' @method s_compare logical
#'
#' @examples
#' # `s_compare.logical`
#'
#' ## Basic usage:
#' x <- c(TRUE, FALSE, TRUE, TRUE)
#' y <- c(FALSE, FALSE, TRUE)
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE)
#'
#' ## Management of NA values.
#' x <- c(NA, TRUE, FALSE)
#' y <- c(NA, NA, NA, NA, FALSE)
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na_rm = TRUE)
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na_rm = FALSE)
#'
#' @export
s_compare.logical <- function(x, ...) {
  # Delegate to the logical summary method with the reference-group comparison switched on.
  s_summary.logical(x = x, compare_with_ref_group = TRUE, ...)
}

#' @describeIn compare_variables Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... additional arguments passed to `s_compare()`, including:
#'   * `denom`: (`string`) choice of denominator. Options are `c("n", "N_col", "N_row")`. For factor variables, can
#'     only be `"n"` (number of values in this row and column intersection).
#'   * `.N_row`: (`numeric(1)`) Row-wise N (row group count) for the group of observations being analyzed (i.e. with no
#'     column-based subsetting).
#'   * `.N_col`: (`numeric(1)`) Column-wise N (column count) for the full column being tabulated within.
#'   * `verbose`: (`flag`) Whether additional warnings and messages should be printed. Mainly used to print out
#'     information about factor casting. Defaults to `TRUE`. Used for `character`/`factor` variables only.
#' @param .indent_mods (named `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return
#' * `compare_vars()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_compare()` to the table layout.
#'
#' @examples
#' # `compare_vars()` in `rtables` pipelines
#'
#' ## Default output within a `rtables` pipeline.
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", ref_group = "ARM B") %>%
#'   compare_vars(c("AGE", "SEX"))
#' build_table(lyt, tern_ex_adsl)
#'
#' ## Select and format statistics output.
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", ref_group = "ARM C") %>%
#'   compare_vars(
#'     vars = "AGE",
#'     .stats = c("mean_sd", "pval"),
#'     .formats = c(mean_sd = "xx.x, xx.x"),
#'     .labels = c(mean_sd = "Mean, SD")
#'   )
#' build_table(lyt, df = tern_ex_adsl)
#'
#' @export
#' @order 2
compare_vars <- function(lyt,
                         vars,
                         var_labels = vars,
                         na_str = default_na_str(),
                         nested = TRUE,
                         ...,
                         na_rm = TRUE,
                         show_labels = "default",
                         table_names = vars,
                         section_div = NA_character_,
                         .stats = c("n", "mean_sd", "count_fraction", "pval"),
                         .stat_names = NULL,
                         .formats = NULL,
                         .labels = NULL,
                         .indent_mods = NULL) {
  # Thin wrapper around `analyze_vars()`: every argument is forwarded by name and the
  # comparison against the reference group is always enabled.
  analyze_vars(
    # Layout and variables to analyze.
    lyt = lyt,
    vars = vars,
    var_labels = var_labels,
    table_names = table_names,
    # Table structure and display options.
    nested = nested,
    show_labels = show_labels,
    section_div = section_div,
    # Missing-value handling.
    na_str = na_str,
    na_rm = na_rm,
    # Statistics selection and presentation.
    .stats = .stats,
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    # What distinguishes this function from a plain `analyze_vars()` call.
    compare_with_ref_group = TRUE,
    ...
  )
}

#' Class for `CombinationFunction`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' `CombinationFunction` is an S4 class which extends standard functions. These are special functions that
#' can be combined and negated with the logical operators.
#'
#' @param e1 (`CombinationFunction`)\cr left hand side of logical operator.
#' @param e2 (`CombinationFunction`)\cr right hand side of logical operator.
#' @param x (`CombinationFunction`)\cr the function which should be negated.
#'
#' @return A `CombinationFunction` object, which can be called like a regular function and combined or
#'   negated with the logical operators `&`, `|` and `!`.
#'
#' @examples
#' higher <- function(a) {
#'   force(a)
#'   CombinationFunction(
#'     function(x) {
#'       x > a
#'     }
#'   )
#' }
#'
#' lower <- function(b) {
#'   force(b)
#'   CombinationFunction(
#'     function(x) {
#'       x < b
#'     }
#'   )
#' }
#'
#' c1 <- higher(5)
#' c2 <- lower(10)
#' c3 <- higher(5) & lower(10)
#' c3(7)
#'
#' @name combination_function
#' @aliases CombinationFunction-class
#' @exportClass CombinationFunction
#' @export CombinationFunction
CombinationFunction <- methods::setClass( # nolint
  Class = "CombinationFunction",
  # Extends plain functions, so instances stay directly callable.
  contains = "function"
)

#' @describeIn combination_function Logical "AND" combination of `CombinationFunction` functions.
#'   The resulting object is of the same class, and evaluates the two argument functions. The result
#'   is then the "AND" of the two individual results.
#'
#' @export
methods::setMethod(
  f = "&",
  signature = c(e1 = "CombinationFunction", e2 = "CombinationFunction"),
  definition = function(e1, e2) {
    # Both operands receive the same arguments; `&&` short-circuits, so `e2`
    # is only evaluated when `e1` did not already return `FALSE`.
    both <- function(...) {
      e1(...) && e2(...)
    }
    # Wrap the result so it can itself be combined or negated again.
    CombinationFunction(both)
  }
)

#' @describeIn combination_function Logical "OR" combination of `CombinationFunction` functions.
#'   The resulting object is of the same class, and evaluates the two argument functions. The result
#'   is then the "OR" of the two individual results.
#'
#' @export
methods::setMethod(
  f = "|",
  signature = c(e1 = "CombinationFunction", e2 = "CombinationFunction"),
  definition = function(e1, e2) {
    # Both operands receive the same arguments; `||` short-circuits, so `e2`
    # is only evaluated when `e1` did not already return `TRUE`.
    either <- function(...) {
      e1(...) || e2(...)
    }
    # Wrap the result so it can itself be combined or negated again.
    CombinationFunction(either)
  }
)

#' @describeIn combination_function Logical negation of `CombinationFunction` functions.
#'   The resulting object is of the same class, and evaluates the original function. The result
#'   is then the opposite of this result.
#'
#' @export
methods::setMethod(
  f = "!",
  signature = c(x = "CombinationFunction"),
  definition = function(x) {
    # Evaluate the original function on the given arguments and flip its result.
    negated <- function(...) {
      !x(...)
    }
    # Wrap the result so it can itself be combined or negated again.
    CombinationFunction(negated)
  }
)

#' Control function for Cox-PH model
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for Cox-PH model, typically used internally to specify
#' details of Cox-PH model for [s_coxph_pairwise()]. `conf_level` refers to Hazard Ratio estimation.
#'
#' @inheritParams argument_convention
#' @param pval_method (`string`)\cr p-value method for testing hazard ratio = 1.
#'   Default method is `"log-rank"`, can also be set to `"wald"` or `"likelihood"`.
#' @param ties (`string`)\cr string specifying the method for tie handling. Default is `"efron"`,
#'   can also be set to `"breslow"` or `"exact"`. See more in [survival::coxph()].
#'
#' @return A list of components with the same names as the arguments.
#'
#' @export
control_coxph <- function(pval_method = c("log-rank", "wald", "likelihood"),
                          ties = c("efron", "breslow", "exact"),
                          conf_level = 0.95) {
  # Resolve each enumerated argument to a single choice (the first one by default).
  pval_method <- match.arg(pval_method)
  ties <- match.arg(ties)
  # The confidence level is validated as a proportion.
  assert_proportion_value(conf_level)

  # Return the validated settings under the same names as the arguments.
  list(
    pval_method = pval_method,
    ties = ties,
    conf_level = conf_level
  )
}

#' Control function for `survfit` models for survival time
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for `survfit` model, typically used internally to specify
#' details of `survfit` model for [s_surv_time()]. `conf_level` refers to survival time estimation.
#'
#' @inheritParams argument_convention
#' @param conf_type (`string`)\cr confidence interval type. Options are "plain" (default), "log", "log-log",
#'   see more in [survival::survfit()]. Note option "none" is no longer supported.
#' @param quantiles (`numeric(2)`)\cr vector of length two specifying the quantiles of survival time.
#'
#' @return A list of components with the same names as the arguments.
#'
#' @export
control_surv_time <- function(conf_level = 0.95,
                              conf_type = c("plain", "log", "log-log"),
                              quantiles = c(0.25, 0.75)) {
  # Resolve the confidence interval type to a single choice (the first one by default).
  conf_type <- match.arg(conf_type)
  # Exactly two distinct values in increasing order, each within [0, 1].
  checkmate::assert_numeric(
    quantiles,
    lower = 0,
    upper = 1,
    len = 2,
    unique = TRUE,
    sorted = TRUE
  )
  # Each quantile is additionally validated as a proportion, one value at a time.
  for (quantile_value in quantiles) {
    assert_proportion_value(quantile_value)
  }
  assert_proportion_value(conf_level)
  # Return the validated settings under the same names as the arguments.
  list(
    conf_level = conf_level,
    conf_type = conf_type,
    quantiles = quantiles
  )
}

#' Control function for `survfit` models for patients' survival rate at time points
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for `survfit` model, typically used internally to specify
#' details of `survfit` model for [s_surv_timepoint()]. `conf_level` refers to patient risk estimation at a time point.
#'
#' @inheritParams argument_convention
#' @inheritParams control_surv_time
#'
#' @return A list of components with the same names as the arguments.
#'
#' @export
control_surv_timepoint <- function(conf_level = 0.95,
                                   conf_type = c("plain", "log", "log-log")) {
  # Resolve the confidence interval type to a single choice (the first one by default).
  conf_type <- match.arg(conf_type)
  # The confidence level is validated as a proportion.
  assert_proportion_value(conf_level)
  # Return the validated settings under the same names as the arguments.
  list(conf_level = conf_level, conf_type = conf_type)
}

#' Helper function to create a new SMQ variable in ADAE by stacking SMQ and/or CQ records.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to create a new SMQ variable in ADAE that consists of all adverse events belonging to
#' selected Standardized/Customized queries. The new dataset will only contain records of the adverse events
#' belonging to any of the selected baskets. Remember that `na_str` must match the needed pre-processing
#' done with [df_explicit_na()] to have the desired output.
#'
#' @inheritParams argument_convention
#' @param baskets (`character`)\cr variable names of the selected Standardized/Customized queries.
#' @param smq_varlabel (`string`)\cr a label for the new variable created.
#' @param keys (`character`)\cr names of the key variables to be returned along with the new variable created.
#' @param aag_summary (`data.frame`)\cr containing the SMQ baskets and the levels of interest for the final SMQ
#'   variable. This is useful when there are some levels of interest that are not observed in the `df` dataset.
#'   The two columns of this dataset should be named `basket` and `basket_name`.
#'
#' @return A `data.frame` with variables in `keys` taken from `df` and new variable SMQ containing
#'   records belonging to the baskets selected via the `baskets` argument.
#'
#' @examples
#' adae <- tern_ex_adae[1:20, ] %>% df_explicit_na()
#' h_stack_by_baskets(df = adae)
#'
#' aag <- data.frame(
#'   NAMVAR = c("CQ01NAM", "CQ02NAM", "SMQ01NAM", "SMQ02NAM"),
#'   REFNAME = c(
#'     "D.2.1.5.3/A.1.1.1.1 aesi", "X.9.9.9.9/Y.8.8.8.8 aesi",
#'     "C.1.1.1.3/B.2.2.3.1 aesi", "C.1.1.1.3/B.3.3.3.3 aesi"
#'   ),
#'   SCOPE = c("", "", "BROAD", "BROAD"),
#'   stringsAsFactors = FALSE
#' )
#'
#' basket_name <- character(nrow(aag))
#' cq_pos <- grep("^(CQ).+NAM$", aag$NAMVAR)
#' smq_pos <- grep("^(SMQ).+NAM$", aag$NAMVAR)
#' basket_name[cq_pos] <- aag$REFNAME[cq_pos]
#' basket_name[smq_pos] <- paste0(
#'   aag$REFNAME[smq_pos], "(", aag$SCOPE[smq_pos], ")"
#' )
#'
#' aag_summary <- data.frame(
#'   basket = aag$NAMVAR,
#'   basket_name = basket_name,
#'   stringsAsFactors = TRUE
#' )
#'
#' result <- h_stack_by_baskets(df = adae, aag_summary = aag_summary)
#' all(levels(aag_summary$basket_name) %in% levels(result$SMQ))
#'
#' h_stack_by_baskets(
#'   df = adae,
#'   aag_summary = NULL,
#'   keys = c("STUDYID", "USUBJID", "AEDECOD", "ARM"),
#'   baskets = "SMQ01NAM"
#' )
#'
#' @export
h_stack_by_baskets <- function(df,
                               baskets = grep("^(SMQ|CQ).+NAM$", names(df), value = TRUE),
                               smq_varlabel = "Standardized MedDRA Query",
                               keys = c("STUDYID", "USUBJID", "ASTDTM", "AEDECOD", "AESEQ"),
                               aag_summary = NULL,
                               na_str = "<Missing>") {
  # Validate the inputs before anything is derived from them, so that a wrong type is
  # reported by the assertion instead of by a base function further down.
  checkmate::assert_character(baskets)
  checkmate::assert_string(smq_varlabel)
  checkmate::assert_data_frame(df)
  checkmate::assert_true(all(startsWith(baskets, "SMQ") | startsWith(baskets, "CQ")))
  checkmate::assert_true(all(endsWith(baskets, "NAM")))
  checkmate::assert_subset(baskets, names(df))
  checkmate::assert_subset(keys, names(df))

  # Each SMQxxxNAM basket has a companion SMQxxxSC variable (scope) that must exist in `df`.
  smq_nam <- baskets[startsWith(baskets, "SMQ")]
  smq_sc <- gsub(pattern = "NAM", replacement = "SC", x = smq_nam, fixed = TRUE)
  smq <- stats::setNames(smq_sc, smq_nam)
  checkmate::assert_subset(smq_sc, names(df))
  checkmate::assert_string(na_str)

  if (!is.null(aag_summary)) {
    assert_df_with_variables(
      df = aag_summary,
      variables = list(val = c("basket", "basket_name"))
    )
    # Warn when `aag_summary$basket` and the `baskets` argument have nothing in common.
    if (length(intersect(baskets, unique(aag_summary$basket))) == 0) {
      warning("There are 0 baskets in common between aag_summary$basket and `baskets` argument.")
    }
  }

  # `drop = FALSE` keeps a data frame even when a single key is requested.
  var_labels <- c(formatters::var_labels(df[, keys, drop = FALSE]), "SMQ" = smq_varlabel)

  # Convert `na_str` records from baskets to NA for the later loop and the wide-to-long step.
  df[, c(baskets, smq_sc)][df[, c(baskets, smq_sc)] == na_str] <- NA

  if (all(is.na(df[baskets]))) {
    # No level for the target baskets: an empty data frame that keeps all factor levels is enough.
    df_long <- df[-seq_len(nrow(df)), keys, drop = FALSE]
  } else {
    # Start from the keys plus the CQ baskets, which are used as they are.
    df_cnct <- df[, c(keys, baskets[startsWith(baskets, "CQ")]), drop = FALSE]

    # Concatenate each SMQxxxNAM with its corresponding SMQxxxSC as "<name>(<scope>)".
    for (nam in names(smq)) {
      sc <- smq[nam] # SMQxxxSC corresponding to SMQxxxNAM
      nam_notna <- !is.na(df[[nam]])
      new_colname <- paste(nam, sc, sep = "_")
      df_cnct[nam_notna, new_colname] <- paste0(df[[nam]], "(", df[[sc]], ")")[nam_notna]
    }

    # Row identifier so that every input record is a distinct unit in the reshape.
    df_cnct$unique_id <- seq_len(nrow(df_cnct))
    var_cols <- names(df_cnct)[!(names(df_cnct) %in% c(keys, "unique_id"))]

    # `df_cnct` is converted to a plain data frame first: `reshape()` creates row names,
    # which tibbles do not support and would warn about.
    df_long <- stats::reshape(
      data = as.data.frame(df_cnct),
      varying = var_cols,
      v.names = "SMQ",
      idvar = names(df_cnct)[names(df_cnct) %in% c(keys, "unique_id")],
      direction = "long",
      new.row.names = seq_len(length(var_cols) * nrow(df_cnct))
    )

    # Keep only records that belong to a basket and drop the reshape bookkeeping columns.
    df_long <- df_long[
      !is.na(df_long[, "SMQ"]),
      !(names(df_long) %in% c("time", "unique_id")),
      drop = FALSE
    ]
    df_long$SMQ <- as.factor(df_long$SMQ)
  }

  smq_levels <- setdiff(levels(df_long[["SMQ"]]), na_str)

  if (!is.null(aag_summary)) {
    # Warn when there is no match between the observed levels and the `aag_summary` records.
    if (length(intersect(smq_levels, unique(aag_summary$basket_name))) == 0) {
      warning("There are 0 basket levels in common between aag_summary$basket_name and df.")
    }
    # Observed levels plus the levels of interest that were not observed.
    df_long[["SMQ"]] <- factor(
      df_long[["SMQ"]],
      levels = sort(
        c(
          smq_levels,
          setdiff(unique(aag_summary$basket_name), smq_levels)
        )
      )
    )
  } else {
    # `df[baskets]` always yields one element per basket column. `df[, baskets]` would
    # collapse to a plain vector for a single basket in a base data frame, and the
    # all-NA check would then run per row instead of per basket.
    all_na_basket_flag <- vapply(df[baskets], function(x) {
      all(is.na(x))
    }, FUN.VALUE = logical(1))
    all_na_basket <- baskets[all_na_basket_flag]

    # Baskets without any record are kept as levels, under their variable name.
    df_long[["SMQ"]] <- factor(
      df_long[["SMQ"]],
      levels = sort(c(smq_levels, all_na_basket))
    )
  }
  formatters::var_labels(df_long) <- var_labels
  tibble::tibble(df_long)
}

1		#' Control function for Cox regression
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Sets a list of parameters for Cox regression fit. Used internally.
6		#'
7		#' @inheritParams argument_convention
8		#' @param pval_method (`string`)\cr the method used for estimation of p.values; `wald` (default) or `likelihood`.
9		#' @param interaction (`flag`)\cr if `TRUE`, the model includes the interaction between the studied
10		#' treatment and candidate covariate. Note that for univariate models without treatment arm, and
11		#' multivariate models, no interaction can be used so that this needs to be `FALSE`.
12		#' @param ties (`string`)\cr among `exact` (equivalent to `DISCRETE` in SAS), `efron` and `breslow`,
13		#' see [survival::coxph()]. Note: there is no equivalent of SAS `EXACT` method in R.
14		#'
15		#' @return A `list` of items with names corresponding to the arguments.
16		#'
17		#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()].
18		#'
19		#' @examples
20		#' control_coxreg()
21		#'
22		#' @export
control_coxreg <- function(pval_method = c("wald", "likelihood"),
                           ties = c("exact", "efron", "breslow"),
                           conf_level = 0.95,
                           interaction = FALSE) {
  # Resolve the enumerated choices to a single value each (first entry is the default).
  chosen_pval_method <- match.arg(pval_method)
  chosen_ties <- match.arg(ties)

  # Validate the remaining settings before bundling them.
  checkmate::assert_flag(interaction)
  assert_proportion_value(conf_level)

  # Element names mirror the argument names.
  list(
    pval_method = chosen_pval_method,
    ties = chosen_ties,
    conf_level = conf_level,
    interaction = interaction
  )
}
38
39		#' Custom tidy methods for Cox regression
40		#'
41		#' @description `r lifecycle::badge("stable")`
42		#'
43		#' @inheritParams argument_convention
44		#' @param x (`list`)\cr result of the Cox regression model fitted by [fit_coxreg_univar()] (for univariate models)
45		#' or [fit_coxreg_multivar()] (for multivariate models).
46		#'
47		#' @return [broom::tidy()] returns:
48		#' * For `summary.coxph` objects, a `data.frame` with columns: `Pr(>|z|)`, `exp(coef)`, `exp(-coef)`, `lower .95`,
49		#' `upper .95`, `level`, and `n`.
50		#' * For `coxreg.univar` objects, a `data.frame` with columns: `effect`, `term`, `term_label`, `level`, `n`, `hr`,
51		#' `lcl`, `ucl`, `pval`, and `ci`.
52		#' * For `coxreg.multivar` objects, a `data.frame` with columns: `term`, `pval`, `term_label`, `hr`, `lcl`, `ucl`,
53		#' `level`, and `ci`.
54		#'
55		#' @seealso [cox_regression]
56		#'
57		#' @name tidy_coxreg
58		NULL
59
60		#' @describeIn tidy_coxreg Custom tidy method for [survival::coxph()] summary results.
61		#'
62		#' Tidy the [survival::coxph()] results into a `data.frame` to extract model results.
63		#'
64		#' @method tidy summary.coxph
65		#'
66		#' @examples
67		#' library(survival)
68		#' library(broom)
69		#'
70		#' set.seed(1, kind = "Mersenne-Twister")
71		#'
72		#' dta_bladder <- with(
73		#' data = bladder[bladder$enum < 5, ],
74		#' data.frame(
75		#' time = stop,
76		#' status = event,
77		#' armcd = as.factor(rx),
78		#' covar1 = as.factor(enum),
79		#' covar2 = factor(
80		#' sample(as.factor(enum)),
81		#' levels = 1:4, labels = c("F", "F", "M", "M")
82		#' )
83		#' )
84		#' )
85		#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
86		#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
87		#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
88		#'
89		#' formula <- "survival::Surv(time, status) ~ armcd + covar1"
90		#' msum <- summary(coxph(stats::as.formula(formula), data = dta_bladder))
91		#' tidy(msum)
92		#'
93		#' @export
tidy.summary.coxph <- function(x, # nolint
                               ...) {
  checkmate::assert_class(x, "summary.coxph")

  # The row names of the coefficient matrix are kept as the `level` column;
  # they are captured here, before the matrices are converted to tibbles.
  coef_mat <- x$coefficients
  term_levels <- rownames(coef_mat)

  coef_tbl <- tibble::as_tibble(coef_mat)
  ci_tbl <- tibble::as_tibble(x$conf.int)

  # Only the column(s) whose name contains "Pr" are taken from the coefficient
  # table; the confidence-interval table is kept in full.
  is_pval_col <- grepl("Pr", names(coef_tbl))
  tidied <- cbind(coef_tbl[, is_pval_col], ci_tbl)
  tidied$level <- term_levels
  tidied$n <- x[["n"]]
  tidied
}
109
110		#' @describeIn tidy_coxreg Custom tidy method for a univariate Cox regression.
111		#'
112		#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_univar()].
113		#'
114		#' @method tidy coxreg.univar
115		#'
116		#' @examples
117		#' ## Cox regression: arm + 1 covariate.
118		#' mod1 <- fit_coxreg_univar(
119		#' variables = list(
120		#' time = "time", event = "status", arm = "armcd",
121		#' covariates = "covar1"
122		#' ),
123		#' data = dta_bladder,
124		#' control = control_coxreg(conf_level = 0.91)
125		#' )
126		#'
127		#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates.
128		#' mod2 <- fit_coxreg_univar(
129		#' variables = list(
130		#' time = "time", event = "status", arm = "armcd",
131		#' covariates = c("covar1", "covar2")
132		#' ),
133		#' data = dta_bladder,
134		#' control = control_coxreg(conf_level = 0.91, interaction = TRUE)
135		#' )
136		#'
137		#' tidy(mod1)
138		#' tidy(mod2)
139		#'
140		#' @export
tidy.coxreg.univar <- function(x, # nolint
                               ...) {
  checkmate::assert_class(x, "coxreg.univar")
  fits <- x$mod
  model_vars <- c(x$vars$arm, x$vars$covariates)
  arm_given <- "arm" %in% names(x$vars)

  # Choose the per-model extractor once:
  # - no arm: each model is summarized with the multivariate extractor;
  # - arm + interaction: the interaction extractor is used (needs `at`);
  # - arm only: the plain univariate extractor is used.
  extract_one <- if (!arm_given) {
    function(fit, covar) {
      h_coxreg_multivar_extract(
        var = covar,
        data = x$data,
        mod = fit,
        control = x$control
      )
    }
  } else if (x$control$interaction) {
    function(fit, covar) {
      h_coxreg_extract_interaction(
        effect = x$vars$arm, covar = covar, mod = fit, data = x$data,
        at = x$at, control = x$control
      )
    }
  } else {
    function(fit, covar) {
      h_coxreg_univar_extract(
        effect = x$vars$arm, covar = covar, data = x$data, mod = fit,
        control = x$control
      )
    }
  }

  # Models come first so that the resulting list (and the row-bound rows)
  # take their names from the model list.
  result <- do.call(rbind, Map(extract_one, fits, model_vars))

  # Pair lower/upper limits into a list column, then blank out missing cells.
  result$ci <- Map(function(lower, upper) c(lower, upper), result$lcl, result$ucl)
  result$n <- lapply(result$n, empty_vector_if_na)
  result$ci <- lapply(result$ci, empty_vector_if_na)
  result$hr <- lapply(result$hr, empty_vector_if_na)
  if (x$control$interaction) {
    result$pval_inter <- lapply(result$pval_inter, empty_vector_if_na)
    # Remove interaction p-values due to change in specifications.
    result$pval[result$effect != "Treatment:"] <- NA
  }
  # Must run after the interaction step above, which sets some p-values to NA.
  result$pval <- lapply(result$pval, empty_vector_if_na)
  attr(result, "conf_level") <- x$control$conf_level
  result
}
196
197		#' @describeIn tidy_coxreg Custom tidy method for a multivariate Cox regression.
198		#'
199		#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_multivar()].
200		#'
201		#' @method tidy coxreg.multivar
202		#'
203		#' @examples
204		#' multivar_model <- fit_coxreg_multivar(
205		#' variables = list(
206		#' time = "time", event = "status", arm = "armcd",
207		#' covariates = c("covar1", "covar2")
208		#' ),
209		#' data = dta_bladder
210		#' )
211		#' broom::tidy(multivar_model)
212		#'
213		#' @export
tidy.coxreg.multivar <- function(x, # nolint
                                 ...) {
  checkmate::assert_class(x, "coxreg.multivar")
  model_vars <- c(x$vars$arm, x$vars$covariates)

  # One summary data frame per model variable, all taken from the single fitted model.
  extract_var <- function(model_var) {
    h_coxreg_multivar_extract(
      var = model_var, data = x$data,
      mod = x$mod, control = x$control
    )
  }
  tidied <- do.call(rbind, Map(extract_var, model_vars))

  # Pair lower/upper limits into a list column, then blank out missing cells.
  tidied$ci <- Map(function(lower, upper) c(lower, upper), tidied$lcl, tidied$ucl)
  tidied$ci <- lapply(tidied$ci, empty_vector_if_na)
  tidied$hr <- lapply(tidied$hr, empty_vector_if_na)
  tidied$pval <- lapply(tidied$pval, empty_vector_if_na)

  # The `n` column is not part of the multivariate output.
  tidied <- tidied[, names(tidied) != "n"]
  attr(tidied, "conf_level") <- x$control$conf_level

  tidied
}
240
241		#' Fitting functions for Cox proportional hazards regression
242		#'
243		#' @description `r lifecycle::badge("stable")`
244		#'
245		#' Fitting functions for univariate and multivariate Cox regression models.
246		#'
247		#' @param variables (named `list`)\cr the names of the variables found in `data`, passed as a named list and
248		#' corresponding to the `time`, `event`, `arm`, `strata`, and `covariates` terms. If `arm` is missing from
249		#' `variables`, then only Cox model(s) including the `covariates` will be fitted and the corresponding effect
250		#' estimates will be tabulated later.
251		#' @param data (`data.frame`)\cr the dataset containing the variables to fit the models.
252		#' @param at (`list` of `numeric`)\cr when the candidate covariate is a `numeric`, use `at` to specify
253		#' the value of the covariate at which the effect should be estimated.
254		#' @param control (`list`)\cr a list of parameters as returned by the helper function [control_coxreg()].
255		#'
256		#' @seealso [h_cox_regression] for relevant helper functions, [cox_regression].
257		#'
258		#' @examples
259		#' library(survival)
260		#'
261		#' set.seed(1, kind = "Mersenne-Twister")
262		#'
263		#' # Testing dataset [survival::bladder].
264		#' dta_bladder <- with(
265		#' data = bladder[bladder$enum < 5, ],
266		#' data.frame(
267		#' time = stop,
268		#' status = event,
269		#' armcd = as.factor(rx),
270		#' covar1 = as.factor(enum),
271		#' covar2 = factor(
272		#' sample(as.factor(enum)),
273		#' levels = 1:4, labels = c("F", "F", "M", "M")
274		#' )
275		#' )
276		#' )
277		#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
278		#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
279		#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
280		#'
281		#' plot(
282		#' survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder),
283		#' lty = 2:4,
284		#' xlab = "Months",
285		#' col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
286		#' )
287		#'
288		#' @name fit_coxreg
289		NULL
290
291		#' @describeIn fit_coxreg Fit a series of univariate Cox regression models given the inputs.
292		#'
293		#' @return
294		#' * `fit_coxreg_univar()` returns a `coxreg.univar` class object which is a named `list`
295		#' with 5 elements:
296		#' * `mod`: Cox regression models fitted by [survival::coxph()].
297		#' * `data`: The original data frame input.
298		#' * `control`: The original control input.
299		#' * `vars`: The variables used in the model.
300		#' * `at`: Value of the covariate at which the effect should be estimated.
301		#'
302		#' @note When using `fit_coxreg_univar` there should be two study arms.
303		#'
304		#' @examples
305		#' # fit_coxreg_univar
306		#'
307		#' ## Cox regression: arm + 1 covariate.
308		#' mod1 <- fit_coxreg_univar(
309		#' variables = list(
310		#' time = "time", event = "status", arm = "armcd",
311		#' covariates = "covar1"
312		#' ),
313		#' data = dta_bladder,
314		#' control = control_coxreg(conf_level = 0.91)
315		#' )
316		#'
317		#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates.
318		#' mod2 <- fit_coxreg_univar(
319		#' variables = list(
320		#' time = "time", event = "status", arm = "armcd",
321		#' covariates = c("covar1", "covar2")
322		#' ),
323		#' data = dta_bladder,
324		#' control = control_coxreg(conf_level = 0.91, interaction = TRUE)
325		#' )
326		#'
327		#' ## Cox regression: arm + 1 covariate, stratified analysis.
328		#' mod3 <- fit_coxreg_univar(
329		#' variables = list(
330		#' time = "time", event = "status", arm = "armcd", strata = "covar2",
331		#' covariates = c("covar1")
332		#' ),
333		#' data = dta_bladder,
334		#' control = control_coxreg(conf_level = 0.91)
335		#' )
336		#'
337		#' ## Cox regression: no arm, only covariates.
338		#' mod4 <- fit_coxreg_univar(
339		#' variables = list(
340		#' time = "time", event = "status",
341		#' covariates = c("covar1", "covar2")
342		#' ),
343		#' data = dta_bladder
344		#' )
345		#'
346		#' @export
fit_coxreg_univar <- function(variables,
                              data,
                              at = list(),
                              control = control_coxreg()) {
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_df_with_variables(data, variables)
  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # Stratified models cannot be combined with the likelihood p-value method.
  if (!is.null(variables$strata)) {
    checkmate::assert_disjunct(control$pval_method, "likelihood")
  }
  # The treatment arm, when given, must be a factor with exactly two levels.
  if (has_arm) {
    assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  }

  # Drop unused factor levels from every model variable while keeping all other
  # attributes of the column (e.g. labels, class). Overwriting the "levels"
  # attribute alone is not enough: the underlying integer codes index the *old*
  # level set, so an unused level located before a used one would shift the
  # labels of the observations. The codes are therefore remapped to the new
  # level set before the trimmed levels are attached.
  vars <- unlist(variables[c(arm_name, "covariates", "strata")], use.names = FALSE)
  for (i in vars) {
    if (is.factor(data[[i]])) {
      old_col <- data[[i]]
      new_levels <- levels(droplevels(old_col))
      new_col <- match(levels(old_col), new_levels)[as.integer(old_col)]
      attributes(new_col) <- attributes(old_col)
      attr(new_col, "levels") <- new_levels
      data[[i]] <- new_col
    }
  }

  # One formula (and hence one fitted model) per candidate covariate.
  forms <- h_coxreg_univar_formulas(variables, interaction = control$interaction)
  mod <- lapply(
    forms, function(x) {
      survival::coxph(formula = stats::as.formula(x), data = data, ties = control$ties)
    }
  )
  structure(
    list(
      mod = mod,
      data = data,
      control = control,
      vars = variables,
      at = at
    ),
    class = "coxreg.univar"
  )
}
389
390		#' @describeIn fit_coxreg Fit a multivariate Cox regression model.
391		#'
392		#' @return
393		#' * `fit_coxreg_multivar()` returns a `coxreg.multivar` class object which is a named list
394		#' with 4 elements:
395		#' * `mod`: Cox regression model fitted by [survival::coxph()].
396		#' * `data`: The original data frame input.
397		#' * `control`: The original control input.
398		#' * `vars`: The variables used in the model.
399		#'
400		#' @examples
401		#' # fit_coxreg_multivar
402		#'
403		#' ## Cox regression: multivariate Cox regression.
404		#' multivar_model <- fit_coxreg_multivar(
405		#' variables = list(
406		#' time = "time", event = "status", arm = "armcd",
407		#' covariates = c("covar1", "covar2")
408		#' ),
409		#' data = dta_bladder
410		#' )
411		#'
412		#' # Example without treatment arm.
413		#' multivar_covs_model <- fit_coxreg_multivar(
414		#' variables = list(
415		#' time = "time", event = "status",
416		#' covariates = c("covar1", "covar2")
417		#' ),
418		#' data = dta_bladder
419		#' )
420		#'
421		#' @export
fit_coxreg_multivar <- function(variables,
                                data,
                                control = control_coxreg()) {
  checkmate::assert_list(variables, names = "named")

  # `arm` is optional; only validate it when it has been supplied.
  arm_name <- NULL
  if ("arm" %in% names(variables)) {
    arm_name <- "arm"
  }

  if (!is.null(variables$covariates)) {
    checkmate::assert_character(variables$covariates)
  }

  # Interaction terms are not supported in the multivariate model.
  checkmate::assert_false(control$interaction)
  assert_df_with_variables(data, variables)
  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # Stratified models cannot be combined with the likelihood p-value method.
  if (!is.null(variables$strata)) {
    checkmate::assert_disjunct(control$pval_method, "likelihood")
  }

  # The fitting call is written exactly as before so that the call stored in
  # the fitted model object is unchanged.
  form <- h_coxreg_multivar_formula(variables)
  mod <- survival::coxph(
    formula = stats::as.formula(form),
    data = data,
    ties = control$ties
  )

  fit <- list(
    mod = mod,
    data = data,
    control = control,
    vars = variables
  )
  class(fit) <- "coxreg.multivar"
  fit
}
457
458		#' Muffled `car::Anova`
459		#'
460		#' When applied to survival models, [car::Anova()] signals that the `strata` terms are dropped from the model
461		#' formula when present; this function deliberately muffles this message.
462		#'
463		#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()].
464		#' @param test_statistic (`string`)\cr the method used for estimation of p.values; `wald` (default) or `likelihood`.
465		#'
466		#' @return The output of [car::Anova()], with convergence message muffled.
467		#'
468		#' @keywords internal
muffled_car_anova <- function(mod, test_statistic) {
  # Calling handlers are used so that messages can be muffled without
  # unwinding the computation, and so that errors can be re-signalled with a
  # more helpful explanation.
  withCallingHandlers(
    expr = {
      car::Anova(
        mod,
        test.statistic = test_statistic,
        type = "III"
      )
    },
    message = function(m) invokeRestart("muffleMessage"),
    error = function(e) {
      # Use the condition's message text only: pasting the condition object
      # itself expands to (message, call) and duplicates the explanation.
      stop(paste(
        "the model seems to have convergence problems, please try to change",
        "the configuration of covariates or strata variables, e.g.",
        "- original error:", conditionMessage(e)
      ))
    }
  )
}

1		#' Multivariate logistic regression table
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Layout-creating function which summarizes a logistic variable regression for binary outcome with
6		#' categorical/continuous covariates in model statement. For each covariate category (if categorical)
7		#' or specified values (if continuous), present degrees of freedom, regression parameter estimate and
8		#' standard error (SE) relative to reference group or category. Report odds ratios for each covariate
9		#' category or specified values and corresponding Wald confidence intervals as default but allow user
10		#' to specify other confidence levels. Report p-value for Wald chi-square test of the null hypothesis
11		#' that covariate has no effect on response in model containing all specified covariates.
12		#' Allow option to include one two-way interaction and present similar output for
13		#' each interaction degree of freedom.
14		#'
15		#' @inheritParams argument_convention
16		#' @param drop_and_remove_str (`string`)\cr string to be dropped and removed.
17		#'
18		#' @return A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
19		#' Adding this function to an `rtable` layout will add a logistic regression variable summary to the table layout.
20		#'
21		#' @note For the formula, the variable names need to be standard `data.frame` column names without
22		#' special characters.
23		#'
24		#' @examples
25		#' library(dplyr)
26		#' library(broom)
27		#'
28		#' adrs_f <- tern_ex_adrs %>%
29		#' filter(PARAMCD == "BESRSPI") %>%
30		#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
31		#' mutate(
32		#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
33		#' RACE = factor(RACE),
34		#' SEX = factor(SEX)
35		#' )
36		#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
37		#' mod1 <- fit_logistic(
38		#' data = adrs_f,
39		#' variables = list(
40		#' response = "Response",
41		#' arm = "ARMCD",
42		#' covariates = c("AGE", "RACE")
43		#' )
44		#' )
45		#' mod2 <- fit_logistic(
46		#' data = adrs_f,
47		#' variables = list(
48		#' response = "Response",
49		#' arm = "ARMCD",
50		#' covariates = c("AGE", "RACE"),
51		#' interaction = "AGE"
52		#' )
53		#' )
54		#'
55		#' df <- tidy(mod1, conf_level = 0.99)
56		#' df2 <- tidy(mod2, conf_level = 0.99)
57		#'
58		#' # flagging empty strings with "_"
59		#' df <- df_explicit_na(df, na_level = "_")
60		#' df2 <- df_explicit_na(df2, na_level = "_")
61		#'
62		#' result1 <- basic_table() %>%
63		#' summarize_logistic(
64		#' conf_level = 0.95,
65		#' drop_and_remove_str = "_"
66		#' ) %>%
67		#' build_table(df = df)
68		#' result1
69		#'
70		#' result2 <- basic_table() %>%
71		#' summarize_logistic(
72		#' conf_level = 0.95,
73		#' drop_and_remove_str = "_"
74		#' ) %>%
75		#' build_table(df = df2)
76		#' result2
77		#'
78		#' @export
79		#' @order 1
summarize_logistic <- function(lyt,
                               conf_level,
                               drop_and_remove_str = "",
                               .indent_mods = NULL) {
  # checks
  checkmate::assert_string(drop_and_remove_str)

  # Content-row builders, one per flag column of the tidied model data.
  add_variable_tests <- logistic_summary_by_flag("is_variable_summary")
  add_term_estimates <- logistic_summary_by_flag("is_term_summary", .indent_mods = .indent_mods)
  add_odds_ratios <- logistic_summary_by_flag("is_reference_summary", .indent_mods = .indent_mods)
  drop_levels <- drop_and_remove_levels(drop_and_remove_str)

  # Every row split pairs `<var>` with its `<var>_label` column and shares the
  # same level-dropping split function.
  split_by <- function(layout, var) {
    split_rows_by(layout, var = var, labels_var = paste0(var, "_label"), split_fun = drop_levels)
  }

  lyt <- logistic_regression_cols(lyt, conf_level = conf_level)
  lyt <- split_by(lyt, "variable")
  lyt <- add_variable_tests(lyt)
  lyt <- split_by(lyt, "term")
  lyt <- add_term_estimates(lyt)
  lyt <- split_by(lyt, "interaction")
  lyt <- split_by(lyt, "reference")
  lyt <- add_odds_ratios(lyt)
  lyt
}
102
103		#' Fit for logistic regression
104		#'
105		#' @description `r lifecycle::badge("stable")`
106		#'
107		#' Fit a (conditional) logistic regression model.
108		#'
109		#' @inheritParams argument_convention
110		#' @param data (`data.frame`)\cr the data frame on which the model was fit.
111		#' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`.
112		#' This will be used when fitting the (conditional) logistic regression model on the left hand
113		#' side of the formula.
114		#'
115		#' @return A fitted logistic regression model.
116		#'
117		#' @section Model Specification:
118		#'
119		#' The `variables` list needs to include the following elements:
120		#' * `arm`: Treatment arm variable name.
121		#' * `response`: The response arm variable name. Usually this is a 0/1 variable.
122		#' * `covariates`: This is either `NULL` (no covariates) or a character vector of covariate variable names.
123		#' * `interaction`: This is either `NULL` (no interaction) or a string of a single covariate variable name already
124		#' included in `covariates`. Then the interaction with the treatment arm is included in the model.
125		#'
126		#' @examples
127		#' library(dplyr)
128		#'
129		#' adrs_f <- tern_ex_adrs %>%
130		#' filter(PARAMCD == "BESRSPI") %>%
131		#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
132		#' mutate(
133		#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
134		#' RACE = factor(RACE),
135		#' SEX = factor(SEX)
136		#' )
137		#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
138		#' mod1 <- fit_logistic(
139		#' data = adrs_f,
140		#' variables = list(
141		#' response = "Response",
142		#' arm = "ARMCD",
143		#' covariates = c("AGE", "RACE")
144		#' )
145		#' )
146		#' mod2 <- fit_logistic(
147		#' data = adrs_f,
148		#' variables = list(
149		#' response = "Response",
150		#' arm = "ARMCD",
151		#' covariates = c("AGE", "RACE"),
152		#' interaction = "AGE"
153		#' )
154		#' )
155		#'
156		#' @export
fit_logistic <- function(data,
                         variables = list(
                           response = "Response",
                           arm = "ARMCD",
                           covariates = NULL,
                           interaction = NULL,
                           strata = NULL
                         ),
                         response_definition = "response") {
  assert_df_with_variables(data, variables)
  checkmate::assert_subset(names(variables), c("response", "arm", "covariates", "interaction", "strata"))
  checkmate::assert_string(response_definition)
  checkmate::assert_true(grepl("response", response_definition))

  has_strata <- !is.null(variables$strata)

  # Left-hand side: the first literal "response" in the definition is replaced
  # by the actual response variable name.
  response_definition <- sub(
    pattern = "response",
    replacement = variables$response,
    x = response_definition,
    fixed = TRUE
  )

  # Right-hand side, assembled term by term: arm, covariates, arm-by-covariate
  # interaction and finally the strata term.
  form <- paste0(response_definition, " ~ ", variables$arm)
  if (!is.null(variables$covariates)) {
    form <- paste0(form, " + ", paste(variables$covariates, collapse = " + "))
  }
  if (!is.null(variables$interaction)) {
    checkmate::assert_string(variables$interaction)
    checkmate::assert_subset(variables$interaction, variables$covariates)
    form <- paste0(form, " + ", variables$arm, ":", variables$interaction)
  }
  if (has_strata) {
    # Several strata variables are combined into a single interaction factor.
    strata_arg <- variables$strata
    if (length(strata_arg) > 1) {
      strata_arg <- paste0("I(interaction(", paste0(strata_arg, collapse = ", "), "))")
    }
    form <- paste0(form, "+ strata(", strata_arg, ")")
  }
  formula <- stats::as.formula(form)

  # With strata the conditional logistic fit is used, otherwise a binomial GLM.
  if (has_strata) {
    clogit_with_tryCatch(
      formula = formula,
      data = data,
      x = TRUE
    )
  } else {
    stats::glm(
      formula = formula,
      data = data,
      family = stats::binomial("logit")
    )
  }
}
209
210		#' Custom tidy method for binomial GLM results
211		#'
212		#' @description `r lifecycle::badge("stable")`
213		#'
214		#' Helper method (for [broom::tidy()]) to prepare a data frame from a `glm` object
215		#' with `binomial` family.
216		#'
217		#' @inheritParams argument_convention
218		#' @param at (`numeric` or `NULL`)\cr optional values for the interaction variable. Otherwise the median is used.
219		#' @param x (`glm`)\cr logistic regression model fitted by [stats::glm()] with "binomial" family.
220		#'
221		#' @return A `data.frame` containing the tidied model.
222		#'
223		#' @method tidy glm
224		#'
225		#' @seealso [h_logistic_regression] for relevant helper functions.
226		#'
227		#' @examples
228		#' library(dplyr)
229		#' library(broom)
230		#'
231		#' adrs_f <- tern_ex_adrs %>%
232		#' filter(PARAMCD == "BESRSPI") %>%
233		#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
234		#' mutate(
235		#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
236		#' RACE = factor(RACE),
237		#' SEX = factor(SEX)
238		#' )
239		#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
240		#' mod1 <- fit_logistic(
241		#' data = adrs_f,
242		#' variables = list(
243		#' response = "Response",
244		#' arm = "ARMCD",
245		#' covariates = c("AGE", "RACE")
246		#' )
247		#' )
248		#' mod2 <- fit_logistic(
249		#' data = adrs_f,
250		#' variables = list(
251		#' response = "Response",
252		#' arm = "ARMCD",
253		#' covariates = c("AGE", "RACE"),
254		#' interaction = "AGE"
255		#' )
256		#' )
257		#'
258		#' df <- tidy(mod1, conf_level = 0.99)
259		#' df2 <- tidy(mod2, conf_level = 0.99)
260		#'
261		#' @export
tidy.glm <- function(x, # nolint
                     conf_level = 0.95,
                     at = NULL,
                     ...) {
  checkmate::assert_class(x, "glm")
  checkmate::assert_set_equal(x$family$family, "binomial")

  term_labels <- attr(stats::terms(x), "term.labels")
  data_classes <- attr(x$terms, "dataClasses")

  # A term label that is not itself a model variable is treated as an
  # interaction term.
  has_interaction <- !all(term_labels %in% names(data_classes))

  df <- if (has_interaction) {
    h_logistic_inter_terms(
      x = term_labels,
      fit_glm = x,
      conf_level = conf_level,
      at = at
    )
  } else {
    h_logistic_simple_terms(
      x = term_labels,
      fit_glm = x,
      conf_level = conf_level
    )
  }

  # Convert the identifier columns to factors whose levels follow the order of
  # first appearance.
  for (id_col in c("variable", "term", "interaction", "reference")) {
    df[[id_col]] <- factor(df[[id_col]], levels = unique(df[[id_col]]))
  }
  df
}
291
292		#' Logistic regression multivariate column layout function
293		#'
294		#' @description `r lifecycle::badge("stable")`
295		#'
296		#' Layout-creating function which creates a multivariate column layout summarizing logistic
297		#' regression results. This function is a wrapper for [rtables::split_cols_by_multivar()].
298		#'
299		#' @inheritParams argument_convention
300		#'
301		#' @return A layout object suitable for passing to further layouting functions. Adding this
302		#' function to an `rtable` layout will split the table into columns corresponding to
303		#' statistics `df`, `estimate`, `std_error`, `odds_ratio`, `ci`, and `pvalue`.
304		#'
305		#' @export
logistic_regression_cols <- function(lyt,
                                     conf_level = 0.95) {
  # The confidence-interval header depends on the requested level.
  ci_label <- paste("Wald", f_conf_level(conf_level))

  # Names are the statistic columns, values are the displayed column headers.
  col_labels <- c(
    df = "Degrees of Freedom",
    estimate = "Parameter Estimate",
    std_error = "Standard Error",
    odds_ratio = "Odds Ratio",
    ci = ci_label,
    pvalue = "p-value"
  )

  split_cols_by_multivar(
    lyt = lyt,
    vars = names(col_labels),
    varlabels = col_labels
  )
}
323
324		#' Logistic regression summary table
325		#'
326		#' @description `r lifecycle::badge("stable")`
327		#'
328		#' Constructor for content functions to be used in [`summarize_logistic()`] to summarize
329		#' logistic regression results. This function is a wrapper for [rtables::summarize_row_groups()].
330		#'
331		#' @inheritParams argument_convention
332		#' @param flag_var (`string`)\cr variable name identifying which row should be used in this
333		#' content function.
334		#'
335		#' @return A content function.
336		#'
337		#' @export
logistic_summary_by_flag <- function(flag_var, na_str = default_na_str(), .indent_mods = NULL) {
  checkmate::assert_string(flag_var)

  # The returned function adds one content row per row group to `lyt`, with a
  # dedicated content function (and format) for each statistic column.
  function(lyt) {
    cfun_list <- list(
      df = cfun_by_flag("df", flag_var, format = "xx.", .indent_mods = .indent_mods),
      estimate = cfun_by_flag("estimate", flag_var, format = "xx.xxx", .indent_mods = .indent_mods),
      std_error = cfun_by_flag("std_error", flag_var, format = "xx.xxx", .indent_mods = .indent_mods),
      odds_ratio = cfun_by_flag("odds_ratio", flag_var, format = ">999.99", .indent_mods = .indent_mods),
      ci = cfun_by_flag("ci", flag_var, format = format_extreme_values_ci(2L), .indent_mods = .indent_mods),
      # The format label uses a plain pipe; a backslash-escaped pipe is not a
      # valid escape sequence in an R string and prevents the file from parsing.
      pvalue = cfun_by_flag("pvalue", flag_var, format = "x.xxxx | (<0.0001)", .indent_mods = .indent_mods)
    )
    summarize_row_groups(
      lyt = lyt,
      cfun = cfun_list,
      na_str = na_str
    )
  }
}

1		# Utility functions to cooperate with {rtables} package
2
3		#' Convert table into matrix of strings
4		#'
5		#' @description `r lifecycle::badge("stable")`
6		#'
7		#' Helper function to use mostly within tests. The `with_spaces` parameter allows
8		#' testing not only the content but also the indentation and table structure.
9		#' `print_txt_to_copy` instead facilitates test development by printing well-formatted
10		#' text that only needs to be copied and pasted into the expected output.
11		#'
12		#' @inheritParams formatters::toString
13		#' @param x (`VTableTree`)\cr `rtables` table object.
14		#' @param with_spaces (`flag`)\cr whether the tested table should keep the indentation and other relevant spaces.
15		#' @param print_txt_to_copy (`flag`)\cr utility to have a way to copy the input table directly
16		#' into the expected variable instead of copying it too manually.
17		#'
18		#' @return A `matrix` of `string`s. If `print_txt_to_copy = TRUE` the well formatted printout of the
19		#' table will be printed to console, ready to be copied as an expected value.
20		#'
21		#' @examples
22		#' tbl <- basic_table() %>%
23		#' split_rows_by("SEX") %>%
24		#' split_cols_by("ARM") %>%
25		#' analyze("AGE") %>%
26		#' build_table(tern_ex_adsl)
27		#'
28		#' to_string_matrix(tbl, widths = ceiling(propose_column_widths(tbl) / 2))
29		#'
30		#' @export
to_string_matrix <- function(x, widths = NULL, max_width = NULL,
                             hsep = formatters::default_hsep(),
                             with_spaces = TRUE, print_txt_to_copy = FALSE) {
  checkmate::assert_flag(with_spaces)
  checkmate::assert_flag(print_txt_to_copy)
  checkmate::assert_int(max_width, null.ok = TRUE)

  # Reuse an existing matrix print form, otherwise build one from the table.
  tx <- if (inherits(x, "MatrixPrintForm")) x else matrix_form(x, TRUE)

  # Wrapping is requested exactly when a maximum width is supplied.
  tf_wrap <- !is.null(max_width)

  # Producing the matrix to test: either the printed lines (keeping spacing)
  # or the raw cell strings of the print form.
  out <- if (with_spaces) {
    strsplit(toString(tx, widths = widths, tf_wrap = tf_wrap, max_width = max_width, hsep = hsep), "\n")[[1]]
  } else {
    tx$strings
  }

  # Printing to console formatted output that needs to be copied in "expected"
  if (print_txt_to_copy) {
    printable <- if (with_spaces) {
      out
    } else {
      # Collapse each row of cell strings into one quoted, comma-separated line.
      apply(out, 1, paste0, collapse = '", "')
    }
    cat(paste0('c(\n "', paste0(printable, collapse = '",\n "'), '"\n)'))
  }

  # Return values
  out
}
68
69		#' Blank for missing input
70		#'
71		#' Helper function to use in tabulating model results.
72		#'
73		#' @param x (`vector`)\cr input for a cell.
74		#'
75		#' @return An empty `character` vector if all entries in `x` are missing (`NA`), otherwise
76		#' the unlisted version of `x`.
77		#'
78		#' @keywords internal
unlist_and_blank_na <- function(x) {
  flattened <- unlist(x)
  # Anything with at least one non-missing entry is returned flattened.
  if (!all(is.na(flattened))) {
    return(flattened)
  }
  # Entirely missing (or empty) input is represented by an empty character vector.
  character()
}
87
88		#' Constructor for content functions given a data frame with flag input
89		#'
90		#' This can be useful for tabulating model results.
91		#'
92		#' @param analysis_var (`string`)\cr variable name for the column containing values to be returned by the
93		#' content function.
94		#' @param flag_var (`string`)\cr variable name for the logical column identifying which row should be returned.
95		#' @param format (`string`)\cr `rtables` format to use.
96		#'
97		#' @return A content function which gives `df[[analysis_var]]` at the row(s) identified by
98		#' the logical column `df[[flag_var]]` in the given format.
99		#'
100		#' @keywords internal
cfun_by_flag <- function(analysis_var,
                         flag_var,
                         format = "xx",
                         .indent_mods = NULL) {
  checkmate::assert_string(analysis_var)
  checkmate::assert_string(flag_var)

  # The returned closure keeps `analysis_var`, `flag_var`, `format` and
  # `.indent_mods` from this call.
  function(df, labelstr) {
    # Positions of the rows whose flag column is `TRUE`.
    flagged_rows <- which(df[[flag_var]])
    cell_value <- unlist_and_blank_na(df[[analysis_var]][flagged_rows])
    cell <- rcell(cell_value, format = format, indent_mod = .indent_mods)
    formatters::with_label(cell, labelstr)
  }
}
116
117		#' Content row function to add row total to labels
118		#'
119		#' This takes the label of the latest row split level and adds the row total from `df` in parentheses.
120		#' This function differs from [c_label_n_alt()] by taking row counts from `df` rather than
121		#' `alt_counts_df`, and is used by [add_rowcounts()] when `alt_counts` is set to `FALSE`.
122		#'
123		#' @inheritParams argument_convention
124		#'
125		#' @return A list with formatted [rtables::CellValue()] with the row count value and the correct label.
126		#'
127		#' @note It is important here to not use `df` but rather `.N_row` in the implementation, because
128		#' the former is already split by columns and will refer to the first column of the data only.
129		#'
130		#' @seealso [c_label_n_alt()] which performs the same function but retrieves row counts from
131		#' `alt_counts_df` instead of `df`.
132		#'
133		#' @keywords internal
c_label_n <- function(df,
                      labelstr,
                      .N_row) { # nolint
  # Row label of the form "<label> (N=<row count>)".
  row_label <- paste0(labelstr, " (N=", .N_row, ")")
  count_cell <- formatters::with_label(c(.N_row, .N_row), row_label)
  in_rows(
    .list = list(row_count = count_cell),
    # The format function always yields an empty string, so the stored counts
    # are not shown in the cell.
    .formats = c(row_count = function(x, ...) "")
  )
}
143
144		#' Content row function to add `alt_counts_df` row total to labels
145		#'
146		#' This takes the label of the latest row split level and adds the row total from `alt_counts_df`
147		#' in parentheses. This function differs from [c_label_n()] by taking row counts from `alt_counts_df`
148		#' rather than `df`, and is used by [add_rowcounts()] when `alt_counts` is set to `TRUE`.
149		#'
150		#' @inheritParams argument_convention
151		#'
152		#' @return A list with formatted [rtables::CellValue()] with the row count value and the correct label.
153		#'
154		#' @seealso [c_label_n()] which performs the same function but retrieves row counts from `df` instead
155		#' of `alt_counts_df`.
156		#'
157		#' @keywords internal
c_label_n_alt <- function(df,
                          labelstr,
                          .alt_df_row) {
  # The row total is the number of rows of `.alt_df_row`.
  alt_row_count <- nrow(.alt_df_row)
  row_label <- paste0(labelstr, " (N=", alt_row_count, ")")
  count_cell <- formatters::with_label(c(alt_row_count, alt_row_count), row_label)
  in_rows(
    .list = list(row_count = count_cell),
    # The format function always yields an empty string, so the stored counts
    # are not shown in the cell.
    .formats = c(row_count = function(x, ...) "")
  )
}
168
169		#' Layout-creating function to add row total counts
170		#'
171		#' @description `r lifecycle::badge("stable")`
172		#'
173		#' This works analogously to [rtables::add_colcounts()] but on the rows. This function
174		#' is a wrapper for [rtables::summarize_row_groups()].
175		#'
176		#' @inheritParams argument_convention
177		#' @param alt_counts (`flag`)\cr whether row counts should be taken from `alt_counts_df` (`TRUE`)
178		#' or from `df` (`FALSE`). Defaults to `FALSE`.
179		#'
180		#' @return A modified layout where the latest row split labels now have the row-wise
181		#' total counts (i.e. without column-based subsetting) attached in parentheses.
182		#'
183		#' @note Row count values are contained in these row count rows but are not displayed
184		#' so that they are not considered zero rows by default when pruning.
185		#'
186		#' @examples
187		#' basic_table() %>%
188		#' split_cols_by("ARM") %>%
189		#' add_colcounts() %>%
190		#' split_rows_by("RACE", split_fun = drop_split_levels) %>%
191		#' add_rowcounts() %>%
192		#' analyze("AGE", afun = list_wrap_x(summary), format = "xx.xx") %>%
193		#' build_table(DM)
194		#'
195		#' @export
add_rowcounts <- function(lyt, alt_counts = FALSE) {
  # Pick the content function matching the requested source of the row counts.
  count_cfun <- if (alt_counts) c_label_n_alt else c_label_n
  summarize_row_groups(lyt, cfun = count_cfun)
}
202
203		#' Obtain column indices
204		#'
205		#' @description `r lifecycle::badge("stable")`
206		#'
207		#' Helper function to extract column indices from a `VTableTree` for a given
208		#' vector of column names.
209		#'
210		#' @param table_tree (`VTableTree`)\cr `rtables` table object to extract the indices from.
211		#' @param col_names (`character`)\cr vector of column names.
212		#'
213		#' @return A vector of column indices.
214		#'
215		#' @export
h_col_indices <- function(table_tree, col_names) {
  checkmate::assert_class(table_tree, "VTableNodeInfo")
  # Column names are read from the names of the `cextra_args` attribute of the
  # table's column info.
  available_cols <- names(attr(col_info(table_tree), "cextra_args"))
  checkmate::assert_subset(col_names, available_cols, empty.ok = FALSE)
  match(col_names, available_cols)
}
221
222		#' Labels or names of list elements
223		#'
224		#' Helper function for working with nested statistic function results which typically
225		#' don't have labels but names that we can use.
226		#'
227		#' @param x (`list`)\cr a list.
228		#'
229		#' @return A `character` vector with the labels or names for the list elements.
230		#'
231		#' @examples
232		#' x <- data.frame(
233		#' a = 1:10,
234		#' b = rnorm(10)
235		#' )
236		#' labels_or_names(x)
237		#' var_labels(x) <- c(b = "Label for b", a = NA)
238		#' labels_or_names(x)
239		#'
240		#' @export
labels_or_names <- function(x) {
  checkmate::assert_multi_class(x, c("data.frame", "list"))
  element_labels <- sapply(x, obj_label)
  label_missing <- sapply(element_labels, is.null)
  # Elements without a label fall back to their name (`""` when unnamed, as
  # given by `rlang::names2()`).
  unlist(ifelse(label_missing, rlang::names2(x), element_labels))
}
249
250		#' Convert to `rtable`
251		#'
252		#' @description `r lifecycle::badge("stable")`
253		#'
254		#' This is a new generic function to convert objects to `rtable` tables.
255		#'
256		#' @param x (`data.frame`)\cr the object which should be converted to an `rtable`.
257		#' @param ... additional arguments for methods.
258		#'
259		#' @return An `rtables` table object. Note that the concrete class will depend on the method used.
260		#'
261		#' @export
as.rtable <- function(x, ...) { # nolint
  # S3 generic: dispatches on the class of the first argument `x`.
  UseMethod("as.rtable")
}
265
266		#' @describeIn as.rtable Method for converting a `data.frame` that contains numeric columns to `rtable`.
267		#'
268		#' @param format (`string` or `function`)\cr the format which should be used for the columns.
269		#'
270		#' @method as.rtable data.frame
271		#'
272		#' @examples
273		#' x <- data.frame(
274		#' a = 1:10,
275		#' b = rnorm(10)
276		#' )
277		#' as.rtable(x)
278		#'
279		#' @export
as.rtable.data.frame <- function(x, format = "xx.xx", ...) {
  # All values of all columns must be numeric.
  checkmate::assert_numeric(unlist(x))

  # Builds one `rrow` from the values of a single observation and its row name.
  build_row <- function(row, row_name) {
    do.call(rrow, c(as.list(unname(row)), row.name = row_name))
  }

  header_args <- list(header = labels_or_names(x), format = format)
  # After transposing, every column of the data frame holds one original row,
  # so `Map()` walks over the observations together with their row names.
  table_rows <- Map(build_row, row = as.data.frame(t(x)), row_name = rownames(x))

  do.call(rtable, c(header_args, table_rows))
}
304
305		#' Split parameters
306		#'
307		#' @description `r lifecycle::badge("deprecated")`
308		#'
309		#' It divides the data in the vector `param` into the groups defined by `f` based on the specified `value`. It is relevant
310		#' in `rtables` layers so as to distribute parameters `.stats` or `.formats` into lists with items corresponding to
311		#' a specific analysis function.
312		#'
313		#' @param param (`vector`)\cr the parameter to be split.
314		#' @param value (`vector`)\cr the value used to split.
315		#' @param f (`list`)\cr the reference to make the split.
316		#'
317		#' @return A named `list` with the same element names as `f`, each containing the elements specified in `.stats`.
318		#'
319		#' @examples
320		#' f <- list(
321		#' surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci"),
322		#' surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval")
323		#' )
324		#'
325		#' .stats <- c("pt_at_risk", "rate_diff")
326		#' h_split_param(.stats, .stats, f = f)
327		#'
328		#' # $surv
329		#' # [1] "pt_at_risk"
330		#' #
331		#' # $surv_diff
332		#' # [1] "rate_diff"
333		#'
334		#' .formats <- c("pt_at_risk" = "xx", "event_free_rate" = "xxx")
335		#' h_split_param(.formats, names(.formats), f = f)
336		#'
337		#' # $surv
338		#' # pt_at_risk event_free_rate
339		#' # "xx" "xxx"
340		#' #
341		#' # $surv_diff
342		#' # NULL
343		#'
344		#' @export
h_split_param <- function(param,
                          value,
                          f) {
  lifecycle::deprecate_warn("0.9.8", "h_split_param()")

  # One result per element of `f`, keeping its names: the entries of `param`
  # whose `value` belongs to that group, or `NULL` when there are none.
  lapply(f, function(group) {
    selected <- param[value %in% group]
    if (length(selected) == 0) NULL else selected
  })
}
353
354		#' Get selected statistics names
355		#'
356		#' Helper function to be used for creating `afun`.
357		#'
358		#' @param .stats (`vector` or `NULL`)\cr input to the layout creating function. Note that `NULL` means
359		#' in this context that all default statistics should be used.
360		#' @param all_stats (`character`)\cr all statistics which can be selected here potentially.
361		#'
362		#' @return A `character` vector with the selected statistics.
363		#'
364		#' @keywords internal
afun_selected_stats <- function(.stats, all_stats) {
  checkmate::assert_character(.stats, null.ok = TRUE)
  checkmate::assert_character(all_stats)

  # An explicit selection is restricted to the statistics that are available.
  if (!is.null(.stats)) {
    return(intersect(.stats, all_stats))
  }
  # `NULL` selects every available statistic.
  all_stats
}
374
375		#' Add variable labels to top left corner in table
376		#'
377		#' @description `r lifecycle::badge("stable")`
378		#'
379		#' Helper layout-creating function to append the variable labels of a given variables vector
380		#' from a given dataset in the top left corner. If a variable label is not found then the
381		#' variable name itself is used instead. Multiple variable labels are concatenated with slashes.
382		#'
383		#' @inheritParams argument_convention
384		#' @param vars (`character`)\cr variable names of which the labels are to be looked up in `df`.
385		#' @param indent (`integer(1)`)\cr non-negative number of nested indent space, default to 0L which means no indent.
386		#' 1L means two spaces indent, 2L means four spaces indent and so on.
387		#'
388		#' @return A modified layout with the new variable label(s) added to the top-left material.
389		#'
390		#' @note This is not an optimal implementation of course, since we are using here the data set
391		#' itself during the layout creation. When we have a more mature `rtables` implementation then
392		#' this will also be improved or not necessary anymore.
393		#'
394		#' @examples
395		#' lyt <- basic_table() %>%
396		#' split_cols_by("ARM") %>%
397		#' add_colcounts() %>%
398		#' split_rows_by("SEX") %>%
399		#' append_varlabels(DM, "SEX") %>%
400		#' analyze("AGE", afun = mean) %>%
401		#' append_varlabels(DM, "AGE", indent = 1)
402		#' build_table(lyt, DM)
403		#'
404		#' lyt <- basic_table() %>%
405		#' split_cols_by("ARM") %>%
406		#' split_rows_by("SEX") %>%
407		#' analyze("AGE", afun = mean) %>%
408		#' append_varlabels(DM, c("SEX", "AGE"))
409		#' build_table(lyt, DM)
410		#'
411		#' @export
append_varlabels <- function(lyt, df, vars, indent = 0L) {
  # A logical `indent` is still accepted, with a warning, and turned into 0L/1L.
  if (checkmate::test_flag(indent)) {
    warning("indent argument is now accepting integers. Boolean indent will be converted to integers.")
    indent <- as.integer(indent)
  }

  checkmate::assert_data_frame(df)
  checkmate::assert_character(vars)
  checkmate::assert_count(indent)

  var_labs <- formatters::var_labels(df[vars], fill = TRUE)
  # Two spaces of padding per indent level, followed by the labels joined by " / ".
  padding <- strrep(" ", indent * 2)
  append_topleft(lyt, paste0(padding, paste(var_labs, collapse = " / ")))
}
429
430		#' Default string replacement for `NA` values
431		#'
432		#' @description `r lifecycle::badge("stable")`
433		#'
434		#' The default string used to represent `NA` values. This value is used as the default
435		#' value for the `na_str` argument throughout the `tern` package, and printed in place
436		#' of `NA` values in output tables. If not specified for each `tern` function by the user
437		#' via the `na_str` argument, or in the R environment options via [set_default_na_str()],
438		#' then `NA` is used.
439		#'
440		#' @param na_str (`string`)\cr single string value to set in the R environment options as
441		#' the default value to replace `NA`s. Use `getOption("tern_default_na_str")` to check the
442		#' current value set in the R environment (defaults to `NULL` if not set).
443		#'
444		#' @name default_na_str
445		NULL
446
447		#' @describeIn default_na_str Accessor for default `NA` value replacement string.
448		#'
449		#' @return
450		#' * `default_na_str` returns the current value if an R environment option has been set
451		#' for `"tern_default_na_str"`, or `NA_character_` otherwise.
452		#'
453		#' @examples
454		#' # Default settings
455		#' default_na_str()
456		#' getOption("tern_default_na_str")
457		#'
458		#' # Set custom value
459		#' set_default_na_str("<Missing>")
460		#'
461		#' # Settings after value has been set
462		#' default_na_str()
463		#' getOption("tern_default_na_str")
464		#'
465		#' @export
default_na_str <- function() {
  current <- getOption("tern_default_na_str")
  # Fall back to a missing string when the option has not been set.
  if (is.null(current)) NA_character_ else current
}
469
470		#' @describeIn default_na_str Setter for default `NA` value replacement string. Sets the
471		#' option `"tern_default_na_str"` within the R environment.
472		#'
473		#' @return
474		#' * `set_default_na_str` is called for its side effect; it invisibly returns the result of [options()].
475		#'
476		#' @export
set_default_na_str <- function(na_str) {
  # A single string, or `NULL`.
  checkmate::assert_character(na_str, len = 1, null.ok = TRUE)
  options(tern_default_na_str = na_str)
}
481
482
483		#' Utilities to handle extra arguments in analysis functions
484		#'
485		#' @description `r lifecycle::badge("stable")`
486		#' Important additional parameters, useful to modify behavior of analysis and summary
487		#' functions are listed in [rtables::additional_fun_params]. With these utility functions
488		#' we can retrieve a curated list of these parameters from the environment, and pass them
489		#' to the analysis functions with dedicated `...`; notice that the final `s_*` function
490		#' will get them through argument matching.
491		#'
492		#' @param extra_afun_params (`list`)\cr list of additional parameters (`character`) to be
493		#' retrieved from the environment. Curated list is present in [rtables::additional_fun_params].
494		#' @param add_alt_df (`logical`)\cr if `TRUE`, the function will also add `.alt_df` and `.alt_df_row`
495		#' parameters.
496		#'
497		#' @name util_handling_additional_fun_params
498		NULL
499
500		#' @describeIn util_handling_additional_fun_params Retrieve additional parameters from the environment.
501		#'
502		#' @return
503		#' * `retrieve_extra_afun_params` returns a list of the values of the parameters in the environment.
504		#'
505		#' @keywords internal
retrieve_extra_afun_params <- function(extra_afun_params) {
  # Capture the caller's frame once, up front: inside the `lapply()` callback
  # `parent.frame()` would no longer refer to the caller of this function.
  caller_env <- parent.frame()

  # Look up every requested name in the caller's environment. `lapply()` builds
  # the result in one pass instead of growing a list with `c()` per iteration.
  values <- lapply(extra_afun_params, function(param_name) {
    get(param_name, envir = caller_env)
  })

  # Result is named after the requested parameters, in the same order.
  setNames(values, extra_afun_params)
}
513
514		#' @describeIn util_handling_additional_fun_params Curated list of additional parameters for
515		#' analysis functions. Please check [rtables::additional_fun_params] for precise descriptions.
516		#'
517		#' @return
518		#' * `get_additional_afun_params` returns a list of additional parameters.
519		#'
520		#' @keywords internal
get_additional_afun_params <- function(add_alt_df = FALSE) {
  # Empty prototypes, one per additional parameter name.
  out_list <- list(
    .N_col = integer(),
    .N_total = integer(),
    .N_row = integer(),
    .df_row = data.frame(),
    .var = character(),
    .ref_group = character(),
    .ref_full = vector(mode = "numeric"),
    .in_ref_col = logical(),
    .spl_context = data.frame(),
    .all_col_exprs = vector(mode = "expression"),
    .all_col_counts = vector(mode = "integer")
  )

  if (isTRUE(add_alt_df)) {
    # The new entries must be wrapped in `list()`: passed directly to `c()`, an
    # empty data frame is spliced as a zero-length list and nothing is added.
    out_list <- c(
      out_list,
      list(
        .alt_df_row = data.frame(),
        .alt_df = data.frame()
      )
    )
  }

  out_list
}

1		#' Apply 1/3 or 1/2 imputation rule to data
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' @inheritParams argument_convention
6		#' @param x_stats (named `list`)\cr a named list of statistics, typically the results of [s_summary()].
7		#' @param stat (`string`)\cr statistic to return the value/NA level of according to the imputation
8		#' rule applied.
9		#' @param imp_rule (`string`)\cr imputation rule setting. Set to `"1/3"` to implement 1/3 imputation
10		#' rule or `"1/2"` to implement 1/2 imputation rule.
11		#' @param post (`flag`)\cr whether the data corresponds to a post-dose time-point (defaults to `FALSE`).
12		#' This parameter is only used when `imp_rule` is set to `"1/3"`.
13		#' @param avalcat_var (`string`)\cr name of variable that indicates whether a row in `df` corresponds
14		#' to an analysis value in category `"BLQ"`, `"LTR"`, `"<PCLLOQ"`, or none of the above
15		#' (defaults to `"AVALCAT1"`). Variable `avalcat_var` must be present in `df`.
16		#'
17		#' @return A `list` containing statistic value (`val`) and NA level (`na_str`) that should be displayed
18		#' according to the specified imputation rule.
19		#'
20		#' @seealso [analyze_vars_in_cols()] where this function can be implemented by setting the `imp_rule`
21		#' argument.
22		#'
23		#' @examples
24		#' set.seed(1)
25		#' df <- data.frame(
26		#' AVAL = runif(50, 0, 1),
27		#' AVALCAT1 = sample(c(1, "BLQ"), 50, replace = TRUE)
28		#' )
29		#' x_stats <- s_summary(df$AVAL)
30		#' imputation_rule(df, x_stats, "max", "1/3")
31		#' imputation_rule(df, x_stats, "geom_mean", "1/3")
32		#' imputation_rule(df, x_stats, "mean", "1/2")
33		#'
34		#' @export
imputation_rule <- function(df, x_stats, stat, imp_rule, post = FALSE, avalcat_var = "AVALCAT1") {
  checkmate::assert_choice(avalcat_var, names(df))
  checkmate::assert_choice(imp_rule, c("1/3", "1/2"))

  # Share of rows whose category matches one of the patterns BLQ, LTR,
  # "<" followed by a digit 1-9, or <PCLLOQ. `max(1, ...)` avoids dividing by
  # zero for an empty `df`.
  n_blq <- sum(grepl("BLQ|LTR|<[1-9]|<PCLLOQ", df[[avalcat_var]]))
  ltr_blq_ratio <- n_blq / max(1, nrow(df))

  # defaults
  val <- x_stats[[stat]]
  na_str <- "NE"

  if (imp_rule == "1/3") {
    if (!post && stat == "geom_mean") {
      val <- NA # 1/3_pre_LT, 1/3_pre_GT
    }
    if (ltr_blq_ratio > 1 / 3) {
      if (stat != "geom_mean") {
        na_str <- "ND" # 1/3_pre_GT, 1/3_post_GT
      }
      if (!post && !stat %in% c("median", "max")) {
        val <- NA # 1/3_pre_GT
      }
      if (post && !stat %in% c("median", "max", "geom_mean")) {
        val <- NA # 1/3_post_GT
      }
    }
  } else if (imp_rule == "1/2") {
    if (ltr_blq_ratio > 1 / 2 && stat != "max") {
      val <- NA # 1/2_GT
      na_str <- "ND" # 1/2_GT
    }
  }

  list(val = val, na_str = na_str)
}

1		#' Confidence interval for mean
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Convenient function for calculating the mean confidence interval. It calculates the arithmetic as well as the
6		#' geometric mean. It can be used as a `ggplot` helper function for plotting.
7		#'
8		#' @inheritParams argument_convention
9		#' @param n_min (`numeric(1)`)\cr a minimum number of non-missing `x` to estimate the confidence interval for mean.
10		#' @param gg_helper (`flag`)\cr whether output should be aligned for use with `ggplot`s.
11		#' @param geom_mean (`flag`)\cr whether the geometric mean should be calculated.
12		#'
13		#' @return A named `vector` of values `mean_ci_lwr` and `mean_ci_upr`.
14		#'
15		#' @examples
16		#' stat_mean_ci(sample(10), gg_helper = FALSE)
17		#'
18		#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
19		#' ggplot2::geom_point()
20		#'
21		#' p + ggplot2::stat_summary(
22		#' fun.data = stat_mean_ci,
23		#' geom = "errorbar"
24		#' )
25		#'
26		#' p + ggplot2::stat_summary(
27		#' fun.data = stat_mean_ci,
28		#' fun.args = list(conf_level = 0.5),
29		#' geom = "errorbar"
30		#' )
31		#'
32		#' p + ggplot2::stat_summary(
33		#' fun.data = stat_mean_ci,
34		#' fun.args = list(conf_level = 0.5, geom_mean = TRUE),
35		#' geom = "errorbar"
36		#' )
37		#'
38		#' @export
stat_mean_ci <- function(x,
                         conf_level = 0.95,
                         na.rm = TRUE, # nolint
                         n_min = 2,
                         gg_helper = TRUE,
                         geom_mean = FALSE) {
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  if (!geom_mean) {
    m <- mean(x)
  } else if (any(x[!is.na(x)] <= 0)) {
    # Any non-missing value that is zero or negative makes the result missing:
    # the log-scale computation below is not attempted.
    m <- NA_real_
  } else {
    # Geometric mean: work on the log scale and back-transform at the end.
    x <- log(x)
    m <- mean(x)
  }

  if (n < n_min || is.na(m)) {
    # Too few observations or no usable mean: no interval can be given.
    ci <- c(mean_ci_lwr = NA_real_, mean_ci_upr = NA_real_)
  } else {
    # Half-width of the t-based interval with n - 1 degrees of freedom.
    hci <- stats::qt((1 + conf_level) / 2, df = n - 1) * stats::sd(x) / sqrt(n)
    ci <- c(mean_ci_lwr = m - hci, mean_ci_upr = m + hci)
    if (geom_mean) {
      ci <- exp(ci)
    }
  }

  if (gg_helper) {
    # `mean()` of an empty vector is `NaN`; report it as `NA` instead.
    if (is.na(m)) {
      m <- NA_real_
    }
    # One-row data frame with columns `y`, `ymin` and `ymax`.
    ci <- data.frame(y = if (geom_mean) exp(m) else m, ymin = ci[[1]], ymax = ci[[2]])
  }

  ci
}
79
80		#' Confidence interval for median
81		#'
82		#' @description `r lifecycle::badge("stable")`
83		#'
84		#' Convenient function for calculating the median confidence interval. It can be used as a `ggplot` helper
85		#' function for plotting.
86		#'
87		#' @inheritParams argument_convention
88		#' @param gg_helper (`flag`)\cr whether output should be aligned for use with `ggplot`s.
89		#'
90		#' @details This function was adapted from `DescTools/versions/0.99.35/source`
91		#'
92		#' @return A named `vector` of values `median_ci_lwr` and `median_ci_upr`.
93		#'
94		#' @examples
95		#' stat_median_ci(sample(10), gg_helper = FALSE)
96		#'
97		#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
98		#' ggplot2::geom_point()
99		#' p + ggplot2::stat_summary(
100		#' fun.data = stat_median_ci,
101		#' geom = "errorbar"
102		#' )
103		#'
104		#' @export
stat_median_ci <- function(x,
                           conf_level = 0.95,
                           na.rm = TRUE, # nolint
                           gg_helper = TRUE) {
  x <- unname(x)
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  n <- length(x)
  med <- stats::median(x)

  # Rank of the lower bound, taken from the Binomial(n, 0.5) quantile; the upper
  # bound uses the mirrored rank n - k + 1.
  k <- stats::qbinom(p = (1 - conf_level) / 2, size = n, prob = 0.5, lower.tail = TRUE)

  # k == 0 - for small samples (e.g. n <= 5) ci can be outside the observed range
  if (k == 0 || is.na(med)) {
    ci <- c(median_ci_lwr = NA_real_, median_ci_upr = NA_real_)
    empir_conf_level <- NA_real_
  } else {
    x_sort <- sort(x)
    ci <- c(median_ci_lwr = x_sort[k], median_ci_upr = x_sort[n - k + 1])
    # Confidence level recomputed from the binomial distribution for the rank
    # actually used.
    empir_conf_level <- 1 - 2 * stats::pbinom(k - 1, size = n, prob = 0.5)
  }

  if (gg_helper) {
    # One-row data frame with columns `y`, `ymin` and `ymax`.
    ci <- data.frame(y = med, ymin = ci[[1]], ymax = ci[[2]])
  }

  # The empirical confidence level is attached as attribute `conf_level`.
  attr(ci, "conf_level") <- empir_conf_level

  ci
}
136
137		#' p-Value of the mean
138		#'
139		#' @description `r lifecycle::badge("stable")`
140		#'
141		#' Convenient function for calculating the two-sided p-value of the mean.
142		#'
143		#' @inheritParams argument_convention
144		#' @param n_min (`numeric(1)`)\cr a minimum number of non-missing `x` to estimate the p-value of the mean.
145		#' @param test_mean (`numeric(1)`)\cr mean value to test under the null hypothesis.
146		#'
147		#' @return A p-value.
148		#'
149		#' @examples
150		#' stat_mean_pval(sample(10))
151		#'
152		#' stat_mean_pval(rnorm(10), test_mean = 0.5)
153		#'
154		#' @export
stat_mean_pval <- function(x,
                           na.rm = TRUE, # nolint
                           n_min = 2,
                           test_mean = 0) {
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  x_mean <- mean(x)
  x_sd <- stats::sd(x)

  if (n < n_min) {
    # Too few observations to compute a p-value.
    pv <- c(p_value = NA_real_)
  } else {
    # Two-sided p-value from the t statistic with n - 1 degrees of freedom;
    # the standard deviation computed above is reused for the standard error.
    x_se <- x_sd / sqrt(n)
    ttest <- (x_mean - test_mean) / x_se
    pv <- c(p_value = 2 * stats::pt(-abs(ttest), df = n - 1))
  }

  pv
}
177
178		#' Proportion difference and confidence interval
179		#'
180		#' @description `r lifecycle::badge("stable")`
181		#'
182		#' Function for calculating the proportion (or risk) difference and confidence interval between arm
183		#' X (reference group) and arm Y. Risk difference is calculated by subtracting cumulative incidence
184		#' in arm Y from cumulative incidence in arm X.
185		#'
186		#' @inheritParams argument_convention
187		#' @param x (`list` of `integer`)\cr list of number of occurrences in arm X (reference group).
188		#' @param y (`list` of `integer`)\cr list of number of occurrences in arm Y. Must be of equal length to `x`.
189		#' @param N_x (`numeric(1)`)\cr total number of records in arm X.
190		#' @param N_y (`numeric(1)`)\cr total number of records in arm Y.
191		#' @param list_names (`character`)\cr names of each variable/level corresponding to pair of proportions in
192		#' `x` and `y`. Must be of equal length to `x` and `y`.
193		#' @param pct (`flag`)\cr whether output should be returned as percentages. Defaults to `TRUE`.
194		#'
195		#' @return List of proportion differences and CIs corresponding to each pair of number of occurrences in `x` and
196		#' `y`. Each list element consists of 3 statistics: proportion difference, CI lower bound, and CI upper bound.
197		#'
198		#' @seealso Split function [add_riskdiff()] which, when used as `split_fun` within [rtables::split_cols_by()]
199		#' with `riskdiff` argument is set to `TRUE` in subsequent analyze functions, adds a column containing
200		#' proportion (risk) difference to an `rtables` layout.
201		#'
202		#' @examples
203		#' stat_propdiff_ci(
204		#' x = list(0.375), y = list(0.01), N_x = 5, N_y = 5, list_names = "x", conf_level = 0.9
205		#' )
206		#'
207		#' stat_propdiff_ci(
208		#' x = list(0.5, 0.75, 1), y = list(0.25, 0.05, 0.5), N_x = 10, N_y = 20, pct = FALSE
209		#' )
210		#'
211		#' @export
stat_propdiff_ci <- function(x,
                             y,
                             N_x, # nolint
                             N_y, # nolint
                             list_names = NULL,
                             conf_level = 0.95,
                             pct = TRUE) {
  checkmate::assert_list(x, types = "numeric")
  checkmate::assert_list(y, types = "numeric", len = length(x))
  checkmate::assert_character(list_names, len = length(x), null.ok = TRUE)

  # Difference and normal-approximation interval for the i-th pair of counts.
  propdiff_one <- function(i) {
    prop_x <- x[[i]] / N_x
    prop_y <- y[[i]] / N_y
    std_err <- sqrt(prop_x * (1 - prop_x) / N_x + prop_y * (1 - prop_y) / N_y)
    ci_bounds <- prop_x - prop_y + c(-1, 1) * stats::qnorm((1 + conf_level) / 2) * std_err
    # Order of the result: difference, lower bound, upper bound; scaled by 100
    # when percentages are requested.
    c(prop_x - prop_y, ci_bounds) * ifelse(pct, 100, 1)
  }

  rd_list <- lapply(seq_along(x), propdiff_one)
  names(rd_list) <- list_names
  rd_list
}

1		#' Analyze numeric variables in columns
2		#'
3		#' @description `r lifecycle::badge("experimental")`
4		#'
5		#' The layout-creating function [analyze_vars_in_cols()] creates a layout element to generate a column-wise
6		#' analysis table.
7		#'
8		#' This function sets the analysis methods as column labels and is a wrapper for [rtables::analyze_colvars()].
9		#' It was designed principally for PK tables.
10		#'
11		#' @inheritParams argument_convention
12		#' @inheritParams rtables::analyze_colvars
13		#' @param imp_rule (`string` or `NULL`)\cr imputation rule setting. Defaults to `NULL` for no imputation rule. Can
14		#' also be `"1/3"` to implement 1/3 imputation rule or `"1/2"` to implement 1/2 imputation rule. In order
15		#' to use an imputation rule, the `avalcat_var` argument must be specified. See [imputation_rule()]
16		#' for more details on imputation.
17		#' @param avalcat_var (`string`)\cr if `imp_rule` is not `NULL`, name of variable that indicates whether a
18		#' row in the data corresponds to an analysis value in category `"BLQ"`, `"LTR"`, `"<PCLLOQ"`, or none of
19		#' the above (defaults to `"AVALCAT1"`). Variable must be present in the data and should match the variable
20		#' used to calculate the `n_blq` statistic (if included in `.stats`).
21		#' @param cache (`flag`)\cr whether to store computed values in a temporary caching environment. This will
22		#' speed up calculations in large tables, but should be set to `FALSE` if the same `rtable` layout is
23		#' used for multiple tables with different data. Defaults to `FALSE`.
24		#' @param row_labels (`character`)\cr as this function works in columns space, usually `.labels`
25		#' character vector applies on the column space. You can change the row labels by defining this
26		#' parameter to a named character vector with names corresponding to the split values. It defaults
27		#' to `NULL` and if it contains only one `string`, it will duplicate that as a row label.
28		#' @param do_summarize_row_groups (`flag`)\cr defaults to `FALSE` and applies the analysis to the current
29		#' label rows. This is a wrapper of [rtables::summarize_row_groups()] and it can accept `labelstr`
30		#' to define row labels. This behavior is not supported as we never need to overload row labels.
31		#' @param split_col_vars (`flag`)\cr defaults to `TRUE` and puts the analysis results onto the columns.
32		#' This option allows you to add multiple instances of this functions, also in a nested fashion,
33		#' without adding more splits. This split must happen only one time on a single layout.
34		#'
35		#' @return
36		#' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
37		#' Adding this function to an `rtable` layout will summarize the given variables, arrange the output
38		#' in columns, and add it to the table layout.
39		#'
40		#' @note
41		#' * This is an experimental implementation of [rtables::summarize_row_groups()] and [rtables::analyze_colvars()]
42		#' that may be subject to change as `rtables` extends its support to more complex analysis pipelines in the
43		#' column space. We encourage users to read the examples carefully and file issues for different use cases.
44		#' * In this function, `labelstr` behaves atypically. If `labelstr = NULL` (the default), row labels are assigned
45		#' automatically as the split values if `do_summarize_row_groups = FALSE` (the default), and as the group label
46		#' if `do_summarize_row_groups = TRUE`.
47		#'
48		#' @seealso [analyze_vars()], [rtables::analyze_colvars()].
49		#'
50		#' @examples
51		#' library(dplyr)
52		#'
53		#' # Data preparation
54		#' adpp <- tern_ex_adpp %>% h_pkparam_sort()
55		#'
56		#' lyt <- basic_table() %>%
57		#' split_rows_by(var = "STRATA1", label_pos = "topleft") %>%
58		#' split_rows_by(
59		#' var = "SEX",
60		#' label_pos = "topleft",
61		#' child_labels = "hidden"
62		#' ) %>% # Removes duplicated labels
63		#' analyze_vars_in_cols(vars = "AGE")
64		#' result <- build_table(lyt = lyt, df = adpp)
65		#' result
66		#'
67		#' # By selecting just some statistics and ad-hoc labels
68		#' lyt <- basic_table() %>%
69		#' split_rows_by(var = "ARM", label_pos = "topleft") %>%
70		#' split_rows_by(
71		#' var = "SEX",
72		#' label_pos = "topleft",
73		#' child_labels = "hidden",
74		#' split_fun = drop_split_levels
75		#' ) %>%
76		#' analyze_vars_in_cols(
77		#' vars = "AGE",
78		#' .stats = c("n", "cv", "geom_mean"),
79		#' .labels = c(
80		#' n = "aN",
81		#' cv = "aCV",
82		#' geom_mean = "aGeomMean"
83		#' )
84		#' )
85		#' result <- build_table(lyt = lyt, df = adpp)
86		#' result
87		#'
88		#' # Changing row labels
89		#' lyt <- basic_table() %>%
90		#' analyze_vars_in_cols(
91		#' vars = "AGE",
92		#' row_labels = "some custom label"
93		#' )
94		#' result <- build_table(lyt, df = adpp)
95		#' result
96		#'
97		#' # Pharmacokinetic parameters
98		#' lyt <- basic_table() %>%
99		#' split_rows_by(
100		#' var = "TLG_DISPLAY",
101		#' split_label = "PK Parameter",
102		#' label_pos = "topleft",
103		#' child_labels = "hidden"
104		#' ) %>%
105		#' analyze_vars_in_cols(
106		#' vars = "AVAL"
107		#' )
108		#' result <- build_table(lyt, df = adpp)
109		#' result
110		#'
111		#' # Multiple calls (summarize label and analyze underneath)
112		#' lyt <- basic_table() %>%
113		#' split_rows_by(
114		#' var = "TLG_DISPLAY",
115		#' split_label = "PK Parameter",
116		#' label_pos = "topleft"
117		#' ) %>%
118		#' analyze_vars_in_cols(
119		#' vars = "AVAL",
120		#' do_summarize_row_groups = TRUE # does a summarize level
121		#' ) %>%
122		#' split_rows_by("SEX",
123		#' child_labels = "hidden",
124		#' label_pos = "topleft"
125		#' ) %>%
126		#' analyze_vars_in_cols(
127		#' vars = "AVAL",
128		#' split_col_vars = FALSE # avoids re-splitting the columns
129		#' )
130		#' result <- build_table(lyt, df = adpp)
131		#' result
132		#'
133		#' @export
analyze_vars_in_cols <- function(lyt,
                                 vars,
                                 ...,
                                 .stats = c(
                                   "n",
                                   "mean",
                                   "sd",
                                   "se",
                                   "cv",
                                   "geom_cv"
                                 ),
                                 .labels = c(
                                   n = "n",
                                   mean = "Mean",
                                   sd = "SD",
                                   se = "SE",
                                   cv = "CV (%)",
                                   geom_cv = "CV % Geometric Mean"
                                 ),
                                 row_labels = NULL,
                                 do_summarize_row_groups = FALSE,
                                 split_col_vars = TRUE,
                                 imp_rule = NULL,
                                 avalcat_var = "AVALCAT1",
                                 cache = FALSE,
                                 .indent_mods = NULL,
                                 na_str = default_na_str(),
                                 nested = TRUE,
                                 .formats = NULL,
                                 .aligns = NULL) {
  extra_args <- list(...)

  checkmate::assert_string(na_str, na.ok = TRUE, null.ok = TRUE)
  checkmate::assert_character(row_labels, null.ok = TRUE)
  checkmate::assert_int(.indent_mods, null.ok = TRUE)
  checkmate::assert_flag(nested)
  checkmate::assert_flag(split_col_vars)
  checkmate::assert_flag(do_summarize_row_groups)

  # Filtering
  met_grps <- paste0("analyze_vars", c("_numeric", "_counts"))
  .stats <- get_stats(met_grps, stats_in = .stats)
  formats_v <- get_formats_from_stats(stats = .stats, formats_in = .formats)
  labels_v <- get_labels_from_stats(stats = .stats, labels_in = .labels) %>% .unlist_keep_nulls()
  if ("control" %in% names(extra_args)) labels_v <- labels_v %>% labels_use_control(extra_args[["control"]], .labels)

  # Check for vars in the case that one or more are used
  if (length(vars) == 1) {
    vars <- rep(vars, length(.stats))
  } else if (length(vars) != length(.stats)) {
    stop(
      "Analyzed variables (vars) does not have the same ",
      "number of elements of specified statistics (.stats)."
    )
  }

  if (split_col_vars) {
    # Checking there is not a previous identical column split
    clyt <- tail(clayout(lyt), 1)[[1]]

    dummy_lyt <- split_cols_by_multivar(
      lyt = basic_table(),
      vars = vars,
      varlabels = labels_v
    )

    # `identical()` always yields a single logical, so `vapply()` keeps the result
    # type stable even when there are no previous column splits.
    new_col_split <- get_last_col_split(dummy_lyt)
    if (any(vapply(clyt, identical, logical(1), y = new_col_split))) {
      stop(
        "Column split called again with the same values. ",
        "This can create many unwanted columns. Please consider adding ",
        "split_col_vars = FALSE to the last call of ",
        deparse(sys.calls()[[sys.nframe() - 1]]), "."
      )
    }

    # Main col split
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = vars,
      varlabels = labels_v
    )
  }

  env <- new.env() # create caching environment

  # Statistics whose value may be replaced by the imputation rule.
  imputable_stats <- c("mean", "sd", "cv", "geom_mean", "geom_cv", "median", "min", "max")

  # `.indent_mods` is `NULL` or a single integer (asserted above), so a plain `if` suffices.
  indent_mod <- if (is.null(.indent_mods)) 0L else .indent_mods

  # Shared by both cell functions below: computes the summary of `u` (or reads it from
  # the cache) and extracts `stat`, applying the imputation rule when one is requested.
  # Returns a list with the cell value (`val`) and the NA string to use for it (`na_str`).
  # Arguments after `...` must be passed by their full name.
  compute_stat <- function(u, ..., stat, use_cache, cache_env, .spl_context, .df_row) {
    # Cache key: last column split value (with `._[[<n>]]_.` markers stripped)
    # combined with all row split values of the current context.
    var_row_val <- paste(
      gsub("\\._\\[\\[[0-9]+\\]\\]_\\.", "", paste(tail(.spl_context$cur_col_split_val, 1)[[1]], collapse = "_")),
      paste(.spl_context$value, collapse = "_"),
      sep = "_"
    )
    if (use_cache) {
      if (is.null(cache_env[[var_row_val]])) {
        cache_env[[var_row_val]] <- s_summary(u, ...)
      }
      x_stats <- cache_env[[var_row_val]]
    } else {
      x_stats <- s_summary(u, ...)
    }

    if (is.null(imp_rule) || !stat %in% imputable_stats) {
      return(list(val = x_stats[[stat]], na_str = na_str))
    }

    # Numeric part of the innermost row split value, used to decide `post` below.
    timept <- as.numeric(gsub(".?([0-9\\.]+).", "\\1", tail(.spl_context$value, 1)))
    res_imp <- imputation_rule(
      .df_row, x_stats, stat,
      imp_rule = imp_rule,
      post = grepl("Predose", tail(.spl_context$value, 1)) || timept > 0,
      avalcat_var = avalcat_var
    )
    list(val = res_imp[["val"]], na_str = res_imp[["na_str"]])
  }

  if (do_summarize_row_groups) {
    if (length(unique(vars)) > 1) {
      stop("When using do_summarize_row_groups only one label level var should be inserted.")
    }

    # Function list for do_summarize_row_groups. Slightly different handling of labels
    cfun_list <- Map(
      function(stat, use_cache, cache_env) {
        function(u, .spl_context, labelstr, .df_row, ...) {
          # Statistic
          cell <- compute_stat(
            u, ...,
            stat = stat,
            use_cache = use_cache,
            cache_env = cache_env,
            .spl_context = .spl_context,
            .df_row = .df_row
          )

          # Label check and replacement
          if (length(row_labels) > 1) {
            if (!(labelstr %in% names(row_labels))) {
              stop(
                "Replacing the labels in do_summarize_row_groups needs a named vector ",
                "that contains the split values. In the current split variable ",
                .spl_context$split[nrow(.spl_context)],
                " the labelstr value (split value by default) ", labelstr, " is not in",
                " row_labels names: ", paste(names(row_labels), collapse = ", ")
              )
            }
            lbl <- unlist(row_labels[labelstr])
          } else {
            lbl <- labelstr
          }

          # Cell creation
          rcell(cell[["val"]],
            label = lbl,
            format = formats_v[names(formats_v) == stat][[1]],
            format_na_str = cell[["na_str"]],
            indent_mod = indent_mod,
            align = .aligns
          )
        }
      },
      stat = .stats,
      use_cache = cache,
      cache_env = replicate(length(.stats), env)
    )

    # Main call to rtables
    summarize_row_groups(
      lyt = lyt,
      var = unique(vars),
      cfun = cfun_list,
      na_str = na_str,
      extra_args = extra_args
    )
  } else {
    # Function list for analyze_colvars
    afun_list <- Map(
      function(stat, use_cache, cache_env) {
        function(u, .spl_context, .df_row, ...) {
          # Main statistics
          cell <- compute_stat(
            u, ...,
            stat = stat,
            use_cache = use_cache,
            cache_env = cache_env,
            .spl_context = .spl_context,
            .df_row = .df_row
          )
          res <- cell[["val"]]

          # A list result is only accepted when it holds a single element
          if (is.list(res)) {
            if (length(res) > 1) {
              stop("The analyzed column produced more than one category of results.")
            }
            res <- unlist(res)
          }

          # Label from context
          label_from_context <- .spl_context$value[nrow(.spl_context)]

          # Label switcher
          if (is.null(row_labels)) {
            lbl <- label_from_context
          } else if (length(row_labels) > 1) {
            if (!(label_from_context %in% names(row_labels))) {
              stop(
                "Replacing the labels in do_summarize_row_groups needs a named vector ",
                "that contains the split values. In the current split variable ",
                .spl_context$split[nrow(.spl_context)],
                " the split value ", label_from_context, " is not in",
                " row_labels names: ", paste(names(row_labels), collapse = ", ")
              )
            }
            lbl <- unlist(row_labels[label_from_context])
          } else {
            # A single string is used as the label of every row
            lbl <- row_labels
          }

          # Cell creation
          rcell(res,
            label = lbl,
            format = formats_v[names(formats_v) == stat][[1]],
            format_na_str = cell[["na_str"]],
            indent_mod = indent_mod,
            align = .aligns
          )
        }
      },
      stat = .stats,
      use_cache = cache,
      cache_env = replicate(length(.stats), env)
    )

    # Main call to rtables
    analyze_colvars(lyt,
      afun = afun_list,
      na_str = na_str,
      nested = nested,
      extra_args = extra_args
    )
  }
}
383
# Helper function: returns the last element of the last entry of the column layout of `lyt`.
get_last_col_split <- function(lyt) {
  last_col_layout <- tail(clayout(lyt), 1)[[1]]
  tail(last_col_layout, 1)[[1]]
}

1		#' Odds ratio estimation
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' The analyze function [estimate_odds_ratio()] creates a layout element to compare bivariate responses between
6		#' two groups by estimating an odds ratio and its confidence interval.
7		#'
8		#' The primary analysis variable specified by `vars` is the group variable. Additional variables can be included in the
9		#' analysis via the `variables` argument, which accepts `arm`, an arm variable, and `strata`, a stratification variable.
10		#' If more than two arm levels are present, they can be combined into two groups using the `groups_list` argument.
11		#'
12		#' @inheritParams split_cols_by_groups
13		#' @inheritParams argument_convention
14		#' @param .stats (`character`)\cr statistics to select for the table.
15		#'
16		#' Options are: ``r shQuote(get_stats("estimate_odds_ratio"), type = "sh")``
17		#' @param method (`string`)\cr whether to use the correct (`"exact"`) calculation in the conditional likelihood or one
18		#' of the approximations. See [survival::clogit()] for details.
19		#'
20		#' @note
21		#' * This function uses logistic regression for unstratified analyses, and conditional logistic regression for
22		#' stratified analyses. The Wald confidence interval is calculated with the specified confidence level.
23		#' * For stratified analyses, there is currently no implementation for conditional likelihood confidence intervals,
24		#' therefore the likelihood confidence interval is not available as an option.
25		#' * When `vars` contains only responders or non-responders no odds ratio estimation is possible so the returned
26		#' values will be `NA`.
27		#'
28		#' @seealso Relevant helper function [h_odds_ratio()].
29		#'
30		#' @name odds_ratio
31		#' @order 1
32		NULL
33
#' @describeIn odds_ratio Statistics function which estimates the odds ratio
#'   between a treatment and a control. A `variables` list with `arm` and `strata`
#'   variable names must be passed if a stratified analysis is required.
#'
#' @return
#' * `s_odds_ratio()` returns a named list with the statistics `or_ci`
#'   (containing `est`, `lcl`, and `ucl`) and `n_tot`.
#'
#' @examples
#' # Unstratified analysis.
#' s_odds_ratio(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta
#' )
#'
#' # Stratified analysis.
#' s_odds_ratio(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta,
#'   variables = list(arm = "grp", strata = "strata")
#' )
#'
#' @export
s_odds_ratio <- function(df,
                         .var,
                         .ref_group,
                         .in_ref_col,
                         .df_row,
                         variables = list(arm = NULL, strata = NULL),
                         conf_level = 0.95,
                         groups_list = NULL,
                         method = "exact",
                         ...) {
  # Empty statistics are returned as-is for the reference column.
  res <- list(or_ci = numeric(), n_tot = numeric())

  if (!.in_ref_col) {
    assert_proportion_value(conf_level)
    assert_df_with_variables(df, list(rsp = .var))
    assert_df_with_variables(.ref_group, list(rsp = .var))

    if (is.null(variables$strata)) {
      # Unstratified: stack reference and current column into a two-level data set.
      group_sizes <- c(nrow(.ref_group), nrow(df))
      two_group_data <- data.frame(
        rsp = c(.ref_group[[.var]], df[[.var]]),
        grp = factor(
          rep(c("ref", "Not-ref"), group_sizes),
          levels = c("ref", "Not-ref")
        )
      )
      res <- or_glm(two_group_data, conf_level = conf_level)
    } else {
      assert_df_with_variables(.df_row, c(list(rsp = .var), variables))
      checkmate::assert_subset(method, c("exact", "approximate", "efron", "breslow"), empty.ok = FALSE)

      # The group variable prepared for clogit must be synchronised with combination groups definition.
      ref_levels <- as.character(unique(.ref_group[[variables$arm]]))
      trt_levels <- as.character(unique(df[[variables$arm]]))

      if (is.null(groups_list)) {
        ref_grp <- ref_levels
        trt_grp <- trt_levels
        grp <- stats::relevel(factor(.df_row[[variables$arm]]), ref = ref_grp)
      } else {
        # Flags the combined groups that contain all of the given arm levels.
        contains_all <- function(arm_levels) {
          vapply(
            X = groups_list,
            FUN.VALUE = TRUE,
            FUN = function(x) all(arm_levels %in% x)
          )
        }

        # If more than one level in reference col.
        ref_grp <- names(groups_list)[contains_all(ref_levels)]
        # If more than one level in treatment col.
        trt_grp <- names(groups_list)[contains_all(trt_levels)]

        grp <- combine_levels(.df_row[[variables$arm]], levels = ref_levels, new_level = ref_grp)
        grp <- combine_levels(grp, levels = trt_levels, new_level = trt_grp)
      }

      # The reference level in `grp` must be the same as in the `rtables` column split.
      stratified_data <- data.frame(
        rsp = .df_row[[.var]],
        grp = grp,
        strata = interaction(.df_row[variables$strata])
      )
      all_groups_fit <- or_clogit(stratified_data, conf_level = conf_level, method = method)

      # Keep only the comparison of the current column against the reference.
      checkmate::assert_string(trt_grp)
      checkmate::assert_subset(trt_grp, names(all_groups_fit$or_ci))
      res$or_ci <- all_groups_fit$or_ci[[trt_grp]]
      res$n_tot <- all_groups_fit$n_tot
    }
  }

  estimate_is_missing <- "est" %in% names(res$or_ci) && is.na(res$or_ci[["est"]])
  if (estimate_is_missing && method != "approximate") {
    warning(
      "Unable to compute the odds ratio estimate. Please try re-running the function with ",
      'parameter `method` set to "approximate".'
    )
  }

  res$or_ci <- formatters::with_label(
    x = res$or_ci,
    label = paste0("Odds Ratio (", 100 * conf_level, "% CI)")
  )
  res$n_tot <- formatters::with_label(
    x = res$n_tot,
    label = "Total n"
  )

  res
}
154
#' @describeIn odds_ratio Formatted analysis function which is used as `afun` in `estimate_odds_ratio()`.
#'
#' @return
#' * `a_odds_ratio()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_odds_ratio(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta
#' )
#'
#' @export
a_odds_ratio <- function(df,
                         ...,
                         .stats = NULL,
                         .stat_names = NULL,
                         .formats = NULL,
                         .labels = NULL,
                         .indent_mods = NULL) {
  # Separate the additional layout parameters from the arguments of the statistics function.
  # NOTE(review): `retrieve_extra_afun_params()` presumably looks these names up in this
  # function's frame, so the call is kept directly in this body - confirm before moving it.
  dots <- list(...)
  afun_params <- retrieve_extra_afun_params(names(dots$.additional_fun_parameters))
  dots$.additional_fun_parameters <- NULL

  # Split the requested statistics into standard names and user-defined functions
  stats_split <- .split_std_from_custom_stats(.stats)
  custom_fns <- stats_split$custom_stats

  # Run the default and the custom statistics functions on the same arguments
  computed <- .apply_stat_functions(
    default_stat_fnc = s_odds_ratio,
    custom_stat_fnc_list = custom_fns,
    args_list = c(
      df = list(df),
      afun_params,
      dots
    )
  )

  # Resolve the final statistics and their formatting defaults
  stat_ids <- get_stats("estimate_odds_ratio",
    stats_in = stats_split$all_stats,
    custom_stats_in = names(custom_fns)
  )
  computed <- computed[stat_ids]
  cell_formats <- get_formats_from_stats(stat_ids, .formats)
  cell_labels <- get_labels_from_stats(
    stat_ids, .labels,
    tern_defaults = c(lapply(computed, attr, "label"), tern_default_labels)
  )
  cell_indents <- get_indents_from_stats(stat_ids, .indent_mods)

  # Auto format handling
  cell_formats <- apply_auto_formatting(cell_formats, computed, afun_params$.df_row, afun_params$.var)

  # Get and check statistical names
  stat_names <- get_stat_names(computed, .stat_names)

  # The labels are used both as row names and as row labels
  flat_labels <- .unlist_keep_nulls(cell_labels)

  in_rows(
    .list = computed,
    .formats = cell_formats,
    .names = flat_labels,
    .stat_names = stat_names,
    .labels = flat_labels,
    .indent_mods = .unlist_keep_nulls(cell_indents)
  )
}
226
#' @describeIn odds_ratio Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `estimate_odds_ratio()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_odds_ratio()` to the table layout.
#'
#' @examples
#' set.seed(12)
#' dta <- data.frame(
#'   rsp = sample(c(TRUE, FALSE), 100, TRUE),
#'   grp = factor(rep(c("A", "B"), each = 50), levels = c("A", "B")),
#'   strata = factor(sample(c("C", "D"), 100, TRUE))
#' )
#'
#' l <- basic_table() %>%
#'   split_cols_by(var = "grp", ref_group = "B") %>%
#'   estimate_odds_ratio(vars = "rsp")
#'
#' build_table(l, df = dta)
#'
#' @export
#' @order 2
estimate_odds_ratio <- function(lyt,
                                vars,
                                variables = list(arm = NULL, strata = NULL),
                                conf_level = 0.95,
                                groups_list = NULL,
                                method = "exact",
                                na_str = default_na_str(),
                                nested = TRUE,
                                ...,
                                table_names = vars,
                                show_labels = "hidden",
                                var_labels = vars,
                                .stats = "or_ci",
                                .stat_names = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # `.stats` is always forwarded; the other formatting arguments only when they are supplied.
  optional_fmt_args <- Filter(
    Negate(is.null),
    list(
      .stat_names = .stat_names,
      .formats = .formats,
      .labels = .labels,
      .indent_mods = .indent_mods
    )
  )

  # Formatting arguments first, then the arguments of the statistics function
  extra_args <- c(
    list(.stats = .stats),
    optional_fmt_args,
    variables = list(variables),
    conf_level = list(conf_level),
    groups_list = list(groups_list),
    method = list(method),
    ...
  )

  # Append additional info from layout to the analysis function: the local copy of
  # `a_odds_ratio` gets the extra parameters added to its formals.
  additional_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- additional_params
  formals(a_odds_ratio) <- c(formals(a_odds_ratio), additional_params)

  analyze(
    lyt = lyt,
    vars = vars,
    afun = a_odds_ratio,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names
  )
}
298
299		#' Helper functions for odds ratio estimation
300		#'
301		#' @description `r lifecycle::badge("stable")`
302		#'
303		#' Functions to calculate odds ratios in [estimate_odds_ratio()].
304		#'
305		#' @inheritParams odds_ratio
306		#' @inheritParams argument_convention
307		#' @param data (`data.frame`)\cr data frame containing at least the variables `rsp` and `grp`, and optionally
308		#' `strata` for [or_clogit()].
309		#'
310		#' @return A named `list` of elements `or_ci` and `n_tot`.
311		#'
312		#' @seealso [odds_ratio]
313		#'
314		#' @name h_odds_ratio
315		NULL
316
#' @describeIn h_odds_ratio Estimates the odds ratio based on [stats::glm()]. Note that there must be
#'   exactly 2 groups in `data` as specified by the `grp` variable.
#'
#' @examples
#' # Data with 2 groups.
#' data <- data.frame(
#'   rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1)),
#'   grp = letters[c(1, 1, 1, 2, 2, 2, 1, 2)],
#'   strata = letters[c(1, 2, 1, 2, 2, 2, 1, 2)],
#'   stringsAsFactors = TRUE
#' )
#'
#' # Odds ratio based on glm.
#' or_glm(data, conf_level = 0.95)
#'
#' @export
or_glm <- function(data, conf_level) {
  # Input checks
  checkmate::assert_logical(data$rsp)
  assert_proportion_value(conf_level)
  assert_df_with_variables(data, list(rsp = "rsp", grp = "grp"))
  checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))

  # The group variable must be a factor with exactly two levels
  data$grp <- as_factor_keep_attributes(data$grp)
  assert_df_with_factors(data, list(val = "grp"), min.levels = 2, max.levels = 2)

  # Logistic regression of the response on the group
  model_formula <- stats::as.formula("rsp ~ grp")
  model_fit <- stats::glm(
    formula = model_formula,
    data = data,
    family = stats::binomial(link = "logit")
  )

  # Work on the log-odds scale and drop the intercept (first coefficient / first row),
  # then back-transform the estimate and its confidence limits together.
  log_or <- stats::coef(model_fit)[-1]
  log_ci <- stats::confint.default(model_fit, level = conf_level)[-1, , drop = FALSE]
  values <- stats::setNames(exp(c(log_or, log_ci)), c("est", "lcl", "ucl"))

  # Number of rows used by the fitted model
  n_tot <- stats::setNames(nrow(model_fit$model), "n_tot")

  list(or_ci = values, n_tot = n_tot)
}
358
#' @describeIn h_odds_ratio Estimates the odds ratio based on [survival::clogit()]. This is done for
#'   the whole data set including all groups, since the results are not the same as when doing
#'   pairwise comparisons between the groups.
#'
#' @examples
#' # Data with 3 groups.
#' data <- data.frame(
#'   rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0)),
#'   grp = letters[c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3)],
#'   strata = LETTERS[c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)],
#'   stringsAsFactors = TRUE
#' )
#'
#' # Odds ratio based on stratified estimation by conditional logistic regression.
#' or_clogit(data, conf_level = 0.95)
#'
#' @export
or_clogit <- function(data, conf_level, method = "exact") {
  # Input checks
  checkmate::assert_logical(data$rsp)
  assert_proportion_value(conf_level)
  assert_df_with_variables(data, list(rsp = "rsp", grp = "grp", strata = "strata"))
  checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))
  checkmate::assert_multi_class(data$strata, classes = c("factor", "character"))
  checkmate::assert_subset(method, c("exact", "approximate", "efron", "breslow"), empty.ok = FALSE)

  data$grp <- as_factor_keep_attributes(data$grp)
  data$strata <- as_factor_keep_attributes(data$strata)

  # Deviation from convention: `survival::strata` must be simply `strata`.
  model_formula <- stats::as.formula("rsp ~ grp + strata(strata)")
  model_fit <- clogit_with_tryCatch(formula = model_formula, data = data, method = method)

  # One set of OR estimate and CI per model coefficient, i.e. per comparison of
  # one group vs. the reference group, named by the group (coefficient name without `grp`).
  log_or <- stats::coef(model_fit)
  log_ci <- stats::confint(model_fit, level = conf_level)
  or_ci <- list()
  for (coef_nm in names(log_or)) {
    back_transformed <- exp(c(log_or[coef_nm], log_ci[coef_nm, , drop = TRUE]))
    group_nm <- gsub("^grp", "", x = coef_nm)
    or_ci[[group_nm]] <- stats::setNames(
      object = back_transformed,
      nm = c("est", "lcl", "ucl")
    )
  }

  list(or_ci = or_ci, n_tot = c(n_tot = model_fit$n))
}

1		#' Summarize change from baseline values or absolute baseline values
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' The analyze function [summarize_change()] creates a layout element to summarize the change from baseline or absolute
6		#' baseline values. The primary analysis variable `vars` indicates the numerical change from baseline results.
7		#'
8		#' Required secondary analysis variables `value` and `baseline_flag` can be supplied to the function via
9		#' the `variables` argument. The `value` element should be the name of the analysis value variable, and the
10		#' `baseline_flag` element should be the name of the flag variable that indicates whether or not records contain
11		#' baseline values. Depending on the baseline flag given, either the absolute baseline values (at baseline)
12		#' or the change from baseline values (post-baseline) are then summarized.
13		#'
14		#' @inheritParams argument_convention
15		#' @param .stats (`character`)\cr statistics to select for the table.
16		#'
17		#' Options are: ``r shQuote(get_stats("analyze_vars_numeric"), type = "sh")``
18		#'
19		#' @name summarize_change
20		#' @order 1
21		NULL
22
#' @describeIn summarize_change Statistics function that summarizes baseline or post-baseline visits.
#'
#' @return
#' * `s_change_from_baseline()` returns the same values returned by [s_summary.numeric()].
#'
#' @note The data in `df` must all be from either baseline or post-baseline visits. Otherwise
#'   an error will be thrown.
#'
#' @keywords internal
s_change_from_baseline <- function(df, ...) {
  # `.var` and `variables` are read from the dots, which are also forwarded to `s_summary()` below
  dots <- list(...)
  .var <- dots[[".var"]]
  variables <- dots[["variables"]]

  # Input checks: numeric value and change columns, and a baseline flag with at most one distinct value
  checkmate::assert_numeric(df[[variables$value]])
  checkmate::assert_numeric(df[[.var]])
  checkmate::assert_logical(df[[variables$baseline_flag]])
  checkmate::assert_vector(unique(df[[variables$baseline_flag]]), max.len = 1)
  assert_df_with_variables(df, c(variables, list(chg = .var)))

  # Absolute value where the baseline flag is set, change from baseline otherwise
  is_baseline <- df[[variables$baseline_flag]]
  combined <- ifelse(is_baseline, df[[variables$value]], df[[.var]])

  # `ifelse()` on an empty flag gives an empty logical; summarize it as numeric instead
  if (length(combined) == 0L && is.logical(combined)) {
    combined <- numeric(0)
  }

  s_summary(combined, ...)
}
53
#' @describeIn summarize_change Formatted analysis function which is used as `afun` in `summarize_change()`.
#'
#' @return
#' * `a_change_from_baseline()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_change_from_baseline <- function(df,
                                   ...,
                                   .stats = NULL,
                                   .stat_names = NULL,
                                   .formats = NULL,
                                   .labels = NULL,
                                   .indent_mods = NULL) {
  # Separate the additional layout parameters from the arguments of the statistics function.
  # NOTE(review): `retrieve_extra_afun_params()` presumably looks these names up in this
  # function's frame, so the call is kept directly in this body - confirm before moving it.
  dots <- list(...)
  afun_params <- retrieve_extra_afun_params(names(dots$.additional_fun_parameters))
  dots$.additional_fun_parameters <- NULL

  # Split the requested statistics into standard names and user-defined functions
  stats_split <- .split_std_from_custom_stats(.stats)
  custom_fns <- stats_split$custom_stats

  # Run the default and the custom statistics functions on the same arguments
  computed <- .apply_stat_functions(
    default_stat_fnc = s_change_from_baseline,
    custom_stat_fnc_list = custom_fns,
    args_list = c(
      df = list(df),
      afun_params,
      dots
    )
  )

  # Resolve the final statistics and their formatting defaults
  stat_ids <- get_stats("analyze_vars_numeric", stats_in = stats_split$all_stats, custom_stats_in = names(custom_fns))
  cell_formats <- get_formats_from_stats(stat_ids, .formats)
  cell_labels <- get_labels_from_stats(stat_ids, .labels)
  cell_indents <- get_indents_from_stats(stat_ids, .indent_mods)

  computed <- computed[stat_ids]

  # Auto format handling
  cell_formats <- apply_auto_formatting(cell_formats, computed, afun_params$.df_row, afun_params$.var)

  # Get and check statistical names
  stat_names <- get_stat_names(computed, .stat_names)

  # Rows are named after the label names, and labelled with the label values
  in_rows(
    .list = computed,
    .formats = cell_formats,
    .names = names(cell_labels),
    .stat_names = stat_names,
    .labels = .unlist_keep_nulls(cell_labels),
    .indent_mods = .unlist_keep_nulls(cell_indents)
  )
}
111
112		#' @describeIn summarize_change Layout-creating function which can take statistics function arguments
113		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
114		#'
115		#' @return
116		#' * `summarize_change()` returns a layout object suitable for passing to further layouting functions,
117		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
118		#' the statistics from `s_change_from_baseline()` to the table layout.
119		#'
120		#' @note To be used after a split on visits in the layout, such that each data subset only contains
121		#' either baseline or post-baseline data.
122		#'
123		#' @examples
124		#' library(dplyr)
125		#'
126		#' # Fabricate dataset
127		#' dta_test <- data.frame(
128		#' USUBJID = rep(1:6, each = 3),
129		#' AVISIT = rep(paste0("V", 1:3), 6),
130		#' ARM = rep(LETTERS[1:3], rep(6, 3)),
131		#' AVAL = c(9:1, rep(NA, 9))
132		#' ) %>%
133		#' mutate(ABLFLL = AVISIT == "V1") %>%
134		#' group_by(USUBJID) %>%
135		#' mutate(
136		#' BLVAL = AVAL[ABLFLL],
137		#' CHG = AVAL - BLVAL
138		#' ) %>%
139		#' ungroup()
140		#'
141		#' results <- basic_table() %>%
142		#' split_cols_by("ARM") %>%
143		#' split_rows_by("AVISIT") %>%
144		#' summarize_change("CHG", variables = list(value = "AVAL", baseline_flag = "ABLFLL")) %>%
145		#' build_table(dta_test)
146		#'
147		#' results
148		#'
149		#' @export
150		#' @order 2
summarize_change <- function(lyt,
                             vars,
                             variables,
                             var_labels = vars,
                             na_str = default_na_str(),
                             na_rm = TRUE,
                             nested = TRUE,
                             show_labels = "default",
                             table_names = vars,
                             section_div = NA_character_,
                             ...,
                             .stats = c("n", "mean_sd", "median", "range"),
                             .stat_names = NULL,
                             .formats = c(
                               mean_sd = "xx.xx (xx.xx)",
                               mean_se = "xx.xx (xx.xx)",
                               median = "xx.xx",
                               range = "xx.xx - xx.xx",
                               mean_pval = "xx.xx"
                             ),
                             .labels = NULL,
                             .indent_mods = NULL) {
  # `.stats` is always forwarded to the analysis function; the remaining
  # formatting arguments are forwarded only when they were actually supplied.
  optional_fmt_args <- Filter(
    Negate(is.null),
    list(
      .stat_names = .stat_names,
      .formats = .formats,
      .labels = .labels,
      .indent_mods = .indent_mods
    )
  )

  # Formatting arguments first, then the arguments of the statistics function
  # (`variables`, `na_rm`, and anything passed through `...`).
  extra_args <- c(
    list(.stats = .stats),
    optional_fmt_args,
    variables = list(variables),
    na_rm = na_rm,
    ...
  )

  # Additional parameters are stored in `extra_args` and also appended to the
  # formals of a function-local copy of `a_change_from_baseline`, so that the
  # analysis function accepts them as arguments.
  layout_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- layout_params
  formals(a_change_from_baseline) <- c(formals(a_change_from_baseline), layout_params)

  analyze(
    lyt = lyt,
    vars = vars,
    var_labels = var_labels,
    afun = a_change_from_baseline,
    extra_args = extra_args,
    na_str = na_str,
    inclNAs = !na_rm,
    nested = nested,
    show_labels = show_labels,
    table_names = table_names,
    section_div = section_div
  )
}

1		#' Count patients by most extreme post-baseline toxicity grade per direction of abnormality
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' The analyze function [count_abnormal_by_worst_grade()] creates a layout element to count patients by highest (worst)
6		#' analysis toxicity grade post-baseline for each direction, categorized by parameter value.
7		#'
8		#' This function analyzes primary analysis variable `var` which indicates toxicity grades. Additional
9		#' analysis variables that can be supplied as a list via the `variables` parameter are `id` (defaults to
10		#' `USUBJID`), a variable to indicate unique subject identifiers, `param` (defaults to `PARAM`), a variable
11		#' to indicate parameter values, and `grade_dir` (defaults to `GRADE_DIR`), a variable to indicate directions
12		#' (e.g. High or Low) for each toxicity grade supplied in `var`.
13		#'
14		#' For each combination of `param` and `grade_dir` levels, patient counts by worst
15		#' grade are calculated as follows:
16		#' * `1` to `4`: The number of patients with worst grades 1-4, respectively.
17		#' * `Any`: The number of patients with at least one abnormality (i.e. grade is not 0).
18		#'
19		#' Fractions are calculated by dividing the above counts by the number of patients with at least one
20		#' valid measurement recorded during treatment.
21		#'
22		#' Pre-processing is crucial when using this function and can be done automatically using the
23		#' [h_adlb_abnormal_by_worst_grade()] helper function. See the description of this function for details on the
24		#' necessary pre-processing steps.
25		#'
26		#' Prior to using this function in your table layout you must use [rtables::split_rows_by()] to create two row
27		#' splits, one on variable `param` and one on variable `grade_dir`.
28		#'
29		#' @inheritParams argument_convention
30		#' @param .stats (`character`)\cr statistics to select for the table.
31		#'
32		#' Options are: ``r shQuote(get_stats("abnormal_by_worst_grade"), type = "sh")``
33		#'
34		#' @seealso [h_adlb_abnormal_by_worst_grade()] which pre-processes ADLB data frames to be used in
35		#' [count_abnormal_by_worst_grade()].
36		#'
37		#' @name abnormal_by_worst_grade
38		#' @order 1
39		NULL
40
41		#' @describeIn abnormal_by_worst_grade Statistics function which counts patients by worst grade.
42		#'
43		#' @return
44		#' * `s_count_abnormal_by_worst_grade()` returns the single statistic `count_fraction` with grades 1 to 4 and
45		#' "Any" results.
46		#'
47		#' @keywords internal
s_count_abnormal_by_worst_grade <- function(df,
                                            .var = "GRADE_ANL",
                                            .spl_context,
                                            variables = list(
                                              id = "USUBJID",
                                              param = "PARAM",
                                              grade_dir = "GRADE_DIR"
                                            ),
                                            ...) {
  checkmate::assert_string(.var)
  assert_valid_factor(df[[.var]])
  assert_valid_factor(df[[variables$param]])
  assert_valid_factor(df[[variables$grade_dir]])
  assert_df_with_variables(df, c(a = .var, variables))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))

  # To verify that the `split_rows_by` are performed with correct variables.
  checkmate::assert_subset(c(variables[["param"]], variables[["grade_dir"]]), .spl_context$split)
  first_row <- .spl_context[.spl_context$split == variables[["param"]], ]

  # One result entry per non-zero grade level, plus the "Any" summary entry.
  x_lvls <- c(setdiff(levels(df[[.var]]), "0"), "Any")
  result <- split(numeric(0), factor(x_lvls))

  # Denominator: unique subjects of the `param` split's parent data that fall in the current column.
  subj <- first_row$full_parent_df[[1]][[variables[["id"]]]]
  subj_cur_col <- subj[first_row$cur_col_subset[[1]]]
  # Some subjects may have a record for high and low directions but
  # should be counted only once.
  denom <- length(unique(subj_cur_col))

  grades <- df[[.var]]
  ids <- df[[variables[["id"]]]]

  for (lvl in x_lvls) {
    # `which()` drops comparisons that evaluate to NA (missing grade), so records without a
    # grade can neither match a level nor contribute a spurious NA subject to the count.
    keep <- if (lvl != "Any") {
      which(grades == lvl)
    } else {
      which(grades != "0")
    }
    num <- length(unique(ids[keep]))
    fraction <- if (denom == 0) 0 else num / denom
    result[[lvl]] <- formatters::with_label(c(count = num, fraction = fraction), lvl)
  }

  list(count_fraction = result)
}
90
91		#' @describeIn abnormal_by_worst_grade Formatted analysis function which is used as `afun`
92		#' in `count_abnormal_by_worst_grade()`.
93		#'
94		#' @return
95		#' * `a_count_abnormal_by_worst_grade()` returns the corresponding list with formatted [rtables::CellValue()].
96		#'
97		#' @keywords internal
a_count_abnormal_by_worst_grade <- function(df,
                                            ...,
                                            .stats = NULL,
                                            .stat_names = NULL,
                                            .formats = NULL,
                                            .labels = NULL,
                                            .indent_mods = NULL) {
  # Separate the additional function parameters from the arguments meant for the statistics function
  stat_fun_args <- list(...)
  layout_params <- retrieve_extra_afun_params(names(stat_fun_args$.additional_fun_parameters))
  stat_fun_args$.additional_fun_parameters <- NULL

  # Split the requested statistics into default names and user-defined functions
  split_stats <- .split_std_from_custom_stats(.stats)
  .stats <- split_stats$all_stats
  custom_stat_functions <- split_stats$custom_stats

  # Compute the statistics (default function plus any custom ones)
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_count_abnormal_by_worst_grade,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(df = list(df), layout_params, stat_fun_args)
  )

  # Resolve formats, labels and indentation per statistic and per level
  .stats <- get_stats("abnormal_by_worst_grade", stats_in = .stats, custom_stats_in = names(custom_stat_functions))
  levels_per_stats <- lapply(x_stats, names)
  .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats)
  .labels <- get_labels_from_stats(.stats, .labels, levels_per_stats)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats)

  # Flatten the selected statistics and name them consistently with the formats
  x_stats <- setNames(.unlist_keep_nulls(x_stats[.stats]), names(.formats))

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, layout_params$.df_row, layout_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  # The flattened labels serve both as row names and as row labels
  row_labels <- .unlist_keep_nulls(.labels)
  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = row_labels,
    .stat_names = .stat_names,
    .labels = row_labels,
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}
152
153		#' @describeIn abnormal_by_worst_grade Layout-creating function which can take statistics function arguments
154		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
155		#'
156		#' @return
157		#' * `count_abnormal_by_worst_grade()` returns a layout object suitable for passing to further layouting functions,
158		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
159		#' the statistics from `s_count_abnormal_by_worst_grade()` to the table layout.
160		#'
161		#' @examples
162		#' library(dplyr)
163		#' library(forcats)
164		#' adlb <- tern_ex_adlb
165		#'
166		#' # Data is modified in order to have some parameters with grades only in one direction
167		#' # and simulate the real data.
168		#' adlb$ATOXGR[adlb$PARAMCD == "ALT" & adlb$ATOXGR %in% c("1", "2", "3", "4")] <- "-1"
169		#' adlb$ANRIND[adlb$PARAMCD == "ALT" & adlb$ANRIND == "HIGH"] <- "LOW"
170		#' adlb$WGRHIFL[adlb$PARAMCD == "ALT"] <- ""
171		#'
172		#' adlb$ATOXGR[adlb$PARAMCD == "IGA" & adlb$ATOXGR %in% c("-1", "-2", "-3", "-4")] <- "1"
173		#' adlb$ANRIND[adlb$PARAMCD == "IGA" & adlb$ANRIND == "LOW"] <- "HIGH"
174		#' adlb$WGRLOFL[adlb$PARAMCD == "IGA"] <- ""
175		#'
176		#' # Pre-processing
177		#' adlb_f <- adlb %>% h_adlb_abnormal_by_worst_grade()
178		#'
179		#' # Map excludes records without abnormal grade since they should not be displayed
180		#' # in the table.
181		#' map <- unique(adlb_f[adlb_f$GRADE_DIR != "ZERO", c("PARAM", "GRADE_DIR", "GRADE_ANL")]) %>%
182		#' lapply(as.character) %>%
183		#' as.data.frame() %>%
184		#' arrange(PARAM, desc(GRADE_DIR), GRADE_ANL)
185		#'
186		#' basic_table() %>%
187		#' split_cols_by("ARMCD") %>%
188		#' split_rows_by("PARAM") %>%
189		#' split_rows_by("GRADE_DIR", split_fun = trim_levels_to_map(map)) %>%
190		#' count_abnormal_by_worst_grade(
191		#' var = "GRADE_ANL",
192		#' variables = list(id = "USUBJID", param = "PARAM", grade_dir = "GRADE_DIR")
193		#' ) %>%
194		#' build_table(df = adlb_f)
195		#'
196		#' @export
197		#' @order 2
count_abnormal_by_worst_grade <- function(lyt,
                                          var,
                                          variables = list(
                                            id = "USUBJID",
                                            param = "PARAM",
                                            grade_dir = "GRADE_DIR"
                                          ),
                                          na_str = default_na_str(),
                                          nested = TRUE,
                                          ...,
                                          .stats = "count_fraction",
                                          .stat_names = NULL,
                                          .formats = list(count_fraction = format_count_fraction),
                                          .labels = NULL,
                                          .indent_mods = NULL) {
  # `.stats` is always forwarded; the other formatting arguments only when supplied.
  optional_fmt_args <- Filter(
    Negate(is.null),
    list(
      .stat_names = .stat_names,
      .formats = .formats,
      .labels = .labels,
      .indent_mods = .indent_mods
    )
  )

  # Formatting arguments, followed by the arguments of the statistics function.
  extra_args <- c(
    list(.stats = .stats),
    optional_fmt_args,
    variables = list(variables),
    ...
  )

  # Additional parameters are stored in `extra_args` and appended to the formals of a
  # function-local copy of the analysis function so that it accepts them as arguments.
  layout_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- layout_params
  formals(a_count_abnormal_by_worst_grade) <- c(formals(a_count_abnormal_by_worst_grade), layout_params)

  analyze(
    lyt = lyt,
    vars = var,
    afun = a_count_abnormal_by_worst_grade,
    extra_args = extra_args,
    na_str = na_str,
    nested = nested,
    show_labels = "hidden"
  )
}
239
240		#' Helper function to prepare ADLB for `count_abnormal_by_worst_grade()`
241		#'
242		#' @description `r lifecycle::badge("stable")`
243		#'
244		#' Helper function to prepare an ADLB data frame to be used as input in
245		#' [count_abnormal_by_worst_grade()]. The following pre-processing steps are applied:
246		#'
247		#' 1. `adlb` is filtered on variable `avisit` to only include post-baseline visits.
248		#' 2. `adlb` is filtered on variables `worst_flag_low` and `worst_flag_high` so that only
249		#' worst grades (in either direction) are included.
250		#' 3. From the standard lab grade variable `atoxgr`, the following two variables are derived
251		#' and added to `adlb`:
252		#' * A grade direction variable (e.g. `GRADE_DIR`). The variable takes value `"HIGH"` when
253		#' `atoxgr > 0`, `"LOW"` when `atoxgr < 0`, and `"ZERO"` otherwise.
254		#' * A toxicity grade variable (e.g. `GRADE_ANL`) where all negative values from `atoxgr` are
255		#' replaced by their absolute values.
256		#' 4. Unused factor levels are dropped from `adlb` via [droplevels()].
257		#'
258		#' @param adlb (`data.frame`)\cr ADLB data frame.
259		#' @param atoxgr (`string`)\cr name of the analysis toxicity grade variable. This must be a `factor`
260		#' variable.
261		#' @param avisit (`string`)\cr name of the analysis visit variable.
262		#' @param worst_flag_low (`string`)\cr name of the worst low lab grade flag variable. This variable is
263		#' set to `"Y"` when indicating records of worst low lab grades.
264		#' @param worst_flag_high (`string`)\cr name of the worst high lab grade flag variable. This variable is
265		#' set to `"Y"` when indicating records of worst high lab grades.
266		#'
267		#' @return `h_adlb_abnormal_by_worst_grade()` returns the `adlb` data frame with two new
268		#' variables: `GRADE_DIR` and `GRADE_ANL`.
269		#'
270		#' @seealso [abnormal_by_worst_grade]
271		#'
272		#' @examples
273		#' h_adlb_abnormal_by_worst_grade(tern_ex_adlb) %>%
274		#' dplyr::select(ATOXGR, GRADE_DIR, GRADE_ANL) %>%
275		#' head(10)
276		#'
277		#' @export
h_adlb_abnormal_by_worst_grade <- function(adlb,
                                           atoxgr = "ATOXGR",
                                           avisit = "AVISIT",
                                           worst_flag_low = "WGRLOFL",
                                           worst_flag_high = "WGRHIFL") {
  adlb %>%
    # Keep post-baseline records flagged as worst grade in at least one direction.
    dplyr::filter(
      !.data[[avisit]] %in% c("SCREENING", "BASELINE"),
      .data[[worst_flag_low]] == "Y" | .data[[worst_flag_high]] == "Y"
    ) %>%
    dplyr::mutate(
      # Direction of the abnormality; grades outside of -4..4 match no branch and become NA.
      GRADE_DIR = factor(
        dplyr::case_when(
          .data[[atoxgr]] %in% c("-1", "-2", "-3", "-4") ~ "LOW",
          .data[[atoxgr]] == "0" ~ "ZERO",
          .data[[atoxgr]] %in% c("1", "2", "3", "4") ~ "HIGH"
        ),
        levels = c("LOW", "ZERO", "HIGH")
      ),
      # Negative grade levels are recoded to their absolute value, then levels are ordered 0 to 4.
      GRADE_ANL = forcats::fct_relevel(
        forcats::fct_recode(.data[[atoxgr]], `1` = "-1", `2` = "-2", `3` = "-3", `4` = "-4"),
        c("0", "1", "2", "3", "4")
      )
    ) %>%
    # Drop factor levels that are no longer used after filtering.
    droplevels()
}

1		#' Incidence rate estimation
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' The analyze function [estimate_incidence_rate()] creates a layout element to estimate an event rate adjusted for
6		#' person-years at risk, otherwise known as incidence rate. The primary analysis variable specified via `vars` is
7		#' the person-years at risk. In addition to this variable, the `n_events` variable for number of events observed (where
8		#' a value of 1 means an event was observed and 0 means that no event was observed) must also be specified.
9		#'
10		#' @inheritParams argument_convention
11		#' @param control (`list`)\cr parameters for estimation details, specified by using
12		#' the helper function [control_incidence_rate()]. Possible parameter options are:
13		#' * `conf_level` (`proportion`)\cr confidence level for the estimated incidence rate.
14		#' * `conf_type` (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
15		#' for confidence interval type.
16		#' * `input_time_unit` (`string`)\cr `day`, `week`, `month`, or `year` (default)
17		#' indicating time unit for data input.
18		#' * `num_pt_year` (`numeric`)\cr time unit for desired output (in person-years).
19		#' @param n_events (`string`)\cr name of integer variable indicating whether an event has been observed (1) or not (0).
20		#' @param id_var (`string`)\cr name of variable used as patient identifier if `"n_unique"` is included in `.stats`.
21		#' Defaults to `"USUBJID"`.
22		#' @param .stats (`character`)\cr statistics to select for the table.
23		#'
24		#' Options are: ``r shQuote(get_stats("estimate_incidence_rate"), type = "sh")``
25		#' @param summarize (`flag`)\cr whether the function should act as an analyze function (`summarize = FALSE`), or a
26		#' summarize function (`summarize = TRUE`). Defaults to `FALSE`.
27		#' @param label_fmt (`string`)\cr how labels should be formatted after a row split occurs if `summarize = TRUE`. The
28		#' string should use `"%s"` to represent row split levels, and `"%.labels"` to represent labels supplied to the
29		#' `.labels` argument. Defaults to `"%s - %.labels"`.
30		#'
31		#' @seealso [control_incidence_rate()] and helper functions [h_incidence_rate].
32		#'
33		#' @examples
34		#' df <- data.frame(
35		#' USUBJID = as.character(seq(6)),
36		#' CNSR = c(0, 1, 1, 0, 0, 0),
37		#' AVAL = c(10.1, 20.4, 15.3, 20.8, 18.7, 23.4),
38		#' ARM = factor(c("A", "A", "A", "B", "B", "B")),
39		#' STRATA1 = factor(c("X", "Y", "Y", "X", "X", "Y"))
40		#' )
41		#' df$n_events <- 1 - df$CNSR
42		#'
43		#' @name incidence_rate
44		#' @order 1
45		NULL
46
47		#' @describeIn incidence_rate Statistics function which estimates the incidence rate and the
48		#' associated confidence interval.
49		#'
50		#' @return
51		#' * `s_incidence_rate()` returns the following statistics:
52		#' - `person_years`: Total person-years at risk.
53		#' - `n_events`: Total number of events observed.
54		#' - `rate`: Estimated incidence rate.
55		#' - `rate_ci`: Confidence interval for the incidence rate.
56		#' - `n_unique`: Total number of patients with at least one event observed.
57		#' - `n_rate`: Total number of events observed & estimated incidence rate.
58		#'
59		#' @keywords internal
s_incidence_rate <- function(df,
                             .var,
                             ...,
                             n_events,
                             is_event = lifecycle::deprecated(),
                             id_var = "USUBJID",
                             control = control_incidence_rate()) {
  # Deprecated `is_event` is still honoured: it takes over the role of `n_events`
  # and its column is converted to numeric.
  if (lifecycle::is_present(is_event)) {
    checkmate::assert_string(is_event)
    lifecycle::deprecate_warn(
      "0.9.6", "s_incidence_rate(is_event)", "s_incidence_rate(n_events)"
    )
    n_events <- is_event
    df[[n_events]] <- as.numeric(df[[is_event]])
  }

  assert_df_with_variables(df, list(tte = .var, n_events = n_events))
  checkmate::assert_string(.var)
  checkmate::assert_string(n_events)
  checkmate::assert_string(id_var)
  checkmate::assert_numeric(df[[.var]], any.missing = FALSE)
  checkmate::assert_integerish(df[[n_events]], any.missing = FALSE)

  event_counts <- df[[n_events]]

  # Number of distinct subjects whose record has an event value of exactly 1
  n_unique <- n_available(unique(df[[id_var]][event_counts == 1]))

  time_unit <- control$input_time_unit
  num_pt_year <- control$num_pt_year
  conf_level <- control$conf_level

  # Factor converting the input time unit into years
  years_per_unit <- 1 * (time_unit == "year") +
    1 / 12 * (time_unit == "month") +
    1 / 52.14 * (time_unit == "week") +
    1 / 365.24 * (time_unit == "day")
  person_years <- sum(df[[.var]], na.rm = TRUE) * years_per_unit
  total_events <- sum(event_counts, na.rm = TRUE)

  rate_est <- h_incidence_rate(person_years, total_events, control)

  list(
    person_years = formatters::with_label(person_years, "Total patient-years at risk"),
    n_events = formatters::with_label(total_events, "Number of adverse events observed"),
    rate = formatters::with_label(rate_est$rate, paste("AE rate per", num_pt_year, "patient-years")),
    rate_ci = formatters::with_label(rate_est$rate_ci, f_conf_level(conf_level)),
    n_unique = formatters::with_label(n_unique, "Total number of patients with at least one adverse event"),
    n_rate = formatters::with_label(
      c(total_events, rate_est$rate),
      paste("Number of adverse events observed (AE rate per", num_pt_year, "patient-years)")
    )
  )
}
112
113		#' @describeIn incidence_rate Formatted analysis function which is used as `afun` in `estimate_incidence_rate()`.
114		#'
115		#' @return
116		#' * `a_incidence_rate()` returns the corresponding list with formatted [rtables::CellValue()].
117		#'
118		#' @examples
119		#' a_incidence_rate(
120		#' df,
121		#' .var = "AVAL",
122		#' .df_row = df,
123		#' n_events = "n_events"
124		#' )
125		#'
126		#' @export
a_incidence_rate <- function(df,
                             labelstr = "",
                             label_fmt = "%s - %.labels",
                             ...,
                             .stats = NULL,
                             .stat_names = NULL,
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL) {
  checkmate::assert_string(label_fmt)

  # Check for additional parameters to the statistics function
  dots_extra_args <- list(...)
  extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters))
  dots_extra_args$.additional_fun_parameters <- NULL

  # Check for user-defined functions
  default_and_custom_stats_list <- .split_std_from_custom_stats(.stats)
  .stats <- default_and_custom_stats_list$all_stats
  custom_stat_functions <- default_and_custom_stats_list$custom_stats

  # Main statistic calculations
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_incidence_rate,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      df = list(df),
      extra_afun_params,
      dots_extra_args
    )
  )

  # Fill in formatting defaults; default labels are taken from the "label" attribute of each statistic
  .stats <- get_stats("estimate_incidence_rate", stats_in = .stats, custom_stats_in = names(custom_stat_functions))
  x_stats <- x_stats[.stats]
  .formats <- get_formats_from_stats(.stats, .formats)
  .labels <- get_labels_from_stats(.stats, .labels, tern_defaults = lapply(x_stats, attr, "label"))
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  # Apply label format (only when a non-empty `labelstr` is supplied).
  # Placeholders are substituted literally (`fixed = TRUE`): the "." in "%.labels" must not act
  # as a regex wildcard, and backslashes in labels must not be read as back-references.
  if (nzchar(labelstr)) {
    row_label_fmt <- gsub("%s", labelstr, label_fmt, fixed = TRUE)
    .labels <- vapply(
      .labels,
      function(x) gsub("%.labels", x, row_label_fmt, fixed = TRUE),
      character(1)
    )
  }

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = names(.labels),
    .stat_names = .stat_names,
    .labels = .labels %>% .unlist_keep_nulls(),
    .indent_mods = .indent_mods %>% .unlist_keep_nulls()
  )
}
186
187		#' @describeIn incidence_rate Layout-creating function which can take statistics function arguments
188		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
189		#'
190		#' @return
191		#' * `estimate_incidence_rate()` returns a layout object suitable for passing to further layouting functions,
192		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
193		#' the statistics from `s_incidence_rate()` to the table layout.
194		#'
195		#' @examples
196		#' basic_table(show_colcounts = TRUE) %>%
197		#' split_cols_by("ARM") %>%
198		#' estimate_incidence_rate(
199		#' vars = "AVAL",
200		#' n_events = "n_events",
201		#' control = control_incidence_rate(
202		#' input_time_unit = "month",
203		#' num_pt_year = 100
204		#' )
205		#' ) %>%
206		#' build_table(df)
207		#'
208		#' # summarize = TRUE
209		#' basic_table(show_colcounts = TRUE) %>%
210		#' split_cols_by("ARM") %>%
211		#' split_rows_by("STRATA1", child_labels = "visible") %>%
212		#' estimate_incidence_rate(
213		#' vars = "AVAL",
214		#' n_events = "n_events",
215		#' .stats = c("n_unique", "n_rate"),
216		#' summarize = TRUE,
217		#' label_fmt = "%.labels"
218		#' ) %>%
219		#' build_table(df)
220		#'
221		#' @export
222		#' @order 2
estimate_incidence_rate <- function(lyt,
                                    vars,
                                    n_events,
                                    id_var = "USUBJID",
                                    control = control_incidence_rate(),
                                    na_str = default_na_str(),
                                    nested = TRUE,
                                    summarize = FALSE,
                                    label_fmt = "%s - %.labels",
                                    ...,
                                    show_labels = "hidden",
                                    table_names = vars,
                                    .stats = c("person_years", "n_events", "rate", "rate_ci"),
                                    .stat_names = NULL,
                                    .formats = list(rate = "xx.xx", rate_ci = "(xx.xx, xx.xx)"),
                                    .labels = NULL,
                                    .indent_mods = NULL) {
  # `.stats` is always forwarded; the other formatting arguments only when supplied.
  optional_fmt_args <- Filter(
    Negate(is.null),
    list(
      .stat_names = .stat_names,
      .formats = .formats,
      .labels = .labels,
      .indent_mods = .indent_mods
    )
  )

  # Formatting arguments, followed by the arguments of the statistics function.
  extra_args <- c(
    list(.stats = .stats),
    optional_fmt_args,
    n_events = n_events,
    id_var = id_var,
    control = list(control),
    label_fmt = label_fmt,
    ...
  )

  # Additional parameters are stored in `extra_args` and appended to the formals of a
  # function-local copy of `a_incidence_rate` so that it accepts them as arguments.
  layout_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- layout_params
  formals(a_incidence_rate) <- c(formals(a_incidence_rate), layout_params)

  if (summarize) {
    # Used as a content function on the current row split
    summarize_row_groups(
      lyt = lyt,
      var = vars,
      cfun = a_incidence_rate,
      extra_args = extra_args,
      na_str = na_str
    )
  } else {
    # Used as a regular analysis function
    analyze(
      lyt = lyt,
      vars = vars,
      afun = a_incidence_rate,
      extra_args = extra_args,
      na_str = na_str,
      nested = nested,
      show_labels = show_labels,
      table_names = table_names
    )
  }
}

1		#' Convert `rtable` objects to `ggplot` objects
2		#'
3		#' @description `r lifecycle::badge("experimental")`
4		#'
5		#' Given a [rtables::rtable()] object, performs basic conversion to a [ggplot2::ggplot()] object built using
6		#' functions from the `ggplot2` package. Any table titles and/or footnotes are ignored.
7		#'
8		#' @param tbl (`VTableTree`)\cr `rtables` table object.
9		#' @param fontsize (`numeric(1)`)\cr font size.
10		#' @param colwidths (`numeric` or `NULL`)\cr a vector of column widths. Each element's position in
11		#' `colwidths` corresponds to the column of `tbl` in the same position. If `NULL`, column widths
12		#' are calculated according to maximum number of characters per column.
13		#' @param lbl_col_padding (`numeric`)\cr additional padding to use when calculating spacing between
14		#' the first (label) column and the second column of `tbl`. If `colwidths` is specified,
15		#' the width of the first column becomes `colwidths[1] + lbl_col_padding`. Defaults to 0.
16		#'
17		#' @return A `ggplot` object.
18		#'
19		#' @examples
20		#' dta <- data.frame(
21		#' ARM = rep(LETTERS[1:3], rep(6, 3)),
22		#' AVISIT = rep(paste0("V", 1:3), 6),
23		#' AVAL = c(9:1, rep(NA, 9))
24		#' )
25		#'
26		#' lyt <- basic_table() %>%
27		#' split_cols_by(var = "ARM") %>%
28		#' split_rows_by(var = "AVISIT") %>%
29		#' analyze_vars(vars = "AVAL")
30		#'
31		#' tbl <- build_table(lyt, df = dta)
32		#'
33		#' rtable2gg(tbl)
34		#'
35		#' rtable2gg(tbl, fontsize = 15, colwidths = c(2, 1, 1, 1))
36		#'
37		#' @export
rtable2gg <- function(tbl, fontsize = 12, colwidths = NULL, lbl_col_padding = 0) {
  # Extract the string matrix, alignments, row indentation and display flags of the table
  mat <- rtables::matrix_form(tbl, indent_rownames = TRUE)
  mat_strings <- formatters::mf_strings(mat)
  mat_aligns <- formatters::mf_aligns(mat)
  mat_indent <- formatters::mf_rinfo(mat)$indent
  mat_display <- formatters::mf_display(mat)
  nlines_hdr <- formatters::mf_nlheader(mat)
  # Rows in which at least one cell is not displayed are handled separately below
  shared_hdr_rows <- which(apply(mat_display, 1, function(x) (any(!x))))

  tbl_df <- data.frame(mat_strings)
  body_rows <- seq(nlines_hdr + 1, nrow(tbl_df))
  # Convert alignments to `hjust` values: 0 for left, 1 for right, 0.5 otherwise
  mat_aligns <- apply(mat_aligns, 1:2, function(x) if (x == "left") 0 else if (x == "right") 1 else 0.5)

  # Apply indentation in first column (4 spaces per indent level)
  tbl_df[body_rows, 1] <- sapply(body_rows, function(i) {
    ind_i <- mat_indent[i - nlines_hdr] * 4
    if (ind_i > 0) paste0(paste(rep(" ", ind_i), collapse = ""), tbl_df[i, 1]) else tbl_df[i, 1]
  })

  # Get column widths
  if (is.null(colwidths)) {
    colwidths <- apply(tbl_df, 2, function(x) max(nchar(x))) + 1
  }
  tot_width <- sum(colwidths) + lbl_col_padding

  # Shared header rows are drawn separately; `drop = FALSE` keeps the two-dimensional shape
  # even if only a single row or column remains.
  if (length(shared_hdr_rows) > 0) {
    tbl_df <- tbl_df[-shared_hdr_rows, , drop = FALSE]
    mat_aligns <- mat_aligns[-shared_hdr_rows, , drop = FALSE]
  }

  # Empty canvas with a horizontal line separating header and body
  res <- ggplot(data = tbl_df) +
    theme_void() +
    scale_x_continuous(limits = c(0, tot_width)) +
    scale_y_continuous(limits = c(0, nrow(mat_strings))) +
    annotate(
      "segment",
      x = 0, xend = tot_width,
      y = nrow(mat_strings) - nlines_hdr + 0.5, yend = nrow(mat_strings) - nlines_hdr + 0.5
    )

  # If header content spans multiple columns, center over these columns
  if (length(shared_hdr_rows) > 0) {
    mat_strings[shared_hdr_rows, ] <- trimws(mat_strings[shared_hdr_rows, ])
    for (hr in shared_hdr_rows) {
      hdr_lbls <- mat_strings[1:hr, mat_display[hr, -1]]
      hdr_lbls <- matrix(hdr_lbls[nzchar(hdr_lbls)], nrow = hr)
      for (idx_hl in seq_len(ncol(hdr_lbls))) {
        cur_lbl <- tail(hdr_lbls[, idx_hl], 1)
        which_cols <- if (hr == 1) {
          which(mat_strings[hr, ] == hdr_lbls[idx_hl])
        } else { # for >2 col splits, only print labels for each unique combo of nested columns
          which(
            apply(mat_strings[1:hr, ], 2, function(x) all(x == hdr_lbls[1:hr, idx_hl]))
          )
        }
        # Horizontal extent of the spanned columns. `seq_len()` selects the columns that precede
        # the first spanned one and is empty (sum 0) when there are none, whereas `1:0` would
        # wrongly select column 1.
        line_pos <- c(
          sum(colwidths[seq_len(which_cols[1] - 1)]) + 1 + lbl_col_padding,
          sum(colwidths[1:max(which_cols)]) - 1 + lbl_col_padding
        )

        # Centered label with a line underneath
        res <- res +
          annotate(
            "text",
            x = mean(line_pos),
            y = nrow(mat_strings) + 1 - hr,
            label = cur_lbl,
            size = fontsize / .pt
          ) +
          annotate(
            "segment",
            x = line_pos[1],
            xend = line_pos[2],
            y = nrow(mat_strings) - hr + 0.5,
            yend = nrow(mat_strings) - hr + 0.5
          )
      }
    }
  }

  # Add table columns: the first column starts at x = 0, the others are placed at their column center
  for (i in seq_len(ncol(tbl_df))) {
    res <- res + annotate(
      "text",
      x = if (i == 1) 0 else sum(colwidths[1:i]) - 0.5 * colwidths[i] + lbl_col_padding,
      y = rev(seq_len(nrow(tbl_df))),
      label = tbl_df[, i],
      hjust = mat_aligns[, i],
      size = fontsize / .pt
    )
  }

  res
}
131
132		#' Convert `data.frame` object to `ggplot` object
133		#'
134		#' @description `r lifecycle::badge("experimental")`
135		#'
136		#' Given a `data.frame` object, performs basic conversion to a [ggplot2::ggplot()] object built using
137		#' functions from the `ggplot2` package.
138		#'
139		#' @param df (`data.frame`)\cr a data frame.
140		#' @param colwidths (`numeric` or `NULL`)\cr a vector of column widths. Each element's position in
141		#' `colwidths` corresponds to the column of `df` in the same position. If `NULL`, column widths
142		#' are calculated according to maximum number of characters per column.
143		#' @param font_size (`numeric(1)`)\cr font size.
144		#' @param col_labels (`flag`)\cr whether the column names (labels) of `df` should be used as the first row
145		#' of the output table.
146		#' @param col_lab_fontface (`string`)\cr font face to apply to the first row (of column labels
147		#' if `col_labels = TRUE`). Defaults to `"bold"`.
148		#' @param hline (`flag`)\cr whether a horizontal line should be printed below the first row of the table.
149		#' @param bg_fill (`string`)\cr table background fill color.
150		#'
151		#' @return A `ggplot` object.
152		#'
153		#' @examples
154		#' \dontrun{
155		#' df2gg(head(iris, 5))
156		#'
157		#' df2gg(head(iris, 5), font_size = 15, colwidths = c(1, 1, 1, 1, 1))
158		#' }
159		#' @keywords internal
df2gg <- function(df,
                  colwidths = NULL,
                  font_size = 10,
                  col_labels = TRUE,
                  col_lab_fontface = "bold",
                  hline = TRUE,
                  bg_fill = NULL) {
  # Render every cell as text; missing values are displayed as the literal string "NA".
  df <- as.data.frame(
    apply(df, 1:2, function(cell) if (is.na(cell)) "NA" else as.character(cell))
  )

  # Optionally prepend the column names as the first table row (this turns `df` into a matrix).
  if (col_labels) {
    df <- rbind(colnames(df), as.matrix(df))
  }

  # Default column widths: the widest entry (in characters) found in each column.
  if (is.null(colwidths)) {
    colwidths <- apply(df, 2, function(column) max(nchar(column), na.rm = TRUE))
  }
  tot_width <- sum(colwidths)
  n_rows <- nrow(df)

  # Empty canvas: x spans the total table width, y has one unit per table row.
  res <- ggplot(data = df) +
    theme_void() +
    scale_x_continuous(limits = c(0, tot_width)) +
    scale_y_continuous(limits = c(1, n_rows))

  if (!is.null(bg_fill)) {
    res <- res + theme(plot.background = element_rect(fill = bg_fill))
  }

  # Horizontal rule placed half a row below the first row.
  if (hline) {
    rule_y <- n_rows - 0.5
    res <- res + annotate(
      "segment",
      x = 0 + 0.2 * colwidths[2],
      xend = tot_width - 0.1 * tail(colwidths, 1),
      y = rule_y,
      yend = rule_y
    )
  }

  # Only the first row receives `col_lab_fontface`, and only when it holds the column labels.
  row_faces <- if (col_labels) {
    c(col_lab_fontface, rep("plain", n_rows - 1))
  } else {
    rep("plain", n_rows)
  }
  # Rows are drawn top-down, so the first row gets the largest y value.
  row_y <- rev(seq_len(n_rows))

  # Add one text layer per column, horizontally centered within the column's extent.
  for (i in seq_len(ncol(df))) {
    col_extent <- c(
      sum(colwidths[seq_len(i - 1)]), # empty sum (0) for the first column
      sum(colwidths[seq_len(i)])
    )
    res <- res + annotate(
      "text",
      x = mean(col_extent),
      y = row_y,
      label = df[, i],
      size = font_size / .pt,
      fontface = row_faces
    )
  }

  res
}

1		#' Subgroup treatment effect pattern (STEP) fit for binary (response) outcome
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' This fits the Subgroup Treatment Effect Pattern logistic regression models for a binary
6		#' (response) outcome. The treatment arm variable must have exactly 2 levels,
7		#' where the first one is taken as reference and the estimated odds ratios are
8		#' for the comparison of the second level vs. the first one.
9		#'
10		#' The (conditional) logistic regression model which is fit is:
11		#'
12		#' `response ~ arm * poly(biomarker, degree) + covariates + strata(strata)`
13		#'
14		#' where `degree` is specified by `control_step()`.
15		#'
16		#' @inheritParams argument_convention
17		#' @param variables (named `list` of `character`)\cr list of analysis variables:
18		#' needs `response`, `arm`, `biomarker`, and optional `covariates` and `strata`.
19		#' @param control (named `list`)\cr combined control list from [control_step()]
20		#' and [control_logistic()].
21		#'
22		#' @return A matrix of class `step`. The first part of the columns describe the
23		#' subgroup intervals used for the biomarker variable, including where the
24		#' center of the intervals are and their bounds. The second part of the
25		#' columns contain the estimates for the treatment arm comparison.
26		#'
27		#' @note For the default degree 0 the `biomarker` variable is not included in the model.
28		#'
29		#' @seealso [control_step()] and [control_logistic()] for the available
30		#' customization options.
31		#'
32		#' @examples
33		#' # Testing dataset with just two treatment arms.
34		#' library(survival)
35		#' library(dplyr)
36		#'
37		#' adrs_f <- tern_ex_adrs %>%
38		#' filter(
39		#' PARAMCD == "BESRSPI",
40		#' ARM %in% c("B: Placebo", "A: Drug X")
41		#' ) %>%
42		#' mutate(
43		#' # Reorder levels of ARM to have Placebo as reference arm for Odds Ratio calculations.
44		#' ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")),
45		#' RSP = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
46		#' SEX = factor(SEX)
47		#' )
48		#'
49		#' variables <- list(
50		#' arm = "ARM",
51		#' biomarker = "BMRKR1",
52		#' covariates = "AGE",
53		#' response = "RSP"
54		#' )
55		#'
56		#' # Fit default STEP models: Here a constant treatment effect is estimated in each subgroup.
57		#' # We use a large enough bandwidth to avoid too small subgroups and linear separation in those.
58		#' step_matrix <- fit_rsp_step(
59		#' variables = variables,
60		#' data = adrs_f,
61		#' control = c(control_logistic(), control_step(bandwidth = 0.9))
62		#' )
63		#' dim(step_matrix)
64		#' head(step_matrix)
65		#'
66		#' # Specify different polynomial degree for the biomarker interaction to use more flexible local
67		#' # models. Or specify different logistic regression options, including confidence level.
68		#' step_matrix2 <- fit_rsp_step(
69		#' variables = variables,
70		#' data = adrs_f,
71		#' control = c(control_logistic(conf_level = 0.9), control_step(bandwidth = NULL, degree = 1))
72		#' )
73		#'
74		#' # Use a global constant model. This is helpful as a reference for the subgroup models.
75		#' step_matrix3 <- fit_rsp_step(
76		#' variables = variables,
77		#' data = adrs_f,
78		#' control = c(control_logistic(), control_step(bandwidth = NULL, num_points = 2L))
79		#' )
80		#'
81		#' # It is also possible to use strata, i.e. use conditional logistic regression models.
82		#' variables2 <- list(
83		#' arm = "ARM",
84		#' biomarker = "BMRKR1",
85		#' covariates = "AGE",
86		#' response = "RSP",
87		#' strata = c("STRATA1", "STRATA2")
88		#' )
89		#'
90		#' step_matrix4 <- fit_rsp_step(
91		#' variables = variables2,
92		#' data = adrs_f,
93		#' control = c(control_logistic(), control_step(bandwidth = NULL))
94		#' )
95		#'
96		#' @export
fit_rsp_step <- function(variables,
                         data,
                         control = c(control_step(), control_logistic())) {
  assert_df_with_variables(data, variables)
  checkmate::assert_list(control, names = "named")

  # Rows without a biomarker value are dropped before the windows are derived.
  data <- data[!is.na(data[[variables$biomarker]]), ]

  window_sel <- h_step_window(x = data[[variables$biomarker]], control = control)
  centers <- window_sel$interval[, "Interval Center"]
  form <- h_step_rsp_formula(variables = variables, control = control)

  if (is.null(control$bandwidth)) {
    # No bandwidth: a single model fit on all data, evaluated at every interval center.
    estimates <- h_step_rsp_est(
      formula = form,
      data = data,
      variables = variables,
      x = centers,
      control = control
    )
  } else {
    # One model fit per window; each column of `window_sel$sel` is passed as that fit's `subset`.
    per_window <- mapply(
      FUN = h_step_rsp_est,
      x = centers,
      subset = as.list(as.data.frame(window_sel$sel)),
      MoreArgs = list(
        formula = form,
        data = data,
        variables = variables,
        control = control
      )
    )
    # `mapply()` returns one column per window; label the statistics and transpose
    # so that each window becomes a row.
    rownames(per_window) <- c("n", "logor", "se", "ci_lower", "ci_upper")
    estimates <- t(per_window)
  }

  # Interval description columns first, then the estimate columns.
  structure(
    cbind(window_sel$interval, estimates),
    class = c("step", "matrix"),
    variables = variables,
    control = control
  )
}

1		#' Estimate proportions of each level of a variable
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' The analyze & summarize function [estimate_multinomial_response()] creates a layout element to estimate the
6		#' proportion and proportion confidence interval for each level of a factor variable. The primary analysis variable,
7		#' `var`, should be a factor variable, the values of which will be used as labels within the output table.
8		#'
9		#' @inheritParams argument_convention
10		#' @param .stats (`character`)\cr statistics to select for the table.
11		#'
12		#' Options are: ``r shQuote(get_stats("estimate_multinomial_response"), type = "sh")``
13		#'
14		#' @seealso Relevant description function [d_onco_rsp_label()].
15		#'
16		#' @name estimate_multinomial_rsp
17		#' @order 1
18		NULL
19
20		#' Description of standard oncology response
21		#'
22		#' @description `r lifecycle::badge("stable")`
23		#'
24		#' Describe the oncology response in a standard way.
25		#'
26		#' @param x (`character`)\cr the standard oncology codes to be described.
27		#'
28		#' @return Response labels.
29		#'
30		#' @seealso [estimate_multinomial_rsp()]
31		#'
32		#' @examples
33		#' d_onco_rsp_label(
34		#' c("CR", "PR", "SD", "NON CR/PD", "PD", "NE", "Missing", "<Missing>", "NE/Missing")
35		#' )
36		#'
37		#' # Adding some values not considered in d_onco_rsp_label
38		#'
39		#' d_onco_rsp_label(
40		#' c("CR", "PR", "hello", "hi")
41		#' )
42		#'
43		#' @export
d_onco_rsp_label <- function(x) {
  codes <- as.character(x)

  # Dictionary of standard oncology response codes and their descriptions.
  desc <- c(
    CR = "Complete Response (CR)",
    PR = "Partial Response (PR)",
    MR = "Minimal/Minor Response (MR)",
    MRD = "Minimal Residual Disease (MRD)",
    SD = "Stable Disease (SD)",
    PD = "Progressive Disease (PD)",
    `NON CR/PD` = "Non-CR or Non-PD (NON CR/PD)",
    NE = "Not Evaluable (NE)",
    `NE/Missing` = "Missing or unevaluable",
    Missing = "Missing",
    `NA` = "Not Applicable (NA)",
    ND = "Not Done (ND)"
  )

  # Translate the known codes in one vectorized step; unknown values pass through unchanged.
  is_known <- codes %in% names(desc)
  values_label <- codes
  values_label[is_known] <- desc[codes[is_known]]
  # The result is named by the input codes.
  names(values_label) <- codes

  # Known descriptions come first (in dictionary order), then unknown values in order of appearance.
  label_levels <- c(intersect(desc, values_label), setdiff(values_label, desc))
  factor(values_label, levels = label_levels)
}
71
72		#' @describeIn estimate_multinomial_rsp Statistics function which feeds the length of `x` as number
73		#' of successes, and `.N_col` as total number of successes and failures into [s_proportion()].
74		#'
75		#' @return
76		#' * `s_length_proportion()` returns statistics from [s_proportion()].
77		#'
78		#' @examples
79		#' s_length_proportion(rep("CR", 10), .N_col = 100)
80		#' s_length_proportion(factor(character(0)), .N_col = 100)
81		#'
82		#' @export
s_length_proportion <- function(x,
                                ...,
                                .N_col) { # nolint
  # `x` must be a factor/character vector no longer than the column total, holding at most one distinct value.
  checkmate::assert_multi_class(x, classes = c("factor", "character"))
  checkmate::assert_vector(x, min.len = 0, max.len = .N_col)
  checkmate::assert_vector(unique(x), min.len = 0, max.len = 1)

  # Each record in `x` counts as a success; the rest of the column total counts as failures.
  n_success <- length(x)
  n_failure <- .N_col - n_success
  is_success <- c(rep(TRUE, n_success), rep(FALSE, n_failure))

  s_proportion(df = is_success, ...)
}
95
96		#' @describeIn estimate_multinomial_rsp Formatted analysis function which is used as `afun`
97		#' in `estimate_multinomial_response()`.
98		#'
99		#' @return
100		#' * `a_length_proportion()` returns the corresponding list with formatted [rtables::CellValue()].
101		#'
102		#' @examples
103		#' a_length_proportion(rep("CR", 10), .N_col = 100)
104		#' a_length_proportion(factor(character(0)), .N_col = 100)
105		#'
106		#' @export
a_length_proportion <- function(x,
                                ...,
                                .stats = NULL,
                                .stat_names = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Separate the layout-provided parameters from the arguments meant for the statistics function.
  # NOTE: `retrieve_extra_afun_params()` is kept as a direct call in this function body, as in the original.
  stat_fun_args <- list(...)
  extra_afun_params <- retrieve_extra_afun_params(names(stat_fun_args$.additional_fun_parameters))
  stat_fun_args$.additional_fun_parameters <- NULL

  # Split the requested statistics into default ones and user-defined functions.
  split_stats <- .split_std_from_custom_stats(.stats)
  custom_stat_functions <- split_stats$custom_stats
  .stats <- split_stats$all_stats

  # Compute all statistics.
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_length_proportion,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      x = list(x),
      extra_afun_params,
      stat_fun_args
    )
  )

  # Resolve the final set of statistics and keep only those.
  .stats <- get_stats(
    "estimate_multinomial_response",
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions)
  )
  x_stats <- x_stats[.stats]

  # Formats, labels and indentation: user input where given, defaults otherwise.
  .formats <- get_formats_from_stats(.stats, .formats)
  stat_labels <- lapply(x_stats, attr, "label")
  .labels <- get_labels_from_stats(
    .stats, .labels,
    tern_defaults = c(stat_labels, tern_default_labels)
  )
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  # Auto format handling
  .formats <- apply_auto_formatting(
    .formats,
    x_stats,
    extra_afun_params$.df_row,
    extra_afun_params$.var
  )

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  # The labels serve both as row names and as row labels.
  row_labels <- .unlist_keep_nulls(.labels)
  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = row_labels,
    .stat_names = .stat_names,
    .labels = row_labels,
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}
163
164		#' @describeIn estimate_multinomial_rsp Layout-creating function which can take statistics function arguments
165		#' and additional format arguments. This function is a wrapper for [rtables::analyze()] and
166		#' [rtables::summarize_row_groups()].
167		#'
168		#' @return
169		#' * `estimate_multinomial_response()` returns a layout object suitable for passing to further layouting functions,
170		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
171		#' the statistics from `s_length_proportion()` to the table layout.
172		#'
173		#' @examples
174		#' library(dplyr)
175		#'
176		#' # Use of the layout creating function.
177		#' dta_test <- data.frame(
178		#' USUBJID = paste0("S", 1:12),
179		#' ARM = factor(rep(LETTERS[1:3], each = 4)),
180		#' AVAL = c(A = c(1, 1, 1, 1), B = c(0, 0, 1, 1), C = c(0, 0, 0, 0))
181		#' ) %>% mutate(
182		#' AVALC = factor(AVAL,
183		#' levels = c(0, 1),
184		#' labels = c("Complete Response (CR)", "Partial Response (PR)")
185		#' )
186		#' )
187		#'
188		#' lyt <- basic_table() %>%
189		#' split_cols_by("ARM") %>%
190		#' estimate_multinomial_response(var = "AVALC")
191		#'
192		#' tbl <- build_table(lyt, dta_test)
193		#'
194		#' tbl
195		#'
196		#' @export
197		#' @order 2
estimate_multinomial_response <- function(lyt,
                                          var,
                                          na_str = default_na_str(),
                                          nested = TRUE,
                                          ...,
                                          show_labels = "hidden",
                                          table_names = var,
                                          .stats = "prop_ci",
                                          .stat_names = NULL,
                                          .formats = list(prop_ci = "(xx.xx, xx.xx)"),
                                          .labels = NULL,
                                          .indent_mods = NULL) {
  # Process standard extra arguments
  extra_args <- list(".stats" = .stats)
  if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names
  if (!is.null(.formats)) extra_args[[".formats"]] <- .formats
  if (!is.null(.labels)) extra_args[[".labels"]] <- .labels
  if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods

  # Process additional arguments to the statistic function.
  # The dots are wrapped in `list()` before combining: `c(extra_args, ...)` would splice any
  # vector- or list-valued argument into several separately named elements (e.g. `a = c(1, 2)`
  # would become `a1` and `a2`), so the statistics function would never receive it intact.
  extra_args <- c(extra_args, list(...))

  # Append additional info from layout to the analysis function
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(a_length_proportion) <- c(formals(a_length_proportion), extra_args[[".additional_fun_parameters"]])

  # One row group per level of `var`, each with a content (summary) row.
  lyt <- split_rows_by(lyt, var = var)
  lyt <- summarize_row_groups(lyt, na_str = na_str)

  # Within each level, analyze `var` with the formatted analysis function.
  analyze(
    lyt = lyt,
    vars = var,
    afun = a_length_proportion,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    show_labels = show_labels,
    table_names = table_names
  )
}

1		#' Difference test for two proportions
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' The analyze function [test_proportion_diff()] creates a layout element to test the difference between two
6		#' proportions. The primary analysis variable, `vars`, indicates whether a response has occurred for each record. See
7		#' the `method` parameter for options of methods to use to calculate the p-value. The argument `alternative`
8		#' specifies the direction of the alternative hypothesis. Additionally, a stratification variable can be
9		#' supplied via the `strata` element of the `variables` argument.
10		#'
11		#' @inheritParams argument_convention
12		#' @param method (`string`)\cr one of `chisq`, `cmh`, `cmh_wh`, `fisher`, or `schouten`;
13		#' specifies the test used to calculate the p-value.
14		#' @param .stats (`character`)\cr statistics to select for the table.
15		#'
16		#' Options are: ``r shQuote(get_stats("test_proportion_diff"), type = "sh")``
17		#'
18		#' @seealso [h_prop_diff_test]
19		#'
20		#' @name prop_diff_test
21		#' @order 1
22		NULL
23
24		#' @describeIn prop_diff_test Statistics function which tests the difference between two proportions.
25		#'
26		#' @return
27		#' * `s_test_proportion_diff()` returns a named `list` with a single item `pval` with an attribute `label`
28		#' describing the method used. The p-value tests the null hypothesis that proportions in two groups are the same.
29		#'
30		#' @examples
31		#'
32		#' ## Randomly generated example data: 100 rows with a response, a group and two factors.
33		#' nex <- 100 # Number of example rows
34		#' dta <- data.frame(
35		#' "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
36		#' "grp" = sample(c("A", "B"), nex, TRUE),
37		#' "f1" = sample(c("a1", "a2"), nex, TRUE),
38		#' "f2" = sample(c("x", "y", "z"), nex, TRUE),
39		#' stringsAsFactors = TRUE
40		#' )
41		#' s_test_proportion_diff(
42		#' df = subset(dta, grp == "A"),
43		#' .var = "rsp",
44		#' .ref_group = subset(dta, grp == "B"),
45		#' .in_ref_col = FALSE,
46		#' variables = NULL,
47		#' method = "chisq"
48		#' )
49		#'
50		#' @export
s_test_proportion_diff <- function(df,
                                   .var,
                                   .ref_group,
                                   .in_ref_col,
                                   variables = list(strata = NULL),
                                   method = c("chisq", "schouten", "fisher", "cmh", "cmh_wh"),
                                   alternative = c("two.sided", "less", "greater"),
                                   ...) {
  method <- match.arg(method)
  # In the reference column there is nothing to compare, so the p-value stays empty.
  y <- list(pval = numeric())

  if (!.in_ref_col) {
    assert_df_with_variables(df, list(rsp = .var))
    assert_df_with_variables(.ref_group, list(rsp = .var))

    # Stack reference and comparison records: response first, then the matching group indicator.
    # The names `grp`, `rsp` and `strata` are used by `table()` below as dimension names.
    rsp <- factor(
      c(.ref_group[[.var]], df[[.var]]),
      levels = c("TRUE", "FALSE")
    )
    grp <- factor(
      rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))),
      levels = c("ref", "Not-ref")
    )

    # Strata are prepared when supplied and are mandatory for the CMH methods.
    is_cmh <- method %in% c("cmh", "cmh_wh")
    if (!is.null(variables$strata) || is_cmh) {
      strata <- variables$strata
      checkmate::assert_false(is.null(strata))
      strata_vars <- stats::setNames(as.list(strata), strata)
      assert_df_with_variables(df, strata_vars)
      assert_df_with_variables(.ref_group, strata_vars)
      strata <- c(interaction(.ref_group[strata]), interaction(df[strata]))
    }

    # CMH methods use a three-way table (group x response x strata); all others a 2 x 2 table.
    tbl <- if (is_cmh) {
      table(grp, rsp, strata)
    } else {
      table(grp, rsp)
    }

    y$pval <- switch(method,
      chisq = prop_chisq(tbl, alternative = alternative),
      schouten = prop_schouten(tbl, alternative = alternative),
      fisher = prop_fisher(tbl, alternative = alternative),
      cmh = prop_cmh(tbl, alternative = alternative),
      cmh_wh = prop_cmh(tbl, alternative = alternative, transform = "wilson_hilferty")
    )
  }

  # Label the p-value with a description of the test that was used.
  pval_label <- d_test_proportion_diff(method, alternative = alternative)
  y$pval <- formatters::with_label(y$pval, pval_label)
  y
}
101
102		#' Description of the difference test between two proportions
103		#'
104		#' @description `r lifecycle::badge("stable")`
105		#'
106		#' This is an auxiliary function that describes the analysis in `s_test_proportion_diff`.
107		#'
108		#' @inheritParams s_test_proportion_diff
109		#'
110		#' @return A `string` describing the test from which the p-value is derived.
111		#'
112		#' @export
d_test_proportion_diff <- function(method, alternative = c("two.sided", "less", "greater")) {
  checkmate::assert_string(method)
  alternative <- match.arg(alternative)

  # Human-readable name of each supported test.
  method_labels <- c(
    schouten = "Chi-Squared Test with Schouten Correction",
    chisq = "Chi-Squared Test",
    cmh = "Cochran-Mantel-Haenszel Test",
    cmh_wh = "Cochran-Mantel-Haenszel Test with Wilson-Hilferty Transformation",
    fisher = "Fisher's Exact Test"
  )
  if (!method %in% names(method_labels)) {
    stop(paste(method, "does not have a description"))
  }

  # Suffix describing the direction of a one-sided test (empty for two-sided tests).
  direction_suffix <- c(
    two.sided = "",
    less = ", 1-sided, direction less",
    greater = ", 1-sided, direction greater"
  )

  paste0("p-value (", method_labels[[method]], direction_suffix[[alternative]], ")")
}
132
133		#' @describeIn prop_diff_test Formatted analysis function which is used as `afun` in `test_proportion_diff()`.
134		#'
135		#' @return
136		#' * `a_test_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
137		#'
138		#' @keywords internal
a_test_proportion_diff <- function(df,
                                   ...,
                                   .stats = NULL,
                                   .stat_names = NULL,
                                   .formats = NULL,
                                   .labels = NULL,
                                   .indent_mods = NULL) {
  stat_fun_args <- list(...)

  # Split the requested statistics into default ones and user-defined functions.
  split_stats <- .split_std_from_custom_stats(.stats)
  custom_stat_functions <- split_stats$custom_stats
  .stats <- split_stats$all_stats

  # Collect the extra parameters provided by the layout (see ?rtables::additional_fun_params).
  # NOTE: `retrieve_extra_afun_params()` is kept as a direct call in this function body, as in the original.
  extra_afun_params <- retrieve_extra_afun_params(
    names(stat_fun_args$.additional_fun_parameters)
  )
  # Once extracted, the parameter list must not be forwarded to the statistics function.
  stat_fun_args$.additional_fun_parameters <- NULL

  # Compute all statistics.
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_test_proportion_diff,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      df = list(df),
      extra_afun_params,
      stat_fun_args
    )
  )

  # Resolve the final set of statistics and keep only those.
  .stats <- get_stats(
    "test_proportion_diff",
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions)
  )
  x_stats <- x_stats[.stats]

  # Formats and indentation: user input where given, defaults otherwise.
  .formats <- get_formats_from_stats(.stats, .formats)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  # Without user labels, fall back to the (non-empty, non-missing) labels attached to the statistics.
  if (is.null(.labels)) {
    .labels <- sapply(x_stats, attr, "label")
    has_text <- nzchar(.labels)
    not_null <- !sapply(.labels, is.null)
    not_missing <- !is.na(.labels)
    .labels <- .labels[has_text & not_null & not_missing]
  }
  .labels <- get_labels_from_stats(.stats, .labels)

  # Auto format handling
  .formats <- apply_auto_formatting(
    .formats,
    x_stats,
    extra_afun_params$.df_row,
    extra_afun_params$.var
  )

  # Get and check statistical names from defaults (based on the computed statistics).
  .stat_names <- get_stat_names(x_stats, .stat_names)

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = names(.labels),
    .stat_names = .stat_names,
    .labels = .unlist_keep_nulls(.labels),
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}
207
208		#' @describeIn prop_diff_test Layout-creating function which can take statistics function arguments
209		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
210		#'
211		#' @return
212		#' * `test_proportion_diff()` returns a layout object suitable for passing to further layouting functions,
213		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
214		#' the statistics from `s_test_proportion_diff()` to the table layout.
215		#'
216		#' @examples
217		#' dta <- data.frame(
218		#' rsp = sample(c(TRUE, FALSE), 100, TRUE),
219		#' grp = factor(rep(c("A", "B"), each = 50)),
220		#' strata = factor(rep(c("V", "W", "X", "Y", "Z"), each = 20))
221		#' )
222		#'
223		#' # With `rtables` pipelines.
224		#' l <- basic_table() %>%
225		#' split_cols_by(var = "grp", ref_group = "B") %>%
226		#' test_proportion_diff(
227		#' vars = "rsp",
228		#' method = "cmh", variables = list(strata = "strata")
229		#' )
230		#'
231		#' build_table(l, df = dta)
232		#'
233		#' @export
234		#' @order 2
test_proportion_diff <- function(lyt,
                                 vars,
                                 variables = list(strata = NULL),
                                 method = c("chisq", "schouten", "fisher", "cmh", "cmh_wh"),
                                 alternative = c("two.sided", "less", "greater"),
                                 var_labels = vars,
                                 na_str = default_na_str(),
                                 nested = TRUE,
                                 show_labels = "hidden",
                                 table_names = vars,
                                 section_div = NA_character_,
                                 ...,
                                 na_rm = TRUE,
                                 .stats = c("pval"),
                                 .stat_names = NULL,
                                 .formats = c(pval = "x.xxxx | (<0.0001)"),
                                 .labels = NULL,
                                 .indent_mods = c(pval = 1L)) {
  # Arguments forwarded to the statistics function.
  extra_args <- list(
    "na_rm" = na_rm,
    "variables" = variables,
    "method" = method,
    "alternative" = alternative,
    ...
  )

  # Formatting arguments are forwarded only when they were actually set (non-NULL).
  formatting_args <- list(
    ".stats" = .stats,
    ".stat_names" = .stat_names,
    ".formats" = .formats,
    ".labels" = .labels,
    ".indent_mods" = .indent_mods
  )
  extra_args <- c(extra_args, Filter(Negate(is.null), formatting_args))

  # Expose the additional layout information to the analysis function
  # (see ?rtables::additional_fun_params).
  afun_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- afun_params
  formals(a_test_proportion_diff) <- c(
    formals(a_test_proportion_diff),
    afun_params
  )

  # Main {rtables} structural call
  analyze(
    lyt = lyt,
    vars = vars,
    var_labels = var_labels,
    afun = a_test_proportion_diff,
    na_str = na_str,
    inclNAs = !na_rm,
    nested = nested,
    extra_args = extra_args,
    show_labels = show_labels,
    table_names = table_names,
    section_div = section_div
  )
}
291
292		#' Helper functions to test proportion differences
293		#'
294		#' Helper functions to implement various tests on the difference between two proportions.
295		#'
296		#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns.
297		#' @inheritParams argument_convention
298		#'
299		#' @return A p-value.
300		#'
301		#' @seealso [prop_diff_test()] for implementation of these helper functions.
302		#'
303		#' @name h_prop_diff_test
304		NULL
305
306		#' @describeIn h_prop_diff_test Performs Chi-Squared test. Internally calls [stats::prop.test()].
307		#'
308		#' @keywords internal
prop_chisq <- function(tbl, alternative = c("two.sided", "less", "greater")) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)
  # Validate `alternative` up front, as the other test helpers do. Without this, an invalid
  # value was silently accepted whenever the early return below was taken.
  alternative <- match.arg(alternative)

  # Put the response columns in a fixed order: successes ("TRUE") first, then failures ("FALSE").
  tbl <- tbl[, c("TRUE", "FALSE")]

  # If there are no responders or no non-responders at all, the p-value is set to 1.
  if (any(colSums(tbl) == 0)) {
    return(1)
  }

  # Chi-squared test without continuity correction.
  stats::prop.test(tbl, correct = FALSE, alternative = alternative)$p.value
}
317
318		#' @describeIn h_prop_diff_test Performs stratified Cochran-Mantel-Haenszel test,
319		#' using [stats::mantelhaen.test()] internally.
320		#' Note that strata with less than two observations are automatically discarded.
321		#'
322		#' @param ary (`array`, 3 dimensions)\cr array with two groups in rows, the binary response
323		#' (`TRUE`/`FALSE`) in columns, and the strata in the third dimension.
324		#' @param transform (`string`)\cr either `none` or `wilson_hilferty`; specifies whether to apply
325		#' the Wilson-Hilferty transformation of the chi-squared statistic.
326		#'
327		#' @keywords internal
prop_cmh <- function(ary,
                     alternative = c("two.sided", "less", "greater"),
                     transform = c("none", "wilson_hilferty")) {
  # Input must be a 2 x 2 x K array.
  checkmate::assert_array(ary)
  checkmate::assert_integer(c(ncol(ary), nrow(ary)), lower = 2, upper = 2)
  checkmate::assert_integer(length(dim(ary)), lower = 3, upper = 3)
  alternative <- match.arg(alternative)
  transform <- match.arg(transform)

  # Warn about sparse strata; in that case strata with at most one observation are dropped.
  n_per_stratum <- apply(ary, MARGIN = 3, sum)
  if (any(n_per_stratum < 5)) {
    warning("<5 data points in some strata. CMH test may be incorrect.")
    ary <- ary[, , n_per_stratum > 1]
  }

  cmh_res <- stats::mantelhaen.test(ary, correct = FALSE, alternative = alternative)

  # Without transformation the p-value of the CMH test is returned as is.
  if (transform == "none") {
    return(cmh_res$p.value)
  }

  # Wilson-Hilferty transformation of the chi-squared statistic.
  chisq_stat <- unname(cmh_res$statistic)
  df <- unname(cmh_res$parameter)
  scale_term <- 2 / (9 * df)
  wh_stat <- ((chisq_stat / df)^(1 / 3) - (1 - scale_term)) / sqrt(scale_term)

  # The transformed statistic is referred to the standard normal distribution.
  switch(alternative,
    two.sided = 2 * stats::pnorm(-abs(wh_stat)),
    less = stats::pnorm(wh_stat, lower.tail = FALSE),
    greater = stats::pnorm(wh_stat, lower.tail = TRUE)
  )
}
361
362		#' @describeIn h_prop_diff_test Performs the Chi-Squared test with Schouten correction.
363		#'
364		#' @seealso Schouten correction is based upon \insertCite{Schouten1980-kd;textual}{tern}.
365		#'
366		#' @keywords internal
prop_schouten <- function(tbl, alternative = c("two.sided", "less", "greater")) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)
  alternative <- match.arg(alternative)

  # Put the response columns in a fixed order: successes ("TRUE") first, then failures ("FALSE").
  tbl <- tbl[, c("TRUE", "FALSE")]

  # If there are no responders or no non-responders at all, the p-value is set to 1.
  if (any(colSums(tbl) == 0)) {
    return(1)
  }

  # Total and per-group (row) sample sizes.
  n <- sum(tbl)
  n1 <- sum(tbl[1, ])
  n2 <- sum(tbl[2, ])

  # Products of the main diagonal and of the anti-diagonal, and the column totals.
  main_prod <- prod(tbl[1, 1], tbl[2, 2])
  anti_prod <- prod(tbl[2, 1], tbl[1, 2])
  n_true <- sum(tbl[, 1])
  n_false <- sum(tbl[, 2])

  # Chi-squared statistic with Schouten correction.
  corrected_sq <- (abs(main_prod - anti_prod) - 0.5 * min(n1, n2))^2
  t_schouten <- (n - 1) * corrected_sq / (n1 * n2 * n_true * n_false)

  if (alternative == "two.sided") {
    return(stats::pchisq(t_schouten, df = 1, lower.tail = FALSE))
  }

  # One-sided p-values follow the logic in stats::prop.test: the root of the statistic is
  # signed by the difference in proportions and referred to the standard normal distribution.
  delta <- (tbl[1, 1] / n1) - (tbl[2, 1] / n2)
  z <- sign(delta) * sqrt(t_schouten)
  stats::pnorm(z, lower.tail = (alternative == "less"))
}
399
400		#' @describeIn h_prop_diff_test Performs the Fisher's exact test. Internally calls [stats::fisher.test()].
401		#'
402		#' @keywords internal
prop_fisher <- function(tbl, alternative = c("two.sided", "less", "greater")) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)
  # Resolve the default here, because stats::fisher.test does not handle the full choice vector.
  alternative <- match.arg(alternative)

  # Put the response columns in a fixed order: successes ("TRUE") first, then failures ("FALSE").
  ordered_tbl <- tbl[, c("TRUE", "FALSE")]
  fisher_res <- stats::fisher.test(ordered_tbl, alternative = alternative)
  fisher_res$p.value
}

1		#' Count patients with toxicity grades that have worsened from baseline by highest grade post-baseline
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' The analyze function [count_abnormal_lab_worsen_by_baseline()] creates a layout element to count patients with
6		#' analysis toxicity grades which have worsened from baseline, categorized by highest (worst) grade post-baseline.
7		#'
8		#' This function analyzes primary analysis variable `var` which indicates analysis toxicity grades. Additional
9		#' analysis variables that can be supplied as a list via the `variables` parameter are `id` (defaults to `USUBJID`),
10		#' a variable to indicate unique subject identifiers, `baseline_var` (defaults to `BTOXGR`), a variable to indicate
11		#' baseline toxicity grades, and `direction_var` (defaults to `GRADDIR`), a variable to indicate toxicity grade
12		#' directions of interest to include (e.g. `"H"` (high), `"L"` (low), or `"B"` (both)).
13		#'
14		#' For the direction(s) specified in `direction_var`, patient counts by worst grade for patients who have
15		#' worsened from baseline are calculated as follows:
16		#' * `1` to `4`: The number of patients who have worsened from their baseline grades with worst
17		#' grades 1-4, respectively.
18		#' * `Any`: The total number of patients who have worsened from their baseline grades.
19		#'
20		#' Fractions are calculated by dividing the above counts by the number of patients whose analysis toxicity grades
21		#' have worsened from baseline toxicity grades during treatment.
22		#'
23		#' Prior to using this function in your table layout you must use [rtables::split_rows_by()] to create a row
24		#' split on variable `direction_var`.
25		#'
26		#' @inheritParams argument_convention
27		#' @param variables (named `list` of `string`)\cr list of additional analysis variables including:
28		#' * `id` (`string`)\cr subject variable name.
29		#' * `baseline_var` (`string`)\cr name of the data column containing baseline toxicity variable.
30		#' * `direction_var` (`string`)\cr name of the data column containing the toxicity grade direction of interest.
31		#' @param .stats (`character`)\cr statistics to select for the table.
32		#'
33		#' Options are: ``r shQuote(get_stats("abnormal_lab_worsen_by_baseline"), type = "sh")``
34		#' @param table_names `r lifecycle::badge("deprecated")` this parameter has no effect.
35		#'
36		#' @seealso Relevant helper functions [h_adlb_worsen()] and [h_worsen_counter()] which are used within
37		#' [s_count_abnormal_lab_worsen_by_baseline()] to process input data.
38		#'
39		#' @name abnormal_lab_worsen_by_baseline
40		#' @order 1
41		NULL
42
43		#' @describeIn abnormal_lab_worsen_by_baseline Statistics function for patients whose worst post-baseline
44		#' lab grades are worse than their baseline grades.
45		#'
46		#' @return
47		#' * `s_count_abnormal_lab_worsen_by_baseline()` returns the counts and fraction of patients whose worst
48		#' post-baseline lab grades are worse than their baseline grades, for post-baseline worst grades
49		#' "1", "2", "3", "4" and "Any".
50		#'
51		#' @keywords internal
s_count_abnormal_lab_worsen_by_baseline <- function(df,
                                                    .var = "ATOXGR",
                                                    variables = list(
                                                      id = "USUBJID",
                                                      baseline_var = "BTOXGR",
                                                      direction_var = "GRADDR"
                                                    ),
                                                    ...) {
  # Argument validation: `.var` and each entry of `variables` must be a single string,
  # and `variables` must contain exactly the three expected names.
  checkmate::assert_string(.var)
  checkmate::assert_set_equal(names(variables), c("id", "baseline_var", "direction_var"))
  checkmate::assert_string(variables$id)
  checkmate::assert_string(variables$baseline_var)
  checkmate::assert_string(variables$direction_var)

  # All referenced columns must be present in `df`.
  assert_df_with_variables(df, c(aval = .var, variables[1:3]))
  assert_list_of_variables(variables)

  # The counting itself is delegated to the helper.
  h_worsen_counter(
    df = df,
    id = variables$id,
    .var = .var,
    baseline_var = variables$baseline_var,
    direction_var = variables$direction_var
  )
}
70
71		#' @describeIn abnormal_lab_worsen_by_baseline Formatted analysis function which is used as `afun`
72		#' in `count_abnormal_lab_worsen_by_baseline()`.
73		#'
74		#' @return
75		#' * `a_count_abnormal_lab_worsen_by_baseline()` returns the corresponding list with
76		#' formatted [rtables::CellValue()].
77		#'
78		#' @keywords internal
a_count_abnormal_lab_worsen_by_baseline <- function(df,
                                                    ...,
                                                    .stats = NULL,
                                                    .stat_names = NULL,
                                                    .formats = NULL,
                                                    .labels = NULL,
                                                    .indent_mods = NULL) {
  # Separate the layout-provided parameters from the arguments meant for the statistics function.
  # `retrieve_extra_afun_params()` is called directly from this body, as in every afun of this file.
  stat_fun_args <- list(...)
  extra_afun_params <- retrieve_extra_afun_params(names(stat_fun_args$.additional_fun_parameters))
  stat_fun_args$.additional_fun_parameters <- NULL

  # Split requested statistics into built-in names and user-defined functions.
  split_stats <- .split_std_from_custom_stats(.stats)
  .stats <- split_stats$all_stats
  custom_stat_functions <- split_stats$custom_stats

  # Compute the statistics.
  stat_call_args <- c(df = list(df), extra_afun_params, stat_fun_args)
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_count_abnormal_lab_worsen_by_baseline,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = stat_call_args
  )

  # Resolve statistic names, then formats, labels and indentation per statistic level.
  .stats <- get_stats(
    "abnormal_lab_worsen_by_baseline",
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions)
  )
  levels_per_stats <- lapply(x_stats, names)
  .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats)
  .labels <- get_labels_from_stats(.stats, .labels, levels_per_stats)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats)

  # Flatten the selected statistics and align their names with the resolved formats.
  x_stats <- setNames(.unlist_keep_nulls(x_stats[.stats]), names(.formats))

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  flat_labels <- .unlist_keep_nulls(.labels)
  flat_indent_mods <- .unlist_keep_nulls(.indent_mods)

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = flat_labels,
    .stat_names = .stat_names,
    .labels = flat_labels,
    .indent_mods = flat_indent_mods
  )
}
137
138		#' @describeIn abnormal_lab_worsen_by_baseline Layout-creating function which can take statistics function
139		#' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
140		#'
141		#' @return
142		#' * `count_abnormal_lab_worsen_by_baseline()` returns a layout object suitable for passing to further layouting
143		#' functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted
144		#' rows containing the statistics from `s_count_abnormal_lab_worsen_by_baseline()` to the table layout.
145		#'
146		#' @examples
147		#' library(dplyr)
148		#'
149		#' # The direction variable, GRADDR, is based on metadata
150		#' adlb <- tern_ex_adlb %>%
151		#' mutate(
152		#' GRADDR = case_when(
153		#' PARAMCD == "ALT" ~ "B",
154		#' PARAMCD == "CRP" ~ "L",
155		#' PARAMCD == "IGA" ~ "H"
156		#' )
157		#' ) %>%
158		#' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
159		#'
160		#' df <- h_adlb_worsen(
161		#' adlb,
162		#' worst_flag_low = c("WGRLOFL" = "Y"),
163		#' worst_flag_high = c("WGRHIFL" = "Y"),
164		#' direction_var = "GRADDR"
165		#' )
166		#'
167		#' basic_table() %>%
168		#' split_cols_by("ARMCD") %>%
169		#' add_colcounts() %>%
170		#' split_rows_by("PARAMCD") %>%
171		#' split_rows_by("GRADDR") %>%
172		#' count_abnormal_lab_worsen_by_baseline(
173		#' var = "ATOXGR",
174		#' variables = list(
175		#' id = "USUBJID",
176		#' baseline_var = "BTOXGR",
177		#' direction_var = "GRADDR"
178		#' )
179		#' ) %>%
180		#' append_topleft("Direction of Abnormality") %>%
181		#' build_table(df = df, alt_counts_df = tern_ex_adsl)
182		#'
183		#' @export
184		#' @order 2
count_abnormal_lab_worsen_by_baseline <- function(lyt,
                                                  var,
                                                  variables = list(
                                                    id = "USUBJID",
                                                    baseline_var = "BTOXGR",
                                                    direction_var = "GRADDR"
                                                  ),
                                                  na_str = default_na_str(),
                                                  nested = TRUE,
                                                  ...,
                                                  table_names = lifecycle::deprecated(),
                                                  .stats = "fraction",
                                                  .stat_names = NULL,
                                                  .formats = list(fraction = format_fraction),
                                                  .labels = NULL,
                                                  .indent_mods = NULL) {
  checkmate::assert_string(var)

  # `table_names` is deprecated: warn when it is supplied, otherwise ignore it.
  if (lifecycle::is_present(table_names)) {
    lifecycle::deprecate_warn(
      "0.9.8", "count_abnormal_lab_worsen_by_baseline(table_names)",
      details = "The argument has no effect on the output."
    )
  }

  # Standard formatting arguments: `.stats` is always forwarded, the others only when not NULL.
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  extra_args <- c(list(".stats" = .stats), Filter(Negate(is.null), optional_args))

  # Arguments for the statistics function.
  extra_args <- c(extra_args, "variables" = list(variables), ...)

  # Extend a local copy of the analysis function with the layout-provided parameters.
  additional_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- additional_params
  afun <- a_count_abnormal_lab_worsen_by_baseline
  formals(afun) <- c(formals(afun), additional_params)

  analyze(
    lyt = lyt,
    vars = var,
    afun = afun,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    show_labels = "hidden"
  )
}
237
238		#' Helper function to prepare ADLB with worst labs
239		#'
240		#' @description `r lifecycle::badge("stable")`
241		#'
242		#' Helper function to prepare a `df` for generating the patient count shift table.
243		#'
244		#' @param adlb (`data.frame`)\cr ADLB data frame.
245		#' @param worst_flag_low (named `vector`)\cr worst low post-baseline lab grade flag variable. See how this is
246		#' implemented in the following examples.
247		#' @param worst_flag_high (named `vector`)\cr worst high post-baseline lab grade flag variable. See how this is
248		#' implemented in the following examples.
249		#' @param direction_var (`string`)\cr name of the direction variable specifying the direction of the shift table of
250		#' interest. Only lab records flagged by `L`, `H` or `B` are included in the shift table.
251		#' * `L`: low direction only
252		#' * `H`: high direction only
253		#' * `B`: both low and high directions
254		#'
255		#' @return `h_adlb_worsen()` returns the `adlb` `data.frame` containing only the
256		#' worst labs specified according to `worst_flag_low` or `worst_flag_high` for the
257		#' direction specified according to `direction_var`. For instance, for a lab that is
258		#' needed for the low direction only, only records flagged by `worst_flag_low` are
259		#' selected. For a lab that is needed for both low and high directions, the worst
260		#' low records are selected for the low direction, and the worst high records are selected
261		#' for the high direction.
262		#'
263		#' @seealso [abnormal_lab_worsen_by_baseline]
264		#'
265		#' @examples
266		#' library(dplyr)
267		#'
268		#' # The direction variable, GRADDR, is based on metadata
269		#' adlb <- tern_ex_adlb %>%
270		#' mutate(
271		#' GRADDR = case_when(
272		#' PARAMCD == "ALT" ~ "B",
273		#' PARAMCD == "CRP" ~ "L",
274		#' PARAMCD == "IGA" ~ "H"
275		#' )
276		#' ) %>%
277		#' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
278		#'
279		#' df <- h_adlb_worsen(
280		#' adlb,
281		#' worst_flag_low = c("WGRLOFL" = "Y"),
282		#' worst_flag_high = c("WGRHIFL" = "Y"),
283		#' direction_var = "GRADDR"
284		#' )
285		#'
286		#' @export
h_adlb_worsen <- function(adlb,
                          worst_flag_low = NULL,
                          worst_flag_high = NULL,
                          direction_var) {
  checkmate::assert_string(direction_var)

  # Without any worst-grade flag no record can be selected. Previously this case fell through
  # every branch and failed with "object 'out' not found"; fail early with a clear message instead.
  if (is.null(worst_flag_low) && is.null(worst_flag_high)) {
    stop(
      "At least one of `worst_flag_low` or `worst_flag_high` must be specified.",
      call. = FALSE
    )
  }

  checkmate::assert_subset(as.character(unique(adlb[[direction_var]])), c("B", "L", "H"))
  assert_df_with_variables(adlb, list("Col" = direction_var))

  # The flag columns needed depend on which directions are present in the data.
  directions <- unique(adlb[[direction_var]])

  if (any(directions == "H")) {
    assert_df_with_variables(adlb, list("High" = names(worst_flag_high)))
  }

  if (any(directions == "L")) {
    assert_df_with_variables(adlb, list("Low" = names(worst_flag_low)))
  }

  if (any(directions == "B")) {
    assert_df_with_variables(
      adlb,
      list(
        "Low" = names(worst_flag_low),
        "High" = names(worst_flag_high)
      )
    )
  }

  # extract records flagged as worst post-baseline lab, either low or high or both
  worst_flag <- c(worst_flag_low, worst_flag_high)
  flagged_positions <- Map(
    function(flag_col, flag_value) which(adlb[[flag_col]] == flag_value),
    names(worst_flag),
    worst_flag
  )
  adlb_f <- adlb[Reduce(union, flagged_positions), ]

  # For one direction code ("H", "L" or "B") and one flag: take the records with that code,
  # relabel the direction column as "High"/"Low" and keep only the rows carrying the flag.
  # Returns NULL when the flag was not supplied.
  select_worst <- function(code, flag, label) {
    if (is.null(flag)) {
      return(NULL)
    }
    records <- adlb_f[which(adlb_f[[direction_var]] == code), ]
    records[[direction_var]] <- rep(label, nrow(records))
    records[which(records[[names(flag)]] == flag), ]
  }

  # Labs requiring both directions ("B") are used twice: once as "High" and once as "Low".
  # The stacking order (H, B-as-High, L, B-as-Low) is the same as before.
  pieces <- list(
    select_worst("H", worst_flag_high, "High"),
    select_worst("B", worst_flag_high, "High"),
    select_worst("L", worst_flag_low, "Low"),
    select_worst("B", worst_flag_low, "Low")
  )
  out <- do.call(rbind, Filter(Negate(is.null), pieces))

  # label
  formatters::var_labels(out) <- formatters::var_labels(adlb_f, fill = FALSE)

  out
}
375
376		#' Helper function to analyze patients for `s_count_abnormal_lab_worsen_by_baseline()`
377		#'
378		#' @description `r lifecycle::badge("stable")`
379		#'
380		#' Helper function to count the number of patients and the fraction of patients according to
381		#' highest post-baseline lab grade variable `.var`, baseline lab grade variable `baseline_var`,
382		#' and the direction of interest specified in `direction_var`.
383		#'
384		#' @inheritParams argument_convention
385		#' @inheritParams h_adlb_worsen
386		#' @param baseline_var (`string`)\cr name of the baseline lab grade variable.
387		#'
388		#' @return The counts and fraction of patients
389		#' whose worst post-baseline lab grades are worse than their baseline grades, for
390		#' post-baseline worst grades "1", "2", "3", "4" and "Any".
391		#'
392		#' @seealso [abnormal_lab_worsen_by_baseline]
393		#'
394		#' @examples
395		#' library(dplyr)
396		#'
397		#' # The direction variable, GRADDR, is based on metadata
398		#' adlb <- tern_ex_adlb %>%
399		#' mutate(
400		#' GRADDR = case_when(
401		#' PARAMCD == "ALT" ~ "B",
402		#' PARAMCD == "CRP" ~ "L",
403		#' PARAMCD == "IGA" ~ "H"
404		#' )
405		#' ) %>%
406		#' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
407		#'
408		#' df <- h_adlb_worsen(
409		#' adlb,
410		#' worst_flag_low = c("WGRLOFL" = "Y"),
411		#' worst_flag_high = c("WGRHIFL" = "Y"),
412		#' direction_var = "GRADDR"
413		#' )
414		#'
415		#' # `h_worsen_counter`
416		#' h_worsen_counter(
417		#' df %>% filter(PARAMCD == "CRP" & GRADDR == "Low"),
418		#' id = "USUBJID",
419		#' .var = "ATOXGR",
420		#' baseline_var = "BTOXGR",
421		#' direction_var = "GRADDR"
422		#' )
423		#'
424		#' @export
h_worsen_counter <- function(df, id, .var, baseline_var, direction_var) {
  checkmate::assert_string(id)
  checkmate::assert_string(.var)
  checkmate::assert_string(baseline_var)
  checkmate::assert_scalar(unique(df[[direction_var]]))
  checkmate::assert_subset(unique(df[[direction_var]]), c("High", "Low"))
  assert_df_with_variables(df, list(val = c(id, .var, baseline_var, direction_var)))

  # Obtain directionality BEFORE dropping missing records. If every post-baseline value is
  # missing the filtered data has no rows, and reading the direction afterwards would make the
  # `if` below fail on a zero-length condition.
  direction <- unique(df[[direction_var]])

  # remove post-baseline missing; NA is treated like "<Missing>" so that subsetting with an
  # NA condition cannot introduce all-NA rows
  post_all <- df[[.var]]
  df <- df[!is.na(post_all) & post_all != "<Missing>", ]

  if (direction == "Low") {
    grade <- -1:-4
    worst_grade <- -4
  } else if (direction == "High") {
    grade <- 1:4
    worst_grade <- 4
  }

  # Counts per worst grade. With zero rows every count is 0, so no special case is needed.
  by_grade <- lapply(grade, function(i) {
    # filter baseline values that is less than i or <Missing>
    df_temp <- df[df[[baseline_var]] %in% c((i + sign(i) * -1):(-1 * worst_grade), "<Missing>"), ]
    # num: number of patients with post-baseline worst lab equal to i
    num <- length(unique(df_temp[df_temp[[.var]] %in% i, id, drop = TRUE]))
    # denom: number of patients with baseline values less than i or <missing> and post-baseline in the same direction
    denom <- length(unique(df_temp[[id]]))
    c(num = num, denom = denom)
  })
  names(by_grade) <- as.character(seq_along(by_grade))

  # baseline grade less 4 or missing
  df_temp <- df[!df[[baseline_var]] %in% worst_grade, ]

  # denom: number of patients with baseline values less than 4 or <missing> and post-baseline in the same direction
  denom <- length(unique(df_temp[, id, drop = TRUE]))

  baseline <- df_temp[[baseline_var]]
  post <- df_temp[[.var]]
  in_direction <- post %in% grade

  # condition 1: missing baseline and in the direction of abnormality
  con1 <- which(baseline == "<Missing>" & in_direction)

  # condition 2: if post-baseline values are present then post-baseline values must be worse than baseline.
  # `rows_nm` are positions within `df_temp`; the comparison result is mapped back through them so
  # that `con1` and `con2` index the same data frame.
  rows_nm <- which(baseline != "<Missing>" & in_direction)
  post_num <- as.numeric(as.character(post[rows_nm]))
  baseline_num <- as.numeric(as.character(baseline[rows_nm]))
  if (direction == "Low") {
    con2 <- rows_nm[which(post_num < baseline_num)]
  } else {
    con2 <- rows_nm[which(post_num > baseline_num)]
  }

  # number of patients satisfy either conditions 1 or 2
  num <- length(unique(df_temp[union(con1, con2), id, drop = TRUE]))

  list(fraction = c(by_grade, list("Any" = c(num = num, denom = denom))))
}

1		#' Count the number of patients with particular flags
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' The analyze function [count_patients_with_flags()] creates a layout element to calculate counts of patients for
6		#' which user-specified flags are present.
7		#'
8		#' This function analyzes primary analysis variable `var` which indicates unique subject identifiers. Flag
9		#' variables to analyze are specified by the user via the `flag_variables` argument, and must either take value
10		#' `TRUE` (flag present) or `FALSE` (flag absent) for each record.
11		#'
12		#' If there are multiple records with the same flag present for a patient, only one occurrence is counted.
13		#'
14		#' @inheritParams argument_convention
15		#' @param flag_variables (`character`)\cr a vector specifying the names of `logical` variables from analysis dataset
16		#' used for counting the number of unique identifiers.
17		#' @param flag_labels (`character`)\cr vector of labels to use for flag variables. If any labels are also specified via
18		#' the `.labels` parameter, the `.labels` values will take precedence and replace these labels.
19		#' @param .stats (`character`)\cr statistics to select for the table.
20		#'
21		#' Options are: ``r shQuote(get_stats("count_patients_with_flags"), type = "sh")``
22		#'
23		#' @seealso [count_patients_with_event]
24		#'
25		#' @name count_patients_with_flags
26		#' @order 1
27		NULL
28
29		#' @describeIn count_patients_with_flags Statistics function which counts the number of patients for which
30		#' a particular flag variable is `TRUE`.
31		#'
32		#' @inheritParams analyze_variables
33		#' @param .var (`string`)\cr name of the column that contains the unique identifier.
34		#'
35		#' @note If `flag_labels` is not specified, variables labels will be extracted from `df`. If variables are not
36		#' labeled, variable names will be used instead. Alternatively, a named `vector` can be supplied to
37		#' `flag_variables` such that within each name-value pair the name corresponds to the variable name and the value is
38		#' the label to use for this variable.
39		#'
40		#' @return
41		#' * `s_count_patients_with_flags()` returns the count and the fraction of unique identifiers with each particular
42		#' flag as a list of statistics `n`, `count`, `count_fraction`, and `n_blq`, with one element per flag.
43		#'
44		#' @examples
45		#' # `s_count_patients_with_flags()`
46		#'
47		#' s_count_patients_with_flags(
48		#' adae,
49		#' "SUBJID",
50		#' flag_variables = c("fl1", "fl2", "fl3", "fl4"),
51		#' denom = "N_col",
52		#' .N_col = 1000
53		#' )
54		#'
55		#' @export
s_count_patients_with_flags <- function(df,
                                        .var,
                                        .N_col = ncol(df), # nolint
                                        .N_row = nrow(df), # nolint
                                        ...,
                                        flag_variables,
                                        flag_labels = NULL,
                                        denom = c("n", "N_col", "N_row")) {
  checkmate::assert_character(flag_variables)

  # Resolve the display name of each flag: explicit labels first, then variable labels taken
  # from `df`, or the values of a named `flag_variables` vector (whose names are the columns).
  if (!is.null(flag_labels)) {
    checkmate::assert_character(flag_labels, len = length(flag_variables), any.missing = FALSE)
    flag_names <- flag_labels
  } else if (is.null(names(flag_variables))) {
    flag_names <- formatters::var_labels(df[flag_variables], fill = TRUE)
  } else {
    flag_names <- unname(flag_variables)
    flag_variables <- names(flag_variables)
  }
  checkmate::assert_subset(flag_variables, colnames(df))

  # All unique identifiers in `df`; the same for every flag.
  all_ids <- as.character(unique(df[[.var]]))

  # One call to s_count_values() per flag. sapply() is kept on purpose: the code below relies on
  # the results being simplified to a matrix with one column per flag.
  temp <- sapply(flag_variables, function(flag) {
    flagged_rows <- which(df[[flag]])
    flagged_ids <- as.character(unique(df[flagged_rows, ][[.var]]))
    s_count_values(
      x = all_ids,
      values = flagged_ids,
      denom = denom,
      .N_col = .N_col,
      .N_row = .N_row
    )
  })
  colnames(temp) <- flag_names
  temp <- data.frame(t(temp))
  result <- as.list(temp)

  # With a single flag the flag name has to be re-attached to each statistic. This is done for
  # every statistic returned rather than for a fixed number of them.
  if (length(flag_variables) == 1) {
    for (i in seq_along(result)) names(result[[i]]) <- flag_names[1]
  }
  result
}
98
99		#' @describeIn count_patients_with_flags Formatted analysis function which is used as `afun`
100		#' in `count_patients_with_flags()`.
101		#'
102		#' @return
103		#' * `a_count_patients_with_flags()` returns the corresponding list with formatted [rtables::CellValue()].
104		#'
105		#' @examples
106		#' a_count_patients_with_flags(
107		#' adae,
108		#' .N_col = 10L,
109		#' .N_row = 10L,
110		#' .var = "USUBJID",
111		#' flag_variables = c("fl1", "fl2", "fl3", "fl4")
112		#' )
113		#'
114		#' @export
a_count_patients_with_flags <- function(df,
                                        labelstr = "",
                                        ...,
                                        .stats = NULL,
                                        .stat_names = NULL,
                                        .formats = NULL,
                                        .labels = NULL,
                                        .indent_mods = NULL) {
  # Separate the layout-provided parameters from the arguments meant for the statistics function.
  stat_fun_args <- list(...)
  extra_afun_params <- retrieve_extra_afun_params(names(stat_fun_args$.additional_fun_parameters))
  stat_fun_args$.additional_fun_parameters <- NULL
  flag_variables <- stat_fun_args[["flag_variables"]]
  flag_labels <- stat_fun_args[["flag_labels"]]

  # Unnamed flag variables are replaced by their variable labels from `df` (named by variable);
  # if no explicit labels were given, these values double as the labels.
  if (is.null(names(flag_variables))) {
    flag_variables <- formatters::var_labels(df, fill = TRUE)[flag_variables]
  }
  if (is.null(flag_labels)) {
    flag_labels <- flag_variables
  }

  # Split requested statistics into built-in names and user-defined functions.
  split_stats <- .split_std_from_custom_stats(.stats)
  .stats <- split_stats$all_stats
  custom_stat_functions <- split_stats$custom_stats

  # Compute the statistics.
  stat_call_args <- c(df = list(df), extra_afun_params, stat_fun_args)
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_count_patients_with_flags,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = stat_call_args
  )

  # Resolve statistic names, then formats, labels and indentation; every statistic has one level per flag.
  .stats <- get_stats("count_patients_with_flags", stats_in = .stats, custom_stats_in = names(custom_stat_functions))
  flag_levels <- names(flag_variables)
  levels_per_stats <- stats::setNames(rep(list(flag_levels), length(.stats)), .stats)
  .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats)
  .labels <- get_labels_from_stats(
    .stats, .labels, levels_per_stats,
    tern_defaults = stats::setNames(flag_labels, flag_levels)
  )
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats)

  # Flatten the selected statistics and align their names with the resolved formats.
  x_stats <- setNames(.unlist_keep_nulls(x_stats[.stats]), names(.formats))

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = names(.labels),
    .stat_names = .stat_names,
    .labels = .unlist_keep_nulls(.labels),
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}
178
179		#' @describeIn count_patients_with_flags Layout-creating function which can take statistics function
180		#' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
181		#'
182		#' @return
183		#' * `count_patients_with_flags()` returns a layout object suitable for passing to further layouting functions,
184		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
185		#' the statistics from `s_count_patients_with_flags()` to the table layout.
186		#'
187		#' @examples
188		#' # Add labelled flag variables to analysis dataset.
189		#' adae <- tern_ex_adae %>%
190		#' dplyr::mutate(
191		#' fl1 = TRUE %>% with_label("Total AEs"),
192		#' fl2 = (TRTEMFL == "Y") %>%
193		#' with_label("Total number of patients with at least one adverse event"),
194		#' fl3 = (TRTEMFL == "Y" & AEOUT == "FATAL") %>%
195		#' with_label("Total number of patients with fatal AEs"),
196		#' fl4 = (TRTEMFL == "Y" & AEOUT == "FATAL" & AEREL == "Y") %>%
197		#' with_label("Total number of patients with related fatal AEs")
198		#' )
199		#'
200		#' lyt <- basic_table() %>%
201		#' split_cols_by("ARM") %>%
202		#' add_colcounts() %>%
203		#' count_patients_with_flags(
204		#' "SUBJID",
205		#' flag_variables = c("fl1", "fl2", "fl3", "fl4"),
206		#' denom = "N_col"
207		#' )
208		#'
209		#' build_table(lyt, adae, alt_counts_df = tern_ex_adsl)
210		#'
211		#' @export
212		#' @order 2
count_patients_with_flags <- function(lyt,
                                      var,
                                      flag_variables,
                                      flag_labels = NULL,
                                      var_labels = var,
                                      show_labels = "hidden",
                                      riskdiff = FALSE,
                                      na_str = default_na_str(),
                                      nested = TRUE,
                                      ...,
                                      table_names = paste0("tbl_flags_", var),
                                      .stats = "count_fraction",
                                      .stat_names = NULL,
                                      .formats = list(count_fraction = format_count_fraction_fixed_dp),
                                      .indent_mods = NULL,
                                      .labels = NULL) {
  checkmate::assert_flag(riskdiff)

  # With risk differences the generic risk-difference afun is used instead of the plain one.
  if (isFALSE(riskdiff)) {
    afun <- a_count_patients_with_flags
  } else {
    afun <- afun_riskdiff
  }

  # Standard formatting arguments: `.stats` is always forwarded, the others only when not NULL.
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  extra_args <- c(list(".stats" = .stats), Filter(Negate(is.null), optional_args))

  # Arguments for the statistics function; in the risk-difference case the plain afun is
  # passed along under `afun`.
  riskdiff_args <- if (!isFALSE(riskdiff)) {
    list(afun = list("s_count_patients_with_flags" = a_count_patients_with_flags))
  }
  extra_args <- c(
    extra_args,
    flag_variables = list(flag_variables), flag_labels = list(flag_labels),
    riskdiff_args,
    ...
  )

  # Extend the analysis function with the layout-provided parameters.
  additional_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- additional_params
  formals(afun) <- c(formals(afun), additional_params)

  analyze(
    lyt = lyt,
    vars = var,
    afun = afun,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names
  )
}

1		#' Count specific values
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' The analyze function [count_values()] creates a layout element to calculate counts of specific values within a
6		#' variable of interest.
7		#'
8		#' This function analyzes one or more variables of interest supplied as a vector to `vars`. Values to
9		#' count for variable(s) in `vars` can be given as a vector via the `values` argument. One row of
10		#' counts will be generated for each variable.
11		#'
12		#' @inheritParams argument_convention
13		#' @param values (`character`)\cr specific values that should be counted.
14		#' @param .stats (`character`)\cr statistics to select for the table.
15		#'
16		#' Options are: ``r shQuote(get_stats("count_values"), type = "sh")``
17		#'
18		#' @note
19		#' * For `factor` variables, `s_count_values` converts `x` and `values` to `character` and forwards to the
20		#' `character` method; `values` are not checked against the levels of `x`.
21		#' * For `count_values()`, variable labels are shown when there is more than one element in `vars`,
22		#' otherwise they are hidden.
23		#'
24		#' @name count_values
25		#' @order 1
26		NULL
27
28		#' @describeIn count_values S3 generic function to count values.
29		#'
30		#' @inheritParams s_summary.logical
31		#'
32		#' @return
33		#' * `s_count_values()` returns output of [s_summary()] for specified values of a non-numeric variable.
34		#'
35		#' @export
s_count_values <- function(x,
                           values,
                           na.rm = TRUE, # nolint
                           denom = c("n", "N_col", "N_row"),
                           ...) {
  # S3 generic: the method is chosen by the class of `x`.
  UseMethod(generic = "s_count_values", object = x)
}
43
44		#' @describeIn count_values Method for `character` class.
45		#'
46		#' @method s_count_values character
47		#'
48		#' @examples
49		#' # `s_count_values.character`
50		#' s_count_values(x = c("a", "b", "a"), values = "a")
51		#' s_count_values(x = c("a", "b", "a", NA, NA), values = "b", na.rm = FALSE)
52		#'
53		#' @export
s_count_values.character <- function(x,
                                     values = "Y",
                                     na.rm = TRUE, # nolint
                                     ...) {
  checkmate::assert_character(values)

  # Drop missing entries first when requested.
  x_used <- if (na.rm) x[!is.na(x)] else x

  # Logical indicator of membership in `values`; summarised by the logical s_summary() method.
  s_summary(x_used %in% values, na_rm = na.rm, ...)
}
68
69		#' @describeIn count_values Method for `factor` class. This makes an automatic
70		#' conversion to `character` and then forwards to the method for characters.
71		#'
72		#' @method s_count_values factor
73		#'
74		#' @examples
75		#' # `s_count_values.factor`
76		#' s_count_values(x = factor(c("a", "b", "a")), values = "a")
77		#'
78		#' @export
s_count_values.factor <- function(x,
                                  values = "Y",
                                  ...) {
  # Convert both inputs to character and re-dispatch to the character method.
  x_chr <- as.character(x)
  values_chr <- as.character(values)
  s_count_values(x_chr, values = values_chr, ...)
}
84
85		#' @describeIn count_values Method for `logical` class.
86		#'
87		#' @method s_count_values logical
88		#'
89		#' @examples
90		#' # `s_count_values.logical`
91		#' s_count_values(x = c(TRUE, FALSE, TRUE))
92		#'
93		#' @export
s_count_values.logical <- function(x, values = TRUE, ...) {
  checkmate::assert_logical(values)

  # Logical data is counted through the character method, i.e. on the
  # strings produced by `as.character()` ("TRUE" / "FALSE").
  x_chr <- as.character(x)
  values_chr <- as.character(values)
  s_count_values(x_chr, values = values_chr, ...)
}
98
99		#' @describeIn count_values Formatted analysis function which is used as `afun`
100		#' in `count_values()`.
101		#'
102		#' @return
103		#' * `a_count_values()` returns the corresponding list with formatted [rtables::CellValue()].
104		#'
105		#' @examples
106		#' # `a_count_values`
107		#' a_count_values(x = factor(c("a", "b", "a")), values = "a", .N_col = 10, .N_row = 10)
108		#'
109		#' @export
a_count_values <- function(x,
                           ...,
                           .stats = NULL,
                           .stat_names = NULL,
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = NULL) {
  # Split `...` into the layout-provided parameters and the arguments for the statistics function
  stat_args <- list(...)
  layout_params <- retrieve_extra_afun_params(names(stat_args$.additional_fun_parameters))
  stat_args$.additional_fun_parameters <- NULL

  # Separate standard statistic names from user-defined statistic functions
  split_stats <- .split_std_from_custom_stats(.stats)
  .stats <- split_stats$all_stats
  custom_stat_functions <- split_stats$custom_stats

  # Compute all statistics with the default and the custom statistic functions
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_count_values,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(x = list(x), layout_params, stat_args)
  )

  # Resolve the statistics to display and keep only those results
  .stats <- get_stats(
    "analyze_vars_counts",
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions)
  )
  x_stats <- x_stats[.stats]

  # Fill in formatting defaults for the selected statistics
  .formats <- get_formats_from_stats(.stats, .formats)
  .labels <- get_labels_from_stats(.stats, .labels)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, layout_params$.df_row, layout_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = names(.labels),
    .stat_names = .stat_names,
    .labels = .unlist_keep_nulls(.labels),
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}
161
162		#' @describeIn count_values Layout-creating function which can take statistics function arguments
163		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
164		#'
165		#' @return
166		#' * `count_values()` returns a layout object suitable for passing to further layouting functions,
167		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
168		#' the statistics from `s_count_values()` to the table layout.
169		#'
170		#' @examples
171		#' # `count_values`
172		#' basic_table() %>%
173		#' count_values("Species", values = "setosa") %>%
174		#' build_table(iris)
175		#'
176		#' @export
177		#' @order 2
count_values <- function(lyt,
                         vars,
                         values,
                         na_str = default_na_str(),
                         na_rm = TRUE,
                         nested = TRUE,
                         ...,
                         table_names = vars,
                         .stats = "count_fraction",
                         .stat_names = NULL,
                         .formats = c(count_fraction = "xx (xx.xx%)", count = "xx"),
                         .labels = c(count_fraction = paste(values, collapse = ", ")),
                         .indent_mods = NULL) {
  # Standard extra arguments: `.stats` is always forwarded, the remaining
  # formatting arguments only when they are not NULL
  extra_args <- list(".stats" = .stats)
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  extra_args <- c(extra_args, Filter(Negate(is.null), optional_args))

  # Arguments for the statistics function
  extra_args <- c(
    extra_args,
    na_rm = na_rm, values = list(values),
    ...
  )

  # Extend a local copy of the analysis function with the additional layout parameters
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(a_count_values) <- c(formals(a_count_values), extra_args[[".additional_fun_parameters"]])

  # Variable labels are only shown when more than one variable is analyzed
  label_visibility <- if (length(vars) > 1) "visible" else "hidden"

  analyze(
    lyt,
    vars,
    afun = a_count_values,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    show_labels = label_visibility,
    table_names = table_names
  )
}

1		#' Missing data
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Substitute missing data with a string or factor level.
6		#'
7		#' @param x (`factor` or `character`)\cr values for which any missing values should be substituted.
8		#' @param label (`string`)\cr string that missing data should be replaced with.
9		#' @param drop_na (`flag`)\cr if `TRUE` and `x` is a factor, any levels
10		#' that are only `label` will be dropped.
11		#'
12		#' @return `x` with any `NA` values substituted by `label`.
13		#'
14		#' @examples
15		#' explicit_na(c(NA, "a", "b"))
16		#' is.na(explicit_na(c(NA, "a", "b")))
17		#'
18		#' explicit_na(factor(c(NA, "a", "b")))
19		#' is.na(explicit_na(factor(c(NA, "a", "b"))))
20		#'
21		#' explicit_na(sas_na(c("a", "")))
22		#'
23		#' explicit_na(factor(levels = c(NA, "a")))
24		#' explicit_na(factor(levels = c(NA, "a")), drop_na = TRUE) # previous default
25		#'
26		#' @export
explicit_na <- function(x, label = default_na_str(), drop_na = default_drop_na()) {
  checkmate::assert_string(label, na.ok = TRUE)
  checkmate::assert_flag(drop_na)

  # Guard clause: only factors and character vectors are supported
  if (!is.factor(x) && !is.character(x)) {
    stop("only factors and character vectors allowed")
  }

  if (is.character(x)) {
    # Character input: overwrite the missing entries with the label
    x[is.na(x)] <- label
    return(x)
  }

  # Factor input: missing values become the level `label` ...
  x <- forcats::fct_na_value_to_level(x, label)
  # ... which is dropped again via `fct_drop(only = label)` when `drop_na` is set
  if (drop_na) {
    x <- forcats::fct_drop(x, only = label)
  }
  x
}
44		#' @describeIn explicit_na should `NA` values without a dedicated level be dropped?
45		#'
46		#' @return
47		#' * `default_drop_na()` returns a (`flag`): the default value for the `drop_na` argument in `explicit_na()`.
48		#'
49		#' @export
default_drop_na <- function() {
  # Read the option "tern_default_drop_na"; fall back to TRUE when it is not set
  drop_flag <- getOption("tern_default_drop_na")
  if (is.null(drop_flag)) TRUE else drop_flag
}
53
54		#' @describeIn explicit_na Setter for the default `drop_na` flag used by `explicit_na()`. Sets the
55		#' option `"tern_default_drop_na"` within the R environment.
56		#'
57		#' @return
58		#' * `set_default_drop_na()` has no return value.
59		#'
60		#' @export
set_default_drop_na <- function(drop_na) {
  # `NULL` is accepted as well as a flag
  checkmate::assert_flag(drop_na, null.ok = TRUE)

  # Store the value in the option that `default_drop_na()` reads
  options(tern_default_drop_na = drop_na)
}
65
66		#' Convert strings to `NA`
67		#'
68		#' @description `r lifecycle::badge("stable")`
69		#'
70		#' SAS imports missing data as empty strings or strings with whitespaces only. This helper function can be used to
71		#' convert these values to `NA`s.
72		#'
73		#' @inheritParams explicit_na
74		#' @param empty (`flag`)\cr if `TRUE`, empty strings get replaced by `NA`.
75		#' @param whitespaces (`flag`)\cr if `TRUE`, strings made from only whitespaces get replaced with `NA`.
76		#'
77		#' @return `x` with `""` and/or whitespace-only values substituted by `NA`, depending on the values of
78		#' `empty` and `whitespaces`.
79		#'
80		#' @examples
81		#' sas_na(c("1", "", " ", " ", "b"))
82		#' sas_na(factor(c("", " ", "b")))
83		#'
84		#' is.na(sas_na(c("1", "", " ", " ", "b")))
85		#'
86		#' @export
sas_na <- function(x, empty = TRUE, whitespaces = TRUE) {
  checkmate::assert_flag(empty)
  checkmate::assert_flag(whitespaces)

  # Guard clause: only factors and character vectors are supported
  if (!is.factor(x) && !is.character(x)) {
    stop("only factors and character vectors allowed")
  }

  # Matches strings that consist of one or more whitespace characters only
  ws_only <- "^\\s+$"

  if (is.factor(x)) {
    # For factors the replacement is done on the levels
    if (empty) {
      is_empty_lvl <- levels(x) == ""
      if (any(is_empty_lvl)) levels(x)[is_empty_lvl] <- NA
    }
    if (whitespaces) {
      is_ws_lvl <- grepl(ws_only, levels(x))
      if (any(is_ws_lvl)) levels(x)[is_ws_lvl] <- NA
    }
  } else {
    # For character vectors the replacement is done element-wise
    if (empty) x[x == ""] <- NA_character_
    if (whitespaces) x[grepl(ws_only, x)] <- NA_character_
  }

  x
}

1		#' Additional assertions to use with `checkmate`
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Additional assertion functions which can be used together with the `checkmate` package.
6		#'
7		#' @inheritParams checkmate::assert_factor
8		#' @param x (`any`)\cr object to test.
9		#' @param df (`data.frame`)\cr data set to test.
10		#' @param variables (named `list` of `character`)\cr list of variables to test.
11		#' @param include_boundaries (`flag`)\cr whether to include boundaries when testing
12		#' for proportions.
13		#' @param na_level (`string`)\cr the string you have been using to represent NA or
14		#' missing data. For `NA` values please consider using directly [is.na()] or
15		#' similar approaches.
16		#'
17		#' @return Nothing if assertion passes, otherwise prints the error message.
18		#'
19		#' @name assertions
20		NULL
21
check_list_of_variables <- function(x) {
  # NULL elements are not part of the specification and are dropped first
  x <- Filter(Negate(is.null), x)

  # Structural check: a named, non-empty list of character vectors without missing values
  list_check <- checkmate::check_list(
    x,
    types = "character",
    any.missing = FALSE,
    min.len = 1,
    names = "named"
  )
  if (!isTRUE(list_check)) {
    return(list_check)
  }

  # Content check: no empty strings allowed
  checkmate::check_character(unlist(x), min.chars = 1)
}
#' @describeIn assertions Checks whether `x` is a valid list of variable names.
#'   `NULL` elements of the list `x` are dropped with `Filter(Negate(is.null), x)`.
#'
#' @keywords internal
assert_list_of_variables <- checkmate::makeAssertionFunction(check_list_of_variables)
43
check_df_with_variables <- function(df, variables, na_level = NULL) {
  # Returns TRUE when `df` passes, otherwise a string describing the problem
  # (the contract expected by `checkmate::makeAssertionFunction()`).
  checkmate::assert_data_frame(df)
  assert_list_of_variables(variables)

  var_names <- unlist(variables)

  # Report every requested variable that is not a column of `df`
  missing_vars <- setdiff(var_names, colnames(df))
  if (length(missing_vars) > 0) {
    return(paste(
      deparse(substitute(df)),
      "does not contain all specified variables as column names. Missing from data frame:",
      paste(missing_vars, collapse = ", ")
    ))
  }

  # Optionally report the columns that contain the explicit missing-data string
  if (!is.null(na_level)) {
    checkmate::assert_string(na_level)
    # `na.rm = TRUE`: a column holding genuine `NA`s but no `na_level` must give FALSE,
    # not `NA` (an `NA` here would make the `if ()` below fail)
    has_na_level <- vapply(
      as.list(df)[var_names],
      function(col) any(col == na_level, na.rm = TRUE),
      logical(1)
    )
    if (any(has_na_level)) {
      return(paste0(
        deparse(substitute(df)), " contains explicit na_level (", na_level,
        ") in the following columns: ", paste0(var_names[has_na_level], collapse = ", ")
      ))
    }
  }

  TRUE
}
#' @describeIn assertions Check whether `df` is a data frame with the analysis `variables`.
#'   Please notice how this produces an error when not all variables are present in the
#'   data.frame while the opposite is not required.
#'
#' @examples
#' x <- data.frame(
#'   a = 1:10,
#'   b = rnorm(10)
#' )
#' assert_df_with_variables(x, variables = list(a = "a", b = "b"))
#'
#' x <- ex_adsl
#' assert_df_with_variables(x, list(a = "ARM", b = "USUBJID"))
#'
#' @export
assert_df_with_variables <- checkmate::makeAssertionFunction(check_df_with_variables)
91
check_valid_factor <- function(x,
                               min.levels = 1, # nolint
                               max.levels = NULL, # nolint
                               null.ok = TRUE, # nolint
                               any.missing = TRUE, # nolint
                               n.levels = NULL, # nolint
                               len = NULL) {
  # Returns TRUE when `x` passes, otherwise a string describing the problem.

  # At least one level is always required
  checkmate::assert_int(min.levels, lower = 1)

  # Main factor check; `len` is forwarded so that a requested length is actually enforced
  res <- checkmate::check_factor(
    x,
    min.levels = min.levels,
    max.levels = max.levels,
    n.levels = n.levels,
    any.missing = any.missing,
    len = len,
    null.ok = null.ok
  )

  # No empty strings allowed among the levels
  # NOTE(review): for `x = NULL`, `levels(x)` is `NULL`, so this second check presumably
  # fails even though `null.ok = TRUE` lets the first one pass - confirm intended behavior.
  if (isTRUE(res)) {
    res <- checkmate::check_character(levels(x), min.chars = 1)
  }

  res
}
#' @describeIn assertions Check whether `x` is a valid factor (i.e. has levels and no empty
#'   string levels). Note that `NULL` and `NA` elements are allowed.
#'
#' @keywords internal
assert_valid_factor <- checkmate::makeAssertionFunction(check_valid_factor)
123
check_df_with_factors <- function(df,
                                  variables,
                                  min.levels = 1, # nolint
                                  max.levels = NULL, # nolint
                                  any.missing = TRUE, # nolint
                                  na_level = NULL) {
  # Returns TRUE when `df` passes, otherwise a string describing the problem.

  # First make sure the variables exist (and, optionally, do not contain `na_level`)
  res <- check_df_with_variables(df, variables, na_level)
  if (!isTRUE(res)) {
    return(res)
  }

  # Then check each selected column with `check_valid_factor()`
  var_names <- unlist(variables)
  fct_checks <- lapply(
    X = as.list(df)[var_names],
    FUN = check_valid_factor,
    min.levels = min.levels,
    max.levels = max.levels,
    any.missing = any.missing
  )
  is_invalid <- !vapply(fct_checks, isTRUE, logical(1))

  if (any(is_invalid)) {
    # One complete bullet per failing column, so every entry carries the
    # "Column `<name>`" prefix (not only the first one)
    bullets <- paste0(
      "\n* Column `", var_names[is_invalid],
      "` of the data.frame -> ", fct_checks[is_invalid],
      collapse = ""
    )
    return(paste0(
      deparse(substitute(df)), " does not contain only factor variables among:",
      bullets
    ))
  }

  TRUE
}

#' @describeIn assertions Check whether `df` is a data frame where the analysis `variables`
#'   are all factors. Note that the creation of `NA` by direct call of `factor()` will
#'   trim `NA` levels out of the vector list itself.
#'
#' @examples
#' x <- ex_adsl
#' assert_df_with_factors(x, list(a = "ARM"))
#'
#' @export
assert_df_with_factors <- checkmate::makeAssertionFunction(check_df_with_factors)
167
168		#' @describeIn assertions Check whether `x` is a proportion: number between 0 and 1.
169		#'
170		#' @examples
171		#' assert_proportion_value(0.95)
172		#' assert_proportion_value(1.0, include_boundaries = TRUE)
173		#'
174		#' @export
assert_proportion_value <- function(x, include_boundaries = FALSE) {
  # `x` must be a single number within the closed interval [0, 1]
  checkmate::assert_number(x, lower = 0, upper = 1)
  checkmate::assert_flag(include_boundaries)

  # Unless the boundaries are explicitly allowed, 0 and 1 themselves are rejected
  if (!include_boundaries) {
    checkmate::assert_true(x > 0)
    checkmate::assert_true(x < 1)
  }
}

1		#' Count occurrences by grade
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' The analyze function [count_occurrences_by_grade()] creates a layout element to calculate occurrence counts by grade.
6		#'
7		#' This function analyzes primary analysis variable `var` which indicates toxicity grades. The `id` variable
8		#' is used to indicate unique subject identifiers (defaults to `USUBJID`). The user can also supply a list of
9		#' custom groups of grades to analyze via the `grade_groups` parameter. The `remove_single` argument will
10		#' remove single grades from the analysis so that only grade groups are analyzed.
11		#'
12		#' If there are multiple grades recorded for one patient only the highest grade level is counted.
13		#'
14		#' The summarize function [summarize_occurrences_by_grade()] performs the same function as
15		#' [count_occurrences_by_grade()] except it creates content rows, not data rows, to summarize the current table
16		#' row/column context and operates on the level of the latest row split or the root of the table if no row splits have
17		#' occurred.
18		#'
19		#' @inheritParams count_occurrences
20		#' @inheritParams argument_convention
21		#' @param grade_groups (named `list` of `character`)\cr list containing groupings of grades.
22		#' @param remove_single (`flag`)\cr `TRUE` to not include the elements of one-element grade groups
23		#' in the output list; in this case only the grade groups names will be included in the output. If
24		#' `only_grade_groups` is set to `TRUE` this argument is ignored.
25		#' @param only_grade_groups (`flag`)\cr whether only the specified grade groups should be
26		#' included, with individual grade rows removed (`TRUE`), or all grades and grade groups
27		#' should be displayed (`FALSE`).
28		#' @param .stats (`character`)\cr statistics to select for the table.
29		#'
30		#' Options are: ``r shQuote(get_stats("count_occurrences_by_grade"), type = "sh")``
31		#'
32		#' @seealso Relevant helper function [h_append_grade_groups()].
33		#'
34		#' @name count_occurrences_by_grade
35		#' @order 1
36		NULL
37
38		#' Helper function for `s_count_occurrences_by_grade()`
39		#'
40		#' @description `r lifecycle::badge("stable")`
41		#'
42		#' Helper function for [s_count_occurrences_by_grade()] to insert grade groupings into list with
43		#' individual grade frequencies. The order of the final result follows the order of `grade_groups`.
44		#' The elements under any-grade group (if any), i.e. the grade group equal to `refs` will be moved to
45		#' the end. Grade groups names must be unique.
46		#'
47		#' @inheritParams count_occurrences_by_grade
48		#' @param refs (named `list` of `numeric`)\cr named list where each name corresponds to a reference grade level
49		#' and each entry represents a count.
50		#'
51		#' @return Formatted list of grade groupings.
52		#'
53		#' @examples
54		#' h_append_grade_groups(
55		#' list(
56		#' "Any Grade" = as.character(1:5),
57		#' "Grade 1-2" = c("1", "2"),
58		#' "Grade 3-4" = c("3", "4")
59		#' ),
60		#' list("1" = 10, "2" = 20, "3" = 30, "4" = 40, "5" = 50)
61		#' )
62		#'
63		#' h_append_grade_groups(
64		#' list(
65		#' "Any Grade" = as.character(5:1),
66		#' "Grade A" = "5",
67		#' "Grade B" = c("4", "3")
68		#' ),
69		#' list("1" = 10, "2" = 20, "3" = 30, "4" = 40, "5" = 50)
70		#' )
71		#'
72		#' h_append_grade_groups(
73		#' list(
74		#' "Any Grade" = as.character(1:5),
75		#' "Grade 1-2" = c("1", "2"),
76		#' "Grade 3-4" = c("3", "4")
77		#' ),
78		#' list("1" = 10, "2" = 5, "3" = 0)
79		#' )
80		#'
81		#' @export
h_append_grade_groups <- function(grade_groups, refs, remove_single = TRUE, only_grade_groups = FALSE) {
  # Combines the per-grade values in `refs` with the sums over each group in
  # `grade_groups` and returns them as one ordered, named list.
  checkmate::assert_list(grade_groups)
  checkmate::assert_list(refs)
  refs_orig <- refs
  elements <- unique(unlist(grade_groups))

  ### compute sums in groups
  # (done before padding, so grades absent from `refs` contribute nothing to the sums)
  grp_sum <- lapply(grade_groups, function(i) do.call(sum, refs[i]))

  # grades used in a group but absent from `refs` get an explicit 0 entry
  padding_el <- setdiff(elements, names(refs))
  if (length(padding_el) > 0) {
    refs[padding_el] <- 0
  }
  result <- c(grp_sum, refs)

  ### order result while keeping grade_groups's ordering
  ordr <- grade_groups

  # elements of any-grade group (if any) will be moved to the end
  # (`vapply()` keeps this a logical vector even when `grade_groups` is empty)
  is_any <- vapply(grade_groups, setequal, logical(1), y = names(refs))
  ordr[is_any] <- list(character(0)) # hide elements under any-grade group

  # groups-elements combined sequence
  ordr <- c(lapply(names(ordr), function(g) c(g, ordr[[g]])), recursive = TRUE, use.names = FALSE)
  ordr <- ordr[!duplicated(ordr)]

  # append remaining elements (if any)
  ordr <- union(ordr, unlist(grade_groups[is_any])) # from any-grade group
  ordr <- union(ordr, names(refs)) # from refs

  # keep only the groups, or remove elements of single-element groups, if requested
  if (only_grade_groups) {
    ordr <- intersect(ordr, names(grade_groups))
  } else if (remove_single) {
    is_single <- lengths(grade_groups) == 1L
    ordr <- setdiff(ordr, unlist(grade_groups[is_single]))
  }

  # apply the order
  result <- result[ordr]

  # remove groups without any elements in the original refs
  # note: it's OK if groups have 0 value
  orig_names <- names(refs_orig)
  keep_grp <- vapply(grade_groups, function(grp) any(grp %in% orig_names), logical(1))

  keep_el <- names(result) %in% orig_names | names(result) %in% names(keep_grp)[keep_grp]
  result <- result[keep_el]

  result
}
133
134		#' @describeIn count_occurrences_by_grade Statistics function which counts the
135		#' number of patients by highest grade.
136		#'
137		#' @return
138		#' * `s_count_occurrences_by_grade()` returns a list of counts and fractions with one element per grade level or
139		#' grade level grouping.
140		#'
141		#' @examples
142		#' s_count_occurrences_by_grade(
143		#' df,
144		#' .N_col = 10L,
145		#' .var = "AETOXGR",
146		#' id = "USUBJID",
147		#' grade_groups = list("ANY" = levels(df$AETOXGR))
148		#' )
149		#'
150		#' @export
s_count_occurrences_by_grade <- function(df,
                                         labelstr = "",
                                         .var,
                                         .N_row, # nolint
                                         .N_col, # nolint
                                         ...,
                                         id = "USUBJID",
                                         grade_groups = list(),
                                         remove_single = TRUE,
                                         only_grade_groups = FALSE,
                                         denom = c("N_col", "n", "N_row")) {
  assert_valid_factor(df[[.var]])
  assert_df_with_variables(df, list(grade = .var, id = id))

  # Resolve the denominator: number of distinct ids, row count or column count
  denom <- switch(match.arg(denom),
    n = nlevels(factor(df[[id]])),
    N_row = .N_row,
    N_col = .N_col
  )

  if (nrow(df) < 1) {
    # No records: every grade level gets a zero count
    grade_levels <- levels(df[[.var]])
    l_count <- setNames(as.list(rep(0, length(grade_levels))), grade_levels)
  } else {
    if (isTRUE(is.factor(df[[id]]))) {
      assert_valid_factor(df[[id]], any.missing = FALSE)
    } else {
      checkmate::assert_character(df[[id]], min.chars = 1, any.missing = FALSE)
    }
    checkmate::assert_count(.N_col)

    # NOTE: the names `id` and `grade` are referenced by the formula passed to `aggregate()` below
    id <- df[[id]]
    grade <- df[[.var]]

    if (!is.ordered(grade)) {
      grade_lbl <- obj_label(grade)
      lvls <- levels(grade)
      is_numeric_lvl <- grepl("^\\d+$", lvls)
      if (!any(is_numeric_lvl) || all(is_numeric_lvl)) {
        # all-numeric or all-non-numeric levels keep their existing order
        lvl_ord <- lvls
      } else {
        # mixed levels: non-numeric levels are ranked just below the smallest numeric one
        lvls[!is_numeric_lvl] <- min(as.numeric(lvls[is_numeric_lvl])) - 1
        lvl_ord <- levels(grade)[order(as.numeric(lvls))]
      }
      grade <- formatters::with_label(factor(grade, levels = lvl_ord, ordered = TRUE), grade_lbl)
    }

    # Levels whose name contains "missing" (case-insensitive) are moved to the end
    missing_lvl <- grepl("missing", tolower(levels(grade)))
    if (any(missing_lvl)) {
      reordered_lvls <- c(levels(grade)[!missing_lvl], levels(grade)[missing_lvl])
      grade <- factor(grade, levels = reordered_lvls, ordered = is.ordered(grade))
    }

    # Highest grade per id, then the number of ids per grade
    df_max <- stats::aggregate(grade ~ id, FUN = max, drop = FALSE)
    l_count <- as.list(table(df_max$grade))
  }

  if (length(grade_groups) > 0) {
    l_count <- h_append_grade_groups(grade_groups, l_count, remove_single, only_grade_groups)
  }

  # Pair each count with its fraction; 0 / 0 is reported as a fraction of 0
  l_count_fraction <- lapply(l_count, function(n_i) {
    if (n_i == 0 && denom == 0) {
      c(0, 0)
    } else {
      c(n_i, n_i / denom)
    }
  })

  list(
    count_fraction = l_count_fraction,
    count_fraction_fixed_dp = l_count_fraction
  )
}
232
233		#' @describeIn count_occurrences_by_grade Formatted analysis function which is used as `afun`
234		#' in `count_occurrences_by_grade()`.
235		#'
236		#' @return
237		#' * `a_count_occurrences_by_grade()` returns the corresponding list with formatted [rtables::CellValue()].
238		#'
239		#' @examples
240		#' a_count_occurrences_by_grade(
241		#' df,
242		#' .N_col = 10L,
243		#' .N_row = 10L,
244		#' .var = "AETOXGR",
245		#' id = "USUBJID",
246		#' grade_groups = list("ANY" = levels(df$AETOXGR))
247		#' )
248		#'
249		#' @export
a_count_occurrences_by_grade <- function(df,
                                         labelstr = "",
                                         ...,
                                         .stats = NULL,
                                         .stat_names = NULL,
                                         .formats = NULL,
                                         .labels = NULL,
                                         .indent_mods = NULL) {
  # Split `...` into the layout-provided parameters and the arguments for the statistics function
  stat_args <- list(...)
  layout_params <- retrieve_extra_afun_params(names(stat_args$.additional_fun_parameters))
  stat_args$.additional_fun_parameters <- NULL

  # Separate standard statistic names from user-defined statistic functions
  split_stats <- .split_std_from_custom_stats(.stats)
  .stats <- split_stats$all_stats
  custom_stat_functions <- split_stats$custom_stats

  # Apply statistics function
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_count_occurrences_by_grade,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = c(
      df = list(df),
      labelstr = list(labelstr),
      layout_params,
      stat_args
    )
  )

  # Resolve the statistics to display and keep only those results
  .stats <- get_stats(
    "count_occurrences_by_grade",
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions)
  )
  x_stats <- x_stats[.stats]

  # Formatting defaults are filled in per statistic and per level
  levels_per_stats <- lapply(x_stats, names)
  .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats)
  .labels <- get_labels_from_stats(.stats, .labels, levels_per_stats)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats)

  # Flatten the results and name them after the formats
  x_stats <- setNames(.unlist_keep_nulls(x_stats[.stats]), names(.formats))

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, layout_params$.df_row, layout_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  # The flattened labels serve both as row names and as row labels
  flat_labels <- .unlist_keep_nulls(.labels)

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = flat_labels,
    .stat_names = .stat_names,
    .labels = flat_labels,
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}
307
308		#' @describeIn count_occurrences_by_grade Layout-creating function which can take statistics function
309		#' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
310		#'
311		#' @return
312		#' * `count_occurrences_by_grade()` returns a layout object suitable for passing to further layouting functions,
313		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
314		#' the statistics from `s_count_occurrences_by_grade()` to the table layout.
315		#'
316		#' @examples
317		#' library(dplyr)
318		#'
319		#' df <- data.frame(
320		#' USUBJID = as.character(c(1:6, 1)),
321		#' ARM = factor(c("A", "A", "A", "B", "B", "B", "A"), levels = c("A", "B")),
322		#' AETOXGR = factor(c(1, 2, 3, 4, 1, 2, 3), levels = c(1:5)),
323		#' AESEV = factor(
324		#' x = c("MILD", "MODERATE", "SEVERE", "MILD", "MILD", "MODERATE", "SEVERE"),
325		#' levels = c("MILD", "MODERATE", "SEVERE")
326		#' ),
327		#' stringsAsFactors = FALSE
328		#' )
329		#'
330		#' df_adsl <- df %>%
331		#' select(USUBJID, ARM) %>%
332		#' unique()
333		#'
334		#' # Layout creating function with custom format.
335		#' basic_table() %>%
336		#' split_cols_by("ARM") %>%
337		#' add_colcounts() %>%
338		#' count_occurrences_by_grade(
339		#' var = "AESEV",
340		#' .formats = c("count_fraction" = "xx.xx (xx.xx%)")
341		#' ) %>%
342		#' build_table(df, alt_counts_df = df_adsl)
343		#'
344		#' # Define additional grade groupings.
345		#' grade_groups <- list(
346		#' "-Any-" = c("1", "2", "3", "4", "5"),
347		#' "Grade 1-2" = c("1", "2"),
348		#' "Grade 3-5" = c("3", "4", "5")
349		#' )
350		#'
351		#' basic_table() %>%
352		#' split_cols_by("ARM") %>%
353		#' add_colcounts() %>%
354		#' count_occurrences_by_grade(
355		#' var = "AETOXGR",
356		#' grade_groups = grade_groups,
357		#' only_grade_groups = TRUE
358		#' ) %>%
359		#' build_table(df, alt_counts_df = df_adsl)
360		#'
361		#' @export
362		#' @order 2
count_occurrences_by_grade <- function(lyt,
                                       var,
                                       id = "USUBJID",
                                       grade_groups = list(),
                                       remove_single = TRUE,
                                       only_grade_groups = FALSE,
                                       var_labels = var,
                                       show_labels = "default",
                                       riskdiff = FALSE,
                                       na_str = default_na_str(),
                                       nested = TRUE,
                                       ...,
                                       table_names = var,
                                       .stats = "count_fraction",
                                       .stat_names = NULL,
                                       .formats = list(count_fraction = format_count_fraction_fixed_dp),
                                       .labels = NULL,
                                       .indent_mods = NULL) {
  checkmate::assert_flag(riskdiff)

  # With `riskdiff = TRUE`, `afun_riskdiff` is used as the analysis function
  afun <- if (riskdiff) afun_riskdiff else a_count_occurrences_by_grade

  # Standard extra arguments: `.stats` is always forwarded, the remaining
  # formatting arguments only when they are not NULL
  extra_args <- list(".stats" = .stats)
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  extra_args <- c(extra_args, Filter(Negate(is.null), optional_args))

  # Arguments for the statistics function; with `riskdiff = TRUE` the regular
  # analysis function is passed along under `afun`
  extra_args <- c(
    extra_args,
    id = id, grade_groups = list(grade_groups), remove_single = remove_single, only_grade_groups = only_grade_groups,
    if (riskdiff) list(afun = list("s_count_occurrences_by_grade" = a_count_occurrences_by_grade)),
    ...
  )

  # Extend the analysis function with the additional layout parameters
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]])

  analyze(
    lyt = lyt,
    vars = var,
    afun = afun,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names
  )
}
415
416		#' @describeIn count_occurrences_by_grade Layout-creating function which can take content function arguments
417		#' and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
418		#'
419		#' @return
420		#' * `summarize_occurrences_by_grade()` returns a layout object suitable for passing to further layouting functions,
421		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows
422		#' containing the statistics from `s_count_occurrences_by_grade()` to the table layout.
423		#'
424		#' @examples
425		#' # Layout creating function with custom format.
426		#' basic_table() %>%
427		#' add_colcounts() %>%
428		#' split_rows_by("ARM", child_labels = "visible", nested = TRUE) %>%
429		#' summarize_occurrences_by_grade(
430		#' var = "AESEV",
431		#' .formats = c("count_fraction" = "xx.xx (xx.xx%)")
432		#' ) %>%
433		#' build_table(df, alt_counts_df = df_adsl)
434		#'
435		#' basic_table() %>%
436		#' add_colcounts() %>%
437		#' split_rows_by("ARM", child_labels = "visible", nested = TRUE) %>%
438		#' summarize_occurrences_by_grade(
439		#' var = "AETOXGR",
440		#' grade_groups = grade_groups
441		#' ) %>%
442		#' build_table(df, alt_counts_df = df_adsl)
443		#'
444		#' @export
445		#' @order 3
summarize_occurrences_by_grade <- function(lyt,
                                           var,
                                           id = "USUBJID",
                                           grade_groups = list(),
                                           remove_single = TRUE,
                                           only_grade_groups = FALSE,
                                           riskdiff = FALSE,
                                           na_str = default_na_str(),
                                           ...,
                                           .stats = "count_fraction",
                                           .stat_names = NULL,
                                           .formats = list(count_fraction = format_count_fraction_fixed_dp),
                                           .labels = NULL,
                                           .indent_mods = 0L) {
  checkmate::assert_flag(riskdiff)

  # With `riskdiff = TRUE`, `afun_riskdiff` is used as the content function
  afun <- if (riskdiff) afun_riskdiff else a_count_occurrences_by_grade

  # Standard extra arguments: `.stats` is always forwarded, the remaining
  # formatting arguments only when they are not NULL
  extra_args <- list(".stats" = .stats)
  optional_args <- list(
    .stat_names = .stat_names,
    .formats = .formats,
    .labels = .labels
  )
  extra_args <- c(extra_args, Filter(Negate(is.null), optional_args))

  # A single indent modifier applies to the whole content row; any other non-NULL
  # value is forwarded to the content function and the row-level indent stays 0
  indent_mod <- if (length(.indent_mods) == 1) .indent_mods else 0L
  if (!is.null(.indent_mods) && length(.indent_mods) != 1) {
    extra_args[[".indent_mods"]] <- .indent_mods
  }

  # Arguments for the statistics function; with `riskdiff = TRUE` the regular
  # analysis function is passed along under `afun`
  extra_args <- c(
    extra_args,
    id = id, grade_groups = list(grade_groups), remove_single = remove_single, only_grade_groups = only_grade_groups,
    if (riskdiff) list(afun = list("s_count_occurrences_by_grade" = a_count_occurrences_by_grade)),
    ...
  )

  # Extend the content function with the additional layout parameters
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]])

  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = afun,
    na_str = na_str,
    extra_args = extra_args,
    indent_mod = indent_mod
  )
}

1		#' Tabulate biomarker effects on binary response by subgroup
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' The [tabulate_rsp_biomarkers()] function creates a layout element to tabulate the estimated biomarker effects on a
6		#' binary response endpoint across subgroups, returning statistics including response rate and odds ratio for each
7		#' population subgroup. The table is created from `df`, a list of data frames returned by [extract_rsp_biomarkers()],
8		#' with the statistics to include specified via the `vars` parameter.
9		#'
10		#' A forest plot can be created from the resulting table using the [g_forest()] function.
11		#'
12		#' @inheritParams argument_convention
13		#' @param df (`data.frame`)\cr containing all analysis variables, as returned by
14		#' [extract_rsp_biomarkers()].
15		#' @param vars (`character`)\cr the names of statistics to be reported among:
16		#' * `n_tot`: Total number of patients per group.
17		#' * `n_rsp`: Total number of responses per group.
18		#' * `prop`: Total response proportion per group.
19		#' * `or`: Odds ratio.
20		#' * `ci`: Confidence interval of odds ratio.
21		#' * `pval`: p-value of the effect.
22		#' Note, the statistics `n_tot`, `or` and `ci` are required.
23		#'
24		#' @return An `rtables` table summarizing biomarker effects on binary response by subgroup.
25		#'
26		#' @details These functions create a layout starting from a data frame which contains
27		#' the required statistics. The tables are then typically used as input for forest plots.
28		#'
29		#' @note In contrast to [tabulate_rsp_subgroups()] this tabulation function does
30		#' not start from an input layout `lyt`. This is because internally the table is
31		#' created by combining multiple subtables.
32		#'
33		#' @seealso [extract_rsp_biomarkers()]
34		#'
35		#' @examples
36		#' library(dplyr)
37		#' library(forcats)
38		#'
39		#' adrs <- tern_ex_adrs
40		#' adrs_labels <- formatters::var_labels(adrs)
41		#'
42		#' adrs_f <- adrs %>%
43		#' filter(PARAMCD == "BESRSPI") %>%
44		#' mutate(rsp = AVALC == "CR")
45		#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
46		#'
47		#' df <- extract_rsp_biomarkers(
48		#' variables = list(
49		#' rsp = "rsp",
50		#' biomarkers = c("BMRKR1", "AGE"),
51		#' covariates = "SEX",
52		#' subgroups = "BMRKR2"
53		#' ),
54		#' data = adrs_f
55		#' )
56		#'
57		#' \donttest{
58		#' ## Table with default columns.
59		#' tabulate_rsp_biomarkers(df)
60		#'
61		#' ## Table with a manually chosen set of columns: leave out "pval", reorder.
62		#' tab <- tabulate_rsp_biomarkers(
63		#' df = df,
64		#' vars = c("n_rsp", "ci", "n_tot", "prop", "or")
65		#' )
66		#'
67		#' ## Finally produce the forest plot.
68		#' g_forest(tab, xlim = c(0.7, 1.4))
69		#' }
70		#'
71		#' @export
72		#' @name response_biomarkers_subgroups
tabulate_rsp_biomarkers <- function(df,
                                    vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval"),
                                    na_str = default_na_str(),
                                    ...,
                                    .stat_names = NULL,
                                    .formats = NULL,
                                    .labels = NULL,
                                    .indent_mods = NULL) {
  checkmate::assert_data_frame(df)
  checkmate::assert_character(df$biomarker)
  checkmate::assert_character(df$biomarker_label)
  checkmate::assert_subset(vars, get_stats("tabulate_rsp_biomarkers"))

  # Standard extra arguments: `.stats` is always set, the others only when supplied.
  extra_args <- list(".stats" = vars)
  optional_args <- list(
    ".stat_names" = .stat_names,
    ".formats" = .formats,
    ".labels" = .labels,
    ".indent_mods" = .indent_mods
  )
  for (arg_name in names(optional_args)) {
    if (!is.null(optional_args[[arg_name]])) {
      extra_args[[arg_name]] <- optional_args[[arg_name]]
    }
  }

  # Column variables and labels; confidence level and p-value label come from the first row of `df`.
  colvars <- d_rsp_subgroups_colvars(
    vars,
    conf_level = df$conf_level[1],
    method = df$pval_label[1]
  )

  # Additional arguments for the statistics function.
  extra_args <- c(extra_args, biomarker = TRUE, ...)

  # Local copy of the analysis function whose formals are extended with the layout parameters.
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  afun <- a_response_subgroups
  formals(afun) <- c(formals(afun), extra_args[[".additional_fun_parameters"]])

  # The "ci" column is derived from the lower and upper confidence limits.
  df$ci <- combine_vectors(df$lcl, df$ucl)

  # Adds a hidden row split that keeps only one level of `row_type`.
  split_by_row_type <- function(lyt, level) {
    split_rows_by(
      lyt = lyt,
      var = "row_type",
      split_fun = keep_split_levels(level),
      nested = TRUE,
      child_labels = "hidden"
    )
  }

  # Builds the sub-table for the rows of a single biomarker.
  build_biomarker_table <- function(df_bm) {
    # Columns: one per requested statistic.
    lyt <- split_cols_by_multivar(
      lyt = basic_table(),
      vars = colvars$vars,
      varlabels = colvars$labels
    )

    # Top-level row split by biomarker label.
    lyt <- split_rows_by(
      lyt = lyt,
      var = "biomarker_label",
      nested = FALSE
    )

    # "All Patients" (content) row.
    lyt <- split_by_row_type(lyt, "content")
    lyt <- analyze_colvars(
      lyt = lyt,
      afun = afun,
      na_str = na_str,
      extra_args = c(extra_args, overall = TRUE)
    )

    # Subgroup (analysis) rows, only when present for this biomarker.
    if ("analysis" %in% df_bm$row_type) {
      lyt <- split_by_row_type(lyt, "analysis")
      lyt <- split_rows_by(
        lyt = lyt,
        var = "var_label",
        nested = TRUE,
        indent_mod = 1L
      )
      lyt <- analyze_colvars(
        lyt = lyt,
        afun = afun,
        na_str = na_str,
        inclNAs = TRUE,
        extra_args = extra_args
      )
    }

    build_table(lyt, df = df_bm)
  }

  # One sub-table per biomarker, stacked into a single table.
  tbls <- lapply(split(df, f = df$biomarker), build_biomarker_table)
  result <- do.call(rbind, tbls)

  # Attach the column positions as attributes (the roxygen notes the table feeds `g_forest()`).
  structure(
    result,
    forest_header = paste0(c("Lower", "Higher"), "\nBetter"),
    col_x = match("or", vars),
    col_ci = match("ci", vars),
    col_symbol_size = grep("n_tot", vars)
  )
}
184
185		#' Prepare response data estimates for multiple biomarkers in a single data frame
186		#'
187		#' @description `r lifecycle::badge("stable")`
188		#'
189		#' Prepares estimates for number of responses, patients and overall response rate,
190		#' as well as odds ratio estimates, confidence intervals and p-values,
191		#' for multiple biomarkers across population subgroups in a single data frame.
192		#' `variables` corresponds to the names of variables found in `data`, passed as a
193		#' named list and requires elements `rsp` and `biomarkers` (vector of continuous
194		#' biomarker variables) and optionally `covariates`, `subgroups` and `strata`.
195		#' `groups_lists` optionally specifies groupings for `subgroups` variables.
196		#'
197		#' @inheritParams argument_convention
198		#' @inheritParams response_subgroups
199		#' @param control (named `list`)\cr controls for the response definition and the
200		#' confidence level produced by [control_logistic()].
201		#'
202		#' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_rsp`,
203		#' `prop`, `or`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`,
204		#' `var_label`, and `row_type`.
205		#'
206		#' @note You can also specify a continuous variable in `rsp` and then use the
207		#' `response_definition` control to convert that internally to a logical
208		#' variable reflecting binary response.
209		#'
210		#' @seealso [h_logistic_mult_cont_df()] which is used internally.
211		#'
212		#' @examples
213		#' library(dplyr)
214		#' library(forcats)
215		#'
216		#' adrs <- tern_ex_adrs
217		#' adrs_labels <- formatters::var_labels(adrs)
218		#'
219		#' adrs_f <- adrs %>%
220		#' filter(PARAMCD == "BESRSPI") %>%
221		#' mutate(rsp = AVALC == "CR")
222		#'
223		#' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`,
224		#' # in logistic regression models with one covariate `SEX`. The subgroups
225		#' # are defined by the levels of `BMRKR2`.
226		#' df <- extract_rsp_biomarkers(
227		#' variables = list(
228		#' rsp = "rsp",
229		#' biomarkers = c("BMRKR1", "AGE"),
230		#' covariates = "SEX",
231		#' subgroups = "BMRKR2"
232		#' ),
233		#' data = adrs_f
234		#' )
235		#' df
236		#'
237		#' # Here we group the levels of `BMRKR2` manually, and we add a stratification
238		#' # variable `STRATA1`. We also here use a continuous variable `EOSDY`
239		#' # which is then binarized internally (response is defined as this variable
240		#' # being larger than 750).
241		#' df_grouped <- extract_rsp_biomarkers(
242		#' variables = list(
243		#' rsp = "EOSDY",
244		#' biomarkers = c("BMRKR1", "AGE"),
245		#' covariates = "SEX",
246		#' subgroups = "BMRKR2",
247		#' strata = "STRATA1"
248		#' ),
249		#' data = adrs_f,
250		#' groups_lists = list(
251		#' BMRKR2 = list(
252		#' "low" = "LOW",
253		#' "low/medium" = c("LOW", "MEDIUM"),
254		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
255		#' )
256		#' ),
257		#' control = control_logistic(
258		#' response_definition = "I(response > 750)"
259		#' )
260		#' )
261		#' df_grouped
262		#'
263		#' @export
extract_rsp_biomarkers <- function(variables,
                                   data,
                                   groups_lists = list(),
                                   control = control_logistic(),
                                   label_all = "All Patients") {
  # Backwards compatibility: accept the deprecated `strat` element and copy it to `strata`.
  if ("strat" %in% names(variables)) {
    warning(
      "Warning: the `strat` element name of the `variables` list argument to `extract_rsp_biomarkers() ",
      "was deprecated in tern 0.9.4.\n ",
      "Please use the name `strata` instead of `strat` in the `variables` argument."
    )
    variables[["strata"]] <- variables[["strat"]]
  }

  assert_list_of_variables(variables)
  checkmate::assert_string(variables$rsp)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_string(label_all)

  # Estimates for the full population, labelled as the "content" rows.
  overall <- h_logistic_mult_cont_df(
    variables = variables,
    data = data,
    control = control
  )
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  # Without subgroup variables there is nothing more to add.
  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # Fits the models within one subgroup and appends that subgroup's labels to every result row.
  estimate_subgroup <- function(grp) {
    estimates <- h_logistic_mult_cont_df(
      variables = variables,
      data = grp$df,
      control = control
    )
    labels <- grp$df_labels[rep(1, times = nrow(estimates)), ]
    cbind(estimates, labels)
  }

  per_subgroup <- lapply(
    h_split_by_subgroups(
      data,
      variables$subgroups,
      groups_lists = groups_lists
    ),
    estimate_subgroup
  )
  by_subgroup <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(
    overall,
    by_subgroup
  )
}

1		#' Helper functions for tabulating biomarker effects on binary response by subgroup
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper functions which are documented here separately to not confuse the user
6		#' when reading about the user-facing functions.
7		#'
8		#' @inheritParams response_biomarkers_subgroups
9		#' @inheritParams extract_rsp_biomarkers
10		#' @inheritParams argument_convention
11		#'
12		#' @examples
13		#' library(dplyr)
14		#' library(forcats)
15		#'
16		#' adrs <- tern_ex_adrs
17		#' adrs_labels <- formatters::var_labels(adrs)
18		#'
19		#' adrs_f <- adrs %>%
20		#' filter(PARAMCD == "BESRSPI") %>%
21		#' mutate(rsp = AVALC == "CR")
22		#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
23		#'
24		#' @name h_response_biomarkers_subgroups
25		NULL
26
27		#' @describeIn h_response_biomarkers_subgroups helps with converting the "response" function variable list
28		#' to the "logistic regression" variable list. The reason is that currently there is an
29		#' inconsistency between the variable names accepted by `extract_rsp_subgroups()` and `fit_logistic()`.
30		#'
31		#' @param biomarker (`string`)\cr the name of the biomarker variable.
32		#'
33		#' @return
34		#' * `h_rsp_to_logistic_variables()` returns a named `list` of elements `response`, `arm`, `covariates`, and `strata`.
35		#'
36		#' @examples
37		#' # This is how the variable list is converted internally.
38		#' h_rsp_to_logistic_variables(
39		#' variables = list(
40		#' rsp = "RSP",
41		#' covariates = c("A", "B"),
42		#' strata = "D"
43		#' ),
44		#' biomarker = "AGE"
45		#' )
46		#'
47		#' @export
h_rsp_to_logistic_variables <- function(variables, biomarker) {
  # Backwards compatibility: accept the deprecated `strat` element and copy it to `strata`.
  uses_deprecated_strat <- "strat" %in% names(variables)
  if (uses_deprecated_strat) {
    warning(
      "Warning: the `strat` element name of the `variables` list argument to `h_rsp_to_logistic_variables() ",
      "was deprecated in tern 0.9.4.\n ",
      "Please use the name `strata` instead of `strat` in the `variables` argument."
    )
    variables[["strata"]] <- variables[["strat"]]
  }

  checkmate::assert_list(variables)
  checkmate::assert_string(variables$rsp)
  checkmate::assert_string(biomarker)

  # Rename to the logistic-regression naming; the biomarker takes the role of `arm`.
  logistic_variables <- list(
    response = variables$rsp,
    arm = biomarker,
    covariates = variables$covariates,
    strata = variables$strata
  )
  logistic_variables
}
67
68		#' @describeIn h_response_biomarkers_subgroups prepares estimates for number of responses, patients and
69		#' overall response rate, as well as odds ratio estimates, confidence intervals and p-values, for multiple
70		#' biomarkers in a given single data set.
71		#' `variables` corresponds to names of variables found in `data`, passed as a named list and requires elements
72		#' `rsp` and `biomarkers` (vector of continuous biomarker variables) and optionally `covariates`
73		#' and `strata`.
74		#'
75		#' @return
76		#' * `h_logistic_mult_cont_df()` returns a `data.frame` containing estimates and statistics for the selected biomarkers.
77		#'
78		#' @examples
79		#' # For a single population, estimate separately the effects
80		#' # of two biomarkers.
81		#' df <- h_logistic_mult_cont_df(
82		#' variables = list(
83		#' rsp = "rsp",
84		#' biomarkers = c("BMRKR1", "AGE"),
85		#' covariates = "SEX"
86		#' ),
87		#' data = adrs_f
88		#' )
89		#' df
90		#'
91		#' # If the data set is empty, still the corresponding rows with missings are returned.
92		#' h_logistic_mult_cont_df(
93		#' variables = list(
94		#' rsp = "rsp",
95		#' biomarkers = c("BMRKR1", "AGE"),
96		#' covariates = "SEX",
97		#' strata = "STRATA1"
98		#' ),
99		#' data = adrs_f[NULL, ]
100		#' )
101		#'
102		#' @export
h_logistic_mult_cont_df <- function(variables,
                                    data,
                                    control = control_logistic()) {
  # Backwards compatibility: accept the deprecated `strat` element and copy it to `strata`.
  if ("strat" %in% names(variables)) {
    warning(
      "Warning: the `strat` element name of the `variables` list argument to `h_logistic_mult_cont_df() ",
      "was deprecated in tern 0.9.4.\n ",
      "Please use the name `strata` instead of `strat` in the `variables` argument."
    )
    variables[["strata"]] <- variables[["strat"]]
  }
  assert_df_with_variables(data, variables)

  checkmate::assert_character(variables$biomarkers, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  conf_level <- control[["conf_level"]]
  pval_label <- "p-value (Wald)"

  # If there is any data, run one model per biomarker, otherwise return empty results.
  if (nrow(data) > 0) {
    l_result <- lapply(variables$biomarkers, function(bm) {
      model_fit <- fit_logistic(
        variables = h_rsp_to_logistic_variables(variables, bm),
        data = data,
        response_definition = control$response_definition
      )
      result <- h_logistic_simple_terms(
        x = bm,
        fit_glm = model_fit,
        conf_level = control$conf_level
      )
      # The response is read from the fitted object: from the model frame for a `glm`
      # fit, otherwise from the "status" column of the fit's `y` component.
      resp_vector <- if (inherits(model_fit, "glm")) {
        model_fit$model[[variables$rsp]]
      } else {
        as.logical(as.matrix(model_fit$y)[, "status"])
      }
      data.frame(
        # Dummy column needed downstream to create a nested header.
        biomarker = bm,
        biomarker_label = formatters::var_labels(data[bm], fill = TRUE),
        n_tot = length(resp_vector),
        n_rsp = sum(resp_vector),
        prop = mean(resp_vector),
        # Only the first row of the term table is reported for the biomarker.
        or = as.numeric(result[1L, "odds_ratio"]),
        lcl = as.numeric(result[1L, "lcl"]),
        ucl = as.numeric(result[1L, "ucl"]),
        conf_level = conf_level,
        pval = as.numeric(result[1L, "pvalue"]),
        pval_label = pval_label,
        stringsAsFactors = FALSE
      )
    })
    do.call(rbind, args = c(l_result, make.row.names = FALSE))
  } else {
    # No observations: one row per biomarker with zero counts and missing estimates.
    data.frame(
      biomarker = variables$biomarkers,
      biomarker_label = formatters::var_labels(data[variables$biomarkers], fill = TRUE),
      n_tot = 0L,
      n_rsp = 0L,
      prop = NA,
      or = NA,
      lcl = NA,
      ucl = NA,
      conf_level = conf_level,
      pval = NA,
      pval_label = pval_label,
      row.names = seq_along(variables$biomarkers),
      stringsAsFactors = FALSE
    )
  }
}

1		#' Formatting functions
2		#'
3		#' See below for the list of formatting functions created in `tern` to work with `rtables`.
4		#'
5		#' Other available formats can be listed via [`formatters::list_valid_format_labels()`]. Additional
6		#' custom formats can be created via the [`formatters::sprintf_format()`] function.
7		#'
8		#' @family formatting functions
9		#' @name formatting_functions
10		NULL
11
12		#' Format fraction and percentage
13		#'
14		#' @description `r lifecycle::badge("stable")`
15		#'
16		#' Formats a fraction together with ratio in percent.
17		#'
18		#' @param x (named `integer`)\cr vector with elements `num` and `denom`.
19		#' @param ... not used. Required for `rtables` interface.
20		#'
21		#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`.
22		#'
23		#' @examples
24		#' format_fraction(x = c(num = 2L, denom = 3L))
25		#' format_fraction(x = c(num = 0L, denom = 3L))
26		#'
27		#' @family formatting functions
28		#' @export
format_fraction <- function(x, ...) {
  attr(x, "label") <- NULL

  checkmate::assert_vector(x)
  checkmate::assert_count(x["num"])
  checkmate::assert_count(x["denom"])

  num <- x["num"]
  denom <- x["denom"]
  fraction <- paste0(num, "/", denom)

  # A zero numerator is shown without the percentage part.
  if (num == 0) {
    return(fraction)
  }

  paste0(fraction, " (", round(num / denom * 100, 1), "%)")
}
47
48		#' Format fraction and percentage with fixed single decimal place
49		#'
50		#' @description `r lifecycle::badge("stable")`
51		#'
52		#' Formats a fraction together with ratio in percent with fixed single decimal place.
53		#' Includes trailing zero in case of whole number percentages to always keep one decimal place.
54		#'
55		#' @inheritParams format_fraction
56		#'
57		#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`.
58		#'
59		#' @examples
60		#' format_fraction_fixed_dp(x = c(num = 1L, denom = 2L))
61		#' format_fraction_fixed_dp(x = c(num = 1L, denom = 4L))
62		#' format_fraction_fixed_dp(x = c(num = 0L, denom = 3L))
63		#'
64		#' @family formatting functions
65		#' @export
format_fraction_fixed_dp <- function(x, ...) {
  attr(x, "label") <- NULL
  checkmate::assert_vector(x)
  checkmate::assert_count(x["num"])
  checkmate::assert_count(x["denom"])

  num <- x["num"]
  denom <- x["denom"]
  fraction <- paste0(num, "/", denom)

  # A zero numerator is shown without the percentage part.
  if (num == 0) {
    return(fraction)
  }

  # "%.1f" keeps one decimal place even for whole-number percentages.
  percent <- sprintf("%.1f", round(num / denom * 100, 1))
  paste0(fraction, " (", percent, "%)")
}
82
83		#' Format count and fraction
84		#'
85		#' @description `r lifecycle::badge("stable")`
86		#'
87		#' Formats a count together with fraction with special consideration when count is `0`.
88		#'
89		#' @param x (`numeric(2)`)\cr vector of length 2 with count and fraction, respectively.
90		#' @param ... not used. Required for `rtables` interface.
91		#'
92		#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`.
93		#'
94		#' @examples
95		#' format_count_fraction(x = c(2, 0.6667))
96		#' format_count_fraction(x = c(0, 0))
97		#'
98		#' @family formatting functions
99		#' @export
format_count_fraction <- function(x, ...) {
  attr(x, "label") <- NULL

  # Any missing element yields the literal string "NA".
  if (any(is.na(x))) {
    return("NA")
  }

  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(x[2], include_boundaries = TRUE)

  count <- x[1]

  # A zero count is shown without the percentage part.
  if (count == 0) {
    return("0")
  }

  paste0(count, " (", round(x[2] * 100, 1), "%)")
}
119
120		#' Format count and percentage with fixed single decimal place
121		#'
122		#' @description `r lifecycle::badge("experimental")`
123		#'
124		#' Formats a count together with fraction with special consideration when count is `0`.
125		#'
126		#' @inheritParams format_count_fraction
127		#'
128		#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`.
129		#'
130		#' @examples
131		#' format_count_fraction_fixed_dp(x = c(2, 0.6667))
132		#' format_count_fraction_fixed_dp(x = c(2, 0.5))
133		#' format_count_fraction_fixed_dp(x = c(0, 0))
134		#'
135		#' @family formatting functions
136		#' @export
format_count_fraction_fixed_dp <- function(x, ...) {
  attr(x, "label") <- NULL

  # Any missing element yields the literal string "NA".
  if (any(is.na(x))) {
    return("NA")
  }

  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(x[2], include_boundaries = TRUE)

  count <- x[1]
  fraction <- x[2]

  # A zero count is shown without the percentage part.
  if (count == 0) {
    return("0")
  }

  # A fraction of (numerically) one is printed as "100%" without a decimal place.
  if (.is_equal_float(fraction, 1)) {
    return(sprintf("%d (100%%)", count))
  }

  sprintf("%d (%.1f%%)", count, fraction * 100)
}
158
159		#' Format count and fraction with special case for count < 10
160		#'
161		#' @description `r lifecycle::badge("stable")`
162		#'
163		#' Formats a count together with fraction with special consideration when count is less than 10.
164		#'
165		#' @inheritParams format_count_fraction
166		#'
167		#' @return A string in the format `count (fraction %)`. If `count` is less than 10, only `count` is printed.
168		#'
169		#' @examples
170		#' format_count_fraction_lt10(x = c(275, 0.9673))
171		#' format_count_fraction_lt10(x = c(2, 0.6667))
172		#' format_count_fraction_lt10(x = c(9, 1))
173		#'
174		#' @family formatting functions
175		#' @export
format_count_fraction_lt10 <- function(x, ...) {
  attr(x, "label") <- NULL

  # Any missing element yields the literal string "NA".
  if (any(is.na(x))) {
    return("NA")
  }

  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(x[2], include_boundaries = TRUE)

  count <- x[1]

  # Counts below 10 are printed on their own, without the percentage part.
  if (count < 10) {
    return(paste0(count))
  }

  paste0(count, " (", round(x[2] * 100, 1), "%)")
}
195
196		#' Format XX as a formatting function
197		#'
198		#' Translate a string where x and dots are interpreted as number place
199		#' holders, and others as formatting elements.
200		#'
201		#' @param str (`string`)\cr template.
202		#'
203		#' @return An `rtables` formatting function.
204		#'
205		#' @examples
206		#' test <- list(c(1.658, 0.5761), c(1e1, 785.6))
207		#'
208		#' z <- format_xx("xx (xx.x)")
209		#' sapply(test, z)
210		#'
211		#' z <- format_xx("xx.x - xx.x")
212		#' sapply(test, z)
213		#'
214		#' z <- format_xx("xx.x, incl. xx.x% NE")
215		#' sapply(test, z)
216		#'
217		#' @family formatting functions
218		#' @export
format_xx <- function(str) {
  # Locate every number placeholder in the template: either a run of "x" with a
  # decimal part ("xx.x") or a plain run of "x".
  positions <- gregexpr(pattern = "x+\\.x+|x+", text = str, perl = TRUE)
  placeholders <- regmatches(x = str, m = positions)[[1]]

  # Number of decimal places per placeholder = number of "x" after the ".",
  # or 0 when the placeholder has no decimal part.
  n_decimals <- vapply(
    strsplit(placeholders, split = "\\."),
    function(parts) if (length(parts) > 1) nchar(parts[2]) else 0L,
    integer(1)
  )

  # The returned function rounds each value to the decimals of its placeholder
  # and substitutes the rounded values into a local copy of the template.
  rtable_format <- function(x, output) {
    values <- Map(
      function(value, digits) round(value, digits = digits),
      x,
      n_decimals
    )
    regmatches(x = str, m = positions)[[1]] <- values
    str
  }

  rtable_format
}
244
245		#' Format numeric values by significant figures
246		#'
247		#' Format numeric values to print with a specified number of significant figures.
248		#'
249		#' @param sigfig (`integer(1)`)\cr number of significant figures to display.
250		#' @param format (`string`)\cr the format label (string) to apply when printing the value. Decimal
251		#' places in string are ignored in favor of formatting by significant figures. Format options are:
252		#' `"xx"`, `"xx / xx"`, `"(xx, xx)"`, `"xx - xx"`, and `"xx (xx)"`.
253		#' @param num_fmt (`string`)\cr numeric format modifiers to apply to the value. Defaults to `"fg"` for
254		#' standard significant figures formatting - fixed (non-scientific notation) format (`"f"`)
255		#' and `sigfig` equal to number of significant figures instead of decimal places (`"g"`). See the
256		#' [formatC()] `format` argument for more options.
257		#'
258		#' @return An `rtables` formatting function.
259		#'
260		#' @examples
261		#' fmt_3sf <- format_sigfig(3)
262		#' fmt_3sf(1.658)
263		#' fmt_3sf(1e1)
264		#'
265		#' fmt_5sf <- format_sigfig(5)
266		#' fmt_5sf(0.57)
267		#' fmt_5sf(0.000025645)
268		#'
269		#' @family formatting functions
270		#' @export
format_sigfig <- function(sigfig, format = "xx", num_fmt = "fg") {
  checkmate::assert_integerish(sigfig)

  # Drop any decimal specification from the template (e.g. "xx." or "xx.xx" become "xx"),
  # since precision is governed by `sigfig`.
  format <- gsub("xx\\.|xx\\.x+", "xx", format)
  checkmate::assert_choice(format, c("xx", "xx / xx", "(xx, xx)", "xx - xx", "xx (xx)"))

  function(x, ...) {
    if (!is.numeric(x)) stop("`format_sigfig` cannot be used for non-numeric values. Please choose another format.")

    # Round to `sigfig` significant figures; flag "#" is passed so trailing zeros are kept.
    num <- formatC(signif(x, digits = sigfig), digits = sigfig, format = num_fmt, flag = "#")
    num <- gsub("\\.$", "", num) # remove trailing "."

    format_value(num, format)
  }
}
283
284		#' Format fraction with lower threshold
285		#'
286		#' @description `r lifecycle::badge("stable")`
287		#'
288		#' Formats a fraction when the second element of the input `x` is the fraction. It applies
289		#' a lower threshold, below which it is just stated that the fraction is smaller than that.
290		#'
291		#' @param threshold (`proportion`)\cr lower threshold.
292		#'
293		#' @return An `rtables` formatting function that takes numeric input `x` where the second
294		#' element is the fraction that is formatted. If the fraction is above or equal to the threshold,
295		#' then it is displayed in percentage. If it is positive but below the threshold, it returns,
296		#' e.g. "<1" if the threshold is `0.01`. If it is zero, then just "0" is returned.
297		#'
298		#' @examples
299		#' format_fun <- format_fraction_threshold(0.05)
300		#' format_fun(x = c(20, 0.1))
301		#' format_fun(x = c(2, 0.01))
302		#' format_fun(x = c(0, 0))
303		#'
304		#' @family formatting functions
305		#' @export
format_fraction_threshold <- function(threshold) {
  assert_proportion_value(threshold)

  # Label used for positive fractions below the threshold, e.g. "<5" for 0.05.
  string_below_threshold <- paste0("<", round(threshold * 100))

  function(x, ...) {
    assert_proportion_value(x[2], include_boundaries = TRUE)
    ifelse(
      # Compare against the user-supplied threshold (not a fixed 1%): fractions at or
      # above it are shown as a rounded percentage.
      x[2] >= threshold,
      round(x[2] * 100),
      ifelse(
        x[2] == 0,
        "0",
        string_below_threshold
      )
    )
  }
}
322
323		#' Format extreme values
324		#'
325		#' @description `r lifecycle::badge("stable")`
326		#'
327		#' `rtables` formatting functions that handle extreme values.
328		#'
329		#' @param digits (`integer(1)`)\cr number of decimal places to display.
330		#'
331		#' @details For each input, apply a format to the specified number of `digits`. If the value is
332		#' below a threshold, it returns "<0.01" e.g. if the number of `digits` is 2. If the value is
333		#' above a threshold, it returns ">999.99" e.g. if the number of `digits` is 2.
334		#' If it is zero, then returns "0.00".
335		#'
336		#' @family formatting functions
337		#' @name extreme_format
338		NULL
339
340		#' @describeIn extreme_format Internal helper function to calculate the threshold and create formatted strings
341		#' used in Formatting Functions. Returns a list with elements `threshold` and `format_string`.
342		#'
343		#' @return
344		#' * `h_get_format_threshold()` returns a `list` of 2 elements: `threshold`, with `low` and `high` thresholds,
345		#' and `format_string`, with thresholds formatted as strings.
346		#'
347		#' @examples
348		#' h_get_format_threshold(2L)
349		#'
350		#' @export
h_get_format_threshold <- function(digits = 2L) {
  checkmate::assert_integerish(digits)

  low_threshold <- 1 / (10 ^ digits) # styler: off
  high_threshold <- 1000 - (1 / (10 ^ digits)) # styler: off

  # Render the thresholds in fixed-point notation with `digits` decimals. Pasting the
  # doubles directly would switch to scientific notation for small values
  # (e.g. "<1e-04" for `digits = 4`). `pmax()` keeps the format valid for negative `digits`.
  fixed_fmt <- paste0("%.", pmax(digits, 0), "f")
  string_below_threshold <- paste0("<", sprintf(fixed_fmt, low_threshold))
  string_above_threshold <- paste0(">", sprintf(fixed_fmt, high_threshold))

  list(
    "threshold" = c(low = low_threshold, high = high_threshold),
    "format_string" = c(low = string_below_threshold, high = string_above_threshold)
  )
}
365
366		#' @describeIn extreme_format Internal helper function to apply a threshold format to a value.
367		#' Creates a formatted string to be used in Formatting Functions.
368		#'
369		#' @param x (`numeric(1)`)\cr value to format.
370		#'
371		#' @return
372		#' * `h_format_threshold()` returns the given value, or if the value is not within the digit threshold the relation
373		#' of the given value to the digit threshold, as a formatted string.
374		#'
375		#' @examples
376		#' h_format_threshold(0.001)
377		#' h_format_threshold(1000)
378		#'
379		#' @export
h_format_threshold <- function(x, digits = 2L) {
  # Missing values are passed through unchanged.
  if (is.na(x)) {
    return(x)
  }

  checkmate::assert_numeric(x, lower = 0)

  limits <- h_get_format_threshold(digits)
  bounds <- limits$threshold
  labels <- limits$format_string

  # Positive but below the lower threshold: report e.g. "<0.01".
  if (x < bounds["low"] && 0 < x) {
    return(unname(labels["low"]))
  }

  # Above the upper threshold: report e.g. ">999.99".
  if (x > bounds["high"]) {
    return(unname(labels["high"]))
  }

  # Otherwise print the value itself with `digits` decimal places.
  unname(sprintf(fmt = paste0("%.", digits, "f"), x))
}
399
400		#' Format a single extreme value
401		#'
402		#' @description `r lifecycle::badge("stable")`
403		#'
404		#' Create a formatting function for a single extreme value.
405		#'
406		#' @inheritParams extreme_format
407		#'
408		#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme value.
409		#'
410		#' @examples
411		#' format_fun <- format_extreme_values(2L)
412		#' format_fun(x = 0.127)
413		#' format_fun(x = Inf)
414		#' format_fun(x = 0)
415		#' format_fun(x = 0.009)
416		#'
417		#' @family formatting functions
418		#' @export
format_extreme_values <- function(digits = 2L) {
  # The returned closure keeps `digits` and follows the `function(x, ...)`
  # signature used for formatting functions in this file.
  format_single_value <- function(x, ...) {
    checkmate::assert_scalar(x, na.ok = TRUE)

    h_format_threshold(x = x, digits = digits)
  }

  format_single_value
}
426
427		#' Format extreme values part of a confidence interval
428		#'
429		#' @description `r lifecycle::badge("stable")`
430		#'
431		#' Formatting Function for extreme values part of a confidence interval. Values
432		#' are formatted as e.g. "(xx.xx, xx.xx)" if the number of `digits` is 2.
433		#'
434		#' @inheritParams extreme_format
435		#'
436		#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme
437		#' values confidence interval.
438		#'
439		#' @examples
440		#' format_fun <- format_extreme_values_ci(2L)
441		#' format_fun(x = c(0.127, Inf))
442		#' format_fun(x = c(0, 0.009))
443		#'
444		#' @family formatting functions
445		#' @export
format_extreme_values_ci <- function(digits = 2L) {
  # The returned closure formats a length-2 vector (lower, upper) as "(lower, upper)",
  # applying the threshold formatting to each bound separately.
  format_interval <- function(x, ...) {
    checkmate::assert_vector(x, len = 2)

    lower_txt <- h_format_threshold(x = x[1], digits = digits)
    upper_txt <- h_format_threshold(x = x[2], digits = digits)

    paste0("(", lower_txt, ", ", upper_txt, ")")
  }

  format_interval
}
455
456		#' Format automatically using data significant digits
457		#'
458		#' @description `r lifecycle::badge("stable")`
459		#'
460		#' Formatting function for the majority of default methods used in [analyze_vars()].
461		#' For non-derived values, the significant digits of data is used (e.g. range), while derived
462		#' values have one more digit (measures of location and dispersion such as mean and standard deviation).
463		#' This function can be called internally with "auto" like, for example,
464		#' `.formats = c("mean" = "auto")`. See details to see how this works with the inner function.
465		#'
466		#' @param dt_var (`numeric`)\cr variable data the statistics were calculated from. Used only to
467		#' find significant digits. In [analyze_vars] this comes from `.df_row` (see
468		#' [rtables::additional_fun_params]), and it is the row data after the above row splits. No
469		#' column split is considered.
470		#' @param x_stat (`string`)\cr string indicating the current statistical method used.
471		#'
472		#' @return A string that `rtables` prints in a table cell.
473		#'
474		#' @details
475		#' The internal function is needed to work with `rtables` default structure for
476		#' format functions, i.e. `function(x, ...)`, where `x` holds the results of the statistical evaluation.
477		#' It can be more than one element (e.g. for `.stats = "mean_sd"`).
478		#'
479		#' @examples
480		#' x_todo <- c(0.001, 0.2, 0.0011000, 3, 4)
481		#' res <- c(mean(x_todo[1:3]), sd(x_todo[1:3]))
482		#'
483		#' # x is the result coming into the formatting function -> res!!
484		#' format_auto(dt_var = x_todo, x_stat = "mean_sd")(x = res)
485		#' format_auto(x_todo, "range")(x = range(x_todo))
486		#' no_sc_x <- c(0.0000001, 1)
487		#' format_auto(no_sc_x, "range")(x = no_sc_x)
488		#'
489		#' @family formatting functions
490		#' @export
format_auto <- function(dt_var, x_stat) {
  function(x = "", ...) {
    checkmate::assert_numeric(x, min.len = 1)
    checkmate::assert_numeric(dt_var, min.len = 1)

    # Statistics split by whether they are derived from the data (one extra
    # decimal) or reported at data precision. May become parameters later.
    der_stats <- c(
      "mean", "sd", "se", "median", "geom_mean", "quantiles", "iqr",
      "mean_sd", "mean_se", "mean_se", "mean_ci", "mean_sei", "mean_sdi",
      "median_ci"
    )
    nonder_stats <- c("n", "range", "min", "max")

    # Safety net against accidental edits: the two groups must not overlap.
    stopifnot(length(intersect(der_stats, nonder_stats)) == 0) # nolint
    checkmate::assert_choice(x_stat, c(der_stats, nonder_stats))

    # Largest number of decimal places found in the data.
    n_decimals <- max(vapply(dt_var, count_decimalplaces, FUN.VALUE = numeric(1)))
    if (x_stat %in% der_stats) {
      n_decimals <- n_decimals + 1
    }

    # Render the incoming values and look up the default format template.
    str_vals <- formatC(x, digits = n_decimals, format = "f")
    def_fmt <- get_formats_from_stats(x_stat)[[x_stat]]
    n_placeholders <- length(str_extract(def_fmt, invert = FALSE)[[1]])
    if (n_placeholders != length(str_vals)) {
      stop(
        "Number of inserted values as result (", length(str_vals),
        ") is not the same as there should be in the default tern formats for ",
        x_stat, " (-> ", def_fmt, " needs ", n_placeholders, " values). ",
        "See tern_default_formats to check all of them."
      )
    }

    # Literal text surrounding the placeholders: always one piece more than values.
    inv_str_fmt <- str_extract(def_fmt, invert = TRUE)[[1]]
    stopifnot(length(inv_str_fmt) == length(str_vals) + 1) # nolint

    # Interleave text and values: text1 value1 text2 value2 ... textN.
    paste0(inv_str_fmt, c(str_vals, ""), collapse = "")
  }
}
540
# Utility function that could be useful in general.
#
# Extracts all matches of `pattern` from `string`, or, with `invert = TRUE`, the
# pieces of text between the matches. The default pattern recognises the numeric
# placeholders "xx", "xx." and "xx.x", "xx.xx", ... used in format strings.
# Returns a list with one character vector per element of `string`.
str_extract <- function(string, pattern = "xx|xx\\.|xx\\.x+", invert = FALSE) {
  # Alternation is a plain `|`; escaping it as `\|` is not a valid escape
  # sequence in an R string literal.
  regmatches(string, gregexpr(pattern, string), invert = invert)
}
545
# Helper function.
#
# Counts the decimal places of a single number as displayed by `format()` in
# fixed notation. Missing, non-finite and whole numbers have zero decimal places.
count_decimalplaces <- function(dec) {
  # `NA`, `NaN` and `+/-Inf` carry no decimal information.
  if (is.na(dec) || !is.finite(dec)) {
    return(0)
  }

  # Tolerance guards against floating point noise on whole numbers.
  if (!(abs(dec - round(dec)) > .Machine$double.eps^0.5)) {
    return(0)
  }

  dec_txt <- format(dec, scientific = FALSE, trim = FALSE, decimal.mark = ".")
  parts <- strsplit(dec_txt, ".", fixed = TRUE)[[1]]

  # `format()` keeps a limited number of significant digits, so the fractional
  # part of a large number (e.g. 1234567.1) can be rounded away entirely.
  if (length(parts) < 2) {
    return(0)
  }

  nchar(parts[[2]])
}
556
557		#' Apply automatic formatting
558		#'
559		#' Checks if any of the listed formats in `.formats` are `"auto"`, and replaces `"auto"` with
560		#' the correct implementation of `format_auto` for the given statistics, data, and variable.
561		#'
562		#' @inheritParams argument_convention
563		#' @param x_stats (named `list`)\cr a named list of statistics where each element corresponds
564		#' to an element in `.formats`, with matching names.
565		#'
566		#' @keywords internal
apply_auto_formatting <- function(.formats, x_stats, .df_row, .var) {
  # Flag the entries that are the literal string "auto".
  is_auto_fmt <- vapply(.formats, function(fmt) is.character(fmt) && fmt == "auto", logical(1))

  # Nothing to replace: hand the formats back unchanged.
  if (!any(is_auto_fmt)) {
    return(.formats)
  }

  # Data used to detect the number of decimals.
  # xxx this can be extended for the WHOLE data or single facets
  dt_var <- .df_row[[.var]]

  # One `format_auto()` closure per "auto" statistic, keyed by the statistic name.
  .formats[is_auto_fmt] <- lapply(
    names(x_stats[is_auto_fmt]),
    function(stat_name) format_auto(dt_var = dt_var, x_stat = stat_name)
  )
  .formats
}

1		# summarize_glm_count ----------------------------------------------------------
2		#' Summarize Poisson negative binomial regression
3		#'
4		#' @description `r lifecycle::badge("experimental")`
5		#'
6		#' Summarize results of a Poisson negative binomial regression.
7		#' This can be used to analyze count and/or frequency data using a linear model.
8		#' It is specifically useful for analyzing count data (using the Poisson or Negative
9		#' Binomial distribution) that is result of a generalized linear model of one (e.g. arm) or more
10		#' covariates.
11		#'
12		#' @inheritParams h_glm_count
13		#' @inheritParams argument_convention
14		#' @param rate_mean_method (`character(1)`)\cr method used to estimate the mean rate. Defaults to `emmeans`.
15		#' See details for more information.
16		#' @param scale (`numeric(1)`)\cr linear scaling factor for rate and confidence intervals. Defaults to `1`.
17		#' @param .stats (`character`)\cr statistics to select for the table.
18		#'
19		#' Options are: ``r shQuote(get_stats("summarize_glm_count"), type = "sh")``
20		#'
21		#' @details
22		#' `summarize_glm_count()` uses `s_glm_count()` to calculate the statistics for the table. This
23		#' analysis function uses [h_glm_count()] to estimate the GLM with [stats::glm()] for Poisson and Quasi-Poisson
24		#' distributions or [MASS::glm.nb()] for Negative Binomial distribution. All methods assume a
25		#' logarithmic link function.
26		#'
27		#' At this point, rates and confidence intervals are estimated from the model using
28		#' either [emmeans::emmeans()] when `rate_mean_method = "emmeans"` or [h_ppmeans()]
29		#' when `rate_mean_method = "ppmeans"`.
30		#'
31		#' If a reference group is specified while building the table with `split_cols_by(ref_group)`,
32		#' no rate ratio or `p-value` are calculated. Otherwise, we use [emmeans::contrast()] to
33		#' calculate the rate ratio and `p-value` for the reference group. Values are always estimated
34		#' with `method = "trt.vs.ctrl"` and `ref` equal to the first `arm` value.
35		#'
36		#' @name summarize_glm_count
37		NULL
38
39		#' @describeIn summarize_glm_count Layout-creating function which can take statistics function arguments
40		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
41		#'
42		#' @return
43		#' * `summarize_glm_count()` returns a layout object suitable for passing to further layouting functions,
44		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
45		#' the statistics from `s_glm_count()` to the table layout.
46		#'
47		#' @examples
48		#' library(dplyr)
49		#'
50		#' anl <- tern_ex_adtte %>% filter(PARAMCD == "TNE")
51		#' anl$AVAL_f <- as.factor(anl$AVAL)
52		#'
53		#' lyt <- basic_table() %>%
54		#' split_cols_by("ARM", ref_group = "B: Placebo") %>%
55		#' add_colcounts() %>%
56		#' analyze_vars(
57		#' "AVAL_f",
58		#' var_labels = "Number of exacerbations per patient",
59		#' .stats = c("count_fraction"),
60		#' .formats = c("count_fraction" = "xx (xx.xx%)"),
61		#' .labels = c("Number of exacerbations per patient")
62		#' ) %>%
63		#' summarize_glm_count(
64		#' vars = "AVAL",
65		#' variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = NULL),
66		#' conf_level = 0.95,
67		#' distribution = "poisson",
68		#' rate_mean_method = "emmeans",
69		#' var_labels = "Adjusted (P) exacerbation rate (per year)",
70		#' table_names = "adjP",
71		#' .stats = c("rate"),
72		#' .labels = c(rate = "Rate")
73		#' ) %>%
74		#' summarize_glm_count(
75		#' vars = "AVAL",
76		#' variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = c("REGION1")),
77		#' conf_level = 0.95,
78		#' distribution = "quasipoisson",
79		#' rate_mean_method = "ppmeans",
80		#' var_labels = "Adjusted (QP) exacerbation rate (per year)",
81		#' table_names = "adjQP",
82		#' .stats = c("rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
83		#' .labels = c(
84		#' rate = "Rate", rate_ci = "Rate CI", rate_ratio = "Rate Ratio",
85		#' rate_ratio_ci = "Rate Ratio CI", pval = "p value"
86		#' )
87		#' ) %>%
88		#' summarize_glm_count(
89		#' vars = "AVAL",
90		#' variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = c("REGION1")),
91		#' conf_level = 0.95,
92		#' distribution = "negbin",
93		#' rate_mean_method = "emmeans",
94		#' var_labels = "Adjusted (NB) exacerbation rate (per year)",
95		#' table_names = "adjNB",
96		#' .stats = c("rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
97		#' .labels = c(
98		#' rate = "Rate", rate_ci = "Rate CI", rate_ratio = "Rate Ratio",
99		#' rate_ratio_ci = "Rate Ratio CI", pval = "p value"
100		#' )
101		#' )
102		#'
103		#' build_table(lyt = lyt, df = anl)
104		#'
105		#' @export
summarize_glm_count <- function(lyt,
                                vars,
                                variables,
                                distribution,
                                conf_level,
                                rate_mean_method = c("emmeans", "ppmeans")[1],
                                weights = stats::weights,
                                scale = 1,
                                var_labels,
                                na_str = default_na_str(),
                                nested = TRUE,
                                ...,
                                show_labels = "visible",
                                table_names = vars,
                                .stats = c("n", "rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
                                .stat_names = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = list("rate_ci" = 1L, "rate_ratio_ci" = 1L, "pval" = 1L)) {
  checkmate::assert_choice(rate_mean_method, c("emmeans", "ppmeans"))

  # Standard formatting arguments: `.stats` is always forwarded, the others
  # only when the caller supplied them.
  optional_std_args <- Filter(
    Negate(is.null),
    list(
      .stat_names = .stat_names,
      .formats = .formats,
      .labels = .labels,
      .indent_mods = .indent_mods
    )
  )

  # Arguments of the statistics function, followed by anything passed via `...`.
  extra_args <- c(
    list(.stats = .stats),
    optional_std_args,
    list(
      variables = variables,
      distribution = distribution,
      conf_level = conf_level,
      rate_mean_method = rate_mean_method,
      weights = weights,
      scale = scale
    ),
    ...
  )

  # Extend the formals of a local copy of the analysis function with the
  # additional layout parameters.
  extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE)
  formals(a_glm_count) <- c(formals(a_glm_count), extra_args[[".additional_fun_parameters"]])

  analyze(
    lyt = lyt,
    vars = vars,
    afun = a_glm_count,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names
  )
}
158
#' @describeIn summarize_glm_count Statistics function that produces a named list of results
#'   of the investigated Poisson model.
#'
#' @return
#' * `s_glm_count()` returns a named `list` of 6 statistics:
#'   * `n`: Number of non-missing values of `.var` in `df`.
#'   * `rate`: Estimated event rate per follow-up time.
#'   * `rate_ci`: Confidence interval for the estimated rate per follow-up time.
#'   * `rate_ratio`: Ratio of event rates in each treatment arm to the reference arm.
#'   * `rate_ratio_ci`: Confidence interval for the rate ratio.
#'   * `pval`: p-value.
#'
#'   In the reference column, `rate_ratio`, `rate_ratio_ci` and `pval` are empty numeric vectors.
#'
#' @keywords internal
s_glm_count <- function(df,
                        .var,
                        .df_row,
                        .ref_group,
                        .in_ref_col,
                        variables,
                        distribution,
                        conf_level,
                        rate_mean_method,
                        weights,
                        scale = 1,
                        ...) {
  # Fail early with a clear message instead of a later "object not found" error.
  checkmate::assert_choice(rate_mean_method, c("emmeans", "ppmeans"))

  arm <- variables$arm

  y <- df[[.var]]
  smry_level <- as.character(unique(df[[arm]]))

  # ensure there is only 1 value
  checkmate::assert_scalar(smry_level)

  results <- h_glm_count(
    .var = .var,
    .df_row = .df_row,
    variables = variables,
    distribution = distribution,
    weights = weights
  )

  emmeans_smry <- if (rate_mean_method == "emmeans") {
    summary(results$emmeans_fit, level = conf_level)
  } else {
    h_ppmeans(results$glm_fit, .df_row, arm, conf_level)
  }

  # Row of the summary that belongs to the arm of the current column.
  emmeans_smry_level <- emmeans_smry[emmeans_smry[[arm]] == smry_level, ]

  # The estimate is read from `response` for negative binomial models and from
  # `rate` otherwise.
  rate_est <- if (distribution == "negbin") {
    emmeans_smry_level$response
  } else {
    emmeans_smry_level$rate
  }

  rate_stats <- list(
    n = length(y[!is.na(y)]),
    rate = formatters::with_label(rate_est * scale, "Adjusted Rate"),
    rate_ci = formatters::with_label(
      c(emmeans_smry_level$asymp.LCL * scale, emmeans_smry_level$asymp.UCL * scale),
      f_conf_level(conf_level)
    )
  )

  ratio_stats <- if (.in_ref_col) {
    # Reference column: there is nothing to compare against.
    list(
      rate_ratio = formatters::with_label(numeric(), "Adjusted Rate Ratio"),
      rate_ratio_ci = formatters::with_label(numeric(), f_conf_level(conf_level)),
      pval = formatters::with_label(numeric(), "p-value")
    )
  } else {
    # Arm levels are matched literally (`fixed = TRUE`) so that levels containing
    # regular expression metacharacters, e.g. parentheses, are found correctly.
    ref_level <- as.character(unique(.ref_group[[arm]]))
    emmeans_contrasts <- emmeans::contrast(
      results$emmeans_fit,
      method = "trt.vs.ctrl",
      ref = grep(
        ref_level,
        as.data.frame(results$emmeans_fit)[[arm]],
        fixed = TRUE
      )
    )

    # `level` must be passed explicitly; otherwise the interval is always 95%
    # while its label reports `conf_level`.
    contrasts_smry <- summary(
      emmeans_contrasts,
      infer = TRUE,
      level = conf_level,
      adjust = "none"
    )

    smry_contrasts_level <- contrasts_smry[
      grepl(smry_level, contrasts_smry$contrast, fixed = TRUE),
    ]

    list(
      rate_ratio = formatters::with_label(
        smry_contrasts_level$ratio,
        "Adjusted Rate Ratio"
      ),
      rate_ratio_ci = formatters::with_label(
        c(smry_contrasts_level$asymp.LCL, smry_contrasts_level$asymp.UCL),
        f_conf_level(conf_level)
      ),
      pval = formatters::with_label(
        smry_contrasts_level$p.value,
        "p-value"
      )
    )
  }

  c(rate_stats, ratio_stats)
}
270
271		#' @describeIn summarize_glm_count Formatted analysis function which is used as `afun` in `summarize_glm_count()`.
272		#'
273		#' @return
274		#' * `a_glm_count()` returns the corresponding list with formatted [rtables::CellValue()].
275		#'
276		#' @keywords internal
a_glm_count <- function(df,
                        ...,
                        .stats = NULL,
                        .stat_names = NULL,
                        .formats = NULL,
                        .labels = NULL,
                        .indent_mods = NULL) {
  # Separate the additional layout parameters from the arguments meant for the
  # statistics function. `retrieve_extra_afun_params()` is called directly from
  # this function body, as in the rest of the file.
  dots_extra_args <- list(...)
  extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters))
  dots_extra_args$.additional_fun_parameters <- NULL

  # Split requested statistics into standard names and user-defined functions.
  split_stats <- .split_std_from_custom_stats(.stats)
  .stats <- split_stats$all_stats
  custom_stat_functions <- split_stats$custom_stats

  # Compute all statistics.
  stat_args <- c(list(df = df), extra_afun_params, dots_extra_args)
  x_stats <- .apply_stat_functions(
    default_stat_fnc = s_glm_count,
    custom_stat_fnc_list = custom_stat_functions,
    args_list = stat_args
  )

  # Resolve the statistics to display and their formatting defaults.
  .stats <- get_stats(
    "summarize_glm_count",
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions)
  )
  .formats <- get_formats_from_stats(.stats, .formats)
  .labels <- get_labels_from_stats(.stats, .labels)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  x_stats <- x_stats[.stats]

  # Replace "auto" formats with data-driven formatting functions.
  .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var)

  # Get and check statistical names.
  .stat_names <- get_stat_names(x_stats, .stat_names)

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = .unlist_keep_nulls(.labels),
    .stat_names = .stat_names,
    .labels = .unlist_keep_nulls(.labels),
    .indent_mods = .unlist_keep_nulls(.indent_mods)
  )
}
331
332		# h_glm_count ------------------------------------------------------------------
333
334		#' Helper functions for Poisson models
335		#'
336		#' @description `r lifecycle::badge("experimental")`
337		#'
338		#' Helper functions that return the results of [stats::glm()] when Poisson or Quasi-Poisson
339		#' distributions are needed (see `family` parameter), or [MASS::glm.nb()] for Negative Binomial
340		#' distributions. Link function for the GLM is `log`.
341		#'
342		#' @inheritParams argument_convention
343		#'
344		#' @seealso [summarize_glm_count]
345		#'
346		#' @name h_glm_count
347		NULL
348
349		#' @describeIn h_glm_count Helper function to return the results of the
350		#' selected model (Poisson, Quasi-Poisson, negative binomial).
351		#'
352		#' @param .df_row (`data.frame`)\cr dataset that includes all the variables that are called
353		#' in `.var` and `variables`.
354		#' @param variables (named `list` of `string`)\cr list of additional analysis variables, with
355		#' expected elements:
356		#' * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple
357		#' groups will be summarized. Specifically, the first level of `arm` variable is taken as the
358		#' reference group.
359		#' * `covariates` (`character`)\cr a vector that can contain single variable names (such as
360		#' `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
361		#' * `offset` (`string`)\cr name of the variable in `.df_row` to be used as the model offset.
362		#' @param distribution (`character`)\cr a character value specifying the distribution
363		#' used in the regression (Poisson, Quasi-Poisson, negative binomial).
364		#' @param weights (`character`)\cr a character vector specifying weights used
365		#' in averaging predictions. Number of weights must equal the number of levels included in the covariates.
366		#' Weights option passed to [emmeans::emmeans()].
367		#'
368		#' @return
369		#' * `h_glm_count()` returns the results of the selected model.
370		#'
371		#' @keywords internal
h_glm_count <- function(.var,
                        .df_row,
                        variables,
                        distribution,
                        weights) {
  checkmate::assert_subset(distribution, c("poisson", "quasipoisson", "negbin"), empty.ok = FALSE)

  # Pick the fitting helper for the requested distribution, then call it once.
  fit_model <- switch(distribution,
    poisson = h_glm_poisson,
    quasipoisson = h_glm_quasipoisson,
    negbin = h_glm_negbin
  )

  fit_model(.var, .df_row, variables, weights)
}
384
385		#' @describeIn h_glm_count Helper function to return results of a Poisson model.
386		#'
387		#' @return
388		#' * `h_glm_poisson()` returns the results of a Poisson model.
389		#'
390		#' @keywords internal
h_glm_poisson <- function(.var,
                          .df_row,
                          variables,
                          weights) {
  arm <- variables$arm
  covariates <- variables$covariates

  # Model formula: response on the left, covariates followed by the arm on the right.
  formula <- stats::as.formula(
    paste0(.var, " ~  + ", paste(covariates, collapse = " + "), " + ", arm)
  )

  if (!is.null(variables$offset)) {
    # The offset is given as a column name and looked up in the data.
    offset <- .df_row[[variables$offset]]
    glm_fit <- stats::glm(
      formula = formula,
      offset = offset,
      data = .df_row,
      family = stats::poisson(link = "log")
    )
  } else {
    glm_fit <- stats::glm(
      formula = formula,
      data = .df_row,
      family = stats::poisson(link = "log")
    )
  }

  # Estimated marginal means per arm on the response scale.
  list(
    glm_fit = glm_fit,
    emmeans_fit = emmeans::emmeans(
      glm_fit,
      specs = arm,
      data = .df_row,
      type = "response",
      offset = 0,
      weights = weights
    )
  )
}
436
437		#' @describeIn h_glm_count Helper function to return results of a Quasi-Poisson model.
438		#'
439		#' @return
440		#' * `h_glm_quasipoisson()` returns the results of a Quasi-Poisson model.
441		#'
442		#' @keywords internal
h_glm_quasipoisson <- function(.var,
                               .df_row,
                               variables,
                               weights) {
  arm <- variables$arm
  covariates <- variables$covariates

  # Model formula: response on the left, covariates followed by the arm on the right.
  formula <- stats::as.formula(
    paste0(.var, " ~  + ", paste(covariates, collapse = " + "), " + ", arm)
  )

  if (!is.null(variables$offset)) {
    # The offset is given as a column name and looked up in the data.
    offset <- .df_row[[variables$offset]]
    glm_fit <- stats::glm(
      formula = formula,
      offset = offset,
      data = .df_row,
      family = stats::quasipoisson(link = "log")
    )
  } else {
    glm_fit <- stats::glm(
      formula = formula,
      data = .df_row,
      family = stats::quasipoisson(link = "log")
    )
  }

  # Estimated marginal means per arm on the response scale.
  list(
    glm_fit = glm_fit,
    emmeans_fit = emmeans::emmeans(
      glm_fit,
      specs = arm,
      data = .df_row,
      type = "response",
      offset = 0,
      weights = weights
    )
  )
}
487
488		#' @describeIn h_glm_count Helper function to return results of a negative binomial model.
489		#'
490		#' @return
491		#' * `h_glm_negbin()` returns the results of a negative binomial model.
492		#'
493		#' @keywords internal
h_glm_negbin <- function(.var,
                         .df_row,
                         variables,
                         weights) {
  arm <- variables$arm
  covariates <- variables$covariates

  # Build the right-hand side from a vector of term labels so that a `NULL`
  # covariate set does not leave empty terms in the formula. The formula is
  # built exactly once. `MASS::glm.nb()` is called without an `offset` argument
  # here, so the offset enters as an `offset()` term in the formula.
  rhs_terms <- if (is.null(variables$offset)) {
    c(covariates, arm)
  } else {
    c(arm, covariates, sprintf("offset(%s)", variables$offset))
  }
  formula <- stats::as.formula(
    paste0(.var, " ~ ", paste(rhs_terms, collapse = " + "))
  )

  glm_fit <- MASS::glm.nb(
    formula = formula,
    data = .df_row,
    link = "log"
  )

  # Estimated marginal means per arm on the response scale.
  emmeans_fit <- emmeans::emmeans(
    glm_fit,
    specs = arm,
    data = .df_row,
    type = "response",
    offset = 0,
    weights = weights
  )

  list(
    glm_fit = glm_fit,
    emmeans_fit = emmeans_fit
  )
}
548
549		# h_ppmeans --------------------------------------------------------------------
550		#' Function to return the estimated means using predicted probabilities
551		#'
552		#' @description
553		#' For each arm level, the predicted mean rate is calculated using the fitted model object, with `newdata`
554		#' set to the result of `stats::model.frame`, a reconstructed data or the original data, depending on the
555		#' object formula (coming from the fit). The confidence interval is derived using the `conf_level` parameter.
556		#'
557		#' @param obj (`glm.fit`)\cr fitted model object used to derive the mean rate estimates in each treatment arm.
558		#' @param .df_row (`data.frame`)\cr dataset that includes all the variables that are called in `.var` and `variables`.
559		#' @param arm (`string`)\cr group variable, for which the covariate adjusted means of multiple groups will be
560		#' summarized. Specifically, the first level of `arm` variable is taken as the reference group.
561		#' @param conf_level (`proportion`)\cr value used to derive the confidence interval for the rate.
562		#'
563		#' @return
564		#' * `h_ppmeans()` returns the estimated means.
565		#'
566		#' @seealso [summarize_glm_count()].
567		#'
568		#' @export
h_ppmeans <- function(obj, .df_row, arm, conf_level) {
  # Two-sided normal quantile for the requested confidence level.
  alpha <- 1 - conf_level
  p <- 1 - alpha / 2
  z_crit <- stats::qnorm(p)

  arm_levels <- levels(.df_row[[arm]])

  out <- lapply(arm_levels, function(lev) {
    # Counterfactual data: every row is assigned to arm level `lev`.
    temp <- .df_row
    temp[[arm]] <- factor(lev, levels = arm_levels)

    mf <- stats::model.frame(obj$formula, data = temp)
    X <- stats::model.matrix(obj$formula, data = mf) # nolint

    rate <- stats::predict(obj, newdata = mf, type = "response")
    rate_hat <- mean(rate)

    # Standard error as the square root of the quadratic form zz' V zz. This
    # needs the matrix product `%*%`, not the modulo operator `%%`.
    # NOTE(review): `colMeans(rate * X)` looks like the delta-method gradient
    # of the mean predicted rate under a log link - confirm.
    zz <- colMeans(rate * X)
    se <- sqrt(as.numeric(t(zz) %*% stats::vcov(obj) %*% zz))

    # The interval is symmetric on the log scale around the mean rate.
    rate_lwr <- rate_hat * exp(-z_crit * se / rate_hat)
    rate_upr <- rate_hat * exp(z_crit * se / rate_hat)

    c(rate_hat, rate_lwr, rate_upr)
  })

  names(out) <- arm_levels
  out <- do.call(rbind, out)

  # The estimate column is named `response` for negative binomial fits and
  # `rate` otherwise.
  estimate_name <- if (inherits(obj, "negbin")) "response" else "rate"
  colnames(out) <- c(estimate_name, "asymp.LCL", "asymp.UCL")

  out <- as.data.frame(out)
  out[[arm]] <- rownames(out)
  out
}

1		#' Count number of patients and sum exposure across all patients in columns
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' The analyze function [analyze_patients_exposure_in_cols()] creates a layout element to count total numbers of
6		#' patients and sum an analysis value (i.e. exposure) across all patients in columns.
7		#'
8		#' The primary analysis variable `ex_var` is the exposure variable used to calculate the `sum_exposure` statistic. The
9		#' `id` variable is used to uniquely identify patients in the data such that only unique patients are counted in the
10		#' `n_patients` statistic, and the `var` variable is used to create a row split if needed. The percentage returned as
11		#' part of the `n_patients` statistic is the proportion of all records that correspond to a unique patient.
12		#'
13		#' The summarize function [summarize_patients_exposure_in_cols()] performs the same function as
14		#' [analyze_patients_exposure_in_cols()] except it creates content rows, not data rows, to summarize the current table
15		#' row/column context and operates on the level of the latest row split or the root of the table if no row splits have
16		#' occurred.
17		#'
18		#' If a column split has not yet been performed in the table, `col_split` must be set to `TRUE` for the first call of
19		#' [analyze_patients_exposure_in_cols()] or [summarize_patients_exposure_in_cols()].
20		#'
21		#' @inheritParams argument_convention
22		#' @param ex_var (`string`)\cr name of the variable in `df` containing exposure values.
23		#' @param custom_label (`string` or `NULL`)\cr if provided and `labelstr` is empty, this will be used as label.
24		#' @param .stats (`character`)\cr statistics to select for the table.
25		#'
26		#' Options are: ``r shQuote(get_stats("analyze_patients_exposure_in_cols"), type = "sh")``
27		#'
28		#' @name summarize_patients_exposure_in_cols
29		#' @order 1
30		NULL
31
32		#' @describeIn summarize_patients_exposure_in_cols Statistics function which counts numbers
33		#' of patients and the sum of exposure across all patients.
34		#'
35		#' @return
36		#' * `s_count_patients_sum_exposure()` returns a named `list` with the statistics:
37		#' * `n_patients`: Number of unique patients in `df`.
38		#' * `sum_exposure`: Sum of `ex_var` across all patients in `df`.
39		#'
40		#' @keywords internal
s_count_patients_sum_exposure <- function(df,
                                          labelstr = "",
                                          .stats = c("n_patients", "sum_exposure"),
                                          .N_col, # nolint
                                          ...,
                                          ex_var = "AVAL",
                                          id = "USUBJID",
                                          custom_label = NULL,
                                          var_level = NULL) {
  assert_df_with_variables(df, list(ex_var = ex_var, id = id))
  checkmate::assert_string(id)
  checkmate::assert_string(labelstr)
  checkmate::assert_string(custom_label, null.ok = TRUE)
  checkmate::assert_numeric(df[[ex_var]])
  checkmate::assert_true(all(.stats %in% c("n_patients", "sum_exposure")))

  # Row label: the first available of a non-empty `labelstr`, `var_level`,
  # `custom_label`, and finally the generic default.
  label_candidates <- list(
    if (labelstr != "") labelstr,
    var_level,
    custom_label,
    "Total patients numbers/person time"
  )
  row_label <- Filter(Negate(is.null), label_candidates)[[1]]

  stats_out <- list()

  if ("n_patients" %in% .stats) {
    # Unique patient count taken from the `unique` element of the content statistics.
    n_unique <- s_num_patients_content(
      df = df,
      .N_col = .N_col, # nolint
      .var = id,
      labelstr = ""
    )$unique
    stats_out$n_patients <- formatters::with_label(n_unique, row_label)
  }

  if ("sum_exposure" %in% .stats) {
    # Total of the exposure variable over all records in `df`.
    total_exposure <- sum(df[[ex_var]])
    stats_out$sum_exposure <- formatters::with_label(total_exposure, row_label)
  }

  stats_out
}
86
87		#' @describeIn summarize_patients_exposure_in_cols Analysis function which is used as `afun` in
88		#' [rtables::analyze_colvars()] within `analyze_patients_exposure_in_cols()` and as `cfun` in
89		#' [rtables::summarize_row_groups()] within `summarize_patients_exposure_in_cols()`.
90		#'
91		#' @return
92		#' * `a_count_patients_sum_exposure()` returns formatted [rtables::CellValue()].
93		#'
94		#' @export
a_count_patients_sum_exposure <- function(df,
                                          labelstr = "",
                                          ...,
                                          .stats = NULL,
                                          .stat_names = NULL,
                                          .formats = NULL,
                                          .labels = NULL,
                                          .indent_mods = NULL) {
  # Analysis/content function: computes the single statistic named in `.stats` with
  # `s_count_patients_sum_exposure()`, once per level of `var` (when `var` is supplied via `...`)
  # and/or once for the whole of `df`, then formats the values as rows with `in_rows()`.
  checkmate::assert_character(.stats, len = 1)

  # Check for additional parameters to the statistics function
  dots_extra_args <- list(...)
  extra_afun_params <- retrieve_extra_afun_params(names(dots_extra_args$.additional_fun_parameters))
  dots_extra_args$.additional_fun_parameters <- NULL

  add_total_level <- dots_extra_args$add_total_level
  checkmate::assert_flag(add_total_level)

  var <- dots_extra_args$var
  if (!is.null(var)) {
    assert_df_with_variables(df, list(var = var))
    df[[var]] <- as.factor(df[[var]])
  }

  # Check for user-defined functions
  default_and_custom_stats_list <- .split_std_from_custom_stats(.stats)
  .stats <- default_and_custom_stats_list$all_stats
  custom_stat_functions <- default_and_custom_stats_list$custom_stats

  x_stats <- list()
  if (!is.null(var)) {
    var_values <- df[[var]]
    for (lvl in levels(var_values)) {
      # Select the rows of this level by explicit indexing rather than
      # `subset(df, get(var) == lvl)`: inside `subset()` the names `var` and `lvl` are looked up
      # in the columns of `df` first, so a dataset containing columns with those names would be
      # filtered incorrectly. `which()` drops rows where the comparison is `NA`, as `subset()` did.
      df_lvl <- df[which(var_values == lvl), , drop = FALSE]

      x_stats_i <- .apply_stat_functions(
        default_stat_fnc = s_count_patients_sum_exposure,
        custom_stat_fnc_list = custom_stat_functions,
        args_list = c(
          df = list(df_lvl),
          labelstr = list(labelstr),
          var_level = lvl,
          extra_afun_params,
          dots_extra_args
        )
      )
      x_stats[[.stats]][[lvl]] <- x_stats_i[[.stats]]
    }
  }

  # A total row is produced on request, or always when no grouping variable is given.
  if (add_total_level || is.null(var)) {
    x_stats_total <- .apply_stat_functions(
      default_stat_fnc = s_count_patients_sum_exposure,
      custom_stat_fnc_list = custom_stat_functions,
      args_list = c(
        df = list(df),
        labelstr = list(labelstr),
        extra_afun_params,
        dots_extra_args
      )
    )
    x_stats[[.stats]][["Total"]] <- x_stats_total[[.stats]]
  }

  # Fill in formatting defaults
  .stats <- get_stats(
    "analyze_patients_exposure_in_cols",
    stats_in = .stats,
    custom_stats_in = names(custom_stat_functions)
  )
  x_stats <- x_stats[.stats]
  levels_per_stats <- lapply(x_stats, names)
  .formats <- get_formats_from_stats(.stats, .formats, levels_per_stats)
  .labels <- get_labels_from_stats(
    .stats, .labels, levels_per_stats,
    # Labels attached by the statistics function take precedence over the package defaults.
    tern_defaults = c(lapply(x_stats[[1]], attr, "label"), tern_default_labels)
  )
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods, levels_per_stats)

  x_stats <- x_stats[.stats] %>%
    .unlist_keep_nulls() %>%
    setNames(names(.formats))

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, extra_afun_params$.df_row, extra_afun_params$.var)

  # Get and check statistical names
  .stat_names <- get_stat_names(x_stats, .stat_names)

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = .labels %>% .unlist_keep_nulls(),
    .stat_names = .stat_names,
    .labels = .labels %>% .unlist_keep_nulls(),
    .indent_mods = .indent_mods %>% .unlist_keep_nulls()
  )
}
190
191		#' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics
192		#' function arguments and additional format arguments. This function is a wrapper for
193		#' [rtables::split_cols_by_multivar()] and [rtables::summarize_row_groups()].
194		#'
195		#' @return
196		#' * `summarize_patients_exposure_in_cols()` returns a layout object suitable for passing to further
197		#' layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will
198		#' add formatted content rows, with the statistics from `s_count_patients_sum_exposure()` arranged in
199		#' columns, to the table layout.
200		#'
201		#' @examples
202		#' lyt5 <- basic_table() %>%
203		#' summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE)
204		#'
205		#' result5 <- build_table(lyt5, df = df, alt_counts_df = adsl)
206		#' result5
207		#'
208		#' lyt6 <- basic_table() %>%
209		#' summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE, .stats = "sum_exposure")
210		#'
211		#' result6 <- build_table(lyt6, df = df, alt_counts_df = adsl)
212		#' result6
213		#'
214		#' @export
215		#' @order 3
summarize_patients_exposure_in_cols <- function(lyt,
                                                var,
                                                ex_var = "AVAL",
                                                id = "USUBJID",
                                                add_total_level = FALSE,
                                                custom_label = NULL,
                                                col_split = TRUE,
                                                na_str = default_na_str(),
                                                ...,
                                                .stats = c("n_patients", "sum_exposure"),
                                                .stat_names = NULL,
                                                .formats = NULL,
                                                .labels = c(n_patients = "Patients", sum_exposure = "Person time"),
                                                .indent_mods = NULL) {
  # Layout function: optionally splits columns by statistic, then adds content rows computed by
  # `a_count_patients_sum_exposure()` through `summarize_row_groups()`.

  # Labels of the requested statistics become column headers; any remaining labels are
  # forwarded to the content function.
  col_labels <- unlist(.labels[.stats])
  .labels <- .labels[!names(.labels) %in% c("n_patients", "sum_exposure")]

  # Standard formatting arguments are forwarded only when they were supplied.
  extra_args <- list()
  if (!is.null(.stat_names)) {
    extra_args[[".stat_names"]] <- .stat_names
  }
  if (!is.null(.formats)) {
    extra_args[[".formats"]] <- .formats
  }
  if (!is.null(.labels)) {
    extra_args[[".labels"]] <- .labels
  }
  if (!is.null(.indent_mods)) {
    extra_args[[".indent_mods"]] <- .indent_mods
  }

  # Arguments for the statistics function, followed by anything passed through `...`.
  extra_args <- c(
    extra_args,
    ex_var = ex_var, id = id, add_total_level = add_total_level, custom_label = custom_label,
    ...
  )

  # Extend a local copy of the content function with the additional layout-provided formals.
  additional_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- additional_params
  formals(a_count_patients_sum_exposure) <- c(
    formals(a_count_patients_sum_exposure), additional_params
  )

  if (col_split) {
    # One column per requested statistic, all based on the same variable.
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = rep(var, length(.stats)),
      varlabels = col_labels,
      extra_args = list(.stats = .stats)
    )
  }

  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = a_count_patients_sum_exposure,
    na_str = na_str,
    extra_args = extra_args
  )
}
268
269		#' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics
270		#' function arguments and additional format arguments. This function is a wrapper for
271		#' [rtables::split_cols_by_multivar()] and [rtables::analyze_colvars()].
272		#'
273		#' @param col_split (`flag`)\cr whether the columns should be split. Set to `FALSE` when the required
274		#' column split has been done already earlier in the layout pipe.
275		#'
276		#' @return
277		#' * `analyze_patients_exposure_in_cols()` returns a layout object suitable for passing to further
278		#' layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will
279		#' add formatted data rows, with the statistics from `s_count_patients_sum_exposure()` arranged in
280		#' columns, to the table layout.
281		#'
282		#' @note As opposed to [summarize_patients_exposure_in_cols()] which generates content rows,
283		#' `analyze_patients_exposure_in_cols()` generates data rows which will _not_ be repeated on multiple
284		#' pages when pagination is used.
285		#'
286		#' @examples
287		#' set.seed(1)
288		#' df <- data.frame(
289		#' USUBJID = c(paste("id", seq(1, 12), sep = "")),
290		#' ARMCD = c(rep("ARM A", 6), rep("ARM B", 6)),
291		#' SEX = c(rep("Female", 6), rep("Male", 6)),
292		#' AVAL = as.numeric(sample(seq(1, 20), 12)),
293		#' stringsAsFactors = TRUE
294		#' )
295		#' adsl <- data.frame(
296		#' USUBJID = c(paste("id", seq(1, 12), sep = "")),
297		#' ARMCD = c(rep("ARM A", 2), rep("ARM B", 2)),
298		#' SEX = c(rep("Female", 2), rep("Male", 2)),
299		#' stringsAsFactors = TRUE
300		#' )
301		#'
302		#' lyt <- basic_table() %>%
303		#' split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
304		#' summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE) %>%
305		#' analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE)
306		#' result <- build_table(lyt, df = df, alt_counts_df = adsl)
307		#' result
308		#'
309		#' lyt2 <- basic_table() %>%
310		#' split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
311		#' summarize_patients_exposure_in_cols(
312		#' var = "AVAL", col_split = TRUE,
313		#' .stats = "n_patients", custom_label = "some custom label"
314		#' ) %>%
315		#' analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE, ex_var = "AVAL")
316		#' result2 <- build_table(lyt2, df = df, alt_counts_df = adsl)
317		#' result2
318		#'
319		#' lyt3 <- basic_table() %>%
320		#' analyze_patients_exposure_in_cols(var = "SEX", col_split = TRUE, ex_var = "AVAL")
321		#' result3 <- build_table(lyt3, df = df, alt_counts_df = adsl)
322		#' result3
323		#'
324		#' # Adding total levels and custom label
325		#' lyt4 <- basic_table(
326		#' show_colcounts = TRUE
327		#' ) %>%
328		#' analyze_patients_exposure_in_cols(
329		#' var = "ARMCD",
330		#' col_split = TRUE,
331		#' add_total_level = TRUE,
332		#' custom_label = "TOTAL"
333		#' ) %>%
334		#' append_topleft(c("", "Sex"))
335		#'
336		#' result4 <- build_table(lyt4, df = df, alt_counts_df = adsl)
337		#' result4
338		#'
339		#' @export
340		#' @order 2
analyze_patients_exposure_in_cols <- function(lyt,
                                              var = NULL,
                                              ex_var = "AVAL",
                                              id = "USUBJID",
                                              add_total_level = FALSE,
                                              custom_label = NULL,
                                              col_split = TRUE,
                                              na_str = default_na_str(),
                                              .stats = c("n_patients", "sum_exposure"),
                                              .stat_names = NULL,
                                              .formats = NULL,
                                              .labels = c(n_patients = "Patients", sum_exposure = "Person time"),
                                              .indent_mods = NULL,
                                              ...) {
  # Layout function: optionally splits columns by statistic, then adds data rows computed by
  # `a_count_patients_sum_exposure()` through `analyze_colvars()`.

  # Labels of the requested statistics become column headers; any remaining labels are
  # forwarded to the analysis function.
  col_labels <- unlist(.labels[.stats])
  .labels <- .labels[!names(.labels) %in% c("n_patients", "sum_exposure")]

  # Standard formatting arguments are forwarded only when they were supplied.
  extra_args <- list()
  if (!is.null(.stat_names)) {
    extra_args[[".stat_names"]] <- .stat_names
  }
  if (!is.null(.formats)) {
    extra_args[[".formats"]] <- .formats
  }
  if (!is.null(.labels)) {
    extra_args[[".labels"]] <- .labels
  }
  if (!is.null(.indent_mods)) {
    extra_args[[".indent_mods"]] <- .indent_mods
  }

  # Arguments for the statistics function, followed by anything passed through `...`.
  extra_args <- c(
    extra_args,
    var = var, ex_var = ex_var, id = id, add_total_level = add_total_level, custom_label = custom_label,
    ...
  )

  # Extend a local copy of the analysis function with the additional layout-provided formals.
  additional_params <- get_additional_afun_params(add_alt_df = FALSE)
  extra_args[[".additional_fun_parameters"]] <- additional_params
  formals(a_count_patients_sum_exposure) <- c(
    formals(a_count_patients_sum_exposure), additional_params
  )

  if (col_split) {
    # One column per requested statistic, all based on the exposure variable.
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = rep(ex_var, length(.stats)),
      varlabels = col_labels,
      extra_args = list(.stats = .stats)
    )
  }

  analyze_colvars(
    lyt = lyt,
    afun = a_count_patients_sum_exposure,
    na_str = na_str,
    extra_args = extra_args
  )
}

1		#' Horizontal waterfall plot
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' This basic waterfall plot visualizes a quantity `height` ordered by value with some markup.
6		#'
7		#' @param height (`numeric`)\cr vector containing values to be plotted as the waterfall bars.
8		#' @param id (`character`)\cr vector containing identifiers to use as the x-axis label for the waterfall bars.
9		#' @param col (`character`)\cr color(s).
10		#' @param col_var (`factor`, `character`, or `NULL`)\cr categorical variable for bar coloring. `NULL` by default.
11		#' @param xlab (`string`)\cr x label. Default is `"ID"`.
12		#' @param ylab (`string`)\cr y label. Default is `"Value"`.
13		#' @param title (`string`)\cr text to be displayed as plot title.
14		#' @param col_legend_title (`string`)\cr text to be displayed as legend title.
15		#'
16		#' @return A `ggplot` waterfall plot.
17		#'
18		#' @examples
19		#' library(dplyr)
20		#'
21		#' g_waterfall(height = c(3, 5, -1), id = letters[1:3])
22		#'
23		#' g_waterfall(
24		#' height = c(3, 5, -1),
25		#' id = letters[1:3],
26		#' col_var = letters[1:3]
27		#' )
28		#'
29		#' adsl_f <- tern_ex_adsl %>%
30		#' select(USUBJID, STUDYID, ARM, ARMCD, SEX)
31		#'
32		#' adrs_f <- tern_ex_adrs %>%
33		#' filter(PARAMCD == "OVRINV") %>%
34		#' mutate(pchg = rnorm(n(), 10, 50))
35		#'
36		#' adrs_f <- head(adrs_f, 30)
37		#' adrs_f <- adrs_f[!duplicated(adrs_f$USUBJID), ]
38		#' head(adrs_f)
39		#'
40		#' g_waterfall(
41		#' height = adrs_f$pchg,
42		#' id = adrs_f$USUBJID,
43		#' col_var = adrs_f$AVALC
44		#' )
45		#'
46		#' g_waterfall(
47		#' height = adrs_f$pchg,
48		#' id = paste("asdfdsfdsfsd", adrs_f$USUBJID),
49		#' col_var = adrs_f$SEX
50		#' )
51		#'
52		#' g_waterfall(
53		#' height = adrs_f$pchg,
54		#' id = paste("asdfdsfdsfsd", adrs_f$USUBJID),
55		#' xlab = "ID",
56		#' ylab = "Percentage Change",
57		#' title = "Waterfall plot"
58		#' )
59		#'
60		#' @export
g_waterfall <- function(height,
                        id,
                        col_var = NULL,
                        col = getOption("ggplot2.discrete.colour"),
                        xlab = NULL,
                        ylab = NULL,
                        col_legend_title = NULL,
                        title = NULL) {
  # Draws bars of `height`, ordered from largest to smallest, labelled by `id` on the x-axis and
  # optionally filled according to `col_var`. Returns the `ggplot` object.
  if (!is.null(col_var)) {
    check_same_n(height = height, id = id, col_var = col_var)
  } else {
    check_same_n(height = height, id = id)
  }

  checkmate::assert_multi_class(col_var, c("character", "factor"), null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)

  # Default axis/legend titles are the expressions the caller passed in. `deparse()` returns
  # several strings for long expressions, so the pieces are collapsed into a single title.
  xlabel <- paste(deparse(substitute(id)), collapse = " ")
  ylabel <- paste(deparse(substitute(height)), collapse = " ")

  col_label <- if (!missing(col_var)) {
    paste(deparse(substitute(col_var)), collapse = " ")
  }

  xlab <- if (is.null(xlab)) xlabel else xlab
  ylab <- if (is.null(ylab)) ylabel else ylab
  col_legend_title <- if (is.null(col_legend_title)) col_label else col_legend_title

  plot_data <- data.frame(
    height = height,
    id = as.character(id),
    # Placeholder value when no coloring variable is given.
    col_var = if (is.null(col_var)) "x" else to_n(col_var, length(height)),
    stringsAsFactors = FALSE
  )

  plot_data_ord <- plot_data[order(plot_data$height, decreasing = TRUE), ]

  # `factor(id, levels = id)` fixes the x-axis order to the sorted row order.
  p <- ggplot2::ggplot(plot_data_ord, ggplot2::aes(x = factor(id, levels = id), y = height)) +
    ggplot2::geom_col() +
    ggplot2::geom_text(
      label = format(plot_data_ord$height, digits = 2),
      # Value labels go above positive bars and below negative bars.
      vjust = ifelse(plot_data_ord$height >= 0, -0.5, 1.5)
    ) +
    ggplot2::xlab(xlab) +
    ggplot2::ylab(ylab) +
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, hjust = 0, vjust = .5))

  if (!is.null(col_var)) {
    p <- p +
      ggplot2::aes(fill = col_var) +
      ggplot2::labs(fill = col_legend_title) +
      ggplot2::theme(
        legend.position = "bottom",
        legend.background = ggplot2::element_blank(),
        legend.title = ggplot2::element_text(face = "bold"),
        legend.box.background = ggplot2::element_rect(colour = "black")
      )
  }

  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_fill_manual(values = col)
  }

  if (!is.null(title)) {
    p <- p +
      ggplot2::labs(title = title) +
      ggplot2::theme(plot.title = ggplot2::element_text(face = "bold"))
  }

  p
}

1		#' Individual patient plots
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Renders line plot(s) displaying trends in patients' parameter values over time.
6		#' Patients' individual baseline values can be added to the plot(s) as reference.
7		#'
8		#' @inheritParams argument_convention
9		#' @param xvar (`string`)\cr time point variable to be plotted on x-axis.
10		#' @param yvar (`string`)\cr continuous analysis variable to be plotted on y-axis.
11		#' @param xlab (`string`)\cr plot label for x-axis.
12		#' @param ylab (`string`)\cr plot label for y-axis.
13		#' @param id_var (`string`)\cr variable used as patient identifier.
14		#' @param title (`string`)\cr title for plot.
15		#' @param subtitle (`string`)\cr subtitle for plot.
16		#' @param add_baseline_hline (`flag`)\cr adds horizontal line at baseline y-value on
17		#' plot when `TRUE`.
18		#' @param yvar_baseline (`string`)\cr variable with baseline values only.
19		#' Ignored when `add_baseline_hline` is `FALSE`.
20		#' @param ggtheme (`theme`)\cr optional graphical theme function as provided
21		#' by `ggplot2` to control outlook of plot. Use `ggplot2::theme()` to tweak the display.
22		#' @param plotting_choices (`string`)\cr specifies options for displaying
23		#' plots. Must be one of `"all_in_one"`, `"split_by_max_obs"`, or `"separate_by_obs"`.
24		#' @param max_obs_per_plot (`integer(1)`)\cr number of observations to be plotted on one
25		#' plot. Ignored if `plotting_choices` is not `"split_by_max_obs"`.
26		#' @param caption (`string`)\cr optional caption below the plot.
27		#' @param col (`character`)\cr line colors.
28		#'
29		#' @seealso Relevant helper function [h_g_ipp()].
30		#'
31		#' @name g_ipp
32		#' @aliases individual_patient_plot
33		NULL
34
35		#' Helper function to create simple line plot over time
36		#'
37		#' @description `r lifecycle::badge("stable")`
38		#'
39		#' Function that generates a simple line plot displaying parameter trends over time.
40		#'
41		#' @inheritParams argument_convention
42		#' @inheritParams g_ipp
43		#'
44		#' @return A `ggplot` line plot.
45		#'
46		#' @seealso [g_ipp()] which uses this function.
47		#'
48		#' @examples
49		#' library(dplyr)
50		#'
51		#' # Select a small sample of data to plot.
52		#' adlb <- tern_ex_adlb %>%
53		#' filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>%
54		#' slice(1:36)
55		#'
56		#' p <- h_g_ipp(
57		#' df = adlb,
58		#' xvar = "AVISIT",
59		#' yvar = "AVAL",
60		#' xlab = "Visit",
61		#' id_var = "USUBJID",
62		#' ylab = "SGOT/ALT (U/L)",
63		#' add_baseline_hline = TRUE
64		#' )
65		#' p
66		#'
67		#' @export
h_g_ipp <- function(df,
                    xvar,
                    yvar,
                    xlab,
                    ylab,
                    id_var,
                    title = "Individual Patient Plots",
                    subtitle = "",
                    caption = NULL,
                    add_baseline_hline = FALSE,
                    yvar_baseline = "BASE",
                    ggtheme = nestcolor::theme_nest(),
                    col = NULL) {
  # Builds a line-and-point plot of `yvar` against `xvar`, with one colored line per value of
  # `id_var`. Optionally adds a labelled horizontal line per patient at `yvar_baseline`.
  # Returns the `ggplot` object.
  checkmate::assert_string(xvar)
  checkmate::assert_string(yvar)
  checkmate::assert_string(yvar_baseline)
  checkmate::assert_string(id_var)
  checkmate::assert_string(xlab)
  checkmate::assert_string(ylab)
  checkmate::assert_string(title)
  checkmate::assert_string(subtitle)
  # Validate `df` itself before inspecting its column names.
  checkmate::assert_data_frame(df)
  checkmate::assert_subset(c(xvar, yvar, yvar_baseline, id_var), colnames(df))
  checkmate::assert_flag(add_baseline_hline)
  checkmate::assert_character(col, null.ok = TRUE)

  p <- ggplot2::ggplot(
    data = df,
    mapping = ggplot2::aes(
      x = .data[[xvar]],
      y = .data[[yvar]],
      group = .data[[id_var]],
      colour = .data[[id_var]]
    )
  ) +
    ggplot2::geom_line(linewidth = 0.4) +
    ggplot2::geom_point(size = 2) +
    ggplot2::labs(
      x = xlab,
      y = ylab,
      title = title,
      subtitle = subtitle,
      caption = caption
    ) +
    ggtheme

  if (add_baseline_hline) {
    # One row per distinct (patient, baseline value) pair.
    baseline_df <- unique(df[, c(id_var, yvar_baseline)])

    # Lift the patient labels slightly above their baseline line (2.5% of the observed y range).
    y_span <- diff(range(df[[yvar]], na.rm = TRUE))

    p <- p +
      ggplot2::geom_hline(
        data = baseline_df,
        mapping = ggplot2::aes(
          yintercept = .data[[yvar_baseline]],
          colour = .data[[id_var]]
        ),
        linetype = "dotdash",
        linewidth = 0.4
      ) +
      ggplot2::geom_text(
        data = baseline_df,
        mapping = ggplot2::aes(
          x = 1,
          y = .data[[yvar_baseline]],
          label = .data[[id_var]],
          colour = .data[[id_var]]
        ),
        nudge_y = 0.025 * y_span,
        vjust = "right",
        size = 2
      )
  }

  # Apply user-supplied line colors whether or not baseline lines are drawn. Previously this
  # was nested in the baseline branch, so `col` was ignored when `add_baseline_hline = FALSE`.
  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_color_manual(values = col)
  }

  p
}
148
149		#' @describeIn g_ipp Plotting function for individual patient plots which, depending on user
150		#' preference, renders a single graphic or compiles a list of graphics that show trends in individual's parameter
151		#' values over time.
152		#'
153		#' @return A `ggplot` object or a list of `ggplot` objects.
154		#'
155		#' @examples
156		#' library(dplyr)
157		#'
158		#' # Select a small sample of data to plot.
159		#' adlb <- tern_ex_adlb %>%
160		#' filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>%
161		#' slice(1:36)
162		#'
163		#' plot_list <- g_ipp(
164		#' df = adlb,
165		#' xvar = "AVISIT",
166		#' yvar = "AVAL",
167		#' xlab = "Visit",
168		#' ylab = "SGOT/ALT (U/L)",
169		#' title = "Individual Patient Plots",
170		#' add_baseline_hline = TRUE,
171		#' plotting_choices = "split_by_max_obs",
172		#' max_obs_per_plot = 5
173		#' )
174		#' plot_list
175		#'
176		#' @export
g_ipp <- function(df,
                  xvar,
                  yvar,
                  xlab,
                  ylab,
                  id_var = "USUBJID",
                  title = "Individual Patient Plots",
                  subtitle = "",
                  caption = NULL,
                  add_baseline_hline = FALSE,
                  yvar_baseline = "BASE",
                  ggtheme = nestcolor::theme_nest(),
                  plotting_choices = c("all_in_one", "split_by_max_obs", "separate_by_obs"),
                  max_obs_per_plot = 4,
                  col = NULL) {
  # Returns a single plot ("all_in_one"), an unnamed list of plots holding up to
  # `max_obs_per_plot` patients each ("split_by_max_obs"), or a list with one plot per value of
  # `id_var` ("separate_by_obs"). All plots are drawn by `h_g_ipp()` with identical settings.
  checkmate::assert_count(max_obs_per_plot)
  checkmate::assert_subset(plotting_choices, c("all_in_one", "split_by_max_obs", "separate_by_obs"))
  checkmate::assert_character(col, null.ok = TRUE)

  plotting_choices <- match.arg(plotting_choices)

  # Every mode differs only in the data subset handed to `h_g_ipp()`.
  draw_plot <- function(plot_df) {
    h_g_ipp(
      df = plot_df,
      xvar = xvar,
      yvar = yvar,
      xlab = xlab,
      ylab = ylab,
      id_var = id_var,
      title = title,
      subtitle = subtitle,
      caption = caption,
      add_baseline_hline = add_baseline_hline,
      yvar_baseline = yvar_baseline,
      ggtheme = ggtheme,
      col = col
    )
  }

  if (plotting_choices == "all_in_one") {
    return(draw_plot(df))
  }

  if (plotting_choices == "split_by_max_obs") {
    # Assign consecutive unique ids to groups of at most `max_obs_per_plot`.
    id_vec <- unique(df[[id_var]])
    group_index <- rep(
      1:ceiling(length(id_vec) / max_obs_per_plot),
      each = max_obs_per_plot,
      length.out = length(id_vec)
    )
    id_groups <- split(id_vec, group_index)

    # Group names are dropped so that the result is an unnamed list.
    return(lapply(unname(id_groups), function(ids) {
      draw_plot(df[df[[id_var]] %in% ids, ])
    }))
  }

  # "separate_by_obs": one plot per element of the split, named by `id_var` value.
  lapply(split(df, df[[id_var]]), draw_plot)
}

1		#' Helper functions for tabulating survival duration by subgroup
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper functions that tabulate in a data frame statistics such as median survival
6		#' time and hazard ratio for population subgroups.
7		#'
8		#' @inheritParams argument_convention
9		#' @inheritParams survival_coxph_pairwise
10		#' @inheritParams survival_duration_subgroups
11		#' @param arm (`factor`)\cr the treatment group variable.
12		#'
13		#' @details Main functionality is to prepare data for use in a layout-creating function.
14		#'
15		#' @examples
16		#' library(dplyr)
17		#' library(forcats)
18		#'
19		#' adtte <- tern_ex_adtte
20		#'
21		#' # Save variable labels before data processing steps.
22		#' adtte_labels <- formatters::var_labels(adtte)
23		#'
24		#' adtte_f <- adtte %>%
25		#' filter(
26		#' PARAMCD == "OS",
27		#' ARM %in% c("B: Placebo", "A: Drug X"),
28		#' SEX %in% c("M", "F")
29		#' ) %>%
30		#' mutate(
31		#' # Reorder levels of ARM to display reference arm before treatment arm.
32		#' ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
33		#' SEX = droplevels(SEX),
34		#' is_event = CNSR == 0
35		#' )
36		#' labels <- c("ARM" = adtte_labels[["ARM"]], "SEX" = adtte_labels[["SEX"]], "is_event" = "Event Flag")
37		#' formatters::var_labels(adtte_f)[names(labels)] <- labels
38		#'
39		#' @name h_survival_duration_subgroups
40		NULL
41
42		#' @describeIn h_survival_duration_subgroups Helper to prepare a data frame of median survival times by arm.
43		#'
44		#' @return
45		#' * `h_survtime_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, and `median`.
46		#'
47		#' @examples
48		#' # Extract median survival time for one group.
49		#' h_survtime_df(
50		#' tte = adtte_f$AVAL,
51		#' is_event = adtte_f$is_event,
52		#' arm = adtte_f$ARM
53		#' )
54		#'
55		#' @export
h_survtime_df <- function(tte, is_event, arm) {
  # Returns one row per level of `arm` with columns `arm`, `n`, `n_events` and `median`,
  # computed on the observations whose `tte` and `is_event` are both non-missing.
  checkmate::assert_numeric(tte)
  checkmate::assert_logical(is_event, len = length(tte))
  assert_valid_factor(arm, len = length(tte))

  obs <- data.frame(
    tte = tte,
    is_event = is_event,
    stringsAsFactors = FALSE
  )

  # Drop incomplete observations, keeping `arm` aligned with the remaining rows.
  is_complete <- stats::complete.cases(obs)
  obs <- obs[is_complete, ]
  arm <- arm[is_complete]

  # Summary row for a single arm; arms without observations get `NA` statistics.
  summarize_arm <- function(x, arm) {
    n_events <- NA
    median_est <- NA
    if (nrow(x) > 0) {
      surv_stats <- s_surv_time(x, .var = "tte", is_event = "is_event")
      median_est <- unname(as.numeric(surv_stats$median))
      n_events <- sum(x$is_event)
    }

    data.frame(
      arm = arm,
      n = nrow(x),
      n_events = n_events,
      median = median_est,
      stringsAsFactors = FALSE
    )
  }

  obs_by_arm <- split(obs, arm)
  arm_rows <- Map(summarize_arm, obs_by_arm, names(obs_by_arm))

  df <- do.call(rbind, args = c(arm_rows, make.row.names = FALSE))
  # Restore `arm` as a factor with the original level order.
  df$arm <- factor(df$arm, levels = levels(arm))
  df
}
96
97		#' @describeIn h_survival_duration_subgroups Summarizes median survival times by arm and across subgroups
98		#' in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and
99		#' requires elements `tte`, `is_event`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies
100		#' groupings for `subgroups` variables.
101		#'
102		#' @return
103		#' * `h_survtime_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, `median`, `subgroup`,
104		#' `var`, `var_label`, and `row_type`.
105		#'
106		#' @examples
107		#' # Extract median survival time for multiple groups.
108		#' h_survtime_subgroups_df(
109		#' variables = list(
110		#' tte = "AVAL",
111		#' is_event = "is_event",
112		#' arm = "ARM",
113		#' subgroups = c("SEX", "BMRKR2")
114		#' ),
115		#' data = adtte_f
116		#' )
117		#'
118		#' # Define groupings for BMRKR2 levels.
119		#' h_survtime_subgroups_df(
120		#' variables = list(
121		#' tte = "AVAL",
122		#' is_event = "is_event",
123		#' arm = "ARM",
124		#' subgroups = c("SEX", "BMRKR2")
125		#' ),
126		#' data = adtte_f,
127		#' groups_lists = list(
128		#' BMRKR2 = list(
129		#' "low" = "LOW",
130		#' "low/medium" = c("LOW", "MEDIUM"),
131		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
132		#' )
133		#' )
134		#' )
135		#'
136		#' @export
h_survtime_subgroups_df <- function(variables,
                                    data,
                                    groups_lists = list(),
                                    label_all = "All Patients") {
  # Stacks the per-arm survival time summary for all patients ("content" rows) on top of the
  # summaries for each subgroup ("analysis" rows) produced by `h_split_by_subgroups()`.
  checkmate::assert_character(variables$tte)
  checkmate::assert_character(variables$is_event)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)

  assert_df_with_variables(data, variables)

  checkmate::assert_string(label_all)

  # Per-arm summary of whichever data frame is supplied, using the configured columns.
  survtime_of <- function(d) {
    h_survtime_df(d[[variables$tte]], d[[variables$is_event]], d[[variables$arm]])
  }

  # Add All Patients.
  result_all <- survtime_of(data)
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables, the overall summary is the complete result.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  # Add Subgroups.
  subgroup_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  subgroup_rows <- lapply(subgroup_data, function(grp) {
    stats_df <- survtime_of(grp$df)
    # Repeat the subgroup's label row once for each arm row.
    label_rows <- grp$df_labels[rep(1, times = nrow(stats_df)), ]
    cbind(stats_df, label_rows)
  })
  result_subgroups <- do.call(rbind, args = c(subgroup_rows, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(
    result_all,
    result_subgroups
  )
}
175
176		#' @describeIn h_survival_duration_subgroups Helper to prepare a data frame with estimates of
177		#' treatment hazard ratio.
178		#'
179		#' @param strata_data (`factor`, `data.frame`, or `NULL`)\cr required if stratified analysis is performed.
180		#'
181		#' @return
182		#' * `h_coxph_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`,
183		#' `conf_level`, `pval` and `pval_label`.
184		#'
185		#' @examples
186		#' # Extract hazard ratio for one group.
187		#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM)
188		#'
189		#' # Extract hazard ratio for one group with stratification factor.
190		#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM, strata_data = adtte_f$STRATA1)
191		#'
192		#' @export
h_coxph_df <- function(tte, is_event, arm, strata_data = NULL, control = control_coxph()) {
  # Returns a one-row data frame with the hazard ratio of the second level of `arm` versus the
  # first (reference) level, as estimated by `s_coxph_pairwise()`. When either arm has no rows,
  # the estimates are `NA` and only the counts are reported.
  checkmate::assert_numeric(tte)
  checkmate::assert_logical(is_event, len = length(tte))
  assert_valid_factor(arm, n.levels = 2, len = length(tte))

  df_tte <- data.frame(tte = tte, is_event = is_event)
  strata_vars <- NULL

  # Stratification factors are appended as extra columns of the analysis data.
  if (is.data.frame(strata_data)) {
    strata_vars <- names(strata_data)
    checkmate::assert_data_frame(strata_data, nrows = nrow(df_tte))
    assert_df_with_factors(strata_data, as.list(stats::setNames(strata_vars, strata_vars)))
    df_tte[strata_vars] <- strata_data
  } else if (!is.null(strata_data)) {
    assert_valid_factor(strata_data, len = nrow(df_tte))
    strata_vars <- "strata_data"
    df_tte[strata_vars] <- strata_data
  }

  data_by_arm <- split(df_tte, arm)
  n_ref <- nrow(data_by_arm[[1]])
  n_trt <- nrow(data_by_arm[[2]])

  if (n_ref > 0 && n_trt > 0) {
    # Hazard ratio and CI.
    result <- s_coxph_pairwise(
      df = data_by_arm[[2]],
      .ref_group = data_by_arm[[1]],
      .in_ref_col = FALSE,
      .var = "tte",
      is_event = "is_event",
      strata = strata_vars,
      control = control
    )

    return(data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = unname(as.numeric(result$n_tot)),
      n_tot_events = unname(as.numeric(result$n_tot_events)),
      hr = unname(as.numeric(result$hr)),
      lcl = unname(result$hr_ci[1]),
      ucl = unname(result$hr_ci[2]),
      conf_level = control[["conf_level"]],
      pval = as.numeric(result$pvalue),
      pval_label = obj_label(result$pvalue),
      stringsAsFactors = FALSE
    ))
  }

  # No comparison is possible. Counts come from the complete cases when exactly one arm has
  # rows, and are zero when both arms are empty.
  if (n_ref > 0 || n_trt > 0) {
    df_tte_complete <- df_tte[stats::complete.cases(df_tte), ]
    n_tot <- nrow(df_tte_complete)
    n_tot_events <- sum(df_tte_complete$is_event)
  } else {
    n_tot <- 0L
    n_tot_events <- 0L
  }

  data.frame(
    # Dummy column needed downstream to create a nested header.
    arm = " ",
    n_tot = n_tot,
    n_tot_events = n_tot_events,
    hr = NA,
    lcl = NA,
    ucl = NA,
    conf_level = control[["conf_level"]],
    pval = NA,
    pval_label = NA,
    stringsAsFactors = FALSE
  )
}
276
277		#' @describeIn h_survival_duration_subgroups Summarizes estimates of the treatment hazard ratio
278		#' across subgroups in a data frame. `variables` corresponds to the names of variables found in
279		#' `data`, passed as a named list and requires elements `tte`, `is_event`, `arm` and
280		#' optionally `subgroups` and `strata`. `groups_lists` optionally specifies
281		#' groupings for `subgroups` variables.
282		#'
283		#' @return
284		#' * `h_coxph_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`,
285		#' `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`.
286		#'
287		#' @examples
288		#' # Extract hazard ratio for multiple groups.
289		#' h_coxph_subgroups_df(
290		#' variables = list(
291		#' tte = "AVAL",
292		#' is_event = "is_event",
293		#' arm = "ARM",
294		#' subgroups = c("SEX", "BMRKR2")
295		#' ),
296		#' data = adtte_f
297		#' )
298		#'
299		#' # Define groupings of BMRKR2 levels.
300		#' h_coxph_subgroups_df(
301		#' variables = list(
302		#' tte = "AVAL",
303		#' is_event = "is_event",
304		#' arm = "ARM",
305		#' subgroups = c("SEX", "BMRKR2")
306		#' ),
307		#' data = adtte_f,
308		#' groups_lists = list(
309		#' BMRKR2 = list(
310		#' "low" = "LOW",
311		#' "low/medium" = c("LOW", "MEDIUM"),
312		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
313		#' )
314		#' )
315		#' )
316		#'
317		#' # Extract hazard ratio for multiple groups with stratification factors.
318		#' h_coxph_subgroups_df(
319		#' variables = list(
320		#' tte = "AVAL",
321		#' is_event = "is_event",
322		#' arm = "ARM",
323		#' subgroups = c("SEX", "BMRKR2"),
324		#' strata = c("STRATA1", "STRATA2")
325		#' ),
326		#' data = adtte_f
327		#' )
328		#'
329		#' @export
h_coxph_subgroups_df <- function(variables,
                                 data,
                                 groups_lists = list(),
                                 control = control_coxph(),
                                 label_all = "All Patients") {
  # Stacks the `h_coxph_df()` result for the whole of `data` (row type "content") on top of one
  # result per subgroup level (row type "analysis").
  #
  # variables:    named list of column names in `data`; `tte`, `is_event` and `arm` are used for
  #               the model, `subgroups` and `strata` are optional.
  # groups_lists: optional groupings of subgroup levels, forwarded to `h_split_by_subgroups()`.
  # control:      forwarded unchanged to `h_coxph_df()`.
  # label_all:    string used as `subgroup` and `var_label` of the overall row.

  # Backward compatibility: accept the deprecated `strat` element name and copy it to `strata`.
  if ("strat" %in% names(variables)) {
    warning(
      "Warning: the `strat` element name of the `variables` list argument to `h_coxph_subgroups_df()` ",
      "was deprecated in tern 0.9.4.\n ",
      "Please use the name `strata` instead of `strat` in the `variables` argument."
    )
    variables[["strata"]] <- variables[["strat"]]
  }

  checkmate::assert_character(variables$tte)
  checkmate::assert_character(variables$is_event)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_character(variables$strata, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  # Runs `h_coxph_df()` on one data frame using the column names given in `variables`.
  fit_one <- function(df) {
    h_coxph_df(
      tte = df[[variables$tte]],
      is_event = df[[variables$is_event]],
      arm = df[[variables$arm]],
      strata_data = if (is.null(variables$strata)) NULL else df[variables$strata],
      control = control
    )
  }

  # Add All Patients.
  result_all <- fit_one(data)
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables the overall row is the complete result.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  # Add Subgroups.
  l_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)

  l_result <- lapply(l_data, function(grp) {
    result <- fit_one(grp$df)
    # Repeat the single label row so that it lines up with every row of the estimate.
    result_labels <- grp$df_labels[rep(1, times = nrow(result)), ]
    cbind(result, result_labels)
  })

  result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(
    result_all,
    result_subgroups
  )
}
393
394		#' Split data frame by subgroups
395		#'
396		#' @description `r lifecycle::badge("stable")`
397		#'
398		#' Split a data frame into a non-nested list of subsets.
399		#'
400		#' @inheritParams argument_convention
401		#' @inheritParams survival_duration_subgroups
402		#' @param data (`data.frame`)\cr dataset to split.
403		#' @param subgroups (`character`)\cr names of factor variables from `data` used to create subsets.
404		#' Unused levels not present in `data` are dropped. Note that the order in this vector
405		#' determines the order in the downstream table.
406		#'
407		#' @return A list with subset data (`df`) and metadata about the subset (`df_labels`).
408		#'
409		#' @details Main functionality is to prepare data for use in forest plot layouts.
410		#'
411		#' @examples
412		#' df <- data.frame(
413		#' x = c(1:5),
414		#' y = factor(c("A", "B", "A", "B", "A"), levels = c("A", "B", "C")),
415		#' z = factor(c("C", "C", "D", "D", "D"), levels = c("D", "C"))
416		#' )
417		#' formatters::var_labels(df) <- paste("label for", names(df))
418		#'
419		#' h_split_by_subgroups(
420		#' data = df,
421		#' subgroups = c("y", "z")
422		#' )
423		#'
424		#' h_split_by_subgroups(
425		#' data = df,
426		#' subgroups = c("y", "z"),
427		#' groups_lists = list(
428		#' y = list("AB" = c("A", "B"), "C" = "C")
429		#' )
430		#' )
431		#'
432		#' @export
h_split_by_subgroups <- function(data,
                                 subgroups,
                                 groups_lists = list()) {
  # Splits `data` into a flat, named list with one entry per subgroup level (or per user-defined
  # group of levels). Each entry is a list holding the subset (`df`) and a one-row data frame
  # describing it (`df_labels` with columns `subgroup`, `var` and `var_label`).
  #
  # data:         data frame to split.
  # subgroups:    names of factor columns in `data`; their order fixes the order of the output.
  # groups_lists: named list, keyed by subgroup variable, of named level groupings.
  checkmate::assert_character(subgroups, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(groups_lists, names = "named")
  checkmate::assert_subset(names(groups_lists), subgroups)
  assert_df_with_factors(data, as.list(stats::setNames(subgroups, subgroups)))

  data_labels <- unname(formatters::var_labels(data))
  df_subgroups <- data[, subgroups, drop = FALSE]
  subgroup_labels <- formatters::var_labels(df_subgroups, fill = TRUE)

  l_labels <- Map(function(grp_i, name_i) {
    existing_levels <- levels(droplevels(grp_i))
    grp_levels <- if (name_i %in% names(groups_lists)) {
      # For this variable groupings are defined. We check which groups are contained in the data.
      group_list_i <- groups_lists[[name_i]]
      group_has_levels <- vapply(group_list_i, function(lvls) any(lvls %in% existing_levels), TRUE)
      names(which(group_has_levels))
    } else {
      existing_levels
    }
    data.frame(
      subgroup = grp_levels,
      var = name_i,
      var_label = unname(subgroup_labels[name_i]),
      stringsAsFactors = FALSE # Rationale is that subgroups may not be unique.
    )
  }, df_subgroups, names(df_subgroups))

  # Create a data frame with one row per subgroup.
  df_labels <- do.call(rbind, args = c(l_labels, make.row.names = FALSE))
  row_label <- paste0(df_labels$var, ".", df_labels$subgroup)
  row_split_var <- factor(row_label, levels = row_label)

  # Create a list of data subsets.
  lapply(split(df_labels, row_split_var), function(row_i) {
    # Levels that belong to this subset: the members of the user-defined group, or the level itself.
    target_levels <- if (row_i$var %in% names(groups_lists)) {
      groups_lists[[row_i$var]][[row_i$subgroup]]
    } else {
      row_i$subgroup
    }
    # `%in%` never returns NA, so observations with a missing subgroup value are left out. Comparing
    # with `==` would give NA for them, and an NA in a logical row index adds an all-NA row.
    which_row <- data[[row_i$var]] %in% target_levels
    df <- data[which_row, ]
    rownames(df) <- NULL
    formatters::var_labels(df) <- data_labels

    list(
      df = df,
      df_labels = data.frame(row_i, row.names = NULL)
    )
  })
}

1		#' Helper functions for Cox proportional hazards regression
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper functions used in [fit_coxreg_univar()] and [fit_coxreg_multivar()].
6		#'
7		#' @inheritParams argument_convention
8		#' @inheritParams h_coxreg_univar_extract
9		#' @inheritParams cox_regression_inter
10		#' @inheritParams control_coxreg
11		#'
12		#' @seealso [cox_regression]
13		#'
14		#' @name h_cox_regression
15		NULL
16
17		#' @describeIn h_cox_regression Helper for Cox regression formula. Creates a named vector of formula strings. It is used
18		#' internally by [fit_coxreg_univar()] for the comparison of univariate Cox regression models.
19		#'
20		#' @return
21		#' * `h_coxreg_univar_formulas()` returns a `character` vector coercible into formulas (e.g. [stats::as.formula()]).
22		#'
23		#' @examples
24		#' # `h_coxreg_univar_formulas`
25		#'
26		#' ## Simple formulas.
27		#' h_coxreg_univar_formulas(
28		#' variables = list(
29		#' time = "time", event = "status", arm = "armcd", covariates = c("X", "y")
30		#' )
31		#' )
32		#'
33		#' ## Addition of an optional strata.
34		#' h_coxreg_univar_formulas(
35		#' variables = list(
36		#' time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
37		#' strata = "SITE"
38		#' )
39		#' )
40		#'
41		#' ## Inclusion of the interaction term.
42		#' h_coxreg_univar_formulas(
43		#' variables = list(
44		#' time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
45		#' strata = "SITE"
46		#' ),
47		#' interaction = TRUE
48		#' )
49		#'
50		#' ## Only covariates fitted in separate models.
51		#' h_coxreg_univar_formulas(
52		#' variables = list(
53		#' time = "time", event = "status", covariates = c("X", "y")
54		#' )
55		#' )
56		#'
57		#' @export
h_coxreg_univar_formulas <- function(variables,
                                     interaction = FALSE) {
  # Returns a named character vector of model formulas: one per covariate (named after it) and,
  # when an arm variable is supplied, a leading arm-only formula named "ref".
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  checkmate::assert_flag(interaction)

  # An interaction term needs both an arm and at least one covariate.
  if (!has_arm || is.null(variables$covariates)) {
    checkmate::assert_false(interaction)
  }

  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # Pieces shared by every formula.
  lhs <- paste0("survival::Surv(", variables$time, ", ", variables$event, ") ~ ")
  strata_term <- if (is.null(variables$strata)) {
    ""
  } else {
    paste0(" + strata(", paste0(variables$strata, collapse = ", "), ")")
  }

  covariate_forms <- NULL
  if (!is.null(variables$covariates)) {
    # `ifelse()` is kept here on purpose: it yields a single string whatever `variables$arm` holds.
    arm_term <- ifelse(has_arm, variables$arm, "1")
    joiner <- if (interaction) " * " else " + "
    covariate_forms <- paste0(lhs, arm_term, joiner, variables$covariates, strata_term)
  }

  if (!has_arm) {
    return(stats::setNames(covariate_forms, variables$covariates))
  }

  # Arm-only reference model goes first.
  ref_form <- paste0(lhs, variables$arm, strata_term)
  stats::setNames(
    c(ref_form, covariate_forms),
    c("ref", variables$covariates)
  )
}
107
108		#' @describeIn h_cox_regression Helper for multivariate Cox regression formula. Creates a formula
109		#' string. It is used internally by [fit_coxreg_multivar()] for the comparison of multivariate Cox
110		#' regression models. Interactions will not be included in multivariate Cox regression model.
111		#'
112		#' @return
113		#' * `h_coxreg_multivar_formula()` returns a `string` coercible into a formula (e.g. [stats::as.formula()]).
114		#'
115		#' @examples
116		#' # `h_coxreg_multivar_formula`
117		#'
118		#' h_coxreg_multivar_formula(
119		#' variables = list(
120		#' time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE")
121		#' )
122		#' )
123		#'
124		#' # Addition of an optional strata.
125		#' h_coxreg_multivar_formula(
126		#' variables = list(
127		#' time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE"),
128		#' strata = "SITE"
129		#' )
130		#' )
131		#'
132		#' # Example without treatment arm.
133		#' h_coxreg_multivar_formula(
134		#' variables = list(
135		#' time = "AVAL", event = "event", covariates = c("RACE", "AGE"),
136		#' strata = "SITE"
137		#' )
138		#' )
139		#'
140		#' @export
h_coxreg_multivar_formula <- function(variables) {
  # Returns one formula string: the survival response on the left, then the arm (or "1" when no
  # arm is given), all covariates and an optional strata() term joined by " + ".
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # Right-hand side: arm term first, followed by the covariates in the order given.
  rhs_terms <- c(ifelse(has_arm, variables$arm, "1"), variables$covariates)
  rhs <- paste(rhs_terms, collapse = " + ")

  if (!is.null(variables$strata)) {
    rhs <- paste0(rhs, " + strata(", paste0(variables$strata, collapse = ", "), ")")
  }

  paste0("survival::Surv(", variables$time, ", ", variables$event, ") ~ ", rhs)
}
162
163		#' @describeIn h_cox_regression Utility function to help tabulate the result of
164		#' a univariate Cox regression model.
165		#'
166		#' @param effect (`string`)\cr the treatment variable.
167		#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()].
168		#'
169		#' @return
170		#' * `h_coxreg_univar_extract()` returns a `data.frame` with variables `effect`, `term`, `term_label`, `level`,
171		#' `n`, `hr`, `lcl`, `ucl`, and `pval`.
172		#'
173		#' @examples
174		#' library(survival)
175		#'
176		#' dta_simple <- data.frame(
177		#' time = c(5, 5, 10, 10, 5, 5, 10, 10),
178		#' status = c(0, 0, 1, 0, 0, 1, 1, 1),
179		#' armcd = factor(LETTERS[c(1, 1, 1, 1, 2, 2, 2, 2)], levels = c("A", "B")),
180		#' var1 = c(45, 55, 65, 75, 55, 65, 85, 75),
181		#' var2 = c("F", "M", "F", "M", "F", "M", "F", "U")
182		#' )
183		#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
184		#' result <- h_coxreg_univar_extract(
185		#' effect = "armcd", covar = "armcd", mod = mod, data = dta_simple
186		#' )
187		#' result
188		#'
189		#' @export
h_coxreg_univar_extract <- function(effect,
                                    covar,
                                    data,
                                    mod,
                                    control = control_coxreg()) {
  # Collects, for the `effect` term of the fitted Cox model `mod`, the hazard ratio with its
  # confidence limits and the p-value from the ANOVA table into a data frame.
  checkmate::assert_string(covar)
  checkmate::assert_string(effect)
  checkmate::assert_class(mod, "coxph")

  # Translate the p-value method from `control` into the test statistic name used for the ANOVA.
  stat_lookup <- c(wald = "Wald", likelihood = "LR")
  test_statistic <- stat_lookup[control$pval_method]

  anova_tab <- muffled_car_anova(mod, test_statistic)
  cox_summary <- summary(mod, conf.int = control$conf_level)
  coef_tab <- broom::tidy(cox_summary)

  # Combine results together.
  effect_row <- anova_tab[effect, , drop = TRUE]
  pval_pos <- grep(pattern = "Pr", x = names(effect_row))
  pval <- effect_row[[pval_pos, drop = TRUE]]
  effect_coefs <- coef_tab[grepl(effect, coef_tab$level), ]

  # The row describes the treatment itself when the covariate under study is the effect variable.
  is_treatment <- effect == covar

  if (is_treatment) {
    covar_levels <- levels(data[[covar]])
    term_label <- paste0(covar_levels[2], " vs control (", covar_levels[1], ")")
    effect_type <- "Treatment:"
  } else {
    term_label <- unname(labels_or_names(data[covar]))
    effect_type <- "Covariate:"
  }

  data.frame(
    effect = effect_type,
    term = covar,
    term_label = term_label,
    level = levels(data[[effect]])[2],
    n = mod[["n"]],
    hr = unname(effect_coefs["exp(coef)"]),
    lcl = unname(effect_coefs[grep("lower", names(effect_coefs))]),
    ucl = unname(effect_coefs[grep("upper", names(effect_coefs))]),
    pval = pval,
    stringsAsFactors = FALSE
  )
}
232
233		#' @describeIn h_cox_regression Tabulation of multivariate Cox regressions. Utility function to help
234		#' tabulate the result of a multivariate Cox regression model for a treatment/covariate variable.
235		#'
236		#' @return
237		#' * `h_coxreg_multivar_extract()` returns a `data.frame` with variables `pval`, `hr`, `lcl`, `ucl`, `level`,
238		#' `n`, `term`, and `term_label`.
239		#'
240		#' @examples
241		#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
242		#' result <- h_coxreg_multivar_extract(
243		#' var = "var1", mod = mod, data = dta_simple
244		#' )
245		#' result
246		#'
247		#' @export
h_coxreg_multivar_extract <- function(var,
                                      data,
                                      mod,
                                      control = control_coxreg()) {
  # Tabulates the rows of the fitted Cox model `mod` that belong to the variable `var`. For
  # non-numeric variables a header row taken from the ANOVA table is placed above the level rows.
  stat_lookup <- c(wald = "Wald", likelihood = "LR")
  test_statistic <- stat_lookup[control$pval_method]
  anova_fit <- muffled_car_anova(mod, test_statistic)

  cox_summary <- summary(mod, conf.int = control$conf_level)
  anova_tab <- broom::tidy(anova_fit)
  coef_tab <- broom::tidy(cox_summary)

  ret_anova <- anova_tab[anova_tab$term == var, c("term", "p.value")]
  names(ret_anova)[2] <- "pval"

  # Factor terms are matched by prefix (variable name followed by the level), others by exact name.
  keep_rows <- if (is.factor(data[[var]])) {
    startsWith(prefix = var, x = coef_tab$level)
  } else {
    var == coef_tab$level
  }
  keep_cols <- !(names(coef_tab) %in% "exp(-coef)")
  ret_cox <- coef_tab[keep_rows, keep_cols]
  names(ret_cox)[1:4] <- c("pval", "hr", "lcl", "ucl")

  varlab <- unname(labels_or_names(data[var]))
  ret_cox$term <- varlab

  if (is.numeric(data[[var]])) {
    ret <- ret_cox
    ret$term_label <- ret$term
  } else {
    var_levels <- levels(data[[var]])
    # With at most two levels the header row carries no p-value of its own.
    if (length(var_levels) <= 2) {
      ret_anova$pval <- NA
    }
    ret_anova$term_label <- paste0(varlab, " (reference = ", var_levels[1], ")")
    # Strip the variable name so that only the level remains as the row label.
    ret_cox$level <- gsub(var, "", ret_cox$level)
    ret_cox$term_label <- ret_cox$level
    ret <- dplyr::bind_rows(ret_anova, ret_cox)
  }

  as.data.frame(ret)
}