tern coverage - 95.62%

Files
Source

#' Missing Data
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Substitute missing data with a string or factor level.
#'
#' @param x (`factor` or `character` vector)\cr values for which any missing values should be substituted.
#' @param label (`character`)\cr string that missing data should be replaced with.
#'
#' @return `x` with any `NA` values substituted by `label`.
#'
#' @examples
#' explicit_na(c(NA, "a", "b"))
#' is.na(explicit_na(c(NA, "a", "b")))
#'
#' explicit_na(factor(c(NA, "a", "b")))
#' is.na(explicit_na(factor(c(NA, "a", "b"))))
#'
#' explicit_na(sas_na(c("a", "")))
#'
#' @export
explicit_na <- function(x, label = "<Missing>") {
  checkmate::assert_string(label)

  # Factors: turn `NA` into an explicit level called `label`, then drop that
  # level again if no observation ended up using it.
  if (is.factor(x)) {
    with_na_level <- forcats::fct_na_value_to_level(x, label)
    return(forcats::fct_drop(with_na_level, only = label))
  }

  # Character vectors: overwrite the missing entries with `label`.
  if (is.character(x)) {
    return(replace(x, is.na(x), label))
  }

  stop("only factors and character vectors allowed")
}

#' Convert Strings to `NA`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' SAS imports missing data as empty strings or strings with whitespaces only. This helper function can be used to
#' convert these values to `NA`s.
#'
#' @inheritParams explicit_na
#' @param empty (`logical`)\cr if `TRUE` empty strings get replaced by `NA`.
#' @param whitespaces (`logical`)\cr if `TRUE` then strings made from whitespaces only get replaced with `NA`.
#'
#' @return `x` with `""` and/or whitespace-only values substituted by `NA`, depending on the values of
#'   `empty` and `whitespaces`.
#'
#' @examples
#' sas_na(c("1", "", " ", "   ", "b"))
#' sas_na(factor(c("", " ", "b")))
#'
#' is.na(sas_na(c("1", "", " ", "   ", "b")))
#'
#' @export
sas_na <- function(x, empty = TRUE, whitespaces = TRUE) {
  checkmate::assert_flag(empty)
  checkmate::assert_flag(whitespaces)

  if (is.factor(x)) {
    # `%in%` never returns `NA`, so a factor that already carries an explicit `NA` level
    # (e.g. created with `addNA()`) does not make the `if` condition below fail.
    empty_levels <- levels(x) %in% ""
    if (empty && any(empty_levels)) levels(x)[empty_levels] <- NA

    # Computed on the current levels because the step above may have removed one.
    ws_levels <- grepl("^\\s+$", levels(x))
    if (whitespaces && any(ws_levels)) levels(x)[ws_levels] <- NA

    x
  } else if (is.character(x)) {
    # Existing `NA` entries give an `NA` index here, which a length-one replacement ignores.
    if (empty) x[x == ""] <- NA_character_

    if (whitespaces) x[grepl("^\\s+$", x)] <- NA_character_

    x
  } else {
    stop("only factors and character vectors allowed")
  }
}

#' Cox Proportional Hazards Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Fits a Cox regression model and estimates hazard ratio to describe the effect size in a survival analysis.
#'
#' @inheritParams argument_convention
#'
#' @details Cox models are the most commonly used methods to estimate the magnitude of
#'   the effect in survival analysis. It assumes proportional hazards: the ratio
#'   of the hazards between groups (e.g., two arms) is constant over time.
#'   This ratio is referred to as the "hazard ratio" (HR) and is one of the
#'   most commonly reported metrics to describe the effect size in survival
#'   analysis (NEST Team, 2020).
#'
#' @seealso [fit_coxreg] for relevant fitting functions, [h_cox_regression] for relevant
#'   helper functions, and [tidy_coxreg] for custom tidy methods.
#'
#' @examples
#' library(survival)
#'
#' # Testing dataset [survival::bladder].
#' set.seed(1, kind = "Mersenne-Twister")
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   tibble::tibble(
#'     TIME = stop,
#'     STATUS = event,
#'     ARM = as.factor(rx),
#'     COVAR1 = as.factor(enum) %>% formatters::with_label("A Covariate Label"),
#'     COVAR2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4, labels = c("F", "F", "M", "M")
#'     ) %>% formatters::with_label("Sex (F/M)")
#'   )
#' )
#' dta_bladder$AGE <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#' dta_bladder$STUDYID <- factor("X")
#'
#' plot(
#'   survfit(Surv(TIME, STATUS) ~ ARM + COVAR1, data = dta_bladder),
#'   lty = 2:4,
#'   xlab = "Months",
#'   col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
#' )
#'
#' @name cox_regression
NULL

#' @describeIn cox_regression Statistics function that transforms results tabulated
#'   from [fit_coxreg_univar()] or [fit_coxreg_multivar()] into a list.
#'
#' @param model_df (`data.frame`)\cr contains the resulting model fit from a [fit_coxreg]
#'   function with tidying applied via [broom::tidy()].
#' @param .stats (`character`)\cr the name of statistics to be reported among:
#'   * `n`: number of observations (univariate only)
#'   * `hr`: hazard ratio
#'   * `ci`: confidence interval
#'   * `pval`: p-value of the treatment effect
#'   * `pval_inter`: p-value of the interaction effect between the treatment and the covariate (univariate only)
#' @param .which_vars (`character`)\cr which rows should statistics be returned for from the given model.
#'   Defaults to "all". Other options include "var_main" for main effects, "inter" for interaction effects,
#'   and "multi_lvl" for multivariate model covariate level rows. When `.which_vars` is "all" specific
#'   variables can be selected by specifying `.var_nms`.
#' @param .var_nms (`character`)\cr the `term` value of rows in `model_df` for which `.stats` should be returned.
#'   Typically this is the name of a variable. If using variable labels, `.var_nms` should be a vector of both the
#'   desired variable name and the variable label in that order to see all `.stats` related to that variable. When
#'   `.which_vars` is "var_main" `.var_nms` should be only the variable name.
#'
#' @return
#' * `s_coxreg()` returns the selected statistic from the Cox regression model for the selected variable(s).
#'
#' @examples
#' # s_coxreg
#'
#' # Univariate
#' u1_variables <- list(
#'   time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
#' )
#' univar_model <- fit_coxreg_univar(variables = u1_variables, data = dta_bladder)
#' df1 <- broom::tidy(univar_model)
#' s_coxreg(model_df = df1, .stats = "hr")
#'
#' # Univariate with interactions
#' univar_model_inter <- fit_coxreg_univar(
#'   variables = u1_variables, control = control_coxreg(interaction = TRUE), data = dta_bladder
#' )
#' df1_inter <- broom::tidy(univar_model_inter)
#' s_coxreg(model_df = df1_inter, .stats = "hr", .which_vars = "inter", .var_nms = "COVAR1")
#'
#' # Univariate without treatment arm - only "COVAR2" covariate effects
#' u2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
#' univar_covs_model <- fit_coxreg_univar(variables = u2_variables, data = dta_bladder)
#' df1_covs <- broom::tidy(univar_covs_model)
#' s_coxreg(model_df = df1_covs, .stats = "hr", .var_nms = c("COVAR2", "Sex (F/M)"))
#'
#' # Multivariate.
#' m1_variables <- list(
#'   time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
#' )
#' multivar_model <- fit_coxreg_multivar(variables = m1_variables, data = dta_bladder)
#' df2 <- broom::tidy(multivar_model)
#' s_coxreg(model_df = df2, .stats = "pval", .which_vars = "var_main", .var_nms = "COVAR1")
#' s_coxreg(
#'   model_df = df2, .stats = "pval", .which_vars = "multi_lvl",
#'   .var_nms = c("COVAR1", "A Covariate Label")
#' )
#'
#' # Multivariate without treatment arm - only "COVAR1" main effect
#' m2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
#' multivar_covs_model <- fit_coxreg_multivar(variables = m2_variables, data = dta_bladder)
#' df2_covs <- broom::tidy(multivar_covs_model)
#' s_coxreg(model_df = df2_covs, .stats = "hr")
#'
#' @export
s_coxreg <- function(model_df, .stats, .which_vars = "all", .var_nms = NULL) {
  assert_df_with_variables(model_df, list(term = "term", stat = .stats))
  checkmate::assert_multi_class(model_df$term, classes = c("factor", "character"))
  model_df$term <- as.character(model_df$term)
  .var_nms <- .var_nms[!is.na(.var_nms)]

  # An empty `.var_nms` means "no filtering": all terms of the model are kept.
  if (length(.var_nms) > 0) model_df <- model_df[model_df$term %in% .var_nms, ]
  # For "multi_lvl" every remaining row is relabelled with the last entry of `.var_nms`,
  # so that the split below yields a single group.
  if (.which_vars == "multi_lvl") model_df$term <- tail(.var_nms, 1)

  # We need a list with names corresponding to the stats to display of equal length to the list of stats.
  y <- split(model_df, f = model_df$term, drop = FALSE)
  y <- stats::setNames(y, nm = rep(.stats, length(y)))

  if (.which_vars == "var_main") {
    y <- lapply(y, function(x) x[1, ]) # only main effect
  } else if (.which_vars %in% c("inter", "multi_lvl")) {
    # Exclude the main effect (first row) of each group. The decision is taken per group:
    # a group consisting of a single row is returned unchanged rather than emptied.
    y <- lapply(y, function(x) if (nrow(x) > 1) x[-1, ] else x)
  }

  # One element per term group: the `.stats` column as a list, named by `term_label`.
  lapply(
    X = y,
    FUN = function(x) {
      z <- as.list(x[[.stats]])
      stats::setNames(z, nm = x$term_label)
    }
  )
}

#' @describeIn cox_regression Analysis function which is used as `afun` in [rtables::analyze()]
#'   and `cfun` in [rtables::summarize_row_groups()] within `summarize_coxreg()`.
#'
#' @param eff (`flag`)\cr whether treatment effect should be calculated. Defaults to `FALSE`.
#' @param var_main (`flag`)\cr whether main effects should be calculated. Defaults to `FALSE`.
#' @param na_level (`string`)\cr custom string to replace all `NA` values with. Defaults to `""`.
#' @param cache_env (`environment`)\cr an environment object used to cache the regression model in order to
#'   avoid repeatedly fitting the same model for every row in the table. Defaults to `NULL` (no caching).
#'
#' @return
#' * `a_coxreg()` returns formatted [rtables::CellValue()].
#'
#' @examples
#' tern:::a_coxreg(
#'   df = dta_bladder,
#'   labelstr = "Label 1",
#'   variables = u1_variables,
#'   .spl_context = list(value = "COVAR1"),
#'   .stats = "n",
#'   .formats = "xx"
#' )
#'
#' tern:::a_coxreg(
#'   df = dta_bladder,
#'   labelstr = "",
#'   variables = u1_variables,
#'   .spl_context = list(value = "COVAR2"),
#'   .stats = "pval",
#'   .formats = "xx.xxxx"
#' )
#'
#' @keywords internal
# Analysis/content function producing the cells of one row group of a Cox regression table.
#
# The function (1) derives the current variable from the split context, (2) fits the univariate or
# multivariate model (or takes it from `cache_env`), (3) decides which rows of the tidied model to
# extract via `s_coxreg()`, (4) derives the row labels and (5) returns the values through `in_rows()`.
a_coxreg <- function(df,
                     labelstr,
                     eff = FALSE,
                     var_main = FALSE,
                     multivar = FALSE,
                     variables,
                     at = list(),
                     control = control_coxreg(),
                     .spl_context,
                     .stats,
                     .formats,
                     na_level = "",
                     cache_env = NULL) {
  # `!"arm" %in% names(variables)` parses as `!("arm" %in% names(variables))`.
  cov_no_arm <- !multivar && !"arm" %in% names(variables) && control$interaction # special case: univar no arm
  cov <- tail(.spl_context$value, 1) # current variable/covariate
  var_lbl <- formatters::var_labels(df)[cov] # check for df labels
  if (!is.na(var_lbl) && labelstr == cov && cov %in% variables$covariates) labelstr <- var_lbl # use df labels if none
  # Decide which model is fitted: for treatment-effect rows, multivariate models and the
  # "univariate without arm" case the interaction is switched off; otherwise the model is
  # restricted to the current covariate and the interaction is switched on for main-effect rows.
  if (eff || multivar || cov_no_arm) {
    control$interaction <- FALSE
  } else {
    variables$covariates <- cov
    if (var_main) control$interaction <- TRUE
  }

  # Model cache keyed by `cov` only. With the default `cache_env = NULL` the lookup yields `NULL`
  # and the assignment below merely creates a local list, i.e. nothing is shared between calls.
  # NOTE(review): calls sharing a `cov` but differing in `eff`/`var_main`/`control` would reuse the
  # same cached model - confirm that callers never mix such calls on one environment.
  if (is.null(cache_env[[cov]])) {
    if (!multivar) {
      model <- fit_coxreg_univar(variables = variables, data = df, at = at, control = control) %>% broom::tidy()
    } else {
      model <- fit_coxreg_multivar(variables = variables, data = df, control = control) %>% broom::tidy()
    }
    cache_env[[cov]] <- model
  } else {
    model <- cache_env[[cov]]
  }
  # Univariate rows that are not main-effect rows get an all-`NA` `pval_inter` column.
  if (!multivar && !var_main) model[, "pval_inter"] <- NA_real_

  # The model is already fitted at this point; from here on `multivar` and `var_main` only steer
  # which rows are selected and how they are labelled. Without an arm (either the interaction
  # special case or a numeric covariate) the rows are handled like multivariate ones.
  if (cov_no_arm || (!cov_no_arm && !"arm" %in% names(variables) && is.numeric(df[[cov]]))) {
    multivar <- TRUE
    if (!cov_no_arm) var_main <- TRUE
  }

  # Arguments forwarded to `s_coxreg()` as `.which_vars` / `.var_nms`.
  vars_coxreg <- list(which_vars = "all", var_nms = NULL)
  if (eff) {
    if (multivar && !var_main) { # multivar treatment level
      var_lbl_arm <- formatters::var_labels(df)[[variables$arm]]
      vars_coxreg[c("var_nms", "which_vars")] <- list(c(variables$arm, var_lbl_arm), "multi_lvl")
    } else { # treatment effect
      vars_coxreg["var_nms"] <- variables$arm
      if (var_main) vars_coxreg["which_vars"] <- "var_main"
    }
  } else {
    if (!multivar || (multivar && var_main && !is.numeric(df[[cov]]))) { # covariate effect/level
      vars_coxreg[c("var_nms", "which_vars")] <- list(cov, "var_main")
    } else if (multivar) { # multivar covariate level
      vars_coxreg[c("var_nms", "which_vars")] <- list(c(cov, var_lbl), "multi_lvl")
      # NOTE(review): this indexes `model` by a row named `cov`; confirm that the tidied model
      # actually carries such row names and that blanking this cell is the intent.
      if (var_main) model[cov, .stats] <- NA_real_
    }
    if (!multivar && !var_main && control$interaction) vars_coxreg["which_vars"] <- "inter" # interaction effect
  }
  # Only the first group returned by `s_coxreg()` is used.
  var_vals <- s_coxreg(model, .stats, .which_vars = vars_coxreg$which_vars, .var_nms = vars_coxreg$var_nms)[[1]]
  # Row labels: for reference-level rows the last three space-separated tokens of the first value
  # name are appended to `labelstr`; otherwise `labelstr`, "All" or the value names are used.
  var_names <- if (all(grepl("\\(reference = ", names(var_vals))) && labelstr != tail(.spl_context$value, 1)) {
    paste(c(labelstr, tail(strsplit(names(var_vals), " ")[[1]], 3)), collapse = " ") # "reference" main effect labels
  } else if ((!multivar && !eff && !(!var_main && control$interaction) && nchar(labelstr) > 0) ||
    (multivar && var_main && is.numeric(df[[cov]]))) {
    labelstr # other main effect labels
  } else if (multivar && !eff && !var_main && is.numeric(df[[cov]])) {
    "All" # multivar numeric covariate
  } else {
    names(var_vals)
  }
  # The single format and NA string are repeated so that every returned row has its own entry.
  in_rows(
    .list = var_vals, .names = var_names, .labels = var_names,
    .formats = stats::setNames(rep(.formats, length(var_names)), var_names),
    .format_na_strs = stats::setNames(rep(na_level, length(var_names)), var_names)
  )
}

#' @describeIn cox_regression Layout-creating function which creates a Cox regression summary table
#'   layout. This function is a wrapper for several `rtables` layouting functions, in particular
#'   [rtables::analyze_colvars()] and [rtables::summarize_row_groups()].
#'
#' @inheritParams fit_coxreg_univar
#' @param multivar (`flag`)\cr Defaults to `FALSE`. If `TRUE` multivariate Cox regression will run, otherwise
#'   univariate Cox regression will run.
#' @param common_var (`character`)\cr the name of a factor variable in the dataset which takes the same value
#'   for all rows. This should be created during pre-processing if no such variable currently exists.
#' @param .section_div (`character`)\cr string which should be repeated as a section divider between sections.
#'   Defaults to `NA` for no section divider. If a vector of two strings are given, the first will be used between
#'   treatment and covariate sections and the second between different covariates.
#'
#' @return
#' * `summarize_coxreg()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add a Cox regression table
#'   containing the chosen statistics to the table layout.
#'
#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()] which also take the `variables`, `data`,
#'   `at` (univariate only), and `control` arguments but return unformatted univariate and multivariate
#'   Cox regression models, respectively.
#'
#' @examples
#' # summarize_coxreg
#'
#' result_univar <- basic_table() %>%
#'   summarize_coxreg(variables = u1_variables) %>%
#'   build_table(dta_bladder)
#' result_univar
#'
#' result_multivar <- basic_table() %>%
#'   summarize_coxreg(
#'     variables = m1_variables,
#'     multivar = TRUE,
#'   ) %>%
#'   build_table(dta_bladder)
#' result_multivar
#'
#' result_univar_covs <- basic_table() %>%
#'   summarize_coxreg(
#'     variables = u2_variables,
#'   ) %>%
#'   build_table(dta_bladder)
#' result_univar_covs
#'
#' result_multivar_covs <- basic_table() %>%
#'   summarize_coxreg(
#'     variables = m2_variables,
#'     multivar = TRUE,
#'     varlabels = c("Covariate 1", "Covariate 2") # custom labels
#'   ) %>%
#'   build_table(dta_bladder)
#' result_multivar_covs
#'
#' @export
# Builds the Cox regression table layout: one column per requested statistic, a treatment
# section (when `variables$arm` is given) and one section per covariate.
# NOTE(review): `.indent_mods` is accepted but not referenced anywhere in this function body.
summarize_coxreg <- function(lyt,
                             variables,
                             control = control_coxreg(),
                             at = list(),
                             multivar = FALSE,
                             common_var = "STUDYID",
                             .stats = c("n", "hr", "ci", "pval", "pval_inter"),
                             .formats = c(
                               n = "xx", hr = "xx.xx", ci = "(xx.xx, xx.xx)",
                               pval = "x.xxxx | (<0.0001)", pval_inter = "x.xxxx | (<0.0001)"
                             ),
                             varlabels = NULL,
                             .indent_mods = NULL,
                             na_level = "",
                             .section_div = NA_character_) {
  if (multivar && control$interaction) {
    warning(paste(
      "Interactions are not available for multivariate cox regression using summarize_coxreg.",
      "The model will be calculated without interaction effects."
    ))
  }
  if (control$interaction && !"arm" %in% names(variables)) {
    stop("To include interactions please specify 'arm' in variables.")
  }

  # Restrict to the statistics that exist for the requested model type. `intersect()` returns
  # them in the canonical order of its first argument, regardless of the order given by the user.
  .stats <- if (!"arm" %in% names(variables) || multivar) { # only valid statistics
    intersect(c("hr", "ci", "pval"), .stats)
  } else if (control$interaction) {
    intersect(c("n", "hr", "ci", "pval", "pval_inter"), .stats)
  } else {
    intersect(c("n", "hr", "ci", "pval"), .stats)
  }
  stat_labels <- c(
    n = "n", hr = "Hazard Ratio", ci = paste0(control$conf_level * 100, "% CI"),
    pval = "p-value", pval_inter = "Interaction p-value"
  )
  stat_labels <- stat_labels[names(stat_labels) %in% .stats]
  # `.stats` and `.formats` are passed below as parallel per-column vectors, so the formats are
  # put in the order of `.stats` instead of the order in which the caller happened to list them.
  .formats <- .formats[intersect(.stats, names(.formats))]
  env <- new.env() # create caching environment

  # One column per statistic; every column receives its own statistic name and format plus a
  # reference to the shared caching environment.
  lyt <- lyt %>%
    split_cols_by_multivar(
      vars = rep(common_var, length(.stats)),
      varlabels = stat_labels,
      extra_args = list(
        .stats = .stats, .formats = .formats, na_level = rep(na_level, length(.stats)),
        cache_env = replicate(length(.stats), list(env))
      )
    )

  if ("arm" %in% names(variables)) { # treatment effect
    lyt <- lyt %>%
      split_rows_by(
        common_var,
        split_label = "Treatment:",
        label_pos = "visible",
        section_div = head(.section_div, 1)
      ) %>%
      summarize_row_groups(
        cfun = a_coxreg,
        extra_args = list(
          variables = variables, control = control, multivar = multivar, eff = TRUE, var_main = multivar
        )
      )
    if (multivar) { # treatment level effects
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          extra_args = list(eff = TRUE, control = control, variables = variables, multivar = multivar)
        )
    }
  }

  if ("covariates" %in% names(variables)) { # covariate main effects
    lyt <- lyt %>%
      split_rows_by_multivar(
        vars = variables$covariates,
        varlabels = varlabels,
        split_label = "Covariate:",
        nested = FALSE,
        section_div = tail(.section_div, 1)
      ) %>%
      summarize_row_groups(
        cfun = a_coxreg,
        extra_args = list(
          variables = variables, at = at, control = control, multivar = multivar,
          var_main = if (multivar) multivar else control$interaction
        )
      )
    # The check at the top guarantees `control$interaction` is FALSE whenever no arm is given,
    # so switching it on here only serves as the flag `a_coxreg()` reads for this special case.
    if (!"arm" %in% names(variables)) control$interaction <- TRUE # special case: univar no arm
    if (multivar || control$interaction) { # covariate level effects
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          extra_args = list(variables = variables, at = at, control = control, multivar = multivar, labelstr = "")
        )
    }
  }

  lyt
}

#' Pairwise Formula Special Term
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' The special term `pairwise` indicates that the model should be fitted individually for
#' every tested level in comparison to the reference level.
#'
#' @param x the variable for which pairwise result is expected.
#'
#' @return Variable "paired".
#'
#' @details Let `ARM` be a factor with levels A, B, C, and let B be the reference level.
#'   A model calling the formula including `pairwise(ARM)` will result in two models:
#'   * A model including only levels A and B, and effect of A estimated in reference to B.
#'   * A model including only levels C and B, the effect of C estimated in reference to B.
#'
#' @export
pairwise <- function(x) {
  lifecycle::deprecate_warn("0.8.1.9013", "pairwise()", "univariate()")
  # Capture the expression the caller supplied for `x` and attach it as an attribute.
  supplied_expr <- deparse(substitute(x))
  structure(x, varname = supplied_expr)
}

#' Univariate Formula Special Term
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The special term `univariate` indicates that the model should be fitted individually for
#' every variable included in `univariate`.
#'
#' @param x A vector of variable name separated by commas.
#'
#' @return When used within a model formula, produces univariate models for each variable provided.
#'
#' @details
#' If provided alongside with pairwise specification, the model
#' `y ~ ARM + univariate(SEX, AGE, RACE)` lead to the study and comparison of the models
#' + `y ~ ARM`
#' + `y ~ ARM + SEX`
#' + `y ~ ARM + AGE`
#' + `y ~ ARM + RACE`
#'
#' @export
univariate <- function(x) {
  # Capture the expression the caller supplied for `x` and attach it as an attribute.
  supplied_expr <- deparse(substitute(x))
  structure(x, varname = supplied_expr)
}

# Return the right-hand term of a formula as a character vector.
rht <- function(x) {
  checkmate::assert_formula(x)
  # Reversing the formula puts its right-hand side in first position.
  rhs <- rev(x)[[1]]
  as.character(rhs)
}

#' Hazard Ratio Estimation in Interactions
#'
#' This function estimates the hazard ratios between arms when an interaction variable is given with
#' specific values.
#'
#' @param variable,given Names of two variable in interaction. We seek the estimation of the levels of `variable`
#'   given the levels of `given`.
#' @param lvl_var,lvl_given corresponding levels as given by `levels`.
#' @param mmat A named numeric vector filled with 0, used as a template to obtain the design matrix.
#' @param coef Numeric of estimated coefficients.
#' @param vcov Variance-covariance matrix of underlying model.
#' @param conf_level Single numeric for the confidence level of estimate intervals.
#'
#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A)
#'   and Sex (F, M; reference Female). The model is abbreviated: y ~ Arm + Sex + Arm x Sex.
#'   The cox regression estimates the coefficients along with a variance-covariance matrix for:
#'
#'   - b1 (arm b), b2 (arm c)
#'   - b3 (sex m)
#'   - b4 (arm b: sex m), b5 (arm c: sex m)
#'
#'   Given that I want an estimation of the Hazard Ratio for arm C/sex M, the estimation
#'   will be given in reference to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5),
#'   therefore the interaction coefficient is given by b2 + b5 while its standard error is obtained
#'   as sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5)). The confidence limits use this standard error
#'   multiplied by the normal quantile (1.96 for a confidence level of 0.95).
#'
#' @return A list of matrix (one per level of variable) with rows corresponding to the combinations of
#'   `variable` and `given`, with columns:
#'   * `coef_hat`: Estimation of the coefficient.
#'   * `coef_se`: Standard error of the estimation.
#'   * `hr`: Hazard ratio.
#'   * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio.
#'
#' @seealso [s_cox_multivariate()].
#'
#' @examples
#' library(dplyr)
#' library(survival)
#'
#' ADSL <- tern_ex_adsl %>%
#'   filter(SEX %in% c("F", "M"))
#'
#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "PFS")
#' adtte$ARMCD <- droplevels(adtte$ARMCD)
#' adtte$SEX <- droplevels(adtte$SEX)
#'
#' mod <- coxph(
#'   formula = Surv(time = AVAL, event = 1 - CNSR) ~ (SEX + ARMCD)^2,
#'   data = adtte
#' )
#'
#' mmat <- stats::model.matrix(mod)[1, ]
#' mmat[!mmat == 0] <- 0
#'
#' # Internal function - estimate_coef
#' \dontrun{
#' estimate_coef(
#'   variable = "ARMCD", given = "SEX", lvl_var = "ARM A", lvl_given = "M",
#'   coef = stats::coef(mod), mmat = mmat, vcov = stats::vcov(mod), conf_level = .95
#' )
#' }
#'
#' @keywords internal
estimate_coef <- function(variable, given,
                          lvl_var, lvl_given,
                          coef,
                          mmat,
                          vcov,
                          conf_level = 0.95) {
  # Coefficient names are "<variable><level>"; the first level of `variable` is the reference.
  var_terms <- paste0(variable, lvl_var[-1])
  giv_terms <- paste0(given, lvl_given)

  # Every combination of a non-reference `variable` level with a `given` level.
  grid <- expand.grid(variable = var_terms, given = giv_terms)
  grid <- grid[order(grid$variable, grid$given), ]
  # The interaction coefficient may be named in either order, so both spellings are kept.
  grid$inter <- paste0(grid$variable, ":", grid$given)
  grid$rev_inter <- paste0(grid$given, ":", grid$variable)

  grouping <- grid$variable
  combo_labels <- paste(grid$variable, grid$given, sep = "/")

  # One column per combination: the `mmat` template with a 1 for the main effect of `variable`
  # and for the matching interaction term. The main effect of `given` is left out on purpose.
  contrasts <- apply(
    X = grid, MARGIN = 1, FUN = function(combo) {
      active_terms <- combo[names(combo) != "given"]
      mmat[names(mmat) %in% active_terms] <- 1
      mmat
    }
  )
  colnames(contrasts) <- combo_labels

  coef_hat <- t(contrasts) %*% as.matrix(coef)
  dimnames(coef_hat)[2] <- "coef"

  # The variance of a sum of coefficients is the sum over the matching block of `vcov`.
  coef_se <- apply(contrasts, 2, function(indicator) {
    picked <- as.logical(indicator)
    sqrt(sum(vcov[picked, picked]))
  })

  q_norm <- stats::qnorm((1 + conf_level) / 2)
  est <- cbind(coef_hat, `se(coef)` = coef_se)
  est <- cbind(
    est,
    hr = exp(est[, "coef"]),
    lcl = exp(est[, "coef"] - q_norm * est[, "se(coef)"]),
    ucl = exp(est[, "coef"] + q_norm * est[, "se(coef)"])
  )

  # Split the rows into one matrix per level of `variable`.
  est_by_level <- by(est, grouping, identity)
  est_by_level <- lapply(est_by_level, as.matrix)

  attr(est_by_level, "details") <- paste0(
    "Estimations of ", variable,
    " hazard ratio given the level of ", given, " compared to ",
    variable, " level ", lvl_var[1], "."
  )
  est_by_level
}

#' `tryCatch` around `car::Anova`
#'
#' Captures warnings when executing [car::Anova].
#'
#' @inheritParams car::Anova
#'
#' @return A list with item `aov` for the result of the model and `error_text` for the captured warnings.
#'
#' @examples
#' # `car::Anova` on cox regression model including strata and expected
#' # a likelihood ratio test triggers a warning as only Wald method is
#' # accepted.
#'
#' library(survival)
#'
#' mod <- coxph(
#'   formula = Surv(time = futime, event = fustat) ~ factor(rx) + strata(ecog.ps),
#'   data = ovarian
#' )
#'
#' # Internal function - try_car_anova
#' \dontrun{
#' with_wald <- try_car_anova(mod = mod, test.statistic = "Wald")
#' with_lr <- try_car_anova(mod = mod, test.statistic = "LR")
#' }
#'
#' @keywords internal
try_car_anova <- function(mod,
                          test.statistic) { # nolint
  # Filled in by the warning handler below through `<<-`; stays `NULL` when nothing is signalled.
  warn_text <- c()

  withCallingHandlers(
    # `list()` evaluates its arguments in order, so `warn_text` is read only after
    # `car::Anova()` has run and the handler had the chance to update it.
    expr = list(
      aov = car::Anova(mod, test.statistic = test.statistic, type = "III"),
      warn_text = warn_text
    ),
    warning = function(w) {
      # Keep the text of the warning (a later warning overwrites an earlier one) ...
      warn_text <<- trimws(paste0("Warning in `try_car_anova`: ", w))

      # ... and resume the computation without letting the warning surface.
      invokeRestart("muffleWarning")
    }
  )
}

#' Fit the Cox Regression Model and Anova
#'
#' This function derives the effect p-values from the [survival::coxph()] results using [car::Anova()].
#' The latter package introduces more flexibility to get the effect p-values.
#'
#' @inheritParams t_coxreg
#'
#' @return A list with items `mod` (results of [survival::coxph()]), `msum` (result of `summary`) and
#'   `aov` (result of [car::Anova()]).
#'
#' @noRd
# `data` has no default and `conf_level` defaults to 0.95: a default that refers to the argument
# itself (`data = data`) can never be evaluated and only produces a "promise already under
# evaluation" error when the argument is omitted.
fit_n_aov <- function(formula,
                      data,
                      conf_level = 0.95,
                      pval_method = c("wald", "likelihood"),
                      ...) {
  pval_method <- match.arg(pval_method)

  # Re-home the formula in this frame before it is handed to `coxph()`.
  environment(formula) <- environment()
  suppressWarnings({
    # We expect some warnings due to coxph which fails strict programming.
    mod <- survival::coxph(formula, data = data, ...)
    msum <- summary(mod, conf.int = conf_level)
  })

  aov <- try_car_anova(
    mod,
    test.statistic = switch(pval_method,
      "wald" = "Wald",
      "likelihood" = "LR"
    )
  )

  # Warnings captured during the Anova step are reported as a message and also attached to the
  # result as the "message" attribute.
  warn_attr <- aov$warn_text
  if (!is.null(aov$warn_text)) message(warn_attr)

  aov <- aov$aov
  y <- list(mod = mod, msum = msum, aov = aov)
  attr(y, "message") <- warn_attr

  y
}

# argument_checks
# Stop with an explanatory message unless `formula` is a formula object; returns `NULL` invisibly.
check_formula <- function(formula) {
  if (inherits(formula, "formula")) {
    return(invisible())
  }

  stop("Check `formula`. A formula should resemble `Surv(time = AVAL, event = 1 - CNSR) ~ study_arm(ARMCD)`.")
}

# Stop unless `covariates` is non-NULL and each of its elements is a formula; returns `NULL`
# invisibly otherwise.
check_covariate_formulas <- function(covariates) {
  is_formula <- vapply(X = covariates, FUN = inherits, what = "formula", FUN.VALUE = TRUE)
  if (is.null(covariates) || any(!is_formula)) {
    stop("Check `covariates`, it should be a list of right-hand-term formulas, e.g. list(Age = ~AGE).")
  }

  invisible()
}

# Give every element of `covariates` a name, using the right-hand term of its formula (via
# `rht()`) wherever a name is absent.
name_covariate_names <- function(covariates) {
  current_names <- names(covariates)

  if (is.null(current_names)) {
    # No names at all: derive every name from the formulas.
    names(covariates) <- vapply(covariates, FUN = rht, FUN.VALUE = "name")
  } else {
    # Partially named: only fill in the blank ones.
    blank <- current_names == ""
    if (any(blank)) names(covariates)[blank] <- vapply(covariates[blank], FUN = rht, FUN.VALUE = "name")
  }

  covariates
}

# Warn about every name in `increments` that is not the right-hand term of one of the
# `covariates` formulas. Nothing is checked when `increments` is `NULL`. Returns `NULL` invisibly.
check_increments <- function(increments, covariates) {
  if (is.null(increments)) {
    return(invisible())
  }

  covariate_terms <- vapply(covariates, FUN = rht, FUN.VALUE = "name")
  warn_if_unknown <- function(x) {
    if (!x %in% covariate_terms) {
      warning(
        paste(
          "Check `increments`, the `increment` for ", x,
          "doesn't match any names in investigated covariate(s)."
        )
      )
    }
  }
  lapply(X = names(increments), FUN = warn_if_unknown)

  invisible()
}

#' Multivariate Cox Model - Summarized Results
#'
#' Analyses based on multivariate Cox model are usually not performed for the Clinical Study Report (CSR) or
#' regulatory documents but serve exploratory purposes only (e.g., for publication). In practice, the model usually
#' includes only the main effects (without interaction terms). It produces the hazard ratio estimates for each of the
#' covariates included in the model.
#' The analysis follows the same principles (e.g., stratified vs. unstratified analysis and tie handling) as the
#' usual Cox model analysis. Since there is usually no pre-specified hypothesis testing for such analysis,
#' the p.values need to be interpreted with caution. (**Statistical Analysis of Clinical Trials Data with R**,
#' `NEST's bookdown`)
#'
#' @param formula (`formula`)\cr A formula corresponding to the investigated [survival::Surv()] survival model
#'   including covariates.
#' @param data (`data.frame`)\cr A data frame which includes the variable in formula and covariates.
#' @param conf_level (`proportion`)\cr The confidence level for the hazard ratio interval estimations. Default is 0.95.
#' @param pval_method (`character`)\cr The method used for the estimation of p-values, should be one of
#'   "wald" (default) or "likelihood".
#' @param ... Optional parameters passed to [survival::coxph()]. Can include `ties`, a character string specifying the
#'   method for tie handling, one of `efron`, `breslow` or `exact`. If `ties` is not given, the default of
#'   [survival::coxph()] applies.
#'
#' @return A `list` with elements `mod`, `msum`, `aov`, and `coef_inter`.
#'
#' @details The output is limited to single effect terms. Work is ongoing for estimation of interaction terms
#'   but is out of scope as defined by the  Global Data Standards Repository
#'   (**`GDS_Standard_TLG_Specs_Tables_2.doc`**).
#'
#' @seealso [estimate_coef()].
#'
#' @examples
#' library(dplyr)
#'
#' adtte <- tern_ex_adtte
#' adtte_f <- subset(adtte, PARAMCD == "OS") # _f: filtered
#' adtte_f <- filter(
#'   adtte_f,
#'   PARAMCD == "OS" &
#'     SEX %in% c("F", "M") &
#'     RACE %in% c("ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE")
#' )
#' adtte_f$SEX <- droplevels(adtte_f$SEX)
#' adtte_f$RACE <- droplevels(adtte_f$RACE)
#'
#' # Internal function - s_cox_multivariate
#' \dontrun{
#' s_cox_multivariate(
#'   formula = Surv(time = AVAL, event = 1 - CNSR) ~ (ARMCD + RACE + AGE)^2, data = adtte_f
#' )
#' }
#'
#' @keywords internal
s_cox_multivariate <- function(formula, data,
                               conf_level = 0.95,
                               pval_method = c("wald", "likelihood"),
                               ...) {
  # Variables of the formula, without the response (first row) and without `strata()` terms.
  tf <- stats::terms(formula, specials = c("strata"))
  special_rows <- unlist(attr(tf, "specials"))
  covariates <- rownames(attr(tf, "factors"))[-c(1, special_rows)]

  # Character covariates are converted to factors so that `levels()` is available further down.
  for (covariate in covariates) {
    if (is.character(data[[covariate]])) {
      data[[covariate]] <- as.factor(data[[covariate]])
    }
  }
  pval_method <- match.arg(pval_method)

  fit <- fit_n_aov(
    formula = formula,
    data = data,
    conf_level = conf_level,
    pval_method = pval_method,
    ...
  )
  mod <- fit$mod
  aov <- fit$aov
  msum <- fit$msum

  term_labels <- attr(mod$terms, "term.labels")
  term_orders <- attr(mod$terms, "order")

  # Hazard ratio estimates are only derived for interaction terms (order > 1).
  coef_inter <- NULL
  if (any(term_orders > 1)) {
    inter_terms <- term_labels[term_orders > 1]
    names(inter_terms) <- inter_terms

    # All-zero template carrying the coefficient names of the fitted model.
    mmat <- stats::model.matrix(mod)[1, ]
    mmat[!mmat == 0] <- 0
    mcoef <- stats::coef(mod)
    mvcov <- stats::vcov(mod)

    estimate_coef_local <- function(variable, given) {
      estimate_coef(
        variable, given,
        coef = mcoef, mmat = mmat, vcov = mvcov, conf_level = conf_level,
        lvl_var = levels(data[[variable]]), lvl_given = levels(data[[given]])
      )
    }

    coef_inter <- lapply(
      inter_terms, function(inter_term) {
        # Variables taking part in this interaction term.
        involved <- attr(mod$terms, "factors")[, inter_term]
        involved <- names(involved[involved > 0])
        # Each variable is estimated given the variable at the mirrored position.
        Map(estimate_coef_local, variable = involved, given = rev(involved))
      }
    )
  }

  list(mod = mod, msum = msum, aov = aov, coef_inter = coef_inter)
}

#' Confidence Intervals for a Difference of Binomials
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Several confidence intervals for the difference between proportions.
#'
#' @param grp (`factor`)\cr vector assigning observations to one out of two groups
#'   (e.g. reference and treatment group).
#'
#' @name desctools_binom
NULL

#' Recycle List of Parameters
#'
#' This function recycles all supplied elements to the maximal dimension.
#'
#' @param ... (`any`)\cr Elements to recycle.
#'
#' @return A `list` with one element per argument, each repeated with `rep(length.out = )` to the
#'   length of the longest argument. That common length is stored in the attribute `maxdim`; it is
#'   `0` when no arguments are supplied.
#'
#' @keywords internal
#' @noRd
h_recycle <- function(...) {
  lst <- list(...)
  # `max()` of an empty vector is `-Inf` (with a warning), so the maximum is anchored at 0.
  maxdim <- max(0L, lengths(lst))
  res <- lapply(lst, rep, length.out = maxdim)
  attr(res, "maxdim") <- maxdim
  res
}

#' @describeIn desctools_binom Several confidence intervals for the difference between proportions.
#'
#' @param x1,x2 (`count`)\cr number of successes in the first and second group, respectively.
#' @param n1,n2 (`count`)\cr number of trials in the first and second group, respectively.
#'
#' @return A `matrix` with one row per (recycled) combination of the arguments and 3 columns:
#'   * `est`: estimate of proportion difference.
#'   * `lwr.ci`: estimate of lower end of the confidence interval.
#'   * `upr.ci`: estimate of upper end of the confidence interval.
#'
#' @examples
#' # Internal function - desctools_binom
#' \dontrun{
#' set.seed(2)
#' rsp <- sample(c(TRUE, FALSE), replace = TRUE, size = 20)
#' grp <- factor(c(rep("A", 10), rep("B", 10)))
#' tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
#' desctools_binom(
#'   tbl[1], sum(tbl[1], tbl[3]), tbl[2], sum(tbl[2], tbl[4]),
#'   conf.level = 0.90, method = "waldcc"
#' )
#' }
#'
#' @keywords internal
desctools_binom <- function(x1,
                            n1,
                            x2,
                            n2,
                            conf.level = 0.95, # nolint
                            sides = c("two.sided", "left", "right"),
                            method = c(
                              "ac", "wald", "waldcc", "score",
                              "scorecc", "mn", "mee", "blj", "ha", "hal", "jp"
                            )) {
  # The arguments are validated with `several.ok = TRUE` further down, which would keep
  # every choice of an untouched default. A missing argument is therefore first
  # collapsed to its first (default) choice.
  if (missing(sides)) {
    sides <- match.arg(sides)
  }
  if (missing(method)) {
    method <- match.arg(method)
  }

  # Confidence interval for a single combination of scalar parameters.
  iBinomDiffCI <- function(x1, n1, x2, n2, conf.level, sides, method) { # nolint
    # A one-sided interval at level `conf.level` reuses the matching bound of the
    # two-sided interval at level `1 - 2 * (1 - conf.level)`.
    if (sides != "two.sided") {
      conf.level <- 1 - 2 * (1 - conf.level) # nolint
    }
    alpha <- 1 - conf.level
    kappa <- stats::qnorm(1 - alpha / 2)
    p1_hat <- x1 / n1
    p2_hat <- x2 / n2
    est <- p1_hat - p2_hat

    # Every branch sets `ci_lwr` and `ci_upr`.
    switch(method,
      wald = {
        vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2
        term2 <- kappa * sqrt(vd)
        ci_lwr <- max(-1, est - term2)
        ci_upr <- min(1, est + term2)
      },
      waldcc = {
        # Wald interval widened by a continuity correction.
        vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2
        term2 <- kappa * sqrt(vd)
        term2 <- term2 + 0.5 * (1 / n1 + 1 / n2)
        ci_lwr <- max(-1, est - term2)
        ci_upr <- min(1, est + term2)
      },
      ac = {
        # Wald-type interval after adding one success and one failure to each group.
        # The reported `est` remains the unadjusted difference.
        n1 <- n1 + 2
        n2 <- n2 + 2
        x1 <- x1 + 1
        x2 <- x2 + 1
        p1_hat <- x1 / n1
        p2_hat <- x2 / n2
        est1 <- p1_hat - p2_hat
        vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2
        term2 <- kappa * sqrt(vd)
        ci_lwr <- max(-1, est1 - term2)
        ci_upr <- min(1, est1 + term2)
      },
      # "exact" and "beal" (below) are not among the `method` choices of
      # `desctools_binom()`, so `match.arg()` rejects them before this point.
      exact = {
        ci_lwr <- NA
        ci_upr <- NA
      },
      score = {
        # Combines the Wilson intervals of the two single proportions.
        w1 <- desctools_binomci(
          x = x1, n = n1, conf.level = conf.level,
          method = "wilson"
        )
        w2 <- desctools_binomci(
          x = x2, n = n2, conf.level = conf.level,
          method = "wilson"
        )
        l1 <- w1[2]
        u1 <- w1[3]
        l2 <- w2[2]
        u2 <- w2[3]
        ci_lwr <- est - kappa * sqrt(l1 * (1 - l1) / n1 + u2 * (1 - u2) / n2)
        ci_upr <- est + kappa * sqrt(u1 * (1 - u1) / n1 + l2 * (1 - l2) / n2)
      },
      scorecc = {
        # As "score", based on the continuity-corrected Wilson intervals.
        w1 <- desctools_binomci(
          x = x1, n = n1, conf.level = conf.level,
          method = "wilsoncc"
        )
        w2 <- desctools_binomci(
          x = x2, n = n2, conf.level = conf.level,
          method = "wilsoncc"
        )
        l1 <- w1[2]
        u1 <- w1[3]
        l2 <- w2[2]
        u2 <- w2[3]
        ci_lwr <- max(-1, est - sqrt((p1_hat - l1)^2 + (u2 - p2_hat)^2))
        ci_upr <- min(1, est + sqrt((u1 - p1_hat)^2 + (p2_hat - l2)^2))
      },
      mee = {
        # Standard error of the difference, evaluated at proportions `p1d` and `p2d`
        # that are constrained to differ by exactly `dif`.
        .score <- function(p1, n1, p2, n2, dif) {
          if (dif > 1) dif <- 1
          if (dif < -1) dif <- -1
          diff <- p1 - p2 - dif
          if (abs(diff) == 0) {
            res <- 0
          } else {
            t <- n2 / n1
            a <- 1 + t
            b <- -(1 + t + p1 + t * p2 + dif * (t + 2))
            c <- dif * dif + dif * (2 * p1 + t + 1) + p1 + t * p2
            d <- -p1 * dif * (1 + dif)
            v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2
            if (abs(v) < .Machine$double.eps) v <- 0
            s <- sqrt((b / a / 3)^2 - c / a / 3)
            u <- ifelse(v > 0, 1, -1) * s
            w <- (3.141592654 + acos(v / u^3)) / 3
            p1d <- 2 * u * cos(w) - b / a / 3
            p2d <- p1d - dif
            res <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2)
          }
          return(sqrt(res))
        }
        # Two-sided p-value of the hypothesis that the true difference equals `delta`.
        pval <- function(delta) {
          z <- (est - delta) / .score(p1_hat, n1, p2_hat, n2, delta)
          2 * min(stats::pnorm(z), 1 - stats::pnorm(z))
        }
        # The bounds are the values of `delta` on either side of `est` whose
        # p-value equals `alpha`.
        ci_lwr <- max(-1, stats::uniroot(
          function(delta) pval(delta) - alpha,
          interval = c(-1 + 1e-06, est - 1e-06)
        )$root)
        ci_upr <- min(1, stats::uniroot(
          function(delta) pval(delta) - alpha,
          interval = c(est + 1e-06, 1 - 1e-06)
        )$root)
      },
      blj = {
        # Wald-type interval around proportions shifted to (x + 0.5) / (n + 1).
        p1_dash <- (x1 + 0.5) / (n1 + 1)
        p2_dash <- (x2 + 0.5) / (n2 + 1)
        vd <- p1_dash * (1 - p1_dash) / n1 + p2_dash * (1 - p2_dash) / n2
        term2 <- kappa * sqrt(vd)
        est_dash <- p1_dash - p2_dash
        ci_lwr <- max(-1, est_dash - term2)
        ci_upr <- min(1, est_dash + term2)
      },
      ha = {
        term2 <- 1 / (2 * min(n1, n2)) + kappa * sqrt(
          p1_hat * (1 - p1_hat) / (n1 - 1) + p2_hat * (1 - p2_hat) / (n2 - 1)
        )
        ci_lwr <- max(-1, est - term2)
        ci_upr <- min(1, est + term2)
      },
      mn = {
        # Interval halving (at most 50 steps) for the difference `y`, on the lower or
        # upper side of the estimate, at which the score statistic reaches `z`.
        .conf <- function(x1, n1, x2, n2, z, lower = FALSE) {
          p1 <- x1 / n1
          p2 <- x2 / n2
          p_hat <- p1 - p2
          dp <- 1 + ifelse(lower, 1, -1) * p_hat
          i <- 1
          while (i <= 50) {
            dp <- 0.5 * dp
            y <- p_hat + ifelse(lower, -1, 1) * dp
            score <- .score(p1, n1, p2, n2, y)
            if (score < z) {
              p_hat <- y
            }
            if ((dp < 1e-07) || (abs(z - score) < 1e-06)) {
              break
            } else {
              i <- i + 1
            }
          }
          return(y)
        }
        # Score statistic of the hypothesis that the true difference equals `dif`.
        .score <- function(p1, n1, p2, n2, dif) {
          diff <- p1 - p2 - dif
          if (abs(diff) == 0) {
            res <- 0
          } else {
            t <- n2 / n1
            a <- 1 + t
            b <- -(1 + t + p1 + t * p2 + dif * (t + 2))
            c <- dif * dif + dif * (2 * p1 + t + 1) + p1 + t * p2
            d <- -p1 * dif * (1 + dif)
            v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2
            s <- sqrt((b / a / 3)^2 - c / a / 3)
            u <- ifelse(v > 0, 1, -1) * s
            w <- (3.141592654 + acos(v / u^3)) / 3
            p1d <- 2 * u * cos(w) - b / a / 3
            p2d <- p1d - dif
            n <- n1 + n2
            var <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2) * n / (n - 1)
            res <- diff^2 / var
          }
          return(res)
        }
        z <- stats::qchisq(conf.level, 1)
        ci_lwr <- max(-1, .conf(x1, n1, x2, n2, z, TRUE))
        ci_upr <- min(1, .conf(x1, n1, x2, n2, z, FALSE))
      },
      beal = {
        a <- p1_hat + p2_hat
        b <- p1_hat - p2_hat
        u <- ((1 / n1) + (1 / n2)) / 4
        v <- ((1 / n1) - (1 / n2)) / 4
        V <- u * ((2 - a) * a - b^2) + 2 * v * (1 - a) * b # nolint
        z <- stats::qchisq(p = 1 - alpha / 2, df = 1)
        A <- sqrt(z * (V + z * u^2 * (2 - a) * a + z * v^2 * (1 - a)^2)) # nolint
        B <- (b + z * v * (1 - a)) / (1 + z * u) # nolint
        ci_lwr <- max(-1, B - A / (1 + z * u))
        ci_upr <- min(1, B + A / (1 + z * u))
      },
      hal = {
        # `psi` is the mean of the two observed proportions.
        psi <- (p1_hat + p2_hat) / 2
        u <- (1 / n1 + 1 / n2) / 4
        v <- (1 / n1 - 1 / n2) / 4
        z <- kappa
        theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 * psi)) / (1 + z^2 * u)
        w <- z / (1 + z^2 * u) * sqrt(
          u * (4 * psi * (1 - psi) - (p1_hat - p2_hat)^2) +
            2 * v * (1 - 2 * psi) * (p1_hat - p2_hat) +
            4 * z^2 * u^2 * (1 - psi) * psi +
            z^2 * v^2 * (1 - 2 * psi)^2
        )
        ci_lwr <- max(-1, theta - w)
        ci_upr <- min(1, theta + w)
      },
      jp = {
        # Same formula as "hal", with `psi` based on proportions shifted to
        # (x + 0.5) / (n + 1).
        psi <- 0.5 * ((x1 + 0.5) / (n1 + 1) + (x2 + 0.5) / (n2 + 1))
        u <- (1 / n1 + 1 / n2) / 4
        v <- (1 / n1 - 1 / n2) / 4
        z <- kappa
        theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 * psi)) / (1 + z^2 * u)
        w <- z / (1 + z^2 * u) * sqrt(
          u * (4 * psi * (1 - psi) - (p1_hat - p2_hat)^2) +
            2 * v * (1 - 2 * psi) * (p1_hat - p2_hat) +
            4 * z^2 * u^2 * (1 - psi) * psi +
            z^2 * v^2 * (1 - 2 * psi)^2
        )
        ci_lwr <- max(-1, theta - w)
        ci_upr <- min(1, theta + w)
      }
    )
    ci <- c(
      est = est,
      lwr.ci = min(ci_lwr, ci_upr),
      upr.ci = max(ci_lwr, ci_upr)
    )
    # One-sided intervals are unbounded on the other side of the [-1, 1] range.
    if (sides == "left") {
      ci[3] <- 1
    } else if (sides == "right") {
      ci[2] <- -1
    }
    ci
  }

  method <- match.arg(arg = method, several.ok = TRUE)
  sides <- match.arg(arg = sides, several.ok = TRUE)

  # All parameters are recycled to a common length; one interval per position.
  lst <- h_recycle(
    x1 = x1, n1 = n1, x2 = x2, n2 = n2, conf.level = conf.level,
    sides = sides, method = method
  )
  res <- t(vapply(seq_len(attr(lst, "maxdim")), function(i) {
    iBinomDiffCI(
      x1 = lst$x1[i], n1 = lst$n1[i], x2 = lst$x2[i], n2 = lst$n2[i],
      conf.level = lst$conf.level[i], sides = lst$sides[i], method = lst$method[i]
    )
  }, numeric(3)))

  # Row names are built from the parameters that vary between rows. Counts are
  # represented by their names or, if unnamed, by "<argument>.<position>".
  label_or_index <- function(values, prefix) {
    if (is.null(names(values))) {
      paste(prefix, seq_along(values), sep = ".")
    } else {
      names(values)
    }
  }
  lgn <- h_recycle(
    x1 = label_or_index(x1, "x1"),
    n1 = label_or_index(n1, "n1"),
    x2 = label_or_index(x2, "x2"),
    n2 = label_or_index(n2, "n2"),
    conf.level = conf.level, sides = sides, method = method
  )
  varying <- vapply(lgn, function(x) length(unique(x)) != 1, logical(1))
  xn <- apply(as.data.frame(lgn[varying]), 1, paste, collapse = ":")
  rownames(res) <- xn
  res
}

#' @describeIn desctools_binom Compute confidence intervals for binomial proportions.
#'
#' @param x (`count`)\cr number of successes
#' @param n (`count`)\cr number of trials
#' @param conf.level (`proportion`)\cr confidence level, defaults to 0.95.
#' @param sides (`character`)\cr side of the confidence interval to compute. Must be one of "two.sided" (default),
#'   "left", or "right".
#' @param method (`character`)\cr method to use. For `desctools_binomci()` one out of: "wilson" (default), "wald",
#'   "waldcc", "wilsoncc", "agresti-coull", "jeffreys", "modified wilson", "modified jeffreys", "clopper-pearson",
#'   "arcsine", "logit", "witting", "pratt", "midp", "lik", and "blaker". For `desctools_binom()` one out of:
#'   "ac" (default), "wald", "waldcc", "score", "scorecc", "mn", "mee", "blj", "ha", "hal", and "jp".
#' @param rand (`number`)\cr seed handed to `set.seed()` by the "witting" method. Note that this changes the
#'   state of the global random number generator.
#' @param tol (`number`)\cr step size with which the "blaker" method moves the interval bounds. The "lik" method
#'   uses its own fixed tolerance.
#'
#' @return A `matrix` with 3 columns containing:
#'   * `est`: estimate of the proportion.
#'   * `lwr.ci`: lower end of the confidence interval.
#'   * `upr.ci`: upper end of the confidence interval.
#'
#' @keywords internal
desctools_binomci <- function(x,
                              n,
                              conf.level = 0.95, # nolint
                              sides = c("two.sided", "left", "right"),
                              method = c(
                                "wilson", "wald", "waldcc", "agresti-coull",
                                "jeffreys", "modified wilson", "wilsoncc", "modified jeffreys",
                                "clopper-pearson", "arcsine", "logit", "witting", "pratt",
                                "midp", "lik", "blaker"
                              ),
                              rand = 123,
                              tol = 1e-05) {
  # An untouched default is a vector of all choices; reduce it to the default choice
  # so that it does not take part in the recycling below.
  if (missing(method)) {
    method <- "wilson"
  }
  if (missing(sides)) {
    sides <- "two.sided"
  }

  # Confidence interval for a single combination of scalar parameters.
  iBinomCI <- function(x, n, conf.level, sides, method, rand, tol) { # nolint
    if (length(x) != 1) {
      stop("'x' has to be of length 1 (number of successes)")
    }
    if (length(n) != 1) {
      stop("'n' has to be of length 1 (number of trials)")
    }
    if (length(conf.level) != 1) {
      stop("'conf.level' has to be of length 1 (confidence level)")
    }
    if (conf.level < 0.5 || conf.level > 1) {
      stop("'conf.level' has to be in [0.5, 1]")
    }
    sides <- match.arg(sides, choices = c("two.sided", "left", "right"), several.ok = FALSE)
    # A one-sided interval at level `conf.level` reuses the matching bound of the
    # two-sided interval at level `1 - 2 * (1 - conf.level)`.
    if (sides != "two.sided") {
      conf.level <- 1 - 2 * (1 - conf.level) # nolint
    }
    alpha <- 1 - conf.level
    kappa <- stats::qnorm(1 - alpha / 2)
    p_hat <- x / n
    q_hat <- 1 - p_hat
    est <- p_hat

    method <- match.arg(arg = method, choices = c(
      "wilson",
      "wald", "waldcc", "wilsoncc", "agresti-coull", "jeffreys",
      "modified wilson", "modified jeffreys", "clopper-pearson",
      "arcsine", "logit", "witting", "pratt", "midp", "lik",
      "blaker"
    ))

    # Every branch sets `ci_lwr` and `ci_upr`; some also replace `est`.
    switch(method,
      wald = {
        term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n)
        ci_lwr <- max(0, p_hat - term2)
        ci_upr <- min(1, p_hat + term2)
      },
      waldcc = {
        # Wald interval widened by a continuity correction.
        term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n)
        term2 <- term2 + 1 / (2 * n)
        ci_lwr <- max(0, p_hat - term2)
        ci_upr <- min(1, p_hat + term2)
      },
      wilson = {
        term1 <- (x + kappa^2 / 2) / (n + kappa^2)
        term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat * q_hat + kappa^2 / (4 * n))
        ci_lwr <- max(0, term1 - term2)
        ci_upr <- min(1, term1 + term2)
      },
      wilsoncc = {
        lci <- (2 * x + kappa^2 - 1 - kappa * sqrt(kappa^2 - 2 - 1 / n + 4 * p_hat * (n * q_hat + 1))) /
          (2 * (n + kappa^2))
        uci <- (2 * x + kappa^2 + 1 + kappa * sqrt(kappa^2 + 2 - 1 / n + 4 * p_hat * (n * q_hat - 1))) /
          (2 * (n + kappa^2))
        ci_lwr <- max(0, ifelse(p_hat == 0, 0, lci))
        ci_upr <- min(1, ifelse(p_hat == 1, 1, uci))
      },
      `agresti-coull` = {
        # Wald-type interval around an adjusted proportion, which is also reported as `est`.
        x_tilde <- x + kappa^2 / 2
        n_tilde <- n + kappa^2
        p_tilde <- x_tilde / n_tilde
        q_tilde <- 1 - p_tilde
        est <- p_tilde
        term2 <- kappa * sqrt(p_tilde * q_tilde) / sqrt(n_tilde)
        ci_lwr <- max(0, p_tilde - term2)
        ci_upr <- min(1, p_tilde + term2)
      },
      jeffreys = {
        # Quantiles of a Beta(x + 0.5, n - x + 0.5) distribution, with fixed bounds
        # at the extremes x = 0 and x = n.
        if (x == 0) {
          ci_lwr <- 0
        } else {
          ci_lwr <- stats::qbeta(alpha / 2, x + 0.5, n - x + 0.5)
        }
        if (x == n) {
          ci_upr <- 1
        } else {
          ci_upr <- stats::qbeta(1 - alpha / 2, x + 0.5, n - x + 0.5)
        }
      },
      `modified wilson` = {
        # Wilson interval, except that for counts close to 0 (lower bound) or close
        # to n (upper bound) the bound is taken from a chi-square quantile.
        term1 <- (x + kappa^2 / 2) / (n + kappa^2)
        term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat * q_hat + kappa^2 / (4 * n))
        if ((n <= 50 && x %in% c(1, 2)) || (n >= 51 && x %in% c(1:3))) {
          ci_lwr <- 0.5 * stats::qchisq(alpha, 2 * x) / n
        } else {
          ci_lwr <- max(0, term1 - term2)
        }
        if ((n <= 50 && x %in% c(n - 1, n - 2)) || (n >= 51 && x %in% c(n - (1:3)))) {
          ci_upr <- 1 - 0.5 * stats::qchisq(alpha, 2 * (n - x)) / n
        } else {
          ci_upr <- min(1, term1 + term2)
        }
      },
      `modified jeffreys` = {
        if (x == n) {
          ci_lwr <- (alpha / 2)^(1 / n)
        } else if (x <= 1) {
          ci_lwr <- 0
        } else {
          ci_lwr <- stats::qbeta(alpha / 2, x + 0.5, n - x + 0.5)
        }
        if (x == 0) {
          ci_upr <- 1 - (alpha / 2)^(1 / n)
        } else if (x >= n - 1) {
          ci_upr <- 1
        } else {
          ci_upr <- stats::qbeta(1 - alpha / 2, x + 0.5, n - x + 0.5)
        }
      },
      `clopper-pearson` = {
        ci_lwr <- stats::qbeta(alpha / 2, x, n - x + 1)
        ci_upr <- stats::qbeta(1 - alpha / 2, x + 1, n - x)
      },
      arcsine = {
        # Interval on the arcsine-square-root scale around an adjusted proportion,
        # which is also reported as `est`.
        p_tilde <- (x + 0.375) / (n + 0.75)
        est <- p_tilde
        ci_lwr <- sin(asin(sqrt(p_tilde)) - 0.5 * kappa / sqrt(n))^2
        ci_upr <- sin(asin(sqrt(p_tilde)) + 0.5 * kappa / sqrt(n))^2
      },
      logit = {
        # Wald interval on the log-odds scale, transformed back to a proportion.
        lambda_hat <- log(x / (n - x))
        V_hat <- n / (x * (n - x)) # nolint
        lambda_lower <- lambda_hat - kappa * sqrt(V_hat)
        lambda_upper <- lambda_hat + kappa * sqrt(V_hat)
        ci_lwr <- exp(lambda_lower) / (1 + exp(lambda_lower))
        ci_upr <- exp(lambda_upper) / (1 + exp(lambda_upper))
      },
      witting = {
        # Randomized interval: a uniform draw is added to `x`. The seed is set
        # explicitly, which alters the global random number generator state.
        set.seed(rand)
        x_tilde <- x + stats::runif(1, min = 0, max = 1)
        # Binomial distribution function, linearly interpolated between integers.
        pbinom_abscont <- function(q, size, prob) {
          v <- trunc(q)
          term1 <- stats::pbinom(v - 1, size = size, prob = prob)
          term2 <- (q - v) * stats::dbinom(v, size = size, prob = prob)
          return(term1 + term2)
        }
        # Success probability at which the interpolated distribution function of `x` equals `p`.
        qbinom_abscont <- function(p, size, x) {
          fun <- function(prob, size, x, p) {
            pbinom_abscont(x, size, prob) - p
          }
          stats::uniroot(fun, interval = c(0, 1), size = size, x = x, p = p)$root
        }
        ci_lwr <- qbinom_abscont(1 - alpha, size = n, x = x_tilde)
        ci_upr <- qbinom_abscont(alpha, size = n, x = x_tilde)
      },
      pratt = {
        # Closed-form bounds for the extreme counts, an approximation otherwise.
        if (x == 0) {
          ci_lwr <- 0
          ci_upr <- 1 - alpha^(1 / n)
        } else if (x == 1) {
          ci_lwr <- 1 - (1 - alpha / 2)^(1 / n)
          ci_upr <- 1 - (alpha / 2)^(1 / n)
        } else if (x == (n - 1)) {
          ci_lwr <- (alpha / 2)^(1 / n)
          ci_upr <- (1 - alpha / 2)^(1 / n)
        } else if (x == n) {
          ci_lwr <- alpha^(1 / n)
          ci_upr <- 1
        } else {
          z <- stats::qnorm(1 - alpha / 2)
          A <- ((x + 1) / (n - x))^2 # nolint
          B <- 81 * (x + 1) * (n - x) - 9 * n - 8 # nolint
          C <- (0 - 3) * z * sqrt(9 * (x + 1) * (n - x) * (9 * n + 5 - z^2) + n + 1) # nolint
          D <- 81 * (x + 1)^2 - 9 * (x + 1) * (2 + z^2) + 1 # nolint
          E <- 1 + A * ((B + C) / D)^3 # nolint
          ci_upr <- 1 / E
          A <- (x / (n - x - 1))^2 # nolint
          B <- 81 * x * (n - x - 1) - 9 * n - 8 # nolint
          C <- 3 * z * sqrt(9 * x * (n - x - 1) * (9 * n + 5 - z^2) + n + 1) # nolint
          D <- 81 * x^2 - 9 * x * (2 + z^2) + 1 # nolint
          E <- 1 + A * ((B + C) / D)^3 # nolint
          ci_lwr <- 1 / E
        }
      },
      midp = {
        # The bounds are the roots of the tail probability (counting the observed
        # value with weight one half) minus half of the error probability.
        f_low <- function(pi, x, n) {
          1 / 2 * stats::dbinom(x, size = n, prob = pi) +
            stats::pbinom(x, size = n, prob = pi, lower.tail = FALSE) -
            (1 - conf.level) / 2
        }
        f_up <- function(pi, x, n) {
          1 / 2 * stats::dbinom(x, size = n, prob = pi) +
            stats::pbinom(x - 1, size = n, prob = pi) -
            (1 - conf.level) / 2
        }
        ci_lwr <- 0
        ci_upr <- 1
        if (x != 0) {
          ci_lwr <- stats::uniroot(f_low, interval = c(0, p_hat), x = x, n = n)$root
        }
        if (x != n) {
          ci_upr <- stats::uniroot(f_up, interval = c(p_hat, 1), x = x, n = n)$root
        }
      },
      lik = {
        ci_lwr <- 0
        ci_upr <- 1
        z <- stats::qnorm(1 - alpha * 0.5)
        # This method always works with its own tolerance, whatever `tol` was passed in.
        tol <- .Machine$double.eps^0.5
        # Signed square root of the binomial deviance between `y` and `mu`, shifted by `bound`.
        BinDev <- function(y, x, mu, wt, bound = 0, tol = .Machine$double.eps^0.5, ...) { # nolint
          ll_y <- ifelse(y %in% c(0, 1), 0, stats::dbinom(x, wt, y, log = TRUE))
          ll_mu <- ifelse(mu %in% c(0, 1), 0, stats::dbinom(x, wt, mu, log = TRUE))
          res <- ifelse(abs(y - mu) < tol, 0, sign(y - mu) * sqrt(-2 * (ll_y - ll_mu)))
          return(res - bound)
        }
        # NOTE(review): an `if` without `else` yields `NULL` when its condition is
        # `FALSE`; `max(0, NULL)` / `min(1, NULL)` further down then fall back to 0 / 1.
        if (x != 0 && tol < p_hat) {
          ci_lwr <- if (BinDev(tol, x, p_hat, n, -z, tol) <= 0) {
            stats::uniroot(
              f = BinDev,
              interval = c(tol, if (p_hat < tol || p_hat == 1) 1 - tol else p_hat),
              bound = -z, x = x, mu = p_hat, wt = n
            )$root
          }
        }
        if (x != n && p_hat < (1 - tol)) {
          ci_upr <- if (
            BinDev(y = 1 - tol, x = x, mu = ifelse(p_hat > 1 - tol, tol, p_hat), wt = n, bound = z, tol = tol) < 0
          ) {
            # NOTE(review): in this branch the upper bound receives the value of the
            # (re-computed) lower bound; the logic is kept exactly as it was.
            ci_lwr <- if (
              BinDev(tol, x, if (p_hat < tol || p_hat == 1) 1 - tol else p_hat, n, -z, tol) <= 0
            ) {
              stats::uniroot(
                f = BinDev, interval = c(tol, p_hat),
                bound = -z, x = x, mu = p_hat, wt = n
              )$root
            }
          } else {
            stats::uniroot(
              f = BinDev,
              interval = c(if (p_hat > 1 - tol) tol else p_hat, 1 - tol),
              bound = z, x = x, mu = p_hat, wt = n
            )$root
          }
        }
      },
      blaker = {
        acceptbin <- function(x, n, p) {
          p1 <- 1 - stats::pbinom(x - 1, n, p)
          p2 <- stats::pbinom(x, n, p)
          a1 <- p1 + stats::pbinom(stats::qbinom(p1, n, p) - 1, n, p)
          a2 <- p2 + 1 - stats::pbinom(stats::qbinom(1 - p2, n, p), n, p)
          return(min(a1, a2))
        }
        # Start from the Clopper-Pearson bounds and move them inwards in steps of
        # `tol` for as long as the next value is still rejected.
        ci_lwr <- 0
        ci_upr <- 1
        if (x != 0) {
          ci_lwr <- stats::qbeta((1 - conf.level) / 2, x, n - x + 1)
          while (acceptbin(x, n, ci_lwr + tol) < (1 - conf.level)) {
            ci_lwr <- ci_lwr + tol
          }
        }
        if (x != n) {
          ci_upr <- stats::qbeta(1 - (1 - conf.level) / 2, x + 1, n - x)
          while (acceptbin(x, n, ci_upr - tol) < (1 - conf.level)) {
            ci_upr <- ci_upr - tol
          }
        }
      }
    )
    ci <- c(est = est, lwr.ci = max(0, ci_lwr), upr.ci = min(1, ci_upr))
    # One-sided intervals are unbounded on the other side of the [0, 1] range.
    if (sides == "left") {
      ci[3] <- 1
    } else if (sides == "right") {
      ci[2] <- 0
    }
    ci
  }

  # All parameters are recycled to a common length; one interval per position.
  # `tol` is recycled and forwarded like `rand`, so that a user-supplied value is honoured.
  lgp <- h_recycle(
    x = x, n = n, conf.level = conf.level, sides = sides,
    method = method, rand = rand, tol = tol
  )
  maxdim <- attr(lgp, "maxdim")

  # Row names are built from the parameters that vary between rows. Counts are
  # represented by their names or, if unnamed, by "<argument>.<position>".
  label_or_index <- function(values, prefix) {
    if (is.null(names(values))) {
      paste(prefix, seq_along(values), sep = ".")
    } else {
      names(values)
    }
  }
  lgn <- h_recycle(
    x = label_or_index(x, "x"),
    n = label_or_index(n, "n"),
    conf.level = conf.level, sides = sides, method = method
  )
  varying <- vapply(lgn, function(x) length(unique(x)) != 1, logical(1))
  xn <- apply(as.data.frame(lgn[varying]), 1, paste, collapse = ":")

  res <- t(vapply(seq_len(maxdim), function(i) {
    iBinomCI(
      x = lgp$x[i], n = lgp$n[i], conf.level = lgp$conf.level[i], sides = lgp$sides[i],
      method = lgp$method[i], rand = lgp$rand[i], tol = lgp$tol[i]
    )
  }, numeric(3)))
  # A named `x` would otherwise leak its name into the first column name.
  colnames(res)[1] <- c("est")
  rownames(res) <- xn
  res
}

#' Create a Forest Plot based on a Table
#'
#' Create a forest plot from any [rtables::rtable()] object that has a
#' column with a single value and a column with 2 values.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @param tbl (`rtable`)
#' @param col_x (`integer`)\cr column index with estimator. By default tries to get this from
#'   `tbl` attribute `col_x`, otherwise needs to be manually specified.
#' @param col_ci (`integer`)\cr column index with confidence intervals. By default tries
#'   to get this from `tbl` attribute `col_ci`, otherwise needs to be manually specified.
#' @param vline (`number`)\cr x coordinate for vertical line, if `NULL` then the line is omitted.
#' @param forest_header (`character`, length 2)\cr text displayed to the left and right of `vline`, respectively.
#'   If `vline = NULL` then `forest_header` needs to be `NULL` too.
#'   By default tries to get this from `tbl` attribute `forest_header`.
#' @param xlim (`numeric`)\cr limits for x axis.
#' @param logx (`flag`)\cr show the x-values on logarithm scale.
#' @param x_at (`numeric`)\cr x-tick locations, if `NULL` they get automatically chosen.
#' @param width_row_names (`unit`)\cr width for row names.
#'   If `NULL` the widths get automatically calculated. See [grid::unit()].
#' @param width_columns (`unit`)\cr widths for the table columns.
#'   If `NULL` the widths get automatically calculated. See [grid::unit()].
#' @param width_forest (`unit`)\cr width for the forest column.
#'   If `NULL` the widths get automatically calculated. See [grid::unit()].
#' @param col_symbol_size (`integer`)\cr column index from `tbl` containing data to be used
#'   to determine relative size for estimator plot symbol. Typically, the symbol size is proportional
#'   to the sample size used to calculate the estimator. If `NULL`, the same symbol size is used for all subgroups.
#'   By default tries to get this from `tbl` attribute `col_symbol_size`, otherwise needs to be manually specified.
#' @param col (`character`)\cr color(s).
#'
#' @return `gTree` object containing the forest plot and table.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(forcats)
#' library(nestcolor)
#'
#' adrs <- tern_ex_adrs
#' n_records <- 20
#' adrs_labels <- formatters::var_labels(adrs, fill = TRUE)
#' adrs <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
#'   slice(seq_len(n_records)) %>%
#'   droplevels() %>%
#'   mutate(
#'     # Reorder levels of factor to make the placebo group the reference arm.
#'     ARM = fct_relevel(ARM, "B: Placebo"),
#'     rsp = AVALC == "CR"
#'   )
#' formatters::var_labels(adrs) <- c(adrs_labels, "Response")
#' df <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "STRATA2")),
#'   data = adrs
#' )
#' # Full commonly used response table.
#'
#' tbl <- basic_table() %>%
#'   tabulate_rsp_subgroups(df)
#' p <- g_forest(tbl)
#'
#' draw_grob(p)
#'
#' # Odds ratio only table.
#'
#' tbl_or <- basic_table() %>%
#'   tabulate_rsp_subgroups(df, vars = c("n_tot", "or", "ci"))
#' tbl_or
#' p <- g_forest(
#'   tbl_or,
#'   forest_header = c("Comparison\nBetter", "Treatment\nBetter")
#' )
#'
#' draw_grob(p)
#'
#' # Survival forest plot example.
#' adtte <- tern_ex_adtte
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte, fill = TRUE)
#' adtte_f <- adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X"),
#'     SEX %in% c("M", "F")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
#'     SEX = droplevels(SEX),
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- list(
#'   "ARM" = adtte_labels["ARM"],
#'   "SEX" = adtte_labels["SEX"],
#'   "AVALU" = adtte_labels["AVALU"],
#'   "is_event" = "Event Flag"
#' )
#' formatters::var_labels(adtte_f)[names(labels)] <- as.character(labels)
#' df <- extract_survival_subgroups(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM", subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#' table_hr <- basic_table() %>%
#'   tabulate_survival_subgroups(df, time_unit = adtte_f$AVALU[1])
#' g_forest(table_hr)
#' # Works with any `rtable`.
#' tbl <- rtable(
#'   header = c("E", "CI", "N"),
#'   rrow("", 1, c(.8, 1.2), 200),
#'   rrow("", 1.2, c(1.1, 1.4), 50)
#' )
#' g_forest(
#'   tbl = tbl,
#'   col_x = 1,
#'   col_ci = 2,
#'   xlim = c(0.5, 2),
#'   x_at = c(0.5, 1, 2),
#'   col_symbol_size = 3
#' )
#' tbl <- rtable(
#'   header = rheader(
#'     rrow("", rcell("A", colspan = 2)),
#'     rrow("", "c1", "c2")
#'   ),
#'   rrow("row 1", 1, c(.8, 1.2)),
#'   rrow("row 2", 1.2, c(1.1, 1.4))
#' )
#' g_forest(
#'   tbl = tbl,
#'   col_x = 1,
#'   col_ci = 2,
#'   xlim = c(0.5, 2),
#'   x_at = c(0.5, 1, 2),
#'   vline = 1,
#'   forest_header = c("Hello", "World")
#' )
#' }
#'
#' @export
g_forest <- function(tbl,
                     col_x = attr(tbl, "col_x"),
                     col_ci = attr(tbl, "col_ci"),
                     vline = 1,
                     forest_header = attr(tbl, "forest_header"),
                     xlim = c(0.1, 10),
                     logx = TRUE,
                     x_at = c(0.1, 1, 10),
                     width_row_names = NULL,
                     width_columns = NULL,
                     width_forest = grid::unit(1, "null"),
                     col_symbol_size = attr(tbl, "col_symbol_size"),
                     col = getOption("ggplot2.discrete.colour")[1],
                     draw = TRUE,
                     newpage = TRUE) {
  checkmate::assert_class(tbl, "VTableTree")

  nr <- nrow(tbl)
  nc <- ncol(tbl)
  # Fall back to blue when no colour is available.
  if (is.null(col)) {
    col <- "blue"
  }

  # Column indices have to point to existing columns of the table.
  checkmate::assert_number(col_x, lower = 0, upper = nc, null.ok = FALSE)
  checkmate::assert_number(col_ci, lower = 0, upper = nc, null.ok = FALSE)
  checkmate::assert_number(col_symbol_size, lower = 0, upper = nc, null.ok = TRUE)
  checkmate::assert_true(col_x > 0)
  checkmate::assert_true(col_ci > 0)
  checkmate::assert_character(col)
  if (!is.null(col_symbol_size)) {
    checkmate::assert_true(col_symbol_size > 0)
  }

  # Numeric content of the table cell in row `i` and column `j`, or `NULL` when the
  # cell is empty or not numeric. Selecting a label row returns `NULL` with a
  # warning, which is suppressed.
  numeric_cell <- function(i, j) {
    cell <- suppressWarnings(as.vector(tbl[i, j, drop = TRUE]))
    if (is.null(cell) || length(cell) <= 0 || !is.numeric(cell)) {
      return(NULL)
    }
    cell
  }
  row_ids <- seq_len(nr)

  # Point estimate of each row (`NA` for rows without a numeric value).
  x_e <- vapply(row_ids, function(i) {
    value <- numeric_cell(i, col_x)
    if (is.null(value)) NA_real_ else value
  }, numeric(1))

  # Confidence interval of each row as a pair (lower, upper).
  x_ci <- lapply(row_ids, function(i) {
    value <- numeric_cell(i, col_ci)
    if (is.null(value)) {
      return(c(NA_real_, NA_real_))
    }
    if (length(value) != 2) {
      stop("ci column needs two elements")
    }
    value
  })

  lower <- vapply(x_ci, function(bounds) bounds[1], numeric(1))
  upper <- vapply(x_ci, function(bounds) bounds[2], numeric(1))

  symbol_size <- NULL
  if (!is.null(col_symbol_size)) {
    raw_size <- vapply(row_ids, function(i) {
      value <- numeric_cell(i, col_symbol_size)
      if (is.null(value)) NA_real_ else value
    }, numeric(1))

    # Symbol sizes follow the square root of the values and are scaled so that the
    # biggest symbol gets size 2. The biggest points then have a radius of
    # 2 * (1/3.5) lines, so that they do not overlap. See forest_dot_line.
    root_size <- sqrt(raw_size)
    symbol_size <- 2 * root_size / max(root_size, na.rm = TRUE)
  }

  grob_forest <- forest_grob(
    tbl,
    x_e,
    lower,
    upper,
    vline,
    forest_header,
    xlim,
    logx,
    x_at,
    width_row_names,
    width_columns,
    width_forest,
    symbol_size = symbol_size,
    col = col,
    vp = grid::plotViewport(margins = rep(1, 4))
  )

  # Drawing is optional; the grob is returned invisibly either way.
  if (draw) {
    if (newpage) {
      grid::grid.newpage()
    }
    grid::grid.draw(grob_forest)
  }

  invisible(grob_forest)
}

#' Forest Plot Grob
#'
#' @inheritParams g_forest
#' @param tbl ([rtables::rtable()])
#' @param x (`numeric`)\cr coordinate of point.
#' @param lower,upper (`numeric`)\cr lower/upper bound of the confidence interval.
#' @param symbol_size (`numeric`)\cr vector with relative size for plot symbol.
#' If `NULL`, the same symbol size is used.
#'
#' @details
#' The heights get automatically determined.
#'
#' @noRd
#'
#' @examples
#' tbl <- rtable(
#'   header = rheader(
#'     rrow("", "E", rcell("CI", colspan = 2), "N"),
#'     rrow("", "A", "B", "C", "D")
#'   ),
#'   rrow("row 1", 1, 0.8, 1.1, 16),
#'   rrow("row 2", 1.4, 0.8, 1.6, 25),
#'   rrow("row 3", 1.2, 0.8, 1.6, 36)
#' )
#'
#' x <- c(1, 1.4, 1.2)
#' lower <- c(0.8, 0.8, 0.8)
#' upper <- c(1.1, 1.6, 1.6)
#' # numeric vector with multiplication factor to scale each circle radius
#' # default radius is 1/3.5 lines
#' symbol_scale <- c(1, 1.25, 1.5)
#'
#' # Internal function - forest_grob
#' \dontrun{
#' p <- forest_grob(tbl, x, lower, upper,
#'   vline = 1, forest_header = c("A", "B"),
#'   x_at = c(.1, 1, 10), xlim = c(0.1, 10), logx = TRUE, symbol_size = symbol_scale,
#'   vp = grid::plotViewport(margins = c(1, 1, 1, 1))
#' )
#'
#' draw_grob(p)
#' }
forest_grob <- function(tbl,
                        x,
                        lower,
                        upper,
                        vline,
                        forest_header,
                        xlim = NULL,
                        logx = FALSE,
                        x_at = NULL,
                        width_row_names = NULL,
                        width_columns = NULL,
                        width_forest = grid::unit(1, "null"),
                        symbol_size = NULL,
                        col = "blue",
                        name = NULL,
                        gp = NULL,
                        vp = NULL) {
  # Assembles a `gTree` holding the table text (header and body), a spacer line, the optional
  # forest header labels, the shaded forest panel with reference line and x-axis, and one
  # dot-and-line grob per table row. The viewports all children refer to are created by
  # `forest_viewport()` and attached as `childrenvp`.
  nr <- nrow(tbl)
  # Without a reference line there is nothing to place the forest header around, so a
  # forest header is only accepted together with `vline`.
  if (is.null(vline)) {
    checkmate::assert_true(is.null(forest_header))
  } else {
    checkmate::assert_number(vline)
    checkmate::assert_character(forest_header, len = 2, null.ok = TRUE)
  }

  # One estimate / bound / symbol size per table row.
  checkmate::assert_numeric(x, len = nr)
  checkmate::assert_numeric(lower, len = nr)
  checkmate::assert_numeric(upper, len = nr)
  checkmate::assert_numeric(symbol_size, len = nr, null.ok = TRUE)
  checkmate::assert_character(col)

  # Default: the same relative symbol size for every row.
  if (is.null(symbol_size)) {
    symbol_size <- rep(1, nr)
  }

  # Default x-limits: range of all plotted values, extended by 5% on each side.
  if (is.null(xlim)) {
    r <- range(c(x, lower, upper), na.rm = TRUE)
    xlim <- r + c(-0.05, 0.05) * diff(r)
  }

  # On the log scale every coordinate is log-transformed, while the axis labels keep the
  # original scale.
  if (logx) {
    if (is.null(x_at)) {
      x_at <- pretty(log(stats::na.omit(c(x, lower, upper))))
      x_labels <- exp(x_at)
    } else {
      x_labels <- x_at
      x_at <- log(x_at)
    }
    xlim <- log(xlim)
    x <- log(x)
    lower <- log(lower)
    upper <- log(upper)
    if (!is.null(vline)) {
      vline <- log(vline)
    }
  } else {
    # `TRUE` is passed on as the `label` argument of `grid::xaxisGrob()` below.
    x_labels <- TRUE
  }

  # Viewport whose native x-scale is built from `xlim`; shared by the header labels, the
  # reference line, the axis and the per-row dot lines.
  data_forest_vp <- grid::dataViewport(xlim, c(0, 1))

  # Get table content as matrix form.
  mf <- matrix_form(tbl)

  # Use `rtables` indent_string eventually.
  # Prefix each row name with four spaces per indent level (header rows get no indent).
  mf$strings[, 1] <- paste0(
    strrep("    ", c(rep(0, attr(mf, "nrow_header")), mf$row_info$indent)),
    mf$strings[, 1]
  )

  n_header <- attr(mf, "nrow_header")

  if (any(mf$display[, 1] == FALSE)) stop("row names need to be always displayed")

  # Pre-process the data to be used in lapply and cell_in_rows.
  # Returns one argument list per row of the requested part; `row_index` is relative to
  # that part (body rows are shifted back by the number of header rows).
  to_args_for_cell_in_rows_fun <- function(part = c("body", "header"),
                                           underline_colspan = FALSE) {
    part <- match.arg(part)
    if (part == "body") {
      mat_row_indices <- seq_len(nrow(tbl)) + n_header
      row_ind_offset <- -n_header
    } else {
      mat_row_indices <- seq_len(n_header)
      row_ind_offset <- 0
    }

    lapply(mat_row_indices, function(i) {
      # Keep only the cells flagged for display in this row.
      disp <- mf$display[i, -1]
      list(
        row_name = mf$strings[i, 1],
        cells = mf$strings[i, -1][disp],
        cell_spans = mf$spans[i, -1][disp],
        row_index = i + row_ind_offset,
        underline_colspan = underline_colspan
      )
    })
  }

  # Spanning header cells are underlined, body cells are not.
  args_header <- to_args_for_cell_in_rows_fun("header", underline_colspan = TRUE)
  args_body <- to_args_for_cell_in_rows_fun("body", underline_colspan = FALSE)

  grid::gTree(
    name = name,
    children = grid::gList(
      # Table header text.
      grid::gTree(
        children = do.call(grid::gList, lapply(args_header, do.call, what = cell_in_rows)),
        vp = grid::vpPath("vp_table_layout", "vp_header")
      ),
      # Table body text.
      grid::gTree(
        children = do.call(grid::gList, lapply(args_body, do.call, what = cell_in_rows)),
        vp = grid::vpPath("vp_table_layout", "vp_body")
      ),
      # Horizontal line in the spacer row between header and body.
      grid::linesGrob(
        grid::unit(c(0, 1), "npc"),
        y = grid::unit(c(.5, .5), "npc"),
        vp = grid::vpPath("vp_table_layout", "vp_spacer")
      ),
      # forest part
      # Header labels placed one line to the left and to the right of the reference line.
      if (is.null(vline)) {
        NULL
      } else {
        grid::gTree(
          children = grid::gList(
            grid::gTree(
              children = grid::gList(
                # this may overflow, to fix, look here
                # https://stackoverflow.com/questions/33623169/add-multi-line-footnote-to-tablegrob-while-using-gridextra-in-r #nolintr
                grid::textGrob(
                  forest_header[1],
                  x = grid::unit(vline, "native") - grid::unit(1, "lines"),
                  just = c("right", "center")
                ),
                grid::textGrob(
                  forest_header[2],
                  x = grid::unit(vline, "native") + grid::unit(1, "lines"),
                  just = c("left", "center")
                )
              ),
              # Layout column `ncol(tbl) + 2` is the forest column (after row names and cells).
              vp = grid::vpStack(grid::viewport(layout.pos.col = ncol(tbl) + 2), data_forest_vp)
            )
          ),
          vp = grid::vpPath("vp_table_layout", "vp_header")
        )
      },
      # Forest panel in the body: gray background, optional reference line and x-axis.
      grid::gTree(
        children = grid::gList(
          grid::gTree(
            children = grid::gList(
              grid::rectGrob(gp = grid::gpar(col = "gray90", fill = "gray90")),
              if (is.null(vline)) {
                NULL
              } else {
                grid::linesGrob(
                  x = grid::unit(rep(vline, 2), "native"),
                  y = grid::unit(c(0, 1), "npc"),
                  gp = grid::gpar(lwd = 2),
                  vp = data_forest_vp
                )
              },
              grid::xaxisGrob(at = x_at, label = x_labels, vp = data_forest_vp)
            ),
            vp = grid::viewport(layout.pos.col = ncol(tbl) + 2)
          )
        ),
        vp = grid::vpPath("vp_table_layout", "vp_body")
      ),
      # One point-and-interval grob per table row; `col` is iterated over together with the
      # per-row vectors by `Map()`.
      grid::gTree(
        children = do.call(
          grid::gList,
          Map(
            function(xi, li, ui, row_index, size_i, col) {
              forest_dot_line(
                xi,
                li,
                ui,
                row_index,
                xlim,
                symbol_size = size_i,
                col = col,
                datavp = data_forest_vp
              )
            },
            x,
            lower,
            upper,
            seq_along(x),
            symbol_size,
            col,
            USE.NAMES = FALSE
          )
        ),
        vp = grid::vpPath("vp_table_layout", "vp_body")
      )
    ),
    # Viewport tree that provides every viewport name referenced by the children above.
    childrenvp = forest_viewport(tbl, width_row_names, width_columns, width_forest),
    vp = vp,
    gp = gp
  )
}


#' Graphic Object: Table Row
#'
#' Builds the text grobs for one table row: the row name plus one grob per displayed cell.
#' Cells spanning several columns are drawn in a viewport covering all spanned layout columns
#' and can optionally be underlined.
#'
#' @param row_name (`string`)\cr row name; an empty string produces no row name grob.
#' @param cells (`character`)\cr cell contents, one element per displayed cell. Empty strings
#'   produce no grob.
#' @param cell_spans (`numeric`)\cr number of table columns each element of `cells` spans.
#' @param row_index (`number`)\cr index of the row, used in viewport and grob names.
#' @param underline_colspan (`flag`)\cr whether non-blank cells spanning more than one column
#'   are underlined.
#'
#' @return A `gList` with the row name grob (if any) followed by the cell grobs.
#'
#' @noRd
cell_in_rows <- function(row_name,
                         cells,
                         cell_spans,
                         row_index,
                         underline_colspan = FALSE) {
  # These assertions guarantee a non-missing row name and at least one non-missing cell,
  # so no further NULL / NA / empty-input handling is needed below.
  checkmate::assert_string(row_name)
  checkmate::assert_character(cells, min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(cell_spans, len = length(cells), any.missing = FALSE)
  checkmate::assert_number(row_index)
  checkmate::assert_flag(underline_colspan)

  vp_name_rn <- paste0("rowname-", row_index)
  g_rowname <- if (row_name != "") {
    grid::textGrob(
      name = vp_name_rn,
      label = row_name,
      x = grid::unit(0, "npc"),
      just = c("left", "center"),
      vp = grid::vpPath(vp_name_rn)
    )
  } else {
    NULL
  }

  # Table column in which each cell starts: 1 plus the spans of all preceding cells.
  start_cols <- cumsum(c(1, cell_spans))[seq_along(cells)]

  gl_cols <- lapply(seq_along(cells), function(k) {
    cell_ascii <- cells[[k]]
    cs <- cell_spans[[k]]
    j <- start_cols[[k]]

    if (identical(cell_ascii, "")) {
      return(NULL)
    }

    cell_name <- paste0("g-cell-", row_index, "-", j)

    if (cs == 1) {
      return(
        grid::textGrob(
          label = cell_ascii,
          name = cell_name,
          vp = grid::vpPath(paste0("cell-", row_index, "-", j))
        )
      )
    }

    # Layout columns are shifted by +1 because the first layout column holds the row name.
    vp_joined_cols <- grid::viewport(layout.pos.row = row_index, layout.pos.col = seq(j + 1, j + cs))

    lab <- grid::textGrob(
      label = cell_ascii,
      name = cell_name,
      vp = vp_joined_cols
    )

    if (!underline_colspan || grepl("^[[:space:]]*$", cell_ascii)) {
      lab
    } else {
      grid::gList(
        lab,
        grid::linesGrob(
          x = grid::unit.c(grid::unit(.2, "lines"), grid::unit(1, "npc") - grid::unit(.2, "lines")),
          y = grid::unit(c(0, 0), "npc"),
          vp = vp_joined_cols
        )
      )
    }
  })

  grid::gList(
    g_rowname,
    do.call(grid::gList, gl_cols)
  )
}

#' Graphic Object: Forest Dot Line
#'
#' Builds the `grob` for one row of the forest plot: the confidence interval segment (with
#' arrow heads where the interval is cut off by `xlim`) and the circle for the point estimate.
#' Returns `NULL` when the estimate and both interval bounds are missing.
#'
#' @noRd
forest_dot_line <- function(x,
                            lower,
                            upper,
                            row_index,
                            xlim,
                            symbol_size = 1,
                            col = "blue",
                            datavp) {
  ci <- c(lower, upper)

  # Nothing to draw when neither an estimate nor any interval bound is available.
  if (all(is.na(c(x, ci)))) {
    return(NULL)
  }

  y <- grid::unit(c(0.5, 0.5), "npc")

  # The interval is drawn only if it is complete and overlaps the visible x-range.
  ci_visible <- all(!is.na(ci)) && ci[2] > xlim[1] && ci[1] < xlim[2]
  g_line <- NULL
  if (ci_visible) {
    cut_left <- ci[1] < xlim[1]
    cut_right <- ci[2] > xlim[2]

    if (!cut_left && !cut_right) {
      # Interval fits entirely: plain segment.
      g_line <- grid::linesGrob(x = grid::unit(c(ci[1], ci[2]), "native"), y = y)
    } else {
      # Interval is truncated at the axis limit(s); an arrow head marks each truncated end.
      arrow_ends <- if (cut_left && cut_right) {
        "both"
      } else if (cut_left) {
        "first"
      } else {
        "last"
      }
      x_from <- if (cut_left) xlim[1] else ci[1]
      x_to <- if (cut_right) xlim[2] else ci[2]
      g_line <- grid::linesGrob(
        x = grid::unit(c(x_from, x_to), "native"),
        y = y,
        arrow = grid::arrow(angle = 30, length = grid::unit(0.5, "lines"), ends = arrow_ends)
      )
    }
  }

  # The point estimate is drawn only when it lies within the visible x-range.
  point_visible <- !is.na(x) && x >= xlim[1] && x <= xlim[2]
  g_circle <- NULL
  if (point_visible) {
    g_circle <- grid::circleGrob(
      x = grid::unit(x, "native"),
      y = y,
      r = grid::unit(1 / 3.5 * symbol_size, "lines"),
      name = "point"
    )
  }

  marks <- grid::gList(g_line, g_circle)
  coloured_marks <- grid::gTree(
    children = grid::gList(marks),
    vp = datavp,
    gp = grid::gpar(col = col, fill = col)
  )

  # Place the marks in the forest viewport belonging to this table row.
  grid::gTree(
    children = grid::gList(coloured_marks),
    vp = grid::vpPath(paste0("forest-", row_index))
  )
}

#' Create a Viewport Tree for the Forest Plot
#'
#' Builds the viewport tree used by the forest plot: an outer three-row layout (header, spacer,
#' body) named `"vp_table_layout"`, whose header (`"vp_header"`) and body (`"vp_body"`) parts
#' each hold a grid layout with one column for the row names, one column per table column and
#' one column for the forest.
#'
#' @param tbl (`VTableTree`)\cr table whose dimensions and content determine the layout.
#' @param width_row_names (`unit` or `NULL`)\cr width of the row name column. If `NULL`, it is
#'   derived from the column widths proposed by [formatters::propose_column_widths()].
#' @param width_columns (`unit` or `NULL`)\cr widths of the table columns. If `NULL`, they are
#'   derived from the column widths proposed by [formatters::propose_column_widths()].
#' @param width_forest (`unit`)\cr width of the forest column.
#' @param gap_column (`unit`)\cr gap added to the width of the row name column and of each
#'   table column.
#' @param gap_header (`unit`)\cr height of the spacer row between header and body.
#' @param mat_form (`MatrixPrintForm` or `NULL`)\cr matrix form of `tbl`. If `NULL`, it is
#'   computed with `matrix_form(tbl)`.
#'
#' @return A viewport tree.
#'
#' @examples
#' library(grid)
#'
#' tbl <- rtable(
#'   header = rheader(
#'     rrow("", "E", rcell("CI", colspan = 2)),
#'     rrow("", "A", "B", "C")
#'   ),
#'   rrow("row 1", 1, 0.8, 1.1),
#'   rrow("row 2", 1.4, 0.8, 1.6),
#'   rrow("row 3", 1.2, 0.8, 1.2)
#' )
#'
#' # Internal function - forest_viewport
#' \dontrun{
#' v <- forest_viewport(tbl)
#'
#' grid::grid.newpage()
#' showViewport(v)
#' }
#'
#' @keywords internal
forest_viewport <- function(tbl,
                            width_row_names = NULL,
                            width_columns = NULL,
                            width_forest = grid::unit(1, "null"),
                            gap_column = grid::unit(1, "lines"),
                            gap_header = grid::unit(1, "lines"),
                            mat_form = NULL) {
  checkmate::assert_class(tbl, "VTableTree")
  checkmate::assert_true(grid::is.unit(width_forest))
  if (!is.null(width_row_names)) {
    checkmate::assert_true(grid::is.unit(width_row_names))
  }
  if (!is.null(width_columns)) {
    checkmate::assert_true(grid::is.unit(width_columns))
  }

  if (is.null(mat_form)) mat_form <- matrix_form(tbl)

  # Cells that are not displayed must not influence widths or heights.
  mat_form$strings[!mat_form$display] <- ""

  nr <- nrow(tbl)
  nc <- ncol(tbl)
  nr_h <- attr(mat_form, "nrow_header")

  if (is.null(width_row_names) || is.null(width_columns)) {
    tbl_widths <- formatters::propose_column_widths(mat_form)
    strs_with_width <- strrep("x", tbl_widths) # that works for mono spaced fonts
    if (is.null(width_row_names)) width_row_names <- grid::stringWidth(strs_with_width[1])
    if (is.null(width_columns)) width_columns <- grid::stringWidth(strs_with_width[-1])
  }

  # Widths for row name, cols, forest.
  widths <- grid::unit.c(
    width_row_names + gap_column,
    width_columns + gap_column,
    width_forest
  )

  # Number of text lines needed per matrix row: the largest number of line breaks found in
  # any cell of the row, plus one. `gregexpr()` returns -1 when there is no match, so only
  # positive match positions are counted. (Reading the `match.length` attribute instead
  # fails for cells with two or more line breaks, because it then has length > 1.)
  n_lines_per_row <- apply(
    X = mat_form$strings,
    MARGIN = 1,
    FUN = function(row) {
      n_breaks <- vapply(
        gregexpr("\n", row, fixed = TRUE),
        function(pos) sum(pos > 0),
        numeric(1)
      )
      max(c(n_breaks + 1, 1))
    }
  )

  i_header <- seq_len(nr_h)

  # Each text line gets a height of 1.2 lines.
  height_body_rows <- grid::unit(n_lines_per_row[-i_header] * 1.2, "lines")
  height_header_rows <- grid::unit(n_lines_per_row[i_header] * 1.2, "lines")

  height_body <- grid::unit(sum(n_lines_per_row[-i_header]) * 1.2, "lines")
  height_header <- grid::unit(sum(n_lines_per_row[i_header]) * 1.2, "lines")

  nc_g <- nc + 2 # number of columns incl. row names and forest

  vp_tbl <- grid::vpTree(
    parent = grid::viewport(
      name = "vp_table_layout",
      layout = grid::grid.layout(
        nrow = 3, ncol = 1,
        heights = grid::unit.c(height_header, gap_header, height_body)
      )
    ),
    children = grid::vpList(
      vp_forest_table_part(nr_h, nc_g, 1, 1, widths, height_header_rows, "vp_header"),
      vp_forest_table_part(nr, nc_g, 3, 1, widths, height_body_rows, "vp_body"),
      grid::viewport(name = "vp_spacer", layout.pos.row = 2, layout.pos.col = 1)
    )
  )
  vp_tbl
}

#' Viewport Forest Plot: Table Part
#'
#' Creates the viewport tree for one part (header or body) of the forest plot table. The
#' parent viewport carries a grid layout of `nrow` x `ncol` cells; its children are one named
#' viewport per row name (`"rowname-i"`, first layout column), per table cell (`"cell-i-j"`,
#' layout column `j + 1`) and per forest row (`"forest-i"`, last layout column).
#'
#' @noRd
vp_forest_table_part <- function(nrow,
                                 ncol,
                                 l_row,
                                 l_col,
                                 widths,
                                 heights,
                                 name) {
  row_ids <- seq_len(nrow)
  # All (row, column) combinations of table cells; rows vary fastest. The first and the last
  # layout column are reserved for row names and forest, hence `ncol - 2` cell columns.
  cell_grid <- expand.grid(seq_len(nrow), seq_len(ncol - 2))

  vps_rownames <- lapply(row_ids, function(i) {
    grid::viewport(layout.pos.row = i, layout.pos.col = 1, name = paste0("rowname-", i))
  })

  vps_cells <- Map(
    function(i, j) {
      grid::viewport(layout.pos.row = i, layout.pos.col = j + 1, name = paste0("cell-", i, "-", j))
    },
    cell_grid[[1]],
    cell_grid[[2]]
  )

  vps_forest <- lapply(row_ids, function(i) {
    grid::viewport(layout.pos.row = i, layout.pos.col = ncol, name = paste0("forest-", i))
  })

  vp_parent <- grid::viewport(
    name = name,
    layout.pos.row = l_row,
    layout.pos.col = l_col,
    layout = grid::grid.layout(nrow = nrow, ncol = ncol, widths = widths, heights = heights)
  )

  grid::vpTree(
    vp_parent,
    children = grid::vpList(
      do.call(grid::vpList, vps_rownames),
      do.call(grid::vpList, vps_cells),
      do.call(grid::vpList, vps_forest)
    )
  )
}

#' Forest Rendering
#'
#' Builds the forest grob from the given arguments and draws it on the current device.
#' All arguments are forwarded unchanged to `forest_grob()`.
#'
#' @noRd
grid.forest <- function(...) { # nolint
  forest <- forest_grob(...)
  grid::grid.draw(forest)
}

#' Helper Functions for Multivariate Logistic Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions used in calculations for logistic regression.
#'
#' @inheritParams argument_convention
#' @param fit_glm (`glm`)\cr logistic regression model fitted by [stats::glm()] with "binomial" family.
#'   Limited functionality is also available for conditional logistic regression models fitted by
#'   [survival::clogit()], currently this is used only by [extract_rsp_biomarkers()].
#' @param x (`string` or `character`)\cr a variable or interaction term in `fit_glm` (depending on the
#'   helper function).
#'
#' @examples
#' library(dplyr)
#' library(broom)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' @name h_logistic_regression
NULL

#' @describeIn h_logistic_regression Helper function to extract interaction variable names from a fitted
#'   model assuming only one interaction term.
#'
#' @return Vector of names of interaction variables.
#'
#' @export
h_get_interaction_vars <- function(fit_glm) {
  checkmate::assert_class(fit_glm, "glm")
  model_terms <- stats::terms(fit_glm)
  # Second-order terms are the two-way interactions; exactly one is expected.
  is_interaction <- attr(model_terms, "order") == 2
  interaction_term <- attr(model_terms, "term.labels")[is_interaction]
  checkmate::assert_string(interaction_term)
  # The term label has the form "var1:var2".
  strsplit(interaction_term, split = ":")[[1]]
}

#' @describeIn h_logistic_regression Helper function to get the right coefficient name from the
#'   interaction variable names and the given levels. The main value here is that the order
#'   of first and second variable is checked in the `interaction_vars` input.
#'
#' @param interaction_vars (`character` of length 2)\cr interaction variable names.
#' @param first_var_with_level (`character` of length 2)\cr the first variable name with
#'   the interaction level.
#' @param second_var_with_level (`character` of length 2)\cr the second variable name with
#'   the interaction level.
#'
#' @return Name of coefficient. An error is raised if neither `first_var_with_level` nor
#'   `second_var_with_level` refers to the first element of `interaction_vars`.
#'
#' @export
h_interaction_coef_name <- function(interaction_vars,
                                    first_var_with_level,
                                    second_var_with_level) {
  checkmate::assert_character(interaction_vars, len = 2, any.missing = FALSE)
  checkmate::assert_character(first_var_with_level, len = 2, any.missing = FALSE)
  checkmate::assert_character(second_var_with_level, len = 2, any.missing = FALSE)
  checkmate::assert_subset(c(first_var_with_level[1], second_var_with_level[1]), interaction_vars)

  # Variable name and level are pasted without separator, e.g. c("ARMCD", "ARM B") -> "ARMCDARM B".
  first_name <- paste(first_var_with_level, collapse = "")
  second_name <- paste(second_var_with_level, collapse = "")

  # The part belonging to the first interaction variable always comes first in the result.
  if (first_var_with_level[1] == interaction_vars[1]) {
    paste(first_name, second_name, sep = ":")
  } else if (second_var_with_level[1] == interaction_vars[1]) {
    paste(second_name, first_name, sep = ":")
  } else {
    # Both inputs name the second interaction variable: the subset check above passes, but no
    # coefficient name can be built. Fail loudly instead of silently returning `NULL`.
    stop(
      "neither `first_var_with_level` nor `second_var_with_level` refers to the first ",
      "interaction variable '", interaction_vars[1], "'",
      call. = FALSE
    )
  }
}

#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates
#'   for the case when both the odds ratio and the interaction variable are categorical.
#'
#' @param odds_ratio_var (`string`)\cr the odds ratio variable.
#' @param interaction_var (`string`)\cr the interaction variable.
#'
#' @return Odds ratio. A nested list: for each non-reference level of `odds_ratio_var` a list
#'   with one element per level of `interaction_var`, each holding `or` and `ci`.
#'
#' @export
h_or_cat_interaction <- function(odds_ratio_var,
                                 interaction_var,
                                 fit_glm,
                                 conf_level = 0.95) {
  interaction_vars <- h_get_interaction_vars(fit_glm)
  checkmate::assert_string(odds_ratio_var)
  checkmate::assert_string(interaction_var)
  checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars)
  checkmate::assert_vector(interaction_vars, len = 2)

  xs_level <- fit_glm$xlevels
  xs_coef <- stats::coef(fit_glm)
  xs_vcov <- stats::vcov(fit_glm)

  # Odds ratio and confidence interval for the sum of the given coefficients. With a single
  # coefficient the estimate keeps its coefficient name.
  or_with_ci <- function(coef_names) {
    if (length(coef_names) > 1) {
      ones <- t(c(1, 1))
      est <- as.numeric(ones %*% xs_coef[coef_names])
      se <- sqrt(as.numeric(ones %*% xs_vcov[coef_names, coef_names] %*% t(ones)))
    } else {
      est <- xs_coef[coef_names]
      se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names]))
    }
    list(
      or = exp(est),
      ci = exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se)
    )
  }

  or_levels <- xs_level[[odds_ratio_var]][-1]
  int_levels <- xs_level[[interaction_var]]

  result <- list()
  for (or_level in or_levels) {
    by_int_level <- list()
    for (int_level in int_levels) {
      coef_names <- paste0(odds_ratio_var, or_level)
      # At the reference level of the interaction variable only the main effect applies;
      # otherwise the matching interaction coefficient is added.
      if (int_level != int_levels[1]) {
        coef_names <- c(
          coef_names,
          h_interaction_coef_name(
            interaction_vars,
            c(odds_ratio_var, or_level),
            c(interaction_var, int_level)
          )
        )
      }
      by_int_level[[int_level]] <- or_with_ci(coef_names)
    }
    result[[or_level]] <- by_int_level
  }
  result
}

#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates
#'   for the case when either the odds ratio or the interaction variable is continuous.
#'
#' @param at (`NULL` or `numeric`)\cr optional values for the interaction variable. Otherwise
#'   the median (rounded up to the next integer) is used.
#'
#' @return Odds ratio. If the interaction variable is numeric, a nested list by non-reference
#'   level of `odds_ratio_var` and by value of `at`; otherwise a list by level of
#'   `interaction_var`. Each leaf holds `or` and `ci`.
#'
#' @note We don't provide a function for the case when both variables are continuous because
#'   this does not arise in this table, as the treatment arm variable will always be involved
#'   and categorical.
#'
#' @export
h_or_cont_interaction <- function(odds_ratio_var,
                                  interaction_var,
                                  fit_glm,
                                  at = NULL,
                                  conf_level = 0.95) {
  interaction_vars <- h_get_interaction_vars(fit_glm)
  checkmate::assert_string(odds_ratio_var)
  checkmate::assert_string(interaction_var)
  checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars)
  checkmate::assert_vector(interaction_vars, len = 2)
  checkmate::assert_numeric(at, min.len = 1, null.ok = TRUE, any.missing = FALSE)
  xs_level <- fit_glm$xlevels
  xs_coef <- stats::coef(fit_glm)
  xs_vcov <- stats::vcov(fit_glm)
  xs_class <- attr(fit_glm$terms, "dataClasses")
  model_data <- fit_glm$model
  # `at` only makes sense for a numeric interaction variable.
  if (!is.null(at)) {
    checkmate::assert_set_equal(xs_class[interaction_var], "numeric")
  }

  # Odds ratio and confidence interval for a linear combination of the given coefficients.
  # `weights` is only used when there is more than one coefficient; with a single coefficient
  # the estimate keeps its coefficient name.
  or_with_ci <- function(coef_names, weights) {
    if (length(coef_names) > 1) {
      xvec <- t(weights)
      est <- as.numeric(xvec %*% xs_coef[coef_names])
      se <- sqrt(as.numeric(xvec %*% xs_vcov[coef_names, coef_names] %*% t(xvec)))
    } else {
      est <- xs_coef[coef_names]
      se <- sqrt(as.numeric(xs_vcov[coef_names, coef_names]))
    }
    list(
      or = exp(est),
      ci = exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se)
    )
  }

  result <- list()
  if (xs_class[interaction_var] == "numeric") {
    # Categorical odds ratio variable, continuous interaction variable.
    if (is.null(at)) {
      at <- ceiling(stats::median(model_data[[interaction_var]]))
    }

    for (or_level in xs_level[[odds_ratio_var]][-1]) {
      by_value <- list()
      for (increment in at) {
        coef_names <- paste0(odds_ratio_var, or_level)
        # The interaction coefficient contributes `increment` times; it drops out at zero.
        if (increment != 0) {
          coef_names <- c(
            coef_names,
            h_interaction_coef_name(
              interaction_vars,
              c(odds_ratio_var, or_level),
              c(interaction_var, "")
            )
          )
        }
        by_value[[as.character(increment)]] <- or_with_ci(coef_names, c(1, increment))
      }
      result[[or_level]] <- by_value
    }
  } else {
    # Continuous odds ratio variable, categorical interaction variable.
    checkmate::assert_set_equal(xs_class[odds_ratio_var], "numeric")
    checkmate::assert_set_equal(xs_class[interaction_var], "factor")
    int_levels <- xs_level[[interaction_var]]
    for (int_level in int_levels) {
      coef_names <- odds_ratio_var
      # Beyond the reference level the matching interaction coefficient is added.
      if (int_level != int_levels[1]) {
        coef_names <- c(
          coef_names,
          h_interaction_coef_name(
            interaction_vars,
            c(odds_ratio_var, ""),
            c(interaction_var, int_level)
          )
        )
      }
      result[[int_level]] <- or_with_ci(coef_names, c(1, 1))
    }
  }
  result
}

#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates
#'   in case of an interaction. This is a wrapper for [h_or_cont_interaction()] and
#'   [h_or_cat_interaction()].
#'
#' @return Odds ratio, as returned by the helper the call is dispatched to.
#'
#' @export
h_or_interaction <- function(odds_ratio_var,
                             interaction_var,
                             fit_glm,
                             at = NULL,
                             conf_level = 0.95) {
  # Classes of the two variables as recorded in the model terms.
  var_classes <- attr(fit_glm$terms, "dataClasses")[c(odds_ratio_var, interaction_var)]

  # At least one continuous variable: use the continuous helper.
  if (any(var_classes == "numeric")) {
    return(
      h_or_cont_interaction(
        odds_ratio_var,
        interaction_var,
        fit_glm,
        at = at,
        conf_level = conf_level
      )
    )
  }

  if (!all(var_classes == "factor")) {
    stop("wrong interaction variable class, the interaction variable is not a numeric nor a factor")
  }

  # Both variables are factors: use the categorical helper.
  h_or_cat_interaction(
    odds_ratio_var,
    interaction_var,
    fit_glm,
    conf_level = conf_level
  )
}

#' @describeIn h_logistic_regression Helper function to construct term labels from simple terms and the table
#'   of numbers of patients.
#'
#' @param terms (`character`)\cr simple terms.
#' @param table (`table`)\cr table containing numbers for terms.
#'
#' @return Term labels containing numbers of patients, in the form `"<term>, n = <count>"`.
#'
#' @export
h_simple_term_labels <- function(terms,
                                 table) {
  checkmate::assert_true(is.table(table))
  checkmate::assert_multi_class(terms, classes = c("factor", "character"))
  # Factors are converted so that the table is looked up by name, not by integer code.
  term_names <- as.character(terms)
  counts <- table[term_names]
  paste0(term_names, ", n = ", counts)
}

#' @describeIn h_logistic_regression Helper function to construct term labels from interaction terms and the table
#'   of numbers of patients.
#'
#' @param terms1 (`character`)\cr terms for first dimension (rows).
#' @param terms2 (`character`)\cr terms for second dimension (columns).
#' @param any (`flag`)\cr whether any of `terms1` and `terms2` can be fulfilled to count the
#'   number of patients. In that case they can only be scalar (strings).
#'
#' @return Term labels containing numbers of patients.
#'
#' @export
h_interaction_term_labels <- function(terms1,
                                      terms2,
                                      table,
                                      any = FALSE) {
  checkmate::assert_true(is.table(table))
  checkmate::assert_flag(any)
  checkmate::assert_multi_class(terms1, classes = c("factor", "character"))
  checkmate::assert_multi_class(terms2, classes = c("factor", "character"))
  row_terms <- as.character(terms1)
  col_terms <- as.character(terms2)

  if (!any) {
    # Count of patients in each (row term, column term) cell.
    cell_counts <- table[cbind(row_terms, col_terms)]
    return(paste0(row_terms, " * ", col_terms, ", n = ", cell_counts))
  }

  checkmate::assert_scalar(row_terms)
  checkmate::assert_scalar(col_terms)
  # Patients in the row or in the column: the cell [row, column] is part of both the row and
  # the column sum, so it is subtracted once.
  n_either <- sum(c(table[row_terms, ], table[, col_terms])) - table[row_terms, col_terms]
  paste0(row_terms, " or ", col_terms, ", n = ", n_either)
}

#' @describeIn h_logistic_regression Helper function to tabulate the main effect
#'   results of a (conditional) logistic regression model.
#'
#' @return Tabulated main effect results from a logistic regression model.
#'
#' @examples
#' h_glm_simple_term_extract("AGE", mod1)
#' h_glm_simple_term_extract("ARMCD", mod1)
#'
#' @export
h_glm_simple_term_extract <- function(x, fit_glm) {
  # Tabulates the coefficient statistics of one main-effect term `x` of a `glm` or
  # `clogit` fit: one row per coefficient, preceded by a variable-level reference
  # row when `x` is not numeric.
  checkmate::assert_multi_class(fit_glm, c("glm", "clogit"))
  checkmate::assert_string(x)

  is_glm <- inherits(fit_glm, "glm")
  var_classes <- attr(fit_glm$terms, "dataClasses")
  var_levels <- fit_glm$xlevels
  coef_table <- summary(fit_glm)$coefficients
  # The coefficient table uses different column names for `glm` and `clogit` fits.
  stat_cols <- if (is_glm) {
    c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)")
  } else {
    c("estimate" = "coef", "std_error" = "se(coef)", "pvalue" = "Pr(>|z|)")
  }

  # Label of `x`: read from the model data for `glm` fits, otherwise the name itself
  # is used because the data is not available.
  get_var_label <- function() {
    if (is_glm) formatters::var_labels(fit_glm$data[x], fill = TRUE) else x
  }

  # Make sure x is not an interaction term.
  checkmate::assert_subset(x, names(var_classes))
  is_numeric_x <- var_classes[x] == "numeric"

  # Numeric variables have a single coefficient named after the variable; otherwise
  # there is one coefficient per non-reference level.
  coef_rows <- if (is_numeric_x) x else paste0(x, var_levels[[x]][-1])
  result <- as.data.frame(coef_table[coef_rows, stat_cols, drop = FALSE], stringsAsFactors = FALSE)
  colnames(result) <- names(stat_cols)
  # Statistics are stored as list columns so that empty entries can be bound in later.
  result$estimate <- as.list(result$estimate)
  result$std_error <- as.list(result$std_error)
  result$pvalue <- as.list(result$pvalue)
  result$df <- as.list(1)

  if (is_numeric_x) {
    result$term <- x
    result$term_label <- get_var_label()
    result$is_variable_summary <- FALSE
    result$is_term_summary <- TRUE
  } else {
    # The reason is that we don't have the original data set in the `clogit` object
    # and therefore cannot determine the level counts here.
    checkmate::assert_class(fit_glm, "glm")
    level_counts <- table(fit_glm$data[[x]])
    ref_level <- var_levels[[x]][1]
    comp_levels <- var_levels[[x]][-1]

    result$term <- comp_levels
    result$term_label <- h_simple_term_labels(comp_levels, level_counts)
    result$is_variable_summary <- FALSE
    result$is_term_summary <- TRUE

    # Variable-level row carrying the type 3 Wald test of the whole variable.
    wald_tests <- car::Anova(fit_glm, type = 3, test.statistic = "Wald")
    ref_row <- data.frame(
      pvalue = wald_tests[x, "Pr(>Chisq)", drop = TRUE],
      term = ref_level,
      term_label = paste("Reference", h_simple_term_labels(ref_level, level_counts)),
      df = wald_tests[x, "Df", drop = TRUE],
      stringsAsFactors = FALSE
    )
    ref_row$pvalue <- as.list(ref_row$pvalue)
    ref_row$df <- as.list(ref_row$df)
    ref_row$estimate <- list(numeric(0))
    ref_row$std_error <- list(numeric(0))
    # With a single comparison level the variable-level p-value and df are left empty.
    if (length(comp_levels) == 1) {
      ref_row$pvalue <- list(numeric(0))
      ref_row$df <- list(numeric(0))
    }
    ref_row$is_variable_summary <- TRUE
    ref_row$is_term_summary <- FALSE
    result <- rbind(ref_row, result)
  }

  result$variable <- x
  result$variable_label <- get_var_label()
  # Interaction and reference columns are always empty for simple terms.
  result$interaction <- ""
  result$interaction_label <- ""
  result$reference <- ""
  result$reference_label <- ""
  rownames(result) <- NULL

  out_cols <- c(
    "variable",
    "variable_label",
    "term",
    "term_label",
    "interaction",
    "interaction_label",
    "reference",
    "reference_label",
    "estimate",
    "std_error",
    "df",
    "pvalue",
    "is_variable_summary",
    "is_term_summary"
  )
  result[out_cols]
}

#' @describeIn h_logistic_regression Helper function to tabulate the interaction term
#'   results of a logistic regression model.
#'
#' @return Tabulated interaction term results from a logistic regression model.
#'
#' @examples
#' h_glm_interaction_extract("ARMCD:AGE", mod2)
#'
#' @export
h_glm_interaction_extract <- function(x, fit_glm) {
  # Tabulates the coefficient statistics of the two-way interaction term `x`:
  # a variable-level row with the type 3 Wald test followed by one row per
  # interaction coefficient.
  inter_vars <- h_get_interaction_vars(fit_glm)
  var_classes <- attr(fit_glm$terms, "dataClasses")

  checkmate::assert_string(x)

  # Only take two-way interaction
  checkmate::assert_vector(inter_vars, len = 2)
  var1 <- inter_vars[1]
  var2 <- inter_vars[2]

  # Only consider simple case: first variable in interaction is arm, a categorical variable
  checkmate::assert_disjunct(var_classes[var1], "numeric")

  var_levels <- fit_glm$xlevels
  coef_table <- summary(fit_glm)$coefficients
  wald_tests <- car::Anova(fit_glm, type = 3, test.statistic = "Wald")
  stat_cols <- c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)")

  # Pulls the requested coefficient rows into a data frame with standardized column names.
  coef_frame <- function(rows) {
    out <- as.data.frame(coef_table[rows, stat_cols, drop = FALSE], stringsAsFactors = FALSE)
    colnames(out) <- names(stat_cols)
    out
  }

  var1_comp <- var_levels[[var1]][-1]
  if (var_classes[var2] == "numeric") {
    # Categorical by numeric: one coefficient per non-reference level of the first variable.
    term_stats <- coef_frame(paste0(var1, var1_comp, ":", var2))
    term_stats$term <- var1_comp
    level_counts <- table(fit_glm$data[[var1]])
    term_stats$term_label <- h_simple_term_labels(var1_comp, level_counts)
    var1_ref <- var_levels[[var1]][1]
    term_main <- var1_ref
    ref_label <- h_simple_term_labels(var1_ref, level_counts)
  } else {
    # Categorical by categorical: one coefficient per combination of non-reference levels.
    var2_comp <- var_levels[[var2]][-1]
    level_grid <- expand.grid(v1 = var1_comp, v2 = var2_comp)
    coef_rows <- paste(
      paste0(var1, level_grid$v1),
      paste0(var2, level_grid$v2),
      sep = ":"
    )
    term_stats <- coef_frame(coef_rows)
    term_stats$term <- paste(level_grid$v1, "*", level_grid$v2)
    level_counts <- table(fit_glm$data[[var1]], fit_glm$data[[var2]])
    term_stats$term_label <- h_interaction_term_labels(level_grid$v1, level_grid$v2, level_counts)
    var1_ref <- var_levels[[var1]][1]
    var2_ref <- var_levels[[var2]][1]
    term_main <- paste(var1, var2, sep = " * ")
    ref_label <- h_interaction_term_labels(var1_ref, var2_ref, level_counts, any = TRUE)
  }
  term_stats$df <- as.list(1)
  term_stats$pvalue <- as.list(term_stats$pvalue)
  term_stats$is_variable_summary <- FALSE
  term_stats$is_term_summary <- TRUE

  # Variable-level row: Wald test of the whole interaction, no estimate or standard error.
  summary_row <- data.frame(
    pvalue = wald_tests[x, "Pr(>Chisq)", drop = TRUE],
    term = term_main,
    term_label = paste("Reference", ref_label),
    df = wald_tests[x, "Df", drop = TRUE],
    stringsAsFactors = FALSE
  )
  summary_row$pvalue <- as.list(summary_row$pvalue)
  summary_row$df <- as.list(summary_row$df)
  summary_row$estimate <- list(numeric(0))
  summary_row$std_error <- list(numeric(0))
  summary_row$is_variable_summary <- TRUE
  summary_row$is_term_summary <- FALSE

  result <- rbind(summary_row, term_stats)
  result$variable <- x
  result$variable_label <- paste(
    "Interaction of",
    formatters::var_labels(fit_glm$data[var1], fill = TRUE),
    "*",
    formatters::var_labels(fit_glm$data[var2], fill = TRUE)
  )
  result$interaction <- ""
  result$interaction_label <- ""
  result$reference <- ""
  result$reference_label <- ""
  rownames(result) <- NULL

  out_cols <- c(
    "variable",
    "variable_label",
    "term",
    "term_label",
    "interaction",
    "interaction_label",
    "reference",
    "reference_label",
    "estimate",
    "std_error",
    "df",
    "pvalue",
    "is_variable_summary",
    "is_term_summary"
  )
  result[out_cols]
}

#' @describeIn h_logistic_regression Helper function to tabulate the interaction
#'   results of a logistic regression model. This basically is a wrapper for
#'   [h_or_interaction()] and [h_glm_simple_term_extract()] which puts the results
#'   in the right data frame format.
#'
#' @return A `data.frame` of tabulated interaction term results from a logistic regression model.
#'
#' @examples
#' h_glm_inter_term_extract("AGE", "ARMCD", mod2)
#'
#' @export
h_glm_inter_term_extract <- function(odds_ratio_var,
                                     interaction_var,
                                     fit_glm,
                                     ...) {
  # Combines the main effect rows of `odds_ratio_var` with the odds ratio estimates
  # of `odds_ratio_var` at the levels/values of `interaction_var` into one data frame.
  # `...` is forwarded to `h_or_interaction()`.

  # First obtain the main effects.
  main_stats <- h_glm_simple_term_extract(odds_ratio_var, fit_glm)
  main_stats$is_reference_summary <- FALSE
  main_stats$odds_ratio <- NA
  main_stats$lcl <- NA
  main_stats$ucl <- NA

  # Then we get the odds ratio estimates and put into df form.
  or_numbers <- h_or_interaction(odds_ratio_var, interaction_var, fit_glm, ...)
  is_num_or_var <- attr(fit_glm$terms, "dataClasses")[odds_ratio_var] == "numeric"

  # Extracts element `pos` of statistic `name` from every odds ratio entry in `l`.
  # For a numeric OR variable the entries sit directly in `l`; for a categorical OR
  # variable they are nested one level deeper (OR level, then reference).
  extract_from_list <- function(l, name, pos = 1) {
    pick <- function(entry) entry[[name]][pos]
    unname(unlist(
      lapply(l, function(el) {
        if (is_num_or_var) pick(el) else lapply(el, pick)
      })
    ))
  }

  if (is_num_or_var) {
    # Numeric OR variable case: one row per reference of the interaction variable.
    term <- odds_ratio_var
    term_label <- unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE))
    reference <- names(or_numbers)
  } else {
    # Categorical OR variable case: one row per (OR level, reference) combination.
    n_ref <- length(names(or_numbers[[1]]))
    term <- rep(names(or_numbers), each = n_ref)
    term_label <- h_simple_term_labels(term, table(fit_glm$data[[odds_ratio_var]]))
    reference <- unlist(lapply(or_numbers, names))
  }

  or_stats <- data.frame(
    variable = odds_ratio_var,
    variable_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)),
    term = term,
    term_label = term_label,
    interaction = interaction_var,
    interaction_label = unname(formatters::var_labels(fit_glm$data[interaction_var], fill = TRUE)),
    reference = reference,
    reference_label = reference,
    estimate = NA,
    std_error = NA,
    odds_ratio = extract_from_list(or_numbers, "or"),
    lcl = extract_from_list(or_numbers, "ci", pos = "lcl"),
    ucl = extract_from_list(or_numbers, "ci", pos = "ucl"),
    df = NA,
    pvalue = NA,
    is_variable_summary = FALSE,
    is_term_summary = FALSE,
    is_reference_summary = TRUE
  )

  df <- rbind(
    main_stats[, names(or_stats)],
    or_stats
  )
  # Variable summary rows first, then by term with the term row before its reference rows.
  df[order(-df$is_variable_summary, df$term, -df$is_term_summary, df$reference), ]
}

#' @describeIn h_logistic_regression Helper function to tabulate the results including
#'   odds ratios and confidence intervals of simple terms.
#'
#' @return Tabulated statistics for the given variable(s) from the logistic regression model.
#'
#' @examples
#' h_logistic_simple_terms("AGE", mod1)
#'
#' @export
h_logistic_simple_terms <- function(x, fit_glm, conf_level = 0.95) {
  # Tabulates the main-effect terms `x` of a logistic regression fit and adds odds
  # ratios with confidence limits computed from the estimate and its standard error.
  checkmate::assert_multi_class(fit_glm, c("glm", "clogit"))
  if (inherits(fit_glm, "glm")) {
    checkmate::assert_set_equal(fit_glm$family$family, "binomial")
  }

  model_terms <- attr(stats::terms(fit_glm), "term.labels")
  model_vars <- names(attr(fit_glm$terms, "dataClasses"))
  # Terms that are not plain model variables are treated as interaction terms.
  inter_terms <- model_terms[!model_terms %in% model_vars]

  checkmate::assert_subset(x, model_terms)
  if (length(inter_terms) != 0) {
    # Make sure any item in x is not part of interaction term
    checkmate::assert_disjunct(x, unlist(strsplit(inter_terms, ":")))
  }

  result <- do.call(rbind, lapply(x, h_glm_simple_term_extract, fit_glm))

  z_crit <- stats::qnorm((1 + conf_level) / 2)
  result$odds_ratio <- lapply(result$estimate, exp)
  # Confidence limit on the odds ratio scale; `direction` is -1 (lower) or +1 (upper).
  or_limit <- function(direction) {
    Map(
      function(or, se) exp(log(or) + direction * z_crit * se),
      result$odds_ratio,
      result$std_error
    )
  }
  result$lcl <- or_limit(-1)
  result$ucl <- or_limit(1)
  result$ci <- Map(c, result$lcl, result$ucl)
  result
}

#' @describeIn h_logistic_regression Helper function to tabulate the results including
#'   odds ratios and confidence intervals of interaction terms.
#'
#' @return Tabulated statistics for the given variable(s) from the logistic regression model.
#'
#' @examples
#' h_logistic_inter_terms(c("RACE", "AGE", "ARMCD", "AGE:ARMCD"), mod2)
#'
#' @export
h_logistic_inter_terms <- function(x,
                                   fit_glm,
                                   conf_level = 0.95,
                                   at = NULL) {
  # Tabulates all terms `x` of a logistic regression fit with exactly one two-way
  # interaction: the ordinary terms, the two interacting variables (with odds ratios
  # by level/value of the other variable) and the interaction term itself.

  # Find out the interaction variables and interaction term.
  inter_vars <- h_get_interaction_vars(fit_glm)
  checkmate::assert_vector(inter_vars, len = 2)

  # The interaction term is the element of `x` whose `:`-separated components are
  # exactly the two interaction variables (in either order). Exact comparison is used
  # instead of pattern matching so that a variable whose name is contained in another
  # one (e.g. `ARM` and `ARMCD`) or contains regex metacharacters cannot be mistaken
  # for the interaction term.
  is_inter_term <- vapply(
    strsplit(as.character(x), ":", fixed = TRUE),
    function(parts) setequal(parts, inter_vars),
    logical(1)
  )
  inter_term <- x[is_inter_term]

  # For the non-interaction vars we need the standard stuff.
  normal_terms <- setdiff(x, union(inter_vars, inter_term))

  # Only computed when there is at least one ordinary term.
  normal_stats <- NULL
  if (length(normal_terms) > 0) {
    normal_stats <- do.call(rbind, lapply(normal_terms, h_glm_simple_term_extract, fit_glm))
    q_norm <- stats::qnorm((1 + conf_level) / 2)
    normal_stats$odds_ratio <- lapply(normal_stats$estimate, exp)
    normal_stats$lcl <- Map(
      function(or, se) exp(log(or) - q_norm * se),
      normal_stats$odds_ratio,
      normal_stats$std_error
    )
    normal_stats$ucl <- Map(
      function(or, se) exp(log(or) + q_norm * se),
      normal_stats$odds_ratio,
      normal_stats$std_error
    )
    normal_stats$is_reference_summary <- FALSE
  }

  # Now the interaction term itself.
  inter_term_stats <- h_glm_interaction_extract(inter_term, fit_glm)
  inter_term_stats$odds_ratio <- NA
  inter_term_stats$lcl <- NA
  inter_term_stats$ucl <- NA
  inter_term_stats$is_reference_summary <- FALSE

  is_intervar1_numeric <- attr(fit_glm$terms, "dataClasses")[inter_vars[1]] == "numeric"

  # Interaction stuff. `at` is only passed to the call in which the first interaction
  # variable is not the numeric odds ratio variable.
  inter_stats_one <- h_glm_inter_term_extract(
    inter_vars[1],
    inter_vars[2],
    fit_glm,
    conf_level = conf_level,
    at = if (is_intervar1_numeric) NULL else at
  )
  inter_stats_two <- h_glm_inter_term_extract(
    inter_vars[2],
    inter_vars[1],
    fit_glm,
    conf_level = conf_level,
    at = if (is_intervar1_numeric) at else NULL
  )

  # Now just combine everything in one data frame.
  col_names <- c(
    "variable",
    "variable_label",
    "term",
    "term_label",
    "interaction",
    "interaction_label",
    "reference",
    "reference_label",
    "estimate",
    "std_error",
    "df",
    "pvalue",
    "odds_ratio",
    "lcl",
    "ucl",
    "is_variable_summary",
    "is_term_summary",
    "is_reference_summary"
  )
  df <- rbind(
    inter_stats_one[, col_names],
    inter_stats_two[, col_names],
    inter_term_stats[, col_names]
  )
  if (!is.null(normal_stats)) {
    df <- rbind(
      normal_stats[, col_names],
      df
    )
  }
  df$ci <- combine_vectors(df$lcl, df$ucl)
  df
}

#' Kaplan-Meier Plot
#'
#' @description `r lifecycle::badge("stable")`
#'
#' From a survival model, a graphic is rendered along with tabulated annotation
#' including the number of patients at risk at a given time and the median survival
#' per group.
#'
#' @inheritParams grid::gTree
#' @inheritParams argument_convention
#' @param df (`data.frame`)\cr data set containing all analysis variables.
#' @param variables (named `list`)\cr variable names. Details are:
#'   * `tte` (`numeric`)\cr variable indicating time-to-event duration values.
#'   * `is_event` (`logical`)\cr event variable. `TRUE` if event, `FALSE` if time to event is censored.
#'   * `arm` (`factor`)\cr the treatment group variable.
#'   * `strat` (`character` or `NULL`)\cr variable names indicating stratification factors.
#' @param control_surv (`list`)\cr parameters for comparison details, specified by using
#'   the helper function [control_surv_timepoint()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for survival rate.
#'   * `conf_type` (`string`)\cr "plain" (default), "log", "log-log" for confidence interval type,
#'     see more in [survival::survfit()]. Note that the option "none" is no longer supported.
#' @param xticks (`numeric`, `number`, or `NULL`)\cr numeric vector of ticks or single number with spacing
#'   between ticks on the x axis. If `NULL` (default), [labeling::extended()] is used to determine
#'   an optimal tick position on the x axis.
#' @param yval (`string`)\cr value of y-axis. Options are `Survival` (default) and `Failure` probability.
#' @param censor_show (`flag`)\cr whether to show censored.
#' @param xlab (`string`)\cr label of x-axis.
#' @param ylab (`string`)\cr label of y-axis.
#' @param title (`string`)\cr title for plot.
#' @param footnotes (`string`)\cr footnotes for plot.
#' @param col (`character`)\cr line colors. Length of a vector should be equal
#'   to number of strata from [survival::survfit()].
#' @param lty (`numeric`)\cr line type. Length of a vector should be equal
#'   to number of strata from [survival::survfit()].
#' @param lwd (`numeric`)\cr line width. Length of a vector should be equal
#'   to number of strata from [survival::survfit()].
#' @param pch (`numeric`, `string`)\cr value or character of points symbol to indicate censored cases.
#' @param size (`numeric`)\cr size of censored point, a class of `unit`.
#' @param max_time (`numeric`)\cr maximum value to show on X axis. Only data values less than or up to
#'   this threshold value will be plotted (defaults to `NULL`).
#' @param font_size (`number`)\cr font size to be used.
#' @param ci_ribbon (`flag`)\cr draw the confidence interval around the Kaplan-Meier curve.
#' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to control outlook of the Kaplan-Meier curve.
#' @param annot_at_risk (`flag`)\cr compute and add the annotation table reporting the number of patients at risk
#'   matching the main grid of the Kaplan-Meier curve.
#' @param annot_surv_med (`flag`)\cr compute and add the annotation table on the Kaplan-Meier curve estimating the
#'   median survival time per group.
#' @param annot_coxph (`flag`)\cr add the annotation table from a [survival::coxph()] model.
#' @param annot_stats (`string`)\cr statistics annotations to add to the plot. Options are
#'   `median` (median survival follow-up time) and `min` (minimum survival follow-up time).
#' @param annot_stats_vlines (`flag`)\cr add vertical lines corresponding to each of the statistics
#'   specified by `annot_stats`. If `annot_stats` is `NULL` no lines will be added.
#' @param control_coxph_pw (`list`)\cr parameters for comparison details, specified by using
#'   the helper function [control_coxph()]. Some possible parameter options are:
#'   * `pval_method` (`string`)\cr p-value method for testing hazard ratio = 1.
#'     Default method is "log-rank", can also be set to "wald" or "likelihood".
#'   * `ties` (`string`)\cr method for tie handling. Default is "efron",
#'     can also be set to "breslow" or "exact". See more in [survival::coxph()]
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for HR.
#' @param position_coxph (`numeric`)\cr x and y positions for plotting [survival::coxph()] model.
#' @param position_surv_med (`numeric`)\cr x and y positions for plotting annotation table estimating median survival
#'   time per group.
#'
#' @return A `grob` of class `gTree`.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(ggplot2)
#' library(survival)
#' library(grid)
#' library(nestcolor)
#'
#' df <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#' variables <- list(tte = "AVAL", is_event = "is_event", arm = "ARMCD")
#'
#' # 1. Example - basic option
#'
#' res <- g_km(df = df, variables = variables)
#' res <- g_km(df = df, variables = variables, yval = "Failure")
#' res <- g_km(
#'   df = df,
#'   variables = variables,
#'   control_surv = control_surv_timepoint(conf_level = 0.9),
#'   col = c("grey25", "grey50", "grey75")
#' )
#' res <- g_km(df = df, variables = variables, ggtheme = theme_minimal())
#' res <- g_km(df = df, variables = variables, ggtheme = theme_minimal(), lty = 1:3)
#' res <- g_km(df = df, variables = variables, max_time = 2000)
#' res <- g_km(
#'   df = df,
#'   variables = variables,
#'   annot_stats = c("min", "median"),
#'   annot_stats_vlines = TRUE
#' )
#'
#' # 2. Example - Arrange several KM curve on a single graph device
#'
#' # 2.1 Use case: A general graph on the top, a zoom on the bottom.
#' grid.newpage()
#' lyt <- grid.layout(nrow = 2, ncol = 1) %>%
#'   viewport(layout = .) %>%
#'   pushViewport()
#'
#' res <- g_km(
#'   df = df, variables = variables, newpage = FALSE, annot_surv_med = FALSE,
#'   vp = viewport(layout.pos.row = 1, layout.pos.col = 1)
#' )
#' res <- g_km(
#'   df = df, variables = variables, max_time = 1000, newpage = FALSE, annot_surv_med = FALSE,
#'   ggtheme = theme_dark(),
#'   vp = viewport(layout.pos.row = 2, layout.pos.col = 1)
#' )
#'
#' # 2.2 Use case: No annotations on top, annotated graph on bottom
#' grid.newpage()
#' lyt <- grid.layout(nrow = 2, ncol = 1) %>%
#'   viewport(layout = .) %>%
#'   pushViewport()
#'
#' res <- g_km(
#'   df = df, variables = variables, newpage = FALSE,
#'   annot_surv_med = FALSE, annot_at_risk = FALSE,
#'   vp = viewport(layout.pos.row = 1, layout.pos.col = 1)
#' )
#' res <- g_km(
#'   df = df, variables = variables, max_time = 2000, newpage = FALSE, annot_surv_med = FALSE,
#'   annot_at_risk = TRUE,
#'   ggtheme = theme_dark(),
#'   vp = viewport(layout.pos.row = 2, layout.pos.col = 1)
#' )
#'
#' # Add annotation from a pairwise coxph analysis
#' g_km(
#'   df = df, variables = variables,
#'   annot_coxph = TRUE
#' )
#'
#' g_km(
#'   df = df, variables = c(variables, list(strat = "SEX")),
#'   font_size = 15,
#'   annot_coxph = TRUE,
#'   control_coxph_pw = control_coxph(pval_method = "wald", ties = "exact", conf_level = 0.99),
#'   position_coxph = c(0.4, 0.5)
#' )
#'
#' # Change position of the treatment group annotation table.
#' g_km(
#'   df = df, variables = c(variables, list(strat = "SEX")),
#'   font_size = 15,
#'   annot_coxph = TRUE,
#'   control_coxph_pw = control_coxph(pval_method = "wald", ties = "exact", conf_level = 0.99),
#'   position_surv_med = c(1, 0.7)
#' )
#' }
#'
#' @export
g_km <- function(df,
                 variables,
                 control_surv = control_surv_timepoint(),
                 col = NULL,
                 lty = NULL,
                 lwd = .5,
                 censor_show = TRUE,
                 pch = 3,
                 size = 2,
                 max_time = NULL,
                 xticks = NULL,
                 xlab = "Days",
                 yval = c("Survival", "Failure"),
                 ylab = paste(yval, "Probability"),
                 title = NULL,
                 footnotes = NULL,
                 draw = TRUE,
                 newpage = TRUE,
                 gp = NULL,
                 vp = NULL,
                 name = NULL,
                 font_size = 12,
                 ci_ribbon = FALSE,
                 ggtheme = nestcolor::theme_nest(),
                 annot_at_risk = TRUE,
                 annot_surv_med = TRUE,
                 annot_coxph = FALSE,
                 annot_stats = NULL,
                 annot_stats_vlines = FALSE,
                 control_coxph_pw = control_coxph(),
                 position_coxph = c(0, 0.05),
                 position_surv_med = c(0.9, 0.9)) {
  # Fits a Kaplan-Meier model by arm, builds the curve with `h_ggkm()`, optionally adds
  # follow-up statistics, and arranges the curve with the requested annotation tables
  # in a grid layout. The resulting `gTree` is drawn if `draw` is `TRUE` and returned
  # invisibly.
  checkmate::assert_list(variables)
  checkmate::assert_subset(c("tte", "arm", "is_event"), names(variables))
  checkmate::assert_string(title, null.ok = TRUE)
  checkmate::assert_string(footnotes, null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)
  checkmate::assert_subset(annot_stats, c("median", "min"))
  # Used as a scalar `if` condition below, so it must be a single non-missing logical.
  checkmate::assert_flag(annot_stats_vlines)

  tte <- variables$tte
  is_event <- variables$is_event
  arm <- variables$arm

  # Check that the columns exist before inspecting their contents.
  assert_df_with_variables(df, list(tte = tte, is_event = is_event, arm = arm))
  assert_valid_factor(df[[arm]])
  checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(df[[tte]], min.len = 1, any.missing = FALSE)

  # `armval` is only kept when a single arm value is present in the data; it is then
  # used as the stratum name by the helpers below.
  armval <- as.character(unique(df[[arm]]))
  if (length(armval) > 1) {
    armval <- NULL
  }
  # Must be resolved before `ylab` is first used, as the default `ylab` depends on it.
  yval <- match.arg(yval)
  formula <- stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", arm))
  fit_km <- survival::survfit(
    formula = formula,
    data = df,
    conf.int = control_surv$conf_level,
    conf.type = control_surv$conf_type
  )
  data_plot <- h_data_plot(
    fit_km = fit_km,
    armval = armval,
    max_time = max_time
  )

  xticks <- h_xticks(data = data_plot, xticks = xticks, max_time = max_time)
  gg <- h_ggkm(
    data = data_plot,
    censor_show = censor_show,
    pch = pch,
    size = size,
    xticks = xticks,
    xlab = xlab,
    yval = yval,
    ylab = ylab,
    title = title,
    footnotes = footnotes,
    max_time = max_time,
    lwd = lwd,
    lty = lty,
    col = col,
    ggtheme = ggtheme,
    ci_ribbon = ci_ribbon
  )

  if (!is.null(annot_stats)) {
    # The time unit shown in the labels is read from the first value of the `AVALU`
    # column of `df`; nothing is appended if that column is absent.
    time_unit <- tolower(df$AVALU[1])
    max_plot_time <- max(data_plot$time)
    if ("median" %in% annot_stats) {
      # Median is taken from a fit on all observations, ignoring the arm.
      fit_km_all <- survival::survfit(
        formula = stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", 1)),
        data = df,
        conf.int = control_surv$conf_level,
        conf.type = control_surv$conf_type
      )
      median_fu <- stats::median(fit_km_all)
      gg <- gg +
        ggplot2::geom_text(
          size = 8 / ggplot2::.pt, col = 1,
          x = median_fu + 0.065 * max_plot_time,
          y = if (yval == "Survival") 0.62 else 0.38,
          label = paste("Median F/U:\n", round(median_fu, 1), time_unit)
        )
      if (annot_stats_vlines) {
        gg <- gg +
          ggplot2::geom_segment(
            ggplot2::aes(x = median_fu, xend = median_fu, y = -Inf, yend = Inf),
            linetype = 2, col = "darkgray"
          )
      }
    }
    if ("min" %in% annot_stats) {
      min_fu <- min(df[[tte]])
      gg <- gg +
        ggplot2::geom_text(
          size = 8 / ggplot2::.pt, col = 1,
          x = min_fu + max_plot_time * (if (yval == "Survival") 0.05 else 0.07),
          y = if (yval == "Survival") 1.0 else 0.05,
          label = paste("Min. F/U:\n", round(min_fu, 1), time_unit)
        )
      if (annot_stats_vlines) {
        gg <- gg +
          ggplot2::geom_segment(
            ggplot2::aes(x = min_fu, xend = min_fu, y = Inf, yend = -Inf),
            linetype = 2, col = "darkgray"
          )
      }
    }
    gg <- gg + ggplot2::guides(fill = ggplot2::guide_legend(override.aes = list(shape = NA, label = "")))
  }

  g_el <- h_decompose_gg(gg)

  if (annot_at_risk) {
    # This is the content of the table that will be below the graph.
    annot_tbl <- summary(fit_km, time = xticks)
    annot_tbl <- if (is.null(fit_km$strata)) {
      data.frame(
        n.risk = annot_tbl$n.risk,
        time = annot_tbl$time,
        strata = as.factor(armval)
      )
    } else {
      # Strata levels look like "<variable>=<level>": drop everything up to and
      # including the first "=" so that only the level remains, even if the level
      # itself contains "=" or other text.
      levels(annot_tbl$strata) <- sub("^[^=]*=", "", levels(annot_tbl$strata))
      data.frame(
        n.risk = annot_tbl$n.risk,
        time = annot_tbl$time,
        strata = annot_tbl$strata
      )
    }

    grobs_patient <- h_grob_tbl_at_risk(
      data = data_plot,
      annot_tbl = annot_tbl,
      xlim = max(max_time, data_plot$time, xticks)
    )
  }

  if (annot_at_risk || annot_surv_med || annot_coxph) {
    lyt <- h_km_layout(
      data = data_plot, g_el = g_el, title = title, footnotes = footnotes, annot_at_risk = annot_at_risk
    )
    # 1 if the corresponding row is present in the layout, 0 otherwise; the title row
    # shifts all following layout rows down by one.
    ttl_row <- as.numeric(!is.null(title))
    foot_row <- as.numeric(!is.null(footnotes))
    km_grob <- grid::gTree(
      vp = grid::viewport(layout = lyt, height = .95, width = .95),
      children = grid::gList(
        # Title.
        if (ttl_row == 1) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 2),
            children = grid::gList(grid::textGrob(label = title, x = grid::unit(0, "npc"), hjust = 0))
          )
        },

        # The Kaplan - Meier curve (top-right corner).
        grid::gTree(
          vp = grid::viewport(layout.pos.row = 1 + ttl_row, layout.pos.col = 2),
          children = grid::gList(g_el$panel)
        ),

        # Survfit summary table (top-right corner).
        if (annot_surv_med) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 1 + ttl_row, layout.pos.col = 2),
            children = h_grob_median_surv(
              fit_km = fit_km,
              armval = armval,
              x = position_surv_med[1],
              y = position_surv_med[2],
              ttheme = gridExtra::ttheme_default(base_size = font_size)
            )
          )
        },
        # Pairwise Cox model table, placed on the same panel as the curve.
        if (annot_coxph) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 1 + ttl_row, layout.pos.col = 2),
            children = h_grob_coxph(
              df = df,
              variables = variables,
              control_coxph_pw = control_coxph_pw,
              x = position_coxph[1],
              y = position_coxph[2],
              ttheme = gridExtra::ttheme_default(
                base_size = font_size,
                padding = grid::unit(c(1, .5), "lines"),
                core = list(bg_params = list(fill = c("grey95", "grey90"), alpha = .5))
              )
            )
          )
        },

        # Add the y-axis annotation (top-left corner).
        grid::gTree(
          vp = grid::viewport(layout.pos.row = 1 + ttl_row, layout.pos.col = 1),
          children = h_grob_y_annot(ylab = g_el$ylab, yaxis = g_el$yaxis)
        ),

        # Add the x-axis annotation (second row below the Kaplan Meier Curve).
        grid::gTree(
          vp = grid::viewport(layout.pos.row = 2 + ttl_row, layout.pos.col = 2),
          children = grid::gList(rbind(g_el$xaxis, g_el$xlab))
        ),

        # Add the legend.
        grid::gTree(
          vp = grid::viewport(layout.pos.row = 3 + ttl_row, layout.pos.col = 2),
          children = grid::gList(g_el$guide)
        ),

        # Add the table with patient-at-risk numbers.
        if (annot_at_risk) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 4 + ttl_row, layout.pos.col = 2),
            children = grobs_patient$at_risk
          )
        },
        if (annot_at_risk) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 4 + ttl_row, layout.pos.col = 1),
            children = grobs_patient$label
          )
        },
        if (annot_at_risk) {
          # Add the x-axis for the table.
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 5 + ttl_row, layout.pos.col = 2),
            children = grid::gList(rbind(g_el$xaxis, g_el$xlab))
          )
        },

        # Footnotes: placed two rows further down when the at-risk table is shown.
        if (foot_row == 1) {
          grid::gTree(
            vp = grid::viewport(
              layout.pos.row = (if (annot_at_risk) 6 else 4) + ttl_row,
              layout.pos.col = 2
            ),
            children = grid::gList(grid::textGrob(label = footnotes, x = grid::unit(0, "npc"), hjust = 0))
          )
        }
      )
    )

    result <- grid::gTree(
      vp = vp,
      gp = gp,
      name = name,
      children = grid::gList(km_grob)
    )
  } else {
    # No annotation table requested: the plain ggplot grob is used as is.
    result <- grid::gTree(
      vp = vp,
      gp = gp,
      name = name,
      children = grid::gList(ggplot2::ggplotGrob(gg))
    )
  }

  if (newpage && draw) grid::grid.newpage()
  if (draw) grid::grid.draw(result)
  invisible(result)
}

#' Helper function: tidy survival fit
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convert the survival fit data into a data frame designed for plotting
#' within `g_km`.
#'
#' This starts from the [broom::tidy()] result, and then:
#'   * Post-processes the `strata` column into a factor.
#'   * Extends each stratum by an additional first row with time 0 and probability 1 so that
#'     downstream plot lines start at those coordinates.
#'   * Adds a `censor` column.
#'   * Filters the rows before `max_time`.
#'
#' @inheritParams g_km
#' @param fit_km (`survfit`)\cr result of [survival::survfit()].
#' @param armval (`string`)\cr used as strata name when treatment arm variable only has one level. Default is "All".
#'
#' @return A `tibble` with columns `time`, `n.risk`, `n.event`, `n.censor`, `estimate`, `std.error`, `conf.high`,
#'   `conf.low`, `strata`, and `censor`.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#'
#' # Test with multiple arms
#' tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_data_plot()
#'
#' # Test with single arm
#' tern_ex_adtte %>%
#'   filter(PARAMCD == "OS", ARMCD == "ARM B") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_data_plot(armval = "ARM B")
#' }
#'
#' @export
h_data_plot <- function(fit_km,
                        armval = "All",
                        max_time = NULL) {
  # Converts a survival fit into the data frame used for plotting: tidy the fit, turn
  # the strata into a factor, prepend a time-0 row per stratum, add a `censor` column
  # and optionally drop rows after `max_time`.
  y <- broom::tidy(fit_km)

  if (!is.null(fit_km$strata)) {
    # Strata are labelled "<variable>=<level>"; keep everything after the first "=".
    # A regular expression is used rather than splitting on a placeholder word, so
    # levels that themselves contain "=" or the word "equals" are kept intact.
    strip_strata_var <- function(labels) sub("^[^=]*=", "", labels)
    y$strata <- factor(
      strip_strata_var(y$strata),
      levels = strip_strata_var(names(fit_km$strata))
    )
  } else {
    # Without strata in the fit, all rows get `armval` as stratum name.
    y$strata <- armval
  }

  y_by_strata <- split(y, y$strata)
  y_by_strata_extended <- lapply(
    y_by_strata,
    FUN = function(tbl) {
      # Build a time-0 row from the first row of the stratum: probability 1, no events,
      # no censoring, and `n.risk` set to the sum of the first row's counts.
      first_row <- tbl[1L, ]
      first_row$time <- 0
      first_row$n.risk <- sum(first_row[, c("n.risk", "n.event", "n.censor")])
      first_row$n.event <- first_row$n.censor <- 0
      first_row$estimate <- first_row$conf.high <- first_row$conf.low <- 1
      first_row$std.error <- 0
      rbind(
        first_row,
        tbl
      )
    }
  )
  y <- do.call(rbind, y_by_strata_extended)

  # `censor` holds the estimate at time points with censored observations, `NA` elsewhere.
  y$censor <- ifelse(y$n.censor > 0, y$estimate, NA)
  if (!is.null(max_time)) {
    y <- y[y$time <= max(max_time), ]
  }
  y
}

#' Helper function: x tick positions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Calculate the positions of ticks on the x-axis. However, if `xticks` already
#' exists it is kept as is. It is based on the same function `ggplot2` relies on,
#' and is required in the graphic and the patient-at-risk annotation table.
#'
#' @inheritParams g_km
#' @inheritParams h_ggkm
#'
#' @return A vector of positions to use for x-axis ticks on a `ggplot` object.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#'
#' data <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_data_plot()
#'
#' h_xticks(data)
#' h_xticks(data, xticks = seq(0, 3000, 500))
#' h_xticks(data, xticks = 500)
#' h_xticks(data, xticks = 500, max_time = 6000)
#' h_xticks(data, xticks = c(0, 500), max_time = 300)
#' h_xticks(data, xticks = 500, max_time = 300)
#' }
#'
#' @export
h_xticks <- function(data, xticks = NULL, max_time = NULL) {
  # No ticks requested: derive about five "nice" positions from the observed time range,
  # stretching the upper end to `max_time` when it is supplied.
  if (is.null(xticks)) {
    time_range <- range(data$time)
    upper <- if (is.null(max_time)) time_range[2] else max(time_range[2], max_time)
    return(labeling::extended(time_range[1], upper, m = 5))
  }

  # A single number is the spacing between ticks, counted from 0.
  if (checkmate::test_number(xticks)) {
    upper <- if (is.null(max_time)) max(data$time) else max(data$time, max_time)
    return(seq(0, upper, xticks))
  }

  # A numeric vector already holds the tick positions and is returned untouched.
  if (is.numeric(xticks)) {
    return(xticks)
  }

  stop(
    paste(
      "xticks should be either `NULL`",
      "or a single number (interval between x ticks)",
      "or a numeric vector (position of ticks on the x axis)"
    )
  )
}

#' Helper function: KM plot
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Draw the Kaplan-Meier plot using `ggplot2`.
#'
#' @inheritParams g_km
#' @param data (`data.frame`)\cr survival data as pre-processed by `h_data_plot`.
#'
#' @return A `ggplot` object.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks,
#'   xlab = "Days",
#'   yval = "Survival",
#'   ylab = "Survival Probability",
#'   title = "Survival"
#' )
#' gg
#' }
#'
#' @export
h_ggkm <- function(data,
                   xticks = NULL,
                   yval = "Survival",
                   censor_show,
                   xlab,
                   ylab,
                   title,
                   footnotes = NULL,
                   max_time = NULL,
                   lwd = 1,
                   lty = NULL,
                   pch = 3,
                   size = 2,
                   col = NULL,
                   ci_ribbon = FALSE,
                   ggtheme = nestcolor::theme_nest()) {
  checkmate::assert_numeric(lty, null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)

  # change estimates of survival to estimates of failure (1 - survival)
  if (yval == "Failure") {
    data$estimate <- 1 - data$estimate
    # The bounds swap roles under `1 - x`: the old lower limit becomes the new upper limit.
    data[c("conf.high", "conf.low")] <- list(1 - data$conf.low, 1 - data$conf.high)
    data$censor <- 1 - data$censor
  }

  # Base plot: aesthetics shared by the step curve, the optional ribbon and the censor marks.
  gg <- {
    ggplot2::ggplot(
      data = data,
      mapping = ggplot2::aes(
        x = .data[["time"]],
        y = .data[["estimate"]],
        ymin = .data[["conf.low"]],
        ymax = .data[["conf.high"]],
        color = .data[["strata"]],
        fill = .data[["strata"]]
      )
    ) +
      ggplot2::geom_hline(yintercept = 0)
  }

  if (ci_ribbon) {
    gg <- gg + ggplot2::geom_ribbon(alpha = .3, lty = 0)
  }

  # Line type: default (`NULL`), one type for all strata (single number),
  # or one type per stratum (numeric vector).
  gg <- if (is.null(lty)) {
    gg +
      ggplot2::geom_step(linewidth = lwd)
  } else if (checkmate::test_number(lty)) {
    gg +
      ggplot2::geom_step(linewidth = lwd, lty = lty)
  } else if (is.numeric(lty)) {
    gg +
      ggplot2::geom_step(mapping = ggplot2::aes(linetype = .data[["strata"]]), linewidth = lwd) +
      ggplot2::scale_linetype_manual(values = lty)
  }

  gg <- gg +
    ggplot2::coord_cartesian(ylim = c(0, 1)) +
    ggplot2::labs(x = xlab, y = ylab, title = title, caption = footnotes)

  if (!is.null(col)) {
    gg <- gg +
      ggplot2::scale_color_manual(values = col) +
      ggplot2::scale_fill_manual(values = col)
  }
  if (censor_show) {
    dt <- data[data$n.censor != 0, ]
    # Size the label to the filtered data: assigning a length-one factor fails on a plain
    # `data.frame` when no observation is censored (zero rows).
    dt$censor_lbl <- factor(rep("Censored", nrow(dt)), levels = "Censored")

    gg <- gg + ggplot2::geom_point(
      data = dt,
      ggplot2::aes(
        x = .data[["time"]],
        y = .data[["censor"]],
        shape = .data[["censor_lbl"]]
      ),
      size = size,
      show.legend = TRUE,
      inherit.aes = TRUE
    ) +
      ggplot2::scale_shape_manual(name = NULL, values = pch) +
      ggplot2::guides(
        shape = ggplot2::guide_legend(override.aes = list(linetype = NA)),
        fill = ggplot2::guide_legend(override.aes = list(shape = NA))
      )
  }

  # x-axis: breaks come from `xticks`; limits are widened to `max_time` when given, and
  # otherwise fixed only if the ticks already cover all observed times.
  if (!is.null(max_time) && !is.null(xticks)) {
    gg <- gg + ggplot2::scale_x_continuous(breaks = xticks, limits = c(min(0, xticks), max(c(xticks, max_time))))
  } else if (!is.null(xticks)) {
    if (max(data$time) <= max(xticks)) {
      gg <- gg + ggplot2::scale_x_continuous(breaks = xticks, limits = c(min(0, min(xticks)), max(xticks)))
    } else {
      gg <- gg + ggplot2::scale_x_continuous(breaks = xticks)
    }
  } else if (!is.null(max_time)) {
    gg <- gg + ggplot2::scale_x_continuous(limits = c(0, max_time))
  }

  if (!is.null(ggtheme)) {
    gg <- gg + ggtheme
  }

  # Applied after `ggtheme` so that these settings take precedence over the supplied theme.
  gg + ggplot2::theme(
    legend.position = "bottom",
    legend.title = ggplot2::element_blank(),
    panel.grid.major.x = ggplot2::element_line(linewidth = 2)
  )
}

#' `ggplot` Decomposition
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The elements composing the `ggplot` are extracted and organized in a `list`.
#'
#' @param gg (`ggplot`)\cr a graphic to decompose.
#'
#' @return A named `list` with elements:
#'   * `panel`: The panel.
#'   * `yaxis`: The y-axis.
#'   * `xaxis`: The x-axis.
#'   * `xlab`: The x-axis label.
#'   * `ylab`: The y-axis label.
#'   * `guide`: The legend.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   yval = "Survival",
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "tt",
#'   footnotes = "ff"
#' )
#'
#' g_el <- h_decompose_gg(gg)
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "red", fill = "gray85", lwd = 5))
#' grid::grid.draw(g_el$panel)
#'
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "royalblue", fill = "gray85", lwd = 5))
#' grid::grid.draw(with(g_el, cbind(ylab, yaxis)))
#' }
#'
#' @export
h_decompose_gg <- function(gg) {
  plot_grob <- ggplot2::ggplotGrob(gg)

  # Name of each returned element mapped to the layout-name pattern it is filtered by.
  element_patterns <- c(
    panel = "panel",
    yaxis = "axis-l",
    xaxis = "axis-b",
    xlab = "xlab-b",
    ylab = "ylab-l",
    guide = "guide"
  )

  lapply(
    element_patterns,
    function(pattern) {
      gtable::gtable_filter(plot_grob, pattern)
    }
  )
}

#' Helper: KM Layout
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares a (5 rows) x (2 cols) layout for the Kaplan-Meier curve.
#'
#' @inheritParams g_km
#' @inheritParams h_ggkm
#' @param g_el (`list` of `gtable`)\cr list as obtained by `h_decompose_gg()`.
#' @param annot_at_risk (`flag`)\cr compute and add the annotation table reporting the number of
#'   patients at risk matching the main grid of the Kaplan-Meier curve.
#'
#' @return A grid layout.
#'
#' @details The layout corresponds to a grid of two columns and five rows of unequal dimensions. Most of the
#'   dimensions are fixed; only the curve is flexible and will adapt to the remaining free space.
#'   * The left column gets the annotation of the `ggplot` (y-axis) and the names of the strata for the
#'     patients-at-risk tabulation. The main constraint is the width of the column, which must be wide
#'     enough to fit the strata names.
#'   * The right column receives the `ggplot`, the legend, the x-axis and the patients-at-risk table.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "tt", footnotes = "ff", yval = "Survival"
#' )
#' g_el <- h_decompose_gg(gg)
#' lyt <- h_km_layout(data = data_plot, g_el = g_el, title = "t", footnotes = "f")
#' grid.show.layout(lyt)
#' }
#'
#' @export
h_km_layout <- function(data, g_el, title, footnotes, annot_at_risk = TRUE) {
  strata <- as.factor(data$strata)
  strata_labels <- levels(strata)
  n_strata <- nlevels(strata)

  # Left column: wide enough for the y-axis annotation and for the longest strata label
  # (plus 7pt of padding).
  axis_width_pt <- as.numeric(grid::convertX(g_el$yaxis$width + g_el$ylab$width, "pt"))
  label_width_pt <- as.numeric(
    grid::convertX(
      grid::stringWidth(strata_labels) + grid::unit(7, "pt"), "pt"
    )
  )
  col_annot_width <- max(c(axis_width_pt, label_width_pt))

  has_title <- !is.null(title)
  has_footnotes <- !is.null(footnotes)

  # Height shared by the x-axis row below the curve and the one below the at-risk table.
  xaxis_height_pt <- grid::convertX(with(g_el, xaxis$height + ylab$width), "pt")

  # Core rows: curve, x-axis, legend, at-risk table, x-axis of the at-risk table.
  row_heights <- c(
    1,
    xaxis_height_pt,
    grid::convertX(g_el$guide$heights, "pt"),
    n_strata + 1,
    xaxis_height_pt
  )
  row_units <- c("null", "pt", "pt", "lines", "pt")
  # Rows that remain when the at-risk table (last two core rows) is not requested.
  rows_without_tbl <- rep(c(TRUE, FALSE), times = c(3, 2))

  if (has_title) {
    row_heights <- c(2, row_heights)
    row_units <- c("lines", row_units)
    rows_without_tbl <- c(TRUE, rows_without_tbl)
  }

  if (has_footnotes) {
    row_heights <- c(row_heights, 1)
    row_units <- c(row_units, "lines")
    rows_without_tbl <- c(rows_without_tbl, TRUE)
  }

  rows_kept <- if (annot_at_risk) {
    rep(TRUE, 5 + has_title + has_footnotes)
  } else {
    rows_without_tbl
  }

  grid::grid.layout(
    nrow = sum(rows_kept), ncol = 2,
    widths = grid::unit(c(col_annot_width, 1), c("pt", "null")),
    heights = grid::unit(
      x = row_heights[rows_kept],
      units = row_units[rows_kept]
    )
  )
}

#' Helper: Patient-at-Risk Grobs
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Two graphical objects are obtained, one corresponding to the row labeling and
#' the second to the number of patients at risk.
#'
#' @inheritParams g_km
#' @inheritParams h_ggkm
#' @param annot_tbl (`data.frame`)\cr annotation as prepared by [survival::summary.survfit()] which
#'   includes the number of patients at risk at given time points.
#' @param xlim (`numeric`)\cr the maximum value on the x-axis (used to
#'   ensure the at risk table aligns with the KM graph).
#'
#' @return A named `list` of two `gTree` objects: `at_risk` and `label`.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#'
#' data_plot <- h_data_plot(fit_km = fit_km)
#'
#' xticks <- h_xticks(data = data_plot)
#'
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "tt", footnotes = "ff", yval = "Survival"
#' )
#'
#' # The annotation table reports the patient at risk for a given strata and
#' # time (`xticks`).
#' annot_tbl <- summary(fit_km, time = xticks)
#' if (is.null(fit_km$strata)) {
#'   annot_tbl <- with(annot_tbl, data.frame(n.risk = n.risk, time = time, strata = "All"))
#' } else {
#'   strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals")
#'   levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2]
#'   annot_tbl <- data.frame(
#'     n.risk = annot_tbl$n.risk,
#'     time = annot_tbl$time,
#'     strata = annot_tbl$strata
#'   )
#' }
#'
#' # The annotation table is transformed into a grob.
#' tbl <- h_grob_tbl_at_risk(data = data_plot, annot_tbl = annot_tbl, xlim = max(xticks))
#'
#' # For the representation, the layout is estimated for which the decomposition
#' # of the graphic element is necessary.
#' g_el <- h_decompose_gg(gg)
#' lyt <- h_km_layout(data = data_plot, g_el = g_el, title = "t", footnotes = "f")
#'
#' grid::grid.newpage()
#' pushViewport(viewport(layout = lyt, height = .95, width = .95))
#' grid.rect(gp = grid::gpar(lty = 1, col = "purple", fill = "gray85", lwd = 1))
#' pushViewport(viewport(layout.pos.row = 4, layout.pos.col = 2))
#' grid.rect(gp = grid::gpar(lty = 1, col = "orange", fill = "gray85", lwd = 1))
#' grid::grid.draw(tbl$at_risk)
#' popViewport()
#' pushViewport(viewport(layout.pos.row = 4, layout.pos.col = 1))
#' grid.rect(gp = grid::gpar(lty = 1, col = "green3", fill = "gray85", lwd = 1))
#' grid::grid.draw(tbl$label)
#' }
#'
#' @export
h_grob_tbl_at_risk <- function(data, annot_tbl, xlim) {
  txtlines <- levels(as.factor(data$strata))
  nlines <- nlevels(as.factor(data$strata))
  # Row index of every annotation entry. Going through `as.factor()` leaves factor input
  # unchanged while also supporting character strata (e.g. a single "All" arm), which
  # `as.numeric()` alone would turn into `NA` with a coercion warning.
  y_str_unit <- as.numeric(as.factor(annot_tbl$strata))
  vp_table <- grid::plotViewport(margins = grid::unit(c(0, 0, 0, 0), "lines"))

  # Alternating grey stripes, one per stratum; used as background in both panels.
  row_stripes <- function() {
    grid::rectGrob(
      x = 0, y = grid::unit(c(1:nlines) - 1, "lines"),
      gp = grid::gpar(fill = c("gray95", "gray90"), alpha = 1, col = "white"),
      height = grid::unit(1, "lines"), just = "bottom", hjust = 0
    )
  }

  # Left panel: one right-aligned strata label per row, first stratum at the top.
  gb_table_left_annot <- grid::gList(
    row_stripes(),
    grid::textGrob(
      label = unique(annot_tbl$strata),
      x = .95,
      y = grid::unit(
        (max(unique(y_str_unit)) - unique(y_str_unit)) + .5,
        "native"
      ),
      hjust = 1,
      gp = grid::gpar(fontface = "italic", fontsize = 10)
    )
  )

  # Right panel: number at risk, placed at its time on the x scale and in its stratum row.
  gb_patient_at_risk <- grid::gList(
    row_stripes(),
    grid::textGrob(
      label = annot_tbl$n.risk,
      x = grid::unit(annot_tbl$time, "native"),
      y = grid::unit(
        (max(y_str_unit) - y_str_unit) + .5,
        "line"
      ) # maybe native
    )
  )

  list(
    at_risk = grid::gList(
      grid::gTree(
        vp = vp_table,
        children = grid::gList(
          grid::gTree(
            vp = grid::dataViewport(
              # 5% padding on both sides of [0, xlim].
              xscale = c(0, xlim) + c(-0.05, 0.05) * xlim,
              yscale = c(0, nlines + 1),
              extension = c(0.05, 0)
            ),
            children = grid::gList(gb_patient_at_risk)
          )
        )
      )
    ),
    label = grid::gList(
      grid::gTree(
        vp = grid::viewport(width = max(grid::stringWidth(txtlines))),
        children = grid::gList(
          grid::gTree(
            vp = grid::dataViewport(
              xscale = 0:1,
              yscale = c(0, nlines + 1),
              extension = c(0.0, 0)
            ),
            children = grid::gList(gb_table_left_annot)
          )
        )
      )
    )
  )
}

#' Helper Function: Survival Estimations
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Transform a survival fit to a table with groups in rows characterized by N, median and confidence interval.
#'
#' @inheritParams h_data_plot
#'
#' @return A summary table with statistics `N`, `Median`, and `XX% CI` (`XX` taken from `fit_km`).
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#'
#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "OS")
#' fit <- survfit(
#'   form = Surv(AVAL, 1 - CNSR) ~ ARMCD,
#'   data = adtte
#' )
#' h_tbl_median_surv(fit_km = fit)
#' }
#'
#' @export
h_tbl_median_surv <- function(fit_km, armval = "All") {
  # Summarise the fit once: both the statistics table and the confidence level come from it.
  fit_summary <- summary(fit_km)
  tbl <- fit_summary$table

  y <- if (is.null(fit_km$strata)) {
    # Single group: the table is a plain vector, so turn it into a one-row data frame.
    as.data.frame(t(tbl), row.names = armval)
  } else {
    # Row names look like "<variable>=<level>"; keep only the level. Stripping the prefix
    # directly avoids mislabelled rows when a name contains extra separators or text.
    rownames(tbl) <- sub("^[^=]*=", "", rownames(tbl))
    as.data.frame(tbl)
  }
  conf.int <- fit_summary$conf.int # nolint
  y$records <- round(y$records)
  y$median <- signif(y$median, 4)
  # The limit columns are named after the confidence level, e.g. "0.95LCL" / "0.95UCL".
  y$`CI` <- paste0(
    "(", signif(y[[paste0(conf.int, "LCL")]], 4), ", ", signif(y[[paste0(conf.int, "UCL")]], 4), ")"
  )
  stats::setNames(
    y[c("records", "median", "CI")],
    c("N", "Median", f_conf_level(conf.int))
  )
}

#' Helper Function: Survival Estimation Grob
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The survival fit is transformed into a grob containing a table with groups in
#' rows characterized by N, median and confidence interval (level taken from `fit_km`).
#'
#' @inheritParams g_km
#' @inheritParams h_data_plot
#' @param ttheme (`list`)\cr see [gridExtra::ttheme_default()].
#' @param x (`numeric`)\cr a value between 0 and 1 specifying x-location.
#' @param y (`numeric`)\cr a value between 0 and 1 specifying y-location.
#'
#' @return A `grob` of a table containing statistics `N`, `Median`, and `XX% CI` (`XX` taken from `fit_km`).
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "pink", fill = "gray85", lwd = 1))
#' tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_grob_median_surv() %>%
#'   grid::grid.draw()
#' }
#'
#' @export
h_grob_median_surv <- function(fit_km,
                               armval = "All",
                               x = 0.9,
                               y = 0.9,
                               ttheme = gridExtra::ttheme_default()) {
  # Render the median survival summary table as a table grob.
  tbl_grob <- gridExtra::tableGrob(
    d = h_tbl_median_surv(fit_km, armval = armval),
    theme = ttheme
  )

  # Viewport sized to the table and anchored by its top-right corner at (x, y),
  # shifted by a small offset expressed in lines.
  tbl_vp <- grid::viewport(
    x = grid::unit(x, "npc") + grid::unit(1, "lines"),
    y = grid::unit(y, "npc") + grid::unit(1.5, "lines"),
    height = sum(tbl_grob$heights),
    width = sum(tbl_grob$widths),
    just = c("right", "top")
  )

  tbl_tree <- grid::gTree(vp = tbl_vp, children = grid::gList(tbl_grob))
  grid::gList(tbl_tree)
}

#' Helper: Grid Object with y-axis Annotation
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Build the y-axis annotation from a decomposed `ggplot`.
#'
#' @param ylab (`gtable`)\cr the y-lab as a graphical object derived from a `ggplot`.
#' @param yaxis (`gtable`)\cr the y-axis as a graphical object derived from a `ggplot`.
#'
#' @return a `gTree` object containing the y-axis annotation from a `ggplot`.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "title", footnotes = "footnotes", yval = "Survival"
#' )
#'
#' g_el <- h_decompose_gg(gg)
#'
#' grid::grid.newpage()
#' pvp <- grid::plotViewport(margins = c(5, 4, 2, 20))
#' pushViewport(pvp)
#' grid::grid.draw(h_grob_y_annot(ylab = g_el$ylab, yaxis = g_el$yaxis))
#' grid.rect(gp = grid::gpar(lty = 1, col = "gray35", fill = NA))
#' }
#'
#' @export
h_grob_y_annot <- function(ylab, yaxis) {
  # Right-aligned viewport that is exactly as wide as the label and the axis together.
  annot_vp <- grid::viewport(
    width = grid::convertX(yaxis$width + ylab$width, "pt"),
    x = grid::unit(1, "npc"),
    just = "right"
  )

  # Label on the left, axis on the right, bound side by side.
  annot_tree <- grid::gTree(
    vp = annot_vp,
    children = grid::gList(cbind(ylab, yaxis))
  )

  grid::gList(annot_tree)
}

#' Helper Function: Pairwise CoxPH table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Create a `data.frame` of pairwise stratified or unstratified CoxPH analysis results.
#'
#' @inheritParams g_km
#'
#' @return A `data.frame` containing statistics `HR`, `XX% CI` (`XX` taken from `control_coxph_pw`),
#'   and `p-value (log-rank)`.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#'
#' adtte <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#'
#' h_tbl_coxph_pairwise(
#'   df = adtte,
#'   variables = list(tte = "AVAL", is_event = "is_event", arm = "ARM"),
#'   control_coxph_pw = control_coxph(conf_level = 0.9)
#' )
#' }
#'
#' @export
h_tbl_coxph_pairwise <- function(df,
                                 variables,
                                 control_coxph_pw = control_coxph()) {
  assert_df_with_variables(df, variables)
  arm <- variables$arm
  df[[arm]] <- factor(df[[arm]])
  # The first arm level is the reference; every other level is compared against it.
  ref_group <- levels(df[[arm]])[1]
  comp_group <- levels(df[[arm]])[-1]

  # `which()` drops NA comparisons, so records with a missing arm are left out of both
  # groups instead of being turned into all-NA rows by logical subsetting.
  df_ref <- df[which(df[[arm]] == ref_group), , drop = FALSE]

  results <- Map(function(comp) {
    res <- s_coxph_pairwise(
      df = df[which(df[[arm]] == comp), , drop = FALSE],
      .ref_group = df_ref,
      .in_ref_col = FALSE,
      .var = variables$tte,
      is_event = variables$is_event,
      strat = variables$strat,
      control = control_coxph_pw
    )
    res_df <- data.frame(
      hr = format(round(res$hr, 2), nsmall = 2),
      hr_ci = paste0(
        "(", format(round(res$hr_ci[1], 2), nsmall = 2), ", ",
        format(round(res$hr_ci[2], 2), nsmall = 2), ")"
      ),
      # NOTE(review): only the rounding limits the digits here, so trailing zeros are not
      # padded (e.g. "0.05"); `nsmall = 4` may have been intended - confirm before changing.
      pvalue = if (res$pvalue < 0.0001) "<0.0001" else format(round(res$pvalue, 4), trim = TRUE),
      stringsAsFactors = FALSE
    )
    colnames(res_df) <- c("HR", vapply(res[c("hr_ci", "pvalue")], obj_label, FUN.VALUE = "character"))
    row.names(res_df) <- comp
    res_df
  }, comp_group)

  # One row per comparison arm; `NULL` when there is no arm to compare with the reference.
  do.call(rbind, results)
}

#' Helper Function: CoxPH Grob
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Grob of the `data.frame` output from [h_tbl_coxph_pairwise()].
#'
#' @inheritParams h_grob_median_surv
#' @param ... arguments will be passed to [h_tbl_coxph_pairwise()].
#' @param x (`numeric`)\cr a value between 0 and 1 specifying x-location.
#' @param y (`numeric`)\cr a value between 0 and 1 specifying y-location.
#'
#' @return A `grob` of a table containing statistics `HR`, `XX% CI` (`XX` taken from `control_coxph_pw`),
#'   and `p-value (log-rank)`.
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "pink", fill = "gray85", lwd = 1))
#' data <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#' tbl_grob <- h_grob_coxph(
#'   df = data,
#'   variables = list(tte = "AVAL", is_event = "is_event", arm = "ARMCD"),
#'   control_coxph_pw = control_coxph(conf_level = 0.9), x = 0.5, y = 0.5
#' )
#' grid::grid.draw(tbl_grob)
#' }
#'
#' @export
h_grob_coxph <- function(...,
                         x = 0,
                         y = 0,
                         ttheme = gridExtra::ttheme_default(
                           base_size = 12,
                           padding = grid::unit(c(1, .5), "lines"),
                           core = list(bg_params = list(fill = c("grey95", "grey90"), alpha = .5))
                         )) {
  data <- h_tbl_coxph_pairwise(...)

  # Placeholder returned whenever the table cannot be drawn.
  empty_grob <- function() {
    grid::gList(
      grid::gTree(
        vp = NULL,
        children = NULL
      )
    )
  }

  # The pairwise table is `NULL` when the arm variable has no level to compare against
  # the reference. Handle that expected case explicitly rather than through a failure
  # of the table rendering.
  if (is.null(data)) {
    message(paste(
      "Warning: Cox table will not be displayed as there is",
      "not any level to be compared in the arm variable."
    ))
    return(empty_grob())
  }

  tryCatch(
    expr = {
      gt <- gridExtra::tableGrob(d = data, theme = ttheme)
      # Viewport sized to the table and anchored by its bottom-left corner at (x, y).
      vp <- grid::viewport(
        x = grid::unit(x, "npc") + grid::unit(1, "lines"),
        y = grid::unit(y, "npc") + grid::unit(1.5, "lines"),
        height = sum(gt$heights),
        width = sum(gt$widths),
        just = c("left", "bottom")
      )
      grid::gList(
        grid::gTree(
          vp = vp,
          children = grid::gList(gt)
        )
      )
    },
    error = function(e) {
      # Still best-effort (no table rather than a failure), but report the actual cause
      # instead of attributing every error to missing comparison levels.
      message("Warning: Cox table will not be displayed: ", conditionMessage(e))
      empty_grob()
    }
  )
}

#' Cox Regression Helper: Interactions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Test and estimate the effect of a treatment in interaction with a covariate.
#' The effect is estimated as the HR of the tested treatment for a given level
#' of the covariate, in comparison to the treatment control.
#'
#' @inheritParams argument_convention
#' @param x (`numeric` or `factor`)\cr the values of the effect to be tested.
#' @param effect (`string`)\cr the name of the effect to be tested and estimated.
#' @param covar (`string`)\cr the name of the covariate in the model.
#' @param mod (`coxph`)\cr the Cox regression model.
#' @param label (`string`)\cr the label to be returned as `term_label`.
#' @param control (`list`)\cr a list of controls as returned by [control_coxreg()].
#' @param ... see methods.
#'
#' @examples
#' library(survival)
#'
#' set.seed(1, kind = "Mersenne-Twister")
#'
#' # Testing dataset [survival::bladder].
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   data.frame(
#'     time = stop,
#'     status = event,
#'     armcd = as.factor(rx),
#'     covar1 = as.factor(enum),
#'     covar2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4,
#'       labels = c("F", "F", "M", "M")
#'     )
#'   )
#' )
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#'
#' plot(
#'   survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder),
#'   lty = 2:4,
#'   xlab = "Months",
#'   col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
#' )
#'
#' @name cox_regression_inter
NULL

#' @describeIn cox_regression_inter S3 generic helper function to determine interaction effect.
#'
#' @return
#' * `h_coxreg_inter_effect()` returns a `data.frame` of covariate interaction effects consisting of the following
#'   variables: `effect`, `term`, `term_label`, `level`, `n`, `hr`, `lcl`, `ucl`, `pval`, and `pval_inter`.
#'
#' @export
h_coxreg_inter_effect <- function(x,
                                  effect,
                                  covar,
                                  mod,
                                  label,
                                  control,
                                  ...) {
  # S3 generic: the method is selected from the class of the first argument, `x`.
  UseMethod("h_coxreg_inter_effect")
}

#' @describeIn cox_regression_inter Estimate the interaction with a `numeric` covariate.
#'
#' @param at (`list`)\cr a list with items named after the covariate, every
#'   item is a vector of levels at which the interaction should be estimated.
#'
#' @export
h_coxreg_inter_effect.numeric <- function(x,
                                          effect,
                                          covar,
                                          mod,
                                          label,
                                          control,
                                          at,
                                          ...) {
  all_betas <- stats::coef(mod)

  # Positions of the model terms (strata terms excluded) whose label matches `effect`;
  # exactly two are required.
  term_labels <- attr(stats::terms(mod), "term.labels")
  non_strata_labels <- term_labels[!grepl("strata\\(", term_labels)]
  term_indices <- grep(pattern = effect, x = non_strata_labels)
  checkmate::assert_vector(term_indices, len = 2)

  vcov_mat <- stats::vcov(mod)
  betas <- all_betas[term_indices]
  betas_var <- diag(vcov_mat)[term_indices]
  betas_cov <- vcov_mat[term_indices[1], term_indices[2]]

  # Covariate values at which to estimate the effect: the requested ones, else the median.
  requested <- at[[covar]]
  xval <- if (is.null(requested)) stats::median(x) else requested

  # Of the two selected coefficients, the one whose name does not mention `covar` is
  # treated as the main effect and the other one as the interaction.
  is_main <- !grepl(covar, names(betas))
  coef_hat <- betas[is_main] + xval * betas[!is_main]
  # Var(b_main + x * b_inter) = Var(b_main) + x^2 * Var(b_inter) + 2 * x * Cov(b_main, b_inter)
  coef_se <- sqrt(
    betas_var[is_main] +
      xval^2 * betas_var[!is_main] +
      2 * xval * betas_cov
  )

  # Two-sided normal quantile for the requested confidence level.
  q_norm <- stats::qnorm((1 + control$conf_level) / 2)
  half_width <- q_norm * coef_se

  data.frame(
    effect = "Covariate:",
    term = rep(covar, length(xval)),
    term_label = paste0("  ", xval),
    level = as.character(xval),
    n = NA,
    hr = exp(coef_hat),
    lcl = exp(coef_hat - half_width),
    ucl = exp(coef_hat + half_width),
    pval = NA,
    pval_inter = NA,
    stringsAsFactors = FALSE
  )
}

#' @describeIn cox_regression_inter Estimate the interaction with a `factor` covariate.
#'
#' @param data (`data.frame`)\cr the data frame on which the model was fit.
#'
#' @export
h_coxreg_inter_effect.factor <- function(x,
                                         effect,
                                         covar,
                                         mod,
                                         label,
                                         control,
                                         data,
                                         ...) {
  # Use the confidence level requested through `control`, consistent with the numeric
  # method. Fall back to 0.95 when `control` is absent or does not define a level.
  conf_level <- if (missing(control) || is.null(control$conf_level)) {
    0.95
  } else {
    control$conf_level
  }

  covar_levels <- levels(data[[covar]])

  # Only the first element of the returned list is reported.
  y <- h_coxreg_inter_estimations(
    variable = effect, given = covar,
    lvl_var = levels(data[[effect]]),
    lvl_given = covar_levels,
    mod = mod,
    conf_level = conf_level
  )[[1]]

  # One row per covariate level; counts and p-values are not available at this level.
  data.frame(
    effect = "Covariate:",
    term = rep(covar, nrow(y)),
    term_label = as.character(paste0("  ", covar_levels)),
    level = as.character(covar_levels),
    n = NA,
    hr = y[, "hr"],
    lcl = y[, "lcl"],
    ucl = y[, "ucl"],
    pval = NA,
    pval_inter = NA,
    stringsAsFactors = FALSE
  )
}

#' @describeIn cox_regression_inter A higher level function to get
#'   the results of the interaction test and the estimated values.
#'
#' @return
#' * `h_coxreg_extract_interaction()` returns the result of an interaction test and the estimated values. If
#'   no interaction, [h_coxreg_univar_extract()] is applied instead.
#'
#' @examples
#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder)
#' h_coxreg_extract_interaction(
#'   mod = mod, effect = "armcd", covar = "covar1", data = dta_bladder,
#'   control = control_coxreg()
#' )
#'
#' @export
h_coxreg_extract_interaction <- function(effect,
                                         covar,
                                         mod,
                                         data,
                                         at = list(),
                                         control) {
  # `at` defaults to an empty list, so a numeric covariate without requested levels is
  # estimated at its median instead of failing on a missing argument.

  # Without any second-order term there is no interaction to test: fall back to the
  # univariate extraction and leave the interaction p-value empty.
  if (!any(attr(stats::terms(mod), "order") == 2)) {
    y <- h_coxreg_univar_extract(
      effect = effect, covar = covar, mod = mod, data = data, control = control
    )
    y$pval_inter <- NA
    return(y)
  }

  test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]

  # Test the main treatment effect.
  mod_aov <- muffled_car_anova(mod, test_statistic)
  sum_anova <- broom::tidy(mod_aov)
  pval <- sum_anova[sum_anova$term == effect, ][["p.value"]]

  # Test the interaction effect (terms whose name contains ":").
  pval_inter <- sum_anova[grep(":", sum_anova$term), ][["p.value"]]

  covar_label <- unname(labels_or_names(data[covar]))

  # Header row: covariate label with the test results, no hazard ratio.
  covar_test <- data.frame(
    effect = "Covariate:",
    term = covar,
    term_label = covar_label,
    level = "",
    n = mod$n, hr = NA, lcl = NA, ucl = NA, pval = pval,
    pval_inter = pval_inter,
    stringsAsFactors = FALSE
  )

  # Estimate the interaction; the method is chosen from the class of the covariate.
  y <- h_coxreg_inter_effect(
    data[[covar]],
    covar = covar,
    effect = effect,
    mod = mod,
    label = covar_label,
    at = at,
    control = control,
    data = data
  )
  rbind(covar_test, y)
}

#' @describeIn cox_regression_inter Hazard ratio estimation in interactions.
#'
#' @param variable,given (`string`)\cr the name of variables in interaction. We seek the estimation
#'   of the levels of `variable` given the levels of `given`.
#' @param lvl_var,lvl_given (`character`)\cr corresponding levels as given by [levels()].
#' @param mod (`coxph`)\cr a fitted Cox regression model (see [survival::coxph()]).
#'
#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A)
#'   and Sex (F, M; reference Female) and the model being abbreviated: y ~ Arm + Sex + Arm:Sex.
#'   The cox regression estimates the coefficients along with a variance-covariance matrix for:
#'
#'   - b1 (arm b), b2 (arm c)
#'   - b3 (sex m)
#'   - b4 (arm b: sex m), b5 (arm c: sex m)
#'
#'   The estimation of the Hazard Ratio for arm C/sex M is given in reference
#'   to arm A/sex M by exp(b2 + b3 + b5) / exp(b3) = exp(b2 + b5).
#'   The interaction coefficient is deduced by b2 + b5 while the standard error
#'   is obtained as
#'   \eqn{\sqrt{Var(b2) + Var(b5) + 2 Cov(b2, b5)}}{sqrt(Var(b2) + Var(b5) + 2 * Cov(b2, b5))}.
#'
#' @return
#' * `h_coxreg_inter_estimations()` returns a list of matrices (one per level of variable) with rows corresponding
#'   to the combinations of `variable` and `given`, with columns:
#'   * `coef_hat`: Estimation of the coefficient.
#'   * `coef_se`: Standard error of the estimation.
#'   * `hr`: Hazard ratio.
#'   * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio.
#'
#' @examples
#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder)
#' result <- h_coxreg_inter_estimations(
#'   variable = "armcd", given = "covar1",
#'   lvl_var = levels(dta_bladder$armcd),
#'   lvl_given = levels(dta_bladder$covar1),
#'   mod = mod, conf_level = .95
#' )
#' result
#'
#' @export
h_coxreg_inter_estimations <- function(variable,
                                       given,
                                       lvl_var,
                                       lvl_given,
                                       mod,
                                       conf_level = 0.95) {
  # Coefficient names as they appear in the model. The first level of
  # `variable` is the reference level and therefore has no coefficient.
  var_terms <- paste0(variable, lvl_var[-1])
  given_terms <- paste0(given, lvl_given)

  # Every (variable level, given level) pair, ordered by variable then given.
  combos <- expand.grid(variable = var_terms, given = given_terms)
  combos <- combos[order(combos$variable, combos$given), ]
  group_by_var <- combos$variable
  combo_names <- paste(combos$variable, combos$given, sep = "/")

  # The interaction coefficient may be named in either order in the model.
  var_chr <- as.character(combos$variable)
  given_chr <- as.character(combos$given)
  inter_fwd <- paste0(var_chr, ":", given_chr)
  inter_rev <- paste0(given_chr, ":", var_chr)

  # Named template with one zero per column of the model matrix.
  zero_row <- stats::model.matrix(mod)[1, ]
  zero_row[zero_row != 0] <- 0

  # One indicator column per combination: 1 for the main effect of the variable
  # level and for its interaction with the given level, 0 elsewhere.
  contrast <- vapply(
    seq_len(nrow(combos)),
    function(i) {
      indicator <- zero_row
      indicator[names(indicator) %in% c(var_chr[i], inter_fwd[i], inter_rev[i])] <- 1
      indicator
    },
    FUN.VALUE = zero_row
  )
  colnames(contrast) <- combo_names

  # Point estimate: sum of the selected coefficients.
  coef_hat <- t(contrast) %*% as.matrix(stats::coef(mod))
  colnames(coef_hat) <- "coef"

  # Standard error: square root of the sum of the selected variance-covariance entries.
  sigma <- stats::vcov(mod)
  coef_se <- vapply(
    seq_len(ncol(contrast)),
    function(j) {
      active <- as.logical(contrast[, j])
      sqrt(sum(sigma[active, active]))
    },
    FUN.VALUE = numeric(1)
  )

  # Hazard ratio and its confidence limits on the exponentiated scale.
  z_crit <- stats::qnorm((1 + conf_level) / 2)
  est <- cbind(coef_hat, `se(coef)` = coef_se)
  est <- cbind(
    est,
    hr = exp(est[, "coef"]),
    lcl = exp(est[, "coef"] - z_crit * est[, "se(coef)"]),
    ucl = exp(est[, "coef"] + z_crit * est[, "se(coef)"])
  )

  # One matrix per level of `variable`.
  out <- by(est, group_by_var, identity)
  out <- lapply(out, as.matrix)
  attr(out, "details") <- paste0(
    "Estimations of ", variable,
    " hazard ratio given the level of ", given, " compared to ",
    variable, " level ", lvl_var[1], "."
  )
  out
}

#' Count Patients with Marked Laboratory Abnormalities
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Primary analysis variable `.var` indicates whether single, replicated or last marked laboratory
#' abnormality was observed (`factor`). Additional analysis variables are `id` (`character` or `factor`)
#' and `direction` (`factor`) indicating the direction of the abnormality. Denominator is number of
#' patients with at least one valid measurement during the analysis.
#'   * For `Single, not last` and `Last or replicated`: Numerator is number of patients
#'     with `Single, not last` and `Last or replicated` levels, respectively.
#'   * For `Any Abnormality`: Numerator is the number of patients with either single or
#'     replicated marked abnormalities.
#'
#' @inheritParams argument_convention
#' @param category (`list`)\cr with different marked category names for single
#'   and last or replicated.
#'
#' @note `Single, not last` and `Last or replicated` levels are mutually exclusive. If a patient has
#'   abnormalities that meet both the `Single, not last` and `Last or replicated` criteria, then the
#'   patient will be counted only under the `Last or replicated` category.
#'
#' @name abnormal_by_marked
NULL

#' @describeIn abnormal_by_marked Statistics function for patients with marked lab abnormalities.
#'
#' @return
#' * `s_count_abnormal_by_marked()` returns statistic `count_fraction` with `Single, not last`,
#'   `Last or replicated`, and `Any Abnormality` results.
#'
#' @examples
#' library(dplyr)
#'
#' df <- data.frame(
#'   USUBJID = as.character(c(rep(1, 5), rep(2, 5), rep(1, 5), rep(2, 5))),
#'   ARMCD = factor(c(rep("ARM A", 5), rep("ARM B", 5), rep("ARM A", 5), rep("ARM B", 5))),
#'   ANRIND = factor(c(
#'     "NORMAL", "HIGH", "HIGH", "HIGH HIGH", "HIGH",
#'     "HIGH", "HIGH", "HIGH HIGH", "NORMAL", "HIGH HIGH", "NORMAL", "LOW", "LOW", "LOW LOW", "LOW",
#'     "LOW", "LOW", "LOW LOW", "NORMAL", "LOW LOW"
#'   )),
#'   ONTRTFL = rep(c("", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"), 2),
#'   PARAMCD = factor(c(rep("CRP", 10), rep("ALT", 10))),
#'   AVALCAT1 = factor(rep(c("", "", "", "SINGLE", "REPLICATED", "", "", "LAST", "", "SINGLE"), 2)),
#'   stringsAsFactors = FALSE
#' )
#'
#' df <- df %>%
#'   mutate(abn_dir = factor(
#'     case_when(
#'       ANRIND == "LOW LOW" ~ "Low",
#'       ANRIND == "HIGH HIGH" ~ "High",
#'       TRUE ~ ""
#'     ),
#'     levels = c("Low", "High")
#'   ))
#'
#' # Select only post-baseline records.
#' df <- df %>% filter(ONTRTFL == "Y")
#' df_crp <- df %>%
#'   filter(PARAMCD == "CRP") %>%
#'   droplevels()
#' full_parent_df <- list(df_crp, "not_needed")
#' cur_col_subset <- list(rep(TRUE, nrow(df_crp)), "not_needed")
#' spl_context <- data.frame(
#'   split = c("PARAMCD", "GRADE_DIR"),
#'   full_parent_df = I(full_parent_df),
#'   cur_col_subset = I(cur_col_subset)
#' )
#' # Internal function - s_count_abnormal_by_marked
#' \dontrun{
#' s_count_abnormal_by_marked(
#'   df = df_crp %>% filter(abn_dir == "High"),
#'   .spl_context = spl_context,
#'   .var = "AVALCAT1",
#'   variables = list(id = "USUBJID", param = "PARAMCD", direction = "abn_dir")
#' )
#' }
#'
#' @keywords internal
s_count_abnormal_by_marked <- function(df,
                                       .var = "AVALCAT1",
                                       .spl_context,
                                       category = list(single = "SINGLE", last_replicated = c("LAST", "REPLICATED")),
                                       variables = list(id = "USUBJID", param = "PARAM", direction = "abn_dir")) {
  # Argument checks: `df` may hold at most one distinct direction value.
  checkmate::assert_string(.var)
  checkmate::assert_list(variables)
  checkmate::assert_list(category)
  checkmate::assert_subset(names(category), c("single", "last_replicated"))
  checkmate::assert_subset(names(variables), c("id", "param", "direction"))
  checkmate::assert_vector(unique(df[[variables$direction]]), max.len = 1)

  assert_df_with_variables(df, c(aval = .var, variables))
  checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))

  # Denominator: distinct subject ids of the parent data at the parameter split,
  # restricted to the current column. Taking distinct ids means a subject with
  # records in several directions is counted only once.
  param_ctx <- .spl_context[.spl_context$split == variables[["param"]], ]
  parent_ids <- param_ctx$full_parent_df[[1]][[variables[["id"]]]]
  denom <- length(unique(parent_ids[param_ctx$cur_col_subset[[1]]]))

  # Nothing to count against: all statistics are zero.
  if (denom == 0) {
    return(list(count_fraction = list(
      "Single, not last" = c(0, 0),
      "Last or replicated" = c(0, 0),
      "Any Abnormality" = c(0, 0)
    )))
  }

  # Distinct subject ids whose analysis value falls into the given category levels.
  ids_in_category <- function(cat_levels) {
    unique(df[df[[.var]] %in% cat_levels, variables$id, drop = TRUE])
  }
  # Count together with its fraction of the denominator.
  as_count_fraction <- function(k) {
    c(k, k / denom)
  }

  ids_last_replicated <- ids_in_category(category[["last_replicated"]])
  # Subjects with both kinds of abnormality are kept in the last/replicated group only.
  ids_single_only <- setdiff(ids_in_category(category[["single"]]), ids_last_replicated)

  n_single <- length(ids_single_only)
  n_last_replicated <- length(ids_last_replicated)

  list(count_fraction = list(
    "Single, not last" = as_count_fraction(n_single),
    "Last or replicated" = as_count_fraction(n_last_replicated),
    "Any Abnormality" = as_count_fraction(n_single + n_last_replicated)
  ))
}

#' @describeIn abnormal_by_marked Formatted analysis function which is used as `afun`
#'   in `count_abnormal_by_marked()`.
#'
#' @return
#' * `a_count_abnormal_by_marked()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_count_abnormal_by_marked
#' \dontrun{
#' # Use the Formatted Analysis function for `analyze()`. We need to ungroup `count_fraction` first
#' # so that the `rtables` formatting function `format_count_fraction()` can be applied correctly.
#' afun <- make_afun(a_count_abnormal_by_marked, .ungroup_stats = "count_fraction")
#' afun(
#'   df = df_crp %>% filter(abn_dir == "High"),
#'   .spl_context = spl_context,
#'   variables = list(id = "USUBJID", param = "PARAMCD", direction = "abn_dir")
#' )
#' }
#'
#' @keywords internal
a_count_abnormal_by_marked <- make_afun(
  s_count_abnormal_by_marked,
  # The `count_fraction` statistic is rendered with the count/fraction formatter.
  .formats = list(count_fraction = format_count_fraction)
)

#' @describeIn abnormal_by_marked Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_by_marked()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal_by_marked()` to the table layout.
#'
#' @examples
#' map <- unique(
#'   df[df$abn_dir %in% c("Low", "High") & df$AVALCAT1 != "", c("PARAMCD", "abn_dir")]
#' ) %>%
#'   lapply(as.character) %>%
#'   as.data.frame() %>%
#'   arrange(PARAMCD, abn_dir)
#'
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   split_rows_by("PARAMCD") %>%
#'   summarize_num_patients(
#'     var = "USUBJID",
#'     .stats = "unique_count"
#'   ) %>%
#'   split_rows_by(
#'     "abn_dir",
#'     split_fun = trim_levels_to_map(map)
#'   ) %>%
#'   count_abnormal_by_marked(
#'     var = "AVALCAT1",
#'     variables = list(
#'       id = "USUBJID",
#'       param = "PARAMCD",
#'       direction = "abn_dir"
#'     )
#'   ) %>%
#'   build_table(df = df)
#'
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   split_rows_by("PARAMCD") %>%
#'   summarize_num_patients(
#'     var = "USUBJID",
#'     .stats = "unique_count"
#'   ) %>%
#'   split_rows_by(
#'     "abn_dir",
#'     split_fun = trim_levels_in_group("abn_dir")
#'   ) %>%
#'   count_abnormal_by_marked(
#'     var = "AVALCAT1",
#'     variables = list(
#'       id = "USUBJID",
#'       param = "PARAMCD",
#'       direction = "abn_dir"
#'     )
#'   ) %>%
#'   build_table(df = df)
#'
#' @export
count_abnormal_by_marked <- function(lyt,
                                     var,
                                     ...,
                                     .stats = NULL,
                                     .formats = NULL,
                                     .labels = NULL,
                                     .indent_mods = NULL) {
  checkmate::assert_string(var)

  # Specialise the formatted analysis function with the user's selection of
  # statistics, formats, labels and indentation; `count_fraction` is ungrouped.
  marked_afun <- make_afun(
    a_count_abnormal_by_marked,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "count_fraction"
  )

  # Arguments given through `...` are handed to `analyze()` as `extra_args`.
  lyt_out <- analyze(
    lyt = lyt,
    vars = var,
    afun = marked_afun,
    show_labels = "hidden",
    extra_args = list(...)
  )
  lyt_out
}

#' Helper Functions for Tabulating Biomarker Effects on Binary Response by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions which are documented here separately to not confuse the user
#' when reading about the user-facing functions.
#'
#' @inheritParams response_biomarkers_subgroups
#' @inheritParams extract_rsp_biomarkers
#' @inheritParams argument_convention
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   mutate(rsp = AVALC == "CR")
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' @name h_response_biomarkers_subgroups
NULL

#' @describeIn h_response_biomarkers_subgroups helps with converting the "response" function variable list
#'   to the "logistic regression" variable list. The reason is that currently there is an
#'   inconsistency between the variable names accepted by `extract_rsp_subgroups()` and `fit_logistic()`.
#'
#' @param biomarker (`string`)\cr the name of the biomarker variable.
#'
#' @return
#' * `h_rsp_to_logistic_variables()` returns a named `list` of elements `response`, `arm`, `covariates`, and `strata`.
#'
#' @examples
#' # This is how the variable list is converted internally.
#' h_rsp_to_logistic_variables(
#'   variables = list(
#'     rsp = "RSP",
#'     covariates = c("A", "B"),
#'     strat = "D"
#'   ),
#'   biomarker = "AGE"
#' )
#'
#' @export
h_rsp_to_logistic_variables <- function(variables, biomarker) {
  checkmate::assert_list(variables)
  checkmate::assert_string(variables$rsp)
  checkmate::assert_string(biomarker)

  # Rename the elements: `rsp` -> `response`, `strat` -> `strata`, and the
  # biomarker takes the `arm` slot. Absent elements are kept as `NULL` entries.
  stats::setNames(
    list(variables$rsp, biomarker, variables$covariates, variables$strat),
    c("response", "arm", "covariates", "strata")
  )
}

#' @describeIn h_response_biomarkers_subgroups prepares estimates for number of responses, patients and
#'   overall response rate, as well as odds ratio estimates, confidence intervals and p-values, for multiple
#'   biomarkers in a given single data set.
#'   `variables` corresponds to names of variables found in `data`, passed as a named list and requires elements
#'   `rsp` and `biomarkers` (vector of continuous biomarker variables) and optionally `covariates`
#'   and `strat`.
#'
#' @return
#' * `h_logistic_mult_cont_df()` returns a `data.frame` containing estimates and statistics for the selected biomarkers.
#'
#' @examples
#' # For a single population, estimate separately the effects
#' # of two biomarkers.
#' df <- h_logistic_mult_cont_df(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX"
#'   ),
#'   data = adrs_f
#' )
#' df
#'
#' # If the data set is empty, still the corresponding rows with missings are returned.
#' h_logistic_mult_cont_df(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     strat = "STRATA1"
#'   ),
#'   data = adrs_f[NULL, ]
#' )
#'
#' @export
h_logistic_mult_cont_df <- function(variables,
                                    data,
                                    control = control_logistic()) {
  assert_df_with_variables(data, variables)

  checkmate::assert_character(variables$biomarkers, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  # Read the confidence level once so that the level used for the interval and
  # the level reported in the `conf_level` column cannot diverge.
  conf_level <- control[["conf_level"]]
  pval_label <- "p-value (Wald)"

  # If there is any data, run model, otherwise return empty results.
  if (nrow(data) > 0) {
    # One model per biomarker; the biomarker takes the `arm` role in the
    # variable list passed to the logistic regression.
    l_result <- lapply(variables$biomarkers, function(bm) {
      model_fit <- fit_logistic(
        variables = h_rsp_to_logistic_variables(variables, bm),
        data = data,
        response_definition = control$response_definition
      )
      result <- h_logistic_simple_terms(
        x = bm,
        fit_glm = model_fit,
        conf_level = conf_level
      )
      # The response is read from the model frame for `glm` fits and from the
      # "status" column of `y` otherwise.
      # NOTE(review): the non-`glm` case is presumably the stratified fit
      # returned by `fit_logistic()` - confirm.
      resp_vector <- if (inherits(model_fit, "glm")) {
        model_fit$model[[variables$rsp]]
      } else {
        as.logical(as.matrix(model_fit$y)[, "status"])
      }
      data.frame(
        # Dummy column needed downstream to create a nested header.
        biomarker = bm,
        biomarker_label = formatters::var_labels(data[bm], fill = TRUE),
        n_tot = length(resp_vector),
        n_rsp = sum(resp_vector),
        prop = mean(resp_vector),
        or = as.numeric(result[1L, "odds_ratio"]),
        lcl = as.numeric(result[1L, "lcl"]),
        ucl = as.numeric(result[1L, "ucl"]),
        conf_level = conf_level,
        pval = as.numeric(result[1L, "pvalue"]),
        pval_label = pval_label,
        stringsAsFactors = FALSE
      )
    })
    do.call(rbind, args = c(l_result, make.row.names = FALSE))
  } else {
    # No data: one row per biomarker with zero counts and missing estimates.
    data.frame(
      biomarker = variables$biomarkers,
      biomarker_label = formatters::var_labels(data[variables$biomarkers], fill = TRUE),
      n_tot = 0L,
      n_rsp = 0L,
      prop = NA,
      or = NA,
      lcl = NA,
      ucl = NA,
      conf_level = conf_level,
      pval = NA,
      pval_label = pval_label,
      row.names = seq_along(variables$biomarkers),
      stringsAsFactors = FALSE
    )
  }
}

#' @describeIn h_response_biomarkers_subgroups prepares a single sub-table given a `df_sub` containing
#'   the results for a single biomarker.
#'
#' @param df (`data.frame`)\cr results for a single biomarker, as part of what is
#'   returned by [extract_rsp_biomarkers()] (it needs a couple of columns which are
#'   added by that high-level function relative to what is returned by [h_logistic_mult_cont_df()],
#'   see the example).
#'
#' @return
#' * `h_tab_rsp_one_biomarker()` returns an `rtables` table object with the given statistics arranged in columns.
#'
#' @examples
#' # Starting from above `df`, zoom in on one biomarker and add required columns.
#' df1 <- df[1, ]
#' df1$subgroup <- "All patients"
#' df1$row_type <- "content"
#' df1$var <- "ALL"
#' df1$var_label <- "All patients"
#'
#' # Internal function - h_tab_rsp_one_biomarker
#' \dontrun{
#' h_tab_rsp_one_biomarker(
#'   df1,
#'   vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval")
#' )
#' }
#'
#' @export
h_tab_rsp_one_biomarker <- function(df,
                                    vars) {
  # Analysis functions for the requested statistics only.
  selected_afuns <- a_response_subgroups()[vars]
  # Column description, using the confidence level and p-value label stored in
  # the first row of `df`.
  col_spec <- d_rsp_subgroups_colvars(
    vars,
    conf_level = df$conf_level[1],
    method = df$pval_label[1]
  )
  h_tab_one_biomarker(df = df, afuns = selected_afuns, colvars = col_spec)
}

#' Tabulate Biomarker Effects on Binary Response by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tabulate the estimated effects of multiple continuous biomarker variables
#' on a binary response endpoint across population subgroups.
#'
#' @param df (`data.frame`)\cr containing all analysis variables, as returned by
#'   [extract_rsp_biomarkers()].
#' @param vars (`character`)\cr the names of statistics to be reported among:
#'   * `n_tot`: Total number of patients per group.
#'   * `n_rsp`: Total number of responses per group.
#'   * `prop`: Total response proportion per group.
#'   * `or`: Odds ratio.
#'   * `ci`: Confidence interval of odds ratio.
#'   * `pval`: p-value of the effect.
#'   Note, the statistics `n_tot`, `or` and `ci` are required.
#'
#' @return An `rtables` table summarizing biomarker effects on binary response by subgroup.
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. The tables are then typically used as input for forest plots.
#'
#' @note In contrast to [tabulate_rsp_subgroups()] this tabulation function does
#'   not start from an input layout `lyt`. This is because internally the table is
#'   created by combining multiple subtables.
#'
#' @seealso [h_tab_rsp_one_biomarker()] which is used internally, [extract_rsp_biomarkers()].
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   mutate(rsp = AVALC == "CR")
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#' \dontrun{
#' ## Table with default columns.
#' # df <- <need_data_input_to_work>
#' tabulate_rsp_biomarkers(df)
#'
#' ## Table with a manually chosen set of columns: leave out "pval", reorder.
#' tab <- tabulate_rsp_biomarkers(
#'   df = df,
#'   vars = c("n_rsp", "ci", "n_tot", "prop", "or")
#' )
#'
#' ## Finally produce the forest plot.
#' g_forest(tab, xlim = c(0.7, 1.4))
#' }
#'
#' @export
#' @name response_biomarkers_subgroups
tabulate_rsp_biomarkers <- function(df,
                                    vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval")) {
  checkmate::assert_data_frame(df)
  checkmate::assert_character(df$biomarker)
  checkmate::assert_character(df$biomarker_label)
  checkmate::assert_subset(vars, c("n_tot", "n_rsp", "prop", "or", "ci", "pval"))

  # Builds the sub-table of one biomarker and sets the label at the path of its
  # first row to the biomarker label.
  build_biomarker_tab <- function(bm_df) {
    bm_tab <- h_tab_rsp_one_biomarker(df = bm_df, vars = vars)
    first_row_path <- row_paths(bm_tab)[[1]][1]
    label_at_path(bm_tab, path = first_row_path) <- bm_df$biomarker_label[1]
    bm_tab
  }

  # One sub-table per biomarker, stacked into a single table.
  tabs_by_biomarker <- lapply(split(df, f = df$biomarker), build_biomarker_tab)
  combined <- do.call(rbind, tabs_by_biomarker)

  # Attach forest-plot attributes: header text and the positions in `vars` of
  # the odds ratio, its confidence interval and the total count.
  structure(
    combined,
    forest_header = paste0(c("Lower", "Higher"), "\nBetter"),
    col_x = match("or", vars),
    col_ci = match("ci", vars),
    col_symbol_size = grep("n_tot", vars)
  )
}

#' Prepares Response Data Estimates for Multiple Biomarkers in a Single Data Frame
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares estimates for number of responses, patients and overall response rate,
#' as well as odds ratio estimates, confidence intervals and p-values,
#' for multiple biomarkers across population subgroups in a single data frame.
#' `variables` corresponds to the names of variables found in `data`, passed as a
#' named list and requires elements `rsp` and `biomarkers` (vector of continuous
#' biomarker variables) and optionally `covariates`, `subgroups` and `strat`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams response_subgroups
#' @param control (named `list`)\cr controls for the response definition and the
#'   confidence level produced by [control_logistic()].
#'
#' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_rsp`,
#'   `prop`, `or`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`,
#'   `var_label`, and `row_type`.
#'
#' @note You can also specify a continuous variable in `rsp` and then use the
#'   `response_definition` control to convert that internally to a logical
#'   variable reflecting binary response.
#'
#' @seealso [h_logistic_mult_cont_df()] which is used internally.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   mutate(rsp = AVALC == "CR")
#'
#' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`,
#' # in logistic regression models with one covariate `SEX`. The subgroups
#' # are defined by the levels of `BMRKR2`.
#' df <- extract_rsp_biomarkers(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adrs_f
#' )
#' df
#'
#' # Here we group the levels of `BMRKR2` manually, and we add a stratification
#' # variable `STRATA1`. We also here use a continuous variable `EOSDY`
#' # which is then binarized internally (response is defined as this variable
#' # being larger than 500).
#' df_grouped <- extract_rsp_biomarkers(
#'   variables = list(
#'     rsp = "EOSDY",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     subgroups = "BMRKR2",
#'     strat = "STRATA1"
#'   ),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   ),
#'   control = control_logistic(
#'     response_definition = "I(response > 500)"
#'   )
#' )
#' df_grouped
#'
#' @export
extract_rsp_biomarkers <- function(variables,
                                   data,
                                   groups_lists = list(),
                                   control = control_logistic(),
                                   label_all = "All Patients") {
  assert_list_of_variables(variables)
  checkmate::assert_string(variables$rsp)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_string(label_all)

  # Estimates on the full data set, tagged as the "content" rows.
  overall <- h_logistic_mult_cont_df(
    variables = variables,
    data = data,
    control = control
  )
  overall[["subgroup"]] <- label_all
  overall[["var"]] <- "ALL"
  overall[["var_label"]] <- label_all
  overall[["row_type"]] <- "content"

  # Without subgroup variables the overall result is all there is.
  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # Estimates for one subgroup, with the subgroup's label row repeated once per
  # result row and bound on as extra columns.
  estimate_subgroup <- function(grp) {
    result <- h_logistic_mult_cont_df(
      variables = variables,
      data = grp$df,
      control = control
    )
    result_labels <- grp$df_labels[rep(1, times = nrow(result)), ]
    cbind(result, result_labels)
  }

  subgroup_data <- h_split_by_subgroups(
    data,
    variables$subgroups,
    groups_lists = groups_lists
  )
  subgroup_results <- lapply(subgroup_data, estimate_subgroup)
  by_subgroup <- do.call(rbind, args = c(subgroup_results, make.row.names = FALSE))
  by_subgroup[["row_type"]] <- "analysis"

  rbind(overall, by_subgroup)
}

#' Compare Variables Between Groups
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Comparison with a reference group for different `x` objects.
#'
#' @inheritParams argument_convention
#'
#' @note
#' * For factor variables, `denom` for factor proportions can only be `n` since the purpose is to compare proportions
#'   between columns, therefore a row-based proportion would not make sense. Proportion based on `N_col` would
#'   be difficult since we use counts for the chi-squared test statistic, therefore missing values should be accounted
#'   for as explicit factor levels.
#' * For character variables, automatic conversion to factor does not guarantee that the table
#'   will be generated correctly. In particular for sparse tables this very likely can fail.
#'   Therefore it is always better to manually convert character variables to factors during pre-processing.
#' * For `compare_vars()`, the column split must define a reference group via `ref_group` so that the comparison
#'   is well defined.
#' * When factor variables contain `NA`, it is expected that `NA` values have been conveyed to `na_level`
#'   appropriately beforehand via [df_explicit_na()].
#'
#' @seealso Relevant constructor function [create_afun_compare()], and [s_summary()] which is used internally
#'   to compute a summary within `s_compare()`.
#'
#' @name compare_variables
#' @include summarize_variables.R
NULL

#' @describeIn compare_variables S3 generic function to produce a comparison summary.
#'
#' @return
#' * `s_compare()` returns output of [s_summary()] and comparisons versus the reference group in the form of p-values.
#'
#' @export
s_compare <- function(x,
                      .ref_group,
                      .in_ref_col,
                      ...) {
  # S3 generic: dispatch happens on the class of the first argument, `x`.
  UseMethod("s_compare")
}

#' @describeIn compare_variables Method for `numeric` class. This uses the standard t-test
#'   to calculate the p-value.
#'
#' @method s_compare numeric
#'
#' @examples
#' # `s_compare.numeric`
#'
#' ## Usual case where both this and the reference group vector have more than 1 value.
#' s_compare(rnorm(10, 5, 1), .ref_group = rnorm(5, -5, 1), .in_ref_col = FALSE)
#'
#' ## If one group has not more than 1 value, then p-value is not calculated.
#' s_compare(rnorm(10, 5, 1), .ref_group = 1, .in_ref_col = FALSE)
#'
#' ## Empty numeric does not fail, it returns NA-filled items and no p-value.
#' s_compare(numeric(), .ref_group = numeric(), .in_ref_col = FALSE)
#'
#' @export
s_compare.numeric <- function(x,
                              .ref_group,
                              .in_ref_col,
                              ...) {
  checkmate::assert_numeric(x)
  checkmate::assert_numeric(.ref_group)
  checkmate::assert_flag(.in_ref_col)

  summary_stats <- s_summary.numeric(x = x, ...)

  # A t-test p-value is computed only outside the reference column and when
  # both groups have more than one available value; otherwise `pval` is an
  # empty character vector.
  can_test <- !.in_ref_col && n_available(x) > 1 && n_available(.ref_group) > 1
  if (can_test) {
    summary_stats$pval <- stats::t.test(x, .ref_group)$p.value
  } else {
    summary_stats$pval <- character()
  }

  summary_stats
}

#' @describeIn compare_variables Method for `factor` class. This uses the chi-squared test
#'   to calculate the p-value.
#'
#' @param denom (`string`)\cr choice of denominator for factor proportions,
#'   can only be `n` (number of values in this row and column intersection).
#'
#' @method s_compare factor
#'
#' @examples
#' # `s_compare.factor`
#'
#' ## Basic usage:
#' x <- factor(c("a", "a", "b", "c", "a"))
#' y <- factor(c("a", "b", "c"))
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE)
#'
#' ## Management of NA values.
#' x <- explicit_na(factor(c("a", "a", "b", "c", "a", NA, NA)))
#' y <- explicit_na(factor(c("a", "b", "c", NA)))
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE)
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE)
#'
#' @export
s_compare.factor <- function(x,
                             .ref_group,
                             .in_ref_col,
                             denom = "n",
                             na.rm = TRUE, # nolint
                             na_level = "<Missing>",
                             ...) {
  checkmate::assert_flag(.in_ref_col)
  assert_valid_factor(x, any.missing = FALSE)
  assert_valid_factor(.ref_group, any.missing = FALSE)
  denom <- match.arg(denom)

  # The summary is computed on `x` as supplied, before `na_level` is discarded below.
  summary_stats <- s_summary.factor(
    x = x,
    denom = denom,
    na.rm = na.rm,
    na_level = na_level,
    ...
  )

  # With `na.rm`, the explicit missing level is discarded from both groups
  # before they are compared.
  if (na.rm) {
    x <- fct_discard(x, na_level)
    .ref_group <- fct_discard(.ref_group, na_level)
  }

  checkmate::assert_factor(x, levels = levels(.ref_group), min.levels = 2)

  # Chi-squared test on the 2 x k table of level counts, only outside the
  # reference column and when both groups are non-empty; otherwise `pval` is an
  # empty character vector.
  can_test <- !.in_ref_col && length(x) > 0 && length(.ref_group) > 0
  if (can_test) {
    level_counts <- rbind(table(x), table(.ref_group))
    summary_stats$pval <- suppressWarnings(stats::chisq.test(level_counts))$p.value
  } else {
    summary_stats$pval <- character()
  }

  summary_stats
}

#' @describeIn compare_variables Method for `character` class. This makes an automatic
#'   conversion to `factor` (with a warning) and then forwards to the method for factors.
#'
#' @param verbose (`logical`)\cr Whether warnings and messages should be printed. Mainly used
#'   to print out information about factor casting. Defaults to `TRUE`.
#'
#' @method s_compare character
#'
#' @examples
#' # `s_compare.character`
#'
#' ## Basic usage:
#' x <- c("a", "a", "b", "c", "a")
#' y <- c("a", "b", "c")
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE)
#'
#' ## Note that missing values handling can make a large difference:
#' x <- c("a", "a", "b", "c", "a", NA)
#' y <- c("a", "b", "c", rep(NA, 20))
#' s_compare(x,
#'   .ref_group = y, .in_ref_col = FALSE,
#'   .var = "x", verbose = FALSE
#' )
#' s_compare(x,
#'   .ref_group = y, .in_ref_col = FALSE, .var = "x",
#'   na.rm = FALSE, verbose = FALSE
#' )
#'
#' @export
s_compare.character <- function(x,
                                .ref_group,
                                .in_ref_col,
                                denom = "n",
                                na.rm = TRUE, # nolint
                                na_level = "<Missing>",
                                .var,
                                verbose = TRUE,
                                ...) {
  # Cast both groups to factors (analysis group first, then reference group);
  # `.var` is passed as the variable name and `verbose` is forwarded to the casting helper.
  x_fct <- as_factor_keep_attributes(
    x,
    x_name = .var,
    na_level = na_level,
    verbose = verbose
  )
  ref_fct <- as_factor_keep_attributes(
    .ref_group,
    x_name = .var,
    na_level = na_level,
    verbose = verbose
  )

  # Re-dispatch on the converted data; `.var` and `verbose` are not forwarded.
  s_compare(
    x = x_fct,
    .ref_group = ref_fct,
    .in_ref_col = .in_ref_col,
    denom = denom,
    na.rm = na.rm,
    na_level = na_level,
    ...
  )
}

#' @describeIn compare_variables Method for `logical` class. A chi-squared test
#'   is used. If missing values are not removed, then they are counted as `FALSE`.
#'
#' @method s_compare logical
#'
#' @examples
#' # `s_compare.logical`
#'
#' ## Basic usage:
#' x <- c(TRUE, FALSE, TRUE, TRUE)
#' y <- c(FALSE, FALSE, TRUE)
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE)
#'
#' ## Management of NA values.
#' x <- c(NA, TRUE, FALSE)
#' y <- c(NA, NA, NA, NA, FALSE)
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE)
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE)
#'
#' @export
s_compare.logical <- function(x,
                              .ref_group,
                              .in_ref_col,
                              na.rm = TRUE, # nolint
                              denom = "n",
                              ...) {
  denom <- match.arg(denom)

  # Start from the summary statistics computed by the logical summary method.
  result <- s_summary.logical(x = x, na.rm = na.rm, denom = denom, ...)

  if (na.rm) {
    # Drop missing values from both groups.
    x <- stats::na.omit(x)
    .ref_group <- stats::na.omit(.ref_group)
  } else {
    # Missing values are kept and counted as `FALSE`.
    x[is.na(x)] <- FALSE
    .ref_group[is.na(.ref_group)] <- FALSE
  }

  # The test is only run outside the reference column and when both groups are non-empty.
  can_test <- !.in_ref_col && length(x) > 0 && length(.ref_group) > 0
  if (can_test) {
    # Fix the level order to TRUE, FALSE so both rows of the table have the same columns.
    flag_levels <- c(TRUE, FALSE)
    counts <- rbind(
      table(factor(x, levels = flag_levels)),
      table(factor(.ref_group, levels = flag_levels))
    )
    result$pval <- suppressWarnings(prop_chisq(counts))
  } else {
    # Empty placeholder when no p-value is computed.
    result$pval <- character()
  }

  result
}

#' @describeIn compare_variables Formatted analysis function which is used as `afun`
#'   in `compare_vars()`.
#'
#' @return
#' * `a_compare()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @export
a_compare <- function(x,
                      .ref_group,
                      .in_ref_col,
                      ...,
                      .var) {
  # S3 generic: dispatch to the `a_compare` method matching the class of `x`.
  UseMethod("a_compare", x)
}

#' @describeIn compare_variables Formatted analysis function method for `numeric` class.
#'
#' @examples
#' # `a_compare.numeric`
#' a_compare(
#'   rnorm(10, 5, 1),
#'   .ref_group = rnorm(20, -5, 1),
#'   .in_ref_col = FALSE,
#'   .var = "bla"
#' )
#'
#' @export
a_compare.numeric <- make_afun(
  # Wrap the numeric statistics function into a formatted analysis function.
  s_compare.numeric,
  # Formats: the numeric summary formats plus one for the added `pval` statistic.
  .formats = c(
    .a_summary_numeric_formats,
    pval = "x.xxxx | (<0.0001)"
  ),
  # Labels: the numeric summary labels plus one for the added `pval` statistic.
  .labels = c(
    .a_summary_numeric_labels,
    pval = "p-value (t-test)"
  ),
  # NOTE(review): presumably stops `make_afun()` from blanking reference-column cells;
  # confirm against the `rtables::make_afun()` documentation.
  .null_ref_cells = FALSE
)

.a_compare_counts_formats <- c(
  # Formats shared by the factor, character and logical `a_compare` methods:
  # the count summary formats plus one for the `pval` statistic.
  .a_summary_counts_formats,
  pval = "x.xxxx | (<0.0001)"
)

# Labels shared by the factor, character and logical `a_compare` methods;
# only the `pval` statistic gets a label here.
.a_compare_counts_labels <- c(
  pval = "p-value (chi-squared test)"
)

#' @describeIn compare_variables Formatted analysis function method for `factor` class.
#'
#' @examples
#' # `a_compare.factor`
#' # We need to ungroup `count` and `count_fraction` first so that the `rtables` formatting
#' # functions can be applied correctly.
#' afun <- make_afun(
#'   getS3method("a_compare", "factor"),
#'   .ungroup_stats = c("count", "count_fraction")
#' )
#' x <- factor(c("a", "a", "b", "c", "a"))
#' y <- factor(c("a", "a", "b", "c"))
#' afun(x, .ref_group = y, .in_ref_col = FALSE)
#'
#' @export
a_compare.factor <- make_afun(
  # Wrap the factor statistics function, using the shared count formats and labels.
  s_compare.factor,
  .formats = .a_compare_counts_formats,
  .labels = .a_compare_counts_labels,
  # NOTE(review): presumably stops `make_afun()` from blanking reference-column cells;
  # confirm against the `rtables::make_afun()` documentation.
  .null_ref_cells = FALSE
)

#' @describeIn compare_variables Formatted analysis function method for `character` class.
#'
#' @examples
#' # `a_compare.character`
#' afun <- make_afun(
#'   getS3method("a_compare", "character"),
#'   .ungroup_stats = c("count", "count_fraction")
#' )
#' x <- c("A", "B", "A", "C")
#' y <- c("B", "A", "C")
#' afun(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE)
#'
#' @export
a_compare.character <- make_afun(
  # Wrap the character statistics function, using the shared count formats and labels.
  s_compare.character,
  .formats = .a_compare_counts_formats,
  .labels = .a_compare_counts_labels,
  # NOTE(review): presumably stops `make_afun()` from blanking reference-column cells;
  # confirm against the `rtables::make_afun()` documentation.
  .null_ref_cells = FALSE
)

#' @describeIn compare_variables Formatted analysis function method for `logical` class.
#'
#' @examples
#' # `a_compare.logical`
#' afun <- make_afun(
#'   getS3method("a_compare", "logical")
#' )
#' x <- c(TRUE, FALSE, FALSE, TRUE, TRUE)
#' y <- c(TRUE, FALSE)
#' afun(x, .ref_group = y, .in_ref_col = FALSE)
#'
#' @export
a_compare.logical <- make_afun(
  # Wrap the logical statistics function, using the shared count formats and labels.
  s_compare.logical,
  .formats = .a_compare_counts_formats,
  .labels = .a_compare_counts_labels,
  # NOTE(review): presumably stops `make_afun()` from blanking reference-column cells;
  # confirm against the `rtables::make_afun()` documentation.
  .null_ref_cells = FALSE
)

#' Constructor Function for [compare_vars()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Constructor function which creates a combined formatted analysis function.
#'
#' @inheritParams argument_convention
#'
#' @return Combined formatted analysis function for use in [compare_vars()].
#'
#' @note Since [a_compare()] is generic and we want customization of the formatting arguments
#'   via [rtables::make_afun()], we need to create another temporary generic function, with
#'   corresponding customized methods. Then in order for the methods to be found,
#'   we need to wrap them in a combined `afun`. Since this is required by two layout creating
#'   functions (and possibly others in the future), we provide a constructor that does this:
#'   [create_afun_compare()].
#'
#' @seealso [compare_vars()]
#'
#' @examples
#' # `create_afun_compare()` to create combined `afun`
#'
#' afun <- create_afun_compare(
#'   .stats = c("n", "count_fraction", "mean_sd", "pval"),
#'   .indent_mods = c(pval = 1L)
#' )
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", ref_group = "ARM A") %>%
#'   analyze(
#'     "AGE",
#'     afun = afun,
#'     show_labels = "visible"
#'   )
#' build_table(lyt, df = tern_ex_adsl)
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", ref_group = "ARM A") %>%
#'   analyze(
#'     "SEX",
#'     afun = afun,
#'     show_labels = "visible"
#'   )
#' build_table(lyt, df = tern_ex_adsl)
#'
#' @export
create_afun_compare <- function(.stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Returns a closure with the analysis-function signature. Every call defines a temporary
  # S3 generic `afun` plus one customized method per supported class, then dispatches on `x`.
  function(x,
           .ref_group,
           .in_ref_col,
           ...,
           .var) {
    # Temporary generic; its methods are the local `afun.<class>` objects defined below.
    afun <- function(x, ...) {
      UseMethod("afun", x)
    }

    # Numeric method: statistics are selected from the numeric summary statistics plus `pval`.
    numeric_stats <- afun_selected_stats(
      .stats,
      all_stats = c(names(.a_summary_numeric_formats), "pval")
    )
    afun.numeric <- make_afun( # nolint
      a_compare.numeric,
      .stats = numeric_stats,
      .formats = extract_by_name(.formats, numeric_stats),
      .labels = extract_by_name(.labels, numeric_stats),
      .indent_mods = extract_by_name(.indent_mods, numeric_stats),
      .null_ref_cells = FALSE
    )

    # Factor, character and logical methods share the statistics named in the count formats.
    factor_stats <- afun_selected_stats(
      .stats,
      all_stats = names(.a_compare_counts_formats)
    )
    # `count` and `count_fraction` (when selected) are passed as `.ungroup_stats`
    # for the factor and character methods.
    ungroup_stats <- afun_selected_stats(.stats, c("count", "count_fraction"))
    afun.factor <- make_afun( # nolint
      a_compare.factor,
      .stats = factor_stats,
      .formats = extract_by_name(.formats, factor_stats),
      .labels = extract_by_name(.labels, factor_stats),
      .indent_mods = extract_by_name(.indent_mods, factor_stats),
      .ungroup_stats = ungroup_stats,
      .null_ref_cells = FALSE
    )

    afun.character <- make_afun( # nolint
      a_compare.character,
      .stats = factor_stats,
      .formats = extract_by_name(.formats, factor_stats),
      .labels = extract_by_name(.labels, factor_stats),
      .indent_mods = extract_by_name(.indent_mods, factor_stats),
      .ungroup_stats = ungroup_stats,
      .null_ref_cells = FALSE
    )

    # The logical method uses the same statistics but passes no `.ungroup_stats`.
    afun.logical <- make_afun( # nolint
      a_compare.logical,
      .stats = factor_stats,
      .formats = extract_by_name(.formats, factor_stats),
      .labels = extract_by_name(.labels, factor_stats),
      .indent_mods = extract_by_name(.indent_mods, factor_stats),
      .null_ref_cells = FALSE
    )

    # Dispatch on the class of `x`, forwarding all analysis arguments.
    afun(
      x = x,
      .ref_group = .ref_group,
      .in_ref_col = .in_ref_col,
      ...,
      .var = .var
    )
  }
}

#' @describeIn compare_variables Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... arguments passed to `s_compare()`.
#'
#' @return
#' * `compare_vars()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_compare()` to the table layout.
#'
#' @examples
#' # `compare_vars()` in `rtables` pipelines
#'
#' ## Default output within a `rtables` pipeline.
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", ref_group = "ARM B") %>%
#'   compare_vars(c("AGE", "SEX"))
#' build_table(lyt, tern_ex_adsl)
#'
#' ## Select and format statistics output.
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", ref_group = "ARM C") %>%
#'   compare_vars(
#'     vars = "AGE",
#'     .stats = c("mean_sd", "pval"),
#'     .formats = c(mean_sd = "xx.x, xx.x"),
#'     .labels = c(mean_sd = "Mean, SD")
#'   )
#' build_table(lyt, df = tern_ex_adsl)
#'
#' @export
compare_vars <- function(lyt,
                         vars,
                         var_labels = vars,
                         nested = TRUE,
                         ...,
                         show_labels = "default",
                         table_names = vars,
                         .stats = c("n", "mean_sd", "count_fraction", "pval"),
                         .formats = NULL,
                         .labels = NULL,
                         .indent_mods = NULL) {
  # Build the combined, class-dispatching analysis function from the formatting arguments.
  combined_afun <- create_afun_compare(
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Add the analysis to the layout; arguments in `...` are handed over as `extra_args`.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = combined_afun,
    var_labels = var_labels,
    table_names = table_names,
    show_labels = show_labels,
    nested = nested,
    inclNAs = TRUE,
    extra_args = list(...)
  )
}

#' Occurrence Table Pruning
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Family of constructor and condition functions to flexibly prune occurrence tables.
#' The condition functions always return whether the row result is greater than or equal to the threshold.
#' Since they are of class [CombinationFunction()] they can be logically combined with other condition
#' functions.
#'
#' @note Since most table specifications are worded positively, we name our constructor and condition
#'   functions positively, too. However, note that the result of [keep_rows()] says what
#'   should be pruned, to conform with the [rtables::prune_table()] interface.
#'
#' @examples
#' \dontrun{
#' tab <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("RACE") %>%
#'   split_rows_by("STRATA1") %>%
#'   summarize_row_groups() %>%
#'   summarize_vars("COUNTRY", .stats = "count_fraction") %>%
#'   build_table(DM)
#' }
#'
#' @name prune_occurrences
NULL

#' @describeIn prune_occurrences Constructor for creating pruning functions based on
#'   a row condition function. This removes all analysis rows (`TableRow`) that should be
#'   pruned, i.e., don't fulfill the row condition. It removes the sub-tree if there are no
#'   children left.
#'
#' @param row_condition (`CombinationFunction`)\cr condition function which works on individual
#'   analysis rows and flags whether these should be kept in the pruned table.
#'
#' @return
#' * `keep_rows()` returns a pruning function that can be used with [rtables::prune_table()]
#'   to prune an `rtables` table.
#'
#' @examples
#' \dontrun{
#' # `keep_rows`
#' is_non_empty <- !CombinationFunction(all_zero_or_na)
#' prune_table(tab, keep_rows(is_non_empty))
#' }
#'
#' @export
keep_rows <- function(row_condition) {
  checkmate::assert_function(row_condition)
  function(table_tree) {
    if (!inherits(table_tree, "TableRow")) {
      # Anything that is not a `TableRow` is pruned only when it has no children.
      return(identical(length(tree_children(table_tree)), 0L))
    }
    # For a `TableRow`: prune exactly when the keep-condition is not met.
    !row_condition(table_tree)
  }
}

#' @describeIn prune_occurrences Constructor for creating pruning functions based on
#'   a condition for the (first) content row in leaf tables. This removes all leaf tables where
#'   the first content row does not fulfill the condition. It does not check individual rows.
#'   It then proceeds recursively by removing the sub tree if there are no children left.
#'
#' @param content_row_condition (`CombinationFunction`)\cr condition function which works on individual
#'   first content rows of leaf tables and flags whether these leaf tables should be kept in the pruned table.
#'
#' @return
#' * `keep_content_rows()` returns a pruning function that checks the condition on the first content
#'   row of leaf tables in the table.
#'
#' @examples
#' # `keep_content_rows`
#' # Internal function - has_count_in_cols
#' \dontrun{
#' more_than_twenty <- has_count_in_cols(atleast = 20L, col_names = names(tab))
#' prune_table(tab, keep_content_rows(more_than_twenty))
#' }
#'
#' @export
keep_content_rows <- function(content_row_condition) {
  checkmate::assert_function(content_row_condition)
  function(table_tree) {
    if (is_leaf_table(table_tree)) {
      # Leaf table: prune when its first content row does not meet the keep-condition.
      !content_row_condition(h_content_first_row(table_tree))
    } else if (inherits(table_tree, "DataRow")) {
      # Individual data rows are never pruned by this function.
      FALSE
    } else {
      # Any other node is pruned only when it has no children.
      identical(length(tree_children(table_tree)), 0L)
    }
  }
}

#' @describeIn prune_occurrences Constructor for creating condition functions on total counts in the specified columns.
#'
#' @param atleast (`count` or `proportion`)\cr threshold which should be met in order to keep the row.
#' @param ... arguments for row or column access, see [rtables_access]: either `col_names` (`character`) including
#'   the names of the columns which should be used, or alternatively `col_indices` (`integer`) giving the indices
#'   directly instead.
#'
#' @return
#' * `has_count_in_cols()` returns a condition function that sums the counts in the specified column.
#'
#' @examples
#' # Internal function - has_count_in_cols
#' \dontrun{
#' more_than_one <- has_count_in_cols(atleast = 1L, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_one))
#' }
#'
#' @keywords internal
has_count_in_cols <- function(atleast, ...) {
  checkmate::assert_count(atleast)
  # Condition: the sum of the row counts (columns selected via `...`) reaches `atleast`.
  meets_total <- function(table_row) {
    sum(h_row_counts(table_row, ...)) >= atleast
  }
  CombinationFunction(meets_total)
}

#' @describeIn prune_occurrences Constructor for creating condition functions on any of the counts in
#'   the specified columns satisfying a threshold.
#'
#' @param atleast (`count` or `proportion`)\cr threshold which should be met in order to keep the row.
#'
#' @return
#' * `has_count_in_any_col()` returns a condition function that compares the counts in the
#'   specified columns with the threshold.
#'
#' @examples
#' \dontrun{
#' # `has_count_in_any_col`
#' any_more_than_one <- has_count_in_any_col(atleast = 1L, col_names = names(tab))
#' prune_table(tab, keep_rows(any_more_than_one))
#' }
#'
#' @export
has_count_in_any_col <- function(atleast, ...) {
  checkmate::assert_count(atleast)
  # Condition: at least one of the row counts (columns selected via `...`) reaches `atleast`.
  any_meets <- function(table_row) {
    any(h_row_counts(table_row, ...) >= atleast)
  }
  CombinationFunction(any_meets)
}

#' @describeIn prune_occurrences Constructor for creating condition functions on total fraction in
#'   the specified columns.
#'
#' @return
#' * `has_fraction_in_cols()` returns a condition function that sums the counts in the
#'   specified column, and computes the fraction by dividing by the total column counts.
#'
#' @examples
#' \dontrun{
#' # `has_fraction_in_cols`
#' more_than_five_percent <- has_fraction_in_cols(atleast = 0.05, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_five_percent))
#' }
#'
#' @export
has_fraction_in_cols <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)
  # Condition: summed row counts divided by summed column counts reaches `atleast`.
  meets_pooled_fraction <- function(table_row) {
    pooled_count <- sum(h_row_counts(table_row, ...))
    pooled_n <- sum(h_col_counts(table_row, ...))
    pooled_count / pooled_n >= atleast
  }
  CombinationFunction(meets_pooled_fraction)
}

#' @describeIn prune_occurrences Constructor for creating condition functions on any fraction in
#'   the specified columns.
#'
#' @return
#' * `has_fraction_in_any_col()` returns a condition function that looks at the fractions
#'  in the specified columns and checks whether any of them fulfill the threshold.
#'
#' @examples
#' \dontrun{
#' # `has_fraction_in_any_col`
#' any_atleast_five_percent <- has_fraction_in_any_col(atleast = 0.05, col_names = names(tab))
#' prune_table(tab, keep_rows(any_atleast_five_percent))
#' }
#'
#' @export
has_fraction_in_any_col <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)
  # Condition: at least one of the row fractions (columns selected via `...`) reaches `atleast`.
  any_meets <- function(table_row) {
    any(h_row_fractions(table_row, ...) >= atleast)
  }
  CombinationFunction(any_meets)
}

#' @describeIn prune_occurrences Constructor for creating condition function that checks the difference
#'   between the fractions reported in each specified column.
#'
#' @return
#' * `has_fractions_difference()` returns a condition function that extracts the fractions of each
#'   specified column, and computes the difference of the minimum and maximum.
#'
#' @examples
#' \dontrun{
#' # `has_fractions_difference`
#' more_than_five_percent_diff <- has_fractions_difference(atleast = 0.05, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_five_percent_diff))
#' }
#'
#' @export
has_fractions_difference <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)
  # Condition: the spread (max minus min) of the row fractions reaches `atleast`.
  spread_meets <- function(table_row) {
    diff(range(h_row_fractions(table_row, ...))) >= atleast
  }
  CombinationFunction(spread_meets)
}

#' @describeIn prune_occurrences Constructor for creating condition function that checks the difference
#'   between the counts reported in each specified column.
#'
#' @return
#' * `has_counts_difference()` returns a condition function that extracts the counts of each
#'   specified column, and computes the difference of the minimum and maximum.
#'
#' @examples
#' # Internal function - has_counts_difference
#' \dontrun{
#' more_than_one_diff <- has_counts_difference(atleast = 1L, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_one_diff))
#' }
#'
#' @keywords internal
has_counts_difference <- function(atleast, ...) {
  checkmate::assert_count(atleast)
  # Condition: the spread (max minus min) of the row counts reaches `atleast`.
  spread_meets <- function(table_row) {
    diff(range(h_row_counts(table_row, ...))) >= atleast
  }
  CombinationFunction(spread_meets)
}

#' Patient Counts with Abnormal Range Values by Baseline Status
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Primary analysis variable `.var` indicates the abnormal range result (`character` or `factor`), and additional
#' analysis variables are `id` (`character` or `factor`) and `baseline` (`character` or `factor`). For each
#' direction specified in `abnormal` (e.g. high or low) we condition on baseline range result and count
#' patients in the numerator and denominator as follows:
#'   * `Not <Abnormal>`
#'     * `denom`: the number of patients without abnormality at baseline (excluding those with missing baseline)
#'     * `num`:  the number of patients in `denom` who also have at least one abnormality post-baseline
#'   * `<Abnormal>`
#'     * `denom`: the number of patients with abnormality at baseline
#'     * `num`: the number of patients in `denom` who also have at least one abnormality post-baseline
#'   * `Total`
#'     * `denom`: the number of patients with at least one valid measurement post-baseline
#'     * `num`: the number of patients in `denom` who also have at least one abnormality post-baseline
#'
#' @inheritParams argument_convention
#' @param abnormal (`character`)\cr identifying the abnormal range level(s) in `.var`.
#'
#' @note
#' * `df` should be filtered to include only post-baseline records.
#' * If the baseline variable or analysis variable contains `NA`, it is expected that `NA` has been
#'   conveyed to `na_level` appropriately beforehand with [df_explicit_na()] or [explicit_na()].
#'
#' @seealso Relevant description function [d_count_abnormal_by_baseline()].
#'
#' @name abnormal_by_baseline
NULL

#' Description Function for [s_count_abnormal_by_baseline()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Description function that produces the labels for [s_count_abnormal_by_baseline()].
#'
#' @inheritParams abnormal_by_baseline
#'
#' @return Abnormal category labels for [s_count_abnormal_by_baseline()].
#'
#' @examples
#' d_count_abnormal_by_baseline("LOW")
#'
#' @export
d_count_abnormal_by_baseline <- function(abnormal) {
  status_suffix <- "baseline status"

  # Sentence case: first character upper case, remainder lower case (e.g. "LOW" -> "Low").
  first_upper <- toupper(substr(abnormal, 1, 1))
  rest_lower <- tolower(substring(abnormal, 2))
  sentence_case <- paste0(first_upper, rest_lower)

  list(
    not_abnormal = paste("Not", tolower(abnormal), status_suffix),
    abnormal = paste(sentence_case, status_suffix),
    total = "Total"
  )
}

#' @describeIn abnormal_by_baseline Statistics function for a single `abnormal` level.
#'
#' @param na_level (`string`)\cr the explicit `na_level` argument you used in the pre-processing steps (maybe with
#'   [df_explicit_na()]). The default is `"<Missing>"`.
#'
#' @return
#' * `s_count_abnormal_by_baseline()` returns statistic `fraction` which is a named list with 3 labeled elements:
#'   `not_abnormal`, `abnormal`, and `total`. Each element contains a vector with `num` and `denom` patient counts.
#'
#' @examples
#' df <- data.frame(
#'   USUBJID = as.character(c(1:6)),
#'   ANRIND = factor(c(rep("LOW", 4), "NORMAL", "HIGH")),
#'   BNRIND = factor(c("LOW", "NORMAL", "HIGH", NA, "LOW", "NORMAL"))
#' )
#' df <- df_explicit_na(df)
#'
#' # Internal function - s_count_abnormal_by_baseline
#' \dontrun{
#' # Just for one abnormal level.
#' s_count_abnormal_by_baseline(df, .var = "ANRIND", abnormal = "HIGH")
#' }
#'
#' @keywords internal
s_count_abnormal_by_baseline <- function(df,
                                         .var,
                                         abnormal,
                                         na_level = "<Missing>",
                                         variables = list(id = "USUBJID", baseline = "BNRIND")) {
  # Argument and column checks.
  checkmate::assert_string(.var)
  checkmate::assert_string(abnormal)
  checkmate::assert_string(na_level)
  assert_df_with_variables(df, c(range = .var, variables))
  checkmate::assert_subset(names(variables), c("id", "baseline"))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[variables$baseline]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))

  # Character inputs are converted to factors; afterwards neither column may contain `NA`.
  df[[.var]] <- as_factor_keep_attributes(df[[.var]], na_level = na_level)
  df[[variables$baseline]] <- as_factor_keep_attributes(df[[variables$baseline]], na_level = na_level)

  assert_valid_factor(df[[.var]], any.missing = FALSE)
  assert_valid_factor(df[[variables$baseline]], any.missing = FALSE)

  # Records whose analysis value equals `na_level` are excluded everywhere.
  df <- df[df[[.var]] != na_level, ]

  anl <- data.frame(
    id = df[[variables$id]],
    var = df[[.var]],
    baseline = df[[variables$baseline]],
    stringsAsFactors = FALSE
  )

  # Number of distinct patients among the given identifiers.
  n_patients <- function(ids) length(unique(ids))

  # Total row: all patients with a valid post-baseline value (denominator) and
  # those among them with at least one abnormal value (numerator).
  total <- c(
    num = n_patients(anl$id[anl$var == abnormal]),
    denom = n_patients(anl$id)
  )

  # Patients with missing baseline only contribute to the total row.
  anl <- anl[anl$baseline != na_level, ]

  abn_at_baseline <- anl$baseline == abnormal
  abn_post_baseline <- anl$var == abnormal

  # Abnormal at baseline: denominator = abnormal baseline; numerator = additionally
  # at least one abnormal post-baseline value.
  abn <- c(
    num = n_patients(anl$id[abn_at_baseline & abn_post_baseline]),
    denom = n_patients(anl$id[abn_at_baseline])
  )

  # Not abnormal at baseline: same logic with the baseline condition negated.
  not_abn <- c(
    num = n_patients(anl$id[!abn_at_baseline & abn_post_baseline]),
    denom = n_patients(anl$id[!abn_at_baseline])
  )

  labels <- d_count_abnormal_by_baseline(abnormal)
  list(fraction = list(
    not_abnormal = formatters::with_label(not_abn, labels$not_abnormal),
    abnormal = formatters::with_label(abn, labels$abnormal),
    total = formatters::with_label(total, labels$total)
  ))
}

#' @describeIn abnormal_by_baseline Formatted analysis function which is used as `afun`
#'   in `count_abnormal_by_baseline()`.
#'
#' @return
#' * `a_count_abnormal_by_baseline()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_count_abnormal_by_baseline
#' \dontrun{
#' # Use the Formatted Analysis function for `analyze()`. We need to ungroup `fraction` first
#' # so that the `rtables` formatting function `format_fraction()` can be applied correctly.
#' afun <- make_afun(a_count_abnormal_by_baseline, .ungroup_stats = "fraction")
#' afun(df, .var = "ANRIND", abnormal = "LOW")
#' }
#'
#' @keywords internal
a_count_abnormal_by_baseline <- make_afun(
  # Wrap the statistics function; the `fraction` statistic is formatted with `format_fraction`.
  s_count_abnormal_by_baseline,
  .formats = c(fraction = format_fraction)
)

#' @describeIn abnormal_by_baseline Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_by_baseline()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal_by_baseline()` to the table layout.
#'
#' @examples
#' # Layout creating function.
#' basic_table() %>%
#'   count_abnormal_by_baseline(var = "ANRIND", abnormal = c(High = "HIGH")) %>%
#'   build_table(df)
#'
#' # Passing of statistics function and formatting arguments.
#' df2 <- data.frame(
#'   ID = as.character(c(1, 2, 3, 4)),
#'   RANGE = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
#'   BLRANGE = factor(c("LOW", "HIGH", "HIGH", "NORMAL"))
#' )
#'
#' basic_table() %>%
#'   count_abnormal_by_baseline(
#'     var = "RANGE",
#'     abnormal = c(Low = "LOW"),
#'     variables = list(id = "ID", baseline = "BLRANGE"),
#'     .formats = c(fraction = "xx / xx"),
#'     .indent_mods = c(fraction = 2L)
#'   ) %>%
#'   build_table(df2)
#'
#' @export
count_abnormal_by_baseline <- function(lyt,
                                       var,
                                       abnormal,
                                       ...,
                                       table_names = abnormal,
                                       .stats = NULL,
                                       .formats = NULL,
                                       .labels = NULL,
                                       .indent_mods = NULL) {
  # `abnormal` must be a named character vector with one entry per table name.
  checkmate::assert_character(abnormal, len = length(table_names), names = "named")
  checkmate::assert_string(var)

  # Customized analysis function with the `fraction` statistic ungrouped.
  fraction_afun <- make_afun(
    a_count_abnormal_by_baseline,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "fraction"
  )

  # One analysis per abnormal level; the element name is used as the visible label.
  for (idx in seq_along(abnormal)) {
    level <- abnormal[idx]
    lyt <- analyze(
      lyt = lyt,
      vars = var,
      afun = fraction_afun,
      var_labels = names(level),
      table_names = table_names[idx],
      show_labels = "visible",
      extra_args = c(list(abnormal = level), list(...))
    )
  }

  lyt
}

#' Estimation of Proportions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Estimate the proportion of responders within a studied population.
#'
#' @inheritParams argument_convention
#'
#' @seealso [h_proportions]
#'
#' @name estimate_proportions
NULL

#' @describeIn estimate_proportions Statistics function estimating a
#'   proportion along with its confidence interval.
#'
#' @inheritParams prop_strat_wilson
#' @param df (`logical` or `data.frame`)\cr if only a logical vector is used,
#'   it indicates whether each subject is a responder or not. `TRUE` represents
#'   a successful outcome. If a `data.frame` is provided, also the `strata` variable
#'   names must be provided in `variables` as a list element with the strata strings.
#'   In the case of `data.frame`, the logical vector of responses must be indicated as a
#'   variable name in `.var`.
#' @param method (`string`)\cr the method used to construct the confidence interval
#'   for proportion of successful outcomes; one of `waldcc`, `wald`, `clopper-pearson`,
#'   `wilson`, `wilsonc`, `strat_wilson`, `strat_wilsonc`, `agresti-coull` or `jeffreys`.
#' @param long (`flag`)\cr whether a long description is required.
#'
#' @return
#' * `s_proportion()` returns statistics `n_prop` (`n` and proportion) and `prop_ci` (proportion CI) for a
#'   given variable.
#'
#' @examples
#' # Case with only logical vector.
#' rsp_v <- c(1, 0, 1, 0, 1, 1, 0, 0)
#' s_proportion(rsp_v)
#'
#' # Example for Stratified Wilson CI
#' nex <- 100 # Number of example rows
#' dta <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
#'   "grp" = sample(c("A", "B"), nex, TRUE),
#'   "f1" = sample(c("a1", "a2"), nex, TRUE),
#'   "f2" = sample(c("x", "y", "z"), nex, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' s_proportion(
#'   df = dta,
#'   .var = "rsp",
#'   variables = list(strata = c("f1", "f2")),
#'   conf_level = 0.90,
#'   method = "strat_wilson"
#' )
#'
#' @export
s_proportion <- function(df,
                         .var,
                         conf_level = 0.95,
                         method = c(
                           "waldcc", "wald", "clopper-pearson",
                           "wilson", "wilsonc", "strat_wilson", "strat_wilsonc",
                           "agresti-coull", "jeffreys"
                         ),
                         weights = NULL,
                         max_iterations = 50,
                         variables = list(strata = NULL),
                         long = FALSE) {
  method <- match.arg(method)
  checkmate::assert_flag(long)
  assert_proportion_value(conf_level)

  strata_colnames <- variables$strata
  if (!is.null(strata_colnames)) {
    # Stratified analysis needs a data frame that contains every strata column.
    if (missing(df)) stop("When doing stratified analysis a data.frame with specific columns is needed.")
    checkmate::assert_character(strata_colnames, null.ok = FALSE)
    strata_vars <- stats::setNames(as.list(strata_colnames), strata_colnames)
    assert_df_with_variables(df, strata_vars)

    # Combine all strata columns into a single factor; further checks are left
    # to `prop_strat_wilson()`.
    strata <- as.factor(interaction(df[strata_colnames]))
  } else if (method %in% c("strat_wilson", "strat_wilsonc")) {
    stop("To use stratified methods you need to specify the strata variables.")
  }

  # Responses come either directly from an atomic vector or from column `.var` of `df`.
  rsp <- if (checkmate::test_atomic_vector(df)) {
    as.logical(df)
  } else {
    as.logical(df[[.var]])
  }
  n_responders <- sum(rsp)
  prop_responders <- mean(rsp)

  # Confidence interval according to the selected method. The two stratified Wilson
  # variants share one branch and differ only in the `correct` argument.
  prop_ci <- switch(method,
    "wald" = prop_wald(rsp, conf_level),
    "waldcc" = prop_wald(rsp, conf_level, correct = TRUE),
    "wilson" = prop_wilson(rsp, conf_level),
    "wilsonc" = prop_wilson(rsp, conf_level, correct = TRUE),
    "strat_wilson" = ,
    "strat_wilsonc" = prop_strat_wilson(
      rsp,
      strata,
      weights,
      conf_level,
      max_iterations,
      correct = identical(method, "strat_wilsonc")
    )$conf_int,
    "clopper-pearson" = prop_clopper_pearson(rsp, conf_level),
    "agresti-coull" = prop_agresti_coull(rsp, conf_level),
    "jeffreys" = prop_jeffreys(rsp, conf_level)
  )

  # The interval is multiplied by 100 before it is returned.
  ci_label <- d_proportion(conf_level, method, long = long)
  list(
    "n_prop" = formatters::with_label(c(n_responders, prop_responders), "Responders"),
    "prop_ci" = formatters::with_label(x = 100 * prop_ci, label = ci_label)
  )
}

#' @describeIn estimate_proportions Formatted analysis function which is used as `afun`
#'   in `estimate_proportion()`.
#'
#' @return
#' * `a_proportion()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @export
a_proportion <- make_afun(
  s_proportion,
  # One format string per statistic returned by `s_proportion()` (`n_prop` and `prop_ci`).
  .formats = c(n_prop = "xx (xx.x%)", prop_ci = "(xx.x, xx.x)")
)

#' @describeIn estimate_proportions Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... other arguments are ultimately conveyed to [s_proportion()].
#'
#' @return
#' * `estimate_proportion()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_proportion()` to the table layout.
#'
#' @examples
#' dta_test <- data.frame(
#'   USUBJID = paste0("S", 1:12),
#'   ARM     = rep(LETTERS[1:3], each = 4),
#'   AVAL    = c(A = c(1, 1, 1, 1), B = c(0, 0, 1, 1), C = c(0, 0, 0, 0))
#' )
#'
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   estimate_proportion(vars = "AVAL") %>%
#'   build_table(df = dta_test)
#'
#' @export
estimate_proportion <- function(lyt,
                                vars,
                                ...,
                                show_labels = "hidden",
                                table_names = vars,
                                .stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Specialise the default formatted analysis function with the caller's overrides.
  proportion_afun <- make_afun(
    a_proportion,
    .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods
  )

  # Everything passed through `...` is handed on via `extra_args`.
  analyze(
    lyt,
    vars,
    afun = proportion_afun,
    extra_args = list(...),
    table_names = table_names,
    show_labels = show_labels
  )
}

#' Helper Functions for Calculating Proportion Confidence Intervals
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions to calculate different proportion confidence intervals for use in [estimate_proportion()].
#'
#' @inheritParams argument_convention
#' @inheritParams estimate_proportions
#'
#' @return Confidence interval of a proportion.
#'
#' @seealso [estimate_proportions], descriptive function [d_proportion()],
#'  and helper functions [strata_normal_quantile()] and [update_weights_strat_wilson()].
#'
#' @name h_proportions
NULL

#' @describeIn h_proportions Calculates the Wilson interval by calling [stats::prop.test()].
#'  Also referred to as Wilson score interval.
#'
#' @examples
#' rsp <- c(
#'   TRUE, TRUE, TRUE, TRUE, TRUE,
#'   FALSE, FALSE, FALSE, FALSE, FALSE
#' )
#' prop_wilson(rsp, conf_level = 0.9)
#'
#' @export
prop_wilson <- function(rsp, conf_level, correct = FALSE) {
  # Number of responders and total number of observations.
  n_responders <- sum(rsp)
  n_total <- length(rsp)

  # `stats::prop.test()` computes the interval; `correct` toggles the continuity correction.
  test_result <- stats::prop.test(
    x = n_responders,
    n = n_total,
    conf.level = conf_level,
    correct = correct
  )

  # Return the bare lower/upper limits without the attributes attached by `prop.test()`.
  as.numeric(test_result$conf.int)
}

#' @describeIn h_proportions Calculates the stratified Wilson confidence
#'   interval for unequal proportions as described in \insertCite{Yan2010-jt;textual}{tern}
#'
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
#' @param weights (`numeric` or `NULL`)\cr weights for each level of the strata. If `NULL`, they are
#'   estimated using the iterative algorithm proposed in \insertCite{Yan2010-jt;textual}{tern} that
#'   minimizes the weighted squared length of the confidence interval.
#' @param max_iterations (`count`)\cr maximum number of iterations for the iterative procedure used
#'   to find estimates of optimal weights.
#' @param correct (`flag`)\cr include the continuity correction. For further information, see for example
#'   [stats::prop.test()].
#'
#' @references
#' - \insertRef{Yan2010-jt}{tern}
#'
#' @examples
#' # Stratified Wilson confidence interval with unequal probabilities
#'
#' set.seed(1)
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
#' strata_data <- data.frame(
#'   "f1" = sample(c("a", "b"), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' )
#' strata <- interaction(strata_data)
#' n_strata <- ncol(table(rsp, strata)) # Number of strata
#'
#' prop_strat_wilson(
#'   rsp = rsp, strata = strata,
#'   conf_level = 0.90
#' )
#'
#' # Not automatic setting of weights
#' prop_strat_wilson(
#'   rsp = rsp, strata = strata,
#'   weights = rep(1 / n_strata, n_strata),
#'   conf_level = 0.90
#' )
#'
#' @export
prop_strat_wilson <- function(rsp,
                              strata,
                              weights = NULL,
                              conf_level = 0.95,
                              max_iterations = NULL,
                              correct = FALSE) {
  # Stratified Wilson confidence interval: a weighted sum of per-stratum Wilson intervals
  # computed at a stratum-level confidence level derived from `strata_normal_quantile()`.
  # Returns a list with `conf_int` (named `lower`/`upper`) and, when the weights were
  # estimated iteratively (i.e. `weights = NULL`), also the estimated `weights`.
  checkmate::assert_logical(rsp, any.missing = FALSE)
  checkmate::assert_factor(strata, len = length(rsp))
  assert_proportion_value(conf_level)

  tbl <- table(rsp, strata)
  n_strata <- ncol(tbl)

  # Checking the weights and maximum number of iterations.
  do_iter <- FALSE
  if (is.null(weights)) {
    weights <- rep(1 / n_strata, n_strata) # Initialization for iterative procedure
    do_iter <- TRUE

    # Iteration parameters
    if (is.null(max_iterations)) max_iterations <- 10
    checkmate::assert_int(max_iterations, na.ok = FALSE, null.ok = FALSE, lower = 1)
  }
  checkmate::assert_numeric(weights, lower = 0, upper = 1, any.missing = FALSE, len = n_strata)
  checkmate::assert_int(sum(weights), lower = 1, upper = 1)

  xs <- tbl["TRUE", ]
  ns <- colSums(tbl)

  # Strata without observations (e.g. unused factor levels) cannot contribute an estimate.
  use_stratum <- (ns > 0)
  ns <- ns[use_stratum]
  xs <- xs[use_stratum]
  if (!all(use_stratum)) {
    # Keep the weights aligned with the retained strata, otherwise `weights` and `vars`
    # would differ in length and be silently recycled. Renormalize so they still sum to 1.
    weights <- weights[use_stratum]
    weights <- weights / sum(weights)
  }
  ests <- xs / ns
  vars <- ests * (1 - ests) / ns

  strata_qnorm <- strata_normal_quantile(vars, weights, conf_level)

  # Iterative setting of weights if they were not set externally
  weights_new <- if (do_iter) {
    update_weights_strat_wilson(vars, strata_qnorm, weights, ns, max_iterations, conf_level)$weights
  } else {
    weights
  }

  # Confidence level to use within each stratum.
  strata_conf_level <- 2 * stats::pnorm(strata_qnorm) - 1

  ci_by_strata <- Map(
    function(x, n) {
      # Classic Wilson's confidence interval
      suppressWarnings(stats::prop.test(x, n, correct = correct, conf.level = strata_conf_level)$conf.int)
    },
    x = xs,
    n = ns
  )
  # `vapply()` guarantees one numeric limit per stratum regardless of the input size.
  lower_by_strata <- vapply(ci_by_strata, function(ci) ci[1L], FUN.VALUE = numeric(1))
  upper_by_strata <- vapply(ci_by_strata, function(ci) ci[2L], FUN.VALUE = numeric(1))

  conf_int <- c(
    lower = sum(weights_new * lower_by_strata),
    upper = sum(weights_new * upper_by_strata)
  )

  # Return values: the estimated weights are only reported when they were computed here.
  if (do_iter) {
    list(conf_int = conf_int, weights = weights_new)
  } else {
    list(conf_int = conf_int)
  }
}

#' @describeIn h_proportions Calculates the Clopper-Pearson interval by calling [stats::binom.test()].
#'   Also referred to as the `exact` method.
#'
#' @examples
#' prop_clopper_pearson(rsp, conf_level = .95)
#'
#' @export
prop_clopper_pearson <- function(rsp,
                                 conf_level) {
  # Number of responders and total number of observations.
  n_responders <- sum(rsp)
  n_total <- length(rsp)

  # The exact binomial test provides the Clopper-Pearson limits.
  exact_test <- stats::binom.test(n_responders, n_total, conf.level = conf_level)

  # Return the bare lower/upper limits without attributes.
  as.numeric(exact_test$conf.int)
}

#' @describeIn h_proportions Calculates the Wald interval by following the usual textbook definition
#'   for a single proportion confidence interval using the normal approximation.
#'
#' @param correct (`flag`)\cr apply continuity correction.
#'
#' @examples
#' prop_wald(rsp, conf_level = 0.95)
#' prop_wald(rsp, conf_level = 0.95, correct = TRUE)
#'
#' @export
prop_wald <- function(rsp, conf_level, correct = FALSE) {
  n_obs <- length(rsp)
  p_hat <- mean(rsp)
  z_value <- stats::qnorm((1 + conf_level) / 2)

  # The continuity correction widens the interval by 1 / (2n) on each side.
  continuity <- if (correct) 1 / (2 * n_obs) else 0
  margin <- z_value * sqrt(p_hat * (1 - p_hat)) / sqrt(n_obs) + continuity

  # Limits are truncated to the [0, 1] range.
  c(max(0, p_hat - margin), min(1, p_hat + margin))
}

#' @describeIn h_proportions Calculates the Agresti-Coull interval (created by Alan Agresti and Brent Coull) by
#'   (for 95% CI) adding two successes and two failures to the data and then using the Wald formula to construct a CI.
#'
#' @examples
#' prop_agresti_coull(rsp, conf_level = 0.95)
#'
#' @export
prop_agresti_coull <- function(rsp, conf_level) {
  z_value <- stats::qnorm((1 + conf_level) / 2)
  z_squared <- z_value^2

  # Add z^2 / 2 successes and z^2 / 2 failures to the observed data.
  successes_adj <- sum(rsp) + z_squared / 2
  trials_adj <- length(rsp) + z_squared

  # Wald interval on the adjusted counts, truncated to the [0, 1] range.
  p_adj <- successes_adj / trials_adj
  margin <- z_value * sqrt(p_adj * (1 - p_adj)) / sqrt(trials_adj)

  c(max(0, p_adj - margin), min(1, p_adj + margin))
}

#' @describeIn h_proportions Calculates the Jeffreys interval, an equal-tailed interval based on the
#'   non-informative Jeffreys prior for a binomial proportion.
#'
#' @examples
#' prop_jeffreys(rsp, conf_level = 0.95)
#'
#' @export
prop_jeffreys <- function(rsp,
                          conf_level) {
  # Jeffreys interval: equal-tailed quantiles of a Beta(x + 0.5, n - x + 0.5) distribution,
  # with the lower limit set to 0 when there are no responders and the upper limit set
  # to 1 when every observation is a responder.
  n <- length(rsp)
  x_sum <- sum(rsp)
  alpha <- 1 - conf_level

  shape_success <- x_sum + 0.5
  shape_failure <- n - x_sum + 0.5

  # Plain `if`/`else` is used because the conditions are scalar; `isTRUE()` lets a missing
  # count fall through to `qbeta()`, which then yields `NA` instead of raising an error.
  l_ci <- if (isTRUE(x_sum == 0)) {
    0
  } else {
    stats::qbeta(alpha / 2, shape_success, shape_failure)
  }

  u_ci <- if (isTRUE(x_sum == n)) {
    1
  } else {
    stats::qbeta(1 - alpha / 2, shape_success, shape_failure)
  }

  c(l_ci, u_ci)
}

#' Description of the Proportion Summary
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a helper function that describes the analysis in [s_proportion()].
#'
#' @inheritParams s_proportion
#' @param long (`flag`)\cr whether a long or a short (default) description is required.
#'
#' @return String describing the analysis.
#'
#' @export
d_proportion <- function(conf_level,
                         method,
                         long = FALSE) {
  # Confidence level part, e.g. "95% CI", optionally extended for the long description.
  ci_label <- paste0(conf_level * 100, "% CI")
  if (long) {
    ci_label <- paste(ci_label, "for Response Rates")
  }

  # Human-readable name of the CI method; unknown methods are an error.
  method_label <- switch(method,
    "clopper-pearson" = "Clopper-Pearson",
    "waldcc" = "Wald, with correction",
    "wald" = "Wald, without correction",
    "wilson" = "Wilson, without correction",
    "strat_wilson" = "Stratified Wilson, without correction",
    "wilsonc" = "Wilson, with correction",
    "strat_wilsonc" = "Stratified Wilson, with correction",
    "agresti-coull" = "Agresti-Coull",
    "jeffreys" = "Jeffreys",
    stop(paste(method, "does not have a description"))
  )

  paste0(ci_label, " (", method_label, ")")
}

#' Helper Function for the Estimation of Stratified Quantiles
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This function wraps the estimation of stratified percentiles when we assume
#' the approximation for large numbers. This is necessary only in the case
#' proportions for each strata are unequal.
#'
#' @inheritParams argument_convention
#' @inheritParams prop_strat_wilson
#'
#' @return Stratified quantile.
#'
#' @seealso [prop_strat_wilson()]
#'
#' @examples
#' strata_data <- table(data.frame(
#'   "f1" = sample(c(TRUE, FALSE), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' ))
#' ns <- colSums(strata_data)
#' ests <- strata_data["TRUE", ] / ns
#' vars <- ests * (1 - ests) / ns
#' weights <- rep(1 / length(ns), length(ns))
#' strata_normal_quantile(vars, weights, 0.95)
#'
#' @export
strata_normal_quantile <- function(vars, weights, conf_level) {
  # Per-stratum contribution: squared weight times variance.
  weighted_vars <- weights^2 * vars

  # Ratio of the pooled standard deviation to the sum of the per-stratum standard deviations.
  scaling <- sqrt(sum(weighted_vars)) / sum(sqrt(weighted_vars))

  # Two-sided standard normal quantile for the requested confidence level.
  normal_quantile <- stats::qnorm((1 + conf_level) / 2)

  # Stratified quantile
  scaling * normal_quantile
}

#' Helper Function for the Estimation of Weights for `prop_strat_wilson`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This function wraps the iterative procedure used to estimate the weights
#' for each stratum. The weights are chosen to minimize the weighted squared
#' length of the confidence interval.
#'
#' @inheritParams prop_strat_wilson
#' @param vars (`numeric`)\cr normalized proportions for each strata.
#' @param strata_qnorm (`numeric`)\cr initial estimation with identical weights of the quantiles.
#' @param initial_weights (`numeric`)\cr initial weights used to calculate `strata_qnorm`. This can
#'   be optimized in the future if we need to estimate better initial weights.
#' @param n_per_strata (`numeric`)\cr number of elements in each strata.
#' @param max_iterations (`count`)\cr maximum number of iterations to be tried. Convergence is always checked.
#' @param tol (`number`)\cr tolerance threshold for convergence.
#'
#' @return A `list` of 3 elements: `n_it`, `weights`, and `diff_v`.
#'
#' @seealso For references and details see [prop_strat_wilson()].
#'
#' @examples
#' vs <- c(0.011, 0.013, 0.012, 0.014, 0.017, 0.018)
#' sq <- 0.674
#' ws <- rep(1 / length(vs), length(vs))
#' ns <- c(22, 18, 17, 17, 14, 12)
#'
#' update_weights_strat_wilson(vs, sq, ws, ns, 100, 0.95, 0.001)
#'
#' @export
update_weights_strat_wilson <- function(vars,
                                        strata_qnorm,
                                        initial_weights,
                                        n_per_strata,
                                        max_iterations = 50,
                                        conf_level = 0.95,
                                        tol = 0.001) {
  # Iteratively re-estimates the strata weights until the summed absolute change between
  # two consecutive iterations drops below `tol`, or `max_iterations` is reached.
  # Returns a list with the number of iterations performed (`n_it`), the final `weights`
  # and the per-iteration changes (`diff_v`).
  it <- 0
  diff_v <- NULL
  converged <- FALSE
  # If no iteration is performed, the starting weights are returned unchanged.
  weights_new <- initial_weights

  while (it < max_iterations) {
    it <- it + 1
    weights_new_t <- (1 + strata_qnorm^2 / n_per_strata)^2
    weights_new_b <- (vars + strata_qnorm^2 / (4 * n_per_strata^2))
    weights_new <- weights_new_t / weights_new_b
    weights_new <- weights_new / sum(weights_new)
    # The stratified quantile depends on the weights, so refresh it for the next iteration.
    strata_qnorm <- strata_normal_quantile(vars, weights_new, conf_level)
    step_diff <- sum(abs(weights_new - initial_weights))
    diff_v <- c(diff_v, step_diff)
    if (step_diff < tol) {
      converged <- TRUE
      break
    }
    initial_weights <- weights_new
  }

  # Warn only on genuine non-convergence; comparing `it` to `max_iterations` would also
  # flag runs that converged exactly on the last allowed iteration.
  if (!converged) {
    warning("The heuristic to find weights did not converge with max_iterations = ", max_iterations)
  }

  list(
    "n_it" = it,
    "weights" = weights_new,
    "diff_v" = diff_v
  )
}

#' Incidence Rate
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Estimate the event rate adjusted for person-years at risk, otherwise known
#' as incidence rate. Primary analysis variable is the person-years at risk.
#'
#' @inheritParams argument_convention
#' @param control (`list`)\cr parameters for estimation details, specified by using
#'   the helper function [control_incidence_rate()]. Possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level for the estimated incidence rate.
#'   * `conf_type` (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
#'     for confidence interval type.
#'   * `time_unit_input` (`string`)\cr `day`, `week`, `month`, or `year` (default)
#'     indicating time unit for data input.
#'   * `time_unit_output` (`numeric`)\cr time unit for desired output (in person-years).
#' @param person_years (`numeric`)\cr total person-years at risk.
#' @param alpha (`numeric`)\cr two-sided alpha-level for confidence interval.
#' @param n_events (`integer`)\cr number of events observed.
#'
#' @seealso [control_incidence_rate()] and helper functions [h_incidence_rate].
#'
#' @name incidence_rate
NULL

#' @describeIn incidence_rate Statistics function which estimates the incidence rate and the
#'   associated confidence interval.
#'
#' @return
#' * `s_incidence_rate()` returns the following statistics:
#'   - `person_years`: Total person-years at risk.
#'   - `n_events`: Total number of events observed.
#'   - `rate`: Estimated incidence rate.
#'   - `rate_ci`: Confidence interval for the incidence rate.
#'
#' @examples
#' library(dplyr)
#'
#' df <- data.frame(
#'   USUBJID = as.character(seq(6)),
#'   CNSR = c(0, 1, 1, 0, 0, 0),
#'   AVAL = c(10.1, 20.4, 15.3, 20.8, 18.7, 23.4),
#'   ARM = factor(c("A", "A", "A", "B", "B", "B"))
#' ) %>%
#'   mutate(is_event = CNSR == 0) %>%
#'   mutate(n_events = as.integer(is_event))
#'
#' # Internal function - s_incidence_rate
#' \dontrun{
#' s_incidence_rate(
#'   df,
#'   .var = "AVAL",
#'   n_events = "n_events",
#'   control = control_incidence_rate(
#'     time_unit_input = "month",
#'     time_unit_output = 100
#'   )
#' )
#' }
#'
#' @keywords internal
s_incidence_rate <- function(df,
                             .var,
                             n_events,
                             is_event,
                             control = control_incidence_rate()) {
  if (missing(is_event)) {
    # Regular path: `n_events` names an integer column of event counts.
    assert_df_with_variables(df, list(tte = .var, n_events = n_events))
    checkmate::assert_string(.var)
    checkmate::assert_numeric(df[[.var]], any.missing = FALSE)
    checkmate::assert_integer(df[[n_events]], any.missing = FALSE)
  } else {
    warning("argument is_event will be deprecated. Please use n_events.")

    if (missing(n_events)) {
      # Legacy path: the logical `is_event` column stands in for the event counts.
      assert_df_with_variables(df, list(tte = .var, is_event = is_event))
      checkmate::assert_string(.var)
      checkmate::assert_logical(df[[is_event]], any.missing = FALSE)
      checkmate::assert_numeric(df[[.var]], any.missing = FALSE)
      n_events <- is_event
    }
  }

  input_unit <- control$time_unit_input
  output_unit <- control$time_unit_output

  # Factor converting the input time unit into years; only the indicator that matches
  # `input_unit` contributes to the sum.
  to_years <- (
    1 * (input_unit == "year") +
      1 / 12 * (input_unit == "month") +
      1 / 52.14 * (input_unit == "week") +
      1 / 365.24 * (input_unit == "day")
  )
  person_years <- sum(df[[.var]], na.rm = TRUE) * to_years
  n_events <- sum(df[[n_events]], na.rm = TRUE)

  estimate <- h_incidence_rate(person_years, n_events, control)

  list(
    person_years = formatters::with_label(person_years, "Total patient-years at risk"),
    n_events = formatters::with_label(n_events, "Number of adverse events observed"),
    rate = formatters::with_label(estimate$rate, paste("AE rate per", output_unit, "patient-years")),
    rate_ci = formatters::with_label(estimate$rate_ci, f_conf_level(control$conf_level))
  )
}

#' @describeIn incidence_rate Formatted analysis function which is used as `afun`
#'   in `estimate_incidence_rate()`.
#'
#' @return
#' * `a_incidence_rate()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_incidence_rate
#' \dontrun{
#' a_incidence_rate(
#'   df,
#'   .var = "AVAL",
#'   n_events = "n_events",
#'   control = control_incidence_rate(time_unit_input = "month", time_unit_output = 100)
#' )
#' }
#'
#' @keywords internal
a_incidence_rate <- make_afun(
  s_incidence_rate,
  # One format string per statistic returned by `s_incidence_rate()`.
  .formats = c(
    "person_years" = "xx.x",
    "n_events" = "xx",
    "rate" = "xx.xx",
    "rate_ci" = "(xx.xx, xx.xx)"
  )
)

#' @describeIn incidence_rate Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `estimate_incidence_rate()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_incidence_rate()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   estimate_incidence_rate(
#'     vars = "AVAL",
#'     n_events = "n_events",
#'     control = control_incidence_rate(
#'       time_unit_input = "month",
#'       time_unit_output = 100
#'     )
#'   ) %>%
#'   build_table(df)
#'
#' @export
estimate_incidence_rate <- function(lyt,
                                    vars,
                                    ...,
                                    show_labels = "hidden",
                                    table_names = vars,
                                    .stats = NULL,
                                    .formats = NULL,
                                    .labels = NULL,
                                    .indent_mods = NULL) {
  # Specialise the default formatted analysis function with the caller's overrides.
  incidence_afun <- make_afun(
    a_incidence_rate,
    .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods
  )

  # Everything passed through `...` is handed on via `extra_args`.
  analyze(
    lyt,
    vars,
    afun = incidence_afun,
    extra_args = list(...),
    table_names = table_names,
    show_labels = show_labels
  )
}

#' Helper Functions for Incidence Rate
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param control (`list`)\cr parameters for estimation details, specified by using
#'   the helper function [control_incidence_rate()]. Possible parameter options are:
#'   * `conf_level`: (`proportion`)\cr confidence level for the estimated incidence rate.
#'   * `conf_type`: (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
#'     for confidence interval type.
#'   * `time_unit_input`: (`string`)\cr `day`, `week`, `month`, or `year` (default)
#'     indicating time unit for data input.
#'   * `time_unit_output`: (`numeric`)\cr time unit for desired output (in person-years).
#' @param person_years (`numeric`)\cr total person-years at risk.
#' @param alpha (`numeric`)\cr two-sided alpha-level for confidence interval.
#' @param n_events (`integer`)\cr number of events observed.
#'
#' @return Estimated incidence rate `rate` and associated confidence interval `rate_ci`.
#'
#' @seealso [incidence_rate]
#'
#' @name h_incidence_rate
NULL

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated confidence interval based on the normal approximation for the
#'   incidence rate. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_normal(200, 2)
#'
#' @export
h_incidence_rate_normal <- function(person_years,
                                    n_events,
                                    alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  # Point estimate and its standard error.
  rate <- n_events / person_years
  std_err <- sqrt(rate / person_years)

  # Symmetric interval around the estimate on the original scale.
  half_width <- stats::qnorm(1 - alpha / 2) * std_err

  list(rate = rate, rate_ci = c(rate - half_width, rate + half_width))
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated confidence interval based on the normal approximation for the
#'   logarithm of the incidence rate. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_normal_log(200, 2)
#'
#' @export
h_incidence_rate_normal_log <- function(person_years,
                                        n_events,
                                        alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  # Point estimate and standard error on the original scale.
  rate <- n_events / person_years
  std_err <- sqrt(rate / person_years)

  # Move to the log scale, build a symmetric interval there and transform back.
  log_rate <- log(rate)
  log_half_width <- stats::qnorm(1 - alpha / 2) * (std_err / rate)
  limits <- exp(c(log_rate - log_half_width, log_rate + log_half_width))

  list(rate = rate, rate_ci = limits)
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated exact confidence interval. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_exact(200, 2)
#'
#' @export
h_incidence_rate_exact <- function(person_years,
                                   n_events,
                                   alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  # Both limits are chi-squared quantiles divided by twice the person-years.
  denominator <- 2 * person_years
  lower <- stats::qchisq(p = alpha / 2, df = 2 * n_events) / denominator
  upper <- stats::qchisq(p = 1 - alpha / 2, df = 2 * n_events + 2) / denominator

  list(rate = n_events / person_years, rate_ci = c(lower, upper))
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated Byar's confidence interval. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_byar(200, 2)
#'
#' @export
h_incidence_rate_byar <- function(person_years,
                                  n_events,
                                  alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  # Event count shifted by 0.5, shared by all parts of the formula.
  shifted_events <- n_events + 0.5
  centre <- 1 - 1 / (9 * shifted_events)
  spread <- stats::qnorm(1 - alpha / 2) * sqrt(1 / shifted_events) / 3

  # The limits use the cube of the centre minus/plus the spread.
  lower <- shifted_events * ((centre - spread)^3) / person_years
  upper <- shifted_events * ((centre + spread)^3) / person_years

  list(rate = n_events / person_years, rate_ci = c(lower, upper))
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated confidence interval.
#'
#' @examples
#' # Internal function - h_incidence_rate
#' \dontrun{
#' h_incidence_rate(200, 2)
#'
#' h_incidence_rate(
#'   200,
#'   2,
#'   control_incidence_rate(
#'     conf_level = 0.9,
#'     conf_type = "normal_log",
#'     time_unit_output = 100
#'   )
#' )
#' }
#'
#' @keywords internal
h_incidence_rate <- function(person_years,
                             n_events,
                             control = control_incidence_rate()) {
  alpha <- 1 - control$conf_level

  # Dispatch to the helper matching the requested confidence interval type.
  per_year <- switch(control$conf_type,
    "normal" = h_incidence_rate_normal(person_years, n_events, alpha),
    "normal_log" = h_incidence_rate_normal_log(person_years, n_events, alpha),
    "exact" = h_incidence_rate_exact(person_years, n_events, alpha),
    "byar" = h_incidence_rate_byar(person_years, n_events, alpha)
  )

  # Rescale the estimate and its interval by the requested output time unit.
  output_scale <- control$time_unit_output
  list(
    rate = per_year$rate * output_scale,
    rate_ci = per_year$rate_ci * output_scale
  )
}

#' Helper Functions for Cox Proportional Hazards Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions used in [fit_coxreg_univar()] and [fit_coxreg_multivar()].
#'
#' @inheritParams argument_convention
#' @inheritParams h_coxreg_univar_extract
#' @inheritParams cox_regression_inter
#' @inheritParams control_coxreg
#'
#' @seealso [cox_regression]
#'
#' @name h_cox_regression
NULL

#' @describeIn h_cox_regression Helper for Cox regression formula. Creates a list of formulas. It is used
#'   internally by [fit_coxreg_univar()] for the comparison of univariate Cox regression models.
#'
#' @return
#' * `h_coxreg_univar_formulas()` returns a `character` vector coercible into formulas (e.g [stats::as.formula()]).
#'
#' @examples
#' # `h_coxreg_univar_formulas`
#'
#' ## Simple formulas.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", covariates = c("X", "y")
#'   )
#' )
#'
#' ## Addition of an optional strata.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
#'     strata = "SITE"
#'   )
#' )
#'
#' ## Inclusion of the interaction term.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
#'     strata = "SITE"
#'   ),
#'   interaction = TRUE
#' )
#'
#' ## Only covariates fitted in separate models.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", covariates = c("X", "y")
#'   )
#' )
#'
#' @export
h_coxreg_univar_formulas <- function(variables,
                                     interaction = FALSE) {
  # Builds one formula string per covariate (named by the covariate) and, when an arm
  # variable is given, a leading reference formula named "ref" containing only the arm.
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  checkmate::assert_flag(interaction)

  # Interaction terms need both an arm and at least one covariate.
  if (!has_arm || is.null(variables$covariates)) {
    checkmate::assert_false(interaction)
  }

  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # Pieces shared by every formula: the survival response and the optional strata term.
  response <- paste0("survival::Surv(", variables$time, ", ", variables$event, ") ~ ")
  strata_term <- if (is.null(variables$strata)) {
    ""
  } else {
    paste0(" + strata(", paste0(variables$strata, collapse = ", "), ")")
  }

  # Scalar conditions use `if`/`else`; `paste0()` vectorizes over the covariates.
  forms <- if (is.null(variables$covariates)) {
    NULL
  } else {
    paste0(
      response,
      if (has_arm) variables$arm else "1",
      if (interaction) " * " else " + ",
      variables$covariates,
      strata_term
    )
  }
  nams <- variables$covariates

  if (has_arm) {
    ref <- paste0(response, variables$arm, strata_term)
    forms <- c(ref, forms)
    nams <- c("ref", nams)
  }
  stats::setNames(forms, nams)
}

#' @describeIn h_cox_regression Helper for multivariate Cox regression formula. Creates a formula
#'   string. It is used internally by [fit_coxreg_multivar()] for the comparison of multivariate Cox
#'   regression models. Interactions will not be included in the multivariate Cox regression model.
#'
#' @return
#' * `h_coxreg_multivar_formula()` returns a `string` coercible into a formula (e.g [stats::as.formula()]).
#'
#' @examples
#' # `h_coxreg_multivar_formula`
#'
#' h_coxreg_multivar_formula(
#'   variables = list(
#'     time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE")
#'   )
#' )
#'
#' # Addition of an optional strata.
#' h_coxreg_multivar_formula(
#'   variables = list(
#'     time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE"),
#'     strata = "SITE"
#'   )
#' )
#'
#' # Example without treatment arm.
#' h_coxreg_multivar_formula(
#'   variables = list(
#'     time = "AVAL", event = "event", covariates = c("RACE", "AGE"),
#'     strata = "SITE"
#'   )
#' )
#'
#' @export
h_coxreg_multivar_formula <- function(variables) {
  # Builds a single formula string: survival response ~ arm (or "1" without an arm),
  # followed by all covariates as additive terms and an optional strata term.
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # `has_arm` is a scalar, so plain `if`/`else` is used instead of `ifelse()`.
  arm_term <- if (has_arm) variables$arm else "1"
  y <- paste0(
    "survival::Surv(", variables$time, ", ", variables$event, ") ~ ",
    arm_term
  )
  if (length(variables$covariates) > 0) {
    y <- paste(y, paste(variables$covariates, collapse = " + "), sep = " + ")
  }
  if (!is.null(variables$strata)) {
    y <- paste0(y, " + strata(", paste0(variables$strata, collapse = ", "), ")")
  }
  y
}

#' @describeIn h_cox_regression Utility function to help tabulate the result of
#'   a univariate Cox regression model.
#'
#' @param effect (`string`)\cr the treatment variable.
#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()].
#'
#' @return
#' * `h_coxreg_univar_extract()` returns a `data.frame` with variables `effect`, `term`, `term_label`, `level`,
#'   `n`, `hr`, `lcl`, `ucl`, and `pval`.
#'
#' @examples
#' library(survival)
#'
#' dta_simple <- data.frame(
#'   time = c(5, 5, 10, 10, 5, 5, 10, 10),
#'   status = c(0, 0, 1, 0, 0, 1, 1, 1),
#'   armcd = factor(LETTERS[c(1, 1, 1, 1, 2, 2, 2, 2)], levels = c("A", "B")),
#'   var1 = c(45, 55, 65, 75, 55, 65, 85, 75),
#'   var2 = c("F", "M", "F", "M", "F", "M", "F", "U")
#' )
#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
#' result <- h_coxreg_univar_extract(
#'   effect = "armcd", covar = "armcd", mod = mod, data = dta_simple
#' )
#' result
#'
#' @export
h_coxreg_univar_extract <- function(effect,
                                    covar,
                                    data,
                                    mod,
                                    control = control_coxreg()) {
  checkmate::assert_string(covar)
  checkmate::assert_string(effect)
  checkmate::assert_class(mod, "coxph")

  # Translate the p-value method stored in `control` into the test statistic
  # label handed to the ANOVA helper.
  stat_lookup <- c(wald = "Wald", likelihood = "LR")
  test_statistic <- stat_lookup[control$pval_method]

  anova_tab <- muffled_car_anova(mod, test_statistic)
  cox_summary <- summary(mod, conf.int = control$conf_level)
  cox_tidy <- broom::tidy(cox_summary)

  # p-value: the ANOVA row of `effect`, taking the column whose name contains "Pr".
  effect_row <- anova_tab[effect, , drop = TRUE]
  pval_pos <- grep(pattern = "Pr", x = names(effect_row))
  pval <- effect_row[[pval_pos, drop = TRUE]]

  # Coefficient rows whose `level` matches `effect` (used as a regular expression).
  effect_rows <- cox_tidy[grepl(effect, cox_tidy$level), ]

  is_treatment <- effect == covar
  term_label <- if (is_treatment) {
    covar_levels <- levels(data[[covar]])
    paste0(covar_levels[2], " vs control (", covar_levels[1], ")")
  } else {
    unname(labels_or_names(data[covar]))
  }

  lower_col <- grep("lower", names(effect_rows))
  upper_col <- grep("upper", names(effect_rows))

  data.frame(
    effect = if (is_treatment) "Treatment:" else "Covariate:",
    term = covar,
    term_label = term_label,
    level = levels(data[[effect]])[2],
    n = mod[["n"]],
    hr = unname(effect_rows["exp(coef)"]),
    lcl = unname(effect_rows[lower_col]),
    ucl = unname(effect_rows[upper_col]),
    pval = pval,
    stringsAsFactors = FALSE
  )
}

#' @describeIn h_cox_regression Tabulation of multivariate Cox regressions. Utility function to help
#'   tabulate the result of a multivariate Cox regression model for a treatment/covariate variable.
#'
#' @return
#' * `h_coxreg_multivar_extract()` returns a `data.frame` with variables `pval`, `hr`, `lcl`, `ucl`, `level`,
#'   `n`, `term`, and `term_label`.
#'
#' @examples
#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
#' result <- h_coxreg_multivar_extract(
#'   var = "var1", mod = mod, data = dta_simple
#' )
#' result
#'
#' @export
h_coxreg_multivar_extract <- function(var,
                                      data,
                                      mod,
                                      control = control_coxreg()) {
  stat_lookup <- c(wald = "Wald", likelihood = "LR")
  test_statistic <- stat_lookup[control$pval_method]
  mod_aov <- muffled_car_anova(mod, test_statistic)

  msum <- summary(mod, conf.int = control$conf_level)
  sum_anova <- broom::tidy(mod_aov)
  sum_cox <- broom::tidy(msum)

  # Overall (ANOVA) p-value row for `var`.
  ret_anova <- sum_anova[sum_anova$term == var, c("term", "p.value")]
  names(ret_anova)[2] <- "pval"

  # Coefficient rows for `var`: factors match by prefix (one row per level),
  # other variables by exact name. The "exp(-coef)" column is dropped.
  keep_cols <- !(names(sum_cox) %in% "exp(-coef)")
  keep_rows <- if (is.factor(data[[var]])) {
    startsWith(prefix = var, x = sum_cox$level)
  } else {
    var == sum_cox$level
  }
  ret_cox <- sum_cox[keep_rows, keep_cols]
  names(ret_cox)[1:4] <- c("pval", "hr", "lcl", "ucl")

  varlab <- unname(labels_or_names(data[var]))
  ret_cox$term <- varlab

  if (is.numeric(data[[var]])) {
    ret <- ret_cox
    ret$term_label <- ret$term
  } else {
    var_levels <- levels(data[[var]])
    # With at most two levels the overall p-value is blanked out in the header row.
    if (length(var_levels) <= 2) {
      ret_anova$pval <- NA
    }
    ret_anova$term_label <- paste0(varlab, " (reference = ", var_levels[1], ")")
    # Strip the variable name so only the level remains as the row label.
    ret_cox$level <- gsub(var, "", ret_cox$level)
    ret_cox$term_label <- ret_cox$level
    ret <- dplyr::bind_rows(ret_anova, ret_cox)
  }

  as.data.frame(ret)
}

#' Add Titles, Footnotes, Page Number, and a Bounding Box to a Grid Grob
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This function is useful to label grid grobs (also `ggplot2`, and `lattice` plots)
#' with title, footnote, and page numbers.
#'
#' @inheritParams grid::grob
#' @param grob a grid grob object, optionally `NULL` if only a `grob` with the decoration should be shown.
#' @param titles vector of character strings. Vector elements are separated by a newline and strings are wrapped
#'   according to the page width.
#' @param footnotes vector of character string. Same rules as for `titles`.
#' @param page string with page numeration, if `NULL` then no page number is displayed.
#' @param width_titles unit object
#' @param width_footnotes unit object
#' @param border boolean, whether a border should be drawn around the plot or not.
#' @param margins unit object of length 4
#' @param padding  unit object of length 4
#' @param outer_margins  unit object of length 4
#' @param gp_titles a `gpar` object
#' @param gp_footnotes a `gpar` object
#'
#' @return A grid grob (`gTree`).
#'
#' @details The titles and footnotes will be ragged, i.e. each title will be wrapped individually.
#'
#' @examples
#' library(grid)
#'
#' titles <- c(
#'   "Edgar Anderson's Iris Data",
#'   paste(
#'     "This famous (Fisher's or Anderson's) iris data set gives the measurements",
#'     "in centimeters of the variables sepal length and width and petal length",
#'     "and width, respectively, for 50 flowers from each of 3 species of iris."
#'   )
#' )
#'
#' footnotes <- c(
#'   "The species are Iris setosa, versicolor, and virginica.",
#'   paste(
#'     "iris is a data frame with 150 cases (rows) and 5 variables (columns) named",
#'     "Sepal.Length, Sepal.Width, Petal.Length, Petal.Width, and Species."
#'   )
#' )
#'
#' ## empty plot
#' grid.newpage()
#'
#' grid.draw(
#'   decorate_grob(
#'     NULL,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 4 of 10"
#'   )
#' )
#'
#' # grid
#' p <- gTree(
#'   children = gList(
#'     rectGrob(),
#'     xaxisGrob(),
#'     yaxisGrob(),
#'     textGrob("Sepal.Length", y = unit(-4, "lines")),
#'     textGrob("Petal.Length", x = unit(-3.5, "lines"), rot = 90),
#'     pointsGrob(iris$Sepal.Length, iris$Petal.Length, gp = gpar(col = iris$Species), pch = 16)
#'   ),
#'   vp = vpStack(plotViewport(), dataViewport(xData = iris$Sepal.Length, yData = iris$Petal.Length))
#' )
#' grid.newpage()
#' grid.draw(p)
#'
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     grob = p,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 6 of 129"
#'   )
#' )
#'
#' ## with ggplot2
#' library(ggplot2)
#'
#' p_gg <- ggplot2::ggplot(iris, aes(Sepal.Length, Sepal.Width, col = Species)) +
#'   ggplot2::geom_point()
#' p_gg
#' p <- ggplotGrob(p_gg)
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     grob = p,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 6 of 129"
#'   )
#' )
#'
#' ## with lattice
#' library(lattice)
#'
#' xyplot(Sepal.Length ~ Petal.Length, data = iris, col = iris$Species)
#' p <- grid.grab()
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     grob = p,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 6 of 129"
#'   )
#' )
#'
#' # with gridExtra - no borders
#' library(gridExtra)
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     tableGrob(
#'       head(mtcars)
#'     ),
#'     titles = "title",
#'     footnotes = "footnote",
#'     border = FALSE
#'   )
#' )
#'
#' @export
decorate_grob <- function(grob,
                          titles,
                          footnotes,
                          page = "",
                          width_titles = grid::unit(1, "npc"),
                          width_footnotes = grid::unit(1, "npc") - grid::stringWidth(page),
                          border = TRUE,
                          margins = grid::unit(c(1, 0, 1, 0), "lines"),
                          padding = grid::unit(rep(1, 4), "lines"),
                          outer_margins = grid::unit(c(2, 1.5, 3, 1.5), "cm"),
                          gp_titles = grid::gpar(),
                          gp_footnotes = grid::gpar(fontsize = 8),
                          name = NULL,
                          gp = grid::gpar(),
                          vp = NULL) {
  # `page = NULL` is documented as "no page number". Normalise it to an empty
  # label before the lazily evaluated default of `width_footnotes` is forced,
  # because that default calls `grid::stringWidth(page)`.
  if (is.null(page)) {
    page <- ""
  }

  # Titles occupy row 1 of the 3-row layout defined further below.
  st_titles <- split_text_grob(
    titles,
    x = 0, y = 1,
    just = c("left", "top"),
    width = width_titles,
    vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 1),
    gp = gp_titles
  )

  # Footnotes occupy row 3 and share it with the page label.
  st_footnotes <- split_text_grob(
    footnotes,
    x = 0, y = 1,
    just = c("left", "top"),
    width = width_footnotes,
    vp = grid::viewport(layout.pos.row = 3, layout.pos.col = 1),
    gp = gp_footnotes
  )

  grid::gTree(
    # The inputs are stored on the gTree; `validDetails.decoratedGrob()` checks them.
    grob = grob,
    titles = titles,
    footnotes = footnotes,
    page = page,
    width_titles = width_titles,
    width_footnotes = width_footnotes,
    border = border,
    margins = margins,
    padding = padding,
    outer_margins = outer_margins,
    gp_titles = gp_titles,
    gp_footnotes = gp_footnotes,
    children = grid::gList(
      grid::gTree(
        children = grid::gList(
          st_titles,
          # Row 2: optional border with the (padded) user grob inside.
          grid::gTree(
            children = grid::gList(
              if (border) grid::rectGrob(),
              grid::gTree(
                children = grid::gList(
                  grob
                ),
                vp = grid::plotViewport(margins = padding)
              )
            ),
            vp = grid::vpStack(
              grid::viewport(layout.pos.row = 2, layout.pos.col = 1),
              grid::plotViewport(margins = margins)
            )
          ),
          st_footnotes,
          # Page label, anchored bottom-right in the footnote row.
          grid::textGrob(
            page,
            x = 1, y = 0,
            just = c("right", "bottom"),
            vp = grid::viewport(layout.pos.row = 3, layout.pos.col = 1),
            gp = gp_footnotes
          )
        ),
        childrenvp = NULL,
        name = "titles_grob_footnotes",
        vp = grid::vpStack(
          grid::plotViewport(margins = outer_margins),
          grid::viewport(
            # Title and footnote rows take the height of their text; the
            # middle row receives the remaining space.
            layout = grid::grid.layout(
              nrow = 3, ncol = 1,
              heights = grid::unit.c(
                grid::grobHeight(st_titles),
                grid::unit(1, "null"),
                grid::grobHeight(st_footnotes)
              )
            )
          )
        )
      )
    ),
    name = name,
    gp = gp,
    vp = vp,
    cl = "decoratedGrob"
  )
}

#' @importFrom grid validDetails
#' @noRd
validDetails.decoratedGrob <- function(x) {
  # Spacing arguments given as plain vectors (not grid units) must have 4 entries.
  check_spacing <- function(value, label) {
    if (!grid::is.unit(value)) {
      checkmate::assert_vector(value, len = 4, .var.name = label)
    }
  }

  checkmate::assert_character(x$titles)
  checkmate::assert_character(x$footnotes)

  # The decorated grob itself is optional.
  if (!is.null(x$grob)) {
    checkmate::assert_true(grid::is.grob(x$grob))
  }
  # The page label is only type-checked when it has exactly one element.
  if (length(x$page) == 1) {
    checkmate::assert_character(x$page)
  }

  check_spacing(x$outer_margins, "x$outer_margins")
  check_spacing(x$margins, "x$margins")
  check_spacing(x$padding, "x$padding")

  x
}

#' @importFrom grid widthDetails
#' @noRd
widthDetails.decoratedGrob <- function(x) {
  # Reported width is always one "null" unit, regardless of `x`.
  flexible_width <- grid::unit(1, "null")
  flexible_width
}

#' @importFrom grid heightDetails
#' @noRd
heightDetails.decoratedGrob <- function(x) {
  # Reported height is always one "null" unit, regardless of `x`.
  flexible_height <- grid::unit(1, "null")
  flexible_height
}

# Adapted from Paul Murrell R Graphics 2nd Edition
# https://www.stat.auckland.ac.nz/~paul/RG2e/interactgrid-splittext.R
#
# Greedily wraps a single string: words (split on single spaces) are appended
# to the current line while the line stays narrower than `width`; otherwise a
# newline is inserted before the word. Widths are measured in the current grid
# drawing context.
#
# text:  a single character string.
# width: a grid unit giving the available width.
# Returns `text` unchanged when it already fits or cannot be split, otherwise
# the string with "\n" inserted at the chosen break points.
split_string <- function(text, width) {
  availwidth <- grid::convertWidth(width, "in", valueOnly = TRUE)
  textwidth <- grid::convertWidth(grid::stringWidth(text), "in", valueOnly = TRUE)
  strings <- strsplit(text, " ")[[1]]

  # Nothing to wrap when the text fits or has at most one word. The `<= 1`
  # also covers the empty string (zero words), for which a backwards
  # `2:length(strings)` loop would otherwise run.
  if (textwidth <= availwidth || length(strings) <= 1) {
    text
  } else {
    gapwidth <- grid::stringWidth(" ")
    newstring <- strings[1]
    linewidth <- grid::stringWidth(newstring)

    for (i in seq_along(strings)[-1]) {
      str_width <- grid::stringWidth(strings[i])
      if (grid::convertWidth(linewidth + gapwidth + str_width, "in", valueOnly = TRUE) < availwidth) {
        # Word fits on the current line.
        sep <- " "
        linewidth <- linewidth + gapwidth + str_width
      } else {
        # Start a new line with this word.
        sep <- "\n"
        linewidth <- str_width
      }
      newstring <- paste(newstring, strings[i], sep = sep)
    }
    newstring
  }
}

#' Split Text According To Available Text Width
#'
#' Dynamically wrap text.
#'
#' @inheritParams grid::grid.text
#' @param text character string
#' @param width a unit object specifying max width of text
#'
#' @return A text grob.
#'
#' @details This code is taken from R Graphics by Paul Murrell, 2nd edition
#'
#' @examples
#' # Internal function - split_text_grob
#' \dontrun{
#' sg <- split_text_grob(text = paste(
#'   "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum vitae",
#'   "dapibus dolor, ac mattis erat. Nunc metus lectus, imperdiet ut enim eu,",
#'   "commodo scelerisque urna. Vestibulum facilisis metus vel nibh tempor, sed",
#'   "elementum sem tempus. Morbi quis arcu condimentum, maximus lorem id,",
#'   "tristique ante. Nullam a nunc dui. Fusce quis lacus nec ante dignissim",
#'   "faucibus nec vitae tellus. Suspendisse mollis et sapien eu ornare. Vestibulum",
#'   "placerat neque nec justo efficitur, ornare varius nulla imperdiet. Nunc justo",
#'   "sapien, vestibulum eget efficitur eget, porttitor id ante. Nulla tempor",
#'   "luctus massa id elementum. Praesent dictum, neque vitae vestibulum malesuada,",
#'   "nunc nisi blandit lacus, sit amet tristique odio dui sit amet velit."
#' ))
#'
#' library(grid)
#' grobHeight(sg)
#'
#' grid.newpage()
#' pushViewport(plotViewport())
#' grid.rect()
#' grid.draw(sg)
#'
#' grid.rect(
#'   height = grobHeight(sg), width = unit(1, "cm"), gp = gpar(fill = "red")
#' )
#'
#' # stack split_text_grob
#' grid.newpage()
#' pushViewport(plotViewport())
#' grid.rect()
#' grid.draw(split_text_grob(
#'   c("Hello, this is a test", "and yet another test"),
#'   just = c("left", "top"), x = 0, y = 1
#' ))
#' }
#'
#' @keywords internal
split_text_grob <- function(text,
                            x = grid::unit(0.5, "npc"),
                            y = grid::unit(0.5, "npc"),
                            width = grid::unit(1, "npc"),
                            just = "centre",
                            hjust = NULL,
                            vjust = NULL,
                            default.units = "npc", # nolint
                            name = NULL,
                            gp = grid::gpar(),
                            vp = NULL) {
  # Plain numbers for the position are interpreted in `default.units`.
  if (!grid::is.unit(x)) {
    x <- grid::unit(x, default.units)
  }
  if (!grid::is.unit(y)) {
    y <- grid::unit(y, default.units)
  }

  checkmate::assert_true(grid::is.unit(width))
  checkmate::assert_vector(width, len = 1)

  ## if it is a fixed unit then we do not need to recalculate when viewport resized
  # NOTE(review): this compares the "unit" attribute against unit names; confirm
  # that the attribute is stored as a character name on the R versions in use.
  absolute_units <- c("cm", "inches", "mm", "points", "picas", "bigpts", "dida", "cicero", "scaledpts")
  width_unit <- attr(width, "unit")
  is_fixed_width <- !inherits(width, "unit.arithmetic") &&
    !is.null(width_unit) &&
    width_unit %in% absolute_units
  if (is_fixed_width) {
    # Wrap once up front and cache the result on the text itself.
    wrapped <- vapply(text, split_string, character(1), width = width)
    attr(text, "fixed_text") <- paste(wrapped, collapse = "\n")
  }

  grid::grob(
    text = text,
    x = x, y = y,
    width = width,
    just = just,
    hjust = hjust,
    vjust = vjust,
    rot = 0,
    check.overlap = FALSE,
    name = name,
    gp = gp,
    vp = vp,
    cl = "dynamicSplitText"
  )
}

#' @importFrom grid validDetails
#' @noRd
validDetails.dynamicSplitText <- function(x) {
  # Text must be character; width must be a single grid unit.
  checkmate::assert_character(x$text)

  grob_width <- x$width
  checkmate::assert_true(grid::is.unit(grob_width))
  checkmate::assert_vector(x$width, len = 1)

  x
}

#' @importFrom grid heightDetails
#' @noRd
heightDetails.dynamicSplitText <- function(x) {
  # Use the cached wrapped text when present, otherwise wrap each element
  # against the grob width and join the pieces with newlines.
  cached <- attr(x$text, "fixed_text")
  txt <- if (is.null(cached)) {
    wrapped <- vapply(x$text, split_string, character(1), width = x$width)
    paste(wrapped, collapse = "\n")
  } else {
    cached
  }
  grid::stringHeight(txt)
}

#' @importFrom grid widthDetails
#' @noRd
widthDetails.dynamicSplitText <- function(x) {
  # The width is whatever was stored on the grob.
  grob_width <- x$width
  grob_width
}

#' @importFrom grid drawDetails
#' @noRd
drawDetails.dynamicSplitText <- function(x, recording) {
  # Use the cached wrapped text when present, otherwise wrap each element
  # against the grob width and join the pieces with newlines.
  cached <- attr(x$text, "fixed_text")
  txt <- if (is.null(cached)) {
    wrapped <- vapply(x$text, split_string, character(1), width = x$width)
    paste(wrapped, collapse = "\n")
  } else {
    cached
  }

  # Re-shape the object into a "text" grob: the wrapped string becomes the
  # label, the custom fields are dropped and the leading class is replaced.
  x$label <- txt
  x$text <- NULL
  x$width <- NULL
  remaining_classes <- class(x)[-1]
  class(x) <- c("text", remaining_classes)

  grid::grid.draw(x)
}

#' Update Page Number
#'
#' Automatically updates page number.
#'
#' @param npages number of pages in total
#' @param ... passed on to [decorate_grob()]
#'
#' @return Closure that increments the page number.
#'
#' @examples
#' # Internal function - decorate_grob_factory
#' \dontrun{
#' pf <- decorate_grob_factory(
#'   titles = "This is a test\nHello World",
#'   footnotes = "Here belong the footnotess",
#'   npages = 3
#' )
#'
#' library(grid)
#' draw_grob(pf(NULL))
#' draw_grob(pf(NULL))
#' draw_grob(pf(NULL))
#' }
#'
#' @keywords internal
decorate_grob_factory <- function(npages, ...) {
  # Page counter shared by all calls of the returned closure.
  current_page <- 0
  function(grob) {
    page_no <- current_page + 1
    current_page <<- page_no
    # Refuse to decorate more pages than were announced.
    if (page_no > npages) {
      msg <- paste("current page is", page_no, "but max.", npages, "specified.")
      stop(msg)
    }
    page_label <- paste("Page", page_no, "of", npages)
    decorate_grob(grob = grob, page = page_label, ...)
  }
}

#' Decorate Set of `grobs` and Add Page Numbering
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Note that this uses the [decorate_grob_factory()] function.
#'
#' @param grobs a list of grid grobs
#' @param ... arguments passed on to [decorate_grob()].
#'
#' @return A decorated grob.
#'
#' @examples
#' library(ggplot2)
#' library(grid)
#' g <- with(data = iris, {
#'   list(
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Length, Sepal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Length, Petal.Length, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Length, Petal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Width, Petal.Length, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Width, Petal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Petal.Length, Petal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     )
#'   )
#' })
#' lg <- decorate_grob_set(grobs = g, titles = "Hello\nOne\nTwo\nThree", footnotes = "")
#'
#' draw_grob(lg[[1]])
#' draw_grob(lg[[2]])
#' draw_grob(lg[[6]])
#'
#' @export
decorate_grob_set <- function(grobs, ...) {
  # One page per grob; the factory's closure numbers the pages in list order.
  decorate_page <- decorate_grob_factory(npages = length(grobs), ...)
  lapply(grobs, decorate_page)
}

#' Re-implemented [range()] Default S3 method for numerical objects
#'
#' This function returns `c(NA, NA)` instead of `c(-Inf, Inf)` for zero-length data
#' without any warnings.
#'
#' @param x (`numeric`)\cr a sequence of numbers for which the range is computed.
#' @param na.rm (`logical`)\cr indicating if `NA` should be omitted.
#' @param finite (`logical`)\cr indicating if non-finite elements should be removed.
#'
#' @return A 2-element vector of class `numeric`.
#'
#' @examples
#' # Internal function - range_noinf
#' \dontrun{
#' range_noinf(1:5)
#' range_noinf(c(1:5, NA, NA), na.rm = TRUE)
#' range_noinf(numeric(), na.rm = TRUE)
#' range_noinf(c(1:5, NA, NA, Inf), na.rm = TRUE, finite = TRUE)
#' range_noinf(Inf)
#' range_noinf(Inf, na.rm = TRUE, finite = TRUE)
#' range_noinf(c(Inf, NA), na.rm = FALSE, finite = TRUE)
#' range_noinf(c(1, Inf, NA), na.rm = FALSE, finite = TRUE)
#' }
#'
#' @keywords internal
range_noinf <- function(x, na.rm = FALSE, finite = FALSE) { # nolint
  checkmate::assert_numeric(x)

  # `finite` takes precedence over `na.rm`; is.finite() also drops NAs.
  if (finite) {
    x <- x[is.finite(x)]
  } else if (na.rm) {
    x <- x[!is.na(x)]
  }

  if (length(x) > 0) {
    return(c(min(x, na.rm = FALSE), max(x, na.rm = FALSE)))
  }

  # Nothing left: return a pair of NAs carrying the storage type of `x`.
  empty_range <- c(NA, NA)
  mode(empty_range) <- typeof(x)
  empty_range
}

#' Utility function to create label for confidence interval
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#'
#' @return A `string`.
#'
#' @export
f_conf_level <- function(conf_level) {
  assert_proportion_value(conf_level)
  # Express the level as a percentage, e.g. 0.95 -> "95% CI".
  percent <- conf_level * 100
  paste0(percent, "% CI")
}

#' Utility function to create label for p-value
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param test_mean (`number`)\cr mean value to test under the null hypothesis.
#'
#' @return A `string`.
#'
#' @export
f_pval <- function(test_mean) {
  checkmate::assert_numeric(test_mean, len = 1)
  # Label states the null-hypothesis mean being tested.
  null_hypothesis <- paste0("H0: mean = ", test_mean)
  paste0("p-value (", null_hypothesis, ")")
}

#' Utility function to return a named list of covariate names.
#'
#' @param covariates (`character`)\cr a vector that can contain single variable names (such as
#'   `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
#'
#' @return A named `list` of `character` vector.
#'
#' @keywords internal
get_covariates <- function(covariates) {
  checkmate::assert_character(covariates)
  # Break interaction terms ("X1 * X2") into their component variable names.
  pieces <- unlist(strsplit(covariates, "*", fixed = TRUE))
  cov_vars <- unique(trimws(pieces))
  # Each variable name is used both as list name and as list value.
  stats::setNames(as.list(cov_vars), cov_vars)
}

#' Replicate Entries of a Vector if Required
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Replicate entries of a vector if required.
#'
#' @inheritParams argument_convention
#' @param n (`count`)\cr how many entries we need.
#'
#' @return `x` if it has the required length already or is `NULL`,
#'   otherwise if it is scalar the replicated version of it with `n` entries.
#'
#' @note This function will fail if `x` is not of length `n` and/or is not a scalar.
#'
#' @export
to_n <- function(x, n) {
  # NULL is passed through untouched.
  if (is.null(x)) {
    return(NULL)
  }

  len <- length(x)
  # A scalar is recycled to `n` entries.
  if (len == 1) {
    return(rep(x, n))
  }
  # Anything else must already have exactly `n` entries.
  if (len != n) {
    stop("dimension mismatch")
  }
  x
}

#' Check Element Dimension
#'
#' Checks if the elements in `...` have the same dimension.
#'
#' @param ... (`data.frame`s or `vector`s)\cr any data frames/vectors.
#' @param omit_null (`logical`)\cr whether `NULL` elements in `...` should be omitted from the check.
#'
#' @return A `logical` value.
#'
#' @keywords internal
check_same_n <- function(..., omit_null = TRUE) {
  dots <- list(...)

  # Give every argument a label. Without this, fully unnamed input makes
  # `names(dots)` NULL, and Map() cannot pair a non-empty list with a
  # zero-length vector of names. Unnamed arguments are labelled by position.
  arg_names <- names(dots)
  if (is.null(arg_names)) {
    arg_names <- rep("", length(dots))
  }
  unnamed <- is.na(arg_names) | arg_names == ""
  arg_names[unnamed] <- paste0("..", which(unnamed))
  names(dots) <- arg_names

  # Size of each argument: rows for data frames, length for atomic vectors,
  # NA for NULLs that are allowed to be skipped.
  n_list <- Map(
    function(x, name) {
      if (is.null(x)) {
        if (omit_null) {
          NA_integer_
        } else {
          stop("arg ", name, " is not supposed to be NULL")
        }
      } else if (is.data.frame(x)) {
        nrow(x)
      } else if (is.atomic(x)) {
        length(x)
      } else {
        stop("data structure for ", name, " is currently not supported")
      }
    },
    dots, arg_names
  )

  # Drop the NA placeholders of omitted NULLs; names are kept for the message.
  n <- stats::na.omit(unlist(n_list))

  if (length(unique(n)) > 1) {
    # Report every argument whose size differs from the first one.
    sel <- which(n != n[1])
    stop("dimension mismatch: ", paste(names(n)[sel], collapse = ", "), " do not have N=", n[1])
  }

  TRUE
}

#' Make Names Without Dots
#'
#' @param nams (`character`)\cr vector of original names.
#'
#' @return A `character` `vector` of proper names, which does not use dots in contrast to [make.names()].
#'
#' @examples
#' # Internal function - make_names
#' \dontrun{
#' make_names(c("foo Bar", "1 2 3 bla"))
#' }
#'
#' @keywords internal
make_names <- function(nams) {
  # make.names() yields syntactically valid names that may contain dots;
  # every literal dot is then removed.
  valid_names <- make.names(nams)
  gsub("\\.", "", valid_names)
}

#' Conversion of Months to Days
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Conversion of Months to Days. This is an approximate calculation because it
#' considers each month as having an average of 30.4375 days.
#'
#' @param x (`numeric`)\cr time in months.
#'
#' @return A `numeric` vector with the time in days.
#'
#' @examples
#' x <- c(13.25, 8.15, 1, 2.834)
#' month2day(x)
#'
#' @export
month2day <- function(x) {
  checkmate::assert_numeric(x)
  # Average month length used for the conversion.
  days_per_month <- 30.4375
  x * days_per_month
}

#' Conversion of Days to Months
#'
#' @param x (`numeric`)\cr time in days.
#'
#' @return A `numeric` vector with the time in months.
#'
#' @examples
#' x <- c(403, 248, 30, 86)
#' day2month(x)
#'
#' @export
day2month <- function(x) {
  checkmate::assert_numeric(x)
  # Average month length used for the conversion.
  days_per_month <- 30.4375
  x / days_per_month
}

#' Return an empty numeric if all elements are `NA`.
#'
#' @param x (`numeric`)\cr vector.
#'
#' @return An empty `numeric` if all elements of `x` are `NA`, otherwise `x`.
#'
#' @examples
#' x <- c(NA, NA, NA)
#' # Internal function - empty_vector_if_na
#' \dontrun{
#' empty_vector_if_na(x)
#' }
#'
#' @keywords internal
empty_vector_if_na <- function(x) {
  # Anything with at least one non-missing entry is returned unchanged.
  all_missing <- all(is.na(x))
  if (!all_missing) {
    return(x)
  }
  numeric()
}

#' Combine Two Vectors Element Wise
#'
#' @param x (`vector`)\cr first vector to combine.
#' @param y (`vector`)\cr second vector to combine.
#'
#' @return A `list` where each element combines corresponding elements of `x` and `y`.
#'
#' @examples
#' combine_vectors(1:3, 4:6)
#'
#' @export
combine_vectors <- function(x, y) {
  checkmate::assert_vector(x)
  checkmate::assert_vector(y, len = length(x))

  # Stack the vectors as two rows, so each column holds one (x[i], y[i]) pair.
  stacked <- rbind(x, y)
  pair_columns <- as.data.frame(stacked)
  # Pull out each column as a plain vector and drop the column names.
  unname(lapply(pair_columns, `c`))
}

#' Extract Elements by Name
#'
#' This utility function extracts elements from a vector `x` by `names`.
#' Differences to the standard `[` function are:
#'
#' - If `x` is `NULL`, then still always `NULL` is returned (same as in base function).
#' - If `x` is not `NULL`, then the intersection of its names is made with `names` and those
#'   elements are returned. That is, `names` which don't appear in `x` are not returned as `NA`s.
#'
#' @param x (named `vector`)\cr where to extract named elements from.
#' @param names (`character`)\cr vector of names to extract.
#'
#' @return `NULL` if `x` is `NULL`, otherwise the extracted elements from `x`.
#'
#' @keywords internal
extract_by_name <- function(x, names) {
  if (is.null(x)) {
    return(NULL)
  }
  checkmate::assert_named(x)
  checkmate::assert_character(names)

  # Only names that are actually present in `x` are extracted, in the order
  # in which they appear in `x`.
  present <- intersect(names(x), names)
  if (length(present) == 0) {
    return(NULL)
  }
  x[present]
}

#' Labels for Adverse Event Baskets
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param aesi (`character`)\cr with standardized MedDRA query name (e.g. `SMQzzNAM`) or customized query
#'   name (e.g. `CQzzNAM`).
#' @param scope (`character`)\cr with scope of query (e.g. `SMQzzSC`).
#'
#' @return A `string` with the standard label for the AE basket.
#'
#' @examples
#' adae <- tern_ex_adae
#'
#' # Standardized query label includes scope.
#' aesi_label(adae$SMQ01NAM, scope = adae$SMQ01SC)
#'
#' # Customized query label.
#' aesi_label(adae$CQ01NAM)
#'
#' @export
aesi_label <- function(aesi, scope = NULL) {
  checkmate::assert_character(aesi)
  checkmate::assert_character(scope, null.ok = TRUE)

  # Label of the variable, taken before the values are cleaned; it is the
  # fallback when the basket name is not unique.
  fallback_label <- obj_label(aesi)

  # Distinct, non-missing basket names.
  aesi <- unique(sas_na(aesi))
  aesi <- aesi[!is.na(aesi)]

  if (length(aesi) != 1) {
    return(fallback_label)
  }
  if (is.null(scope)) {
    return(aesi)
  }

  # Distinct, non-missing scope; exactly one value is required.
  scope <- unique(sas_na(scope))
  scope <- scope[!is.na(scope)]
  checkmate::assert_string(scope)
  paste0(aesi, " (", scope, ")")
}

#' Indicate Study Arm Variable in Formula
#'
#' We use `study_arm` to indicate the study arm variable in `tern` formulas.
#'
#' @param x arm information
#'
#' @return `x`
#'
#' @keywords internal
study_arm <- function(x) {
  # Record the expression the caller wrote for `x` as the "varname" attribute.
  arm_expr <- substitute(x)
  structure(x, varname = deparse(arm_expr))
}

#' Smooth Function with Optional Grouping
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This produces `loess` smoothed estimates of `y` with Student confidence intervals.
#'
#' @param df (`data.frame`)\cr data set containing all analysis variables.
#' @param x (`character`)\cr value with x column name.
#' @param y (`character`)\cr value with y column name.
#' @param groups (`character`)\cr vector with optional grouping variables names.
#' @param level (`numeric`)\cr level of confidence interval to use (0.95 by default).
#'
#' @return A `data.frame` with original `x`, smoothed `y`, `ylow`, and `yhigh`, and
#'   optional `groups` variables formatted as `factor` type.
#'
#' @export
get_smooths <- function(df, x, y, groups = NULL, level = 0.95) {
  checkmate::assert_data_frame(df)
  df_cols <- colnames(df)
  checkmate::assert_string(x)
  checkmate::assert_subset(x, df_cols)
  checkmate::assert_numeric(df[[x]])
  checkmate::assert_string(y)
  checkmate::assert_subset(y, df_cols)
  checkmate::assert_numeric(df[[y]])

  if (!is.null(groups)) {
    checkmate::assert_character(groups)
    checkmate::assert_subset(groups, df_cols)
  }

  # loess fit of y on x, returning fitted values with standard errors.
  smooths <- function(x, y) {
    stats::predict(stats::loess(y ~ x), se = TRUE)
  }

  # Turns a loess prediction into the output columns.
  # NOTE(review): `qt(level, df)` is a one-sided quantile; a two-sided interval
  # with coverage `level` would use `(1 + level) / 2` - confirm the intent.
  smooth_frame <- function(x_values, plx) {
    half_width <- stats::qt(level, plx$df) * plx$se
    data.frame(
      x = x_values,
      y = plx$fit,
      ylow = plx$fit - half_width,
      yhigh = plx$fit + half_width
    )
  }

  if (!is.null(groups)) {
    # Only rows complete in x, y and all grouping variables are used.
    cc <- stats::complete.cases(df[c(x, y, groups)])
    df_c <- df[cc, c(x, y, groups)]

    # by() returns its cells with the FIRST grouping variable varying fastest.
    # Sorting with the LAST grouping variable as primary key puts the rows in
    # that same cell order, so the group labels attached below line up with
    # the row-bound smooths. Keys are unnamed so that column names can never
    # be mistaken for arguments of order().
    sort_keys <- unname(as.list(df_c[, rev(groups), drop = FALSE]))
    df_c_ordered <- df_c[do.call("order", sort_keys), , drop = FALSE]
    df_c_g <- data.frame(Map(as.factor, df_c_ordered[groups]))

    df_smooth_raw <-
      by(df_c_ordered, df_c_g, function(d) {
        smooth_frame(d[[x]], smooths(d[[x]], d[[y]]))
      })

    df_smooth <- do.call(rbind, df_smooth_raw)
    df_smooth[groups] <- df_c_g

    df_smooth
  } else {
    # Without grouping a single smooth over all complete (x, y) rows is fitted.
    cc <- stats::complete.cases(df[c(x, y)])
    df_c <- df[cc, ]

    smooth_frame(df_c[[x]], smooths(df_c[[x]], df_c[[y]]))
  }
}

#' Number of Available (Non-Missing Entries) in a Vector
#'
#' Small utility function for better readability.
#'
#' @param x (`any`)\cr vector in which to count non-missing values.
#'
#' @return Number of non-missing values.
#'
#' @examples
#' # Internal function - n_available
#' \dontrun{
#' n_available(c(1, NA, 2))
#' }
#'
#' @keywords internal
n_available <- function(x) {
  # TRUE for every entry that is not NA; summing counts them.
  is_present <- !is.na(x)
  sum(is_present)
}

#' Reapply Variable Labels
#'
#' This is a helper function that is used in tests.
#'
#' @param x (`vector`)\cr vector of elements that needs new labels.
#' @param varlabels (`character`)\cr vector of labels for `x`.
#' @param ... further parameters to be added to the list.
#'
#' @return `x` with variable labels reapplied.
#'
#' @export
reapply_varlabels <- function(x, varlabels, ...) {
  # Labels may come from `varlabels` and from additional named arguments.
  named_labels <- c(as.list(varlabels), list(...))

  # Read the current labels, overwrite the named ones, and write them back.
  current_labels <- formatters::var_labels(x)
  current_labels[names(named_labels)] <- as.character(named_labels)
  formatters::var_labels(x) <- current_labels

  x
}

# Wrapper around survival::clogit that replaces a fitting failure with a
# uniform error message. The message of the underlying error is appended so
# that the cause of the failure is not lost.
clogit_with_tryCatch <- function(formula, data, ...) { # nolint
  tryCatch(
    survival::clogit(formula = formula, data = data, ...),
    error = function(e) {
      stop(
        "model not built successfully with survival::clogit: ", conditionMessage(e),
        call. = FALSE
      )
    }
  )
}

#' Combination Functions Class
#'
#' @description `r lifecycle::badge("stable")`
#'
#' `CombinationFunction` is an S4 class which extends standard functions. These are special functions that
#' can be combined and negated with the logical operators.
#'
#' @param e1 (`CombinationFunction`)\cr left hand side of logical operator.
#' @param e2 (`CombinationFunction`)\cr right hand side of logical operator.
#' @param x (`CombinationFunction`)\cr the function which should be negated.
#'
#' @return A `CombinationFunction` that, when called, returns the logical combination (`&`, `|`) or
#'   negation (`!`) of the results of the underlying function(s).
#'
#' @exportClass CombinationFunction
#' @export CombinationFunction
#'
#' @examples
#' higher <- function(a) {
#'   force(a)
#'   CombinationFunction(
#'     function(x) {
#'       x > a
#'     }
#'   )
#' }
#'
#' lower <- function(b) {
#'   force(b)
#'   CombinationFunction(
#'     function(x) {
#'       x < b
#'     }
#'   )
#' }
#'
#' c1 <- higher(5)
#' c2 <- lower(10)
#' c3 <- higher(5) & lower(10)
#' c3(7)
#'
#' @aliases CombinationFunction-class
#' @name combination_function
CombinationFunction <- methods::setClass("CombinationFunction", contains = "function") # nolint

#' @describeIn combination_function Logical "AND" combination of `CombinationFunction` functions.
#'   The resulting object is of the same class. When called, it evaluates the two argument functions
#'   and returns the "AND" of the two individual results.
#'
#' @export
methods::setMethod(
  "&",
  signature = c(e1 = "CombinationFunction", e2 = "CombinationFunction"),
  definition = function(e1, e2) {
    # `&&` short-circuits: `e2` is not called when `e1` already returns `FALSE`.
    both <- function(...) {
      e1(...) && e2(...)
    }
    CombinationFunction(both)
  }
)

#' @describeIn combination_function Logical "OR" combination of `CombinationFunction` functions.
#'   The resulting object is of the same class. When called, it evaluates the two argument functions
#'   and returns the "OR" of the two individual results.
#'
#' @export
methods::setMethod(
  "|",
  signature = c(e1 = "CombinationFunction", e2 = "CombinationFunction"),
  definition = function(e1, e2) {
    # `||` short-circuits: `e2` is not called when `e1` already returns `TRUE`.
    either <- function(...) {
      e1(...) || e2(...)
    }
    CombinationFunction(either)
  }
)

#' @describeIn combination_function Logical negation of `CombinationFunction` functions.
#'   The resulting object is of the same class. When called, it evaluates the original function
#'   and returns the opposite of its result.
#'
#' @export
methods::setMethod(
  "!",
  signature = c(x = "CombinationFunction"),
  definition = function(x) {
    negated <- function(...) {
      !x(...)
    }
    CombinationFunction(negated)
  }
)

#' Summary for Poisson Negative Binomial.
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Summarize results of a Poisson Negative Binomial Regression.
#' This can be used to analyze count and/or frequency data using a linear model.
#'
#' @inheritParams argument_convention
#'
#' @name summarize_glm_count
NULL

#' Helper Functions for Poisson Models.
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Helper functions that can be used to return the results of various Poisson models.
#'
#' @inheritParams argument_convention
#'
#' @seealso [summarize_glm_count]
#'
#' @name h_glm_count
NULL

#' @describeIn h_glm_count Helper function to return results of a poisson model.
#'
#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called
#'   in `.var` and `variables`.
#' @param variables (named `list` of `strings`)\cr list of additional analysis variables, with
#'   expected elements:
#'   * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple
#'     groups will be summarized. Specifically, the first level of `arm` variable is taken as the
#'     reference group.
#'   * `covariates` (`character`)\cr a vector that can contain single variable names (such as
#'     `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
#'   * `offset` (`string`)\cr name of the variable in `.df_row` whose values are used as the offset.
#' @param weights (`character`)\cr a character vector specifying weights used
#'   in averaging predictions. Number of weights must equal the number of levels included in the covariates.
#'   This value is passed to the `weights` argument of [emmeans::emmeans()].
#'
#' @return
#' * `h_glm_poisson()` returns the results of a Poisson model.
#'
#' @examples
#' # Internal function - h_glm_poisson
#' \dontrun{
#' h_glm_poisson(
#'   .var = "AVAL",
#'   .df_row = anl,
#'   variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = NULL)
#' )
#' }
#'
#' @keywords internal
h_glm_poisson <- function(.var,
                          .df_row,
                          variables,
                          weights) {
  # Fits a Poisson GLM (log link) of `.var` on the covariates and the arm variable,
  # then derives estimated marginal means by arm. Returns both fits in a named list.
  arm <- variables$arm
  covariates <- variables$covariates
  # `variables$offset` is used as a column name: the offset values are read from `.df_row`.
  offset <- .df_row[[variables$offset]]

  # The formula text is assembled as "<.var> ~  + <covariates joined by ' + '> + <arm>".
  # NOTE(review): the right-hand side always starts with " + ", and when `covariates` is `NULL`
  # the joined covariate string is empty (giving " +  + <arm>") - confirm this form is intended.
  formula <- stats::as.formula(paste0(
    .var, " ~ ",
    " + ",
    paste(covariates, collapse = " + "),
    " + ",
    arm
  ))

  # NOTE(review): `stats::glm()` stores the call as written, so the argument expressions
  # `formula` and `offset` presumably have to stay resolvable for downstream consumers of
  # `glm_fit` - confirm before renaming these locals.
  glm_fit <- stats::glm(
    formula = formula,
    offset = offset,
    data = .df_row,
    family = stats::poisson(link = "log")
  )

  # Marginal means per level of `arm`, on the response scale and with the offset set to 0.
  # `weights` is forwarded unchanged to `emmeans::emmeans()`.
  emmeans_fit <- emmeans::emmeans(
    glm_fit,
    specs = arm,
    data = .df_row,
    type = "response",
    offset = 0,
    weights = weights
  )

  list(
    glm_fit = glm_fit,
    emmeans_fit = emmeans_fit
  )
}

#' @describeIn h_glm_count Helper function to return results of a quasipoisson model.
#'
#' @inheritParams summarize_glm_count
#'
#' @return
#' * `h_glm_quasipoisson()` returns the results of a Quasi-Poisson model.
#'
#' @examples
#' # Internal function - h_glm_quasipoisson
#' \dontrun{
#' h_glm_quasipoisson(
#'   .var = "AVAL",
#'   .df_row = adtte,
#'   variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = c("REGION1"))
#' )
#' }
#'
#' @keywords internal
h_glm_quasipoisson <- function(.var,
                               .df_row,
                               variables,
                               weights) {
  # Fits a quasi-Poisson GLM (log link) of `.var` on the covariates and the arm variable,
  # then derives estimated marginal means by arm. Returns both fits in a named list.
  arm <- variables$arm
  covariates <- variables$covariates
  # `variables$offset` is used as a column name: the offset values are read from `.df_row`.
  offset <- .df_row[[variables$offset]]

  # The formula text is assembled as "<.var> ~  + <covariates joined by ' + '> + <arm>".
  # NOTE(review): the right-hand side always starts with " + ", and when `covariates` is `NULL`
  # the joined covariate string is empty (giving " +  + <arm>") - confirm this form is intended.
  formula <- stats::as.formula(paste0(
    .var, " ~ ",
    " + ",
    paste(covariates, collapse = " + "),
    " + ",
    arm
  ))

  # NOTE(review): `stats::glm()` stores the call as written, so the argument expressions
  # `formula` and `offset` presumably have to stay resolvable for downstream consumers of
  # `glm_fit` - confirm before renaming these locals.
  glm_fit <- stats::glm(
    formula = formula,
    offset = offset,
    data = .df_row,
    family = stats::quasipoisson(link = "log")
  )

  # Marginal means per level of `arm`, on the response scale and with the offset set to 0.
  # `weights` is forwarded unchanged to `emmeans::emmeans()`.
  emmeans_fit <- emmeans::emmeans(
    glm_fit,
    specs = arm,
    data = .df_row,
    type = "response",
    offset = 0,
    weights = weights
  )

  list(
    glm_fit = glm_fit,
    emmeans_fit = emmeans_fit
  )
}

#' @describeIn h_glm_count Helper function to return the results of the
#'   selected model (poisson, quasipoisson, negative binomial).
#'
#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called
#'   in `.var` and `variables`.
#' @param variables (named `list` of `strings`)\cr list of additional analysis variables, with
#'   expected elements:
#'   * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple
#'     groups will be summarized. Specifically, the first level of `arm` variable is taken as the
#'     reference group.
#'   * `covariates` (`character`)\cr a vector that can contain single variable names (such as
#'     `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
#'   * `offset` (`string`)\cr name of the variable in `.df_row` whose values are used as the offset.
#' @param weights (`character`)\cr character vector specifying weights used in averaging predictions.
#' @param distribution (`character`)\cr a character value specifying the distribution
#'   used in the regression (poisson, quasipoisson).
#'
#' @return
#' * `h_glm_count()` returns the results of the selected model.
#'
#' @examples
#' # Internal function - h_glm_count
#' \dontrun{
#' h_glm_count(
#'   .var = "AVAL",
#'   .df_row = anl,
#'   variables = list(arm = "ARMCD", offset = "lgTMATRSK", covariates = NULL),
#'   distribution = "poisson"
#' )
#' }
#'
#' @keywords internal
h_glm_count <- function(.var,
                        .df_row,
                        variables,
                        distribution,
                        weights) {
  # `distribution` drives both the `if` and the `switch()` below, so it must be one non-missing string.
  if (!is.character(distribution) || length(distribution) != 1L || is.na(distribution)) {
    stop("`distribution` must be a single string, either \"poisson\" or \"quasipoisson\".")
  }

  # The negative binomial model is recognized but not implemented yet.
  if (distribution == "negbin") {
    stop("negative binomial distribution is not currently available.")
  }

  # Dispatch to the model-specific helper. Any other value is rejected explicitly instead of
  # silently returning `NULL`, which would only fail later with an unrelated error.
  switch(distribution,
    poisson = h_glm_poisson(.var, .df_row, variables, weights),
    quasipoisson = h_glm_quasipoisson(.var, .df_row, variables, weights),
    stop("distribution '", distribution, "' is not supported; use \"poisson\" or \"quasipoisson\".")
  )
}

#' @describeIn h_glm_count Helper function to return the estimated means.
#'
#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called in `.var` and `variables`.
#' @param conf_level (`numeric`)\cr value used to derive the confidence interval for the rate.
#' @param obj (`glm.fit`)\cr fitted model object used to derive the mean rate estimates in each treatment arm.
#' @param arm (`string`)\cr group variable, for which the covariate adjusted means of multiple groups will be
#'   summarized. Specifically, the first level of `arm` variable is taken as the reference group.
#'
#' @return
#' * `h_ppmeans()` returns the estimated means.
#'
#' @examples
#' # Internal function - h_ppmeans
#' \dontrun{
#' fits <- h_glm_count(
#'   .var = "AVAL",
#'   .df_row = anl,
#'   variables = list(arm = "ARMCD", offset = "lgTMATRSK", covariates = c("REGION1")),
#'   distribution = "quasipoisson"
#' )
#'
#' h_ppmeans(
#'   obj = fits$glm_fit,
#'   .df_row = anl,
#'   arm = "ARM",
#'   conf_level = 0.95
#' )
#' }
#'
#' @keywords internal
h_ppmeans <- function(obj, .df_row, arm, conf_level) {
  # Upper-tail probability of the two-sided interval (e.g. 0.975 when `conf_level` is 0.95).
  alpha <- 1 - conf_level
  p_upper <- 1 - alpha / 2

  arm_levels <- levels(.df_row[[arm]])

  # Estimate for one arm level: assign every row of `.df_row` to that level, predict on the
  # response scale and average the predictions.
  estimate_level <- function(lev) {
    df_lev <- .df_row
    df_lev[[arm]] <- factor(lev, levels = arm_levels)

    mf <- stats::model.frame(obj$formula, data = df_lev)
    design <- stats::model.matrix(obj$formula, data = mf)

    pred <- stats::predict(obj, newdata = mf, type = "response")
    rate_hat <- mean(pred)

    # Standard error of the averaged prediction, from the coefficient covariance matrix.
    avg_weighted_design <- colMeans(pred * design)
    se <- sqrt(as.numeric(t(avg_weighted_design) %*% stats::vcov(obj) %*% avg_weighted_design))

    # The interval is symmetric around `rate_hat` on the log scale.
    log_margin <- stats::qnorm(p_upper) * se / rate_hat

    c(rate_hat, rate_hat * exp(-log_margin), rate_hat * exp(log_margin))
  }

  est <- lapply(arm_levels, estimate_level)
  names(est) <- arm_levels
  est <- do.call(rbind, est)

  # The estimate column is called "response" for negative binomial fits and "rate" otherwise.
  estimate_name <- if (inherits(obj, "negbin")) "response" else "rate"
  colnames(est) <- c(estimate_name, "asymp.LCL", "asymp.UCL")

  est <- as.data.frame(est)
  est[[arm]] <- rownames(est)
  est
}

#' @describeIn summarize_glm_count Statistics function that produces a named list of results
#'   of the investigated Poisson model.
#'
#' @inheritParams h_glm_count
#'
#' @return
#' * `s_glm_count()` returns a named `list` of 6 statistics:
#'   * `n`: Count of complete sample size for the group.
#'   * `rate`: Estimated event rate per follow-up time.
#'   * `rate_ci`: Confidence interval for the estimated rate per follow-up time.
#'   * `rate_ratio`: Ratio of event rates in each treatment arm to the reference arm.
#'   * `rate_ratio_ci`: Confidence interval for the rate ratio.
#'   * `pval`: p-value.
#'
#' @examples
#' # Internal function - s_glm_count
#' \dontrun{
#' s_glm_count(
#'   df = anl %>%
#'     filter(ARMCD == "ARM B"),
#'   .df_row = anl,
#'   .var = "AVAL",
#'   .in_ref_col = TRUE,
#'   variables = list(arm = "ARMCD", offset = "lgTMATRSK", covariates = c("REGION1")),
#'   conf_level = 0.95,
#'   distribution = "quasipoisson",
#'   rate_mean_method = "ppmeans"
#' )
#' }
#'
#' @keywords internal
s_glm_count <- function(df,
                        .var,
                        .df_row,
                        variables,
                        .ref_group,
                        .in_ref_col,
                        distribution,
                        conf_level,
                        rate_mean_method,
                        weights,
                        scale = 1) {
  arm <- variables$arm

  y <- df[[.var]]
  smry_level <- as.character(unique(df[[arm]]))

  # ensure there is only 1 value
  checkmate::assert_scalar(smry_level)

  # Fit the model on the full row data; `df` only identifies the arm being summarized.
  results <- h_glm_count(
    .var = .var,
    .df_row = .df_row,
    variables = variables,
    distribution = distribution,
    weights = weights
  )

  # Per-arm rate estimates, either from emmeans or from averaged model predictions.
  if (rate_mean_method == "emmeans") {
    emmeans_smry <- summary(results$emmeans_fit, level = conf_level)
  } else if (rate_mean_method == "ppmeans") {
    emmeans_smry <- h_ppmeans(results$glm_fit, .df_row, arm, conf_level)
  } else {
    # Fail with a clear message instead of "object 'emmeans_smry' not found".
    stop("`rate_mean_method` must be either \"emmeans\" or \"ppmeans\".")
  }

  emmeans_smry_level <- emmeans_smry[emmeans_smry[[arm]] == smry_level, ]

  # The estimate column is `response` for negative binomial fits and `rate` otherwise.
  rate_est <- if (distribution == "negbin") emmeans_smry_level$response else emmeans_smry_level$rate

  # Statistics shared by reference and non-reference columns. `scale` is applied to the rate
  # as well as to its confidence limits so that both are reported on the same scale.
  stats_rate <- list(
    n = length(y[!is.na(y)]),
    rate = formatters::with_label(rate_est * scale, "Adjusted Rate"),
    rate_ci = formatters::with_label(
      c(emmeans_smry_level$asymp.LCL * scale, emmeans_smry_level$asymp.UCL * scale),
      f_conf_level(conf_level)
    )
  )

  if (.in_ref_col) {
    # No comparison is made for the reference arm itself: empty placeholders.
    stats_ratio <- list(
      rate_ratio = formatters::with_label(character(), "Adjusted Rate Ratio"),
      rate_ratio_ci = formatters::with_label(character(), f_conf_level(conf_level)),
      pval = formatters::with_label(character(), "p-value")
    )
  } else {
    # Position of the reference arm among the emmeans levels. The level is matched exactly,
    # not as a regular expression, so metacharacters and partial names cannot mis-match.
    ref_level <- as.character(unique(.ref_group[[arm]]))
    emm_levels <- as.character(as.data.frame(results$emmeans_fit)[[arm]])

    emmeans_contrasts <- emmeans::contrast(
      results$emmeans_fit,
      method = "trt.vs.ctrl",
      ref = which(emm_levels %in% ref_level)
    )

    # `level = conf_level` keeps the interval consistent with the label attached below.
    contrasts_smry <- summary(
      emmeans_contrasts,
      infer = TRUE,
      adjust = "none",
      level = conf_level
    )

    # Literal (non-regex) search for the arm name inside the contrast label.
    smry_contrasts_level <- contrasts_smry[grepl(smry_level, contrasts_smry$contrast, fixed = TRUE), ]

    stats_ratio <- list(
      rate_ratio = formatters::with_label(smry_contrasts_level$ratio, "Adjusted Rate Ratio"),
      rate_ratio_ci = formatters::with_label(
        c(smry_contrasts_level$asymp.LCL, smry_contrasts_level$asymp.UCL),
        f_conf_level(conf_level)
      ),
      pval = formatters::with_label(smry_contrasts_level$p.value, "p-value")
    )
  }

  c(stats_rate, stats_ratio)
}

#' @describeIn summarize_glm_count Formatted analysis function which is used as `afun` in `summarize_glm_count()`.
#'
#' @return
#' * `a_glm_count()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_glm_count
#' \dontrun{
#' a_glm_count(
#'   df = anl %>%
#'     filter(ARMCD == "ARM A"),
#'   .var = "AVAL",
#'   .df_row = anl,
#'   variables = list(arm = "ARMCD", offset = "lgTMATRSK", covariates = c("REGION1")),
#'   .ref_group = "ARM B", .in_ref_col = TRUE,
#'   conf_level = 0.95,
#'   distribution = "poisson",
#'   rate_mean_method = "ppmeans"
#' )
#' }
#'
#' @keywords internal
a_glm_count <- make_afun(
  s_glm_count,
  # Confidence intervals and the p-value are indented one level below their estimates.
  .indent_mods = c(
    n = 0L,
    rate = 0L,
    rate_ci = 1L,
    rate_ratio = 0L,
    rate_ratio_ci = 1L,
    pval = 1L
  ),
  # Default display format for each statistic.
  .formats = c(
    n = "xx",
    rate = "xx.xxxx",
    rate_ci = "(xx.xxxx, xx.xxxx)",
    rate_ratio = "xx.xxxx",
    rate_ratio_ci = "(xx.xxxx, xx.xxxx)",
    pval = "x.xxxx | (<0.0001)"
  ),
  .null_ref_cells = FALSE
)

#' @describeIn summarize_glm_count Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `summarize_glm_count()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_glm_count()` to the table layout.
#'
#' @examples
#' library(dplyr)
#' anl <- tern_ex_adtte %>% filter(PARAMCD == "TNE")
#' anl$AVAL_f <- as.factor(anl$AVAL)
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM", ref_group = "B: Placebo") %>%
#'   add_colcounts() %>%
#'   summarize_vars(
#'     "AVAL_f",
#'     var_labels = "Number of exacerbations per patient",
#'     .stats = c("count_fraction"),
#'     .formats = c("count_fraction" = "xx (xx.xx%)"),
#'     .label = c("Number of exacerbations per patient")
#'   ) %>%
#'   summarize_glm_count(
#'     vars = "AVAL",
#'     variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = NULL),
#'     conf_level = 0.95,
#'     distribution = "poisson",
#'     rate_mean_method = "emmeans",
#'     var_labels = "Unadjusted exacerbation rate (per year)",
#'     table_names = "unadj",
#'     .stats = c("rate"),
#'     .labels = c(rate = "Rate")
#'   ) %>%
#'   summarize_glm_count(
#'     vars = "AVAL",
#'     variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = c("REGION1")),
#'     conf_level = 0.95,
#'     distribution = "quasipoisson",
#'     rate_mean_method = "ppmeans",
#'     var_labels = "Adjusted (QP) exacerbation rate (per year)",
#'     table_names = "adj",
#'     .stats = c("rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
#'     .labels = c(
#'       rate = "Rate", rate_ci = "Rate CI", rate_ratio = "Rate Ratio",
#'       rate_ratio_ci = "Rate Ratio CI", pval = "p value"
#'     )
#'   )
#' build_table(lyt = lyt, df = anl)
#'
#' @export
summarize_glm_count <- function(lyt,
                                vars,
                                var_labels,
                                ...,
                                show_labels = "visible",
                                table_names = vars,
                                .stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # NOTE(review): `var_labels` has no default here, whereas `estimate_proportion_diff()`
  # defaults it to `vars` - confirm that requiring it is intended.

  # Specialize the analysis function with the caller's statistic selection, formats,
  # labels and indentation.
  afun <- make_afun(
    a_glm_count,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Everything captured by `...` is forwarded to the analysis function through `extra_args`.
  # NOTE(review): `analyze()` may inspect the expression passed as `afun`; confirm before
  # renaming the local or inlining the call.
  analyze(
    lyt,
    vars,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    afun = afun,
    extra_args = list(...)
  )
}

#' Encode Categorical Missing Values in a Data Frame
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a helper function to encode missing entries across groups of categorical
#' variables in a data frame.
#'
#' @details Missing entries are those with `NA` or empty strings and will
#'   be replaced with a specified value. If factor variables include missing
#'   values, the missing value will be inserted as the last level.
#'   Similarly, in case character or logical variables should be converted to factors
#'   with the `char_as_factor` or `logical_as_factor` options, the missing values will
#'   be set as the last level.
#'
#' @param data (`data.frame`)\cr data set.
#' @param omit_columns (`character`)\cr names of variables from `data` that should
#'   not be modified by this function.
#' @param char_as_factor (`flag`)\cr whether to convert character variables
#'   in `data` to factors.
#' @param logical_as_factor (`flag`)\cr whether to convert logical variables
#'   in `data` to factors.
#' @param na_level (`string`)\cr used to replace all `NA` or empty
#'   values inside non-`omit_columns` columns.
#'
#' @return A `data.frame` with the chosen modifications applied.
#'
#' @seealso [sas_na()] and [explicit_na()] for other missing data helper functions.
#'
#' @examples
#' my_data <- data.frame(
#'   u = c(TRUE, FALSE, NA, TRUE),
#'   v = factor(c("A", NA, NA, NA), levels = c("Z", "A")),
#'   w = c("A", "B", NA, "C"),
#'   x = c("D", "E", "F", NA),
#'   y = c("G", "H", "I", ""),
#'   z = c(1, 2, 3, 4),
#'   stringsAsFactors = FALSE
#' )
#'
#' # Example 1
#' # Encode missing values in all character or factor columns.
#' df_explicit_na(my_data)
#' # Also convert logical columns to factor columns.
#' df_explicit_na(my_data, logical_as_factor = TRUE)
#' # Encode missing values in a subset of columns.
#' df_explicit_na(my_data, omit_columns = c("x", "y"))
#'
#' # Example 2
#' # Here we purposefully convert all `M` values to `NA` in the `SEX` variable.
#' # After running `df_explicit_na` the `NA` values are encoded as `<Missing>` but they are not
#' # included when generating `rtables`.
#' adsl <- tern_ex_adsl
#' adsl$SEX[adsl$SEX == "M"] <- NA
#' adsl <- df_explicit_na(adsl)
#'
#' # If you want the `NA` values to be displayed in the table use the `na_level` argument.
#' adsl <- tern_ex_adsl
#' adsl$SEX[adsl$SEX == "M"] <- NA
#' adsl <- df_explicit_na(adsl, na_level = "Missing Values")
#'
#' # Example 3
#' # Numeric variables that have missing values are not altered. This means that any `NA` value in
#' # a numeric variable will not be included in the summary statistics, nor will they be included
#' # in the denominator value for calculating the percent values.
#' adsl <- tern_ex_adsl
#' adsl$AGE[adsl$AGE < 30] <- NA
#' adsl <- df_explicit_na(adsl)
#'
#' @export
df_explicit_na <- function(data,
                           omit_columns = NULL,
                           char_as_factor = TRUE,
                           logical_as_factor = FALSE,
                           na_level = "<Missing>") {
  checkmate::assert_character(omit_columns, null.ok = TRUE, min.len = 1, any.missing = FALSE)
  checkmate::assert_data_frame(data)
  checkmate::assert_flag(char_as_factor)
  checkmate::assert_flag(logical_as_factor)
  checkmate::assert_string(na_level)

  # Every column is a candidate unless the caller excluded it.
  target_vars <- names(data)
  if (!is.null(omit_columns)) {
    target_vars <- setdiff(target_vars, omit_columns)
  }
  if (length(target_vars) == 0) {
    return(data)
  }

  # Makes sure target_vars exist in data and names are not duplicated.
  l_target_vars <- split(target_vars, target_vars)
  assert_df_with_variables(data, l_target_vars)

  for (x in target_vars) {
    xi <- data[[x]]
    xi_label <- obj_label(xi)

    # Decide on the factor conversion from the column's ORIGINAL type, before any coercion.
    from_logical <- is.logical(xi) && logical_as_factor
    to_factor <- (is.character(xi) && char_as_factor) || from_logical

    # Logical columns go through character so that `NA` can be replaced like any other string.
    if (from_logical) {
      xi <- as.character(xi)
    }

    # Only factor and character columns are modified; anything else is left untouched.
    if (!is.factor(xi) && !is.character(xi)) {
      next
    }

    # Empty / whitespace strings become `NA`, then every `NA` becomes `na_level`.
    xi <- explicit_na(sas_na(xi), label = na_level)

    if (to_factor) {
      # Sorted observed values, with `na_level` moved to the end when it occurs.
      observed <- unique(xi)
      levels_xi <- setdiff(sort(observed), na_level)
      if (na_level %in% observed) {
        levels_xi <- c(levels_xi, na_level)
      }
      xi <- factor(xi, levels = levels_xi)
    }

    # Write the column back with its original label restored.
    data[, x] <- formatters::with_label(xi, label = xi_label)
  }

  data
}

#' Proportion Difference
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#'
#' @seealso [d_proportion_diff()]
#'
#' @name prop_diff
NULL

#' @describeIn prop_diff Statistics function estimating the difference
#'   in terms of responder proportion.
#'
#' @inheritParams prop_diff_strat_nc
#' @param method (`string`)\cr the method used for the confidence interval estimation.
#'
#' @return
#' * `s_proportion_diff()` returns a named list of elements `diff` and `diff_ci`.
#'
#' @note When performing an unstratified analysis, methods `"cmh"`, `"strat_newcombe"`, and `"strat_newcombecc"` are
#'   not permitted.
#'
#' @examples
#' # Summary
#'
#' ## Example data: responses, groups and strata are randomly sampled for `nex` subjects.
#' nex <- 100 # Number of example rows
#' dta <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
#'   "grp" = sample(c("A", "B"), nex, TRUE),
#'   "f1" = sample(c("a1", "a2"), nex, TRUE),
#'   "f2" = sample(c("x", "y", "z"), nex, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' s_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   conf_level = 0.90,
#'   method = "ha"
#' )
#'
#' # CMH example with strata
#' s_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   variables = list(strata = c("f1", "f2")),
#'   conf_level = 0.90,
#'   method = "cmh"
#' )
#'
#' @export
s_proportion_diff <- function(df,
                              .var,
                              .ref_group,
                              .in_ref_col,
                              variables = list(strata = NULL),
                              conf_level = 0.95,
                              method = c(
                                "waldcc", "wald", "cmh",
                                "ha", "newcombe", "newcombecc",
                                "strat_newcombe", "strat_newcombecc"
                              ),
                              weights_method = "cmh") {
  method <- match.arg(method)

  # Stratified methods need strata variables.
  if (is.null(variables$strata) && checkmate::test_subset(method, c("cmh", "strat_newcombe", "strat_newcombecc"))) {
    stop(paste(
      "When performing an unstratified analysis, methods 'cmh', 'strat_newcombe', and 'strat_newcombecc' are not",
      "permitted. Please choose a different method."
    ))
  }

  # Placeholder result returned as-is for the reference column, where no difference is computed.
  y <- list(diff = "", diff_ci = "")

  if (!.in_ref_col) {
    # Stack reference rows first, then the rows of the current column, and label them accordingly.
    rsp <- c(.ref_group[[.var]], df[[.var]])
    grp <- factor(
      rep(
        c("ref", "Not-ref"),
        c(nrow(.ref_group), nrow(df))
      ),
      levels = c("ref", "Not-ref")
    )

    if (!is.null(variables$strata)) {
      strata_colnames <- variables$strata
      checkmate::assert_character(strata_colnames, null.ok = FALSE)
      strata_vars <- stats::setNames(as.list(strata_colnames), strata_colnames)

      assert_df_with_variables(df, strata_vars)
      assert_df_with_variables(.ref_group, strata_vars)

      # Merging interaction strata for reference group rows data and remaining
      strata <- c(
        interaction(.ref_group[strata_colnames]),
        interaction(df[strata_colnames])
      )
      strata <- as.factor(strata)
    }

    # Weighting scheme for the stratified Newcombe methods: a value supplied through `variables`
    # takes precedence, otherwise the `weights_method` argument is honored (it used to be
    # overwritten unconditionally). `NULL` falls back to the standard "cmh" weights.
    if (!is.null(variables$weights_method)) {
      weights_method <- variables$weights_method
    } else if (is.null(weights_method)) {
      weights_method <- "cmh"
    }

    # Only the selected branch is evaluated, so `strata` is only needed for stratified methods.
    y <- switch(method,
      "wald" = prop_diff_wald(rsp, grp, conf_level, correct = FALSE),
      "waldcc" = prop_diff_wald(rsp, grp, conf_level, correct = TRUE),
      "ha" = prop_diff_ha(rsp, grp, conf_level),
      "newcombe" = prop_diff_nc(rsp, grp, conf_level, correct = FALSE),
      "newcombecc" = prop_diff_nc(rsp, grp, conf_level, correct = TRUE),
      "strat_newcombe" = prop_diff_strat_nc(rsp,
        grp,
        strata,
        weights_method,
        conf_level,
        correct = FALSE
      ),
      "strat_newcombecc" = prop_diff_strat_nc(rsp,
        grp,
        strata,
        weights_method,
        conf_level,
        correct = TRUE
      ),
      "cmh" = prop_diff_cmh(rsp, grp, strata, conf_level)[c("diff", "diff_ci")]
    )

    # Report proportions as percentages.
    y$diff <- y$diff * 100
    y$diff_ci <- y$diff_ci * 100
  }

  attr(y$diff, "label") <- "Difference in Response rate (%)"
  attr(y$diff_ci, "label") <- d_proportion_diff(
    conf_level, method,
    long = FALSE
  )

  y
}

#' @describeIn prop_diff Formatted analysis function which is used as `afun` in `estimate_proportion_diff()`.
#'
#' @return
#' * `a_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   conf_level = 0.90,
#'   method = "ha"
#' )
#'
#' @export
a_proportion_diff <- make_afun(
  s_proportion_diff,
  # The difference is shown with one decimal; its confidence interval as a bracketed pair.
  .formats = c("diff" = "xx.x", "diff_ci" = "(xx.x, xx.x)"),
  # The confidence interval row is indented one level below the difference.
  .indent_mods = c("diff" = 0L, "diff_ci" = 1L)
)

#' @describeIn prop_diff Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... arguments passed to `s_proportion_diff()`.
#'
#' @return
#' * `estimate_proportion_diff()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_proportion_diff()` to the table layout.
#'
#' @examples
#' l <- basic_table() %>%
#'   split_cols_by(var = "grp", ref_group = "B") %>%
#'   estimate_proportion_diff(
#'     vars = "rsp",
#'     conf_level = 0.90,
#'     method = "ha"
#'   )
#'
#' build_table(l, df = dta)
#'
#' @export
estimate_proportion_diff <- function(lyt,
                                     vars,
                                     ...,
                                     var_labels = vars,
                                     show_labels = "hidden",
                                     table_names = vars,
                                     .stats = NULL,
                                     .formats = NULL,
                                     .labels = NULL,
                                     .indent_mods = NULL) {
  # Specialize the analysis function with the caller's statistic selection, formats,
  # labels and indentation.
  afun <- make_afun(
    a_proportion_diff,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Everything captured by `...` is forwarded to the analysis function through `extra_args`.
  # NOTE(review): `analyze()` may inspect the expression passed as `afun`; confirm before
  # renaming the local or inlining the call.
  analyze(
    lyt,
    vars,
    afun = afun,
    var_labels = var_labels,
    extra_args = list(...),
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Check: Proportion Difference Arguments
#'
#' Verifies that the arguments are valid for use in the
#' estimation of difference in responder proportions.
#'
#' @inheritParams prop_diff
#' @inheritParams prop_diff_wald
#'
#' @keywords internal
check_diff_prop_ci <- function(rsp,
                               grp,
                               strata = NULL,
                               conf_level,
                               correct = NULL) {
  # Response: logical without missing values.
  checkmate::assert_logical(rsp, any.missing = FALSE)
  # Group: two-level factor, one entry per response, without missing values.
  checkmate::assert_factor(grp, len = length(rsp), any.missing = FALSE, n.levels = 2)
  # Confidence level: single number in [0, 1].
  checkmate::assert_number(conf_level, lower = 0, upper = 1)
  # Continuity correction: single flag, or `NULL` when not applicable.
  checkmate::assert_flag(correct, null.ok = TRUE)

  # Strata are optional; nothing more to verify without them.
  if (is.null(strata)) {
    return(invisible())
  }

  checkmate::assert_factor(strata, len = length(rsp))
  invisible()
}

#' Description of Method Used for Proportion Comparison
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function that describes the analysis in
#' `s_proportion_diff`.
#'
#' @inheritParams s_proportion_diff
#' @param long (`logical`)\cr Whether a long or a short (default) description is required.
#'
#' @return A `string` describing the analysis.
#'
#' @seealso [prop_diff]
#'
#' @export
d_proportion_diff <- function(conf_level,
                              method,
                              long = FALSE) {
  # Leading part of the description, e.g. "95% CI".
  label <- paste0(conf_level * 100, "% CI")

  # The long form also says what the interval is for; only CMH yields an adjusted difference.
  if (long) {
    target <- if (isTRUE(method == "cmh")) "for adjusted difference" else "for difference"
    label <- paste(label, target)
  }

  # Human-readable name of the estimation method; unknown methods are rejected.
  method_part <- switch(method,
    "wald" = "Wald, without correction",
    "waldcc" = "Wald, with correction",
    "cmh" = "CMH, without correction",
    "ha" = "Anderson-Hauck",
    "newcombe" = "Newcombe, without correction",
    "newcombecc" = "Newcombe, with correction",
    "strat_newcombe" = "Stratified Newcombe, without correction",
    "strat_newcombecc" = "Stratified Newcombe, with correction",
    stop(paste(method, "does not have a description"))
  )

  paste0(label, " (", method_part, ")")
}

#' Helper Functions to Calculate Proportion Difference
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @inheritParams prop_diff
#' @param grp (`factor`)\cr vector assigning observations to one out of two groups
#'   (e.g. reference and treatment group).
#'
#' @return A named `list` of elements `diff` (proportion difference) and `diff_ci`
#'   (proportion difference confidence interval).
#'
#' @seealso [prop_diff] for implementation of these helper functions.
#'
#' @name h_prop_diff
NULL

#' @describeIn h_prop_diff The Wald interval follows the usual textbook
#'   definition for a single proportion confidence interval using the normal
#'   approximation. It is possible to include a continuity correction for Wald's
#'   interval.
#'
#' @param correct (`logical`)\cr whether to include the continuity correction. For further
#'   information, see [stats::prop.test()].
#'
#' @examples
#' # Wald confidence interval
#' set.seed(2)
#' rsp <- sample(c(TRUE, FALSE), replace = TRUE, size = 20)
#' grp <- factor(c(rep("A", 10), rep("B", 10)))
#' prop_diff_wald(rsp = rsp, grp = grp, conf_level = 0.95, correct = FALSE)
#'
#' @export
prop_diff_wald <- function(rsp,
                           grp,
                           conf_level = 0.95,
                           correct = FALSE) {
  # Method identifier: Wald with or without continuity correction.
  mthd <- if (isTRUE(correct)) "waldcc" else "wald"

  grp <- as_factor_keep_attributes(grp)
  check_diff_prop_ci(
    rsp = rsp, grp = grp, conf_level = conf_level, correct = correct
  )

  # check if binary response is coded as logical
  checkmate::assert_logical(rsp, any.missing = FALSE)
  checkmate::assert_factor(grp, len = length(rsp), any.missing = FALSE, n.levels = 2)

  # 2 x 2 table: rows are the levels of `grp`, columns are responders (TRUE) then non-responders.
  tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))

  # x1 and n1 are non-reference groups (second level of `grp`).
  ci_mat <- desctools_binom(
    x1 = tbl[2, 1], n1 = sum(tbl[2, ]),
    x2 = tbl[1, 1], n2 = sum(tbl[1, ]),
    conf.level = conf_level,
    method = mthd
  )

  point_est <- unname(ci_mat[, "est"])
  limits <- unname(ci_mat[, c("lwr.ci", "upr.ci")])
  list("diff" = point_est, "diff_ci" = limits)
}

#' @describeIn h_prop_diff Anderson-Hauck confidence interval.
#'
#' @examples
#' # Anderson-Hauck confidence interval
#' ## "Mid" case: 3/4 respond in group A, 1/2 respond in group B.
#' rsp <- c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE)
#' grp <- factor(c("A", "B", "A", "B", "A", "A"), levels = c("B", "A"))
#' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.90)
#'
#' ## Edge case: Same proportion of response in A and B.
#' rsp <- c(TRUE, FALSE, TRUE, FALSE)
#' grp <- factor(c("A", "A", "B", "B"), levels = c("A", "B"))
#' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.6)
#'
#' @export
prop_diff_ha <- function(rsp,
                         grp,
                         conf_level) {
  grp <- as_factor_keep_attributes(grp)
  check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level)

  # 2 x 2 table of group (rows) by response (columns TRUE, FALSE). In column-major order,
  # cells 1 and 2 hold the responders and cells 3 and 4 the non-responders of the
  # first and second level of `grp`, respectively.
  counts <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
  resp_first <- counts[1]
  resp_second <- counts[2]
  size_first <- sum(counts[1], counts[3])
  size_second <- sum(counts[2], counts[4])

  # The second level of `grp` is the non-reference group and is passed as group 1.
  ha <- desctools_binom(
    x1 = resp_second, n1 = size_second,
    x2 = resp_first, n2 = size_first,
    conf.level = conf_level,
    method = "ha"
  )

  list(
    "diff" = unname(ha[, "est"]),
    "diff_ci" = unname(ha[, c("lwr.ci", "upr.ci")])
  )
}

#' @describeIn h_prop_diff Newcombe confidence interval. It is based on
#'   the Wilson score confidence interval for a single binomial proportion.
#'
#' @examples
#' # Newcombe confidence interval
#'
#' set.seed(1)
#' rsp <- c(
#'   sample(c(TRUE, FALSE), size = 40, prob = c(3 / 4, 1 / 4), replace = TRUE),
#'   sample(c(TRUE, FALSE), size = 40, prob = c(1 / 2, 1 / 2), replace = TRUE)
#' )
#' grp <- factor(rep(c("A", "B"), each = 40), levels = c("B", "A"))
#' table(rsp, grp)
#' prop_diff_nc(rsp = rsp, grp = grp, conf_level = 0.9)
#'
#' @export
prop_diff_nc <- function(rsp,
                         grp,
                         conf_level,
                         correct = FALSE) {
  # Method label handed to `desctools_binom()`: score interval with or without continuity correction.
  mthd <- if (isTRUE(correct)) "scorecc" else "score"

  grp <- as_factor_keep_attributes(grp)
  check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level)

  # The point estimate is taken from `desctools_binom()` below, so no separate
  # per-group proportions need to be computed here.
  # 2 x 2 table of group (rows) by response (columns TRUE, FALSE); linear indices 1 and 2 are
  # the responders, 3 and 4 the non-responders of the first and second level of `grp`.
  tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
  ci <- desctools_binom(
    # x1 and n1 are non-reference groups.
    x1 = tbl[2], n1 = sum(tbl[2], tbl[4]),
    x2 = tbl[1], n2 = sum(tbl[1], tbl[3]),
    conf.level = conf_level,
    method = mthd
  )
  list(
    "diff" = unname(ci[, "est"]),
    "diff_ci" = unname(ci[, c("lwr.ci", "upr.ci")])
  )
}

#' @describeIn h_prop_diff Calculates the weighted difference. This is defined as the difference in
#'   response rates between the experimental treatment group and the control treatment group, adjusted
#'   for stratification factors by applying Cochran-Mantel-Haenszel (CMH) weights. For the CMH chi-squared
#'   test, use [stats::mantelhaen.test()].
#'
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
#'
#' @examples
#' # Cochran-Mantel-Haenszel confidence interval
#'
#' set.seed(2)
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
#' grp <- sample(c("Placebo", "Treatment"), 100, TRUE)
#' grp <- factor(grp, levels = c("Placebo", "Treatment"))
#' strata_data <- data.frame(
#'   "f1" = sample(c("a", "b"), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' prop_diff_cmh(
#'   rsp = rsp, grp = grp, strata = interaction(strata_data),
#'   conf_level = 0.90
#' )
#'
#' @export
prop_diff_cmh <- function(rsp,
                          grp,
                          strata,
                          conf_level = 0.95) {
  grp <- as_factor_keep_attributes(grp)
  strata <- as_factor_keep_attributes(strata)
  check_diff_prop_ci(
    rsp = rsp, grp = grp, conf_level = conf_level, strata = strata
  )

  # `tapply()` returns NA for strata levels without any observation; `na.rm = TRUE`
  # keeps the condition from evaluating to NA (which would make `if` fail).
  if (any(tapply(rsp, strata, length) < 5, na.rm = TRUE)) {
    warning("Less than 5 observations in some strata.")
  }

  # first dimension: FALSE, TRUE
  # 2nd dimension: CONTROL, TX
  # 3rd dimension: levels of strat
  # rsp as factor rsp to handle edge case of no FALSE (or TRUE) rsp records
  t_tbl <- table(
    factor(rsp, levels = c("FALSE", "TRUE")),
    grp,
    strata
  )

  # Sum the cells of a (response x group x stratum) slice within each stratum.
  # `drop = FALSE` together with `dims = 2` keeps the stratum dimension (and its names)
  # even when there is a single stratum, where a plain `colSums(t_tbl[1:2, 1, ])` fails.
  sum_by_stratum <- function(cells) colSums(cells, dims = 2)

  n1 <- sum_by_stratum(t_tbl[1:2, 1, , drop = FALSE])
  n2 <- sum_by_stratum(t_tbl[1:2, 2, , drop = FALSE])
  p1 <- sum_by_stratum(t_tbl[2, 1, , drop = FALSE]) / n1
  p2 <- sum_by_stratum(t_tbl[2, 2, , drop = FALSE]) / n2

  # CMH weights
  # Only strata in which both groups are observed contribute.
  use_stratum <- (n1 > 0) & (n2 > 0)
  n1 <- n1[use_stratum]
  n2 <- n2[use_stratum]
  p1 <- p1[use_stratum]
  p2 <- p2[use_stratum]
  wt <- (n1 * n2 / (n1 + n2))
  wt_normalized <- wt / sum(wt)

  # Weighted proportion per group with normal-approximation confidence intervals.
  est1 <- sum(wt_normalized * p1)
  est2 <- sum(wt_normalized * p2)
  estimate <- c(est1, est2)
  names(estimate) <- levels(grp)
  se1 <- sqrt(sum(wt_normalized^2 * p1 * (1 - p1) / n1))
  se2 <- sqrt(sum(wt_normalized^2 * p2 * (1 - p2) / n2))
  z <- stats::qnorm((1 + conf_level) / 2)
  err1 <- z * se1
  err2 <- z * se2
  ci1 <- c((est1 - err1), (est1 + err1))
  ci2 <- c((est2 - err2), (est2 + err2))
  estimate_ci <- list(ci1, ci2)
  names(estimate_ci) <- levels(grp)

  # Weighted difference (second minus first level of `grp`) and its confidence interval.
  diff_est <- est2 - est1
  se_diff <- sqrt(sum(((p1 * (1 - p1) / n1) + (p2 * (1 - p2) / n2)) * wt_normalized^2))
  diff_ci <- c(diff_est - z * se_diff, diff_est + z * se_diff)

  list(
    prop = estimate,
    prop_ci = estimate_ci,
    diff = diff_est,
    diff_ci = diff_ci,
    weights = wt_normalized,
    n1 = n1,
    n2 = n2
  )
}

#' @describeIn h_prop_diff Calculates the stratified Newcombe confidence interval and difference in response
#'   rates between the experimental treatment group and the control treatment group, adjusted for stratification
#'   factors. This implementation follows closely the one proposed by \insertCite{Yan2010-jt;textual}{tern}.
#'   Weights can be estimated from the heuristic proposed in [prop_strat_wilson()] or from CMH-derived weights
#'   (see [prop_diff_cmh()]).
#'
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
#' @param weights_method (`string`)\cr weights method. Can be either `"cmh"` or `"wilson_h"`
#'   and directs the way weights are estimated.
#'
#' @references
#' - \insertRef{Yan2010-jt}{tern}
#'
#' @examples
#' # Stratified Newcombe confidence interval
#'
#' set.seed(2)
#' data_set <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), 100, TRUE),
#'   "f1" = sample(c("a", "b"), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   "grp" = sample(c("Placebo", "Treatment"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' prop_diff_strat_nc(
#'   rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]),
#'   weights_method = "cmh",
#'   conf_level = 0.90
#' )
#'
#' prop_diff_strat_nc(
#'   rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]),
#'   weights_method = "wilson_h",
#'   conf_level = 0.90
#' )
#'
#' @export
prop_diff_strat_nc <- function(rsp,
                               grp,
                               strata,
                               weights_method = c("cmh", "wilson_h"),
                               conf_level = 0.95,
                               correct = FALSE) {
  weights_method <- match.arg(weights_method)
  grp <- as_factor_keep_attributes(grp)
  strata <- as_factor_keep_attributes(strata)
  check_diff_prop_ci(
    rsp = rsp, grp = grp, conf_level = conf_level, strata = strata
  )
  checkmate::assert_number(conf_level, lower = 0, upper = 1)
  checkmate::assert_flag(correct)
  # `tapply()` returns NA for strata levels without any observation; `na.rm = TRUE`
  # keeps the condition from evaluating to NA (which would make `if` fail).
  if (any(tapply(rsp, strata, length) < 5, na.rm = TRUE)) {
    warning("Less than 5 observations in some strata.")
  }

  rsp_by_grp <- split(rsp, f = grp)
  strata_by_grp <- split(strata, f = grp)

  # Finding the weights
  weights <- if (identical(weights_method, "cmh")) {
    prop_diff_cmh(rsp = rsp, grp = grp, strata = strata)$weights
  } else if (identical(weights_method, "wilson_h")) {
    prop_strat_wilson(rsp, strata, conf_level = conf_level, correct = correct)$weights
  }
  # Strata levels that received no weight are appended with a weight of zero.
  weights[levels(strata)[!levels(strata) %in% names(weights)]] <- 0

  # Calculating lower (`l`) and upper (`u`) confidence bounds per group.
  strat_wilson_by_grp <- Map(
    prop_strat_wilson,
    rsp = rsp_by_grp,
    strata = strata_by_grp,
    weights = list(weights, weights),
    conf_level = conf_level,
    correct = correct
  )

  ci_ref <- strat_wilson_by_grp[[1]]
  ci_trt <- strat_wilson_by_grp[[2]]
  l_ref <- as.numeric(ci_ref$conf_int[1])
  u_ref <- as.numeric(ci_ref$conf_int[2])
  l_trt <- as.numeric(ci_trt$conf_int[1])
  u_trt <- as.numeric(ci_trt$conf_int[2])

  # Estimating the diff and n_ref, n_trt (it allows different weights to be used)
  t_tbl <- table(
    factor(rsp, levels = c("FALSE", "TRUE")),
    grp,
    strata
  )
  # `drop = FALSE` together with `dims = 2` keeps the stratum dimension (and its names)
  # even when there is a single stratum, where `colSums(t_tbl[1:2, 1, ])` fails.
  n_ref <- colSums(t_tbl[1:2, 1, , drop = FALSE], dims = 2)
  n_trt <- colSums(t_tbl[1:2, 2, , drop = FALSE], dims = 2)
  # Only strata in which both groups are observed contribute to the estimate.
  use_stratum <- (n_ref > 0) & (n_trt > 0)
  n_ref <- n_ref[use_stratum]
  n_trt <- n_trt[use_stratum]
  p_ref <- t_tbl[2, 1, use_stratum] / n_ref
  p_trt <- t_tbl[2, 2, use_stratum] / n_trt

  # Pair every weight with its own stratum by name. `weights` may be longer than the
  # stratum-filtered vectors (zero-padded above), and multiplying them directly relies
  # on silent recycling. If the weights are not named by stratum, fall back to using
  # them as they are.
  # NOTE(review): for `wilson_h` weights the retained weights are not re-normalized
  # when a stratum is excluded here - confirm this is the intended behavior.
  strata_used <- names(n_ref)
  weights_used <- if (!is.null(names(weights)) && all(strata_used %in% names(weights))) {
    weights[strata_used]
  } else {
    weights
  }

  est1 <- sum(weights_used * p_ref)
  est2 <- sum(weights_used * p_trt)
  diff_est <- est2 - est1

  lambda1 <- sum(weights_used^2 / n_ref)
  lambda2 <- sum(weights_used^2 / n_trt)
  z <- stats::qnorm((1 + conf_level) / 2)

  lower <- diff_est - z * sqrt(lambda2 * l_trt * (1 - l_trt) + lambda1 * u_ref * (1 - u_ref))
  upper <- diff_est + z * sqrt(lambda1 * l_ref * (1 - l_ref) + lambda2 * u_trt * (1 - u_trt))

  list(
    "diff" = diff_est,
    "diff_ci" = c("lower" = lower, "upper" = upper)
  )
}

#' Line plot with the optional table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Line plot with the optional table.
#'
#' @param df (`data.frame`)\cr data set containing all analysis variables.
#' @param alt_counts_df (`data.frame` or `NULL`)\cr data set that will be used (only) to count objects in strata.
#' @param variables (named `character` vector) of variable names in `df` data set. Details are:
#'   * `x` (`character`)\cr name of x-axis variable.
#'   * `y` (`character`)\cr name of y-axis variable.
#'   * `strata` (`character`)\cr name of grouping variable, i.e. treatment arm. Can be `NA` to indicate lack of groups.
#'   * `paramcd` (`character`)\cr name of the variable for parameter's code. Used for y-axis label and plot's subtitle.
#'     Can be `NA` if paramcd is not to be added to the y-axis label or subtitle.
#'   * `y_unit` (`character`)\cr name of variable with units of `y`. Used for y-axis label and plot's subtitle.
#'     Can be `NA` if y unit is not to be added to the y-axis label or subtitle.
#' @param mid (`character` or `NULL`)\cr names of the statistics that will be plotted as midpoints.
#'   All the statistics indicated in `mid` variable must be present in the object returned by `sfun`,
#'   and be of a `double` or `numeric` type vector of length one.
#' @param interval (`character` or `NULL`)\cr names of the statistics that will be plotted as intervals.
#'   All the statistics indicated in `interval` variable must be present in the object returned by `sfun`,
#'   and be of a `double` or `numeric` type vector of length two.
#' @param whiskers (`character`)\cr names of the interval whiskers that will be plotted. Must match the `names`
#'   attribute of the `interval` element in the list returned by `sfun`. It is possible to specify one whisker only,
#'   lower or upper.
#' @param table (`character` or `NULL`)\cr names of the statistics that will be displayed in the table below the plot.
#'   All the statistics indicated in `table` variable must be present in the object returned by `sfun`.
#' @param sfun (`closure`)\cr the function to compute the values of required statistics. It must return a named `list`
#'   with atomic vectors. The names of the `list` elements refer to the names of the statistics and are used by `mid`,
#'   `interval`, `table`. It must be able to accept as input a vector with data for which statistics are computed.
#' @param ... optional arguments to `sfun`.
#' @param mid_type (`character`)\cr controls the type of the `mid` plot, it can be point (`p`), line (`l`),
#'   or point and line (`pl`).
#' @param mid_point_size (`integer` or `double`)\cr controls the font size of the point for `mid` plot.
#' @param position (`character` or `call`)\cr geom element position adjustment, either as a string, or the result of
#'   a call to a position adjustment function.
#' @param legend_title (`character` string)\cr legend title.
#' @param legend_position (`character`)\cr the position of the plot legend (`none`, `left`, `right`, `bottom`, `top`,
#'   or two-element numeric vector).
#' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to control styling of the plot.
#' @param y_lab (`character`)\cr y-axis label. If equal to `NULL`, then no label will be added.
#' @param y_lab_add_paramcd (`logical`)\cr should paramcd, i.e. `unique(df[[variables["paramcd"]]])` be added to the
#'   y-axis label `y_lab`?
#' @param y_lab_add_unit (`logical`)\cr should y unit, i.e. `unique(df[[variables["y_unit"]]])` be added to the y-axis
#'   label `y_lab`?
#' @param title (`character`)\cr plot title.
#' @param subtitle (`character`)\cr plot subtitle.
#' @param subtitle_add_paramcd (`logical`)\cr should paramcd, i.e. `unique(df[[variables["paramcd"]]])` be added to
#'   the plot's subtitle `subtitle`?
#' @param subtitle_add_unit (`logical`)\cr should y unit, i.e. `unique(df[[variables["y_unit"]]])` be added to the
#'   plot's subtitle `subtitle`?
#' @param caption (`character`)\cr optional caption below the plot.
#' @param table_format (named `character` or `NULL`)\cr format patterns for descriptive statistics used in the
#'   (optional) table appended to the plot. It is passed directly to the `h_format_row` function through the `format`
#'   parameter. Names of `table_format` must match the names of statistics returned by `sfun` function.
#' @param table_labels (named `character` or `NULL`)\cr labels for descriptive statistics used in the (optional) table
#'   appended to the plot. Names of `table_labels` must match the names of statistics returned by `sfun` function.
#' @param table_font_size (`integer` or `double`)\cr controls the font size of values in the table.
#' @param newpage (`logical`)\cr should plot be drawn on new page?
#' @param col (`character`)\cr colors.
#'
#' @return A `ggplot` line plot (and statistics table if applicable).
#'
#' @examples
#' library(nestcolor)
#'
#' adsl <- tern_ex_adsl
#' adlb <- tern_ex_adlb %>% dplyr::filter(ANL01FL == "Y", PARAMCD == "ALT", AVISIT != "SCREENING")
#' adlb$AVISIT <- droplevels(adlb$AVISIT)
#' adlb <- dplyr::mutate(adlb, AVISIT = forcats::fct_reorder(AVISIT, AVISITN, min))
#'
#' # Mean with CI
#' g_lineplot(adlb, adsl, subtitle = "Laboratory Test:")
#'
#' # Mean with CI, no stratification
#' g_lineplot(adlb, variables = control_lineplot_vars(strata = NA))
#'
#' # Mean, upper whisker of CI, no strata counts N
#' g_lineplot(
#'   adlb,
#'   whiskers = "mean_ci_upr",
#'   title = "Plot of Mean and Upper 95% Confidence Limit by Visit"
#' )
#'
#' # Median with CI
#' g_lineplot(
#'   adlb,
#'   adsl,
#'   mid = "median",
#'   interval = "median_ci",
#'   whiskers = c("median_ci_lwr", "median_ci_upr"),
#'   title = "Plot of Median and 95% Confidence Limits by Visit"
#' )
#'
#' # Mean, +/- SD
#' g_lineplot(adlb, adsl,
#'   interval = "mean_sdi",
#'   whiskers = c("mean_sdi_lwr", "mean_sdi_upr"),
#'   title = "Plot of Mean +/- SD by Visit"
#' )
#'
#' # Mean with CI plot with stats table
#' g_lineplot(adlb, adsl, table = c("n", "mean", "mean_ci"))
#'
#' # Mean with CI, table and customized confidence level
#' g_lineplot(
#'   adlb,
#'   adsl,
#'   table = c("n", "mean", "mean_ci"),
#'   control = control_summarize_vars(conf_level = 0.80),
#'   title = "Plot of Mean and 80% Confidence Limits by Visit"
#' )
#'
#' # Mean with CI, table, filtered data
#' adlb_f <- dplyr::filter(adlb, ARMCD != "ARM A" | AVISIT == "BASELINE")
#' g_lineplot(adlb_f, table = c("n", "mean"))
#'
#' @export
g_lineplot <- function(df,
                       alt_counts_df = NULL,
                       variables = control_lineplot_vars(),
                       mid = "mean",
                       interval = "mean_ci",
                       whiskers = c("mean_ci_lwr", "mean_ci_upr"),
                       table = NULL,
                       sfun = tern::s_summary,
                       ...,
                       mid_type = "pl",
                       mid_point_size = 2,
                       position = ggplot2::position_dodge(width = 0.4),
                       legend_title = NULL,
                       legend_position = "bottom",
                       ggtheme = nestcolor::theme_nest(),
                       y_lab = NULL,
                       y_lab_add_paramcd = TRUE,
                       y_lab_add_unit = TRUE,
                       title = "Plot of Mean and 95% Confidence Limits by Visit",
                       subtitle = "",
                       subtitle_add_paramcd = TRUE,
                       subtitle_add_unit = TRUE,
                       caption = NULL,
                       table_format = summary_formats(),
                       table_labels = summary_labels(),
                       table_font_size = 3,
                       newpage = TRUE,
                       col = NULL) {
  ############################### |
  # ---- Validate arguments. ----
  ############################### |
  checkmate::assert_character(variables, any.missing = TRUE)
  checkmate::assert_character(mid, null.ok = TRUE)
  checkmate::assert_character(interval, null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)

  checkmate::assert_string(title, null.ok = TRUE)
  checkmate::assert_string(subtitle, null.ok = TRUE)

  if (is.character(interval)) {
    checkmate::assert_vector(whiskers, min.len = 0, max.len = 2)
  }

  # a single whisker is drawn from the midpoint, so `mid` is required in that case
  if (length(whiskers) == 1) {
    checkmate::assert_character(mid)
  }

  if (is.character(mid)) {
    checkmate::assert_scalar(mid_type)
    checkmate::assert_subset(mid_type, c("pl", "p", "l"))
  }

  x <- variables[["x"]]
  y <- variables[["y"]]
  paramcd <- variables["paramcd"] # NA if paramcd == NA or it is not in variables
  y_unit <- variables["y_unit"] # NA if y_unit == NA or it is not in variables
  if (is.na(variables["strata"])) {
    strata <- NULL # NULL if strata == NA or it is not in variables
  } else {
    strata <- variables[["strata"]]
  }
  checkmate::assert_flag(y_lab_add_paramcd, null.ok = TRUE)
  checkmate::assert_flag(subtitle_add_paramcd, null.ok = TRUE)
  if ((!is.null(y_lab) && y_lab_add_paramcd) || (!is.null(subtitle) && subtitle_add_paramcd)) {
    checkmate::assert_false(is.na(paramcd))
    checkmate::assert_scalar(unique(df[[paramcd]]))
  }

  checkmate::assert_flag(y_lab_add_unit, null.ok = TRUE)
  checkmate::assert_flag(subtitle_add_unit, null.ok = TRUE)
  if ((!is.null(y_lab) && y_lab_add_unit) || (!is.null(subtitle) && subtitle_add_unit)) {
    checkmate::assert_false(is.na(y_unit))
    checkmate::assert_scalar(unique(df[[y_unit]]))
  }

  if (!is.null(strata) && !is.null(alt_counts_df)) {
    checkmate::assert_set_equal(unique(alt_counts_df[[strata]]), unique(df[[strata]]))
  }

  ####################################### |
  # ---- Compute required statistics ----
  ####################################### |
  if (!is.null(strata)) {
    df_grp <- tidyr::expand(df, .data[[strata]], .data[[x]]) # expand based on levels of factors
  } else {
    df_grp <- tidyr::expand(df, NULL, .data[[x]])
  }
  df_grp <- df_grp %>%
    dplyr::full_join(y = df[, c(strata, x, y)], by = c(strata, x), multiple = "all") %>%
    dplyr::group_by_at(c(strata, x))

  df_stats <- df_grp %>%
    dplyr::summarise(
      data.frame(t(do.call(c, unname(sfun(.data[[y]], ...)[c(mid, interval)])))),
      .groups = "drop"
    )

  # Drop groups for which the midpoint statistic could not be computed. The column has to be
  # addressed through `.data[[mid]]`: a bare `mid` would test the character string itself,
  # which is never NA, so nothing would be filtered.
  if (!is.null(mid)) {
    df_stats <- df_stats %>% dplyr::filter(!is.na(.data[[mid]]))
  }

  # add number of objects N in strata
  if (!is.null(strata) && !is.null(alt_counts_df)) {
    strata_N <- paste0(strata, "_N") # nolint

    df_N <- as.data.frame(table(alt_counts_df[[strata]], exclude = c(NA, NaN, Inf))) # nolint
    colnames(df_N) <- c(strata, "N") # nolint
    df_N[[strata_N]] <- paste0(df_N[[strata]], " (N = ", df_N$N, ")") # nolint

    # strata_N should not be in colnames(df_stats)
    checkmate::assert_disjunct(strata_N, colnames(df_stats))

    df_stats <- merge(x = df_stats, y = df_N[, c(strata, strata_N)], by = strata)
  } else if (!is.null(strata)) {
    strata_N <- strata # nolint
  } else {
    strata_N <- NULL # nolint
  }

  ############################################### |
  # ---- Prepare certain plot's properties. ----
  ############################################### |
  # legend title
  # `identical()` is used because `legend_position` may also be a two-element numeric vector,
  # for which `!=` would not give a single TRUE/FALSE for `&&`.
  if (is.null(legend_title) && !is.null(strata) && !identical(legend_position, "none")) {
    legend_title <- attr(df[[strata]], "label")
  }

  # y label
  if (!is.null(y_lab)) {
    if (y_lab_add_paramcd) {
      y_lab <- paste(y_lab, unique(df[[paramcd]]))
    }

    if (y_lab_add_unit) {
      y_lab <- paste0(y_lab, " (", unique(df[[y_unit]]), ")")
    }

    y_lab <- trimws(y_lab)
  }

  # subtitle
  if (!is.null(subtitle)) {
    if (subtitle_add_paramcd) {
      subtitle <- paste(subtitle, unique(df[[paramcd]]))
    }

    if (subtitle_add_unit) {
      subtitle <- paste0(subtitle, " (", unique(df[[y_unit]]), ")")
    }

    subtitle <- trimws(subtitle)
  }

  ############################### |
  # ---- Build plot object. ----
  ############################### |
  p <- ggplot2::ggplot(
    data = df_stats,
    mapping = ggplot2::aes(
      x = .data[[x]], y = .data[[mid]],
      color = if (is.null(strata_N)) NULL else .data[[strata_N]],
      shape = if (is.null(strata_N)) NULL else .data[[strata_N]],
      lty = if (is.null(strata_N)) NULL else .data[[strata_N]],
      group = if (is.null(strata_N)) NULL else .data[[strata_N]]
    )
  )

  if (!is.null(mid)) {
    # points
    if (grepl("p", mid_type, fixed = TRUE)) {
      p <- p + ggplot2::geom_point(position = position, size = mid_point_size, na.rm = TRUE)
    }

    # lines
    # further conditions in if are to ensure that not all of the groups consist of only one observation
    if (grepl("l", mid_type, fixed = TRUE) &&
      !is.null(strata) &&
      !all(dplyr::summarise(df_grp, count_n = dplyr::n())[["count_n"]] == 1L)) {
      p <- p + ggplot2::geom_line(position = position, na.rm = TRUE)
    }
  }

  # interval
  if (!is.null(interval)) {
    p <- p +
      ggplot2::geom_errorbar(
        ggplot2::aes(ymin = .data[[whiskers[1]]], ymax = .data[[whiskers[max(1, length(whiskers))]]]),
        width = 0.45,
        position = position
      )

    if (length(whiskers) == 1) { # lwr or upr only; mid is then required
      # workaround as geom_errorbar does not provide single-direction whiskers
      p <- p +
        ggplot2::geom_linerange(
          data = df_stats[!is.na(df_stats[[whiskers]]), ], # as na.rm =TRUE does not suppress warnings
          ggplot2::aes(ymin = .data[[mid]], ymax = .data[[whiskers]]),
          position = position,
          na.rm = TRUE,
          show.legend = FALSE
        )
    }
  }

  p <- p +
    ggplot2::scale_y_continuous(labels = scales::comma, expand = ggplot2::expansion(c(0.25, .25))) +
    ggplot2::labs(
      title = title,
      subtitle = subtitle,
      caption = caption,
      color = legend_title,
      lty = legend_title,
      shape = legend_title,
      x = attr(df[[x]], "label"),
      y = y_lab
    )

  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_color_manual(values = col)
  }

  if (!is.null(ggtheme)) {
    p <- p + ggtheme
  } else {
    # A scalar `if` always yields a single direction, also when `legend_position`
    # is a two-element numeric vector.
    legend_direction <- if (any(legend_position %in% c("top", "bottom"))) {
      "horizontal"
    } else {
      "vertical"
    }
    p <- p +
      ggplot2::theme_bw() +
      ggplot2::theme(
        legend.key.width = grid::unit(1, "cm"),
        legend.position = legend_position,
        legend.direction = legend_direction
      )
  }

  ############################################################# |
  # ---- Optionally, add table to the bottom of the plot. ----
  ############################################################# |
  if (!is.null(table)) {
    df_stats_table <- df_grp %>%
      dplyr::summarise(
        h_format_row(
          x = sfun(.data[[y]], ...)[table],
          format = table_format,
          labels = table_labels
        ),
        .groups = "drop"
      )

    # reversed so that the first requested statistic ends up at the top of the y-axis
    stats_lev <- rev(setdiff(colnames(df_stats_table), c(strata, x)))

    df_stats_table <- df_stats_table %>%
      tidyr::pivot_longer(
        cols = -dplyr::all_of(c(strata, x)),
        names_to = "stat",
        values_to = "value",
        names_ptypes = list(stat = factor(levels = stats_lev))
      )

    tbl <- ggplot2::ggplot(
      df_stats_table,
      ggplot2::aes(x = .data[[x]], y = .data[["stat"]], label = .data[["value"]])
    ) +
      ggplot2::geom_text(size = table_font_size) +
      ggplot2::theme_bw() +
      ggplot2::theme(
        panel.border = ggplot2::element_blank(),
        panel.grid.major = ggplot2::element_blank(),
        panel.grid.minor = ggplot2::element_blank(),
        axis.ticks = ggplot2::element_blank(),
        axis.title = ggplot2::element_blank(),
        axis.text.x = ggplot2::element_blank(),
        axis.text.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 0, b = 0, l = 5)),
        strip.text = ggplot2::element_text(hjust = 0),
        strip.text.x = ggplot2::element_text(margin = ggplot2::margin(1.5, 0, 1.5, 0, "pt")),
        strip.background = ggplot2::element_rect(fill = "grey95", color = NA),
        legend.position = "none"
      )

    if (!is.null(strata)) {
      tbl <- tbl + ggplot2::facet_wrap(facets = strata, ncol = 1)
    }

    # align plot and table
    cowplot::plot_grid(p, tbl, ncol = 1)
  } else {
    p
  }
}

#' Helper function to get the right formatting in the optional table in g_lineplot.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param x (named `list`)\cr list of numerical values to be formatted and optionally labeled.
#'   Elements of `x` must be `numeric` vectors.
#' @param format (named `character` or `NULL`)\cr format patterns for `x`. Names of the `format` must
#'   match the names of `x`. This parameter is passed directly to the `rtables::format_rcell`
#'   function through the `format` parameter.
#' @param labels (named `character` or `NULL`)\cr optional labels for `x`. Names of the `labels` must
#'   match the names of `x`. When a label is not specified for an element of `x`,
#'   then this function tries to use `label` or `names` (in this order) attribute of that element
#'   (depending on which one exists and it is not `NULL` or `NA` or `NaN`). If none of these attributes
#'   are attached to a given element of `x`, then the label is automatically generated.
#'
#' @return A single row `data.frame` object.
#'
#' @examples
#' mean_ci <- c(48, 51)
#' x <- list(mean = 50, mean_ci = mean_ci)
#' format <- c(mean = "xx.x", mean_ci = "(xx.xx, xx.xx)")
#' labels <- c(mean = "My Mean")
#' h_format_row(x, format, labels)
#'
#' attr(mean_ci, "label") <- "Mean 95% CI"
#' x <- list(mean = 50, mean_ci = mean_ci)
#' h_format_row(x, format, labels)
#'
#' @export
h_format_row <- function(x, format, labels = NULL) {
  # Turn one statistic into a one-row, one-column data.frame: the formatted value is the
  # single entry and the label (if any can be determined) becomes the column name.
  as_cell <- function(value, pattern, label = NULL) {
    txt <- format_rcell(x = value, format = pattern)
    if (is.na(txt)) {
      txt <- "NA"
    }

    # Label precedence: explicit `label`, then the "label" attribute of the value,
    # then the names of the value (only when they line up with the formatted result).
    attr_label <- attr(value, "label")
    if (!is.null(label) && !is.na(label)) {
      names(txt) <- label
    } else if (!is.null(attr_label) && !is.na(attr_label)) {
      names(txt) <- attr_label
    } else if (length(value) == length(txt)) {
      names(txt) <- names(value)
    }

    as.data.frame(t(txt))
  }

  # One cell per statistic, in the order of `x`, bound side by side into a single row.
  cells <- lapply(
    names(x),
    function(stat) as_cell(x[[stat]], pattern = format[stat], label = labels[stat])
  )

  do.call(cbind, cells)
}

#' Control Function for g_lineplot Function
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Default values for `variables` parameter in `g_lineplot` function.
#' A variable's default value can be overwritten for any variable.
#'
#' @param x (`character`)\cr x variable name.
#' @param y (`character`)\cr y variable name.
#' @param strata (`character` or `NA`)\cr strata variable name.
#' @param paramcd (`character` or `NA`)\cr paramcd variable name.
#' @param y_unit (`character` or `NA`)\cr y_unit variable name.
#'
#' @return A named character vector of variable names.
#'
#' @examples
#' control_lineplot_vars()
#' control_lineplot_vars(strata = NA)
#'
#' @export
control_lineplot_vars <- function(x = "AVISIT", y = "AVAL", strata = "ARM", paramcd = "PARAMCD", y_unit = "AVALU") {
  # `x` and `y` must be single strings.
  checkmate::assert_string(x)
  checkmate::assert_string(y)

  # The remaining variables may additionally be `NA`.
  checkmate::assert_string(strata, na.ok = TRUE)
  checkmate::assert_string(paramcd, na.ok = TRUE)
  checkmate::assert_string(y_unit, na.ok = TRUE)

  # Named character vector, one element per variable role.
  c(x = x, y = y, strata = strata, paramcd = paramcd, y_unit = y_unit)
}

#' Confidence Interval for Mean
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convenient function for calculating the mean confidence interval. It calculates the arithmetic as well as the
#' geometric mean. It can be used as a `ggplot` helper function for plotting.
#'
#' @inheritParams argument_convention
#' @param n_min (`number`)\cr a minimum number of non-missing `x` to estimate the confidence interval for mean.
#' @param gg_helper (`logical`)\cr `TRUE` when output should be aligned for the use with `ggplot`.
#' @param geom_mean (`logical`)\cr `TRUE` when the geometric mean should be calculated.
#'
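#' @return A named `vector` of values `mean_ci_lwr` and `mean_ci_upr`. If `gg_helper = TRUE`, a one-row
#'   `data.frame` with columns `y` (mean), `ymin` and `ymax` (confidence limits) is returned instead.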
#'
#' @examples
#' stat_mean_ci(sample(10), gg_helper = FALSE)
#'
#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
#'   ggplot2::geom_point()
#'
#' p + ggplot2::stat_summary(
#'   fun.data = stat_mean_ci,
#'   geom = "errorbar"
#' )
#'
#' p + ggplot2::stat_summary(
#'   fun.data = stat_mean_ci,
#'   fun.args = list(conf_level = 0.5),
#'   geom = "errorbar"
#' )
#'
#' p + ggplot2::stat_summary(
#'   fun.data = stat_mean_ci,
#'   fun.args = list(conf_level = 0.5, geom_mean = TRUE),
#'   geom = "errorbar"
#' )
#'
#' @export
stat_mean_ci <- function(x,
                         conf_level = 0.95,
                         na.rm = TRUE, # nolint
                         n_min = 2,
                         gg_helper = TRUE,
                         geom_mean = FALSE) {
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  if (geom_mean) {
    # The geometric mean is only defined for strictly positive values. Everything below
    # is carried out on the log scale and back-transformed at the end.
    observed <- x[!is.na(x)]
    if (any(observed <= 0)) {
      m <- NA_real_
    } else {
      x <- log(x)
      m <- mean(x)
    }
  } else {
    m <- mean(x)
  }

  ci_unavailable <- n < n_min || is.na(m)
  if (ci_unavailable) {
    ci <- c(mean_ci_lwr = NA_real_, mean_ci_upr = NA_real_)
  } else {
    # t-based half width of the confidence interval
    half_width <- stats::qt((1 + conf_level) / 2, df = n - 1) * stats::sd(x) / sqrt(n)
    ci <- c(mean_ci_lwr = m - half_width, mean_ci_upr = m + half_width)
    if (geom_mean) {
      ci <- exp(ci)
    }
  }

  if (gg_helper) {
    # layout expected by `ggplot2::stat_summary(fun.data = ...)`
    m <- if (is.na(m)) NA_real_ else m
    ci <- data.frame(y = if (geom_mean) exp(m) else m, ymin = ci[[1]], ymax = ci[[2]])
  }

  ci
}

#' Confidence Interval for Median
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convenient function for calculating the median confidence interval. It can be used as a `ggplot` helper
#' function for plotting.
#'
#' @inheritParams argument_convention
#' @param gg_helper (`logical`)\cr `TRUE` when output should be aligned for the use with `ggplot`.
#'
#' @details The function was adapted from `DescTools/versions/0.99.35/source`
#'
#' @return A named `vector` of values `median_ci_lwr` and `median_ci_upr`.
#'
#' @examples
#' stat_median_ci(sample(10), gg_helper = FALSE)
#'
#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
#'   ggplot2::geom_point()
#' p + ggplot2::stat_summary(
#'   fun.data = stat_median_ci,
#'   geom = "errorbar"
#' )
#'
#' @export
stat_median_ci <- function(x,
                           conf_level = 0.95,
                           na.rm = TRUE, # nolint
                           gg_helper = TRUE) {
  # Confidence interval for the median built from order statistics of `x`.
  # The attained confidence level is attached to the result as attribute "conf_level".
  values <- unname(x)
  if (na.rm) {
    values <- values[!is.na(values)]
  }
  n_obs <- length(values)
  center <- stats::median(values)

  # Rank of the lower order statistic, taken from the Binomial(n, 0.5) distribution.
  rank_lwr <- stats::qbinom(p = (1 - conf_level) / 2, size = n_obs, prob = 0.5, lower.tail = TRUE)

  # Defaults cover the cases without a usable interval: a zero rank (small samples, where the
  # interval could fall outside the observed range) or a missing median.
  bounds <- c(median_ci_lwr = NA_real_, median_ci_upr = NA_real_)
  achieved_level <- NA_real_
  if (!(rank_lwr == 0 || is.na(center))) {
    ordered <- sort(values)
    bounds <- c(median_ci_lwr = ordered[rank_lwr], median_ci_upr = ordered[n_obs - rank_lwr + 1])
    achieved_level <- 1 - 2 * stats::pbinom(rank_lwr - 1, size = n_obs, prob = 0.5)
  }

  result <- if (gg_helper) {
    data.frame(y = center, ymin = bounds[[1]], ymax = bounds[[2]])
  } else {
    bounds
  }
  attr(result, "conf_level") <- achieved_level

  result
}

#' p-Value of the Mean
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convenient function for calculating the two-sided p-value of the mean.
#'
#' @inheritParams argument_convention
#' @param n_min (`numeric`)\cr a minimum number of non-missing `x` to estimate the p-value of the mean.
#' @param test_mean (`numeric`)\cr mean value to test under the null hypothesis.
#'
#' @return A p-value.
#'
#' @examples
#' stat_mean_pval(sample(10))
#'
#' stat_mean_pval(rnorm(10), test_mean = 0.5)
#'
#' @export
stat_mean_pval <- function(x,
                           na.rm = TRUE, # nolint
                           n_min = 2,
                           test_mean = 0) {
  # Two-sided p-value of the one-sample t-test of H0: mean(x) == test_mean.
  # Returns c(p_value = NA) when fewer than `n_min` values are available.
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  if (n < n_min) {
    return(c(p_value = NA_real_))
  }

  # t statistic: distance between sample mean and hypothesised mean in standard-error units.
  x_se <- stats::sd(x) / sqrt(n)
  ttest <- (mean(x) - test_mean) / x_se

  c(p_value = 2 * stats::pt(-abs(ttest), df = n - 1))
}

#' Estimation of Proportions per Level of Factor
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Estimate the proportion along with confidence interval of a proportion
#' regarding the level of a factor.
#'
#' @inheritParams argument_convention
#'
#' @seealso Relevant description function [d_onco_rsp_label()].
#'
#' @name estimate_multinomial_rsp
NULL

#' Description of Standard Oncology Response
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Describe the oncology response in a standard way.
#'
#' @param x (`character`)\cr the standard oncology code to be described.
#'
#' @return Response labels.
#'
#' @seealso [estimate_multinomial_rsp()]
#'
#' @examples
#' d_onco_rsp_label(
#'   c("CR", "PR", "SD", "NON CR/PD", "PD", "NE", "Missing", "<Missing>", "NE/Missing")
#' )
#'
#' # Adding some values not considered in d_onco_rsp_label
#'
#' d_onco_rsp_label(
#'   c("CR", "PR", "hello", "hi")
#' )
#'
#' @export
d_onco_rsp_label <- function(x) {
  codes <- as.character(x)

  # Lookup table: standard oncology response code -> descriptive label.
  lookup <- c(
    "CR" = "Complete Response (CR)",
    "PR" = "Partial Response (PR)",
    "MR" = "Minimal/Minor Response (MR)",
    "MRD" = "Minimal Residual Disease (MRD)",
    "SD" = "Stable Disease (SD)",
    "PD" = "Progressive Disease (PD)",
    "NON CR/PD" = "Non-CR or Non-PD (NON CR/PD)",
    "NE" = "Not Evaluable (NE)",
    "NE/Missing" = "Missing or unevaluable",
    "Missing" = "Missing",
    "NA" = "Not Applicable (NA)",
    "ND" = "Not Done (ND)"
  )

  # Codes without an entry in the lookup table are passed through unchanged.
  describe_code <- function(code) {
    if (code %in% names(lookup)) lookup[code] else code
  }
  labelled <- vapply(X = codes, FUN = describe_code, FUN.VALUE = character(1))

  # Known labels come first, in lookup-table order, followed by unknown values in order of appearance.
  known_levels <- intersect(lookup, labelled)
  other_levels <- setdiff(labelled, lookup)

  factor(labelled, levels = c(known_levels, other_levels))
}

#' @describeIn estimate_multinomial_rsp Statistics function which feeds the length of `x` as number
#'   of successes, and `.N_col` as total number of successes and failures into [s_proportion()].
#'
#' @return
#' * `s_length_proportion()` returns statistics from [s_proportion()].
#'
#' @examples
#' s_length_proportion(rep("CR", 10), .N_col = 100)
#' s_length_proportion(factor(character(0)), .N_col = 100)
#'
#' @export
s_length_proportion <- function(x,
                                .N_col, # nolint
                                ...) {
  # `x` must be a factor or character vector with at most `.N_col` elements
  # and at most one distinct value.
  checkmate::assert_multi_class(x, classes = c("factor", "character"))
  checkmate::assert_vector(x, min.len = 0, max.len = .N_col)
  checkmate::assert_vector(unique(x), min.len = 0, max.len = 1)

  # Each element of `x` is one success; the remaining `.N_col - length(x)` entries are failures.
  n_success <- length(x)
  responses <- rep(c(TRUE, FALSE), times = c(n_success, .N_col - n_success))

  s_proportion(df = responses, ...)
}

#' @describeIn estimate_multinomial_rsp Formatted analysis function which is used as `afun`
#'   in `estimate_multinomial_response()`.
#'
#' @return
#' * `a_length_proportion()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_length_proportion(rep("CR", 10), .N_col = 100)
#' a_length_proportion(factor(character(0)), .N_col = 100)
#'
#' @export
a_length_proportion <- make_afun(
  # Formatted wrapper around `s_length_proportion()`. `.formats` gives the display format
  # for the `n_prop` and `prop_ci` statistics (presumably those returned by `s_proportion()`).
  s_length_proportion,
  .formats = c(
    n_prop = "xx (xx.x%)",
    prop_ci = "(xx.xx, xx.xx)"
  )
)

#' @describeIn estimate_multinomial_rsp Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()] and
#'   [rtables::summarize_row_groups()].
#'
#' @return
#' * `estimate_multinomial_response()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_length_proportion()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' # Use of the layout creating function.
#' dta_test <- data.frame(
#'   USUBJID = paste0("S", 1:12),
#'   ARM     = factor(rep(LETTERS[1:3], each = 4)),
#'   AVAL    = c(A = c(1, 1, 1, 1), B = c(0, 0, 1, 1), C = c(0, 0, 0, 0))
#' ) %>% mutate(
#'   AVALC = factor(AVAL,
#'     levels = c(0, 1),
#'     labels = c("Complete Response (CR)", "Partial Response (PR)")
#'   )
#' )
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   estimate_multinomial_response(var = "AVALC")
#'
#' tbl <- build_table(lyt, dta_test)
#'
#' html <- as_html(tbl)
#' html
#' \dontrun{
#' Viewer(html)
#' }
#'
#' @export
estimate_multinomial_response <- function(lyt,
                                          var,
                                          ...,
                                          show_labels = "hidden",
                                          table_names = var,
                                          .stats = "prop_ci",
                                          .formats = NULL,
                                          .labels = NULL,
                                          .indent_mods = NULL) {
  # Customise the formatted analysis function with the requested statistics and formatting.
  response_afun <- make_afun(
    a_length_proportion,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Split the rows by `var` and add the row-group summaries before the analysis rows.
  split_lyt <- summarize_row_groups(split_rows_by(lyt, var = var))

  # Additional arguments in `...` are forwarded to the analysis function through `extra_args`.
  analyze(
    split_lyt,
    vars = var,
    afun = response_afun,
    show_labels = show_labels,
    table_names = table_names,
    extra_args = list(...)
  )
}

#' Control Function for Descriptive Statistics
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Sets a list of parameters for summaries of descriptive statistics. Typically used internally to specify
#' details for [s_summary()].
#'
#' @inheritParams argument_convention
#' @param quantiles (`numeric`)\cr of length two to specify the quantiles to calculate.
#' @param quantile_type (`numeric`)\cr between 1 and 9 selecting quantile algorithms to be used.
#'   Default is set to 2 as this matches the default quantile algorithm in SAS `proc univariate` set by `QNTLDEF=5`.
#'   This differs from R's default. See more about `type` in [stats::quantile()].
#' @param test_mean (`numeric`)\cr to test against the mean under the null hypothesis when calculating p-value.
#'
#' @return A list of components with the same names as the arguments.
#'
#' @export
control_summarize_vars <- function(conf_level = 0.95,
                                   quantiles = c(0.25, 0.75),
                                   quantile_type = 2,
                                   test_mean = 0) {
  # Validate every setting before bundling them into the control list.
  checkmate::assert_vector(quantiles, len = 2)
  checkmate::assert_int(quantile_type, lower = 1, upper = 9)
  checkmate::assert_numeric(test_mean)
  # Each quantile is checked individually; the results of the checks are not needed.
  invisible(lapply(quantiles, assert_proportion_value))
  assert_proportion_value(conf_level)

  list(
    conf_level = conf_level,
    quantiles = quantiles,
    quantile_type = quantile_type,
    test_mean = test_mean
  )
}

#' Format Function for Descriptive Statistics
#'
#' Returns format patterns for descriptive statistics. The formats are understood by `rtables`.
#'
#' @param type (`string`)\cr choice of a summary data type. Only `counts` and `numeric` types are currently supported.
#'
#' @return A named `vector` of default statistic formats for the given data type.
#'
#' @keywords internal
summary_formats <- function(type = "numeric") {
  # Any `type` other than "counts" yields the formats for numeric statistics.
  if (type != "counts") {
    return(c(
      "n" = "xx.",
      "sum" = "xx.x",
      "mean" = "xx.x",
      "sd" = "xx.x",
      "se" = "xx.x",
      "mean_sd" = "xx.x (xx.x)",
      "mean_se" = "xx.x (xx.x)",
      "mean_ci" = "(xx.xx, xx.xx)",
      "mean_sei" = "(xx.xx, xx.xx)",
      "mean_sdi" = "(xx.xx, xx.xx)",
      "mean_pval" = "xx.xx",
      "median" = "xx.x",
      "mad" = "xx.x",
      "median_ci" = "(xx.xx, xx.xx)",
      "quantiles" = "xx.x - xx.x",
      "iqr" = "xx.x",
      "range" = "xx.x - xx.x",
      "cv" = "xx.x",
      "min" = "xx.x",
      "max" = "xx.x",
      "median_range" = "xx.x (xx.x - xx.x)",
      "geom_mean" = "xx.x",
      "geom_cv" = "xx.x"
    ))
  }

  # Formats for count statistics; `count_fraction` is formatted by a function rather than a pattern.
  c(
    "n" = "xx.",
    "count" = "xx.",
    "count_fraction" = format_count_fraction,
    "n_blq" = "xx."
  )
}

#' Label Function for Descriptive Statistics
#'
#' Returns labels of descriptive statistics for numeric variables.
#'
#' @return A named `vector` of default statistic labels.
#'
#' @keywords internal
summary_labels <- function() {
  # Display label for each numeric statistic, keyed by statistic name.
  labels <- c(
    "mean" = "Mean",
    "sum" = "Sum",
    "sd" = "SD",
    "se" = "SE",
    "mean_sd" = "Mean (SD)",
    "mean_se" = "Mean (SE)",
    "median" = "Median",
    "mad" = "Median Absolute Deviation",
    "iqr" = "IQR",
    "range" = "Min - Max",
    "median_range" = "Median (Min - Max)",
    "cv" = "CV (%)",
    "min" = "Minimum",
    "max" = "Maximum",
    "geom_mean" = "Geometric Mean",
    "geom_cv" = "CV % Geometric Mean",
    "n" = "n"
  )

  labels
}

#' Summarize Variables
#'
#' @description `r lifecycle::badge("stable")`
#'
#' We use the S3 generic function [s_summary()] to implement summaries for different `x` objects. This
#' is used as a statistics function in combination with the analyze function [summarize_vars()].
#'
#' @inheritParams argument_convention
#'
#' @name summarize_variables
NULL

#' @describeIn summarize_variables S3 generic function to produce a variable summary.
#'
#' @return
#' * `s_summary()` returns different statistics depending on the class of `x`.
#'
#' @export
s_summary <- function(x,
                      na.rm = TRUE, # nolint
                      denom,
                      .N_row, # nolint
                      .N_col, # nolint
                      na_level,
                      .var,
                      ...) {
  # `na.rm` is validated once here, before dispatching to the method for the class of `x`.
  checkmate::assert_flag(na.rm)
  UseMethod("s_summary", x)
}

#' @describeIn summarize_variables Method for `numeric` class.
#'
#' @param control (`list`)\cr parameters for descriptive statistics details, specified by using
#'   the helper function [control_summarize_vars()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for mean and median.
#'   * `quantiles` (`numeric`)\cr vector of length two to specify the quantiles.
#'   * `quantile_type` (`numeric`)\cr between 1 and 9 selecting quantile algorithms to be used.
#'     See more about `type` in [stats::quantile()].
#'   * `test_mean` (`numeric`)\cr value to test against the mean under the null hypothesis when calculating p-value.
#'
#' @return
#'   * If `x` is of class `numeric`, returns a `list` with the following named `numeric` items:
#'     * `n`: The [length()] of `x`.
#'     * `sum`: The [sum()] of `x`.
#'     * `mean`: The [mean()] of `x`.
#'     * `sd`: The [stats::sd()] of `x`.
#'     * `se`: The standard error of `x` mean, i.e.: (`sd(x) / sqrt(length(x))`).
#'     * `mean_sd`: The [mean()] and [stats::sd()] of `x`.
#'     * `mean_se`: The [mean()] of `x` and its standard error (see above).
#'     * `mean_ci`: The CI for the mean of `x` (from [stat_mean_ci()]).
#'     * `mean_sei`: The SE interval for the mean of `x`, i.e.: ([mean()] -/+ [stats::sd()] / [sqrt()]).
#'     * `mean_sdi`: The SD interval for the mean of `x`, i.e.: ([mean()] -/+ [stats::sd()]).
#'     * `mean_pval`: The two-sided p-value of the mean of `x` (from [stat_mean_pval()]).
#'     * `median`: The [stats::median()] of `x`.
#'     * `mad`: The median absolute deviation of `x`, i.e.: ([stats::median()] of `xc`,
#'       where `xc` = `x` - [stats::median()]).
#'     * `median_ci`: The CI for the median of `x` (from [stat_median_ci()]).
#'     * `quantiles`: Two sample quantiles of `x` (from [stats::quantile()]).
#'     * `iqr`: The [stats::IQR()] of `x`.
#'     * `range`: The [range_noinf()] of `x`.
#'     * `min`: The [min()] of `x`.
#'     * `max`: The [max()] of `x`.
#'     * `median_range`: The [median()] and [range_noinf()] of `x`.
#'     * `cv`: The coefficient of variation of `x`, i.e.: ([stats::sd()] / [mean()] * 100).
#'     * `geom_mean`: The geometric mean of `x`, i.e.: (`exp(mean(log(x)))`).
#'     * `geom_cv`: The geometric coefficient of variation of `x`, i.e.: (`sqrt(exp(sd(log(x)) ^ 2) - 1) * 100`).
#'
#' @note
#' * If `x` is an empty vector, `NA` is returned. This is the expected feature so as to return `rcell` content in
#'   `rtables` when the intersection of a column and a row delimits an empty data selection.
#' * When the `mean` function is applied to an empty vector, `NA` will be returned instead of `NaN`, the latter
#'   being standard behavior in R.
#'
#' @method s_summary numeric
#'
#' @examples
#' # `s_summary.numeric`
#'
#' ## Basic usage: empty numeric returns NA-filled items.
#' s_summary(numeric())
#'
#' ## Management of NA values.
#' x <- c(NA_real_, 1)
#' s_summary(x, na.rm = TRUE)
#' s_summary(x, na.rm = FALSE)
#'
#' x <- c(NA_real_, 1, 2)
#' s_summary(x, stats = NULL)
#'
#' ## Benefits in `rtables` constructions:
#' require(rtables)
#' dta_test <- data.frame(
#'   Group = rep(LETTERS[1:3], each = 2),
#'   sub_group = rep(letters[1:2], each = 3),
#'   x = 1:6
#' )
#'
#' ## The summary obtained with `rtables`:
#' basic_table() %>%
#'   split_cols_by(var = "Group") %>%
#'   split_rows_by(var = "sub_group") %>%
#'   analyze(vars = "x", afun = s_summary) %>%
#'   build_table(df = dta_test)
#'
#' ## By comparison with `lapply`:
#' X <- split(dta_test, f = with(dta_test, interaction(Group, sub_group)))
#' lapply(X, function(x) s_summary(x$x))
#'
#' @export
s_summary.numeric <- function(x,
                              na.rm = TRUE, # nolint
                              denom,
                              .N_row, # nolint
                              .N_col, # nolint
                              na_level,
                              .var,
                              control = control_summarize_vars(),
                              ...) {
  checkmate::assert_numeric(x)

  if (na.rm) {
    x <- x[!is.na(x)]
  }

  # Quantities shared by several statistics. `x` is not modified past this point.
  is_empty <- length(x) == 0
  has_na <- any(is.na(x))
  x_sd <- stats::sd(x, na.rm = FALSE)

  y <- list()

  # Sample size, total, location and spread. Sum and mean of an empty vector are reported as NA.
  y$n <- c("n" = length(x))
  y$sum <- c("sum" = if (is_empty) NA_real_ else sum(x, na.rm = FALSE))
  y$mean <- c("mean" = if (is_empty) NA_real_ else mean(x, na.rm = FALSE))
  y$sd <- c("sd" = x_sd)
  y$se <- c("se" = x_sd / sqrt(length(stats::na.omit(x))))
  y$mean_sd <- c(y$mean, "sd" = x_sd)
  y$mean_se <- c(y$mean, y$se)

  # Intervals around the mean: confidence interval, then -/+ one SE and -/+ one SD.
  conf_level <- control$conf_level
  y$mean_ci <- formatters::with_label(
    stat_mean_ci(x, conf_level = conf_level, na.rm = FALSE, gg_helper = FALSE),
    paste("Mean", f_conf_level(conf_level))
  )
  y$mean_sei <- formatters::with_label(
    stats::setNames(y$mean[[1]] + c(-1, 1) * x_sd / sqrt(y$n), c("mean_sei_lwr", "mean_sei_upr")),
    "Mean -/+ 1xSE"
  )
  y$mean_sdi <- formatters::with_label(
    stats::setNames(y$mean[[1]] + c(-1, 1) * x_sd, c("mean_sdi_lwr", "mean_sdi_upr")),
    "Mean -/+ 1xSD"
  )

  # p-value of the mean against `control$test_mean`.
  y$mean_pval <- formatters::with_label(
    stat_mean_pval(x, test_mean = control$test_mean, na.rm = FALSE, n_min = 2),
    paste("Mean", f_pval(control$test_mean))
  )

  # Median-based statistics. `mad` is the median of the deviations from the median.
  y$median <- c("median" = stats::median(x, na.rm = FALSE))
  y$mad <- c("mad" = stats::median(x - y$median, na.rm = FALSE))
  y$median_ci <- formatters::with_label(
    stat_median_ci(x, conf_level = conf_level, na.rm = FALSE, gg_helper = FALSE),
    paste("Median", f_conf_level(conf_level))
  )

  # Quantiles and IQR are NA whenever `x` still contains missing values.
  probs <- control$quantiles
  qnts <- if (has_na) {
    rep(NA_real_, length(probs))
  } else {
    stats::quantile(x, probs = probs, type = control$quantile_type, na.rm = FALSE)
  }
  names(qnts) <- paste("quantile", probs, sep = "_")
  y$quantiles <- formatters::with_label(
    qnts,
    paste0(paste(paste0(probs * 100, "%"), collapse = " and "), "-ile")
  )
  y$iqr <- c("iqr" = if (has_na) NA_real_ else stats::IQR(x, na.rm = FALSE, type = control$quantile_type))

  # Range, with its two ends also reported as separate statistics.
  y$range <- stats::setNames(range_noinf(x, na.rm = FALSE), c("min", "max"))
  y$min <- y$range[1]
  y$max <- y$range[2]
  y$median_range <- formatters::with_label(c(y$median, y$range), "Median (Min - Max)")

  y$cv <- c("cv" = x_sd / unname(y$mean) * 100)

  # Geometric statistics work on the log scale, so non-positive values are turned into NA first.
  x_positive <- x
  x_positive[x_positive <= 0] <- NA
  log_x <- log(x_positive)
  y$geom_mean <- c("geom_mean" = exp(mean(log_x, na.rm = FALSE)))
  y$geom_mean_ci <- formatters::with_label(
    stat_mean_ci(x, conf_level = conf_level, na.rm = FALSE, gg_helper = FALSE, geom_mean = TRUE),
    paste("Geometric Mean", f_conf_level(conf_level))
  )
  y$geom_cv <- c("geom_cv" = sqrt(exp(stats::sd(log_x, na.rm = FALSE)^2) - 1) * 100)

  y
}

#' @describeIn summarize_variables Method for `factor` class.
#'
#' @param denom (`string`)\cr choice of denominator for factor proportions. Options are:
#'   * `n`: number of values in this row and column intersection.
#'   * `N_row`: total number of values in this row across columns.
#'   * `N_col`: total number of values in this column across rows.
#'
#' @return
#'   * If `x` is of class `factor` or converted from `character`, returns a `list` with named `numeric` items:
#'     * `n`: The [length()] of `x`.
#'     * `count`: A list with the number of cases for each level of the factor `x`.
#'     * `count_fraction`: Similar to `count` but also includes the proportion of cases for each level of the
#'       factor `x` relative to the denominator; the proportion is `0` if the denominator is zero.
#'
#' @note
#' * If `x` is an empty `factor`, a list is still returned for `counts` with one element
#'   per factor level. If there are no levels in `x`, the function fails.
#' * If `x` contains `NA`, it is expected that `NA` have been conveyed to `na_level`
#'   appropriately beforehand with [df_explicit_na()] or [explicit_na()].
#'
#' @method s_summary factor
#'
#' @examples
#' # `s_summary.factor`
#'
#' ## Basic usage:
#' s_summary(factor(c("a", "a", "b", "c", "a")))
#' # Empty factor returns zero counts for each level.
#' s_summary(factor(levels = c("a", "b", "c")))
#'
#' ## Management of NA values.
#' x <- factor(c(NA, "Female"))
#' x <- explicit_na(x)
#' s_summary(x, na.rm = TRUE)
#' s_summary(x, na.rm = FALSE)
#'
#' ## Different denominators.
#' x <- factor(c("a", "a", "b", "c", "a"))
#' s_summary(x, denom = "N_row", .N_row = 10L)
#' s_summary(x, denom = "N_col", .N_col = 20L)
#'
#' @export
s_summary.factor <- function(x,
                             na.rm = TRUE, # nolint
                             denom = c("n", "N_row", "N_col"),
                             .N_row, # nolint
                             .N_col, # nolint
                             na_level = "<Missing>",
                             ...) {
  assert_valid_factor(x, any.missing = FALSE)
  denom <- match.arg(denom)

  # With `na.rm`, `fct_discard()` is applied to `x` with `na_level` before counting.
  if (na.rm) {
    x <- fct_discard(x, na_level)
  }

  n_obs <- length(x)
  level_counts <- as.list(table(x, useNA = "ifany"))

  # Denominator for the fractions, selected by `denom`.
  denominator <- switch(denom,
    n = n_obs,
    N_row = .N_row,
    N_col = .N_col
  )

  # Pair each count with its fraction; the fraction is 0 when the denominator is not positive.
  add_fraction <- function(cnt) {
    c(cnt, ifelse(denominator > 0, cnt / denominator, 0))
  }

  list(
    n = n_obs,
    count = level_counts,
    count_fraction = lapply(level_counts, add_fraction),
    # Number of values matching the pattern "BLQ", "LTR" or "<" followed by a digit 1-9.
    n_blq = sum(grepl("BLQ|LTR|<[1-9]", x))
  )
}

#' @describeIn summarize_variables Method for `character` class. This makes an automatic
#'   conversion to factor (with a warning) and then forwards to the method for factors.
#'
#' @param verbose (`logical`)\cr Defaults to `TRUE`, which prints out warnings and messages. It is mainly used
#'   to print out information about factor casting.
#'
#' @note
#' * Automatic conversion of character to factor does not guarantee that the table
#'   can be generated correctly. In particular for sparse tables this very likely can fail.
#'   It is therefore better to always pre-process the dataset such that factors are manually
#'   created from character variables before passing the dataset to [rtables::build_table()].
#'
#' @method s_summary character
#'
#' @examples
#' # `s_summary.character`
#'
#' ## Basic usage:
#' s_summary(c("a", "a", "b", "c", "a"), .var = "x", verbose = FALSE)
#' s_summary(c("a", "a", "b", "c", "a", ""), .var = "x", na.rm = FALSE, verbose = FALSE)
#'
#' @export
s_summary.character <- function(x,
                                na.rm = TRUE, # nolint
                                denom = c("n", "N_row", "N_col"),
                                .N_row, # nolint
                                .N_col, # nolint
                                na_level = "<Missing>",
                                .var,
                                verbose = TRUE,
                                ...) {
  # Convert the character vector to a factor; `.var` is passed as the variable name.
  x_as_factor <- as_factor_keep_attributes(x, x_name = .var, na_level = na_level, verbose = verbose)

  # Re-dispatch on the converted object so that the factor method does the actual summary.
  s_summary(
    x = x_as_factor,
    denom = denom,
    .N_row = .N_row,
    .N_col = .N_col,
    na.rm = na.rm,
    na_level = na_level,
    ...
  )
}

#' @describeIn summarize_variables Method for `logical` class.
#'
#' @param denom (`string`)\cr choice of denominator for proportion. Options are:
#'   * `n`: number of values in this row and column intersection.
#'   * `N_row`: total number of values in this row across columns.
#'   * `N_col`: total number of values in this column across rows.
#'
#' @return
#'   * If `x` is of class `logical`, returns a `list` with named `numeric` items:
#'     * `n`: The [length()] of `x` (possibly after removing `NA`s).
#'     * `count`: Count of `TRUE` in `x`.
#'     * `count_fraction`: Count and proportion of `TRUE` in `x` relative to the denominator, or `NA` if the
#'       denominator is zero. Note that `NA`s in `x` are never counted or leading to `NA` here.
#'
#' @method s_summary logical
#'
#' @examples
#' # `s_summary.logical`
#'
#' ## Basic usage:
#' s_summary(c(TRUE, FALSE, TRUE, TRUE))
#'
#' ## Management of NA values.
#' x <- c(NA, TRUE, FALSE)
#' s_summary(x, na.rm = TRUE)
#' s_summary(x, na.rm = FALSE)
#'
#' ## Different denominators.
#' x <- c(TRUE, FALSE, TRUE, TRUE)
#' s_summary(x, denom = "N_row", .N_row = 10L)
#' s_summary(x, denom = "N_col", .N_col = 20L)
#'
#' @export
s_summary.logical <- function(x,
                              na.rm = TRUE, # nolint
                              denom = c("n", "N_row", "N_col"),
                              .N_row, # nolint
                              .N_col, # nolint
                              ...) {
  denom <- match.arg(denom)
  if (na.rm) {
    x <- x[!is.na(x)]
  }

  n_obs <- length(x)
  # Remaining NAs (when `na.rm = FALSE`) are never counted as TRUE.
  n_true <- sum(x, na.rm = TRUE)

  # Denominator for the fraction, selected by `denom`.
  denominator <- switch(denom,
    n = n_obs,
    N_row = .N_row,
    N_col = .N_col
  )

  list(
    n = n_obs,
    count = n_true,
    # The fraction is NA when the denominator is not positive.
    count_fraction = c(n_true, ifelse(denominator > 0, n_true / denominator, NA)),
    n_blq = 0L
  )
}

#' @describeIn summarize_variables Formatted analysis function which is used as `afun` in `summarize_vars()`.
#'
#' @return
#' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @export
a_summary <- function(x,
                      ...,
                      .N_row, # nolint
                      .N_col, # nolint
                      .var) {
  # S3 generic: dispatches on the class of `x` to the `a_summary.*` methods.
  UseMethod("a_summary", x)
}

# Default formats and labels for numeric statistics, used by `a_summary.numeric` and
# `create_afun_summary()`.
.a_summary_numeric_formats <- summary_formats()
.a_summary_numeric_labels <- summary_labels()

#' @describeIn summarize_variables Formatted analysis function method for `numeric` class.
#'
#' @examples
#' # `a_summary.numeric`
#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
#'
#' @export
a_summary.numeric <- make_afun(
  # Wraps `s_summary.numeric()` with the default numeric formats and labels.
  s_summary.numeric,
  .formats = .a_summary_numeric_formats,
  .labels = .a_summary_numeric_labels
)

# Default formats for count statistics, shared by the factor, character and logical `a_summary` methods.
.a_summary_counts_formats <- summary_formats(type = "counts")

#' @describeIn summarize_variables Formatted analysis function method for `factor` class.
#'
#' @examples
#' # `a_summary.factor`
#' # We need to ungroup `count` and `count_fraction` first so that the rtables formatting
#' # functions can be applied correctly.
#' afun <- make_afun(
#'   getS3method("a_summary", "factor"),
#'   .ungroup_stats = c("count", "count_fraction")
#' )
#' afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
#'
#' @export
a_summary.factor <- make_afun(
  # Wraps `s_summary.factor()` with the default count formats.
  s_summary.factor,
  .formats = .a_summary_counts_formats
)

#' @describeIn summarize_variables Formatted analysis function method for `character` class.
#'
#' @examples
#' # `a_summary.character`
#' afun <- make_afun(
#'   getS3method("a_summary", "character"),
#'   .ungroup_stats = c("count", "count_fraction")
#' )
#' afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
#'
#' @export
a_summary.character <- make_afun(
  # Wraps `s_summary.character()` with the default count formats.
  s_summary.character,
  .formats = .a_summary_counts_formats
)

#' @describeIn summarize_variables Formatted analysis function method for `logical` class.
#'
#' @examples
#' # `a_summary.logical`
#' afun <- make_afun(
#'   getS3method("a_summary", "logical")
#' )
#' afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
#'
#' @export
a_summary.logical <- make_afun(
  # Wraps `s_summary.logical()` with the default count formats.
  s_summary.logical,
  .formats = .a_summary_counts_formats
)

#' Constructor Function for [summarize_vars()] and [summarize_colvars()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Constructor function which creates a combined formatted analysis function.
#'
#' @inheritParams argument_convention
#'
#' @return Combined formatted analysis function for use in [summarize_vars()].
#'
#' @note Since [a_summary()] is generic and we want customization of the formatting arguments
#'   via [rtables::make_afun()], we need to create another temporary generic function, with
#'   corresponding customized methods. Then in order for the methods to be found,
#'   we need to wrap them in a combined `afun`. Since this is required by two layout creating
#'   functions (and possibly others in the future), we provide a constructor that does this:
#'   [create_afun_summary()].
#'
#' @examples
#' # `create_afun_summary()` to create combined `afun`
#'
#' afun <- create_afun_summary(
#'   .stats = NULL,
#'   .formats = c(median = "xx."),
#'   .labels = c(median = "My median"),
#'   .indent_mods = c(median = 1L)
#' )
#' ## Fabricated dataset.
#' dta_test <- data.frame(
#'   USUBJID = rep(1:6, each = 3),
#'   PARAMCD = rep("lab", 6 * 3),
#'   AVISIT  = rep(paste0("V", 1:3), 6),
#'   ARM     = rep(LETTERS[1:3], rep(6, 3)),
#'   AVAL    = c(9:1, rep(NA, 9))
#' )
#'
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   analyze(vars = "AVAL", afun = afun)
#'
#' build_table(l, df = dta_test)
#'
#' @export
create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) {
  # Returns a closure; the customised methods below are rebuilt each time the closure is called.
  function(x,
           ...,
           .N_row, # nolint
           .N_col, # nolint
           .var) {
    # Temporary generic whose methods (`afun.numeric`, `afun.factor`, ...) are defined
    # locally in this function, which is where the generic is called from.
    afun <- function(x, ...) {
      UseMethod("afun", x)
    }

    # Numeric method: statistics are selected among those that have a default numeric format.
    numeric_stats <- afun_selected_stats(
      .stats,
      all_stats = names(.a_summary_numeric_formats)
    )
    afun.numeric <- make_afun( # nolint
      a_summary.numeric,
      .stats = numeric_stats,
      .formats = extract_by_name(.formats, numeric_stats),
      .labels = extract_by_name(.labels, numeric_stats),
      .indent_mods = extract_by_name(.indent_mods, numeric_stats)
    )

    # Factor, character and logical methods share the same candidate statistics.
    # `count` and `count_fraction` are passed as `.ungroup_stats` for factor and character only.
    factor_stats <- afun_selected_stats(.stats, c("n", "count", "count_fraction"))
    ungroup_stats <- afun_selected_stats(.stats, c("count", "count_fraction"))
    afun.factor <- make_afun( # nolint
      a_summary.factor,
      .stats = factor_stats,
      .formats = extract_by_name(.formats, factor_stats),
      .labels = extract_by_name(.labels, factor_stats),
      .indent_mods = extract_by_name(.indent_mods, factor_stats),
      .ungroup_stats = ungroup_stats
    )

    afun.character <- make_afun( # nolint
      a_summary.character,
      .stats = factor_stats,
      .formats = extract_by_name(.formats, factor_stats),
      .labels = extract_by_name(.labels, factor_stats),
      .indent_mods = extract_by_name(.indent_mods, factor_stats),
      .ungroup_stats = ungroup_stats
    )

    afun.logical <- make_afun( # nolint
      a_summary.logical,
      .stats = factor_stats,
      .formats = extract_by_name(.formats, factor_stats),
      .labels = extract_by_name(.labels, factor_stats),
      .indent_mods = extract_by_name(.indent_mods, factor_stats)
    )

    # Dispatch on the class of `x` to one of the customised methods defined above.
    afun(
      x = x,
      ...,
      .N_row = .N_row,
      .N_col = .N_col,
      .var = .var
    )
  }
}

#' @describeIn summarize_variables Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... arguments passed to `s_summary()`.
#'
#' @return
#' * `summarize_vars()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_summary()` to the table layout.
#'
#' @examples
#' ## Fabricated dataset.
#' dta_test <- data.frame(
#'   USUBJID = rep(1:6, each = 3),
#'   PARAMCD = rep("lab", 6 * 3),
#'   AVISIT  = rep(paste0("V", 1:3), 6),
#'   ARM     = rep(LETTERS[1:3], rep(6, 3)),
#'   AVAL    = c(9:1, rep(NA, 9))
#' )
#'
#' # `summarize_vars()` in `rtables` pipelines
#' ## Default output within a `rtables` pipeline.
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   summarize_vars(vars = "AVAL")
#'
#' build_table(l, df = dta_test)
#'
#' ## Select and format statistics output.
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   summarize_vars(
#'     vars = "AVAL",
#'     .stats = c("n", "mean_sd", "quantiles"),
#'     .formats = c("mean_sd" = "xx.x, xx.x"),
#'     .labels = c(n = "n", mean_sd = "Mean, SD", quantiles = c("Q1 - Q3"))
#'   )
#'
#' results <- build_table(l, df = dta_test)
#' as_html(results)
#'
#' ## Use arguments interpreted by `s_summary`.
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   summarize_vars(vars = "AVAL", na.rm = FALSE)
#'
#' results <- build_table(l, df = dta_test)
#'
#' ## Handle `NA` levels first when summarizing factors.
#' dta_test$AVISIT <- NA_character_
#' dta_test <- df_explicit_na(dta_test)
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   summarize_vars(vars = "AVISIT", na.rm = FALSE)
#'
#' results <- build_table(l, df = dta_test)
#' \dontrun{
#' Viewer(results)
#' }
#'
#' @export
summarize_vars <- function(lyt,
                           vars,
                           var_labels = vars,
                           nested = TRUE,
                           ...,
                           show_labels = "default",
                           table_names = vars,
                           section_div = NA_character_,
                           .stats = c("n", "mean_sd", "median", "range", "count_fraction"),
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = NULL) {
  # One analysis function is built from the statistic/format/label/indent selections
  # and reused for every variable in `vars`.
  summary_afun <- create_afun_summary(.stats, .formats, .labels, .indent_mods)

  # Everything passed through `...` is handed on as extra arguments of the analysis function.
  stat_args <- list(...)

  analyze(
    lyt = lyt,
    vars = vars,
    afun = summary_afun,
    extra_args = stat_args,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    nested = nested,
    section_div = section_div,
    inclNAs = TRUE
  )
}

#' Multivariate Logistic Regression Table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Layout-creating function which summarizes a logistic variable regression for binary outcome with
#' categorical/continuous covariates in model statement. For each covariate category (if categorical)
#' or specified values (if continuous), present degrees of freedom, regression parameter estimate and
#' standard error (SE) relative to reference group or category. Report odds ratios for each covariate
#' category or specified values and corresponding Wald confidence intervals as default but allow user
#' to specify other confidence levels. Report p-value for Wald chi-square test of the null hypothesis
#' that covariate has no effect on response in model containing all specified covariates.
#' Allow option to include one two-way interaction and present similar output for
#' each interaction degree of freedom.
#'
#' @inheritParams argument_convention
#' @param drop_and_remove_str (`character`)\cr string to be dropped and removed.
#'
#' @return A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
#'   Adding this function to an `rtable` layout will add a logistic regression variable summary to the table layout.
#'
#' @note For the formula, the variable names need to be standard `data.frame` column names without
#'   special characters.
#'
#' @examples
#' library(dplyr)
#' library(broom)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' df <- tidy(mod1, conf_level = 0.99)
#' df2 <- tidy(mod2, conf_level = 0.99)
#'
#' # flagging empty strings with "_"
#' df <- df_explicit_na(df, na_level = "_")
#' df2 <- df_explicit_na(df2, na_level = "_")
#'
#' result1 <- basic_table() %>%
#'   summarize_logistic(
#'     conf_level = 0.95,
#'     drop_and_remove_str = "_"
#'   ) %>%
#'   build_table(df = df)
#' result1
#'
#' result2 <- basic_table() %>%
#'   summarize_logistic(
#'     conf_level = 0.95,
#'     drop_and_remove_str = "_"
#'   ) %>%
#'   build_table(df = df2)
#' result2
#'
#' @export
summarize_logistic <- function(lyt,
                               conf_level,
                               drop_and_remove_str = "") {
  checkmate::assert_string(drop_and_remove_str)

  # The same split function is applied to every row split below.
  level_filter <- drop_and_remove_levels(drop_and_remove_str)

  # Row split on `var`, labelled by its companion `<var>_label` column.
  split_by <- function(lyt, var) {
    split_rows_by(lyt, var = var, labels_var = paste0(var, "_label"), split_fun = level_filter)
  }

  # Columns first, then nested row splits; a content summary is attached after the
  # `variable`, `term` and `reference` levels (none directly after `interaction`).
  lyt <- logistic_regression_cols(lyt, conf_level = conf_level)
  lyt <- split_by(lyt, "variable")
  lyt <- logistic_summary_by_flag("is_variable_summary")(lyt)
  lyt <- split_by(lyt, "term")
  lyt <- logistic_summary_by_flag("is_term_summary")(lyt)
  lyt <- split_by(lyt, "interaction")
  lyt <- split_by(lyt, "reference")
  logistic_summary_by_flag("is_reference_summary")(lyt)
}

#' Fit for Logistic Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Fit a (conditional) logistic regression model.
#'
#' @inheritParams argument_convention
#' @param data (`data.frame`)\cr the data frame on which the model is fit.
#' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`.
#'   This will be used when fitting the (conditional) logistic regression model on the left hand
#'   side of the formula.
#'
#' @return A fitted logistic regression model.
#'
#' @section Model Specification:
#'
#' The `variables` list needs to include the following elements:
#'   * `arm`: Treatment arm variable name.
#'   * `response`: The response variable name. Usually this is a 0/1 variable.
#'   * `covariates`: This is either `NULL` (no covariates) or a character vector of covariate variable names.
#'   * `interaction`: This is either `NULL` (no interaction) or a string of a single covariate variable name already
#'     included in `covariates`. Then the interaction with the treatment arm is included in the model.
#'
#' @examples
#' library(dplyr)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' @export
fit_logistic <- function(data,
                         variables = list(
                           response = "Response",
                           arm = "ARMCD",
                           covariates = NULL,
                           interaction = NULL,
                           strata = NULL
                         ),
                         response_definition = "response") {
  assert_df_with_variables(data, variables)
  checkmate::assert_subset(names(variables), c("response", "arm", "covariates", "interaction", "strata"))
  checkmate::assert_string(response_definition)
  checkmate::assert_true(grepl("response", response_definition))

  # A stratified analysis switches both the formula (extra `strata()` term) and the fitting routine.
  has_strata <- !is.null(variables$strata)

  # Left-hand side: the first literal "response" in the definition is swapped for the actual column name.
  lhs <- sub("response", variables$response, response_definition, fixed = TRUE)

  # Right-hand side, assembled in the order: arm, covariates, arm-by-covariate interaction, strata.
  form <- paste0(lhs, " ~ ", variables$arm)
  if (!is.null(variables$covariates)) {
    covariate_terms <- paste(variables$covariates, collapse = " + ")
    form <- paste0(form, " + ", covariate_terms)
  }
  if (!is.null(variables$interaction)) {
    # Only a single covariate that is already part of the model may interact with the arm.
    checkmate::assert_string(variables$interaction)
    checkmate::assert_subset(variables$interaction, variables$covariates)
    form <- paste0(form, " + ", variables$arm, ":", variables$interaction)
  }
  if (has_strata) {
    strata_arg <- variables$strata
    if (length(strata_arg) > 1) {
      # Several stratification variables are collapsed into one via `interaction()`.
      strata_arg <- paste0("I(interaction(", paste0(strata_arg, collapse = ", "), "))")
    }
    form <- paste0(form, "+ strata(", strata_arg, ")")
  }

  formula <- stats::as.formula(form)
  if (has_strata) {
    return(
      clogit_with_tryCatch(
        formula = formula,
        data = data,
        x = TRUE
      )
    )
  }
  stats::glm(
    formula = formula,
    data = data,
    family = stats::binomial("logit")
  )
}

#' Custom Tidy Method for Binomial GLM Results
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper method (for [broom::tidy()]) to prepare a data frame from a `glm` object
#' with `binomial` family.
#'
#' @inheritParams argument_convention
#' @param at (`NULL` or `numeric`)\cr optional values for the interaction variable. Otherwise the median is used.
#' @param fit_glm logistic regression model fitted by [stats::glm()] with "binomial" family.
#'
#' @return A `data.frame` containing the tidied model.
#'
#' @method tidy glm
#'
#' @seealso [h_logistic_regression] for relevant helper functions.
#'
#' @examples
#' library(dplyr)
#' library(broom)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' df <- tidy(mod1, conf_level = 0.99)
#' df2 <- tidy(mod2, conf_level = 0.99)
#'
#' @export
tidy.glm <- function(fit_glm, # nolint
                     conf_level = 0.95,
                     at = NULL) {
  checkmate::assert_class(fit_glm, "glm")
  checkmate::assert_set_equal(fit_glm$family$family, "binomial")

  term_labels <- attr(stats::terms(fit_glm), "term.labels")
  model_vars <- names(attr(fit_glm$terms, "dataClasses"))

  # A term label that is not itself a model variable is treated as an interaction term.
  has_interaction <- any(!term_labels %in% model_vars)

  df <- if (has_interaction) {
    h_logistic_inter_terms(
      x = term_labels,
      fit_glm = fit_glm,
      conf_level = conf_level,
      at = at
    )
  } else {
    h_logistic_simple_terms(
      x = term_labels,
      fit_glm = fit_glm,
      conf_level = conf_level
    )
  }

  # Turn the identifier columns into factors whose levels follow order of first appearance.
  for (id_col in c("variable", "term", "interaction", "reference")) {
    values <- df[[id_col]]
    df[[id_col]] <- factor(values, levels = unique(values))
  }
  df
}

#' Logistic Regression Multivariate Column Layout Function
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Layout-creating function which creates a multivariate column layout summarizing logistic
#' regression results. This function is a wrapper for [rtables::split_cols_by_multivar()].
#'
#' @inheritParams argument_convention
#'
#' @return A layout object suitable for passing to further layouting functions. Adding this
#'   function to an `rtable` layout will split the table into columns corresponding to
#'   statistics `df`, `estimate`, `std_error`, `odds_ratio`, `ci`, and `pvalue`.
#'
#' @export
logistic_regression_cols <- function(lyt,
                                     conf_level = 0.95) {
  # One entry per column: the name is the analysed variable, the value is the column header.
  col_labels <- c(
    df = "Degrees of Freedom",
    estimate = "Parameter Estimate",
    std_error = "Standard Error",
    odds_ratio = "Odds Ratio",
    ci = paste("Wald", f_conf_level(conf_level)),
    pvalue = "p-value"
  )

  split_cols_by_multivar(
    lyt = lyt,
    vars = names(col_labels),
    varlabels = col_labels
  )
}

#' Logistic Regression Summary Table Constructor Function
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Constructor for content functions to be used in [`summarize_logistic()`] to summarize
#' logistic regression results. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @param flag_var (`string`)\cr variable name identifying which row should be used in this
#'   content function.
#'
#' @return A content function.
#'
#' @export
logistic_summary_by_flag <- function(flag_var) {
  checkmate::assert_string(flag_var)

  function(lyt) {
    # Cell format for each statistic column, keyed by the column's variable name.
    stat_formats <- list(
      df = "xx.",
      estimate = "xx.xxx",
      std_error = "xx.xxx",
      odds_ratio = ">999.99",
      ci = format_extreme_values_ci(2L),
      pvalue = "x.xxxx | (<0.0001)"
    )

    # One content function per column, all keyed on the same flag variable.
    cfun_list <- Map(
      function(stat, fmt) cfun_by_flag(stat, flag_var, format = fmt),
      names(stat_formats),
      stat_formats
    )

    summarize_row_groups(lyt = lyt, cfun = cfun_list)
  }
}

#' Controls for Cox Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Sets a list of parameters for Cox regression fit. Used internally.
#'
#' @inheritParams argument_convention
#' @param pval_method (`string`)\cr the method used for estimation of p-values; `wald` (default) or `likelihood`.
#' @param interaction (`flag`)\cr if `TRUE`, the model includes the interaction between the studied
#'   treatment and candidate covariate. Note that for univariate models without treatment arm, and
#'   multivariate models, no interaction can be used so that this needs to be `FALSE`.
#' @param ties (`string`)\cr among `exact` (equivalent to `DISCRETE` in SAS), `efron` and `breslow`,
#'   see [survival::coxph()]. Note: there is no equivalent of SAS `EXACT` method in R.
#'
#' @return A `list` of items with names corresponding to the arguments.
#'
#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()].
#'
#' @examples
#' control_coxreg()
#'
#' @export
control_coxreg <- function(pval_method = c("wald", "likelihood"),
                           ties = c("exact", "efron", "breslow"),
                           conf_level = 0.95,
                           interaction = FALSE) {
  # Resolve the enumerated choices to a single value each (first choice when not supplied).
  pval_method <- match.arg(pval_method)
  ties <- match.arg(ties)

  # Validate the remaining settings.
  checkmate::assert_flag(interaction)
  assert_proportion_value(conf_level)

  # Return the validated settings as a list named after the arguments.
  mget(c("pval_method", "ties", "conf_level", "interaction"))
}

#' Custom Tidy Methods for Cox Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @param x (`list`)\cr Result of the Cox regression model fitted by [fit_coxreg_univar()] (for univariate models)
#'   or [fit_coxreg_multivar()] (for multivariate models).
#'
#' @return [tidy()] returns:
#' * For `summary.coxph` objects,  a `data.frame` with columns: `Pr(>|z|)`, `exp(coef)`, `exp(-coef)`, `lower .95`,
#'   `upper .95`, `level`, and `n`.
#' * For `coxreg.univar` objects, a `data.frame` with columns: `effect`, `term`, `term_label`, `level`, `n`, `hr`,
#'   `lcl`, `ucl`, `pval`, and `ci`.
#' * For `coxreg.multivar` objects, a `data.frame` with columns: `term`, `pval`, `term_label`, `hr`, `lcl`, `ucl`,
#'   `level`, and `ci`.
#'
#' @seealso [cox_regression]
#'
#' @name tidy_coxreg
NULL

#' @describeIn tidy_coxreg Custom tidy method for [survival::coxph()] summary results.
#'
#' Tidy the [survival::coxph()] results into a `data.frame` to extract model results.
#'
#' @method tidy summary.coxph
#'
#' @examples
#' library(survival)
#' library(broom)
#'
#' set.seed(1, kind = "Mersenne-Twister")
#'
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   data.frame(
#'     time = stop,
#'     status = event,
#'     armcd = as.factor(rx),
#'     covar1 = as.factor(enum),
#'     covar2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4, labels = c("F", "F", "M", "M")
#'     )
#'   )
#' )
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#'
#' formula <- "survival::Surv(time, status) ~ armcd + covar1"
#' msum <- summary(coxph(stats::as.formula(formula), data = dta_bladder))
#' tidy(msum)
#'
#' @export
tidy.summary.coxph <- function(x, # nolint
                               ...) {
  checkmate::assert_class(x, "summary.coxph")

  coef_mat <- x$coefficients
  # Row names carry the term levels; capture them before the tibble conversion.
  term_levels <- rownames(coef_mat)

  coef_tbl <- tibble::as_tibble(coef_mat)
  ci_tbl <- tibble::as_tibble(x$conf.int)

  # Keep only the p-value column(s) of the coefficient table, next to the full confidence-interval table.
  is_pval_col <- grepl("Pr", names(coef_tbl))
  ret <- cbind(coef_tbl[, is_pval_col], ci_tbl)

  ret$level <- term_levels
  ret$n <- x[["n"]]
  ret
}

#' @describeIn tidy_coxreg Custom tidy method for a univariate Cox regression.
#'
#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_univar()].
#'
#' @method tidy coxreg.univar
#'
#' @examples
#' ## Cox regression: arm + 1 covariate.
#' mod1 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = "covar1"
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91)
#' )
#'
#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates.
#' mod2 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91, interaction = TRUE)
#' )
#'
#' tidy(mod1)
#' tidy(mod2)
#'
#' @export
tidy.coxreg.univar <- function(x, # nolint
                               ...) {
  checkmate::assert_class(x, "coxreg.univar")
  arm_var <- x$vars$arm
  model_terms <- c(arm_var, x$vars$covariates)

  # Choose the per-model extractor once; each variant receives a fitted model and the term paired with it.
  extract_one <- if (!("arm" %in% names(x$vars))) {
    # No treatment arm: covariate-only models.
    function(mod, term) {
      h_coxreg_multivar_extract(
        var = term,
        data = x$data,
        mod = mod,
        control = x$control
      )
    }
  } else if (x$control$interaction) {
    # Treatment arm with treatment-by-covariate interaction.
    function(mod, term) {
      h_coxreg_extract_interaction(
        effect = arm_var, covar = term, mod = mod, data = x$data,
        at = x$at, control = x$control
      )
    }
  } else {
    # Treatment arm without interaction.
    function(mod, term) {
      h_coxreg_univar_extract(
        effect = arm_var, covar = term, data = x$data, mod = mod,
        control = x$control
      )
    }
  }
  result <- do.call(rbind, Map(extract_one, x$mod, model_terms))

  # Applies `empty_vector_if_na()` to every element of a column, giving a list-column.
  blank_na <- function(column) lapply(column, empty_vector_if_na)

  # Pair the lower and upper confidence limits of each row.
  result$ci <- Map(function(lower, upper) c(lower, upper), result$lcl, result$ucl)
  result$n <- blank_na(result$n)
  result$ci <- blank_na(result$ci)
  result$hr <- blank_na(result$hr)
  if (x$control$interaction) {
    result$pval_inter <- blank_na(result$pval_inter)
    # Remove interaction p-values due to change in specifications.
    result$pval[result$effect != "Treatment:"] <- NA
  }
  result$pval <- blank_na(result$pval)

  attr(result, "conf_level") <- x$control$conf_level
  result
}

#' @describeIn tidy_coxreg Custom tidy method for a multivariate Cox regression.
#'
#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_multivar()].
#'
#' @method tidy coxreg.multivar
#'
#' @examples
#' multivar_model <- fit_coxreg_multivar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#' broom::tidy(multivar_model)
#'
#' @export
tidy.coxreg.multivar <- function(x, # nolint
                                 ...) {
  checkmate::assert_class(x, "coxreg.multivar")
  model_terms <- c(x$vars$arm, x$vars$covariates)

  # Extract one block of rows per model term from the single fitted model, then stack them.
  per_term <- Map(
    function(term) {
      h_coxreg_multivar_extract(
        var = term, data = x$data,
        mod = x$mod, control = x$control
      )
    },
    model_terms
  )
  result <- do.call(rbind, per_term)

  # Applies `empty_vector_if_na()` to every element of a column, giving a list-column.
  blank_na <- function(column) lapply(column, empty_vector_if_na)

  # Pair the lower and upper confidence limits of each row.
  result$ci <- Map(function(lower, upper) c(lower, upper), result$lcl, result$ucl)
  result$ci <- blank_na(result$ci)
  result$hr <- blank_na(result$hr)
  result$pval <- blank_na(result$pval)

  # The `n` column is not part of the multivariate output.
  keep_cols <- names(result) != "n"
  result <- result[, keep_cols]
  attr(result, "conf_level") <- x$control$conf_level

  result
}

#' Fits for Cox Proportional Hazards Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Fitting functions for univariate and multivariate Cox regression models.
#'
#' @param variables (`list`)\cr a named list of the names of variables found in `data`, with elements
#'   corresponding to the `time`, `event`, `arm`, `strata`, and `covariates` terms. If `arm` is missing from
#'   `variables`, then only Cox model(s) including the `covariates` will be fitted and the corresponding effect
#'   estimates will be tabulated later.
#' @param data (`data.frame`)\cr the dataset containing the variables to fit the models.
#' @param at (`list` of `numeric`)\cr when the candidate covariate is a `numeric`, use `at` to specify
#'   the value of the covariate at which the effect should be estimated.
#' @param control (`list`)\cr a list of parameters as returned by the helper function [control_coxreg()].
#'
#' @seealso [h_cox_regression] for relevant helper functions, [cox_regression].
#'
#' @examples
#' library(survival)
#'
#' set.seed(1, kind = "Mersenne-Twister")
#'
#' # Testing dataset [survival::bladder].
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   data.frame(
#'     time = stop,
#'     status = event,
#'     armcd = as.factor(rx),
#'     covar1 = as.factor(enum),
#'     covar2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4, labels = c("F", "F", "M", "M")
#'     )
#'   )
#' )
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#'
#' plot(
#'   survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder),
#'   lty = 2:4,
#'   xlab = "Months",
#'   col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
#' )
#'
#' @name fit_coxreg
NULL

#' @describeIn fit_coxreg Fit a series of univariate Cox regression models given the inputs.
#'
#' @return
#' * `fit_coxreg_univar()` returns a `coxreg.univar` class object which is a named `list`
#'   with 5 elements:
#'   * `mod`: Cox regression models fitted by [survival::coxph()].
#'   * `data`: The original data frame input.
#'   * `control`: The original control input.
#'   * `vars`: The variables used in the model.
#'   * `at`: Value of the covariate at which the effect should be estimated.
#'
#' @note When using `fit_coxreg_univar` there should be two study arms.
#'
#' @examples
#' # fit_coxreg_univar
#'
#' ## Cox regression: arm + 1 covariate.
#' mod1 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = "covar1"
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91)
#' )
#'
#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates.
#' mod2 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91, interaction = TRUE)
#' )
#'
#' ## Cox regression: arm + 1 covariate, stratified analysis.
#' mod3 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", strata = "covar2",
#'     covariates = c("covar1")
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91)
#' )
#'
#' ## Cox regression: no arm, only covariates.
#' mod4 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#'
#' @export
fit_coxreg_univar <- function(variables,
                              data,
                              at = list(),
                              control = control_coxreg()) {
  checkmate::assert_list(variables, names = "named")
  # The treatment arm is optional; without it only covariate models are fitted.
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_df_with_variables(data, variables)
  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  if (!is.null(variables$strata)) {
    # Likelihood-based p-values are not allowed together with stratification.
    checkmate::assert_disjunct(control$pval_method, "likelihood")
  }
  if (has_arm) {
    # Exactly two study arms are required.
    assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  }

  # Drop unused factor levels of all model variables before fitting.
  vars <- unlist(variables[c(arm_name, "covariates", "strata")], use.names = FALSE)
  for (i in vars) {
    if (is.factor(data[[i]])) {
      # `droplevels()` recodes the underlying integer codes but strips extra attributes
      # (e.g. the variable `label`). Overwriting only the `levels` attribute would keep the
      # attributes but leave the codes untouched, which mislabels values whenever an unused
      # level is not the last one. So: recode first, then restore the original attributes
      # with the reduced level set.
      original <- data[[i]]
      recoded <- droplevels(original)
      restored_attrs <- attributes(original)
      restored_attrs[["levels"]] <- levels(recoded)
      attributes(recoded) <- restored_attrs
      data[[i]] <- recoded
    }
  }

  # One Cox model per formula returned by the helper.
  forms <- h_coxreg_univar_formulas(variables, interaction = control$interaction)
  mod <- lapply(
    forms, function(x) {
      survival::coxph(formula = stats::as.formula(x), data = data, ties = control$ties)
    }
  )
  structure(
    list(
      mod = mod,
      data = data,
      control = control,
      vars = variables,
      at = at
    ),
    class = "coxreg.univar"
  )
}

#' @describeIn fit_coxreg Fit a multivariate Cox regression model.
#'
#' @return
#' * `fit_coxreg_multivar()` returns a `coxreg.multivar` class object which is a named list
#'   with 4 elements:
#'   * `mod`: Cox regression model fitted by [survival::coxph()].
#'   * `data`: The original data frame input.
#'   * `control`: The original control input.
#'   * `vars`: The variables used in the model.
#'
#' @examples
#' # fit_coxreg_multivar
#'
#' ## Cox regression: multivariate Cox regression.
#' multivar_model <- fit_coxreg_multivar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#'
#' # Example without treatment arm.
#' multivar_covs_model <- fit_coxreg_multivar(
#'   variables = list(
#'     time = "time", event = "status",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#'
#' @export
fit_coxreg_multivar <- function(variables,
                                data,
                                control = control_coxreg()) {
  checkmate::assert_list(variables, names = "named")
  # The treatment arm is optional; it is only validated below when it is present in `variables`.
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  if (!is.null(variables$covariates)) {
    checkmate::assert_character(variables$covariates)
  }

  # Interaction terms are not supported here: `control$interaction` must be `FALSE`.
  checkmate::assert_false(control$interaction)
  assert_df_with_variables(data, variables)
  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  if (!is.null(variables$strata)) {
    # With stratification the p-value method must not be "likelihood".
    checkmate::assert_disjunct(control$pval_method, "likelihood")
  }

  # A single model is fitted from the formula string built by the helper.
  form <- h_coxreg_multivar_formula(variables)
  mod <- survival::coxph(
    formula = stats::as.formula(form),
    data = data,
    ties = control$ties
  )
  # Bundle the fit with its inputs so that the tidy method can access them.
  structure(
    list(
      mod = mod,
      data = data,
      control = control,
      vars = variables
    ),
    class = "coxreg.multivar"
  )
}

#' Muffled `car::Anova`
#'
#' Applied on survival models, [car::Anova()] signals that the `strata` term is dropped from the model formula when
#' present. This function deliberately muffles this message.
#'
#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()].
#' @param test_statistic (`string`)\cr the method used for estimation of p-values; `wald` or `likelihood`.
#'
#' @return Returns the output of [car::Anova()], with messages muffled. If [car::Anova()] fails, an error
#'   is raised which hints at convergence problems and quotes the original error message.
#'
#' @keywords internal
muffled_car_anova <- function(mod, test_statistic) {
  withCallingHandlers(
    expr = {
      car::Anova(
        mod,
        test.statistic = test_statistic,
        type = "III"
      )
    },
    # Messages are silenced; evaluation continues.
    message = function(m) invokeRestart("muffleMessage"),
    # Any error is re-raised with a hint. Only the message text of the original condition is
    # quoted: pasting the condition object itself would repeat the hint once per condition field.
    error = function(e) {
      stop(
        paste(
          "the model seems to have convergence problems, please try to change",
          "the configuration of covariates or strata variables, e.g.",
          "- original error:", conditionMessage(e)
        ),
        call. = FALSE
      )
    }
  )
}

#' Difference Test for Two Proportions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Various tests were implemented to test the difference between two proportions.
#'
#' @inheritParams argument_convention
#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns.
#'
#' @seealso [h_prop_diff_test]
#'
#' @name prop_diff_test
NULL

#' @describeIn prop_diff_test Statistics function which tests the difference between two proportions.
#'
#' @param method (`string`)\cr one of `chisq`, `cmh`, `fisher`, or `schouten`; specifies the test used
#'   to calculate the p-value.
#'
#' @return
#' * `s_test_proportion_diff()` returns a named `list` with a single item `pval` with an attribute `label`
#'   describing the method used. The p-value tests the null hypothesis that proportions in two groups are the same.
#'
#' @examples
#' # Statistics function
#' dta <- data.frame(
#'   rsp = sample(c(TRUE, FALSE), 100, TRUE),
#'   grp = factor(rep(c("A", "B"), each = 50)),
#'   strat = factor(rep(c("V", "W", "X", "Y", "Z"), each = 20))
#' )
#'
#' # Internal function - s_test_proportion_diff
#' \dontrun{
#' s_test_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   variables = list(strata = "strat"),
#'   method = "cmh"
#' )
#' }
#'
#' @keywords internal
s_test_proportion_diff <- function(df,
                                   .var,
                                   .ref_group,
                                   .in_ref_col,
                                   variables = list(strata = NULL),
                                   method = c("chisq", "schouten", "fisher", "cmh")) {
  method <- match.arg(method)

  # Placeholder result, kept as is when the current column is the reference column.
  pval <- ""

  if (!.in_ref_col) {
    assert_df_with_variables(df, list(rsp = .var))
    assert_df_with_variables(.ref_group, list(rsp = .var))

    # Stack reference and comparison observations: responses first, then the matching group indicator.
    rsp <- factor(
      c(.ref_group[[.var]], df[[.var]]),
      levels = c("TRUE", "FALSE")
    )
    group_sizes <- c(nrow(.ref_group), nrow(df))
    grp <- factor(
      rep(c("ref", "Not-ref"), group_sizes),
      levels = c("ref", "Not-ref")
    )

    # Strata are validated whenever they are supplied and are mandatory for the CMH test.
    needs_strata <- !is.null(variables$strata) || method == "cmh"
    if (needs_strata) {
      strata <- variables$strata
      checkmate::assert_false(is.null(strata))
      strata_vars <- stats::setNames(as.list(strata), strata)
      assert_df_with_variables(df, strata_vars)
      assert_df_with_variables(.ref_group, strata_vars)
      strata <- c(interaction(.ref_group[strata]), interaction(df[strata]))
    }

    # Only the CMH test uses the additional strata dimension.
    tbl <- if (method == "cmh") {
      table(grp, rsp, strata)
    } else {
      table(grp, rsp)
    }

    pval <- switch(method,
      chisq = prop_chisq(tbl),
      cmh = prop_cmh(tbl),
      fisher = prop_fisher(tbl),
      schouten = prop_schouten(tbl)
    )
  }

  list(pval = formatters::with_label(pval, d_test_proportion_diff(method)))
}

#' Description of the Difference Test Between Two Proportions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function that describes the analysis in `s_test_proportion_diff`.
#'
#' @inheritParams s_test_proportion_diff
#'
#' @return `string` describing the test from which the p-value is derived.
#'
#' @export
d_test_proportion_diff <- function(method) {
  checkmate::assert_string(method)

  # Display name of every supported test, keyed by its `method` identifier.
  test_names <- c(
    chisq = "Chi-Squared Test",
    schouten = "Chi-Squared Test with Schouten Correction",
    fisher = "Fisher's Exact Test",
    cmh = "Cochran-Mantel-Haenszel Test"
  )

  # Unknown identifiers are rejected rather than silently labelled.
  if (!(method %in% names(test_names))) {
    stop(paste(method, "does not have a description"))
  }

  paste0("p-value (", test_names[[method]], ")")
}

#' @describeIn prop_diff_test Formatted analysis function which is used as `afun` in `test_proportion_diff()`.
#'
#' @return
#' * `a_test_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_test_proportion_diff
#' \dontrun{
#' a_test_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   variables = list(strata = "strat"),
#'   method = "cmh"
#' )
#' }
#'
#' @keywords internal
a_test_proportion_diff <- make_afun(
  # Statistics function whose results are wrapped for use in a table layout.
  s_test_proportion_diff,
  # Format string applied to the `pval` statistic.
  .formats = c(pval = "x.xxxx | (<0.0001)"),
  # Indent modifier applied to the `pval` row.
  .indent_mods = c(pval = 1L)
)

#' @describeIn prop_diff_test Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... other arguments are passed to [s_test_proportion_diff()].
#'
#' @return
#' * `test_proportion_diff()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_test_proportion_diff()` to the table layout.
#'
#' @examples
#' # With `rtables` pipelines.
#' l <- basic_table() %>%
#'   split_cols_by(var = "grp", ref_group = "B") %>%
#'   test_proportion_diff(
#'     vars = "rsp",
#'     method = "cmh", variables = list(strata = "strat")
#'   )
#'
#' build_table(l, df = dta)
#'
#' @export
test_proportion_diff <- function(lyt,
                                 vars,
                                 ...,
                                 var_labels = vars,
                                 show_labels = "hidden",
                                 table_names = vars,
                                 .stats = NULL,
                                 .formats = NULL,
                                 .labels = NULL,
                                 .indent_mods = NULL) {
  # Specialise the formatted analysis function with the caller's choice of
  # statistics, formats, labels and indentation.
  afun <- make_afun(
    a_test_proportion_diff,
    .indent_mods = .indent_mods,
    .labels = .labels,
    .formats = .formats,
    .stats = .stats
  )

  # Everything passed through `...` is forwarded to the analysis function as extra arguments.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = afun,
    extra_args = list(...),
    var_labels = var_labels,
    table_names = table_names,
    show_labels = show_labels
  )
}

#' Helper Functions to Test Proportion Differences
#'
#' Helper functions to implement various tests on the difference between two proportions.
#'
#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns.
#'
#' @return A p-value.
#'
#' @seealso [prop_diff_test()] for implementation of these helper functions.
#'
#' @name h_prop_diff_test
NULL

#' @describeIn h_prop_diff_test performs Chi-Squared test. Internally calls [stats::prop.test()].
#'
#' @examples
#' # Non-stratified proportion difference test
#'
#' ## Data
#' A <- 20
#' B <- 20
#' set.seed(1)
#' rsp <- c(
#'   sample(c(TRUE, FALSE), size = A, prob = c(3 / 4, 1 / 4), replace = TRUE),
#'   sample(c(TRUE, FALSE), size = A, prob = c(1 / 2, 1 / 2), replace = TRUE)
#' )
#' grp <- c(rep("A", A), rep("B", B))
#' tbl <- table(grp, rsp)
#'
#' ## Chi-Squared test
#' # Internal function - prop_chisq
#' \dontrun{
#' prop_chisq(tbl)
#' }
#'
#' @keywords internal
prop_chisq <- function(tbl) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)

  # Fix the column order to (responders, non-responders), which is what
  # `stats::prop.test()` reads as (successes, failures).
  tbl <- tbl[, c("TRUE", "FALSE")]

  # When all observations share the same response there is nothing to compare:
  # a p-value of 1 is returned without running the test.
  has_empty_column <- any(colSums(tbl) == 0)
  if (has_empty_column) {
    1
  } else {
    stats::prop.test(tbl, correct = FALSE)$p.value
  }
}

#' @describeIn h_prop_diff_test performs stratified Cochran-Mantel-Haenszel test. Internally calls
#'   [stats::mantelhaen.test()]. Note that strata with less than two observations are automatically discarded.
#'
#' @param ary (`array`, 3 dimensions)\cr array with two groups in rows, the binary response
#'   (`TRUE`/`FALSE`) in columns, and the strata in the third dimension.
#'
#' @examples
#' # Stratified proportion difference test
#'
#' ## Data
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
#' grp <- factor(rep(c("A", "B"), each = 50))
#' strata <- factor(rep(c("V", "W", "X", "Y", "Z"), each = 20))
#' tbl <- table(grp, rsp, strata)
#'
#' ## Cochran-Mantel-Haenszel test
#' # Internal function - prop_cmh
#' \dontrun{
#' prop_cmh(tbl)
#' }
#'
#' @keywords internal
prop_cmh <- function(ary) {
  # Input must be a 2 x 2 x K array (groups x response x strata).
  checkmate::assert_array(ary)
  checkmate::assert_integer(c(ncol(ary), nrow(ary)), lower = 2, upper = 2)
  checkmate::assert_integer(length(dim(ary)), lower = 3, upper = 3)

  # Number of observations in each stratum (third dimension).
  n_per_stratum <- apply(ary, MARGIN = 3, sum)
  has_sparse_strata <- any(n_per_stratum < 5)

  if (has_sparse_strata) {
    warning("<5 data points in some strata. CMH test may be incorrect.")
    # Only strata holding at least two observations are kept for the test.
    keep_stratum <- n_per_stratum > 1
    ary <- ary[, , keep_stratum]
  }

  cmh_result <- stats::mantelhaen.test(ary, correct = FALSE)
  cmh_result$p.value
}

#' @describeIn h_prop_diff_test performs the Chi-Squared test with Schouten correction.
#'
#' @seealso For information on the Schouten correction (Schouten, 1980),
#'   visit https://onlinelibrary.wiley.com/doi/abs/10.1002/bimj.4710220305.
#'
#' @examples
#' ## Chi-Squared test + Schouten correction.
#' # Internal function - prop_schouten
#' \dontrun{
#' prop_schouten(tbl)
#' }
#'
#' @keywords internal
prop_schouten <- function(tbl) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)

  # Fix the column order to (responders, non-responders).
  tbl <- tbl[, c("TRUE", "FALSE")]

  # When all observations share the same response there is nothing to compare.
  if (any(colSums(tbl) == 0)) {
    return(1)
  }

  # Counts produced by `table()` are integers. The denominator below multiplies four
  # margins, which exceeds `.Machine$integer.max` already for a few hundred patients
  # per arm and would then evaluate to `NA`. Doing the arithmetic in double precision
  # avoids that overflow.
  storage.mode(tbl) <- "double"

  n <- sum(tbl)
  n1 <- sum(tbl[1, ]) # size of the first group (row 1)
  n2 <- sum(tbl[2, ]) # size of the second group (row 2)

  ad <- diag(tbl) # main diagonal cells
  bc <- diag(apply(tbl, 2, rev)) # anti-diagonal cells (rows reversed)
  ac <- tbl[, 1] # column of responders
  bd <- tbl[, 2] # column of non-responders

  # Chi-squared statistic with the Schouten continuity correction.
  t_schouten <- (n - 1) *
    (abs(prod(ad) - prod(bc)) - 0.5 * min(n1, n2))^2 /
    (n1 * n2 * sum(ac) * sum(bd))

  # The upper tail is requested directly: `1 - pchisq()` loses precision for
  # large statistics (very small p-values).
  stats::pchisq(t_schouten, df = 1, lower.tail = FALSE)
}

#' @describeIn h_prop_diff_test performs the Fisher's exact test. Internally calls [stats::fisher.test()].
#'
#' @examples
#' ## Fisher's exact test
#' # Internal function - prop_fisher
#' \dontrun{
#' prop_fisher(tbl)
#' }
#'
#' @keywords internal
prop_fisher <- function(tbl) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)

  # Fix the column order to (responders, non-responders) before testing.
  ordered_tbl <- tbl[, c("TRUE", "FALSE")]
  fisher_result <- stats::fisher.test(ordered_tbl)
  fisher_result$p.value
}

#' Combine Factor Levels
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Combine specified old factor levels into a single new level.
#'
#' @param x factor
#' @param levels level names to be combined
#' @param new_level name of new level
#'
#' @return A `factor` with the new levels.
#'
#' @examples
#' x <- factor(letters[1:5], levels = letters[5:1])
#' combine_levels(x, levels = c("a", "b"))
#'
#' combine_levels(x, c("e", "b"))
#'
#' @export
combine_levels <- function(x, levels, new_level = paste(levels, collapse = "/")) {
  checkmate::assert_factor(x)
  checkmate::assert_subset(levels, levels(x))

  # Rename every level to be combined; assigning the same name to several
  # levels merges them into one, at the position of the first of them.
  is_combined <- levels(x) %in% levels
  levels(x)[is_combined] <- new_level

  x
}

#' Conversion of a Vector to a Factor
#'
#' Converts `x` to a factor and keeps its attributes. Warns appropriately such that the user
#' can decide whether they prefer converting to factor manually (e.g. for full control of
#' factor levels).
#'
#' @param x (`atomic`)\cr object to convert.
#' @param x_name (`string`)\cr name of `x`.
#' @param na_level (`string`)\cr the explicit missing level which should be used when converting a character vector.
#' @param verbose defaults to `TRUE`. It prints out warnings and messages.
#'
#' @return A `factor` with same attributes (except class) as `x`. Does not modify `x` if already a `factor`.
#'
#' @examples
#' # Internal function - as_factor_keep_attributes
#' \dontrun{
#' as_factor_keep_attributes(formatters::with_label(c(1, 1, 2, 3), "id"), verbose = FALSE)
#' as_factor_keep_attributes(c("a", "b", ""), "id", verbose = FALSE)
#' }
#'
#' @keywords internal
as_factor_keep_attributes <- function(x,
                                      x_name = deparse(substitute(x)),
                                      na_level = "<Missing>",
                                      verbose = TRUE) {
  checkmate::assert_atomic(x)
  checkmate::assert_string(x_name)
  checkmate::assert_string(na_level)
  checkmate::assert_flag(verbose)

  # Factors are returned untouched.
  if (is.factor(x)) {
    return(x)
  }

  if (verbose) {
    warning(paste(
      "automatically converting", class(x)[1], "variable", x_name,
      "to factor, better manually convert to factor to avoid failures"
    ))
  }
  # This warning is raised regardless of `verbose`.
  if (length(x) == 0L) {
    warning(paste(
      x_name, "has length 0, this can lead to tabulation failures, better convert to factor"
    ))
  }

  # For character input, empty/whitespace strings and `NA`s are first turned into the
  # explicit `na_level`. That version is only used when such values actually occur.
  use_na_level <- FALSE
  if (is.character(x)) {
    x_no_na <- explicit_na(sas_na(x), label = na_level)
    use_na_level <- na_level %in% x_no_na
  }

  new_data <- if (use_na_level) {
    # The missing level is placed last.
    forcats::fct_relevel(x_no_na, na_level, after = Inf)
  } else {
    as.factor(x)
  }

  # Re-attach the attributes of the input vector to the new factor.
  do.call(structure, c(list(.Data = new_data), attributes(x)))
}

#' Labels for Bins in Percent
#'
#' This creates labels for quantile based bins in percent. This assumes the right-closed
#' intervals as produced by [cut_quantile_bins()].
#'
#' @param probs (`proportion` vector)\cr the probabilities identifying the quantiles.
#'   This is a sorted vector of unique `proportion` values, i.e. between 0 and 1. The
#'   boundaries 0 and 1 do not need to be included; they are added automatically if missing.
#' @param digits (`integer`)\cr number of decimal places to round the percent numbers.
#'
#' @return A `character` vector with labels in the format `[0%,20%]`, `(20%,50%]`, etc.
#'
#' @examples
#' # Internal function - bins_percent_labels
#' \dontrun{
#' # Just pass the internal probability bounds, then 0 and 100% will be added automatically.
#' bins_percent_labels(c(0.2, 0.5))
#'
#' # Determine how to round.
#' bins_percent_labels(0.35224, digits = 1)
#'
#' # Passing only the boundaries 0 and 1 just gives a single bin 0-100%.
#' bins_percent_labels(c(0, 1))
#' }
#'
#' @keywords internal
bins_percent_labels <- function(probs,
                                digits = 0) {
  # Add the outer boundaries 0 and 1 when the caller left them out.
  if (!(0 %in% probs)) probs <- c(0, probs)
  if (!(1 %in% probs)) probs <- c(probs, 1)
  checkmate::assert_numeric(probs, lower = 0, upper = 1, unique = TRUE, sorted = TRUE)

  percent <- round(probs * 100, digits = digits)
  n_bins <- length(percent) - 1L

  # Only the first bin is closed on the left; all bins are closed on the right.
  opening <- c("[", rep("(", n_bins - 1L))
  lower <- percent[-length(percent)]
  upper <- percent[-1L]

  paste0(opening, lower, "%,", upper, "%]")
}

#' Cutting Numeric Vector into Empirical Quantile Bins
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This cuts a numeric vector into sample quantile bins.
#'
#' @inheritParams bins_percent_labels
#' @param x (`numeric`)\cr the continuous variable values which should be cut into
#'   quantile bins. This may contain `NA` values, which are then
#'   not used for the quantile calculations, but included in the return vector.
#' @param labels (`character`)\cr the unique labels for the quantile bins. When there are `n`
#'   probabilities in `probs`, then this must be `n + 1` long.
#' @param type (`integer`)\cr type of quantiles to use, see [stats::quantile()] for details.
#' @param ordered (`flag`)\cr should the result be an ordered factor.
#'
#' @return A `factor` variable with appropriately-labeled bins as levels.
#'
#' @note Intervals are closed on the right side. That is, the first bin is the interval
#'   `[-Inf, q1]` where `q1` is the first quantile, the second bin is then `(q1, q2]`, etc.,
#'   and the last bin is `(qn, +Inf]` where `qn` is the last quantile.
#'
#' @examples
#' # Default is to cut into quartile bins.
#' cut_quantile_bins(cars$speed)
#'
#' # Use custom quantiles.
#' cut_quantile_bins(cars$speed, probs = c(0.1, 0.2, 0.6, 0.88))
#'
#' # Use custom labels.
#' cut_quantile_bins(cars$speed, labels = paste0("Q", 1:4))
#'
#' # NAs are preserved in result factor.
#' ozone_binned <- cut_quantile_bins(airquality$Ozone)
#' which(is.na(ozone_binned))
#' # So you might want to make these explicit.
#' explicit_na(ozone_binned)
#'
#' @export
cut_quantile_bins <- function(x,
                              probs = c(0.25, 0.5, 0.75),
                              labels = NULL,
                              type = 7,
                              ordered = TRUE) {
  checkmate::assert_flag(ordered)
  checkmate::assert_numeric(x)

  # Add the outer boundaries 0 and 1 when the caller left them out.
  if (!(0 %in% probs)) probs <- c(0, probs)
  if (!(1 %in% probs)) probs <- c(probs, 1)
  checkmate::assert_numeric(probs, lower = 0, upper = 1, unique = TRUE, sorted = TRUE)

  # Default labels are the percent ranges of the bins; there is one label per bin.
  if (is.null(labels)) labels <- bins_percent_labels(probs)
  checkmate::assert_character(labels, len = length(probs) - 1, any.missing = FALSE, unique = TRUE)

  if (all(is.na(x))) {
    # No quantiles can be computed from missing values only: the result is an
    # all-`NA` factor that still carries the bin labels as levels.
    factor(x, levels = labels, ordered = ordered)
  } else {
    quantiles <- stats::quantile(x, probs = probs, type = type, na.rm = TRUE)

    # Tied quantiles would produce duplicated breaks, which cannot define bins.
    checkmate::assert_numeric(quantiles, unique = TRUE)

    # Right-closed bins; the first bin also includes the minimum.
    cut(
      x,
      breaks = quantiles,
      labels = labels,
      right = TRUE,
      include.lowest = TRUE,
      ordered_result = ordered
    )
  }
}

#' Discard Certain Levels from a Factor
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This discards the observations as well as the levels specified from a factor.
#'
#' @param x (`factor`)\cr the original factor.
#' @param discard (`character`)\cr which levels to discard.
#'
#' @return A modified `factor` with observations as well as levels from `discard` dropped.
#'
#' @examples
#' fct_discard(factor(c("a", "b", "c")), "c")
#'
#' @export
fct_discard <- function(x, discard) {
  checkmate::assert_factor(x)
  checkmate::assert_character(discard, any.missing = FALSE)

  # Drop the observations and the levels named in `discard`.
  is_kept_obs <- !(x %in% discard)
  kept_levels <- levels(x)[!(levels(x) %in% discard)]

  factor(x[is_kept_obs], levels = kept_levels)
}

#' Insertion of Explicit Missings in a Factor
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This inserts explicit missings in a factor based on a condition. Additionally,
#' existing `NA` values will be explicitly converted to given `na_level`.
#'
#' @param x (`factor`)\cr the original factor.
#' @param condition (`logical`)\cr where to insert missings.
#' @param na_level (`string`)\cr which level to use for missings.
#'
#' @return A modified `factor` with inserted and existing `NA` converted to `na_level`.
#'
#' @seealso [forcats::fct_na_value_to_level()] which is used internally.
#'
#' @examples
#' fct_explicit_na_if(factor(c("a", "b", NA)), c(TRUE, FALSE, FALSE))
#'
#' @export
fct_explicit_na_if <- function(x, condition, na_level = "<Missing>") {
  checkmate::assert_factor(x, len = length(condition))
  checkmate::assert_logical(condition)

  # Blank out the flagged observations, then turn all missing values (new and
  # pre-existing) into the explicit level.
  with_na <- replace(x, condition, NA)
  with_level <- forcats::fct_na_value_to_level(with_na, level = na_level)

  # The explicit level is removed again when no observation uses it.
  forcats::fct_drop(with_level, only = na_level)
}

#' Collapsing of Factor Levels and Keeping Only Those New Group Levels
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This collapses levels and only keeps those new group levels, in the order provided.
#' The returned factor has levels in the order given, with the possible missing level last (this will
#' only be included if there are missing values).
#'
#' @param .f (`factor` or `character`)\cr original vector.
#' @param ... (named `character` vectors)\cr levels in each vector provided will be collapsed into
#'   the new level given by the respective name.
#' @param .na_level (`string`)\cr which level to use for other levels, which should be missing in the
#'   new factor. Note that this level must not be contained in the new levels specified in `...`.
#'
#' @return A modified `factor` with collapsed levels. Values and levels which are not included
#'   in the given `character` vector input will be set to the missing level `.na_level`.
#'
#' @note Any existing `NA`s in the input vector will not be replaced by the missing level. If needed,
#'   [explicit_na()] can be called separately on the result.
#'
#' @seealso [forcats::fct_collapse()], [forcats::fct_relevel()] which are used internally.
#'
#' @examples
#' fct_collapse_only(factor(c("a", "b", "c", "d")), TRT = "b", CTRL = c("c", "d"))
#'
#' @export
fct_collapse_only <- function(.f, ..., .na_level = "<Missing>") {
  # The names of the `...` arguments are the new levels, in the requested order.
  new_lvls <- names(list(...))

  # The missing level must stay distinguishable from the new levels.
  na_level_clashes <- checkmate::test_subset(.na_level, new_lvls)
  if (na_level_clashes) {
    stop(paste0(".na_level currently set to '", .na_level, "' must not be contained in the new levels"))
  }

  # Levels not mentioned in `...` are collapsed into `.na_level`.
  collapsed <- forcats::fct_collapse(.f, ..., other_level = .na_level)

  # Move the new levels to the front, in the order given.
  relevel_args <- c(list(.f = collapsed), as.list(new_lvls))
  do.call(forcats::fct_relevel, args = relevel_args)
}

#' Counting Patients Summing Exposure Across All Patients in Columns
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Counting the number of patients and summing analysis value (i.e. exposure values) across all patients
#' when a column table layout is required.
#'
#' @inheritParams argument_convention
#'
#' @name summarize_patients_exposure_in_cols
NULL

#' @describeIn summarize_patients_exposure_in_cols Statistics function which counts numbers
#'   of patients and the sum of exposure across all patients.
#'
#' @param ex_var (`character`)\cr name of the variable within `df` containing exposure values.
#' @param custom_label (`string` or `NULL`)\cr if provided and `labelstr` is empty then this will be used as label.
#'
#' @return
#' * `s_count_patients_sum_exposure()` returns a named `list` with the statistics:
#'   * `n_patients`: Number of unique patients in `df`.
#'   * `sum_exposure`: Sum of `ex_var` across all patients in `df`.
#'
#' @examples
#' set.seed(1)
#' df <- data.frame(
#'   USUBJID = c(paste("id", seq(1, 12), sep = "")),
#'   ARMCD = c(rep("ARM A", 6), rep("ARM B", 6)),
#'   SEX = c(rep("Female", 6), rep("Male", 6)),
#'   AVAL = as.numeric(sample(seq(1, 20), 12)),
#'   stringsAsFactors = TRUE
#' )
#' adsl <- data.frame(
#'   USUBJID = c(paste("id", seq(1, 12), sep = "")),
#'   ARMCD = c(rep("ARM A", 2), rep("ARM B", 2)),
#'   SEX = c(rep("Female", 2), rep("Male", 2)),
#'   stringsAsFactors = TRUE
#' )
#'
#' # Internal function - s_count_patients_sum_exposure
#' \dontrun{
#' s_count_patients_sum_exposure(df = df, .N_col = nrow(adsl))
#' s_count_patients_sum_exposure(df = df, .N_col = nrow(adsl), .stats = "n_patients")
#' s_count_patients_sum_exposure(
#'   df = df,
#'   .N_col = nrow(adsl),
#'   custom_label = "some user's custom label"
#' )
#' }
#'
#' @keywords internal
s_count_patients_sum_exposure <- function(df,
                                          ex_var = "AVAL",
                                          id = "USUBJID",
                                          labelstr = "",
                                          .stats = c("n_patients", "sum_exposure"),
                                          .N_col, # nolint
                                          custom_label = NULL) {
  assert_df_with_variables(df, list(ex_var = ex_var, id = id))
  checkmate::assert_string(id)
  checkmate::assert_string(labelstr)
  checkmate::assert_string(custom_label, null.ok = TRUE)
  checkmate::assert_numeric(df[[ex_var]])
  checkmate::assert_true(all(.stats %in% c("n_patients", "sum_exposure")))

  # Label precedence: a non-empty `labelstr` wins over `custom_label`, which in
  # turn wins over the generic default.
  row_label <- "Total patients numbers/person time"
  if (!is.null(custom_label)) row_label <- custom_label
  if (labelstr != "") row_label <- labelstr

  # Only the requested statistics are computed; both carry the same row label.
  y <- list()

  if ("n_patients" %in% .stats) {
    patient_counts <- s_num_patients_content(
      df = df,
      .N_col = .N_col, # nolint
      .var = id,
      labelstr = ""
    )
    y$n_patients <- formatters::with_label(patient_counts$unique, row_label)
  }

  if ("sum_exposure" %in% .stats) {
    total_exposure <- sum(df[[ex_var]])
    y$sum_exposure <- formatters::with_label(total_exposure, row_label)
  }

  y
}

#' @describeIn summarize_patients_exposure_in_cols Analysis function which is used as `afun` in
#'   [rtables::analyze_colvars()] within `analyze_patients_exposure_in_cols()` and as `cfun` in
#'   [rtables::summarize_row_groups()] within `summarize_patients_exposure_in_cols()`.
#'
#' @return
#' * `a_count_patients_sum_exposure()` returns formatted [rtables::CellValue()].
#'
#' @examples
#' tern:::a_count_patients_sum_exposure(
#'   df = df,
#'   var = "SEX",
#'   .N_col = nrow(df),
#'   .stats = "n_patients"
#' )
#'
#' @keywords internal
a_count_patients_sum_exposure <- function(df,
                                          var = NULL,
                                          ex_var = "AVAL",
                                          id = "USUBJID",
                                          labelstr = "",
                                          .N_col, # nolint
                                          .stats,
                                          .formats = list(n_patients = "xx (xx.x%)", sum_exposure = "xx"),
                                          custom_label = NULL) {
  # When a grouping variable is given, one row per factor level is produced.
  if (!is.null(var)) {
    assert_df_with_variables(df, list(var = var))
    df[[var]] <- as.factor(df[[var]])
  }

  # `.stats` is used as a single `[[` index throughout, i.e. exactly one
  # statistic is handled per call.
  y <- list()
  if (is.null(var)) {
    # No grouping variable: a single "Total" row computed on all of `df`.
    y[[.stats]] <- list(Total = s_count_patients_sum_exposure(
      df = df,
      ex_var = ex_var,
      id = id,
      labelstr = labelstr,
      .N_col = .N_col,
      .stats = .stats,
      custom_label = custom_label
    )[[.stats]])
  } else {
    for (lvl in levels(df[[var]])) {
      # Standard subsetting is used instead of `subset(df, get(var) == lvl)`: the latter
      # evaluates `var` and `lvl` inside `df` first, so a column called `var` or `lvl`
      # would shadow the local variables. `%in%` never yields `NA`, so rows with a
      # missing `var` value are dropped, exactly as `subset()` does.
      df_lvl <- df[df[[var]] %in% lvl, , drop = FALSE]
      y[[.stats]][[lvl]] <- s_count_patients_sum_exposure(
        df = df_lvl,
        ex_var = ex_var,
        id = id,
        labelstr = labelstr,
        .N_col = .N_col,
        .stats = .stats,
        # Each row is labelled with its level.
        custom_label = lvl
      )[[.stats]]
    }
  }

  # One output row per entry, formatted with the format of the requested statistic.
  in_rows(.list = y[[.stats]], .formats = .formats[[.stats]])
}

#' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics
#'   function arguments and additional format arguments. This function is a wrapper for
#'   [rtables::split_cols_by_multivar()] and [rtables::summarize_row_groups()].
#'
#' @return
#' * `summarize_patients_exposure_in_cols()` returns a layout object suitable for passing to further
#'   layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will
#'   add formatted content rows, with the statistics from `s_count_patients_sum_exposure()` arranged in
#'   columns, to the table layout.
#'
#' @examples
#' lyt <- basic_table() %>%
#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE)
#' result <- build_table(lyt, df = df, alt_counts_df = adsl)
#' result
#'
#' lyt2 <- basic_table() %>%
#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE, .stats = "sum_exposure")
#' result2 <- build_table(lyt2, df = df, alt_counts_df = adsl)
#' result2
#'
#' @export
summarize_patients_exposure_in_cols <- function(lyt, # nolint
                                                var,
                                                ...,
                                                .stats = c("n_patients", "sum_exposure"),
                                                .labels = c(n_patients = "Patients", sum_exposure = "Person time"),
                                                .indent_mods = NULL,
                                                col_split = TRUE) {
  # NOTE: `.indent_mods` is accepted but is not used in this function body.

  # When requested, add one column per statistic. Each column repeats `var` and is told
  # through `extra_args` which statistics were requested.
  lyt <- if (col_split) {
    split_cols_by_multivar(
      lyt = lyt,
      vars = rep(var, length(.stats)),
      varlabels = .labels[.stats],
      extra_args = list(.stats = .stats)
    )
  } else {
    lyt
  }

  # Content rows; everything passed through `...` goes to the content function.
  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = a_count_patients_sum_exposure,
    extra_args = list(...)
  )
}

#' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics
#'   function arguments and additional format arguments. This function is a wrapper for
#'   [rtables::split_cols_by_multivar()] and [rtables::analyze_colvars()].
#'
#' @param col_split (`flag`)\cr whether the columns should be split. Set to `FALSE` when the required
#'   column split has been done already earlier in the layout pipe.
#'
#' @return
#' * `analyze_patients_exposure_in_cols()` returns a layout object suitable for passing to further
#'   layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will
#'   add formatted data rows, with the statistics from `s_count_patients_sum_exposure()` arranged in
#'   columns, to the table layout.
#'
#' @note As opposed to [summarize_patients_exposure_in_cols()] which generates content rows,
#'   `analyze_patients_exposure_in_cols()` generates data rows which will _not_ be repeated on multiple
#'   pages when pagination is used.
#'
#' @examples
#' lyt3 <- basic_table() %>%
#'   split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE) %>%
#'   analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE)
#' result3 <- build_table(lyt3, df = df, alt_counts_df = adsl)
#' result3
#'
#' lyt4 <- basic_table() %>%
#'   split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
#'   summarize_patients_exposure_in_cols(
#'     var = "AVAL", col_split = TRUE,
#'     .stats = "n_patients", custom_label = "some custom label"
#'   ) %>%
#'   analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE, ex_var = "AVAL")
#' result4 <- build_table(lyt4, df = df, alt_counts_df = adsl)
#' result4
#'
#' lyt5 <- basic_table() %>%
#'   analyze_patients_exposure_in_cols(var = "SEX", col_split = TRUE, ex_var = "AVAL")
#' result5 <- build_table(lyt5, df = df, alt_counts_df = adsl)
#' result5
#'
#' @export
analyze_patients_exposure_in_cols <- function(lyt, # nolint
                                              var = NULL,
                                              ex_var = "AVAL",
                                              col_split = TRUE,
                                              .stats = c("n_patients", "sum_exposure"),
                                              .labels = c(n_patients = "Patients", sum_exposure = "Person time"),
                                              .indent_mods = 0L) {
  # When requested, add one column per statistic. Each column repeats `ex_var` and is told
  # through `extra_args` which statistics were requested.
  if (col_split) {
    stat_columns <- rep(ex_var, length(.stats))
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = stat_columns,
      varlabels = .labels[.stats],
      extra_args = list(.stats = .stats)
    )
  }

  # Data rows, computed column by column with the shared analysis function.
  analyze_colvars(
    lyt,
    afun = a_count_patients_sum_exposure,
    indent_mod = .indent_mods,
    extra_args = list(var = var, ex_var = ex_var)
  )
}

#' Odds Ratio Estimation
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Compares bivariate responses between two groups in terms of odds ratios
#' along with a confidence interval.
#'
#' @inheritParams argument_convention
#'
#' @details This function uses either logistic regression for unstratified
#'   analyses, or conditional logistic regression for stratified analyses.
#'   The Wald confidence interval with the specified confidence level is
#'   calculated.
#'
#' @note For stratified analyses, there is currently no implementation for conditional
#'   likelihood confidence intervals, therefore the likelihood confidence interval is not
#'   yet available as an option. Besides, when `rsp` contains only responders or non-responders,
#'   then the result values will be `NA`, because no odds ratio estimation is possible.
#'
#' @seealso Relevant helper function [h_odds_ratio()].
#'
#' @name odds_ratio
NULL

#' @describeIn odds_ratio Statistics function which estimates the odds ratio
#'   between a treatment and a control. A `variables` list with `arm` and `strata`
#'   variable names must be passed if a stratified analysis is required.
#'
#' @inheritParams split_cols_by_groups
#'
#' @return
#' * `s_odds_ratio()` returns a named list with the statistics `or_ci`
#'   (containing `est`, `lcl`, and `ucl`) and `n_tot`.
#'
#' @examples
#' set.seed(12)
#' dta <- data.frame(
#'   rsp = sample(c(TRUE, FALSE), 100, TRUE),
#'   grp = factor(rep(c("A", "B"), each = 50), levels = c("B", "A")),
#'   strata = factor(sample(c("C", "D"), 100, TRUE))
#' )
#'
#' # Unstratified analysis.
#' s_odds_ratio(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta
#' )
#'
#' # Stratified analysis.
#' s_odds_ratio(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta,
#'   variables = list(arm = "grp", strata = "strata")
#' )
#'
#' @export
s_odds_ratio <- function(df,
                         .var,
                         .ref_group,
                         .in_ref_col,
                         .df_row,
                         variables = list(arm = NULL, strata = NULL),
                         conf_level = 0.95,
                         groups_list = NULL) {
  # Inside the reference column nothing is estimated: both statistics stay empty strings.
  y <- list(or_ci = "", n_tot = "")

  if (!.in_ref_col) {
    assert_proportion_value(conf_level)
    assert_df_with_variables(df, list(rsp = .var))
    assert_df_with_variables(.ref_group, list(rsp = .var))

    if (is.null(variables$strata)) {
      # Unstratified analysis: stack the reference rows and the rows of the current column,
      # with "ref" as the first level of `grp`, and delegate to `or_glm()`.
      data <- data.frame(
        rsp = c(.ref_group[[.var]], df[[.var]]),
        grp = factor(
          rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))),
          levels = c("ref", "Not-ref")
        )
      )
      y <- or_glm(data, conf_level = conf_level)
    } else {
      # Stratified analysis: works on the complete row data `.df_row`, which must contain the
      # response as well as the arm and strata variables.
      assert_df_with_variables(.df_row, c(list(rsp = .var), variables))

      # The group variable prepared for clogit must be synchronised with combination groups definition.
      if (is.null(groups_list)) {
        # No combined groups: the arm values found in the reference and current columns
        # are used directly, and the reference arm is made the first level.
        ref_grp <- as.character(unique(.ref_group[[variables$arm]]))
        trt_grp <- as.character(unique(df[[variables$arm]]))
        grp <- stats::relevel(factor(.df_row[[variables$arm]]), ref = ref_grp)
      } else {
        # If more than one level in reference col.
        # The reference group name is the entry of `groups_list` containing all reference arms.
        reference <- as.character(unique(.ref_group[[variables$arm]]))
        grp_ref_flag <- vapply(
          X = groups_list,
          FUN.VALUE = TRUE,
          FUN = function(x) all(reference %in% x)
        )
        ref_grp <- names(groups_list)[grp_ref_flag]

        # If more than one level in treatment col.
        # The treatment group name is the entry of `groups_list` containing all treatment arms.
        treatment <- as.character(unique(df[[variables$arm]]))
        grp_trt_flag <- vapply(
          X = groups_list,
          FUN.VALUE = TRUE,
          FUN = function(x) all(treatment %in% x)
        )
        trt_grp <- names(groups_list)[grp_trt_flag]

        # Merge the individual arm levels into the two combined group levels.
        grp <- combine_levels(.df_row[[variables$arm]], levels = reference, new_level = ref_grp)
        grp <- combine_levels(grp, levels = treatment, new_level = trt_grp)
      }

      # The reference level in `grp` must be the same as in the `rtables` column split.
      data <- data.frame(
        rsp = .df_row[[.var]],
        grp = grp,
        # Several strata variables are combined into a single stratification factor.
        strata = interaction(.df_row[variables$strata])
      )
      y_all <- or_clogit(data, conf_level = conf_level)
      # `or_clogit()` results are looked up by group name: exactly one treatment group
      # name is required, and it must be among the returned estimates.
      checkmate::assert_string(trt_grp)
      checkmate::assert_subset(trt_grp, names(y_all$or_ci))
      y$or_ci <- y_all$or_ci[[trt_grp]]
      y$n_tot <- y_all$n_tot
    }
  }

  # Attach the row labels; the confidence level is shown as a percentage.
  y$or_ci <- formatters::with_label(
    x = y$or_ci,
    label = paste0("Odds Ratio (", 100 * conf_level, "% CI)")
  )

  y$n_tot <- formatters::with_label(
    x = y$n_tot,
    label = "Total n"
  )

  y
}

#' @describeIn odds_ratio Formatted analysis function which is used as `afun` in `estimate_odds_ratio()`.
#'
#' @return
#' * `a_odds_ratio()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_odds_ratio(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta
#' )
#'
#' @export
a_odds_ratio <- make_afun(
  # Statistics function whose results are turned into formatted cells.
  s_odds_ratio,
  # Default cell format of the `or_ci` statistic: the estimate followed by two bounds in parentheses.
  .formats = c(or_ci = "xx.xx (xx.xx - xx.xx)"),
  # Default indent modifier of the `or_ci` row.
  .indent_mods = c(or_ci = 1L)
)

#' @describeIn odds_ratio Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... arguments passed to `s_odds_ratio()`.
#'
#' @return
#' * `estimate_odds_ratio()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_odds_ratio()` to the table layout.
#'
#' @examples
#' dta <- data.frame(
#'   rsp = sample(c(TRUE, FALSE), 100, TRUE),
#'   grp = factor(rep(c("A", "B"), each = 50))
#' )
#'
#' l <- basic_table() %>%
#'   split_cols_by(var = "grp", ref_group = "B") %>%
#'   estimate_odds_ratio(vars = "rsp")
#'
#' build_table(l, df = dta)
#'
#' @export
estimate_odds_ratio <- function(lyt,
                                vars,
                                ...,
                                show_labels = "hidden",
                                table_names = vars,
                                .stats = "or_ci",
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Specialize the formatted analysis function with the requested statistics and
  # any format, label or indentation overrides supplied by the caller.
  formatted_afun <- make_afun(
    a_odds_ratio,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Everything received through `...` is forwarded to the analysis function as extra arguments.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = formatted_afun,
    extra_args = list(...),
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Helper Functions for Odds Ratio Estimation
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions to calculate odds ratios in [estimate_odds_ratio()].
#'
#' @inheritParams argument_convention
#' @param data (`data.frame`)\cr data frame containing at least the variables `rsp` and `grp`, and optionally
#'   `strata` for [or_clogit()].
#'
#' @return A named `list` of elements `or_ci` and `n_tot`.
#'
#' @seealso [odds_ratio]
#'
#' @name h_odds_ratio
NULL

#' @describeIn h_odds_ratio Estimates the odds ratio based on [stats::glm()]. Note that there must be
#'   exactly 2 groups in `data` as specified by the `grp` variable.
#'
#' @examples
#' # Data with 2 groups.
#' data <- data.frame(
#'   rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1)),
#'   grp = letters[c(1, 1, 1, 2, 2, 2, 1, 2)],
#'   strata = letters[c(1, 2, 1, 2, 2, 2, 1, 2)],
#'   stringsAsFactors = TRUE
#' )
#'
#' # Odds ratio based on glm.
#' or_glm(data, conf_level = 0.95)
#'
#' @export
or_glm <- function(data, conf_level) {
  # Validate the inputs: logical response, proportion-like confidence level and a
  # factor or character grouping variable.
  checkmate::assert_logical(data$rsp)
  assert_proportion_value(conf_level)
  assert_df_with_variables(data, list(rsp = "rsp", grp = "grp"))
  checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))

  # The model below supports exactly two groups, so the factor must have exactly two levels.
  data$grp <- as_factor_keep_attributes(data$grp)
  assert_df_with_factors(data, list(val = "grp"), min.levels = 2, max.levels = 2)

  logit_formula <- stats::as.formula("rsp ~ grp")
  logit_fit <- stats::glm(
    formula = logit_formula,
    data = data,
    family = stats::binomial(link = "logit")
  )

  # The first coefficient is the intercept; only the group effect is reported.
  log_or <- stats::coef(logit_fit)[-1]
  log_ci <- stats::confint.default(logit_fit, level = conf_level)[-1, , drop = FALSE]

  # Back-transform estimate and interval bounds from the log-odds scale.
  estimates <- exp(c(log_or, log_ci))
  names(estimates) <- c("est", "lcl", "ucl")

  list(
    or_ci = estimates,
    # Number of rows actually used in the model frame.
    n_tot = c(n_tot = nrow(logit_fit$model))
  )
}

#' @describeIn h_odds_ratio Estimates the odds ratio based on [survival::clogit()]. This is done for
#'   the whole data set including all groups, since the results are not the same as when doing
#'   pairwise comparisons between the groups.
#'
#' @examples
#' # Data with 3 groups.
#' data <- data.frame(
#'   rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0)),
#'   grp = letters[c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3)],
#'   strata = LETTERS[c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)],
#'   stringsAsFactors = TRUE
#' )
#'
#' # Odds ratio based on stratified estimation by conditional logistic regression.
#' or_clogit(data, conf_level = 0.95)
#'
#' @export
or_clogit <- function(data, conf_level) {
  # Validate the inputs: logical response, proportion-like confidence level and
  # factor or character grouping and stratification variables.
  checkmate::assert_logical(data$rsp)
  assert_proportion_value(conf_level)
  assert_df_with_variables(data, list(rsp = "rsp", grp = "grp", strata = "strata"))
  checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))
  checkmate::assert_multi_class(data$strata, classes = c("factor", "character"))

  data$grp <- as_factor_keep_attributes(data$grp)
  data$strata <- as_factor_keep_attributes(data$strata)

  # Deviation from convention: `survival::strata` must be simply `strata`.
  clogit_formula <- stats::as.formula("rsp ~ grp + strata(strata)")
  clogit_fit <- clogit_with_tryCatch(formula = clogit_formula, data = data)

  log_or <- stats::coef(clogit_fit)
  log_ci <- stats::confint(clogit_fit, level = conf_level)

  # Coefficient names are the `grp` prefix followed by the group level; stripping the
  # prefix yields the name of the group that is compared against the reference group.
  coef_names <- names(log_or)
  group_names <- gsub("^grp", "", x = coef_names)

  # One back-transformed estimate with confidence bounds per non-reference group.
  or_by_group <- list()
  for (i in seq_along(coef_names)) {
    term <- coef_names[i]
    or_by_group[[group_names[i]]] <- stats::setNames(
      object = exp(c(log_or[term], log_ci[term, , drop = TRUE])),
      nm = c("est", "lcl", "ucl")
    )
  }

  list(or_ci = or_by_group, n_tot = c(n_tot = clogit_fit$n))
}

#' Tabulate Survival Duration by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tabulate statistics such as median survival time and hazard ratio for population subgroups.
#'
#' @inheritParams argument_convention
#' @inheritParams survival_coxph_pairwise
#' @param data (`data.frame`)\cr the dataset containing the variables to summarize.
#' @param groups_lists (named `list` of `list`)\cr optionally contains for each `subgroups` variable a list, which
#'   specifies the new group levels via the names and the levels that belong to it in the character vectors that are
#'   elements of the list.
#' @param label_all (`string`)\cr label for the total population analysis.
#' @param time_unit (`string`)\cr label with unit of median survival time. Default `NULL` skips displaying unit.
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. The resulting tables are typically used as part of a forest plot.
#'
#' @seealso [extract_survival_subgroups()]
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X"),
#'     SEX %in% c("M", "F")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
#'     SEX = droplevels(SEX),
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c(
#'   "ARM" = adtte_labels[["ARM"]],
#'   "SEX" = adtte_labels[["SEX"]],
#'   "AVALU" = adtte_labels[["AVALU"]],
#'   "is_event" = "Event Flag"
#' )
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' df <- extract_survival_subgroups(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM", subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#' df
#'
#' @name survival_duration_subgroups
NULL

#' Prepares Survival Data for Population Subgroups in Data Frames
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares estimates of median survival times and treatment hazard ratios for population subgroups in
#' data frames. Simple wrapper for [h_survtime_subgroups_df()] and [h_coxph_subgroups_df()]. Result is a `list`
#' of two `data.frame`s: `survtime` and `hr`. `variables` corresponds to the names of variables found in `data`,
#' passed as a named `list` and requires elements `tte`, `is_event`, `arm` and optionally `subgroups` and `strat`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams survival_duration_subgroups
#' @inheritParams survival_coxph_pairwise
#'
#' @return A named `list` of two elements:
#'   * `survtime`: A `data.frame` containing columns `arm`, `n`, `n_events`, `median`, `subgroup`, `var`,
#'     `var_label`, and `row_type`.
#'   * `hr`: A `data.frame` containing columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`, `conf_level`,
#'     `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @seealso [survival_duration_subgroups]
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adtte <- tern_ex_adtte
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X"),
#'     SEX %in% c("M", "F")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
#'     SEX = droplevels(SEX),
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c(
#'   "ARM" = adtte_labels[["ARM"]],
#'   "SEX" = adtte_labels[["SEX"]],
#'   "AVALU" = adtte_labels[["AVALU"]],
#'   "is_event" = "Event Flag"
#' )
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' df <- extract_survival_subgroups(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM", subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#' df
#'
#' df_grouped <- extract_survival_subgroups(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM", subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#' df_grouped
#'
#' @export
extract_survival_subgroups <- function(variables,
                                       data,
                                       groups_lists = list(),
                                       control = control_coxph(),
                                       label_all = "All Patients") {
  # Both helpers receive the same variables, data, groupings and overall label;
  # only the hazard ratio helper additionally takes the `control` settings.
  list(
    survtime = h_survtime_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      label_all = label_all
    ),
    hr = h_coxph_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      control = control,
      label_all = label_all
    )
  )
}

#' @describeIn survival_duration_subgroups  Formatted analysis function which is used as
#'   `afun` in `tabulate_survival_subgroups()`.
#'
#' @return
#' * `a_survival_subgroups()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_survival_subgroups
#' \dontrun{
#' a_survival_subgroups(.formats = list("n" = "xx", "median" = "xx.xx"))
#' }
#'
#' @keywords internal
a_survival_subgroups <- function(.formats = list(
                                   n = "xx",
                                   n_events = "xx",
                                   n_tot_events = "xx",
                                   median = "xx.x",
                                   n_tot = "xx",
                                   hr = list(format_extreme_values(2L)),
                                   ci = list(format_extreme_values_ci(2L)),
                                   pval = "x.xxxx | (<0.0001)"
                                 )) {
  checkmate::assert_list(.formats)
  checkmate::assert_subset(
    names(.formats),
    c("n", "n_events", "median", "n_tot", "n_tot_events", "hr", "ci", "pval")
  )

  # Builds the analysis function for a single statistic: one row per subgroup,
  # labelled by the subgroup and formatted with `fmt`.
  make_stat_afun <- function(stat, fmt) {
    # The confidence interval is assembled from the `lcl` and `ucl` columns;
    # every other statistic is read from the column carrying its own name.
    cell_values <- if (stat == "ci") {
      function(df) combine_vectors(df$lcl, df$ucl)
    } else {
      function(df) as.list(df[[stat]])
    }

    function(df, labelstr = "", ...) {
      in_rows(
        .list = cell_values(df),
        .labels = as.character(df$subgroup),
        .formats = fmt
      )
    }
  }

  # The result is a list of functions named after the statistics in `.formats`.
  Map(make_stat_afun, stat = names(.formats), fmt = .formats)
}

#' @describeIn survival_duration_subgroups Table-creating function which creates a table
#'   summarizing survival by subgroup. This function is a wrapper for [rtables::analyze_colvars()]
#'   and [rtables::summarize_row_groups()].
#'
#' @param df (`list`)\cr of data frames containing all analysis variables. List should be
#'   created using [extract_survival_subgroups()].
#' @param vars (`character`)\cr the name of statistics to be reported among:
#'   * `n_tot_events`: Total number of events per group.
#'   * `n_events`: Number of events per group.
#'   * `n_tot`: Total number of observations per group.
#'   * `n`: Number of observations per group.
#'   * `median`: Median survival time.
#'   * `hr`: Hazard ratio.
#'   * `ci`: Confidence interval of hazard ratio.
#'   * `pval`: p-value of the effect.
#'   Note, one of the statistics `n_tot` and `n_tot_events`, as well as both `hr` and `ci`
#'   are required.
#'
#' @return An `rtables` table summarizing survival by subgroup.
#'
#' @examples
#' ## Table with default columns.
#' basic_table() %>%
#'   tabulate_survival_subgroups(df, time_unit = adtte_f$AVALU[1])
#'
#' ## Table with a manually chosen set of columns: adding "pval".
#' basic_table() %>%
#'   tabulate_survival_subgroups(
#'     df = df,
#'     vars = c("n_tot_events", "n_events", "median", "hr", "ci", "pval"),
#'     time_unit = adtte_f$AVALU[1]
#'   )
#'
#' @export
tabulate_survival_subgroups <- function(lyt,
                                        df,
                                        vars = c("n_tot_events", "n_events", "median", "hr", "ci"),
                                        time_unit = NULL) {
  # Confidence level and p-value label are taken from the first row of the hazard ratio data.
  conf_level <- df$hr$conf_level[1]
  method <- df$hr$pval_label[1]

  # One analysis function per statistic, and the column variables/labels for the requested `vars`.
  afun_lst <- a_survival_subgroups()
  colvars <- d_survival_subgroups_colvars(
    vars,
    conf_level = conf_level,
    method = method,
    time_unit = time_unit
  )

  # Split the requested columns into those tabulated from `df$survtime` and those from `df$hr`.
  colvars_survtime <- list(
    vars = colvars$vars[names(colvars$labels) %in% c("n", "n_events", "median")],
    labels = colvars$labels[names(colvars$labels) %in% c("n", "n_events", "median")]
  )
  colvars_hr <- list(
    vars = colvars$vars[names(colvars$labels) %in% c("n_tot", "n_tot_events", "hr", "ci", "pval")],
    labels = colvars$labels[names(colvars$labels) %in% c("n_tot", "n_tot_events", "hr", "ci", "pval")]
  )

  # Columns from table_survtime are optional.
  if (length(colvars_survtime$vars) > 0) {
    lyt_survtime <- split_cols_by(lyt = lyt, var = "arm")
    lyt_survtime <- split_rows_by(
      lyt = lyt_survtime,
      var = "row_type",
      split_fun = keep_split_levels("content"),
      nested = FALSE
    )
    lyt_survtime <- summarize_row_groups(
      lyt = lyt_survtime,
      var = "var_label",
      cfun = afun_lst[names(colvars_survtime$labels)]
    )
    lyt_survtime <- split_cols_by_multivar(
      lyt = lyt_survtime,
      vars = colvars_survtime$vars,
      varlabels = colvars_survtime$labels
    )

    # Analysis rows are only added when the survival time data contains them.
    if ("analysis" %in% df$survtime$row_type) {
      lyt_survtime <- split_rows_by(
        lyt = lyt_survtime,
        var = "row_type",
        split_fun = keep_split_levels("analysis"),
        nested = FALSE,
        child_labels = "hidden"
      )
      lyt_survtime <- split_rows_by(lyt = lyt_survtime, var = "var_label", nested = TRUE)
      lyt_survtime <- analyze_colvars(
        lyt = lyt_survtime,
        afun = afun_lst[names(colvars_survtime$labels)],
        inclNAs = TRUE
      )
    }

    table_survtime <- build_table(lyt_survtime, df = df$survtime)
  } else {
    table_survtime <- NULL
  }

  # Columns "n_tot_events" or "n_tot", and "hr", "ci" in table_hr are required.
  lyt_hr <- split_cols_by(lyt = lyt, var = "arm")
  lyt_hr <- split_rows_by(
    lyt = lyt_hr,
    var = "row_type",
    split_fun = keep_split_levels("content"),
    nested = FALSE
  )
  lyt_hr <- summarize_row_groups(
    lyt = lyt_hr,
    var = "var_label",
    cfun = afun_lst[names(colvars_hr$labels)]
  )
  lyt_hr <- split_cols_by_multivar(
    lyt = lyt_hr,
    vars = colvars_hr$vars,
    varlabels = colvars_hr$labels
  ) %>%
    append_topleft("Baseline Risk Factors")

  # This layout is built against `df$hr`, so the presence of analysis rows is
  # checked in `df$hr` rather than in `df$survtime`.
  if ("analysis" %in% df$hr$row_type) {
    lyt_hr <- split_rows_by(
      lyt = lyt_hr,
      var = "row_type",
      split_fun = keep_split_levels("analysis"),
      nested = FALSE,
      child_labels = "hidden"
    )
    lyt_hr <- split_rows_by(lyt = lyt_hr, var = "var_label", nested = TRUE)
    lyt_hr <- analyze_colvars(
      lyt = lyt_hr,
      afun = afun_lst[names(colvars_hr$labels)],
      inclNAs = TRUE
    )
  }
  table_hr <- build_table(lyt_hr, df = df$hr)

  # There can be one or two vars starting with "n_tot".
  n_tot_ids <- grep("^n_tot", colvars_hr$vars)
  if (is.null(table_survtime)) {
    result <- table_hr
    hr_id <- match("hr", colvars_hr$vars)
    # The "ci" column is represented by the `lcl` variable in the column variables.
    ci_id <- match("lcl", colvars_hr$vars)
  } else {
    # Reorder the table: total counts first, then the survival time columns, then the remaining HR columns.
    result <- cbind_rtables(table_hr[, n_tot_ids], table_survtime, table_hr[, -n_tot_ids])
    # And then calculate column indices accordingly.
    hr_id <- length(n_tot_ids) + ncol(table_survtime) + match("hr", colvars_hr$vars[-n_tot_ids])
    ci_id <- length(n_tot_ids) + ncol(table_survtime) + match("lcl", colvars_hr$vars[-n_tot_ids])
    n_tot_ids <- seq_along(n_tot_ids)
  }

  # Attach the column positions and header as attributes of the resulting table.
  structure(
    result,
    forest_header = paste0(rev(levels(df$survtime$arm)), "\nBetter"),
    col_x = hr_id,
    col_ci = ci_id,
    # Take the first one for scaling the symbol sizes in graph.
    col_symbol_size = n_tot_ids[1]
  )
}

#' Labels for Column Variables in Survival Duration by Subgroup Table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Internal function to check variables included in [tabulate_survival_subgroups()] and create column labels.
#'
#' @inheritParams tabulate_survival_subgroups
#' @inheritParams argument_convention
#' @param method (`character`)\cr p-value method for testing hazard ratio = 1.
#'
#' @return A `list` of variables and their labels to tabulate.
#'
#' @note At least one of `n_tot` and `n_tot_events` must be provided in `vars`.
#'
#' @export
d_survival_subgroups_colvars <- function(vars,
                                         conf_level,
                                         method,
                                         time_unit = NULL) {
  # `hr` and `ci` are mandatory, at least one total count must be requested, and
  # only the known statistics are accepted.
  checkmate::assert_character(vars)
  checkmate::assert_string(time_unit, null.ok = TRUE)
  checkmate::assert_subset(c("hr", "ci"), vars)
  checkmate::assert_true(any(c("n_tot", "n_tot_events") %in% vars))
  checkmate::assert_subset(
    vars,
    c("n", "n_events", "median", "n_tot", "n_tot_events", "hr", "ci", "pval")
  )

  # The unit is appended to the median label only when one is given.
  median_label <- if (is.null(time_unit)) {
    "Median"
  } else {
    paste0("Median (", time_unit, ")")
  }

  all_labels <- c(
    n = "n",
    n_events = "Events",
    median = median_label,
    n_tot = "Total n",
    n_tot_events = "Total Events",
    hr = "Hazard Ratio",
    ci = paste0(100 * conf_level, "% Wald CI"),
    pval = method
  )

  list(
    # There is no `ci` column in the analysis data, so `lcl` stands in for it as the
    # column variable. The confidence interval cells themselves are built from both
    # `lcl` and `ucl`, see `a_survival_subgroups` for details.
    vars = replace(vars, vars == "ci", "lcl"),
    labels = all_labels[vars]
  )
}

#' Summary for analysis of covariance (ANCOVA).
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize results of ANCOVA. This can be used to analyze multiple endpoints and/or
#' multiple timepoints within the same response variable `.var`.
#'
#' @inheritParams argument_convention
#'
#' @name summarize_ancova
NULL

#' Helper Function to Return Results of a Linear Model
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called in `.var` and `variables`.
#' @param variables (named `list` of `strings`)\cr list of additional analysis variables, with expected elements:
#'   * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple groups will be
#'     summarized. Specifically, the first level of `arm` variable is taken as the reference group.
#'   * `covariates` (`character`)\cr a vector that can contain single variable names (such as `"X1"`), and/or
#'     interaction terms indicated by `"X1 * X2"`.
#' @param interaction_item (`character`)\cr name of the variable that should have interactions
#'   with arm. If the interaction is not needed, the default option is `NULL`.
#'
#' @return The summary of a linear model.
#'
#' @examples
#' h_ancova(
#'   .var = "Sepal.Length",
#'   .df_row = iris,
#'   variables = list(arm = "Species", covariates = c("Petal.Length * Petal.Width", "Sepal.Width"))
#' )
#'
#' @export
h_ancova <- function(.var,
                     .df_row,
                     variables,
                     interaction_item = NULL) {
  checkmate::assert_string(.var)
  checkmate::assert_list(variables)
  checkmate::assert_subset(names(variables), c("arm", "covariates"))
  assert_df_with_variables(.df_row, list(rsp = .var))

  arm <- variables$arm
  covariates <- variables$covariates

  # When covariates are given, every variable they reference must exist in the data.
  if (!is.null(covariates) && length(covariates) > 0) {
    assert_df_with_variables(.df_row, get_covariates(covariates))
  }

  # Right-hand side of the model: the covariate terms (if any) followed by the arm variable.
  covariate_terms <- paste(covariates, collapse = " + ")
  model_terms <- if (covariate_terms != "") {
    paste0(covariate_terms, " + ", arm)
  } else {
    arm
  }
  formula <- stats::as.formula(paste0(.var, " ~ ", model_terms))

  # Estimated marginal means are requested over the arm and, when given, the interaction variable.
  specs <- if (is.null(interaction_item)) arm else c(arm, interaction_item)

  lm_fit <- stats::lm(
    formula = formula,
    data = .df_row
  )

  emmeans::emmeans(
    lm_fit,
    # Specify here the group variable over which EMM are desired.
    specs = specs,
    # Pass the data again so that the factor levels of the arm variable can be inferred.
    data = .df_row
  )
}

#' @describeIn summarize_ancova Statistics function that produces a named list of results
#'   of the investigated linear model.
#'
#' @inheritParams h_ancova
#' @param interaction_y (`character`)\cr a selected item inside of the interaction_item column which will be used
#'   to select the specific ANCOVA results. If the interaction is not needed, the default option is `FALSE`.
#'
#' @return
#' * `s_ancova()` returns a named list of 5 statistics:
#'   * `n`: Count of complete sample size for the group.
#'   * `lsmean`: Estimated marginal means in the group.
#'   * `lsmean_diff`: Difference in estimated marginal means in comparison to the reference group.
#'     If working with the reference group, this will be empty.
#'   * `lsmean_diff_ci`: Confidence interval for the difference in estimated marginal means in comparison
#'     to the reference group.
#'   * `pval`: p-value (not adjusted for multiple comparisons).
#'
#' @examples
#' library(dplyr)
#'
#' df <- iris %>% filter(Species == "virginica")
#' .df_row <- iris
#' .var <- "Petal.Length"
#' variables <- list(arm = "Species", covariates = "Sepal.Length * Sepal.Width")
#' .ref_group <- iris %>% filter(Species == "setosa")
#' conf_level <- 0.95
#'
#' # Internal function - s_ancova
#' \dontrun{
#' s_ancova(
#'   df, .var, .df_row, variables, .ref_group,
#'   .in_ref_col = FALSE,
#'   conf_level, interaction_y = FALSE, interaction_item = NULL
#' )
#' }
#'
#' @keywords internal
s_ancova <- function(df,
                     .var,
                     .df_row,
                     variables,
                     .ref_group,
                     .in_ref_col,
                     conf_level,
                     interaction_y = FALSE,
                     interaction_item = NULL) {
  # Fit the linear model on the full row data and obtain the estimated marginal means.
  emmeans_fit <- h_ancova(.var = .var, variables = variables, .df_row = .df_row, interaction_item = interaction_item)

  sum_fit <- summary(
    emmeans_fit,
    level = conf_level
  )

  arm <- variables$arm

  # Arm level of the current column, taken from the column data `df`.
  sum_level <- as.character(unique(df[[arm]]))

  # Ensure that there is only one element in sum_level.
  checkmate::assert_scalar(sum_level)

  # Rows of the marginal means summary that belong to the current arm.
  sum_fit_level <- sum_fit[sum_fit[[arm]] == sum_level, ]

  # Get the index of the ref arm
  # NOTE(review): `seq()` on the single reference arm entry followed by `tail(n = 1)` appears to
  # yield the position of the reference level among the arm levels - confirm.
  if (interaction_y != FALSE) {
    y <- unlist(df[(df[[interaction_item]] == interaction_y), .var])
    # convert characters selected in interaction_y into the numeric order
    interaction_y <- which(sum_fit_level[[interaction_item]] == interaction_y)
    sum_fit_level <- sum_fit_level[interaction_y, ]
    # if interaction is called, reset the index
    ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])])
    ref_key <- tail(ref_key, n = 1)
    ref_key <- (interaction_y - 1) * length(unique(.df_row[[arm]])) + ref_key
  } else {
    y <- df[[.var]]
    # Get the index of the ref arm when interaction is not called
    ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])])
    ref_key <- tail(ref_key, n = 1)
  }

  if (.in_ref_col) {
    # In the reference column only the sample size and the adjusted mean are filled;
    # the comparison statistics are left empty.
    list(
      n = length(y[!is.na(y)]),
      lsmean = formatters::with_label(sum_fit_level$emmean, "Adjusted Mean"),
      lsmean_diff = formatters::with_label(character(), "Difference in Adjusted Means"),
      lsmean_diff_ci = formatters::with_label(character(), f_conf_level(conf_level)),
      pval = formatters::with_label(character(), "p-value")
    )
  } else {
    # Estimate the differences between the marginal means.
    emmeans_contrasts <- emmeans::contrast(
      emmeans_fit,
      # Compare all arms versus the control arm.
      method = "trt.vs.ctrl",
      # Take the arm factor from .ref_group as the control arm.
      ref = ref_key,
      level = conf_level
    )
    sum_contrasts <- summary(
      emmeans_contrasts,
      # Derive confidence intervals, t-tests and p-values.
      infer = TRUE,
      # Do not adjust the p-values for multiplicity.
      adjust = "none"
    )

    # Select the contrast rows of the current arm. The arm level is matched literally
    # (`fixed = TRUE`): it is data, not a pattern, so regular expression metacharacters
    # in a level name such as "(", "+" or "." must not be interpreted.
    # NOTE(review): this is still a substring match, so an arm level that is contained
    # in another level's name can select additional rows - confirm whether that can occur.
    sum_contrasts_level <- sum_contrasts[grepl(sum_level, sum_contrasts$contrast, fixed = TRUE), ]
    # At this point `interaction_y` holds the row index computed above when an interaction was requested.
    if (interaction_y != FALSE) {
      sum_contrasts_level <- sum_contrasts_level[interaction_y, ]
    }

    list(
      n = length(y[!is.na(y)]),
      lsmean = formatters::with_label(sum_fit_level$emmean, "Adjusted Mean"),
      lsmean_diff = formatters::with_label(sum_contrasts_level$estimate, "Difference in Adjusted Means"),
      lsmean_diff_ci = formatters::with_label(
        c(sum_contrasts_level$lower.CL, sum_contrasts_level$upper.CL),
        f_conf_level(conf_level)
      ),
      pval = formatters::with_label(sum_contrasts_level$p.value, "p-value")
    )
  }
}

#' @describeIn summarize_ancova Formatted analysis function which is used as `afun` in `summarize_ancova()`.
#'
#' @return
#' * `a_ancova()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_ancova
#' \dontrun{
#' a_ancova(
#'   df, .var, .df_row, variables, .ref_group,
#'   .in_ref_col = FALSE,
#'   interaction_y = FALSE, interaction_item = NULL, conf_level
#' )
#' }
#'
#' @keywords internal
a_ancova <- make_afun(
  # Statistics function whose results are turned into formatted cells.
  s_ancova,
  # Default indent modifiers: the confidence interval and p-value rows are indented by one level.
  .indent_mods = c("n" = 0L, "lsmean" = 0L, "lsmean_diff" = 0L, "lsmean_diff_ci" = 1L, "pval" = 1L),
  # Default cell formats, one per statistic returned by `s_ancova()`.
  .formats = c(
    "n" = "xx",
    "lsmean" = "xx.xx",
    "lsmean_diff" = "xx.xx",
    "lsmean_diff_ci" = "(xx.xx, xx.xx)",
    "pval" = "x.xxxx | (<0.0001)"
  ),
  # NOTE(review): presumably keeps the reference column cells from being nulled out so that
  # `n` and `lsmean` are shown there - confirm against `make_afun()`.
  .null_ref_cells = FALSE
)

#' @describeIn summarize_ancova Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `summarize_ancova()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_ancova()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("Species", ref_group = "setosa") %>%
#'   add_colcounts() %>%
#'   summarize_ancova(
#'     vars = "Petal.Length",
#'     variables = list(arm = "Species", covariates = NULL),
#'     table_names = "unadj",
#'     conf_level = 0.95, var_labels = "Unadjusted comparison",
#'     .labels = c(lsmean = "Mean", lsmean_diff = "Difference in Means")
#'   ) %>%
#'   summarize_ancova(
#'     vars = "Petal.Length",
#'     variables = list(arm = "Species", covariates = c("Sepal.Length", "Sepal.Width")),
#'     table_names = "adj",
#'     conf_level = 0.95, var_labels = "Adjusted comparison (covariates: Sepal.Length and Sepal.Width)"
#'   ) %>%
#'   build_table(iris)
#'
#' @export
summarize_ancova <- function(lyt,
                             vars,
                             var_labels,
                             ...,
                             show_labels = "visible",
                             table_names = vars,
                             .stats = NULL,
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL,
                             interaction_y = FALSE,
                             interaction_item = NULL) {
  # Specialize the formatted analysis function: the interaction settings are fixed here,
  # together with any statistic, format, label or indentation overrides.
  formatted_afun <- make_afun(
    a_ancova,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    interaction_y = interaction_y,
    interaction_item = interaction_item
  )

  # Everything received through `...` is forwarded to the analysis function as extra arguments.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = formatted_afun,
    extra_args = list(...),
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Cumulative Counts with Thresholds
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize cumulative counts of the values of a (`numeric`) vector that are less than, less than or equal to,
#' greater than, or greater than or equal to user-specified thresholds.
#'
#' @inheritParams h_count_cumulative
#' @inheritParams argument_convention
#'
#' @seealso Relevant helper function [h_count_cumulative()], and descriptive function [d_count_cumulative()].
#'
#' @name count_cumulative
NULL

#' Helper Function for [s_count_cumulative()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to calculate count and fraction of `x` values in the lower or upper tail given a threshold.
#'
#' @inheritParams argument_convention
#' @param threshold (`number`)\cr a cutoff value as threshold to count values of `x`.
#' @param lower_tail (`logical`)\cr whether to count lower tail, default is `TRUE`.
#' @param include_eq (`logical`)\cr whether to include value equal to the `threshold` in
#'   count, default is `TRUE`.
#' @param .N_col (`count`)\cr denominator for fraction calculation.
#'
#' @return A named vector with items:
#'   * `count`: the count of values less than, less or equal to, greater than, or greater or equal to a threshold
#'     of user specification.
#'   * `fraction`: the fraction of the count.
#'
#' @seealso [count_cumulative]
#'
#' @examples
#' set.seed(1, kind = "Mersenne-Twister")
#' x <- c(sample(1:10, 10), NA)
#' .N_col <- length(x)
#' h_count_cumulative(x, 5, .N_col = .N_col)
#' h_count_cumulative(x, 5, lower_tail = FALSE, include_eq = FALSE, na.rm = FALSE, .N_col = .N_col)
#' h_count_cumulative(x, 0, lower_tail = FALSE, .N_col = .N_col)
#' h_count_cumulative(x, 100, lower_tail = FALSE, .N_col = .N_col)
#'
#' @export
h_count_cumulative <- function(x,
                               threshold,
                               lower_tail = TRUE,
                               include_eq = TRUE,
                               na.rm = TRUE, # nolint
                               .N_col) { # nolint
  checkmate::assert_numeric(x)
  checkmate::assert_numeric(threshold)
  checkmate::assert_numeric(.N_col)
  checkmate::assert_flag(lower_tail)
  checkmate::assert_flag(include_eq)
  checkmate::assert_flag(na.rm)

  # Pick the comparison that defines the requested tail.
  compare <- if (lower_tail) {
    if (include_eq) `<=` else `<`
  } else {
    if (include_eq) `>=` else `>`
  }

  # With `na.rm = TRUE` missing values are excluded up front. Otherwise the comparison
  # is `NA` for them and subsetting by `NA` keeps an `NA` element, so they are counted.
  keep <- if (na.rm) !is.na(x) else rep(TRUE, length(x))
  in_tail <- compare(x, threshold)
  n_in_tail <- length(x[keep & in_tail])

  c(count = n_in_tail, fraction = n_in_tail / .N_col)
}

#' Description of Cumulative Count
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a helper function that describes the analysis in [s_count_cumulative()].
#'
#' @inheritParams h_count_cumulative
#'
#' @return Labels for [s_count_cumulative()].
#'
#' @export
d_count_cumulative <- function(threshold, lower_tail, include_eq) {
  checkmate::assert_numeric(threshold)
  # Build the comparison symbol: "<" or ">" optionally followed by "=".
  comparison <- paste0(
    if (lower_tail) "<" else ">",
    if (include_eq) "=" else ""
  )
  # Label has the form "<comparison> <threshold>", e.g. "<= 5".
  paste0(comparison, " ", threshold)
}

#' @describeIn count_cumulative Statistics function that produces a named list given a numeric vector of thresholds.
#'
#' @param thresholds (`numeric`)\cr vector of cutoff value for the counts.
#'
#' @return
#' * `s_count_cumulative()` returns a named list of `count_fraction`s: a list with each `thresholds` value as a
#'   component, each component containing a vector for the count and fraction.
#'
#' @examples
#' # Internal function - s_count_cumulative
#' \dontrun{
#' set.seed(1, kind = "Mersenne-Twister")
#' x <- c(sample(1:10, 10), NA)
#' .N_col <- length(x)
#' s_count_cumulative(x, thresholds = c(0, 5, 11), .N_col = .N_col)
#' s_count_cumulative(x, thresholds = c(0, 5, 11), include_eq = FALSE, na.rm = FALSE, .N_col = .N_col)
#' }
#'
#' @keywords internal
s_count_cumulative <- function(x,
                               thresholds,
                               lower_tail = TRUE,
                               include_eq = TRUE,
                               .N_col, # nolint
                               ...) {
  checkmate::assert_numeric(thresholds, min.len = 1, any.missing = FALSE)

  # Count/fraction for a single cutoff, labelled with its textual description.
  # Additional arguments in `...` (e.g. `na.rm`) are forwarded to `h_count_cumulative()`.
  count_for_threshold <- function(cutoff) {
    stats_vec <- h_count_cumulative(x, cutoff, lower_tail, include_eq, .N_col = .N_col, ...)
    row_label <- d_count_cumulative(cutoff, lower_tail, include_eq)
    formatters::with_label(stats_vec, row_label)
  }

  by_threshold <- Map(count_for_threshold, thresholds)
  # Each list element is named after its threshold value.
  names(by_threshold) <- thresholds

  list(count_fraction = by_threshold)
}

#' @describeIn count_cumulative Formatted analysis function which is used as `afun`
#'   in `count_cumulative()`.
#'
#' @return
#' * `a_count_cumulative()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_count_cumulative
#' \dontrun{
#' # Use the Formatted Analysis function for `analyze()`. We need to ungroup `count_fraction` first
#' # so that the `rtables` formatting function `format_count_fraction()` can be applied correctly.
#' afun <- make_afun(a_count_cumulative, .ungroup_stats = "count_fraction")
#' afun(x, thresholds = c(0, 5, 11), .N_col = .N_col)
#' }
#'
#' @keywords internal
a_count_cumulative <- make_afun(
  # Statistics function being wrapped; it returns a list with a single `count_fraction` element.
  s_count_cumulative,
  # Default format for the `count_fraction` statistic (a count together with its fraction).
  .formats = c(count_fraction = format_count_fraction)
)

#' @describeIn count_cumulative Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_cumulative()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_cumulative()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_cumulative(
#'     vars = "AGE",
#'     thresholds = c(40, 60)
#'   ) %>%
#'   build_table(tern_ex_adsl)
#'
#' @export
count_cumulative <- function(lyt,
                             vars,
                             var_labels = vars,
                             show_labels = "visible",
                             ...,
                             table_names = vars,
                             .stats = NULL,
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL) {
  # Customize the formatted analysis function with the user-supplied settings. The
  # `count_fraction` statistic is ungrouped so that each threshold is handled separately.
  formatted_afun <- make_afun(
    a_count_cumulative,
    .ungroup_stats = "count_fraction",
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Everything passed through `...` (e.g. `thresholds`) is handed to the analysis function.
  analyze(
    lyt,
    vars,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    afun = formatted_afun,
    extra_args = list(...)
  )
}

#' Additional Assertions for `checkmate`
#'
#' Additional assertion functions which can be used together with the `checkmate` package.
#'
#' @inheritParams checkmate::assert_factor
#' @param x (`any`)\cr object to test.
#' @param df (`data.frame`)\cr data set to test.
#' @param variables (named `list` of `character`)\cr list of variables to test.
#' @param include_boundaries (`logical`)\cr whether to include boundaries when testing
#'   for proportions.
#' @param na_level (`character`)\cr the string you have been using to represent NA or
#'   missing data. For `NA` values please consider using directly [is.na()] or
#'   similar approaches.
#' @param min.levels (`integer`)\cr minimum number of factor levels. Default is `1`.
#' @param ... a collection of objects to test.
#'
#' @return Nothing if assertion passes, otherwise prints the error message.
#'
#' @name assertions
NULL

# Check function behind `assert_list_of_variables()`: returns `TRUE` if `x` is a valid list of
# variable names, otherwise the message produced by the failing `checkmate` check.
check_list_of_variables <- function(x) {
  # `NULL` elements are ignored, so optional variables may be left unset.
  non_null <- Filter(Negate(is.null), x)

  list_check <- checkmate::check_list(
    non_null,
    names = "named",
    min.len = 1,
    any.missing = FALSE,
    types = "character"
  )
  if (!isTRUE(list_check)) {
    return(list_check)
  }

  # Structure is fine; additionally no variable name may be an empty string.
  checkmate::check_character(unlist(non_null), min.chars = 1)
}
#' @describeIn assertions Checks whether `x` is a valid list of variable names, i.e. a named list
#'   with at least one element in which every element is a `character` vector without empty strings.
#'   `NULL` elements of the list `x` are dropped with `Filter(Negate(is.null), x)` before checking.
#'
#' @examples
#' # Check whether `x` is a valid list of variable names.
#'
#' # Internal function - assert_list_of_variables
#' \dontrun{
#' assert_list_of_variables(list(val = "a"))
#' assert_list_of_variables(list(val = c("a", "b")))
#' assert_list_of_variables(list(val = c("a", "b"), val2 = NULL))
#'
#' # The following calls fail
#' assert_list_of_variables(list(1, 2))
#' assert_list_of_variables(list("bla" = 2))
#' }
#'
#' @keywords internal
assert_list_of_variables <- checkmate::makeAssertionFunction(check_list_of_variables)

# Check function behind `assert_df_with_variables()`.
#
# Returns `TRUE` if `df` is a data frame containing every variable listed in `variables` as a
# column and, when `na_level` is given, none of these columns contains the string `na_level`.
# Otherwise a message describing the problem is returned. Invalid `df`, `variables` or
# `na_level` arguments raise an assertion error instead.
check_df_with_variables <- function(df, variables, na_level = NULL) {
  checkmate::assert_data_frame(df)
  assert_list_of_variables(variables)

  var_names <- unlist(variables)

  # All requested variables must be column names; extra columns in `df` are fine.
  missing_vars <- setdiff(var_names, colnames(df))
  if (length(missing_vars) > 0) {
    return(paste(
      deparse(substitute(df)),
      "does not contain all specified variables as column names. Missing from dataframe:",
      paste(missing_vars, collapse = ", ")
    ))
  }

  # Optionally look for the explicit missing-data string in the selected columns.
  if (!is.null(na_level)) {
    checkmate::assert_string(na_level)
    # `na.rm = TRUE` keeps real `NA` values in a column from turning the result into `NA`,
    # which would otherwise break the `if ()` below or put `NA` into the message.
    has_na_level <- unlist(lapply(
      as.list(df)[var_names],
      function(col) any(col == na_level, na.rm = TRUE)
    ))
    if (any(has_na_level)) {
      return(paste0(
        deparse(substitute(df)), " contains explicit na_level (", na_level,
        ") in the following columns: ", paste0(var_names[has_na_level],
          collapse = ", "
        )
      ))
    }
  }

  TRUE
}
#' @describeIn assertions Check whether `df` is a data frame that contains all the analysis `variables`
#'   as columns. An error is produced when any of the variables is not present in the data frame,
#'   while `df` may contain additional columns that are not listed in `variables`. If `na_level` is
#'   given, the selected columns are additionally checked not to contain that string.
#'
#' @examples
#' # Check whether `df` contains the analysis `variables`.
#'
#' # Internal function - assert_df_with_variables
#' \dontrun{
#' assert_df_with_variables(
#'   df = data.frame(a = 5, b = 3),
#'   variables = list(val = "a")
#' )
#' assert_df_with_variables(
#'   df = data.frame(a = 5, b = 3),
#'   variables = list(val = c("a", "b"))
#' )
#' assert_df_with_variables(
#'   df = data.frame(a = 5, b = 3),
#'   variables = list(val = c("a", "b"))
#' )
#' assert_df_with_variables(
#'   df = data.frame(a = 5, b = 3, e = "<Missing>"),
#'   variables = list(val = c("a", "b")), na_level = "<Missing>"
#' )
#'
#' # The following calls fail
#' assert_df_with_variables(
#'   df = matrix(1:5, ncol = 2, nrow = 3),
#'   variables = list(val = "a")
#' )
#' assert_df_with_variables(
#'   df = data.frame(a = 5, b = 3),
#'   variables = list(val = c("a", "b", "c"))
#' )
#' assert_df_with_variables(
#'   df = data.frame(a = 5, b = 3, e = "<Missing>"),
#'   variables = list(val = c("a", "b", "e")), na_level = "<Missing>"
#' )
#' }
#'
#' @keywords internal
assert_df_with_variables <- checkmate::makeAssertionFunction(check_df_with_variables)

# Check function behind `assert_valid_factor()`.
#
# Returns `TRUE` if `x` passes `checkmate::check_factor()` with the given constraints and none
# of its levels is an empty string; otherwise returns the message of the failing check.
# `min.levels` must be an integer of at least 1, so factors without levels are rejected.
check_valid_factor <- function(x,
                               min.levels = 1, # nolint
                               max.levels = NULL, # nolint
                               null.ok = TRUE, # nolint
                               any.missing = TRUE, # nolint
                               n.levels = NULL, # nolint
                               len = NULL) {
  # checks on levels insertion
  checkmate::assert_int(min.levels, lower = 1)

  # main factor check; `len` is forwarded so that a requested length is actually enforced
  # (it used to be accepted but silently ignored)
  res <- checkmate::check_factor(x,
    min.levels = min.levels,
    null.ok = null.ok,
    max.levels = max.levels,
    any.missing = any.missing,
    n.levels = n.levels,
    len = len
  )

  # no empty strings allowed
  # NOTE(review): for `x = NULL` with `null.ok = TRUE` this receives `levels(NULL)`, i.e. `NULL` -
  # confirm whether `NULL` input is meant to pass.
  if (isTRUE(res)) {
    res <- checkmate::check_character(levels(x), min.chars = 1)
  }

  res
}
#' @describeIn assertions Check whether `x` is a valid factor, i.e. a factor that has at least
#'   `min.levels` levels, none of which is an empty string. Note that `NULL` and `NA` elements
#'   are allowed.
#'
#' @examples
#' # Check whether `x` is a valid factor.
#'
#' # Internal function - assert_valid_factor
#' \dontrun{
#' assert_valid_factor(factor(c("a", NULL)))
#' assert_valid_factor(factor(c("a", "b")))
#' assert_valid_factor(factor(c("a", "b")), len = 2)
#' assert_valid_factor(factor(c("a", NA)), any.missing = TRUE)
#' assert_valid_factor(factor("A", levels = c("A", "B")))
#'
#' # The following calls fail
#' assert_valid_factor(-1)
#' assert_valid_factor(factor(c("a", "")))
#' assert_valid_factor(factor(c("a", NA)), any.missing = FALSE)
#' assert_valid_factor(factor(NULL))
#' assert_valid_factor(factor(c(NULL, "")))
#' assert_valid_factor(factor())
#' }
#'
#' @keywords internal
assert_valid_factor <- checkmate::makeAssertionFunction(check_valid_factor)


# Check function behind `assert_df_with_factors()`.
#
# First runs `check_df_with_variables()`; if that passes, every selected column is checked with
# `check_valid_factor()`. Returns `TRUE` on success, otherwise a message describing the problem.
check_df_with_factors <- function(df,
                                  variables,
                                  min.levels = 1, # nolint
                                  max.levels = NULL, # nolint
                                  any.missing = TRUE, # nolint
                                  na_level = NULL) {
  vars_check <- check_df_with_variables(df, variables, na_level)
  if (!isTRUE(vars_check)) {
    return(vars_check)
  }

  # Run the factor check on each selected column of the data frame.
  var_names <- unlist(variables)
  factor_checks <- lapply(
    X = as.list(df)[var_names],
    FUN = check_valid_factor,
    min.levels = min.levels,
    max.levels = max.levels,
    any.missing = any.missing
  )

  # A check result is either `TRUE` or the failure message for that column.
  is_invalid <- !vapply(factor_checks, isTRUE, logical(1))
  if (!any(is_invalid)) {
    return(TRUE)
  }

  # One bullet per offending column, followed by the reason reported for it.
  paste0(
    deparse(substitute(df)), " does not contain only factor variables among:",
    "\n* Column `", paste0(var_names[is_invalid],
      "` of the data.frame -> ", factor_checks[is_invalid],
      collapse = "\n* "
    )
  )
}
#' @describeIn assertions Check whether `df` is a data frame where the analysis `variables`
#'   are all valid factors, i.e. factors that pass the same checks as in `assert_valid_factor()`.
#'   Note that the creation of `NA` by direct call of `factor()` will trim `NA` levels out of
#'   the vector list itself.
#'
#' @examples
#' # Check whether `df` contains all factor analysis `variables`.
#' adf <- data.frame(a = factor(c("A", "B")), b = 3)
#' bdf <- data.frame(a = factor(letters[1:3]), b = factor(c(1, 2, 3)), d = 3)
#'
#' # Internal function - assert_df_with_factors
#' \dontrun{
#' assert_df_with_factors(df = adf, variables = list(val = "a"))
#' assert_df_with_factors(df = adf, variables = list(val = "a"), min.levels = 1)
#' assert_df_with_factors(df = adf, variables = list(val = "a"), min.levels = 2, max.levels = 2)
#' assert_df_with_factors(
#'   df = data.frame(a = factor(c("A", NA, "B")), b = 3),
#'   variables = list(val = "a"),
#'   min.levels = 2,
#'   max.levels = 2
#' )
#'
#' # The following calls fail
#' assert_df_with_factors(df = adf, variables = list(val = "a"), min.levels = 1, max.levels = 1)
#' assert_df_with_factors(df = adf, variables = list(val = "a"), min.levels = 1, max.levels = 1)
#' assert_df_with_factors(df = adf, variables = list(val = "a", val = "b", val = ""))
#' assert_df_with_factors(df = adf, variables = list(val = "a", val = "b", val = "d"))
#' assert_df_with_factors(
#'   df = bdf,
#'   variables = list(val = "a", val = "b"),
#'   min.levels = 1,
#'   max.levels = 1
#' )
#' }
#'
#' @keywords internal
assert_df_with_factors <- checkmate::makeAssertionFunction(check_df_with_factors)

#' @describeIn assertions Check whether `x` is a proportion: number between 0 and 1.
#'
#' @examples
#' # Check whether `x` is between 0 and 1.
#' # Internal function - assert_proportion_value
#' \dontrun{
#' assert_proportion_value(x = 0, include_boundaries = TRUE)
#' assert_proportion_value(x = 0.3)
#'
#' # These fail
#' assert_proportion_value(x = 1.3)
#' assert_proportion_value(x = 1)
#' }
#'
#' @keywords internal
assert_proportion_value <- function(x, include_boundaries = FALSE) {
  # `x` must be a single number within the closed interval [0, 1].
  checkmate::assert_number(x, lower = 0, upper = 1)
  checkmate::assert_flag(include_boundaries)
  # Unless boundaries are explicitly allowed, additionally exclude exactly 0 and exactly 1.
  if (isFALSE(include_boundaries)) {
    checkmate::assert_true(x > 0)
    checkmate::assert_true(x < 1)
  }
}

#' Occurrence Table Sorting
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions to score occurrence table subtables and rows which can be used in the
#' sorting of occurrence tables.
#'
#' @name score_occurrences
NULL

#' @describeIn score_occurrences Scoring function which sums the counts across all
#'   columns. It will fail if anything else but counts are used.
#'
#' @inheritParams rtables_access
#'
#' @return
#' * `score_occurrences()` returns the sum of counts across all columns of a table row.
#'
#' @seealso [h_row_first_values()]
#'
#' @examples
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   analyze_num_patients(
#'     vars = "USUBJID",
#'     .stats = c("unique"),
#'     .labels = c("Total number of patients with at least one event")
#'   ) %>%
#'   split_rows_by("AEBODSYS", child_labels = "visible", nested = FALSE) %>%
#'   summarize_num_patients(
#'     var = "USUBJID",
#'     .stats = c("unique", "nonunique"),
#'     .labels = c(
#'       "Total number of patients with at least one event",
#'       "Total number of events"
#'     )
#'   ) %>%
#'   count_occurrences(vars = "AEDECOD")
#'
#' tbl <- build_table(lyt, tern_ex_adae, alt_counts_df = tern_ex_adsl) %>%
#'   prune_table()
#'
#' tbl_sorted <- tbl %>%
#'   sort_at_path(path = c("AEBODSYS", "*", "AEDECOD"), scorefun = score_occurrences)
#'
#' tbl_sorted
#'
#' @export
score_occurrences <- function(table_row) {
  # The score of a row is the total of its counts as extracted by `h_row_counts()`.
  sum(h_row_counts(table_row))
}

#' @describeIn score_occurrences Scoring functions can be produced by this constructor to only include
#'   specific columns in the scoring. See [h_row_counts()] for further information.
#'
#' @inheritParams has_count_in_cols
#'
#' @return
#' * `score_occurrences_cols()` returns a function that sums counts across all specified columns
#'   of a table row.
#'
#' @seealso [h_row_counts()]
#'
#' @examples
#' score_cols_a_and_b <- score_occurrences_cols(col_names = c("A: Drug X", "B: Placebo"))
#'
#' # Note that this here just sorts the AEDECOD inside the AEBODSYS. The AEBODSYS are not sorted.
#' # That would require a second pass of `sort_at_path`.
#' tbl_sorted <- tbl %>%
#'   sort_at_path(path = c("AEBODSYS", "*", "AEDECOD"), scorefun = score_cols_a_and_b)
#'
#' tbl_sorted
#'
#' @export
score_occurrences_cols <- function(...) {
  # The returned scoring function forwards the constructor arguments in `...`
  # (e.g. `col_names`) to `h_row_counts()` every time a row is scored.
  function(table_row) {
    sum(h_row_counts(table_row, ...))
  }
}

#' @describeIn score_occurrences Scoring functions produced by this constructor can be used on
#'   subtables: They sum up all specified column counts in the subtable. This is useful when
#'   there is no available content row summing up these counts.
#'
#' @return
#' * `score_occurrences_subtable()` returns a function that sums counts in each subtable
#'   across all specified columns.
#'
#' @examples
#' score_subtable_all <- score_occurrences_subtable(col_names = names(tbl))
#'
#' # Note that this code just sorts the AEBODSYS, not the AEDECOD within AEBODSYS. That
#' # would require a second pass of `sort_at_path`.
#' tbl_sorted <- tbl %>%
#'   sort_at_path(path = c("AEBODSYS"), scorefun = score_subtable_all, decreasing = FALSE)
#'
#' tbl_sorted
#'
#' @export
score_occurrences_subtable <- function(...) {
  # Row-level scorer restricted to the requested columns; created once and reused.
  score_row <- score_occurrences_cols(...)
  function(table_tree) {
    # Score every leaf row of the subtable and add the scores up.
    leaf_rows <- collect_leaves(table_tree)
    sum(vapply(leaf_rows, score_row, numeric(1)))
  }
}

#' @describeIn score_occurrences Produce score function for sorting table by summing the first content row in
#'   specified columns. Note that this is extending [rtables::cont_n_onecol()] and [rtables::cont_n_allcols()].
#'
#' @return
#' * `score_occurrences_cont_cols()` returns a function that sums counts in the first content row in
#'   specified columns.
#'
#' @export
score_occurrences_cont_cols <- function(...) {
  # Row-level scorer restricted to the requested columns; created once and reused.
  score_row <- score_occurrences_cols(...)
  function(table_tree) {
    if (inherits(table_tree, "ContentRow")) {
      # A content row itself gets no score.
      NA
    } else {
      # Otherwise score the first content row of the given table tree.
      score_row(h_content_first_row(table_tree))
    }
  }
}

#' Convert Table into Matrix of Strings
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to use mostly within tests. The `with_spaces` parameter allows
#' testing not only for content but also for indentation and table structure.
#' The `print_txt_to_copy` parameter instead facilitates test development by printing
#' well-formatted text that only needs to be copied and pasted into the expected output.
#'
#' @param x `rtables` table.
#' @param with_spaces Should the tested table keep the indentation and other relevant spaces?
#' @param print_txt_to_copy Utility to have a way to copy the input table directly
#'   into the expected variable instead of copying it too manually.
#'
#' @return A `matrix` of `string`s.
#'
#' @export
to_string_matrix <- function(x, with_spaces = FALSE, print_txt_to_copy = FALSE) {
  checkmate::assert_flag(with_spaces)
  checkmate::assert_flag(print_txt_to_copy)

  # Either the rendered table split into lines (keeps indentation and spacing),
  # or the strings taken from the matrix form of the table.
  string_out <- if (with_spaces) {
    strsplit(toString(matrix_form(x, TRUE)), "\\n")[[1]]
  } else {
    # NOTE(review): `$string` may rely on partial matching of the element name - confirm.
    matrix_form(x)$string
  }

  # Optionally print R code for a character vector that can be pasted into a test as the
  # expected value. Without spaces, the cells of each row are joined into one line first.
  if (print_txt_to_copy) {
    lines_to_print <- if (with_spaces) {
      string_out
    } else {
      apply(string_out, 1, paste0, collapse = '", "')
    }
    cat(paste0('c(\n  "', paste0(lines_to_print, collapse = '",\n  "'), '"\n)'))
  }

  string_out
}

#' Blank for Missing Input
#'
#' Helper function to use in tabulating model results.
#'
#' @param x (`vector`)\cr input for a cell.
#'
#' @return An empty `character` vector if all entries in `x` are missing (`NA`), otherwise
#'   the unlisted version of `x`.
#'
#' @keywords internal
unlist_and_blank_na <- function(x) {
  flat <- unlist(x)
  # Nothing but missing values (or nothing at all): return an empty character vector.
  if (all(is.na(flat))) {
    return(character())
  }
  flat
}

#' Constructor for Content Functions given Data Frame with Flag Input
#'
#' This can be useful for tabulating model results.
#'
#' @param analysis_var (`string`)\cr variable name for the column containing values to be returned by the
#'   content function.
#' @param flag_var (`string`)\cr variable name for the logical column identifying which row should be returned.
#' @param format (`string`)\cr `rtables` format to use.
#'
#' @return A content function which gives `df[[analysis_var]]` at the row(s) where
#'   `df[[flag_var]]` is `TRUE`, in the given format.
#'
#' @keywords internal
cfun_by_flag <- function(analysis_var,
                         flag_var,
                         format = "xx") {
  checkmate::assert_string(analysis_var)
  checkmate::assert_string(flag_var)
  # The returned content function looks up the flagged row(s) of `df` on every call.
  function(df, labelstr) {
    flagged_rows <- which(df[[flag_var]])
    # Values that are all missing are turned into an empty vector.
    cell_value <- unlist_and_blank_na(df[[analysis_var]][flagged_rows])
    cell <- rcell(cell_value, format = format)
    formatters::with_label(cell, labelstr)
  }
}

#' Content Row Function to Add Row Total to Labels
#'
#' This takes the label of the latest row split level and adds the row total in parentheses.
#'
#' @inheritParams argument_convention
#'
#' @return A `list` containing "row_count" with the row count value and the correct label.
#'
#' @note It is important here to not use `df` but rather `.N_row` in the implementation, because
#'   the former is already split by columns and will refer to the first column of the data only.
#'
#' @keywords internal
c_label_n <- function(df,
                      labelstr,
                      .N_row) { # nolint
  # The row total is stored twice as the value and also appended to the label;
  # `df` is intentionally not used.
  row_total <- c(.N_row, .N_row)
  label_with_n <- paste0(labelstr, " (N=", .N_row, ")")
  list(row_count = formatters::with_label(row_total, label_with_n))
}

#' Layout Creating Function to Add Row Total Counts
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This works analogously to [rtables::add_colcounts()] but on the rows. This function
#'  is a wrapper for [rtables::summarize_row_groups()].
#'
#' @inheritParams argument_convention
#'
#' @return A modified layout where the latest row split labels now have the row-wise
#'   total counts (i.e. without column-based subsetting) attached in parentheses.
#'
#' @note Row count values are contained in these row count rows but are not displayed
#'   so that they are not considered zero rows by default when pruning.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   split_rows_by("RACE", split_fun = drop_split_levels) %>%
#'   add_rowcounts() %>%
#'   analyze("AGE", afun = list_wrap_x(summary), format = "xx.xx") %>%
#'   build_table(DM)
#'
#' @export
add_rowcounts <- function(lyt) {
  # Format that renders every value as an empty string: the row counts are kept in the
  # content row but are not displayed.
  blank_format <- function(x, ...) ""
  row_count_cfun <- make_afun(
    c_label_n,
    .stats = c("row_count"),
    .formats = c(row_count = blank_format)
  )
  summarize_row_groups(lyt, cfun = row_count_cfun)
}

#' Obtain Column Indices
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to extract column indices from a `VTableTree` for a given
#' vector of column names.
#'
#' @param table_tree (`VTableTree`)\cr table to extract the indices from.
#' @param col_names (`character`)\cr vector of column names.
#'
#' @return A vector of column indices.
#'
#' @export
h_col_indices <- function(table_tree, col_names) {
  checkmate::assert_class(table_tree, "VTableNodeInfo")
  # Column names are taken from the names of the `cextra_args` attribute of the column info.
  available_cols <- names(attr(col_info(table_tree), "cextra_args"))
  # Every requested name must exist, and at least one name must be requested.
  checkmate::assert_subset(col_names, available_cols, empty.ok = FALSE)
  match(col_names, available_cols)
}

#' Labels or Names of List Elements
#'
#' Internal helper function for working with nested statistic function results which typically
#' don't have labels but names that we can use.
#'
#' @param x a list
#'
#' @return A `character` vector with the labels or names for the list elements.
#'
#' @keywords internal
labels_or_names <- function(x) {
  checkmate::assert_multi_class(x, c("data.frame", "list"))
  # `sapply()` is kept on purpose: its simplification determines the names of the result.
  element_labels <- sapply(x, obj_label)
  element_names <- rlang::names2(x)
  has_no_label <- sapply(element_labels, is.null)
  # Fall back to the element name wherever no label is available.
  unlist(ifelse(has_no_label, element_names, element_labels))
}

#' Convert to `rtable`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a new generic function to convert objects to `rtable` tables.
#'
#' @param x the object which should be converted to an `rtable`.
#' @param ... additional arguments for methods.
#'
#' @return An `rtables` table object. Note that the concrete class will depend on the method used.
#'
#' @export
as.rtable <- function(x, ...) { # nolint
  # S3 generic: dispatches on the class of the first argument `x`.
  UseMethod("as.rtable")
}

#' @describeIn as.rtable method for converting `data.frame` that contain numeric columns to `rtable`.
#'
#' @param format the format which should be used for the columns.
#'
#' @method as.rtable data.frame
#'
#' @examples
#' x <- data.frame(
#'   a = 1:10,
#'   b = rnorm(10)
#' )
#' as.rtable(x)
#'
#' @export
as.rtable.data.frame <- function(x, format = "xx.xx", ...) {
  # Only data frames made up entirely of numeric values are supported.
  checkmate::assert_numeric(unlist(x))

  # Table-level arguments: column labels (or names) as header and a common cell format.
  table_args <- list(
    header = labels_or_names(x),
    format = format
  )

  # Build one `rrow` per row of `x`: the transposed data frame has one column per original
  # row, which is spread into the unnamed cell arguments of `rrow()`.
  make_row <- function(row, row_name) {
    do.call(rrow, c(as.list(unname(row)), row.name = row_name))
  }
  table_rows <- Map(
    make_row,
    row = as.data.frame(t(x)),
    row_name = rownames(x)
  )

  do.call(rtable, c(table_args, table_rows))
}

#' Split parameters
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Divides the data in the vector `param` into the groups defined by `f` based on the specified `value`. It is relevant
#' in `rtables` layers so as to distribute parameters `.stats` or `.formats` into lists with items corresponding to
#' specific analysis functions.
#'
#' @param param (`vector`)\cr the parameter to be split.
#' @param value (`vector`)\cr the value used to split.
#' @param f (`list` of `vectors`)\cr the reference to make the split
#'
#' @return A named `list` with the same element names as `f`, each containing the elements of `param` whose
#'   `value` is found in the corresponding element of `f` (or `NULL` if there are none).
#'
#' @examples
#' f <- list(
#'   surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci"),
#'   surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval")
#' )
#'
#' .stats <- c("pt_at_risk", "rate_diff")
#' h_split_param(.stats, .stats, f = f)
#'
#' # $surv
#' # [1] "pt_at_risk"
#' #
#' # $surv_diff
#' # [1] "rate_diff"
#'
#' .formats <- c("pt_at_risk" = "xx", "event_free_rate" = "xxx")
#' h_split_param(.formats, names(.formats), f = f)
#'
#' # $surv
#' # pt_at_risk event_free_rate
#' # "xx"           "xxx"
#' #
#' # $surv_diff
#' # NULL
#'
#' @export
h_split_param <- function(param,
                          value,
                          f) {
  # For every group in `f`, keep the entries of `param` whose `value` belongs to that group.
  # Groups without any match are represented by `NULL` (the list element itself is kept).
  lapply(f, function(group) {
    selected <- param[value %in% group]
    if (length(selected) == 0) NULL else selected
  })
}

#' Get Selected Statistics Names
#'
#' Helper function to be used for creating `afun`.
#'
#' @param .stats (`vector` or `NULL`)\cr input to the layout creating function. Note that `NULL` means
#'   in this context that all default statistics should be used.
#' @param all_stats (`character`)\cr all statistics which can be selected here potentially.
#'
#' @return A `character` vector with the selected statistics.
#'
#' @keywords internal
afun_selected_stats <- function(.stats, all_stats) {
  checkmate::assert_character(.stats, null.ok = TRUE)
  checkmate::assert_character(all_stats)
  # `NULL` means no selection was made, so every available statistic is used.
  if (is.null(.stats)) {
    return(all_stats)
  }
  # Otherwise keep only the requested statistics that are available, in the requested order.
  intersect(.stats, all_stats)
}

#' Add Variable Labels to Top Left Corner in Table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper layout creating function to just append the variable labels of a given variables vector
#' from a given dataset in the top left corner. If a variable label is not found then the
#' variable name itself is used instead. Multiple variable labels are concatenated with slashes.
#'
#' @inheritParams argument_convention
#' @param vars (`character`)\cr variable names of which the labels are to be looked up in `df`.
#' @param indent (`integer`)\cr non-negative number of nested indent space, default to 0L which means no indent.
#'   1L means two spaces indent, 2L means four spaces indent and so on.
#'
#' @return A modified layout with the new variable label(s) added to the top-left material.
#'
#' @note This is not an optimal implementation of course, since we are using here the data set
#'   itself during the layout creation. When we have a more mature `rtables` implementation then
#'   this will also be improved or not necessary anymore.
#'
#' @examples
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   split_rows_by("SEX") %>%
#'   append_varlabels(DM, "SEX") %>%
#'   analyze("AGE", afun = mean) %>%
#'   append_varlabels(DM, "AGE", indent = 1)
#' build_table(lyt, DM)
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("SEX") %>%
#'   analyze("AGE", afun = mean) %>%
#'   append_varlabels(DM, c("SEX", "AGE"))
#' build_table(lyt, DM)
#'
#' @export
append_varlabels <- function(lyt, df, vars, indent = 0L) {
  # A logical `indent` is still accepted but converted to an integer with a warning.
  if (checkmate::test_flag(indent)) {
    warning("indent argument is now accepting integers. Boolean indent will be converted to integers.")
    indent <- as.integer(indent)
  }

  checkmate::assert_data_frame(df)
  checkmate::assert_character(vars)
  checkmate::assert_count(indent)

  # Labels of all requested variables, joined with slashes into a single string.
  var_labs <- formatters::var_labels(df[vars], fill = TRUE)
  joined_labs <- paste(var_labs, collapse = " / ")

  # Each indent level adds two leading spaces.
  padding <- paste(rep(" ", indent * 2), collapse = "")

  append_topleft(lyt, paste0(padding, joined_labs))
}

#' Helper Functions for Subgroup Treatment Effect Pattern (STEP) Calculations
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions that are used internally for the STEP calculations.
#'
#' @inheritParams argument_convention
#'
#' @name h_step
#' @include control_step.R
NULL

#' @describeIn h_step creates the windows for STEP, based on the control settings
#'   provided.
#'
#' @param x (`numeric`)\cr biomarker value(s) to use (without `NA`).
#' @param control (named `list`)\cr output from `control_step()`.
#'
#' @return
#' * `h_step_window()` returns a list containing the window-selection matrix `sel`
#'   and the interval information matrix `interval`.
#'
#' @export
h_step_window <- function(x,
                          control = control_step()) {
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  n_points <- control$num_points
  bandwidth <- control$bandwidth

  # `sel[j, i]` flags whether observation `j` falls into window `i`;
  # `out` holds one row of interval information per window.
  sel <- matrix(FALSE, length(x), n_points)
  out <- matrix(0, n_points, 3)
  colnames(out) <- paste("Interval", c("Center", "Lower", "Upper"))
  if (control$use_percentile) {
    # Create windows according to percentile cutoffs.
    # Columns 1-3 hold the percentile scale, columns 4-6 the corresponding values of `x`.
    out <- cbind(out, out)
    colnames(out)[1:3] <- paste("Percentile", c("Center", "Lower", "Upper"))
    # Equally spaced points in (0, 1]; only the first `n_points` of them are used as centers.
    xs <- seq(0, 1, length.out = n_points + 2)[-1]
    for (i in seq_len(n_points)) {
      # Scalar `max()`/`min()` are used on purpose: they also clamp to the full range
      # when `bandwidth` has length zero.
      out[i, 2:3] <- c(
        max(xs[i] - bandwidth, 0),
        min(xs[i] + bandwidth, 1)
      )
      out[i, 5:6] <- stats::quantile(x, out[i, 2:3])
      sel[, i] <- x >= out[i, 5] & x <= out[i, 6]
    }
    # Center is the middle point of the percentile window.
    out[, 1] <- xs[-(n_points + 1)]
    out[, 4] <- stats::quantile(x, out[, 1])
  } else {
    # Create windows according to cutoffs.
    m <- c(min(x), max(x))
    xs <- seq(m[1], m[2], length.out = n_points + 2)[-1]
    for (i in seq_len(n_points)) {
      out[i, 2:3] <- c(
        max(xs[i] - bandwidth, m[1]),
        min(xs[i] + bandwidth, m[2])
      )
      sel[, i] <- x >= out[i, 2] & x <= out[i, 3]
    }
    # Center is the same as the point for predicting.
    out[, 1] <- xs[-(n_points + 1)]
  }
  list(sel = sel, interval = out)
}

#' @describeIn h_step calculates the estimated treatment effect estimate
#'   on the linear predictor scale and corresponding standard error from a STEP `model` fitted
#'   on `data` given `variables` specification, for a single biomarker value `x`.
#'   This works for both `coxph` and `glm` models, i.e. for calculating log hazard ratio or log odds
#'   ratio estimates.
#'
#' @param model the regression model object.
#'
#' @return
#' * `h_step_trt_effect()` returns a vector with elements `est` and `se`.
#'
#' @export
h_step_trt_effect <- function(data,
                              model,
                              variables,
                              x) {
  checkmate::assert_multi_class(model, c("coxph", "glm"))
  checkmate::assert_number(x)
  assert_df_with_variables(data, variables)
  checkmate::assert_factor(data[[variables$arm]], n.levels = 2)

  arm_var <- variables$arm

  # Two-row prediction data set: both rows are copies of the first row of `data`,
  # with the biomarker set to `x` and the arm set to the first and second arm level.
  pred_data <- data[c(1, 1), ]
  pred_data[, variables$biomarker] <- x
  pred_data[, arm_var] <- levels(data[[arm_var]])

  # Design matrix for the two rows, built from the model terms without the response.
  rhs_terms <- stats::delete.response(stats::terms(model))
  design <- stats::model.matrix(
    rhs_terms,
    data = stats::model.frame(rhs_terms, data = pred_data, xlev = model$xlevels),
    contrasts.arg = model$contrasts
  )

  beta <- stats::coef(model)
  # Note: It is important to subset the design matrix to the coefficient names, otherwise
  # intercept and strata columns are included for coxph() models.
  # The row difference (second arm level minus first) is the contrast vector.
  contrast <- diff(design[, names(beta)])

  est <- contrast %*% beta
  se <- sqrt(contrast %*% stats::vcov(model) %*% t(contrast))
  c(
    est = est,
    se = se
  )
}

#' @describeIn h_step builds the model formula used in survival STEP calculations.
#'
#' @return
#' * `h_step_survival_formula()` returns a model formula.
#'
#' @export
h_step_survival_formula <- function(variables,
                                    control = control_step()) {
  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_list_of_variables(variables[c("arm", "biomarker", "event", "time")])

  # Left-hand side: survival outcome.
  lhs <- paste0("Surv(", variables$time, ", ", variables$event, ")")

  # Right-hand side starts with the arm and is extended piece by piece.
  rhs <- variables$arm
  if (control$degree > 0) {
    # Interaction of the arm with a raw polynomial in the biomarker.
    rhs <- paste0(rhs, " * stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)")
  }
  if (!is.null(variables$covariates)) {
    rhs <- paste0(rhs, " + ", paste(variables$covariates, collapse = "+"))
  }
  if (!is.null(variables$strata)) {
    rhs <- paste0(rhs, " + strata(", paste0(variables$strata, collapse = ", "), ")")
  }

  stats::as.formula(paste0(lhs, " ~ ", rhs))
}

#' @describeIn h_step estimates the model with `formula` built based on
#'   `variables` in `data` for a given `subset` and `control` parameters for the
#'   Cox regression.
#'
#' @param formula (`formula`)\cr the regression model formula.
#' @param subset (`logical`)\cr subset vector.
#'
#' @return
#' * `h_step_survival_est()` returns a matrix of number of observations `n`,
#'   `events`, log hazard ratio estimates `loghr`, standard error `se`,
#'   and Wald confidence interval bounds `ci_lower` and `ci_upper`. One row is
#'   included for each biomarker value in `x`.
#'
#' @export
h_step_survival_est <- function(formula,
                                data,
                                variables,
                                x,
                                subset = rep(TRUE, nrow(data)),
                                control = control_coxph()) {
  checkmate::assert_formula(formula)
  assert_df_with_variables(data, variables)
  checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  # Note: `subset` in `coxph` needs to be an expression referring to `data` variables,
  # therefore the subset vector is stored as a column of `data`.
  data$.subset <- subset

  # Warnings raised while fitting are collected and muffled here, and replaced
  # by a single summary warning below.
  coxph_warnings <- NULL
  fit <- withCallingHandlers(
    survival::coxph(
      formula = formula,
      data = data,
      subset = .subset,
      ties = control$ties
    ),
    warning = function(w) {
      coxph_warnings <<- c(coxph_warnings, w)
      invokeRestart("muffleWarning")
    }
  )
  if (!is.null(coxph_warnings)) {
    warning(paste(
      "Fit warnings occurred, please consider using a simpler model, or",
      "larger `bandwidth`, less `num_points` in `control_step()` settings"
    ))
  }

  # Produce a matrix with one row per `x` and columns `est` and `se`.
  estimates <- t(vapply(
    X = x,
    FUN = h_step_trt_effect,
    FUN.VALUE = numeric(2),
    data = data,
    model = fit,
    variables = variables
  ))
  loghr <- estimates[, "est"]
  se <- estimates[, "se"]

  # Wald confidence interval based on the normal quantile for `conf_level`.
  q_norm <- stats::qnorm((1 + control$conf_level) / 2)
  cbind(
    n = fit$n,
    events = fit$nevent,
    loghr = loghr,
    se = se,
    ci_lower = loghr - q_norm * se,
    ci_upper = loghr + q_norm * se
  )
}

#' @describeIn h_step builds the model formula used in response STEP calculations.
#'
#' @return
#' * `h_step_rsp_formula()` returns a model formula.
#'
#' @export
h_step_rsp_formula <- function(variables,
                               control = c(control_step(), control_logistic())) {
  checkmate::assert_character(variables$covariates, null.ok = TRUE)
  assert_list_of_variables(variables[c("arm", "biomarker", "response")])

  # Left-hand side: the first literal "response" in the response definition is
  # replaced by the name of the response variable.
  lhs <- sub(
    pattern = "response",
    replacement = variables$response,
    x = control$response_definition,
    fixed = TRUE
  )

  # Right-hand side starts with the arm and is extended piece by piece.
  rhs <- variables$arm
  if (control$degree > 0) {
    # Interaction of the arm with a raw polynomial in the biomarker.
    rhs <- paste0(rhs, " * stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)")
  }
  if (!is.null(variables$covariates)) {
    rhs <- paste0(rhs, " + ", paste(variables$covariates, collapse = "+"))
  }
  if (!is.null(variables$strata)) {
    # Several strata variables are combined into a single interaction factor.
    strata_arg <- variables$strata
    if (length(strata_arg) > 1) {
      strata_arg <- paste0("I(interaction(", paste0(strata_arg, collapse = ", "), "))")
    }
    rhs <- paste0(rhs, "+ strata(", strata_arg, ")")
  }

  stats::as.formula(paste0(lhs, " ~ ", rhs))
}

#' @describeIn h_step estimates the model with `formula` built based on
#'   `variables` in `data` for a given `subset` and `control` parameters for the
#'   logistic regression.
#'
#' @param formula (`formula`)\cr the regression model formula.
#' @param subset (`logical`)\cr subset vector.
#'
#' @return
#' * `h_step_rsp_est()` returns a matrix of number of observations `n`, log odds
#'   ratio estimates `logor`, standard error `se`, and Wald confidence interval bounds
#'   `ci_lower` and `ci_upper`. One row is included for each biomarker value in `x`.
#'
#' @export
h_step_rsp_est <- function(formula,
                           data,
                           variables,
                           x,
                           subset = rep(TRUE, nrow(data)),
                           control = control_logistic()) {
  checkmate::assert_formula(formula)
  assert_df_with_variables(data, variables)
  checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  # Note: `subset` in `glm` needs to be an expression referring to `data` variables,
  # therefore the subset vector is stored as a column of `data`.
  data$.subset <- subset

  # Warnings raised while fitting are collected and muffled here, and replaced
  # by a single summary warning below.
  fit_warnings <- NULL
  fit <- withCallingHandlers(
    if (is.null(variables$strata)) {
      # Without strata: ordinary logistic regression.
      stats::glm(
        formula = formula,
        data = data,
        subset = .subset,
        family = stats::binomial("logit")
      )
    } else {
      # With strata: conditional logistic regression.
      # clogit needs coxph and strata imported
      survival::clogit(
        formula = formula,
        data = data,
        subset = .subset
      )
    },
    warning = function(w) {
      fit_warnings <<- c(fit_warnings, w)
      invokeRestart("muffleWarning")
    }
  )
  if (!is.null(fit_warnings)) {
    warning(paste(
      "Fit warnings occurred, please consider using a simpler model, or",
      "larger `bandwidth`, less `num_points` in `control_step()` settings"
    ))
  }

  # Produce a matrix with one row per `x` and columns `est` and `se`.
  estimates <- t(vapply(
    X = x,
    FUN = h_step_trt_effect,
    FUN.VALUE = numeric(2),
    data = data,
    model = fit,
    variables = variables
  ))
  logor <- estimates[, "est"]
  se <- estimates[, "se"]

  # Wald confidence interval based on the normal quantile for `conf_level`.
  q_norm <- stats::qnorm((1 + control$conf_level) / 2)
  cbind(
    n = length(fit$y),
    logor = logor,
    se = se,
    ci_lower = logor - q_norm * se,
    ci_upper = logor + q_norm * se
  )
}

#' Occurrence Counts
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions for analyzing frequencies and fractions of occurrences for patients with occurrence
#' data. Primary analysis variables are the dictionary terms. All occurrences are counted for total
#' counts. Multiple occurrences within patient at the lowest term level displayed in the table are
#' counted only once.
#'
#' @inheritParams argument_convention
#'
#' @note By default, occurrences which don't appear in a given row split are dropped from the table and
#'   the occurrences in the table are sorted alphabetically per row split. Therefore, the corresponding layout
#'   needs to use `split_fun = drop_split_levels` in the `split_rows_by` calls. Use `drop = FALSE` if you would
#'   like to show all occurrences.
#'
#' @name count_occurrences
NULL

#' @describeIn count_occurrences Statistics function which counts number of patients that report an
#' occurrence.
#'
#' @param denom (`string`)\cr choice of denominator for patient proportions. Can be:
#'   - `N_col`: total number of patients in this column across rows
#'   - `n`: number of patients with any occurrences
#'
#' @return
#' * `s_count_occurrences()` returns a list with:
#'   * `count`: list of counts with one element per occurrence.
#'   * `count_fraction`: list of counts and fractions with one element per occurrence.
#'   * `fraction`: list of numerators and denominators with one element per occurrence.
#'
#' @examples
#' df <- data.frame(
#'   USUBJID = as.character(c(1, 1, 2, 4, 4, 4)),
#'   MHDECOD = c("MH1", "MH2", "MH1", "MH1", "MH1", "MH3")
#' )
#'
#' N_per_col <- 4L
#'
#' # Count unique occurrences per subject.
#' s_count_occurrences(
#'   df,
#'   .N_col = N_per_col,
#'   .df_row = df,
#'   .var = "MHDECOD",
#'   id = "USUBJID"
#' )
#'
#' @export
s_count_occurrences <- function(df,
                                denom = c("N_col", "n"),
                                .N_col, # nolint
                                .df_row,
                                drop = TRUE,
                                .var = "MHDECOD",
                                id = "USUBJID") {
  checkmate::assert_flag(drop)
  assert_df_with_variables(df, list(range = .var, id = id))
  checkmate::assert_count(.N_col)
  checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[id]], classes = c("factor", "character"))
  denom <- match.arg(denom)

  occurrences <- df[[.var]]
  if (drop) {
    # Restrict to the occurrences observed in the row data, sorted.
    # Note that we don't try to preserve original level order here since a) that would require
    # more time to look up in large original levels and b) that would fail for character input variable.
    occurrence_levels <- sort(unique(.df_row[[.var]]))
    if (length(occurrence_levels) == 0) {
      stop(
        "no empty `.df_row` input allowed when `drop = TRUE`,",
        " please use `split_fun = drop_split_levels` in the `rtables` `split_rows_by` calls"
      )
    }
    occurrences <- factor(occurrences, levels = occurrence_levels)
  }

  ids <- factor(df[[id]])
  # Denominator: either the number of distinct ids in `df` or the column count.
  dn <- switch(denom,
    n = nlevels(ids),
    N_col = .N_col
  )

  # Each id is counted at most once per occurrence, regardless of how often it is recorded.
  n_ids_per_occurrence <- as.list(rowSums(table(occurrences, ids) > 0))

  count_with_fraction <- function(i) {
    # Avoid 0 / 0 when both the count and the denominator are zero.
    if (i == 0 && dn == 0) {
      c(0, 0)
    } else {
      c(i, i / dn)
    }
  }
  num_and_denom <- function(i) c("num" = i, "denom" = dn)

  list(
    count = n_ids_per_occurrence,
    count_fraction = lapply(n_ids_per_occurrence, count_with_fraction),
    fraction = lapply(n_ids_per_occurrence, num_and_denom)
  )
}

#' @describeIn count_occurrences Formatted analysis function which is used as `afun`
#'   in `count_occurrences()`.
#'
#' @return
#' * `a_count_occurrences()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' #  We need to ungroup `count_fraction` first so that the `rtables` formatting
#' # function `format_count_fraction()` can be applied correctly.
#' afun <- make_afun(a_count_occurrences, .ungroup_stats = c("count", "count_fraction", "fraction"))
#' afun(
#'   df,
#'   .N_col = N_per_col,
#'   .df_row = df,
#'   .var = "MHDECOD",
#'   id = "USUBJID"
#' )
#'
#' @export
a_count_occurrences <- make_afun(
  s_count_occurrences,
  # Default cell format for each statistic returned by `s_count_occurrences()`.
  .formats = c(
    count = "xx",
    count_fraction = format_count_fraction_fixed_dp,
    fraction = format_fraction_fixed_dp
  )
)

#' @describeIn count_occurrences Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_occurrences()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_occurrences()` to the table layout.
#'
#' @examples
#' library(dplyr)
#' df <- data.frame(
#'   USUBJID = as.character(c(
#'     1, 1, 2, 4, 4, 4,
#'     6, 6, 6, 7, 7, 8
#'   )),
#'   MHDECOD = c(
#'     "MH1", "MH2", "MH1", "MH1", "MH1", "MH3",
#'     "MH2", "MH2", "MH3", "MH1", "MH2", "MH4"
#'   ),
#'   ARM = rep(c("A", "B"), each = 6)
#' )
#' df_adsl <- df %>%
#'   select(USUBJID, ARM) %>%
#'   unique()
#'
#' # Create table layout
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_occurrences(vars = "MHDECOD", .stats = c("count_fraction"))
#'
#' # Apply table layout to data and produce `rtable` object
#' lyt %>%
#'   build_table(df, alt_counts_df = df_adsl) %>%
#'   prune_table()
#'
#' @export
count_occurrences <- function(lyt,
                              vars,
                              var_labels = vars,
                              show_labels = "hidden",
                              ...,
                              table_names = vars,
                              .stats = "count_fraction",
                              .formats = NULL,
                              .labels = NULL,
                              .indent_mods = NULL) {
  # Customize the formatted analysis function with the requested statistics,
  # formats, labels and indentation; the selected statistics are ungrouped.
  occurrence_afun <- make_afun(
    a_count_occurrences,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = .stats
  )

  # Additional arguments in `...` are handed over to the analysis function as `extra_args`.
  stat_args <- list(...)

  analyze(
    lyt = lyt,
    vars = vars,
    afun = occurrence_afun,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    extra_args = stat_args
  )
}

#' Count the Number of Patients with a Particular Event
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The primary analysis variable `.var` denotes the unique patient identifier.
#'
#' @inheritParams argument_convention
#'
#' @seealso [count_patients_with_flags]
#'
#' @name count_patients_with_event
NULL

#' @describeIn count_patients_with_event Statistics function which counts the number of patients for which
#'   the defined event has occurred.
#'
#' @inheritParams summarize_variables
#' @param .var (`character`)\cr name of the column that contains the unique identifier.
#' @param filters (`character`)\cr a character vector specifying the column names and flag variables
#'   to be used for counting the number of unique identifiers satisfying such conditions.
#'   Multiple column names and flags are accepted in this format
#'   `c("column_name1" = "flag1", "column_name2" = "flag2")`.
#'   Note that only equality is being accepted as condition.
#'
#' @return
#' * `s_count_patients_with_event()` returns the count and fraction of unique identifiers with the defined event.
#'
#' @examples
#' library(dplyr)
#'
#' # `s_count_patients_with_event()`
#'
#' s_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y")
#' )
#' s_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL")
#' )
#' s_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL"),
#'   denom = "N_col",
#'   .N_col = 456
#' )
#'
#' @export
s_count_patients_with_event <- function(df,
                                        .var,
                                        filters,
                                        .N_col, # nolint
                                        .N_row, # nolint
                                        denom = c("n", "N_row", "N_col")) {
  # The names of `filters` are the columns to check, its values the required flags.
  flag_cols <- names(filters)
  checkmate::assert_subset(flag_cols, colnames(df))

  # Row positions matching each single filter condition (equality only).
  rows_per_filter <- Map(
    function(col, flag) which(df[[col]] == flag),
    flag_cols,
    filters
  )
  # Rows which satisfy all filter conditions at the same time.
  rows_all_filters <- Reduce(intersect, rows_per_filter)
  ids_with_event <- as.character(unique(df[rows_all_filters, ][[.var]]))

  # Count the identifiers with the event among all unique identifiers in `df`.
  s_count_values(
    as.character(unique(df[[.var]])),
    ids_with_event,
    denom = denom,
    .N_col = .N_col,
    .N_row = .N_row
  )
}

#' @describeIn count_patients_with_event Formatted analysis function which is used as `afun`
#'   in `count_patients_with_event()`.
#'
#' @return
#' * `a_count_patients_with_event()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # `a_count_patients_with_event()`
#'
#' a_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y"),
#'   .N_col = 100,
#'   .N_row = 100
#' )
#'
#' @export
a_count_patients_with_event <- make_afun(
  s_count_patients_with_event,
  # Default cell format for the `count_fraction` statistic.
  .formats = c(
    count_fraction = format_count_fraction_fixed_dp
  )
)

#' @describeIn count_patients_with_event Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_patients_with_event()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_patients_with_event()` to the table layout.
#'
#' @examples
#' # `count_patients_with_event()`
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_values(
#'     "STUDYID",
#'     values = "AB12345",
#'     .stats = "count",
#'     .labels = c(count = "Total AEs")
#'   ) %>%
#'   count_patients_with_event(
#'     "SUBJID",
#'     filters = c("TRTEMFL" = "Y"),
#'     .labels = c(count_fraction = "Total number of patients with at least one adverse event"),
#'     table_names = "tbl_all"
#'   ) %>%
#'   count_patients_with_event(
#'     "SUBJID",
#'     filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL"),
#'     .labels = c(count_fraction = "Total number of patients with fatal AEs"),
#'     table_names = "tbl_fatal"
#'   ) %>%
#'   count_patients_with_event(
#'     "SUBJID",
#'     filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL", "AEREL" = "Y"),
#'     .labels = c(count_fraction = "Total number of patients with related fatal AEs"),
#'     .indent_mods = c(count_fraction = 2L),
#'     table_names = "tbl_rel_fatal"
#'   )
#' build_table(lyt, tern_ex_adae, alt_counts_df = tern_ex_adsl)
#'
#' @export
count_patients_with_event <- function(lyt,
                                      vars,
                                      ...,
                                      table_names = vars,
                                      .stats = "count_fraction",
                                      .formats = NULL,
                                      .labels = NULL,
                                      .indent_mods = NULL) {
  # Customize the formatted analysis function with the requested statistics,
  # formats, labels and indentation.
  event_afun <- make_afun(
    a_count_patients_with_event,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Variable labels are only shown when more than one variable is analyzed.
  label_visibility <- if (length(vars) > 1) "visible" else "hidden"

  analyze(
    lyt,
    vars,
    afun = event_afun,
    extra_args = list(...),
    show_labels = label_visibility,
    table_names = table_names
  )
}

#' Survival Time Point Analysis
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize patients' survival rate and difference of survival rates between groups at a time point.
#'
#' @inheritParams argument_convention
#' @inheritParams s_surv_time
#' @param time_point (`number`)\cr survival time point of interest.
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
#'   [control_surv_timepoint()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for survival rate.
#'   * `conf_type` (`string`)\cr confidence interval type. Options are "plain" (default), "log", "log-log",
#'     see more in [survival::survfit()]. Note option "none" is no longer supported.
#'   * `time_point` (`number`)\cr survival time point of interest.
#'
#' @name survival_timepoint
NULL

#' @describeIn survival_timepoint Statistics function which analyzes survival rate.
#'
#' @return
#' * `s_surv_timepoint()` returns the statistics:
#'   * `pt_at_risk`: Patients remaining at risk.
#'   * `event_free_rate`: Event-free rate (%).
#'   * `rate_se`: Standard error of event free rate.
#'   * `rate_ci`: Confidence interval for event free rate.
#'
#' @examples
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVAL = day2month(AVAL),
#'     is_event = CNSR == 0
#'   )
#' df <- adtte_f %>%
#'   filter(ARMCD == "ARM A")
#'
#' # Internal function - s_surv_timepoint
#' \dontrun{
#' s_surv_timepoint(df, .var = "AVAL", time_point = 7, is_event = "is_event")
#' }
#'
#' @keywords internal
s_surv_timepoint <- function(df,
                             .var,
                             time_point,
                             is_event,
                             control = control_surv_timepoint()) {
  checkmate::assert_string(.var)
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE)
  checkmate::assert_number(time_point)
  checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE)

  conf_type <- control$conf_type
  conf_level <- control$conf_level

  # Survival curve fit without grouping, summarized at `time_point`.
  formula <- stats::as.formula(paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1"))
  srv_fit <- survival::survfit(
    formula = formula,
    data = df,
    conf.int = conf_level,
    conf.type = conf_type
  )
  # `extend = TRUE` is passed so that the summary is requested at `time_point` as given.
  s_srv_fit <- summary(srv_fit, times = time_point, extend = TRUE)
  df_srv_fit <- as.data.frame(s_srv_fit[c("time", "n.risk", "surv", "lower", "upper", "std.err")])

  if (df_srv_fit[["n.risk"]] == 0) {
    # Nobody is at risk any more at `time_point`: all statistics are reported as missing.
    pt_at_risk <- NA_real_
    event_free_rate <- NA_real_
    rate_se <- NA_real_
    rate_ci <- c(NA_real_, NA_real_)
  } else {
    pt_at_risk <- df_srv_fit[["n.risk"]]
    event_free_rate <- df_srv_fit[["surv"]]
    rate_se <- df_srv_fit[["std.err"]]
    rate_ci <- c(df_srv_fit[["lower"]], df_srv_fit[["upper"]])
  }

  # Rates, standard error and confidence limits are returned on the percentage scale.
  list(
    pt_at_risk = formatters::with_label(pt_at_risk, "Patients remaining at risk"),
    event_free_rate = formatters::with_label(event_free_rate * 100, "Event Free Rate (%)"),
    rate_se = formatters::with_label(rate_se * 100, "Standard Error of Event Free Rate"),
    rate_ci = formatters::with_label(rate_ci * 100, f_conf_level(conf_level))
  )
}

#' @describeIn survival_timepoint Formatted analysis function which is used as `afun` in `surv_timepoint()`
#'   when `method = "surv"`.
#'
#' @return
#' * `a_surv_timepoint()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_surv_timepoint
#' \dontrun{
#' a_surv_timepoint(df, .var = "AVAL", time_point = 7, is_event = "is_event")
#' }
#'
#' @keywords internal
a_surv_timepoint <- make_afun(
  s_surv_timepoint,
  # Default cell format for each statistic returned by `s_surv_timepoint()`.
  .formats = c(
    pt_at_risk = "xx",
    event_free_rate = "xx.xx",
    rate_se = "xx.xx",
    rate_ci = "(xx.xx, xx.xx)"
  ),
  # Standard error and confidence interval get one additional indentation level.
  .indent_mods = c(pt_at_risk = 0L, event_free_rate = 0L, rate_se = 1L, rate_ci = 1L)
)

#' @describeIn survival_timepoint Statistics function which analyzes difference between two survival rates.
#'
#' @return
#' * `s_surv_timepoint_diff()` returns the statistics:
#'   * `rate_diff`: Event-free rate difference between two groups.
#'   * `rate_diff_ci`: Confidence interval for the difference.
#'   * `ztest_pval`: p-value to test the difference is 0.
#'
#' @examples
#' df_ref_group <- adtte_f %>%
#'   filter(ARMCD == "ARM B")
#'
#' # Internal function - s_surv_timepoint_diff
#' \dontrun{
#' s_surv_timepoint_diff(df, df_ref_group, .in_ref_col = TRUE, .var = "AVAL", is_event = "is_event")
#' s_surv_timepoint_diff(
#'   df,
#'   df_ref_group,
#'   .in_ref_col = FALSE,
#'   .var = "AVAL",
#'   time_point = 7,
#'   is_event = "is_event"
#' )
#' }
#'
#' @keywords internal
s_surv_timepoint_diff <- function(df,
                                  .var,
                                  .ref_group,
                                  .in_ref_col,
                                  time_point,
                                  control = control_surv_timepoint(),
                                  ...) {
  lbl_diff <- "Difference in Event Free Rate"
  lbl_pval <- "p-value (Z-test)"

  # In the reference column there is nothing to compare against: return empty strings.
  if (.in_ref_col) {
    return(
      list(
        rate_diff = formatters::with_label("", lbl_diff),
        rate_diff_ci = formatters::with_label("", f_conf_level(control$conf_level)),
        ztest_pval = formatters::with_label("", lbl_pval)
      )
    )
  }

  # Stack reference and comparison data, then analyze each group separately.
  data <- rbind(.ref_group, df)
  group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "x"))
  res_per_group <- lapply(split(data, group), function(x) {
    s_surv_timepoint(df = x, .var = .var, time_point = time_point, control = control, ...)
  })
  res_ref <- res_per_group[["ref"]]
  res_x <- res_per_group[["x"]]

  # Difference of the event free rates, with standard error combined from both groups.
  rate_diff <- res_x$event_free_rate - res_ref$event_free_rate
  se_diff <- sqrt(res_x$rate_se^2 + res_ref$rate_se^2)

  # Two-sided normal approximation confidence interval for the difference.
  qs <- c(-1, 1) * stats::qnorm(1 - (1 - control$conf_level) / 2)
  rate_diff_ci <- rate_diff + qs * se_diff

  # Two-sided Z-test p-value; missing when the difference itself is missing.
  ztest_pval <- if (is.na(rate_diff)) {
    NA
  } else {
    2 * (1 - stats::pnorm(abs(rate_diff) / se_diff))
  }

  list(
    rate_diff = formatters::with_label(rate_diff, lbl_diff),
    rate_diff_ci = formatters::with_label(rate_diff_ci, f_conf_level(control$conf_level)),
    ztest_pval = formatters::with_label(ztest_pval, lbl_pval)
  )
}

#' @describeIn survival_timepoint Formatted analysis function which is used as `afun` in `surv_timepoint()`
#'   when `method = "surv_diff"`.
#'
#' @return
#' * `a_surv_timepoint_diff()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_surv_timepoint_diff
#' \dontrun{
#' a_surv_timepoint_diff(
#'   df,
#'   df_ref_group,
#'   .in_ref_col = FALSE,
#'   .var = "AVAL",
#'   time_point = 7,
#'   is_event = "is_event"
#' )
#' }
#'
#' @keywords internal
a_surv_timepoint_diff <- make_afun(
  s_surv_timepoint_diff,
  # Default cell format for each statistic returned by `s_surv_timepoint_diff()`.
  .formats = c(
    rate_diff = "xx.xx",
    rate_diff_ci = "(xx.xx, xx.xx)",
    ztest_pval = "x.xxxx | (<0.0001)"
  ),
  # Default indentation modifiers per statistic.
  .indent_mods = c(rate_diff = 1L, rate_diff_ci = 2L, ztest_pval = 2L)
)

#' @describeIn survival_timepoint Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param method (`string`)\cr either `surv` (survival estimations),
#'   `surv_diff` (difference in survival with the control) or `both`.
#' @param table_names_suffix (`string`)\cr optional suffix for the `table_names` used for the `rtables` to
#'   avoid warnings from duplicate table names.
#'
#' @return
#' * `surv_timepoint()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_surv_timepoint()` and/or `s_surv_timepoint_diff()` to the table layout depending on
#'   the value of `method`.
#'
#' @examples
#' # Survival at given time points.
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   surv_timepoint(
#'     vars = "AVAL",
#'     var_labels = "Months",
#'     is_event = "is_event",
#'     time_point = 7
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' # Difference in survival at given time points.
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   surv_timepoint(
#'     vars = "AVAL",
#'     var_labels = "Months",
#'     is_event = "is_event",
#'     time_point = 9,
#'     method = "surv_diff",
#'     .indent_mods = c("rate_diff" = 0L, "rate_diff_ci" = 2L, "ztest_pval" = 2L)
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' # Survival and difference in survival at given time points.
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   surv_timepoint(
#'     vars = "AVAL",
#'     var_labels = "Months",
#'     is_event = "is_event",
#'     time_point = 9,
#'     method = "both"
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' @export
surv_timepoint <- function(lyt,
                           vars,
                           ...,
                           table_names_suffix = "",
                           var_labels = "Time",
                           show_labels = "visible",
                           method = c("surv", "surv_diff", "both"),
                           .stats = c(
                             "pt_at_risk", "event_free_rate", "rate_ci",
                             "rate_diff", "rate_diff_ci", "ztest_pval"
                           ),
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = NULL) {
  method <- match.arg(method)
  checkmate::assert_string(table_names_suffix)

  # Statistics belonging to each of the two analysis functions; the user
  # supplied settings are split accordingly.
  stat_groups <- list(
    surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci"),
    surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval")
  )
  .stats <- h_split_param(.stats, .stats, f = stat_groups)
  .formats <- h_split_param(.formats, names(.formats), f = stat_groups)
  .labels <- h_split_param(.labels, names(.labels), f = stat_groups)
  .indent_mods <- h_split_param(.indent_mods, names(.indent_mods), f = stat_groups)

  afun_surv <- make_afun(
    a_surv_timepoint,
    .stats = .stats$surv,
    .formats = .formats$surv,
    .labels = .labels$surv,
    .indent_mods = .indent_mods$surv
  )
  afun_surv_diff <- make_afun(
    a_surv_timepoint_diff,
    .stats = .stats$surv_diff,
    .formats = .formats$surv_diff,
    .labels = .labels$surv_diff,
    .indent_mods = .indent_mods$surv_diff
  )

  # `time_point`, `is_event` and `control` are taken from `...`.
  dots <- list(...)
  time_point <- dots$time_point

  # Adds one `analyze()` call for a single time point to the layout.
  add_timepoint_analysis <- function(lyt, tpt, afun, name_prefix, show_labels) {
    analyze(
      lyt,
      vars,
      var_labels = paste(tpt, var_labels),
      table_names = paste0(name_prefix, tpt, table_names_suffix),
      show_labels = show_labels,
      afun = afun,
      extra_args = list(
        is_event = dots$is_event,
        control = dots$control,
        time_point = tpt
      )
    )
  }

  do_surv <- method %in% c("surv", "both")
  do_surv_diff <- method %in% c("surv_diff", "both")
  # With `method = "both"` the label is already shown by the survival rate analysis.
  diff_show_labels <- ifelse(method == "both", "hidden", show_labels)

  # One set of analyses per requested time point.
  for (i in seq_along(time_point)) {
    tpt <- time_point[i]
    if (do_surv) {
      lyt <- add_timepoint_analysis(lyt, tpt, afun_surv, "surv_", show_labels)
    }
    if (do_surv_diff) {
      lyt <- add_timepoint_analysis(lyt, tpt, afun_surv_diff, "surv_diff_", diff_show_labels)
    }
  }
  lyt
}

#' Horizontal Waterfall Plot
#'
#' This basic waterfall plot visualizes a quantity `height` ordered by value with some markup.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param height (`numeric`)\cr vector containing values to be plotted as the waterfall bars.
#' @param id (`character`)\cr vector containing IDs to use as the x-axis label for the waterfall bars.
#' @param col (`character`)\cr colors.
#' @param col_var (`factor`, `character` or `NULL`)\cr categorical variable for bar coloring. `NULL` by default.
#' @param xlab (`character`)\cr x label. Default is `"ID"`.
#' @param ylab (`character`)\cr y label. Default is `"Value"`.
#' @param title (`character`)\cr text to be displayed as plot title.
#' @param col_legend_title (`character`)\cr text to be displayed as legend title.
#'
#' @return A `ggplot` waterfall plot.
#'
#' @examples
#' library(dplyr)
#' library(nestcolor)
#'
#' g_waterfall(height = c(3, 5, -1), id = letters[1:3])
#'
#' g_waterfall(
#'   height = c(3, 5, -1),
#'   id = letters[1:3],
#'   col_var = letters[1:3]
#' )
#'
#' adsl_f <- tern_ex_adsl %>%
#'   select(USUBJID, STUDYID, ARM, ARMCD, SEX)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "OVRINV") %>%
#'   mutate(pchg = rnorm(n(), 10, 50))
#'
#' adrs_f <- head(adrs_f, 30)
#' adrs_f <- adrs_f[!duplicated(adrs_f$USUBJID), ]
#' head(adrs_f)
#'
#' g_waterfall(
#'   height = adrs_f$pchg,
#'   id = adrs_f$USUBJID,
#'   col_var = adrs_f$AVALC
#' )
#'
#' g_waterfall(
#'   height = adrs_f$pchg,
#'   id = paste("asdfdsfdsfsd", adrs_f$USUBJID),
#'   col_var = adrs_f$SEX
#' )
#'
#' g_waterfall(
#'   height = adrs_f$pchg,
#'   id = paste("asdfdsfdsfsd", adrs_f$USUBJID),
#'   xlab = "ID",
#'   ylab = "Percentage Change",
#'   title = "Waterfall plot"
#' )
#'
#' @export
g_waterfall <- function(height,
                        id,
                        col_var = NULL,
                        col = getOption("ggplot2.discrete.colour"),
                        xlab = NULL,
                        ylab = NULL,
                        col_legend_title = NULL,
                        title = NULL) {
  has_col_var <- !is.null(col_var)

  # All supplied vectors are checked for compatible lengths.
  if (has_col_var) {
    check_same_n(height = height, id = id, col_var = col_var)
  } else {
    check_same_n(height = height, id = id)
  }

  checkmate::assert_multi_class(col_var, c("character", "factor"), null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)

  # Axis and legend titles default to the expressions the caller passed in.
  if (is.null(xlab)) {
    xlab <- deparse(substitute(id))
  }
  if (is.null(ylab)) {
    ylab <- deparse(substitute(height))
  }
  if (is.null(col_legend_title) && !missing(col_var)) {
    col_legend_title <- deparse(substitute(col_var))
  }

  # Without a coloring variable a constant placeholder is used for the column.
  fill_values <- if (has_col_var) to_n(col_var, length(height)) else "x"
  bars <- data.frame(
    height = height,
    id = as.character(id),
    col_var = fill_values,
    stringsAsFactors = FALSE
  )

  # Bars are drawn from the largest to the smallest value.
  bars <- bars[order(bars$height, decreasing = TRUE), ]

  p <- ggplot2::ggplot(bars, ggplot2::aes(x = factor(id, levels = id), y = height))
  p <- p + ggplot2::geom_col()
  # Value labels are nudged above non-negative bars and below negative ones.
  p <- p + ggplot2::geom_text(
    label = format(bars$height, digits = 2),
    vjust = ifelse(bars$height >= 0, -0.5, 1.5)
  )
  p <- p + ggplot2::xlab(xlab)
  p <- p + ggplot2::ylab(ylab)
  p <- p + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, hjust = 0, vjust = .5))

  if (has_col_var) {
    p <- p + ggplot2::aes(fill = col_var)
    p <- p + ggplot2::labs(fill = col_legend_title)
    p <- p + ggplot2::theme(
      legend.position = "bottom",
      legend.background = ggplot2::element_blank(),
      legend.title = ggplot2::element_text(face = "bold"),
      legend.box.background = ggplot2::element_rect(colour = "black")
    )
  }

  if (!is.null(col)) {
    p <- p + ggplot2::scale_fill_manual(values = col)
  }

  if (!is.null(title)) {
    p <- p + ggplot2::labs(title = title)
    p <- p + ggplot2::theme(plot.title = ggplot2::element_text(face = "bold"))
  }

  p
}

#' Patient Counts for Laboratory Events (Worsen From Baseline) by Highest Grade Post-Baseline
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Patient count and fraction for laboratory events (worsen from baseline) shift table.
#'
#' @inheritParams argument_convention
#'
#' @seealso Relevant helper functions [h_adlb_worsen()] and [h_worsen_counter()]
#'
#' @name abnormal_by_worst_grade_worsen
NULL

#' Helper Function to Prepare ADLB with Worst Labs
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to prepare a `df` for generating the patient count shift table.
#'
#' @param adlb (`data.frame`)\cr `ADLB` dataframe
#' @param worst_flag_low (named `vector`)\cr Worst low post-baseline lab grade flag variable
#' @param worst_flag_high (named `vector`)\cr Worst high post-baseline lab grade flag variable
#' @param direction_var (`string`)\cr Direction variable specifying the direction of the shift table of interest.
#'   Only lab records flagged by `L`, `H` or `B` are included in the shift table.
#'   * `L`: low direction only
#'   * `H`: high direction only
#'   * `B`: both low and high directions
#'
#' @return `h_adlb_worsen()` returns the `adlb` `data.frame` containing only the
#'   worst labs specified according to `worst_flag_low` or `worst_flag_high` for the
#'   direction specified according to `direction_var`. For instance, for a lab that is
#'   needed for the low direction only, only records flagged by `worst_flag_low` are
#'   selected. For a lab that is needed for both low and high directions, the worst
#'   low records are selected for the low direction, and the worst high records are selected
#'   for the high direction.
#'
#' @seealso [abnormal_by_worst_grade_worsen]
#'
#' @examples
#' library(dplyr)
#'
#' # The direction variable, GRADDR, is based on metadata
#' adlb <- tern_ex_adlb %>%
#'   mutate(
#'     GRADDR = case_when(
#'       PARAMCD == "ALT" ~ "B",
#'       PARAMCD == "CRP" ~ "L",
#'       PARAMCD == "IGA" ~ "H"
#'     )
#'   ) %>%
#'   filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
#'
#' df <- h_adlb_worsen(
#'   adlb,
#'   worst_flag_low = c("WGRLOFL" = "Y"),
#'   worst_flag_high = c("WGRHIFL" = "Y"),
#'   direction_var = "GRADDR"
#' )
#'
#' @export
h_adlb_worsen <- function(adlb,
                          worst_flag_low = NULL,
                          worst_flag_high = NULL,
                          direction_var) {
  checkmate::assert_string(direction_var)
  checkmate::assert_subset(as.character(unique(adlb[[direction_var]])), c("B", "L", "H"))
  assert_df_with_variables(adlb, list("Col" = direction_var))

  # The flag columns required depend on which directions occur in the data.
  if (any(unique(adlb[[direction_var]]) == "H")) {
    assert_df_with_variables(adlb, list("High" = names(worst_flag_high)))
  }

  if (any(unique(adlb[[direction_var]]) == "L")) {
    assert_df_with_variables(adlb, list("Low" = names(worst_flag_low)))
  }

  if (any(unique(adlb[[direction_var]]) == "B")) {
    assert_df_with_variables(
      adlb,
      list(
        "Low" = names(worst_flag_low),
        "High" = names(worst_flag_high)
      )
    )
  }

  # extract records flagged as worst post-baseline lab, either low or high or both
  worst_flag <- c(worst_flag_low, worst_flag_high)
  flagged_rows <- Map(
    function(x, y) which(adlb[[x]] == y),
    names(worst_flag),
    worst_flag
  )
  adlb_f <- adlb[Reduce(union, flagged_rows), ]

  # Records of `adlb_f` with direction code `direction_code`, relabelled to
  # `direction_label` and restricted to those matching the worst flag `flag`.
  worst_subset <- function(direction_code, flag, direction_label) {
    sel <- adlb_f[which(adlb_f[[direction_var]] == direction_code), ]
    sel[[direction_var]] <- rep(direction_label, nrow(sel))
    sel[which(sel[[names(flag)]] == flag), ]
  }

  # Labs requiring both directions ("B") contribute to the high and to the low
  # part, so their records are stacked once per direction.
  if (!is.null(worst_flag_high) && !is.null(worst_flag_low)) {
    out <- rbind(
      worst_subset("H", worst_flag_high, "High"),
      worst_subset("B", worst_flag_high, "High"),
      worst_subset("L", worst_flag_low, "Low"),
      worst_subset("B", worst_flag_low, "Low")
    )
  } else if (!is.null(worst_flag_high)) {
    out <- rbind(
      worst_subset("H", worst_flag_high, "High"),
      worst_subset("B", worst_flag_high, "High")
    )
  } else if (!is.null(worst_flag_low)) {
    out <- rbind(
      worst_subset("L", worst_flag_low, "Low"),
      worst_subset("B", worst_flag_low, "Low")
    )
  } else {
    # Previously this case fell through and failed with "object 'out' not found".
    stop("at least one of `worst_flag_low` and `worst_flag_high` must be specified")
  }

  # restore the variable labels of the input data
  formatters::var_labels(out) <- formatters::var_labels(adlb_f, fill = FALSE)
  out
}

#' Helper Function to Analyze Patients for [s_count_abnormal_lab_worsen_by_baseline()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to count the number of patients and the fraction of patients according to
#' highest post-baseline lab grade variable `.var`, baseline lab grade variable `baseline_var`,
#' and the direction of interest specified in `direction_var`.
#'
#' @inheritParams argument_convention
#' @inheritParams h_adlb_worsen
#' @param baseline_var (`string`)\cr baseline lab grade variable
#'
#' @return `h_worsen_counter()` returns the counts and fraction of patients
#'   whose worst post-baseline lab grades are worse than their baseline grades, for
#'   post-baseline worst grades "1", "2", "3", "4" and "Any".
#'
#' @seealso [abnormal_by_worst_grade_worsen]
#'
#' @examples
#' library(dplyr)
#'
#' # The direction variable, GRADDR, is based on metadata
#' adlb <- tern_ex_adlb %>%
#'   mutate(
#'     GRADDR = case_when(
#'       PARAMCD == "ALT" ~ "B",
#'       PARAMCD == "CRP" ~ "L",
#'       PARAMCD == "IGA" ~ "H"
#'     )
#'   ) %>%
#'   filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
#'
#' df <- h_adlb_worsen(
#'   adlb,
#'   worst_flag_low = c("WGRLOFL" = "Y"),
#'   worst_flag_high = c("WGRHIFL" = "Y"),
#'   direction_var = "GRADDR"
#' )
#'
#' # `h_worsen_counter`
#' h_worsen_counter(
#'   df %>% filter(PARAMCD == "CRP" & GRADDR == "Low"),
#'   id = "USUBJID",
#'   .var = "ATOXGR",
#'   baseline_var = "BTOXGR",
#'   direction_var = "GRADDR"
#' )
#'
#' @export
h_worsen_counter <- function(df, id, .var, baseline_var, direction_var) {
  checkmate::assert_string(id)
  checkmate::assert_string(.var)
  checkmate::assert_string(baseline_var)
  checkmate::assert_scalar(unique(df[[direction_var]]))
  checkmate::assert_subset(unique(df[[direction_var]]), c("High", "Low"))
  assert_df_with_variables(df, list(val = c(id, .var, baseline_var, direction_var)))

  # obtain directionality; this must happen before rows are removed, otherwise
  # data where every post-baseline value is missing leaves no direction to read
  direction <- unique(df[[direction_var]])

  # remove post-baseline missing, given either as "<Missing>" or as `NA`
  # (indexing with an `NA` condition would otherwise create all-`NA` rows)
  post_missing <- is.na(df[[.var]]) | df[[.var]] == "<Missing>"
  df <- df[!post_missing, ]

  if (direction == "Low") {
    grade <- -1:-4
    worst_grade <- -4
  } else if (direction == "High") {
    grade <- 1:4
    worst_grade <- 4
  }

  if (nrow(df) > 0) {
    by_grade <- lapply(grade, function(i) {
      # keep baseline grades from one step below `i` through the most extreme
      # grade of the opposite direction, or "<Missing>"
      eligible <- df[df[[baseline_var]] %in% c((i + sign(i) * -1):(-1 * worst_grade), "<Missing>"), ]
      # num: number of patients with post-baseline worst lab equal to i
      num <- length(unique(eligible[eligible[[.var]] %in% i, id, drop = TRUE]))
      # denom: number of patients with an eligible baseline value
      denom <- length(unique(eligible[[id]]))
      c(num = num, denom = denom)
    })
  } else {
    # nothing left after removing missing post-baseline values
    by_grade <- list(c(num = 0, denom = 0))
  }

  names(by_grade) <- as.character(seq_along(by_grade))

  # baseline grade not already at the worst grade of this direction (or missing)
  df_temp <- df[!df[[baseline_var]] %in% worst_grade, ]

  # denom: number of patients whose baseline is not already the worst grade
  denom <- length(unique(df_temp[, id, drop = TRUE]))

  # condition 1: missing baseline and post-baseline in the direction of abnormality
  con1 <- which(df_temp[[baseline_var]] == "<Missing>" & df_temp[[.var]] %in% grade)

  # condition 2: non-missing baseline, post-baseline in the direction of
  # abnormality and worse than baseline. Only these rows are converted to
  # numbers, so "<Missing>" is never coerced.
  rows_nm <- which(df_temp[[baseline_var]] != "<Missing>" & df_temp[[.var]] %in% grade)
  post_nm <- as.numeric(as.character(df_temp[[.var]][rows_nm]))
  base_nm <- as.numeric(as.character(df_temp[[baseline_var]][rows_nm]))
  worsened <- if (direction == "Low") post_nm < base_nm else post_nm > base_nm
  # Map back to row positions of `df_temp`. Positions relative to the
  # non-missing subset would point at the wrong rows and could coincide with
  # `con1`, undercounting the patients.
  con2 <- rows_nm[which(worsened)]

  # number of patients satisfying either condition 1 or 2
  num <- length(unique(df_temp[union(con1, con2), id, drop = TRUE]))

  list(fraction = c(by_grade, list("Any" = c(num = num, denom = denom))))
}

#' @describeIn abnormal_by_worst_grade_worsen Statistics function for patients whose worst post-baseline
#'   lab grades are worse than their baseline grades.
#'
#' @param variables (named `list` of `string`)\cr list of additional analysis variables including:
#'   * `id` (`string`)\cr subject variable name.
#'   * `baseline_var` (`string`)\cr name of the data column containing baseline toxicity variable.
#'   * `direction_var` (`string`)\cr see `direction_var` for more details.
#'
#' @return
#' * `s_count_abnormal_lab_worsen_by_baseline()` returns the counts and fraction of patients whose worst
#'   post-baseline lab grades are worse than their baseline grades, for post-baseline worst grades
#'   "1", "2", "3", "4" and "Any".
#'
#' @examples
#' library(dplyr)
#'
#' # The direction variable, GRADDR, is based on metadata
#' adlb <- tern_ex_adlb %>%
#'   mutate(
#'     GRADDR = case_when(
#'       PARAMCD == "ALT" ~ "B",
#'       PARAMCD == "CRP" ~ "L",
#'       PARAMCD == "IGA" ~ "H"
#'     )
#'   ) %>%
#'   filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
#'
#' df <- h_adlb_worsen(
#'   adlb,
#'   worst_flag_low = c("WGRLOFL" = "Y"),
#'   worst_flag_high = c("WGRHIFL" = "Y"),
#'   direction_var = "GRADDR"
#' )
#' # Internal function - s_count_abnormal_lab_worsen_by_baseline
#' \dontrun{
#' # Patients with worsening lab grade for CRP in the direction of low
#' s_count_abnormal_lab_worsen_by_baseline(
#'   df = df %>% filter(ARMCD == "ARM A" & PARAMCD == "CRP"),
#'   .var = "ATOXGR",
#'   variables = list(
#'     id = "USUBJID",
#'     baseline_var = "BTOXGR",
#'     direction_var = "GRADDR"
#'   )
#' )
#' }
#'
#' @keywords internal
s_count_abnormal_lab_worsen_by_baseline <- function(df, # nolint
                                                    .var = "ATOXGR",
                                                    variables = list(
                                                      id = "USUBJID",
                                                      baseline_var = "BTOXGR",
                                                      direction_var = "GRADDR"
                                                    )) {
  # Validate the analysis variable and the exact set of entries in `variables`.
  checkmate::assert_string(.var)
  checkmate::assert_set_equal(names(variables), c("id", "baseline_var", "direction_var"))
  checkmate::assert_string(variables$id)
  checkmate::assert_string(variables$baseline_var)
  checkmate::assert_string(variables$direction_var)

  # All referenced columns must be present in `df`.
  assert_df_with_variables(df, c(aval = .var, variables[seq_len(3)]))
  assert_list_of_variables(variables)

  # The counting itself is delegated to the helper.
  h_worsen_counter(
    df = df,
    id = variables$id,
    .var = .var,
    baseline_var = variables$baseline_var,
    direction_var = variables$direction_var
  )
}


#' @describeIn abnormal_by_worst_grade_worsen Formatted analysis function which is used as `afun`
#'   in `count_abnormal_lab_worsen_by_baseline()`.
#'
#' @return
#' * `a_count_abnormal_lab_worsen_by_baseline()` returns the corresponding list with
#'   formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_count_abnormal_lab_worsen_by_baseline
#' \dontrun{
#' a_count_abnormal_lab_worsen_by_baseline(
#'   df = df %>% filter(ARMCD == "ARM A" & PARAMCD == "CRP"),
#'   .var = "ATOXGR",
#'   variables = list(id = "USUBJID", baseline_var = "BTOXGR", direction_var = "GRADDR")
#' )
#' }
#'
#' @keywords internal
a_count_abnormal_lab_worsen_by_baseline <- make_afun( # nolint
  # Statistics function whose results are turned into formatted cells.
  s_count_abnormal_lab_worsen_by_baseline,
  # The `fraction` statistic is rendered with `format_fraction`.
  .formats = c(fraction = format_fraction),
  # `fraction` is a list with one entry per grade plus "Any".
  # NOTE(review): presumably `.ungroup_stats` spreads these entries over
  # separate rows - confirm against the `make_afun()` documentation.
  .ungroup_stats = "fraction"
)

#' @describeIn abnormal_by_worst_grade_worsen Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_lab_worsen_by_baseline()` returns a layout object suitable for passing to further layouting
#'   functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted
#'   rows containing the statistics from `s_count_abnormal_lab_worsen_by_baseline()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   add_colcounts() %>%
#'   split_rows_by("PARAMCD") %>%
#'   split_rows_by("GRADDR") %>%
#'   count_abnormal_lab_worsen_by_baseline(
#'     var = "ATOXGR",
#'     variables = list(
#'       id = "USUBJID",
#'       baseline_var = "BTOXGR",
#'       direction_var = "GRADDR"
#'     )
#'   ) %>%
#'   append_topleft("Direction of Abnormality") %>%
#'   build_table(df = df, alt_counts_df = tern_ex_adsl)
#'
#' @export
count_abnormal_lab_worsen_by_baseline <- function(lyt, # nolint
                                                  var,
                                                  ...,
                                                  table_names = NULL,
                                                  .stats = NULL,
                                                  .formats = NULL,
                                                  .labels = NULL,
                                                  .indent_mods = NULL) {
  checkmate::assert_string(var)

  # `table_names` used to be accepted but never reached `analyze()`. It is now
  # forwarded; when left as `NULL` the analysis variable name is used, which is
  # what `analyze()` falls back to by default.
  if (is.null(table_names)) {
    table_names <- var
  }

  # Customize the default formatted analysis function with the user's choices.
  afun <- make_afun(
    a_count_abnormal_lab_worsen_by_baseline,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Arguments passed through `...` are handed to the analysis function.
  lyt <- analyze(
    lyt = lyt,
    vars = var,
    afun = afun,
    table_names = table_names,
    extra_args = list(...),
    show_labels = "hidden"
  )

  lyt
}

#' Control Function for Subgroup Treatment Effect Pattern (STEP) Calculations
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for STEP calculations.
#'
#' @param biomarker (`numeric` or `NULL`)\cr optional provision of the numeric biomarker variable, which
#'   could be used to infer `bandwidth`, see below.
#' @param use_percentile (`flag`)\cr if `TRUE`, the running windows are created according to
#'   quantiles rather than actual values, i.e. the bandwidth refers to the percentage of data
#'   covered in each window. Suggest `TRUE` if the biomarker variable is not uniformly
#'   distributed.
#' @param bandwidth (`number` or `NULL`)\cr indicating the bandwidth of each window.
#'   Depending on the argument `use_percentile`, it can be either the length of actual-value
#'   windows on the real biomarker scale, or percentage windows.
#'   If `use_percentile = TRUE`, it should be a number between 0 and 1.
#'   If `NULL`, treat the bandwidth to be infinity, which means only one global model will be fitted.
#'   By default, `0.25` is used for percentage windows and one quarter of the range of the `biomarker`
#'   variable for actual-value windows.
#' @param degree (`count`)\cr the degree of polynomial function of the biomarker as an interaction term
#'   with the treatment arm fitted at each window. If 0 (default), then the biomarker variable
#'   is not included in the model fitted in each biomarker window.
#' @param num_points (`count`)\cr the number of points at which the hazard ratios are estimated. The
#'   smallest number is 2.
#'
#' @return A list of components with the same names as the arguments, except `biomarker` which is
#'   just used to calculate the `bandwidth` in case that actual biomarker windows are requested.
#'
#' @examples
#' # Provide biomarker values and request actual values to be used,
#' # so that bandwidth is chosen from range.
#' control_step(biomarker = 1:10, use_percentile = FALSE)
#'
#' # Use a global model with quadratic biomarker interaction term.
#' control_step(bandwidth = NULL, degree = 2)
#'
#' # Reduce number of points to be used.
#' control_step(num_points = 10)
#'
#' @export
control_step <- function(biomarker = NULL,
                         use_percentile = TRUE,
                         bandwidth,
                         degree = 0L,
                         num_points = 39L) {
  checkmate::assert_numeric(biomarker, null.ok = TRUE)
  checkmate::assert_flag(use_percentile)
  checkmate::assert_int(num_points, lower = 2)
  checkmate::assert_count(degree)

  if (missing(bandwidth)) {
    # Infer bandwidth: a quarter of the data for percentile windows, a quarter
    # of the biomarker range for actual-value windows, and `NULL` when no
    # biomarker is available to derive a range from.
    bandwidth <- if (use_percentile) {
      0.25
    } else if (!is.null(biomarker)) {
      diff(range(biomarker, na.rm = TRUE)) / 4
    } else {
      NULL
    }
  } else if (!is.null(bandwidth)) {
    # Check bandwidth; an explicit `NULL` is passed through unchanged.
    if (use_percentile) {
      assert_proportion_value(bandwidth)
    } else {
      # Require a numeric scalar. A plain scalar check would let values such as
      # "a" through, because the string comparison `"a" > 0` is `TRUE`.
      checkmate::assert_number(bandwidth)
      checkmate::assert_true(bandwidth > 0)
    }
  }

  # `biomarker` is only used to infer the bandwidth and is not returned.
  list(
    use_percentile = use_percentile,
    bandwidth = bandwidth,
    degree = as.integer(degree),
    num_points = as.integer(num_points)
  )
}

#' Pairwise CoxPH model
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize p-value, HR and CIs from stratified or unstratified CoxPH model.
#'
#' @inheritParams argument_convention
#' @inheritParams s_surv_time
#' @param strat (`character` or `NULL`)\cr variable names indicating stratification factors.
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
#'   [control_coxph()]. Some possible parameter options are:
#'   * `pval_method` (`string`)\cr p-value method for testing hazard ratio = 1. Default method is "log-rank" which
#'     comes from [survival::survdiff()], can also be set to "wald" or "likelihood" (from [survival::coxph()]).
#'   * `ties` (`string`)\cr specifying the method for tie handling. Default is "efron",
#'     can also be set to "breslow" or "exact". See more in [survival::coxph()]
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for HR.
#'
#' @name survival_coxph_pairwise
NULL

#' @describeIn survival_coxph_pairwise Statistics function which analyzes HR, CIs of HR and p-value of a coxph model.
#'
#' @return
#' * `s_coxph_pairwise()` returns the statistics:
#'   * `pvalue`: p-value to test HR = 1.
#'   * `hr`: Hazard ratio.
#'   * `hr_ci`: Confidence interval for hazard ratio.
#'   * `n_tot`: Total number of observations.
#'   * `n_tot_events`: Total number of events.
#'
#' @examples
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#' df <- adtte_f %>%
#'   filter(ARMCD == "ARM A")
#' df_ref_group <- adtte_f %>%
#'   filter(ARMCD == "ARM B")
#'
#' # Internal function - s_coxph_pairwise
#' \dontrun{
#' s_coxph_pairwise(df, df_ref_group, .in_ref_col = FALSE, .var = "AVAL", is_event = "is_event")
#' }
#'
#' @keywords internal
s_coxph_pairwise <- function(df,
                             .ref_group,
                             .in_ref_col,
                             .var,
                             is_event,
                             strat = NULL,
                             control = control_coxph()) {
  checkmate::assert_string(.var)
  checkmate::assert_numeric(df[[.var]])
  checkmate::assert_logical(df[[is_event]])
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  pval_method <- control$pval_method
  ties <- control$ties
  conf_level <- control$conf_level

  # In the reference column there is nothing to compare against, so only empty
  # values carrying the row labels are returned.
  if (.in_ref_col) {
    return(
      list(
        pvalue = formatters::with_label("", paste0("p-value (", pval_method, ")")),
        hr = formatters::with_label("", "Hazard Ratio"),
        hr_ci = formatters::with_label("", f_conf_level(conf_level)),
        n_tot = formatters::with_label("", "Total n"),
        n_tot_events = formatters::with_label("", "Total events")
      )
    )
  }

  # Stack reference and comparison group; "ref" is the first factor level so
  # that the model coefficient describes the comparison group versus reference.
  data <- rbind(.ref_group, df)
  group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "x"))

  df_cox <- data.frame(
    tte = data[[.var]],
    is_event = data[[is_event]],
    arm = group
  )
  if (is.null(strat)) {
    formula_cox <- survival::Surv(tte, is_event) ~ arm
  } else {
    formula_cox <- stats::as.formula(
      paste0(
        "survival::Surv(tte, is_event) ~ arm + strata(",
        paste(strat, collapse = ","),
        ")"
      )
    )
    df_cox <- cbind(df_cox, data[strat])
  }
  cox_fit <- survival::coxph(
    formula = formula_cox,
    data = df_cox,
    ties = ties
  )
  sum_cox <- summary(cox_fit, conf.int = conf_level, extend = TRUE)

  pval <- switch(pval_method,
    "wald" = sum_cox$waldtest["pvalue"],
    "log-rank" = {
      # p-value from the original log-rank test survival::survdiff(); it is only
      # fitted when requested. The upper tail is computed directly because
      # `1 - pchisq()` loses precision for very small p-values.
      log_rank <- survival::survdiff(formula_cox, data = df_cox)
      stats::pchisq(log_rank$chisq, df = length(log_rank$n) - 1, lower.tail = FALSE)
    },
    "likelihood" = sum_cox$logtest["pvalue"]
  )
  list(
    pvalue = formatters::with_label(unname(pval), paste0("p-value (", pval_method, ")")),
    hr = formatters::with_label(sum_cox$conf.int[1, 1], "Hazard Ratio"),
    hr_ci = formatters::with_label(unname(sum_cox$conf.int[1, 3:4]), f_conf_level(conf_level)),
    n_tot = formatters::with_label(sum_cox$n, "Total n"),
    n_tot_events = formatters::with_label(sum_cox$nevent, "Total events")
  )
}

#' @describeIn survival_coxph_pairwise Formatted analysis function which is used as `afun` in `coxph_pairwise()`.
#'
#' @return
#' * `a_coxph_pairwise()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_coxph_pairwise
#' \dontrun{
#' a_coxph_pairwise(df, df_ref_group, .in_ref_col = FALSE, .var = "AVAL", is_event = "is_event")
#' }
#'
#' @keywords internal
a_coxph_pairwise <- make_afun(
  # Statistics function whose results are turned into formatted cells.
  s_coxph_pairwise,
  # Only the confidence interval carries an indent modifier of 1.
  .indent_mods = c(pvalue = 0L, hr = 0L, hr_ci = 1L, n_tot = 0L, n_tot_events = 0L),
  # Default format for each statistic returned by `s_coxph_pairwise()`.
  # NOTE(review): `n_tot` and `n_tot_events` are counts but use the two-decimal
  # "xx.xx" format - confirm this is intended.
  .formats = c(
    pvalue = "x.xxxx | (<0.0001)",
    hr = "xx.xx",
    hr_ci = "(xx.xx, xx.xx)",
    n_tot = "xx.xx",
    n_tot_events = "xx.xx"
  )
)

#' @describeIn survival_coxph_pairwise Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `coxph_pairwise()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_coxph_pairwise()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   coxph_pairwise(
#'     vars = "AVAL",
#'     is_event = "is_event",
#'     var_labels = "Unstratified Analysis"
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   coxph_pairwise(
#'     vars = "AVAL",
#'     is_event = "is_event",
#'     var_labels = "Stratified Analysis",
#'     strat = "SEX",
#'     control = control_coxph(pval_method = "wald")
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' @export
coxph_pairwise <- function(lyt,
                           vars,
                           ...,
                           var_labels = "CoxPH",
                           show_labels = "visible",
                           table_names = vars,
                           .stats = c("pvalue", "hr", "hr_ci"),
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = NULL) {
  # Customize the default formatted analysis function with the user's choices.
  analysis_fun <- make_afun(
    a_coxph_pairwise,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Arguments passed through `...` are handed to the analysis function.
  lyt <- analyze(
    lyt = lyt,
    vars = vars,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    afun = analysis_fun,
    extra_args = list(...)
  )

  lyt
}

#' Helper Functions for Tabulating Survival Duration by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions that tabulate in a data frame statistics such as median survival
#' time and hazard ratio for population subgroups.
#'
#' @inheritParams argument_convention
#' @inheritParams survival_coxph_pairwise
#' @inheritParams survival_duration_subgroups
#' @param arm (`factor`)\cr the treatment group variable.
#'
#' @details Main functionality is to prepare data for use in a layout-creating function.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X"),
#'     SEX %in% c("M", "F")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
#'     SEX = droplevels(SEX),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("ARM" = adtte_labels[["ARM"]], "SEX" = adtte_labels[["SEX"]], "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' @name h_survival_duration_subgroups
NULL

#' @describeIn h_survival_duration_subgroups helper to prepare a data frame of median survival times by arm.
#'
#' @return
#' * `h_survtime_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, and `median`.
#'
#' @examples
#' # Extract median survival time for one group.
#' h_survtime_df(
#'   tte = adtte_f$AVAL,
#'   is_event = adtte_f$is_event,
#'   arm = adtte_f$ARM
#' )
#'
#' @export
h_survtime_df <- function(tte, is_event, arm) {
  checkmate::assert_numeric(tte)
  checkmate::assert_logical(is_event, len = length(tte))
  assert_valid_factor(arm, len = length(tte))

  surv_data <- data.frame(
    tte = tte,
    is_event = is_event,
    stringsAsFactors = FALSE
  )

  # Keep only observations where both the time and the event flag are available.
  keep <- stats::complete.cases(surv_data)
  surv_data <- surv_data[keep, ]
  arm <- arm[keep]

  by_arm <- split(surv_data, arm)
  arm_names <- names(by_arm)

  # One summary row per arm; arms without any observation get `NA` statistics.
  per_arm <- lapply(seq_along(by_arm), function(i) {
    arm_data <- by_arm[[i]]

    if (nrow(arm_data) > 0) {
      surv_stats <- s_surv_time(arm_data, .var = "tte", is_event = "is_event")
      median_est <- unname(as.numeric(surv_stats$median))
      n_events <- sum(arm_data$is_event)
    } else {
      median_est <- NA
      n_events <- NA
    }

    data.frame(
      arm = arm_names[i],
      n = nrow(arm_data),
      n_events = n_events,
      median = median_est,
      stringsAsFactors = FALSE
    )
  })
  names(per_arm) <- arm_names

  df <- do.call(rbind, args = c(per_arm, make.row.names = FALSE))
  # Restore the level order of the input factor.
  df$arm <- factor(df$arm, levels = levels(arm))
  df
}

#' @describeIn h_survival_duration_subgroups summarizes median survival times by arm and across subgroups
#'    in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and
#'    requires elements `tte`, `is_event`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies
#'    groupings for `subgroups` variables.
#'
#' @return
#' * `h_survtime_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, `median`, `subgroup`,
#'   `var`, `var_label`, and `row_type`.
#'
#' @examples
#' # Extract median survival time for multiple groups.
#' h_survtime_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#'
#' # Define groupings for BMRKR2 levels.
#' h_survtime_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' @export
h_survtime_subgroups_df <- function(variables,
                                    data,
                                    groups_lists = list(),
                                    label_all = "All Patients") {
  checkmate::assert_character(variables$tte)
  checkmate::assert_character(variables$is_event)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)

  assert_df_with_variables(data, variables)

  checkmate::assert_string(label_all)

  # Per-arm summary of one data set, using the columns named in `variables`.
  survtime_by_arm <- function(df) {
    h_survtime_df(df[[variables$tte]], df[[variables$is_event]], df[[variables$arm]])
  }

  # Overall population: labelled with `label_all` and flagged as a "content" row.
  result_all <- survtime_by_arm(data)
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables the overall summary is the complete result.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  # One summary per subgroup, each combined with that subgroup's label columns.
  subsets <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  rows_by_subgroup <- lapply(subsets, function(grp) {
    estimates <- survtime_by_arm(grp$df)
    # Repeat the single label row once per estimate row so the columns can be bound.
    labels_rep <- grp$df_labels[rep(1, times = nrow(estimates)), ]
    cbind(estimates, labels_rep)
  })
  result_subgroups <- do.call(rbind, args = c(rows_by_subgroup, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(result_all, result_subgroups)
}

#' @describeIn h_survival_duration_subgroups helper to prepare a data frame with estimates of
#'   treatment hazard ratio.
#'
#' @param strata_data (`factor`, `data.frame` or `NULL`)\cr required if stratified analysis is performed.
#'
#' @return
#' * `h_coxph_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`,
#'   `conf_level`, `pval` and `pval_label`.
#'
#' @examples
#' # Extract hazard ratio for one group.
#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM)
#'
#' # Extract hazard ratio for one group with stratification factor.
#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM, strata_data = adtte_f$STRATA1)
#'
#' @export
h_coxph_df <- function(tte, is_event, arm, strata_data = NULL, control = control_coxph()) {
  checkmate::assert_numeric(tte)
  checkmate::assert_logical(is_event, len = length(tte))
  assert_valid_factor(arm, n.levels = 2, len = length(tte))

  df_tte <- data.frame(tte = tte, is_event = is_event)
  strata_vars <- NULL

  # Attach the stratification factors, either as the columns of a data frame or as a
  # single factor stored under the name "strata_data".
  if (is.data.frame(strata_data)) {
    strata_vars <- names(strata_data)
    checkmate::assert_data_frame(strata_data, nrows = nrow(df_tte))
    assert_df_with_factors(strata_data, as.list(stats::setNames(strata_vars, strata_vars)))
    df_tte[strata_vars] <- strata_data
  } else if (!is.null(strata_data)) {
    assert_valid_factor(strata_data, len = nrow(df_tte))
    strata_vars <- "strata_data"
    df_tte[strata_vars] <- strata_data
  }

  # Row returned when no hazard ratio is estimated: counts only, estimates set to `NA`.
  no_estimate_row <- function(n_tot, n_tot_events) {
    data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = n_tot,
      n_tot_events = n_tot_events,
      hr = NA,
      lcl = NA,
      ucl = NA,
      conf_level = control[["conf_level"]],
      pval = NA,
      pval_label = NA,
      stringsAsFactors = FALSE
    )
  }

  # First level of `arm` is passed as the reference group, second as the comparison group.
  data_by_arm <- split(df_tte, arm)
  n_ref <- nrow(data_by_arm[[1]])
  n_trt <- nrow(data_by_arm[[2]])

  if (n_ref > 0 && n_trt > 0) {
    # Both arms have records: hazard ratio, confidence interval and p-value.
    fit <- s_coxph_pairwise(
      df = data_by_arm[[2]],
      .ref_group = data_by_arm[[1]],
      .in_ref_col = FALSE,
      .var = "tte",
      is_event = "is_event",
      strat = strata_vars,
      control = control
    )

    data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = unname(as.numeric(fit$n_tot)),
      n_tot_events = unname(as.numeric(fit$n_tot_events)),
      hr = unname(as.numeric(fit$hr)),
      lcl = unname(fit$hr_ci[1]),
      ucl = unname(fit$hr_ci[2]),
      conf_level = control[["conf_level"]],
      pval = as.numeric(fit$pvalue),
      pval_label = obj_label(fit$pvalue),
      stringsAsFactors = FALSE
    )
  } else if (n_ref > 0 || n_trt > 0) {
    # Exactly one arm has records: report the counts from the complete cases only.
    complete_rows <- df_tte[stats::complete.cases(df_tte), ]
    no_estimate_row(
      n_tot = nrow(complete_rows),
      n_tot_events = sum(complete_rows$is_event)
    )
  } else {
    # Neither arm has any record.
    no_estimate_row(n_tot = 0L, n_tot_events = 0L)
  }
}

#' @describeIn h_survival_duration_subgroups summarizes estimates of the treatment hazard ratio
#'   across subgroups in a data frame. `variables` corresponds to the names of variables found in
#'   `data`, passed as a named list and requires elements `tte`, `is_event`, `arm` and
#'   optionally `subgroups` and `strat`. `groups_lists` optionally specifies
#'   groupings for `subgroups` variables.
#'
#' @return
#' * `h_coxph_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`,
#'   `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @examples
#' # Extract hazard ratio for multiple groups.
#' h_coxph_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#'
#' # Define groupings of BMRKR2 levels.
#' h_coxph_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' # Extract hazard ratio for multiple groups with stratification factors.
#' h_coxph_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2"),
#'     strat = c("STRATA1", "STRATA2")
#'   ),
#'   data = adtte_f
#' )
#'
#' @export
h_coxph_subgroups_df <- function(variables,
                                 data,
                                 groups_lists = list(),
                                 control = control_coxph(),
                                 label_all = "All Patients") {
  checkmate::assert_character(variables$tte)
  checkmate::assert_character(variables$is_event)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_character(variables$strat, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  # Hazard ratio row for one data set; strata columns are passed on only when requested.
  coxph_row <- function(df) {
    h_coxph_df(
      tte = df[[variables$tte]],
      is_event = df[[variables$is_event]],
      arm = df[[variables$arm]],
      strata_data = if (is.null(variables$strat)) NULL else df[variables$strat],
      control = control
    )
  }

  # Overall population: labelled with `label_all` and flagged as a "content" row.
  result_all <- coxph_row(data)
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables the overall estimate is the complete result.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  # One estimate per subgroup, each combined with that subgroup's label columns.
  subsets <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  rows_by_subgroup <- lapply(subsets, function(grp) {
    estimates <- coxph_row(grp$df)
    labels_rep <- grp$df_labels[rep(1, times = nrow(estimates)), ]
    cbind(estimates, labels_rep)
  })

  result_subgroups <- do.call(rbind, args = c(rows_by_subgroup, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(result_all, result_subgroups)
}

#' Split Dataframe by Subgroups
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Split a dataframe into a non-nested list of subsets.
#'
#' @inheritParams survival_duration_subgroups
#' @param data (`data.frame`)\cr dataset to split.
#' @param subgroups (`character`)\cr names of factor variables from `data` used to create subsets.
#'   Unused levels not present in `data` are dropped. Note that the order in this vector
#'   determines the order in the downstream table.
#'
#' @return A list with subset data (`df`) and metadata about the subset (`df_labels`).
#'
#' @details Main functionality is to prepare data for use in forest plot layouts.
#'
#' @examples
#' df <- data.frame(
#'   x = c(1:5),
#'   y = factor(c("A", "B", "A", "B", "A"), levels = c("A", "B", "C")),
#'   z = factor(c("C", "C", "D", "D", "D"), levels = c("D", "C"))
#' )
#' formatters::var_labels(df) <- paste("label for", names(df))
#'
#' h_split_by_subgroups(
#'   data = df,
#'   subgroups = c("y", "z")
#' )
#'
#' h_split_by_subgroups(
#'   data = df,
#'   subgroups = c("y", "z"),
#'   groups_lists = list(
#'     y = list("AB" = c("A", "B"), "C" = "C")
#'   )
#' )
#'
#' @export
h_split_by_subgroups <- function(data,
                                 subgroups,
                                 groups_lists = list()) {
  # `groups_lists` may only define groupings for variables listed in `subgroups`.
  checkmate::assert_character(subgroups, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(groups_lists, names = "named")
  checkmate::assert_subset(names(groups_lists), subgroups)
  assert_df_with_factors(data, as.list(stats::setNames(subgroups, subgroups)))

  # Keep the variable labels so they can be re-applied to every subset below.
  data_labels <- unname(formatters::var_labels(data))
  df_subgroups <- data[, subgroups, drop = FALSE]
  subgroup_labels <- formatters::var_labels(df_subgroups, fill = TRUE)

  # For each subgroup variable, list the subgroups that are present in the data.
  l_labels <- Map(function(grp_i, name_i) {
    existing_levels <- levels(droplevels(grp_i))
    grp_levels <- if (name_i %in% names(groups_lists)) {
      # For this variable groupings are defined. We check which groups are contained in the data.
      group_list_i <- groups_lists[[name_i]]
      group_has_levels <- vapply(group_list_i, function(lvls) any(lvls %in% existing_levels), TRUE)
      names(which(group_has_levels))
    } else {
      existing_levels
    }
    data.frame(
      subgroup = grp_levels,
      var = name_i,
      var_label = unname(subgroup_labels[name_i]),
      stringsAsFactors = FALSE # Rationale is that subgroups may not be unique.
    )
  }, df_subgroups, names(df_subgroups))

  # Create a dataframe with one row per subgroup.
  df_labels <- do.call(rbind, args = c(l_labels, make.row.names = FALSE))
  row_label <- paste0(df_labels$var, ".", df_labels$subgroup)
  # Using `row_label` as the levels keeps the list below in the order of `df_labels`.
  row_split_var <- factor(row_label, levels = row_label)

  # Create a list of data subsets.
  lapply(split(df_labels, row_split_var), function(row_i) {
    # Levels of the variable that make up this subgroup: the members of the user-defined
    # group if groupings exist for the variable, otherwise the single level itself.
    subgroup_levels <- if (row_i$var %in% names(groups_lists)) {
      groups_lists[[row_i$var]][[row_i$subgroup]]
    } else {
      row_i$subgroup
    }
    # `%in%` never returns `NA`, so records with a missing subgroup value are left out.
    # Comparing with `==` would give `NA` for them, and an `NA` in a logical row index
    # adds a row consisting only of `NA`s to the subset.
    which_row <- data[[row_i$var]] %in% subgroup_levels
    df <- data[which_row, ]
    rownames(df) <- NULL
    formatters::var_labels(df) <- data_labels

    list(
      df = df,
      df_labels = data.frame(row_i, row.names = NULL)
    )
  })
}

#' Helper Function to create a new `SMQ` variable in `ADAE` by stacking `SMQ` and/or `CQ` records.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper Function to create a new `SMQ` variable in `ADAE` that consists of all adverse events belonging to
#' selected Standardized/Customized queries. The new dataset will only contain records of the adverse events
#' belonging to any of the selected baskets.
#'
#' @inheritParams argument_convention
#' @param baskets (`character`)\cr variable names of the selected Standardized/Customized queries.
#' @param smq_varlabel (`string`)\cr a label for the new variable created.
#' @param keys (`character`)\cr names of the key variables to be returned along with the new variable created.
#' @param aag_summary (`data.frame`)\cr containing the `SMQ` baskets and the levels of interest for the final `SMQ`
#'   variable. This is useful when there are some levels of interest that are not observed in the `df` dataset.
#'   The two columns of this dataset should be named `basket` and `basket_name`.
#'
#' @return `data.frame` with variables in `keys` taken from `df` and new variable `SMQ` containing
#'   records belonging to the baskets selected via the `baskets` argument.
#'
#' @examples
#' adae <- tern_ex_adae[1:20, ] %>% df_explicit_na()
#' h_stack_by_baskets(df = adae)
#'
#' aag <- data.frame(
#'   NAMVAR = c("CQ01NAM", "CQ02NAM", "SMQ01NAM", "SMQ02NAM"),
#'   REFNAME = c(
#'     "D.2.1.5.3/A.1.1.1.1 AESI", "X.9.9.9.9/Y.8.8.8.8 AESI",
#'     "C.1.1.1.3/B.2.2.3.1 AESI", "C.1.1.1.3/B.3.3.3.3 AESI"
#'   ),
#'   SCOPE = c("", "", "BROAD", "BROAD"),
#'   stringsAsFactors = FALSE
#' )
#'
#' basket_name <- character(nrow(aag))
#' cq_pos <- grep("^(CQ).+NAM$", aag$NAMVAR)
#' smq_pos <- grep("^(SMQ).+NAM$", aag$NAMVAR)
#' basket_name[cq_pos] <- aag$REFNAME[cq_pos]
#' basket_name[smq_pos] <- paste0(
#'   aag$REFNAME[smq_pos], "(", aag$SCOPE[smq_pos], ")"
#' )
#'
#' aag_summary <- data.frame(
#'   basket = aag$NAMVAR,
#'   basket_name = basket_name,
#'   stringsAsFactors = TRUE
#' )
#'
#' result <- h_stack_by_baskets(df = adae, aag_summary = aag_summary)
#' all(levels(aag_summary$basket_name) %in% levels(result$SMQ))
#'
#' h_stack_by_baskets(
#'   df = adae,
#'   aag_summary = NULL,
#'   keys = c("STUDYID", "USUBJID", "AEDECOD", "ARM"),
#'   baskets = "SMQ01NAM"
#' )
#'
#' @export
h_stack_by_baskets <- function(df,
                               baskets = grep("^(SMQ|CQ).+NAM$", names(df), value = TRUE),
                               smq_varlabel = "Standardized MedDRA Query",
                               keys = c("STUDYID", "USUBJID", "ASTDTM", "AEDECOD", "AESEQ"),
                               aag_summary = NULL,
                               na_level = "<Missing>") {
  # Validate the arguments before their first use.
  checkmate::assert_data_frame(df)
  checkmate::assert_character(baskets)
  checkmate::assert_string(smq_varlabel)
  checkmate::assert_string(na_level)
  checkmate::assert_true(all(startsWith(baskets, "SMQ") | startsWith(baskets, "CQ")))
  checkmate::assert_true(all(endsWith(baskets, "NAM")))
  checkmate::assert_subset(baskets, names(df))
  checkmate::assert_subset(keys, names(df))

  smq_nam <- baskets[startsWith(baskets, "SMQ")]
  # SC corresponding to NAM
  smq_sc <- gsub(pattern = "NAM", replacement = "SC", x = smq_nam, fixed = TRUE)
  smq <- stats::setNames(smq_sc, smq_nam)
  checkmate::assert_subset(smq_sc, names(df))

  # Apply df_explicit_na() in case the user has not already done so.
  df <- df_explicit_na(df, na_level = na_level)

  if (!is.null(aag_summary)) {
    assert_df_with_variables(
      df = aag_summary,
      variables = list(val = c("basket", "basket_name"))
    )
    # Warning in case there is no match between `aag_summary$basket` and `baskets` argument.
    # Honestly, I think those should completely match. Target baskets should be the same.
    if (length(intersect(baskets, unique(aag_summary$basket))) == 0) {
      warning("There are 0 baskets in common between aag_summary$basket and `baskets` argument.")
    }
  }

  # `drop = FALSE` keeps a data frame even when a single column is selected from a base
  # `data.frame` (a tibble never drops, so tibble input is unaffected).
  var_labels <- c(formatters::var_labels(df[, keys, drop = FALSE]), "SMQ" = smq_varlabel)

  # convert `na_level` records from baskets to NA for the later loop and from wide to long steps
  df[, c(baskets, smq_sc)][df[, c(baskets, smq_sc)] == na_level] <- NA

  if (all(is.na(df[, baskets, drop = FALSE]))) { # in case there is no level for the target baskets
    # we just need an empty dataframe keeping all factor levels
    df_long <- df[-seq_len(nrow(df)), keys, drop = FALSE]
  } else {
    # Concatenate SMQxxxNAM with corresponding SMQxxxSC
    df_cnct <- df[, c(keys, baskets[startsWith(baskets, "CQ")]), drop = FALSE]

    for (nam in names(smq)) {
      sc <- smq[nam] # SMQxxxSC corresponding to SMQxxxNAM
      nam_notna <- !is.na(df[[nam]])
      new_colname <- paste(nam, sc, sep = "_")
      df_cnct[nam_notna, new_colname] <- paste0(df[[nam]], "(", df[[sc]], ")")[nam_notna]
    }

    # Temporary row identifier so that every input record is a distinct unit in reshape().
    df_cnct$unique_id <- seq_len(nrow(df_cnct))
    var_cols <- names(df_cnct)[!(names(df_cnct) %in% c(keys, "unique_id"))]
    # have to convert df_cnct from tibble to dataframe
    # as it throws a warning otherwise about rownames.
    # tibble do not support rownames and reshape creates rownames

    df_long <- stats::reshape(
      data = as.data.frame(df_cnct),
      varying = var_cols,
      v.names = "SMQ",
      idvar = names(df_cnct)[names(df_cnct) %in% c(keys, "unique_id")],
      direction = "long",
      new.row.names = seq_len(length(var_cols) * nrow(df_cnct))
    )

    # Keep only records that belong to a basket and drop the reshape helper columns.
    df_long <- df_long[!is.na(df_long[, "SMQ"]), !(names(df_long) %in% c("time", "unique_id"))]
    df_long$SMQ <- as.factor(df_long$SMQ)
  }

  smq_levels <- setdiff(levels(df_long[["SMQ"]]), na_level)

  if (!is.null(aag_summary)) {
    # A warning in case there is no match between df and aag_summary records
    if (length(intersect(smq_levels, unique(aag_summary$basket_name))) == 0) {
      warning("There are 0 basket levels in common between aag_summary$basket_name and df.")
    }
    # Observed levels plus any level listed in `aag_summary` that was not observed.
    df_long[["SMQ"]] <- factor(
      df_long[["SMQ"]],
      levels = sort(
        c(
          smq_levels,
          setdiff(unique(aag_summary$basket_name), smq_levels)
        )
      )
    )
  } else {
    # Baskets without any record are added as levels under their variable name.
    # `drop = FALSE` makes vapply() iterate over columns even for a single basket.
    all_na_basket_flag <- vapply(df[, baskets, drop = FALSE], function(x) {
      all(is.na(x))
    }, FUN.VALUE = logical(1))
    all_na_basket <- baskets[all_na_basket_flag]

    df_long[["SMQ"]] <- factor(
      df_long[["SMQ"]],
      levels = sort(c(smq_levels, all_na_basket))
    )
  }
  formatters::var_labels(df_long) <- var_labels
  tibble::tibble(df_long)
}

#' Tabulate Binary Response by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tabulate statistics such as response rate and odds ratio for population subgroups.
#'
#' @inheritParams argument_convention
#' @param data (`data.frame`)\cr the dataset containing the variables to summarize.
#' @param groups_lists (named `list` of `list`)\cr optionally contains for each `subgroups` variable a
#'   list, which specifies the new group levels via the names and the
#'   levels that belong to it in the character vectors that are elements of the list.
#' @param label_all (`string`)\cr label for the total population analysis.
#' @param method (`string`)\cr specifies the test used to calculate the p-value for the difference between
#'   two proportions. For options, see [s_test_proportion_diff()]. Default is `NULL` so no test is performed.
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. Tables typically used as part of forest plot.
#'
#' @seealso [extract_rsp_subgroups()]
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
#'   droplevels() %>%
#'   mutate(
#'     # Reorder levels of factor to make the placebo group the reference arm.
#'     ARM = fct_relevel(ARM, "B: Placebo"),
#'     rsp = AVALC == "CR"
#'   )
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' # Unstratified analysis.
#' df <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f
#' )
#' df
#'
#' @name response_subgroups
NULL

#' Prepares Response Data for Population Subgroups in Data Frames
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares response rates and odds ratios for population subgroups in data frames. Simple wrapper
#' for [h_odds_ratio_subgroups_df()] and [h_proportion_subgroups_df()]. Result is a list of two
#' `data.frames`: `prop` and `or`. `variables` corresponds to the names of variables found in `data`,
#' passed as a named `list` and requires elements `rsp`, `arm` and optionally `subgroups` and `strat`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams response_subgroups
#' @param label_all (`string`)\cr label for the total population analysis.
#'
#' @return A named list of two elements:
#'   * `prop`: A `data.frame` containing columns `arm`, `n`, `n_rsp`, `prop`, `subgroup`, `var`,
#'     `var_label`, and `row_type`.
#'   * `or`: A `data.frame` containing columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, `conf_level`,
#'     `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @seealso [response_subgroups]
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
#'   droplevels() %>%
#'   mutate(
#'     # Reorder levels of factor to make the placebo group the reference arm.
#'     ARM = fct_relevel(ARM, "B: Placebo"),
#'     rsp = AVALC == "CR"
#'   )
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' # Unstratified analysis.
#' df <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f
#' )
#' df
#'
#' # Stratified analysis.
#' df_strat <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2"), strat = "STRATA1"),
#'   data = adrs_f
#' )
#' df_strat
#'
#' # Grouping of the BMRKR2 levels.
#' df_grouped <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#' df_grouped
#'
#' @export
extract_rsp_subgroups <- function(variables,
                                  data,
                                  groups_lists = list(),
                                  conf_level = 0.95,
                                  method = NULL,
                                  label_all = "All Patients") {
  # Both helpers receive the same `variables`, `data`, `groups_lists` and `label_all`;
  # `conf_level` and `method` are passed to the odds ratio helper only.
  list(
    prop = h_proportion_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      label_all = label_all
    ),
    or = h_odds_ratio_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      conf_level = conf_level,
      method = method,
      label_all = label_all
    )
  )
}

#' @describeIn response_subgroups Formatted analysis function which is used as `afun` in `tabulate_rsp_subgroups()`.
#'
#' @return
#' * `a_response_subgroups()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_response_subgroups
#' \dontrun{
#' a_response_subgroups(.formats = list("n" = "xx", "prop" = "xx.xx%"))
#' }
#'
#' @keywords internal
a_response_subgroups <- function(.formats = list(
                                   n = "xx",
                                   n_rsp = "xx",
                                   prop = "xx.x%",
                                   n_tot = "xx",
                                   or = list(format_extreme_values(2L)),
                                   ci = list(format_extreme_values_ci(2L)),
                                   pval = "x.xxxx | (<0.0001)"
                                 )) {
  checkmate::assert_list(.formats)
  checkmate::assert_subset(
    names(.formats),
    c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval")
  )

  # Build the function for one statistic. Its rows are labelled by `df$subgroup` and
  # formatted with `fmt`; "ci" combines the `lcl` and `ucl` columns, every other
  # statistic reads the column of the same name.
  make_afun <- function(stat, fmt) {
    is_ci <- stat == "ci"
    function(df, labelstr = "", ...) {
      cell_values <- if (is_ci) {
        combine_vectors(df$lcl, df$ucl)
      } else {
        as.list(df[[stat]])
      }
      in_rows(.list = cell_values, .labels = as.character(df$subgroup), .formats = fmt)
    }
  }

  # One function per entry of `.formats`, named after the statistic.
  Map(make_afun, stat = names(.formats), fmt = .formats)
}

#' @describeIn response_subgroups Table-creating function which creates a table
#'   summarizing binary response by subgroup. This function is a wrapper for [rtables::analyze_colvars()]
#'   and [rtables::summarize_row_groups()].
#'
#' @param df (`list`)\cr of data frames containing all analysis variables. List should be
#'   created using [extract_rsp_subgroups()].
#' @param vars (`character`)\cr the names of statistics to be reported among:
#'   * `n`: Total number of observations per group.
#'   * `n_rsp`: Number of responders per group.
#'   * `prop`: Proportion of responders.
#'   * `n_tot`: Total number of observations.
#'   * `or`: Odds ratio.
#'   * `ci` : Confidence interval of odds ratio.
#'   * `pval`: p-value of the effect.
#'   Note, the statistics `n_tot`, `or` and `ci` are required.
#'
#' @return An `rtables` table summarizing binary response by subgroup.
#'
#' @examples
#' ## Table with default columns.
#' basic_table() %>%
#'   tabulate_rsp_subgroups(df)
#'
#' ## Table with selected columns.
#' basic_table() %>%
#'   tabulate_rsp_subgroups(
#'     df = df,
#'     vars = c("n_tot", "n", "n_rsp", "prop", "or", "ci")
#'   )
#'
#' @export
tabulate_rsp_subgroups <- function(lyt,
                                   df,
                                   vars = c("n_tot", "n", "prop", "or", "ci")) {
  # The confidence level and the p-value label are read from the first odds ratio row.
  conf_level <- df$or$conf_level[1]
  method <- NULL
  if ("pval_label" %in% names(df$or)) {
    method <- df$or$pval_label[1]
  }

  afun_lst <- a_response_subgroups()
  colvars <- d_rsp_subgroups_colvars(vars, conf_level = conf_level, method = method)

  # Split the requested columns into those of the proportion table and those of the
  # odds ratio table.
  is_prop_col <- names(colvars$labels) %in% c("n", "prop", "n_rsp")
  is_or_col <- names(colvars$labels) %in% c("n_tot", "or", "ci", "pval")
  colvars_prop <- list(vars = colvars$vars[is_prop_col], labels = colvars$labels[is_prop_col])
  colvars_or <- list(vars = colvars$vars[is_or_col], labels = colvars$labels[is_or_col])

  # Columns from table_prop are optional.
  table_prop <- NULL
  if (length(colvars_prop$vars) > 0) {
    afun_prop <- afun_lst[names(colvars_prop$labels)]

    lyt_prop <- lyt %>%
      split_cols_by(var = "arm") %>%
      split_rows_by(
        var = "row_type",
        split_fun = keep_split_levels("content"),
        nested = FALSE
      ) %>%
      summarize_row_groups(var = "var_label", cfun = afun_prop) %>%
      split_cols_by_multivar(vars = colvars_prop$vars, varlabels = colvars_prop$labels)

    # Subgroup rows are added only when the data contains "analysis" rows.
    if ("analysis" %in% df$prop$row_type) {
      lyt_prop <- lyt_prop %>%
        split_rows_by(
          var = "row_type",
          split_fun = keep_split_levels("analysis"),
          nested = FALSE,
          child_labels = "hidden"
        ) %>%
        split_rows_by(var = "var_label", nested = TRUE) %>%
        analyze_colvars(afun = afun_prop, inclNAs = TRUE)
    }

    table_prop <- build_table(lyt_prop, df = df$prop)
  }

  # Columns "n_tot", "or", "ci" in table_or are required.
  afun_or <- afun_lst[names(colvars_or$labels)]

  lyt_or <- lyt %>%
    split_cols_by(var = "arm") %>%
    split_rows_by(
      var = "row_type",
      split_fun = keep_split_levels("content"),
      nested = FALSE
    ) %>%
    split_cols_by_multivar(vars = colvars_or$vars, varlabels = colvars_or$labels) %>%
    summarize_row_groups(var = "var_label", cfun = afun_or) %>%
    append_topleft("Baseline Risk Factors")

  if ("analysis" %in% df$or$row_type) {
    lyt_or <- lyt_or %>%
      split_rows_by(
        var = "row_type",
        split_fun = keep_split_levels("analysis"),
        nested = FALSE,
        child_labels = "hidden"
      ) %>%
      split_rows_by(var = "var_label", nested = TRUE) %>%
      analyze_colvars(afun = afun_or, inclNAs = TRUE)
  }
  table_or <- build_table(lyt_or, df = df$or)

  # Combine the tables and record the positions of the odds ratio, CI and total n columns.
  n_tot_id <- match("n_tot", colvars_or$vars)
  if (is.null(table_prop)) {
    result <- table_or
    or_id <- match("or", colvars_or$vars)
    ci_id <- match("lcl", colvars_or$vars)
  } else {
    # Column order: total n, then the proportion columns, then the remaining odds ratio columns.
    result <- cbind_rtables(table_or[, n_tot_id], table_prop, table_or[, -n_tot_id])
    other_or_vars <- colvars_or$vars[-n_tot_id]
    n_leading_cols <- 1L + ncol(table_prop)
    or_id <- n_leading_cols + match("or", other_or_vars)
    ci_id <- n_leading_cols + match("lcl", other_or_vars)
    n_tot_id <- 1L
  }

  # The column positions and the header text are attached to the table as attributes.
  structure(
    result,
    forest_header = paste0(levels(df$prop$arm), "\nBetter"),
    col_x = or_id,
    col_ci = ci_id,
    col_symbol_size = n_tot_id
  )
}

#' Labels for Column Variables in Binary Response by Subgroup Table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Internal function to check variables included in [tabulate_rsp_subgroups()] and create column labels.
#'
#' @inheritParams argument_convention
#' @inheritParams tabulate_rsp_subgroups
#'
#' @return A `list` of variables to tabulate and their labels.
#'
#' @export
d_rsp_subgroups_colvars <- function(vars,
                                    conf_level = NULL,
                                    method = NULL) {
  # `vars` must contain "n_tot", "or" and "ci" and may only use the known statistics.
  known_stats <- c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval")
  checkmate::assert_character(vars)
  checkmate::assert_subset(c("n_tot", "or", "ci"), vars)
  checkmate::assert_subset(vars, known_stats)

  # Labels that do not depend on the other arguments.
  varlabels <- c(
    n = "n",
    n_rsp = "Responders",
    prop = "Response (%)",
    n_tot = "Total n",
    or = "Odds Ratio"
  )

  colvars <- vars
  if ("ci" %in% vars) {
    # The confidence interval label is built from `conf_level`, so it must be supplied.
    checkmate::assert_false(is.null(conf_level))
    varlabels <- c(varlabels, ci = paste0(100 * conf_level, "% CI"))

    # The `lcl` variable is just a placeholder available in the analysis data,
    # it is not actually used on its own in the tabulation.
    # Variables used in the tabulation are lcl and ucl, see `a_response_subgroups` for details.
    colvars <- replace(colvars, colvars == "ci", "lcl")
  }

  # The p-value column is labelled with `method`.
  if ("pval" %in% vars) {
    varlabels <- c(varlabels, pval = method)
  }

  # Labels are returned in the order of `vars`.
  list(vars = colvars, labels = varlabels[vars])
}

#' Formatting Functions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' See below for the list of formatting functions created in `tern` to work with `rtables`.
#'
#' Other available formats can be listed via [`formatters::list_valid_format_labels()`]. Additional
#' custom formats can be created via the [`formatters::sprintf_format()`] function.
#'
#' @family formatting functions
#' @name formatting_functions
NULL

#' Formatting Fraction and Percentage
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a fraction together with ratio in percent.
#'
#' @param x (`integer`)\cr with elements `num` and `denom`.
#' @param ... required for `rtables` interface.
#'
#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`.
#'
#' @examples
#' format_fraction(x = c(num = 2L, denom = 3L))
#' format_fraction(x = c(num = 0L, denom = 3L))
#'
#' @family formatting functions
#' @export
format_fraction <- function(x, ...) {
  # Remove a `label` attribute, if any, before validating the input.
  attr(x, "label") <- NULL

  checkmate::assert_vector(x)
  checkmate::assert_count(x["num"])
  checkmate::assert_count(x["denom"])

  num <- x["num"]
  denom <- x["denom"]
  fraction <- paste0(num, "/", denom)

  # A zero numerator is shown without the percentage part.
  if (num == 0) {
    return(fraction)
  }

  # Percentage rounded to one decimal; trailing zeros are not kept.
  paste0(fraction, " (", round(num / denom * 100, 1), "%)")
}

#' Formatting Fraction and Percentage with Fixed Single Decimal Place
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a fraction together with ratio in percent with fixed single decimal place.
#' Includes trailing zero in case of whole number percentages to always keep one decimal place.
#'
#' @param x (`integer`)\cr with elements `num` and `denom`.
#' @param ... required for `rtables` interface.
#'
#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`.
#'
#' @examples
#' format_fraction_fixed_dp(x = c(num = 1L, denom = 2L))
#' format_fraction_fixed_dp(x = c(num = 1L, denom = 4L))
#' format_fraction_fixed_dp(x = c(num = 0L, denom = 3L))
#'
#' @family formatting functions
#' @export
format_fraction_fixed_dp <- function(x, ...) {
  # Remove a `label` attribute, if any, before validating the input.
  attr(x, "label") <- NULL
  checkmate::assert_vector(x)
  checkmate::assert_count(x["num"])
  checkmate::assert_count(x["denom"])

  num <- x["num"]
  denom <- x["denom"]
  fraction <- paste0(num, "/", denom)

  # A zero numerator is shown without the percentage part.
  if (num == 0) {
    return(fraction)
  }

  # "%.1f" always prints one decimal place, e.g. "50.0" rather than "50".
  percent <- sprintf("%.1f", round(num / denom * 100, 1))
  paste0(fraction, " (", percent, "%)")
}

#' Formatting Count and Fraction
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a count together with fraction with special consideration when count is `0`.
#'
#' @param x (`integer`)\cr vector of length 2, count and fraction.
#' @param ... required for `rtables` interface.
#'
#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`.
#'
#' @examples
#' format_count_fraction(x = c(2, 0.6667))
#' format_count_fraction(x = c(0, 0))
#'
#' @family formatting functions
#' @export
format_count_fraction <- function(x, ...) {
  # Remove a `label` attribute, if any, before inspecting the values.
  attr(x, "label") <- NULL

  # Any missing element short-circuits to the literal string "NA".
  if (any(is.na(x))) {
    return("NA")
  }

  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(x[2], include_boundaries = TRUE)

  # A zero count is printed on its own, without a percentage.
  if (x[1] == 0) {
    return("0")
  }

  # Count followed by the fraction as a percentage rounded to one decimal.
  paste0(x[1], " (", round(x[2] * 100, 1), "%)")
}

#' Formatting Count and Percentage with Fixed Single Decimal Place
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Formats a count together with fraction with special consideration when count is `0`.
#'
#' @param x (`integer`)\cr vector of length 2, count and fraction.
#' @param ... required for `rtables` interface.
#'
#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`.
#'
#' @examples
#' format_count_fraction_fixed_dp(x = c(2, 0.6667))
#' format_count_fraction_fixed_dp(x = c(2, 0.5))
#' format_count_fraction_fixed_dp(x = c(0, 0))
#'
#' @family formatting functions
#' @export
format_count_fraction_fixed_dp <- function(x, ...) {
  # Remove a `label` attribute, if any, before inspecting the values.
  attr(x, "label") <- NULL

  # Any missing element short-circuits to the literal string "NA".
  if (any(is.na(x))) {
    return("NA")
  }

  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(x[2], include_boundaries = TRUE)

  # A zero count is printed on its own, without a percentage.
  if (x[1] == 0) {
    return("0")
  }

  # A fraction of exactly 1 is printed as "100%" without a decimal place.
  if (x[2] == 1) {
    return(sprintf("%d (100%%)", x[1]))
  }

  # Otherwise the percentage always carries exactly one decimal place.
  sprintf("%d (%.1f%%)", x[1], x[2] * 100)
}

#' Formatting: XX as Formatting Function
#'
#' Translate a string where x and dots are interpreted as number place
#' holders, and others as formatting elements.
#'
#' @param str (`string`)\cr template.
#'
#' @return An `rtables` formatting function.
#'
#' @examples
#' test <- list(c(1.658, 0.5761), c(1e1, 785.6))
#'
#' z <- format_xx("xx (xx.x)")
#' sapply(test, z)
#'
#' z <- format_xx("xx.x - xx.x")
#' sapply(test, z)
#'
#' z <- format_xx("xx.x, incl. xx.x% NE")
#' sapply(test, z)
#'
#' @family formatting functions
#' @export
format_xx <- function(str) {
  # Locate every placeholder in the template: a run of "x", optionally
  # followed by a dot and another run of "x".
  positions <- gregexpr(pattern = "x+\\.x+|x+", text = str, perl = TRUE)
  placeholders <- regmatches(x = str, m = positions)[[1]]

  # Build one rounding function per placeholder. The number of "x" after the
  # dot gives the number of decimals; without a dot the value is rounded to 0.
  make_rounder <- function(placeholder) {
    parts <- strsplit(placeholder, split = "\\.")[[1]]
    n_decimals <- if (length(parts) > 1) nchar(parts[2]) else 0
    function(value) {
      round(value, digits = n_decimals)
    }
  }
  roundings <- lapply(placeholders, make_rounder)

  # The returned formatter pairs the i-th value with the i-th placeholder and
  # substitutes the rounded values into a local copy of the template.
  function(x, output) {
    values <- Map(function(value, rounder) rounder(value), x, roundings)
    regmatches(x = str, m = positions)[[1]] <- values
    str
  }
}

#' Formatting Fraction with Lower Threshold
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a fraction when the second element of the input `x` is the fraction. It applies
#' a lower threshold, below which it is just stated that the fraction is smaller than that.
#'
#' @param threshold (`proportion`)\cr lower threshold.
#'
#' @return An `rtables` formatting function that takes numeric input `x` where the second
#'   element is the fraction that is formatted. If the fraction is above or equal to the threshold,
#'   then it is displayed in percentage. If it is positive but below the threshold, it returns,
#'   e.g. "<1" if the threshold is `0.01`. If it is zero, then just "0" is returned.
#'
#' @examples
#' format_fun <- format_fraction_threshold(0.05)
#' format_fun(x = c(20, 0.1))
#' format_fun(x = c(2, 0.01))
#' format_fun(x = c(0, 0))
#'
#' @family formatting functions
#' @export
format_fraction_threshold <- function(threshold) {
  assert_proportion_value(threshold)

  # Text shown for positive fractions that fall below the threshold, e.g. "<5".
  string_below_threshold <- paste0("<", round(threshold * 100))

  function(x, ...) {
    # Only the second element of `x` (the fraction) is formatted.
    assert_proportion_value(x[2], include_boundaries = TRUE)

    # Compare against the user-supplied `threshold`. A hard-coded 0.01 here
    # would make the formatter ignore its argument, e.g. print "3" instead of
    # "<5" for a fraction of 0.03 with `threshold = 0.05`.
    # `>=` follows the documented contract: fractions above or equal to the
    # threshold are displayed as a rounded percentage.
    ifelse(
      x[2] >= threshold,
      round(x[2] * 100),
      ifelse(
        x[2] == 0,
        "0",
        string_below_threshold
      )
    )
  }
}

#' Formatting Extreme Values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' `rtables` formatting functions that handle extreme values.
#'
#' @param digits (`integer`)\cr number of decimal places to display.
#'
#' @details For each input, apply a format to the specified number of `digits`. If the value is
#'    below a threshold, it returns "<0.01" e.g. if the number of `digits` is 2. If the value is
#'    above a threshold, it returns ">999.99" e.g. if the number of `digits` is 2.
#'    If it is zero, then returns "0.00".
#'
#' @family formatting functions
#' @name extreme_format
NULL

#' @describeIn extreme_format Internal helper function to calculate the threshold and create formatted strings
#'  used in Formatting Functions. Returns a list with elements `threshold` and `format_string`.
#'
#' @return
#' * `h_get_format_threshold()` returns a `list` of 2 elements: `threshold`, with `low` and `high` thresholds,
#'   and `format_string`, with thresholds formatted as strings.
#'
#' @examples
#' h_get_format_threshold(2L)
#'
#' @export
h_get_format_threshold <- function(digits = 2L) {
  checkmate::assert_integerish(digits)

  # Smallest and largest values that can still be displayed with `digits`
  # decimal places, e.g. 0.01 and 999.99 for `digits = 2`.
  low_threshold <- 1 / (10^digits)
  high_threshold <- 1000 - low_threshold

  # Format the thresholds in fixed-point notation with `digits` decimals.
  # Plain `paste0()` switches to scientific notation for small numbers
  # (e.g. "<1e-04" for `digits = 4`), which is inconsistent with the
  # "%.<digits>f" format applied to regular values in `h_format_threshold()`.
  # Negative `digits` are formatted with zero decimals.
  fixed_fmt <- paste0("%.", pmax(as.integer(digits), 0L), "f")
  string_below_threshold <- paste0("<", sprintf(fixed_fmt, low_threshold))
  string_above_threshold <- paste0(">", sprintf(fixed_fmt, high_threshold))

  list(
    "threshold" = c(low = low_threshold, high = high_threshold),
    "format_string" = c(low = string_below_threshold, high = string_above_threshold)
  )
}

#' @describeIn extreme_format Internal helper function to apply a threshold format to a value.
#'   Creates a formatted string to be used in Formatting Functions.
#'
#' @param x (`number`)\cr value to format.
#'
#' @return
#' * `h_format_threshold()` returns the given value, or if the value is not within the digit threshold the relation
#'   of the given value to the digit threshold, as a formatted string.
#'
#' @examples
#' h_format_threshold(0.001)
#' h_format_threshold(1000)
#'
#' @export
h_format_threshold <- function(x, digits = 2L) {
  # Missing values are passed through untouched.
  if (is.na(x)) {
    return(x)
  }

  checkmate::assert_numeric(x, lower = 0)

  limits <- h_get_format_threshold(digits)

  # Positive but too small to be displayed with `digits` decimals.
  if (0 < x && x < limits$threshold["low"]) {
    return(limits$format_string[["low"]])
  }

  # Too large to be displayed.
  if (x > limits$threshold["high"]) {
    return(limits$format_string[["high"]])
  }

  # Within range (including exactly zero): fixed number of decimals.
  unname(sprintf(fmt = paste0("%.", digits, "f"), x))
}

#' Formatting a Single Extreme Value
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Create Formatting Function for a single extreme value.
#'
#' @inheritParams extreme_format
#'
#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme value.
#'
#' @examples
#' format_fun <- format_extreme_values(2L)
#' format_fun(x = 0.127)
#' format_fun(x = Inf)
#' format_fun(x = 0)
#' format_fun(x = 0.009)
#'
#' @family formatting functions
#' @export
format_extreme_values <- function(digits = 2L) {
  # The returned formatter accepts a single (possibly missing) value and
  # delegates the threshold logic to `h_format_threshold()`.
  formatter <- function(x, ...) {
    checkmate::assert_scalar(x, na.ok = TRUE)
    h_format_threshold(x = x, digits = digits)
  }

  formatter
}

#' Formatting Extreme Values Part of a Confidence Interval
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formatting Function for extreme values part of a confidence interval. Values
#' are formatted as e.g. "(xx.xx, xx.xx)" if the number of `digits` is 2.
#'
#' @inheritParams extreme_format
#'
#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme
#'   values confidence interval.
#'
#' @examples
#' format_fun <- format_extreme_values_ci(2L)
#' format_fun(x = c(0.127, Inf))
#' format_fun(x = c(0, 0.009))
#'
#' @family formatting functions
#' @export
format_extreme_values_ci <- function(digits = 2L) {
  # The returned formatter expects exactly two values (lower and upper limit),
  # formats each one with `h_format_threshold()` and wraps them in parentheses.
  formatter <- function(x, ...) {
    checkmate::assert_vector(x, len = 2)

    bounds <- list(
      lower = h_format_threshold(x = x[1], digits = digits),
      upper = h_format_threshold(x = x[2], digits = digits)
    )

    paste0("(", bounds$lower, ", ", bounds$upper, ")")
  }

  formatter
}

#' Helper Functions for Tabulating Binary Response by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions that tabulate in a data frame statistics such as response rate
#' and odds ratio for population subgroups.
#'
#' @inheritParams argument_convention
#' @inheritParams response_subgroups
#' @param arm (`factor`)\cr the treatment group variable.
#'
#' @details Main functionality is to prepare data for use in a layout-creating function.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
#'   droplevels() %>%
#'   mutate(
#'     # Reorder levels of factor to make the placebo group the reference arm.
#'     ARM = fct_relevel(ARM, "B: Placebo"),
#'     rsp = AVALC == "CR"
#'   )
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' @name h_response_subgroups
NULL

#' @describeIn h_response_subgroups helper to prepare a data frame of binary responses by arm.
#'
#' @return
#' * `h_proportion_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, and `prop`.
#'
#' @examples
#' h_proportion_df(
#'   c(TRUE, FALSE, FALSE),
#'   arm = factor(c("A", "A", "B"), levels = c("A", "B"))
#' )
#'
#' @export
h_proportion_df <- function(rsp, arm) {
  checkmate::assert_logical(rsp)
  assert_valid_factor(arm, len = length(rsp))

  # Records with a missing response are excluded from all counts.
  keep <- !is.na(rsp)
  rsp <- rsp[keep]
  arm <- arm[keep]

  # One-row summary for a single arm. An arm without any observation gets
  # `n = 0` and missing responder count / proportion.
  summarize_arm <- function(arm_rsp, arm_name) {
    n_obs <- length(arm_rsp)
    if (!(n_obs > 0)) {
      return(
        data.frame(
          arm = arm_name,
          n = 0L,
          n_rsp = NA,
          prop = NA,
          stringsAsFactors = FALSE
        )
      )
    }

    prop_stats <- s_proportion(df = arm_rsp)
    data.frame(
      arm = arm_name,
      n = n_obs,
      n_rsp = unname(prop_stats$n_prop[1]),
      prop = unname(prop_stats$n_prop[2]),
      stringsAsFactors = FALSE
    )
  }

  # `split()` yields one element per factor level of `arm`.
  rsp_by_arm <- split(rsp, arm)
  rows <- Map(summarize_arm, rsp_by_arm, names(rsp_by_arm))

  df <- do.call(rbind, args = c(rows, make.row.names = FALSE))
  # Restore `arm` as a factor with the original level order.
  df$arm <- factor(df$arm, levels = levels(arm))
  df
}

#' @describeIn h_response_subgroups summarizes proportion of binary responses by arm and across subgroups
#'    in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and
#'    requires elements `rsp`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies
#'    groupings for `subgroups` variables.
#'
#' @return
#' * `h_proportion_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, `prop`, `subgroup`,
#'   `var`, `var_label`, and `row_type`.
#'
#' @examples
#' h_proportion_subgroups_df(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f
#' )
#'
#' # Define groupings for BMRKR2 levels.
#' h_proportion_subgroups_df(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' @export
h_proportion_subgroups_df <- function(variables,
                                      data,
                                      groups_lists = list(),
                                      label_all = "All Patients") {
  checkmate::assert_character(variables$rsp)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  rsp_var <- variables$rsp
  arm_var <- variables$arm

  # Overall ("All Patients") rows, flagged as content rows.
  overall <- h_proportion_df(data[[rsp_var]], data[[arm_var]])
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  # Without subgroup variables only the overall rows are returned.
  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # One data subset (plus its label row) per subgroup level.
  subsets <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)

  per_subgroup <- lapply(subsets, function(grp) {
    props <- h_proportion_df(grp$df[[rsp_var]], grp$df[[arm_var]])
    # Repeat the single label row so that it lines up with every arm row.
    labels <- grp$df_labels[rep(1, times = nrow(props)), ]
    cbind(props, labels)
  })

  by_subgroup <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(overall, by_subgroup)
}

#' @describeIn h_response_subgroups helper to prepare a data frame with estimates of
#'   the odds ratio between a treatment and a control arm.
#'
#' @inheritParams response_subgroups
#' @param strata_data (`factor`, `data.frame` or `NULL`)\cr required if stratified analysis is performed.
#'
#' @return
#' * `h_odds_ratio_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, `conf_level`, and
#'   optionally `pval` and `pval_label`.
#'
#' @examples
#' # Unstratified analysis.
#' h_odds_ratio_df(
#'   c(TRUE, FALSE, FALSE, TRUE),
#'   arm = factor(c("A", "A", "B", "B"), levels = c("A", "B"))
#' )
#'
#' # Include p-value.
#' h_odds_ratio_df(adrs_f$rsp, adrs_f$ARM, method = "chisq")
#'
#' # Stratified analysis.
#' h_odds_ratio_df(
#'   rsp = adrs_f$rsp,
#'   arm = adrs_f$ARM,
#'   strata_data = adrs_f[, c("STRATA1", "STRATA2")],
#'   method = "cmh"
#' )
#'
#' @export
h_odds_ratio_df <- function(rsp, arm, strata_data = NULL, conf_level = 0.95, method = NULL) {
  assert_valid_factor(arm, n.levels = 2, len = length(rsp))

  df_rsp <- data.frame(
    rsp = rsp,
    arm = arm
  )

  # For a stratified analysis, collapse all strata variables into one factor.
  strata_name <- NULL
  if (!is.null(strata_data)) {
    strata_var <- interaction(strata_data, drop = TRUE)
    assert_valid_factor(strata_var, len = nrow(df_rsp))

    strata_name <- "strata"
    df_rsp[[strata_name]] <- strata_var
  }

  # First level of `arm` is the reference, second level is the comparison arm.
  by_arm <- split(df_rsp, arm)
  df_ref <- by_arm[[1]]
  df_trt <- by_arm[[2]]
  n_ref <- nrow(df_ref)
  n_trt <- nrow(df_trt)

  if (n_ref > 0 && n_trt > 0) {
    # Both arms have data: estimate odds ratio and CI.
    or_res <- s_odds_ratio(
      df = df_trt,
      .var = "rsp",
      .ref_group = df_ref,
      .in_ref_col = FALSE,
      .df_row = df_rsp,
      variables = list(arm = "arm", strata = strata_name),
      conf_level = conf_level
    )

    df <- data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = unname(or_res$n_tot["n_tot"]),
      or = unname(or_res$or_ci["est"]),
      lcl = unname(or_res$or_ci["lcl"]),
      ucl = unname(or_res$or_ci["ucl"]),
      conf_level = conf_level,
      stringsAsFactors = FALSE
    )

    if (!is.null(method)) {
      # Test for difference.
      test_res <- s_test_proportion_diff(
        df = df_trt,
        .var = "rsp",
        .ref_group = df_ref,
        .in_ref_col = FALSE,
        variables = list(strata = strata_name),
        method = method
      )

      df$pval <- as.numeric(test_res$pval)
      df$pval_label <- obj_label(test_res$pval)
    }

    return(df)
  }

  # At least one arm is empty, so no estimate can be computed and all of them
  # are reported as missing. With exactly one empty arm `n_tot` is counted
  # from the complete records; with both arms empty it is zero.
  n_tot <- if (n_ref == 0 && n_trt == 0) {
    0L
  } else {
    sum(stats::complete.cases(df_rsp))
  }

  df <- data.frame(
    # Dummy column needed downstream to create a nested header.
    arm = " ",
    n_tot = n_tot,
    or = NA,
    lcl = NA,
    ucl = NA,
    conf_level = conf_level,
    stringsAsFactors = FALSE
  )

  # Keep the p-value columns present (but missing) when a test was requested.
  if (!is.null(method)) {
    df$pval <- NA
    df$pval_label <- NA
  }

  df
}

#' @describeIn h_response_subgroups summarizes estimates of the odds ratio between a treatment and a control
#'   arm across subgroups in a data frame. `variables` corresponds to the names of variables found in
#'   `data`, passed as a named list and requires elements `rsp`, `arm` and optionally `subgroups`
#'   and `strat`. `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @return
#' * `h_odds_ratio_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`,
#'   `conf_level`, `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @examples
#' # Unstratified analysis.
#' h_odds_ratio_subgroups_df(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f
#' )
#'
#' # Stratified analysis.
#' h_odds_ratio_subgroups_df(
#'   variables = list(
#'     rsp = "rsp",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2"),
#'     strat = c("STRATA1", "STRATA2")
#'   ),
#'   data = adrs_f
#' )
#'
#' # Define groupings of BMRKR2 levels.
#' h_odds_ratio_subgroups_df(
#'   variables = list(
#'     rsp = "rsp",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' @export
h_odds_ratio_subgroups_df <- function(variables,
                                      data,
                                      groups_lists = list(),
                                      conf_level = 0.95,
                                      method = NULL,
                                      label_all = "All Patients") {
  checkmate::assert_character(variables$rsp)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_character(variables$strat, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  # Odds ratio summary for one data set; the strata columns are only
  # extracted when a stratified analysis was requested.
  odds_ratio_for <- function(dat) {
    strata_data <- if (is.null(variables$strat)) {
      NULL
    } else {
      dat[, variables$strat, drop = FALSE]
    }

    h_odds_ratio_df(
      rsp = dat[[variables$rsp]],
      arm = dat[[variables$arm]],
      strata_data = strata_data,
      conf_level = conf_level,
      method = method
    )
  }

  # Overall ("All Patients") row, flagged as content row.
  overall <- odds_ratio_for(data)
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  # Without subgroup variables only the overall row is returned.
  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # One data subset (plus its label row) per subgroup level.
  subsets <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)

  per_subgroup <- lapply(subsets, function(grp) {
    estimates <- odds_ratio_for(grp$df)
    # Repeat the single label row so that it lines up with every result row.
    labels <- grp$df_labels[rep(1, times = nrow(estimates)), ]
    cbind(estimates, labels)
  })

  by_subgroup <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(overall, by_subgroup)
}

#' Create a STEP Graph
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Based on the STEP results, creates a `ggplot` graph showing the estimated HR or OR
#' along the continuous biomarker value subgroups.
#'
#' @param df (`tibble`)\cr result of [tidy.step()].
#' @param use_percentile (`flag`)\cr whether to use percentiles for the x axis or actual
#'   biomarker values.
#' @param est (named `list`)\cr `col` and `lty` settings for estimate line.
#' @param ci_ribbon (named `list` or `NULL`)\cr `fill` and `alpha` settings for the confidence interval
#'   ribbon area, or `NULL` to not plot a CI ribbon.
#' @param col (`character`)\cr colors.
#'
#' @return A `ggplot` STEP graph.
#'
#' @seealso Custom tidy method [tidy.step()].
#'
#' @examples
#' library(nestcolor)
#' library(survival)
#' lung$sex <- factor(lung$sex)
#'
#' # Survival example.
#' vars <- list(
#'   time = "time",
#'   event = "status",
#'   arm = "sex",
#'   biomarker = "age"
#' )
#'
#' step_matrix <- fit_survival_step(
#'   variables = vars,
#'   data = lung,
#'   control = c(control_coxph(), control_step(num_points = 10, degree = 2))
#' )
#' step_data <- broom::tidy(step_matrix)
#'
#' # Default plot.
#' g_step(step_data)
#'
#' # Add the reference 1 horizontal line.
#' library(ggplot2)
#' g_step(step_data) +
#'   ggplot2::geom_hline(ggplot2::aes(yintercept = 1), linetype = 2)
#'
#' # Use actual values instead of percentiles, different color for estimate and no CI,
#' # use log scale for y axis.
#' g_step(
#'   step_data,
#'   use_percentile = FALSE,
#'   est = list(col = "blue", lty = 1),
#'   ci_ribbon = NULL
#' ) + scale_y_log10()
#'
#' # Adding another curve based on additional column.
#' step_data$extra <- exp(step_data$`Percentile Center`)
#' g_step(step_data) +
#'   ggplot2::geom_line(ggplot2::aes(y = extra), linetype = 2, color = "green")
#'
#' # Response example.
#' vars <- list(
#'   response = "status",
#'   arm = "sex",
#'   biomarker = "age"
#' )
#'
#' step_matrix <- fit_rsp_step(
#'   variables = vars,
#'   data = lung,
#'   control = c(
#'     control_logistic(response_definition = "I(response == 2)"),
#'     control_step()
#'   )
#' )
#' step_data <- broom::tidy(step_matrix)
#' g_step(step_data)
#'
#' @export
g_step <- function(df,
                   use_percentile = "Percentile Center" %in% names(df),
                   est = list(col = "blue", lty = 1),
                   ci_ribbon = list(fill = getOption("ggplot2.discrete.colour")[1], alpha = 0.5),
                   col = getOption("ggplot2.discrete.colour")) {
  checkmate::assert_tibble(df)
  checkmate::assert_flag(use_percentile)
  checkmate::assert_character(col, null.ok = TRUE)
  checkmate::assert_list(est, names = "named")
  checkmate::assert_list(ci_ribbon, names = "named", null.ok = TRUE)

  # Choose the x axis variable and copy the plotted columns to fixed names.
  x_var <- if (use_percentile) "Percentile Center" else "Interval Center"
  df$x <- df[[x_var]]
  attrs <- attributes(df)
  # The `estimate` attribute holds the name of the column with the estimate.
  df$y <- df[[attrs$estimate]]

  # Set legend names. To be modified also at call level
  legend_names <- c("Estimate", "CI 95%")

  # Colour of the estimate line as requested via `est$col`. Falls back to the
  # documented default "blue" when `est` carries no `col` element.
  est_col <- if (is.null(est$col)) "blue" else est$col

  p <- ggplot2::ggplot(df, ggplot2::aes(x = .data[["x"]], y = .data[["y"]]))

  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_color_manual(values = col)
  }

  if (!is.null(ci_ribbon)) {
    if (is.null(ci_ribbon$fill)) {
      ci_ribbon$fill <- "lightblue"
    }
    p <- p + ggplot2::geom_ribbon(
      ggplot2::aes(
        ymin = .data[["ci_lower"]], ymax = .data[["ci_upper"]],
        fill = legend_names[2]
      ),
      alpha = ci_ribbon$alpha
    ) +
      ggplot2::scale_fill_manual(
        name = "", values = stats::setNames(ci_ribbon$fill, legend_names[2])
      )
  }

  # The manual colour scale for the estimate replaces the scale added for
  # `col` above; `suppressMessages()` hides ggplot2's note about that.
  suppressMessages(p <- p +
    ggplot2::geom_line(
      ggplot2::aes(y = .data[["y"]], color = legend_names[1]),
      linetype = est$lty
    ) +
    ggplot2::scale_colour_manual(
      name = "", values = stats::setNames(est_col, legend_names[1])
    ))

  p <- p + ggplot2::labs(x = attrs$biomarker, y = attrs$estimate)
  if (use_percentile) {
    p <- p + ggplot2::scale_x_continuous(labels = scales::percent)
  }
  p
}

#' Custom Tidy Method for STEP Results
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tidy the STEP results into a `tibble` format ready for plotting.
#'
#' @param x (`step` matrix)\cr results from [fit_survival_step()].
#' @param ... not used here.
#'
#' @return A `tibble` with one row per STEP subgroup. The estimates and CIs are on the HR or OR scale,
#'   respectively. Additional attributes carry metadata also used for plotting.
#'
#' @seealso [g_step()] which consumes the result from this function.
#'
#' @method tidy step
#'
#' @examples
#' library(survival)
#' lung$sex <- factor(lung$sex)
#' vars <- list(
#'   time = "time",
#'   event = "status",
#'   arm = "sex",
#'   biomarker = "age"
#' )
#' step_matrix <- fit_survival_step(
#'   variables = vars,
#'   data = lung,
#'   control = c(control_coxph(), control_step(num_points = 10, degree = 2))
#' )
#' broom::tidy(step_matrix)
#'
#' @export
tidy.step <- function(x, ...) { # nolint
  checkmate::assert_class(x, "step")
  dat <- as.data.frame(x)

  # A `loghr` column identifies survival results; otherwise the estimate is
  # taken from the `logor` column.
  if ("loghr" %in% names(dat)) {
    est_var <- "loghr"
    new_est_var <- "Hazard Ratio"
  } else {
    est_var <- "logor"
    new_est_var <- "Odds Ratio"
  }

  # Rename the estimate column and exponentiate estimate and CI limits.
  names(dat)[match(est_var, names(dat))] <- new_est_var
  y_cols <- c(new_est_var, "ci_lower", "ci_upper")
  dat[, y_cols] <- exp(dat[, y_cols])

  # Warn about values that will cause trouble when plotting.
  has_na <- any(is.na(dat[, y_cols]))
  has_huge <- any(abs(dat[, y_cols]) > 1e10, na.rm = TRUE)

  if (has_na) {
    warning(paste(
      "Missing values in the point estimate or CI columns,",
      "this will lead to holes in the `g_step()` plot"
    ))
  }
  if (has_huge) {
    warning(paste(
      "Very large absolute values in the point estimate or CI columns,",
      "consider adding `scale_y_log10()` to the `g_step()` result for plotting"
    ))
  }
  if (has_na || has_huge) {
    warning("Consider using larger `bandwidth`, less `num_points` in `control_step()` settings for fitting")
  }

  # Attach the metadata read from the attributes of `x` to the tibble.
  structure(
    tibble::as_tibble(dat),
    estimate = new_est_var,
    biomarker = attr(x, "variables")$biomarker,
    ci = f_conf_level(attr(x, "control")$conf_level)
  )
}

#' Summary numeric variables in columns
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Layout-creating function which can be used for creating column-wise summary tables, primarily
#' used for PK data sets. This function is a wrapper for [rtables::analyze_colvars()].
#'
#' @inheritParams argument_convention
#' @inheritParams rtables::analyze_colvars
#'
#' @return
#' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
#' Adding this function to an `rtable` layout will summarize the given variables, arrange the output
#' in columns, and add it to the table layout.
#'
#' @seealso [summarize_vars()], [rtables::analyze_colvars()].
#'
#' @examples
#' library(dplyr)
#'
#' adpp <- tern_ex_adpp %>% h_pkparam_sort()
#' lyt <- basic_table() %>%
#'   split_rows_by(var = "ARM", label_pos = "topleft") %>%
#'   split_rows_by(var = "SEX", label_pos = "topleft") %>%
#'   analyze_vars_in_cols(vars = "AGE")
#' result <- build_table(lyt = lyt, df = adpp)
#' result
#'
#' # By selecting just some statistics and ad-hoc labels
#' lyt <- basic_table() %>%
#'   split_rows_by(var = "ARM", label_pos = "topleft") %>%
#'   split_rows_by(var = "SEX", label_pos = "topleft") %>%
#'   analyze_vars_in_cols(
#'     vars = "AGE",
#'     .stats = c("n", "cv", "geom_mean", "mean_ci", "median", "min", "max"),
#'     .labels = c(
#'       n = "myN",
#'       cv = "myCV",
#'       geom_mean = "myGeomMean",
#'       mean_ci = "Mean (95%CI)",
#'       median = "Median",
#'       min = "Minimum",
#'       max = "Maximum"
#'     )
#'   )
#' result <- build_table(lyt = lyt, df = adpp)
#' result
#'
#' lyt <- basic_table() %>%
#'   analyze_vars_in_cols(
#'     vars = "AGE",
#'     labelstr = "some custom label"
#'   )
#' result <- build_table(lyt, df = adpp)
#' result
#'
#' # PKPT03
#' lyt <- basic_table() %>%
#'   split_rows_by(var = "TLG_DISPLAY", split_label = "PK Parameter", label_pos = "topleft") %>%
#'   analyze_vars_in_cols(
#'     vars = "AVAL",
#'     .stats = c("n", "mean", "sd", "cv", "geom_mean", "geom_cv", "median", "min", "max"),
#'     .labels = c(
#'       n = "n",
#'       mean = "Mean",
#'       sd = "SD",
#'       cv = "CV (%)",
#'       geom_mean = "Geometric Mean",
#'       geom_cv = "CV % Geometric Mean",
#'       median = "Median",
#'       min = "Minimum",
#'       max = "Maximum"
#'     )
#'   )
#' result <- build_table(lyt, df = adpp)
#' result
#'
#' @export
analyze_vars_in_cols <- function(lyt,
                                 vars,
                                 ...,
                                 .stats = c(
                                   "n",
                                   "mean",
                                   "sd",
                                   "se",
                                   "cv",
                                   "geom_cv"
                                 ),
                                 .labels = c(
                                   n = "n",
                                   mean = "Mean",
                                   sd = "SD",
                                   se = "SE",
                                   cv = "CV (%)",
                                   geom_cv = "CV % Geometric Mean"
                                 ),
                                 labelstr = " ",
                                 nested = TRUE,
                                 na_level = NULL,
                                 .formats = NULL) {
  checkmate::assert_string(na_level, null.ok = TRUE)
  checkmate::assert_string(labelstr)
  checkmate::assert_flag(nested)

  # Formats: user-supplied ones win; otherwise combine the numeric summary
  # formats with the count summary formats (dropping the first count entry).
  formats_v <- if (is.null(.formats)) {
    c(summary_formats("numeric"), summary_formats("counts")[-1])
  } else {
    .formats
  }

  # One analysis function per requested statistic, each restricted to that
  # single statistic and to the format registered under the same name.
  build_afun <- function(stat) {
    make_afun(
      s_summary,
      .labels = labelstr,
      .stats = stat,
      .format_na_strs = na_level,
      .formats = formats_v[names(formats_v) == stat]
    )
  }
  afun_list <- Map(build_afun, stat = .stats)

  # A single variable is reused for every statistic; otherwise there must be
  # exactly one variable per statistic.
  n_stats <- length(.stats)
  if (length(vars) == 1) {
    vars <- rep(vars, n_stats)
  } else if (length(vars) != n_stats) {
    stop(
      "Analyzed variables (vars) does not have the same ",
      "number of elements of specified statistics (.stats)."
    )
  }

  # One column per statistic, labelled via `.labels`.
  lyt <- split_cols_by_multivar(
    lyt = lyt,
    vars = vars,
    varlabels = .labels[.stats]
  )

  analyze_colvars(
    lyt,
    afun = afun_list,
    nested = nested,
    extra_args = list(...)
  )
}

#' Helper Function for Deriving Analysis Datasets for LBT13 and LBT14
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function that merges `ADSL` and `ADLB` datasets so that missing lab test records are inserted in the
#' output dataset.
#'
#' @param adsl (`data.frame`)\cr `ADSL` dataframe.
#' @param adlb (`data.frame`)\cr `ADLB` dataframe.
#' @param worst_flag (named `vector`)\cr Worst post-baseline lab flag variable.
#' @param by_visit (`logical`)\cr defaults to `FALSE` to generate worst grade per patient.
#'   If worst grade per patient per visit is specified for `worst_flag`, then
#'   `by_visit` should be `TRUE` to generate worst grade patient per visit.
#' @param no_fillin_visits (named `character`)\cr Visits that are not considered for post-baseline worst toxicity
#'   grade. Defaults to `c("SCREENING", "BASELINE")`.
#'
#' @return `df` containing variables shared between `adlb` and `adsl` along with variables `PARAM`, `PARAMCD`,
#'   `ATOXGR`, and `BTOXGR` relevant for analysis. Optionally, `AVISIT` and `AVISITN` are included when
#'   `by_visit = TRUE` and `no_fillin_visits = c("SCREENING", "BASELINE")`.
#'
#' @details In the result data missing records will be created for the following situations:
#'   * Patients who are present in `adsl` but have no lab data in `adlb` (both baseline and post-baseline).
#'   * Patients who do not have any post-baseline lab values.
#'   * Patients without any post-baseline values flagged as the worst.
#'
#' @examples
#' # `h_adsl_adlb_merge_using_worst_flag`
#' adlb_out <- h_adsl_adlb_merge_using_worst_flag(
#'   tern_ex_adsl,
#'   tern_ex_adlb,
#'   worst_flag = c("WGRHIFL" = "Y")
#' )
#'
#' # `h_adsl_adlb_merge_using_worst_flag` by visit example
#' adlb_out_by_visit <- h_adsl_adlb_merge_using_worst_flag(
#'   tern_ex_adsl,
#'   tern_ex_adlb,
#'   worst_flag = c("WGRLOVFL" = "Y"),
#'   by_visit = TRUE
#' )
#'
#' @export
h_adsl_adlb_merge_using_worst_flag <- function(adsl, # nolint
                                               adlb,
                                               worst_flag = c("WGRHIFL" = "Y"),
                                               by_visit = FALSE,
                                               no_fillin_visits = c("SCREENING", "BASELINE")) {
  # Row positions of `adlb` where every flag variable named in `worst_flag`
  # equals its requested value.
  flag_hits <- Map(
    function(flag_var, flag_value) which(adlb[[flag_var]] == flag_value),
    names(worst_flag),
    worst_flag
  )
  flagged_rows <- Reduce(intersect, flag_hits)

  shared_cols <- intersect(colnames(adsl), colnames(adlb))
  lab_cols <- c("USUBJID", "PARAM", "PARAMCD", "AVISIT", "AVISITN", "ATOXGR", "BTOXGR")

  # Flagged records outside the excluded visits, reduced to the lab columns.
  adlb_flagged <- adlb[flagged_rows, ] %>%
    dplyr::filter(!.data[["AVISIT"]] %in% no_fillin_visits)
  adlb_flagged <- adlb_flagged[, lab_cols]

  param_lookup <- unique(adlb[c("PARAM", "PARAMCD")])
  adsl_shared <- adsl[, shared_cols]

  # Skeleton with one row per patient x parameter (x visit when `by_visit`),
  # so that combinations without a flagged record still appear in the output.
  if (by_visit) {
    merge_keys <- c("USUBJID", "AVISIT", "AVISITN", "PARAMCD", "PARAM")

    visits <- adlb %>%
      dplyr::filter(!.data[["AVISIT"]] %in% no_fillin_visits) %>%
      dplyr::pull(.data[["AVISIT"]]) %>%
      unique()

    skeleton <- expand.grid(
      USUBJID = unique(adsl$USUBJID),
      AVISIT = visits,
      PARAMCD = unique(adlb$PARAMCD)
    ) %>%
      dplyr::left_join(unique(adlb[c("AVISIT", "AVISITN")]), by = "AVISIT") %>%
      dplyr::left_join(param_lookup, by = "PARAMCD")
  } else {
    merge_keys <- c("USUBJID", "PARAMCD", "PARAM")

    skeleton <- expand.grid(
      USUBJID = unique(adsl$USUBJID),
      PARAMCD = unique(adlb$PARAMCD)
    ) %>%
      dplyr::left_join(param_lookup, by = "PARAMCD")
  }
  skeleton <- merge(adsl_shared, skeleton, by = "USUBJID")

  # Full outer merge: skeleton rows without a flagged record are kept with
  # missing values in the lab columns.
  adlb_out <- merge(
    adlb_flagged,
    skeleton,
    by = merge_keys,
    all = TRUE,
    sort = FALSE
  )

  if (by_visit) {
    # Overwrite `BTOXGR` with the value mapped per patient and parameter from
    # the complete `adlb`, so filled-in rows carry a baseline grade as well.
    baseline_lookup <- adlb %>%
      dplyr::select(c("USUBJID", "PARAMCD", "BTOXGR")) %>%
      unique() %>%
      dplyr::rename("BTOXGR_MAP" = "BTOXGR")

    adlb_out <- adlb_out %>%
      dplyr::left_join(baseline_lookup, by = c("USUBJID", "PARAMCD")) %>%
      dplyr::mutate(BTOXGR = .data$BTOXGR_MAP) %>%
      dplyr::select(-"BTOXGR_MAP")
  }

  # Labels are collected in the column order produced by `merge()`:
  # key columns, remaining lab columns, then the shared `adsl` columns.
  adlb_var_labels <- c(
    formatters::var_labels(adlb[merge_keys]),
    formatters::var_labels(adlb[setdiff(lab_cols, merge_keys)]),
    formatters::var_labels(adsl[setdiff(shared_cols, "USUBJID")])
  )

  adlb_out$ATOXGR <- as.factor(adlb_out$ATOXGR)
  adlb_out$BTOXGR <- as.factor(adlb_out$BTOXGR)

  adlb_out <- df_explicit_na(adlb_out)
  formatters::var_labels(adlb_out) <- adlb_var_labels

  adlb_out
}

#' Occurrence Counts by Grade
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions for analyzing frequencies and fractions of occurrences by grade for patients
#' with occurrence data. Multiple occurrences within one individual are counted once at the
#' greatest intensity/highest grade level.
#'
#' @inheritParams argument_convention
#' @param grade_groups (named `list` of `character`)\cr containing groupings of grades.
#' @param remove_single (`logical`)\cr `TRUE` to not include the elements of one-element grade groups
#'   in the output list; in this case only the grade groups names will be included in the output.
#'
#' @seealso Relevant helper function [h_append_grade_groups()].
#'
#' @name count_occurrences_by_grade
NULL

#' Helper function for [s_count_occurrences_by_grade()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function for [s_count_occurrences_by_grade()] to insert grade groupings into list with
#' individual grade frequencies. The order of the final result follows the order of `grade_groups`.
#' The elements under any-grade group (if any), i.e. the grade group equal to `refs` will be moved to
#' the end. Grade groups names must be unique.
#'
#' @inheritParams count_occurrences_by_grade
#' @param refs (named `list` of `numeric`)\cr where each name corresponds to a reference grade level
#'   and each entry represents a count.
#'
#' @return Formatted list of grade groupings.
#'
#' @examples
#' h_append_grade_groups(
#'   list(
#'     "Any Grade" = as.character(1:5),
#'     "Grade 1-2" = c("1", "2"),
#'     "Grade 3-4" = c("3", "4")
#'   ),
#'   list("1" = 10, "2" = 20, "3" = 30, "4" = 40, "5" = 50)
#' )
#'
#' h_append_grade_groups(
#'   list(
#'     "Any Grade" = as.character(5:1),
#'     "Grade A" = "5",
#'     "Grade B" = c("4", "3")
#'   ),
#'   list("1" = 10, "2" = 20, "3" = 30, "4" = 40, "5" = 50)
#' )
#'
#' h_append_grade_groups(
#'   list(
#'     "Any Grade" = as.character(1:5),
#'     "Grade 1-2" = c("1", "2"),
#'     "Grade 3-4" = c("3", "4")
#'   ),
#'   list("1" = 10, "2" = 5, "3" = 0)
#' )
#'
#' @export
h_append_grade_groups <- function(grade_groups, refs, remove_single = TRUE) {
  checkmate::assert_list(grade_groups)
  checkmate::assert_list(refs)
  refs_orig <- refs
  elements <- unique(unlist(grade_groups))

  ### compute sums in groups
  # Grades that are absent from `refs` index to `NULL` and contribute nothing.
  # Summing the unlisted values (instead of `do.call(sum, ...)`) keeps grade
  # names from being interpreted as argument names of `sum()`.
  grp_sum <- lapply(grade_groups, function(grp) sum(unlist(refs[grp], use.names = FALSE)))

  # pad `refs` with zero counts for grades that only occur in `grade_groups`
  padding_el <- setdiff(elements, names(refs))
  if (length(padding_el) > 0) {
    refs[padding_el] <- 0
  }
  result <- c(grp_sum, refs)

  ### order result while keeping grade_groups's ordering
  ordr <- grade_groups

  # elements of any-grade group (if any) will be moved to the end
  # (`vapply()` keeps this a logical vector even when `grade_groups` is empty)
  is_any <- vapply(grade_groups, setequal, logical(1), y = names(refs))
  ordr[is_any] <- list(character(0)) # hide elements under any-grade group

  # groups-elements combined sequence
  ordr <- c(lapply(names(ordr), function(g) c(g, ordr[[g]])), recursive = TRUE, use.names = FALSE)
  ordr <- ordr[!duplicated(ordr)]

  # append remaining elements (if any)
  ordr <- union(ordr, unlist(grade_groups[is_any])) # from any-grade group
  ordr <- union(ordr, names(refs)) # from refs

  # remove elements of single-element groups, if any
  if (remove_single) {
    is_single <- lengths(grade_groups) == 1L
    ordr <- setdiff(ordr, unlist(grade_groups[is_single]))
  }

  # apply the order
  result <- result[ordr]

  # remove groups without any elements in the original refs
  # note: it's OK if groups have 0 value
  keep_grp <- vapply(grade_groups, function(x, rf) {
    any(x %in% rf)
  }, rf = names(refs_orig), logical(1))

  # keep original grades plus the retained groups; padded grades are dropped again
  keep_el <- names(result) %in% names(refs_orig) | names(result) %in% names(keep_grp)[keep_grp]
  result <- result[keep_el]

  result
}

#' @describeIn count_occurrences_by_grade Statistics function which counts the
#'  number of patients by highest grade.
#'
#' @return
#' * `s_count_occurrences_by_grade()` returns a list of counts and fractions with one element per grade level or
#'   grade level grouping.
#'
#' @examples
#' library(dplyr)
#' df <- data.frame(
#'   USUBJID = as.character(c(1:6, 1)),
#'   ARM = factor(c("A", "A", "A", "B", "B", "B", "A"), levels = c("A", "B")),
#'   AETOXGR = factor(c(1, 2, 3, 4, 1, 2, 3), levels = c(1:5)),
#'   AESEV = factor(
#'     x = c("MILD", "MODERATE", "SEVERE", "MILD", "MILD", "MODERATE", "SEVERE"),
#'     levels = c("MILD", "MODERATE", "SEVERE")
#'   ),
#'   stringsAsFactors = FALSE
#' )
#' df_adsl <- df %>%
#'   select(USUBJID, ARM) %>%
#'   unique()
#'
#' s_count_occurrences_by_grade(
#'   df,
#'   .N_col = 10L,
#'   .var = "AETOXGR",
#'   id = "USUBJID",
#'   grade_groups = list("ANY" = levels(df$AETOXGR))
#' )
#'
#' @export
s_count_occurrences_by_grade <- function(df,
                                         .var,
                                         .N_col, # nolint
                                         id = "USUBJID",
                                         grade_groups = list(),
                                         remove_single = TRUE,
                                         labelstr = "") {
  assert_valid_factor(df[[.var]])
  assert_df_with_variables(df, list(grade = .var, id = id))

  if (nrow(df) < 1) {
    # No records: every grade level gets a zero count.
    grade_levels <- levels(df[[.var]])
    l_count <- as.list(rep(0, length(grade_levels)))
    names(l_count) <- grade_levels
  } else {
    if (isTRUE(is.factor(df[[id]]))) {
      assert_valid_factor(df[[id]], any.missing = FALSE)
    } else {
      checkmate::assert_character(df[[id]], min.chars = 1, any.missing = FALSE)
    }
    checkmate::assert_count(.N_col)

    id <- df[[id]]
    grade <- df[[.var]]

    if (!is.ordered(grade)) {
      # `max()` below needs an ordered factor, so derive an ordering of the levels.
      grade_lbl <- obj_label(grade)
      lvls <- levels(grade)
      is_numeric_lvl <- grepl("^\\d+$", lvls)
      if (sum(is_numeric_lvl) %in% c(0, length(lvls))) {
        # all-numeric or all-non-numeric levels: keep the existing level order
        lvl_ord <- lvls
      } else {
        # mixed levels: non-numeric levels sort below the smallest numeric level,
        # the numeric ones by their value
        lvls[!is_numeric_lvl] <- min(as.numeric(lvls[is_numeric_lvl])) - 1
        lvl_ord <- levels(grade)[order(as.numeric(lvls))]
      }
      grade <- formatters::with_label(factor(grade, levels = lvl_ord, ordered = TRUE), grade_lbl)
    }

    # each patient is counted once, at their highest grade
    df_max <- stats::aggregate(grade ~ id, FUN = max, drop = FALSE)
    l_count <- as.list(table(df_max$grade))
  }

  if (length(grade_groups) > 0) {
    l_count <- h_append_grade_groups(grade_groups, l_count, remove_single)
  }

  # Pair each count with its fraction of `.N_col`. A zero count in a column
  # without any patients is reported as a zero fraction rather than 0 / 0 = NaN.
  l_count_fraction <- lapply(
    l_count,
    function(i, denom) {
      if (isTRUE(i == 0) && isTRUE(denom == 0)) {
        c(0, 0)
      } else {
        c(i, i / denom)
      }
    },
    denom = .N_col
  )

  list(
    count_fraction = l_count_fraction
  )
}

#' @describeIn count_occurrences_by_grade Formatted analysis function which is used as `afun`
#'   in `count_occurrences_by_grade()`.
#'
#' @return
#' * `a_count_occurrences_by_grade()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' #  We need to ungroup `count_fraction` first so that the `rtables` formatting
#' # function `format_count_fraction()` can be applied correctly.
#' afun <- make_afun(a_count_occurrences_by_grade, .ungroup_stats = "count_fraction")
#' afun(
#'   df,
#'   .N_col = 10L,
#'   .var = "AETOXGR",
#'   id = "USUBJID",
#'   grade_groups = list("ANY" = levels(df$AETOXGR))
#' )
#'
#' @export
# Formatted counterpart of `s_count_occurrences_by_grade()`: the same statistics
# function with a default format attached to its `count_fraction` statistic.
a_count_occurrences_by_grade <- make_afun(
  s_count_occurrences_by_grade,
  .formats = c(count_fraction = format_count_fraction_fixed_dp)
)

#' @describeIn count_occurrences_by_grade Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param var_labels (`character`)\cr labels to show in the result table.
#'
#' @return
#' * `count_occurrences_by_grade()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_occurrences_by_grade()` to the table layout.
#'
#' @examples
#' # Layout creating function with custom format.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_occurrences_by_grade(
#'     var = "AESEV",
#'     .formats = c("count_fraction" = "xx.xx (xx.xx%)")
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' # Define additional grade groupings.
#' grade_groups <- list(
#'   "-Any-" = c("1", "2", "3", "4", "5"),
#'   "Grade 1-2" = c("1", "2"),
#'   "Grade 3-5" = c("3", "4", "5")
#' )
#'
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_occurrences_by_grade(
#'     var = "AETOXGR",
#'     grade_groups = grade_groups
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' @export
count_occurrences_by_grade <- function(lyt,
                                       var,
                                       var_labels = var,
                                       show_labels = "default",
                                       ...,
                                       table_names = var,
                                       .stats = NULL,
                                       .formats = NULL,
                                       .indent_mods = NULL,
                                       .labels = NULL) {
  # Customize the formatted analysis function; `count_fraction` is ungrouped so
  # that each grade (or grade group) becomes its own row.
  # NOTE(review): `.labels` is accepted but not forwarded to `make_afun()` here,
  # unlike in `summarize_occurrences_by_grade()` - confirm whether intentional.
  grade_afun <- make_afun(
    a_count_occurrences_by_grade,
    .ungroup_stats = "count_fraction",
    .stats = .stats,
    .formats = .formats,
    .indent_mods = .indent_mods
  )

  # Remaining arguments in `...` are handed on as `extra_args`.
  analyze(
    lyt = lyt,
    vars = var,
    afun = grade_afun,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    extra_args = list(...)
  )
}

#' @describeIn count_occurrences_by_grade Layout-creating function which can take content function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @return
#' * `summarize_occurrences_by_grade()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows
#'   containing the statistics from `s_count_occurrences_by_grade()` to the table layout.
#'
#' @examples
#' # Layout creating function with custom format.
#' basic_table() %>%
#'   add_colcounts() %>%
#'   split_rows_by("ARM", child_labels = "visible", nested = TRUE) %>%
#'   summarize_occurrences_by_grade(
#'     var = "AESEV",
#'     .formats = c("count_fraction" = "xx.xx (xx.xx%)")
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' basic_table() %>%
#'   add_colcounts() %>%
#'   split_rows_by("ARM", child_labels = "visible", nested = TRUE) %>%
#'   summarize_occurrences_by_grade(
#'     var = "AETOXGR",
#'     grade_groups = grade_groups
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' @export
summarize_occurrences_by_grade <- function(lyt,
                                           var,
                                           ...,
                                           .stats = NULL,
                                           .formats = NULL,
                                           .indent_mods = NULL,
                                           .labels = NULL) {
  # Content function derived from the formatted analysis function;
  # `count_fraction` is ungrouped so each grade (or group) gets its own row.
  grade_cfun <- make_afun(
    a_count_occurrences_by_grade,
    .ungroup_stats = "count_fraction",
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Remaining arguments in `...` are handed on as `extra_args`.
  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = grade_cfun,
    extra_args = list(...)
  )
}

#' Subgroup Treatment Effect Pattern (STEP) Fit for Binary (Response) Outcome
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This fits the Subgroup Treatment Effect Pattern logistic regression models for a binary
#' (response) outcome. The treatment arm variable must have exactly 2 levels,
#' where the first one is taken as reference and the estimated odds ratios are
#' for the comparison of the second level vs. the first one.
#'
#' The (conditional) logistic regression model which is fit is:
#'
#' `response ~ arm * poly(biomarker, degree) + covariates + strata(strata)`
#'
#' where `degree` is specified by `control_step()`.
#'
#' @inheritParams argument_convention
#' @param variables (named `list` of `character`)\cr list of analysis variables:
#'   needs `response`, `arm`, `biomarker`, and optional `covariates` and `strata`.
#' @param control (named `list`)\cr combined control list from [control_step()]
#'   and [control_logistic()].
#'
#' @return A matrix of class `step`. The first part of the columns describe the
#'   subgroup intervals used for the biomarker variable, including where the
#'   center of the intervals are and their bounds. The second part of the
#'   columns contain the estimates for the treatment arm comparison.
#'
#' @note For the default degree 0 the `biomarker` variable is not included in the model.
#'
#' @seealso [control_step()] and [control_logistic()] for the available
#'   customization options.
#'
#' @examples
#' # Testing dataset with just two treatment arms.
#' library(survival)
#' library(dplyr)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(
#'     PARAMCD == "BESRSPI",
#'     ARM %in% c("B: Placebo", "A: Drug X")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to have Placebo as reference arm for Odds Ratio calculations.
#'     ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")),
#'     RSP = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     SEX = factor(SEX)
#'   )
#'
#' variables <- list(
#'   arm = "ARM",
#'   biomarker = "BMRKR1",
#'   covariates = "AGE",
#'   response = "RSP"
#' )
#'
#' # Fit default STEP models: Here a constant treatment effect is estimated in each subgroup.
#' # We use a large enough bandwidth to avoid too small subgroups and linear separation in those.
#' step_matrix <- fit_rsp_step(
#'   variables = variables,
#'   data = adrs_f,
#'   control = c(control_logistic(), control_step(bandwidth = 0.5))
#' )
#' dim(step_matrix)
#' head(step_matrix)
#'
#' # Specify different polynomial degree for the biomarker interaction to use more flexible local
#' # models. Or specify different logistic regression options, including confidence level.
#' step_matrix2 <- fit_rsp_step(
#'   variables = variables,
#'   data = adrs_f,
#'   control = c(control_logistic(conf_level = 0.9), control_step(bandwidth = 0.6, degree = 1))
#' )
#'
#' # Use a global constant model. This is helpful as a reference for the subgroup models.
#' step_matrix3 <- fit_rsp_step(
#'   variables = variables,
#'   data = adrs_f,
#'   control = c(control_logistic(), control_step(bandwidth = NULL, num_points = 2L))
#' )
#'
#' # It is also possible to use strata, i.e. use conditional logistic regression models.
#' variables2 <- list(
#'   arm = "ARM",
#'   biomarker = "BMRKR1",
#'   covariates = "AGE",
#'   response = "RSP",
#'   strata = c("STRATA1", "STRATA2")
#' )
#'
#' step_matrix4 <- fit_rsp_step(
#'   variables = variables2,
#'   data = adrs_f,
#'   control = c(control_logistic(), control_step(bandwidth = 0.6))
#' )
#'
#' @export
fit_rsp_step <- function(variables,
                         data,
                         control = c(control_step(), control_logistic())) {
  assert_df_with_variables(data, variables)
  checkmate::assert_list(control, names = "named")

  # Records without a biomarker value are excluded before windows are built.
  biomarker_missing <- is.na(data[[variables$biomarker]])
  data <- data[!biomarker_missing, ]

  windows <- h_step_window(x = data[[variables$biomarker]], control = control)
  centers <- windows$interval[, "Interval Center"]
  model_formula <- h_step_rsp_formula(variables = variables, control = control)

  if (is.null(control$bandwidth)) {
    # No bandwidth: a single estimation call on the complete data, evaluated
    # at all interval centers.
    estimates <- h_step_rsp_est(
      formula = model_formula,
      data = data,
      variables = variables,
      x = centers,
      control = control
    )
  } else {
    # One estimation call per window, each restricted to the records selected
    # by the corresponding column of `windows$sel`.
    per_window <- mapply(
      FUN = h_step_rsp_est,
      x = centers,
      subset = as.list(as.data.frame(windows$sel)),
      MoreArgs = list(
        formula = model_formula,
        data = data,
        variables = variables,
        control = control
      )
    )
    # Maybe we find a more elegant solution than this.
    # Name the simplified results, then transpose so statistics become columns.
    rownames(per_window) <- c("n", "logor", "se", "ci_lower", "ci_upper")
    estimates <- t(per_window)
  }

  # Interval description columns first, then the estimates.
  structure(
    cbind(windows$interval, estimates),
    class = c("step", "matrix"),
    variables = variables,
    control = control
  )
}

#' `rtables` Access Helper Functions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' These are a couple of functions that help with accessing the data in `rtables` objects.
#' Currently these work for occurrence tables, which are defined as having a count as the first
#' element and a fraction as the second element in each cell.
#'
#' @seealso [prune_occurrences] for usage of these functions.
#'
#' @name rtables_access
NULL

#' @describeIn rtables_access Helper function to extract the first values from each content
#'   cell and from specified columns in a `TableRow`. Defaults to all columns.
#'
#' @param table_row (`TableRow`)\cr an analysis row in an occurrence table.
#' @param col_names (`character`)\cr the names of the columns to extract from.
#' @param col_indices (`integer`)\cr the indices of the columns to extract from. If `col_names` are provided,
#'   then these are inferred from the names of `table_row`. Note that this currently only works well with a single
#'   column split.
#'
#' @return
#' * `h_row_first_values()` returns a `vector` of numeric values.
#'
#' @examples
#' tbl <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("RACE") %>%
#'   analyze("AGE", function(x) {
#'     list(
#'       "mean (sd)" = rcell(c(mean(x), sd(x)), format = "xx.x (xx.x)"),
#'       "n" = length(x),
#'       "frac" = rcell(c(0.1, 0.1), format = "xx (xx)")
#'     )
#'   }) %>%
#'   build_table(tern_ex_adsl) %>%
#'   prune_table()
#' tree_row_elem <- collect_leaves(tbl[2, ])[[1]]
#' result <- max(h_row_first_values(tree_row_elem))
#' result
#'
#' @export
h_row_first_values <- function(table_row,
                               col_names = NULL,
                               col_indices = NULL) {
  # Resolve the requested columns and make sure they exist in the row.
  selected <- check_names_indices(table_row, col_names, col_indices)
  checkmate::assert_integerish(selected)
  checkmate::assert_subset(selected, seq_len(ncol(table_row)))

  # First value of a cell; cells without a value yield a missing value.
  first_or_na <- function(cell) {
    if (is.null(cell)) {
      return(NA_real_)
    }
    cell[1L]
  }

  vapply(row_values(table_row)[selected], first_or_na, FUN.VALUE = numeric(1))
}

#' @describeIn rtables_access Helper function that extracts row values and checks if they are
#'   convertible to integers (`integerish` values).
#'
#' @return
#' * `h_row_counts()` returns a `vector` of numeric values.
#'
#' @examples
#' # Row counts (integer values)
#' \dontrun{
#' h_row_counts(tree_row_elem) # Fails because there are no integers
#' }
#' # Using values with integers
#' tree_row_elem <- collect_leaves(tbl[3, ])[[1]]
#' result <- h_row_counts(tree_row_elem)
#' # result
#'
#' @export
h_row_counts <- function(table_row,
                         col_names = NULL,
                         col_indices = NULL) {
  # First cell values of the selected columns, required to be whole numbers.
  first_vals <- h_row_first_values(
    table_row,
    col_names = col_names,
    col_indices = col_indices
  )
  checkmate::assert_integerish(first_vals)
  first_vals
}

#' @describeIn rtables_access helper function to extract fractions from specified columns in a `TableRow`.
#'   More specifically it extracts the second values from each content cell and checks it is a fraction.
#'
#' @return
#' * `h_row_fractions()` returns a `vector` of proportions.
#'
#' @examples
#' # Row fractions
#' tree_row_elem <- collect_leaves(tbl[4, ])[[1]]
#' h_row_fractions(tree_row_elem)
#'
#' @export
h_row_fractions <- function(table_row,
                            col_names = NULL,
                            col_indices = NULL) {
  col_indices <- check_names_indices(table_row, col_names, col_indices)
  row_vals <- row_values(table_row)[col_indices]

  # Second value of every selected cell. `vapply()` guarantees a numeric
  # vector (also for an empty selection), in line with `h_row_first_values()`;
  # cells holding a single value give `NA`.
  fractions <- vapply(row_vals, function(cell) cell[2L], FUN.VALUE = numeric(1))

  # Fractions must lie within [0, 1].
  checkmate::assert_numeric(fractions, lower = 0, upper = 1)
  fractions
}

#' @describeIn rtables_access Helper function to extract column counts from specified columns in a table.
#'
#' @param table (`VTableNodeInfo`)\cr an occurrence table or row.
#'
#' @return
#' * `h_col_counts()` returns a `vector` of column counts.
#'
#' @export
h_col_counts <- function(table,
                         col_names = NULL,
                         col_indices = NULL) {
  selected <- check_names_indices(table, col_names, col_indices)
  # Counts of the selected columns, named by `col_names` (names are removed
  # when `col_names` is `NULL`).
  stats::setNames(col_counts(table)[selected], col_names)
}

#' @describeIn rtables_access Helper function to get first row of content table of current table.
#'
#' @return
#' * `h_content_first_row()` returns a row from an `rtables` table.
#'
#' @export
h_content_first_row <- function(table) {
  # First child of the table's content table.
  tree_children(content_table(table))[[1]]
}

#' @describeIn rtables_access Helper function which says whether current table is a leaf in the tree.
#'
#' @return
#' * `is_leaf_table()` returns a `logical` value indicating whether current table is a leaf.
#'
#' @keywords internal
is_leaf_table <- function(table) {
  # A leaf table has at least one child, and the class of every child is
  # exactly "ElementaryTable".
  is_elementary <- vapply(
    tree_children(table),
    function(child) identical(as.vector(class(child)), "ElementaryTable"),
    FUN.VALUE = logical(1)
  )
  length(is_elementary) > 0 && all(is_elementary)
}

#' @describeIn rtables_access Internal helper function that tests standard inputs for column indices.
#'
#' @return
#' * `check_names_indices` returns column indices.
#'
#' @keywords internal
check_names_indices <- function(table_row,
                                col_names = NULL,
                                col_indices = NULL) {
  # Columns may be selected by name or by index, never both.
  if (!is.null(col_names) && !is.null(col_indices)) {
    stop(
      "Inserted both col_names and col_indices when selecting row values. ",
      "Please choose one."
    )
  }

  if (!is.null(col_names)) {
    col_indices <- h_col_indices(table_row, col_names)
  }
  if (!is.null(col_indices)) {
    return(col_indices)
  }

  # Nothing selected: default to all columns, using the length for objects
  # that have no column dimension.
  n_cols <- ncol(table_row)
  if (is.null(n_cols)) {
    n_cols <- length(table_row)
  }
  seq_len(n_cols)
}

#' Sort Data by `PK PARAM` Variable
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param pk_data (`data.frame`)\cr Pharmacokinetics dataframe
#' @param key_var (`character`)\cr key variable used to merge pk_data and metadata created by `d_pkparam()`
#'
#' @return A PK `data.frame` sorted by a `PARAM` variable.
#'
#' @examples
#' library(dplyr)
#'
#' adpp <- tern_ex_adpp %>% mutate(PKPARAM = factor(paste0(PARAM, " (", AVALU, ")")))
#' pk_ordered_data <- h_pkparam_sort(adpp)
#'
#' @export
h_pkparam_sort <- function(pk_data, key_var = "PARAMCD") {
  assert_df_with_variables(pk_data, list(key_var = key_var))
  # The merge below is keyed on `PARAMCD`, so copy the chosen key into it.
  pk_data$PARAMCD <- pk_data[[key_var]]

  ordered_pk_data <- d_pkparam()

  # Add the numeric values from ordered_pk_data to pk_data.
  # Columns present in both inputs keep the `pk_data` version under the
  # original name; the metadata version gets the ".y" suffix.
  joined_data <- merge(pk_data, ordered_pk_data, by = "PARAMCD", suffixes = c("", ".y"))

  # Drop the suffixed metadata duplicates. Logical indexing is used because
  # `-grep()` would drop every column when there is no match, and the dot is
  # escaped so that only a literal ".y" suffix is removed.
  is_metadata_dup <- grepl("\\.y$", colnames(joined_data))
  joined_data <- joined_data[, !is_metadata_dup, drop = FALSE]

  joined_data$TLG_ORDER <- as.numeric(joined_data$TLG_ORDER)

  # Then order PARAM (and TLG_DISPLAY) based on this column: levels follow the
  # first appearance of each value after sorting by `TLG_ORDER`.
  row_order <- order(joined_data$TLG_ORDER)

  joined_data$PARAM <- factor(joined_data$PARAM,
    levels = unique(joined_data$PARAM[row_order]),
    ordered = TRUE
  )

  joined_data$TLG_DISPLAY <- factor(joined_data$TLG_DISPLAY,
    levels = unique(joined_data$TLG_DISPLAY[row_order]),
    ordered = TRUE
  )

  joined_data
}

#' Individual Patient Plots
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Line plot(s) displaying trend in patients' parameter values over time is rendered.
#' Patients' individual baseline values can be added to the plot(s) as reference.
#'
#' @inheritParams argument_convention
#' @param xvar (`string`)\cr time point variable to be plotted on x-axis.
#' @param yvar (`string`)\cr continuous analysis variable to be plotted on y-axis.
#' @param xlab (`string`)\cr plot label for x-axis.
#' @param ylab (`string`)\cr plot label for y-axis.
#' @param id_var (`string`)\cr variable used as patient identifier.
#' @param title (`string`)\cr title for plot.
#' @param subtitle (`string`)\cr subtitle for plot.
#' @param add_baseline_hline (`flag`)\cr adds horizontal line at baseline y-value on
#'   plot when TRUE.
#' @param yvar_baseline (`string`)\cr variable with baseline values only.
#'   Ignored when `add_baseline_hline` is FALSE.
#' @param ggtheme (`theme`)\cr optional graphical theme function as provided
#'   by `ggplot2` to control outlook of plot. Use `ggplot2::theme()` to tweak the display.
#' @param plotting_choices (`character`)\cr specifies options for displaying
#'   plots. Must be one of "all_in_one", "split_by_max_obs", "separate_by_obs".
#' @param max_obs_per_plot (`count`)\cr Number of observations to be plotted on one
#'   plot. Ignored when `plotting_choices` is not "separate_by_obs".
#' @param caption (`character` scalar)\cr optional caption below the plot.
#' @param col (`character`)\cr lines colors.
#'
#' @seealso Relevant helper function [h_g_ipp()].
#'
#' @name individual_patient_plot
NULL

#' Helper Function To Create Simple Line Plot over Time
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function that generates a simple line plot displaying parameter trends over time.
#'
#' @inheritParams argument_convention
#' @inheritParams g_ipp
#'
#' @return A `ggplot` line plot.
#'
#' @seealso [g_ipp()] which uses this function.
#'
#' @examples
#' library(dplyr)
#' library(nestcolor)
#'
#' # Select a small sample of data to plot.
#' adlb <- tern_ex_adlb %>%
#'   filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>%
#'   slice(1:36)
#'
#' p <- h_g_ipp(
#'   df = adlb,
#'   xvar = "AVISIT",
#'   yvar = "AVAL",
#'   xlab = "Visit",
#'   id_var = "USUBJID",
#'   ylab = "SGOT/ALT (U/L)",
#'   add_baseline_hline = TRUE
#' )
#' p
#'
#' @export
h_g_ipp <- function(df,
                    xvar,
                    yvar,
                    xlab,
                    ylab,
                    id_var,
                    title = "Individual Patient Plots",
                    subtitle = "",
                    caption = NULL,
                    add_baseline_hline = FALSE,
                    yvar_baseline = "BASE",
                    ggtheme = nestcolor::theme_nest(),
                    col = NULL) {
  # Validate `df` first: the column check further down reads `colnames(df)`.
  checkmate::assert_data_frame(df)
  checkmate::assert_string(xvar)
  checkmate::assert_string(yvar)
  checkmate::assert_string(yvar_baseline)
  checkmate::assert_string(id_var)
  checkmate::assert_string(xlab)
  checkmate::assert_string(ylab)
  checkmate::assert_string(title)
  checkmate::assert_string(subtitle)
  checkmate::assert_flag(add_baseline_hline)
  checkmate::assert_character(col, null.ok = TRUE)

  # `yvar_baseline` is only read when baseline lines are requested, so it is
  # only required to be a column of `df` in that case.
  required_vars <- c(xvar, yvar, id_var)
  if (add_baseline_hline) {
    required_vars <- c(required_vars, yvar_baseline)
  }
  checkmate::assert_subset(required_vars, colnames(df))

  # One line (with points) per patient, coloured by patient identifier.
  p <- ggplot2::ggplot(
    data = df,
    mapping = ggplot2::aes(
      x = .data[[xvar]],
      y = .data[[yvar]],
      group = .data[[id_var]],
      colour = .data[[id_var]]
    )
  ) +
    ggplot2::geom_line(linewidth = 0.4) +
    ggplot2::geom_point(size = 2) +
    ggplot2::labs(
      x = xlab,
      y = ylab,
      title = title,
      subtitle = subtitle,
      caption = caption
    ) +
    ggtheme

  if (add_baseline_hline) {
    # One row per distinct patient/baseline combination.
    baseline_df <- unique(df[, c(id_var, yvar_baseline)])

    # `[[` returns a plain vector for both data frames and tibbles.
    y_range <- range(df[[yvar]], na.rm = TRUE)

    p <- p +
      ggplot2::geom_hline(
        data = baseline_df,
        mapping = ggplot2::aes(
          yintercept = .data[[yvar_baseline]],
          colour = .data[[id_var]]
        ),
        linetype = "dotdash",
        linewidth = 0.4
      ) +
      ggplot2::geom_text(
        data = baseline_df,
        mapping = ggplot2::aes(
          x = 1,
          y = .data[[yvar_baseline]],
          label = .data[[id_var]],
          colour = .data[[id_var]]
        ),
        # Lift the label slightly above its line, relative to the spread of the data.
        nudge_y = 0.025 * diff(y_range),
        vjust = "right",
        size = 2
      )
  }

  # Custom colours apply to the patient lines themselves, so they must not
  # depend on whether the baseline reference lines are drawn.
  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_color_manual(values = col)
  }
  p
}

#' @describeIn individual_patient_plot Plotting function for individual patient plots which, depending on user
#'   preference, renders a single graphic or compiles a list of graphics that show trends in individual's parameter
#'   values over time.
#'
#' @return A `ggplot` object or a list of `ggplot` objects.
#'
#' @examples
#' library(dplyr)
#' library(nestcolor)
#'
#' # Select a small sample of data to plot.
#' adlb <- tern_ex_adlb %>%
#'   filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>%
#'   slice(1:36)
#'
#' plot_list <- g_ipp(
#'   df = adlb,
#'   xvar = "AVISIT",
#'   yvar = "AVAL",
#'   xlab = "Visit",
#'   ylab = "SGOT/ALT (U/L)",
#'   title = "Individual Patient Plots",
#'   add_baseline_hline = TRUE,
#'   plotting_choices = "split_by_max_obs",
#'   max_obs_per_plot = 5
#' )
#' plot_list
#'
#' @export
g_ipp <- function(df,
                  xvar,
                  yvar,
                  xlab,
                  ylab,
                  id_var = "USUBJID",
                  title = "Individual Patient Plots",
                  subtitle = "",
                  caption = NULL,
                  add_baseline_hline = FALSE,
                  yvar_baseline = "BASE",
                  ggtheme = nestcolor::theme_nest(),
                  plotting_choices = c("all_in_one", "split_by_max_obs", "separate_by_obs"),
                  max_obs_per_plot = 4,
                  col = NULL) {
  checkmate::assert_count(max_obs_per_plot)
  checkmate::assert_subset(plotting_choices, c("all_in_one", "split_by_max_obs", "separate_by_obs"))
  checkmate::assert_character(col, null.ok = TRUE)

  plotting_choices <- match.arg(plotting_choices)

  # Single place that forwards the shared plot settings to the helper, so the
  # three display modes only differ in how `df` is partitioned.
  plot_subset <- function(data) {
    h_g_ipp(
      df = data,
      xvar = xvar,
      yvar = yvar,
      xlab = xlab,
      ylab = ylab,
      id_var = id_var,
      title = title,
      subtitle = subtitle,
      caption = caption,
      add_baseline_hline = add_baseline_hline,
      yvar_baseline = yvar_baseline,
      ggtheme = ggtheme,
      col = col
    )
  }

  if (plotting_choices == "all_in_one") {
    # All patients on one graphic.
    return(plot_subset(df))
  }

  if (plotting_choices == "split_by_max_obs") {
    # A chunk size of zero cannot partition the patients (division by zero below).
    checkmate::assert_count(max_obs_per_plot, positive = TRUE)

    # Consecutive chunks of at most `max_obs_per_plot` patients, in order of appearance.
    id_vec <- unique(df[[id_var]])
    chunk_index <- ceiling(seq_along(id_vec) / max_obs_per_plot)
    id_list <- unname(split(id_vec, chunk_index))

    return(lapply(id_list, function(ids) plot_subset(df[df[[id_var]] %in% ids, ])))
  }

  # "separate_by_obs": one plot per patient, named by patient identifier.
  # `drop = TRUE` skips unused factor levels, which would otherwise yield plots without any data.
  lapply(split(df, df[[id_var]], drop = TRUE), plot_subset)
}

#' Stack Multiple Grobs
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Stack grobs as a new grob with 1 column and multiple rows layout.
#'
#' @param ... grobs.
#' @param grobs list of grobs.
#' @param padding unit of length 1, space between each grob.
#' @param vp a [viewport()] object (or `NULL`).
#' @param name a character identifier for the grob.
#' @param gp A [gpar()] object.
#'
#' @return A `grob`.
#'
#' @examples
#' library(grid)
#'
#' g1 <- circleGrob(gp = gpar(col = "blue"))
#' g2 <- circleGrob(gp = gpar(col = "red"))
#' g3 <- textGrob("TEST TEXT")
#' grid.newpage()
#' grid.draw(stack_grobs(g1, g2, g3))
#'
#' showViewport()
#'
#' grid.newpage()
#' pushViewport(viewport(layout = grid.layout(1, 2)))
#' vp1 <- viewport(layout.pos.row = 1, layout.pos.col = 2)
#' grid.draw(stack_grobs(g1, g2, g3, vp = vp1, name = "test"))
#'
#' showViewport()
#' grid.ls(grobs = TRUE, viewports = TRUE, print = FALSE)
#'
#' @export
stack_grobs <- function(...,
                        grobs = list(...),
                        padding = grid::unit(2, "line"),
                        vp = NULL,
                        gp = NULL,
                        name = NULL) {
  # Every element to stack must be a grid grob.
  checkmate::assert_true(
    all(vapply(grobs, grid::is.grob, logical(1)))
  )

  # A single grob needs no layout and is handed back as is.
  if (length(grobs) == 1) {
    return(grobs[[1]])
  }

  # Grobs occupy the odd layout rows; the even rows in between hold the padding.
  n_layout <- 2 * length(grobs) - 1
  grob_rows <- seq_along(grobs) * 2 - 1
  row_heights <- lapply(
    seq(1, n_layout),
    function(row) if (row %% 2 == 0) padding else grid::unit(1, "null")
  )
  row_heights <- do.call(grid::unit.c, row_heights)

  layout_vp <- grid::viewport(
    layout = grid::grid.layout(nrow = n_layout, ncol = 1, heights = row_heights)
  )

  # Wrap each grob in a gTree positioned on its own layout row.
  placed_grobs <- mapply(
    function(grob, row) {
      grid::gTree(
        children = grid::gList(grob),
        vp = grid::viewport(layout.pos.row = row, layout.pos.col = 1)
      )
    },
    grobs,
    grob_rows,
    SIMPLIFY = FALSE
  )

  stacked <- grid::gTree(
    children = do.call(grid::gList, placed_grobs),
    vp = layout_vp
  )

  # Outer gTree carries the user-supplied viewport, graphical parameters and name.
  grid::gTree(
    children = grid::gList(stacked),
    vp = vp,
    gp = gp,
    name = name
  )
}

#' Arrange Multiple Grobs
#'
#' Arrange grobs as a new grob with \verb{n*m (rows*cols)} layout.
#'
#' @inheritParams stack_grobs
#' @param ncol number of columns in layout.
#' @param nrow number of rows in layout.
#' @param padding_ht unit of length 1, vertical space between each grob.
#' @param padding_wt unit of length 1, horizontal space between each grob.
#'
#' @return A `grob`.
#'
#' @examples
#' library(grid)
#'
#' # Internal function - arrange_grobs
#' \dontrun{
#' num <- lapply(1:9, textGrob)
#' grid::grid.newpage()
#' grid.draw(arrange_grobs(grobs = num, ncol = 2))
#'
#' showViewport()
#'
#' g1 <- circleGrob(gp = gpar(col = "blue"))
#' g2 <- circleGrob(gp = gpar(col = "red"))
#' g3 <- textGrob("TEST TEXT")
#' grid::grid.newpage()
#' grid.draw(arrange_grobs(g1, g2, g3, nrow = 2))
#'
#' showViewport()
#'
#' grid::grid.newpage()
#' grid.draw(arrange_grobs(g1, g2, g3, ncol = 3))
#'
#' grid::grid.newpage()
#' grid::pushViewport(grid::viewport(layout = grid::grid.layout(1, 2)))
#' vp1 <- grid::viewport(layout.pos.row = 1, layout.pos.col = 2)
#' grid.draw(arrange_grobs(g1, g2, g3, ncol = 2, vp = vp1))
#'
#' showViewport()
#' }
#'
#' @keywords internal
arrange_grobs <- function(...,
                          grobs = list(...),
                          ncol = NULL, nrow = NULL,
                          padding_ht = grid::unit(2, "line"),
                          padding_wt = grid::unit(2, "line"),
                          vp = NULL,
                          gp = NULL,
                          name = NULL) {
  checkmate::assert_true(
    all(vapply(grobs, grid::is.grob, logical(1)))
  )

  # A single grob needs no layout and is handed back as is.
  if (length(grobs) == 1) {
    return(grobs[[1]])
  }

  # Derive whichever grid dimension was not supplied; default is a single column.
  if (is.null(ncol) && is.null(nrow)) {
    ncol <- 1
    nrow <- ceiling(length(grobs) / ncol)
  } else if (!is.null(ncol) && is.null(nrow)) {
    nrow <- ceiling(length(grobs) / ncol)
  } else if (is.null(ncol) && !is.null(nrow)) {
    ncol <- ceiling(length(grobs) / nrow)
  }

  if (ncol * nrow < length(grobs)) {
    stop("specified ncol and nrow are not enough for arranging the grobs")
  }

  # A single column is exactly the stacking case.
  if (ncol == 1) {
    return(stack_grobs(grobs = grobs, padding = padding_ht, vp = vp, gp = gp, name = name))
  }

  # Content cells sit on odd layout rows/columns; even ones hold the padding.
  alternate_units <- function(n, padding) {
    units <- lapply(
      seq(1, n),
      function(i) if (i %% 2 != 0) grid::unit(1, "null") else padding
    )
    do.call(grid::unit.c, units)
  }

  n_col <- 2 * ncol - 1
  n_row <- 2 * nrow - 1
  hts <- alternate_units(n_row, padding_ht)
  wts <- alternate_units(n_col, padding_wt)

  main_vp <- grid::viewport(
    layout = grid::grid.layout(nrow = n_row, ncol = n_col, widths = wts, heights = hts)
  )

  # Fill the grid row by row. The list is preallocated and each grob's layout
  # cell is computed from its index instead of growing the list in a nested loop.
  nested_grobs <- vector("list", length(grobs))
  for (k in seq_along(grobs)) {
    layout_row <- 2 * ((k - 1) %/% ncol) + 1
    layout_col <- 2 * ((k - 1) %% ncol) + 1
    nested_grobs[[k]] <- grid::gTree(
      children = grid::gList(grobs[[k]]),
      vp = grid::viewport(layout.pos.row = layout_row, layout.pos.col = layout_col)
    )
  }

  grobs_mainvp <- grid::gTree(
    children = do.call(grid::gList, nested_grobs),
    vp = main_vp
  )

  # Outer gTree carries the user-supplied viewport, graphical parameters and name.
  grid::gTree(
    children = grid::gList(grobs_mainvp),
    vp = vp,
    gp = gp,
    name = name
  )
}

#' Draw `grob`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Draw grob on device page.
#'
#' @param grob grid object
#' @param newpage draw on a new page
#' @param vp a [viewport()] object (or `NULL`).
#'
#' @return A `grob`.
#'
#' @examples
#' library(dplyr)
#' library(grid)
#'
#' # Internal function - arrange_grobs
#' \dontrun{
#' rect <- rectGrob(width = grid::unit(0.5, "npc"), height = grid::unit(0.5, "npc"))
#' rect %>% draw_grob(vp = grid::viewport(angle = 45))
#'
#' num <- lapply(1:10, textGrob)
#' num %>%
#'   arrange_grobs(grobs = .) %>%
#'   draw_grob()
#' showViewport()
#' }
#'
#' @export
draw_grob <- function(grob, newpage = TRUE, vp = NULL) {
  # Optionally start from a blank page.
  if (newpage) grid::grid.newpage()

  # Draw inside the given viewport, if any; the viewport is pushed and not popped afterwards.
  if (!is.null(vp)) grid::pushViewport(vp)

  grid::grid.draw(grob)
}

# Tag an object with the "ternGrob" class so that it is drawn when printed.
# "ternGrob" is placed first and duplicated class entries are removed.
tern_grob <- function(x) {
  classes <- c("ternGrob", class(x))
  class(x) <- classes[!duplicated(classes)]
  x
}

# Print method for "ternGrob" objects: draws the grob on a fresh page.
# Returns `x` invisibly, following the convention for print methods.
print.ternGrob <- function(x, ...) {
  grid::grid.newpage()
  grid::grid.draw(x)
  invisible(x)
}

#' Count the Number of Patients with Particular Flags
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The primary analysis variable `.var` denotes the unique patient identifier.
#'
#' @inheritParams argument_convention
#'
#' @seealso [count_patients_with_event]
#'
#' @name count_patients_with_flags
NULL

#' @describeIn count_patients_with_flags Statistics function which counts the number of patients for which
#'   a particular flag variable is `TRUE`.
#'
#' @inheritParams summarize_variables
#' @param .var (`character`)\cr name of the column that contains the unique identifier.
#' @param flag_variables (`character`)\cr a character vector specifying the names of `logical`
#'   variables from analysis dataset used for counting the number of unique identifiers.
#'
#' @return
#' * `s_count_patients_with_flags()` returns the count and the fraction of unique identifiers with each particular
#'   flag as a list of statistics `n`, `count`, `count_fraction`, and `n_blq`, with one element per flag.
#'
#' @examples
#' library(dplyr)
#'
#' # `s_count_patients_with_flags()`
#'
#' # Add labelled flag variables to analysis dataset.
#' adae <- tern_ex_adae %>%
#'   mutate(
#'     fl1 = TRUE,
#'     fl2 = TRTEMFL == "Y",
#'     fl3 = TRTEMFL == "Y" & AEOUT == "FATAL",
#'     fl4 = TRTEMFL == "Y" & AEOUT == "FATAL" & AEREL == "Y"
#'   )
#' labels <- c(
#'   "fl1" = "Total AEs",
#'   "fl2" = "Total number of patients with at least one adverse event",
#'   "fl3" = "Total number of patients with fatal AEs",
#'   "fl4" = "Total number of patients with related fatal AEs"
#' )
#' formatters::var_labels(adae)[names(labels)] <- labels
#'
#' s_count_patients_with_flags(
#'   adae,
#'   "SUBJID",
#'   flag_variables = c("fl1", "fl2", "fl3", "fl4"),
#'   denom = "N_col",
#'   .N_col = 1000
#' )
#'
#' @export
s_count_patients_with_flags <- function(df,
                                        .var,
                                        flag_variables,
                                        .N_col, # nolint
                                        .N_row, # nolint
                                        denom = c("n", "N_row", "N_col")) {
  # `flag_variables` may be a named vector: the names are the flag column names in `df`
  # and the values are the labels used to name the results. Unnamed input uses the
  # column names as labels.
  if (is.null(names(flag_variables))) flag_variables <- stats::setNames(flag_variables, flag_variables)
  flag_names <- unname(flag_variables)
  flag_variables <- names(flag_variables)

  checkmate::assert_subset(flag_variables, colnames(df))
  # For each flag column, collect the unique identifiers of the rows where the flag is `TRUE`
  # and count them against all unique identifiers in `df` via `s_count_values()`.
  temp <- sapply(flag_variables, function(x) {
    # `x` is a single column name here, so the intersection reduces to `which(df[[x]])`.
    tmp <- Map(function(y) which(df[[y]]), x)
    position_satisfy_flags <- Reduce(intersect, tmp)
    id_satisfy_flags <- as.character(unique(df[position_satisfy_flags, ][[.var]]))
    s_count_values(
      as.character(unique(df[[.var]])),
      id_satisfy_flags,
      denom = denom,
      .N_col = .N_col,
      .N_row = .N_row
    )
  })
  # Reshape so that the result is a list with one element per statistic, each holding
  # one entry per flag label.
  colnames(temp) <- flag_names
  temp <- data.frame(t(temp))
  result <- temp %>% as.list()
  if (length(flag_variables) == 1) {
    # With a single flag the per-statistic entries are named explicitly with the flag label.
    # NOTE(review): only the first three elements are renamed, while the documented return
    # value lists four statistics (`n`, `count`, `count_fraction`, `n_blq`) - confirm whether
    # the fourth should be renamed as well.
    for (i in 1:3) names(result[[i]]) <- flag_names[1]
  }
  result
}

#' @describeIn count_patients_with_flags Formatted analysis function which is used as `afun`
#'   in `count_patients_with_flags()`.
#'
#' @return
#' * `a_count_patients_with_flags()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' #  We need to ungroup `count_fraction` first so that the `rtables` formatting
#' # function `format_count_fraction()` can be applied correctly.
#'
#' # `a_count_patients_with_flags()`
#'
#' afun <- make_afun(a_count_patients_with_flags,
#'   .stats = "count_fraction",
#'   .ungroup_stats = "count_fraction"
#' )
#' afun(
#'   adae,
#'   .N_col = 10L,
#'   .N_row = 10L,
#'   .var = "USUBJID",
#'   flag_variables = c("fl1", "fl2", "fl3", "fl4")
#' )
#'
#' @export
a_count_patients_with_flags <- make_afun(
  # Formatted analysis function built from `s_count_patients_with_flags()`;
  # the `count_fraction` statistic gets the fixed-decimal count/fraction format by default.
  s_count_patients_with_flags,
  .formats = c("count_fraction" = format_count_fraction_fixed_dp)
)

#' @describeIn count_patients_with_flags Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_patients_with_flags()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_patients_with_flags()` to the table layout.
#'
#' @examples
#' # `count_patients_with_flags()`
#'
#' lyt2 <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_patients_with_flags(
#'     "SUBJID",
#'     flag_variables = formatters::var_labels(adae[, c("fl1", "fl2", "fl3", "fl4")]),
#'     denom = "N_col"
#'   )
#' build_table(lyt2, adae, alt_counts_df = tern_ex_adsl)
#'
#' @export
count_patients_with_flags <- function(lyt,
                                      var,
                                      var_labels = var,
                                      show_labels = "hidden",
                                      ...,
                                      table_names = paste0("tbl_flags_", var),
                                      .stats = "count_fraction",
                                      .formats = NULL,
                                      .indent_mods = NULL) {
  # Customize the formatted analysis function; the requested statistics are
  # ungrouped so that each flag ends up in its own row.
  afun <- make_afun(
    a_count_patients_with_flags,
    .stats = .stats,
    .formats = .formats,
    .indent_mods = .indent_mods,
    .ungroup_stats = .stats
  )

  # Add the analysis to the layout; `...` is forwarded to the statistics function.
  analyze(
    lyt = lyt,
    vars = var,
    var_labels = var_labels,
    show_labels = show_labels,
    afun = afun,
    table_names = table_names,
    extra_args = list(...)
  )
}

#' Patient Counts with Abnormal Range Values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Primary analysis variable `.var` indicates the abnormal range result (`factor`)
#' and additional analysis variables are `id` (`character` or `factor`) and `baseline` (`character` or
#' `factor`). For each direction specified in `abnormal` (e.g. high or low) count patients in the
#' numerator and denominator as follows:
#'   * `num` : The number of patients with this abnormality recorded while on treatment.
#'   * `denom`: The number of patients with at least one post-baseline assessment.
#'
#' @inheritParams argument_convention
#' @param abnormal (named `list`)\cr list identifying the abnormal range level(s) in `var`. Defaults to
#'   `list(Low = "LOW", High = "HIGH")` but you can also group different levels into the named list,
#'   for example, `abnormal = list(Low = c("LOW", "LOW LOW"), High = c("HIGH", "HIGH HIGH"))`.
#'
#' @note
#' * `count_abnormal()` only works with a single variable containing multiple abnormal levels.
#' * `df` should be filtered to include only post-baseline records.
#' * the denominator includes patients that might have other abnormal levels at baseline,
#'   and patients with missing baseline. Patients with these abnormalities at
#'   baseline can be optionally excluded from numerator and denominator.
#'
#' @name abnormal
#' @include formatting_functions.R
NULL

#' @describeIn abnormal Statistics function which counts patients with abnormal range values
#'   for a single `abnormal` level.
#'
#' @param exclude_base_abn (`flag`)\cr whether to exclude subjects with baseline abnormality
#'   from numerator and denominator.
#'
#' @return
#' * `s_count_abnormal()` returns the statistic `fraction` which is a vector with `num` and `denom` counts of patients.
#'
#' @examples
#' library(dplyr)
#'
#' df <- data.frame(
#'   USUBJID = as.character(c(1, 1, 2, 2)),
#'   ANRIND = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
#'   BNRIND = factor(c("NORMAL", "NORMAL", "HIGH", "HIGH")),
#'   ONTRTFL = c("", "Y", "", "Y"),
#'   stringsAsFactors = FALSE
#' )
#'
#' # Select only post-baseline records.
#' df <- df %>%
#'   filter(ONTRTFL == "Y")
#'
#' # Internal function - s_count_abnormal
#' \dontrun{
#' # For abnormal level "HIGH" we get the following counts.
#' s_count_abnormal(df, .var = "ANRIND", abnormal = list(high = "HIGH", low = "LOW"))
#'
#' # Optionally exclude patients with abnormality at baseline.
#' s_count_abnormal(
#'   df,
#'   .var = "ANRIND",
#'   abnormal = list(high = "HIGH", low = "LOW"),
#'   exclude_base_abn = TRUE
#' )
#' }
#'
#' @keywords internal
s_count_abnormal <- function(df,
                             .var,
                             abnormal = list(Low = "LOW", High = "HIGH"),
                             variables = list(id = "USUBJID", baseline = "BNRIND"),
                             exclude_base_abn = FALSE) {
  checkmate::assert_list(abnormal, types = "character", names = "named", len = 2, any.missing = FALSE)
  checkmate::assert_flag(exclude_base_abn)
  # Structural checks come first, so that a missing or mistyped column is reported
  # as such rather than as a failed level lookup.
  assert_df_with_variables(df, c(range = .var, variables))
  checkmate::assert_factor(df[[.var]])
  checkmate::assert_multi_class(df[[variables$baseline]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))
  # At least one of the requested abnormal levels must be a level of the range variable.
  checkmate::assert_true(any(unlist(abnormal) %in% levels(df[[.var]])))

  range_values <- df[[.var]]
  baseline_values <- df[[variables$baseline]]
  ids <- df[[variables$id]]

  # Counts for one abnormality direction: `abn_name` is its label, `abn` the matching levels.
  count_abnormal_single <- function(abn_name, abn) {
    # Patients in the denominator fulfill:
    # - have at least one post-baseline visit
    # - their baseline must not be abnormal if `exclude_base_abn`.
    if (exclude_base_abn) {
      denom_select <- !(baseline_values %in% abn)
    } else {
      denom_select <- TRUE
    }
    denom <- length(unique(ids[denom_select]))

    # Patients in the numerator fulfill:
    # - have at least one post-baseline visit with the required abnormality level
    # - are part of the denominator patients.
    num_select <- (range_values %in% abn) & denom_select
    num <- length(unique(ids[num_select]))

    formatters::with_label(c(num = num, denom = denom), abn_name)
  }

  # This will define the abnormal levels theoretically possible for a specific lab parameter
  # within a split level of a layout.
  abnormal_lev <- lapply(abnormal, intersect, levels(range_values))
  abnormal_lev <- abnormal_lev[lengths(abnormal_lev) > 0]

  # One labelled `c(num, denom)` entry per remaining direction, named by direction.
  result <- Map(count_abnormal_single, names(abnormal_lev), abnormal_lev)
  list(fraction = result)
}

#' @describeIn abnormal Formatted analysis function which is used as `afun` in `count_abnormal()`.
#'
#' @return
#' * `a_count_abnormal()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_count_abnormal
#' \dontrun{
#' # Use the Formatted Analysis function for `analyze()`.
#' a_fun <- make_afun(a_count_abnormal, .ungroup_stats = "fraction")
#' a_fun(df, .var = "ANRIND", abnormal = list(low = "LOW", high = "HIGH"))
#' }
#'
#' @keywords internal
a_count_abnormal <- make_afun(
  # Formatted analysis function built from `s_count_abnormal()`;
  # the `fraction` statistic is rendered with `format_fraction` by default.
  s_count_abnormal,
  .formats = c(fraction = format_fraction)
)

#' @describeIn abnormal Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal()` to the table layout.
#'
#' @examples
#' # Layout creating function.
#' basic_table() %>%
#'   count_abnormal(var = "ANRIND", abnormal = list(high = "HIGH", low = "LOW")) %>%
#'   build_table(df)
#'
#' # Passing of statistics function and formatting arguments.
#' df2 <- data.frame(
#'   ID = as.character(c(1, 1, 2, 2)),
#'   RANGE = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
#'   BL_RANGE = factor(c("NORMAL", "NORMAL", "HIGH", "HIGH")),
#'   ONTRTFL = c("", "Y", "", "Y"),
#'   stringsAsFactors = FALSE
#' )
#'
#' # Select only post-baseline records.
#' df2 <- df2 %>%
#'   filter(ONTRTFL == "Y")
#'
#' basic_table() %>%
#'   count_abnormal(
#'     var = "RANGE",
#'     abnormal = list(low = "LOW", high = "HIGH"),
#'     variables = list(id = "ID", baseline = "BL_RANGE")
#'   ) %>%
#'   build_table(df2)
#'
#' @export
count_abnormal <- function(lyt,
                           var,
                           ...,
                           table_names = var,
                           .stats = NULL,
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = NULL) {
  # Customize the formatted analysis function; `fraction` is ungrouped so that each
  # abnormality direction is returned as a separate entry.
  afun <- make_afun(
    a_count_abnormal,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "fraction"
  )

  # Only a single range variable is supported.
  checkmate::assert_string(var)

  # Add the analysis to the layout with hidden variable labels; `...` is forwarded
  # to the statistics function through `extra_args`.
  analyze(
    lyt = lyt,
    vars = var,
    afun = afun,
    table_names = table_names,
    extra_args = list(...),
    show_labels = "hidden"
  )
}

#' Convert List of Groups to Data Frame
#'
#' This converts a list of group levels into a data frame format which is expected by [rtables::add_combo_levels()].
#'
#' @param groups_list (named `list` of `character`)\cr specifies the new group levels via the names and the
#'   levels that belong to it in the character vectors that are elements of the list.
#'
#' @return [tibble::tibble()] in the required format.
#'
#' @examples
#' grade_groups <- list(
#'   "Any Grade (%)" = c("1", "2", "3", "4", "5"),
#'   "Grade 3-4 (%)" = c("3", "4"),
#'   "Grade 5 (%)" = "5"
#' )
#' # Internal function - groups_list_to_df
#' \dontrun{
#' groups_list_to_df(grade_groups)
#' }
#'
#' @keywords internal
groups_list_to_df <- function(groups_list) {
  # Input must be a named list whose elements are all character vectors.
  checkmate::assert_list(groups_list, names = "named")
  lapply(groups_list, checkmate::assert_character)

  group_labels <- names(groups_list)
  n_groups <- length(groups_list)

  # One row per group: syntactic value name, display label, the levels that are
  # combined, and an empty list of extra arguments.
  tibble::tibble(
    valname = make_names(group_labels),
    label = group_labels,
    levelcombo = unname(groups_list),
    exargs = rep(list(list()), n_groups)
  )
}

#' Reference and Treatment Group Combination
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Facilitate the re-combination of groups divided as reference and treatment groups; it helps in arranging groups of
#' columns in the `rtables` framework and teal modules.
#'
#' @param fct (`factor`)\cr the variable with levels which needs to be grouped.
#' @param ref (`character`)\cr the reference level(s).
#' @param collapse (`string`)\cr a character string used to join the level names within each group
#'   when building the names of the returned list.
#'
#' @return A `list` with first item `ref` (reference) and second item `trt` (treatment).
#'
#' @examples
#' groups <- combine_groups(
#'   fct = DM$ARM,
#'   ref = c("B: Placebo")
#' )
#'
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   summarize_vars("AGE") %>%
#'   build_table(DM)
#'
#' @export
combine_groups <- function(fct,
                           ref = NULL,
                           collapse = "/") {
  checkmate::assert_string(collapse)
  checkmate::assert_character(ref, min.chars = 1, any.missing = FALSE, null.ok = TRUE)
  checkmate::assert_multi_class(fct, classes = c("factor", "character"))

  group_levels <- levels(as_factor_keep_attributes(fct))

  # Default reference is the first level; a user-supplied reference must only use existing levels.
  if (is.null(ref)) {
    ref <- group_levels[1]
  } else {
    checkmate::assert_subset(ref, group_levels)
  }

  # Partition the levels (in level order) into reference and treatment.
  is_ref <- group_levels %in% ref
  groups <- list(
    ref = group_levels[is_ref],
    trt = group_levels[!is_ref]
  )

  # Each group is named by its levels joined with `collapse`.
  names(groups) <- vapply(groups, paste, character(1), collapse = collapse, USE.NAMES = FALSE)
  groups
}

#' Split Columns by Groups of Levels
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @inheritParams groups_list_to_df
#' @param ... additional arguments to [rtables::split_cols_by()] in order. For instance, to
#'   control formats (`format`), add a joint column for all groups (`incl_all`).
#'
#' @return A layout object suitable for passing to further layouting functions. Adding
#'   this function to an `rtable` layout will add a column split including the given
#'   groups to the table layout.
#'
#' @seealso [rtables::split_cols_by()]
#'
#' @examples
#' # 1 - Basic use
#'
#' # Without group combination `split_cols_by_groups` is
#' # equivalent to [rtables::split_cols_by()].
#' basic_table() %>%
#'   split_cols_by_groups("ARM") %>%
#'   add_colcounts() %>%
#'   analyze("AGE") %>%
#'   build_table(DM)
#'
#' # Add a reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", ref_group = "B: Placebo") %>%
#'   add_colcounts() %>%
#'   analyze(
#'     "AGE",
#'     afun = function(x, .ref_group, .in_ref_col) {
#'       if (.in_ref_col) {
#'         in_rows("Diff Mean" = rcell(NULL))
#'       } else {
#'         in_rows("Diff Mean" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
#'       }
#'     }
#'   ) %>%
#'   build_table(DM)
#'
#' # 2 - Adding group specification
#'
#' # Manual preparation of the groups.
#' groups <- list(
#'   "Arms A+B" = c("A: Drug X", "B: Placebo"),
#'   "Arms A+C" = c("A: Drug X", "C: Combination")
#' )
#'
#' # Use of split_cols_by_groups without reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   analyze("AGE") %>%
#'   build_table(DM)
#'
#' # Including differentiated output in the reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups_list = groups, ref_group = "Arms A+B") %>%
#'   analyze(
#'     "AGE",
#'     afun = function(x, .ref_group, .in_ref_col) {
#'       if (.in_ref_col) {
#'         in_rows("Diff. of Averages" = rcell(NULL))
#'       } else {
#'         in_rows("Diff. of Averages" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
#'       }
#'     }
#'   ) %>%
#'   build_table(DM)
#'
#' # 3 - Binary list dividing factor levels into reference and treatment
#'
#' # `combine_groups` defines reference and treatment.
#' groups <- combine_groups(
#'   fct = DM$ARM,
#'   ref = c("A: Drug X", "B: Placebo")
#' )
#' groups
#'
#' # Use group definition without reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups_list = groups) %>%
#'   add_colcounts() %>%
#'   analyze("AGE") %>%
#'   build_table(DM)
#'
#' # Use group definition with reference column (first item of groups).
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups, ref_group = names(groups)[1]) %>%
#'   add_colcounts() %>%
#'   analyze(
#'     "AGE",
#'     afun = function(x, .ref_group, .in_ref_col) {
#'       if (.in_ref_col) {
#'         in_rows("Diff Mean" = rcell(NULL))
#'       } else {
#'         in_rows("Diff Mean" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
#'       }
#'     }
#'   ) %>%
#'   build_table(DM)
#'
#' @export
split_cols_by_groups <- function(lyt,
                                 var,
                                 groups_list = NULL,
                                 ref_group = NULL,
                                 ...) {
  # Without a group definition this is a plain column split on `var`.
  if (is.null(groups_list)) {
    return(
      split_cols_by(
        lyt = lyt,
        var = var,
        ref_group = ref_group,
        ...
      )
    )
  }

  groups_df <- groups_list_to_df(groups_list)

  # The reference group is given by its label; translate it to the matching value name.
  if (!is.null(ref_group)) {
    ref_group <- groups_df$valname[groups_df$label == ref_group]
  }

  # Split on the combined levels only, keeping just the defined groups as columns.
  split_cols_by(
    lyt = lyt,
    var = var,
    split_fun = add_combo_levels(groups_df, keep_levels = groups_df$valname),
    ref_group = ref_group,
    ...
  )
}

#' Combine Counts
#'
#' Simplifies the estimation of column counts, especially when group combination is required.
#'
#' @inheritParams combine_groups
#' @inheritParams groups_list_to_df
#'
#' @return A `vector` of column counts.
#'
#' @seealso [combine_groups()]
#'
#' @examples
#' ref <- c("A: Drug X", "B: Placebo")
#' groups <- combine_groups(fct = DM$ARM, ref = ref)
#'
#' # Internal function - combine_counts
#' \dontrun{
#' col_counts <- combine_counts(
#'   fct = DM$ARM,
#'   groups_list = groups
#' )
#'
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   summarize_vars("AGE") %>%
#'   build_table(DM, col_counts = col_counts)
#'
#' ref <- "A: Drug X"
#' groups <- combine_groups(fct = DM$ARM, ref = ref)
#' col_counts <- combine_counts(
#'   fct = DM$ARM,
#'   groups_list = groups
#' )
#'
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   summarize_vars("AGE") %>%
#'   build_table(DM, col_counts = col_counts)
#' }
#'
#' @keywords internal
combine_counts <- function(fct, groups_list = NULL) {
  checkmate::assert_multi_class(fct, classes = c("factor", "character"))

  fct <- as_factor_keep_attributes(fct)

  # Tabulate the levels once; the same counts are reused for every group below
  # instead of re-tabulating `fct` inside each iteration.
  level_counts <- table(fct)

  if (is.null(groups_list)) {
    # No grouping: one (numeric) count per factor level, named by level.
    stats::setNames(as.numeric(level_counts), nm = dimnames(level_counts)[[1]])
  } else {
    # One count per group: sum of the counts of the levels listed in the group.
    vapply(
      X = groups_list,
      FUN = function(lvls) sum(level_counts[lvls]),
      FUN.VALUE = 1
    )
  }
}

#' Helper Functions for Tabulating Biomarker Effects on Survival by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions which are documented here separately to not confuse the user
#' when reading about the user-facing functions.
#'
#' @inheritParams survival_biomarkers_subgroups
#' @inheritParams argument_convention
#' @inheritParams fit_coxreg_multivar
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte, fill = FALSE)
#'
#' adtte_f <- adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' @name h_survival_biomarkers_subgroups
NULL

#' @describeIn h_survival_biomarkers_subgroups helps with converting the "survival" function variable list
#'   to the "Cox regression" variable list. The reason is that currently there is an inconsistency between the variable
#'   names accepted by `extract_survival_subgroups()` and `fit_coxreg_multivar()`.
#'
#' @param biomarker (`string`)\cr the name of the biomarker variable.
#'
#' @return
#' * `h_surv_to_coxreg_variables()` returns a named `list` of elements `time`, `event`, `arm`,
#'   `covariates`, and `strata`.
#'
#' @examples
#' # This is how the variable list is converted internally.
#' h_surv_to_coxreg_variables(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "EVNT",
#'     covariates = c("A", "B"),
#'     strata = "D"
#'   ),
#'   biomarker = "AGE"
#' )
#'
#' @export
h_surv_to_coxreg_variables <- function(variables, biomarker) {
  checkmate::assert_list(variables)
  checkmate::assert_string(variables$tte)
  checkmate::assert_string(variables$is_event)
  checkmate::assert_string(biomarker)

  # Map the "survival" names onto the "Cox regression" names; the biomarker
  # takes the place of the arm variable. `covariates` and `strata` are passed
  # through unchanged (and stay in the list as `NULL` when not supplied).
  stats::setNames(
    list(
      variables$tte,
      variables$is_event,
      biomarker,
      variables$covariates,
      variables$strata
    ),
    c("time", "event", "arm", "covariates", "strata")
  )
}

#' @describeIn h_survival_biomarkers_subgroups prepares estimates for number of events, patients and median survival
#'   times, as well as hazard ratio estimates, confidence intervals and p-values, for multiple biomarkers
#'   in a given single data set.
#'   `variables` corresponds to names of variables found in `data`, passed as a named list and requires elements
#'   `tte`, `is_event`, `biomarkers` (vector of continuous biomarker variables) and optionally `subgroups` and `strata`.
#'
#' @return
#' * `h_coxreg_mult_cont_df()` returns a `data.frame` containing estimates and statistics for the selected biomarkers.
#'
#' @examples
#' # For a single population, estimate separately the effects
#' # of two biomarkers.
#' df <- h_coxreg_mult_cont_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     strata = c("STRATA1", "STRATA2")
#'   ),
#'   data = adtte_f
#' )
#' df
#'
#' # If the data set is empty, still the corresponding rows with missings are returned.
#' h_coxreg_mult_cont_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "REGION1",
#'     strata = c("STRATA1", "STRATA2")
#'   ),
#'   data = adtte_f[NULL, ]
#' )
#'
#' @export
h_coxreg_mult_cont_df <- function(variables,
                                  data,
                                  control = control_coxreg()) {
  assert_df_with_variables(data, variables)
  checkmate::assert_list(control, names = "named")
  checkmate::assert_character(variables$biomarkers, min.len = 1, any.missing = FALSE)
  conf_level <- control[["conf_level"]]
  pval_label <- paste0(
    # The regex capitalizes the first letter of the string / sentence.
    "p-value (", gsub("(^[a-z])", "\\U\\1", trimws(control[["pval_method"]]), perl = TRUE), ")"
  )
  # If there is any data, run model, otherwise return empty results.
  if (nrow(data) > 0) {
    # One model per biomarker; each fit yields a single result row.
    l_result <- lapply(variables$biomarkers, function(bm) {
      coxreg_list <- fit_coxreg_multivar(
        variables = h_surv_to_coxreg_variables(variables, bm),
        data = data,
        control = control
      )
      result <- do.call(
        h_coxreg_multivar_extract,
        c(list(var = bm), coxreg_list[c("mod", "data", "control")])
      )
      # Rebuild a time/status data frame from the response stored in the fitted
      # model (`mod$y`) and use it to estimate the median survival time.
      data_fit <- as.data.frame(as.matrix(coxreg_list$mod$y))
      data_fit$status <- as.logical(data_fit$status)
      median <- s_surv_time(
        df = data_fit,
        .var = "time",
        is_event = "status"
      )$median
      data.frame(
        # Dummy column needed downstream to create a nested header.
        biomarker = bm,
        biomarker_label = formatters::var_labels(data[bm], fill = TRUE),
        n_tot = coxreg_list$mod$n,
        n_tot_events = coxreg_list$mod$nevent,
        median = as.numeric(median),
        # Only the first row of the extracted estimates is reported.
        result[1L, c("hr", "lcl", "ucl")],
        conf_level = conf_level,
        pval = result[1L, "pval"],
        pval_label = pval_label,
        stringsAsFactors = FALSE
      )
    })
    do.call(rbind, args = c(l_result, make.row.names = FALSE))
  } else {
    # Empty data: one row per biomarker with zero counts and missing estimates.
    data.frame(
      biomarker = variables$biomarkers,
      biomarker_label = formatters::var_labels(data[variables$biomarkers], fill = TRUE),
      n_tot = 0L,
      n_tot_events = 0L,
      median = NA,
      hr = NA,
      lcl = NA,
      ucl = NA,
      conf_level = conf_level,
      pval = NA,
      pval_label = pval_label,
      row.names = seq_along(variables$biomarkers),
      stringsAsFactors = FALSE
    )
  }
}

#' @describeIn h_survival_biomarkers_subgroups prepares a single sub-table given a `df` containing
#'   the results for a single biomarker.
#'
#' @param df (`data.frame`)\cr results for a single biomarker, as part of what is
#'   returned by [extract_survival_biomarkers()] (it needs a couple of columns which are
#'   added by that high-level function relative to what is returned by [h_coxreg_mult_cont_df()],
#'   see the example).
#'
#' @return
#' * `h_tab_surv_one_biomarker()` returns an `rtables` table object with the given statistics arranged in columns.
#'
#' @examples
#' # Starting from above `df`, zoom in on one biomarker and add required columns.
#' df1 <- df[1, ]
#' df1$subgroup <- "All patients"
#' df1$row_type <- "content"
#' df1$var <- "ALL"
#' df1$var_label <- "All patients"
#' h_tab_surv_one_biomarker(
#'   df1,
#'   vars = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"),
#'   time_unit = "days"
#' )
#'
#' @export
h_tab_surv_one_biomarker <- function(df,
                                     vars,
                                     time_unit) {
  # Formatting functions for the requested statistics only.
  stat_afuns <- a_survival_subgroups()[vars]

  # Column variables and labels; the confidence level and the p-value label
  # are taken from the first row of `df`.
  col_spec <- d_survival_subgroups_colvars(
    vars,
    conf_level = df$conf_level[1],
    method = df$pval_label[1],
    time_unit = time_unit
  )

  h_tab_one_biomarker(df = df, afuns = stat_afuns, colvars = col_spec)
}

#' Counting Patients and Events in Columns
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Counting the number of unique patients and the total number of all and specific events
#' when a column table layout is required.
#'
#' @inheritParams argument_convention
#'
#' @name count_patients_events_in_cols
NULL

#' @describeIn count_patients_events_in_cols Statistics function which counts numbers of patients and multiple
#'   events defined by filters. Used as analysis function `afun` in `summarize_patients_events_in_cols()`.
#'
#' @param filters_list (named `list` of `character`)\cr each element in this list describes one
#'   type of event described by filters, in the same format as [s_count_patients_with_event()].
#'   If it has a label, then this will be used for the column title.
#' @param empty_stats (`character`)\cr optional names of the statistics that should be returned empty such
#'   that corresponding table cells will stay blank.
#' @param custom_label (`string` or `NULL`)\cr if provided and `labelstr` is empty then this will
#'   be used as label.
#'
#' @return
#' * `s_count_patients_and_multiple_events()` returns a list with the statistics:
#'   - `unique`: number of unique patients in `df`.
#'   - `all`: number of rows in `df`.
#'   - one element with the same name as in `filters_list`: number of rows in `df`,
#'     i.e. events, fulfilling the filter condition.
#'
#' @examples
#' # `s_count_patients_and_multiple_events()`
#' df <- data.frame(
#'   USUBJID = rep(c("id1", "id2", "id3", "id4"), c(2, 3, 1, 1)),
#'   ARM = c("A", "A", "B", "B", "B", "B", "A"),
#'   AESER = rep("Y", 7),
#'   AESDTH = c("Y", "Y", "N", "Y", "Y", "N", "N"),
#'   AEREL = c("Y", "Y", "N", "Y", "Y", "N", "Y"),
#'   AEDECOD = c("A", "A", "A", "B", "B", "C", "D"),
#'   AEBODSYS = rep(c("SOC1", "SOC2", "SOC3"), c(3, 3, 1))
#' )
#'
#' # Internal function - s_count_patients_and_multiple_events
#' \dontrun{
#' s_count_patients_and_multiple_events(
#'   df = df,
#'   id = "USUBJID",
#'   filters_list = list(
#'     serious = c(AESER = "Y"),
#'     fatal = c(AESDTH = "Y")
#'   )
#' )
#' }
#'
#' @keywords internal
s_count_patients_and_multiple_events <- function(df, # nolint
                                                 id,
                                                 filters_list,
                                                 empty_stats = character(),
                                                 labelstr = "",
                                                 custom_label = NULL) {
  checkmate::assert_list(filters_list, names = "named")
  checkmate::assert_data_frame(df)
  checkmate::assert_string(id)
  checkmate::assert_disjunct(c("unique", "all"), names(filters_list))
  checkmate::assert_character(empty_stats)
  checkmate::assert_string(labelstr)
  checkmate::assert_string(custom_label, null.ok = TRUE)

  # Row label precedence: non-empty `labelstr`, then `custom_label`, then "counts".
  row_label <- "counts"
  if (labelstr != "") {
    row_label <- labelstr
  } else if (!is.null(custom_label)) {
    row_label <- custom_label
  }
  # Every statistic carries the same row label.
  labelled <- function(value) formatters::with_label(value, row_label)

  # Helper index column: counting distinct values of it counts each row of `df` once.
  df$.row_index <- as.character(seq_len(nrow(df)))

  n_unique <- s_num_patients_content(df = df, .N_col = 1, .var = id, required = NULL)$unique[1L]

  # One event count per filter, named like the elements of `filters_list`.
  event_counts <- lapply(filters_list, function(filters) {
    labelled(
      s_count_patients_with_event(df = df, .var = ".row_index", filters = filters, .N_col = 1, .N_row = 1)$count
    )
  })

  out <- c(
    list(unique = labelled(n_unique), all = labelled(nrow(df))),
    event_counts
  )

  # Blank out the requested statistics while keeping their labels.
  for (stat in intersect(names(out), empty_stats)) {
    out[[stat]] <- formatters::with_label(character(), obj_label(out[[stat]]))
  }
  out
}

#' @describeIn count_patients_events_in_cols Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @param col_split (`flag`)\cr whether the columns should be split.
#'   Set to `FALSE` when the required column split has been done already earlier in the layout pipe.
#'
#' @return
#' * `summarize_patients_events_in_cols()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows
#'   containing the statistics from `s_count_patients_and_multiple_events()` to the table layout.
#' @examples
#' # `summarize_patients_events_in_cols()`
#' basic_table() %>%
#'   summarize_patients_events_in_cols(
#'     filters_list = list(
#'       related = formatters::with_label(c(AEREL = "Y"), "Events (Related)"),
#'       fatal = c(AESDTH = "Y"),
#'       fatal_related = c(AEREL = "Y", AESDTH = "Y")
#'     ),
#'     custom_label = "%s Total number of patients and events"
#'   ) %>%
#'   build_table(df)
#'
#' @export
summarize_patients_events_in_cols <- function(lyt, # nolint
                                              id = "USUBJID",
                                              filters_list = list(),
                                              ...,
                                              .stats = c(
                                                "unique",
                                                "all",
                                                names(filters_list)
                                              ),
                                              .labels = c(
                                                unique = "Patients (All)",
                                                all = "Events (All)",
                                                labels_or_names(filters_list)
                                              ),
                                              col_split = TRUE) {
  # Factory: a content function that reports a single statistic `stat`.
  stat_cfun <- function(stat) {
    make_afun(
      s_count_patients_and_multiple_events,
      id = id,
      filters_list = filters_list,
      .stats = stat,
      .formats = "xx."
    )
  }
  # One content function per requested statistic, i.e. per column.
  cfuns <- Map(stat_cfun, stat = .stats)

  # Unless the columns were split earlier in the layout, create one column per
  # statistic (all based on `id`), labelled via `.labels`.
  if (col_split) {
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = rep(id, length(.stats)),
      varlabels = .labels[.stats]
    )
  }

  summarize_row_groups(lyt = lyt, cfun = cfuns, extra_args = list(...))
}

#' Tabulate Biomarker Effects on Survival by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tabulate the estimated effects of multiple continuous biomarker variables
#' across population subgroups.
#'
#' @inheritParams argument_convention
#' @inheritParams fit_coxreg_multivar
#' @inheritParams survival_duration_subgroups
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. The tables are then typically used as input for forest plots.
#'
#' @examples
#' library(dplyr)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' df <- extract_survival_biomarkers(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     strata = "STRATA1",
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adtte_f
#' )
#' df
#'
#' @name survival_biomarkers_subgroups
NULL

#' Prepares Survival Data Estimates for Multiple Biomarkers in a Single Data Frame
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares estimates for number of events, patients and median survival times, as well as hazard ratio estimates,
#' confidence intervals and p-values, for multiple biomarkers across population subgroups in a single data frame.
#' `variables` corresponds to the names of variables found in `data`, passed as a named `list` and requires elements
#' `tte`, `is_event`, `biomarkers` (vector of continuous biomarker variables), and optionally `subgroups` and `strata`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams fit_coxreg_multivar
#' @inheritParams survival_duration_subgroups
#'
#' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_tot_events`,
#'   `median`, `hr`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`,
#'   `var_label`, and `row_type`.
#'
#' @seealso [h_coxreg_mult_cont_df()] which is used internally, [tabulate_survival_biomarkers()].
#'
#' @examples
#' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`,
#' # in multiple regression models containing one covariate `SEX`,
#' # as well as one stratification variable `STRATA1`. The subgroups
#' # are defined by the levels of `BMRKR2`.
#'
#' library(dplyr)
#'
#' adtte <- tern_ex_adtte
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' df <- extract_survival_biomarkers(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     strata = "STRATA1",
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adtte_f
#' )
#' df
#'
#' # Here we group the levels of `BMRKR2` manually.
#' df_grouped <- extract_survival_biomarkers(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     strata = "STRATA1",
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#' df_grouped
#'
#' @export
extract_survival_biomarkers <- function(variables,
                                        data,
                                        groups_lists = list(),
                                        control = control_coxreg(),
                                        label_all = "All Patients") {
  checkmate::assert_list(variables)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_string(label_all)

  # Overall results (all patients) form the "content" rows.
  overall <- h_coxreg_mult_cont_df(
    variables = variables,
    data = data,
    control = control
  )
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  # Without subgroup variables the overall results are all there is.
  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # Fit the same models within each subgroup and attach the subgroup labels
  # (repeated once per result row).
  subgroup_data <- h_split_by_subgroups(
    data,
    variables$subgroups,
    groups_lists = groups_lists
  )
  per_subgroup <- lapply(subgroup_data, function(grp) {
    est <- h_coxreg_mult_cont_df(
      variables = variables,
      data = grp$df,
      control = control
    )
    result_labels <- grp$df_labels[rep(1, times = nrow(est)), ]
    cbind(est, result_labels)
  })
  by_subgroup <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(overall, by_subgroup)
}

#' @describeIn survival_biomarkers_subgroups Table-creating function which creates a table
#'   summarizing biomarker effects on survival by subgroup.
#'
#' @param df (`data.frame`)\cr containing all analysis variables, as returned by
#'   [extract_survival_biomarkers()].
#' @param vars (`character`)\cr the names of statistics to be reported among:
#'   * `n_tot_events`: Total number of events per group.
#'   * `n_tot`: Total number of observations per group.
#'   * `median`: Median survival time.
#'   * `hr`: Hazard ratio.
#'   * `ci`: Confidence interval of hazard ratio.
#'   * `pval`: p-value of the effect.
#'   Note, one of the statistics `n_tot` and `n_tot_events`, as well as both `hr` and `ci` are required.
#'
#' @return An `rtables` table summarizing biomarker effects on survival by subgroup.
#'
#' @note In contrast to [tabulate_survival_subgroups()] this tabulation function does
#'   not start from an input layout `lyt`. This is because internally the table is
#'   created by combining multiple subtables.
#'
#' @seealso [h_tab_surv_one_biomarker()] which is used internally, [extract_survival_biomarkers()].
#'
#' @examples
#' ## Table with default columns.
#' tabulate_survival_biomarkers(df)
#'
#' ## Table with a manually chosen set of columns: leave out "pval", reorder.
#' tab <- tabulate_survival_biomarkers(
#'   df = df,
#'   vars = c("n_tot_events", "ci", "n_tot", "median", "hr"),
#'   time_unit = as.character(adtte_f$AVALU[1])
#' )
#'
#' ## Finally produce the forest plot.
#' \dontrun{
#' g_forest(tab, xlim = c(0.8, 1.2))
#' }
#'
#' @export
tabulate_survival_biomarkers <- function(df,
                                         vars = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"),
                                         time_unit = NULL) {
  checkmate::assert_data_frame(df)
  checkmate::assert_character(df$biomarker)
  checkmate::assert_character(df$biomarker_label)
  checkmate::assert_subset(vars, c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"))

  # Builds the sub-table for one biomarker and labels its first row with the
  # biomarker label.
  build_sub_table <- function(df_sub) {
    tab_sub <- h_tab_surv_one_biomarker(
      df = df_sub,
      vars = vars,
      time_unit = time_unit
    )
    label_at_path(tab_sub, path = row_paths(tab_sub)[[1]][1]) <- df_sub$biomarker_label[1]
    tab_sub
  }

  # One sub-table per biomarker, stacked into a single table.
  sub_tables <- lapply(split(df, f = df$biomarker), FUN = build_sub_table)
  stacked <- do.call(rbind, sub_tables)

  # Attach the attributes describing which columns hold the estimate, the
  # confidence interval and the (first) total-count column.
  structure(
    stacked,
    forest_header = paste0(c("Higher", "Lower"), "\nBetter"),
    col_x = match("hr", vars),
    col_ci = match("ci", vars),
    col_symbol_size = grep("^n_tot", vars)[1]
  )
}

#' Helper Function to create a map dataframe that can be used in `trim_levels_to_map` split function.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper Function to create a map dataframe from the input dataset, which can be used as an argument in the
#' `trim_levels_to_map` split function. The map is constructed differently depending on the chosen `method`.
#'
#' @inheritParams argument_convention
#' @param abnormal (named `list`)\cr identifying the abnormal range level(s) in `df`. Based on the levels of
#'   abnormality of the input dataset, it can be something like `list(Low = "LOW LOW", High = "HIGH HIGH")` or
#'   `abnormal = list(Low = "LOW", High = "HIGH")`
#' @param method (`string`)\cr indicates how the returned map will be constructed. Can be `"default"` or `"range"`.
#'
#' @return A map `data.frame`.
#'
#' @note If method is `"default"`, the returned map will only have the abnormal directions that are observed in the
#'   `df`, and records with all normal values will be excluded to avoid error in creating layout. If method is
#'   `"range"`, the returned map will be based on the rule that at least one observation with low range > 0
#'   for low direction and at least one observation with high range is not missing for high direction.
#'
#' @examples
#' adlb <- df_explicit_na(tern_ex_adlb)
#'
#' h_map_for_count_abnormal(
#'   df = adlb,
#'   variables = list(anl = "ANRIND", split_rows = c("LBCAT", "PARAM")),
#'   abnormal = list(low = c("LOW"), high = c("HIGH")),
#'   method = "default",
#'   na_level = "<Missing>"
#' )
#'
#' df <- data.frame(
#'   USUBJID = c(rep("1", 4), rep("2", 4), rep("3", 4)),
#'   AVISIT = c(
#'     rep("WEEK 1", 2),
#'     rep("WEEK 2", 2),
#'     rep("WEEK 1", 2),
#'     rep("WEEK 2", 2),
#'     rep("WEEK 1", 2),
#'     rep("WEEK 2", 2)
#'   ),
#'   PARAM = rep(c("ALT", "CPR"), 6),
#'   ANRIND = c(
#'     "NORMAL", "NORMAL", "LOW",
#'     "HIGH", "LOW", "LOW", "HIGH", "HIGH", rep("NORMAL", 4)
#'   ),
#'   ANRLO = rep(5, 12),
#'   ANRHI = rep(20, 12)
#' )
#' df$ANRIND <- factor(df$ANRIND, levels = c("LOW", "HIGH", "NORMAL"))
#' h_map_for_count_abnormal(
#'   df = df,
#'   variables = list(
#'     anl = "ANRIND",
#'     split_rows = c("PARAM"),
#'     range_low = "ANRLO",
#'     range_high = "ANRHI"
#'   ),
#'   abnormal = list(low = c("LOW"), high = c("HIGH")),
#'   method = "range",
#'   na_level = "<Missing>"
#' )
#'
#' @export
h_map_for_count_abnormal <- function(df,
                                     variables = list(
                                       anl = "ANRIND",
                                       split_rows = c("PARAM"),
                                       range_low = "ANRLO",
                                       range_high = "ANRHI"
                                     ),
                                     abnormal = list(low = c("LOW", "LOW LOW"), high = c("HIGH", "HIGH HIGH")),
                                     method = c("default", "range"),
                                     na_level = "<Missing>") {
  method <- match.arg(method)
  checkmate::assert_subset(c("anl", "split_rows"), names(variables))
  checkmate::assert_false(anyNA(df[variables$split_rows]))
  assert_df_with_variables(df,
    variables = list(anl = variables$anl, split_rows = variables$split_rows),
    na_level = na_level
  )
  assert_df_with_factors(df, list(val = variables$anl))
  assert_valid_factor(df[[variables$anl]], any.missing = FALSE)
  assert_list_of_variables(variables)
  checkmate::assert_list(abnormal, types = "character", len = 2)

  # Drop unused levels from df as they are not supposed to be in the final map
  df <- droplevels(df)

  # Whatever level is not listed in `abnormal` is taken as the normal level.
  normal_value <- setdiff(levels(df[[variables$anl]]), unlist(abnormal))

  # Based on the understanding of clinical data, there should only be one level of normal which is "NORMAL"
  checkmate::assert_vector(normal_value, len = 1)

  # Default method will only have what is observed in the df, and records with all normal values will be excluded to
  # avoid error in layout building.
  if (method == "default") {
    df_abnormal <- subset(df, df[[variables$anl]] %in% unlist(abnormal))
    map <- unique(df_abnormal[c(variables$split_rows, variables$anl)])
    # Add the normal level once for every split combination that has an abnormal record.
    map_normal <- unique(subset(map, select = variables$split_rows))
    map_normal[[variables$anl]] <- normal_value
    map <- rbind(map, map_normal)
  } else if (method == "range") {
    # range method follows the rule that at least one observation with ANRLO > 0 for low
    # direction and at least one observation with ANRHI is not missing for high direction.
    checkmate::assert_subset(c("range_low", "range_high"), names(variables))
    checkmate::assert_subset(c("LOW", "HIGH"), toupper(names(abnormal)))

    assert_df_with_variables(df,
      variables = list(
        range_low = variables$range_low,
        range_high = variables$range_high
      )
    )

    # Define low direction of map: every split combination with a positive low
    # range is crossed with all low levels.
    df_low <- subset(df, df[[variables$range_low]] > 0)
    map_low <- unique(df_low[variables$split_rows])
    low_levels <- unname(unlist(abnormal[toupper(names(abnormal)) == "LOW"]))
    low_levels_df <- as.data.frame(low_levels)
    colnames(low_levels_df) <- variables$anl
    low_levels_df <- do.call("rbind", replicate(nrow(map_low), low_levels_df, simplify = FALSE))
    rownames(map_low) <- NULL # Just to avoid strange row index in case upstream functions changed
    map_low <- map_low[rep(seq_len(nrow(map_low)), each = length(low_levels)), , drop = FALSE]
    map_low <- cbind(map_low, low_levels_df)

    # Define high direction of map. A high range counts as available only if it is neither `NA`
    # nor the explicit missing level `na_level`; both conditions must hold, hence `&`.
    # (With `|` the `na_level` comparison had no effect, so explicit missing values were kept.)
    df_high <- subset(df, df[[variables$range_high]] != na_level & !is.na(df[[variables$range_high]]))
    map_high <- unique(df_high[variables$split_rows])
    high_levels <- unname(unlist(abnormal[toupper(names(abnormal)) == "HIGH"]))
    high_levels_df <- as.data.frame(high_levels)
    colnames(high_levels_df) <- variables$anl
    high_levels_df <- do.call("rbind", replicate(nrow(map_high), high_levels_df, simplify = FALSE))
    rownames(map_high) <- NULL
    map_high <- map_high[rep(seq_len(nrow(map_high)), each = length(high_levels)), , drop = FALSE]
    map_high <- cbind(map_high, high_levels_df)

    # Define normal of map: one normal row per split combination present in either direction.
    map_normal <- unique(rbind(map_low, map_high)[variables$split_rows])
    map_normal[variables$anl] <- normal_value

    map <- rbind(map_low, map_high, map_normal)
  }

  # map should be all characters
  map <- data.frame(lapply(map, as.character), stringsAsFactors = FALSE)

  # sort the map final output by split_rows variables; the split columns come first in `map`,
  # and ordering from the last to the first key makes the first key the primary one.
  for (i in rev(seq_along(variables$split_rows))) {
    map <- map[order(map[[i]]), ]
  }
  map
}

#' Helper Function for Tabulation of a Single Biomarker Result
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Please see [h_tab_surv_one_biomarker()] and [h_tab_rsp_one_biomarker()], which use this function for examples.
#' This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @param df (`data.frame`)\cr results for a single biomarker.
#' @param afuns (named `list` of `function`)\cr analysis functions.
#' @param colvars (`list` with `vars` and `labels`)\cr variables to tabulate and their labels.
#'
#' @return An `rtables` table object with statistics in columns.
#'
#' @export
h_tab_one_biomarker <- function(df,
                                afuns,
                                colvars) {
  # Top part of the layout: restrict to the "content" rows (all patients).
  lyt <- split_rows_by(
    basic_table(),
    var = "row_type",
    split_fun = keep_split_levels("content"),
    nested = FALSE
  )

  # Content row for all patients, labelled via `var_label`.
  lyt <- summarize_row_groups(lyt, var = "var_label", cfun = afuns)

  # One column per variable to tabulate.
  lyt <- split_cols_by_multivar(
    lyt,
    vars = colvars$vars,
    varlabels = colvars$labels
  )

  # Subgroup rows are only added to the layout when `df` contains any.
  if (is.element("analysis", df$row_type)) {
    # Separate top-level split keeping only the "analysis" rows; its own label is hidden.
    lyt <- split_rows_by(
      lyt,
      var = "row_type",
      split_fun = keep_split_levels("analysis"),
      nested = FALSE,
      child_labels = "hidden"
    )

    # Nested split by subgroup variable, labelled via `var_label`.
    lyt <- split_rows_by(
      lyt,
      var = "var",
      labels_var = "var_label",
      nested = TRUE,
      child_labels = "visible"
    )

    # Content rows for each subgroup level.
    lyt <- summarize_row_groups(lyt, cfun = afuns, var = "subgroup")
  }

  build_table(lyt, df = df)
}

#' Control Function for Logistic Regression Model Fitting
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for logistic regression models.
#' `conf_level` refers to the confidence level used for the Odds Ratio CIs.
#'
#' @inheritParams argument_convention
#' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`.
#'   This will be used when fitting the logistic regression model on the left hand side of the formula.
#'   Note that the evaluated expression should result in either a logical vector or a factor with 2
#'   levels. By default this is just `"response"` such that the original response variable is used
#'   and not modified further.
#'
#' @return A list of components with the same names as the arguments.
#'
#' @examples
#' # Standard options.
#' control_logistic()
#'
#' # Modify confidence level.
#' control_logistic(conf_level = 0.9)
#'
#' # Use a different response definition.
#' control_logistic(response_definition = "I(response %in% c('CR', 'PR'))")
#'
#' @export
control_logistic <- function(response_definition = "response",
                             conf_level = 0.95) {
  # Validate the type first, so that non-string input (e.g. `NULL` or a character vector of
  # length > 1) fails with a clear "must be a string" message rather than in the content check.
  checkmate::assert_string(response_definition)
  # The definition has to contain the literal text "response"; no regular expression is needed.
  checkmate::assert_true(grepl("response", response_definition, fixed = TRUE))
  assert_proportion_value(conf_level)

  # Return the validated settings under the same names as the arguments.
  list(
    response_definition = response_definition,
    conf_level = conf_level
  )
}

#' Subgroup Treatment Effect Pattern (STEP) Fit for Survival Outcome
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This fits the Subgroup Treatment Effect Pattern models for a survival outcome. The treatment arm
#' variable must have exactly 2 levels, where the first one is taken as reference and the estimated
#' hazard ratios are for the comparison of the second level vs. the first one.
#'
#' The model which is fit is:
#'
#' `Surv(time, event) ~ arm * poly(biomarker, degree) + covariates + strata(strata)`
#'
#' where `degree` is specified by `control_step()`.
#'
#' @inheritParams argument_convention
#' @param variables (named `list` of `character`)\cr list of analysis variables: needs `time`, `event`,
#'   `arm`, `biomarker`, and optional `covariates` and `strata`.
#' @param control (named `list`)\cr combined control list from [control_step()] and [control_coxph()].
#'
#' @return A matrix of class `step`. The first part of the columns describes the subgroup intervals used
#'   for the biomarker variable, including where the centers of the intervals are and their bounds. The
#'   second part of the columns contains the estimates for the treatment arm comparison.
#'
#' @note For the default degree 0 the `biomarker` variable is not included in the model.
#'
#' @seealso [control_step()] and [control_coxph()] for the available customization options.
#'
#' @examples
#' # Testing dataset with just two treatment arms.
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("ARM" = "Treatment Arm", "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' variables <- list(
#'   arm = "ARM",
#'   biomarker = "BMRKR1",
#'   covariates = c("AGE", "BMRKR2"),
#'   event = "is_event",
#'   time = "AVAL"
#' )
#'
#' # Fit default STEP models: Here a constant treatment effect is estimated in each subgroup.
#' step_matrix <- fit_survival_step(
#'   variables = variables,
#'   data = adtte_f
#' )
#' dim(step_matrix)
#' head(step_matrix)
#'
#' # Specify different polynomial degree for the biomarker interaction to use more flexible local
#' # models. Or specify different Cox regression options.
#' step_matrix2 <- fit_survival_step(
#'   variables = variables,
#'   data = adtte_f,
#'   control = c(control_coxph(conf_level = 0.9), control_step(degree = 2))
#' )
#'
#' # Use a global model with cubic interaction and only 5 points.
#' step_matrix3 <- fit_survival_step(
#'   variables = variables,
#'   data = adtte_f,
#'   control = c(control_coxph(), control_step(bandwidth = NULL, degree = 3, num_points = 5L))
#' )
#'
#' @export
fit_survival_step <- function(variables,
                              data,
                              control = c(control_step(), control_coxph())) {
  checkmate::assert_list(control)
  assert_df_with_variables(data, variables)

  # Only rows with a non-missing biomarker value enter the analysis.
  biomarker_available <- !is.na(data[[variables$biomarker]])
  data <- data[biomarker_available, ]

  # Window selection yields the interval description (`interval`) and the row selections (`sel`).
  windows <- h_step_window(x = data[[variables$biomarker]], control = control)
  centers <- windows$interval[, "Interval Center"]
  model_formula <- h_step_survival_formula(variables = variables, control = control)

  if (is.null(control$bandwidth)) {
    # Without a bandwidth, a single call receives all interval centers at once.
    estimates <- h_step_survival_est(
      formula = model_formula,
      data = data,
      variables = variables,
      x = centers,
      control = control
    )
  } else {
    # With a bandwidth, the helper is called once per interval center, each time together with
    # the matching column of `sel` as `subset`.
    shared_args <- list(
      formula = model_formula,
      data = data,
      variables = variables,
      control = control
    )
    per_window <- mapply(
      FUN = h_step_survival_est,
      x = centers,
      subset = as.list(as.data.frame(windows$sel)),
      MoreArgs = shared_args
    )
    # `mapply()` returns one column per window: label the statistics, then put windows in rows.
    rownames(per_window) <- c("n", "events", "loghr", "se", "ci_lower", "ci_upper")
    estimates <- t(per_window)
  }

  # Interval columns first, estimates second; inputs are kept as attributes.
  structure(
    cbind(windows$interval, estimates),
    class = c("step", "matrix"),
    variables = variables,
    control = control
  )
}

#' Summarize Variables in Columns
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This analyze function uses the S3 generic function [s_summary()] to summarize different variables
#' that are arranged in columns. Additional standard formatting arguments are available. It is a
#' minimal wrapper for [rtables::analyze_colvars()].
#'
#' @inheritParams argument_convention
#' @param ... arguments passed to `s_summary()`.
#'
#' @return
#' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
#' Adding this function to an `rtable` layout will summarize the given variables, arrange the output
#' in columns, and add it to the table layout.
#'
#' @seealso [rtables::split_cols_by_multivar()] and [analyze_colvars_functions].
#'
#' @examples
#' dta_test <- data.frame(
#'   USUBJID = rep(1:6, each = 3),
#'   PARAMCD = rep("lab", 6 * 3),
#'   AVISIT = rep(paste0("V", 1:3), 6),
#'   ARM = rep(LETTERS[1:3], rep(6, 3)),
#'   AVAL = c(9:1, rep(NA, 9)),
#'   CHG = c(1:9, rep(NA, 9))
#' )
#'
#' ## Default output within a `rtables` pipeline.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
#'   summarize_colvars() %>%
#'   build_table(dta_test)
#'
#' ## Selection of statistics, formats and labels also work.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
#'   summarize_colvars(
#'     .stats = c("n", "mean_sd"),
#'     .formats = c("mean_sd" = "xx.x, xx.x"),
#'     .labels = c(n = "n", mean_sd = "Mean, SD")
#'   ) %>%
#'   build_table(dta_test)
#'
#' ## Use arguments interpreted by `s_summary`.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
#'   summarize_colvars(na.rm = FALSE) %>%
#'   build_table(dta_test)
#'
#' @export
summarize_colvars <- function(lyt,
                              ...,
                              .stats = c("n", "mean_sd", "median", "range", "count_fraction"),
                              .formats = NULL,
                              .labels = NULL,
                              .indent_mods = NULL) {
  # Build the analysis function from the requested statistics and formatting options.
  summary_afun <- create_afun_summary(.stats, .formats, .labels, .indent_mods)

  # Anything supplied through `...` is handed on via `extra_args`.
  analyze_colvars(lyt, afun = summary_afun, extra_args = list(...))
}

#' Number of Patients
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Count the number of unique and non-unique patients in a column (variable).
#'
#' @inheritParams argument_convention
#' @param x (`character` or `factor`)\cr vector of patient IDs.
#' @param count_by (`character` or `factor`)\cr optional vector to be combined with `x` when counting
#'   `nonunique` records.
#' @param unique_count_suffix (`logical`)\cr should `"(n)"` suffix be added to `unique_count` labels.
#'   Defaults to `TRUE`.
#'
#' @name summarize_num_patients
NULL

#' @describeIn summarize_num_patients Statistics function which counts the number of
#'   unique patients, the corresponding percentage taken with respect to the
#'   total number of patients, and the number of non-unique patients.
#'
#' @return
#' * `s_num_patients()` returns a named `list` of 3 statistics:
#'   * `unique`: Vector of counts and percentages.
#'   * `nonunique`: Vector of counts.
#'   * `unique_count`: Counts.
#'
#' @examples
#' # Use the statistics function to count number of unique and nonunique patients.
#' s_num_patients(x = as.character(c(1, 1, 1, 2, 4, NA)), labelstr = "", .N_col = 6L)
#' s_num_patients(
#'   x = as.character(c(1, 1, 1, 2, 4, NA)),
#'   labelstr = "",
#'   .N_col = 6L,
#'   count_by = as.character(c(1, 1, 2, 1, 1, 1))
#' )
#'
#' @export
s_num_patients <- function(x, labelstr, .N_col, count_by = NULL, unique_count_suffix = TRUE) { # nolint
  checkmate::assert_string(labelstr)
  checkmate::assert_count(.N_col)
  checkmate::assert_multi_class(x, classes = c("factor", "character"))
  checkmate::assert_flag(unique_count_suffix)

  # `count1` is based on the distinct IDs, `count2` on all records.
  count1 <- n_available(unique(x))
  count2 <- n_available(x)

  if (!is.null(count_by)) {
    checkmate::assert_vector(count_by, len = length(x))
    checkmate::assert_multi_class(count_by, classes = c("factor", "character"))
    # With `count_by`, the non-unique count is based on distinct (`x`, `count_by`) combinations.
    count2 <- n_available(unique(interaction(x, count_by)))
  }

  # Both conditions are scalars, so plain `if`/`else` is used instead of the vectorized `ifelse()`.
  # The first branch avoids `0 / 0` (i.e. `NaN`) when there are no patients and an empty column.
  fraction <- if (count1 == 0 && .N_col == 0) 0 else count1 / .N_col
  unique_count_label <- if (unique_count_suffix) paste(labelstr, "(n)") else labelstr

  list(
    unique = formatters::with_label(c(count1, fraction), labelstr),
    nonunique = formatters::with_label(count2, labelstr),
    unique_count = formatters::with_label(count1, unique_count_label)
  )
}

#' @describeIn summarize_num_patients Statistics function which counts the number of unique patients
#'   in a column (variable), the corresponding percentage taken with respect to the total number of
#'   patients, and the number of non-unique patients in the column.
#'
#' @param required (`character` or `NULL`)\cr optional name of a variable that is required to be non-missing.
#'
#' @return
#' * `s_num_patients_content()` returns the same values as `s_num_patients()`.
#'
#' @examples
#' # Count number of unique and non-unique patients.
#' df <- data.frame(
#'   USUBJID = as.character(c(1, 2, 1, 4, NA)),
#'   EVENT = as.character(c(10, 15, 10, 17, 8))
#' )
#' s_num_patients_content(df, .N_col = 5, .var = "USUBJID")
#'
#' df_by_event <- data.frame(
#'   USUBJID = as.character(c(1, 2, 1, 4, NA)),
#'   EVENT = as.character(c(10, 15, 10, 17, 8))
#' )
#' s_num_patients_content(df_by_event, .N_col = 5, .var = "USUBJID")
#' s_num_patients_content(df_by_event, .N_col = 5, .var = "USUBJID", count_by = "EVENT")
#'
#' @export
s_num_patients_content <- function(df,
                                   labelstr = "",
                                   .N_col, # nolint
                                   .var,
                                   required = NULL,
                                   count_by = NULL,
                                   unique_count_suffix = TRUE) {
  checkmate::assert_string(.var)
  checkmate::assert_data_frame(df)

  # The ID variable is always checked; the `count_by` variable only when one is given.
  needed_vars <- list(id = .var)
  if (!is.null(count_by)) {
    needed_vars$count_by <- count_by
  }
  assert_df_with_variables(df, needed_vars)

  # Optionally restrict to the rows where the `required` variable is not missing.
  if (!is.null(required)) {
    checkmate::assert_string(required)
    assert_df_with_variables(df, list(required = required))
    has_required <- !is.na(df[[required]])
    df <- df[has_required, , drop = FALSE]
  }

  # Extract the columns after filtering so that both vectors refer to the same rows.
  count_by_values <- if (is.null(count_by)) NULL else df[[count_by]]

  s_num_patients(
    x = df[[.var]],
    labelstr = labelstr,
    .N_col = .N_col,
    count_by = count_by_values,
    unique_count_suffix = unique_count_suffix
  )
}

# Formatted function wrapping `s_num_patients_content()`, with default formats for its three
# statistics. Used as the basis of `summarize_num_patients()` and `analyze_num_patients()`.
c_num_patients <- make_afun(
  s_num_patients_content,
  .stats = c("unique", "nonunique", "unique_count"),
  .formats = list(
    unique = format_count_fraction_fixed_dp,
    nonunique = "xx",
    unique_count = "xx"
  )
)

#' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @return
#' * `summarize_num_patients()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_num_patients_content()` to the table layout.
#'
#' @export
summarize_num_patients <- function(lyt,
                                   var,
                                   .stats = NULL,
                                   .formats = NULL,
                                   .labels = c(
                                     unique = "Number of patients with at least one event",
                                     nonunique = "Number of events"
                                   ),
                                   indent_mod = 0L,
                                   ...) {
  # Without an explicit selection, all three statistics are used.
  if (is.null(.stats)) {
    .stats <- c("unique", "nonunique", "unique_count")
  }
  # When there are more labels than statistics, keep only labels named after a selected statistic.
  if (length(.labels) > length(.stats)) {
    label_selected <- names(.labels) %in% .stats
    .labels <- .labels[label_selected]
  }

  content_fun <- make_afun(
    c_num_patients,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels
  )

  # Arguments supplied through `...` are handed on via `extra_args`.
  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = content_fun,
    extra_args = list(...),
    indent_mod = indent_mod
  )
}

#' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `analyze_num_patients()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_num_patients_content()` to the table layout.
#'
#' @details In general, functions that start with `analyze*` are expected to
#'   work like [rtables::analyze()], while functions that start with `summarize*`
#'   are based upon [rtables::summarize_row_groups()]. The latter provides a
#'   value for each dividing split in the row and column space but, because it
#'   is bound to the fundamental splits, it is repeated by design on every page
#'   when pagination is involved.
#'
#' @note As opposed to [summarize_num_patients()], this function does not repeat the produced rows.
#'
#' @examples
#' df_tmp <- data.frame(
#'   USUBJID = as.character(c(1, 2, 1, 4, NA, 6, 6, 8, 9)),
#'   ARM = c("A", "A", "A", "A", "A", "B", "B", "B", "B"),
#'   AGE = c(10, 15, 10, 17, 8, 11, 11, 19, 17)
#' )
#' tbl <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   analyze_num_patients("USUBJID", .stats = c("unique")) %>%
#'   build_table(df_tmp)
#' tbl
#'
#' @export
analyze_num_patients <- function(lyt,
                                 vars,
                                 .stats = NULL,
                                 .formats = NULL,
                                 .labels = c(
                                   unique = "Number of patients with at least one event",
                                   nonunique = "Number of events"
                                 ),
                                 show_labels = c("default", "visible", "hidden"),
                                 indent_mod = 0L,
                                 ...) {
  # Without an explicit selection, all three statistics are used.
  if (is.null(.stats)) {
    .stats <- c("unique", "nonunique", "unique_count")
  }
  # When there are more labels than statistics, keep only labels named after a selected statistic.
  if (length(.labels) > length(.stats)) {
    label_selected <- names(.labels) %in% .stats
    .labels <- .labels[label_selected]
  }

  analysis_fun <- make_afun(
    c_num_patients,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels
  )

  # Arguments supplied through `...` are handed on via `extra_args`.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = analysis_fun,
    extra_args = list(...),
    show_labels = show_labels,
    indent_mod = indent_mod
  )
}

#' Generate PK reference dataset
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @return `data.frame` of PK parameters
#'
#' @examples
#' pk_reference_dataset <- d_pkparam()
#'
#' @export
d_pkparam <- function() {
  pk_dataset <- as.data.frame(matrix(
    c(
      "TMAX", "Time of CMAX", "Tmax", "Plasma/Blood/Serum", "1",
      "CMAX", "Max Conc", "Cmax", "Plasma/Blood/Serum", "2",
      "CMAXD", "Max Conc Norm by Dose", "Cmax/D", "Plasma/Blood/Serum", "3",
      "AUCIFO", "AUC Infinity Obs", "AUCinf obs", "Plasma/Blood/Serum", "4",
      "AUCIFP", "AUC Infinity Pred", "AUCinf pred", "Plasma/Blood/Serum", "5",
      "AUCIFOD", "AUC Infinity Obs Norm by Dose", "AUCinf/D obs", "Plasma/Blood/Serum", "6",
      "AUCIFD", "AUC Infinity Pred Norm by Dose", "AUCinf/D pred", "Plasma/Blood/Serum", "7",
      "AUCPEO", "AUC %Extrapolation Obs", "AUCinf extrap obs", "Plasma/Blood/Serum", "8",
      "AUCPEP", "AUC %Extrapolation Pred", "AUCinf extrap pred", "Plasma/Blood/Serum", "9",
      "AUCINT", "AUC from T1 to T2", "AUCupper-lower ", "Plasma/Blood/Serum", "10",
      "AUCTAU", "AUC Over Dosing Interval", "AUCtau", "Plasma/Blood/Serum", "11",
      "AUCLST", "AUC to Last Nonzero Conc", "AUClast", "Plasma/Blood/Serum", "12",
      "AUCALL", "AUC All", "AUCall", "Plasma/Blood/Serum", "13",
      "AUMCIFO", "AUMC Infinity Obs", "AUMCinf obs", "Plasma/Blood/Serum", "14",
      "AUMCIFP", "AUMC Infinity Pred", "AUMCinf pred", "Plasma/Blood/Serum", "15",
      "AUMCPEO", "AUMC % Extrapolation Obs", "AUMC extrap obs", "Plasma/Blood/Serum", "16",
      "AUMCPEP", "AUMC % Extrapolation Pred", "AUMC extrap pred", "Plasma/Blood/Serum", "17",
      "AUMCTAU", "AUMC Over Dosing Interval", "AUMCtau", "Plasma/Blood/Serum", "18",
      "AUMCLST", "AUMC to Last Nonzero Conc", "AUMClast", "Plasma/Blood/Serum", "19",
      "AURCIFO", "AURC Infinity Obs", "AURCinf obs", "Plasma/Blood/Serum", "20",
      "AURCIFP", "AURC Infinity Pred", "AURCinf pred", "Plasma/Blood/Serum", "21",
      "AURCPEO", "AURC % Extrapolation Obs", "AURC extrap obs", "Plasma/Blood/Serum", "22",
      "AURCPEP", "AURC % Extrapolation Pred", "AURC extrap pred", "Plasma/Blood/Serum", "23",
      "AURCLST", "AURC Dosing to Last Conc", "AURClast", "Plasma/Blood/Serum", "24",
      "AURCALL", "AURC All", "AURCall", "Plasma/Blood/Serum", "25",
      "TLST", "Time of Last Nonzero Conc", "Tlast", "Plasma/Blood/Serum", "26",
      "CO", "Initial Conc", "CO", "Plasma/Blood/Serum", "27",
      "C0", "Initial Conc", "C0", "Plasma/Blood/Serum", "28",
      "CAVG", "Average Conc", "Cavg", "Plasma/Blood/Serum", "29",
      "CLST", "Last Nonzero Conc", "Clast", "Plasma/Blood/Serum", "30",
      "CMIN", "Min Conc", "Cmin", "Plasma/Blood/Serum", "31",
      "LAMZHL", "Half-Life Lambda z", "t1/2", "Plasma/Blood/Serum", "32",
      "CLFO", "Total CL Obs by F", "CL/F obs", "Plasma/Blood/Serum", "33",
      "CLFP", "Total CL Pred by F", "CL/F pred", "Plasma/Blood/Serum", "34",
      "CLO", "Total CL Obs", "CL obs", "Plasma/Blood/Serum", "35",
      "CLP", "Total CL Pred", "CL pred", "Plasma/Blood/Serum", "36",
      "CLSS", "Total CL Steady State Pred", "CLss", "Plasma/Blood/Serum", "37",
      "CLSSF", "Total CL Steady State Pred by F", "CLss/F", "Plasma/Blood/Serum", "38",
      "VZFO", "Vz Obs by F", "Vz/F obs", "Plasma/Blood/Serum", "39",
      "VZFP", "Vz Pred by F", "Vz/F pred", "Plasma/Blood/Serum", "40",
      "VZO", "Vz Obs", "Vz obs", "Plasma/Blood/Serum", "41",
      "VZP", "Vz Pred", "Vz pred", "Plasma/Blood/Serum", "42",
      "VSSO", "Vol Dist Steady State Obs", "Vss obs", "Plasma/Blood/Serum", "43",
      "VSSP", "Vol Dist Steady State Pred", "Vss pred", "Plasma/Blood/Serum", "44",
      "LAMZ", "Lambda z", "Lambda z", "Plasma/Blood/Serum", "45",
      "LAMZLL", "Lambda z Lower Limit", "Lambda z lower", "Plasma/Blood/Serum", "46",
      "LAMZUL", "Lambda z Upper Limit", "Lambda z upper", "Plasma/Blood/Serum", "47",
      "LAMZNPT", "Number of Points for Lambda z", "No points Lambda z", "Plasma/Blood/Serum", "48",
      "MRTIFO", "MRT Infinity Obs", "MRTinf obs", "Plasma/Blood/Serum", "49",
      "MRTIFP", "MRT Infinity Pred", "MRTinf pred", "Plasma/Blood/Serum", "50",
      "MRTLST", "MRT to Last Nonzero Conc", "MRTlast", "Plasma/Blood/Serum", "51",
      "R2", "R Squared", "Rsq", "Plasma/Blood/Serum", "52",
      "R2ADJ", "R Squared Adjusted", "Rsq adjusted", "Plasma/Blood/Serum", "53",
      "TLAG", "Time Until First Nonzero Conc", "TIag", "Plasma/Blood/Serum", "54",
      "TMIN", "Time of CMIN Observation", "Tmin", "Plasma/Blood/Serum", "55",
      "ACCI", "Accumulation Index", "Accumulation Index", "Plasma/Blood/Serum/Urine", "56",
      "FLUCP", "Fluctuation%", "Fluctuation", "Plasma/Blood/Serum", "57",
      "CORRXY", "Correlation Between TimeX and Log ConcY", "Corr xy", "Plasma/Blood/Serum", "58",
      "RCAMINT", "Amt Rec from T1 to T2", "Ae", "Urine", "59",
      "RCPCINT", "Pct Rec from T1 to T2", "Fe", "Urine", "60",
      "VOLPK", "Sum of Urine Vol", "Urine volume", "Urine", "61",
      "RENALCL", "Renal CL", "CLR", "Plasma/Blood/Serum/Urine", "62",
      "ERTMAX", "Time of Max Excretion Rate", "Tmax Rate", "Urine", "63",
      "RMAX", "Time of Maximum Response", "Rmax", "Matrix of PD", "64",
      "RMIN", "Time of Minimum Response", "Rmin", "Matrix of PD", "65",
      "ERMAX", "Max Excretion Rate", "Max excretion rate", "Urine", "66",
      "MIDPTLST", "Midpoint of Collection Interval", "Midpoint last", "Urine", "67",
      "ERLST", "Last Meas Excretion Rate", "Rate last", "Urine", "68",
      "TON", "Time to Onset", "Tonset", "Matrix of PD", "69",
      "TOFF", "Time to Offset", "Toffset", "Matrix of PD", "70",
      "TBBLP", "Time Below Baseline %", "Time %Below Baseline", "Matrix of PD", "71",
      "TBTP", "Time Below Threshold %", "Time %Below Threshold", "Matrix of PD", "72",
      "TABL", "Time Above Baseline", "Time Above Baseline", "Matrix of PD", "73",
      "TAT", "Time Above Threshold", "Time Above Threshold", "Matrix of PD", "74",
      "TBT", "Time Below Threshold", "Time Below Threshold", "Matrix of PD", "75",
      "TBLT", "Time Between Baseline and Threshold", "Time Between Baseline Threshold", "Matrix of PD", "76",
      "BLRSP", "Baseline Response", "Baseline", "Matrix of PD", "77",
      "TSHDRSP", "Response Threshold", "Threshold", "Matrix of PD", "78",
      "AUCABL", "AUC Above Baseline", "AUC above baseline", "Matrix of PD", "79",
      "AUCAT", "AUC Above Threshold", "AUC above threshold", "Matrix of PD", "80",
      "AUCBBL", "AUC Below Baseline", "AUC below baseline", "Matrix of PD", "81",
      "AUCBT", "AUC Below Threshold", "AUC below threshold", "Matrix of PD", "82",
      "AUCBLDIF", "Diff AUC Above Base and AUC Below Base", "AUC diff baseline", "Matrix of PD", "83",
      "AUCTDIF", "Diff AUC Above Thr and AUC Below Thr", "AUCnet threshold", "Matrix of PD", "84",
      "TDIFF", "Diff Time to Offset and Time to Onset", "Diff toffset-tonset", "Matrix of PD", "85",
      "AUCPBEO", "AUC %Back Extrapolation Obs", "AUC%Back extrap obs", "Plasma/Blood/Serum", "86",
      "AUCPBEP", "AUC %Back Extrapolation Pred", "AUC%Back extrap pred", "Plasma/Blood/Serum", "87",
      "TSLP1L", "Lower Time Limit Slope 1st", "Slope1 lower", "Matrix of PD", "88",
      "TSLP1U", "Upper Time Limit Slope 1st Segment", "Slope1 upper", "Matrix of PD", "89",
      "TSLP2L", "Lower Time Limit Slope 2nd Segment", "Slope2 lower", "Matrix of PD", "90",
      "TSLP2U", "Upper Time Limit Slope 2nd Segment", "Slope2 upper", "Matrix of PD", "91",
      "SLP1", "Slope, 1st Segment", "Slope1", "Matrix of PD", "92",
      "SLP2", "Slope, 2nd Segment", "Slope2", "Matrix of PD", "93",
      "SLP1PT", "Number of Points for Slope 1st Segment", "No points slope1", "Matrix of PD", "94",
      "SLP2PT", "Number of Points for Slope 2nd Segment", "No points slope2", "Matrix of PD", "95",
      "R2ADJS1", "R-Squared Adjusted Slope, 1st Segment", "Rsq adjusted slope1", "Matrix of PD", "96",
      "R2ADJS2", "R-Squared Adjusted Slope, 2nd Segment", "Rsq adjusted slope2", "Matrix of PD", "97",
      "R2SLP1", "R Squared, Slope, 1st Segment", "Rsq slope1", "Matrix of PD", "98",
      "R2SLP2", "R Squared, Slope, 2nd Segment", "Rsq slope2", "Matrix of PD", "99",
      "CORRXYS1", "Corr Btw TimeX and Log ConcY, Slope 1st", "Corr xy slope1", "Plasma/Blood/Serum", "100",
      "CORRXYS2", "Corr Btw TimeX and Log ConcY, Slope 1st Slope 2nd", "Corr xy slope2", "Plasma/Blood/Serum", "101",
      "AILAMZ", "Accumulation Index using Lambda z", "AILAMZ", "Plasma/Blood/Serum", "102",
      "ARAUC", "Accumulation Ratio AUCTAU", "ARAUC", "Plasma/Blood/Serum", "103",
      "ARAUCD", "Accum Ratio AUCTAU norm by dose", "ARAUCD", "Plasma/Blood/Serum", "104",
      "ARAUCIFO", "Accum Ratio AUC Infinity Obs", "ARAUCIFO", "Plasma/Blood/Serum", "105",
      "ARAUCIFP", "Accum Ratio AUC Infinity Pred", "ARAUCIFP", "Plasma/Blood/Serum", "106",
      "ARAUCIND", "Accum Ratio AUC T1 to T2 norm by dose", "ARAUCIND_T1_T2_UNIT", "Plasma/Blood/Serum", "107",
      "ARAUCINT", "Accumulation Ratio AUC from T1 to T2", "ARAUCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "108",
      "ARAUCIOD", "Accum Ratio AUCIFO Norm by Dose", "ARAUCIOD", "Plasma/Blood/Serum", "109",
      "ARAUCIPD", "Accum Ratio AUCIFP Norm by Dose", "ARAUCIPD", "Plasma/Blood/Serum", "110",
      "ARAUCLST", "Accum Ratio AUC to Last Nonzero Conc", "ARAUCLST", "Plasma/Blood/Serum", "111",
      "ARCMAX", "Accumulation Ratio Cmax", "ARCMAX", "Plasma/Blood/Serum", "112",
      "ARCMAXD", "Accum Ratio Cmax norm by dose", "ARCMAXD", "Plasma/Blood/Serum", "113",
      "ARCMIN", "Accumulation Ratio Cmin", "ARCMIN", "Plasma/Blood/Serum", "114",
      "ARCMIND", "Accum Ratio Cmin norm by dose", "ARCMIND", "Plasma/Blood/Serum", "115",
      "ARCTROUD", "Accum Ratio Ctrough norm by dose", "ARCTROUD", "Plasma/Blood/Serum", "116",
      "ARCTROUG", "Accumulation Ratio Ctrough", "ARCTROUG", "Plasma/Blood/Serum", "117",
      "AUCALLB", "AUC All Norm by BMI", "AUCall_B", "Plasma/Blood/Serum", "118",
      "AUCALLD", "AUC All Norm by Dose", "AUCall_D", "Plasma/Blood/Serum", "119",
      "AUCALLS", "AUC All Norm by SA", "AUCall_S", "Plasma/Blood/Serum", "120",
      "AUCALLW", "AUC All Norm by WT", "AUCall_W", "Plasma/Blood/Serum", "121",
      "AUCIFOB", "AUC Infinity Obs Norm by BMI", "AUCINF_obs_B", "Plasma/Blood/Serum", "122",
      "AUCIFOLN", "AUC Infinity Obs LN Transformed", "AUCIFOLN", "Plasma/Blood/Serum", "123",
      "AUCIFOS", "AUC Infinity Obs Norm by SA", "AUCINF_obs_S", "Plasma/Blood/Serum", "124",
      "AUCIFOUB", "AUC Infinity Obs, Unbound Drug", "AUCIFOUB", "Plasma/Blood/Serum", "125",
      "AUCIFOW", "AUC Infinity Obs Norm by WT", "AUCINF_obs_W", "Plasma/Blood/Serum", "126",
      "AUCIFPB", "AUC Infinity Pred Norm by BMI", "AUCINF_pred_B", "Plasma/Blood/Serum", "127",
      "AUCIFPD", "AUC Infinity Pred Norm by Dose", "AUCINF_pred_D", "Plasma/Blood/Serum", "128",
      "AUCIFPS", "AUC Infinity Pred Norm by SA", "AUCINF_pred_S", "Plasma/Blood/Serum", "129",
      "AUCIFPUB", "AUC Infinity Pred, Unbound Drug", "AUCIFPUB", "Plasma/Blood/Serum", "130",
      "AUCIFPW", "AUC Infinity Pred Norm by WT", "AUCINF_pred_W", "Plasma/Blood/Serum", "131",
      "AUCINTB", "AUC from T1 to T2 Norm by BMI", "AUC_B_T1_T2_UNIT", "Plasma/Blood/Serum", "132",
      "AUCINTD", "AUC from T1 to T2 Norm by Dose", "AUC_D_T1_T2_UNIT", "Plasma/Blood/Serum", "133",
      "AUCINTS", "AUC from T1 to T2 Norm by SA", "AUC_S_T1_T2_UNIT", "Plasma/Blood/Serum", "134",
      "AUCINTW", "AUC from T1 to T2 Norm by WT", "AUC_W_T1_T2_UNIT", "Plasma/Blood/Serum", "135",
      "AUCLSTB", "AUC to Last Nonzero Conc Norm by BMI", "AUClast_B", "Plasma/Blood/Serum", "136",
      "AUCLSTD", "AUC to Last Nonzero Conc Norm by Dose", "AUClast_D", "Plasma/Blood/Serum", "137",
      "AUCLSTLN", "AUC to Last Nonzero Conc LN Transformed", "AUCLSTLN", "Plasma/Blood/Serum", "138",
      "AUCLSTS", "AUC to Last Nonzero Conc Norm by SA", "AUClast_S", "Plasma/Blood/Serum", "139",
      "AUCLSTUB", "AUC to Last Nonzero Conc, Unbound Drug", "AUCLSTUB", "Plasma/Blood/Serum", "140",
      "AUCLSTW", "AUC to Last Nonzero Conc Norm by WT", "AUClast_W", "Plasma/Blood/Serum", "141",
      "AUCTAUB", "AUC Over Dosing Interval Norm by BMI", "AUC_TAU_B", "Plasma/Blood/Serum", "142",
      "AUCTAUD", "AUC Over Dosing Interval Norm by Dose", "AUC_TAU_D", "Plasma/Blood/Serum", "143",
      "AUCTAUS", "AUC Over Dosing Interval Norm by SA", "AUC_TAU_S", "Plasma/Blood/Serum", "144",
      "AUCTAUW", "AUC Over Dosing Interval Norm by WT", "AUC_TAU_W", "Plasma/Blood/Serum", "145",
      "AUMCIFOB", "AUMC Infinity Obs Norm by BMI", "AUMCINF_obs_B", "Plasma/Blood/Serum", "146",
      "AUMCIFOD", "AUMC Infinity Obs Norm by Dose", "AUMCINF_obs_D", "Plasma/Blood/Serum", "147",
      "AUMCIFOS", "AUMC Infinity Obs Norm by SA", "AUMCINF_obs_S", "Plasma/Blood/Serum", "148",
      "AUMCIFOW", "AUMC Infinity Obs Norm by WT", "AUMCINF_obs_W", "Plasma/Blood/Serum", "149",
      "AUMCIFPB", "AUMC Infinity Pred Norm by BMI", "AUMCINF_pred_B", "Plasma/Blood/Serum", "150",
      "AUMCIFPD", "AUMC Infinity Pred Norm by Dose", "AUMCINF_pred_D", "Plasma/Blood/Serum", "151",
      "AUMCIFPS", "AUMC Infinity Pred Norm by SA", "AUMCINF_pred_S", "Plasma/Blood/Serum", "152",
      "AUMCIFPW", "AUMC Infinity Pred Norm by WT", "AUMCINF_pred_W", "Plasma/Blood/Serum", "153",
      "AUMCLSTB", "AUMC to Last Nonzero Conc Norm by BMI", "AUMClast_B", "Plasma/Blood/Serum", "154",
      "AUMCLSTD", "AUMC to Last Nonzero Conc Norm by Dose", "AUMClast_D", "Plasma/Blood/Serum", "155",
      "AUMCLSTS", "AUMC to Last Nonzero Conc Norm by SA", "AUMClast_S", "Plasma/Blood/Serum", "156",
      "AUMCLSTW", "AUMC to Last Nonzero Conc Norm by WT", "AUMClast_W", "Plasma/Blood/Serum", "157",
      "AUMCTAUB", "AUMC Over Dosing Interval Norm by BMI", "AUMCTAUB", "Plasma/Blood/Serum", "158",
      "AUMCTAUD", "AUMC Over Dosing Interval Norm by Dose", "AUMCTAUD", "Plasma/Blood/Serum", "159",
      "AUMCTAUS", "AUMC Over Dosing Interval Norm by SA", "AUMCTAUS", "Plasma/Blood/Serum", "160",
      "AUMCTAUW", "AUMC Over Dosing Interval Norm by WT", "AUMCTAUW", "Plasma/Blood/Serum", "161",
      "AURCALLB", "AURC All Norm by BMI", "AURCALLB", "Plasma/Blood/Serum", "162",
      "AURCALLD", "AURC All Norm by Dose", "AURCALLD", "Plasma/Blood/Serum", "163",
      "AURCALLS", "AURC All Norm by SA", "AURCALLS", "Plasma/Blood/Serum", "164",
      "AURCALLW", "AURC All Norm by WT", "AURCALLW", "Plasma/Blood/Serum", "165",
      "AURCIFOB", "AURC Infinity Obs Norm by BMI", "AURCIFOB", "Plasma/Blood/Serum", "166",
      "AURCIFOD", "AURC Infinity Obs Norm by Dose", "AURCIFOD", "Plasma/Blood/Serum", "167",
      "AURCIFOS", "AURC Infinity Obs Norm by SA", "AURCIFOS", "Plasma/Blood/Serum", "168",
      "AURCIFOW", "AURC Infinity Obs Norm by WT", "AURCIFOW", "Plasma/Blood/Serum", "169",
      "AURCIFPB", "AURC Infinity Pred Norm by BMI", "AURCIFPB", "Plasma/Blood/Serum", "170",
      "AURCIFPD", "AURC Infinity Pred Norm by Dose", "AURCIFPD", "Plasma/Blood/Serum", "171",
      "AURCIFPS", "AURC Infinity Pred Norm by SA", "AURCIFPS", "Plasma/Blood/Serum", "172",
      "AURCIFPW", "AURC Infinity Pred Norm by WT", "AURCIFPW", "Plasma/Blood/Serum", "173",
      "AURCINT", "AURC from T1 to T2", "AURCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "174",
      "AURCINTB", "AURC from T1 to T2 Norm by BMI", "AURCINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "175",
      "AURCINTD", "AURC from T1 to T2 Norm by Dose", "AURCINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "176",
      "AURCINTS", "AURC from T1 to T2 Norm by SA", "AURCINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "177",
      "AURCINTW", "AURC from T1 to T2 Norm by WT", "AURCINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "178",
      "AURCLSTB", "AURC to Last Nonzero Rate Norm by BMI", "AURCLSTB", "Plasma/Blood/Serum", "179",
      "AURCLSTD", "AURC to Last Nonzero Rate Norm by Dose", "AURCLSTD", "Plasma/Blood/Serum", "180",
      "AURCLSTS", "AURC to Last Nonzero Rate Norm by SA", "AURCLSTS", "Plasma/Blood/Serum", "181",
      "AURCLSTW", "AURC to Last Nonzero Rate Norm by WT", "AURCLSTW", "Plasma/Blood/Serum", "182",
      "C0B", "Initial Conc Norm by BMI", "C0B", "Plasma/Blood/Serum", "183",
      "C0D", "Initial Conc Norm by Dose", "C0D", "Plasma/Blood/Serum", "184",
      "C0S", "Initial Conc Norm by SA", "C0S", "Plasma/Blood/Serum", "185",
      "C0W", "Initial Conc Norm by WT", "C0W", "Plasma/Blood/Serum", "186",
      "CAVGB", "Average Conc Norm by BMI", "CAVGB", "Plasma/Blood/Serum", "187",
      "CAVGD", "Average Conc Norm by Dose", "CAVGD", "Plasma/Blood/Serum", "188",
      "CAVGINT", "Average Conc from T1 to T2", "CAVGINT_T1_T2_UNIT", "Plasma/Blood/Serum", "189",
      "CAVGINTB", "Average Conc from T1 to T2 Norm by BMI", "CAVGINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "190",
      "CAVGINTD", "Average Conc from T1 to T2 Norm by Dose", "CAVGINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "191",
      "CAVGINTS", "Average Conc from T1 to T2 Norm by SA", "CAVGINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "192",
      "CAVGINTW", "Average Conc from T1 to T2 Norm by WT", "CAVGINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "193",
      "CAVGS", "Average Conc Norm by SA", "CAVGS", "Plasma/Blood/Serum", "194",
      "CAVGW", "Average Conc Norm by WT", "CAVGW", "Plasma/Blood/Serum", "195",
      "CHTMAX", "Concentration at Half Tmax", "CHTMAX", "Plasma/Blood/Serum", "196",
      "CLFOB", "Total CL Obs by F Norm by BMI", "CLFOB", "Plasma/Blood/Serum", "197",
      "CLFOD", "Total CL Obs by F Norm by Dose", "CLFOD", "Plasma/Blood/Serum", "198",
      "CLFOS", "Total CL Obs by F Norm by SA", "CLFOS", "Plasma/Blood/Serum", "199",
      "CLFOW", "Total CL Obs by F Norm by WT", "CLFOW", "Plasma/Blood/Serum", "200",
      "CLFPB", "Total CL Pred by F Norm by BMI", "CLFPB", "Plasma/Blood/Serum", "201",
      "CLFPD", "Total CL Pred by F Norm by Dose", "CLFPD", "Plasma/Blood/Serum", "202",
      "CLFPS", "Total CL Pred by F Norm by SA", "CLFPS", "Plasma/Blood/Serum", "203",
      "CLFPW", "Total CL Pred by F Norm by WT", "CLFPW", "Plasma/Blood/Serum", "204",
      "CLFTAU", "Total CL by F for Dose Int", "CLFTAU", "Plasma/Blood/Serum", "205",
      "CLFTAUB", "Total CL by F for Dose Int Norm by BMI", "CLFTAUB", "Plasma/Blood/Serum", "206",
      "CLFTAUD", "Total CL by F for Dose Int Norm by Dose", "CLFTAUD", "Plasma/Blood/Serum", "207",
      "CLFTAUS", "Total CL by F for Dose Int Norm by SA", "CLFTAUS", "Plasma/Blood/Serum", "208",
      "CLFTAUW", "Total CL by F for Dose Int Norm by WT", "CLFTAUW", "Plasma/Blood/Serum", "209",
      "CLFUB", "Apparent CL for Unbound Drug", "CLFUB", "Plasma/Blood/Serum", "210",
      "CLOB", "Total CL Obs Norm by BMI", "CLOB", "Plasma/Blood/Serum", "211",
      "CLOD", "Total CL Obs Norm by Dose", "CLOD", "Plasma/Blood/Serum", "212",
      "CLOS", "Total CL Obs Norm by SA", "CLOS", "Plasma/Blood/Serum", "213",
      "CLOUB", "Total CL Obs for Unbound Drug", "CLOUB", "Plasma/Blood/Serum", "214",
      "CLOW", "Total CL Obs Norm by WT", "CLOW", "Plasma/Blood/Serum", "215",
      "CLPB", "Total CL Pred Norm by BMI", "CLPB", "Plasma/Blood/Serum", "216",
      "CLPD", "Total CL Pred Norm by Dose", "CLPD", "Plasma/Blood/Serum", "217",
      "CLPS", "Total CL Pred Norm by SA", "CLPS", "Plasma/Blood/Serum", "218",
      "CLPUB", "Total CL Pred for Unbound Drug", "CLPUB", "Plasma/Blood/Serum", "219",
      "CLPW", "Total CL Pred Norm by WT", "CLPW", "Plasma/Blood/Serum", "220",
      "CLRPCLEV", "Renal CL as Pct CL EV", "CLRPCLEV", "Urine", "221",
      "CLRPCLIV", "Renal CL as Pct CL IV", "CLRPCLIV", "Urine", "222",
      "CLSTB", "Last Nonzero Conc Norm by BMI", "CLSTB", "Plasma/Blood/Serum", "223",
      "CLSTD", "Last Nonzero Conc Norm by Dose", "CLSTD", "Plasma/Blood/Serum", "224",
      "CLSTS", "Last Nonzero Conc Norm by SA", "CLSTS", "Plasma/Blood/Serum", "225",
      "CLSTW", "Last Nonzero Conc Norm by WT", "CLSTW", "Plasma/Blood/Serum", "226",
      "CLTAU", "Total CL for Dose Int", "CLTAU", "Plasma/Blood/Serum", "227",
      "CLTAUB", "Total CL for Dose Int Norm by BMI", "CLTAUB", "Plasma/Blood/Serum", "228",
      "CLTAUD", "Total CL for Dose Int Norm by Dose", "CLTAUD", "Plasma/Blood/Serum", "229",
      "CLTAUS", "Total CL for Dose Int Norm by SA", "CLTAUS", "Plasma/Blood/Serum", "230",
      "CLTAUW", "Total CL for Dose Int Norm by WT", "CLTAUW", "Plasma/Blood/Serum", "231",
      "CMAXB", "Max Conc Norm by BMI", "CMAX_B", "Plasma/Blood/Serum", "232",
      "CMAXLN", "Max Conc LN Transformed", "CMAXLN", "Plasma/Blood/Serum", "233",
      "CMAXS", "Max Conc Norm by SA", "CMAXS", "Plasma/Blood/Serum", "234",
      "CMAXUB", "Max Conc, Unbound Drug", "CMAXUB", "Plasma/Blood/Serum", "235",
      "CMAXW", "Max Conc Norm by WT", "CMAXW", "Plasma/Blood/Serum", "236",
      "CMINB", "Min Conc Norm by BMI", "CMINB", "Plasma/Blood/Serum", "237",
      "CMIND", "Min Conc Norm by Dose", "CMIND", "Plasma/Blood/Serum", "238",
      "CMINS", "Min Conc Norm by SA", "CMINS", "Plasma/Blood/Serum", "239",
      "CMINW", "Min Conc Norm by WT", "CMINW", "Plasma/Blood/Serum", "240",
      "CONC", "Concentration", "CONC", "Plasma/Blood/Serum", "241",
      "CONCB", "Conc by BMI", "CONCB", "Plasma/Blood/Serum", "242",
      "CONCD", "Conc by Dose", "CONCD", "Plasma/Blood/Serum", "243",
      "CONCS", "Conc by SA", "CONCS", "Plasma/Blood/Serum", "244",
      "CONCW", "Conc by WT", "CONCW", "Plasma/Blood/Serum", "245",
      "CTROUGH", "Conc Trough", "CTROUGH", "Plasma/Blood/Serum", "246",
      "CTROUGHB", "Conc Trough by BMI", "CTROUGHB", "Plasma/Blood/Serum", "247",
      "CTROUGHD", "Conc Trough by Dose", "CTROUGHD", "Plasma/Blood/Serum", "248",
      "CTROUGHS", "Conc Trough by SA", "CTROUGHS", "Plasma/Blood/Serum", "249",
      "CTROUGHW", "Conc Trough by WT", "CTROUGHW", "Plasma/Blood/Serum", "250",
      "EFFHL", "Effective Half-Life", "EFFHL", "Plasma/Blood/Serum", "251",
      "ERINT", "Excret Rate from T1 to T2", "ERINT_T1_T2_UNIT", "Plasma/Blood/Serum", "252",
      "ERINTB", "Excret Rate from T1 to T2 Norm by BMI", "ERINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "253",
      "ERINTD", "Excret Rate from T1 to T2 Norm by Dose", "ERINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "254",
      "ERINTS", "Excret Rate from T1 to T2 Norm by SA", "ERINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "255",
      "ERINTW", "Excret Rate from T1 to T2 Norm by WT", "ERINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "256",
      "ERLSTB", "Last Meas Excretion Rate Norm by BMI", "ERLSTB", "Plasma/Blood/Serum", "257",
      "ERLSTD", "Last Meas Excretion Rate Norm by Dose", "ERLSTD", "Plasma/Blood/Serum", "258",
      "ERLSTS", "Last Meas Excretion Rate Norm by SA", "ERLSTS", "Plasma/Blood/Serum", "259",
      "ERLSTW", "Last Meas Excretion Rate Norm by WT", "ERLSTW", "Plasma/Blood/Serum", "260",
      "ERMAXB", "Max Excretion Rate Norm by BMI", "ERMAXB", "Plasma/Blood/Serum", "261",
      "ERMAXD", "Max Excretion Rate Norm by Dose", "ERMAXD", "Plasma/Blood/Serum", "262",
      "ERMAXS", "Max Excretion Rate Norm by SA", "ERMAXS", "Plasma/Blood/Serum", "263",
      "ERMAXW", "Max Excretion Rate Norm by WT", "ERMAXW", "Plasma/Blood/Serum", "264",
      "ERTLST", "Midpoint of Interval of Last Nonzero ER", "ERTLST", "Plasma/Blood/Serum", "265",
      "FABS", "Absolute Bioavailability", "FABS", "Plasma/Blood/Serum", "266",
      "FB", "Fraction Bound", "FB", "Plasma/Blood/Serum", "267",
      "FREL", "Relative Bioavailability", "FREL", "Plasma/Blood/Serum", "268",
      "FREXINT", "Fract Excr from T1 to T2", "FREXINT_T1_T2_UNIT", "Plasma/Blood/Serum", "269",
      "FU", "Fraction Unbound", "FU", "Plasma/Blood/Serum", "270",
      "HDCL", "Hemodialysis Clearance", "HDCL", "Plasma/Blood/Serum", "271",
      "HDER", "Hemodialysis Extraction Ratio", "HDER", "Plasma/Blood/Serum", "272",
      "HTMAX", "Half Tmax", "HTMAX", "Plasma/Blood/Serum", "273",
      "LAMZLTAU", "Lambda z Lower Limit TAU", "LAMZLTAU", "Plasma/Blood/Serum", "274",
      "LAMZNTAU", "Number of Points for Lambda z TAU", "LAMZNTAU", "Plasma/Blood/Serum", "275",
      "LAMZSPN", "Lambda z Span", "LAMZSPN", "Plasma/Blood/Serum", "276",
      "LAMZTAU", "Lambda z TAU", "LAMZTAU", "Plasma/Blood/Serum", "277",
      "LAMZUTAU", "Lambda z Upper Limit TAU", "LAMZUTAU", "Plasma/Blood/Serum", "278",
      "MAT", "Mean Absorption Time", "MAT", "Plasma/Blood/Serum", "279",
      "MRAUCIFO", "Metabolite Ratio for AUC Infinity Obs", "MRAUCIFO", "Plasma/Blood/Serum", "280",
      "MRAUCIFP", "Metabolite Ratio for AUC Infinity Pred", "MRAUCIFP", "Plasma/Blood/Serum", "281",
      "MRAUCINT", "Metabolite Ratio AUC from T1 to T2", "MRAUCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "282",
      "MRAUCLST", "Metabolite Ratio AUC Last Nonzero Conc", "MRAUCLST", "Plasma/Blood/Serum", "283",
      "MRAUCTAU", "Metabolite Ratio for AUC Dosing Interval", "MRAUCTAU", "Plasma/Blood/Serum", "284",
      "MRCMAX", "Metabolite Ratio for Max Conc", "MRCMAX", "Plasma/Blood/Serum", "285",
      "MRTEVIFO", "MRT Extravasc Infinity Obs", "MRTEVIFO", "Plasma/Blood/Serum", "286",
      "MRTEVIFP", "MRT Extravasc Infinity Pred", "MRTEVIFP", "Plasma/Blood/Serum", "287",
      "MRTEVLST", "MRT Extravasc to Last Nonzero Conc", "MRTEVLST", "Plasma/Blood/Serum", "288",
      "MRTIVIFO", "MRT Intravasc Infinity Obs", "MRTIVIFO", "Plasma/Blood/Serum", "289",
      "MRTIVIFP", "MRT Intravasc Infinity Pred", "MRTIVIFP", "Plasma/Blood/Serum", "290",
      "MRTIVLST", "MRT Intravasc to Last Nonzero Conc", "MRTIVLST", "Plasma/Blood/Serum", "291",
      "NRENALCL", "Nonrenal CL", "NRENALCL", "Urine", "292",
      "NRENLCLB", "Nonrenal CL Norm by BMI", "NRENLCLB", "Urine", "293",
      "NRENLCLD", "Nonrenal CL Norm by Dose", "NRENLCLD", "Urine", "294",
      "NRENLCLS", "Nonrenal CL Norm by SA", "NRENLCLS", "Urine", "295",
      "NRENLCLW", "Nonrenal CL Norm by WT", "NRENLCLW", "Urine", "296",
      "PTROUGHR", "Peak Trough Ratio", "PTROUGHR", "Plasma/Blood/Serum", "297",
      "RAAUC", "Ratio AUC", "RAAUC", "Plasma/Blood/Serum", "298",
      "RAAUCIFO", "Ratio AUC Infinity Obs", "RAAUCIFO", "Plasma/Blood/Serum", "299",
      "RAAUCIFP", "Ratio AUC Infinity Pred", "RAAUCIFP", "Plasma/Blood/Serum", "300",
      "RACMAX", "Ratio CMAX", "RACMAX", "Plasma/Blood/Serum", "301",
      "RAMAXMIN", "Ratio of CMAX to CMIN", "RAMAXMIN", "Plasma/Blood/Serum", "302",
      "RCAMIFO", "Amt Rec Infinity Obs", "RCAMIFO", "Plasma/Blood/Serum", "303",
      "RCAMIFOB", "Amt Rec Infinity Obs Norm by BMI", "RCAMIFOB", "Plasma/Blood/Serum", "304",
      "RCAMIFOS", "Amt Rec Infinity Obs Norm by SA", "RCAMIFOS", "Plasma/Blood/Serum", "305",
      "RCAMIFOW", "Amt Rec Infinity Obs Norm by WT", "RCAMIFOW", "Plasma/Blood/Serum", "306",
      "RCAMIFP", "Amt Rec Infinity Pred", "RCAMIFP", "Plasma/Blood/Serum", "307",
      "RCAMIFPB", "Amt Rec Infinity Pred Norm by BMI", "RCAMIFPB", "Plasma/Blood/Serum", "308",
      "RCAMIFPS", "Amt Rec Infinity Pred Norm by SA", "RCAMIFPS", "Plasma/Blood/Serum", "309",
      "RCAMIFPW", "Amt Rec Infinity Pred Norm by WT", "RCAMIFPW", "Plasma/Blood/Serum", "310",
      "RCAMINTB", "Amt Rec from T1 to T2 Norm by BMI", "RCAMINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "311",
      "RCAMINTS", "Amt Rec from T1 to T2 Norm by SA", "RCAMINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "312",
      "RCAMINTW", "Amt Rec from T1 to T2 Norm by WT", "RCAMINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "313",
      "RCAMTAU", "Amt Rec Over Dosing Interval", "RCAMTAU", "Plasma/Blood/Serum", "314",
      "RCAMTAUB", "Amt Rec Over Dosing Interval Norm by BMI", "RCAMTAUB", "Plasma/Blood/Serum", "315",
      "RCAMTAUS", "Amt Rec Over Dosing Interval Norm by SA", "RCAMTAUS", "Plasma/Blood/Serum", "316",
      "RCAMTAUW", "Amt Rec Over Dosing Interval Norm by WT", "RCAMTAUW", "Plasma/Blood/Serum", "317",
      "RCPCIFO", "Pct Rec Infinity Obs", "RCPCIFO", "Plasma/Blood/Serum", "318",
      "RCPCIFOB", "Pct Rec Infinity Obs Norm by BMI", "RCPCIFOB", "Plasma/Blood/Serum", "319",
      "RCPCIFOS", "Pct Rec Infinity Obs Norm by SA", "RCPCIFOS", "Plasma/Blood/Serum", "320",
      "RCPCIFOW", "Pct Rec Infinity Obs Norm by WT", "RCPCIFOW", "Plasma/Blood/Serum", "321",
      "RCPCIFP", "Pct Rec Infinity Pred", "RCPCIFP", "Plasma/Blood/Serum", "322",
      "RCPCIFPB", "Pct Rec Infinity Pred Norm by BMI", "RCPCIFPB", "Plasma/Blood/Serum", "323",
      "RCPCIFPS", "Pct Rec Infinity Pred Norm by SA", "RCPCIFPS", "Plasma/Blood/Serum", "324",
      "RCPCIFPW", "Pct Rec Infinity Pred Norm by WT", "RCPCIFPW", "Plasma/Blood/Serum", "325",
      "RCPCINTB", "Pct Rec from T1 to T2 Norm by BMI", "RCPCINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "326",
      "RCPCINTS", "Pct Rec from T1 to T2 Norm by SA", "RCPCINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "327",
      "RCPCINTW", "Pct Rec from T1 to T2 Norm by WT", "RCPCINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "328",
      "RCPCLST", "Pct Rec to Last Nonzero Conc", "RCPCLST", "Plasma/Blood/Serum", "329",
      "RCPCTAU", "Pct Rec Over Dosing Interval", "RCPCTAU", "Plasma/Blood/Serum", "330",
      "RCPCTAUB", "Pct Rec Over Dosing Interval Norm by BMI", "RCPCTAUB", "Plasma/Blood/Serum", "331",
      "RCPCTAUS", "Pct Rec Over Dosing Interval Norm by SA", "RCPCTAUS", "Plasma/Blood/Serum", "332",
      "RCPCTAUW", "Pct Rec Over Dosing Interval Norm by WT", "RCPCTAUW", "Plasma/Blood/Serum", "333",
      "RENALCLB", "Renal CL Norm by BMI", "RENALCLB", "Urine", "334",
      "RENALCLD", "Renal CL Norm by Dose", "RENALCLD", "Urine", "335",
      "RENALCLS", "Renal CL Norm by SA", "RENALCLS", "Urine", "336",
      "RENALCLW", "Renal CL Norm by WT", "RENALCLW", "Urine", "337",
      "RENCLTAU", "Renal CL for Dose Int", "RENCLTAU", "Urine", "338",
      "RNCLINT", "Renal CL from T1 to T2", "RNCLINT_T1_T2_UNIT", "Urine", "339",
      "RNCLINTB", "Renal CL from T1 to T2 Norm by BMI", "RNCLINTB_T1_T2_UNIT", "Urine", "340",
      "RNCLINTD", "Renal CL from T1 to T2 Norm by Dose", "RNCLINTD_T1_T2_UNIT", "Urine", "341",
      "RNCLINTS", "Renal CL from T1 to T2 Norm by SA", "RNCLINTS_T1_T2_UNIT", "Urine", "342",
      "RNCLINTW", "Renal CL from T1 to T2 Norm by WT", "RNCLINTW_T1_T2_UNIT", "Urine", "343",
      "RNCLTAUB", "Renal CL for Dose Int Norm by BMI", "RNCLTAUB", "Urine", "344",
      "RNCLTAUD", "Renal CL for Dose Int Norm by Dose", "RNCLTAUD", "Urine", "345",
      "RNCLTAUS", "Renal CL for Dose Int Norm by SA", "RNCLTAUS", "Urine", "346",
      "RNCLTAUW", "Renal CL for Dose Int Norm by WT", "RNCLTAUW", "Urine", "347",
      "RNCLUB", "Renal CL for Unbound Drug", "RNCLUB", "Urine", "348",
      "SRAUC", "Stationarity Ratio AUC", "SRAUC", "Plasma/Blood/Serum", "349",
      "SWING", "Swing", "SWING", "Plasma/Blood/Serum", "350",
      "TAUHL", "Half-Life TAU", "TAUHL", "Plasma/Blood/Serum", "351",
      "TBBL", "Time Below Baseline", "Time_Below_B", "Plasma/Blood/Serum", "352",
      "TROUGHPR", "Trough Peak Ratio", "TROUGHPR", "Plasma/Blood/Serum", "353",
      "V0", "Vol Dist Initial", "V0", "Plasma/Blood/Serum", "354",
      "V0B", "Vol Dist Initial Norm by BMI", "V0B", "Plasma/Blood/Serum", "355",
      "V0D", "Vol Dist Initial Norm by Dose", "V0D", "Plasma/Blood/Serum", "356",
      "V0S", "Vol Dist Initial Norm by SA", "V0S", "Plasma/Blood/Serum", "357",
      "V0W", "Vol Dist Initial Norm by WT", "V0W", "Plasma/Blood/Serum", "358",
      "VSSOB", "Vol Dist Steady State Obs Norm by BMI", "VSSOB", "Plasma/Blood/Serum", "359",
      "VSSOBD", "Vol Dist Steady State Obs by B", "VSSOBD", "Plasma/Blood/Serum", "360",
      "VSSOD", "Vol Dist Steady State Obs Norm by Dose", "VSSOD", "Plasma/Blood/Serum", "361",
      "VSSOF", "Vol Dist Steady State Obs by F", "VSSOF", "Plasma/Blood/Serum", "362",
      "VSSOS", "Vol Dist Steady State Obs Norm by SA", "VSSOS", "Plasma/Blood/Serum", "363",
      "VSSOUB", "Vol Dist Steady State Obs by UB", "VSSOUB", "Plasma/Blood/Serum", "364",
      "VSSOW", "Vol Dist Steady State Obs Norm by WT", "VSSOW", "Plasma/Blood/Serum", "365",
      "VSSPB", "Vol Dist Steady State Pred Norm by BMI", "VSSPB", "Plasma/Blood/Serum", "366",
      "VSSPBD", "Vol Dist Steady State Pred by B", "VSSPBD", "Plasma/Blood/Serum", "367",
      "VSSPD", "Vol Dist Steady State Pred Norm by Dose", "VSSPD", "Plasma/Blood/Serum", "368",
      "VSSPF", "Vol Dist Steady State Pred by F", "VSSPF", "Plasma/Blood/Serum", "369",
      "VSSPS", "Vol Dist Steady State Pred Norm by SA", "VSSPS", "Plasma/Blood/Serum", "370",
      "VSSPUB", "Vol Dist Steady State Pred by UB", "VSSPUB", "Plasma/Blood/Serum", "371",
      "VSSPW", "Vol Dist Steady State Pred Norm by WT", "VSSPW", "Plasma/Blood/Serum", "372",
      "VZ", "Vol Z", "Vz", "Plasma/Blood/Serum", "373",
      "VZF", "Vol Z by F", "Vz_F", "Plasma/Blood/Serum", "374",
      "VZFOB", "Vz Obs by F Norm by BMI", "VZFOB", "Plasma/Blood/Serum", "375",
      "VZFOD", "Vz Obs by F Norm by Dose", "VZFOD", "Plasma/Blood/Serum", "376",
      "VZFOS", "Vz Obs by F Norm by SA", "VZFOS", "Plasma/Blood/Serum", "377",
      "VZFOUB", "Vz Obs by F for UB", "VZFOUB", "Plasma/Blood/Serum", "378",
      "VZFOW", "Vz Obs by F Norm by WT", "VZFOW", "Plasma/Blood/Serum", "379",
      "VZFPB", "Vz Pred by F Norm by BMI", "VZFPB", "Plasma/Blood/Serum", "380",
      "VZFPD", "Vz Pred by F Norm by Dose", "VZFPD", "Plasma/Blood/Serum", "381",
      "VZFPS", "Vz Pred by F Norm by SA", "VZFPS", "Plasma/Blood/Serum", "382",
      "VZFPUB", "Vz Pred by F for UB", "VZFPUB", "Plasma/Blood/Serum", "383",
      "VZFPW", "Vz Pred by F Norm by WT", "VZFPW", "Plasma/Blood/Serum", "384",
      "VZFTAU", "Vz for Dose Int by F", "VZFTAU", "Plasma/Blood/Serum", "385",
      "VZFTAUB", "Vz for Dose Int by F Norm by BMI", "VZFTAUB", "Plasma/Blood/Serum", "386",
      "VZFTAUD", "Vz for Dose Int by F Norm by Dose", "VZFTAUD", "Plasma/Blood/Serum", "387",
      "VZFTAUS", "Vz for Dose Int by F Norm by SA", "VZFTAUS", "Plasma/Blood/Serum", "388",
      "VZFTAUW", "Vz for Dose Int by F Norm by WT", "VZFTAUW", "Plasma/Blood/Serum", "389",
      "VZOB", "Vz Obs Norm by BMI", "VZOB", "Plasma/Blood/Serum", "390",
      "VZOD", "Vz Obs Norm by Dose", "VZOD", "Plasma/Blood/Serum", "391",
      "VZOS", "Vz Obs Norm by SA", "VZOS", "Plasma/Blood/Serum", "392",
      "VZOUB", "Vz Obs for UB", "VZOUB", "Plasma/Blood/Serum", "393",
      "VZOW", "Vz Obs Norm by WT", "VZOW", "Plasma/Blood/Serum", "394",
      "VZPB", "Vz Pred Norm by BMI", "VZPB", "Plasma/Blood/Serum", "395",
      "VZPD", "Vz Pred Norm by Dose", "VZPD", "Plasma/Blood/Serum", "396",
      "VZPS", "Vz Pred Norm by SA", "VZPS", "Plasma/Blood/Serum", "397",
      "VZPUB", "Vz Pred for UB", "VZPUB", "Plasma/Blood/Serum", "398"
    ),
    ncol = 5,
    byrow = TRUE
  ))
  colnames(pk_dataset) <- c("PARAMCD", "PARAM", "TLG_DISPLAY", "MATRIX", "TLG_ORDER")
  pk_dataset
}

#' Counting Missed Doses
#'
#' @description `r lifecycle::badge("stable")`
#'
#' These are specific functions to count patients with missed doses. The difference to [count_cumulative()] is
#' mainly the special labels.
#'
#' @inheritParams argument_convention
#'
#' @seealso Relevant description function [d_count_missed_doses()].
#'
#' @name count_missed_doses
NULL

#' @describeIn count_missed_doses Statistics function to count non-missing values.
#'
#' @return
#' * `s_count_nonmissing()` returns the statistic `n` which is the count of non-missing values in `x`.
#'
#' @examples
#' set.seed(1)
#' x <- c(sample(1:10, 10), NA)
#'
#' # Internal function - s_count_nonmissing
#' \dontrun{
#' s_count_nonmissing(x)
#' }
#'
#' @keywords internal
s_count_nonmissing <- function(x) {
  # Delegate the counting to the `n_available()` helper and expose it as statistic `n`.
  n_nonmissing <- n_available(x)
  list(n = n_nonmissing)
}

#' Description Function that Calculates Labels for [s_count_missed_doses()].
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams s_count_missed_doses
#'
#' @return [d_count_missed_doses()] returns a `character` vector with the labels.
#'
#' @seealso [s_count_missed_doses()]
#'
#' @export
d_count_missed_doses <- function(thresholds) {
  # Pluralize "dose" only for thresholds above one; `ifelse()` keeps this vectorized.
  plural_suffix <- ifelse(thresholds > 1, "s", "")
  paste0("At least ", thresholds, " missed dose", plural_suffix)
}

#' @describeIn count_missed_doses Statistics function to count patients with missed doses.
#'
#' @param thresholds (vector of `count`)\cr number of missed doses the patients at least had.
#'
#' @return
#' * `s_count_missed_doses()` returns the statistics `n` and `count_fraction` with one element for each threshold.
#'
#' @examples
#' # Internal function - s_count_missed_doses
#' \dontrun{
#' s_count_missed_doses(x = c(0, 1, 0, 2, 3, 4, 0, 2), thresholds = c(2, 5), .N_col = 10)
#' }
#'
#' @keywords internal
s_count_missed_doses <- function(x,
                                 thresholds,
                                 .N_col) { # nolint
  # Cumulative counts taken from the upper tail with the threshold itself included,
  # i.e. values of `x` that are at least as large as each threshold.
  stat <- s_count_cumulative(
    x = x,
    thresholds = thresholds,
    lower_tail = FALSE,
    include_eq = TRUE,
    .N_col = .N_col
  )

  # Attach the "At least ..." label to each per-threshold entry of `count_fraction`.
  threshold_labels <- d_count_missed_doses(thresholds)
  fractions <- stat$count_fraction
  for (idx in seq_along(fractions)) {
    fractions[[idx]] <- formatters::with_label(fractions[[idx]], label = threshold_labels[idx])
  }
  stat$count_fraction <- fractions

  # The non-missing count `n` comes first, followed by the cumulative statistics.
  c(s_count_nonmissing(x), stat)
}

#' @describeIn count_missed_doses Formatted analysis function which is used as `afun`
#'   in `count_missed_doses()`.
#'
#' @return
#' * `a_count_missed_doses()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_count_missed_doses
#' \dontrun{
#' #  We need to ungroup `count_fraction` first so that the `rtables` formatting
#' # function `format_count_fraction()` can be applied correctly.
#' afun <- make_afun(a_count_missed_doses, .ungroup_stats = "count_fraction")
#' afun(x = c(0, 1, 0, 2, 3, 4, 0, 2), thresholds = c(2, 5), .N_col = 10)
#' }
#'
#' @keywords internal
a_count_missed_doses <- make_afun(
  fun = s_count_missed_doses,
  # Mixing a format string with a format function yields a list either way,
  # so the formats are spelled out as a list directly.
  .formats = list(
    n = "xx",
    count_fraction = format_count_fraction
  )
)

#' @describeIn count_missed_doses Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @inheritParams s_count_cumulative
#'
#' @return
#' * `count_missed_doses()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_missed_doses()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' anl <- tern_ex_adsl %>%
#'   distinct(STUDYID, USUBJID, ARM) %>%
#'   mutate(
#'     PARAMCD = "TNDOSMIS",
#'     PARAM = "Total number of missed doses during study",
#'     AVAL = sample(0:20, size = nrow(tern_ex_adsl), replace = TRUE),
#'     AVALC = ""
#'   )
#'
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_missed_doses("AVAL", thresholds = c(1, 5, 10, 15), var_labels = "Missed Doses") %>%
#'   build_table(anl, alt_counts_df = tern_ex_adsl)
#'
#' @export
count_missed_doses <- function(lyt,
                               vars,
                               var_labels = vars,
                               show_labels = "visible",
                               ...,
                               table_names = vars,
                               .stats = NULL,
                               .formats = NULL,
                               .labels = NULL,
                               .indent_mods = NULL) {
  # Specialize the formatted analysis function with the user-supplied overrides.
  # `count_fraction` holds one entry per threshold, so it is ungrouped here.
  missed_doses_afun <- make_afun(
    fun = a_count_missed_doses,
    .ungroup_stats = "count_fraction",
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Everything passed through `...` is handed on via `extra_args`.
  analyze(
    lyt = lyt,
    vars = vars,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    afun = missed_doses_afun,
    extra_args = list(...)
  )
}

#' Survival Time Analysis
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize median survival time and CIs, percentiles of survival times, survival
#' time range of censored/event patients.
#'
#' @inheritParams argument_convention
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
#'   [control_surv_time()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for survival time.
#'   * `conf_type` (`string`)\cr confidence interval type. Options are "plain" (default), "log", or "log-log",
#'     see more in [survival::survfit()]. Note option "none" is not supported.
#'   * `quantiles` (`numeric`)\cr vector of length two to specify the quantiles of survival time.
#'
#' @name survival_time
NULL

#' @describeIn survival_time Statistics function which analyzes survival times.
#'
#' @return
#' * `s_surv_time()` returns the statistics:
#'   * `median`: Median survival time.
#'   * `median_ci`: Confidence interval for median time.
#'   * `quantiles`: Survival time for two specified quantiles.
#'   * `range_censor`: Survival time range for censored observations.
#'   * `range_event`: Survival time range for observations with events.
#'   * `range`: Survival time range for all observations.
#'
#' @examples
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVAL = day2month(AVAL),
#'     is_event = CNSR == 0
#'   )
#' df <- adtte_f %>% filter(ARMCD == "ARM A")
#'
#' # Internal function - s_surv_time
#' \dontrun{
#' s_surv_time(df, .var = "AVAL", is_event = "is_event")
#' }
#'
#' @keywords internal
s_surv_time <- function(df,
                        .var,
                        is_event,
                        control = control_surv_time()) {
  # Validate inputs: `.var` must name a complete numeric column and `is_event`
  # a complete logical column of `df`, each with at least one entry.
  checkmate::assert_string(.var)
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE)
  checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE)

  ci_level <- control$conf_level
  probs <- control$quantiles

  # Fit the survival curve without covariates (`~ 1`), using the requested
  # confidence level and confidence interval type.
  km_formula <- stats::as.formula(paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1"))
  km_fit <- survival::survfit(
    formula = km_formula,
    data = df,
    conf.int = ci_level,
    conf.type = control$conf_type
  )
  km_table <- summary(km_fit, extend = TRUE)$table
  km_quantiles <- stats::quantile(km_fit, probs = probs)$quantile

  # The summary table names its CI columns "<level>LCL" and "<level>UCL".
  ci_columns <- paste0(km_fit$conf.int, c("LCL", "UCL"))

  times <- df[[.var]]
  had_event <- df[[is_event]]

  list(
    median = formatters::with_label(unname(km_table["median"]), "Median"),
    median_ci = formatters::with_label(
      unname(km_table[ci_columns]),
      f_conf_level(ci_level)
    ),
    quantiles = formatters::with_label(
      unname(km_quantiles),
      paste0(probs[1] * 100, "% and ", probs[2] * 100, "%-ile")
    ),
    range_censor = formatters::with_label(
      range_noinf(times[!had_event], na.rm = TRUE),
      "Range (censored)"
    ),
    range_event = formatters::with_label(
      range_noinf(times[had_event], na.rm = TRUE),
      "Range (event)"
    ),
    range = formatters::with_label(
      range_noinf(times, na.rm = TRUE),
      "Range"
    )
  )
}

#' @describeIn survival_time Formatted analysis function which is used as `afun` in `surv_time()`.
#'
#' @return
#' * `a_surv_time()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_surv_time
#' \dontrun{
#' a_surv_time(df, .var = "AVAL", is_event = "is_event")
#' }
#'
#' @keywords internal
a_surv_time <- make_afun(
  fun = s_surv_time,
  # One-decimal formats for every statistic; pairs are shown either as an
  # interval, a comma-separated pair, or a "from to" range.
  .formats = c(
    median = "xx.x",
    median_ci = "(xx.x, xx.x)",
    quantiles = "xx.x, xx.x",
    range_censor = "xx.x to xx.x",
    range_event = "xx.x to xx.x",
    range = "xx.x to xx.x"
  ),
  # Only the confidence interval of the median gets an extra indent level.
  .indent_mods = c(
    median = 0L,
    median_ci = 1L,
    quantiles = 0L,
    range_censor = 0L,
    range_event = 0L,
    range = 0L
  )
)

#' @describeIn survival_time Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `surv_time()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_surv_time()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD") %>%
#'   add_colcounts() %>%
#'   surv_time(
#'     vars = "AVAL",
#'     var_labels = "Survival Time (Months)",
#'     is_event = "is_event",
#'     control = control_surv_time(conf_level = 0.9, conf_type = "log-log")
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' @export
surv_time <- function(lyt,
                      vars,
                      ...,
                      var_labels = "Time to Event",
                      table_names = vars,
                      .stats = c("median", "median_ci", "quantiles", "range_censor", "range_event"),
                      .formats = NULL,
                      .labels = NULL,
                      .indent_mods = NULL) {
  # Specialize the formatted analysis function with the user-supplied overrides.
  surv_time_afun <- make_afun(
    fun = a_surv_time,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # The variable label is always shown; arguments in `...` travel via `extra_args`.
  analyze(
    lyt,
    vars,
    afun = surv_time_afun,
    var_labels = var_labels,
    table_names = table_names,
    show_labels = "visible",
    extra_args = list(...)
  )
}

#' Counting Specific Values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' We can count the occurrence of specific values in a variable of interest.
#'
#' @inheritParams argument_convention
#'
#' @note
#' * For `factor` variables, `s_count_values` converts `x` and `values` to `character` before counting, so
#'   `values` that are not among the levels of `x` are simply counted zero times.
#' * For `count_values()`, variable labels are shown when there is more than one element in `vars`,
#'   otherwise they are hidden.
#'
#' @name count_values_funs
NULL

#' @describeIn count_values_funs S3 generic function to count values.
#'
#' @inheritParams s_summary.logical
#' @param values (`character`)\cr specific values that should be counted.
#'
#' @return
#' * `s_count_values()` returns output of [s_summary()] for specified values of a non-numeric variable.
#'
#' @export
s_count_values <- function(x,
                           values,
                           na.rm = TRUE, # nolint
                           .N_col, # nolint
                           .N_row, # nolint
                           denom = c("n", "N_row", "N_col")) {
  # S3 dispatch on the class of `x`; as the first argument it is the default
  # dispatch object, so it does not need to be named explicitly.
  UseMethod("s_count_values")
}

#' @describeIn count_values_funs Method for `character` class.
#'
#' @method s_count_values character
#'
#' @examples
#' # `s_count_values.character`
#' s_count_values(x = c("a", "b", "a"), values = "a")
#' s_count_values(x = c("a", "b", "a", NA, NA), values = "b", na.rm = FALSE)
#'
#' @export
s_count_values.character <- function(x,
                                     values = "Y",
                                     na.rm = TRUE, # nolint
                                     ...) {
  checkmate::assert_character(values)

  # Optionally drop missing entries before matching.
  observed <- if (na.rm) x[!is.na(x)] else x

  # `%in%` never yields `NA`, so any retained missing entries count as non-matches.
  matches <- observed %in% values

  # Summarize the logical match indicator; extra arguments are passed along.
  s_summary(matches, ...)
}

#' @describeIn count_values_funs Method for `factor` class. This makes an automatic
#'   conversion to `character` and then forwards to the method for characters.
#'
#' @method s_count_values factor
#'
#' @examples
#' # `s_count_values.factor`
#' s_count_values(x = factor(c("a", "b", "a")), values = "a")
#'
#' @export
s_count_values.factor <- function(x,
                                  values = "Y",
                                  ...) {
  # Work on the level labels: both the data and the target values are turned
  # into character before re-dispatching on the generic.
  x_chr <- as.character(x)
  values_chr <- as.character(values)
  s_count_values(x_chr, values = values_chr, ...)
}

#' @describeIn count_values_funs Method for `logical` class.
#'
#' @method s_count_values logical
#'
#' @examples
#' # `s_count_values.logical`
#' s_count_values(x = c(TRUE, FALSE, TRUE))
#'
#' @export
s_count_values.logical <- function(x, values = TRUE, ...) {
  checkmate::assert_logical(values)

  # Compare on the text form ("TRUE"/"FALSE") by re-dispatching on the generic
  # with character versions of the data and the target values.
  x_chr <- as.character(x)
  values_chr <- as.character(values)
  s_count_values(x_chr, values = values_chr, ...)
}

#' @describeIn count_values_funs Formatted analysis function which is used as `afun`
#'   in `count_values()`.
#'
#' @return
#' * `a_count_values()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # `a_count_values`
#' a_count_values(x = factor(c("a", "b", "a")), values = "a", .N_col = 10, .N_row = 10)
#'
#' @export
a_count_values <- make_afun(
  fun = s_count_values,
  # Default display formats for the two statistics named here.
  .formats = c(
    count_fraction = "xx (xx.xx%)",
    count = "xx"
  )
)

#' @describeIn count_values_funs Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_values()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_values()` to the table layout.
#'
#' @examples
#' # `count_values`
#' basic_table() %>%
#'   count_values("Species", values = "setosa") %>%
#'   build_table(iris)
#'
#' @export
count_values <- function(lyt,
                         vars,
                         values,
                         ...,
                         table_names = vars,
                         .stats = "count_fraction",
                         .formats = NULL,
                         .labels = c(count_fraction = paste(values, collapse = ", ")),
                         .indent_mods = NULL) {
  # Customize the formatted analysis function with the user-supplied settings.
  afun <- make_afun(
    a_count_values,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Variable labels are shown only when more than one variable is analyzed.
  # The condition is a scalar, so plain `if`/`else` is used rather than the vectorized `ifelse()`.
  show_labels <- if (length(vars) > 1) "visible" else "hidden"

  analyze(
    lyt,
    vars,
    afun = afun,
    # `values` and any further arguments in `...` are forwarded to the analysis function.
    extra_args = c(list(values = values), list(...)),
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Control Function for CoxPH Model
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for CoxPH model, typically used internally to specify
#' details of CoxPH model for [s_coxph_pairwise()]. `conf_level` refers to Hazard Ratio estimation.
#'
#' @inheritParams argument_convention
#' @param pval_method (`string`)\cr p-value method for testing hazard ratio = 1.
#'   Default method is `"log-rank"`, can also be set to `"wald"` or `"likelihood"`.
#' @param ties (`string`)\cr specifying the method for tie handling. Default is `"efron"`,
#'   can also be set to `"breslow"` or `"exact"`. See more in [survival::coxph()].
#'
#' @return A list of components with the same names as the arguments
#'
#' @export
control_coxph <- function(pval_method = c("log-rank", "wald", "likelihood"),
                          ties = c("efron", "breslow", "exact"),
                          conf_level = 0.95) {
  # Resolve the enumerated options first (the first choice is the default).
  selected <- list(
    pval_method = match.arg(pval_method),
    ties = match.arg(ties)
  )
  assert_proportion_value(conf_level)

  # Element order matches the argument order of the function.
  c(selected, list(conf_level = conf_level))
}

#' Control Function for `survfit` Model for Survival Time
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for `survfit` model, typically used internally to specify
#' details of `survfit` model for [s_surv_time()]. `conf_level` refers to survival time estimation.
#'
#' @inheritParams argument_convention
#' @param conf_type (`string`)\cr confidence interval type. Options are "plain" (default), "log", "log-log",
#'   see more in [survival::survfit()]. Note option "none" is no longer supported.
#' @param quantiles (`numeric`)\cr of length two to specify the quantiles of survival time.
#'
#' @return A list of components with the same names as the arguments
#'
#' @export
control_surv_time <- function(conf_level = 0.95,
                              conf_type = c("plain", "log", "log-log"),
                              quantiles = c(0.25, 0.75)) {
  conf_type <- match.arg(conf_type)

  # `quantiles` must be two distinct, increasing numbers within [0, 1].
  checkmate::assert_numeric(quantiles, lower = 0, upper = 1, len = 2, unique = TRUE, sorted = TRUE)

  # Each quantile is additionally checked with `assert_proportion_value()`. The calls are made
  # purely for validation, so a plain loop is used instead of storing an unused `lapply()` result.
  for (quantile in quantiles) {
    assert_proportion_value(quantile)
  }
  assert_proportion_value(conf_level)

  list(conf_level = conf_level, conf_type = conf_type, quantiles = quantiles)
}

#' Control Function for `survfit` Model for Patient's Survival Rate at time point
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for `survfit` model, typically used internally to specify
#' details of `survfit` model for [s_surv_timepoint()]. `conf_level` refers to patient risk estimation at a time point.
#'
#' @inheritParams argument_convention
#' @inheritParams control_surv_time
#'
#' @return A list of components with the same names as the arguments
#'
#' @export
control_surv_timepoint <- function(conf_level = 0.95,
                                   conf_type = c("plain", "log", "log-log")) {
  # Resolve the enumerated option (the first choice is the default), then validate the level.
  conf_type <- match.arg(conf_type)
  assert_proportion_value(conf_level)

  control <- list(conf_level = conf_level, conf_type = conf_type)
  control
}

#' Control function for incidence rate
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for the incidence rate, used
#' internally to specify details in `s_incidence_rate()`.
#'
#' @inheritParams argument_convention
#' @param time_unit_input (`string`)\cr `day`, `week`, `month`, or `year` (default)
#'   indicating time unit for data input.
#' @param time_unit_output (`numeric`)\cr time unit for desired output (in person-years).
#' @param conf_type (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
#'   for confidence interval type.
#'
#' @return A list of components with the same names as the arguments.
#'
#' @seealso [incidence_rate]
#'
#' @examples
#' control_incidence_rate(0.9, "exact", "month", 100)
#'
#' @export
control_incidence_rate <- function(conf_level = 0.95,
                                   conf_type = c("normal", "normal_log", "exact", "byar"),
                                   time_unit_input = c("year", "day", "week", "month"),
                                   time_unit_output = 1) {
  # Resolve the enumerated options (the first choice of each is the default).
  conf_type <- match.arg(conf_type)
  time_unit_input <- match.arg(time_unit_input)

  # `time_unit_output` must be a single number; `conf_level` must be a valid proportion.
  checkmate::assert_number(time_unit_output)
  assert_proportion_value(conf_level)

  control <- list(
    conf_level = conf_level,
    conf_type = conf_type,
    time_unit_input = time_unit_input,
    time_unit_output = time_unit_output
  )
  control
}

#' Patient Counts with the Most Extreme Post-baseline Toxicity Grade per Direction of Abnormality
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Primary analysis variable `.var` indicates the toxicity grade (`factor`), and additional
#' analysis variables are `id` (`character` or `factor`), `param` (`factor`) and `grade_dir` (`factor`).
#' The pre-processing steps are crucial when using this function.
#' For a certain direction (e.g. high or low) this function counts
#' patients in the denominator as number of patients with at least one valid measurement during treatment,
#' and patients in the numerator as follows:
#'   * `1` to `4`: Numerator is number of patients with worst grades 1-4 respectively;
#'   * `Any`: Numerator is number of patients with at least one abnormality, which means grade is different from 0.
#'
#' @inheritParams argument_convention
#'
#' @details The pre-processing steps are crucial when using this function. From the standard lab grade variable
#'   `ATOXGR`, derive the following two variables:
#'   * A grade direction variable (e.g. `GRADE_DIR`) is required in order to obtain
#'     the correct denominators when building the layout as it is used to define row splitting.
#'   * A toxicity grade variable (e.g. `GRADE_ANL`) where all negative values from
#'     `ATOXGR` are replaced by their absolute values.
#'
#' @note Prior to tabulation, `df` must be filtered to include only post-baseline records with worst grade flags.
#'
#' @name abnormal_by_worst_grade
NULL

#' @describeIn abnormal_by_worst_grade Statistics function which counts patients by worst grade.
#'
#' @return
#' * `s_count_abnormal_by_worst_grade()` returns the single statistic `count_fraction` with grades 1 to 4 and
#'   "Any" results.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#' adlb <- tern_ex_adlb
#'
#' # Data is modified in order to have some parameters with grades only in one direction
#' # and simulate the real data.
#' adlb$ATOXGR[adlb$PARAMCD == "ALT" & adlb$ATOXGR %in% c("1", "2", "3", "4")] <- "-1"
#' adlb$ANRIND[adlb$PARAMCD == "ALT" & adlb$ANRIND == "HIGH"] <- "LOW"
#' adlb$WGRHIFL[adlb$PARAMCD == "ALT"] <- ""
#'
#' adlb$ATOXGR[adlb$PARAMCD == "IGA" & adlb$ATOXGR %in% c("-1", "-2", "-3", "-4")] <- "1"
#' adlb$ANRIND[adlb$PARAMCD == "IGA" & adlb$ANRIND == "LOW"] <- "HIGH"
#' adlb$WGRLOFL[adlb$PARAMCD == "IGA"] <- ""
#'
#' # Here starts the real pre-processing.
#' adlb_f <- adlb %>%
#'   filter(!AVISIT %in% c("SCREENING", "BASELINE")) %>%
#'   mutate(
#'     GRADE_DIR = factor(
#'       case_when(
#'         ATOXGR %in% c("-1", "-2", "-3", "-4") ~ "LOW",
#'         ATOXGR == "0" ~ "ZERO",
#'         ATOXGR %in% c("1", "2", "3", "4") ~ "HIGH"
#'       ),
#'       levels = c("LOW", "ZERO", "HIGH")
#'     ),
#'     GRADE_ANL = fct_relevel(
#'       fct_recode(ATOXGR, `1` = "-1", `2` = "-2", `3` = "-3", `4` = "-4"),
#'       c("0", "1", "2", "3", "4")
#'     )
#'   ) %>%
#'   filter(WGRLOFL == "Y" | WGRHIFL == "Y") %>%
#'   droplevels()
#'
#' adlb_f_alt <- adlb_f %>%
#'   filter(PARAMCD == "ALT") %>%
#'   droplevels()
#' full_parent_df <- list(adlb_f_alt, "not_needed")
#' cur_col_subset <- list(rep(TRUE, nrow(adlb_f_alt)), "not_needed")
#'
#' # This mimics a split structure on PARAM and GRADE_DIR for a total column
#' spl_context <- data.frame(
#'   split = c("PARAM", "GRADE_DIR"),
#'   full_parent_df = I(full_parent_df),
#'   cur_col_subset = I(cur_col_subset)
#' )
#'
#' # Internal function - s_count_abnormal_by_worst_grade
#' \dontrun{
#' s_count_abnormal_by_worst_grade(
#'   df = adlb_f_alt,
#'   .spl_context = spl_context,
#'   .var = "GRADE_ANL"
#' )
#' }
#'
#' @keywords internal
s_count_abnormal_by_worst_grade <- function(df, # nolint
                                            .var = "GRADE_ANL",
                                            .spl_context,
                                            variables = list(
                                              id = "USUBJID",
                                              param = "PARAM",
                                              grade_dir = "GRADE_DIR"
                                            )) {
  checkmate::assert_string(.var)
  assert_valid_factor(df[[.var]])
  assert_valid_factor(df[[variables$param]])
  assert_valid_factor(df[[variables$grade_dir]])
  assert_df_with_variables(df, c(a = .var, variables))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))

  # To verify that the `split_rows_by` are performed with correct variables.
  checkmate::assert_subset(c(variables[["param"]], variables[["grade_dir"]]), .spl_context$split)
  first_row <- .spl_context[.spl_context$split == variables[["param"]], ]

  # One result entry per grade level other than "0", plus an "Any" entry.
  x_lvls <- c(setdiff(levels(df[[.var]]), "0"), "Any")
  result <- split(numeric(0), factor(x_lvls))

  # The denominator is taken from the parent data of the `param` split, restricted to the
  # current column, rather than from `df` itself.
  subj <- first_row$full_parent_df[[1]][[variables[["id"]]]]
  subj_cur_col <- subj[first_row$cur_col_subset[[1]]]
  # Some subjects may have a record for high and low directions but
  # should be counted only once.
  denom <- length(unique(subj_cur_col))

  for (lvl in x_lvls) {
    if (lvl != "Any") {
      df_lvl <- df[df[[.var]] == lvl, ]
    } else {
      # "Any" collects every record whose grade is different from 0.
      df_lvl <- df[df[[.var]] != 0, ]
    }
    # Count unique subjects using the configured id variable, so that the numerator uses the
    # same identifier column as the denominator (previously hard-coded to "USUBJID").
    num <- length(unique(df_lvl[[variables[["id"]]]]))
    # Scalar condition: avoid division by zero when no subject is in the denominator.
    fraction <- if (denom == 0) 0 else num / denom
    result[[lvl]] <- formatters::with_label(c(count = num, fraction = fraction), lvl)
  }

  list(count_fraction = result)
}

#' @describeIn abnormal_by_worst_grade Formatted analysis function which is used as `afun`
#'   in `count_abnormal_by_worst_grade()`.
#'
#' @return
#' * `a_count_abnormal_by_worst_grade()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_count_abnormal_by_worst_grade
#' \dontrun{
#' # Use the Formatted Analysis function for `analyze()`. We need to ungroup `count_fraction` first
#' # so that the `rtables` formatting function `format_count_fraction()` can be applied correctly.
#' afun <- make_afun(a_count_abnormal_by_worst_grade, .ungroup_stats = "count_fraction")
#' afun(df = adlb_f_alt, .spl_context = spl_context)
#' }
#'
#' @keywords internal
# Formatted counterpart of `s_count_abnormal_by_worst_grade()`; the `count_fraction`
# statistic is formatted with `format_count_fraction()`.
a_count_abnormal_by_worst_grade <- make_afun( # nolint
  fun = s_count_abnormal_by_worst_grade,
  .formats = c(
    count_fraction = format_count_fraction
  )
)

#' @describeIn abnormal_by_worst_grade Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_by_worst_grade()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal_by_worst_grade()` to the table layout.
#'
#' @examples
#' # Map excludes records without abnormal grade since they should not be displayed
#' # in the table.
#' map <- unique(adlb_f[adlb_f$GRADE_DIR != "ZERO", c("PARAM", "GRADE_DIR", "GRADE_ANL")]) %>%
#'   lapply(as.character) %>%
#'   as.data.frame() %>%
#'   arrange(PARAM, desc(GRADE_DIR), GRADE_ANL)
#'
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   split_rows_by("PARAM") %>%
#'   split_rows_by("GRADE_DIR", split_fun = trim_levels_to_map(map)) %>%
#'   count_abnormal_by_worst_grade(
#'     var = "GRADE_ANL",
#'     variables = list(id = "USUBJID", param = "PARAM", grade_dir = "GRADE_DIR")
#'   ) %>%
#'   build_table(df = adlb_f)
#'
#' @export
count_abnormal_by_worst_grade <- function(lyt,
                                          var,
                                          ...,
                                          .stats = NULL,
                                          .formats = NULL,
                                          .labels = NULL,
                                          .indent_mods = NULL) {
  # Customize the formatted analysis function. `count_fraction` is ungrouped so that
  # each of its elements is handled as a statistic of its own.
  grade_afun <- make_afun(
    a_count_abnormal_by_worst_grade,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "count_fraction"
  )

  # Everything passed through `...` is forwarded to the analysis function;
  # the variable label row is always hidden.
  analyze(
    lyt = lyt,
    vars = var,
    afun = grade_afun,
    extra_args = list(...),
    show_labels = "hidden"
  )
}

#' Summarize the Change from Baseline or Absolute Baseline Values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The primary analysis variable `.var` indicates the numerical change from baseline results,
#' and additional required secondary analysis variables are `value` and `baseline_flag`.
#' Depending on the baseline flag, either the absolute baseline values (at baseline)
#' or the change from baseline values (post-baseline) are then summarized.
#'
#' @inheritParams argument_convention
#'
#' @name summarize_change
NULL

#' @describeIn summarize_change Statistics function that summarizes baseline or post-baseline visits.
#'
#' @return
#' * `s_change_from_baseline()` returns the same values returned by [s_summary.numeric()].
#'
#' @note The data in `df` must either all be from baseline visits or all be from post-baseline visits.
#'   Otherwise an error will be thrown.
#'
#' @examples
#' df <- data.frame(
#'   chg = c(1, 2, 3),
#'   is_bl = c(TRUE, TRUE, TRUE),
#'   val = c(4, 5, 6)
#' )
#'
#' # Internal function - s_change_from_baseline
#' \dontrun{
#' s_change_from_baseline(
#'   df,
#'   .var = "chg",
#'   variables = list(value = "val", baseline_flag = "is_bl")
#' )
#' }
#'
#' @keywords internal
s_change_from_baseline <- function(df,
                                   .var,
                                   variables,
                                   na.rm = TRUE, # nolint
                                   ...) {
  # Input checks: numeric value and change columns, and a logical baseline flag
  # holding at most one distinct value within `df`.
  checkmate::assert_numeric(df[[variables$value]])
  checkmate::assert_numeric(df[[.var]])
  checkmate::assert_logical(df[[variables$baseline_flag]])
  checkmate::assert_vector(unique(df[[variables$baseline_flag]]), max.len = 1)
  assert_df_with_variables(df, c(variables, list(chg = .var)))

  # Row by row: take the absolute value where the baseline flag is set,
  # otherwise the change from baseline.
  is_baseline <- df[[variables$baseline_flag]]
  combined <- ifelse(is_baseline, df[[variables$value]], df[[.var]])

  # An empty logical result is replaced by an empty numeric vector before summarizing.
  if (length(combined) == 0L && is.logical(combined)) {
    combined <- numeric(0)
  }

  s_summary(combined, na.rm = na.rm, ...)
}

#' @describeIn summarize_change Formatted analysis function which is used as `afun` in `summarize_change()`.
#'
#' @return
#' * `a_change_from_baseline()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_change_from_baseline
#' \dontrun{
#' a_change_from_baseline(
#'   df,
#'   .var = "chg",
#'   variables = list(value = "val", baseline_flag = "is_bl")
#' )
#' }
#'
#' @keywords internal
# Formatted counterpart of `s_change_from_baseline()`, with default formats and
# row labels for the statistics listed below.
a_change_from_baseline <- make_afun(
  fun = s_change_from_baseline,
  .labels = c(
    mean_sd = "Mean (SD)",
    mean_se = "Mean (SE)",
    median = "Median",
    range = "Min - Max"
  ),
  .formats = c(
    n = "xx",
    mean_sd = "xx.xx (xx.xx)",
    mean_se = "xx.xx (xx.xx)",
    median = "xx.xx",
    range = "xx.xx - xx.xx",
    mean_ci = "(xx.xx, xx.xx)",
    median_ci = "(xx.xx, xx.xx)",
    mean_pval = "xx.xx"
  )
)

#' @describeIn summarize_change Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `summarize_change()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_change_from_baseline()` to the table layout.
#'
#' @note To be used after a split on visits in the layout, such that each data subset only contains
#'   either baseline or post-baseline data.
#'
#' @examples
#' # `summarize_change()`
#'
#' ## Fabricated dataset.
#' library(dplyr)
#'
#' dta_test <- data.frame(
#'   USUBJID = rep(1:6, each = 3),
#'   AVISIT = rep(paste0("V", 1:3), 6),
#'   ARM = rep(LETTERS[1:3], rep(6, 3)),
#'   AVAL = c(9:1, rep(NA, 9))
#' ) %>%
#'   mutate(ABLFLL = AVISIT == "V1") %>%
#'   group_by(USUBJID) %>%
#'   mutate(
#'     BLVAL = AVAL[ABLFLL],
#'     CHG = AVAL - BLVAL
#'   ) %>%
#'   ungroup()
#'
#' results <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   summarize_change("CHG", variables = list(value = "AVAL", baseline_flag = "ABLFLL")) %>%
#'   build_table(dta_test)
#' \dontrun{
#' Viewer(results)
#' }
#'
#' @export
summarize_change <- function(lyt,
                             vars,
                             ...,
                             table_names = vars,
                             .stats = c("n", "mean_sd", "median", "range"),
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL) {
  # Customize the formatted analysis function with the user-supplied settings.
  change_afun <- make_afun(
    a_change_from_baseline,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Everything passed through `...` is forwarded to the analysis function.
  analyze(
    lyt,
    vars,
    afun = change_afun,
    extra_args = list(...),
    table_names = table_names
  )
}

1		#' Missing Data
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Substitute missing data with a string or factor level.
6		#'
7		#' @param x (`factor` or `character` vector)\cr values for which any missing values should be substituted.
8		#' @param label (`character`)\cr string that missing data should be replaced with.
9		#'
10		#' @return `x` with any `NA` values substituted by `label`.
11		#'
12		#' @examples
13		#' explicit_na(c(NA, "a", "b"))
14		#' is.na(explicit_na(c(NA, "a", "b")))
15		#'
16		#' explicit_na(factor(c(NA, "a", "b")))
17		#' is.na(explicit_na(factor(c(NA, "a", "b"))))
18		#'
19		#' explicit_na(sas_na(c("a", "")))
20		#'
21		#' @export
22		explicit_na <- function(x, label = "<Missing>") {
23	406x	checkmate::assert_string(label)
24
25	406x	if (is.factor(x)) {
26	304x	x <- forcats::fct_na_value_to_level(x, label)
27	304x	forcats::fct_drop(x, only = label)
28	102x	} else if (is.character(x)) {
29	102x	x[is.na(x)] <- label
30	102x	x
31		} else {
32	!	stop("only factors and character vectors allowed")
33		}
34		}
35
36		#' Convert Strings to `NA`
37		#'
38		#' @description `r lifecycle::badge("stable")`
39		#'
40		#' SAS imports missing data as empty strings or strings with whitespaces only. This helper function can be used to
41		#' convert these values to `NA`s.
42		#'
43		#' @inheritParams explicit_na
44		#' @param empty (`logical`)\cr if `TRUE` empty strings get replaced by `NA`.
45		#' @param whitespaces (`logical`)\cr if `TRUE` then strings made from whitespaces only get replaced with `NA`.
46		#'
47		#' @return `x` with `""` and/or whitespace-only values substituted by `NA`, depending on the values of
48		#' `empty` and `whitespaces`.
49		#'
50		#' @examples
51		#' sas_na(c("1", "", " ", " ", "b"))
52		#' sas_na(factor(c("", " ", "b")))
53		#'
54		#' is.na(sas_na(c("1", "", " ", " ", "b")))
55		#'
56		#' @export
57		sas_na <- function(x, empty = TRUE, whitespaces = TRUE) {
58	407x	checkmate::assert_flag(empty)
59	407x	checkmate::assert_flag(whitespaces)
60
61	407x	if (is.factor(x)) {
62	301x	empty_levels <- levels(x) == ""
63	11x	if (empty && any(empty_levels)) levels(x)[empty_levels] <- NA
64
65	301x	ws_levels <- grepl("^\\s+$", levels(x))
66	!	if (whitespaces && any(ws_levels)) levels(x)[ws_levels] <- NA
67
68	301x	x
69	106x	} else if (is.character(x)) {
70	106x	if (empty) x[x == ""] <- NA_character_
71
72	106x	if (whitespaces) x[grepl("^\\s+$", x)] <- NA_character_
73
74	106x	x
75		} else {
76	!	stop("only factors and character vectors allowed")
77		}
78		}

1		#' Cox Proportional Hazards Regression
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Fits a Cox regression model and estimates hazard ratio to describe the effect size in a survival analysis.
6		#'
7		#' @inheritParams argument_convention
8		#'
9		#' @details Cox models are the most commonly used methods to estimate the magnitude of
10		#' the effect in survival analysis. It assumes proportional hazards: the ratio
11		#' of the hazards between groups (e.g., two arms) is constant over time.
12		#' This ratio is referred to as the "hazard ratio" (HR) and is one of the
13		#' most commonly reported metrics to describe the effect size in survival
14		#' analysis (NEST Team, 2020).
15		#'
16		#' @seealso [fit_coxreg] for relevant fitting functions, [h_cox_regression] for relevant
17		#' helper functions, and [tidy_coxreg] for custom tidy methods.
18		#'
19		#' @examples
20		#' library(survival)
21		#'
22		#' # Testing dataset [survival::bladder].
23		#' set.seed(1, kind = "Mersenne-Twister")
24		#' dta_bladder <- with(
25		#' data = bladder[bladder$enum < 5, ],
26		#' tibble::tibble(
27		#' TIME = stop,
28		#' STATUS = event,
29		#' ARM = as.factor(rx),
30		#' COVAR1 = as.factor(enum) %>% formatters::with_label("A Covariate Label"),
31		#' COVAR2 = factor(
32		#' sample(as.factor(enum)),
33		#' levels = 1:4, labels = c("F", "F", "M", "M")
34		#' ) %>% formatters::with_label("Sex (F/M)")
35		#' )
36		#' )
37		#' dta_bladder$AGE <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
38		#' dta_bladder$STUDYID <- factor("X")
39		#'
40		#' plot(
41		#' survfit(Surv(TIME, STATUS) ~ ARM + COVAR1, data = dta_bladder),
42		#' lty = 2:4,
43		#' xlab = "Months",
44		#' col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
45		#' )
46		#'
47		#' @name cox_regression
48		NULL
49
50		#' @describeIn cox_regression Statistics function that transforms results tabulated
51		#' from [fit_coxreg_univar()] or [fit_coxreg_multivar()] into a list.
52		#'
53		#' @param model_df (`data.frame`)\cr contains the resulting model fit from a [fit_coxreg]
54		#' function with tidying applied via [broom::tidy()].
55		#' @param .stats (`character`)\cr the name of statistics to be reported among:
56		#' * `n`: number of observations (univariate only)
57		#' * `hr`: hazard ratio
58		#' * `ci`: confidence interval
59		#' * `pval`: p-value of the treatment effect
60		#' * `pval_inter`: p-value of the interaction effect between the treatment and the covariate (univariate only)
61		#' @param .which_vars (`character`)\cr which rows should statistics be returned for from the given model.
62		#' Defaults to "all". Other options include "var_main" for main effects, "inter" for interaction effects,
63		#' and "multi_lvl" for multivariate model covariate level rows. When `.which_vars` is "all" specific
64		#' variables can be selected by specifying `.var_nms`.
65		#' @param .var_nms (`character`)\cr the `term` value of rows in `df` for which `.stats` should be returned. Typically
66		#' this is the name of a variable. If using variable labels, `var` should be a vector of both the desired
67		#' variable name and the variable label in that order to see all `.stats` related to that variable. When `.which_vars`
68		#' is "var_main" `.var_nms` should be only the variable name.
69		#'
70		#' @return
71		#' * `s_coxreg()` returns the selected statistic from the Cox regression model for the selected variable(s).
72		#'
73		#' @examples
74		#' # s_coxreg
75		#'
76		#' # Univariate
77		#' u1_variables <- list(
78		#' time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
79		#' )
80		#' univar_model <- fit_coxreg_univar(variables = u1_variables, data = dta_bladder)
81		#' df1 <- broom::tidy(univar_model)
82		#' s_coxreg(model_df = df1, .stats = "hr")
83		#'
84		#' # Univariate with interactions
85		#' univar_model_inter <- fit_coxreg_univar(
86		#' variables = u1_variables, control = control_coxreg(interaction = TRUE), data = dta_bladder
87		#' )
88		#' df1_inter <- broom::tidy(univar_model_inter)
89		#' s_coxreg(model_df = df1_inter, .stats = "hr", .which_vars = "inter", .var_nms = "COVAR1")
90		#'
91		#' # Univariate without treatment arm - only "COVAR2" covariate effects
92		#' u2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
93		#' univar_covs_model <- fit_coxreg_univar(variables = u2_variables, data = dta_bladder)
94		#' df1_covs <- broom::tidy(univar_covs_model)
95		#' s_coxreg(model_df = df1_covs, .stats = "hr", .var_nms = c("COVAR2", "Sex (F/M)"))
96		#'
97		#' # Multivariate.
98		#' m1_variables <- list(
99		#' time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
100		#' )
101		#' multivar_model <- fit_coxreg_multivar(variables = m1_variables, data = dta_bladder)
102		#' df2 <- broom::tidy(multivar_model)
103		#' s_coxreg(model_df = df2, .stats = "pval", .which_vars = "var_main", .var_nms = "COVAR1")
104		#' s_coxreg(
105		#' model_df = df2, .stats = "pval", .which_vars = "multi_lvl",
106		#' .var_nms = c("COVAR1", "A Covariate Label")
107		#' )
108		#'
109		#' # Multivariate without treatment arm - only "COVAR1" main effect
110		#' m2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
111		#' multivar_covs_model <- fit_coxreg_multivar(variables = m2_variables, data = dta_bladder)
112		#' df2_covs <- broom::tidy(multivar_covs_model)
113		#' s_coxreg(model_df = df2_covs, .stats = "hr")
114		#'
115		#' @export
116		s_coxreg <- function(model_df, .stats, .which_vars = "all", .var_nms = NULL) {
117	178x	assert_df_with_variables(model_df, list(term = "term", stat = .stats))
118	178x	checkmate::assert_multi_class(model_df$term, classes = c("factor", "character"))
119	178x	model_df$term <- as.character(model_df$term)
120	178x	.var_nms <- .var_nms[!is.na(.var_nms)]
121
122	177x	if (length(.var_nms) > 0) model_df <- model_df[model_df$term %in% .var_nms, ]
123	39x	if (.which_vars == "multi_lvl") model_df$term <- tail(.var_nms, 1)
124
125		# We need a list with names corresponding to the stats to display of equal length to the list of stats.
126	178x	y <- split(model_df, f = model_df$term, drop = FALSE)
127	178x	y <- stats::setNames(y, nm = rep(.stats, length(y)))
128
129	178x	if (.which_vars == "var_main") {
130	79x	y <- lapply(y, function(x) x[1, ]) # only main effect
131	99x	} else if (.which_vars %in% c("inter", "multi_lvl")) {
132	75x	y <- lapply(y, function(x) if (nrow(y[[1]]) > 1) x[-1, ] else x) # exclude main effect
133		}
134
135	178x	lapply(
136	178x	X = y,
137	178x	FUN = function(x) {
138	180x	z <- as.list(x[[.stats]])
139	180x	stats::setNames(z, nm = x$term_label)
140		}
141		)
142		}
143
144		#' @describeIn cox_regression Analysis function which is used as `afun` in [rtables::analyze()]
145		#' and `cfun` in [rtables::summarize_row_groups()] within `summarize_coxreg()`.
146		#'
147		#' @param eff (`flag`)\cr whether treatment effect should be calculated. Defaults to `FALSE`.
148		#' @param var_main (`flag`)\cr whether main effects should be calculated. Defaults to `FALSE`.
149		#' @param na_level (`string`)\cr custom string to replace all `NA` values with. Defaults to `""`.
150		#' @param cache_env (`environment`)\cr an environment object used to cache the regression model in order to
151		#' avoid repeatedly fitting the same model for every row in the table. Defaults to `NULL` (no caching).
152		#'
153		#' @return
154		#' * `a_coxreg()` returns formatted [rtables::CellValue()].
155		#'
156		#' @examples
157		#' tern:::a_coxreg(
158		#' df = dta_bladder,
159		#' labelstr = "Label 1",
160		#' variables = u1_variables,
161		#' .spl_context = list(value = "COVAR1"),
162		#' .stats = "n",
163		#' .formats = "xx"
164		#' )
165		#'
166		#' tern:::a_coxreg(
167		#' df = dta_bladder,
168		#' labelstr = "",
169		#' variables = u1_variables,
170		#' .spl_context = list(value = "COVAR2"),
171		#' .stats = "pval",
172		#' .formats = "xx.xxxx"
173		#' )
174		#'
175		#' @keywords internal
176		a_coxreg <- function(df,
177		labelstr,
178		eff = FALSE,
179		var_main = FALSE,
180		multivar = FALSE,
181		variables,
182		at = list(),
183		control = control_coxreg(),
184		.spl_context,
185		.stats,
186		.formats,
187		na_level = "",
188		cache_env = NULL) {
189	176x	cov_no_arm <- !multivar && !"arm" %in% names(variables) && control$interaction # special case: univar no arm
190	176x	cov <- tail(.spl_context$value, 1) # current variable/covariate
191	176x	var_lbl <- formatters::var_labels(df)[cov] # check for df labels
192	78x	if (!is.na(var_lbl) && labelstr == cov && cov %in% variables$covariates) labelstr <- var_lbl # use df labels if none
193	176x	if (eff \|\| multivar \|\| cov_no_arm) {
194	77x	control$interaction <- FALSE
195		} else {
196	99x	variables$covariates <- cov
197	35x	if (var_main) control$interaction <- TRUE
198		}
199
200	176x	if (is.null(cache_env[[cov]])) {
201	28x	if (!multivar) {
202	21x	model <- fit_coxreg_univar(variables = variables, data = df, at = at, control = control) %>% broom::tidy()
203		} else {
204	7x	model <- fit_coxreg_multivar(variables = variables, data = df, control = control) %>% broom::tidy()
205		}
206	28x	cache_env[[cov]] <- model
207		} else {
208	148x	model <- cache_env[[cov]]
209		}
210	99x	if (!multivar && !var_main) model[, "pval_inter"] <- NA_real_
211
212	176x	if (cov_no_arm \|\| (!cov_no_arm && !"arm" %in% names(variables) && is.numeric(df[[cov]]))) {
213	15x	multivar <- TRUE
214	3x	if (!cov_no_arm) var_main <- TRUE
215		}
216
217	176x	vars_coxreg <- list(which_vars = "all", var_nms = NULL)
218	176x	if (eff) {
219	35x	if (multivar && !var_main) { # multivar treatment level
220	6x	var_lbl_arm <- formatters::var_labels(df)[[variables$arm]]
221	6x	vars_coxreg[c("var_nms", "which_vars")] <- list(c(variables$arm, var_lbl_arm), "multi_lvl")
222		} else { # treatment effect
223	29x	vars_coxreg["var_nms"] <- variables$arm
224	6x	if (var_main) vars_coxreg["which_vars"] <- "var_main"
225		}
226		} else {
227	141x	if (!multivar \|\| (multivar && var_main && !is.numeric(df[[cov]]))) { # covariate effect/level
228	108x	vars_coxreg[c("var_nms", "which_vars")] <- list(cov, "var_main")
229	33x	} else if (multivar) { # multivar covariate level
230	33x	vars_coxreg[c("var_nms", "which_vars")] <- list(c(cov, var_lbl), "multi_lvl")
231	6x	if (var_main) model[cov, .stats] <- NA_real_
232		}
233	35x	if (!multivar && !var_main && control$interaction) vars_coxreg["which_vars"] <- "inter" # interaction effect
234		}
235	176x	var_vals <- s_coxreg(model, .stats, .which_vars = vars_coxreg$which_vars, .var_nms = vars_coxreg$var_nms)[[1]]
236	176x	var_names <- if (all(grepl("\\(reference = ", names(var_vals))) && labelstr != tail(.spl_context$value, 1)) {
237	21x	paste(c(labelstr, tail(strsplit(names(var_vals), " ")[[1]], 3)), collapse = " ") # "reference" main effect labels
238	176x	} else if ((!multivar && !eff && !(!var_main && control$interaction) && nchar(labelstr) > 0) \|\|
239	176x	(multivar && var_main && is.numeric(df[[cov]]))) {
240	58x	labelstr # other main effect labels
241	176x	} else if (multivar && !eff && !var_main && is.numeric(df[[cov]])) {
242	6x	"All" # multivar numeric covariate
243		} else {
244	91x	names(var_vals)
245		}
246	176x	in_rows(
247	176x	.list = var_vals, .names = var_names, .labels = var_names,
248	176x	.formats = stats::setNames(rep(.formats, length(var_names)), var_names),
249	176x	.format_na_strs = stats::setNames(rep(na_level, length(var_names)), var_names)
250		)
251		}
252
253		#' @describeIn cox_regression Layout-creating function which creates a Cox regression summary table
254		#' layout. This function is a wrapper for several `rtables` layouting functions. This function
255		#' is a wrapper for [rtables::analyze_colvars()] and [rtables::summarize_row_groups()].
256		#'
257		#' @inheritParams fit_coxreg_univar
258		#' @param multivar (`flag`)\cr Defaults to `FALSE`. If `TRUE` multivariate Cox regression will run, otherwise
259		#' univariate Cox regression will run.
260		#' @param common_var (`character`)\cr the name of a factor variable in the dataset which takes the same value
261		#' for all rows. This should be created during pre-processing if no such variable currently exists.
262		#' @param .section_div (`character`)\cr string which should be repeated as a section divider between sections.
263		#' Defaults to `NA` for no section divider. If a vector of two strings are given, the first will be used between
264		#' treatment and covariate sections and the second between different covariates.
265		#'
266		#' @return
267		#' * `summarize_coxreg()` returns a layout object suitable for passing to further layouting functions,
268		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add a Cox regression table
269		#' containing the chosen statistics to the table layout.
270		#'
271		#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()] which also take the `variables`, `data`,
272		#' `at` (univariate only), and `control` arguments but return unformatted univariate and multivariate
273		#' Cox regression models, respectively.
274		#'
275		#' @examples
276		#' # summarize_coxreg
277		#'
278		#' result_univar <- basic_table() %>%
279		#' summarize_coxreg(variables = u1_variables) %>%
280		#' build_table(dta_bladder)
281		#' result_univar
282		#'
283		#' result_multivar <- basic_table() %>%
284		#' summarize_coxreg(
285		#' variables = m1_variables,
286		#' multivar = TRUE,
287		#' ) %>%
288		#' build_table(dta_bladder)
289		#' result_multivar
290		#'
291		#' result_univar_covs <- basic_table() %>%
292		#' summarize_coxreg(
293		#' variables = u2_variables,
294		#' ) %>%
295		#' build_table(dta_bladder)
296		#' result_univar_covs
297		#'
298		#' result_multivar_covs <- basic_table() %>%
299		#' summarize_coxreg(
300		#' variables = m2_variables,
301		#' multivar = TRUE,
302		#' varlabels = c("Covariate 1", "Covariate 2") # custom labels
303		#' ) %>%
304		#' build_table(dta_bladder)
305		#' result_multivar_covs
306		#'
307		#' @export
summarize_coxreg <- function(lyt,
                             variables,
                             control = control_coxreg(),
                             at = list(),
                             multivar = FALSE,
                             common_var = "STUDYID",
                             .stats = c("n", "hr", "ci", "pval", "pval_inter"),
                             .formats = c(
                               n = "xx", hr = "xx.xx", ci = "(xx.xx, xx.xx)",
                               pval = "x.xxxx | (<0.0001)", pval_inter = "x.xxxx | (<0.0001)"
                             ),
                             varlabels = NULL,
                             .indent_mods = NULL,
                             na_level = "",
                             .section_div = NA_character_) {
  # Interactions cannot be combined with the multivariate layout: warn, then carry on.
  if (multivar && control$interaction) {
    warning(paste(
      "Interactions are not available for multivariate cox regression using summarize_coxreg.",
      "The model will be calculated without interaction effects."
    ))
  }

  has_arm <- "arm" %in% names(variables)
  if (control$interaction && !has_arm) {
    stop("To include interactions please specify 'arm' in variables.")
  }

  # Keep only the statistics that are valid for the requested model type.
  allowed_stats <- if (!has_arm || multivar) {
    c("hr", "ci", "pval")
  } else if (control$interaction) {
    c("n", "hr", "ci", "pval", "pval_inter")
  } else {
    c("n", "hr", "ci", "pval")
  }
  .stats <- intersect(allowed_stats, .stats)

  all_labels <- c(
    n = "n",
    hr = "Hazard Ratio",
    ci = paste0(control$conf_level * 100, "% CI"),
    pval = "p-value",
    pval_inter = "Interaction p-value"
  )
  stat_labels <- all_labels[names(all_labels) %in% .stats]
  .formats <- .formats[names(.formats) %in% .stats]

  # One environment is handed to every column so that `a_coxreg` can cache fitted models in it.
  cache <- new.env()
  n_stats <- length(.stats)

  # One column per statistic, all based on the same (constant) variable.
  lyt <- split_cols_by_multivar(
    lyt,
    vars = rep(common_var, n_stats),
    varlabels = stat_labels,
    extra_args = list(
      .stats = .stats,
      .formats = .formats,
      na_level = rep(na_level, n_stats),
      cache_env = replicate(n_stats, list(cache))
    )
  )

  if (has_arm) {
    # Treatment effect rows.
    lyt <- split_rows_by(
      lyt,
      common_var,
      split_label = "Treatment:",
      label_pos = "visible",
      section_div = head(.section_div, 1)
    )
    lyt <- summarize_row_groups(
      lyt,
      cfun = a_coxreg,
      extra_args = list(
        variables = variables, control = control, multivar = multivar, eff = TRUE, var_main = multivar
      )
    )
    if (multivar) {
      # Treatment level effects.
      lyt <- analyze_colvars(
        lyt,
        afun = a_coxreg,
        extra_args = list(eff = TRUE, control = control, variables = variables, multivar = multivar)
      )
    }
  }

  if ("covariates" %in% names(variables)) {
    # Covariate main effects.
    lyt <- split_rows_by_multivar(
      lyt,
      vars = variables$covariates,
      varlabels = varlabels,
      split_label = "Covariate:",
      nested = FALSE,
      section_div = tail(.section_div, 1)
    )
    lyt <- summarize_row_groups(
      lyt,
      cfun = a_coxreg,
      extra_args = list(
        variables = variables, at = at, control = control, multivar = multivar,
        var_main = if (multivar) multivar else control$interaction
      )
    )

    # Special case (univariate, no arm): flip the flag only AFTER the main-effect rows were set up,
    # so that the level rows below are still added.
    if (!has_arm) control$interaction <- TRUE

    if (multivar || control$interaction) {
      # Covariate level effects.
      lyt <- analyze_colvars(
        lyt,
        afun = a_coxreg,
        extra_args = list(variables = variables, at = at, control = control, multivar = multivar, labelstr = "")
      )
    }
  }

  lyt
}

#' Pairwise Formula Special Term
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' The special term `pairwise` indicates that the model should be fitted individually for
#' every tested level in comparison to the reference level.
#'
#' @param x the variable for which pairwise results are expected.
#'
#' @return `x`, with the expression it was supplied as stored (deparsed) in the attribute `varname`.
#'
#' @details Let `ARM` be a factor with levels A, B, C, and let B be the reference level.
#'   A model calling the formula including `pairwise(ARM)` will result in two models:
#'   * A model including only levels A and B, and effect of A estimated in reference to B.
#'   * A model including only levels C and B, the effect of C estimated in reference to B.
#'
#' @export
pairwise <- function(x) {
  lifecycle::deprecate_warn("0.8.1.9013", "pairwise()", "univariate()")
  # Capture the unevaluated argument so its source text can be attached to the value.
  var_expr <- substitute(x)
  structure(x, varname = deparse(var_expr))
}
22
#' Univariate Formula Special Term
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The special term `univariate` indicates that the model should be fitted individually for
#' every variable included in `univariate`.
#'
#' @param x a vector of variable names separated by commas.
#'
#' @return When used within a model formula, produces univariate models for each variable provided.
#'   The function itself returns `x` with the deparsed argument expression stored in the
#'   attribute `varname`.
#'
#' @details
#' If provided alongside a pairwise specification, the model
#' `y ~ ARM + univariate(SEX, AGE, RACE)` leads to the study and comparison of the models
#' + `y ~ ARM`
#' + `y ~ ARM + SEX`
#' + `y ~ ARM + AGE`
#' + `y ~ ARM + RACE`
#'
#' @export
univariate <- function(x) {
  # Capture the unevaluated argument so its source text can be attached to the value.
  var_expr <- substitute(x)
  structure(x, varname = deparse(var_expr))
}
46
# Get the right-hand term of a formula, coerced to character.
# The right-hand side is the last element of the formula object (works for one- and two-sided formulas).
rht <- function(x) {
  checkmate::assert_formula(x)
  rhs <- x[[length(x)]]
  as.character(rhs)
}
53
54		#' Hazard Ratio Estimation in Interactions
55		#'
56		#' This function estimates the hazard ratios between arms when an interaction variable is given with
57		#' specific values.
58		#'
59		#' @param variable,given Names of two variable in interaction. We seek the estimation of the levels of `variable`
60		#' given the levels of `given`.
61		#' @param lvl_var,lvl_given corresponding levels has given by `levels`.
62		#' @param mmat A name numeric filled with 0 used as template to obtain the design matrix.
63		#' @param coef Numeric of estimated coefficients.
64		#' @param vcov Variance-covariance matrix of underlying model.
65		#' @param conf_level Single numeric for the confidence level of estimate intervals.
66		#'
67		#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A)
68		#' and Sex (F, M; reference Female). The model is abbreviated: y ~ Arm + Sex + Arm x Sex.
69		#' The cox regression estimates the coefficients along with a variance-covariance matrix for:
70		#'
71		#' - b1 (arm b), b2 (arm c)
72		#' - b3 (sex m)
73		#' - b4 (arm b: sex m), b5 (arm c: sex m)
74		#'
75		#' Given that I want an estimation of the Hazard Ratio for arm C/sex M, the estimation
76		#' will be given in reference to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5),
77		#' therefore the interaction coefficient is given by b2 + b5 while the standard error is obtained
78		#' as $1.96 * sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5))$ for a confidence level of 0.95.
79		#'
80		#' @return A list of matrix (one per level of variable) with rows corresponding to the combinations of
81		#' `variable` and `given`, with columns:
82		#' * `coef_hat`: Estimation of the coefficient.
83		#' * `coef_se`: Standard error of the estimation.
84		#' * `hr`: Hazard ratio.
85		#' * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio.
86		#'
87		#' @seealso [s_cox_multivariate()].
88		#'
89		#' @examples
90		#' library(dplyr)
91		#' library(survival)
92		#'
93		#' ADSL <- tern_ex_adsl %>%
94		#' filter(SEX %in% c("F", "M"))
95		#'
96		#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "PFS")
97		#' adtte$ARMCD <- droplevels(adtte$ARMCD)
98		#' adtte$SEX <- droplevels(adtte$SEX)
99		#'
100		#' mod <- coxph(
101		#' formula = Surv(time = AVAL, event = 1 - CNSR) ~ (SEX + ARMCD)^2,
102		#' data = adtte
103		#' )
104		#'
105		#' mmat <- stats::model.matrix(mod)[1, ]
106		#' mmat[!mmat == 0] <- 0
107		#'
108		#' # Internal function - estimate_coef
109		#' \dontrun{
110		#' estimate_coef(
111		#' variable = "ARMCD", given = "SEX", lvl_var = "ARM A", lvl_given = "M",
112		#' coef = stats::coef(mod), mmat = mmat, vcov = stats::vcov(mod), conf_level = .95
113		#' )
114		#' }
115		#'
116		#' @keywords internal
estimate_coef <- function(variable, given,
                          lvl_var, lvl_given,
                          coef,
                          mmat,
                          vcov,
                          conf_level = 0.95) {
  # Coefficient names: `variable` levels without the reference ([-1]) and all `given` levels.
  var_terms <- paste0(variable, lvl_var[-1])
  given_terms <- paste0(given, lvl_given)

  # Every (variable level, given level) combination plus both spellings of the interaction term.
  combos <- expand.grid(variable = var_terms, given = given_terms)
  combos <- combos[order(combos$variable, combos$given), ]
  combos <- within(
    data = combos,
    expr = {
      inter <- paste0(variable, ":", given)
      rev_inter <- paste0(given, ":", variable)
    }
  )

  grouping <- combos$variable
  combo_labels <- paste(combos$variable, combos$given, sep = "/")

  # For each combination, switch on the template entries for the main effect of `variable`
  # and its interaction with `given` (the main effect of `given` is deliberately left out).
  design_mat <- apply(
    X = combos, MARGIN = 1, FUN = function(combo) {
      active_terms <- combo[-which(names(combo) == "given")]
      mmat[names(mmat) %in% active_terms] <- 1
      mmat
    }
  )
  colnames(design_mat) <- combo_labels

  coef_hat <- t(design_mat) %*% as.matrix(coef)
  dimnames(coef_hat)[2] <- "coef"

  # Standard error of a sum of coefficients: square root of the sum of the selected vcov block.
  coef_se <- apply(design_mat, 2, function(indicator) {
    selected <- as.logical(indicator)
    sqrt(sum(vcov[selected, selected]))
  })

  q_norm <- stats::qnorm((1 + conf_level) / 2)
  est <- cbind(coef_hat, `se(coef)` = coef_se)
  est <- cbind(
    est,
    hr = exp(est[, "coef"]),
    lcl = exp(est[, "coef"] - q_norm * est[, "se(coef)"]),
    ucl = exp(est[, "coef"] + q_norm * est[, "se(coef)"])
  )

  # One matrix per level of `variable`.
  result <- lapply(by(est, grouping, identity), as.matrix)

  attr(result, "details") <- paste0(
    "Estimations of ", variable,
    " hazard ratio given the level of ", given, " compared to ",
    variable, " level ", lvl_var[1], "."
  )
  result
}
182
#' Warning-capturing wrapper around `car::Anova`
#'
#' Runs a type III [car::Anova] and captures (instead of signalling) any warning raised while doing so.
#'
#' @inheritParams car::Anova
#'
#' @return A list with item `aov` for the result of [car::Anova] and `warn_text` for the text of the
#'   last captured warning (`NULL` if no warning was raised).
#'
#' @examples
#' # `car::Anova` on cox regression model including strata and expected
#' # a likelihood ratio test triggers a warning as only Wald method is
#' # accepted.
#'
#' library(survival)
#'
#' mod <- coxph(
#'   formula = Surv(time = futime, event = fustat) ~ factor(rx) + strata(ecog.ps),
#'   data = ovarian
#' )
#'
#' # Internal function - try_car_anova
#' \dontrun{
#' with_wald <- try_car_anova(mod = mod, test.statistic = "Wald")
#' with_lr <- try_car_anova(mod = mod, test.statistic = "LR")
#' }
#'
#' @keywords internal
try_car_anova <- function(mod,
                          test.statistic) { # nolint
  warn_text <- NULL

  aov <- withCallingHandlers(
    car::Anova(
      mod,
      test.statistic = test.statistic,
      type = "III"
    ),
    warning = function(w) {
      # Store only the warning's message. Pasting the condition object itself would also
      # paste its call and produce a vector of length two.
      warn_text <<- trimws(paste0("Warning in `try_car_anova`: ", conditionMessage(w)))

      # A warning is sometimes expected: resume the computation while silencing it.
      invokeRestart("muffleWarning")
    }
  )

  list(aov = aov, warn_text = warn_text)
}
240
241		#' Fit the Cox Regression Model and Anova
242		#'
243		#' The functions allows to derive from the [survival::coxph()] results the effect p.values using [car::Anova()].
244		#' This last package introduces more flexibility to get the effect p.values.
245		#'
246		#' @inheritParams t_coxreg
247		#'
248		#' @return A list with items `mod` (results of [survival::coxph()]), `msum` (result of `summary`) and
249		#' `aov` (result of [car::Anova()]).
250		#'
251		#' @noRd
fit_n_aov <- function(formula,
                      data,
                      conf_level = 0.95,
                      pval_method = c("wald", "likelihood"),
                      ...) {
  # NOTE: `data` and `conf_level` used to default to themselves (`data = data`), which fails with
  # "promise already under evaluation" whenever the default is relied on. `data` is now simply
  # required and `conf_level` has a usable default.
  pval_method <- match.arg(pval_method)

  # The formula is re-homed to this call frame; presumably so that the model fit and the later
  # anova both resolve `data` from here.
  environment(formula) <- environment()
  suppressWarnings({
    # We expect some warnings due to coxph which fails strict programming.
    mod <- survival::coxph(formula, data = data, ...)
    msum <- summary(mod, conf.int = conf_level)
  })

  anova_res <- try_car_anova(
    mod,
    test.statistic = switch(pval_method,
      "wald" = "Wald",
      "likelihood" = "LR"
    )
  )

  # Relay a captured anova warning as a message and keep it on the result.
  warn_attr <- anova_res$warn_text
  if (!is.null(warn_attr)) message(warn_attr)

  y <- list(mod = mod, msum = msum, aov = anova_res$aov)
  attr(y, "message") <- warn_attr

  y
}
283
284		# argument_checks
# Stop with an explanatory error unless `formula` inherits from class "formula".
# Returns NULL invisibly on success.
check_formula <- function(formula) {
  is_formula <- inherits(formula, "formula")
  if (!is_formula) {
    stop("Check `formula`. A formula should resemble `Surv(time = AVAL, event = 1 - CNSR) ~ study_arm(ARMCD)`.")
  }

  invisible(NULL)
}
292
# Stop with an explanatory error unless `covariates` is a non-NULL collection whose elements
# all inherit from class "formula". Returns NULL invisibly on success.
check_covariate_formulas <- function(covariates) {
  all_formulas <- all(vapply(X = covariates, FUN = inherits, what = "formula", FUN.VALUE = logical(1)))
  if (!all_formulas || is.null(covariates)) {
    stop("Check `covariates`, it should be a list of right-hand-term formulas, e.g. list(Age = ~AGE).")
  }

  invisible(NULL)
}
300
# Make sure every covariate formula is named: elements without a name (or with an empty one)
# are named after the right-hand term of their formula (see `rht()`).
name_covariate_names <- function(covariates) {
  current_names <- names(covariates)

  if (is.null(current_names)) {
    # No names at all: derive every name from the formulas.
    names(covariates) <- vapply(covariates, FUN = rht, FUN.VALUE = "name")
  } else {
    # Partially named: only fill in the empty names.
    blank <- current_names == ""
    if (any(blank)) {
      names(covariates)[blank] <- vapply(covariates[blank], FUN = rht, FUN.VALUE = "name")
    }
  }

  covariates
}
308
# Warn for every name in `increments` that does not match the right-hand term of any covariate
# formula. Nothing is checked when `increments` is NULL. Always returns NULL invisibly.
check_increments <- function(increments, covariates) {
  if (is.null(increments)) {
    return(invisible())
  }

  covariate_terms <- vapply(covariates, FUN = rht, FUN.VALUE = "name")
  lapply(
    X = names(increments), FUN = function(x) {
      known <- x %in% covariate_terms
      if (!known) {
        warning(
          paste(
            "Check `increments`, the `increment` for ", x,
            "doesn't match any names in investigated covariate(s)."
          )
        )
      }
    }
  )

  invisible()
}
328
329		#' Multivariate Cox Model - Summarized Results
330		#'
331		#' Analyses based on multivariate Cox model are usually not performed for the Controlled Substance Reporting or
332		#' regulatory documents but serve exploratory purposes only (e.g., for publication). In practice, the model usually
333		#' includes only the main effects (without interaction terms). It produces the hazard ratio estimates for each of the
334		#' covariates included in the model.
335		#' The analysis follows the same principles (e.g., stratified vs. unstratified analysis and tie handling) as the
336		#' usual Cox model analysis. Since there is usually no pre-specified hypothesis testing for such analysis,
337		#' the p.values need to be interpreted with caution. (Statistical Analysis of Clinical Trials Data with R,
338		#' `NEST's bookdown`)
339		#'
340		#' @param formula (`formula`)\cr A formula corresponding to the investigated [survival::Surv()] survival model
341		#' including covariates.
342		#' @param data (`data.frame`)\cr A data frame which includes the variable in formula and covariates.
343		#' @param conf_level (`proportion`)\cr The confidence level for the hazard ratio interval estimations. Default is 0.95.
344		#' @param pval_method (`character`)\cr The method used for the estimation of p-values, should be one of
345		#' "wald" (default) or "likelihood".
346		#' @param ... Optional parameters passed to [survival::coxph()]. Can include `ties`, a character string specifying the
347		#' method for tie handling, one of `exact` (default), `efron`, `breslow`.
348		#'
349		#' @return A `list` with elements `mod`, `msum`, `aov`, and `coef_inter`.
350		#'
351		#' @details The output is limited to single effect terms. Work in ongoing for estimation of interaction terms
352		#' but is out of scope as defined by the Global Data Standards Repository
353		#' (`GDS_Standard_TLG_Specs_Tables_2.doc`).
354		#'
355		#' @seealso [estimate_coef()].
356		#'
357		#' @examples
358		#' library(dplyr)
359		#'
360		#' adtte <- tern_ex_adtte
361		#' adtte_f <- subset(adtte, PARAMCD == "OS") # _f: filtered
362		#' adtte_f <- filter(
363		#' adtte_f,
364		#' PARAMCD == "OS" &
365		#' SEX %in% c("F", "M") &
366		#' RACE %in% c("ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE")
367		#' )
368		#' adtte_f$SEX <- droplevels(adtte_f$SEX)
369		#' adtte_f$RACE <- droplevels(adtte_f$RACE)
370		#'
371		#' # Internal function - s_cox_multivariate
372		#' \dontrun{
373		#' s_cox_multivariate(
374		#' formula = Surv(time = AVAL, event = 1 - CNSR) ~ (ARMCD + RACE + AGE)^2, data = adtte_f
375		#' )
376		#' }
377		#'
378		#' @keywords internal
s_cox_multivariate <- function(formula, data,
                               conf_level = 0.95,
                               pval_method = c("wald", "likelihood"),
                               ...) {
  # Covariates are the model variables except the response (row 1) and any `strata()` special.
  tf <- stats::terms(formula, specials = c("strata"))
  covariates <- rownames(attr(tf, "factors"))[-c(1, unlist(attr(tf, "specials")))]

  # Character covariates are converted to factors so that `levels()` is available further down.
  for (cov_name in covariates) {
    if (is.character(data[[cov_name]])) {
      data[[cov_name]] <- as.factor(data[[cov_name]])
    }
  }
  pval_method <- match.arg(pval_method)

  fit <- fit_n_aov(
    formula = formula,
    data = data,
    conf_level = conf_level,
    pval_method = pval_method,
    ...
  )
  mod <- fit$mod
  aov <- fit$aov
  msum <- fit$msum

  term_orders <- attr(mod$terms, "order")
  all_term_labs <- attr(mod$terms, "term.labels")

  # Interaction estimates are only computed when the model holds terms of order > 1.
  coef_inter <- NULL
  if (any(term_orders > 1)) {
    inter_terms <- all_term_labs[term_orders > 1]
    names(inter_terms) <- inter_terms

    # All-zero template carrying the names of the design matrix columns.
    mmat <- stats::model.matrix(mod)[1, ]
    mmat[mmat != 0] <- 0
    mcoef <- stats::coef(mod)
    mvcov <- stats::vcov(mod)

    estimate_pair <- function(variable, given) {
      estimate_coef(
        variable, given,
        coef = mcoef, mmat = mmat, vcov = mvcov, conf_level = conf_level,
        lvl_var = levels(data[[variable]]), lvl_given = levels(data[[given]])
      )
    }

    coef_inter <- lapply(
      inter_terms, function(term) {
        involved <- attr(mod$terms, "factor")[, term]
        involved <- names(involved[involved > 0])
        # Estimate each variable given the other one (both directions).
        Map(estimate_pair, variable = involved, given = rev(involved))
      }
    )
  }

  list(mod = mod, msum = msum, aov = aov, coef_inter = coef_inter)
}

1		#' Cox Regression Helper: Interactions
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Test and estimate the effect of a treatment in interaction with a covariate.
6		#' The effect is estimated as the HR of the tested treatment for a given level
7		#' of the covariate, in comparison to the treatment control.
8		#'
9		#' @inheritParams argument_convention
10		#' @param x (`numeric` or `factor`)\cr the values of the effect to be tested.
11		#' @param effect (`string`)\cr the name of the effect to be tested and estimated.
12		#' @param covar (`string`)\cr the name of the covariate in the model.
13		#' @param mod (`coxph`)\cr the Cox regression model.
14		#' @param label (`string`)\cr the label to be returned as `term_label`.
15		#' @param control (`list`)\cr a list of controls as returned by [control_coxreg()].
16		#' @param ... see methods.
17		#'
18		#' @examples
19		#' library(survival)
20		#'
21		#' set.seed(1, kind = "Mersenne-Twister")
22		#'
23		#' # Testing dataset [survival::bladder].
24		#' dta_bladder <- with(
25		#' data = bladder[bladder$enum < 5, ],
26		#' data.frame(
27		#' time = stop,
28		#' status = event,
29		#' armcd = as.factor(rx),
30		#' covar1 = as.factor(enum),
31		#' covar2 = factor(
32		#' sample(as.factor(enum)),
33		#' levels = 1:4,
34		#' labels = c("F", "F", "M", "M")
35		#' )
36		#' )
37		#' )
38		#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
39		#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
40		#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
41		#'
42		#' plot(
43		#' survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder),
44		#' lty = 2:4,
45		#' xlab = "Months",
46		#' col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
47		#' )
48		#'
49		#' @name cox_regression_inter
50		NULL
51
52		#' @describeIn cox_regression_inter S3 generic helper function to determine interaction effect.
53		#'
54		#' @return
55		#' * `h_coxreg_inter_effect()` returns a `data.frame` of covariate interaction effects consisting of the following
56		#' variables: `effect`, `term`, `term_label`, `level`, `n`, `hr`, `lcl`, `ucl`, `pval`, and `pval_inter`.
57		#'
58		#' @export
h_coxreg_inter_effect <- function(x, effect, covar, mod, label, control, ...) {
  # S3 generic: the method is selected from the class of `x` (the first argument).
  UseMethod("h_coxreg_inter_effect")
}
68
69		#' @describeIn cox_regression_inter Estimate the interaction with a `numeric` covariate.
70		#'
71		#' @param at (`list`)\cr a list with items named after the covariate, every
72		#' item is a vector of levels at which the interaction should be estimated.
73		#'
74		#' @export
h_coxreg_inter_effect.numeric <- function(x,
                                          effect,
                                          covar,
                                          mod,
                                          label,
                                          control,
                                          at,
                                          ...) {
  all_betas <- stats::coef(mod)
  term_labels <- attr(stats::terms(mod), "term.labels")

  # Positions of the terms involving `effect` (strata terms excluded); exactly two are
  # required: the main effect and its interaction with the covariate.
  term_indices <- grep(
    pattern = effect,
    x = term_labels[!grepl("strata\\(", term_labels)]
  )
  checkmate::assert_vector(term_indices, len = 2)

  model_vcov <- stats::vcov(mod)
  betas <- all_betas[term_indices]
  betas_var <- diag(model_vcov)[term_indices]
  betas_cov <- model_vcov[term_indices[1], term_indices[2]]

  # Covariate values at which the effect is estimated: user supplied, else the median of `x`.
  requested <- at[[covar]]
  xval <- if (is.null(requested)) {
    stats::median(x)
  } else {
    requested
  }

  # The main effect is the selected coefficient whose name does not mention the covariate.
  is_main <- !grepl(covar, names(betas))
  coef_hat <- betas[is_main] + xval * betas[!is_main]
  coef_se <- sqrt(
    betas_var[is_main] +
      xval^2 * betas_var[!is_main] +
      2 * xval * betas_cov
  )
  q_norm <- stats::qnorm((1 + control$conf_level) / 2)

  data.frame(
    effect = "Covariate:",
    term = rep(covar, length(xval)),
    term_label = paste0("  ", xval),
    level = as.character(xval),
    n = NA,
    hr = exp(coef_hat),
    lcl = exp(coef_hat - q_norm * coef_se),
    ucl = exp(coef_hat + q_norm * coef_se),
    pval = NA,
    pval_inter = NA,
    stringsAsFactors = FALSE
  )
}
120
121		#' @describeIn cox_regression_inter Estimate the interaction with a `factor` covariate.
122		#'
123		#' @param data (`data.frame`)\cr the data frame on which the model was fit.
124		#'
125		#' @export
h_coxreg_inter_effect.factor <- function(x,
                                         effect,
                                         covar,
                                         mod,
                                         label,
                                         control,
                                         data,
                                         ...) {
  # Honour the confidence level requested through `control`, as the numeric method does.
  # This used to be hard-coded to 0.95, which stays the fallback when `control` is not
  # supplied or carries no `conf_level`.
  conf_level <- if (missing(control) || is.null(control$conf_level)) {
    0.95
  } else {
    control$conf_level
  }

  covar_levels <- levels(data[[covar]])

  # Only the first element is used, i.e. the estimates for the first non-reference level of `effect`.
  y <- h_coxreg_inter_estimations(
    variable = effect, given = covar,
    lvl_var = levels(data[[effect]]),
    lvl_given = covar_levels,
    mod = mod,
    conf_level = conf_level
  )[[1]]

  data.frame(
    effect = "Covariate:",
    term = rep(covar, nrow(y)),
    term_label = as.character(paste0("  ", covar_levels)),
    level = as.character(covar_levels),
    n = NA,
    hr = y[, "hr"],
    lcl = y[, "lcl"],
    ucl = y[, "ucl"],
    pval = NA,
    pval_inter = NA,
    stringsAsFactors = FALSE
  )
}
156
157		#' @describeIn cox_regression_inter A higher level function to get
158		#' the results of the interaction test and the estimated values.
159		#'
160		#' @return
161		#' * `h_coxreg_extract_interaction()` returns the result of an interaction test and the estimated values. If
162		#' no interaction, [h_coxreg_univar_extract()] is applied instead.
163		#'
164		#' @examples
165		#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder)
166		#' h_coxreg_extract_interaction(
167		#' mod = mod, effect = "armcd", covar = "covar1", data = dta_bladder,
168		#' control = control_coxreg()
169		#' )
170		#'
171		#' @export
h_coxreg_extract_interaction <- function(effect,
                                         covar,
                                         mod,
                                         data,
                                         at = list(),
                                         control) {
  # `at` now defaults to an empty list: without it, a numeric covariate failed with a
  # missing-argument error although the numeric method falls back to the covariate median
  # when no value is requested for it.
  has_interaction <- any(attr(stats::terms(mod), "order") == 2)

  if (!has_interaction) {
    # No interaction term in the model: plain univariate extraction, interaction p-value unset.
    y <- h_coxreg_univar_extract(
      effect = effect, covar = covar, mod = mod, data = data, control = control
    )
    y$pval_inter <- NA
    y
  } else {
    test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]

    # Test the main treatment effect.
    mod_aov <- muffled_car_anova(mod, test_statistic)
    sum_anova <- broom::tidy(mod_aov)
    pval <- sum_anova[sum_anova$term == effect, ][["p.value"]]

    # Test the interaction effect (terms are identified by the ":" in their name).
    pval_inter <- sum_anova[grep(":", sum_anova$term), ][["p.value"]]
    covar_label <- unname(labels_or_names(data[covar]))
    covar_test <- data.frame(
      effect = "Covariate:",
      term = covar,
      term_label = covar_label,
      level = "",
      n = mod$n, hr = NA, lcl = NA, ucl = NA, pval = pval,
      pval_inter = pval_inter,
      stringsAsFactors = FALSE
    )

    # Estimate the interaction; the method is chosen from the class of the covariate column.
    y <- h_coxreg_inter_effect(
      data[[covar]],
      covar = covar,
      effect = effect,
      mod = mod,
      label = covar_label,
      at = at,
      control = control,
      data = data
    )
    rbind(covar_test, y)
  }
}
217
218		#' @describeIn cox_regression_inter Hazard ratio estimation in interactions.
219		#'
220		#' @param variable,given (`string`)\cr the name of variables in interaction. We seek the estimation
221		#' of the levels of `variable` given the levels of `given`.
222		#' @param lvl_var,lvl_given (`character`)\cr corresponding levels has given by [levels()].
223		#' @param mod (`coxph`)\cr a fitted Cox regression model (see [survival::coxph()]).
224		#'
225		#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A)
226		#' and Sex (F, M; reference Female) and the model being abbreviated: y ~ Arm + Sex + Arm:Sex.
227		#' The cox regression estimates the coefficients along with a variance-covariance matrix for:
228		#'
229		#' - b1 (arm b), b2 (arm c)
230		#' - b3 (sex m)
231		#' - b4 (arm b: sex m), b5 (arm c: sex m)
232		#'
233		#' The estimation of the Hazard Ratio for arm C/sex M is given in reference
234		#' to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5).
235		#' The interaction coefficient is deduced by b2 + b5 while the standard error
236		#' is obtained as $sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5))$.
237		#'
238		#' @return
239		#' * `h_coxreg_inter_estimations()` returns a list of matrices (one per level of variable) with rows corresponding
240		#' to the combinations of `variable` and `given`, with columns:
241		#' * `coef_hat`: Estimation of the coefficient.
242		#' * `coef_se`: Standard error of the estimation.
243		#' * `hr`: Hazard ratio.
244		#' * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio.
245		#'
246		#' @examples
247		#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder)
248		#' result <- h_coxreg_inter_estimations(
249		#' variable = "armcd", given = "covar1",
250		#' lvl_var = levels(dta_bladder$armcd),
251		#' lvl_given = levels(dta_bladder$covar1),
252		#' mod = mod, conf_level = .95
253		#' )
254		#' result
255		#'
256		#' @export
h_coxreg_inter_estimations <- function(variable,
                                       given,
                                       lvl_var,
                                       lvl_given,
                                       mod,
                                       conf_level = 0.95) {
  # Coefficient names: `variable` levels without the reference ([-1]) and all `given` levels.
  var_terms <- paste0(variable, lvl_var[-1])
  given_terms <- paste0(given, lvl_given)

  # Every (variable level, given level) combination plus both spellings of the interaction term.
  combos <- expand.grid(variable = var_terms, given = given_terms)
  combos <- combos[order(combos$variable, combos$given), ]
  combos <- within(
    data = combos,
    expr = {
      inter <- paste0(variable, ":", given)
      rev_inter <- paste0(given, ":", variable)
    }
  )
  grouping <- combos$variable
  combo_labels <- paste(combos$variable, combos$given, sep = "/")

  # All-zero template carrying the names of the design matrix columns.
  template <- stats::model.matrix(mod)[1, ]
  template[template != 0] <- 0

  # For each combination, switch on the main effect of `variable` and its interaction with
  # `given` (the main effect of `given` is deliberately left out).
  design_mat <- apply(
    X = combos, MARGIN = 1, FUN = function(combo) {
      active_terms <- combo[-which(names(combo) == "given")]
      template[names(template) %in% active_terms] <- 1
      template
    }
  )
  colnames(design_mat) <- combo_labels

  model_coef <- stats::coef(mod)
  model_vcov <- stats::vcov(mod)
  coef_hat <- t(design_mat) %*% as.matrix(model_coef)
  dimnames(coef_hat)[2] <- "coef"

  # Standard error of a sum of coefficients: square root of the sum of the selected vcov block.
  coef_se <- apply(
    design_mat, 2,
    function(indicator) {
      selected <- as.logical(indicator)
      sqrt(sum(model_vcov[selected, selected]))
    }
  )

  q_norm <- stats::qnorm((1 + conf_level) / 2)
  est <- cbind(coef_hat, `se(coef)` = coef_se)
  est <- cbind(
    est,
    hr = exp(est[, "coef"]),
    lcl = exp(est[, "coef"] - q_norm * est[, "se(coef)"]),
    ucl = exp(est[, "coef"] + q_norm * est[, "se(coef)"])
  )

  # One matrix per level of `variable`.
  result <- lapply(by(est, grouping, identity), as.matrix)
  attr(result, "details") <- paste0(
    "Estimations of ", variable,
    " hazard ratio given the level of ", given, " compared to ",
    variable, " level ", lvl_var[1], "."
  )
  result
}

1		#' Count Patients with Marked Laboratory Abnormalities
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Primary analysis variable `.var` indicates whether single, replicated or last marked laboratory
6		#' abnormality was observed (`factor`). Additional analysis variables are `id` (`character` or `factor`)
7		#' and `direction` (`factor`) indicating the direction of the abnormality. Denominator is number of
8		#' patients with at least one valid measurement during the analysis.
9		#' * For `Single, not last` and `Last or replicated`: Numerator is number of patients
10		#' with `Single, not last` and `Last or replicated` levels, respectively.
11		#' * For `Any`: Numerator is the number of patients with either single or
12		#' replicated marked abnormalities.
13		#'
14		#' @inheritParams argument_convention
15		#' @param category (`list`)\cr with different marked category names for single
16		#' and last or replicated.
17		#'
18		#' @note `Single, not last` and `Last or replicated` levels are mutually exclusive. If a patient has
19		#' abnormalities that meet both the `Single, not last` and `Last or replicated` criteria, then the
20		#' patient will be counted only under the `Last or replicated` category.
21		#'
22		#' @name abnormal_by_marked
23		NULL
24
25		#' @describeIn abnormal_by_marked Statistics function for patients with marked lab abnormalities.
26		#'
27		#' @return
28		#' * `s_count_abnormal_by_marked()` returns statistic `count_fraction` with `Single, not last`,
29		#' `Last or replicated`, and `Any Abnormality` results.
30		#'
31		#' @examples
32		#' library(dplyr)
33		#'
34		#' df <- data.frame(
35		#' USUBJID = as.character(c(rep(1, 5), rep(2, 5), rep(1, 5), rep(2, 5))),
36		#' ARMCD = factor(c(rep("ARM A", 5), rep("ARM B", 5), rep("ARM A", 5), rep("ARM B", 5))),
37		#' ANRIND = factor(c(
38		#' "NORMAL", "HIGH", "HIGH", "HIGH HIGH", "HIGH",
39		#' "HIGH", "HIGH", "HIGH HIGH", "NORMAL", "HIGH HIGH", "NORMAL", "LOW", "LOW", "LOW LOW", "LOW",
40		#' "LOW", "LOW", "LOW LOW", "NORMAL", "LOW LOW"
41		#' )),
42		#' ONTRTFL = rep(c("", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"), 2),
43		#' PARAMCD = factor(c(rep("CRP", 10), rep("ALT", 10))),
44		#' AVALCAT1 = factor(rep(c("", "", "", "SINGLE", "REPLICATED", "", "", "LAST", "", "SINGLE"), 2)),
45		#' stringsAsFactors = FALSE
46		#' )
47		#'
48		#' df <- df %>%
49		#' mutate(abn_dir = factor(
50		#' case_when(
51		#' ANRIND == "LOW LOW" ~ "Low",
52		#' ANRIND == "HIGH HIGH" ~ "High",
53		#' TRUE ~ ""
54		#' ),
55		#' levels = c("Low", "High")
56		#' ))
57		#'
58		#' # Select only post-baseline records.
59		#' df <- df %>% filter(ONTRTFL == "Y")
60		#' df_crp <- df %>%
61		#' filter(PARAMCD == "CRP") %>%
62		#' droplevels()
63		#' full_parent_df <- list(df_crp, "not_needed")
64		#' cur_col_subset <- list(rep(TRUE, nrow(df_crp)), "not_needed")
65		#' spl_context <- data.frame(
66		#' split = c("PARAMCD", "GRADE_DIR"),
67		#' full_parent_df = I(full_parent_df),
68		#' cur_col_subset = I(cur_col_subset)
69		#' )
70		#' # Internal function - s_count_abnormal_by_marked
71		#' \dontrun{
72		#' s_count_abnormal_by_marked(
73		#' df = df_crp %>% filter(abn_dir == "High"),
74		#' .spl_context = spl_context,
75		#' .var = "AVALCAT1",
76		#' variables = list(id = "USUBJID", param = "PARAMCD", direction = "abn_dir")
77		#' )
78		#' }
79		#'
80		#' @keywords internal
s_count_abnormal_by_marked <- function(df,
                                       .var = "AVALCAT1",
                                       .spl_context,
                                       category = list(single = "SINGLE", last_replicated = c("LAST", "REPLICATED")),
                                       variables = list(id = "USUBJID", param = "PARAM", direction = "abn_dir")) {
  # Argument validation.
  checkmate::assert_string(.var)
  checkmate::assert_list(variables)
  checkmate::assert_list(category)
  checkmate::assert_subset(names(category), c("single", "last_replicated"))
  checkmate::assert_subset(names(variables), c("id", "param", "direction"))
  # `df` must contain at most one distinct direction value.
  checkmate::assert_vector(unique(df[[variables$direction]]), max.len = 1)

  assert_df_with_variables(df, c(aval = .var, variables))
  checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))

  # Denominator: distinct subjects in the parent data of the `param` split,
  # restricted to the current column. Using the parent data means a subject
  # with records in both directions is still counted only once.
  param_row <- .spl_context[.spl_context$split == variables[["param"]], ]
  parent_ids <- param_row$full_parent_df[[1]][[variables[["id"]]]]
  denom <- length(unique(parent_ids[param_row$cur_col_subset[[1]]]))

  if (denom == 0) {
    # No subject in the denominator: report zero counts and zero fractions.
    return(list(count_fraction = list(
      "Single, not last" = c(0, 0),
      "Last or replicated" = c(0, 0),
      "Any Abnormality" = c(0, 0)
    )))
  }

  # Distinct subject ids having at least one record in the given categories.
  ids_in <- function(categories) {
    unique(df[df[[.var]] %in% categories, variables$id, drop = TRUE])
  }
  count_fraction <- function(n) c(n, n / denom)

  ids_last_replicated <- ids_in(category[["last_replicated"]])
  # A subject with both kinds of abnormality counts as last/replicated only.
  ids_single <- setdiff(ids_in(category[["single"]]), ids_last_replicated)

  n_single <- length(ids_single)
  n_last_replicated <- length(ids_last_replicated)

  list(count_fraction = list(
    "Single, not last" = count_fraction(n_single),
    "Last or replicated" = count_fraction(n_last_replicated),
    "Any Abnormality" = count_fraction(n_single + n_last_replicated)
  ))
}
133
134		#' @describeIn abnormal_by_marked Formatted analysis function which is used as `afun`
135		#' in `count_abnormal_by_marked()`.
136		#'
137		#' @return
138		#' * `a_count_abnormal_by_marked()` returns the corresponding list with formatted [rtables::CellValue()].
139		#'
140		#' @examples
141		#' # Internal function - a_count_abnormal_by_marked
142		#' \dontrun{
143		#' # Use the Formatted Analysis function for `analyze()`. We need to ungroup `count_fraction` first
144		#' # so that the `rtables` formatting function `format_count_fraction()` can be applied correctly.
145		#' afun <- make_afun(a_count_abnormal_by_marked, .ungroup_stats = "count_fraction")
146		#' afun(
147		#' df = df_crp %>% filter(abn_dir == "High"),
148		#' .spl_context = spl_context,
149		#' variables = list(id = "USUBJID", param = "PARAMCD", direction = "abn_dir")
150		#' )
151		#' }
152		#'
153		#' @keywords internal
a_count_abnormal_by_marked <- make_afun(
  s_count_abnormal_by_marked,
  # Default format for the single `count_fraction` statistic.
  .formats = list(count_fraction = format_count_fraction)
)
158
159		#' @describeIn abnormal_by_marked Layout-creating function which can take statistics function arguments
160		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
161		#'
162		#' @return
163		#' * `count_abnormal_by_marked()` returns a layout object suitable for passing to further layouting functions,
164		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
165		#' the statistics from `s_count_abnormal_by_marked()` to the table layout.
166		#'
167		#' @examples
168		#' map <- unique(
169		#' df[df$abn_dir %in% c("Low", "High") & df$AVALCAT1 != "", c("PARAMCD", "abn_dir")]
170		#' ) %>%
171		#' lapply(as.character) %>%
172		#' as.data.frame() %>%
173		#' arrange(PARAMCD, abn_dir)
174		#'
175		#' basic_table() %>%
176		#' split_cols_by("ARMCD") %>%
177		#' split_rows_by("PARAMCD") %>%
178		#' summarize_num_patients(
179		#' var = "USUBJID",
180		#' .stats = "unique_count"
181		#' ) %>%
182		#' split_rows_by(
183		#' "abn_dir",
184		#' split_fun = trim_levels_to_map(map)
185		#' ) %>%
186		#' count_abnormal_by_marked(
187		#' var = "AVALCAT1",
188		#' variables = list(
189		#' id = "USUBJID",
190		#' param = "PARAMCD",
191		#' direction = "abn_dir"
192		#' )
193		#' ) %>%
194		#' build_table(df = df)
195		#'
196		#' basic_table() %>%
197		#' split_cols_by("ARMCD") %>%
198		#' split_rows_by("PARAMCD") %>%
199		#' summarize_num_patients(
200		#' var = "USUBJID",
201		#' .stats = "unique_count"
202		#' ) %>%
203		#' split_rows_by(
204		#' "abn_dir",
205		#' split_fun = trim_levels_in_group("abn_dir")
206		#' ) %>%
207		#' count_abnormal_by_marked(
208		#' var = "AVALCAT1",
209		#' variables = list(
210		#' id = "USUBJID",
211		#' param = "PARAMCD",
212		#' direction = "abn_dir"
213		#' )
214		#' ) %>%
215		#' build_table(df = df)
216		#'
217		#' @export
count_abnormal_by_marked <- function(lyt,
                                     var,
                                     ...,
                                     .stats = NULL,
                                     .formats = NULL,
                                     .labels = NULL,
                                     .indent_mods = NULL) {
  checkmate::assert_string(var)

  # Customise the formatted analysis function with the user's choices;
  # `count_fraction` is ungrouped so each of its entries becomes its own row.
  afun <- make_afun(
    a_count_abnormal_by_marked,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "count_fraction"
  )

  # Everything passed through `...` is forwarded to the statistics function.
  analyze(
    lyt = lyt,
    vars = var,
    afun = afun,
    show_labels = "hidden",
    extra_args = list(...)
  )
}

1		#' Helper Functions for Tabulating Biomarker Effects on Binary Response by Subgroup
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper functions which are documented here separately to not confuse the user
6		#' when reading about the user-facing functions.
7		#'
8		#' @inheritParams response_biomarkers_subgroups
9		#' @inheritParams extract_rsp_biomarkers
10		#' @inheritParams argument_convention
11		#'
12		#' @examples
13		#' library(dplyr)
14		#' library(forcats)
15		#'
16		#' adrs <- tern_ex_adrs
17		#' adrs_labels <- formatters::var_labels(adrs)
18		#'
19		#' adrs_f <- adrs %>%
20		#' filter(PARAMCD == "BESRSPI") %>%
21		#' mutate(rsp = AVALC == "CR")
22		#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
23		#'
24		#' @name h_response_biomarkers_subgroups
25		NULL
26
27		#' @describeIn h_response_biomarkers_subgroups helps with converting the "response" function variable list
28		#' to the "logistic regression" variable list. The reason is that currently there is an
29		#' inconsistency between the variable names accepted by `extract_rsp_subgroups()` and `fit_logistic()`.
30		#'
31		#' @param biomarker (`string`)\cr the name of the biomarker variable.
32		#'
33		#' @return
34		#' * `h_rsp_to_logistic_variables()` returns a named `list` of elements `response`, `arm`, `covariates`, and `strata`.
35		#'
36		#' @examples
37		#' # This is how the variable list is converted internally.
38		#' h_rsp_to_logistic_variables(
39		#' variables = list(
40		#' rsp = "RSP",
41		#' covariates = c("A", "B"),
42		#' strat = "D"
43		#' ),
44		#' biomarker = "AGE"
45		#' )
46		#'
47		#' @export
h_rsp_to_logistic_variables <- function(variables, biomarker) {
  checkmate::assert_list(variables)
  checkmate::assert_string(variables$rsp)
  checkmate::assert_string(biomarker)

  # Rename the "response"-style entries to the "logistic regression" names;
  # the biomarker takes the `arm` slot. Entries that are absent from
  # `variables` are kept in the result as `NULL`.
  response <- variables$rsp
  covariates <- variables$covariates
  strata <- variables$strat

  list(
    response = response,
    arm = biomarker,
    covariates = covariates,
    strata = strata
  )
}
59
60		#' @describeIn h_response_biomarkers_subgroups prepares estimates for number of responses, patients and
61		#' overall response rate, as well as odds ratio estimates, confidence intervals and p-values, for multiple
62		#' biomarkers in a given single data set.
63		#' `variables` corresponds to names of variables found in `data`, passed as a named list and requires elements
64		#' `rsp` and `biomarkers` (vector of continuous biomarker variables) and optionally `covariates`
65		#' and `strat`.
66		#'
67		#' @return
68		#' * `h_logistic_mult_cont_df()` returns a `data.frame` containing estimates and statistics for the selected biomarkers.
69		#'
70		#' @examples
71		#' # For a single population, estimate separately the effects
72		#' # of two biomarkers.
73		#' df <- h_logistic_mult_cont_df(
74		#' variables = list(
75		#' rsp = "rsp",
76		#' biomarkers = c("BMRKR1", "AGE"),
77		#' covariates = "SEX"
78		#' ),
79		#' data = adrs_f
80		#' )
81		#' df
82		#'
83		#' # If the data set is empty, still the corresponding rows with missings are returned.
84		#' h_logistic_mult_cont_df(
85		#' variables = list(
86		#' rsp = "rsp",
87		#' biomarkers = c("BMRKR1", "AGE"),
88		#' covariates = "SEX",
89		#' strat = "STRATA1"
90		#' ),
91		#' data = adrs_f[NULL, ]
92		#' )
93		#'
94		#' @export
h_logistic_mult_cont_df <- function(variables,
                                    data,
                                    control = control_logistic()) {
  assert_df_with_variables(data, variables)

  checkmate::assert_character(variables$biomarkers, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  # Read once and reuse for both the model summary and the output column.
  conf_level <- control[["conf_level"]]
  pval_label <- "p-value (Wald)"

  # If there is any data, run model, otherwise return empty results.
  if (nrow(data) > 0) {
    # One model per biomarker; each yields a one-row data frame.
    l_result <- lapply(variables$biomarkers, function(bm) {
      model_fit <- fit_logistic(
        variables = h_rsp_to_logistic_variables(variables, bm),
        data = data,
        response_definition = control$response_definition
      )
      result <- h_logistic_simple_terms(
        x = bm,
        fit_glm = model_fit,
        conf_level = conf_level
      )
      # Recover the response actually used in the fit so that counts reflect
      # the modelled observations.
      resp_vector <- if (inherits(model_fit, "glm")) {
        model_fit$model[[variables$rsp]]
      } else {
        # Non-`glm` fit: the response is read from the "status" column of
        # `model_fit$y` (presumably a conditional logistic fit when `strat`
        # is given -- confirm against `fit_logistic()`).
        as.logical(as.matrix(model_fit$y)[, "status"])
      }
      data.frame(
        # Dummy column needed downstream to create a nested header.
        biomarker = bm,
        biomarker_label = formatters::var_labels(data[bm], fill = TRUE),
        n_tot = length(resp_vector),
        n_rsp = sum(resp_vector),
        prop = mean(resp_vector),
        or = as.numeric(result[1L, "odds_ratio"]),
        lcl = as.numeric(result[1L, "lcl"]),
        ucl = as.numeric(result[1L, "ucl"]),
        conf_level = conf_level,
        pval = as.numeric(result[1L, "pvalue"]),
        pval_label = pval_label,
        stringsAsFactors = FALSE
      )
    })
    do.call(rbind, args = c(l_result, make.row.names = FALSE))
  } else {
    # Empty data: one row per biomarker with zero counts and missing estimates.
    data.frame(
      biomarker = variables$biomarkers,
      biomarker_label = formatters::var_labels(data[variables$biomarkers], fill = TRUE),
      n_tot = 0L,
      n_rsp = 0L,
      prop = NA,
      or = NA,
      lcl = NA,
      ucl = NA,
      conf_level = conf_level,
      pval = NA,
      pval_label = pval_label,
      row.names = seq_along(variables$biomarkers),
      stringsAsFactors = FALSE
    )
  }
}
160
161		#' @describeIn h_response_biomarkers_subgroups prepares a single sub-table given a `df_sub` containing
162		#' the results for a single biomarker.
163		#'
164		#' @param df (`data.frame`)\cr results for a single biomarker, as part of what is
165		#' returned by [extract_rsp_biomarkers()] (it needs a couple of columns which are
166		#' added by that high-level function relative to what is returned by [h_logistic_mult_cont_df()],
167		#' see the example).
168		#'
169		#' @return
170		#' * `h_tab_rsp_one_biomarker()` returns an `rtables` table object with the given statistics arranged in columns.
171		#'
172		#' @examples
173		#' # Starting from above `df`, zoom in on one biomarker and add required columns.
174		#' df1 <- df[1, ]
175		#' df1$subgroup <- "All patients"
176		#' df1$row_type <- "content"
177		#' df1$var <- "ALL"
178		#' df1$var_label <- "All patients"
179		#'
180		#' # Internal function - h_tab_rsp_one_biomarker
181		#' \dontrun{
182		#' h_tab_rsp_one_biomarker(
183		#' df1,
184		#' vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval")
185		#' )
186		#' }
187		#'
188		#' @export
h_tab_rsp_one_biomarker <- function(df, vars) {
  # Analysis functions for the requested statistics, in the requested order.
  afuns <- a_response_subgroups()[vars]

  # Column variables are built from the confidence level and p-value label
  # stored in the first row of `df`.
  first_conf_level <- df$conf_level[1]
  first_pval_label <- df$pval_label[1]
  colvars <- d_rsp_subgroups_colvars(
    vars,
    conf_level = first_conf_level,
    method = first_pval_label
  )

  h_tab_one_biomarker(df = df, afuns = afuns, colvars = colvars)
}

1		#' Tabulate Biomarker Effects on Binary Response by Subgroup
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Tabulate the estimated effects of multiple continuous biomarker variables
6		#' on a binary response endpoint across population subgroups.
7		#'
8		#' @param df (`data.frame`)\cr containing all analysis variables, as returned by
9		#' [extract_rsp_biomarkers()].
10		#' @param vars (`character`)\cr the names of statistics to be reported among:
11		#' * `n_tot`: Total number of patients per group.
12		#' * `n_rsp`: Total number of responses per group.
13		#' * `prop`: Total response proportion per group.
14		#' * `or`: Odds ratio.
15		#' * `ci`: Confidence interval of odds ratio.
16		#' * `pval`: p-value of the effect.
17		#' Note, the statistics `n_tot`, `or` and `ci` are required.
18		#'
19		#' @return An `rtables` table summarizing biomarker effects on binary response by subgroup.
20		#'
21		#' @details These functions create a layout starting from a data frame which contains
22		#' the required statistics. The tables are then typically used as input for forest plots.
23		#'
24		#' @note In contrast to [tabulate_rsp_subgroups()] this tabulation function does
25		#' not start from an input layout `lyt`. This is because internally the table is
26		#' created by combining multiple subtables.
27		#'
28		#' @seealso [h_tab_rsp_one_biomarker()] which is used internally, [extract_rsp_biomarkers()].
29		#'
30		#' @examples
31		#' library(dplyr)
32		#' library(forcats)
33		#'
34		#' adrs <- tern_ex_adrs
35		#' adrs_labels <- formatters::var_labels(adrs)
36		#'
37		#' adrs_f <- adrs %>%
38		#' filter(PARAMCD == "BESRSPI") %>%
39		#' mutate(rsp = AVALC == "CR")
40		#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
41		#' \dontrun{
42		#' ## Table with default columns.
43		#' # df <- <need_data_input_to_work>
44		#' tabulate_rsp_biomarkers(df)
45		#'
46		#' ## Table with a manually chosen set of columns: leave out "pval", reorder.
47		#' tab <- tabulate_rsp_biomarkers(
48		#' df = df,
49		#' vars = c("n_rsp", "ci", "n_tot", "prop", "or")
50		#' )
51		#'
52		#' ## Finally produce the forest plot.
53		#' g_forest(tab, xlim = c(0.7, 1.4))
54		#' }
55		#'
56		#' @export
57		#' @name response_biomarkers_subgroups
tabulate_rsp_biomarkers <- function(df,
                                    vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval")) {
  checkmate::assert_data_frame(df)
  checkmate::assert_character(df$biomarker)
  checkmate::assert_character(df$biomarker_label)
  checkmate::assert_subset(vars, c("n_tot", "n_rsp", "prop", "or", "ci", "pval"))

  # Build one sub-table per biomarker and stack them.
  per_biomarker <- split(df, f = df$biomarker)
  tabs <- lapply(per_biomarker, FUN = function(df_sub) {
    tab_sub <- h_tab_rsp_one_biomarker(df = df_sub, vars = vars)
    # Insert label row as first row in table.
    first_path <- row_paths(tab_sub)[[1]][1]
    label_at_path(tab_sub, path = first_path) <- df_sub$biomarker_label[1]
    tab_sub
  })
  result <- do.call(rbind, tabs)

  # Attach the column positions and header as attributes, as used for the
  # forest plot.
  structure(
    result,
    forest_header = paste0(c("Lower", "Higher"), "\nBetter"),
    col_x = match("or", vars),
    col_ci = match("ci", vars),
    col_symbol_size = grep("n_tot", vars)
  )
}
88
89		#' Prepares Response Data Estimates for Multiple Biomarkers in a Single Data Frame
90		#'
91		#' @description `r lifecycle::badge("stable")`
92		#'
93		#' Prepares estimates for number of responses, patients and overall response rate,
94		#' as well as odds ratio estimates, confidence intervals and p-values,
95		#' for multiple biomarkers across population subgroups in a single data frame.
96		#' `variables` corresponds to the names of variables found in `data`, passed as a
97		#' named list and requires elements `rsp` and `biomarkers` (vector of continuous
98		#' biomarker variables) and optionally `covariates`, `subgroups` and `strat`.
99		#' `groups_lists` optionally specifies groupings for `subgroups` variables.
100		#'
101		#' @inheritParams argument_convention
102		#' @inheritParams response_subgroups
103		#' @param control (named `list`)\cr controls for the response definition and the
104		#' confidence level produced by [control_logistic()].
105		#'
106		#' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_rsp`,
107		#' `prop`, `or`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`,
108		#' `var_label`, and `row_type`.
109		#'
110		#' @note You can also specify a continuous variable in `rsp` and then use the
111		#' `response_definition` control to convert that internally to a logical
112		#' variable reflecting binary response.
113		#'
114		#' @seealso [h_logistic_mult_cont_df()] which is used internally.
115		#'
116		#' @examples
117		#' library(dplyr)
118		#' library(forcats)
119		#'
120		#' adrs <- tern_ex_adrs
121		#' adrs_labels <- formatters::var_labels(adrs)
122		#'
123		#' adrs_f <- adrs %>%
124		#' filter(PARAMCD == "BESRSPI") %>%
125		#' mutate(rsp = AVALC == "CR")
126		#'
127		#' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`,
128		#' # in logistic regression models with one covariate `SEX`. The subgroups
129		#' # are defined by the levels of `BMRKR2`.
130		#' df <- extract_rsp_biomarkers(
131		#' variables = list(
132		#' rsp = "rsp",
133		#' biomarkers = c("BMRKR1", "AGE"),
134		#' covariates = "SEX",
135		#' subgroups = "BMRKR2"
136		#' ),
137		#' data = adrs_f
138		#' )
139		#' df
140		#'
141		#' # Here we group the levels of `BMRKR2` manually, and we add a stratification
142		#' # variable `STRATA1`. We also here use a continuous variable `EOSDY`
143		#' # which is then binarized internally (response is defined as this variable
144		#' # being larger than 500).
145		#' df_grouped <- extract_rsp_biomarkers(
146		#' variables = list(
147		#' rsp = "EOSDY",
148		#' biomarkers = c("BMRKR1", "AGE"),
149		#' covariates = "SEX",
150		#' subgroups = "BMRKR2",
151		#' strat = "STRATA1"
152		#' ),
153		#' data = adrs_f,
154		#' groups_lists = list(
155		#' BMRKR2 = list(
156		#' "low" = "LOW",
157		#' "low/medium" = c("LOW", "MEDIUM"),
158		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
159		#' )
160		#' ),
161		#' control = control_logistic(
162		#' response_definition = "I(response > 500)"
163		#' )
164		#' )
165		#' df_grouped
166		#'
167		#' @export
extract_rsp_biomarkers <- function(variables,
                                   data,
                                   groups_lists = list(),
                                   control = control_logistic(),
                                   label_all = "All Patients") {
  assert_list_of_variables(variables)
  checkmate::assert_string(variables$rsp)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_string(label_all)

  # Overall estimates on the full data set; these form the "content" rows.
  result_all <- h_logistic_mult_cont_df(
    variables = variables,
    data = data,
    control = control
  )
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables there is nothing more to add.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  # Same estimation repeated within each subgroup data set.
  subgroup_data <- h_split_by_subgroups(
    data,
    variables$subgroups,
    groups_lists = groups_lists
  )
  per_subgroup <- lapply(subgroup_data, function(grp) {
    estimates <- h_logistic_mult_cont_df(
      variables = variables,
      data = grp$df,
      control = control
    )
    # Repeat the subgroup's single label row once per estimate row.
    labels <- grp$df_labels[rep(1, times = nrow(estimates)), ]
    cbind(estimates, labels)
  })
  result_subgroups <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(result_all, result_subgroups)
}

1		#' Compare Variables Between Groups
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Comparison with a reference group for different `x` objects.
6		#'
7		#' @inheritParams argument_convention
8		#'
9		#' @note
10		#' * For factor variables, `denom` for factor proportions can only be `n` since the purpose is to compare proportions
11		#' between columns, therefore a row-based proportion would not make sense. Proportion based on `N_col` would
12		#' be difficult since we use counts for the chi-squared test statistic, therefore missing values should be accounted
13		#' for as explicit factor levels.
14		#' * For character variables, automatic conversion to factor does not guarantee that the table
15		#' will be generated correctly. In particular for sparse tables this very likely can fail.
16		#' Therefore it is always better to manually convert character variables to factors during pre-processing.
17		#' * For `compare_vars()`, the column split must define a reference group via `ref_group` so that the comparison
18		#' is well defined.
19		#' * When factor variables contain `NA`, it is expected that `NA` values have been conveyed to `na_level`
20		#' appropriately beforehand via [df_explicit_na()].
21		#'
22		#' @seealso Relevant constructor function [create_afun_compare()], and [s_summary()] which is used internally
23		#' to compute a summary within `s_compare()`.
24		#'
25		#' @name compare_variables
26		#' @include summarize_variables.R
27		NULL
28
29		#' @describeIn compare_variables S3 generic function to produce a comparison summary.
30		#'
31		#' @return
32		#' * `s_compare()` returns output of [s_summary()] and comparisons versus the reference group in the form of p-values.
33		#'
34		#' @export
s_compare <- function(x,
                      .ref_group,
                      .in_ref_col,
                      ...) {
  # S3 generic: dispatches on the class of the first argument, `x`.
  UseMethod("s_compare")
}
41
42		#' @describeIn compare_variables Method for `numeric` class. This uses the standard t-test
43		#' to calculate the p-value.
44		#'
45		#' @method s_compare numeric
46		#'
47		#' @examples
48		#' # `s_compare.numeric`
49		#'
50		#' ## Usual case where both this and the reference group vector have more than 1 value.
51		#' s_compare(rnorm(10, 5, 1), .ref_group = rnorm(5, -5, 1), .in_ref_col = FALSE)
52		#'
53		#' ## If one group has not more than 1 value, then p-value is not calculated.
54		#' s_compare(rnorm(10, 5, 1), .ref_group = 1, .in_ref_col = FALSE)
55		#'
56		#' ## Empty numeric does not fail, it returns NA-filled items and no p-value.
57		#' s_compare(numeric(), .ref_group = numeric(), .in_ref_col = FALSE)
58		#'
59		#' @export
s_compare.numeric <- function(x,
                              .ref_group,
                              .in_ref_col,
                              ...) {
  checkmate::assert_numeric(x)
  checkmate::assert_numeric(.ref_group)
  checkmate::assert_flag(.in_ref_col)

  # Start from the plain numeric summary and add the p-value to it.
  y <- s_summary.numeric(x = x, ...)

  # A t-test is only run outside the reference column and when both groups
  # have more than one available value; otherwise the p-value is left empty.
  can_test <- !.in_ref_col && n_available(x) > 1 && n_available(.ref_group) > 1
  y$pval <- if (can_test) {
    stats::t.test(x, .ref_group)$p.value
  } else {
    character()
  }

  y
}
78
79		#' @describeIn compare_variables Method for `factor` class. This uses the chi-squared test
80		#' to calculate the p-value.
81		#'
82		#' @param denom (`string`)\cr choice of denominator for factor proportions,
83		#' can only be `n` (number of values in this row and column intersection).
84		#'
85		#' @method s_compare factor
86		#'
87		#' @examples
88		#' # `s_compare.factor`
89		#'
90		#' ## Basic usage:
91		#' x <- factor(c("a", "a", "b", "c", "a"))
92		#' y <- factor(c("a", "b", "c"))
93		#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE)
94		#'
95		#' ## Management of NA values.
96		#' x <- explicit_na(factor(c("a", "a", "b", "c", "a", NA, NA)))
97		#' y <- explicit_na(factor(c("a", "b", "c", NA)))
98		#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE)
99		#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE)
100		#'
101		#' @export
s_compare.factor <- function(x,
                             .ref_group,
                             .in_ref_col,
                             denom = "n",
                             na.rm = TRUE, # nolint
                             na_level = "<Missing>",
                             ...) {
  checkmate::assert_flag(.in_ref_col)
  assert_valid_factor(x, any.missing = FALSE)
  assert_valid_factor(.ref_group, any.missing = FALSE)
  denom <- match.arg(denom)

  # Start from the factor summary and add the p-value to it.
  y <- s_summary.factor(
    x = x,
    denom = denom,
    na.rm = na.rm,
    na_level = na_level,
    ...
  )

  # With `na.rm`, the explicit missing level is discarded from both groups
  # before testing.
  if (na.rm) {
    x <- fct_discard(x, na_level)
    .ref_group <- fct_discard(.ref_group, na_level)
  }

  # Both groups must share the same levels, and there must be at least two.
  checkmate::assert_factor(x, levels = levels(.ref_group), min.levels = 2)

  can_test <- !.in_ref_col && length(x) > 0 && length(.ref_group) > 0
  y$pval <- if (can_test) {
    # Chi-squared test on the 2 x k table of level counts (this group vs
    # reference group); warnings from the test are suppressed.
    counts <- rbind(table(x), table(.ref_group))
    suppressWarnings(stats::chisq.test(counts))$p.value
  } else {
    character()
  }

  y
}
139
140		#' @describeIn compare_variables Method for `character` class. This makes an automatic
141		#' conversion to `factor` (with a warning) and then forwards to the method for factors.
142		#'
143		#' @param verbose (`logical`)\cr Whether warnings and messages should be printed. Mainly used
144		#' to print out information about factor casting. Defaults to `TRUE`.
145		#'
146		#' @method s_compare character
147		#'
148		#' @examples
149		#' # `s_compare.character`
150		#'
151		#' ## Basic usage:
152		#' x <- c("a", "a", "b", "c", "a")
153		#' y <- c("a", "b", "c")
154		#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE)
155		#'
156		#' ## Note that missing values handling can make a large difference:
157		#' x <- c("a", "a", "b", "c", "a", NA)
158		#' y <- c("a", "b", "c", rep(NA, 20))
159		#' s_compare(x,
160		#' .ref_group = y, .in_ref_col = FALSE,
161		#' .var = "x", verbose = FALSE
162		#' )
163		#' s_compare(x,
164		#' .ref_group = y, .in_ref_col = FALSE, .var = "x",
165		#' na.rm = FALSE, verbose = FALSE
166		#' )
167		#'
168		#' @export
s_compare.character <- function(x,
                                .ref_group,
                                .in_ref_col,
                                denom = "n",
                                na.rm = TRUE, # nolint
                                na_level = "<Missing>",
                                .var,
                                verbose = TRUE,
                                ...) {
  # Both vectors are converted to factors with identical settings, then the
  # call is re-dispatched through the generic.
  to_factor <- function(values) {
    as_factor_keep_attributes(values, x_name = .var, na_level = na_level, verbose = verbose)
  }
  x <- to_factor(x)
  .ref_group <- to_factor(.ref_group)

  s_compare(
    x = x,
    .ref_group = .ref_group,
    .in_ref_col = .in_ref_col,
    denom = denom,
    na.rm = na.rm,
    na_level = na_level,
    ...
  )
}
190
191		#' @describeIn compare_variables Method for `logical` class. A chi-squared test
192		#' is used. If missing values are not removed, then they are counted as `FALSE`.
193		#'
194		#' @method s_compare logical
195		#'
196		#' @examples
197		#' # `s_compare.logical`
198		#'
199		#' ## Basic usage:
200		#' x <- c(TRUE, FALSE, TRUE, TRUE)
201		#' y <- c(FALSE, FALSE, TRUE)
202		#' s_compare(x, .ref_group = y, .in_ref_col = FALSE)
203		#'
204		#' ## Management of NA values.
205		#' x <- c(NA, TRUE, FALSE)
206		#' y <- c(NA, NA, NA, NA, FALSE)
207		#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE)
208		#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE)
209		#'
210		#' @export
s_compare.logical <- function(x,
                              .ref_group,
                              .in_ref_col,
                              na.rm = TRUE, # nolint
                              denom = "n",
                              ...) {
  denom <- match.arg(denom)

  # Summary statistics are computed on the untouched input.
  y <- s_summary.logical(x = x, na.rm = na.rm, denom = denom, ...)

  # Missing values are either dropped or counted as `FALSE`.
  if (na.rm) {
    x <- stats::na.omit(x)
    .ref_group <- stats::na.omit(.ref_group)
  } else {
    x[is.na(x)] <- FALSE
    .ref_group[is.na(.ref_group)] <- FALSE
  }

  # A p-value is only computed outside of the reference column and when both
  # samples are non-empty.
  can_test <- !.in_ref_col && length(x) > 0 && length(.ref_group) > 0

  if (can_test) {
    # Fix the level set so both rows of the 2 x 2 table have the same columns.
    lvls <- c(TRUE, FALSE)
    x <- factor(x, levels = lvls)
    .ref_group <- factor(.ref_group, levels = lvls)
    tbl <- rbind(table(x), table(.ref_group))
    y$pval <- suppressWarnings(prop_chisq(tbl))
  } else {
    y$pval <- character()
  }

  y
}
245
246		#' @describeIn compare_variables Formatted analysis function which is used as `afun`
247		#' in `compare_vars()`.
248		#'
249		#' @return
250		#' * `a_compare()` returns the corresponding list with formatted [rtables::CellValue()].
251		#'
252		#' @export
a_compare <- function(x, .ref_group, .in_ref_col, ..., .var) {
  # S3 generic: the method is selected by the class of the first argument `x`.
  UseMethod("a_compare")
}
260
261		#' @describeIn compare_variables Formatted analysis function method for `numeric` class.
262		#'
263		#' @examples
264		#' # `a_compare.numeric`
265		#' a_compare(
266		#' rnorm(10, 5, 1),
267		#' .ref_group = rnorm(20, -5, 1),
268		#' .in_ref_col = FALSE,
269		#' .var = "bla"
270		#' )
271		#'
272		#' @export
a_compare.numeric <- make_afun(
  s_compare.numeric,
  # Formats and labels of the numeric summary statistics are extended by an
  # entry for the p-value added by `s_compare.numeric`.
  .formats = c(
    .a_summary_numeric_formats,
    # Plain "|" is required here: "\|" is not a recognized escape sequence in
    # an R string literal and makes the file fail to parse.
    pval = "x.xxxx | (<0.0001)"
  ),
  .labels = c(
    .a_summary_numeric_labels,
    pval = "p-value (t-test)"
  ),
  .null_ref_cells = FALSE
)
285
# Formats shared by the factor, character and logical `a_compare()` methods:
# the count summary formats plus one entry for the p-value.
.a_compare_counts_formats <- c(
  .a_summary_counts_formats,
  # Plain "|" is required here: "\|" is not a recognized escape sequence in an
  # R string literal and makes the file fail to parse.
  pval = "x.xxxx | (<0.0001)"
)

# Label used for the p-value row of the count-based `a_compare()` methods.
.a_compare_counts_labels <- c(
  pval = "p-value (chi-squared test)"
)
294
295		#' @describeIn compare_variables Formatted analysis function method for `factor` class.
296		#'
297		#' @examples
298		#' # `a_compare.factor`
299		#' # We need to ungroup `count` and `count_fraction` first so that the `rtables` formatting
300		#' # functions can be applied correctly.
301		#' afun <- make_afun(
302		#' getS3method("a_compare", "factor"),
303		#' .ungroup_stats = c("count", "count_fraction")
304		#' )
305		#' x <- factor(c("a", "a", "b", "c", "a"))
306		#' y <- factor(c("a", "a", "b", "c"))
307		#' afun(x, .ref_group = y, .in_ref_col = FALSE)
308		#'
309		#' @export
a_compare.factor <- make_afun(
  fun = s_compare.factor,
  # Formats and labels are the ones shared by all count-based methods.
  .labels = .a_compare_counts_labels,
  .formats = .a_compare_counts_formats,
  .null_ref_cells = FALSE
)
316
317		#' @describeIn compare_variables Formatted analysis function method for `character` class.
318		#'
319		#' @examples
320		#' # `a_compare.character`
321		#' afun <- make_afun(
322		#' getS3method("a_compare", "character"),
323		#' .ungroup_stats = c("count", "count_fraction")
324		#' )
325		#' x <- c("A", "B", "A", "C")
326		#' y <- c("B", "A", "C")
327		#' afun(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE)
328		#'
329		#' @export
a_compare.character <- make_afun(
  fun = s_compare.character,
  # Formats and labels are the ones shared by all count-based methods.
  .labels = .a_compare_counts_labels,
  .formats = .a_compare_counts_formats,
  .null_ref_cells = FALSE
)
336
337		#' @describeIn compare_variables Formatted analysis function method for `logical` class.
338		#'
339		#' @examples
340		#' # `a_compare.logical`
341		#' afun <- make_afun(
342		#' getS3method("a_compare", "logical")
343		#' )
344		#' x <- c(TRUE, FALSE, FALSE, TRUE, TRUE)
345		#' y <- c(TRUE, FALSE)
346		#' afun(x, .ref_group = y, .in_ref_col = FALSE)
347		#'
348		#' @export
a_compare.logical <- make_afun(
  fun = s_compare.logical,
  # Formats and labels are the ones shared by all count-based methods.
  .labels = .a_compare_counts_labels,
  .formats = .a_compare_counts_formats,
  .null_ref_cells = FALSE
)
355
356		#' Constructor Function for [compare_vars()]
357		#'
358		#' @description `r lifecycle::badge("stable")`
359		#'
360		#' Constructor function which creates a combined formatted analysis function.
361		#'
362		#' @inheritParams argument_convention
363		#'
364		#' @return Combined formatted analysis function for use in [compare_vars()].
365		#'
366		#' @note Since [a_compare()] is generic and we want customization of the formatting arguments
367		#' via [rtables::make_afun()], we need to create another temporary generic function, with
368		#' corresponding customized methods. Then in order for the methods to be found,
369		#' we need to wrap them in a combined `afun`. Since this is required by two layout creating
370		#' functions (and possibly others in the future), we provide a constructor that does this:
371		#' [create_afun_compare()].
372		#'
373		#' @seealso [compare_vars()]
374		#'
375		#' @examples
376		#' # `create_afun_compare()` to create combined `afun`
377		#'
378		#' afun <- create_afun_compare(
379		#' .stats = c("n", "count_fraction", "mean_sd", "pval"),
380		#' .indent_mods = c(pval = 1L)
381		#' )
382		#'
383		#' lyt <- basic_table() %>%
384		#' split_cols_by("ARMCD", ref_group = "ARM A") %>%
385		#' analyze(
386		#' "AGE",
387		#' afun = afun,
388		#' show_labels = "visible"
389		#' )
390		#' build_table(lyt, df = tern_ex_adsl)
391		#'
392		#' lyt <- basic_table() %>%
393		#' split_cols_by("ARMCD", ref_group = "ARM A") %>%
394		#' analyze(
395		#' "SEX",
396		#' afun = afun,
397		#' show_labels = "visible"
398		#' )
399		#' build_table(lyt, df = tern_ex_adsl)
400		#'
401		#' @export
create_afun_compare <- function(.stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  function(x,
           .ref_group,
           .in_ref_col,
           ...,
           .var) {
    # Temporary generic; its methods are the local `afun.*` objects below.
    afun <- function(x, ...) {
      UseMethod("afun", x)
    }

    # Statistics applicable to each kind of variable.
    numeric_stats <- afun_selected_stats(
      .stats,
      all_stats = c(names(.a_summary_numeric_formats), "pval")
    )
    factor_stats <- afun_selected_stats(
      .stats,
      all_stats = names(.a_compare_counts_formats)
    )
    ungroup_stats <- afun_selected_stats(.stats, c("count", "count_fraction"))

    # User-supplied settings restricted to the numeric statistics.
    numeric_formats <- extract_by_name(.formats, numeric_stats)
    numeric_labels <- extract_by_name(.labels, numeric_stats)
    numeric_indents <- extract_by_name(.indent_mods, numeric_stats)

    # User-supplied settings restricted to the count-based statistics; these
    # are shared by the factor, character and logical methods.
    counts_formats <- extract_by_name(.formats, factor_stats)
    counts_labels <- extract_by_name(.labels, factor_stats)
    counts_indents <- extract_by_name(.indent_mods, factor_stats)

    afun.numeric <- make_afun( # nolint
      a_compare.numeric,
      .stats = numeric_stats,
      .formats = numeric_formats,
      .labels = numeric_labels,
      .indent_mods = numeric_indents,
      .null_ref_cells = FALSE
    )

    afun.factor <- make_afun( # nolint
      a_compare.factor,
      .stats = factor_stats,
      .formats = counts_formats,
      .labels = counts_labels,
      .indent_mods = counts_indents,
      .ungroup_stats = ungroup_stats,
      .null_ref_cells = FALSE
    )

    afun.character <- make_afun( # nolint
      a_compare.character,
      .stats = factor_stats,
      .formats = counts_formats,
      .labels = counts_labels,
      .indent_mods = counts_indents,
      .ungroup_stats = ungroup_stats,
      .null_ref_cells = FALSE
    )

    # No `.ungroup_stats` for the logical method.
    afun.logical <- make_afun( # nolint
      a_compare.logical,
      .stats = factor_stats,
      .formats = counts_formats,
      .labels = counts_labels,
      .indent_mods = counts_indents,
      .null_ref_cells = FALSE
    )

    # Dispatch on the class of `x` to one of the customized methods above.
    afun(
      x = x,
      .ref_group = .ref_group,
      .in_ref_col = .in_ref_col,
      ...,
      .var = .var
    )
  }
}
471
472		#' @describeIn compare_variables Layout-creating function which can take statistics function arguments
473		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
474		#'
475		#' @param ... arguments passed to `s_compare()`.
476		#'
477		#' @return
478		#' * `compare_vars()` returns a layout object suitable for passing to further layouting functions,
479		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
480		#' the statistics from `s_compare()` to the table layout.
481		#'
482		#' @examples
483		#' # `compare_vars()` in `rtables` pipelines
484		#'
485		#' ## Default output within a `rtables` pipeline.
486		#' lyt <- basic_table() %>%
487		#' split_cols_by("ARMCD", ref_group = "ARM B") %>%
488		#' compare_vars(c("AGE", "SEX"))
489		#' build_table(lyt, tern_ex_adsl)
490		#'
491		#' ## Select and format statistics output.
492		#' lyt <- basic_table() %>%
493		#' split_cols_by("ARMCD", ref_group = "ARM C") %>%
494		#' compare_vars(
495		#' vars = "AGE",
496		#' .stats = c("mean_sd", "pval"),
497		#' .formats = c(mean_sd = "xx.x, xx.x"),
498		#' .labels = c(mean_sd = "Mean, SD")
499		#' )
500		#' build_table(lyt, df = tern_ex_adsl)
501		#'
502		#' @export
compare_vars <- function(lyt,
                         vars,
                         var_labels = vars,
                         nested = TRUE,
                         ...,
                         show_labels = "default",
                         table_names = vars,
                         .stats = c("n", "mean_sd", "count_fraction", "pval"),
                         .formats = NULL,
                         .labels = NULL,
                         .indent_mods = NULL) {
  # One combined analysis function handles every supported variable class.
  combined_afun <- create_afun_compare(
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Everything passed through `...` is forwarded via `extra_args`.
  analyze(
    lyt = lyt,
    vars = vars,
    var_labels = var_labels,
    table_names = table_names,
    show_labels = show_labels,
    nested = nested,
    inclNAs = TRUE,
    afun = combined_afun,
    extra_args = list(...)
  )
}

1		#' Occurrence Table Pruning
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Family of constructor and condition functions to flexibly prune occurrence tables.
6		#' The condition functions always return whether the row result is higher than the threshold.
7		#' Since they are of class [CombinationFunction()] they can be logically combined with other condition
8		#' functions.
9		#'
10		#' @note Since most table specifications are worded positively, we name our constructor and condition
11		#' functions positively, too. However, note that the result of [keep_rows()] says what
12		#' should be pruned, to conform with the [rtables::prune_table()] interface.
13		#'
14		#' @examples
15		#' \dontrun{
16		#' tab <- basic_table() %>%
17		#' split_cols_by("ARM") %>%
18		#' split_rows_by("RACE") %>%
19		#' split_rows_by("STRATA1") %>%
20		#' summarize_row_groups() %>%
21		#' summarize_vars("COUNTRY", .stats = "count_fraction") %>%
22		#' build_table(DM)
23		#' }
24		#'
25		#' @name prune_occurrences
26		NULL
27
28		#' @describeIn prune_occurrences Constructor for creating pruning functions based on
29		#' a row condition function. This removes all analysis rows (`TableRow`) that should be
30		#' pruned, i.e., don't fulfill the row condition. It removes the sub-tree if there are no
31		#' children left.
32		#'
33		#' @param row_condition (`CombinationFunction`)\cr condition function which works on individual
34		#' analysis rows and flags whether these should be kept in the pruned table.
35		#'
36		#' @return
37		#' * `keep_rows()` returns a pruning function that can be used with [rtables::prune_table()]
38		#' to prune an `rtables` table.
39		#'
40		#' @examples
41		#' \dontrun{
42		#' # `keep_rows`
43		#' is_non_empty <- !CombinationFunction(all_zero_or_na)
44		#' prune_table(tab, keep_rows(is_non_empty))
45		#' }
46		#'
47		#' @export
keep_rows <- function(row_condition) {
  checkmate::assert_function(row_condition)

  function(table_tree) {
    if (inherits(table_tree, "TableRow")) {
      # The pruning function says what to remove, hence the negation of the
      # "keep" condition.
      !row_condition(table_tree)
    } else {
      # Anything else is pruned only when it has no children.
      length(tree_children(table_tree)) == 0L
    }
  }
}
58
59		#' @describeIn prune_occurrences Constructor for creating pruning functions based on
60		#' a condition for the (first) content row in leaf tables. This removes all leaf tables where
61		#' the first content row does not fulfill the condition. It does not check individual rows.
62		#' It then proceeds recursively by removing the sub tree if there are no children left.
63		#'
64		#' @param content_row_condition (`CombinationFunction`)\cr condition function which works on individual
65		#' first content rows of leaf tables and flags whether these leaf tables should be kept in the pruned table.
66		#'
67		#' @return
68		#' * `keep_content_rows()` returns a pruning function that checks the condition on the first content
69		#' row of leaf tables in the table.
70		#'
71		#' @examples
72		#' # `keep_content_rows`
73		#' # Internal function - has_count_in_cols
74		#' \dontrun{
75		#' more_than_twenty <- has_count_in_cols(atleast = 20L, col_names = names(tab))
76		#' prune_table(tab, keep_content_rows(more_than_twenty))
77		#' }
78		#'
79		#' @export
keep_content_rows <- function(content_row_condition) {
  checkmate::assert_function(content_row_condition)

  function(table_tree) {
    if (is_leaf_table(table_tree)) {
      # Leaf tables are judged by their first content row; the result is
      # negated because the pruning function says what to remove.
      first_content_row <- h_content_first_row(table_tree)
      !content_row_condition(first_content_row)
    } else if (inherits(table_tree, "DataRow")) {
      # Individual data rows are never pruned by this function.
      FALSE
    } else {
      # Anything else is pruned only when it has no children.
      length(tree_children(table_tree)) == 0L
    }
  }
}
94
95		#' @describeIn prune_occurrences Constructor for creating condition functions on total counts in the specified columns.
96		#'
97		#' @param atleast (`count` or `proportion`)\cr threshold which should be met in order to keep the row.
98		#' @param ... arguments for row or column access, see [rtables_access]: either `col_names` (`character`) including
99		#' the names of the columns which should be used, or alternatively `col_indices` (`integer`) giving the indices
100		#' directly instead.
101		#'
102		#' @return
103		#' * `has_count_in_cols()` returns a condition function that sums the counts in the specified column.
104		#'
105		#' @examples
106		#' # Internal function - has_count_in_cols
107		#' \dontrun{
108		#' more_than_one <- has_count_in_cols(atleast = 1L, col_names = names(tab))
109		#' prune_table(tab, keep_rows(more_than_one))
110		#' }
111		#'
112		#' @keywords internal
has_count_in_cols <- function(atleast, ...) {
  checkmate::assert_count(atleast)

  # `...` is forwarded to `h_row_counts()` to select the columns.
  meets_total_count <- function(table_row) {
    sum(h_row_counts(table_row, ...)) >= atleast
  }

  CombinationFunction(meets_total_count)
}
121
122		#' @describeIn prune_occurrences Constructor for creating condition functions on any of the counts in
123		#' the specified columns satisfying a threshold.
124		#'
125		#' @param atleast (`count` or `proportion`)\cr threshold which should be met in order to keep the row.
126		#'
127		#' @return
128		#' * `has_count_in_any_col()` returns a condition function that compares the counts in the
129		#' specified columns with the threshold.
130		#'
131		#' @examples
132		#' \dontrun{
133		#' # `has_count_in_any_col`
134		#' any_more_than_one <- has_count_in_any_col(atleast = 1L, col_names = names(tab))
135		#' prune_table(tab, keep_rows(any_more_than_one))
136		#' }
137		#'
138		#' @export
has_count_in_any_col <- function(atleast, ...) {
  checkmate::assert_count(atleast)

  # `...` is forwarded to `h_row_counts()` to select the columns.
  any_count_meets <- function(table_row) {
    any(h_row_counts(table_row, ...) >= atleast)
  }

  CombinationFunction(any_count_meets)
}
146
147		#' @describeIn prune_occurrences Constructor for creating condition functions on total fraction in
148		#' the specified columns.
149		#'
150		#' @return
151		#' * `has_fraction_in_cols()` returns a condition function that sums the counts in the
152		#' specified column, and computes the fraction by dividing by the total column counts.
153		#'
154		#' @examples
155		#' \dontrun{
156		#' # `has_fraction_in_cols`
157		#' more_than_five_percent <- has_fraction_in_cols(atleast = 0.05, col_names = names(tab))
158		#' prune_table(tab, keep_rows(more_than_five_percent))
159		#' }
160		#'
161		#' @export
has_fraction_in_cols <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)

  # `...` is forwarded to both helpers so that row counts and column counts
  # refer to the same columns.
  meets_total_fraction <- function(table_row) {
    numerator <- sum(h_row_counts(table_row, ...))
    denominator <- sum(h_col_counts(table_row, ...))
    numerator / denominator >= atleast
  }

  CombinationFunction(meets_total_fraction)
}
173
174		#' @describeIn prune_occurrences Constructor for creating condition functions on any fraction in
175		#' the specified columns.
176		#'
177		#' @return
178		#' * `has_fraction_in_any_col()` returns a condition function that looks at the fractions
179		#' in the specified columns and checks whether any of them fulfill the threshold.
180		#'
181		#' @examples
182		#' \dontrun{
183		#' # `has_fraction_in_any_col`
184		#' any_atleast_five_percent <- has_fraction_in_any_col(atleast = 0.05, col_names = names(tab))
185		#' prune_table(tab, keep_rows(any_atleast_five_percent))
186		#' }
187		#'
188		#' @export
has_fraction_in_any_col <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)

  # `...` is forwarded to `h_row_fractions()` to select the columns.
  any_fraction_meets <- function(table_row) {
    any(h_row_fractions(table_row, ...) >= atleast)
  }

  CombinationFunction(any_fraction_meets)
}
196
197		#' @describeIn prune_occurrences Constructor for creating condition function that checks the difference
198		#' between the fractions reported in each specified column.
199		#'
200		#' @return
201		#' * `has_fractions_difference()` returns a condition function that extracts the fractions of each
202		#' specified column, and computes the difference of the minimum and maximum.
203		#'
204		#' @examples
205		#' \dontrun{
206		#' # `has_fractions_difference`
207		#' more_than_five_percent_diff <- has_fractions_difference(atleast = 0.05, col_names = names(tab))
208		#' prune_table(tab, keep_rows(more_than_five_percent_diff))
209		#' }
210		#'
211		#' @export
has_fractions_difference <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)

  # `...` is forwarded to `h_row_fractions()` to select the columns.
  spread_meets <- function(table_row) {
    min_max <- range(h_row_fractions(table_row, ...))
    # Largest minus smallest fraction across the selected columns.
    min_max[2] - min_max[1] >= atleast
  }

  CombinationFunction(spread_meets)
}
220
221		#' @describeIn prune_occurrences Constructor for creating condition function that checks the difference
222		#' between the counts reported in each specified column.
223		#'
224		#' @return
225		#' * `has_counts_difference()` returns a condition function that extracts the counts of each
226		#' specified column, and computes the difference of the minimum and maximum.
227		#'
228		#' @examples
229		#' # Internal function - has_counts_difference
230		#' \dontrun{
231		#' more_than_one_diff <- has_counts_difference(atleast = 1L, col_names = names(tab))
232		#' prune_table(tab, keep_rows(more_than_one_diff))
233		#' }
234		#'
235		#' @keywords internal
has_counts_difference <- function(atleast, ...) {
  checkmate::assert_count(atleast)

  # `...` is forwarded to `h_row_counts()` to select the columns.
  spread_meets <- function(table_row) {
    min_max <- range(h_row_counts(table_row, ...))
    # Largest minus smallest count across the selected columns.
    min_max[2] - min_max[1] >= atleast
  }

  CombinationFunction(spread_meets)
}

1		#' Patient Counts with Abnormal Range Values by Baseline Status
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Primary analysis variable `.var` indicates the abnormal range result (`character` or `factor`), and additional
6		#' analysis variables are `id` (`character` or `factor`) and `baseline` (`character` or `factor`). For each
7		#' direction specified in `abnormal` (e.g. high or low) we condition on baseline range result and count
8		#' patients in the numerator and denominator as follows:
9		#' * `Not <Abnormal>`
10		#' * `denom`: the number of patients without abnormality at baseline (excluding those with missing baseline)
11		#' * `num`: the number of patients in `denom` who also have at least one abnormality post-baseline
12		#' * `<Abnormal>`
13		#' * `denom`: the number of patients with abnormality at baseline
14		#' * `num`: the number of patients in `denom` who also have at least one abnormality post-baseline
15		#' * `Total`
16		#' * `denom`: the number of patients with at least one valid measurement post-baseline
17		#' * `num`: the number of patients in `denom` who also have at least one abnormality post-baseline
18		#'
19		#' @inheritParams argument_convention
20		#' @param abnormal (`character`)\cr identifying the abnormal range level(s) in `.var`.
21		#'
22		#' @note
23		#' * `df` should be filtered to include only post-baseline records.
24		#' * If the baseline variable or analysis variable contains `NA`, it is expected that `NA` has been
25		#' conveyed to `na_level` appropriately beforehand with [df_explicit_na()] or [explicit_na()].
26		#'
27		#' @seealso Relevant description function [d_count_abnormal_by_baseline()].
28		#'
29		#' @name abnormal_by_baseline
30		NULL
31
32		#' Description Function for [s_count_abnormal_by_baseline()]
33		#'
34		#' @description `r lifecycle::badge("stable")`
35		#'
36		#' Description function that produces the labels for [s_count_abnormal_by_baseline()].
37		#'
38		#' @inheritParams abnormal_by_baseline
39		#'
40		#' @return Abnormal category labels for [s_count_abnormal_by_baseline()].
41		#'
42		#' @examples
43		#' d_count_abnormal_by_baseline("LOW")
44		#'
45		#' @export
d_count_abnormal_by_baseline <- function(abnormal) {
  # Sentence-case version of the level, e.g. "LOW" -> "Low".
  first_upper <- toupper(substr(abnormal, 1, 1))
  rest_lower <- tolower(substring(abnormal, 2))
  sentence_case <- paste0(first_upper, rest_lower)

  list(
    not_abnormal = paste("Not", tolower(abnormal), "baseline status"),
    abnormal = paste(sentence_case, "baseline status"),
    total = "Total"
  )
}
58
59		#' @describeIn abnormal_by_baseline Statistics function for a single `abnormal` level.
60		#'
61		#' @param na_level (`string`)\cr the explicit `na_level` argument you used in the pre-processing steps (maybe with
62		#' [df_explicit_na()]). The default is `"<Missing>"`.
63		#'
64		#' @return
65		#' * `s_count_abnormal_by_baseline()` returns statistic `fraction` which is a named list with 3 labeled elements:
66		#' `not_abnormal`, `abnormal`, and `total`. Each element contains a vector with `num` and `denom` patient counts.
67		#'
68		#' @examples
69		#' df <- data.frame(
70		#' USUBJID = as.character(c(1:6)),
71		#' ANRIND = factor(c(rep("LOW", 4), "NORMAL", "HIGH")),
72		#' BNRIND = factor(c("LOW", "NORMAL", "HIGH", NA, "LOW", "NORMAL"))
73		#' )
74		#' df <- df_explicit_na(df)
75		#'
76		#' # Internal function - s_count_abnormal_by_baseline
77		#' \dontrun{
78		#' # Just for one abnormal level.
79		#' s_count_abnormal_by_baseline(df, .var = "ANRIND", abnormal = "HIGH")
80		#' }
81		#'
82		#' @keywords internal
s_count_abnormal_by_baseline <- function(df,
                                         .var,
                                         abnormal,
                                         na_level = "<Missing>",
                                         variables = list(id = "USUBJID", baseline = "BNRIND")) {
  checkmate::assert_string(.var)
  checkmate::assert_string(abnormal)
  checkmate::assert_string(na_level)
  assert_df_with_variables(df, c(range = .var, variables))
  checkmate::assert_subset(names(variables), c("id", "baseline"))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[variables$baseline]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))

  # Character input is converted to factor before validation.
  df[[.var]] <- as_factor_keep_attributes(df[[.var]], na_level = na_level)
  df[[variables$baseline]] <- as_factor_keep_attributes(df[[variables$baseline]], na_level = na_level)

  assert_valid_factor(df[[.var]], any.missing = FALSE)
  assert_valid_factor(df[[variables$baseline]], any.missing = FALSE)

  # Number of distinct patients among the given ids.
  n_patients <- function(ids) length(unique(ids))

  # Only records with a valid (non-missing) analysis value are considered.
  valid <- df[df[[.var]] != na_level, ]
  ids <- valid[[variables$id]]
  post <- valid[[.var]]
  base <- valid[[variables$baseline]]

  # Total: everybody with at least one valid post-baseline record is in the
  # denominator, regardless of the baseline value.
  total_denom <- n_patients(ids)
  total_num <- n_patients(ids[post == abnormal])

  # The two baseline-specific rows exclude records with missing baseline.
  has_baseline <- base != na_level
  ids <- ids[has_baseline]
  post <- post[has_baseline]
  base <- base[has_baseline]

  abn_at_baseline <- base == abnormal
  abn_post <- post == abnormal

  # Abnormal at baseline, and of those the ones abnormal post-baseline.
  abn_denom <- n_patients(ids[abn_at_baseline])
  abn_num <- n_patients(ids[abn_at_baseline & abn_post])

  # Not abnormal at baseline, and of those the ones abnormal post-baseline.
  not_abn_denom <- n_patients(ids[!abn_at_baseline])
  not_abn_num <- n_patients(ids[!abn_at_baseline & abn_post])

  # Bundle a numerator/denominator pair together with its row label.
  labelled_fraction <- function(num, denom, label) {
    formatters::with_label(c(num = num, denom = denom), label)
  }

  labels <- d_count_abnormal_by_baseline(abnormal)
  list(fraction = list(
    not_abnormal = labelled_fraction(not_abn_num, not_abn_denom, labels$not_abnormal),
    abnormal = labelled_fraction(abn_num, abn_denom, labels$abnormal),
    total = labelled_fraction(total_num, total_denom, labels$total)
  ))
}
144
145		#' @describeIn abnormal_by_baseline Formatted analysis function which is used as `afun`
146		#' in `count_abnormal_by_baseline()`.
147		#'
148		#' @return
149		#' * `a_count_abnormal_by_baseline()` returns the corresponding list with formatted [rtables::CellValue()].
150		#'
151		#' @examples
152		#' # Internal function - a_count_abnormal_by_baseline
153		#' \dontrun{
154		#' # Use the Formatted Analysis function for `analyze()`. We need to ungroup `fraction` first
155		#' # so that the `rtables` formatting function `format_fraction()` can be applied correctly.
156		#' afun <- make_afun(a_count_abnormal_by_baseline, .ungroup_stats = "fraction")
157		#' afun(df, .var = "ANRIND", abnormal = "LOW")
158		#' }
159		#'
160		#' @keywords internal
a_count_abnormal_by_baseline <- make_afun(
  fun = s_count_abnormal_by_baseline,
  # The `fraction` statistic is rendered with `format_fraction`.
  .formats = c(fraction = format_fraction)
)
165
166		#' @describeIn abnormal_by_baseline Layout-creating function which can take statistics function arguments
167		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
168		#'
169		#' @return
170		#' * `count_abnormal_by_baseline()` returns a layout object suitable for passing to further layouting functions,
171		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
172		#' the statistics from `s_count_abnormal_by_baseline()` to the table layout.
173		#'
174		#' @examples
175		#' # Layout creating function.
176		#' basic_table() %>%
177		#' count_abnormal_by_baseline(var = "ANRIND", abnormal = c(High = "HIGH")) %>%
178		#' build_table(df)
179		#'
180		#' # Passing of statistics function and formatting arguments.
181		#' df2 <- data.frame(
182		#' ID = as.character(c(1, 2, 3, 4)),
183		#' RANGE = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
184		#' BLRANGE = factor(c("LOW", "HIGH", "HIGH", "NORMAL"))
185		#' )
186		#'
187		#' basic_table() %>%
188		#' count_abnormal_by_baseline(
189		#' var = "RANGE",
190		#' abnormal = c(Low = "LOW"),
191		#' variables = list(id = "ID", baseline = "BLRANGE"),
192		#' .formats = c(fraction = "xx / xx"),
193		#' .indent_mods = c(fraction = 2L)
194		#' ) %>%
195		#' build_table(df2)
196		#'
197		#' @export
count_abnormal_by_baseline <- function(lyt,
                                       var,
                                       abnormal,
                                       ...,
                                       table_names = abnormal,
                                       .stats = NULL,
                                       .formats = NULL,
                                       .labels = NULL,
                                       .indent_mods = NULL) {
  checkmate::assert_character(abnormal, len = length(table_names), names = "named")
  checkmate::assert_string(var)

  # The same formatted analysis function is reused for every abnormal level.
  afun <- make_afun(
    a_count_abnormal_by_baseline,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "fraction"
  )

  # One `analyze()` call per abnormal level; the name of each element of
  # `abnormal` becomes the visible label of its sub-table.
  for (idx in seq_along(abnormal)) {
    level <- abnormal[idx]
    lyt <- analyze(
      lyt = lyt,
      vars = var,
      var_labels = names(level),
      show_labels = "visible",
      table_names = table_names[idx],
      afun = afun,
      extra_args = c(list(abnormal = level), list(...))
    )
  }

  lyt
}

1		#' Estimation of Proportions
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Estimate the proportion of responders within a studied population.
6		#'
7		#' @inheritParams argument_convention
8		#'
9		#' @seealso [h_proportions]
10		#'
11		#' @name estimate_proportions
12		NULL
13
14		#' @describeIn estimate_proportions Statistics function estimating a
15		#' proportion along with its confidence interval.
16		#'
17		#' @inheritParams prop_strat_wilson
18		#' @param df (`logical` or `data.frame`)\cr if only a logical vector is used,
19		#' it indicates whether each subject is a responder or not. `TRUE` represents
20		#' a successful outcome. If a `data.frame` is provided, also the `strata` variable
21		#' names must be provided in `variables` as a list element with the strata strings.
22		#' In the case of `data.frame`, the logical vector of responses must be indicated as a
23		#' variable name in `.var`.
24		#' @param method (`string`)\cr the method used to construct the confidence interval
25		#' for proportion of successful outcomes; one of `waldcc`, `wald`, `clopper-pearson`,
26		#' `wilson`, `wilsonc`, `strat_wilson`, `strat_wilsonc`, `agresti-coull` or `jeffreys`.
27		#' @param long (`flag`)\cr a long description is required.
28		#'
29		#' @return
30		#' * `s_proportion()` returns statistics `n_prop` (`n` and proportion) and `prop_ci` (proportion CI) for a
31		#' given variable.
32		#'
33		#' @examples
34		#' # Case with only logical vector.
35		#' rsp_v <- c(1, 0, 1, 0, 1, 1, 0, 0)
36		#' s_proportion(rsp_v)
37		#'
38		#' # Example for Stratified Wilson CI
39		#' nex <- 100 # Number of example rows
40		#' dta <- data.frame(
41		#' "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
42		#' "grp" = sample(c("A", "B"), nex, TRUE),
43		#' "f1" = sample(c("a1", "a2"), nex, TRUE),
44		#' "f2" = sample(c("x", "y", "z"), nex, TRUE),
45		#' stringsAsFactors = TRUE
46		#' )
47		#'
48		#' s_proportion(
49		#' df = dta,
50		#' .var = "rsp",
51		#' variables = list(strata = c("f1", "f2")),
52		#' conf_level = 0.90,
53		#' method = "strat_wilson"
54		#' )
55		#'
56		#' @export
s_proportion <- function(df,
                         .var,
                         conf_level = 0.95,
                         method = c(
                           "waldcc", "wald", "clopper-pearson",
                           "wilson", "wilsonc", "strat_wilson", "strat_wilsonc",
                           "agresti-coull", "jeffreys"
                         ),
                         weights = NULL,
                         max_iterations = 50,
                         variables = list(strata = NULL),
                         long = FALSE) {
  # Statistics function: responder count/proportion plus a confidence interval
  # computed with the selected `method`.
  method <- match.arg(method)
  checkmate::assert_flag(long)
  assert_proportion_value(conf_level)

  strata_colnames <- variables$strata
  if (is.null(strata_colnames)) {
    # Stratified methods cannot run without strata variables.
    if (checkmate::test_subset(method, c("strat_wilson", "strat_wilsonc"))) {
      stop("To use stratified methods you need to specify the strata variables.")
    }
  } else {
    # Checks for strata
    if (missing(df)) stop("When doing stratified analysis a data.frame with specific columns is needed.")
    checkmate::assert_character(strata_colnames, null.ok = FALSE)
    assert_df_with_variables(
      df,
      stats::setNames(as.list(strata_colnames), strata_colnames)
    )

    # One factor level per combination of the strata variables.
    # Further checks are pushed down to prop_strat_wilson.
    strata <- as.factor(interaction(df[strata_colnames]))
  }

  # `df` is either the response vector itself or a data.frame holding it in `.var`.
  rsp <- as.logical(
    if (checkmate::test_atomic_vector(df)) df else df[[.var]]
  )

  # Only evaluated lazily inside the switch for the stratified methods, so
  # `strata` does not need to exist for the other methods.
  strat_wilson_ci <- function(correct) {
    prop_strat_wilson(
      rsp,
      strata,
      weights,
      conf_level,
      max_iterations,
      correct = correct
    )$conf_int
  }

  prop_ci <- switch(method,
    "clopper-pearson" = prop_clopper_pearson(rsp, conf_level),
    "wilson" = prop_wilson(rsp, conf_level),
    "wilsonc" = prop_wilson(rsp, conf_level, correct = TRUE),
    "strat_wilson" = strat_wilson_ci(FALSE),
    "strat_wilsonc" = strat_wilson_ci(TRUE),
    "wald" = prop_wald(rsp, conf_level),
    "waldcc" = prop_wald(rsp, conf_level, correct = TRUE),
    "agresti-coull" = prop_agresti_coull(rsp, conf_level),
    "jeffreys" = prop_jeffreys(rsp, conf_level)
  )

  n_prop <- c(sum(rsp), mean(rsp))
  ci_label <- d_proportion(conf_level, method, long = long)

  list(
    "n_prop" = formatters::with_label(n_prop, "Responders"),
    # The interval is reported on the percentage scale.
    "prop_ci" = formatters::with_label(x = 100 * prop_ci, label = ci_label)
  )
}
127
128		#' @describeIn estimate_proportions Formatted analysis function which is used as `afun`
129		#' in `estimate_proportion()`.
130		#'
131		#' @return
132		#' * `a_proportion()` returns the corresponding list with formatted [rtables::CellValue()].
133		#'
134		#' @export
# Formatted analysis function wrapping `s_proportion()` with default cell formats
# for each of its statistics.
a_proportion <- make_afun(
  s_proportion,
  .formats = c(
    n_prop = "xx (xx.x%)",
    prop_ci = "(xx.x, xx.x)"
  )
)
139
140		#' @describeIn estimate_proportions Layout-creating function which can take statistics function arguments
141		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
142		#'
143		#' @param ... other arguments are ultimately conveyed to [s_proportion()].
144		#'
145		#' @return
146		#' * `estimate_proportion()` returns a layout object suitable for passing to further layouting functions,
147		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
148		#' the statistics from `s_proportion()` to the table layout.
149		#'
150		#' @examples
151		#' dta_test <- data.frame(
152		#' USUBJID = paste0("S", 1:12),
153		#' ARM = rep(LETTERS[1:3], each = 4),
154		#' AVAL = c(A = c(1, 1, 1, 1), B = c(0, 0, 1, 1), C = c(0, 0, 0, 0))
155		#' )
156		#'
157		#' basic_table() %>%
158		#' split_cols_by("ARM") %>%
159		#' estimate_proportion(vars = "AVAL") %>%
160		#' build_table(df = dta_test)
161		#'
162		#' @export
estimate_proportion <- function(lyt,
                                vars,
                                ...,
                                show_labels = "hidden",
                                table_names = vars,
                                .stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Specialize the formatted analysis function with the user's display settings.
  proportion_afun <- make_afun(
    a_proportion,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Everything passed via `...` is forwarded to the analysis function as extra arguments.
  analyze(
    lyt,
    vars,
    afun = proportion_afun,
    extra_args = list(...),
    show_labels = show_labels,
    table_names = table_names
  )
}
188
189		#' Helper Functions for Calculating Proportion Confidence Intervals
190		#'
191		#' @description `r lifecycle::badge("stable")`
192		#'
193		#' Functions to calculate different proportion confidence intervals for use in [estimate_proportion()].
194		#'
195		#' @inheritParams argument_convention
196		#' @inheritParams estimate_proportions
197		#'
198		#' @return Confidence interval of a proportion.
199		#'
200		#' @seealso [estimate_proportions], descriptive function [d_proportion()],
201		#' and helper functions [strata_normal_quantile()] and [update_weights_strat_wilson()].
202		#'
203		#' @name h_proportions
204		NULL
205
206		#' @describeIn h_proportions Calculates the Wilson interval by calling [stats::prop.test()].
207		#' Also referred to as Wilson score interval.
208		#'
209		#' @examples
210		#' rsp <- c(
211		#' TRUE, TRUE, TRUE, TRUE, TRUE,
212		#' FALSE, FALSE, FALSE, FALSE, FALSE
213		#' )
214		#' prop_wilson(rsp, conf_level = 0.9)
215		#'
216		#' @export
prop_wilson <- function(rsp, conf_level, correct = FALSE) {
  # Wilson (score) interval as computed by stats::prop.test();
  # `correct` toggles the continuity correction.
  n_responders <- sum(rsp)
  n_total <- length(rsp)

  fit <- stats::prop.test(
    n_responders,
    n_total,
    correct = correct,
    conf.level = conf_level
  )

  # Drop the attributes carried by the `conf.int` element.
  as.numeric(fit$conf.int)
}
227
228		#' @describeIn h_proportions Calculates the stratified Wilson confidence
229		#' interval for unequal proportions as described in \insertCite{Yan2010-jt;textual}{tern}
230		#'
231		#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
232		#' @param weights (`numeric` or `NULL`)\cr weights for each level of the strata. If `NULL`, they are
233		#' estimated using the iterative algorithm proposed in \insertCite{Yan2010-jt;textual}{tern} that
234		#' minimizes the weighted squared length of the confidence interval.
235		#' @param max_iterations (`count`)\cr maximum number of iterations for the iterative procedure used
236		#' to find estimates of optimal weights.
237		#' @param correct (`flag`)\cr include the continuity correction. For further information, see for example
238		#' [stats::prop.test()].
239		#'
240		#' @references
241		#' - \insertRef{Yan2010-jt}{tern}
242		#'
243		#' @examples
244		#' # Stratified Wilson confidence interval with unequal probabilities
245		#'
246		#' set.seed(1)
247		#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
248		#' strata_data <- data.frame(
249		#' "f1" = sample(c("a", "b"), 100, TRUE),
250		#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
251		#' stringsAsFactors = TRUE
252		#' )
253		#' strata <- interaction(strata_data)
254		#' n_strata <- ncol(table(rsp, strata)) # Number of strata
255		#'
256		#' prop_strat_wilson(
257		#' rsp = rsp, strata = strata,
258		#' conf_level = 0.90
259		#' )
260		#'
261		#' # Not automatic setting of weights
262		#' prop_strat_wilson(
263		#' rsp = rsp, strata = strata,
264		#' weights = rep(1 / n_strata, n_strata),
265		#' conf_level = 0.90
266		#' )
267		#'
268		#' @export
prop_strat_wilson <- function(rsp,
                              strata,
                              weights = NULL,
                              conf_level = 0.95,
                              max_iterations = NULL,
                              correct = FALSE) {
  # Stratified Wilson confidence interval: per-stratum Wilson intervals are
  # combined as a weighted sum. Returns a list with `conf_int` (named `lower`
  # and `upper`) and, when the weights were estimated here, also `weights`.
  checkmate::assert_logical(rsp, any.missing = FALSE)
  checkmate::assert_factor(strata, len = length(rsp))
  assert_proportion_value(conf_level)

  # Fix both response levels so that the "TRUE" row exists even when there is
  # no responder at all (a plain table(rsp, strata) would drop that row).
  tbl <- table(factor(rsp, levels = c(FALSE, TRUE)), strata)
  n_strata <- ncol(tbl)

  # Empty strata carry no information and are dropped from the computation.
  ns <- colSums(tbl)
  use_stratum <- (ns > 0)

  # Checking the weights and maximum number of iterations.
  do_iter <- is.null(weights)
  if (do_iter) {
    # Iteration parameters
    if (is.null(max_iterations)) max_iterations <- 10
    checkmate::assert_int(max_iterations, na.ok = FALSE, null.ok = FALSE, lower = 1)

    # Initialization for the iterative procedure: equal weights over the strata
    # that are actually used, so the length matches `vars` below.
    n_used <- sum(use_stratum)
    weights <- rep(1 / n_used, n_used)
  } else {
    checkmate::assert_numeric(weights, lower = 0, upper = 1, any.missing = FALSE, len = n_strata)
    # Compare with a tolerance: an exact bound check can reject valid weights
    # whose floating point sum differs from 1 in the last digits.
    if (!isTRUE(all.equal(sum(weights), 1))) {
      stop("Sum of weights must be 1.")
    }
    # NOTE(review): user-supplied weights are not subset to the non-empty strata;
    # confirm the intended handling when some strata levels have no observations.
  }

  xs <- tbl["TRUE", ][use_stratum]
  ns <- ns[use_stratum]
  ests <- xs / ns
  vars <- ests * (1 - ests) / ns

  strata_qnorm <- strata_normal_quantile(vars, weights, conf_level)

  # Iterative setting of weights if they were not set externally
  weights_new <- if (do_iter) {
    update_weights_strat_wilson(vars, strata_qnorm, weights, ns, max_iterations, conf_level)$weights
  } else {
    weights
  }

  # Confidence level applied within each stratum, derived from the stratified quantile.
  strata_conf_level <- 2 * stats::pnorm(strata_qnorm) - 1

  ci_by_strata <- Map(
    function(x, n) {
      # Classic Wilson's confidence interval
      suppressWarnings(stats::prop.test(x, n, correct = correct, conf.level = strata_conf_level)$conf.int)
    },
    x = xs,
    n = ns
  )
  lower_by_strata <- vapply(ci_by_strata, "[", numeric(1), 1L)
  upper_by_strata <- vapply(ci_by_strata, "[", numeric(1), 2L)

  conf_int <- c(
    lower = sum(weights_new * lower_by_strata),
    upper = sum(weights_new * upper_by_strata)
  )

  # Return values: estimated weights are only reported when they were computed here.
  if (do_iter) {
    list(conf_int = conf_int, weights = weights_new)
  } else {
    list(conf_int = conf_int)
  }
}
347
348		#' @describeIn h_proportions Calculates the Clopper-Pearson interval by calling [stats::binom.test()].
349		#' Also referred to as the `exact` method.
350		#'
351		#' @examples
352		#' prop_clopper_pearson(rsp, conf_level = .95)
353		#'
354		#' @export
prop_clopper_pearson <- function(rsp,
                                 conf_level) {
  # Exact (Clopper-Pearson) interval taken from stats::binom.test().
  n_responders <- sum(rsp)
  n_total <- length(rsp)

  fit <- stats::binom.test(x = n_responders, n = n_total, conf.level = conf_level)

  # Drop the attributes carried by the `conf.int` element.
  as.numeric(fit$conf.int)
}
364
365		#' @describeIn h_proportions Calculates the Wald interval by following the usual textbook definition
366		#' for a single proportion confidence interval using the normal approximation.
367		#'
368		#' @param correct (`flag`)\cr apply continuity correction.
369		#'
370		#' @examples
371		#' prop_wald(rsp, conf_level = 0.95)
372		#' prop_wald(rsp, conf_level = 0.95, correct = TRUE)
373		#'
374		#' @export
prop_wald <- function(rsp, conf_level, correct = FALSE) {
  # Wald interval: normal approximation around the observed proportion,
  # optionally widened by the continuity correction 1 / (2n).
  n <- length(rsp)
  p_hat <- mean(rsp)
  z <- stats::qnorm((1 + conf_level) / 2)
  continuity <- if (correct) 1 / (2 * n) else 0

  half_width <- z * sqrt(p_hat * (1 - p_hat)) / sqrt(n) + continuity

  # Truncate the interval to the [0, 1] range.
  c(
    max(0, p_hat - half_width),
    min(1, p_hat + half_width)
  )
}
388
389		#' @describeIn h_proportions Calculates the Agresti-Coull interval (created by Alan Agresti and Brent Coull) by
390		#' (for 95% CI) adding two successes and two failures to the data and then using the Wald formula to construct a CI.
391		#'
392		#' @examples
393		#' prop_agresti_coull(rsp, conf_level = 0.95)
394		#'
395		#' @export
prop_agresti_coull <- function(rsp, conf_level) {
  # Agresti-Coull interval: a Wald interval computed on adjusted counts.
  z <- stats::qnorm((1 + conf_level) / 2)

  # Add here both z^2 / 2 successes and failures.
  successes_adj <- sum(rsp) + z^2 / 2
  n_adj <- length(rsp) + z^2

  # Then proceed as with the Wald interval.
  p_adj <- successes_adj / n_adj
  half_width <- z * sqrt(p_adj * (1 - p_adj)) / sqrt(n_adj)

  # Truncate the interval to the [0, 1] range.
  c(
    max(0, p_adj - half_width),
    min(1, p_adj + half_width)
  )
}
414
415		#' @describeIn h_proportions Calculates the Jeffreys interval, an equal-tailed interval based on the
416		#' non-informative Jeffreys prior for a binomial proportion.
417		#'
418		#' @examples
419		#' prop_jeffreys(rsp, conf_level = 0.95)
420		#'
421		#' @export
prop_jeffreys <- function(rsp,
                          conf_level) {
  # Jeffreys interval: equal-tailed quantiles of a Beta(x + 0.5, n - x + 0.5)
  # distribution, with the bounds pinned to 0 / 1 at the extremes.
  n_total <- length(rsp)
  n_responders <- sum(rsp)
  tail_prob <- (1 - conf_level) / 2

  shape_1 <- n_responders + 0.5
  shape_2 <- n_total - n_responders + 0.5

  # No responders: the lower bound is set to 0.
  lower <- ifelse(
    n_responders == 0,
    0,
    stats::qbeta(tail_prob, shape_1, shape_2)
  )

  # Only responders: the upper bound is set to 1.
  upper <- ifelse(
    n_responders == n_total,
    1,
    stats::qbeta(1 - tail_prob, shape_1, shape_2)
  )

  c(lower, upper)
}
442
443		#' Description of the Proportion Summary
444		#'
445		#' @description `r lifecycle::badge("stable")`
446		#'
447		#' This is a helper function that describes the analysis in [s_proportion()].
448		#'
449		#' @inheritParams s_proportion
450		#' @param long (`flag`)\cr whether a long or a short (default) description is required.
451		#'
452		#' @return String describing the analysis.
453		#'
454		#' @export
d_proportion <- function(conf_level,
                         method,
                         long = FALSE) {
  # Human-readable name of the interval method; unknown methods are an error.
  method_part <- switch(method,
    "clopper-pearson" = "Clopper-Pearson",
    "waldcc" = "Wald, with correction",
    "wald" = "Wald, without correction",
    "wilson" = "Wilson, without correction",
    "strat_wilson" = "Stratified Wilson, without correction",
    "wilsonc" = "Wilson, with correction",
    "strat_wilsonc" = "Stratified Wilson, with correction",
    "agresti-coull" = "Agresti-Coull",
    "jeffreys" = "Jeffreys",
    stop(paste(method, "does not have a description"))
  )

  # Confidence level as a percentage, e.g. "95% CI".
  ci_part <- paste0(conf_level * 100, "% CI")
  if (long) {
    ci_part <- paste(ci_part, "for Response Rates")
  }

  paste0(ci_part, " (", method_part, ")")
}
477
478		#' Helper Function for the Estimation of Stratified Quantiles
479		#'
480		#' @description `r lifecycle::badge("stable")`
481		#'
482		#' This function wraps the estimation of stratified quantiles under the
483		#' large-sample (normal) approximation. It is only needed when the
484		#' proportions differ between strata.
485		#'
486		#' @inheritParams argument_convention
487		#' @inheritParams prop_strat_wilson
488		#'
489		#' @return Stratified quantile.
490		#'
491		#' @seealso [prop_strat_wilson()]
492		#'
493		#' @examples
494		#' strata_data <- table(data.frame(
495		#' "f1" = sample(c(TRUE, FALSE), 100, TRUE),
496		#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
497		#' stringsAsFactors = TRUE
498		#' ))
499		#' ns <- colSums(strata_data)
500		#' ests <- strata_data["TRUE", ] / ns
501		#' vars <- ests * (1 - ests) / ns
502		#' weights <- rep(1 / length(ns), length(ns))
503		#' strata_normal_quantile(vars, weights, 0.95)
504		#'
505		#' @export
strata_normal_quantile <- function(vars, weights, conf_level) {
  # Per-stratum contribution: squared weight times variance.
  weighted_vars <- weights^2 * vars
  scaling <- sqrt(sum(weighted_vars)) / sum(sqrt(weighted_vars))

  # Stratified quantile: the two-sided normal quantile rescaled by the ratio above.
  scaling * stats::qnorm((1 + conf_level) / 2)
}
511
512		#' Helper Function for the Estimation of Weights for `prop_strat_wilson`
513		#'
514		#' @description `r lifecycle::badge("stable")`
515		#'
516		#' This function wraps the iterative procedure used to estimate the weights
517		#' for each stratum. The weights are chosen to minimize the weighted squared
518		#' length of the confidence interval.
519		#'
520		#' @inheritParams prop_strat_wilson
521		#' @param vars (`numeric`)\cr normalized proportions for each strata.
522		#' @param strata_qnorm (`numeric`)\cr initial estimation with identical weights of the quantiles.
523		#' @param initial_weights (`numeric`)\cr initial weights used to calculate `strata_qnorm`. This can
524		#' be optimized in the future if we need to estimate better initial weights.
525		#' @param n_per_strata (`numeric`)\cr number of elements in each strata.
526		#' @param max_iterations (`count`)\cr maximum number of iterations to be tried. Convergence is always checked.
527		#' @param tol (`number`)\cr tolerance threshold for convergence.
528		#'
529		#' @return A `list` of 3 elements: `n_it`, `weights`, and `diff_v`.
530		#'
531		#' @seealso For references and details see [prop_strat_wilson()].
532		#'
533		#' @examples
534		#' vs <- c(0.011, 0.013, 0.012, 0.014, 0.017, 0.018)
535		#' sq <- 0.674
536		#' ws <- rep(1 / length(vs), length(vs))
537		#' ns <- c(22, 18, 17, 17, 14, 12)
538		#'
539		#' update_weights_strat_wilson(vs, sq, ws, ns, 100, 0.95, 0.001)
540		#'
541		#' @export
update_weights_strat_wilson <- function(vars,
                                        strata_qnorm,
                                        initial_weights,
                                        n_per_strata,
                                        max_iterations = 50,
                                        conf_level = 0.95,
                                        tol = 0.001) {
  # Iteratively re-estimates the strata weights until the total absolute change
  # between two consecutive iterations falls below `tol`, or `max_iterations`
  # is reached. Returns the number of iterations (`n_it`), the final `weights`
  # and the history of changes (`diff_v`).
  it <- 0
  diff_v <- NULL
  converged <- FALSE
  # Keeps the return value defined even if the loop body never runs.
  weights_new <- initial_weights

  while (it < max_iterations) {
    it <- it + 1
    weights_new_t <- (1 + strata_qnorm^2 / n_per_strata)^2
    weights_new_b <- (vars + strata_qnorm^2 / (4 * n_per_strata^2))
    weights_new <- weights_new_t / weights_new_b
    # Normalize so that the weights sum to one.
    weights_new <- weights_new / sum(weights_new)
    # The stratified quantile depends on the weights, so it is refreshed each iteration.
    strata_qnorm <- strata_normal_quantile(vars, weights_new, conf_level)
    diff_v <- c(diff_v, sum(abs(weights_new - initial_weights)))
    if (diff_v[length(diff_v)] < tol) {
      converged <- TRUE
      break
    }
    initial_weights <- weights_new
  }

  # Warn only on genuine non-convergence; reaching the tolerance exactly on the
  # last allowed iteration is still a successful run.
  if (!converged) {
    warning("The heuristic to find weights did not converge with max_iterations = ", max_iterations)
  }

  list(
    "n_it" = it,
    "weights" = weights_new,
    "diff_v" = diff_v
  )
}

1		#' Incidence Rate
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Estimate the event rate adjusted for person-years at risk, otherwise known
6		#' as incidence rate. Primary analysis variable is the person-years at risk.
7		#'
8		#' @inheritParams argument_convention
9		#' @param control (`list`)\cr parameters for estimation details, specified by using
10		#' the helper function [control_incidence_rate()]. Possible parameter options are:
11		#' * `conf_level` (`proportion`)\cr confidence level for the estimated incidence rate.
12		#' * `conf_type` (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
13		#' for confidence interval type.
14		#' * `time_unit_input` (`string`)\cr `day`, `week`, `month`, or `year` (default)
15		#' indicating time unit for data input.
16		#' * `time_unit_output` (`numeric`)\cr time unit for desired output (in person-years).
17		#' @param person_years (`numeric`)\cr total person-years at risk.
18		#' @param alpha (`numeric`)\cr two-sided alpha-level for confidence interval.
19		#' @param n_events (`integer`)\cr number of events observed.
20		#'
21		#' @seealso [control_incidence_rate()] and helper functions [h_incidence_rate].
22		#'
23		#' @name incidence_rate
24		NULL
25
26		#' @describeIn incidence_rate Statistics function which estimates the incidence rate and the
27		#' associated confidence interval.
28		#'
29		#' @return
30		#' * `s_incidence_rate()` returns the following statistics:
31		#' - `person_years`: Total person-years at risk.
32		#' - `n_events`: Total number of events observed.
33		#' - `rate`: Estimated incidence rate.
34		#' - `rate_ci`: Confidence interval for the incidence rate.
35		#'
36		#' @examples
37		#' library(dplyr)
38		#'
39		#' df <- data.frame(
40		#' USUBJID = as.character(seq(6)),
41		#' CNSR = c(0, 1, 1, 0, 0, 0),
42		#' AVAL = c(10.1, 20.4, 15.3, 20.8, 18.7, 23.4),
43		#' ARM = factor(c("A", "A", "A", "B", "B", "B"))
44		#' ) %>%
45		#' mutate(is_event = CNSR == 0) %>%
46		#' mutate(n_events = as.integer(is_event))
47		#'
48		#' # Internal function - s_incidence_rate
49		#' \dontrun{
50		#' s_incidence_rate(
51		#' df,
52		#' .var = "AVAL",
53		#' n_events = "n_events",
54		#' control = control_incidence_rate(
55		#' time_unit_input = "month",
56		#' time_unit_output = 100
57		#' )
58		#' )
59		#' }
60		#'
61		#' @keywords internal
s_incidence_rate <- function(df,
                             .var,
                             n_events,
                             is_event,
                             control = control_incidence_rate()) {
  # Statistics function: total person-years, total events, incidence rate and
  # its confidence interval.
  if (missing(is_event)) {
    assert_df_with_variables(df, list(tte = .var, n_events = n_events))
    checkmate::assert_string(.var)
    checkmate::assert_numeric(df[[.var]], any.missing = FALSE)
    checkmate::assert_integer(df[[n_events]], any.missing = FALSE)
  } else {
    warning("argument is_event will be deprecated. Please use n_events.")

    # The logical event flag is only used when no `n_events` column was given.
    if (missing(n_events)) {
      assert_df_with_variables(df, list(tte = .var, is_event = is_event))
      checkmate::assert_string(.var)
      checkmate::assert_logical(df[[is_event]], any.missing = FALSE)
      checkmate::assert_numeric(df[[.var]], any.missing = FALSE)
      n_events <- is_event
    }
  }

  unit_in <- control$time_unit_input
  unit_out <- control$time_unit_output

  # Conversion factor from the input time unit to years.
  years_per_unit <- (
    1 * (unit_in == "year") +
      1 / 12 * (unit_in == "month") +
      1 / 52.14 * (unit_in == "week") +
      1 / 365.24 * (unit_in == "day")
  )
  person_years <- sum(df[[.var]], na.rm = TRUE) * years_per_unit
  total_events <- sum(df[[n_events]], na.rm = TRUE)

  estimate <- h_incidence_rate(
    person_years,
    total_events,
    control
  )

  list(
    person_years = formatters::with_label(person_years, "Total patient-years at risk"),
    n_events = formatters::with_label(total_events, "Number of adverse events observed"),
    rate = formatters::with_label(estimate$rate, paste("AE rate per", unit_out, "patient-years")),
    rate_ci = formatters::with_label(estimate$rate_ci, f_conf_level(control$conf_level))
  )
}
107
108		#' @describeIn incidence_rate Formatted analysis function which is used as `afun`
109		#' in `estimate_incidence_rate()`.
110		#'
111		#' @return
112		#' * `a_incidence_rate()` returns the corresponding list with formatted [rtables::CellValue()].
113		#'
114		#' @examples
115		#' # Internal function - a_incidence_rate
116		#' \dontrun{
117		#' a_incidence_rate(
118		#' df,
119		#' .var = "AVAL",
120		#' n_events = "n_events",
121		#' control = control_incidence_rate(time_unit_input = "month", time_unit_output = 100)
122		#' )
123		#' }
124		#'
125		#' @keywords internal
# Formatted analysis function wrapping `s_incidence_rate()` with default cell
# formats for each of its statistics.
a_incidence_rate <- make_afun(
  s_incidence_rate,
  .formats = c(
    person_years = "xx.x",
    n_events = "xx",
    rate = "xx.xx",
    rate_ci = "(xx.xx, xx.xx)"
  )
)
135
136		#' @describeIn incidence_rate Layout-creating function which can take statistics function arguments
137		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
138		#'
139		#' @return
140		#' * `estimate_incidence_rate()` returns a layout object suitable for passing to further layouting functions,
141		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
142		#' the statistics from `s_incidence_rate()` to the table layout.
143		#'
144		#' @examples
145		#' basic_table() %>%
146		#' split_cols_by("ARM") %>%
147		#' add_colcounts() %>%
148		#' estimate_incidence_rate(
149		#' vars = "AVAL",
150		#' n_events = "n_events",
151		#' control = control_incidence_rate(
152		#' time_unit_input = "month",
153		#' time_unit_output = 100
154		#' )
155		#' ) %>%
156		#' build_table(df)
157		#'
158		#' @export
estimate_incidence_rate <- function(lyt,
                                    vars,
                                    ...,
                                    show_labels = "hidden",
                                    table_names = vars,
                                    .stats = NULL,
                                    .formats = NULL,
                                    .labels = NULL,
                                    .indent_mods = NULL) {
  # Specialize the formatted analysis function with the user's display settings.
  incidence_afun <- make_afun(
    a_incidence_rate,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Everything passed via `...` is forwarded to the analysis function as extra arguments.
  analyze(
    lyt,
    vars,
    afun = incidence_afun,
    extra_args = list(...),
    show_labels = show_labels,
    table_names = table_names
  )
}
185
186		#' Helper Functions for Incidence Rate
187		#'
188		#' @description `r lifecycle::badge("stable")`
189		#'
190		#' @param control (`list`)\cr parameters for estimation details, specified by using
191		#' the helper function [control_incidence_rate()]. Possible parameter options are:
192		#' * `conf_level`: (`proportion`)\cr confidence level for the estimated incidence rate.
193		#' * `conf_type`: (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
194		#' for confidence interval type.
195		#' * `time_unit_input`: (`string`)\cr `day`, `week`, `month`, or `year` (default)
196		#' indicating time unit for data input.
197		#' * `time_unit_output`: (`numeric`)\cr time unit for desired output (in person-years).
198		#' @param person_years (`numeric`)\cr total person-years at risk.
199		#' @param alpha (`numeric`)\cr two-sided alpha-level for confidence interval.
200		#' @param n_events (`integer`)\cr number of events observed.
201		#'
202		#' @return Estimated incidence rate `rate` and associated confidence interval `rate_ci`.
203		#'
204		#' @seealso [incidence_rate]
205		#'
206		#' @name h_incidence_rate
207		NULL
208
209		#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
210		#' associated confidence interval based on the normal approximation for the
211		#' incidence rate. Unit is one person-year.
212		#'
213		#' @examples
214		#' h_incidence_rate_normal(200, 2)
215		#'
216		#' @export
h_incidence_rate_normal <- function(person_years,
                                    n_events,
                                    alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  # Point estimate and its standard error on the rate scale.
  rate <- n_events / person_years
  std_err <- sqrt(rate / person_years)

  # Symmetric interval around the estimate using the two-sided normal quantile.
  z <- stats::qnorm(1 - alpha / 2)
  list(
    rate = rate,
    rate_ci = rate + c(-1, 1) * z * std_err
  )
}
230
231		#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
232		#' associated confidence interval based on the normal approximation for the
233		#' logarithm of the incidence rate. Unit is one person-year.
234		#'
235		#' @examples
236		#' h_incidence_rate_normal_log(200, 2)
237		#'
238		#' @export
h_incidence_rate_normal_log <- function(person_years,
                                        n_events,
                                        alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  rate <- n_events / person_years
  std_err <- sqrt(rate / person_years)

  # The interval is built on the log scale and transformed back.
  log_rate <- log(rate)
  log_std_err <- std_err / rate
  z <- stats::qnorm(1 - alpha / 2)

  list(
    rate = rate,
    rate_ci = exp(log_rate + c(-1, 1) * z * log_std_err)
  )
}
254
255		#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
256		#' associated exact confidence interval. Unit is one person-year.
257		#'
258		#' @examples
259		#' h_incidence_rate_exact(200, 2)
260		#'
261		#' @export
h_incidence_rate_exact <- function(person_years,
                                   n_events,
                                   alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  # Both limits are chi-square quantiles divided by twice the exposure.
  exposure <- 2 * person_years
  lower <- stats::qchisq(p = (alpha) / 2, df = 2 * n_events) / exposure
  upper <- stats::qchisq(p = 1 - (alpha) / 2, df = 2 * n_events + 2) / exposure

  list(
    rate = n_events / person_years,
    rate_ci = c(lower, upper)
  )
}
275
276		#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
277		#' associated Byar's confidence interval. Unit is one person-year.
278		#'
279		#' @examples
280		#' h_incidence_rate_byar(200, 2)
281		#'
282		#' @export
h_incidence_rate_byar <- function(person_years,
                                  n_events,
                                  alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  # Event count shifted by 0.5, shared by all terms of the interval.
  events_adj <- n_events + 0.5
  center <- 1 - 1 / (9 * events_adj)
  spread <- stats::qnorm(1 - alpha / 2) * sqrt(1 / events_adj) / 3

  lower <- events_adj * ((center - spread)^3) / person_years
  upper <- events_adj * ((center + spread)^3) / person_years

  list(
    rate = n_events / person_years,
    rate_ci = c(lower, upper)
  )
}
299
300		#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
301		#' associated confidence interval.
302		#'
303		#' @examples
304		#' # Internal function - h_incidence_rate
305		#' \dontrun{
306		#' h_incidence_rate(200, 2)
307		#'
308		#' h_incidence_rate(
309		#' 200,
310		#' 2,
311		#' control_incidence_rate(
312		#' conf_level = 0.9,
313		#' conf_type = "normal_log",
314		#' time_unit_output = 100
315		#' )
316		#' )
317		#' }
318		#'
319		#' @keywords internal
h_incidence_rate <- function(person_years,
                             n_events,
                             control = control_incidence_rate()) {
  alpha <- 1 - control$conf_level

  # Dispatch to the helper matching the requested confidence interval type.
  per_person_year <- switch(control$conf_type,
    normal = h_incidence_rate_normal(person_years, n_events, alpha),
    normal_log = h_incidence_rate_normal_log(person_years, n_events, alpha),
    exact = h_incidence_rate_exact(person_years, n_events, alpha),
    byar = h_incidence_rate_byar(person_years, n_events, alpha)
  )

  # Rescale the estimate and its interval by the requested output time unit.
  scale <- control$time_unit_output
  list(
    rate = per_person_year$rate * scale,
    rate_ci = per_person_year$rate_ci * scale
  )
}

1		#' Helper Functions for Cox Proportional Hazards Regression
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper functions used in [fit_coxreg_univar()] and [fit_coxreg_multivar()].
6		#'
7		#' @inheritParams argument_convention
8		#' @inheritParams h_coxreg_univar_extract
9		#' @inheritParams cox_regression_inter
10		#' @inheritParams control_coxreg
11		#'
12		#' @seealso [cox_regression]
13		#'
14		#' @name h_cox_regression
15		NULL
16
17		#' @describeIn h_cox_regression Helper for Cox regression formula. Creates a list of formulas. It is used
18		#' internally by [fit_coxreg_univar()] for the comparison of univariate Cox regression models.
19		#'
20		#' @return
21		#' * `h_coxreg_univar_formulas()` returns a `character` vector coercible into formulas (e.g [stats::as.formula()]).
22		#'
23		#' @examples
24		#' # `h_coxreg_univar_formulas`
25		#'
26		#' ## Simple formulas.
27		#' h_coxreg_univar_formulas(
28		#' variables = list(
29		#' time = "time", event = "status", arm = "armcd", covariates = c("X", "y")
30		#' )
31		#' )
32		#'
33		#' ## Addition of an optional strata.
34		#' h_coxreg_univar_formulas(
35		#' variables = list(
36		#' time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
37		#' strata = "SITE"
38		#' )
39		#' )
40		#'
41		#' ## Inclusion of the interaction term.
42		#' h_coxreg_univar_formulas(
43		#' variables = list(
44		#' time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
45		#' strata = "SITE"
46		#' ),
47		#' interaction = TRUE
48		#' )
49		#'
50		#' ## Only covariates fitted in separate models.
51		#' h_coxreg_univar_formulas(
52		#' variables = list(
53		#' time = "time", event = "status", covariates = c("X", "y")
54		#' )
55		#' )
56		#'
57		#' @export
h_coxreg_univar_formulas <- function(variables,
                                     interaction = FALSE) {
  # Builds one formula string per covariate (plus a reference formula with the
  # arm only, when an arm is given) and returns them as a named character vector.
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  checkmate::assert_flag(interaction)

  # An interaction term needs both an arm and at least one covariate.
  if (!has_arm || is.null(variables$covariates)) {
    checkmate::assert_false(interaction)
  }

  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # Pieces shared by the reference formula and the covariate formulas.
  lhs <- paste0("survival::Surv(", variables$time, ", ", variables$event, ") ~ ")
  strata_term <- if (!is.null(variables$strata)) {
    paste0(" + strata(", paste0(variables$strata, collapse = ", "), ")")
  } else {
    ""
  }

  forms <- if (!is.null(variables$covariates)) {
    # paste0() is vectorized over the covariates: one formula per covariate.
    paste0(
      lhs,
      ifelse(has_arm, variables$arm, "1"),
      if (interaction) " * " else " + ",
      variables$covariates,
      strata_term
    )
  } else {
    NULL
  }
  nams <- variables$covariates

  if (has_arm) {
    # Reference model: arm (and strata) only, placed first under the name "ref".
    ref <- paste0(lhs, variables$arm, strata_term)
    forms <- c(ref, forms)
    nams <- c("ref", nams)
  }

  stats::setNames(forms, nams)
}
107
108		#' @describeIn h_cox_regression Helper for multivariate Cox regression formula. Creates a formula
109		#' string. It is used internally by [fit_coxreg_multivar()] for the comparison of multivariate Cox
110		#' regression models. Interactions will not be included in multivariate Cox regression model.
111		#'
112		#' @return
113		#' * `h_coxreg_multivar_formula()` returns a `string` coercible into a formula (e.g [stats::as.formula()]).
114		#'
115		#' @examples
116		#' # `h_coxreg_multivar_formula`
117		#'
118		#' h_coxreg_multivar_formula(
119		#' variables = list(
120		#' time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE")
121		#' )
122		#' )
123		#'
124		#' # Addition of an optional strata.
125		#' h_coxreg_multivar_formula(
126		#' variables = list(
127		#' time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE"),
128		#' strata = "SITE"
129		#' )
130		#' )
131		#'
132		#' # Example without treatment arm.
133		#' h_coxreg_multivar_formula(
134		#' variables = list(
135		#' time = "AVAL", event = "event", covariates = c("RACE", "AGE"),
136		#' strata = "SITE"
137		#' )
138		#' )
139		#'
140		#' @export
h_coxreg_multivar_formula <- function(variables) {
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  # `arm` is optional; `event` and `time` are always validated.
  required <- c(if (has_arm) "arm", "event", "time")
  assert_list_of_variables(variables[required])

  # Response plus the first right-hand-side term (arm, or intercept only).
  model_str <- paste0(
    "survival::Surv(", variables$time, ", ", variables$event, ") ~ ",
    ifelse(has_arm, variables$arm, "1")
  )

  # All covariates enter additively; no interaction terms are generated.
  covs <- variables$covariates
  if (length(covs) > 0) {
    model_str <- paste(model_str, paste(covs, collapse = " + "), sep = " + ")
  }

  if (is.null(variables$strata)) {
    return(model_str)
  }

  # Stratification variables are wrapped in a single `strata()` term.
  paste0(
    model_str,
    " + strata(", paste0(variables$strata, collapse = ", "), ")"
  )
}
162
163		#' @describeIn h_cox_regression Utility function to help tabulate the result of
164		#' a univariate Cox regression model.
165		#'
166		#' @param effect (`string`)\cr the treatment variable.
167		#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()].
168		#'
169		#' @return
170		#' * `h_coxreg_univar_extract()` returns a `data.frame` with variables `effect`, `term`, `term_label`, `level`,
171		#' `n`, `hr`, `lcl`, `ucl`, and `pval`.
172		#'
173		#' @examples
174		#' library(survival)
175		#'
176		#' dta_simple <- data.frame(
177		#' time = c(5, 5, 10, 10, 5, 5, 10, 10),
178		#' status = c(0, 0, 1, 0, 0, 1, 1, 1),
179		#' armcd = factor(LETTERS[c(1, 1, 1, 1, 2, 2, 2, 2)], levels = c("A", "B")),
180		#' var1 = c(45, 55, 65, 75, 55, 65, 85, 75),
181		#' var2 = c("F", "M", "F", "M", "F", "M", "F", "U")
182		#' )
183		#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
184		#' result <- h_coxreg_univar_extract(
185		#' effect = "armcd", covar = "armcd", mod = mod, data = dta_simple
186		#' )
187		#' result
188		#'
189		#' @export
h_coxreg_univar_extract <- function(effect,
                                    covar,
                                    data,
                                    mod,
                                    control = control_coxreg()) {
  checkmate::assert_string(covar)
  checkmate::assert_string(effect)
  checkmate::assert_class(mod, "coxph")

  # Map the control's p-value method onto the test name passed to the ANOVA helper.
  anova_test <- c(wald = "Wald", likelihood = "LR")[control$pval_method]
  anova_tab <- muffled_car_anova(mod, anova_test)
  cox_tab <- broom::tidy(summary(mod, conf.int = control$conf_level))

  # P-value: the ANOVA row of `effect`, column whose name contains "Pr".
  anova_row <- anova_tab[effect, , drop = TRUE]
  pval <- anova_row[[grep(pattern = "Pr", x = names(anova_row)), drop = TRUE]]

  # Coefficient rows whose term name matches `effect`.
  effect_rows <- cox_tab[grepl(effect, cox_tab$level), ]

  is_treatment <- effect == covar
  if (is_treatment) {
    covar_levels <- levels(data[[covar]])
    term_label <- paste0(covar_levels[2], " vs control (", covar_levels[1], ")")
  } else {
    term_label <- unname(labels_or_names(data[covar]))
  }

  data.frame(
    effect = if (is_treatment) "Treatment:" else "Covariate:",
    term = covar,
    term_label = term_label,
    level = levels(data[[effect]])[2],
    n = mod[["n"]],
    hr = unname(effect_rows["exp(coef)"]),
    lcl = unname(effect_rows[grep("lower", names(effect_rows))]),
    ucl = unname(effect_rows[grep("upper", names(effect_rows))]),
    pval = pval,
    stringsAsFactors = FALSE
  )
}
232
233		#' @describeIn h_cox_regression Tabulation of multivariate Cox regressions. Utility function to help
234		#' tabulate the result of a multivariate Cox regression model for a treatment/covariate variable.
235		#'
236		#' @return
237		#' * `h_coxreg_multivar_extract()` returns a `data.frame` with variables `pval`, `hr`, `lcl`, `ucl`, `level`,
238		#' `n`, `term`, and `term_label`.
239		#'
240		#' @examples
241		#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
242		#' result <- h_coxreg_multivar_extract(
243		#' var = "var1", mod = mod, data = dta_simple
244		#' )
245		#' result
246		#'
247		#' @export
h_coxreg_multivar_extract <- function(var,
                                      data,
                                      mod,
                                      control = control_coxreg()) {
  test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]
  mod_aov <- muffled_car_anova(mod, test_statistic)

  msum <- summary(mod, conf.int = control$conf_level)
  sum_anova <- broom::tidy(mod_aov)
  sum_cox <- broom::tidy(msum)

  # Overall (ANOVA) p-value row for the variable.
  ret_anova <- sum_anova[sum_anova$term == var, c("term", "p.value")]
  names(ret_anova)[2] <- "pval"

  # Coefficient rows: factor terms are named "<var><level>", other terms "<var>".
  keep_cols <- !(names(sum_cox) %in% "exp(-coef)")
  ret_cox <- if (is.factor(data[[var]])) {
    sum_cox[startsWith(prefix = var, x = sum_cox$level), keep_cols]
  } else {
    sum_cox[(var == sum_cox$level), keep_cols]
  }
  names(ret_cox)[1:4] <- c("pval", "hr", "lcl", "ucl")
  varlab <- unname(labels_or_names(data[var]))
  ret_cox$term <- varlab

  if (is.numeric(data[[var]])) {
    ret <- ret_cox
    ret$term_label <- ret$term
  } else {
    var_levels <- levels(data[[var]])

    # With at most two levels the single coefficient row already carries the
    # p-value, so the header row's p-value is blanked.
    if (length(var_levels) <= 2) {
      ret_anova$pval <- NA
    }
    ret_anova$term_label <- paste0(varlab, " (reference = ", var_levels[1], ")")

    # Strip only the leading variable name from the coefficient name.
    # `gsub(var, "", ...)` treated `var` as a regular expression and removed
    # every occurrence, corrupting levels that contain the variable name
    # (e.g. variable "ARM" with level "ARM A").
    ret_cox$level <- substring(ret_cox$level, nchar(var) + 1)
    ret_cox$term_label <- ret_cox$level
    ret <- dplyr::bind_rows(ret_anova, ret_cox)
  }

  as.data.frame(ret)
}

1		#' Add Titles, Footnotes, Page Number, and a Bounding Box to a Grid Grob
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' This function is useful to label grid grobs (also `ggplot2`, and `lattice` plots)
6		#' with title, footnote, and page numbers.
7		#'
8		#' @inheritParams grid::grob
9		#' @param grob a grid grob object, optionally `NULL` if only a `grob` with the decoration should be shown.
10		#' @param titles vector of character strings. Vector elements are separated by a newline and strings are wrapped
11		#' according to the page width.
12		#' @param footnotes vector of character strings. Same rules as for `titles`.
13		#' @param page string with page numeration, if `NULL` then no page number is displayed.
14		#' @param width_titles unit object
15		#' @param width_footnotes unit object
16		#' @param border boolean, whether a border should be drawn around the plot or not.
17		#' @param margins unit object of length 4
18		#' @param padding unit object of length 4
19		#' @param outer_margins unit object of length 4
20		#' @param gp_titles a `gpar` object
21		#' @param gp_footnotes a `gpar` object
22		#'
23		#' @return A grid grob (`gTree`).
24		#'
25		#' @details The titles and footnotes will be ragged, i.e. each title will be wrapped individually.
26		#'
27		#' @examples
28		#' library(grid)
29		#'
30		#' titles <- c(
31		#' "Edgar Anderson's Iris Data",
32		#' paste(
33		#' "This famous (Fisher's or Anderson's) iris data set gives the measurements",
34		#' "in centimeters of the variables sepal length and width and petal length",
35		#' "and width, respectively, for 50 flowers from each of 3 species of iris."
36		#' )
37		#' )
38		#'
39		#' footnotes <- c(
40		#' "The species are Iris setosa, versicolor, and virginica.",
41		#' paste(
42		#' "iris is a data frame with 150 cases (rows) and 5 variables (columns) named",
43		#' "Sepal.Length, Sepal.Width, Petal.Length, Petal.Width, and Species."
44		#' )
45		#' )
46		#'
47		#' ## empty plot
48		#' grid.newpage()
49		#'
50		#' grid.draw(
51		#' decorate_grob(
52		#' NULL,
53		#' titles = titles,
54		#' footnotes = footnotes,
55		#' page = "Page 4 of 10"
56		#' )
57		#' )
58		#'
59		#' # grid
60		#' p <- gTree(
61		#' children = gList(
62		#' rectGrob(),
63		#' xaxisGrob(),
64		#' yaxisGrob(),
65		#' textGrob("Sepal.Length", y = unit(-4, "lines")),
66		#' textGrob("Petal.Length", x = unit(-3.5, "lines"), rot = 90),
67		#' pointsGrob(iris$Sepal.Length, iris$Petal.Length, gp = gpar(col = iris$Species), pch = 16)
68		#' ),
69		#' vp = vpStack(plotViewport(), dataViewport(xData = iris$Sepal.Length, yData = iris$Petal.Length))
70		#' )
71		#' grid.newpage()
72		#' grid.draw(p)
73		#'
74		#' grid.newpage()
75		#' grid.draw(
76		#' decorate_grob(
77		#' grob = p,
78		#' titles = titles,
79		#' footnotes = footnotes,
80		#' page = "Page 6 of 129"
81		#' )
82		#' )
83		#'
84		#' ## with ggplot2
85		#' library(ggplot2)
86		#'
87		#' p_gg <- ggplot2::ggplot(iris, aes(Sepal.Length, Sepal.Width, col = Species)) +
88		#' ggplot2::geom_point()
89		#' p_gg
90		#' p <- ggplotGrob(p_gg)
91		#' grid.newpage()
92		#' grid.draw(
93		#' decorate_grob(
94		#' grob = p,
95		#' titles = titles,
96		#' footnotes = footnotes,
97		#' page = "Page 6 of 129"
98		#' )
99		#' )
100		#'
101		#' ## with lattice
102		#' library(lattice)
103		#'
104		#' xyplot(Sepal.Length ~ Petal.Length, data = iris, col = iris$Species)
105		#' p <- grid.grab()
106		#' grid.newpage()
107		#' grid.draw(
108		#' decorate_grob(
109		#' grob = p,
110		#' titles = titles,
111		#' footnotes = footnotes,
112		#' page = "Page 6 of 129"
113		#' )
114		#' )
115		#'
116		#' # with gridExtra - no borders
117		#' library(gridExtra)
118		#' grid.newpage()
119		#' grid.draw(
120		#' decorate_grob(
121		#' tableGrob(
122		#' head(mtcars)
123		#' ),
124		#' titles = "title",
125		#' footnotes = "footnote",
126		#' border = FALSE
127		#' )
128		#' )
129		#'
130		#' @export
decorate_grob <- function(grob,
                          titles,
                          footnotes,
                          page = "",
                          width_titles = grid::unit(1, "npc"),
                          width_footnotes = grid::unit(1, "npc") - grid::stringWidth(page),
                          border = TRUE,
                          margins = grid::unit(c(1, 0, 1, 0), "lines"),
                          padding = grid::unit(rep(1, 4), "lines"),
                          outer_margins = grid::unit(c(2, 1.5, 3, 1.5), "cm"),
                          gp_titles = grid::gpar(),
                          gp_footnotes = grid::gpar(fontsize = 8),
                          name = NULL,
                          gp = grid::gpar(),
                          vp = NULL) {
  # Title text, wrapped to `width_titles`, placed in layout row 1.
  titles_grob <- split_text_grob(
    titles,
    x = 0, y = 1,
    just = c("left", "top"),
    width = width_titles,
    vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 1),
    gp = gp_titles
  )

  # Footnote text, wrapped to `width_footnotes`, placed in layout row 3.
  footnotes_grob <- split_text_grob(
    footnotes,
    x = 0, y = 1,
    just = c("left", "top"),
    width = width_footnotes,
    vp = grid::viewport(layout.pos.row = 3, layout.pos.col = 1),
    gp = gp_footnotes
  )

  # The outer gTree stores all inputs as fields (checked by the
  # `decoratedGrob` validDetails method) and has one child: a three-row
  # layout of titles / plot area / footnotes + page number.
  grid::gTree(
    grob = grob,
    titles = titles,
    footnotes = footnotes,
    page = page,
    width_titles = width_titles,
    width_footnotes = width_footnotes,
    border = border,
    margins = margins,
    padding = padding,
    outer_margins = outer_margins,
    gp_titles = gp_titles,
    gp_footnotes = gp_footnotes,
    children = grid::gList(
      grid::gTree(
        children = grid::gList(
          titles_grob,
          # Middle row: optional border rectangle around the padded grob.
          grid::gTree(
            children = grid::gList(
              if (border) grid::rectGrob(),
              grid::gTree(
                children = grid::gList(
                  grob
                ),
                vp = grid::plotViewport(margins = padding)
              )
            ),
            vp = grid::vpStack(
              grid::viewport(layout.pos.row = 2, layout.pos.col = 1),
              grid::plotViewport(margins = margins)
            )
          ),
          footnotes_grob,
          # Page label shares row 3 with the footnotes, bottom-right aligned.
          grid::textGrob(
            page,
            x = 1, y = 0,
            just = c("right", "bottom"),
            vp = grid::viewport(layout.pos.row = 3, layout.pos.col = 1),
            gp = gp_footnotes
          )
        ),
        childrenvp = NULL,
        name = "titles_grob_footnotes",
        vp = grid::vpStack(
          grid::plotViewport(margins = outer_margins),
          grid::viewport(
            # Rows 1 and 3 take the height of their text; row 2 gets the rest.
            layout = grid::grid.layout(
              nrow = 3, ncol = 1,
              heights = grid::unit.c(
                grid::grobHeight(titles_grob),
                grid::unit(1, "null"),
                grid::grobHeight(footnotes_grob)
              )
            )
          )
        )
      )
    ),
    name = name,
    gp = gp,
    vp = vp,
    cl = "decoratedGrob"
  )
}
228
229		#' @importFrom grid validDetails
230		#' @noRd
validDetails.decoratedGrob <- function(x) {
  # A non-unit spacing field must be a vector of length 4; `label` keeps the
  # assertion message tied to the original field expression.
  assert_len4_unless_unit <- function(value, label) {
    if (!grid::is.unit(value)) {
      checkmate::assert_vector(value, len = 4, .var.name = label)
    }
  }

  checkmate::assert_character(x$titles)
  checkmate::assert_character(x$footnotes)

  # `grob` may be NULL (decoration only); otherwise it must be a grid grob.
  if (!is.null(x$grob)) {
    checkmate::assert_true(grid::is.grob(x$grob))
  }
  # The page label is only type-checked when it has exactly one element.
  if (length(x$page) == 1) {
    checkmate::assert_character(x$page)
  }

  assert_len4_unless_unit(x$outer_margins, "x$outer_margins")
  assert_len4_unless_unit(x$margins, "x$margins")
  assert_len4_unless_unit(x$padding, "x$padding")

  x
}
253
254		#' @importFrom grid widthDetails
255		#' @noRd
widthDetails.decoratedGrob <- function(x) {
  # The width is always reported as one "null" unit, independent of `x`.
  grid::unit(1, units = "null")
}
259
260		#' @importFrom grid heightDetails
261		#' @noRd
heightDetails.decoratedGrob <- function(x) {
  # The height is always reported as one "null" unit, independent of `x`.
  grid::unit(1, units = "null")
}
265
266		# Adapted from Paul Murrell, R Graphics, 2nd Edition
267		# https://www.stat.auckland.ac.nz/~paul/RG2e/interactgrid-splittext.R
# Greedy word wrap of a single string.
#
# `text` is split on single spaces and re-joined so that each line stays
# narrower than `width` (a grid unit, resolved in the current viewport and
# font context). Returns one string with "\n" inserted at the line breaks;
# `text` is returned unchanged when it already fits or cannot be split.
split_string <- function(text, width) {
  availwidth <- grid::convertWidth(width, "in", valueOnly = TRUE)
  textwidth <- grid::convertWidth(grid::stringWidth(text), "in", valueOnly = TRUE)
  strings <- strsplit(text, " ")[[1]]

  # `<= 1` also covers an empty `text` (zero words) combined with a negative
  # available width; the previous `2:length(strings)` loop then counted down
  # over 2, 1, 0 and produced NA-based output.
  if (textwidth <= availwidth || length(strings) <= 1) {
    return(text)
  }

  gapwidth <- grid::stringWidth(" ")
  newstring <- strings[1]
  linewidth <- grid::stringWidth(newstring)

  for (word in strings[-1]) {
    str_width <- grid::stringWidth(word)
    candidate <- linewidth + gapwidth + str_width
    if (grid::convertWidth(candidate, "in", valueOnly = TRUE) < availwidth) {
      # The word still fits on the current line.
      sep <- " "
      linewidth <- candidate
    } else {
      # Start a new line with this word.
      sep <- "\n"
      linewidth <- str_width
    }
    newstring <- paste(newstring, word, sep = sep)
  }
  newstring
}
294
295		#' Split Text According To Available Text Width
296		#'
297		#' Dynamically wrap text.
298		#'
299		#' @inheritParams grid::grid.text
300		#' @param text character string
301		#' @param width a unit object specifying max width of text
302		#'
303		#' @return A text grob.
304		#'
305		#' @details This code is taken from R Graphics by Paul Murell, 2nd edition
306		#'
307		#' @examples
308		#' # Internal function - split_text_grob
309		#' \dontrun{
310		#' sg <- split_text_grob(text = paste(
311		#' "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum vitae",
312		#' "dapibus dolor, ac mattis erat. Nunc metus lectus, imperdiet ut enim eu,",
313		#' "commodo scelerisque urna. Vestibulum facilisis metus vel nibh tempor, sed",
314		#' "elementum sem tempus. Morbi quis arcu condimentum, maximus lorem id,",
315		#' "tristique ante. Nullam a nunc dui. Fusce quis lacus nec ante dignissim",
316		#' "faucibus nec vitae tellus. Suspendisse mollis et sapien eu ornare. Vestibulum",
317		#' "placerat neque nec justo efficitur, ornare varius nulla imperdiet. Nunc justo",
318		#' "sapien, vestibulum eget efficitur eget, porttitor id ante. Nulla tempor",
319		#' "luctus massa id elementum. Praesent dictum, neque vitae vestibulum malesuada,",
320		#' "nunc nisi blandit lacus, sit amet tristique odio dui sit amet velit."
321		#' ))
322		#'
323		#' library(grid)
324		#' grobHeight(sg)
325		#'
326		#' grid.newpage()
327		#' pushViewport(plotViewport())
328		#' grid.rect()
329		#' grid.draw(sg)
330		#'
331		#' grid.rect(
332		#' height = grobHeight(sg), width = unit(1, "cm"), gp = gpar(fill = "red")
333		#' )
334		#'
335		#' # stack split_text_grob
336		#' grid.newpage()
337		#' pushViewport(plotViewport())
338		#' grid.rect()
339		#' grid.draw(split_text_grob(
340		#' c("Hello, this is a test", "and yet another test"),
341		#' just = c("left", "top"), x = 0, y = 1
342		#' ))
343		#' }
344		#'
345		#' @keywords internal
split_text_grob <- function(text,
                            x = grid::unit(0.5, "npc"),
                            y = grid::unit(0.5, "npc"),
                            width = grid::unit(1, "npc"),
                            just = "centre",
                            hjust = NULL,
                            vjust = NULL,
                            default.units = "npc", # nolint
                            name = NULL,
                            gp = grid::gpar(),
                            vp = NULL) {
  # Bare numbers for the position are interpreted in `default.units`.
  if (!grid::is.unit(x)) {
    x <- grid::unit(x, default.units)
  }
  if (!grid::is.unit(y)) {
    y <- grid::unit(y, default.units)
  }

  checkmate::assert_true(grid::is.unit(width))
  checkmate::assert_vector(width, len = 1)

  # For an absolute width the wrapped text does not depend on the viewport
  # size, so it is computed once here and cached on `text`.
  # NOTE(review): this test relies on the "unit.arithmetic" class and on a
  # character "unit" attribute; confirm it is still reachable with the unit
  # implementation of the R version in use.
  absolute_units <- c("cm", "inches", "mm", "points", "picas", "bigpts", "dida", "cicero", "scaledpts")
  width_is_absolute <- !inherits(width, "unit.arithmetic") &&
    !is.null(attr(width, "unit")) &&
    attr(width, "unit") %in% absolute_units
  if (width_is_absolute) {
    attr(text, "fixed_text") <- paste(vapply(text, split_string, character(1), width = width), collapse = "\n")
  }

  # The "dynamicSplitText" methods wrap the text when height is queried or
  # the grob is drawn.
  grid::grob(
    text = text,
    x = x, y = y,
    width = width,
    just = just,
    hjust = hjust,
    vjust = vjust,
    rot = 0,
    check.overlap = FALSE,
    name = name,
    gp = gp,
    vp = vp,
    cl = "dynamicSplitText"
  )
}
389
390		#' @importFrom grid validDetails
391		#' @noRd
validDetails.dynamicSplitText <- function(x) {
  # The text must be character.
  checkmate::assert_character(x$text)

  # The wrap width must be a single grid unit.
  checkmate::assert_true(grid::is.unit(x$width))
  checkmate::assert_vector(x$width, len = 1)

  x
}
398
399		#' @importFrom grid heightDetails
400		#' @noRd
heightDetails.dynamicSplitText <- function(x) {
  # Prefer the wrapped text cached at construction; otherwise wrap each
  # element of the text now and join the results with newlines.
  wrapped <- attr(x$text, "fixed_text")
  if (is.null(wrapped)) {
    wrapped <- paste(vapply(x$text, split_string, character(1), width = x$width), collapse = "\n")
  }
  grid::stringHeight(wrapped)
}
409
410		#' @importFrom grid widthDetails
411		#' @noRd
widthDetails.dynamicSplitText <- function(x) {
  # The grob's width is the wrap width stored on it.
  x$width
}
415
416		#' @importFrom grid drawDetails
417		#' @noRd
drawDetails.dynamicSplitText <- function(x, recording) {
  # Use the wrapped text cached at construction, or wrap it now.
  label <- attr(x$text, "fixed_text")
  if (is.null(label)) {
    label <- paste(vapply(x$text, split_string, character(1), width = x$width), collapse = "\n")
  }

  # Turn the object into a text grob: drop the custom fields, store the
  # wrapped text as `label`, and replace the leading class with "text".
  x$width <- NULL
  x$label <- label
  x$text <- NULL
  class(x) <- c("text", class(x)[-1])

  grid::grid.draw(x)
}
432
433		#' Update Page Number
434		#'
435		#' Automatically updates page number.
436		#'
437		#' @param npages number of pages in total
438		#' @param ... passed on to [decorate_grob()]
439		#'
440		#' @return Closure that increments the page number.
441		#'
442		#' @examples
443		#' # Internal function - decorate_grob_factory
444		#' \dontrun{
445		#' pf <- decorate_grob_factory(
446		#' titles = "This is a test\nHello World",
447		#' footnotes = "Here belong the footnotess",
448		#' npages = 3
449		#' )
450		#'
451		#' library(grid)
452		#' draw_grob(pf(NULL))
453		#' draw_grob(pf(NULL))
454		#' draw_grob(pf(NULL))
455		#' }
456		#'
457		#' @keywords internal
decorate_grob_factory <- function(npages, ...) {
  # Page counter kept in the closure; advanced on every call.
  current_page <- 0

  function(grob) {
    # The counter is advanced before the bounds check, so it keeps growing
    # even when the call fails.
    current_page <<- current_page + 1
    if (current_page > npages) {
      stop(paste("current page is", current_page, "but max.", npages, "specified."))
    }

    page_label <- paste("Page", current_page, "of", npages)
    decorate_grob(grob = grob, page = page_label, ...)
  }
}
468
469		#' Decorate Set of `grobs` and Add Page Numbering
470		#'
471		#' @description `r lifecycle::badge("stable")`
472		#'
473		#' Note that this uses the [decorate_grob_factory()] function.
474		#'
475		#' @param grobs a list of grid grobs
476		#' @param ... arguments passed on to [decorate_grob()].
477		#'
478		#' @return A decorated grob.
479		#'
480		#' @examples
481		#' library(ggplot2)
482		#' library(grid)
483		#' g <- with(data = iris, {
484		#' list(
485		#' ggplot2::ggplotGrob(
486		#' ggplot2::ggplot(mapping = aes(Sepal.Length, Sepal.Width, col = Species)) +
487		#' ggplot2::geom_point()
488		#' ),
489		#' ggplot2::ggplotGrob(
490		#' ggplot2::ggplot(mapping = aes(Sepal.Length, Petal.Length, col = Species)) +
491		#' ggplot2::geom_point()
492		#' ),
493		#' ggplot2::ggplotGrob(
494		#' ggplot2::ggplot(mapping = aes(Sepal.Length, Petal.Width, col = Species)) +
495		#' ggplot2::geom_point()
496		#' ),
497		#' ggplot2::ggplotGrob(
498		#' ggplot2::ggplot(mapping = aes(Sepal.Width, Petal.Length, col = Species)) +
499		#' ggplot2::geom_point()
500		#' ),
501		#' ggplot2::ggplotGrob(
502		#' ggplot2::ggplot(mapping = aes(Sepal.Width, Petal.Width, col = Species)) +
503		#' ggplot2::geom_point()
504		#' ),
505		#' ggplot2::ggplotGrob(
506		#' ggplot2::ggplot(mapping = aes(Petal.Length, Petal.Width, col = Species)) +
507		#' ggplot2::geom_point()
508		#' )
509		#' )
510		#' })
511		#' lg <- decorate_grob_set(grobs = g, titles = "Hello\nOne\nTwo\nThree", footnotes = "")
512		#'
513		#' draw_grob(lg[[1]])
514		#' draw_grob(lg[[2]])
515		#' draw_grob(lg[[6]])
516		#'
517		#' @export
decorate_grob_set <- function(grobs, ...) {
  # One decorator shared by all grobs, so page numbers run from 1 to
  # `length(grobs)` in list order.
  decorate_page <- decorate_grob_factory(npages = length(grobs), ...)
  lapply(grobs, decorate_page)
}

1		#' Re-implemented [range()] Default S3 method for numerical objects
2		#'
3		#' This function returns `c(NA, NA)` instead of `c(-Inf, Inf)` for zero-length data
4		#' without any warnings.
5		#'
6		#' @param x (`numeric`)\cr a sequence of numbers for which the range is computed.
7		#' @param na.rm (`logical`)\cr indicating if `NA` should be omitted.
8		#' @param finite (`logical`)\cr indicating if non-finite elements should be removed.
9		#'
10		#' @return A 2-element vector of class `numeric`.
11		#'
12		#' @examples
13		#' # Internal function - range_noinf
14		#' \dontrun{
15		#' range_noinf(1:5)
16		#' range_noinf(c(1:5, NA, NA), na.rm = TRUE)
17		#' range_noinf(numeric(), na.rm = TRUE)
18		#' range_noinf(c(1:5, NA, NA, Inf), na.rm = TRUE, finite = TRUE)
19		#' range_noinf(Inf)
20		#' range_noinf(Inf, na.rm = TRUE, finite = TRUE)
21		#' range_noinf(c(Inf, NA), na.rm = FALSE, finite = TRUE)
22		#' range_noinf(c(1, Inf, NA), na.rm = FALSE, finite = TRUE)
23		#' }
24		#'
25		#' @keywords internal
range_noinf <- function(x, na.rm = FALSE, finite = FALSE) { # nolint
  checkmate::assert_numeric(x)

  # `finite` takes precedence over `na.rm`; `is.finite()` is also FALSE for NA.
  if (finite) {
    x <- x[is.finite(x)]
  } else if (na.rm) {
    x <- x[!is.na(x)]
  }

  if (length(x) == 0) {
    # Nothing left: return a pair of NAs with the storage type of `x`.
    rval <- c(NA, NA)
    mode(rval) <- typeof(x)
    return(rval)
  }

  c(min(x, na.rm = FALSE), max(x, na.rm = FALSE))
}
45
46		#' Utility function to create label for confidence interval
47		#'
48		#' @description `r lifecycle::badge("stable")`
49		#'
50		#' @inheritParams argument_convention
51		#'
52		#' @return A `string`.
53		#'
54		#' @export
f_conf_level <- function(conf_level) {
  assert_proportion_value(conf_level)

  # Express the level as a percentage, e.g. 0.95 -> "95% CI".
  percent <- conf_level * 100
  paste0(percent, "% CI")
}
59
60		#' Utility function to create label for p-value
61		#'
62		#' @description `r lifecycle::badge("stable")`
63		#'
64		#' @param test_mean (`number`)\cr mean value to test under the null hypothesis.
65		#'
66		#' @return A `string`.
67		#'
68		#' @export
f_pval <- function(test_mean) {
  checkmate::assert_numeric(test_mean, len = 1)

  # Null hypothesis text embedded in the p-value label.
  h0_text <- paste0("H0: mean = ", test_mean)
  paste0("p-value (", h0_text, ")")
}
73
74		#' Utility function to return a named list of covariate names.
75		#'
76		#' @param covariates (`character`)\cr a vector that can contain single variable names (such as
77		#' `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
78		#'
79		#' @return A named `list` of `character` vector.
80		#'
81		#' @keywords internal
get_covariates <- function(covariates) {
  checkmate::assert_character(covariates)

  # Break interaction terms such as "X1 * X2" into their variable names.
  pieces <- unlist(strsplit(covariates, "\\*"))
  var_names <- unique(trimws(pieces))

  # Each variable appears once, named by itself.
  stats::setNames(as.list(var_names), var_names)
}
87
88		#' Replicate Entries of a Vector if Required
89		#'
90		#' @description `r lifecycle::badge("stable")`
91		#'
92		#' Replicate entries of a vector if required.
93		#'
94		#' @inheritParams argument_convention
95		#' @param n (`count`)\cr how many entries we need.
96		#'
97		#' @return `x` if it has the required length already or is `NULL`,
98		#' otherwise if it is scalar the replicated version of it with `n` entries.
99		#'
100		#' @note This function will fail if `x` is not of length `n` and/or is not a scalar.
101		#'
102		#' @export
to_n <- function(x, n) {
  # NULL passes through untouched.
  if (is.null(x)) {
    return(NULL)
  }

  len <- length(x)

  # A scalar is replicated to `n` entries.
  if (len == 1) {
    return(rep(x, n))
  }

  # Anything else must already have exactly `n` entries.
  if (len != n) {
    stop("dimension mismatch")
  }
  x
}
114
115		#' Check Element Dimension
116		#'
117		#' Checks if the elements in `...` have the same dimension.
118		#'
119		#' @param ... (`data.frame`s or `vector`s)\cr any data frames/vectors.
120		#' @param omit_null (`logical`)\cr whether `NULL` elements in `...` should be omitted from the check.
121		#'
122		#' @return A `logical` value.
123		#'
124		#' @keywords internal
check_same_n <- function(..., omit_null = TRUE) {
  dots <- list(...)

  # Give every argument a label so that unnamed (or partially named) input
  # works and can be reported: `Map()` cannot pair a non-empty list with the
  # `NULL` that `names()` returns for a fully unnamed list.
  arg_names <- names(dots)
  if (is.null(arg_names)) {
    arg_names <- rep("", length(dots))
  }
  unnamed <- !nzchar(arg_names)
  arg_names[unnamed] <- paste0("..", which(unnamed))
  names(dots) <- arg_names

  # Size of each element: rows for data frames, length for atomic vectors,
  # NA for NULLs that are to be ignored.
  n_list <- Map(
    function(x, name) {
      if (is.null(x)) {
        if (omit_null) {
          NA_integer_
        } else {
          stop("arg ", name, " is not supposed to be NULL")
        }
      } else if (is.data.frame(x)) {
        nrow(x)
      } else if (is.atomic(x)) {
        length(x)
      } else {
        stop("data structure for ", name, " is currently not supported")
      }
    },
    dots, arg_names
  )

  # Drop the NA placeholders of omitted NULL elements.
  n <- stats::na.omit(unlist(n_list))

  if (length(unique(n)) > 1) {
    # Report every argument whose size differs from the first one.
    sel <- which(n != n[1])
    stop("dimension mismatch: ", paste(names(n)[sel], collapse = ", "), " do not have N=", n[1])
  }

  TRUE
}
156
157		#' Make Names Without Dots
158		#'
159		#' @param nams (`character`)\cr vector of original names.
160		#'
161		#' @return A `character` `vector` of proper names, which does not use dots in contrast to [make.names()].
162		#'
163		#' @examples
164		#' # Internal function - make_names
165		#' \dontrun{
166		#' make_names(c("foo Bar", "1 2 3 bla"))
167		#' }
168		#'
169		#' @keywords internal
make_names <- function(nams) {
  # `make.names()` yields syntactically valid names that may contain dots;
  # every literal dot is then removed.
  gsub(".", "", x = make.names(nams), fixed = TRUE)
}
174
175		#' Conversion of Months to Days
176		#'
177		#' @description `r lifecycle::badge("stable")`
178		#'
179		#' Conversion of Months to Days. This is an approximative calculation because it
180		#' considers each month as having an average of 30.4375 days.
181		#'
182		#' @param x (`numeric`)\cr time in months.
183		#'
184		#' @return A `numeric` vector with the time in days.
185		#'
186		#' @examples
187		#' x <- c(13.25, 8.15, 1, 2.834)
188		#' month2day(x)
189		#'
190		#' @export
month2day <- function(x) {
  checkmate::assert_numeric(x)

  # Conversion factor between months and days used by this function.
  days_per_month <- 30.4375
  x * days_per_month
}
195
196		#' Conversion of Days to Months
197		#'
198		#' @param x (`numeric`)\cr time in days.
199		#'
200		#' @return A `numeric` vector with the time in months.
201		#'
202		#' @examples
203		#' x <- c(403, 248, 30, 86)
204		#' day2month(x)
205		#'
206		#' @export
day2month <- function(x) {
  checkmate::assert_numeric(x)

  # Conversion factor between months and days used by this function.
  days_per_month <- 30.4375
  x / days_per_month
}
211
212		#' Return an empty numeric if all elements are `NA`.
213		#'
214		#' @param x (`numeric`)\cr vector.
215		#'
216		#' @return An empty `numeric` if all elements of `x` are `NA`, otherwise `x`.
217		#'
218		#' @examples
219		#' x <- c(NA, NA, NA)
220		#' # Internal function - empty_vector_if_na
221		#' \dontrun{
222		#' empty_vector_if_na(x)
223		#' }
224		#'
225		#' @keywords internal
empty_vector_if_na <- function(x) {
  # Any non-missing element means the input is returned as is.
  if (!all(is.na(x))) {
    return(x)
  }

  # Only missing values (or no values at all): return an empty numeric.
  numeric()
}
233
234		#' Combine Two Vectors Element Wise
235		#'
236		#' @param x (`vector`)\cr first vector to combine.
237		#' @param y (`vector`)\cr second vector to combine.
238		#'
239		#' @return A `list` where each element combines corresponding elements of `x` and `y`.
240		#'
241		#' @examples
242		#' combine_vectors(1:3, 4:6)
243		#'
244		#' @export
combine_vectors <- function(x, y) {
  checkmate::assert_vector(x)
  checkmate::assert_vector(y, len = length(x))

  # Stack the two vectors as rows; each column then holds one (x, y) pair.
  stacked <- as.data.frame(rbind(x, y))
  pairs <- lapply(stacked, `c`)

  # Return an unnamed list of pairs.
  unname(pairs)
}
253
254		#' Extract Elements by Name
255		#'
256		#' This utility function extracts elements from a vector `x` by `names`.
257		#' Differences to the standard `[` function are:
258		#'
259		#' - If `x` is `NULL`, then still always `NULL` is returned (same as in base function).
260		#' - If `x` is not `NULL`, then the intersection of its names is made with `names` and those
261		#' elements are returned. That is, `names` which don't appear in `x` are not returned as `NA`s.
262		#'
263		#' @param x (named `vector`)\cr where to extract named elements from.
264		#' @param names (`character`)\cr vector of names to extract.
265		#'
266		#' @return `NULL` if `x` is `NULL`, otherwise the extracted elements from `x`.
267		#'
268		#' @keywords internal
extract_by_name <- function(x, names) {
  if (is.null(x)) {
    return(NULL)
  }
  checkmate::assert_named(x)
  checkmate::assert_character(names)

  # Keep only the requested names that actually occur in `x`, in the order
  # in which they appear in `x`.
  present <- intersect(base::names(x), names)
  if (length(present) == 0) {
    return(NULL)
  }
  x[present]
}
282
283		#' Labels for Adverse Event Baskets
284		#'
285		#' @description `r lifecycle::badge("stable")`
286		#'
287		#' @param aesi (`character`)\cr with standardized MedDRA query name (e.g. `SMQzzNAM`) or customized query
288		#' name (e.g. `CQzzNAM`).
289		#' @param scope (`character`)\cr with scope of query (e.g. `SMQzzSC`).
290		#'
291		#' @return A `string` with the standard label for the AE basket.
292		#'
293		#' @examples
294		#' adae <- tern_ex_adae
295		#'
296		#' # Standardized query label includes scope.
297		#' aesi_label(adae$SMQ01NAM, scope = adae$SMQ01SC)
298		#'
299		#' # Customized query label.
300		#' aesi_label(adae$CQ01NAM)
301		#'
302		#' @export
aesi_label <- function(aesi, scope = NULL) {
  checkmate::assert_character(aesi)
  checkmate::assert_character(scope, null.ok = TRUE)

  # The variable label is read before the values are cleaned; it is the
  # fallback when the basket name is not unique.
  fallback_label <- obj_label(aesi)

  # Distinct, non-missing basket names (empty strings count as missing).
  aesi <- unique(sas_na(aesi))
  aesi <- aesi[!is.na(aesi)]

  if (length(aesi) != 1) {
    return(fallback_label)
  }
  if (is.null(scope)) {
    return(aesi)
  }

  # With a scope, exactly one distinct non-missing scope value is required.
  scope <- unique(sas_na(scope))
  scope <- scope[!is.na(scope)]
  checkmate::assert_string(scope)
  paste0(aesi, " (", scope, ")")
}
323
324		#' Indicate Study Arm Variable in Formula
325		#'
326		#' We use `study_arm` to indicate the study arm variable in `tern` formulas.
327		#'
328		#' @param x arm information
329		#'
330		#' @return `x`
331		#'
332		#' @keywords internal
study_arm <- function(x) {
  # Capture the expression the caller supplied for `x` and store its text in
  # the `varname` attribute of the returned value.
  arm_expr <- substitute(x)
  structure(x, varname = deparse(arm_expr))
}
336
337		#' Smooth Function with Optional Grouping
338		#'
339		#' @description `r lifecycle::badge("stable")`
340		#'
341		#' This produces `loess` smoothed estimates of `y` with Student confidence intervals.
342		#'
343		#' @param df (`data.frame`)\cr data set containing all analysis variables.
344		#' @param x (`character`)\cr value with x column name.
345		#' @param y (`character`)\cr value with y column name.
346		#' @param groups (`character`)\cr vector with optional grouping variables names.
347		#' @param level (`numeric`)\cr level of confidence interval to use (0.95 by default).
348		#'
349		#' @return A `data.frame` with original `x`, smoothed `y`, `ylow`, and `yhigh`, and
350		#' optional `groups` variables formatted as `factor` type.
351		#'
352		#' @export
get_smooths <- function(df, x, y, groups = NULL, level = 0.95) {
  checkmate::assert_data_frame(df)
  df_cols <- colnames(df)
  checkmate::assert_string(x)
  checkmate::assert_subset(x, df_cols)
  checkmate::assert_numeric(df[[x]])
  checkmate::assert_string(y)
  checkmate::assert_subset(y, df_cols)
  checkmate::assert_numeric(df[[y]])

  if (!is.null(groups)) {
    checkmate::assert_character(groups)
    checkmate::assert_subset(groups, df_cols)
  }

  smooths <- function(x, y) {
    stats::predict(stats::loess(y ~ x), se = TRUE)
  }

  # Fit one loess curve and return the fitted values with their band.
  # NOTE(review): `qt(level, df)` is the one-sided quantile; a two-sided
  # interval with coverage `level` would use `qt((1 + level) / 2, df)`.
  # Left unchanged here - confirm the intended definition before altering it.
  smooth_band <- function(x_val, y_val) {
    plx <- smooths(x_val, y_val)
    # `se.fit` is spelled out rather than relying on `$` partial matching.
    margin <- stats::qt(level, plx$df) * plx$se.fit
    data.frame(
      x = x_val,
      y = plx$fit,
      ylow = plx$fit - margin,
      yhigh = plx$fit + margin
    )
  }

  if (is.null(groups)) {
    cc <- stats::complete.cases(df[c(x, y)])
    df_c <- df[cc, ]
    return(smooth_band(df_c[[x]], df_c[[y]]))
  }

  cc <- stats::complete.cases(df[c(x, y, groups)])
  df_c <- df[cc, c(x, y, groups)]

  # `by()` returns its cells with the FIRST grouping variable varying fastest.
  # The rows are therefore sorted with the LAST grouping variable as primary
  # key (on the factor codes that `by()` will use), so that the row-bound
  # smooths line up with the group labels attached below. For a single
  # grouping variable this is the same ordering as before.
  group_factors <- lapply(df_c[groups], as.factor)
  row_order <- do.call("order", unname(rev(group_factors)))
  df_c_ordered <- df_c[row_order, , drop = FALSE]
  df_c_g <- data.frame(Map(as.factor, df_c_ordered[groups]))

  df_smooth_raw <- by(df_c_ordered, df_c_g, function(d) {
    smooth_band(d[[x]], d[[y]])
  })

  # Empty group combinations are `NULL` cells, which `rbind()` drops.
  df_smooth <- do.call(rbind, df_smooth_raw)
  df_smooth[groups] <- df_c_g

  df_smooth
}
408
409		#' Number of Available (Non-Missing Entries) in a Vector
410		#'
411		#' Small utility function for better readability.
412		#'
413		#' @param x (`any`)\cr vector in which to count non-missing values.
414		#'
415		#' @return Number of non-missing values.
416		#'
417		#' @examples
418		#' # Internal function - n_available
419		#' \dontrun{
420		#' n_available(c(1, NA, 2))
421		#' }
422		#'
423		#' @keywords internal
n_available <- function(x) {
  # Flag the missing entries, then count everything that is not flagged.
  is_missing <- is.na(x)
  sum(!is_missing)
}
427
428		#' Reapply Variable Labels
429		#'
430		#' This is a helper function that is used in tests.
431		#'
432		#' @param x (`vector`)\cr vector of elements that needs new labels.
433		#' @param varlabels (`character`)\cr vector of labels for `x`.
434		#' @param ... further parameters to be added to the list.
435		#'
436		#' @return `x` with variable labels reapplied.
437		#'
438		#' @export
reapply_varlabels <- function(x, varlabels, ...) {
  # Labels given through `...` are appended to those in `varlabels`.
  all_labels <- c(as.list(varlabels), list(...))

  # Overwrite the matching entries of the current label vector, then write the
  # whole vector back onto `x`.
  current_labels <- formatters::var_labels(x)
  current_labels[names(all_labels)] <- as.character(all_labels)
  formatters::var_labels(x) <- current_labels
  x
}
444
# Fits `survival::clogit()` and replaces any fitting error with one fixed, more helpful message.
clogit_with_tryCatch <- function(formula, data, ...) { # nolint
  # The original condition `e` is intentionally not reused in the new message.
  on_failure <- function(e) {
    stop("model not built successfully with survival::clogit")
  }
  tryCatch(
    survival::clogit(formula = formula, data = data, ...),
    error = on_failure
  )
}

1		#' Combination Functions Class
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' `CombinationFunction` is an S4 class which extends standard functions. These are special functions that
6		#' can be combined and negated with the logical operators.
7		#'
8		#' @param e1 (`CombinationFunction`)\cr left hand side of logical operator.
9		#' @param e2 (`CombinationFunction`)\cr right hand side of logical operator.
10		#' @param x (`CombinationFunction`)\cr the function which should be negated.
11		#'
12		#' @return A `CombinationFunction`. For the `&`, `|` and `!` methods this is a new function that combines (or negates) the logical results of the supplied functions.
13		#'
14		#' @exportClass CombinationFunction
15		#' @export CombinationFunction
16		#'
17		#' @examples
18		#' higher <- function(a) {
19		#' force(a)
20		#' CombinationFunction(
21		#' function(x) {
22		#' x > a
23		#' }
24		#' )
25		#' }
26		#'
27		#' lower <- function(b) {
28		#' force(b)
29		#' CombinationFunction(
30		#' function(x) {
31		#' x < b
32		#' }
33		#' )
34		#' }
35		#'
36		#' c1 <- higher(5)
37		#' c2 <- lower(10)
38		#' c3 <- higher(5) & lower(10)
39		#' c3(7)
40		#'
41		#' @aliases CombinationFunction-class
42		#' @name combination_function
CombinationFunction <- methods::setClass("CombinationFunction", contains = "function") # nolint

#' @describeIn combination_function Logical "AND" combination of `CombinationFunction` functions.
#'   The resulting object is of the same class, and evaluates the two argument functions. The result
#'   is then the "AND" of the two individual results.
#'
#' @export
methods::setMethod(
  "&",
  signature = c(e1 = "CombinationFunction", e2 = "CombinationFunction"),
  definition = function(e1, e2) {
    # `&&` short-circuits: `e2` is only called when `e1` did not return `FALSE`.
    CombinationFunction(function(...) {
      e1(...) && e2(...)
    })
  }
)

#' @describeIn combination_function Logical "OR" combination of `CombinationFunction` functions.
#'   The resulting object is of the same class, and evaluates the two argument functions. The result
#'   is then the "OR" of the two individual results.
#'
#' @export
methods::setMethod(
  "|",
  signature = c(e1 = "CombinationFunction", e2 = "CombinationFunction"),
  definition = function(e1, e2) {
    # `||` short-circuits: `e2` is only called when `e1` did not return `TRUE`.
    CombinationFunction(function(...) {
      e1(...) || e2(...)
    })
  }
)

#' @describeIn combination_function Logical negation of `CombinationFunction` functions.
#'   The resulting object is of the same class, and evaluates the original function. The result
#'   is then the opposite of this result.
#'
#' @export
methods::setMethod(
  "!",
  signature = c(x = "CombinationFunction"),
  definition = function(x) {
    CombinationFunction(function(...) {
      !x(...)
    })
  }
)

1		#' Summary for Poisson Negative Binomial.
2		#'
3		#' @description `r lifecycle::badge("experimental")`
4		#'
5		#' Summarize results of a Poisson Negative Binomial Regression.
6		#' This can be used to analyze count and/or frequency data using a linear model.
7		#'
8		#' @inheritParams argument_convention
9		#'
10		#' @name summarize_glm_count
11		NULL
12
13		#' Helper Functions for Poisson Models.
14		#'
15		#' @description `r lifecycle::badge("experimental")`
16		#'
17		#' Helper functions that can be used to return the results of various Poisson models.
18		#'
19		#' @inheritParams argument_convention
20		#'
21		#' @seealso [summarize_glm_count]
22		#'
23		#' @name h_glm_count
24		NULL
25
26		#' @describeIn h_glm_count Helper function to return results of a poisson model.
27		#'
28		#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called
29		#' in `.var` and `variables`.
30		#' @param variables (named `list` of `strings`)\cr list of additional analysis variables, with
31		#' expected elements:
32		#' * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple
33		#' groups will be summarized. Specifically, the first level of `arm` variable is taken as the
34		#' reference group.
35		#' * `covariates` (`character`)\cr a vector that can contain single variable names (such as
36		#' `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
37		#' * `offset` (`numeric`)\cr a numeric vector or scalar adding an offset.
38		#' @param weights (`character`)\cr a character vector specifying weights used
39		#' in averaging predictions. Number of weights must equal the number of levels included in the covariates.
40		#' This value is passed on to the `weights` argument of [emmeans::emmeans()].
41		#'
42		#' @return
43		#' * `h_glm_poisson()` returns the results of a Poisson model.
44		#'
45		#' @examples
46		#' # Internal function - h_glm_poisson
47		#' \dontrun{
48		#' h_glm_poisson(
49		#' .var = "AVAL",
50		#' .df_row = anl,
51		#' variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = NULL)
52		#' )
53		#' }
54		#'
55		#' @keywords internal
h_glm_poisson <- function(.var,
                          .df_row,
                          variables,
                          weights) {
  arm <- variables$arm
  covariates <- variables$covariates
  # The offset values are read from the column of `.df_row` named by `variables$offset`.
  offset <- .df_row[[variables$offset]]

  # Right-hand side is "<covariates> + <arm>". The string starts with " + ",
  # which R parses as a unary plus, so it does not add a model term.
  rhs <- paste0(" + ", paste(covariates, collapse = " + "), " + ", arm)
  formula <- stats::as.formula(paste0(.var, " ~ ", rhs))

  # The locals stay named `formula` and `offset` because the fitted object
  # stores the call that refers to them.
  glm_fit <- stats::glm(
    formula = formula,
    offset = offset,
    data = .df_row,
    family = stats::poisson(link = "log")
  )

  # Return the fit together with the marginal means per arm on the response scale.
  list(
    glm_fit = glm_fit,
    emmeans_fit = emmeans::emmeans(
      glm_fit,
      specs = arm,
      data = .df_row,
      type = "response",
      offset = 0,
      weights = weights
    )
  )
}
93
94		#' @describeIn h_glm_count Helper function to return results of a quasipoisson model.
95		#'
96		#' @inheritParams summarize_glm_count
97		#'
98		#' @return
99		#' * `h_glm_quasipoisson()` returns the results of a Quasi-Poisson model.
100		#'
101		#' @examples
102		#' # Internal function - h_glm_quasipoisson
103		#' \dontrun{
104		#' h_glm_quasipoisson(
105		#' .var = "AVAL",
106		#' .df_row = adtte,
107		#' variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = c("REGION1"))
108		#' )
109		#' }
110		#'
111		#' @keywords internal
h_glm_quasipoisson <- function(.var,
                               .df_row,
                               variables,
                               weights) {
  arm <- variables$arm
  covariates <- variables$covariates
  # The offset values are read from the column of `.df_row` named by `variables$offset`.
  offset <- .df_row[[variables$offset]]

  # Right-hand side is "<covariates> + <arm>". The string starts with " + ",
  # which R parses as a unary plus, so it does not add a model term.
  rhs <- paste0(" + ", paste(covariates, collapse = " + "), " + ", arm)
  formula <- stats::as.formula(paste0(.var, " ~ ", rhs))

  # The locals stay named `formula` and `offset` because the fitted object
  # stores the call that refers to them.
  glm_fit <- stats::glm(
    formula = formula,
    offset = offset,
    data = .df_row,
    family = stats::quasipoisson(link = "log")
  )

  # Return the fit together with the marginal means per arm on the response scale.
  list(
    glm_fit = glm_fit,
    emmeans_fit = emmeans::emmeans(
      glm_fit,
      specs = arm,
      data = .df_row,
      type = "response",
      offset = 0,
      weights = weights
    )
  )
}
149
150		#' @describeIn h_glm_count Helper function to return the results of the
151		#' selected model (poisson, quasipoisson, negative binomial).
152		#'
153		#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called
154		#' in `.var` and `variables`.
155		#' @param variables (named `list` of `strings`)\cr list of additional analysis variables, with
156		#' expected elements:
157		#' * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple
158		#' groups will be summarized. Specifically, the first level of `arm` variable is taken as the
159		#' reference group.
160		#' * `covariates` (`character`)\cr a vector that can contain single variable names (such as
161		#' `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
162		#' * `offset` (`numeric`)\cr a numeric vector or scalar adding an offset.
163		#' @param weights (`character`)\cr character vector specifying weights used in averaging predictions.
164		#' @param distribution (`character`)\cr a character value specifying the distribution
165		#' used in the regression (poisson, quasipoisson).
166		#'
167		#' @return
168		#' * `h_glm_count()` returns the results of the selected model.
169		#'
170		#' @examples
171		#' # Internal function - h_glm_count
172		#' \dontrun{
173		#' h_glm_count(
174		#' .var = "AVAL",
175		#' .df_row = anl,
176		#' variables = list(arm = "ARMCD", offset = "lgTMATRSK", covariates = NULL),
177		#' distribution = "poisson"
178		#' )
179		#' }
180		#'
181		#' @keywords internal
h_glm_count <- function(.var,
                        .df_row,
                        variables,
                        distribution,
                        weights) {
  # Negative binomial is a known, not yet implemented option: fail with a
  # dedicated message (a future `h_glm_negbin()` would be dispatched here).
  if (distribution == "negbin") {
    stop("negative binomial distribution is not currently available.")
  }

  # Dispatch to the model-specific helper. Any other value is rejected
  # explicitly instead of silently returning `NULL`, which would only fail
  # later with an unrelated error.
  switch(distribution,
    poisson = h_glm_poisson(.var, .df_row, variables, weights),
    quasipoisson = h_glm_quasipoisson(.var, .df_row, variables, weights),
    stop(
      "distribution '", distribution, "' is not supported; ",
      "use 'poisson' or 'quasipoisson'."
    )
  )
}
196
197		#' @describeIn h_glm_count Helper function to return the estimated means.
198		#'
199		#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called in `.var` and `variables`.
200		#' @param conf_level (`numeric`)\cr value used to derive the confidence interval for the rate.
201		#' @param obj (`glm.fit`)\cr fitted model object used to derive the mean rate estimates in each treatment arm.
202		#' @param arm (`string`)\cr group variable, for which the covariate adjusted means of multiple groups will be
203		#' summarized. Specifically, the first level of `arm` variable is taken as the reference group.
204		#'
205		#' @return
206		#' * `h_ppmeans()` returns the estimated means.
207		#'
208		#' @examples
209		#' # Internal function - h_ppmeans
210		#' \dontrun{
211		#' fits <- h_glm_count(
212		#' .var = "AVAL",
213		#' .df_row = anl,
214		#' variables = list(arm = "ARMCD", offset = "lgTMATRSK", covariates = c("REGION1")),
215		#' distribution = "quasipoisson"
216		#' )
217		#'
218		#' h_ppmeans(
219		#' obj = fits$glm_fit,
220		#' .df_row = anl,
221		#' arm = "ARM",
222		#' conf_level = 0.95
223		#' )
224		#' }
225		#'
226		#' @keywords internal
h_ppmeans <- function(obj, .df_row, arm, conf_level) {
  alpha <- 1 - conf_level
  p <- 1 - alpha / 2

  # Loop invariants: normal quantile for the interval and the coefficient
  # covariance matrix of the fitted model.
  z_crit <- stats::qnorm(p)
  coef_vcov <- stats::vcov(obj)

  arm_levels <- levels(.df_row[[arm]])

  out <- lapply(arm_levels, function(lev) {
    # Counterfactual data set: every row is assigned to arm `lev`.
    temp <- .df_row
    temp[[arm]] <- factor(lev, levels = arm_levels)

    mf <- stats::model.frame(obj$formula, data = temp)
    X <- stats::model.matrix(obj$formula, data = mf) # nolint

    # Average predicted response over all rows.
    rate <- stats::predict(obj, newdata = mf, type = "response")
    rate_hat <- mean(rate)

    # Standard error from the quadratic form zz' V zz. This needs the matrix
    # product `%*%`; the element-wise modulo `%%` fails on these shapes.
    # NOTE(review): looks like a delta-method SE assuming a log link - confirm.
    zz <- colMeans(rate * X)
    se <- sqrt(as.numeric(t(zz) %*% coef_vcov %*% zz))

    # Interval is built on the log scale and transformed back.
    rate_lwr <- rate_hat * exp(-z_crit * se / rate_hat)
    rate_upr <- rate_hat * exp(z_crit * se / rate_hat)

    c(rate_hat, rate_lwr, rate_upr)
  })

  names(out) <- arm_levels
  out <- do.call(rbind, out)
  if (inherits(obj, "negbin")) {
    colnames(out) <- c("response", "asymp.LCL", "asymp.UCL")
  } else {
    colnames(out) <- c("rate", "asymp.LCL", "asymp.UCL")
  }
  out <- as.data.frame(out)
  out[[arm]] <- rownames(out)
  out
}
262
263		#' @describeIn summarize_glm_count Statistics function that produces a named list of results
264		#' of the investigated Poisson model.
265		#'
266		#' @inheritParams h_glm_count
267		#'
268		#' @return
269		#' * `s_glm_count()` returns a named `list` of 6 statistics:
270		#' * `n`: Count of complete sample size for the group.
271		#' * `rate`: Estimated event rate per follow-up time.
272		#' * `rate_ci`: Confidence interval for the estimated rate per follow-up time.
273		#' * `rate_ratio`: Ratio of event rates in each treatment arm to the reference arm.
274		#' * `rate_ratio_ci`: Confidence interval for the rate ratio.
275		#' * `pval`: p-value.
276		#'
277		#' @examples
278		#' # Internal function - s_glm_count
279		#' \dontrun{
280		#' s_glm_count(
281		#' df = anl %>%
282		#' filter(ARMCD == "ARM B"),
283		#' .df_row = anl,
284		#' .var = "AVAL",
285		#' .in_ref_col = TRUE,
286		#' variables = list(arm = "ARMCD", offset = "lgTMATRSK", covariates = c("REGION1")),
287		#' conf_level = 0.95,
288		#' distribution = "quasipoisson",
289		#' rate_mean_method = "ppmeans"
290		#' )
291		#' }
292		#'
293		#' @keywords internal
s_glm_count <- function(df,
                        .var,
                        .df_row,
                        variables,
                        .ref_group,
                        .in_ref_col,
                        distribution,
                        conf_level,
                        rate_mean_method,
                        weights,
                        scale = 1) {
  arm <- variables$arm

  y <- df[[.var]]
  smry_level <- as.character(unique(df[[arm]]))

  # ensure there is only 1 value
  checkmate::assert_scalar(smry_level)

  results <- h_glm_count(
    .var = .var,
    .df_row = .df_row,
    variables = variables,
    distribution = distribution,
    weights = weights
  )

  # Per-arm rate estimates, from the emmeans summary or the predicted-mean
  # helper. Any other method is rejected with an explicit message.
  emmeans_smry <- switch(rate_mean_method,
    emmeans = summary(results$emmeans_fit, level = conf_level),
    ppmeans = h_ppmeans(results$glm_fit, .df_row, arm, conf_level),
    stop("rate_mean_method must be either 'emmeans' or 'ppmeans'.")
  )

  emmeans_smry_level <- emmeans_smry[emmeans_smry[[arm]] == smry_level, ]

  # The estimate column is called `response` for negative binomial fits and
  # `rate` otherwise. `scale` is applied in both cases so that the rate stays
  # on the same scale as its confidence interval.
  rate_est <- if (distribution == "negbin") {
    emmeans_smry_level$response
  } else {
    emmeans_smry_level$rate
  }

  # Statistics shared by the reference and the comparison columns.
  rate_stats <- list(
    n = length(y[!is.na(y)]),
    rate = formatters::with_label(rate_est * scale, "Adjusted Rate"),
    rate_ci = formatters::with_label(
      c(emmeans_smry_level$asymp.LCL * scale, emmeans_smry_level$asymp.UCL * scale),
      f_conf_level(conf_level)
    )
  )

  if (.in_ref_col) {
    # No comparison is made in the reference column: empty placeholders.
    ratio_stats <- list(
      rate_ratio = formatters::with_label(character(), "Adjusted Rate Ratio"),
      rate_ratio_ci = formatters::with_label(character(), f_conf_level(conf_level)),
      pval = formatters::with_label(character(), "p-value")
    )
  } else {
    # Arm labels are matched literally (`fixed = TRUE`) so that characters
    # such as "(" or "." in a label are not interpreted as a regular expression.
    emmeans_contrasts <- emmeans::contrast(
      results$emmeans_fit,
      method = "trt.vs.ctrl",
      ref = grep(
        as.character(unique(.ref_group[[arm]])),
        as.data.frame(results$emmeans_fit)[[arm]],
        fixed = TRUE
      )
    )

    contrasts_smry <- summary(
      emmeans_contrasts,
      infer = TRUE,
      adjust = "none"
    )

    smry_contrasts_level <- contrasts_smry[
      grepl(smry_level, contrasts_smry$contrast, fixed = TRUE),
    ]

    ratio_stats <- list(
      rate_ratio = formatters::with_label(smry_contrasts_level$ratio, "Adjusted Rate Ratio"),
      rate_ratio_ci = formatters::with_label(
        c(smry_contrasts_level$asymp.LCL, smry_contrasts_level$asymp.UCL),
        f_conf_level(conf_level)
      ),
      pval = formatters::with_label(smry_contrasts_level$p.value, "p-value")
    )
  }

  c(rate_stats, ratio_stats)
}
381
382		#' @describeIn summarize_glm_count Formatted analysis function which is used as `afun` in `summarize_glm_count()`.
383		#'
384		#' @return
385		#' * `a_glm_count()` returns the corresponding list with formatted [rtables::CellValue()].
386		#'
387		#' @examples
388		#' # Internal function - a_glm_count
389		#' \dontrun{
390		#' a_glm_count(
391		#' df = anl %>%
392		#' filter(ARMCD == "ARM A"),
393		#' .var = "AVAL",
394		#' .df_row = anl,
395		#' variables = list(arm = "ARMCD", offset = "lgTMATRSK", covariates = c("REGION1")),
396		#' .ref_group = "ARM B", .in_ref_col = TRUE,
397		#' conf_level = 0.95,
398		#' distribution = "poisson",
399		#' rate_mean_method = "ppmeans"
400		#' )
401		#' }
402		#'
403		#' @keywords internal
a_glm_count <- make_afun(
  s_glm_count,
  # Indentation of each statistic's row; the interval and p-value rows are
  # indented one level further than the estimates.
  .indent_mods = c(
    "n" = 0L,
    "rate" = 0L,
    "rate_ci" = 1L,
    "rate_ratio" = 0L,
    "rate_ratio_ci" = 1L,
    "pval" = 1L
  ),
  # Default cell formats. The p-value format contains a plain "|"; a
  # backslash in front of it is not a valid escape in an R string.
  .formats = c(
    "n" = "xx",
    "rate" = "xx.xxxx",
    "rate_ci" = "(xx.xxxx, xx.xxxx)",
    "rate_ratio" = "xx.xxxx",
    "rate_ratio_ci" = "(xx.xxxx, xx.xxxx)",
    "pval" = "x.xxxx | (<0.0001)"
  ),
  .null_ref_cells = FALSE
)
424
425		#' @describeIn summarize_glm_count Layout-creating function which can take statistics function arguments
426		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
427		#'
428		#' @return
429		#' * `summarize_glm_count()` returns a layout object suitable for passing to further layouting functions,
430		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
431		#' the statistics from `s_glm_count()` to the table layout.
432		#'
433		#' @examples
434		#' library(dplyr)
435		#' anl <- tern_ex_adtte %>% filter(PARAMCD == "TNE")
436		#' anl$AVAL_f <- as.factor(anl$AVAL)
437		#'
438		#' lyt <- basic_table() %>%
439		#' split_cols_by("ARM", ref_group = "B: Placebo") %>%
440		#' add_colcounts() %>%
441		#' summarize_vars(
442		#' "AVAL_f",
443		#' var_labels = "Number of exacerbations per patient",
444		#' .stats = c("count_fraction"),
445		#' .formats = c("count_fraction" = "xx (xx.xx%)"),
446		#' .label = c("Number of exacerbations per patient")
447		#' ) %>%
448		#' summarize_glm_count(
449		#' vars = "AVAL",
450		#' variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = NULL),
451		#' conf_level = 0.95,
452		#' distribution = "poisson",
453		#' rate_mean_method = "emmeans",
454		#' var_labels = "Unadjusted exacerbation rate (per year)",
455		#' table_names = "unadj",
456		#' .stats = c("rate"),
457		#' .labels = c(rate = "Rate")
458		#' ) %>%
459		#' summarize_glm_count(
460		#' vars = "AVAL",
461		#' variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = c("REGION1")),
462		#' conf_level = 0.95,
463		#' distribution = "quasipoisson",
464		#' rate_mean_method = "ppmeans",
465		#' var_labels = "Adjusted (QP) exacerbation rate (per year)",
466		#' table_names = "adj",
467		#' .stats = c("rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
468		#' .labels = c(
469		#' rate = "Rate", rate_ci = "Rate CI", rate_ratio = "Rate Ratio",
470		#' rate_ratio_ci = "Rate Ratio CI", pval = "p value"
471		#' )
472		#' )
473		#' build_table(lyt = lyt, df = anl)
474		#'
475		#' @export
summarize_glm_count <- function(lyt,
                                vars,
                                var_labels,
                                ...,
                                show_labels = "visible",
                                table_names = vars,
                                .stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Specialise the default analysis function with the user's choice of
  # statistics, formats, labels and indentation.
  customised_afun <- make_afun(
    a_glm_count,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Everything supplied through `...` is handed over as `extra_args`.
  analyze(
    lyt = lyt,
    vars = vars,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    afun = customised_afun,
    extra_args = list(...)
  )
}

1		#' Encode Categorical Missing Values in a Data Frame
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' This is a helper function to encode missing entries across groups of categorical
6		#' variables in a data frame.
7		#'
8		#' @details Missing entries are those with `NA` or empty strings and will
9		#' be replaced with a specified value. If factor variables include missing
10		#' values, the missing value will be inserted as the last level.
11		#' Similarly, in case character or logical variables should be converted to factors
12		#' with the `char_as_factor` or `logical_as_factor` options, the missing values will
13		#' be set as the last level.
14		#'
15		#' @param data (`data.frame`)\cr data set.
16		#' @param omit_columns (`character`)\cr names of variables from `data` that should
17		#' not be modified by this function.
18		#' @param char_as_factor (`flag`)\cr whether to convert character variables
19		#' in `data` to factors.
20		#' @param logical_as_factor (`flag`)\cr whether to convert logical variables
21		#' in `data` to factors.
22		#' @param na_level (`string`)\cr used to replace all `NA` or empty
23		#' values inside non-`omit_columns` columns.
24		#'
25		#' @return A `data.frame` with the chosen modifications applied.
26		#'
27		#' @seealso [sas_na()] and [explicit_na()] for other missing data helper functions.
28		#'
29		#' @examples
30		#' my_data <- data.frame(
31		#' u = c(TRUE, FALSE, NA, TRUE),
32		#' v = factor(c("A", NA, NA, NA), levels = c("Z", "A")),
33		#' w = c("A", "B", NA, "C"),
34		#' x = c("D", "E", "F", NA),
35		#' y = c("G", "H", "I", ""),
36		#' z = c(1, 2, 3, 4),
37		#' stringsAsFactors = FALSE
38		#' )
39		#'
40		#' # Example 1
41		#' # Encode missing values in all character or factor columns.
42		#' df_explicit_na(my_data)
43		#' # Also convert logical columns to factor columns.
44		#' df_explicit_na(my_data, logical_as_factor = TRUE)
45		#' # Encode missing values in a subset of columns.
46		#' df_explicit_na(my_data, omit_columns = c("x", "y"))
47		#'
48		#' # Example 2
49		#' # Here we purposefully convert all `M` values to `NA` in the `SEX` variable.
50		#' # After running `df_explicit_na` the `NA` values are encoded as `<Missing>` but they are not
51		#' # included when generating `rtables`.
52		#' adsl <- tern_ex_adsl
53		#' adsl$SEX[adsl$SEX == "M"] <- NA
54		#' adsl <- df_explicit_na(adsl)
55		#'
56		#' # If you want the `NA` values to be displayed in the table use the `na_level` argument.
57		#' adsl <- tern_ex_adsl
58		#' adsl$SEX[adsl$SEX == "M"] <- NA
59		#' adsl <- df_explicit_na(adsl, na_level = "Missing Values")
60		#'
61		#' # Example 3
62		#' # Numeric variables that have missing values are not altered. This means that any `NA` value in
63		#' # a numeric variable will not be included in the summary statistics, nor will they be included
64		#' # in the denominator value for calculating the percent values.
65		#' adsl <- tern_ex_adsl
66		#' adsl$AGE[adsl$AGE < 30] <- NA
67		#' adsl <- df_explicit_na(adsl)
68		#'
69		#' @export
df_explicit_na <- function(data,
                           omit_columns = NULL,
                           char_as_factor = TRUE,
                           logical_as_factor = FALSE,
                           na_level = "<Missing>") {
  checkmate::assert_character(omit_columns, null.ok = TRUE, min.len = 1, any.missing = FALSE)
  checkmate::assert_data_frame(data)
  checkmate::assert_flag(char_as_factor)
  checkmate::assert_flag(logical_as_factor)
  checkmate::assert_string(na_level)

  target_vars <- if (is.null(omit_columns)) {
    names(data)
  } else {
    setdiff(names(data), omit_columns) # May have duplicates.
  }
  # Nothing left to encode: hand the data back unchanged.
  if (length(target_vars) == 0) {
    return(data)
  }

  l_target_vars <- split(target_vars, target_vars)

  # Makes sure target_vars exist in data and names are not duplicated.
  assert_df_with_variables(data, l_target_vars)

  for (x in target_vars) {
    xi <- data[[x]]
    # Remember the variable label so it can be put back after the conversion.
    xi_label <- obj_label(xi)

    # Determine whether to convert character or logical input.
    do_char_conversion <- is.character(xi) && char_as_factor
    do_logical_conversion <- is.logical(xi) && logical_as_factor

    # Pre-convert logical to character to deal correctly with replacing NA
    # values below.
    if (do_logical_conversion) {
      xi <- as.character(xi)
    }

    # Other column types (e.g. numeric) are left untouched.
    if (is.factor(xi) || is.character(xi)) {
      # Handle empty strings and NA values.
      xi <- explicit_na(sas_na(xi), label = na_level)

      # Convert to factors if requested for the original type,
      # set na_level as the last value.
      if (do_char_conversion || do_logical_conversion) {
        levels_xi <- setdiff(sort(unique(xi)), na_level)
        if (na_level %in% unique(xi)) {
          levels_xi <- c(levels_xi, na_level)
        }

        xi <- factor(xi, levels = levels_xi)
      }

      data[, x] <- formatters::with_label(xi, label = xi_label)
    }
  }

  data
}

1		#' Proportion Difference
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' @inheritParams argument_convention
6		#'
7		#' @seealso [d_proportion_diff()]
8		#'
9		#' @name prop_diff
10		NULL
11
12		#' @describeIn prop_diff Statistics function estimating the difference
13		#' in terms of responder proportion.
14		#'
15		#' @inheritParams prop_diff_strat_nc
16		#' @param method (`string`)\cr the method used for the confidence interval estimation.
17		#'
18		#' @return
19		#' * `s_proportion_diff()` returns a named list of elements `diff` and `diff_ci`.
20		#'
21		#' @note When performing an unstratified analysis, methods `"cmh"`, `"strat_newcombe"`, and `"strat_newcombecc"` are
22		#' not permitted.
23		#'
24		#' @examples
25		#' # Summary
26		#'
27		#' ## "Mid" case: 4/4 respond in group A, 1/2 respond in group B.
28		#' nex <- 100 # Number of example rows
29		#' dta <- data.frame(
30		#' "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
31		#' "grp" = sample(c("A", "B"), nex, TRUE),
32		#' "f1" = sample(c("a1", "a2"), nex, TRUE),
33		#' "f2" = sample(c("x", "y", "z"), nex, TRUE),
34		#' stringsAsFactors = TRUE
35		#' )
36		#'
37		#' s_proportion_diff(
38		#' df = subset(dta, grp == "A"),
39		#' .var = "rsp",
40		#' .ref_group = subset(dta, grp == "B"),
41		#' .in_ref_col = FALSE,
42		#' conf_level = 0.90,
43		#' method = "ha"
44		#' )
45		#'
46		#' # CMH example with strata
47		#' s_proportion_diff(
48		#' df = subset(dta, grp == "A"),
49		#' .var = "rsp",
50		#' .ref_group = subset(dta, grp == "B"),
51		#' .in_ref_col = FALSE,
52		#' variables = list(strata = c("f1", "f2")),
53		#' conf_level = 0.90,
54		#' method = "cmh"
55		#' )
56		#'
57		#' @export
s_proportion_diff <- function(df,
                              .var,
                              .ref_group,
                              .in_ref_col,
                              variables = list(strata = NULL),
                              conf_level = 0.95,
                              method = c(
                                "waldcc", "wald", "cmh",
                                "ha", "newcombe", "newcombecc",
                                "strat_newcombe", "strat_newcombecc"
                              ),
                              weights_method = "cmh") {
  method <- match.arg(method)
  # Stratified methods cannot be computed without stratification variables.
  if (is.null(variables$strata) && checkmate::test_subset(method, c("cmh", "strat_newcombe", "strat_newcombecc"))) {
    stop(paste(
      "When performing an unstratified analysis, methods 'cmh', 'strat_newcombe', and 'strat_newcombecc' are not",
      "permitted. Please choose a different method."
    ))
  }
  # Placeholder result; it is returned as-is (with labels attached) when `.in_ref_col` is TRUE.
  y <- list(diff = "", diff_ci = "")

  if (!.in_ref_col) {
    # Reference rows are stacked first so that they line up with the first level ("ref") of `grp`.
    rsp <- c(.ref_group[[.var]], df[[.var]])
    grp <- factor(
      rep(
        c("ref", "Not-ref"),
        c(nrow(.ref_group), nrow(df))
      ),
      levels = c("ref", "Not-ref")
    )

    if (!is.null(variables$strata)) {
      strata_colnames <- variables$strata
      checkmate::assert_character(strata_colnames, null.ok = FALSE)
      strata_vars <- stats::setNames(as.list(strata_colnames), strata_colnames)

      assert_df_with_variables(df, strata_vars)
      assert_df_with_variables(.ref_group, strata_vars)

      # Merging interaction strata for reference group rows data and remaining
      strata <- c(
        interaction(.ref_group[strata_colnames]),
        interaction(df[strata_colnames])
      )
      strata <- as.factor(strata)
    }

    # Weights for the stratified Newcombe methods: `variables$weights_method` takes precedence when
    # supplied; otherwise the `weights_method` argument (default "cmh") is honoured instead of being
    # overwritten.
    if (!is.null(variables$weights_method)) {
      weights_method <- variables$weights_method
    }

    y <- switch(method,
      "wald" = prop_diff_wald(rsp, grp, conf_level, correct = FALSE),
      "waldcc" = prop_diff_wald(rsp, grp, conf_level, correct = TRUE),
      "ha" = prop_diff_ha(rsp, grp, conf_level),
      "newcombe" = prop_diff_nc(rsp, grp, conf_level, correct = FALSE),
      "newcombecc" = prop_diff_nc(rsp, grp, conf_level, correct = TRUE),
      "strat_newcombe" = prop_diff_strat_nc(rsp,
        grp,
        strata,
        weights_method,
        conf_level,
        correct = FALSE
      ),
      "strat_newcombecc" = prop_diff_strat_nc(rsp,
        grp,
        strata,
        weights_method,
        conf_level,
        correct = TRUE
      ),
      "cmh" = prop_diff_cmh(rsp, grp, strata, conf_level)[c("diff", "diff_ci")]
    )

    # Helpers return proportions; report them as percentages.
    y$diff <- y$diff * 100
    y$diff_ci <- y$diff_ci * 100
  }

  attr(y$diff, "label") <- "Difference in Response rate (%)"
  attr(y$diff_ci, "label") <- d_proportion_diff(
    conf_level, method,
    long = FALSE
  )

  y
}
147
148		#' @describeIn prop_diff Formatted analysis function which is used as `afun` in `estimate_proportion_diff()`.
149		#'
150		#' @return
151		#' * `a_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
152		#'
153		#' @examples
154		#' a_proportion_diff(
155		#' df = subset(dta, grp == "A"),
156		#' .var = "rsp",
157		#' .ref_group = subset(dta, grp == "B"),
158		#' .in_ref_col = FALSE,
159		#' conf_level = 0.90,
160		#' method = "ha"
161		#' )
162		#'
163		#' @export
a_proportion_diff <- make_afun(
  s_proportion_diff,
  # Default cell formats for the point estimate and its confidence interval.
  .formats = c("diff" = "xx.x", "diff_ci" = "(xx.x, xx.x)"),
  # Default indent modifiers: 0 for `diff`, 1 for `diff_ci`.
  .indent_mods = c("diff" = 0L, "diff_ci" = 1L)
)
169
170		#' @describeIn prop_diff Layout-creating function which can take statistics function arguments
171		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
172		#'
173		#' @param ... arguments passed to `s_proportion_diff()`.
174		#'
175		#' @return
176		#' * `estimate_proportion_diff()` returns a layout object suitable for passing to further layouting functions,
177		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
178		#' the statistics from `s_proportion_diff()` to the table layout.
179		#'
180		#' @examples
181		#' l <- basic_table() %>%
182		#' split_cols_by(var = "grp", ref_group = "B") %>%
183		#' estimate_proportion_diff(
184		#' vars = "rsp",
185		#' conf_level = 0.90,
186		#' method = "ha"
187		#' )
188		#'
189		#' build_table(l, df = dta)
190		#'
191		#' @export
estimate_proportion_diff <- function(lyt,
                                     vars,
                                     ...,
                                     var_labels = vars,
                                     show_labels = "hidden",
                                     table_names = vars,
                                     .stats = NULL,
                                     .formats = NULL,
                                     .labels = NULL,
                                     .indent_mods = NULL) {
  # Derive the analysis function from `a_proportion_diff`, applying any user-supplied overrides.
  custom_afun <- make_afun(
    a_proportion_diff,
    .stats = .stats, .formats = .formats,
    .labels = .labels, .indent_mods = .indent_mods
  )

  # Everything captured by `...` is handed to the analysis function through `extra_args`.
  analyze(
    lyt, vars,
    afun = custom_afun,
    var_labels = var_labels,
    extra_args = list(...),
    show_labels = show_labels,
    table_names = table_names
  )
}
220
221		#' Check: Proportion Difference Arguments
222		#'
223		#' Verifies that the arguments are valid values to be used in the
224		#' estimation of the difference in responder proportions.
225		#'
226		#' @inheritParams prop_diff
227		#' @inheritParams prop_diff_wald
228		#'
229		#' @keywords internal
check_diff_prop_ci <- function(rsp,
                               grp,
                               strata = NULL,
                               conf_level,
                               correct = NULL) {
  # `rsp` must be a complete logical vector; `grp` a complete two-level factor of the same length.
  checkmate::assert_logical(rsp, any.missing = FALSE)
  n_obs <- length(rsp)
  checkmate::assert_factor(grp, len = n_obs, any.missing = FALSE, n.levels = 2)
  checkmate::assert_number(conf_level, lower = 0, upper = 1)
  checkmate::assert_flag(correct, null.ok = TRUE)

  # Strata are optional; when given they must be a factor with one entry per observation.
  strata_given <- !is.null(strata)
  if (strata_given) checkmate::assert_factor(strata, len = n_obs)

  # Called for its checks only; returns `NULL` invisibly.
  invisible(NULL)
}
246
247		#' Description of Method Used for Proportion Comparison
248		#'
249		#' @description `r lifecycle::badge("stable")`
250		#'
251		#' This is an auxiliary function that describes the analysis in
252		#' `s_proportion_diff`.
253		#'
254		#' @inheritParams s_proportion_diff
255		#' @param long (`logical`)\cr Whether a long or a short (default) description is required.
256		#'
257		#' @return A `string` describing the analysis.
258		#'
259		#' @seealso [prop_diff]
260		#'
261		#' @export
d_proportion_diff <- function(conf_level,
                              method,
                              long = FALSE) {
  label <- paste0(conf_level * 100, "% CI")
  if (long) {
    # `method` is a single string, so a scalar `if`/`else` is used instead of the vectorised `ifelse()`.
    target <- if (method == "cmh") "for adjusted difference" else "for difference"
    label <- paste(label, target)
  }

  # Human-readable name of the interval method; unknown methods are an error.
  method_part <- switch(method,
    "cmh" = "CMH, without correction",
    "waldcc" = "Wald, with correction",
    "wald" = "Wald, without correction",
    "ha" = "Anderson-Hauck",
    "newcombe" = "Newcombe, without correction",
    "newcombecc" = "Newcombe, with correction",
    "strat_newcombe" = "Stratified Newcombe, without correction",
    "strat_newcombecc" = "Stratified Newcombe, with correction",
    stop(paste(method, "does not have a description"))
  )
  paste0(label, " (", method_part, ")")
}
290
291		#' Helper Functions to Calculate Proportion Difference
292		#'
293		#' @description `r lifecycle::badge("stable")`
294		#'
295		#' @inheritParams argument_convention
296		#' @inheritParams prop_diff
297		#' @param grp (`factor`)\cr vector assigning observations to one out of two groups
298		#' (e.g. reference and treatment group).
299		#'
300		#' @return A named `list` of elements `diff` (proportion difference) and `diff_ci`
301		#' (proportion difference confidence interval).
302		#'
303		#' @seealso [prop_diff()] for implementation of these helper functions.
304		#'
305		#' @name h_prop_diff
306		NULL
307
308		#' @describeIn h_prop_diff The Wald interval follows the usual textbook
309		#' definition for a single proportion confidence interval using the normal
310		#' approximation. It is possible to include a continuity correction for Wald's
311		#' interval.
312		#'
313		#' @param correct (`logical`)\cr whether to include the continuity correction. For further
314		#' information, see [stats::prop.test()].
315		#'
316		#' @examples
317		#' # Wald confidence interval
318		#' set.seed(2)
319		#' rsp <- sample(c(TRUE, FALSE), replace = TRUE, size = 20)
320		#' grp <- factor(c(rep("A", 10), rep("B", 10)))
321		#' prop_diff_wald(rsp = rsp, grp = grp, conf_level = 0.95, correct = FALSE)
322		#'
323		#' @export
prop_diff_wald <- function(rsp,
                           grp,
                           conf_level = 0.95,
                           correct = FALSE) {
  # Select the method name passed to `desctools_binom()` depending on the continuity correction.
  mthd <- if (isTRUE(correct)) "waldcc" else "wald"
  grp <- as_factor_keep_attributes(grp)
  # Validates `rsp` (complete logical) and `grp` (complete two-level factor of the same length),
  # so no further assertions on these inputs are needed below.
  check_diff_prop_ci(
    rsp = rsp, grp = grp, conf_level = conf_level, correct = correct
  )

  # 2 x 2 table: rows are the levels of `grp`, columns are "TRUE" then "FALSE".
  tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
  # x1 and n1 are non-reference groups.
  diff_ci <- desctools_binom(
    x1 = tbl[2], n1 = sum(tbl[2], tbl[4]),
    x2 = tbl[1], n2 = sum(tbl[1], tbl[3]),
    conf.level = conf_level,
    method = mthd
  )

  list(
    "diff" = unname(diff_ci[, "est"]),
    "diff_ci" = unname(diff_ci[, c("lwr.ci", "upr.ci")])
  )
}
356
357		#' @describeIn h_prop_diff Anderson-Hauck confidence interval.
358		#'
359		#' @examples
360		#' # Anderson-Hauck confidence interval
361		#' ## "Mid" case: 3/4 respond in group A, 1/2 respond in group B.
362		#' rsp <- c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE)
363		#' grp <- factor(c("A", "B", "A", "B", "A", "A"), levels = c("B", "A"))
364		#' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.90)
365		#'
366		#' ## Edge case: Same proportion of response in A and B.
367		#' rsp <- c(TRUE, FALSE, TRUE, FALSE)
368		#' grp <- factor(c("A", "A", "B", "B"), levels = c("A", "B"))
369		#' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.6)
370		#'
371		#' @export
prop_diff_ha <- function(rsp,
                         grp,
                         conf_level) {
  grp <- as_factor_keep_attributes(grp)
  check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level)

  # 2 x 2 counts: rows follow the levels of `grp`, columns are "TRUE" then "FALSE".
  counts <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
  resp_ref <- counts[1, 1]
  resp_trt <- counts[2, 1]
  n_ref <- sum(resp_ref, counts[1, 2])
  n_trt <- sum(resp_trt, counts[2, 2])

  # The second level of `grp` (non-reference) is passed as x1/n1.
  ha_ci <- desctools_binom(
    x1 = resp_trt, n1 = n_trt,
    x2 = resp_ref, n2 = n_ref,
    conf.level = conf_level,
    method = "ha"
  )

  estimate <- unname(ha_ci[, "est"])
  bounds <- unname(ha_ci[, c("lwr.ci", "upr.ci")])
  list("diff" = estimate, "diff_ci" = bounds)
}
391
392		#' @describeIn h_prop_diff Newcombe confidence interval. It is based on
393		#' the Wilson score confidence interval for a single binomial proportion.
394		#'
395		#' @examples
396		#' # Newcombe confidence interval
397		#'
398		#' set.seed(1)
399		#' rsp <- c(
400		#' sample(c(TRUE, FALSE), size = 40, prob = c(3 / 4, 1 / 4), replace = TRUE),
401		#' sample(c(TRUE, FALSE), size = 40, prob = c(1 / 2, 1 / 2), replace = TRUE)
402		#' )
403		#' grp <- factor(rep(c("A", "B"), each = 40), levels = c("B", "A"))
404		#' table(rsp, grp)
405		#' prop_diff_nc(rsp = rsp, grp = grp, conf_level = 0.9)
406		#'
407		#' @export
prop_diff_nc <- function(rsp,
                         grp,
                         conf_level,
                         correct = FALSE) {
  # Select the method name passed to `desctools_binom()` depending on the continuity correction.
  mthd <- if (isTRUE(correct)) "scorecc" else "score"
  grp <- as_factor_keep_attributes(grp)
  check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level)

  # 2 x 2 table: rows are the levels of `grp`, columns are "TRUE" then "FALSE".
  tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
  ci <- desctools_binom(
    # x1 and n1 are non-reference groups.
    x1 = tbl[2], n1 = sum(tbl[2], tbl[4]),
    x2 = tbl[1], n2 = sum(tbl[1], tbl[3]),
    conf.level = conf_level,
    method = mthd
  )
  list(
    "diff" = unname(ci[, "est"]),
    "diff_ci" = unname(ci[, c("lwr.ci", "upr.ci")])
  )
}
435
436		#' @describeIn h_prop_diff Calculates the weighted difference. This is defined as the difference in
437		#' response rates between the experimental treatment group and the control treatment group, adjusted
438		#' for stratification factors by applying Cochran-Mantel-Haenszel (CMH) weights. For the CMH chi-squared
439		#' test, use [stats::mantelhaen.test()].
440		#'
441		#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
442		#'
443		#' @examples
444		#' # Cochran-Mantel-Haenszel confidence interval
445		#'
446		#' set.seed(2)
447		#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
448		#' grp <- sample(c("Placebo", "Treatment"), 100, TRUE)
449		#' grp <- factor(grp, levels = c("Placebo", "Treatment"))
450		#' strata_data <- data.frame(
451		#' "f1" = sample(c("a", "b"), 100, TRUE),
452		#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
453		#' stringsAsFactors = TRUE
454		#' )
455		#'
456		#' prop_diff_cmh(
457		#' rsp = rsp, grp = grp, strata = interaction(strata_data),
458		#' conf_level = 0.90
459		#' )
460		#'
461		#' @export
prop_diff_cmh <- function(rsp,
                          grp,
                          strata,
                          conf_level = 0.95) {
  grp <- as_factor_keep_attributes(grp)
  strata <- as_factor_keep_attributes(strata)
  check_diff_prop_ci(
    rsp = rsp, grp = grp, conf_level = conf_level, strata = strata
  )

  # `tapply()` yields NA for strata levels without observations; `na.rm = TRUE` keeps the
  # condition a definite TRUE/FALSE so that empty levels cannot make `if` fail.
  if (any(tapply(rsp, strata, length) < 5, na.rm = TRUE)) {
    warning("Less than 5 observations in some strata.")
  }

  # first dimension: FALSE, TRUE
  # 2nd dimension: CONTROL, TX
  # 3rd dimension: levels of strat
  # rsp as factor rsp to handle edge case of no FALSE (or TRUE) rsp records
  t_tbl <- table(
    factor(rsp, levels = c("FALSE", "TRUE")),
    grp,
    strata
  )
  # Per-stratum group sizes and response proportions.
  n1 <- colSums(t_tbl[1:2, 1, ])
  n2 <- colSums(t_tbl[1:2, 2, ])
  p1 <- t_tbl[2, 1, ] / n1
  p2 <- t_tbl[2, 2, ] / n2
  # CMH weights
  # Only strata with observations in both groups contribute.
  use_stratum <- (n1 > 0) & (n2 > 0)
  n1 <- n1[use_stratum]
  n2 <- n2[use_stratum]
  p1 <- p1[use_stratum]
  p2 <- p2[use_stratum]
  wt <- (n1 * n2 / (n1 + n2))
  wt_normalized <- wt / sum(wt)
  # Weighted proportion per group.
  est1 <- sum(wt_normalized * p1)
  est2 <- sum(wt_normalized * p2)
  estimate <- c(est1, est2)
  names(estimate) <- levels(grp)
  se1 <- sqrt(sum(wt_normalized^2 * p1 * (1 - p1) / n1))
  se2 <- sqrt(sum(wt_normalized^2 * p2 * (1 - p2) / n2))
  z <- stats::qnorm((1 + conf_level) / 2)
  err1 <- z * se1
  err2 <- z * se2
  ci1 <- c((est1 - err1), (est1 + err1))
  ci2 <- c((est2 - err2), (est2 + err2))
  estimate_ci <- list(ci1, ci2)
  names(estimate_ci) <- levels(grp)
  # Difference is second group level minus first group level.
  diff_est <- est2 - est1
  se_diff <- sqrt(sum(((p1 * (1 - p1) / n1) + (p2 * (1 - p2) / n2)) * wt_normalized^2))
  diff_ci <- c(diff_est - z * se_diff, diff_est + z * se_diff)

  list(
    prop = estimate,
    prop_ci = estimate_ci,
    diff = diff_est,
    diff_ci = diff_ci,
    weights = wt_normalized,
    n1 = n1,
    n2 = n2
  )
}
524
525		#' @describeIn h_prop_diff Calculates the stratified Newcombe confidence interval and difference in response
526		#' rates between the experimental treatment group and the control treatment group, adjusted for stratification
527		#' factors. This implementation follows closely the one proposed by \insertCite{Yan2010-jt;textual}{tern}.
528		#' Weights can be estimated from the heuristic proposed in [prop_strat_wilson()] or from CMH-derived weights
529		#' (see [prop_diff_cmh()]).
530		#'
531		#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
532		#' @param weights_method (`string`)\cr weights method. Can be either `"cmh"` or `"heuristic"`
533		#' and directs the way weights are estimated.
534		#'
535		#' @references
536		#' - \insertRef{Yan2010-jt}{tern}
537		#'
538		#' @examples
539		#' # Stratified Newcombe confidence interval
540		#'
541		#' set.seed(2)
542		#' data_set <- data.frame(
543		#' "rsp" = sample(c(TRUE, FALSE), 100, TRUE),
544		#' "f1" = sample(c("a", "b"), 100, TRUE),
545		#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
546		#' "grp" = sample(c("Placebo", "Treatment"), 100, TRUE),
547		#' stringsAsFactors = TRUE
548		#' )
549		#'
550		#' prop_diff_strat_nc(
551		#' rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]),
552		#' weights_method = "cmh",
553		#' conf_level = 0.90
554		#' )
555		#'
556		#' prop_diff_strat_nc(
557		#' rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]),
558		#' weights_method = "wilson_h",
559		#' conf_level = 0.90
560		#' )
561		#'
562		#' @export
prop_diff_strat_nc <- function(rsp,
                               grp,
                               strata,
                               weights_method = c("cmh", "wilson_h"),
                               conf_level = 0.95,
                               correct = FALSE) {
  weights_method <- match.arg(weights_method)
  grp <- as_factor_keep_attributes(grp)
  strata <- as_factor_keep_attributes(strata)
  check_diff_prop_ci(
    rsp = rsp, grp = grp, conf_level = conf_level, strata = strata
  )
  checkmate::assert_number(conf_level, lower = 0, upper = 1)
  checkmate::assert_flag(correct)
  # `tapply()` yields NA for strata levels without observations; `na.rm = TRUE` keeps the
  # condition a definite TRUE/FALSE so that empty levels cannot make `if` fail.
  if (any(tapply(rsp, strata, length) < 5, na.rm = TRUE)) {
    warning("Less than 5 observations in some strata.")
  }

  rsp_by_grp <- split(rsp, f = grp)
  strata_by_grp <- split(strata, f = grp)

  # Finding the weights
  weights <- if (identical(weights_method, "cmh")) {
    prop_diff_cmh(rsp = rsp, grp = grp, strata = strata)$weights
  } else if (identical(weights_method, "wilson_h")) {
    prop_strat_wilson(rsp, strata, conf_level = conf_level, correct = correct)$weights
  }
  # Strata levels that received no weight are given weight 0 (appended by name).
  weights[levels(strata)[!levels(strata) %in% names(weights)]] <- 0

  # Calculating lower (`l`) and upper (`u`) confidence bounds per group.
  strat_wilson_by_grp <- Map(
    prop_strat_wilson,
    rsp = rsp_by_grp,
    strata = strata_by_grp,
    weights = list(weights, weights),
    conf_level = conf_level,
    correct = correct
  )

  # First list element corresponds to the first level of `grp` (reference), second to the other level.
  ci_ref <- strat_wilson_by_grp[[1]]
  ci_trt <- strat_wilson_by_grp[[2]]
  l_ref <- as.numeric(ci_ref$conf_int[1])
  u_ref <- as.numeric(ci_ref$conf_int[2])
  l_trt <- as.numeric(ci_trt$conf_int[1])
  u_trt <- as.numeric(ci_trt$conf_int[2])

  # Estimating the diff and n_ref, n_trt (it allows different weights to be used)
  t_tbl <- table(
    factor(rsp, levels = c("FALSE", "TRUE")),
    grp,
    strata
  )
  n_ref <- colSums(t_tbl[1:2, 1, ])
  n_trt <- colSums(t_tbl[1:2, 2, ])
  # Only strata with observations in both groups contribute.
  use_stratum <- (n_ref > 0) & (n_trt > 0)
  n_ref <- n_ref[use_stratum]
  n_trt <- n_trt[use_stratum]
  p_ref <- t_tbl[2, 1, use_stratum] / n_ref
  p_trt <- t_tbl[2, 2, use_stratum] / n_trt
  # NOTE(review): `weights` may be longer than `p_ref`/`n_ref` when some strata were dropped by
  # `use_stratum` (zero weights are appended above); the products below then rely on recycling.
  # Confirm the alignment, in particular for `weights_method = "wilson_h"`.
  est1 <- sum(weights * p_ref)
  est2 <- sum(weights * p_trt)
  diff_est <- est2 - est1

  lambda1 <- sum(weights^2 / n_ref)
  lambda2 <- sum(weights^2 / n_trt)
  z <- stats::qnorm((1 + conf_level) / 2)

  lower <- diff_est - z * sqrt(lambda2 * l_trt * (1 - l_trt) + lambda1 * u_ref * (1 - u_ref))
  upper <- diff_est + z * sqrt(lambda1 * l_ref * (1 - l_ref) + lambda2 * u_trt * (1 - u_trt))

  list(
    "diff" = diff_est,
    "diff_ci" = c("lower" = lower, "upper" = upper)
  )
}

1		#' Line plot with the optional table
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Line plot with the optional table.
6		#'
7		#' @param df (`data.frame`)\cr data set containing all analysis variables.
8		#' @param alt_counts_df (`data.frame` or `NULL`)\cr data set that will be used (only) to count objects in strata.
9		#' @param variables (named `character` vector) of variable names in `df` data set. Details are:
10		#' * `x` (`character`)\cr name of x-axis variable.
11		#' * `y` (`character`)\cr name of y-axis variable.
12		#' * `strata` (`character`)\cr name of grouping variable, i.e. treatment arm. Can be `NA` to indicate lack of groups.
13		#' * `paramcd` (`character`)\cr name of the variable for parameter's code. Used for y-axis label and plot's subtitle.
14		#' Can be `NA` if paramcd is not to be added to the y-axis label or subtitle.
15		#' * `y_unit` (`character`)\cr name of variable with units of `y`. Used for y-axis label and plot's subtitle.
16		#' Can be `NA` if y unit is not to be added to the y-axis label or subtitle.
17		#' @param mid (`character` or `NULL`)\cr names of the statistics that will be plotted as midpoints.
18		#' All the statistics indicated in `mid` variable must be present in the object returned by `sfun`,
19		#' and be of a `double` or `numeric` type vector of length one.
20		#' @param interval (`character` or `NULL`)\cr names of the statistics that will be plotted as intervals.
21		#' All the statistics indicated in `interval` variable must be present in the object returned by `sfun`,
22		#' and be of a `double` or `numeric` type vector of length two.
23		#' @param whiskers (`character`)\cr names of the interval whiskers that will be plotted. Must match the `names`
24		#' attribute of the `interval` element in the list returned by `sfun`. It is possible to specify one whisker only,
25		#' lower or upper.
26		#' @param table (`character` or `NULL`)\cr names of the statistics that will be displayed in the table below the plot.
27		#' All the statistics indicated in `table` variable must be present in the object returned by `sfun`.
28		#' @param sfun (`closure`)\cr the function to compute the values of required statistics. It must return a named `list`
29		#' with atomic vectors. The names of the `list` elements refer to the names of the statistics and are used by `mid`,
30		#' `interval`, `table`. It must be able to accept as input a vector with data for which statistics are computed.
31		#' @param ... optional arguments to `sfun`.
32		#' @param mid_type (`character`)\cr controls the type of the `mid` plot, it can be point (`p`), line (`l`),
33		#' or point and line (`pl`).
34		#' @param mid_point_size (`integer` or `double`)\cr controls the font size of the point for `mid` plot.
35		#' @param position (`character` or `call`)\cr geom element position adjustment, either as a string, or the result of
36		#' a call to a position adjustment function.
37		#' @param legend_title (`character` string)\cr legend title.
38		#' @param legend_position (`character`)\cr the position of the plot legend (`none`, `left`, `right`, `bottom`, `top`,
39		#' or two-element numeric vector).
40		#' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to control styling of the plot.
41		#' @param y_lab (`character`)\cr y-axis label. If equal to `NULL`, then no label will be added.
42		#' @param y_lab_add_paramcd (`logical`)\cr should paramcd, i.e. `unique(df[[variables["paramcd"]]])` be added to the
43		#' y-axis label `y_lab`?
44		#' @param y_lab_add_unit (`logical`)\cr should y unit, i.e. `unique(df[[variables["y_unit"]]])` be added to the y-axis
45		#' label `y_lab`?
46		#' @param title (`character`)\cr plot title.
47		#' @param subtitle (`character`)\cr plot subtitle.
48		#' @param subtitle_add_paramcd (`logical`)\cr should paramcd, i.e. `unique(df[[variables["paramcd"]]])` be added to
49		#' the plot's subtitle `subtitle`?
50		#' @param subtitle_add_unit (`logical`)\cr should y unit, i.e. `unique(df[[variables["y_unit"]]])` be added to the
51		#' plot's subtitle `subtitle`?
52		#' @param caption (`character`)\cr optional caption below the plot.
53		#' @param table_format (named `character` or `NULL`)\cr format patterns for descriptive statistics used in the
54		#' (optional) table appended to the plot. It is passed directly to the `h_format_row` function through the `format`
55		#' parameter. Names of `table_format` must match the names of statistics returned by `sfun` function.
56		#' @param table_labels (named `character` or `NULL`)\cr labels for descriptive statistics used in the (optional) table
57		#' appended to the plot. Names of `table_labels` must match the names of statistics returned by `sfun` function.
58		#' @param table_font_size (`integer` or `double`)\cr controls the font size of values in the table.
59		#' @param newpage (`logical`)\cr should plot be drawn on new page?
60		#' @param col (`character`)\cr colors.
61		#'
62		#' @return A `ggplot` line plot (and statistics table if applicable).
63		#'
64		#' @examples
65		#' library(nestcolor)
66		#'
67		#' adsl <- tern_ex_adsl
68		#' adlb <- tern_ex_adlb %>% dplyr::filter(ANL01FL == "Y", PARAMCD == "ALT", AVISIT != "SCREENING")
69		#' adlb$AVISIT <- droplevels(adlb$AVISIT)
70		#' adlb <- dplyr::mutate(adlb, AVISIT = forcats::fct_reorder(AVISIT, AVISITN, min))
71		#'
72		#' # Mean with CI
73		#' g_lineplot(adlb, adsl, subtitle = "Laboratory Test:")
74		#'
75		#' # Mean with CI, no stratification
76		#' g_lineplot(adlb, variables = control_lineplot_vars(strata = NA))
77		#'
78		#' # Mean, upper whisker of CI, no strata counts N
79		#' g_lineplot(
80		#' adlb,
81		#' whiskers = "mean_ci_upr",
82		#' title = "Plot of Mean and Upper 95% Confidence Limit by Visit"
83		#' )
84		#'
85		#' # Median with CI
86		#' g_lineplot(
87		#' adlb,
88		#' adsl,
89		#' mid = "median",
90		#' interval = "median_ci",
91		#' whiskers = c("median_ci_lwr", "median_ci_upr"),
92		#' title = "Plot of Median and 95% Confidence Limits by Visit"
93		#' )
94		#'
95		#' # Mean, +/- SD
96		#' g_lineplot(adlb, adsl,
97		#' interval = "mean_sdi",
98		#' whiskers = c("mean_sdi_lwr", "mean_sdi_upr"),
99		#' title = "Plot of Median +/- SD by Visit"
100		#' )
101		#'
102		#' # Mean with CI plot with stats table
103		#' g_lineplot(adlb, adsl, table = c("n", "mean", "mean_ci"))
104		#'
105		#' # Mean with CI, table and customized confidence level
106		#' g_lineplot(
107		#' adlb,
108		#' adsl,
109		#' table = c("n", "mean", "mean_ci"),
110		#' control = control_summarize_vars(conf_level = 0.80),
111		#' title = "Plot of Mean and 80% Confidence Limits by Visit"
112		#' )
113		#'
114		#' # Mean with CI, table, filtered data
115		#' adlb_f <- dplyr::filter(adlb, ARMCD != "ARM A" \| AVISIT == "BASELINE")
116		#' g_lineplot(adlb_f, table = c("n", "mean"))
117		#'
118		#' @export
119		g_lineplot <- function(df,
120		alt_counts_df = NULL,
121		variables = control_lineplot_vars(),
122		mid = "mean",
123		interval = "mean_ci",
124		whiskers = c("mean_ci_lwr", "mean_ci_upr"),
125		table = NULL,
126		sfun = tern::s_summary,
127		...,
128		mid_type = "pl",
129		mid_point_size = 2,
130		position = ggplot2::position_dodge(width = 0.4),
131		legend_title = NULL,
132		legend_position = "bottom",
133		ggtheme = nestcolor::theme_nest(),
134		y_lab = NULL,
135		y_lab_add_paramcd = TRUE,
136		y_lab_add_unit = TRUE,
137		title = "Plot of Mean and 95% Confidence Limits by Visit",
138		subtitle = "",
139		subtitle_add_paramcd = TRUE,
140		subtitle_add_unit = TRUE,
141		caption = NULL,
142		table_format = summary_formats(),
143		table_labels = summary_labels(),
144		table_font_size = 3,
145		newpage = TRUE,
146		col = NULL) {
147	2x	checkmate::assert_character(variables, any.missing = TRUE)
148	2x	checkmate::assert_character(mid, null.ok = TRUE)
149	2x	checkmate::assert_character(interval, null.ok = TRUE)
150	2x	checkmate::assert_character(col, null.ok = TRUE)
151
152	2x	checkmate::assert_string(title, null.ok = TRUE)
153	2x	checkmate::assert_string(subtitle, null.ok = TRUE)
154
155	2x	if (is.character(interval)) {
156	2x	checkmate::assert_vector(whiskers, min.len = 0, max.len = 2)
157		}
158
159	2x	if (length(whiskers) == 1) {
160	!	checkmate::assert_character(mid)
161		}
162
163	2x	if (is.character(mid)) {
164	2x	checkmate::assert_scalar(mid_type)
165	2x	checkmate::assert_subset(mid_type, c("pl", "p", "l"))
166		}
167
168	2x	x <- variables[["x"]]
169	2x	y <- variables[["y"]]
170	2x	paramcd <- variables["paramcd"] # NA if paramcd == NA or it is not in variables
171	2x	y_unit <- variables["y_unit"] # NA if y_unit == NA or it is not in variables
172	2x	if (is.na(variables["strata"])) {
173	!	strata <- NULL # NULL if strata == NA or it is not in variables
174		} else {
175	2x	strata <- variables[["strata"]]
176		}
177	2x	checkmate::assert_flag(y_lab_add_paramcd, null.ok = TRUE)
178	2x	checkmate::assert_flag(subtitle_add_paramcd, null.ok = TRUE)
179	2x	if ((!is.null(y_lab) && y_lab_add_paramcd) \|\| (!is.null(subtitle) && subtitle_add_paramcd)) {
180	2x	checkmate::assert_false(is.na(paramcd))
181	2x	checkmate::assert_scalar(unique(df[[paramcd]]))
182		}
183
184	2x	checkmate::assert_flag(y_lab_add_unit, null.ok = TRUE)
185	2x	checkmate::assert_flag(subtitle_add_unit, null.ok = TRUE)
186	2x	if ((!is.null(y_lab) && y_lab_add_unit) \|\| (!is.null(subtitle) && subtitle_add_unit)) {
187	2x	checkmate::assert_false(is.na(y_unit))
188	2x	checkmate::assert_scalar(unique(df[[y_unit]]))
189		}
190
191	2x	if (!is.null(strata) && !is.null(alt_counts_df)) {
192	2x	checkmate::assert_set_equal(unique(alt_counts_df[[strata]]), unique(df[[strata]]))
193		}
194
195		####################################### \|
196		# ---- Compute required statistics ----
197		####################################### \|
198	2x	if (!is.null(strata)) {
199	2x	df_grp <- tidyr::expand(df, .data[[strata]], .data[[x]]) # expand based on levels of factors
200		} else {
201	!	df_grp <- tidyr::expand(df, NULL, .data[[x]])
202		}
203	2x	df_grp <- df_grp %>%
204	2x	dplyr::full_join(y = df[, c(strata, x, y)], by = c(strata, x), multiple = "all") %>%
205	2x	dplyr::group_by_at(c(strata, x))
206
207	2x	df_stats <- df_grp %>%
208	2x	dplyr::summarise(
209	2x	data.frame(t(do.call(c, unname(sfun(.data[[y]], ...)[c(mid, interval)])))),
210	2x	.groups = "drop"
211		)
212
213	2x	df_stats <- df_stats %>% dplyr::filter(!is.na(mid))
214
215		# add number of objects N in strata
216	2x	if (!is.null(strata) && !is.null(alt_counts_df)) {
217	2x	strata_N <- paste0(strata, "_N") # nolint
218
219	2x	df_N <- as.data.frame(table(alt_counts_df[[strata]], exclude = c(NA, NaN, Inf))) # nolint
220	2x	colnames(df_N) <- c(strata, "N") # nolint
221	2x	df_N[[strata_N]] <- paste0(df_N[[strata]], " (N = ", df_N$N, ")") # nolint
222
223		# strata_N should not be in clonames(df_stats)
224	2x	checkmate::assert_disjunct(strata_N, colnames(df_stats))
225
226	2x	df_stats <- merge(x = df_stats, y = df_N[, c(strata, strata_N)], by = strata)
227	!	} else if (!is.null(strata)) {
228	!	strata_N <- strata # nolint
229		} else {
230	!	strata_N <- NULL # nolint
231		}
232
233		############################################### \|
234		# ---- Prepare certain plot's properties. ----
235		############################################### \|
236		# legend title
237	2x	if (is.null(legend_title) && !is.null(strata) && legend_position != "none") {
238	2x	legend_title <- attr(df[[strata]], "label")
239		}
240
241		# y label
242	2x	if (!is.null(y_lab)) {
243	1x	if (y_lab_add_paramcd) {
244	1x	y_lab <- paste(y_lab, unique(df[[paramcd]]))
245		}
246
247	1x	if (y_lab_add_unit) {
248	1x	y_lab <- paste0(y_lab, " (", unique(df[[y_unit]]), ")")
249		}
250
251	1x	y_lab <- trimws(y_lab)
252		}
253
254		# subtitle
255	2x	if (!is.null(subtitle)) {
256	2x	if (subtitle_add_paramcd) {
257	2x	subtitle <- paste(subtitle, unique(df[[paramcd]]))
258		}
259
260	2x	if (subtitle_add_unit) {
261	2x	subtitle <- paste0(subtitle, " (", unique(df[[y_unit]]), ")")
262		}
263
264	2x	subtitle <- trimws(subtitle)
265		}
266
267		############################### \|
268		# ---- Build plot object. ----
269		############################### \|
270	2x	p <- ggplot2::ggplot(
271	2x	data = df_stats,
272	2x	mapping = ggplot2::aes(
273	2x	x = .data[[x]], y = .data[[mid]],
274	2x	color = if (is.null(strata_N)) NULL else .data[[strata_N]],
275	2x	shape = if (is.null(strata_N)) NULL else .data[[strata_N]],
276	2x	lty = if (is.null(strata_N)) NULL else .data[[strata_N]],
277	2x	group = if (is.null(strata_N)) NULL else .data[[strata_N]]
278		)
279		)
280
281	2x	if (!is.null(mid)) {
282		# points
283	2x	if (grepl("p", mid_type, fixed = TRUE)) {
284	2x	p <- p + ggplot2::geom_point(position = position, size = mid_point_size, na.rm = TRUE)
285		}
286
287		# lines
288		# further conditions in if are to ensure that not all of the groups consist of only one observation
289	2x	if (grepl("l", mid_type, fixed = TRUE) &&
290	2x	!is.null(strata) &&
291	2x	!all(dplyr::summarise(df_grp, count_n = dplyr::n())[["count_n"]] == 1L)) {
292	2x	p <- p + ggplot2::geom_line(position = position, na.rm = TRUE)
293		}
294		}
295
296		# interval
297	2x	if (!is.null(interval)) {
298	2x	p <- p +
299	2x	ggplot2::geom_errorbar(
300	2x	ggplot2::aes(ymin = .data[[whiskers[1]]], ymax = .data[[whiskers[max(1, length(whiskers))]]]),
301	2x	width = 0.45,
302	2x	position = position
303		)
304
305	2x	if (length(whiskers) == 1) { # lwr or upr only; mid is then required
306		# workaround as geom_errorbar does not provide single-direction whiskers
307	!	p <- p +
308	!	ggplot2::geom_linerange(
309	!	data = df_stats[!is.na(df_stats[[whiskers]]), ], # as na.rm =TRUE does not suppress warnings
310	!	ggplot2::aes(ymin = .data[[mid]], ymax = .data[[whiskers]]),
311	!	position = position,
312	!	na.rm = TRUE,
313	!	show.legend = FALSE
314		)
315		}
316		}
317
318	2x	p <- p +
319	2x	ggplot2::scale_y_continuous(labels = scales::comma, expand = ggplot2::expansion(c(0.25, .25))) +
320	2x	ggplot2::labs(
321	2x	title = title,
322	2x	subtitle = subtitle,
323	2x	caption = caption,
324	2x	color = legend_title,
325	2x	lty = legend_title,
326	2x	shape = legend_title,
327	2x	x = attr(df[[x]], "label"),
328	2x	y = y_lab
329		)
330
331	2x	if (!is.null(col)) {
332	!	p <- p +
333	!	ggplot2::scale_color_manual(values = col)
334		}
335
336	2x	if (!is.null(ggtheme)) {
337	2x	p <- p + ggtheme
338		} else {
339	!	p <- p +
340	!	ggplot2::theme_bw() +
341	!	ggplot2::theme(
342	!	legend.key.width = grid::unit(1, "cm"),
343	!	legend.position = legend_position,
344	!	legend.direction = ifelse(
345	!	legend_position %in% c("top", "bottom"),
346	!	"horizontal",
347	!	"vertical"
348		)
349		)
350		}
351
352		############################################################# \|
353		# ---- Optionally, add table to the bottom of the plot. ----
354		############################################################# \|
355	2x	if (!is.null(table)) {
356	1x	df_stats_table <- df_grp %>%
357	1x	dplyr::summarise(
358	1x	h_format_row(
359	1x	x = sfun(.data[[y]], ...)[table],
360	1x	format = table_format,
361	1x	labels = table_labels
362		),
363	1x	.groups = "drop"
364		)
365
366	1x	stats_lev <- rev(setdiff(colnames(df_stats_table), c(strata, x)))
367
368	1x	df_stats_table <- df_stats_table %>%
369	1x	tidyr::pivot_longer(
370	1x	cols = -dplyr::all_of(c(strata, x)),
371	1x	names_to = "stat",
372	1x	values_to = "value",
373	1x	names_ptypes = list(stat = factor(levels = stats_lev))
374		)
375
376	1x	tbl <- ggplot2::ggplot(
377	1x	df_stats_table,
378	1x	ggplot2::aes(x = .data[[x]], y = .data[["stat"]], label = .data[["value"]])
379		) +
380	1x	ggplot2::geom_text(size = table_font_size) +
381	1x	ggplot2::theme_bw() +
382	1x	ggplot2::theme(
383	1x	panel.border = ggplot2::element_blank(),
384	1x	panel.grid.major = ggplot2::element_blank(),
385	1x	panel.grid.minor = ggplot2::element_blank(),
386	1x	axis.ticks = ggplot2::element_blank(),
387	1x	axis.title = ggplot2::element_blank(),
388	1x	axis.text.x = ggplot2::element_blank(),
389	1x	axis.text.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 0, b = 0, l = 5)),
390	1x	strip.text = ggplot2::element_text(hjust = 0),
391	1x	strip.text.x = ggplot2::element_text(margin = ggplot2::margin(1.5, 0, 1.5, 0, "pt")),
392	1x	strip.background = ggplot2::element_rect(fill = "grey95", color = NA),
393	1x	legend.position = "none"
394		)
395
396	1x	if (!is.null(strata)) {
397	1x	tbl <- tbl + ggplot2::facet_wrap(facets = strata, ncol = 1)
398		}
399
400		# align plot and table
401	1x	cowplot::plot_grid(p, tbl, ncol = 1)
402		} else {
403	1x	p
404		}
405		}
406
407		#' Helper function to get the right formatting in the optional table in g_lineplot.
408		#'
409		#' @description `r lifecycle::badge("stable")`
410		#'
411		#' @param x (named `list`)\cr list of numerical values to be formatted and optionally labeled.
412		#' Elements of `x` must be `numeric` vectors.
413		#' @param format (named `character` or `NULL`)\cr format patterns for `x`. Names of the `format` must
414		#' match the names of `x`. This parameter is passed directly to the `rtables::format_rcell`
415		#' function through the `format` parameter.
416		#' @param labels (named `character` or `NULL`)\cr optional labels for `x`. Names of the `labels` must
417		#' match the names of `x`. When a label is not specified for an element of `x`,
418		#' then this function tries to use `label` or `names` (in this order) attribute of that element
419		#' (depending on which one exists and it is not `NULL` or `NA` or `NaN`). If none of these attributes
420		#' are attached to a given element of `x`, then the label is automatically generated.
421		#'
422		#' @return A single row `data.frame` object.
423		#'
424		#' @examples
425		#' mean_ci <- c(48, 51)
426		#' x <- list(mean = 50, mean_ci = mean_ci)
427		#' format <- c(mean = "xx.x", mean_ci = "(xx.xx, xx.xx)")
428		#' labels <- c(mean = "My Mean")
429		#' h_format_row(x, format, labels)
430		#'
431		#' attr(mean_ci, "label") <- "Mean 95% CI"
432		#' x <- list(mean = 50, mean_ci = mean_ci)
433		#' h_format_row(x, format, labels)
434		#'
435		#' @export
h_format_row <- function(x, format, labels = NULL) {
  # Turn one element of `x` into a one-row, one-column data.frame whose column
  # name is the label chosen for that element.
  as_cell <- function(value, fmt, lbl = NULL) {
    cell <- format_rcell(x = value, format = fmt)
    if (is.na(cell)) {
      cell <- "NA"
    }

    # Label priority: explicit label, then the "label" attribute of the value,
    # then the names of the value (only when lengths agree).
    attr_lbl <- attr(value, "label")
    if (!is.null(lbl) && !is.na(lbl)) {
      names(cell) <- lbl
    } else if (!is.null(attr_lbl) && !is.na(attr_lbl)) {
      names(cell) <- attr_lbl
    } else if (length(value) == length(cell)) {
      names(cell) <- names(value)
    }

    as.data.frame(t(cell))
  }

  # One cell per named element of `x`, bound side by side into a single row.
  cells <- lapply(
    names(x),
    function(nm) as_cell(x[[nm]], fmt = format[nm], lbl = labels[nm])
  )

  do.call(cbind, cells)
}
463
464		#' Control Function for g_lineplot Function
465		#'
466		#' @description `r lifecycle::badge("stable")`
467		#'
468		#' Default values for `variables` parameter in `g_lineplot` function.
469		#' A variable's default value can be overwritten for any variable.
470		#'
471		#' @param x (`character`)\cr x variable name.
472		#' @param y (`character`)\cr y variable name.
473		#' @param strata (`character` or `NA`)\cr strata variable name.
474		#' @param paramcd (`character` or `NA`)\cr paramcd variable name.
475		#' @param y_unit (`character` or `NA`)\cr y_unit variable name.
476		#'
477		#' @return A named character vector of variable names.
478		#'
479		#' @examples
480		#' control_lineplot_vars()
481		#' control_lineplot_vars(strata = NA)
482		#'
483		#' @export
control_lineplot_vars <- function(x = "AVISIT", y = "AVAL", strata = "ARM", paramcd = "PARAMCD", y_unit = "AVALU") {
  # `x` and `y` must be proper strings; the other entries may be set to `NA`.
  checkmate::assert_string(x)
  checkmate::assert_string(y)
  checkmate::assert_string(strata, na.ok = TRUE)
  checkmate::assert_string(paramcd, na.ok = TRUE)
  checkmate::assert_string(y_unit, na.ok = TRUE)

  # Named character vector keyed by the role of each variable.
  c(x = x, y = y, strata = strata, paramcd = paramcd, y_unit = y_unit)
}

#' Confidence Interval for Mean
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convenient function for calculating the mean confidence interval. It calculates the arithmetic as well as the
#' geometric mean. It can be used as a `ggplot` helper function for plotting.
#'
#' @inheritParams argument_convention
#' @param n_min (`number`)\cr a minimum number of non-missing `x` to estimate the confidence interval for mean.
#' @param gg_helper (`logical`)\cr `TRUE` when output should be aligned for the use with `ggplot`.
#' @param geom_mean (`logical`)\cr `TRUE` when the geometric mean should be calculated.
#'
#' @return If `gg_helper = FALSE`, a named `vector` of values `mean_ci_lwr` and `mean_ci_upr`.
#'   If `gg_helper = TRUE` (the default), a one-row `data.frame` with columns `y` (the mean),
#'   `ymin` and `ymax` (the interval limits). Both limits are `NA` when `x` has fewer than `n_min`
#'   values (after removing `NA`s if `na.rm = TRUE`), when the mean is missing, or when
#'   `geom_mean = TRUE` and `x` contains non-positive values.
#'
#' @examples
#' stat_mean_ci(sample(10), gg_helper = FALSE)
#'
#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
#'   ggplot2::geom_point()
#'
#' p + ggplot2::stat_summary(
#'   fun.data = stat_mean_ci,
#'   geom = "errorbar"
#' )
#'
#' p + ggplot2::stat_summary(
#'   fun.data = stat_mean_ci,
#'   fun.args = list(conf_level = 0.5),
#'   geom = "errorbar"
#' )
#'
#' p + ggplot2::stat_summary(
#'   fun.data = stat_mean_ci,
#'   fun.args = list(conf_level = 0.5, geom_mean = TRUE),
#'   geom = "errorbar"
#' )
#'
#' @export
stat_mean_ci <- function(x,
                         conf_level = 0.95,
                         na.rm = TRUE, # nolint
                         n_min = 2,
                         gg_helper = TRUE,
                         geom_mean = FALSE) {
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  if (!geom_mean) {
    m <- mean(x)
  } else if (any(x[!is.na(x)] <= 0)) {
    # The geometric mean is not defined once a non-positive value is present.
    m <- NA_real_
  } else {
    # Work on the log scale; the interval is back-transformed further down.
    x <- log(x)
    m <- mean(x)
  }

  if (n < n_min || is.na(m)) {
    ci <- c(mean_ci_lwr = NA_real_, mean_ci_upr = NA_real_)
  } else {
    # Half-width of the t-based interval.
    hci <- stats::qt((1 + conf_level) / 2, df = n - 1) * stats::sd(x) / sqrt(n)
    ci <- c(mean_ci_lwr = m - hci, mean_ci_upr = m + hci)
    if (geom_mean) {
      ci <- exp(ci)
    }
  }

  if (gg_helper) {
    # Normalise NaN (e.g. the mean of an empty vector) to NA before building
    # the y / ymin / ymax layout expected by ggplot2::stat_summary().
    if (is.na(m)) {
      m <- NA_real_
    }
    y <- if (geom_mean) exp(m) else m
    ci <- data.frame(y = y, ymin = ci[[1]], ymax = ci[[2]])
  }

  ci
}
79
#' Confidence Interval for Median
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convenient function for calculating the median confidence interval. It can be used as a `ggplot` helper
#' function for plotting.
#'
#' @inheritParams argument_convention
#' @param gg_helper (`logical`)\cr `TRUE` when output should be aligned for the use with `ggplot`.
#'
#' @details The function was adapted from `DescTools/versions/0.99.35/source`
#'
#' @return If `gg_helper = FALSE`, a named `vector` of values `median_ci_lwr` and `median_ci_upr`.
#'   If `gg_helper = TRUE` (the default), a one-row `data.frame` with columns `y` (the median),
#'   `ymin` and `ymax`. In both cases the empirical confidence level is attached as attribute
#'   `conf_level`; limits and attribute are `NA` when the interval cannot be computed.
#'
#' @examples
#' stat_median_ci(sample(10), gg_helper = FALSE)
#'
#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
#'   ggplot2::geom_point()
#' p + ggplot2::stat_summary(
#'   fun.data = stat_median_ci,
#'   geom = "errorbar"
#' )
#'
#' @export
stat_median_ci <- function(x,
                           conf_level = 0.95,
                           na.rm = TRUE, # nolint
                           gg_helper = TRUE) {
  values <- unname(x)
  if (na.rm) {
    values <- values[!is.na(values)]
  }
  n_obs <- length(values)
  center <- stats::median(values)

  # Rank of the lower limit among the sorted values (binomial quantile).
  rank_lwr <- stats::qbinom(p = (1 - conf_level) / 2, size = n_obs, prob = 0.5, lower.tail = TRUE)

  # Start from the "not available" result and overwrite it when the interval exists.
  # rank_lwr == 0 happens for small samples (e.g. n <= 5), where the ci can be
  # outside the observed range.
  ci <- c(median_ci_lwr = NA_real_, median_ci_upr = NA_real_)
  empir_conf_level <- NA_real_
  if (rank_lwr != 0 && !is.na(center)) {
    ordered <- sort(values)
    ci <- c(median_ci_lwr = ordered[rank_lwr], median_ci_upr = ordered[n_obs - rank_lwr + 1])
    empir_conf_level <- 1 - 2 * stats::pbinom(rank_lwr - 1, size = n_obs, prob = 0.5)
  }

  if (gg_helper) {
    ci <- data.frame(y = center, ymin = ci[[1]], ymax = ci[[2]])
  }

  attr(ci, "conf_level") <- empir_conf_level

  ci
}
136
#' p-Value of the Mean
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convenient function for calculating the two-sided p-value of the mean.
#'
#' @inheritParams argument_convention
#' @param n_min (`numeric`)\cr a minimum number of non-missing `x` to estimate the p-value of the mean.
#' @param test_mean (`numeric`)\cr mean value to test under the null hypothesis.
#'
#' @return A named numeric vector of length one (`p_value`) holding the two-sided p-value of the
#'   one-sample t-test, or `NA` when `x` has fewer than `n_min` values (after removing `NA`s if
#'   `na.rm = TRUE`).
#'
#' @examples
#' stat_mean_pval(sample(10))
#'
#' stat_mean_pval(rnorm(10), test_mean = 0.5)
#'
#' @export
stat_mean_pval <- function(x,
                           na.rm = TRUE, # nolint
                           n_min = 2,
                           test_mean = 0) {
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  # Too few observations: no test statistic can be formed.
  if (n < n_min) {
    return(c(p_value = NA_real_))
  }

  # One-sample t statistic against `test_mean`, with n - 1 degrees of freedom.
  x_se <- stats::sd(x) / sqrt(n)
  ttest <- (mean(x) - test_mean) / x_se
  c(p_value = 2 * stats::pt(-abs(ttest), df = n - 1))
}

1		#' Estimation of Proportions per Level of Factor
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Estimate the proportion along with confidence interval of a proportion
6		#' regarding the level of a factor.
7		#'
8		#' @inheritParams argument_convention
9		#'
10		#' @seealso Relevant description function [d_onco_rsp_label()].
11		#'
12		#' @name estimate_multinomial_rsp
13		NULL
14
15		#' Description of Standard Oncology Response
16		#'
17		#' @description `r lifecycle::badge("stable")`
18		#'
19		#' Describe the oncology response in a standard way.
20		#'
21		#' @param x (`character`)\cr the standard oncology code to be described.
22		#'
23		#' @return Response labels.
24		#'
25		#' @seealso [estimate_multinomial_rsp()]
26		#'
27		#' @examples
28		#' d_onco_rsp_label(
29		#' c("CR", "PR", "SD", "NON CR/PD", "PD", "NE", "Missing", "<Missing>", "NE/Missing")
30		#' )
31		#'
32		#' # Adding some values not considered in d_onco_rsp_label
33		#'
34		#' d_onco_rsp_label(
35		#' c("CR", "PR", "hello", "hi")
36		#' )
37		#'
38		#' @export
d_onco_rsp_label <- function(x) {
  codes <- as.character(x)

  # Lookup table: standard oncology response code -> descriptive label.
  desc <- c(
    CR = "Complete Response (CR)",
    PR = "Partial Response (PR)",
    MR = "Minimal/Minor Response (MR)",
    MRD = "Minimal Residual Disease (MRD)",
    SD = "Stable Disease (SD)",
    PD = "Progressive Disease (PD)",
    `NON CR/PD` = "Non-CR or Non-PD (NON CR/PD)",
    NE = "Not Evaluable (NE)",
    `NE/Missing` = "Missing or unevaluable",
    Missing = "Missing",
    `NA` = "Not Applicable (NA)",
    ND = "Not Done (ND)"
  )

  # Known codes are translated, anything else is passed through unchanged.
  # The result is named by the input codes.
  values_label <- codes
  is_known <- codes %in% names(desc)
  values_label[is_known] <- desc[codes[is_known]]
  names(values_label) <- codes

  # Levels: known labels first (in the order of `desc`), then unknown values
  # in order of appearance.
  ordered_levels <- c(intersect(desc, values_label), setdiff(values_label, desc))
  factor(values_label, levels = ordered_levels)
}
66
67		#' @describeIn estimate_multinomial_rsp Statistics function which feeds the length of `x` as number
68		#' of successes, and `.N_col` as total number of successes and failures into [s_proportion()].
69		#'
70		#' @return
71		#' * `s_length_proportion()` returns statistics from [s_proportion()].
72		#'
73		#' @examples
74		#' s_length_proportion(rep("CR", 10), .N_col = 100)
75		#' s_length_proportion(factor(character(0)), .N_col = 100)
76		#'
77		#' @export
s_length_proportion <- function(x,
                                .N_col, # nolint
                                ...) {
  # `x` must be a factor or character vector with at most `.N_col` entries
  # and at most one distinct value.
  checkmate::assert_multi_class(x, classes = c("factor", "character"))
  checkmate::assert_vector(x, min.len = 0, max.len = .N_col)
  checkmate::assert_vector(unique(x), min.len = 0, max.len = 1)

  # Every element of `x` counts as a success; the remainder up to `.N_col`
  # is filled with failures before delegating to s_proportion().
  n_success <- length(x)
  is_success <- rep(c(TRUE, FALSE), times = c(n_success, .N_col - n_success))
  s_proportion(df = is_success, ...)
}
90
91		#' @describeIn estimate_multinomial_rsp Formatted analysis function which is used as `afun`
92		#' in `estimate_multinomial_response()`.
93		#'
94		#' @return
95		#' * `a_length_proportion()` returns the corresponding list with formatted [rtables::CellValue()].
96		#'
97		#' @examples
98		#' a_length_proportion(rep("CR", 10), .N_col = 100)
99		#' a_length_proportion(factor(character(0)), .N_col = 100)
100		#'
101		#' @export
# Formatted counterpart of s_length_proportion(): count with percentage, and
# the confidence interval with two decimals.
a_length_proportion <- make_afun(
  s_length_proportion,
  .formats = c(n_prop = "xx (xx.x%)", prop_ci = "(xx.xx, xx.xx)")
)
109
110		#' @describeIn estimate_multinomial_rsp Layout-creating function which can take statistics function arguments
111		#' and additional format arguments. This function is a wrapper for [rtables::analyze()] and
112		#' [rtables::summarize_row_groups()].
113		#'
114		#' @return
115		#' * `estimate_multinomial_response()` returns a layout object suitable for passing to further layouting functions,
116		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
117		#' the statistics from `s_length_proportion()` to the table layout.
118		#'
119		#' @examples
120		#' library(dplyr)
121		#'
122		#' # Use of the layout creating function.
123		#' dta_test <- data.frame(
124		#' USUBJID = paste0("S", 1:12),
125		#' ARM = factor(rep(LETTERS[1:3], each = 4)),
126		#' AVAL = c(A = c(1, 1, 1, 1), B = c(0, 0, 1, 1), C = c(0, 0, 0, 0))
127		#' ) %>% mutate(
128		#' AVALC = factor(AVAL,
129		#' levels = c(0, 1),
130		#' labels = c("Complete Response (CR)", "Partial Response (PR)")
131		#' )
132		#' )
133		#'
134		#' lyt <- basic_table() %>%
135		#' split_cols_by("ARM") %>%
136		#' estimate_multinomial_response(var = "AVALC")
137		#'
138		#' tbl <- build_table(lyt, dta_test)
139		#'
140		#' html <- as_html(tbl)
141		#' html
142		#' \dontrun{
143		#' Viewer(html)
144		#' }
145		#'
146		#' @export
estimate_multinomial_response <- function(lyt,
                                          var,
                                          ...,
                                          show_labels = "hidden",
                                          table_names = var,
                                          .stats = "prop_ci",
                                          .formats = NULL,
                                          .labels = NULL,
                                          .indent_mods = NULL) {
  # Specialise the formatted analysis function with the requested statistics,
  # formats, labels and indentation.
  afun <- make_afun(
    a_length_proportion,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # One row group per level of `var`, each with its default content row.
  grouped_lyt <- summarize_row_groups(split_rows_by(lyt, var = var))

  # Analyse `var` inside every group; `...` is forwarded to the statistics function.
  analyze(
    grouped_lyt,
    vars = var,
    afun = afun,
    show_labels = show_labels,
    table_names = table_names,
    extra_args = list(...)
  )
}

1		#' Multivariate Logistic Regression Table
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Layout-creating function which summarizes a logistic regression for a binary outcome with
6		#' categorical/continuous covariates in model statement. For each covariate category (if categorical)
7		#' or specified values (if continuous), present degrees of freedom, regression parameter estimate and
8		#' standard error (SE) relative to reference group or category. Report odds ratios for each covariate
9		#' category or specified values and corresponding Wald confidence intervals as default but allow user
10		#' to specify other confidence levels. Report p-value for Wald chi-square test of the null hypothesis
11		#' that covariate has no effect on response in model containing all specified covariates.
12		#' Allow option to include one two-way interaction and present similar output for
13		#' each interaction degree of freedom.
14		#'
15		#' @inheritParams argument_convention
16		#' @param drop_and_remove_str (`character`)\cr string to be dropped and removed.
17		#'
18		#' @return A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
19		#' Adding this function to an `rtable` layout will add a logistic regression variable summary to the table layout.
20		#'
21		#' @note For the formula, the variable names need to be standard `data.frame` column names without
22		#' special characters.
23		#'
24		#' @examples
25		#' library(dplyr)
26		#' library(broom)
27		#'
28		#' adrs_f <- tern_ex_adrs %>%
29		#' filter(PARAMCD == "BESRSPI") %>%
30		#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
31		#' mutate(
32		#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
33		#' RACE = factor(RACE),
34		#' SEX = factor(SEX)
35		#' )
36		#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
37		#' mod1 <- fit_logistic(
38		#' data = adrs_f,
39		#' variables = list(
40		#' response = "Response",
41		#' arm = "ARMCD",
42		#' covariates = c("AGE", "RACE")
43		#' )
44		#' )
45		#' mod2 <- fit_logistic(
46		#' data = adrs_f,
47		#' variables = list(
48		#' response = "Response",
49		#' arm = "ARMCD",
50		#' covariates = c("AGE", "RACE"),
51		#' interaction = "AGE"
52		#' )
53		#' )
54		#'
55		#' df <- tidy(mod1, conf_level = 0.99)
56		#' df2 <- tidy(mod2, conf_level = 0.99)
57		#'
58		#' # flagging empty strings with "_"
59		#' df <- df_explicit_na(df, na_level = "_")
60		#' df2 <- df_explicit_na(df2, na_level = "_")
61		#'
62		#' result1 <- basic_table() %>%
63		#' summarize_logistic(
64		#' conf_level = 0.95,
65		#' drop_and_remove_str = "_"
66		#' ) %>%
67		#' build_table(df = df)
68		#' result1
69		#'
70		#' result2 <- basic_table() %>%
71		#' summarize_logistic(
72		#' conf_level = 0.95,
73		#' drop_and_remove_str = "_"
74		#' ) %>%
75		#' build_table(df = df2)
76		#' result2
77		#'
78		#' @export
summarize_logistic <- function(lyt,
                               conf_level,
                               drop_and_remove_str = "") {
  checkmate::assert_string(drop_and_remove_str)

  # Content-row builders, one per kind of summary row flagged in the tidy data.
  add_variable_rows <- logistic_summary_by_flag("is_variable_summary")
  add_term_rows <- logistic_summary_by_flag("is_term_summary")
  add_reference_rows <- logistic_summary_by_flag("is_reference_summary")
  split_fun <- drop_and_remove_levels(drop_and_remove_str)

  # Row split on `var`, labelled through the companion `<var>_label` column.
  split_rows <- function(lyt, var) {
    split_rows_by(lyt, var = var, labels_var = paste0(var, "_label"), split_fun = split_fun)
  }

  lyt <- logistic_regression_cols(lyt, conf_level = conf_level)
  lyt <- add_variable_rows(split_rows(lyt, "variable"))
  lyt <- add_term_rows(split_rows(lyt, "term"))
  lyt <- split_rows(lyt, "interaction")
  lyt <- split_rows(lyt, "reference")
  add_reference_rows(lyt)
}
100
101		#' Fit for Logistic Regression
102		#'
103		#' @description `r lifecycle::badge("stable")`
104		#'
105		#' Fit a (conditional) logistic regression model.
106		#'
107		#' @inheritParams argument_convention
108		#' @param data (`data.frame`)\cr the data frame on which the model was fit.
109		#' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`.
110		#' This will be used when fitting the (conditional) logistic regression model on the left hand
111		#' side of the formula.
112		#'
113		#' @return A fitted logistic regression model.
114		#'
115		#' @section Model Specification:
116		#'
117		#' The `variables` list needs to include the following elements:
118		#' * `arm`: Treatment arm variable name.
119		#' * `response`: The response variable name. Usually this is a 0/1 variable.
120		#' * `covariates`: This is either `NULL` (no covariates) or a character vector of covariate variable names.
121		#' * `interaction`: This is either `NULL` (no interaction) or a string of a single covariate variable name already
122		#' included in `covariates`. Then the interaction with the treatment arm is included in the model.
123		#'
124		#' @examples
125		#' library(dplyr)
126		#'
127		#' adrs_f <- tern_ex_adrs %>%
128		#' filter(PARAMCD == "BESRSPI") %>%
129		#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
130		#' mutate(
131		#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
132		#' RACE = factor(RACE),
133		#' SEX = factor(SEX)
134		#' )
135		#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
136		#' mod1 <- fit_logistic(
137		#' data = adrs_f,
138		#' variables = list(
139		#' response = "Response",
140		#' arm = "ARMCD",
141		#' covariates = c("AGE", "RACE")
142		#' )
143		#' )
144		#' mod2 <- fit_logistic(
145		#' data = adrs_f,
146		#' variables = list(
147		#' response = "Response",
148		#' arm = "ARMCD",
149		#' covariates = c("AGE", "RACE"),
150		#' interaction = "AGE"
151		#' )
152		#' )
153		#'
154		#' @export
fit_logistic <- function(data,
                         variables = list(
                           response = "Response",
                           arm = "ARMCD",
                           covariates = NULL,
                           interaction = NULL,
                           strata = NULL
                         ),
                         response_definition = "response") {
  assert_df_with_variables(data, variables)
  checkmate::assert_subset(names(variables), c("response", "arm", "covariates", "interaction", "strata"))
  checkmate::assert_string(response_definition)
  checkmate::assert_true(grepl("response", response_definition))

  # Left-hand side: the first literal "response" in the definition is replaced
  # by the actual response variable name.
  response_definition <- sub(
    pattern = "response",
    replacement = variables$response,
    x = response_definition,
    fixed = TRUE
  )

  # Right-hand side: arm, then covariates, then the optional arm-by-covariate interaction.
  form <- paste0(response_definition, " ~ ", variables$arm)
  if (!is.null(variables$covariates)) {
    form <- paste0(form, " + ", paste(variables$covariates, collapse = " + "))
  }
  if (!is.null(variables$interaction)) {
    checkmate::assert_string(variables$interaction)
    checkmate::assert_subset(variables$interaction, variables$covariates)
    form <- paste0(form, " + ", variables$arm, ":", variables$interaction)
  }

  stratified <- !is.null(variables$strata)
  if (stratified) {
    # Several stratification variables are combined into a single interaction factor.
    strata_arg <- variables$strata
    if (length(strata_arg) > 1) {
      strata_arg <- paste0("I(interaction(", paste0(strata_arg, collapse = ", "), "))")
    }
    form <- paste0(form, "+ strata(", strata_arg, ")")
  }
  formula <- stats::as.formula(form)

  # No strata: ordinary logistic regression.
  if (!stratified) {
    return(
      stats::glm(
        formula = formula,
        data = data,
        family = stats::binomial("logit")
      )
    )
  }

  # With strata: conditional logistic regression.
  clogit_with_tryCatch(
    formula = formula,
    data = data,
    x = TRUE
  )
}
207
208		#' Custom Tidy Method for Binomial GLM Results
209		#'
210		#' @description `r lifecycle::badge("stable")`
211		#'
212		#' Helper method (for [broom::tidy()]) to prepare a data frame from a `glm` object
213		#' with `binomial` family.
214		#'
215		#' @inheritParams argument_convention
216		#' @param at (`NULL` or `numeric`)\cr optional values for the interaction variable. Otherwise the median is used.
217		#' @param fit_glm logistic regression model fitted by [stats::glm()] with "binomial" family.
218		#'
219		#' @return A `data.frame` containing the tidied model.
220		#'
221		#' @method tidy glm
222		#'
223		#' @seealso [h_logistic_regression] for relevant helper functions.
224		#'
225		#' @examples
226		#' library(dplyr)
227		#' library(broom)
228		#'
229		#' adrs_f <- tern_ex_adrs %>%
230		#' filter(PARAMCD == "BESRSPI") %>%
231		#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
232		#' mutate(
233		#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
234		#' RACE = factor(RACE),
235		#' SEX = factor(SEX)
236		#' )
237		#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
238		#' mod1 <- fit_logistic(
239		#' data = adrs_f,
240		#' variables = list(
241		#' response = "Response",
242		#' arm = "ARMCD",
243		#' covariates = c("AGE", "RACE")
244		#' )
245		#' )
246		#' mod2 <- fit_logistic(
247		#' data = adrs_f,
248		#' variables = list(
249		#' response = "Response",
250		#' arm = "ARMCD",
251		#' covariates = c("AGE", "RACE"),
252		#' interaction = "AGE"
253		#' )
254		#' )
255		#'
256		#' df <- tidy(mod1, conf_level = 0.99)
257		#' df2 <- tidy(mod2, conf_level = 0.99)
258		#'
259		#' @export
tidy.glm <- function(fit_glm, # nolint
                     conf_level = 0.95,
                     at = NULL) {
  checkmate::assert_class(fit_glm, "glm")
  checkmate::assert_set_equal(fit_glm$family$family, "binomial")

  terms_name <- attr(stats::terms(fit_glm), "term.labels")

  # A term label that is not one of the model's data variables is treated as
  # an interaction term.
  model_vars <- names(attr(fit_glm$terms, "dataClasses"))
  has_interaction <- any(!terms_name %in% model_vars)

  df <- if (has_interaction) {
    h_logistic_inter_terms(
      x = terms_name,
      fit_glm = fit_glm,
      conf_level = conf_level,
      at = at
    )
  } else {
    h_logistic_simple_terms(
      x = terms_name,
      fit_glm = fit_glm,
      conf_level = conf_level
    )
  }

  # Convert the identifier columns to factors whose levels follow order of appearance.
  for (id_col in c("variable", "term", "interaction", "reference")) {
    df[[id_col]] <- factor(df[[id_col]], levels = unique(df[[id_col]]))
  }

  df
}
288
289		#' Logistic Regression Multivariate Column Layout Function
290		#'
291		#' @description `r lifecycle::badge("stable")`
292		#'
293		#' Layout-creating function which creates a multivariate column layout summarizing logistic
294		#' regression results. This function is a wrapper for [rtables::split_cols_by_multivar()].
295		#'
296		#' @inheritParams argument_convention
297		#'
298		#' @return A layout object suitable for passing to further layouting functions. Adding this
299		#' function to an `rtable` layout will split the table into columns corresponding to
300		#' statistics `df`, `estimate`, `std_error`, `odds_ratio`, `ci`, and `pvalue`.
301		#'
302		#' @export
logistic_regression_cols <- function(lyt,
                                     conf_level = 0.95) {
  # Column labels keyed by the statistic each column displays; the names
  # double as the variables of the multivariate column split.
  col_labels <- c(
    df = "Degrees of Freedom",
    estimate = "Parameter Estimate",
    std_error = "Standard Error",
    odds_ratio = "Odds Ratio",
    ci = paste("Wald", f_conf_level(conf_level)),
    pvalue = "p-value"
  )

  split_cols_by_multivar(
    lyt = lyt,
    vars = names(col_labels),
    varlabels = col_labels
  )
}
320
321		#' Logistic Regression Summary Table Constructor Function
322		#'
323		#' @description `r lifecycle::badge("stable")`
324		#'
325		#' Constructor for content functions to be used in [`summarize_logistic()`] to summarize
326		#' logistic regression results. This function is a wrapper for [rtables::summarize_row_groups()].
327		#'
328		#' @param flag_var (`string`)\cr variable name identifying which row should be used in this
329		#' content function.
330		#'
331		#' @return A content function.
332		#'
333		#' @export
logistic_summary_by_flag <- function(flag_var) {
  checkmate::assert_string(flag_var)

  function(lyt) {
    # Display format for each statistic column.
    col_formats <- list(
      df = "xx.",
      estimate = "xx.xxx",
      std_error = "xx.xxx",
      odds_ratio = ">999.99",
      ci = format_extreme_values_ci(2L),
      pvalue = "x.xxxx | (<0.0001)"
    )

    # One content function per column, all keyed on the same flag variable.
    # The resulting list is named by column.
    cfun_list <- Map(
      function(col, fmt) cfun_by_flag(col, flag_var, format = fmt),
      names(col_formats),
      col_formats
    )

    summarize_row_groups(
      lyt = lyt,
      cfun = cfun_list
    )
  }
}

1		#' Controls for Cox Regression
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Sets a list of parameters for Cox regression fit. Used internally.
6		#'
7		#' @inheritParams argument_convention
8		#' @param pval_method (`string`)\cr the method used for estimation of p-values; `wald` (default) or `likelihood`.
9		#' @param interaction (`flag`)\cr if `TRUE`, the model includes the interaction between the studied
10		#' treatment and candidate covariate. Note that for univariate models without treatment arm, and
11		#' multivariate models, no interaction can be used so that this needs to be `FALSE`.
12		#' @param ties (`string`)\cr among `exact` (equivalent to `DISCRETE` in SAS), `efron` and `breslow`,
13		#' see [survival::coxph()]. Note: there is no equivalent of SAS `EXACT` method in R.
14		#'
15		#' @return A `list` of items with names corresponding to the arguments.
16		#'
17		#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()].
18		#'
19		#' @examples
20		#' control_coxreg()
21		#'
22		#' @export
control_coxreg <- function(pval_method = c("wald", "likelihood"),
                           ties = c("exact", "efron", "breslow"),
                           conf_level = 0.95,
                           interaction = FALSE) {
  # Resolve the enumerated choices (the first entry is the default), then
  # validate the remaining settings.
  pval_method <- match.arg(pval_method)
  ties <- match.arg(ties)
  checkmate::assert_flag(interaction)
  assert_proportion_value(conf_level)

  settings <- list(
    pval_method = pval_method,
    ties = ties,
    conf_level = conf_level,
    interaction = interaction
  )
  settings
}
38
39		#' Custom Tidy Methods for Cox Regression
40		#'
41		#' @description `r lifecycle::badge("stable")`
42		#'
43		#' @inheritParams argument_convention
44		#' @param x (`list`)\cr Result of the Cox regression model fitted by [fit_coxreg_univar()] (for univariate models)
45		#' or [fit_coxreg_multivar()] (for multivariate models).
46		#'
47		#' @return [tidy()] returns:
48		#' * For `summary.coxph` objects, a `data.frame` with columns: `Pr(>\|z\|)`, `exp(coef)`, `exp(-coef)`, `lower .95`,
49		#' `upper .95`, `level`, and `n`.
50		#' * For `coxreg.univar` objects, a `data.frame` with columns: `effect`, `term`, `term_label`, `level`, `n`, `hr`,
51		#' `lcl`, `ucl`, `pval`, and `ci`.
52		#' * For `coxreg.multivar` objects, a `data.frame` with columns: `term`, `pval`, `term_label`, `hr`, `lcl`, `ucl`,
53		#' `level`, and `ci`.
54		#'
55		#' @seealso [cox_regression]
56		#'
57		#' @name tidy_coxreg
58		NULL
59
60		#' @describeIn tidy_coxreg Custom tidy method for [survival::coxph()] summary results.
61		#'
62		#' Tidy the [survival::coxph()] results into a `data.frame` to extract model results.
63		#'
64		#' @method tidy summary.coxph
65		#'
66		#' @examples
67		#' library(survival)
68		#' library(broom)
69		#'
70		#' set.seed(1, kind = "Mersenne-Twister")
71		#'
72		#' dta_bladder <- with(
73		#' data = bladder[bladder$enum < 5, ],
74		#' data.frame(
75		#' time = stop,
76		#' status = event,
77		#' armcd = as.factor(rx),
78		#' covar1 = as.factor(enum),
79		#' covar2 = factor(
80		#' sample(as.factor(enum)),
81		#' levels = 1:4, labels = c("F", "F", "M", "M")
82		#' )
83		#' )
84		#' )
85		#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
86		#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
87		#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
88		#'
89		#' formula <- "survival::Surv(time, status) ~ armcd + covar1"
90		#' msum <- summary(coxph(stats::as.formula(formula), data = dta_bladder))
91		#' tidy(msum)
92		#'
93		#' @export
tidy.summary.coxph <- function(x, # nolint
                               ...) {
  checkmate::assert_class(x, "summary.coxph")

  # Row names of the coefficient matrix identify the model terms; keep them
  # before converting to tibbles, which do not carry row names.
  coef_matrix <- x$coefficients
  term_levels <- rownames(coef_matrix)

  coef_tbl <- tibble::as_tibble(coef_matrix)
  ci_tbl <- tibble::as_tibble(x$conf.int)

  # Keep only the p-value column(s) of the coefficient table and append the
  # confidence interval columns.
  is_pval_column <- grepl("Pr", names(coef_tbl))
  tidied <- cbind(coef_tbl[, is_pval_column], ci_tbl)

  tidied$level <- term_levels
  tidied$n <- x[["n"]]
  tidied
}
109
110		#' @describeIn tidy_coxreg Custom tidy method for a univariate Cox regression.
111		#'
112		#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_univar()].
113		#'
114		#' @method tidy coxreg.univar
115		#'
116		#' @examples
117		#' ## Cox regression: arm + 1 covariate.
118		#' mod1 <- fit_coxreg_univar(
119		#' variables = list(
120		#' time = "time", event = "status", arm = "armcd",
121		#' covariates = "covar1"
122		#' ),
123		#' data = dta_bladder,
124		#' control = control_coxreg(conf_level = 0.91)
125		#' )
126		#'
127		#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates.
128		#' mod2 <- fit_coxreg_univar(
129		#' variables = list(
130		#' time = "time", event = "status", arm = "armcd",
131		#' covariates = c("covar1", "covar2")
132		#' ),
133		#' data = dta_bladder,
134		#' control = control_coxreg(conf_level = 0.91, interaction = TRUE)
135		#' )
136		#'
137		#' tidy(mod1)
138		#' tidy(mod2)
139		#'
140		#' @export
tidy.coxreg.univar <- function(x, # nolint
                               ...) {
  checkmate::assert_class(x, "coxreg.univar")
  fitted_models <- x$mod
  model_terms <- c(x$vars$arm, x$vars$covariates)

  # Pick the extraction helper that matches how the models were specified:
  # no arm, arm with interaction, or arm without interaction.
  extract_one <- if (!("arm" %in% names(x$vars))) {
    function(mod, vars) {
      h_coxreg_multivar_extract(
        var = vars,
        data = x$data,
        mod = mod,
        control = x$control
      )
    }
  } else if (x$control$interaction) {
    function(mod, vars) {
      h_coxreg_extract_interaction(
        effect = x$vars$arm, covar = vars, mod = mod, data = x$data,
        at = x$at, control = x$control
      )
    }
  } else {
    function(mod, vars) {
      h_coxreg_univar_extract(
        effect = x$vars$arm, covar = vars, data = x$data, mod = mod,
        control = x$control
      )
    }
  }

  # Extract each model/term pair and stack the pieces row-wise.
  pieces <- Map(extract_one, mod = fitted_models, vars = model_terms)
  result <- do.call(rbind, pieces)

  # Pair lower and upper limits into one list-column, then pass the list-columns
  # through `empty_vector_if_na()`.
  result$ci <- lapply(
    Map(function(lcl, ucl) c(lcl, ucl), result$lcl, result$ucl),
    empty_vector_if_na
  )
  result$n <- lapply(result$n, empty_vector_if_na)
  result$hr <- lapply(result$hr, empty_vector_if_na)

  if (x$control$interaction) {
    result$pval_inter <- lapply(result$pval_inter, empty_vector_if_na)
    # Remove interaction p-values due to change in specifications.
    result$pval[result$effect != "Treatment:"] <- NA
  }
  result$pval <- lapply(result$pval, empty_vector_if_na)

  attr(result, "conf_level") <- x$control$conf_level
  result
}
196
197		#' @describeIn tidy_coxreg Custom tidy method for a multivariate Cox regression.
198		#'
199		#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_multivar()].
200		#'
201		#' @method tidy coxreg.multivar
202		#'
203		#' @examples
204		#' multivar_model <- fit_coxreg_multivar(
205		#' variables = list(
206		#' time = "time", event = "status", arm = "armcd",
207		#' covariates = c("covar1", "covar2")
208		#' ),
209		#' data = dta_bladder
210		#' )
211		#' broom::tidy(multivar_model)
212		#'
213		#' @export
tidy.coxreg.multivar <- function(x, # nolint
                                 ...) {
  checkmate::assert_class(x, "coxreg.multivar")
  model_terms <- c(x$vars$arm, x$vars$covariates)

  # Convert the model summary to data, one piece per term of the single
  # multivariate model, and stack the pieces row-wise.
  extract_term <- function(vars) {
    h_coxreg_multivar_extract(
      var = vars, data = x$data,
      mod = x$mod, control = x$control
    )
  }
  result <- do.call(rbind, Map(extract_term, model_terms))

  # Pair lower and upper limits into one list-column, then pass the list-columns
  # through `empty_vector_if_na()`.
  result$ci <- lapply(
    Map(function(lcl, ucl) c(lcl, ucl), result$lcl, result$ucl),
    empty_vector_if_na
  )
  result$hr <- lapply(result$hr, empty_vector_if_na)
  result$pval <- lapply(result$pval, empty_vector_if_na)

  # The `n` column is not part of the multivariate output.
  keep_columns <- names(result) != "n"
  result <- result[, keep_columns]

  attr(result, "conf_level") <- x$control$conf_level
  result
}
240
241		#' Fits for Cox Proportional Hazards Regression
242		#'
243		#' @description `r lifecycle::badge("stable")`
244		#'
245		#' Fitting functions for univariate and multivariate Cox regression models.
246		#'
247		#' @param variables (`list`)\cr a named list of the names of variables found in `data`, with elements
248		#' corresponding to the `time`, `event`, `arm`, `strata`, and `covariates` terms. If `arm` is missing from
249		#' `variables`, then only Cox model(s) including the `covariates` will be fitted and the corresponding effect
250		#' estimates will be tabulated later.
251		#' @param data (`data.frame`)\cr the dataset containing the variables to fit the models.
252		#' @param at (`list` of `numeric`)\cr when the candidate covariate is a `numeric`, use `at` to specify
253		#' the value of the covariate at which the effect should be estimated.
254		#' @param control (`list`)\cr a list of parameters as returned by the helper function [control_coxreg()].
255		#'
256		#' @seealso [h_cox_regression] for relevant helper functions, [cox_regression].
257		#'
258		#' @examples
259		#' library(survival)
260		#'
261		#' set.seed(1, kind = "Mersenne-Twister")
262		#'
263		#' # Testing dataset [survival::bladder].
264		#' dta_bladder <- with(
265		#' data = bladder[bladder$enum < 5, ],
266		#' data.frame(
267		#' time = stop,
268		#' status = event,
269		#' armcd = as.factor(rx),
270		#' covar1 = as.factor(enum),
271		#' covar2 = factor(
272		#' sample(as.factor(enum)),
273		#' levels = 1:4, labels = c("F", "F", "M", "M")
274		#' )
275		#' )
276		#' )
277		#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
278		#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
279		#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
280		#'
281		#' plot(
282		#' survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder),
283		#' lty = 2:4,
284		#' xlab = "Months",
285		#' col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
286		#' )
287		#'
288		#' @name fit_coxreg
289		NULL
290
291		#' @describeIn fit_coxreg Fit a series of univariate Cox regression models given the inputs.
292		#'
293		#' @return
294		#' * `fit_coxreg_univar()` returns a `coxreg.univar` class object which is a named `list`
295		#' with 5 elements:
296		#' * `mod`: Cox regression models fitted by [survival::coxph()].
297		#' * `data`: The original data frame input.
298		#' * `control`: The original control input.
299		#' * `vars`: The variables used in the model.
300		#' * `at`: Value of the covariate at which the effect should be estimated.
301		#'
302		#' @note When using `fit_coxreg_univar` there should be two study arms.
303		#'
304		#' @examples
305		#' # fit_coxreg_univar
306		#'
307		#' ## Cox regression: arm + 1 covariate.
308		#' mod1 <- fit_coxreg_univar(
309		#' variables = list(
310		#' time = "time", event = "status", arm = "armcd",
311		#' covariates = "covar1"
312		#' ),
313		#' data = dta_bladder,
314		#' control = control_coxreg(conf_level = 0.91)
315		#' )
316		#'
317		#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates.
318		#' mod2 <- fit_coxreg_univar(
319		#' variables = list(
320		#' time = "time", event = "status", arm = "armcd",
321		#' covariates = c("covar1", "covar2")
322		#' ),
323		#' data = dta_bladder,
324		#' control = control_coxreg(conf_level = 0.91, interaction = TRUE)
325		#' )
326		#'
327		#' ## Cox regression: arm + 1 covariate, stratified analysis.
328		#' mod3 <- fit_coxreg_univar(
329		#' variables = list(
330		#' time = "time", event = "status", arm = "armcd", strata = "covar2",
331		#' covariates = c("covar1")
332		#' ),
333		#' data = dta_bladder,
334		#' control = control_coxreg(conf_level = 0.91)
335		#' )
336		#'
337		#' ## Cox regression: no arm, only covariates.
338		#' mod4 <- fit_coxreg_univar(
339		#' variables = list(
340		#' time = "time", event = "status",
341		#' covariates = c("covar1", "covar2")
342		#' ),
343		#' data = dta_bladder
344		#' )
345		#'
346		#' @export
fit_coxreg_univar <- function(variables,
                              data,
                              at = list(),
                              control = control_coxreg()) {
  checkmate::assert_list(variables, names = "named")
  # The arm is optional; without it only covariate models are fitted.
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_df_with_variables(data, variables)
  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # The likelihood p-value method is rejected when strata are used.
  if (!is.null(variables$strata)) {
    checkmate::assert_disjunct(control$pval_method, "likelihood")
  }
  # The arm must be a factor with exactly two levels.
  if (has_arm) {
    assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  }

  # Drop unused factor levels from the model variables while keeping every other
  # attribute (e.g. variable labels). The integer codes must be recoded together
  # with the levels: only overwriting the "levels" attribute would mislabel
  # observations whenever an unused level is not the last one.
  vars <- unlist(variables[c(arm_name, "covariates", "strata")], use.names = FALSE)
  for (i in vars) {
    if (is.factor(data[[i]])) {
      original <- data[[i]]
      dropped <- droplevels(original)
      kept_attrs <- attributes(original)
      kept_attrs[["levels"]] <- levels(dropped)
      attributes(dropped) <- kept_attrs
      data[[i]] <- dropped
    }
  }

  # Fit one Cox model per formula produced by the helper.
  forms <- h_coxreg_univar_formulas(variables, interaction = control$interaction)
  mod <- lapply(
    forms, function(x) {
      survival::coxph(formula = stats::as.formula(x), data = data, ties = control$ties)
    }
  )
  structure(
    list(
      mod = mod,
      data = data,
      control = control,
      vars = variables,
      at = at
    ),
    class = "coxreg.univar"
  )
}
389
390		#' @describeIn fit_coxreg Fit a multivariate Cox regression model.
391		#'
392		#' @return
393		#' * `fit_coxreg_multivar()` returns a `coxreg.multivar` class object which is a named list
394		#' with 4 elements:
395		#' * `mod`: Cox regression model fitted by [survival::coxph()].
396		#' * `data`: The original data frame input.
397		#' * `control`: The original control input.
398		#' * `vars`: The variables used in the model.
399		#'
400		#' @examples
401		#' # fit_coxreg_multivar
402		#'
403		#' ## Cox regression: multivariate Cox regression.
404		#' multivar_model <- fit_coxreg_multivar(
405		#' variables = list(
406		#' time = "time", event = "status", arm = "armcd",
407		#' covariates = c("covar1", "covar2")
408		#' ),
409		#' data = dta_bladder
410		#' )
411		#'
412		#' # Example without treatment arm.
413		#' multivar_covs_model <- fit_coxreg_multivar(
414		#' variables = list(
415		#' time = "time", event = "status",
416		#' covariates = c("covar1", "covar2")
417		#' ),
418		#' data = dta_bladder
419		#' )
420		#'
421		#' @export
fit_coxreg_multivar <- function(variables,
                                data,
                                control = control_coxreg()) {
  checkmate::assert_list(variables, names = "named")
  # The arm is optional; it is only checked below when it was supplied.
  arm_name <- if ("arm" %in% names(variables)) "arm" else NULL

  if (!is.null(variables$covariates)) {
    checkmate::assert_character(variables$covariates)
  }

  # Interactions are rejected for the multivariate fit.
  checkmate::assert_false(control$interaction)
  assert_df_with_variables(data, variables)
  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # The likelihood p-value method is rejected when strata are used.
  if (!is.null(variables$strata)) {
    checkmate::assert_disjunct(control$pval_method, "likelihood")
  }

  # Fit a single model from the formula built by the helper.
  form <- h_coxreg_multivar_formula(variables)
  mod <- survival::coxph(
    formula = stats::as.formula(form),
    data = data,
    ties = control$ties
  )

  fit <- list(mod = mod, data = data, control = control, vars = variables)
  class(fit) <- "coxreg.multivar"
  fit
}
457
458		#' Muffled `car::Anova`
459		#'
460		#' When applied to survival models, [car::Anova()] signals that the `strata` term is dropped from the model
461		#' formula when present. This function deliberately muffles this message.
462		#'
463		#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()].
464		#' @param test_statistic (`string`)\cr the method used for estimation of p-values; `wald` or `likelihood`.
465		#'
466		#' @return Returns the output of [car::Anova()], with convergence message muffled.
467		#'
468		#' @keywords internal
muffled_car_anova <- function(mod, test_statistic) {
  # Calling handlers are used so that messages can be muffled in place while
  # the type III analysis keeps running.
  withCallingHandlers(
    car::Anova(mod, test.statistic = test_statistic, type = "III"),
    # Silence every message signalled by the call.
    message = function(m) invokeRestart("muffleMessage"),
    # Re-signal any error with a hint prepended to the original error.
    error = function(e) {
      hint <- paste(
        "the model seems to have convergence problems, please try to change",
        "the configuration of covariates or strata variables, e.g.",
        "- original error:", e
      )
      stop(hint)
    }
  )
}

1		#' Difference Test for Two Proportions
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Various tests were implemented to test the difference between two proportions.
6		#'
7		#' @inheritParams argument_convention
8		#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns.
9		#'
10		#' @seealso [h_prop_diff_test]
11		#'
12		#' @name prop_diff_test
13		NULL
14
15		#' @describeIn prop_diff_test Statistics function which tests the difference between two proportions.
16		#'
17		#' @param method (`string`)\cr one of `chisq`, `cmh`, `fisher`, or `schouten`; specifies the test used
18		#' to calculate the p-value.
19		#'
20		#' @return
21		#' * `s_test_proportion_diff()` returns a named `list` with a single item `pval` with an attribute `label`
22		#' describing the method used. The p-value tests the null hypothesis that proportions in two groups are the same.
23		#'
24		#' @examples
25		#' # Statistics function
26		#' dta <- data.frame(
27		#' rsp = sample(c(TRUE, FALSE), 100, TRUE),
28		#' grp = factor(rep(c("A", "B"), each = 50)),
29		#' strat = factor(rep(c("V", "W", "X", "Y", "Z"), each = 20))
30		#' )
31		#'
32		#' # Internal function - s_test_proportion_diff
33		#' \dontrun{
34		#' s_test_proportion_diff(
35		#' df = subset(dta, grp == "A"),
36		#' .var = "rsp",
37		#' .ref_group = subset(dta, grp == "B"),
38		#' .in_ref_col = FALSE,
39		#' variables = list(strata = "strat"),
40		#' method = "cmh"
41		#' )
42		#' }
43		#'
44		#' @keywords internal
s_test_proportion_diff <- function(df,
                                   .var,
                                   .ref_group,
                                   .in_ref_col,
                                   variables = list(strata = NULL),
                                   method = c("chisq", "schouten", "fisher", "cmh")) {
  method <- match.arg(method)
  # Placeholder p-value; it stays empty when the current column is the reference column.
  y <- list(pval = "")

  if (!.in_ref_col) {
    assert_df_with_variables(df, list(rsp = .var))
    assert_df_with_variables(.ref_group, list(rsp = .var))

    # Stack the reference group first, then the current group.
    rsp <- factor(
      c(.ref_group[[.var]], df[[.var]]),
      levels = c("TRUE", "FALSE")
    )
    grp <- factor(
      rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))),
      levels = c("ref", "Not-ref")
    )

    # Strata are required for the CMH test and validated whenever they are given.
    # Note the scalar `||`: an escaped `\|\|` is not valid R.
    if (!is.null(variables$strata) || method == "cmh") {
      strata <- variables$strata
      checkmate::assert_false(is.null(strata))
      strata_vars <- stats::setNames(as.list(strata), strata)
      assert_df_with_variables(df, strata_vars)
      assert_df_with_variables(.ref_group, strata_vars)
      # Combine all strata variables into a single factor, in the same order as `rsp` and `grp`.
      strata <- c(interaction(.ref_group[strata]), interaction(df[strata]))
    }

    # Only the CMH test uses the three-way table.
    tbl <- switch(method,
      cmh = table(grp, rsp, strata),
      table(grp, rsp)
    )

    y$pval <- switch(method,
      chisq = prop_chisq(tbl),
      cmh = prop_cmh(tbl),
      fisher = prop_fisher(tbl),
      schouten = prop_schouten(tbl)
    )
  }

  # Attach a label describing the test that was used.
  y$pval <- formatters::with_label(y$pval, d_test_proportion_diff(method))
  y
}
91
92		#' Description of the Difference Test Between Two Proportions
93		#'
94		#' @description `r lifecycle::badge("stable")`
95		#'
96		#' This is an auxiliary function that describes the analysis in `s_test_proportion_diff`.
97		#'
98		#' @inheritParams s_test_proportion_diff
99		#'
100		#' @return `string` describing the test from which the p-value is derived.
101		#'
102		#' @export
d_test_proportion_diff <- function(method) {
  checkmate::assert_string(method)

  # Lookup table from method keyword to the human-readable test name.
  test_names <- c(
    schouten = "Chi-Squared Test with Schouten Correction",
    chisq = "Chi-Squared Test",
    cmh = "Cochran-Mantel-Haenszel Test",
    fisher = "Fisher's Exact Test"
  )

  # Unknown methods have no description and are an error.
  if (!(method %in% names(test_names))) {
    stop(paste(method, "does not have a description"))
  }

  paste0("p-value (", test_names[[method]], ")")
}
114
115		#' @describeIn prop_diff_test Formatted analysis function which is used as `afun` in `test_proportion_diff()`.
116		#'
117		#' @return
118		#' * `a_test_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
119		#'
120		#' @examples
121		#' # Internal function - a_test_proportion_diff
122		#' \dontrun{
123		#' a_test_proportion_diff(
124		#' df = subset(dta, grp == "A"),
125		#' .var = "rsp",
126		#' .ref_group = subset(dta, grp == "B"),
127		#' .in_ref_col = FALSE,
128		#' variables = list(strata = "strat"),
129		#' method = "cmh"
130		#' )
131		#' }
132		#'
133		#' @keywords internal
# Formatted analysis function wrapping `s_test_proportion_diff()`.
# The format separator must be a plain `|`: `\|` is not a valid escape in an R string.
a_test_proportion_diff <- make_afun(
  s_test_proportion_diff,
  .formats = c(pval = "x.xxxx | (<0.0001)"),
  .indent_mods = c(pval = 1L)
)
139
140		#' @describeIn prop_diff_test Layout-creating function which can take statistics function arguments
141		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
142		#'
143		#' @param ... other arguments are passed to [s_test_proportion_diff()].
144		#'
145		#' @return
146		#' * `test_proportion_diff()` returns a layout object suitable for passing to further layouting functions,
147		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
148		#' the statistics from `s_test_proportion_diff()` to the table layout.
149		#'
150		#' @examples
151		#' # With `rtables` pipelines.
152		#' l <- basic_table() %>%
153		#' split_cols_by(var = "grp", ref_group = "B") %>%
154		#' test_proportion_diff(
155		#' vars = "rsp",
156		#' method = "cmh", variables = list(strata = "strat")
157		#' )
158		#'
159		#' build_table(l, df = dta)
160		#'
161		#' @export
test_proportion_diff <- function(lyt,
                                 vars,
                                 ...,
                                 var_labels = vars,
                                 show_labels = "hidden",
                                 table_names = vars,
                                 .stats = NULL,
                                 .formats = NULL,
                                 .labels = NULL,
                                 .indent_mods = NULL) {
  # Customize the formatted analysis function with the user-supplied overrides.
  customized_afun <- make_afun(
    a_test_proportion_diff,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Add the analysis to the layout; arguments in `...` are forwarded to the
  # statistics function through `extra_args`.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = customized_afun,
    var_labels = var_labels,
    extra_args = list(...),
    show_labels = show_labels,
    table_names = table_names
  )
}
189
190		#' Helper Functions to Test Proportion Differences
191		#'
192		#' Helper functions to implement various tests on the difference between two proportions.
193		#'
194		#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns.
195		#'
196		#' @return A p-value.
197		#'
198		#' @seealso [prop_diff_test()] for implementation of these helper functions.
199		#'
200		#' @name h_prop_diff_test
201		NULL
202
203		#' @describeIn h_prop_diff_test performs Chi-Squared test. Internally calls [stats::prop.test()].
204		#'
205		#' @examples
206		#' # Non-stratified proportion difference test
207		#'
208		#' ## Data
209		#' A <- 20
210		#' B <- 20
211		#' set.seed(1)
212		#' rsp <- c(
213		#' sample(c(TRUE, FALSE), size = A, prob = c(3 / 4, 1 / 4), replace = TRUE),
214		#' sample(c(TRUE, FALSE), size = A, prob = c(1 / 2, 1 / 2), replace = TRUE)
215		#' )
216		#' grp <- c(rep("A", A), rep("B", B))
217		#' tbl <- table(grp, rsp)
218		#'
219		#' ## Chi-Squared test
220		#' # Internal function - prop_chisq
221		#' \dontrun{
222		#' prop_chisq(tbl)
223		#' }
224		#'
225		#' @keywords internal
prop_chisq <- function(tbl) {
  # Only 2 x 2 tables are supported.
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)

  # Put the responders ("TRUE") in the first column.
  ordered_tbl <- tbl[, c("TRUE", "FALSE")]

  # When one response category is empty the p-value is set to 1 without testing.
  if (any(colSums(ordered_tbl) == 0)) {
    1
  } else {
    test_result <- stats::prop.test(ordered_tbl, correct = FALSE)
    test_result$p.value
  }
}
234
235		#' @describeIn h_prop_diff_test performs stratified Cochran-Mantel-Haenszel test. Internally calls
236		#' [stats::mantelhaen.test()]. Note that strata with less than two observations are automatically discarded.
237		#'
238		#' @param ary (`array`, 3 dimensions)\cr array with two groups in rows, the binary response
239		#' (`TRUE`/`FALSE`) in columns, and the strata in the third dimension.
240		#'
241		#' @examples
242		#' # Stratified proportion difference test
243		#'
244		#' ## Data
245		#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
246		#' grp <- factor(rep(c("A", "B"), each = 50))
247		#' strata <- factor(rep(c("V", "W", "X", "Y", "Z"), each = 20))
248		#' tbl <- table(grp, rsp, strata)
249		#'
250		#' ## Cochran-Mantel-Haenszel test
251		#' # Internal function - prop_cmh
252		#' \dontrun{
253		#' prop_cmh(tbl)
254		#' }
255		#'
256		#' @keywords internal
prop_cmh <- function(ary) {
  # Expect a 2 x 2 x K array.
  checkmate::assert_array(ary)
  checkmate::assert_integer(c(ncol(ary), nrow(ary)), lower = 2, upper = 2)
  checkmate::assert_integer(length(dim(ary)), lower = 3, upper = 3)

  # Number of observations in each stratum (third dimension).
  n_per_stratum <- apply(ary, MARGIN = 3, FUN = sum)

  if (any(n_per_stratum < 5)) {
    warning("<5 data points in some strata. CMH test may be incorrect.")
    # Strata with fewer than two observations are discarded.
    keep_stratum <- n_per_stratum > 1
    ary <- ary[, , keep_stratum]
  }

  test_result <- stats::mantelhaen.test(ary, correct = FALSE)
  test_result$p.value
}
269
270		#' @describeIn h_prop_diff_test performs the Chi-Squared test with Schouten correction.
271		#'
272		#' @seealso For information on the Schouten correction (Schouten, 1980),
273		#' visit https://onlinelibrary.wiley.com/doi/abs/10.1002/bimj.4710220305.
274		#'
275		#' @examples
276		#' ## Chi-Squared test + Schouten correction.
277		#' # Internal function - prop_schouten
278		#' \dontrun{
279		#' prop_schouten(tbl)
280		#' }
281		#'
282		#' @keywords internal
prop_schouten <- function(tbl) {
  # Only 2 x 2 tables are supported.
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)
  # Put the responders ("TRUE") in the first column.
  tbl <- tbl[, c("TRUE", "FALSE")]
  # When one response category is empty the p-value is set to 1 without testing.
  if (any(colSums(tbl) == 0)) {
    return(1)
  }

  # Counts from `table()` are integers. The product of the four margins below
  # exceeds the integer range already for a few hundred observations, which
  # would silently turn the p-value into NA. Compute in double precision.
  storage.mode(tbl) <- "double"

  n <- sum(tbl)
  n1 <- sum(tbl[1, ])
  n2 <- sum(tbl[2, ])

  # Diagonal and anti-diagonal cells, and the two column vectors.
  ad <- diag(tbl)
  bc <- diag(apply(tbl, 2, rev))
  ac <- tbl[, 1]
  bd <- tbl[, 2]

  # Chi-squared statistic with the correction term 0.5 * min(n1, n2).
  t_schouten <- (n - 1) *
    (abs(prod(ad) - prod(bc)) - 0.5 * min(n1, n2))^2 /
    (n1 * n2 * sum(ac) * sum(bd))

  1 - stats::pchisq(t_schouten, df = 1)
}
305
306		#' @describeIn h_prop_diff_test performs the Fisher's exact test. Internally calls [stats::fisher.test()].
307		#'
308		#' @examples
309		#' ## Fisher's exact test
310		#' # Internal function - prop_fisher
311		#' \dontrun{
312		#' prop_fisher(tbl)
313		#' }
314		#'
315		#' @keywords internal
prop_fisher <- function(tbl) {
  # Only 2 x 2 tables are supported.
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)

  # Put the responders ("TRUE") in the first column before testing.
  test_result <- stats::fisher.test(tbl[, c("TRUE", "FALSE")])
  test_result$p.value
}

1		#' Combine Factor Levels
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Combine the specified factor levels into a single new level.
6		#'
7		#' @param x factor
8		#' @param levels level names to be combined
9		#' @param new_level name of new level
10		#'
11		#' @return A `factor` with the new levels.
12		#'
13		#' @examples
14		#' x <- factor(letters[1:5], levels = letters[5:1])
15		#' combine_levels(x, levels = c("a", "b"))
16		#'
17		#' combine_levels(x, c("e", "b"))
18		#'
19		#' @export
combine_levels <- function(x, levels, new_level = paste(levels, collapse = "/")) {
  checkmate::assert_factor(x)
  checkmate::assert_subset(levels, levels(x))

  # Rename every level to be combined to the new name; assigning duplicated
  # level names merges them into one level.
  is_combined <- levels(x) %in% levels
  levels(x)[is_combined] <- new_level

  x
}
32
33		#' Conversion of a Vector to a Factor
34		#'
35		#' Converts `x` to a factor and keeps its attributes. Warns appropriately such that the user
36		#' can decide whether they prefer converting to factor manually (e.g. for full control of
37		#' factor levels).
38		#'
39		#' @param x (`atomic`)\cr object to convert.
40		#' @param x_name (`string`)\cr name of `x`.
41		#' @param na_level (`string`)\cr the explicit missing level which should be used when converting a character vector.
42		#' @param verbose defaults to `TRUE`. It prints out warnings and messages.
43		#'
44		#' @return A `factor` with same attributes (except class) as `x`. Does not modify `x` if already a `factor`.
45		#'
46		#' @examples
47		#' # Internal function - as_factor_keep_attributes
48		#' \dontrun{
49		#' as_factor_keep_attributes(formatters::with_label(c(1, 1, 2, 3), "id"), verbose = FALSE)
50		#' as_factor_keep_attributes(c("a", "b", ""), "id", verbose = FALSE)
51		#' }
52		#'
53		#' @keywords internal
as_factor_keep_attributes <- function(x,
                                      x_name = deparse(substitute(x)),
                                      na_level = "<Missing>",
                                      verbose = TRUE) {
  checkmate::assert_atomic(x)
  checkmate::assert_string(x_name)
  checkmate::assert_string(na_level)
  checkmate::assert_flag(verbose)

  # Factors are returned untouched.
  if (is.factor(x)) {
    return(x)
  }

  x_class <- class(x)[1]
  if (verbose) {
    warning(paste(
      "automatically converting", x_class, "variable", x_name,
      "to factor, better manually convert to factor to avoid failures"
    ))
  }
  # This warning is issued regardless of `verbose`.
  if (identical(length(x), 0L)) {
    warning(paste(
      x_name, "has length 0, this can lead to tabulation failures, better convert to factor"
    ))
  }

  # Carry over every attribute of `x` except its class: re-applying the class of
  # a classed input (e.g. a `Date`) would strip the factor class from the result.
  kept_attrs <- attributes(x)
  kept_attrs <- kept_attrs[setdiff(names(kept_attrs), "class")]

  converted <- if (is.character(x)) {
    # Empty and whitespace-only strings count as missing and get an explicit level.
    x_no_na <- explicit_na(sas_na(x), label = na_level)
    if (na_level %in% x_no_na) {
      # Place the missing level last.
      forcats::fct_relevel(x_no_na, na_level, after = Inf)
    } else {
      as.factor(x)
    }
  } else {
    as.factor(x)
  }

  do.call(structure, c(list(.Data = converted), kept_attrs))
}
94
95		#' Labels for Bins in Percent
96		#'
97		#' This creates labels for quantile based bins in percent. This assumes the right-closed
98		#' intervals as produced by [cut_quantile_bins()].
99		#'
100		#' @param probs (`proportion` vector)\cr the probabilities identifying the quantiles.
101		#' This is a sorted vector of unique `proportion` values, i.e. between 0 and 1, where
102		#' the boundaries 0 and 1 may be omitted, as they are added automatically when missing.
103		#' @param digits (`integer`)\cr number of decimal places to round the percent numbers.
104		#'
105		#' @return A `character` vector with labels in the format `[0%,20%]`, `(20%,50%]`, etc.
106		#'
107		#' @examples
108		#' # Internal function - bins_percent_labels
109		#' \dontrun{
110		#' # Just pass the internal probability bounds, then 0 and 100% will be added automatically.
111		#' bins_percent_labels(c(0.2, 0.5))
112		#'
113		#' # Determine how to round.
114		#' bins_percent_labels(0.35224, digits = 1)
115		#'
116		#' # Passing only the boundaries 0 and 1 just gives a single bin 0-100%.
117		#' bins_percent_labels(c(0, 1))
118		#' }
119		#'
120		#' @keywords internal
bins_percent_labels <- function(probs,
                                digits = 0) {
  # Make sure both outer boundaries are part of the probabilities.
  if (!(0 %in% probs)) {
    probs <- c(0, probs)
  }
  if (!(1 %in% probs)) {
    probs <- c(probs, 1)
  }
  checkmate::assert_numeric(probs, lower = 0, upper = 1, unique = TRUE, sorted = TRUE)

  # Percent text for every boundary; after the additions above there are at
  # least the two boundaries 0% and 100%.
  bounds <- paste0(round(probs * 100, digits = digits), "%")
  n_bounds <- length(bounds)
  lower <- bounds[-n_bounds]
  upper <- bounds[-1L]

  # Only the first bin is closed on the left, all bins are closed on the right.
  opening <- c("[", rep("(", n_bounds - 2L))
  paste0(opening, lower, ",", upper, "]")
}
139
140		#' Cutting Numeric Vector into Empirical Quantile Bins
141		#'
142		#' @description `r lifecycle::badge("stable")`
143		#'
144		#' This cuts a numeric vector into sample quantile bins.
145		#'
146		#' @inheritParams bins_percent_labels
147		#' @param x (`numeric`)\cr the continuous variable values which should be cut into
148		#' quantile bins. This may contain `NA` values, which are then
149		#' not used for the quantile calculations, but included in the return vector.
150		#' @param labels (`character`)\cr the unique labels for the quantile bins. When there are `n`
151		#' probabilities in `probs`, then this must be `n + 1` long.
152		#' @param type (`integer`)\cr type of quantiles to use, see [stats::quantile()] for details.
153		#' @param ordered (`flag`)\cr should the result be an ordered factor.
154		#'
155		#' @return A `factor` variable with appropriately-labeled bins as levels.
156		#'
157		#' @note Intervals are closed on the right side. That is, the first bin is the interval
158		#' `[-Inf, q1]` where `q1` is the first quantile, the second bin is then `(q1, q2]`, etc.,
159		#' and the last bin is `(qn, +Inf]` where `qn` is the last quantile.
160		#'
161		#' @examples
162		#' # Default is to cut into quartile bins.
163		#' cut_quantile_bins(cars$speed)
164		#'
165		#' # Use custom quantiles.
166		#' cut_quantile_bins(cars$speed, probs = c(0.1, 0.2, 0.6, 0.88))
167		#'
168		#' # Use custom labels.
169		#' cut_quantile_bins(cars$speed, labels = paste0("Q", 1:4))
170		#'
171		#' # NAs are preserved in result factor.
172		#' ozone_binned <- cut_quantile_bins(airquality$Ozone)
173		#' which(is.na(ozone_binned))
174		#' # So you might want to make these explicit.
175		#' explicit_na(ozone_binned)
176		#'
177		#' @export
cut_quantile_bins <- function(x,
                              probs = c(0.25, 0.5, 0.75),
                              labels = NULL,
                              type = 7,
                              ordered = TRUE) {
  # Validate inputs and complete `probs` with the outer bounds 0 and 1.
  checkmate::assert_flag(ordered)
  checkmate::assert_numeric(x)
  if (!(0 %in% probs)) probs <- c(0, probs)
  if (!(1 %in% probs)) probs <- c(probs, 1)
  checkmate::assert_numeric(probs, lower = 0, upper = 1, unique = TRUE, sorted = TRUE)

  # Default labels are the percent ranges of the bins.
  if (is.null(labels)) labels <- bins_percent_labels(probs)
  checkmate::assert_character(labels, len = length(probs) - 1, any.missing = FALSE, unique = TRUE)

  if (all(is.na(x))) {
    # No quantiles can be estimated: return an all-NA factor with the bin levels.
    factor(x, ordered = ordered, levels = labels)
  } else {
    quantiles <- stats::quantile(x, probs = probs, type = type, na.rm = TRUE)

    # Tied quantiles cannot be used as breaks, so fail early.
    checkmate::assert_numeric(quantiles, unique = TRUE)

    cut(
      x,
      breaks = quantiles,
      labels = labels,
      right = TRUE,
      include.lowest = TRUE,
      ordered_result = ordered
    )
  }
}
214
215		#' Discard Certain Levels from a Factor
216		#'
217		#' @description `r lifecycle::badge("stable")`
218		#'
219		#' This discards the observations as well as the levels specified from a factor.
220		#'
221		#' @param x (`factor`)\cr the original factor.
222		#' @param discard (`character`)\cr which levels to discard.
223		#'
224		#' @return A modified `factor` with observations as well as levels from `discard` dropped.
225		#'
226		#' @examples
227		#' fct_discard(factor(c("a", "b", "c")), "c")
228		#'
229		#' @export
fct_discard <- function(x, discard) {
  # Removes both the observations equal to any of `discard` and the
  # corresponding levels; the result is rebuilt with `factor()`.
  checkmate::assert_factor(x)
  checkmate::assert_character(discard, any.missing = FALSE)

  # An observation is kept when it has no match among the discarded levels.
  keep_obs <- is.na(match(x, discard))
  factor(x[keep_obs], levels = setdiff(levels(x), discard))
}
237
238		#' Insertion of Explicit Missings in a Factor
239		#'
240		#' @description `r lifecycle::badge("stable")`
241		#'
242		#' This inserts explicit missings in a factor based on a condition. Additionally,
243		#' existing `NA` values will be explicitly converted to given `na_level`.
244		#'
245		#' @param x (`factor`)\cr the original factor.
246		#' @param condition (`logical`)\cr where to insert missings.
247		#' @param na_level (`string`)\cr which level to use for missings.
248		#'
249		#' @return A modified `factor` with inserted and existing `NA` converted to `na_level`.
250		#'
251		#' @seealso [forcats::fct_na_value_to_level()] which is used internally.
252		#'
253		#' @examples
254		#' fct_explicit_na_if(factor(c("a", "b", NA)), c(TRUE, FALSE, FALSE))
255		#'
256		#' @export
fct_explicit_na_if <- function(x, condition, na_level = "<Missing>") {
  # `x` must be a factor with one element per entry of `condition`.
  checkmate::assert_factor(x, len = length(condition))
  checkmate::assert_logical(condition)

  # Blank out the flagged observations, then turn every NA (new and
  # pre-existing) into the explicit `na_level` level.
  with_missings <- replace(x, condition, NA)
  explicit <- forcats::fct_na_value_to_level(with_missings, level = na_level)
  forcats::fct_drop(explicit, only = na_level)
}
264
265		#' Collapsing of Factor Levels and Keeping Only Those New Group Levels
266		#'
267		#' @description `r lifecycle::badge("stable")`
268		#'
269		#' This collapses levels and only keeps those new group levels, in the order provided.
270		#' The returned factor has levels in the order given, with the possible missing level last (this will
271		#' only be included if there are missing values).
272		#'
273		#' @param .f (`factor` or `character`)\cr original vector.
274		#' @param ... (named `character` vectors)\cr levels in each vector provided will be collapsed into
275		#' the new level given by the respective name.
276		#' @param .na_level (`string`)\cr which level to use for other levels, which should be missing in the
277		#' new factor. Note that this level must not be contained in the new levels specified in `...`.
278		#'
279		#' @return A modified `factor` with collapsed levels. Values and levels which are not included
280		#' in the given `character` vector input will be set to the missing level `.na_level`.
281		#'
282		#' @note Any existing `NA`s in the input vector will not be replaced by the missing level. If needed,
283		#' [explicit_na()] can be called separately on the result.
284		#'
285		#' @seealso [forcats::fct_collapse()], [forcats::fct_relevel()] which are used internally.
286		#'
287		#' @examples
288		#' fct_collapse_only(factor(c("a", "b", "c", "d")), TRT = "b", CTRL = c("c", "d"))
289		#'
290		#' @export
fct_collapse_only <- function(.f, ..., .na_level = "<Missing>") {
  # The names given in `...` are the new levels, in the requested order.
  target_levels <- names(list(...))

  # The missing level must not clash with one of the new levels.
  if (checkmate::test_subset(.na_level, target_levels)) {
    stop(paste0(".na_level currently set to '", .na_level, "' must not be contained in the new levels"))
  }

  # Collapse, sending all remaining levels to `.na_level`, then move the new
  # levels to the front in the order they were specified.
  collapsed <- forcats::fct_collapse(.f, ..., other_level = .na_level)
  relevel_args <- c(list(.f = collapsed), as.list(target_levels))
  do.call(forcats::fct_relevel, relevel_args)
}

1		#' Counting Patients Summing Exposure Across All Patients in Columns
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Counting the number of patients and summing analysis value (i.e. exposure values) across all patients
6		#' when a column table layout is required.
7		#'
8		#' @inheritParams argument_convention
9		#'
10		#' @name summarize_patients_exposure_in_cols
11		NULL
12
13		#' @describeIn summarize_patients_exposure_in_cols Statistics function which counts numbers
14		#' of patients and the sum of exposure across all patients.
15		#'
16		#' @param ex_var (`character`)\cr name of the variable within `df` containing exposure values.
17		#' @param custom_label (`string` or `NULL`)\cr if provided and `labelstr` is empty then this will be used as label.
18		#'
19		#' @return
20		#' * `s_count_patients_sum_exposure()` returns a named `list` with the statistics:
21		#' * `n_patients`: Number of unique patients in `df`.
22		#' * `sum_exposure`: Sum of `ex_var` across all patients in `df`.
23		#'
24		#' @examples
25		#' set.seed(1)
26		#' df <- data.frame(
27		#' USUBJID = c(paste("id", seq(1, 12), sep = "")),
28		#' ARMCD = c(rep("ARM A", 6), rep("ARM B", 6)),
29		#' SEX = c(rep("Female", 6), rep("Male", 6)),
30		#' AVAL = as.numeric(sample(seq(1, 20), 12)),
31		#' stringsAsFactors = TRUE
32		#' )
33		#' adsl <- data.frame(
34		#' USUBJID = c(paste("id", seq(1, 12), sep = "")),
35		#' ARMCD = c(rep("ARM A", 2), rep("ARM B", 2)),
36		#' SEX = c(rep("Female", 2), rep("Male", 2)),
37		#' stringsAsFactors = TRUE
38		#' )
39		#'
40		#' # Internal function - s_count_patients_sum_exposure
41		#' \dontrun{
42		#' s_count_patients_sum_exposure(df = df, .N_col = nrow(adsl))
43		#' s_count_patients_sum_exposure(df = df, .N_col = nrow(adsl), .stats = "n_patients")
44		#' s_count_patients_sum_exposure(
45		#' df = df,
46		#' .N_col = nrow(adsl),
47		#' custom_label = "some user's custom label"
48		#' )
49		#' }
50		#'
51		#' @keywords internal
s_count_patients_sum_exposure <- function(df,
                                          ex_var = "AVAL",
                                          id = "USUBJID",
                                          labelstr = "",
                                          .stats = c("n_patients", "sum_exposure"),
                                          .N_col, # nolint
                                          custom_label = NULL) {
  assert_df_with_variables(df, list(ex_var = ex_var, id = id))
  checkmate::assert_string(id)
  checkmate::assert_string(labelstr)
  checkmate::assert_string(custom_label, null.ok = TRUE)
  checkmate::assert_numeric(df[[ex_var]])
  checkmate::assert_true(all(.stats %in% c("n_patients", "sum_exposure")))

  # Label precedence: a non-empty `labelstr` wins, then `custom_label`,
  # otherwise the generic default label is used.
  row_label <- "Total patients numbers/person time"
  if (!is.null(custom_label)) row_label <- custom_label
  if (labelstr != "") row_label <- labelstr

  # Only the requested statistics are added to the result list.
  y <- list()

  if ("n_patients" %in% .stats) {
    patient_counts <- s_num_patients_content(
      df = df,
      .N_col = .N_col, # nolint
      .var = id,
      labelstr = ""
    )
    y$n_patients <- formatters::with_label(patient_counts$unique, row_label)
  }

  if ("sum_exposure" %in% .stats) {
    total_exposure <- sum(df[[ex_var]])
    y$sum_exposure <- formatters::with_label(total_exposure, row_label)
  }

  y
}
93
94		#' @describeIn summarize_patients_exposure_in_cols Analysis function which is used as `afun` in
95		#' [rtables::analyze_colvars()] within `analyze_patients_exposure_in_cols()` and as `cfun` in
96		#' [rtables::summarize_row_groups()] within `summarize_patients_exposure_in_cols()`.
97		#'
98		#' @return
99		#' * `a_count_patients_sum_exposure()` returns formatted [rtables::CellValue()].
100		#'
101		#' @examples
102		#' tern:::a_count_patients_sum_exposure(
103		#' df = df,
104		#' var = "SEX",
105		#' .N_col = nrow(df),
106		#' .stats = "n_patients"
107		#' )
108		#'
109		#' @keywords internal
a_count_patients_sum_exposure <- function(df,
                                          var = NULL,
                                          ex_var = "AVAL",
                                          id = "USUBJID",
                                          labelstr = "",
                                          .N_col, # nolint
                                          .stats,
                                          .formats = list(n_patients = "xx (xx.x%)", sum_exposure = "xx"),
                                          custom_label = NULL) {
  # Computes the single statistic named by `.stats` (it is used as a `[[`
  # index below) either once for all of `df` (`var = NULL`) or once per level
  # of `df[[var]]`, and returns the values as rows via `in_rows()`.
  if (!is.null(var)) {
    assert_df_with_variables(df, list(var = var))
    df[[var]] <- as.factor(df[[var]])
  }

  y <- list()
  if (is.null(var)) {
    # Single row, labelled through `labelstr` / `custom_label`.
    y[[.stats]] <- list(Total = s_count_patients_sum_exposure(
      df = df,
      ex_var = ex_var,
      id = id,
      labelstr = labelstr,
      .N_col = .N_col,
      .stats = .stats,
      custom_label = custom_label
    )[[.stats]])
  } else {
    # One row per level of `var`, labelled with the level itself.
    for (lvl in levels(df[[var]])) {
      # Standard indexing instead of `subset(df, get(var) == lvl)`: the
      # `subset()` expression is evaluated inside `df`, so columns named `var`
      # or `lvl` would shadow the local variables. `which()` drops rows where
      # the comparison is NA, as `subset()` does.
      lvl_rows <- which(df[[var]] == lvl)
      y[[.stats]][[lvl]] <- s_count_patients_sum_exposure(
        df = df[lvl_rows, , drop = FALSE],
        ex_var = ex_var,
        id = id,
        labelstr = labelstr,
        .N_col = .N_col,
        .stats = .stats,
        custom_label = lvl
      )[[.stats]]
    }
  }

  in_rows(.list = y[[.stats]], .formats = .formats[[.stats]])
}
151
152		#' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics
153		#' function arguments and additional format arguments. This function is a wrapper for
154		#' [rtables::split_cols_by_multivar()] and [rtables::summarize_row_groups()].
155		#'
156		#' @return
157		#' * `summarize_patients_exposure_in_cols()` returns a layout object suitable for passing to further
158		#' layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will
159		#' add formatted content rows, with the statistics from `s_count_patients_sum_exposure()` arranged in
160		#' columns, to the table layout.
161		#'
162		#' @examples
163		#' lyt <- basic_table() %>%
164		#' summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE)
165		#' result <- build_table(lyt, df = df, alt_counts_df = adsl)
166		#' result
167		#'
168		#' lyt2 <- basic_table() %>%
169		#' summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE, .stats = "sum_exposure")
170		#' result2 <- build_table(lyt2, df = df, alt_counts_df = adsl)
171		#' result2
172		#'
173		#' @export
summarize_patients_exposure_in_cols <- function(lyt, # nolint
                                                var,
                                                ...,
                                                .stats = c("n_patients", "sum_exposure"),
                                                .labels = c(n_patients = "Patients", sum_exposure = "Person time"),
                                                .indent_mods = NULL,
                                                col_split = TRUE) {
  # NOTE(review): `.indent_mods` is accepted but not referenced in this body.
  if (col_split) {
    # One column per requested statistic; the statistic name is handed to the
    # content function through `extra_args`.
    n_stats <- length(.stats)
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = rep(var, n_stats),
      varlabels = .labels[.stats],
      extra_args = list(.stats = .stats)
    )
  }

  # Content rows are produced by `a_count_patients_sum_exposure()`; further
  # arguments in `...` are forwarded to it.
  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = a_count_patients_sum_exposure,
    extra_args = list(...)
  )
}
196
197		#' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics
198		#' function arguments and additional format arguments. This function is a wrapper for
199		#' [rtables::split_cols_by_multivar()] and [rtables::analyze_colvars()].
200		#'
201		#' @param col_split (`flag`)\cr whether the columns should be split. Set to `FALSE` when the required
202		#' column split has been done already earlier in the layout pipe.
203		#'
204		#' @return
205		#' * `analyze_patients_exposure_in_cols()` returns a layout object suitable for passing to further
206		#' layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will
207		#' add formatted data rows, with the statistics from `s_count_patients_sum_exposure()` arranged in
208		#' columns, to the table layout.
209		#'
210		#' @note As opposed to [summarize_patients_exposure_in_cols()] which generates content rows,
211		#' `analyze_patients_exposure_in_cols()` generates data rows which will _not_ be repeated on multiple
212		#' pages when pagination is used.
213		#'
214		#' @examples
215		#' lyt3 <- basic_table() %>%
216		#' split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
217		#' summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE) %>%
218		#' analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE)
219		#' result3 <- build_table(lyt3, df = df, alt_counts_df = adsl)
220		#' result3
221		#'
222		#' lyt4 <- basic_table() %>%
223		#' split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
224		#' summarize_patients_exposure_in_cols(
225		#' var = "AVAL", col_split = TRUE,
226		#' .stats = "n_patients", custom_label = "some custom label"
227		#' ) %>%
228		#' analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE, ex_var = "AVAL")
229		#' result4 <- build_table(lyt4, df = df, alt_counts_df = adsl)
230		#' result4
231		#'
232		#' lyt5 <- basic_table() %>%
233		#' analyze_patients_exposure_in_cols(var = "SEX", col_split = TRUE, ex_var = "AVAL")
234		#' result5 <- build_table(lyt5, df = df, alt_counts_df = adsl)
235		#' result5
236		#'
237		#' @export
analyze_patients_exposure_in_cols <- function(lyt, # nolint
                                              var = NULL,
                                              ex_var = "AVAL",
                                              col_split = TRUE,
                                              .stats = c("n_patients", "sum_exposure"),
                                              .labels = c(n_patients = "Patients", sum_exposure = "Person time"),
                                              .indent_mods = 0L) {
  if (col_split) {
    # One column per requested statistic, each based on the exposure variable.
    stat_vars <- rep(ex_var, length(.stats))
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = stat_vars,
      varlabels = .labels[.stats],
      extra_args = list(.stats = .stats)
    )
  }

  # Data rows are produced by `a_count_patients_sum_exposure()`, split by
  # `var` when it is given.
  analyze_colvars(
    lyt,
    afun = a_count_patients_sum_exposure,
    indent_mod = .indent_mods,
    extra_args = list(var = var, ex_var = ex_var)
  )
}

1		#' Odds Ratio Estimation
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Compares bivariate responses between two groups in terms of odds ratios
6		#' along with a confidence interval.
7		#'
8		#' @inheritParams argument_convention
9		#'
10		#' @details This function uses either logistic regression for unstratified
11		#' analyses, or conditional logistic regression for stratified analyses.
12		#' The Wald confidence interval with the specified confidence level is
13		#' calculated.
14		#'
15		#' @note For stratified analyses, there is currently no implementation for conditional
16		#' likelihood confidence intervals, therefore the likelihood confidence interval is not
17		#' yet available as an option. Besides, when `rsp` contains only responders or non-responders,
18		#' then the result values will be `NA`, because no odds ratio estimation is possible.
19		#'
20		#' @seealso Relevant helper function [h_odds_ratio()].
21		#'
22		#' @name odds_ratio
23		NULL
24
25		#' @describeIn odds_ratio Statistics function which estimates the odds ratio
26		#' between a treatment and a control. A `variables` list with `arm` and `strata`
27		#' variable names must be passed if a stratified analysis is required.
28		#'
29		#' @inheritParams split_cols_by_groups
30		#'
31		#' @return
32		#' * `s_odds_ratio()` returns a named list with the statistics `or_ci`
33		#' (containing `est`, `lcl`, and `ucl`) and `n_tot`.
34		#'
35		#' @examples
36		#' set.seed(12)
37		#' dta <- data.frame(
38		#' rsp = sample(c(TRUE, FALSE), 100, TRUE),
39		#' grp = factor(rep(c("A", "B"), each = 50), levels = c("B", "A")),
40		#' strata = factor(sample(c("C", "D"), 100, TRUE))
41		#' )
42		#'
43		#' # Unstratified analysis.
44		#' s_odds_ratio(
45		#' df = subset(dta, grp == "A"),
46		#' .var = "rsp",
47		#' .ref_group = subset(dta, grp == "B"),
48		#' .in_ref_col = FALSE,
49		#' .df_row = dta
50		#' )
51		#'
52		#' # Stratified analysis.
53		#' s_odds_ratio(
54		#' df = subset(dta, grp == "A"),
55		#' .var = "rsp",
56		#' .ref_group = subset(dta, grp == "B"),
57		#' .in_ref_col = FALSE,
58		#' .df_row = dta,
59		#' variables = list(arm = "grp", strata = "strata")
60		#' )
61		#'
62		#' @export
s_odds_ratio <- function(df,
                         .var,
                         .ref_group,
                         .in_ref_col,
                         .df_row,
                         variables = list(arm = NULL, strata = NULL),
                         conf_level = 0.95,
                         groups_list = NULL) {
  # In the reference column nothing is estimated and empty strings are kept.
  y <- list(or_ci = "", n_tot = "")

  if (!.in_ref_col) {
    assert_proportion_value(conf_level)
    assert_df_with_variables(df, list(rsp = .var))
    assert_df_with_variables(.ref_group, list(rsp = .var))

    if (is.null(variables$strata)) {
      # Unstratified: stack reference and comparison responses and fit via
      # `or_glm()`, with "ref" as the first (reference) level.
      grp_sizes <- c(nrow(.ref_group), nrow(df))
      arm_flag <- factor(
        rep(c("ref", "Not-ref"), times = grp_sizes),
        levels = c("ref", "Not-ref")
      )
      data <- data.frame(
        rsp = c(.ref_group[[.var]], df[[.var]]),
        grp = arm_flag
      )
      y <- or_glm(data, conf_level = conf_level)
    } else {
      assert_df_with_variables(.df_row, c(list(rsp = .var), variables))

      # Arm levels present in the reference and in the current column.
      ref_arms <- as.character(unique(.ref_group[[variables$arm]]))
      trt_arms <- as.character(unique(df[[variables$arm]]))

      # The group variable prepared for clogit must be synchronised with
      # the combination groups definition.
      if (is.null(groups_list)) {
        ref_grp <- ref_arms
        trt_grp <- trt_arms
        grp <- stats::relevel(factor(.df_row[[variables$arm]]), ref = ref_grp)
      } else {
        # Flags the entries of `groups_list` that contain all given arm levels.
        covers_arms <- function(arms) {
          vapply(
            X = groups_list,
            FUN = function(members) all(arms %in% members),
            FUN.VALUE = TRUE
          )
        }

        # Combined group names when a column holds more than one arm level.
        ref_grp <- names(groups_list)[covers_arms(ref_arms)]
        trt_grp <- names(groups_list)[covers_arms(trt_arms)]

        grp <- combine_levels(.df_row[[variables$arm]], levels = ref_arms, new_level = ref_grp)
        grp <- combine_levels(grp, levels = trt_arms, new_level = trt_grp)
      }

      # The reference level in `grp` must be the same as in the `rtables`
      # column split.
      data <- data.frame(
        rsp = .df_row[[.var]],
        grp = grp,
        strata = interaction(.df_row[variables$strata])
      )
      y_all <- or_clogit(data, conf_level = conf_level)

      # Pick the estimate belonging to the current (non-reference) column.
      checkmate::assert_string(trt_grp)
      checkmate::assert_subset(trt_grp, names(y_all$or_ci))
      y$or_ci <- y_all$or_ci[[trt_grp]]
      y$n_tot <- y_all$n_tot
    }
  }

  ci_label <- paste0("Odds Ratio (", 100 * conf_level, "% CI)")
  y$or_ci <- formatters::with_label(x = y$or_ci, label = ci_label)
  y$n_tot <- formatters::with_label(x = y$n_tot, label = "Total n")

  y
}
144
145		#' @describeIn odds_ratio Formatted analysis function which is used as `afun` in `estimate_odds_ratio()`.
146		#'
147		#' @return
148		#' * `a_odds_ratio()` returns the corresponding list with formatted [rtables::CellValue()].
149		#'
150		#' @examples
151		#' a_odds_ratio(
152		#' df = subset(dta, grp == "A"),
153		#' .var = "rsp",
154		#' .ref_group = subset(dta, grp == "B"),
155		#' .in_ref_col = FALSE,
156		#' .df_row = dta
157		#' )
158		#'
159		#' @export
a_odds_ratio <- make_afun(
  s_odds_ratio,
  # Indent modifier applied to the `or_ci` row.
  .indent_mods = c(or_ci = 1L),
  # Estimate followed by the confidence limits in parentheses.
  .formats = c(or_ci = "xx.xx (xx.xx - xx.xx)")
)
165
166		#' @describeIn odds_ratio Layout-creating function which can take statistics function arguments
167		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
168		#'
169		#' @param ... arguments passed to `s_odds_ratio()`.
170		#'
171		#' @return
172		#' * `estimate_odds_ratio()` returns a layout object suitable for passing to further layouting functions,
173		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
174		#' the statistics from `s_odds_ratio()` to the table layout.
175		#'
176		#' @examples
177		#' dta <- data.frame(
178		#' rsp = sample(c(TRUE, FALSE), 100, TRUE),
179		#' grp = factor(rep(c("A", "B"), each = 50))
180		#' )
181		#'
182		#' l <- basic_table() %>%
183		#' split_cols_by(var = "grp", ref_group = "B") %>%
184		#' estimate_odds_ratio(vars = "rsp")
185		#'
186		#' build_table(l, df = dta)
187		#'
188		#' @export
estimate_odds_ratio <- function(lyt,
                                vars,
                                ...,
                                show_labels = "hidden",
                                table_names = vars,
                                .stats = "or_ci",
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Tailor the formatted analysis function to the requested statistics,
  # formats, labels and indentation.
  custom_afun <- make_afun(
    a_odds_ratio,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Arguments in `...` are forwarded to the statistics function.
  analyze(
    lyt,
    vars,
    afun = custom_afun,
    extra_args = list(...),
    show_labels = show_labels,
    table_names = table_names
  )
}
215
216		#' Helper Functions for Odds Ratio Estimation
217		#'
218		#' @description `r lifecycle::badge("stable")`
219		#'
220		#' Functions to calculate odds ratios in [estimate_odds_ratio()].
221		#'
222		#' @inheritParams argument_convention
223		#' @param data (`data.frame`)\cr data frame containing at least the variables `rsp` and `grp`, and optionally
224		#' `strata` for [or_clogit()].
225		#'
226		#' @return A named `list` of elements `or_ci` and `n_tot`.
227		#'
228		#' @seealso [odds_ratio]
229		#'
230		#' @name h_odds_ratio
231		NULL
232
233		#' @describeIn h_odds_ratio Estimates the odds ratio based on [stats::glm()]. Note that there must be
234		#' exactly 2 groups in `data` as specified by the `grp` variable.
235		#'
236		#' @examples
237		#' # Data with 2 groups.
238		#' data <- data.frame(
239		#' rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1)),
240		#' grp = letters[c(1, 1, 1, 2, 2, 2, 1, 2)],
241		#' strata = letters[c(1, 2, 1, 2, 2, 2, 1, 2)],
242		#' stringsAsFactors = TRUE
243		#' )
244		#'
245		#' # Odds ratio based on glm.
246		#' or_glm(data, conf_level = 0.95)
247		#'
248		#' @export
or_glm <- function(data, conf_level) {
  checkmate::assert_logical(data$rsp)
  assert_proportion_value(conf_level)
  assert_df_with_variables(data, list(rsp = "rsp", grp = "grp"))
  checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))

  # The grouping variable must end up as a factor with exactly two levels.
  data$grp <- as_factor_keep_attributes(data$grp)
  assert_df_with_factors(data, list(val = "grp"), min.levels = 2, max.levels = 2)

  formula <- stats::as.formula("rsp ~ grp")
  model_fit <- stats::glm(
    formula = formula,
    data = data,
    family = stats::binomial(link = "logit")
  )

  # Discard the intercept, then move the estimate and its default
  # (`confint.default`) interval from the log-odds scale to the odds-ratio scale.
  log_or <- stats::coef(model_fit)[-1]
  log_ci <- stats::confint.default(model_fit, level = conf_level)[-1, , drop = FALSE]
  values <- stats::setNames(exp(c(log_or, log_ci)), c("est", "lcl", "ucl"))

  # Number of observations that entered the model frame.
  n_tot <- c(n_tot = nrow(model_fit$model))

  list(or_ci = values, n_tot = n_tot)
}
274
275		#' @describeIn h_odds_ratio estimates the odds ratio based on [survival::clogit()]. This is done for
276		#' the whole data set including all groups, since the results are not the same as when doing
277		#' pairwise comparisons between the groups.
278		#'
279		#' @examples
280		#' # Data with 3 groups.
281		#' data <- data.frame(
282		#' rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0)),
283		#' grp = letters[c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3)],
284		#' strata = LETTERS[c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)],
285		#' stringsAsFactors = TRUE
286		#' )
287		#'
288		#' # Odds ratio based on stratified estimation by conditional logistic regression.
289		#' or_clogit(data, conf_level = 0.95)
290		#'
291		#' @export
or_clogit <- function(data, conf_level) {
  checkmate::assert_logical(data$rsp)
  assert_proportion_value(conf_level)
  assert_df_with_variables(data, list(rsp = "rsp", grp = "grp", strata = "strata"))
  checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))
  checkmate::assert_multi_class(data$strata, classes = c("factor", "character"))

  data$grp <- as_factor_keep_attributes(data$grp)
  data$strata <- as_factor_keep_attributes(data$strata)

  # Deviation from convention: `survival::strata` must be simply `strata`.
  formula <- stats::as.formula("rsp ~ grp + strata(strata)")
  model_fit <- clogit_with_tryCatch(formula = formula, data = data)

  coef_est <- stats::coef(model_fit)
  ci_est <- stats::confint(model_fit, level = conf_level)

  # One entry per model coefficient, i.e. per comparison of a group against
  # the reference group; the entry is named after the group by stripping the
  # leading "grp" from the coefficient name.
  or_ci <- list()
  for (coef_name in names(coef_est)) {
    log_scale <- c(coef_est[coef_name], ci_est[coef_name, , drop = TRUE])
    or_ci[[sub("^grp", "", x = coef_name)]] <- stats::setNames(
      object = exp(log_scale),
      nm = c("est", "lcl", "ucl")
    )
  }

  list(or_ci = or_ci, n_tot = c(n_tot = model_fit$n))
}

1		#' Tabulate Survival Duration by Subgroup
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Tabulate statistics such as median survival time and hazard ratio for population subgroups.
6		#'
7		#' @inheritParams argument_convention
8		#' @inheritParams survival_coxph_pairwise
9		#' @param data (`data.frame`)\cr the dataset containing the variables to summarize.
10		#' @param groups_lists (named `list` of `list`)\cr optionally contains for each `subgroups` variable a list, which
11		#' specifies the new group levels via the names and the levels that belong to it in the character vectors that are
12		#' elements of the list.
13		#' @param label_all (`string`)\cr label for the total population analysis.
14		#' @param time_unit (`string`)\cr label with unit of median survival time. Default `NULL` skips displaying unit.
15		#'
16		#' @details These functions create a layout starting from a data frame which contains
17		#' the required statistics. The resulting tables are typically used as part of a forest plot.
18		#'
19		#' @seealso [extract_survival_subgroups()]
20		#'
21		#' @examples
22		#' library(dplyr)
23		#' library(forcats)
24		#'
25		#' adtte <- tern_ex_adtte
26		#'
27		#' # Save variable labels before data processing steps.
28		#' adtte_labels <- formatters::var_labels(adtte)
29		#'
30		#' adtte_f <- adtte %>%
31		#' filter(
32		#' PARAMCD == "OS",
33		#' ARM %in% c("B: Placebo", "A: Drug X"),
34		#' SEX %in% c("M", "F")
35		#' ) %>%
36		#' mutate(
37		#' # Reorder levels of ARM to display reference arm before treatment arm.
38		#' ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
39		#' SEX = droplevels(SEX),
40		#' AVALU = as.character(AVALU),
41		#' is_event = CNSR == 0
42		#' )
43		#' labels <- c(
44		#' "ARM" = adtte_labels[["ARM"]],
45		#' "SEX" = adtte_labels[["SEX"]],
46		#' "AVALU" = adtte_labels[["AVALU"]],
47		#' "is_event" = "Event Flag"
48		#' )
49		#' formatters::var_labels(adtte_f)[names(labels)] <- labels
50		#'
51		#' df <- extract_survival_subgroups(
52		#' variables = list(
53		#' tte = "AVAL",
54		#' is_event = "is_event",
55		#' arm = "ARM", subgroups = c("SEX", "BMRKR2")
56		#' ),
57		#' data = adtte_f
58		#' )
59		#' df
60		#'
61		#' @name survival_duration_subgroups
62		NULL
63
64		#' Prepares Survival Data for Population Subgroups in Data Frames
65		#'
66		#' @description `r lifecycle::badge("stable")`
67		#'
68		#' Prepares estimates of median survival times and treatment hazard ratios for population subgroups in
69		#' data frames. Simple wrapper for [h_survtime_subgroups_df()] and [h_coxph_subgroups_df()]. Result is a `list`
70		#' of two `data.frame`s: `survtime` and `hr`. `variables` corresponds to the names of variables found in `data`,
71		#' passed as a named `list` and requires elements `tte`, `is_event`, `arm` and optionally `subgroups` and `strat`.
72		#' `groups_lists` optionally specifies groupings for `subgroups` variables.
73		#'
74		#' @inheritParams argument_convention
75		#' @inheritParams survival_duration_subgroups
76		#' @inheritParams survival_coxph_pairwise
77		#'
78		#' @return A named `list` of two elements:
79		#' * `survtime`: A `data.frame` containing columns `arm`, `n`, `n_events`, `median`, `subgroup`, `var`,
80		#' `var_label`, and `row_type`.
81		#' * `hr`: A `data.frame` containing columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`, `conf_level`,
82		#' `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`.
83		#'
84		#' @seealso [survival_duration_subgroups]
85		#'
86		#' @examples
87		#' library(dplyr)
88		#' library(forcats)
89		#'
90		#' adtte <- tern_ex_adtte
91		#' adtte_labels <- formatters::var_labels(adtte)
92		#'
93		#' adtte_f <- adtte %>%
94		#' filter(
95		#' PARAMCD == "OS",
96		#' ARM %in% c("B: Placebo", "A: Drug X"),
97		#' SEX %in% c("M", "F")
98		#' ) %>%
99		#' mutate(
100		#' # Reorder levels of ARM to display reference arm before treatment arm.
101		#' ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
102		#' SEX = droplevels(SEX),
103		#' AVALU = as.character(AVALU),
104		#' is_event = CNSR == 0
105		#' )
106		#' labels <- c(
107		#' "ARM" = adtte_labels[["ARM"]],
108		#' "SEX" = adtte_labels[["SEX"]],
109		#' "AVALU" = adtte_labels[["AVALU"]],
110		#' "is_event" = "Event Flag"
111		#' )
112		#' formatters::var_labels(adtte_f)[names(labels)] <- labels
113		#'
114		#' df <- extract_survival_subgroups(
115		#' variables = list(
116		#' tte = "AVAL",
117		#' is_event = "is_event",
118		#' arm = "ARM", subgroups = c("SEX", "BMRKR2")
119		#' ),
120		#' data = adtte_f
121		#' )
122		#' df
123		#'
124		#' df_grouped <- extract_survival_subgroups(
125		#' variables = list(
126		#' tte = "AVAL",
127		#' is_event = "is_event",
128		#' arm = "ARM", subgroups = c("SEX", "BMRKR2")
129		#' ),
130		#' data = adtte_f,
131		#' groups_lists = list(
132		#' BMRKR2 = list(
133		#' "low" = "LOW",
134		#' "low/medium" = c("LOW", "MEDIUM"),
135		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
136		#' )
137		#' )
138		#' )
139		#' df_grouped
140		#'
141		#' @export
extract_survival_subgroups <- function(variables,
                                       data,
                                       groups_lists = list(),
                                       control = control_coxph(),
                                       label_all = "All Patients") {
  # Thin wrapper: both helpers receive the same variables, data, grouping and
  # overall label; only the Cox model helper also takes `control`.
  list(
    survtime = h_survtime_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      label_all = label_all
    ),
    hr = h_coxph_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      control = control,
      label_all = label_all
    )
  )
}
163
#' @describeIn survival_duration_subgroups Formatted analysis function which is used as
#'   `afun` in `tabulate_survival_subgroups()`.
#'
#' @return
#' * `a_survival_subgroups()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_survival_subgroups
#' \dontrun{
#' a_survival_subgroups(.formats = list("n" = "xx", "median" = "xx.xx"))
#' }
#'
#' @keywords internal
a_survival_subgroups <- function(.formats = list(
                                   n = "xx",
                                   n_events = "xx",
                                   n_tot_events = "xx",
                                   median = "xx.x",
                                   n_tot = "xx",
                                   hr = list(format_extreme_values(2L)),
                                   ci = list(format_extreme_values_ci(2L)),
                                   pval = "x.xxxx | (<0.0001)"
                                 )) {
  checkmate::assert_list(.formats)
  checkmate::assert_subset(
    names(.formats),
    c("n", "n_events", "median", "n_tot", "n_tot_events", "hr", "ci", "pval")
  )

  # Build one analysis function per requested statistic. The result is a list
  # named after the statistics in `.formats`.
  afun_lst <- Map(
    function(stat, fmt) {
      # Make sure the format is bound to this closure at creation time.
      force(fmt)
      if (stat == "ci") {
        # The confidence interval is stored in two columns (`lcl`, `ucl`) which
        # are combined into one value per row.
        function(df, labelstr = "", ...) {
          in_rows(
            .list = combine_vectors(df$lcl, df$ucl),
            .labels = as.character(df$subgroup),
            .formats = fmt
          )
        }
      } else {
        # Every other statistic is read from the column of the same name.
        function(df, labelstr = "", ...) {
          in_rows(
            .list = as.list(df[[stat]]),
            .labels = as.character(df$subgroup),
            .formats = fmt
          )
        }
      }
    },
    stat = names(.formats),
    fmt = .formats
  )

  afun_lst
}
219
#' @describeIn survival_duration_subgroups Table-creating function which creates a table
#'   summarizing survival by subgroup. This function is a wrapper for [rtables::analyze_colvars()]
#'   and [rtables::summarize_row_groups()].
#'
#' @param df (`list`)\cr of data frames containing all analysis variables. List should be
#'   created using [extract_survival_subgroups()].
#' @param vars (`character`)\cr the name of statistics to be reported among:
#'   * `n_tot_events`: Total number of events per group.
#'   * `n_events`: Number of events per group.
#'   * `n_tot`: Total number of observations per group.
#'   * `n`: Number of observations per group.
#'   * `median`: Median survival time.
#'   * `hr`: Hazard ratio.
#'   * `ci`: Confidence interval of hazard ratio.
#'   * `pval`: p-value of the effect.
#'   Note, one of the statistics `n_tot` and `n_tot_events`, as well as both `hr` and `ci`
#'   are required.
#'
#' @return An `rtables` table summarizing survival by subgroup.
#'
#' @examples
#' ## Table with default columns.
#' basic_table() %>%
#'   tabulate_survival_subgroups(df, time_unit = adtte_f$AVALU[1])
#'
#' ## Table with a manually chosen set of columns: adding "pval".
#' basic_table() %>%
#'   tabulate_survival_subgroups(
#'     df = df,
#'     vars = c("n_tot_events", "n_events", "median", "hr", "ci", "pval"),
#'     time_unit = adtte_f$AVALU[1]
#'   )
#'
#' @export
tabulate_survival_subgroups <- function(lyt,
                                        df,
                                        vars = c("n_tot_events", "n_events", "median", "hr", "ci"),
                                        time_unit = NULL) {
  # The confidence level and p-value method label are taken from the first row
  # of the hazard ratio data frame.
  conf_level <- df$hr$conf_level[1]
  method <- df$hr$pval_label[1]

  afun_lst <- a_survival_subgroups()
  colvars <- d_survival_subgroups_colvars(
    vars,
    conf_level = conf_level,
    method = method,
    time_unit = time_unit
  )

  # Split the requested columns into those tabulated from `df$survtime` and
  # those tabulated from `df$hr`.
  is_survtime_col <- names(colvars$labels) %in% c("n", "n_events", "median")
  is_hr_col <- names(colvars$labels) %in% c("n_tot", "n_tot_events", "hr", "ci", "pval")

  colvars_survtime <- list(
    vars = colvars$vars[is_survtime_col],
    labels = colvars$labels[is_survtime_col]
  )
  colvars_hr <- list(
    vars = colvars$vars[is_hr_col],
    labels = colvars$labels[is_hr_col]
  )

  # Columns from table_survtime are optional.
  if (length(colvars_survtime$vars) > 0) {
    lyt_survtime <- split_cols_by(lyt = lyt, var = "arm")
    lyt_survtime <- split_rows_by(
      lyt = lyt_survtime,
      var = "row_type",
      split_fun = keep_split_levels("content"),
      nested = FALSE
    )
    lyt_survtime <- summarize_row_groups(
      lyt = lyt_survtime,
      var = "var_label",
      cfun = afun_lst[names(colvars_survtime$labels)]
    )
    lyt_survtime <- split_cols_by_multivar(
      lyt = lyt_survtime,
      vars = colvars_survtime$vars,
      varlabels = colvars_survtime$labels
    )

    # Subgroup ("analysis") rows are only added when the data contains them.
    if ("analysis" %in% df$survtime$row_type) {
      lyt_survtime <- split_rows_by(
        lyt = lyt_survtime,
        var = "row_type",
        split_fun = keep_split_levels("analysis"),
        nested = FALSE,
        child_labels = "hidden"
      )
      lyt_survtime <- split_rows_by(lyt = lyt_survtime, var = "var_label", nested = TRUE)
      lyt_survtime <- analyze_colvars(
        lyt = lyt_survtime,
        afun = afun_lst[names(colvars_survtime$labels)],
        inclNAs = TRUE
      )
    }

    table_survtime <- build_table(lyt_survtime, df = df$survtime)
  } else {
    table_survtime <- NULL
  }

  # Columns "n_tot_events" or "n_tot", and "hr", "ci" in table_hr are required.
  lyt_hr <- split_cols_by(lyt = lyt, var = "arm")
  lyt_hr <- split_rows_by(
    lyt = lyt_hr,
    var = "row_type",
    split_fun = keep_split_levels("content"),
    nested = FALSE
  )
  lyt_hr <- summarize_row_groups(
    lyt = lyt_hr,
    var = "var_label",
    cfun = afun_lst[names(colvars_hr$labels)]
  )
  lyt_hr <- split_cols_by_multivar(
    lyt = lyt_hr,
    vars = colvars_hr$vars,
    varlabels = colvars_hr$labels
  ) %>%
    append_topleft("Baseline Risk Factors")

  # This layout is built from `df$hr`, so the presence of subgroup rows is
  # checked on `df$hr` (not on `df$survtime`).
  if ("analysis" %in% df$hr$row_type) {
    lyt_hr <- split_rows_by(
      lyt = lyt_hr,
      var = "row_type",
      split_fun = keep_split_levels("analysis"),
      nested = FALSE,
      child_labels = "hidden"
    )
    lyt_hr <- split_rows_by(lyt = lyt_hr, var = "var_label", nested = TRUE)
    lyt_hr <- analyze_colvars(
      lyt = lyt_hr,
      afun = afun_lst[names(colvars_hr$labels)],
      inclNAs = TRUE
    )
  }
  table_hr <- build_table(lyt_hr, df = df$hr)

  # There can be one or two vars starting with "n_tot".
  n_tot_ids <- grep("^n_tot", colvars_hr$vars)
  if (is.null(table_survtime)) {
    result <- table_hr
    hr_id <- match("hr", colvars_hr$vars)
    ci_id <- match("lcl", colvars_hr$vars)
  } else {
    # Reorder the table: total columns first, then survival time columns, then
    # the remaining hazard ratio columns.
    result <- cbind_rtables(table_hr[, n_tot_ids], table_survtime, table_hr[, -n_tot_ids])
    # And then calculate column indices accordingly.
    hr_id <- length(n_tot_ids) + ncol(table_survtime) + match("hr", colvars_hr$vars[-n_tot_ids])
    ci_id <- length(n_tot_ids) + ncol(table_survtime) + match("lcl", colvars_hr$vars[-n_tot_ids])
    n_tot_ids <- seq_along(n_tot_ids)
  }

  # Attach the column positions and header as attributes of the result table.
  structure(
    result,
    forest_header = paste0(rev(levels(df$survtime$arm)), "\nBetter"),
    col_x = hr_id,
    col_ci = ci_id,
    # Take the first one for scaling the symbol sizes in graph.
    col_symbol_size = n_tot_ids[1]
  )
}
380
#' Labels for Column Variables in Survival Duration by Subgroup Table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Internal function to check variables included in [tabulate_survival_subgroups()] and create column labels.
#'
#' @inheritParams tabulate_survival_subgroups
#' @inheritParams argument_convention
#' @param method (`character`)\cr p-value method for testing hazard ratio = 1.
#'
#' @return A `list` with elements `vars` (the column variables to tabulate) and `labels`
#'   (their labels, named by the requested statistics).
#'
#' @note At least one of `n_tot` and `n_tot_events` must be provided in `vars`.
#'
#' @export
d_survival_subgroups_colvars <- function(vars,
                                         conf_level,
                                         method,
                                         time_unit = NULL) {
  allowed_vars <- c("n", "n_events", "median", "n_tot", "n_tot_events", "hr", "ci", "pval")

  checkmate::assert_character(vars)
  checkmate::assert_string(time_unit, null.ok = TRUE)
  checkmate::assert_subset(c("hr", "ci"), vars)
  checkmate::assert_true(any(c("n_tot", "n_tot_events") %in% vars))
  checkmate::assert_subset(vars, allowed_vars)

  # The median label carries the time unit only when one is supplied.
  propcase_time_label <- if (is.null(time_unit)) {
    "Median"
  } else {
    paste0("Median (", time_unit, ")")
  }

  varlabels <- c(
    n = "n",
    n_events = "Events",
    median = propcase_time_label,
    n_tot = "Total n",
    n_tot_events = "Total Events",
    hr = "Hazard Ratio",
    ci = paste0(100 * conf_level, "% Wald CI"),
    pval = method
  )

  # The `lcl` variable is just a placeholder available in the analysis data,
  # it is not actually used in the tabulation.
  # Variables used in the tabulation are lcl and ucl, see `a_survival_subgroups` for details.
  colvars <- replace(vars, vars == "ci", "lcl")

  list(
    vars = colvars,
    labels = varlabels[vars]
  )
}

1		#' Summary for analysis of covariance (ANCOVA).
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Summarize results of ANCOVA. This can be used to analyze multiple endpoints and/or
6		#' multiple timepoints within the same response variable `.var`.
7		#'
8		#' @inheritParams argument_convention
9		#'
10		#' @name summarize_ancova
11		NULL
12
#' Helper Function to Return Results of a Linear Model
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called in `.var` and `variables`.
#' @param variables (named `list` of `strings`)\cr list of additional analysis variables, with expected elements:
#'   * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple groups will be
#'     summarized. Specifically, the first level of `arm` variable is taken as the reference group.
#'   * `covariates` (`character`)\cr a vector that can contain single variable names (such as `"X1"`), and/or
#'     interaction terms indicated by `"X1 * X2"`.
#' @param interaction_item (`character`)\cr name of the variable that should have interactions
#'   with arm. If the interaction is not needed, the default option is `NULL`.
#'
#' @return The estimated marginal means object returned by [emmeans::emmeans()] for the
#'   linear model fitted with [stats::lm()].
#'
#' @examples
#' h_ancova(
#'   .var = "Sepal.Length",
#'   .df_row = iris,
#'   variables = list(arm = "Species", covariates = c("Petal.Length * Petal.Width", "Sepal.Width"))
#' )
#'
#' @export
h_ancova <- function(.var,
                     .df_row,
                     variables,
                     interaction_item = NULL) {
  checkmate::assert_string(.var)
  checkmate::assert_list(variables)
  checkmate::assert_subset(names(variables), c("arm", "covariates"))
  assert_df_with_variables(.df_row, list(rsp = .var))

  arm <- variables$arm
  covariates <- variables$covariates

  # When covariates are given, all variables appearing in them (including those
  # inside interaction terms) must be present in the data.
  if (length(covariates) > 0) {
    assert_df_with_variables(.df_row, get_covariates(covariates))
  }

  # Right-hand side of the model: covariates (if any) followed by the arm.
  covariates_part <- paste(covariates, collapse = " + ")
  rhs <- if (nzchar(covariates_part)) {
    paste0(covariates_part, " + ", arm)
  } else {
    arm
  }
  formula <- stats::as.formula(paste0(.var, " ~ ", rhs))

  # Estimated marginal means are requested over the arm and, if specified,
  # additionally over the interaction variable.
  specs <- if (is.null(interaction_item)) arm else c(arm, interaction_item)

  lm_fit <- stats::lm(
    formula = formula,
    data = .df_row
  )

  emmeans::emmeans(
    lm_fit,
    # Specify here the group variable over which EMM are desired.
    specs = specs,
    # Pass the data again so that the factor levels of the arm variable can be inferred.
    data = .df_row
  )
}
81
#' @describeIn summarize_ancova Statistics function that produces a named list of results
#'   of the investigated linear model.
#'
#' @inheritParams h_ancova
#' @param interaction_y (`character`)\cr a selected item inside of the interaction_item column which will be used
#'   to select the specific ANCOVA results. If the interaction is not needed, the default option is `FALSE`.
#'
#' @return
#' * `s_ancova()` returns a named list of 5 statistics:
#'   * `n`: Count of complete sample size for the group.
#'   * `lsmean`: Estimated marginal means in the group.
#'   * `lsmean_diff`: Difference in estimated marginal means in comparison to the reference group.
#'     If working with the reference group, this will be empty.
#'   * `lsmean_diff_ci`: Confidence interval for difference in estimated marginal means in comparison
#'     to the reference group.
#'   * `pval`: p-value (not adjusted for multiple comparisons).
#'
#' @examples
#' library(dplyr)
#'
#' df <- iris %>% filter(Species == "virginica")
#' .df_row <- iris
#' .var <- "Petal.Length"
#' variables <- list(arm = "Species", covariates = "Sepal.Length * Sepal.Width")
#' .ref_group <- iris %>% filter(Species == "setosa")
#' conf_level <- 0.95
#'
#' # Internal function - s_ancova
#' \dontrun{
#' s_ancova(
#'   df, .var, .df_row, variables, .ref_group,
#'   .in_ref_col = FALSE,
#'   conf_level, interaction_y = FALSE, interaction_item = NULL
#' )
#' }
#'
#' @keywords internal
s_ancova <- function(df,
                     .var,
                     .df_row,
                     variables,
                     .ref_group,
                     .in_ref_col,
                     conf_level,
                     interaction_y = FALSE,
                     interaction_item = NULL) {
  emmeans_fit <- h_ancova(.var = .var, variables = variables, .df_row = .df_row, interaction_item = interaction_item)

  sum_fit <- summary(
    emmeans_fit,
    level = conf_level
  )

  arm <- variables$arm

  sum_level <- as.character(unique(df[[arm]]))

  # Ensure that there is only one element in sum_level.
  checkmate::assert_scalar(sum_level)

  # Rows of the marginal means summary that belong to the current arm.
  sum_fit_level <- sum_fit[sum_fit[[arm]] == sum_level, ]

  # Get the index of the ref arm
  if (interaction_y != FALSE) {
    y <- unlist(df[(df[[interaction_item]] == interaction_y), .var])
    # convert characters selected in interaction_y into the numeric order
    interaction_y <- which(sum_fit_level[[interaction_item]] == interaction_y)
    sum_fit_level <- sum_fit_level[interaction_y, ]
    # if interaction is called, reset the index
    ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])])
    ref_key <- tail(ref_key, n = 1)
    ref_key <- (interaction_y - 1) * length(unique(.df_row[[arm]])) + ref_key
  } else {
    y <- df[[.var]]
    # Get the index of the ref arm when interaction is not called
    ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])])
    ref_key <- tail(ref_key, n = 1)
  }

  if (.in_ref_col) {
    # In the reference column only `n` and the adjusted mean are filled; the
    # comparison statistics are returned as empty values.
    list(
      n = length(y[!is.na(y)]),
      lsmean = formatters::with_label(sum_fit_level$emmean, "Adjusted Mean"),
      lsmean_diff = formatters::with_label(character(), "Difference in Adjusted Means"),
      lsmean_diff_ci = formatters::with_label(character(), f_conf_level(conf_level)),
      pval = formatters::with_label(character(), "p-value")
    )
  } else {
    # Estimate the differences between the marginal means.
    emmeans_contrasts <- emmeans::contrast(
      emmeans_fit,
      # Compare all arms versus the control arm.
      method = "trt.vs.ctrl",
      # Take the arm factor from .ref_group as the control arm.
      ref = ref_key,
      level = conf_level
    )
    sum_contrasts <- summary(
      emmeans_contrasts,
      # Derive confidence intervals, t-tests and p-values.
      infer = TRUE,
      # Do not adjust the p-values for multiplicity.
      adjust = "none"
    )

    # Select the contrasts involving the current arm. The arm level is matched
    # literally (`fixed = TRUE`) so that levels containing regular expression
    # metacharacters, e.g. parentheses or "+", are still found.
    sum_contrasts_level <- sum_contrasts[grepl(sum_level, sum_contrasts$contrast, fixed = TRUE), ]
    # At this point `interaction_y` is either still `FALSE` or the row index
    # derived above when an interaction was requested.
    if (interaction_y != FALSE) {
      sum_contrasts_level <- sum_contrasts_level[interaction_y, ]
    }

    list(
      n = length(y[!is.na(y)]),
      lsmean = formatters::with_label(sum_fit_level$emmean, "Adjusted Mean"),
      lsmean_diff = formatters::with_label(sum_contrasts_level$estimate, "Difference in Adjusted Means"),
      lsmean_diff_ci = formatters::with_label(
        c(sum_contrasts_level$lower.CL, sum_contrasts_level$upper.CL),
        f_conf_level(conf_level)
      ),
      pval = formatters::with_label(sum_contrasts_level$p.value, "p-value")
    )
  }
}
204
#' @describeIn summarize_ancova Formatted analysis function which is used as `afun` in `summarize_ancova()`.
#'
#' @return
#' * `a_ancova()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # Internal function - a_ancova
#' \dontrun{
#' a_ancova(
#'   df, .var, .df_row, variables, .ref_group,
#'   .in_ref_col = FALSE,
#'   interaction_y = FALSE, interaction_item = NULL, conf_level
#' )
#' }
#'
#' @keywords internal
a_ancova <- make_afun(
  s_ancova,
  # The confidence interval and p-value rows are indented one level further
  # than the other statistics.
  .indent_mods = c("n" = 0L, "lsmean" = 0L, "lsmean_diff" = 0L, "lsmean_diff_ci" = 1L, "pval" = 1L),
  .formats = c(
    "n" = "xx",
    "lsmean" = "xx.xx",
    "lsmean_diff" = "xx.xx",
    "lsmean_diff_ci" = "(xx.xx, xx.xx)",
    "pval" = "x.xxxx | (<0.0001)"
  ),
  .null_ref_cells = FALSE
)
233
#' @describeIn summarize_ancova Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `summarize_ancova()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_ancova()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("Species", ref_group = "setosa") %>%
#'   add_colcounts() %>%
#'   summarize_ancova(
#'     vars = "Petal.Length",
#'     variables = list(arm = "Species", covariates = NULL),
#'     table_names = "unadj",
#'     conf_level = 0.95, var_labels = "Unadjusted comparison",
#'     .labels = c(lsmean = "Mean", lsmean_diff = "Difference in Means")
#'   ) %>%
#'   summarize_ancova(
#'     vars = "Petal.Length",
#'     variables = list(arm = "Species", covariates = c("Sepal.Length", "Sepal.Width")),
#'     table_names = "adj",
#'     conf_level = 0.95, var_labels = "Adjusted comparison (covariates: Sepal.Length and Sepal.Width)"
#'   ) %>%
#'   build_table(iris)
#'
#' @export
summarize_ancova <- function(lyt,
                             vars,
                             var_labels,
                             ...,
                             show_labels = "visible",
                             table_names = vars,
                             .stats = NULL,
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL,
                             interaction_y = FALSE,
                             interaction_item = NULL) {
  # Customize the formatted analysis function with the interaction settings and
  # the user-supplied statistics, formats, labels and indentation.
  ancova_afun <- make_afun(
    a_ancova,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    interaction_y = interaction_y,
    interaction_item = interaction_item
  )

  # Everything passed through `...` is forwarded to the analysis function.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = ancova_afun,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    extra_args = list(...)
  )
}