tern coverage - 95.30%

Files
Source

#' Multivariate Logistic Regression Table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Layout-creating function which summarizes a logistic variable regression for binary outcome with
#' categorical/continuous covariates in model statement. For each covariate category (if categorical)
#' or specified values (if continuous), present degrees of freedom, regression parameter estimate and
#' standard error (SE) relative to reference group or category. Report odds ratios for each covariate
#' category or specified values and corresponding Wald confidence intervals as default but allow user
#' to specify other confidence levels. Report p-value for Wald chi-square test of the null hypothesis
#' that covariate has no effect on response in model containing all specified covariates.
#' Allow option to include one two-way interaction and present similar output for
#' each interaction degree of freedom.
#'
#' @inheritParams argument_convention
#' @param drop_and_remove_str (`character`)\cr string to be dropped and removed.
#'
#' @return A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
#'   Adding this function to an `rtable` layout will add a logistic regression variable summary to the table layout.
#'
#' @note For the formula, the variable names need to be standard `data.frame` column names without
#'   special characters.
#'
#' @examples
#' library(dplyr)
#' library(broom)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' df <- tidy(mod1, conf_level = 0.99)
#' df2 <- tidy(mod2, conf_level = 0.99)
#'
#' # flagging empty strings with "_"
#' df <- df_explicit_na(df, na_level = "_")
#' df2 <- df_explicit_na(df2, na_level = "_")
#'
#' result1 <- basic_table() %>%
#'   summarize_logistic(
#'     conf_level = 0.95,
#'     drop_and_remove_str = "_"
#'   ) %>%
#'   build_table(df = df)
#' result1
#'
#' result2 <- basic_table() %>%
#'   summarize_logistic(
#'     conf_level = 0.95,
#'     drop_and_remove_str = "_"
#'   ) %>%
#'   build_table(df = df2)
#' result2
#'
#' @export
summarize_logistic <- function(lyt,
                               conf_level,
                               drop_and_remove_str = "",
                               .indent_mods = NULL) {
  checkmate::assert_string(drop_and_remove_str)

  # Content-row builders, one per flag column; only the term and reference rows
  # receive the user-supplied indent modifiers.
  add_variable_test <- logistic_summary_by_flag("is_variable_summary")
  add_term_estimates <- logistic_summary_by_flag("is_term_summary", .indent_mods = .indent_mods)
  add_odds_ratios <- logistic_summary_by_flag("is_reference_summary", .indent_mods = .indent_mods)

  # Every row split uses the same split function and the `<var>_label` column for labels.
  level_dropper <- drop_and_remove_levels(drop_and_remove_str)
  split_on <- function(layout, var) {
    split_rows_by(
      layout,
      var = var,
      labels_var = paste0(var, "_label"),
      split_fun = level_dropper
    )
  }

  lyt <- logistic_regression_cols(lyt, conf_level = conf_level)
  lyt <- add_variable_test(split_on(lyt, "variable"))
  lyt <- add_term_estimates(split_on(lyt, "term"))
  lyt <- split_on(lyt, "interaction")
  add_odds_ratios(split_on(lyt, "reference"))
}

#' Fit for Logistic Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Fit a (conditional) logistic regression model.
#'
#' @inheritParams argument_convention
#' @param data (`data.frame`)\cr the data frame on which the model is to be fit.
#' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`.
#'   This will be used when fitting the (conditional) logistic regression model on the left hand
#'   side of the formula.
#'
#' @return A fitted logistic regression model.
#'
#' @section Model Specification:
#'
#' The `variables` list needs to include the following elements:
#'   * `arm`: Treatment arm variable name.
#'   * `response`: The response variable name. Usually this is a 0/1 variable.
#'   * `covariates`: This is either `NULL` (no covariates) or a character vector of covariate variable names.
#'   * `interaction`: This is either `NULL` (no interaction) or a string of a single covariate variable name already
#'     included in `covariates`. Then the interaction with the treatment arm is included in the model.
#'
#' @examples
#' library(dplyr)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' @export
fit_logistic <- function(data,
                         variables = list(
                           response = "Response",
                           arm = "ARMCD",
                           covariates = NULL,
                           interaction = NULL,
                           strata = NULL
                         ),
                         response_definition = "response") {
  assert_df_with_variables(data, variables)
  checkmate::assert_subset(names(variables), c("response", "arm", "covariates", "interaction", "strata"))
  checkmate::assert_string(response_definition)
  checkmate::assert_true(grepl("response", response_definition))

  has_strata <- !is.null(variables$strata)

  # Left-hand side: the first literal "response" is swapped for the actual column name.
  response_definition <- sub("response", variables$response, response_definition, fixed = TRUE)

  # Right-hand side, built up term by term starting from the treatment arm.
  form <- paste0(response_definition, " ~ ", variables$arm)
  if (!is.null(variables$covariates)) {
    form <- paste(form, paste(variables$covariates, collapse = " + "), sep = " + ")
  }
  if (!is.null(variables$interaction)) {
    # Only a single covariate already in the model may interact with the arm.
    checkmate::assert_string(variables$interaction)
    checkmate::assert_subset(variables$interaction, variables$covariates)
    form <- paste(form, paste0(variables$arm, ":", variables$interaction), sep = " + ")
  }
  if (has_strata) {
    # Several stratification variables are combined into one interaction factor.
    strata_arg <- variables$strata
    if (length(strata_arg) > 1) {
      strata_arg <- paste0("I(interaction(", paste0(strata_arg, collapse = ", "), "))")
    }
    form <- paste0(form, "+ strata(", strata_arg, ")")
  }
  formula <- stats::as.formula(form)

  # Stratified models go through the conditional logistic regression wrapper,
  # unstratified ones are fitted as a binomial GLM with logit link.
  if (has_strata) {
    clogit_with_tryCatch(
      formula = formula,
      data = data,
      x = TRUE
    )
  } else {
    stats::glm(
      formula = formula,
      data = data,
      family = stats::binomial("logit")
    )
  }
}

#' Custom Tidy Method for Binomial GLM Results
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper method (for [broom::tidy()]) to prepare a data frame from a `glm` object
#' with `binomial` family.
#'
#' @inheritParams argument_convention
#' @param at (`NULL` or `numeric`)\cr optional values for the interaction variable. Otherwise the median is used.
#' @param x logistic regression model fitted by [stats::glm()] with "binomial" family.
#'
#' @return A `data.frame` containing the tidied model.
#'
#' @method tidy glm
#'
#' @seealso [h_logistic_regression] for relevant helper functions.
#'
#' @examples
#' library(dplyr)
#' library(broom)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' df <- tidy(mod1, conf_level = 0.99)
#' df2 <- tidy(mod2, conf_level = 0.99)
#'
#' @export
tidy.glm <- function(x, # nolint
                     conf_level = 0.95,
                     at = NULL,
                     ...) {
  checkmate::assert_class(x, "glm")
  checkmate::assert_set_equal(x$family$family, "binomial")

  term_labels <- attr(stats::terms(x), "term.labels")
  model_vars <- names(attr(x$terms, "dataClasses"))
  # A term label that is not itself a model variable is treated as an interaction term.
  has_interaction <- any(!(term_labels %in% model_vars))

  tidied <- if (has_interaction) {
    h_logistic_inter_terms(
      x = term_labels,
      fit_glm = x,
      conf_level = conf_level,
      at = at
    )
  } else {
    h_logistic_simple_terms(
      x = term_labels,
      fit_glm = x,
      conf_level = conf_level
    )
  }

  # Turn the grouping columns into factors whose levels keep the order of appearance.
  for (col in c("variable", "term", "interaction", "reference")) {
    values <- tidied[[col]]
    tidied[[col]] <- factor(values, levels = unique(values))
  }
  tidied
}

#' Logistic Regression Multivariate Column Layout Function
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Layout-creating function which creates a multivariate column layout summarizing logistic
#' regression results. This function is a wrapper for [rtables::split_cols_by_multivar()].
#'
#' @inheritParams argument_convention
#'
#' @return A layout object suitable for passing to further layouting functions. Adding this
#'   function to an `rtable` layout will split the table into columns corresponding to
#'   statistics `df`, `estimate`, `std_error`, `odds_ratio`, `ci`, and `pvalue`.
#'
#' @export
logistic_regression_cols <- function(lyt,
                                     conf_level = 0.95) {
  # The confidence interval header carries the requested confidence level.
  ci_label <- paste("Wald", f_conf_level(conf_level))

  # One column per statistic, in display order.
  split_cols_by_multivar(
    lyt = lyt,
    vars = c("df", "estimate", "std_error", "odds_ratio", "ci", "pvalue"),
    varlabels = c(
      df = "Degrees of Freedom",
      estimate = "Parameter Estimate",
      std_error = "Standard Error",
      odds_ratio = "Odds Ratio",
      ci = ci_label,
      pvalue = "p-value"
    )
  )
}

#' Logistic Regression Summary Table Constructor Function
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Constructor for content functions to be used in [`summarize_logistic()`] to summarize
#' logistic regression results. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @inheritParams argument_convention
#' @param flag_var (`string`)\cr variable name identifying which row should be used in this
#'   content function.
#'
#' @return A content function.
#'
#' @export
logistic_summary_by_flag <- function(flag_var, .indent_mods = NULL) {
  checkmate::assert_string(flag_var)

  function(lyt) {
    # Content function for one statistic column, tied to the flag variable and indent settings.
    make_cfun <- function(stat, fmt) {
      cfun_by_flag(stat, flag_var, format = fmt, .indent_mods = .indent_mods)
    }

    summarize_row_groups(
      lyt = lyt,
      cfun = list(
        df = make_cfun("df", "xx."),
        estimate = make_cfun("estimate", "xx.xxx"),
        std_error = make_cfun("std_error", "xx.xxx"),
        odds_ratio = make_cfun("odds_ratio", ">999.99"),
        ci = make_cfun("ci", format_extreme_values_ci(2L)),
        pvalue = make_cfun("pvalue", "x.xxxx | (<0.0001)")
      )
    )
  }
}

#' Helper Function to create a new `SMQ` variable in `ADAE` by stacking `SMQ` and/or `CQ` records.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper Function to create a new `SMQ` variable in `ADAE` that consists of all adverse events belonging to
#' selected Standardized/Customized queries. The new dataset will only contain records of the adverse events
#' belonging to any of the selected baskets.
#'
#' @inheritParams argument_convention
#' @param baskets (`character`)\cr variable names of the selected Standardized/Customized queries.
#' @param smq_varlabel (`string`)\cr a label for the new variable created.
#' @param keys (`character`)\cr names of the key variables to be returned along with the new variable created.
#' @param aag_summary (`data.frame`)\cr containing the `SMQ` baskets and the levels of interest for the final `SMQ`
#'   variable. This is useful when there are some levels of interest that are not observed in the `df` dataset.
#'   The two columns of this dataset should be named `basket` and `basket_name`.
#'
#' @return `data.frame` with variables in `keys` taken from `df` and new variable `SMQ` containing
#'   records belonging to the baskets selected via the `baskets` argument.
#'
#' @examples
#' adae <- tern_ex_adae[1:20, ] %>% df_explicit_na()
#' h_stack_by_baskets(df = adae)
#'
#' aag <- data.frame(
#'   NAMVAR = c("CQ01NAM", "CQ02NAM", "SMQ01NAM", "SMQ02NAM"),
#'   REFNAME = c(
#'     "D.2.1.5.3/A.1.1.1.1 AESI", "X.9.9.9.9/Y.8.8.8.8 AESI",
#'     "C.1.1.1.3/B.2.2.3.1 AESI", "C.1.1.1.3/B.3.3.3.3 AESI"
#'   ),
#'   SCOPE = c("", "", "BROAD", "BROAD"),
#'   stringsAsFactors = FALSE
#' )
#'
#' basket_name <- character(nrow(aag))
#' cq_pos <- grep("^(CQ).+NAM$", aag$NAMVAR)
#' smq_pos <- grep("^(SMQ).+NAM$", aag$NAMVAR)
#' basket_name[cq_pos] <- aag$REFNAME[cq_pos]
#' basket_name[smq_pos] <- paste0(
#'   aag$REFNAME[smq_pos], "(", aag$SCOPE[smq_pos], ")"
#' )
#'
#' aag_summary <- data.frame(
#'   basket = aag$NAMVAR,
#'   basket_name = basket_name,
#'   stringsAsFactors = TRUE
#' )
#'
#' result <- h_stack_by_baskets(df = adae, aag_summary = aag_summary)
#' all(levels(aag_summary$basket_name) %in% levels(result$SMQ))
#'
#' h_stack_by_baskets(
#'   df = adae,
#'   aag_summary = NULL,
#'   keys = c("STUDYID", "USUBJID", "AEDECOD", "ARM"),
#'   baskets = "SMQ01NAM"
#' )
#'
#' @export
h_stack_by_baskets <- function(df,
                               baskets = grep("^(SMQ|CQ).+NAM$", names(df), value = TRUE),
                               smq_varlabel = "Standardized MedDRA Query",
                               keys = c("STUDYID", "USUBJID", "ASTDTM", "AEDECOD", "AESEQ"),
                               aag_summary = NULL,
                               na_level = "<Missing>") {
  # Validate arguments before their first use so that invalid input fails with a clear
  # `checkmate` message instead of an obscure error from `startsWith()` or subsetting.
  checkmate::assert_data_frame(df)
  checkmate::assert_string(na_level)
  checkmate::assert_string(smq_varlabel)

  # Apply df_explicit_na() in case the user has not already done so.
  df <- df_explicit_na(df, na_level = na_level)

  checkmate::assert_character(baskets)
  checkmate::assert_true(all(startsWith(baskets, "SMQ") | startsWith(baskets, "CQ")))
  checkmate::assert_true(all(endsWith(baskets, "NAM")))
  checkmate::assert_subset(baskets, names(df))
  checkmate::assert_subset(keys, names(df))

  smq_nam <- baskets[startsWith(baskets, "SMQ")]
  # SC (scope) variable corresponding to each SMQ NAM variable
  smq_sc <- gsub(pattern = "NAM", replacement = "SC", x = smq_nam, fixed = TRUE)
  smq <- stats::setNames(smq_sc, smq_nam)
  checkmate::assert_subset(smq_sc, names(df))

  if (!is.null(aag_summary)) {
    assert_df_with_variables(
      df = aag_summary,
      variables = list(val = c("basket", "basket_name"))
    )
    # Warning in case there is no match between `aag_summary$basket` and `baskets` argument.
    # Honestly, I think those should completely match. Target baskets should be the same.
    if (length(intersect(baskets, unique(aag_summary$basket))) == 0) {
      warning("There are 0 baskets in common between aag_summary$basket and `baskets` argument.")
    }
  }

  # `drop = FALSE` keeps a data frame even when `keys` names a single column of a plain data.frame.
  var_labels <- c(formatters::var_labels(df[, keys, drop = FALSE]), "SMQ" = smq_varlabel)

  # convert `na_level` records from baskets to NA for the later loop and from wide to long steps
  df[, c(baskets, smq_sc)][df[, c(baskets, smq_sc)] == na_level] <- NA

  if (all(is.na(df[, baskets]))) { # in case there is no level for the target baskets
    # we just need an empty dataframe keeping all factor levels
    df_long <- df[-seq_len(nrow(df)), keys, drop = FALSE]
  } else {
    # Start from the keys and the CQ baskets; SMQ baskets are added below,
    # concatenated with their scope: SMQxxxNAM(SMQxxxSC)
    df_cnct <- df[, c(keys, baskets[startsWith(baskets, "CQ")]), drop = FALSE]

    for (nam in names(smq)) {
      sc <- smq[nam] # SMQxxxSC corresponding to SMQxxxNAM
      nam_notna <- !is.na(df[[nam]])
      new_colname <- paste(nam, sc, sep = "_")
      df_cnct[nam_notna, new_colname] <- paste0(df[[nam]], "(", df[[sc]], ")")[nam_notna]
    }

    df_cnct$unique_id <- seq_len(nrow(df_cnct))
    var_cols <- names(df_cnct)[!(names(df_cnct) %in% c(keys, "unique_id"))]
    # have to convert df_cnct from tibble to dataframe
    # as it throws a warning otherwise about rownames.
    # tibble do not support rownames and reshape creates rownames

    df_long <- stats::reshape(
      data = as.data.frame(df_cnct),
      varying = var_cols,
      v.names = "SMQ",
      idvar = names(df_cnct)[names(df_cnct) %in% c(keys, "unique_id")],
      direction = "long",
      new.row.names = seq_len(prod(length(var_cols), nrow(df_cnct)))
    )

    # Keep only records that belong to a basket and drop the reshape bookkeeping columns.
    df_long <- df_long[!is.na(df_long[, "SMQ"]), !(names(df_long) %in% c("time", "unique_id"))]
    df_long$SMQ <- as.factor(df_long$SMQ)
  }

  smq_levels <- setdiff(levels(df_long[["SMQ"]]), na_level)

  if (!is.null(aag_summary)) {
    # A warning in case there is no match between df and aag_summary records
    if (length(intersect(smq_levels, unique(aag_summary$basket_name))) == 0) {
      warning("There are 0 basket levels in common between aag_summary$basket_name and df.")
    }
    # Observed levels plus any level of interest from `aag_summary` that was not observed.
    df_long[["SMQ"]] <- factor(
      df_long[["SMQ"]],
      levels = sort(
        c(
          smq_levels,
          setdiff(unique(aag_summary$basket_name), smq_levels)
        )
      )
    )
  } else {
    # Baskets without any record are kept as levels under their variable name.
    # `drop = FALSE` ensures we iterate over columns even when a single basket is selected.
    all_na_basket_flag <- vapply(df[, baskets, drop = FALSE], function(x) {
      all(is.na(x))
    }, FUN.VALUE = logical(1))
    all_na_basket <- baskets[all_na_basket_flag]

    df_long[["SMQ"]] <- factor(
      df_long[["SMQ"]],
      levels = sort(c(smq_levels, all_na_basket))
    )
  }
  formatters::var_labels(df_long) <- var_labels
  tibble::tibble(df_long)
}

#' Univariate Formula Special Term
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The special term `univariate` indicates that the model should be fitted individually for
#' every variable included in `univariate`.
#'
#' @param x A vector of variable names separated by commas.
#'
#' @return When used within a model formula, produces univariate models for each variable provided.
#'
#' @details
#' If provided alongside a pairwise specification, the model
#' `y ~ ARM + univariate(SEX, AGE, RACE)` leads to the study and comparison of the models
#' + `y ~ ARM`
#' + `y ~ ARM + SEX`
#' + `y ~ ARM + AGE`
#' + `y ~ ARM + RACE`
#'
#' @export
univariate <- function(x) {
  # Record the expression supplied by the caller as text alongside the value.
  varname <- deparse(substitute(x))
  structure(x, varname = varname)
}

# Get the right-hand-term of a formula
rht <- function(x) {
  checkmate::assert_formula(x)
  # The right-hand side is the last element of the formula, whether one- or two-sided.
  as.character(x[[length(x)]])
}

#' Hazard Ratio Estimation in Interactions
#'
#' This function estimates the hazard ratios between arms when an interaction variable is given with
#' specific values.
#'
#' @param variable,given Names of the two variables in the interaction. We seek the estimation of the
#'   levels of `variable` given the levels of `given`.
#' @param lvl_var,lvl_given Corresponding levels as given by `levels`.
#' @param mmat A named numeric vector filled with 0 used as template to obtain the design matrix.
#' @param coef Numeric of estimated coefficients.
#' @param vcov Variance-covariance matrix of underlying model.
#' @param conf_level Single numeric for the confidence level of estimate intervals.
#'
#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A)
#'   and Sex (F, M; reference Female). The model is abbreviated: y ~ Arm + Sex + Arm x Sex.
#'   The cox regression estimates the coefficients along with a variance-covariance matrix for:
#'
#'   - b1 (arm b), b2 (arm c)
#'   - b3 (sex m)
#'   - b4 (arm b: sex m), b5 (arm c: sex m)
#'
#'   Given that I want an estimation of the Hazard Ratio for arm C/sex M, the estimation
#'   will be given in reference to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5),
#'   therefore the interaction coefficient is given by b2 + b5 while the standard error is obtained
#'   as sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5)). The confidence limits of the hazard ratio are
#'   exp(b2 + b5 -/+ 1.96 * standard error) for a confidence level of 0.95.
#'
#' @return A list of matrices (one per non-reference level of `variable`) with rows corresponding to the
#'   combinations of `variable` and `given`, with columns:
#'   * `coef`: Estimation of the coefficient.
#'   * `se(coef)`: Standard error of the estimation.
#'   * `hr`: Hazard ratio.
#'   * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio.
#'
#' @seealso [s_cox_multivariate()].
#'
#' @examples
#' library(dplyr)
#' library(survival)
#'
#' ADSL <- tern_ex_adsl %>%
#'   filter(SEX %in% c("F", "M"))
#'
#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "PFS")
#' adtte$ARMCD <- droplevels(adtte$ARMCD)
#' adtte$SEX <- droplevels(adtte$SEX)
#'
#' mod <- coxph(
#'   formula = Surv(time = AVAL, event = 1 - CNSR) ~ (SEX + ARMCD)^2,
#'   data = adtte
#' )
#'
#' mmat <- stats::model.matrix(mod)[1, ]
#' mmat[!mmat == 0] <- 0
#'
#' @keywords internal
estimate_coef <- function(variable, given,
                          lvl_var, lvl_given,
                          coef,
                          mmat,
                          vcov,
                          conf_level = 0.95) {
  # Coefficient names as built by pasting variable name and level;
  # the first level of `variable` is the reference and gets no coefficient.
  var_terms <- paste0(variable, lvl_var[-1])
  given_terms <- paste0(given, lvl_given)

  # One row per (variable level, given level) combination, sorted by variable then given.
  combos <- expand.grid(variable = var_terms, given = given_terms)
  combos <- combos[order(combos$variable, combos$given), ]
  # The interaction coefficient may be named in either order, so both spellings are looked up.
  combos$inter <- paste0(combos$variable, ":", combos$given)
  combos$rev_inter <- paste0(combos$given, ":", combos$variable)

  group_by_variable <- combos$variable
  combo_labels <- paste(combos$variable, combos$given, sep = "/")

  # For each combination, switch on the main effect of `variable` and the matching interaction
  # in a copy of the zero template; the main effect of `given` is deliberately left out.
  contrast_mat <- apply(combos, 1, function(combo) {
    active_terms <- combo[names(combo) != "given"]
    contrast <- mmat
    contrast[names(contrast) %in% active_terms] <- 1
    contrast
  })
  colnames(contrast_mat) <- combo_labels

  # Point estimate: sum of the active coefficients.
  coef_hat <- t(contrast_mat) %*% as.matrix(coef)
  colnames(coef_hat) <- "coef"

  # Standard error: square root of the sum over the active block of the covariance matrix.
  coef_se <- apply(contrast_mat, 2, function(contrast) {
    active <- as.logical(contrast)
    sqrt(sum(vcov[active, active]))
  })

  q_norm <- stats::qnorm((1 + conf_level) / 2)
  log_hr <- as.vector(coef_hat)
  se <- unname(coef_se)
  estimates <- cbind(
    coef_hat,
    `se(coef)` = coef_se,
    hr = exp(log_hr),
    lcl = exp(log_hr - q_norm * se),
    ucl = exp(log_hr + q_norm * se)
  )

  # Split the rows by level of `variable` and return one matrix per level.
  result <- lapply(by(estimates, group_by_variable, identity), as.matrix)

  attr(result, "details") <- paste0(
    "Estimations of ", variable,
    " hazard ratio given the level of ", given, " compared to ",
    variable, " level ", lvl_var[1], "."
  )
  result
}

#' `tryCatch` around `car::Anova`
#'
#' Captures warnings when executing [car::Anova].
#'
#' @inheritParams car::Anova
#'
#' @return A list with item `aov` for the result of the model and `warn_text` for the captured warnings.
#'
#' @examples
#' # `car::Anova` on cox regression model including strata and expected
#' # a likelihood ratio test triggers a warning as only `Wald` method is
#' # accepted.
#'
#' library(survival)
#'
#' mod <- coxph(
#'   formula = Surv(time = futime, event = fustat) ~ factor(rx) + strata(ecog.ps),
#'   data = ovarian
#' )
#'
#' @keywords internal
try_car_anova <- function(mod,
                          test.statistic) { # nolint
  # Stays empty (NULL) unless a warning is raised while computing the table.
  warn_text <- c()

  aov <- withCallingHandlers(
    expr = car::Anova(
      mod,
      test.statistic = test.statistic,
      type = "III"
    ),
    warning = function(w) {
      # Keep the text of the warning for the caller ...
      warn_text <<- trimws(paste0("Warning in `try_car_anova`: ", w))

      # ... then resume the computation as if the warning had not occurred,
      # since a warning is sometimes expected here.
      invokeRestart("muffleWarning")
    }
  )

  list(aov = aov, warn_text = warn_text)
}

#' Fit the Cox Regression Model and `Anova`
#'
#' The function allows to derive from the [survival::coxph()] results the effect p.values using [car::Anova()].
#' This last package introduces more flexibility to get the effect p.values.
#'
#' @inheritParams t_coxreg
#'
#' @return A list with items `mod` (results of [survival::coxph()]), `msum` (result of `summary`) and
#'   `aov` (result of [car::Anova()]).
#'
#' @noRd
fit_n_aov <- function(formula,
                      data = data,
                      conf_level = conf_level,
                      pval_method = c("wald", "likelihood"),
                      ...) {
  pval_method <- match.arg(pval_method)

  # Re-home the formula in this frame before fitting.
  environment(formula) <- environment()

  # We expect some warnings due to coxph which fails strict programming.
  suppressWarnings({
    mod <- survival::coxph(formula, data = data, ...)
    msum <- summary(mod, conf.int = conf_level)
  })

  anova_res <- try_car_anova(
    mod,
    test.statistic = switch(pval_method,
      "wald" = "Wald",
      "likelihood" = "LR"
    )
  )

  # Surface any captured warning as a message and keep it as an attribute of the result.
  warn_attr <- anova_res$warn_text
  if (!is.null(warn_attr)) {
    message(warn_attr)
  }

  y <- list(mod = mod, msum = msum, aov = anova_res$aov)
  attr(y, "message") <- warn_attr
  y
}

# argument_checks
check_formula <- function(formula) {
  # Anything carrying the "formula" class passes; everything else is rejected.
  if (inherits(formula, "formula")) {
    return(invisible())
  }

  stop("Check `formula`. A formula should resemble `Surv(time = AVAL, event = 1 - CNSR) ~ study_arm(ARMCD)`.")
}

check_covariate_formulas <- function(covariates) {
  # Every element must be a formula, and a missing (NULL) specification is rejected as well.
  is_formula <- vapply(covariates, function(covariate) inherits(covariate, "formula"), FUN.VALUE = logical(1))
  if (is.null(covariates) || !all(is_formula)) {
    stop("Check `covariates`, it should be a list of right-hand-term formulas, e.g. list(Age = ~AGE).")
  }

  invisible()
}

name_covariate_names <- function(covariates) {
  current_names <- names(covariates)

  # No names at all: name every covariate after the right-hand term of its formula.
  if (is.null(current_names)) {
    names(covariates) <- vapply(covariates, FUN = rht, FUN.VALUE = "name")
    return(covariates)
  }

  # Partially named: fill in only the empty names.
  unnamed <- current_names == ""
  if (any(unnamed)) {
    names(covariates)[unnamed] <- vapply(covariates[unnamed], FUN = rht, FUN.VALUE = "name")
  }
  covariates
}

check_increments <- function(increments, covariates) {
  # Nothing to check when no increments are supplied.
  if (is.null(increments)) {
    return(invisible())
  }

  covariate_terms <- vapply(covariates, FUN = rht, FUN.VALUE = "name")
  unmatched <- names(increments)[!(names(increments) %in% covariate_terms)]

  # One warning per increment whose name is not the right-hand term of any covariate.
  lapply(
    X = unmatched, FUN = function(x) {
      warning(
        paste(
          "Check `increments`, the `increment` for ", x,
          "doesn't match any names in investigated covariate(s)."
        )
      )
    }
  )

  invisible()
}

#' Multivariate Cox Model - Summarized Results
#'
#' Analyses based on multivariate Cox model are usually not performed for the Controlled Substance Reporting or
#' regulatory documents but serve exploratory purposes only (e.g., for publication). In practice, the model usually
#' includes only the main effects (without interaction terms). It produces the hazard ratio estimates for each of the
#' covariates included in the model.
#' The analysis follows the same principles (e.g., stratified vs. unstratified analysis and tie handling) as the
#' usual Cox model analysis. Since there is usually no pre-specified hypothesis testing for such analysis,
#' the p.values need to be interpreted with caution. (**Statistical Analysis of Clinical Trials Data with R**,
#' `NEST's bookdown`)
#'
#' @param formula (`formula`)\cr A formula corresponding to the investigated [survival::Surv()] survival model
#'   including covariates.
#' @param data (`data.frame`)\cr A data frame which includes the variable in formula and covariates.
#' @param conf_level (`proportion`)\cr The confidence level for the hazard ratio interval estimations. Default is 0.95.
#' @param pval_method (`character`)\cr The method used for the estimation of p-values, should be one of
#'   `"wald"` (default) or `"likelihood"`.
#' @param ... Optional parameters passed to [survival::coxph()]. Can include `ties`, a character string specifying the
#'   method for tie handling, one of `exact` (default), `efron`, `breslow`.
#'
#' @return A `list` with elements `mod`, `msum`, `aov`, and `coef_inter`.
#'
#' @details The output is limited to single effect terms. Work is ongoing for estimation of interaction terms
#'   but is out of scope as defined by the  Global Data Standards Repository
#'   (**`GDS_Standard_TLG_Specs_Tables_2.doc`**).
#'
#' @seealso [estimate_coef()].
#'
#' @examples
#' library(dplyr)
#'
#' adtte <- tern_ex_adtte
#' adtte_f <- subset(adtte, PARAMCD == "OS") # _f: filtered
#' adtte_f <- filter(
#'   adtte_f,
#'   PARAMCD == "OS" &
#'     SEX %in% c("F", "M") &
#'     RACE %in% c("ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE")
#' )
#' adtte_f$SEX <- droplevels(adtte_f$SEX)
#' adtte_f$RACE <- droplevels(adtte_f$RACE)
#'
#' @keywords internal
s_cox_multivariate <- function(formula, data,
                               conf_level = 0.95,
                               pval_method = c("wald", "likelihood"),
                               ...) {
  # Variables of the formula, leaving out the first entry (the response) and any `strata()` special.
  tf <- stats::terms(formula, specials = c("strata"))
  covariates <- rownames(attr(tf, "factors"))[-c(1, unlist(attr(tf, "specials")))]

  # Character covariates are converted to factors so that their levels can be queried below.
  for (covariate in covariates) {
    if (is.character(data[[covariate]])) {
      data[[covariate]] <- as.factor(data[[covariate]])
    }
  }
  pval_method <- match.arg(pval_method)

  fit <- fit_n_aov(
    formula = formula,
    data = data,
    conf_level = conf_level,
    pval_method = pval_method,
    ...
  )
  mod <- fit$mod
  aov <- fit$aov
  msum <- fit$msum

  term_order <- attr(mod$terms, "order")
  all_term_labs <- attr(mod$terms, "term.labels")
  term_labs <- all_term_labs[which(term_order == 1)]
  names(term_labs) <- term_labs

  # Interaction estimates are only computed when the model has terms of order > 1.
  coef_inter <- NULL
  if (any(term_order > 1)) {
    for_inter <- all_term_labs[term_order > 1]
    names(for_inter) <- for_inter

    # Zero-filled template with one named entry per model coefficient.
    mmat <- stats::model.matrix(mod)[1, ]
    mmat[!mmat == 0] <- 0
    mcoef <- stats::coef(mod)
    mvcov <- stats::vcov(mod)

    estimate_coef_local <- function(variable, given) {
      estimate_coef(
        variable, given,
        coef = mcoef, mmat = mmat, vcov = mvcov, conf_level = conf_level,
        lvl_var = levels(data[[variable]]), lvl_given = levels(data[[given]])
      )
    }

    coef_inter <- lapply(
      for_inter, function(inter_term) {
        # Variables involved in this interaction term, estimated in both directions.
        involved <- attr(mod$terms, "factors")[, inter_term]
        involved <- names(involved[involved > 0])
        Map(estimate_coef_local, variable = involved, given = rev(involved))
      }
    )
  }

  list(mod = mod, msum = msum, aov = aov, coef_inter = coef_inter)
}

#' Cumulative Counts with Thresholds
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize cumulative counts of the values of a (`numeric`) vector that are less than, less than or equal to,
#' greater than, or greater than or equal to user-specified thresholds.
#'
#' @inheritParams h_count_cumulative
#' @inheritParams argument_convention
#'
#' @seealso Relevant helper function [h_count_cumulative()], and descriptive function [d_count_cumulative()].
#'
#' @name count_cumulative
NULL

#' Helper Function for [s_count_cumulative()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to calculate count and fraction of `x` values in the lower or upper tail given a threshold.
#'
#' @inheritParams argument_convention
#' @param threshold (`number`)\cr a cutoff value as threshold to count values of `x`.
#' @param lower_tail (`logical`)\cr whether to count lower tail, default is `TRUE`.
#' @param include_eq (`logical`)\cr whether to include value equal to the `threshold` in
#'   count, default is `TRUE`.
#' @param .N_col (`count`)\cr denominator for fraction calculation.
#'
#' @return A named vector with items:
#'   * `count`: the count of values less than, less or equal to, greater than, or greater or equal to a threshold
#'     of user specification.
#'   * `fraction`: the fraction of the count.
#'
#' @seealso [count_cumulative]
#'
#' @examples
#' set.seed(1, kind = "Mersenne-Twister")
#' x <- c(sample(1:10, 10), NA)
#' .N_col <- length(x)
#' h_count_cumulative(x, 5, .N_col = .N_col)
#' h_count_cumulative(x, 5, lower_tail = FALSE, include_eq = FALSE, na.rm = FALSE, .N_col = .N_col)
#' h_count_cumulative(x, 0, lower_tail = FALSE, .N_col = .N_col)
#' h_count_cumulative(x, 100, lower_tail = FALSE, .N_col = .N_col)
#'
#' @export
h_count_cumulative <- function(x,
                               threshold,
                               lower_tail = TRUE,
                               include_eq = TRUE,
                               na.rm = TRUE, # nolint
                               .N_col) { # nolint
  checkmate::assert_numeric(x)
  checkmate::assert_numeric(threshold)
  checkmate::assert_numeric(.N_col)
  checkmate::assert_flag(lower_tail)
  checkmate::assert_flag(include_eq)
  checkmate::assert_flag(na.rm)

  # Pick the comparison operator matching the requested tail and boundary.
  compare <- if (lower_tail) {
    if (include_eq) `<=` else `<`
  } else {
    if (include_eq) `>=` else `>`
  }

  # With `na.rm = FALSE` every element is kept, so a missing `x` yields an NA
  # index below and is therefore still counted by `length()`.
  keep <- if (na.rm) !is.na(x) else rep(TRUE, length(x))
  n_hits <- length(x[keep & compare(x, threshold)])

  c(count = n_hits, fraction = n_hits / .N_col)
}

#' Description of Cumulative Count
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a helper function that describes the analysis in [s_count_cumulative()].
#'
#' @inheritParams h_count_cumulative
#'
#' @return Labels for [s_count_cumulative()].
#'
#' @export
d_count_cumulative <- function(threshold, lower_tail, include_eq) {
  checkmate::assert_numeric(threshold)
  # Direction symbol first, then the optional "=" for inclusive comparisons.
  direction <- if (lower_tail) "<" else ">"
  comparison <- if (include_eq) paste0(direction, "=") else direction
  paste0(comparison, " ", threshold)
}

#' @describeIn count_cumulative Statistics function that produces a named list given a numeric vector of thresholds.
#'
#' @param thresholds (`numeric`)\cr vector of cutoff values for the counts.
#'
#' @return
#' * `s_count_cumulative()` returns a named list of `count_fraction`s: a list with each `thresholds` value as a
#'   component, each component containing a vector for the count and fraction.
#'
#' @keywords internal
s_count_cumulative <- function(x,
                               thresholds,
                               lower_tail = TRUE,
                               include_eq = TRUE,
                               .N_col, # nolint
                               ...) {
  checkmate::assert_numeric(thresholds, min.len = 1, any.missing = FALSE)

  # Count/fraction for a single cutoff, carrying its descriptive label.
  summarize_cutoff <- function(cutoff) {
    counts <- h_count_cumulative(x, cutoff, lower_tail, include_eq, .N_col = .N_col, ...)
    cutoff_label <- d_count_cumulative(cutoff, lower_tail, include_eq)
    formatters::with_label(counts, cutoff_label)
  }

  per_cutoff <- lapply(thresholds, summarize_cutoff)
  # Components are named after the threshold values themselves.
  names(per_cutoff) <- thresholds

  list(count_fraction = per_cutoff)
}

#' @describeIn count_cumulative Formatted analysis function which is used as `afun`
#'   in `count_cumulative()`.
#'
#' @return
#' * `a_count_cumulative()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_count_cumulative <- make_afun(
  # Statistics function being wrapped into a formatted analysis function.
  s_count_cumulative,
  # Format applied to the `count_fraction` statistic.
  .formats = c(count_fraction = format_count_fraction)
)

#' @describeIn count_cumulative Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_cumulative()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_cumulative()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_cumulative(
#'     vars = "AGE",
#'     thresholds = c(40, 60)
#'   ) %>%
#'   build_table(tern_ex_adsl)
#'
#' @export
count_cumulative <- function(lyt,
                             vars,
                             var_labels = vars,
                             show_labels = "visible",
                             ...,
                             table_names = vars,
                             .stats = NULL,
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL) {
  # Customize the default analysis function with the user-supplied
  # statistics, formats, labels and indentation; `count_fraction` is passed
  # as `.ungroup_stats`.
  customized_afun <- make_afun(
    a_count_cumulative,
    .ungroup_stats = "count_fraction",
    .indent_mods = .indent_mods,
    .labels = .labels,
    .formats = .formats,
    .stats = .stats
  )

  # Everything passed through `...` is handed on via `extra_args`.
  analyze(
    lyt,
    vars,
    afun = customized_afun,
    extra_args = list(...),
    show_labels = show_labels,
    var_labels = var_labels,
    table_names = table_names
  )
}

#' Confidence Intervals for a Difference of Binomials
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Several confidence intervals for the difference between proportions.
#'
#' @param grp (`factor`)\cr vector assigning observations to one out of two groups
#'   (e.g. reference and treatment group).
#'
#' @name desctools_binom
NULL

#' Recycle List of Parameters
#'
#' This function recycles all supplied elements to the maximal dimension.
#'
#' @param ... (`any`)\cr Elements to recycle.
#'
#' @return A `list`.
#'
#' @keywords internal
#' @noRd
h_recycle <- function(...) {
  elements <- list(...)
  # Longest supplied element determines the common length.
  target_len <- max(lengths(elements))
  structure(
    lapply(elements, rep, length.out = target_len),
    maxdim = target_len
  )
}

#' @describeIn desctools_binom Several confidence intervals for the difference between proportions.
#'
#' @return A `matrix` of 3 values:
#'   * `est`: estimate of proportion difference.
#'   * `lwr.ci`: estimate of lower end of the confidence interval.
#'   * `upr.ci`: estimate of upper end of the confidence interval.
#'
#' @keywords internal
desctools_binom <- function(x1, n1, x2, n2, conf.level = 0.95, sides = c( # nolint
                              "two.sided",
                              "left", "right"
                            ), method = c(
                              "ac", "wald", "waldcc", "score",
                              "scorecc", "mn", "mee", "blj", "ha", "hal", "jp"
                            )) {
  # NOTE(review): judging by its name this is presumably a port of
  # DescTools::BinomDiffCI() - confirm before changing any formula.
  # When not supplied, `sides` and `method` fall back to their first choice.
  if (missing(sides)) {
    sides <- match.arg(sides)
  }
  if (missing(method)) {
    method <- match.arg(method)
  }

  # Worker computing one interval for scalar inputs. Returns a named vector
  # `c(est, lwr.ci, upr.ci)`.
  iBinomDiffCI <- function(x1, n1, x2, n2, conf.level, sides, # nolint
                           method) {
    # A one-sided interval is derived from a two-sided one at level
    # 1 - 2 * (1 - conf.level); the unused bound is reset at the end.
    if (sides != "two.sided") {
      conf.level <- 1 - 2 * (1 - conf.level) # nolint
    }
    alpha <- 1 - conf.level
    kappa <- stats::qnorm(1 - alpha / 2)
    p1_hat <- x1 / n1
    p2_hat <- x2 / n2
    est <- p1_hat - p2_hat
    # Each branch must define `ci_lwr` and `ci_upr`.
    # The `exact` and `beal` branches cannot be reached through the `method`
    # choices accepted by the enclosing function's `match.arg()`.
    switch(method,
      wald = {
        vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2
        term2 <- kappa * sqrt(vd)
        ci_lwr <- max(-1, est - term2)
        ci_upr <- min(1, est + term2)
      },
      waldcc = {
        # Wald interval widened by the continuity correction term.
        vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2
        term2 <- kappa * sqrt(vd)
        term2 <- term2 + 0.5 * (1 / n1 + 1 / n2)
        ci_lwr <- max(-1, est - term2)
        ci_upr <- min(1, est + term2)
      },
      ac = {
        # Wald-type interval after adding one success and two trials per
        # group. The reported `est` stays the unadjusted difference.
        n1 <- n1 + 2
        n2 <- n2 + 2
        x1 <- x1 + 1
        x2 <- x2 + 1
        p1_hat <- x1 / n1
        p2_hat <- x2 / n2
        est1 <- p1_hat - p2_hat
        vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2
        term2 <- kappa * sqrt(vd)
        ci_lwr <- max(-1, est1 - term2)
        ci_upr <- min(1, est1 + term2)
      },
      exact = {
        ci_lwr <- NA
        ci_upr <- NA
      },
      score = {
        # Combines the single-sample Wilson limits of both groups.
        w1 <- desctools_binomci(
          x = x1, n = n1, conf.level = conf.level,
          method = "wilson"
        )
        w2 <- desctools_binomci(
          x = x2, n = n2, conf.level = conf.level,
          method = "wilson"
        )
        l1 <- w1[2]
        u1 <- w1[3]
        l2 <- w2[2]
        u2 <- w2[3]
        ci_lwr <- est - kappa * sqrt(l1 * (1 - l1) / n1 +
          u2 * (1 - u2) / n2)
        ci_upr <- est + kappa * sqrt(u1 * (1 - u1) / n1 +
          l2 * (1 - l2) / n2)
      },
      scorecc = {
        # Same idea as `score`, based on continuity-corrected Wilson limits.
        w1 <- desctools_binomci(
          x = x1, n = n1, conf.level = conf.level,
          method = "wilsoncc"
        )
        w2 <- desctools_binomci(
          x = x2, n = n2, conf.level = conf.level,
          method = "wilsoncc"
        )
        l1 <- w1[2]
        u1 <- w1[3]
        l2 <- w2[2]
        u2 <- w2[3]
        ci_lwr <- max(-1, est - sqrt((p1_hat - l1)^2 +
          (u2 - p2_hat)^2))
        ci_upr <- min(1, est + sqrt((u1 - p1_hat)^2 + (p2_hat -
          l2)^2))
      },
      mee = {
        # Standard error of the difference under the constraint
        # p1 - p2 = dif, using a closed-form cubic root for p1.
        .score <- function(p1, n1, p2, n2, dif) {
          if (dif > 1) dif <- 1
          if (dif < -1) dif <- -1
          diff <- p1 - p2 - dif
          if (abs(diff) == 0) {
            res <- 0
          } else {
            t <- n2 / n1
            a <- 1 + t
            b <- -(1 + t + p1 + t * p2 + dif * (t + 2))
            c <- dif * dif + dif * (2 * p1 + t + 1) + p1 +
              t * p2
            d <- -p1 * dif * (1 + dif)
            v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2
            if (abs(v) < .Machine$double.eps) v <- 0
            s <- sqrt((b / a / 3)^2 - c / a / 3)
            u <- ifelse(v > 0, 1, -1) * s
            w <- (3.141592654 + acos(v / u^3)) / 3
            p1d <- 2 * u * cos(w) - b / a / 3
            p2d <- p1d - dif
            res <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2)
          }
          return(sqrt(res))
        }
        # Two-sided p-value of the test of H0: p1 - p2 = delta.
        pval <- function(delta) {
          z <- (est - delta) / .score(
            p1_hat, n1, p2_hat,
            n2, delta
          )
          2 * min(stats::pnorm(z), 1 - stats::pnorm(z))
        }
        # Limits are the values of delta whose p-value equals alpha, searched
        # on either side of the point estimate.
        ci_lwr <- max(-1, stats::uniroot(function(delta) {
          pval(delta) -
            alpha
        }, interval = c(-1 + 1e-06, est - 1e-06))$root)
        ci_upr <- min(1, stats::uniroot(function(delta) {
          pval(delta) -
            alpha
        }, interval = c(est + 1e-06, 1 - 1e-06))$root)
      },
      blj = {
        p1_dash <- (x1 + 0.5) / (n1 + 1)
        p2_dash <- (x2 + 0.5) / (n2 + 1)
        vd <- p1_dash * (1 - p1_dash) / n1 + p2_dash * (1 -
          p2_dash) / n2
        term2 <- kappa * sqrt(vd)
        est_dash <- p1_dash - p2_dash
        ci_lwr <- max(-1, est_dash - term2)
        ci_upr <- min(1, est_dash + term2)
      },
      ha = {
        term2 <- 1 / (2 * min(n1, n2)) + kappa * sqrt(p1_hat *
          (1 - p1_hat) / (n1 - 1) + p2_hat * (1 - p2_hat) / (n2 -
            1))
        ci_lwr <- max(-1, est - term2)
        ci_upr <- min(1, est + term2)
      },
      mn = {
        # Bisection-style search (at most 50 halvings of the step `dp`) for
        # the difference whose score statistic reaches the quantile `z`.
        .conf <- function(x1, n1, x2, n2, z, lower = FALSE) {
          p1 <- x1 / n1
          p2 <- x2 / n2
          p_hat <- p1 - p2
          dp <- 1 + ifelse(lower, 1, -1) * p_hat
          i <- 1
          while (i <= 50) {
            dp <- 0.5 * dp
            y <- p_hat + ifelse(lower, -1, 1) * dp
            score <- .score(p1, n1, p2, n2, y)
            if (score < z) {
              p_hat <- y
            }
            if ((dp < 1e-07) || (abs(z - score) < 1e-06)) {
              break
            } else {
              i <- i + 1
            }
          }
          return(y)
        }
        # Score statistic for H0: p1 - p2 = dif, with the variance scaled by
        # n / (n - 1).
        .score <- function(p1, n1, p2, n2, dif) {
          diff <- p1 - p2 - dif
          if (abs(diff) == 0) {
            res <- 0
          } else {
            t <- n2 / n1
            a <- 1 + t
            b <- -(1 + t + p1 + t * p2 + dif * (t + 2))
            c <- dif * dif + dif * (2 * p1 + t + 1) + p1 +
              t * p2
            d <- -p1 * dif * (1 + dif)
            v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2
            s <- sqrt((b / a / 3)^2 - c / a / 3)
            u <- ifelse(v > 0, 1, -1) * s
            w <- (3.141592654 + acos(v / u^3)) / 3
            p1d <- 2 * u * cos(w) - b / a / 3
            p2d <- p1d - dif
            n <- n1 + n2
            var <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2) *
              n / (n - 1)
            res <- diff^2 / var
          }
          return(res)
        }
        z <- stats::qchisq(conf.level, 1)
        ci_lwr <- max(-1, .conf(x1, n1, x2, n2, z, TRUE))
        ci_upr <- min(1, .conf(x1, n1, x2, n2, z, FALSE))
      },
      beal = {
        a <- p1_hat + p2_hat
        b <- p1_hat - p2_hat
        u <- ((1 / n1) + (1 / n2)) / 4
        v <- ((1 / n1) - (1 / n2)) / 4
        V <- u * ((2 - a) * a - b^2) + 2 * v * (1 - a) * b # nolint
        z <- stats::qchisq(p = 1 - alpha / 2, df = 1)
        A <- sqrt(z * (V + z * u^2 * (2 - a) * a + z * v^2 * (1 - a)^2)) # nolint
        B <- (b + z * v * (1 - a)) / (1 + z * u) # nolint
        ci_lwr <- max(-1, B - A / (1 + z * u))
        ci_upr <- min(1, B + A / (1 + z * u))
      },
      hal = {
        psi <- (p1_hat + p2_hat) / 2
        u <- (1 / n1 + 1 / n2) / 4
        v <- (1 / n1 - 1 / n2) / 4
        z <- kappa
        theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 *
          psi)) / (1 + z^2 * u)
        w <- z / (1 + z^2 * u) * sqrt(u * (4 * psi * (1 - psi) -
          (p1_hat - p2_hat)^2) + 2 * v * (1 - 2 * psi) *
          (p1_hat - p2_hat) + 4 * z^2 * u^2 * (1 - psi) *
          psi + z^2 * v^2 * (1 - 2 * psi)^2)
        ci_lwr <- max(-1, theta - w)
        ci_upr <- min(1, theta + w)
      },
      jp = {
        # Same formula as `hal`, with `psi` based on (x + 0.5) / (n + 1).
        psi <- 0.5 * ((x1 + 0.5) / (n1 + 1) + (x2 + 0.5) / (n2 +
          1))
        u <- (1 / n1 + 1 / n2) / 4
        v <- (1 / n1 - 1 / n2) / 4
        z <- kappa
        theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 *
          psi)) / (1 + z^2 * u)
        w <- z / (1 + z^2 * u) * sqrt(u * (4 * psi * (1 - psi) -
          (p1_hat - p2_hat)^2) + 2 * v * (1 - 2 * psi) *
          (p1_hat - p2_hat) + 4 * z^2 * u^2 * (1 - psi) *
          psi + z^2 * v^2 * (1 - 2 * psi)^2)
        ci_lwr <- max(-1, theta - w)
        ci_upr <- min(1, theta + w)
      }
    )
    ci <- c(
      est = est, lwr.ci = min(ci_lwr, ci_upr),
      upr.ci = max(ci_lwr, ci_upr)
    )
    # One-sided intervals: the unused bound is set to the parameter limit.
    if (sides == "left") {
      ci[3] <- 1
    } else if (sides == "right") {
      ci[2] <- -1
    }
    return(ci)
  }

  method <- match.arg(arg = method, several.ok = TRUE)
  sides <- match.arg(arg = sides, several.ok = TRUE)
  # All arguments are recycled to a common length; one interval per position.
  lst <- h_recycle(
    x1 = x1, n1 = n1, x2 = x2, n2 = n2, conf.level = conf.level,
    sides = sides, method = method
  )
  res <- t(vapply(seq_len(attr(lst, "maxdim")), function(i) {
    iBinomDiffCI(
      x1 = lst$x1[i],
      n1 = lst$n1[i], x2 = lst$x2[i], n2 = lst$n2[i], conf.level = lst$conf.level[i],
      sides = lst$sides[i], method = lst$method[i]
    )
  }, numeric(3)))

  # Row names are built from the arguments that actually vary across rows,
  # using the input names where available and "<arg>.<index>" otherwise.
  lgn <- h_recycle(x1 = if (is.null(names(x1))) {
    paste("x1", seq_along(x1), sep = ".")
  } else {
    names(x1)
  }, n1 = if (is.null(names(n1))) {
    paste("n1", seq_along(n1), sep = ".")
  } else {
    names(n1)
  }, x2 = if (is.null(names(x2))) {
    paste("x2", seq_along(x2), sep = ".")
  } else {
    names(x2)
  }, n2 = if (is.null(names(n2))) {
    paste("n2", seq_along(n2), sep = ".")
  } else {
    names(n2)
  }, conf.level = conf.level, sides = sides, method = method)
  varies <- vapply(lgn, function(x) {
    length(unique(x)) != 1
  }, logical(1))
  xn <- apply(as.data.frame(lgn[varies]), 1, paste, collapse = ":")
  rownames(res) <- xn
  return(res)
}

#' @describeIn desctools_binom Compute confidence intervals for binomial proportions.
#'
#' @param x (`count`)\cr number of successes
#' @param n (`count`)\cr number of trials
#' @param conf.level (`proportion`)\cr confidence level, defaults to 0.95.
#' @param sides (`character`)\cr side of the confidence interval to compute. Must be one of `"two.sided"` (default),
#'   `"left"`, or `"right"`.
#' @param method (`character`)\cr method to use. Can be one out of: `"wald"`, `"waldcc"`, `"wilson"`, `"wilsoncc"`,
#' `"agresti-coull"`, `"jeffreys"`, `"modified wilson"`, `"modified jeffreys"`, `"clopper-pearson"`, `"arcsine"`,
#' `"logit"`, `"witting"`, `"pratt"`, `"midp"`, `"lik"`, and `"blaker"`.
#'
#' @return A `matrix` with 3 columns containing:
#'   * `est`: estimate of the proportion.
#'   * `lwr.ci`: lower end of the confidence interval.
#'   * `upr.ci`: upper end of the confidence interval.
#'
#' @keywords internal
desctools_binomci <- function(x,
                              n,
                              conf.level = 0.95, # nolint
                              sides = c("two.sided", "left", "right"),
                              method = c(
                                "wilson", "wald", "waldcc", "agresti-coull",
                                "jeffreys", "modified wilson", "wilsoncc", "modified jeffreys",
                                "clopper-pearson", "arcsine", "logit", "witting", "pratt",
                                "midp", "lik", "blaker"
                              ),
                              rand = 123,
                              tol = 1e-05) {
  # NOTE(review): judging by its name this is presumably a port of
  # DescTools::BinomCI() - confirm before changing any formula.
  # `rand` is the seed used by the "witting" method; `tol` is the step size of
  # the iterative "blaker" search (the "lik" method uses its own tolerance).
  if (missing(method)) {
    method <- "wilson"
  }
  if (missing(sides)) {
    sides <- "two.sided"
  }

  # Worker computing one interval for scalar inputs. Returns a named vector
  # `c(est, lwr.ci, upr.ci)`.
  iBinomCI <- function(x, n, conf.level = 0.95, sides = c( # nolint
                         "two.sided",
                         "left", "right"
                       ), method = c(
                         "wilson", "wilsoncc", "wald",
                         "waldcc", "agresti-coull", "jeffreys", "modified wilson",
                         "modified jeffreys", "clopper-pearson", "arcsine", "logit",
                         "witting", "pratt", "midp", "lik", "blaker"
                       ), rand = 123,
                       tol = 1e-05) {
    if (length(x) != 1) {
      stop("'x' has to be of length 1 (number of successes)")
    }
    if (length(n) != 1) {
      stop("'n' has to be of length 1 (number of trials)")
    }
    if (length(conf.level) != 1) {
      stop("'conf.level' has to be of length 1 (confidence level)")
    }
    if (conf.level < 0.5 || conf.level > 1) {
      stop("'conf.level' has to be in [0.5, 1]")
    }
    sides <- match.arg(sides, choices = c(
      "two.sided", "left",
      "right"
    ), several.ok = FALSE)
    # A one-sided interval is derived from a two-sided one at level
    # 1 - 2 * (1 - conf.level); the unused bound is reset at the end.
    if (sides != "two.sided") {
      conf.level <- 1 - 2 * (1 - conf.level) # nolint
    }
    alpha <- 1 - conf.level
    kappa <- stats::qnorm(1 - alpha / 2)
    p_hat <- x / n
    q_hat <- 1 - p_hat
    est <- p_hat
    # Each branch defines `ci_lwr` and `ci_upr`; some also replace `est`.
    switch(match.arg(arg = method, choices = c(
      "wilson",
      "wald", "waldcc", "wilsoncc", "agresti-coull", "jeffreys",
      "modified wilson", "modified jeffreys", "clopper-pearson",
      "arcsine", "logit", "witting", "pratt", "midp", "lik",
      "blaker"
    )),
      wald = {
        term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n)
        ci_lwr <- max(0, p_hat - term2)
        ci_upr <- min(1, p_hat + term2)
      },
      waldcc = {
        # Wald interval widened by the continuity correction 1 / (2n).
        term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n)
        term2 <- term2 + 1 / (2 * n)
        ci_lwr <- max(0, p_hat - term2)
        ci_upr <- min(1, p_hat + term2)
      },
      wilson = {
        term1 <- (x + kappa^2 / 2) / (n + kappa^2)
        term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat *
          q_hat + kappa^2 / (4 * n))
        ci_lwr <- max(0, term1 - term2)
        ci_upr <- min(1, term1 + term2)
      },
      wilsoncc = {
        lci <- (2 * x + kappa^2 - 1 - kappa * sqrt(kappa^2 -
          2 - 1 / n + 4 * p_hat * (n * q_hat + 1))) / (2 *
          (n + kappa^2))
        uci <- (2 * x + kappa^2 + 1 + kappa * sqrt(kappa^2 +
          2 - 1 / n + 4 * p_hat * (n * q_hat - 1))) / (2 *
          (n + kappa^2))
        ci_lwr <- max(0, ifelse(p_hat == 0, 0, lci))
        ci_upr <- min(1, ifelse(p_hat == 1, 1, uci))
      },
      `agresti-coull` = {
        # The reported estimate is the adjusted proportion `p_tilde`.
        x_tilde <- x + kappa^2 / 2
        n_tilde <- n + kappa^2
        p_tilde <- x_tilde / n_tilde
        q_tilde <- 1 - p_tilde
        est <- p_tilde
        term2 <- kappa * sqrt(p_tilde * q_tilde) / sqrt(n_tilde)
        ci_lwr <- max(0, p_tilde - term2)
        ci_upr <- min(1, p_tilde + term2)
      },
      jeffreys = {
        if (x == 0) {
          ci_lwr <- 0
        } else {
          ci_lwr <- stats::qbeta(
            alpha / 2,
            x + 0.5, n - x + 0.5
          )
        }
        if (x == n) {
          ci_upr <- 1
        } else {
          ci_upr <- stats::qbeta(1 -
            alpha / 2, x + 0.5, n - x + 0.5)
        }
      },
      `modified wilson` = {
        # Wilson interval, with chi-square based limits when `x` is close to
        # 0 (lower limit) or close to `n` (upper limit).
        term1 <- (x + kappa^2 / 2) / (n + kappa^2)
        term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat *
          q_hat + kappa^2 / (4 * n))
        if ((n <= 50 && x %in% c(1, 2)) || (n >= 51 && x %in%
          c(1:3))) {
          ci_lwr <- 0.5 * stats::qchisq(alpha, 2 *
            x) / n
        } else {
          ci_lwr <- max(0, term1 - term2)
        }
        if ((n <= 50 && x %in% c(n - 1, n - 2)) || (n >= 51 &&
          x %in% c(n - (1:3)))) {
          ci_upr <- 1 - 0.5 * stats::qchisq(
            alpha,
            2 * (n - x)
          ) / n
        } else {
          ci_upr <- min(1, term1 +
            term2)
        }
      },
      `modified jeffreys` = {
        if (x == n) {
          ci_lwr <- (alpha / 2)^(1 / n)
        } else {
          if (x <= 1) {
            ci_lwr <- 0
          } else {
            ci_lwr <- stats::qbeta(
              alpha / 2,
              x + 0.5, n - x + 0.5
            )
          }
        }
        if (x == 0) {
          ci_upr <- 1 - (alpha / 2)^(1 / n)
        } else {
          if (x >= n - 1) {
            ci_upr <- 1
          } else {
            ci_upr <- stats::qbeta(1 -
              alpha / 2, x + 0.5, n - x + 0.5)
          }
        }
      },
      `clopper-pearson` = {
        ci_lwr <- stats::qbeta(alpha / 2, x, n - x + 1)
        ci_upr <- stats::qbeta(1 - alpha / 2, x + 1, n - x)
      },
      arcsine = {
        # The reported estimate is the adjusted proportion `p_tilde`.
        p_tilde <- (x + 0.375) / (n + 0.75)
        est <- p_tilde
        ci_lwr <- sin(asin(sqrt(p_tilde)) - 0.5 * kappa / sqrt(n))^2
        ci_upr <- sin(asin(sqrt(p_tilde)) + 0.5 * kappa / sqrt(n))^2
      },
      logit = {
        # Wald interval on the log-odds scale, transformed back.
        lambda_hat <- log(x / (n - x))
        V_hat <- n / (x * (n - x)) # nolint
        lambda_lower <- lambda_hat - kappa * sqrt(V_hat)
        lambda_upper <- lambda_hat + kappa * sqrt(V_hat)
        ci_lwr <- exp(lambda_lower) / (1 + exp(lambda_lower))
        ci_upr <- exp(lambda_upper) / (1 + exp(lambda_upper))
      },
      witting = {
        # Randomized method: `x` is jittered by a uniform draw. Note that
        # `set.seed()` changes the session's random number generator state.
        set.seed(rand)
        x_tilde <- x + stats::runif(1, min = 0, max = 1)
        pbinom_abscont <- function(q, size, prob) {
          v <- trunc(q)
          term1 <- stats::pbinom(v - 1, size = size, prob = prob)
          term2 <- (q - v) * stats::dbinom(v, size = size, prob = prob)
          return(term1 + term2)
        }
        qbinom_abscont <- function(p, size, x) {
          fun <- function(prob, size, x, p) {
            pbinom_abscont(x, size, prob) - p
          }
          stats::uniroot(fun,
            interval = c(0, 1), size = size,
            x = x, p = p
          )$root
        }
        ci_lwr <- qbinom_abscont(1 - alpha, size = n, x = x_tilde)
        ci_upr <- qbinom_abscont(alpha, size = n, x = x_tilde)
      },
      pratt = {
        # Closed-form limits for x in {0, 1, n - 1, n}; approximation formula
        # otherwise.
        if (x == 0) {
          ci_lwr <- 0
          ci_upr <- 1 - alpha^(1 / n)
        } else if (x == 1) {
          ci_lwr <- 1 - (1 - alpha / 2)^(1 / n)
          ci_upr <- 1 - (alpha / 2)^(1 / n)
        } else if (x == (n - 1)) {
          ci_lwr <- (alpha / 2)^(1 / n)
          ci_upr <- (1 - alpha / 2)^(1 / n)
        } else if (x == n) {
          ci_lwr <- alpha^(1 / n)
          ci_upr <- 1
        } else {
          z <- stats::qnorm(1 - alpha / 2)
          A <- ((x + 1) / (n - x))^2 # nolint
          B <- 81 * (x + 1) * (n - x) - 9 * n - 8 # nolint
          C <- (0 - 3) * z * sqrt(9 * (x + 1) * (n - x) * (9 * n + 5 - z^2) + n + 1) # nolint
          D <- 81 * (x + 1)^2 - 9 * (x + 1) * (2 + z^2) + 1 # nolint
          E <- 1 + A * ((B + C) / D)^3 # nolint
          ci_upr <- 1 / E
          A <- (x / (n - x - 1))^2 # nolint
          B <- 81 * x * (n - x - 1) - 9 * n - 8 # nolint
          C <- 3 * z * sqrt(9 * x * (n - x - 1) * (9 * n + 5 - z^2) + n + 1) # nolint
          D <- 81 * x^2 - 9 * x * (2 + z^2) + 1 # nolint
          E <- 1 + A * ((B + C) / D)^3 # nolint
          ci_lwr <- 1 / E
        }
      },
      midp = {
        # Limits are roots of the mid-p tail probabilities; the trivial
        # bounds 0 and 1 are kept when x is 0 or n respectively.
        f_low <- function(pi, x, n) {
          1 / 2 * stats::dbinom(x, size = n, prob = pi) + stats::pbinom(x,
            size = n, prob = pi, lower.tail = FALSE
          ) -
            (1 - conf.level) / 2
        }
        f_up <- function(pi, x, n) {
          1 / 2 * stats::dbinom(x, size = n, prob = pi) + stats::pbinom(x -
            1, size = n, prob = pi) - (1 - conf.level) / 2
        }
        ci_lwr <- 0
        ci_upr <- 1
        if (x != 0) {
          ci_lwr <- stats::uniroot(f_low,
            interval = c(0, p_hat),
            x = x, n = n
          )$root
        }
        if (x != n) {
          ci_upr <- stats::uniroot(f_up, interval = c(
            p_hat,
            1
          ), x = x, n = n)$root
        }
      },
      lik = {
        ci_lwr <- 0
        ci_upr <- 1
        z <- stats::qnorm(1 - alpha * 0.5)
        # This method always uses its own tolerance, overriding `tol`.
        tol <- .Machine$double.eps^0.5
        # Signed square root of the binomial deviance between `y` and `mu`,
        # shifted by `bound`.
        BinDev <- function(y, x, mu, wt, bound = 0, tol = .Machine$double.eps^0.5, # nolint
                           ...) {
          ll_y <- ifelse(y %in% c(0, 1), 0, stats::dbinom(x, wt,
            y,
            log = TRUE
          ))
          ll_mu <- ifelse(mu %in% c(0, 1), 0, stats::dbinom(x,
            wt, mu,
            log = TRUE
          ))
          res <- ifelse(abs(y - mu) < tol, 0, sign(y -
            mu) * sqrt(-2 * (ll_y - ll_mu)))
          return(res - bound)
        }
        if (x != 0 && tol < p_hat) {
          ci_lwr <- if (BinDev(
            tol, x, p_hat, n, -z,
            tol
          ) <= 0) {
            stats::uniroot(
              f = BinDev, interval = c(tol, if (p_hat <
                tol || p_hat == 1) {
                1 - tol
              } else {
                p_hat
              }), bound = -z,
              x = x, mu = p_hat, wt = n
            )$root
          }
        }
        if (x != n && p_hat < (1 - tol)) {
          # NOTE(review): the first branch assigns `ci_lwr` inside the
          # expression for `ci_upr`; kept as is, but it looks unintended -
          # confirm against the reference implementation.
          ci_upr <- if (BinDev(y = 1 - tol, x = x, mu = ifelse(p_hat >
            1 - tol, tol, p_hat), wt = n, bound = z, tol = tol) <
            0) {
            ci_lwr <- if (BinDev(
              tol, x, if (p_hat <
                tol || p_hat == 1) {
                1 - tol
              } else {
                p_hat
              }, n,
              -z, tol
            ) <= 0) {
              stats::uniroot(
                f = BinDev, interval = c(tol, p_hat),
                bound = -z, x = x, mu = p_hat, wt = n
              )$root
            }
          } else {
            stats::uniroot(
              f = BinDev, interval = c(if (p_hat >
                1 - tol) {
                tol
              } else {
                p_hat
              }, 1 - tol), bound = z,
              x = x, mu = p_hat, wt = n
            )$root
          }
        }
      },
      blaker = {
        acceptbin <- function(x, n, p) {
          p1 <- 1 - stats::pbinom(x - 1, n, p)
          p2 <- stats::pbinom(x, n, p)
          a1 <- p1 + stats::pbinom(stats::qbinom(p1, n, p) - 1, n, p)
          a2 <- p2 + 1 - stats::pbinom(
            stats::qbinom(1 - p2, n, p), n,
            p
          )
          return(min(a1, a2))
        }
        # Start from the beta-quantile limits and move inwards in steps of
        # `tol` while the acceptability stays below 1 - conf.level.
        ci_lwr <- 0
        ci_upr <- 1
        if (x != 0) {
          ci_lwr <- stats::qbeta((1 - conf.level) / 2, x, n -
            x + 1)
          while (acceptbin(x, n, ci_lwr + tol) < (1 -
            conf.level)) {
            ci_lwr <- ci_lwr + tol
          }
        }
        if (x != n) {
          ci_upr <- stats::qbeta(1 - (1 - conf.level) / 2, x +
            1, n - x)
          while (acceptbin(x, n, ci_upr - tol) < (1 -
            conf.level)) {
            ci_upr <- ci_upr - tol
          }
        }
      }
    )
    ci <- c(est = est, lwr.ci = max(0, ci_lwr), upr.ci = min(
      1,
      ci_upr
    ))
    # One-sided intervals: the unused bound is set to the parameter limit.
    if (sides == "left") {
      ci[3] <- 1
    } else if (sides == "right") {
      ci[2] <- 0
    }
    return(ci)
  }

  # All arguments are recycled to a common length; one interval per position.
  lst <- list(
    x = x, n = n, conf.level = conf.level, sides = sides,
    method = method, rand = rand
  )
  maxdim <- max(lengths(lst))
  lgp <- lapply(lst, rep, length.out = maxdim)
  # `tol` is recycled separately so that it never changes the number of rows.
  tol_rep <- rep(tol, length.out = maxdim)

  # Row names are built from the arguments that actually vary across rows,
  # using the input names where available and "<arg>.<index>" otherwise.
  lgn <- h_recycle(x = if (is.null(names(x))) {
    paste("x", seq_along(x), sep = ".")
  } else {
    names(x)
  }, n = if (is.null(names(n))) {
    paste("n", seq_along(n), sep = ".")
  } else {
    names(n)
  }, conf.level = conf.level, sides = sides, method = method)
  varies <- vapply(lgn, function(x) {
    length(unique(x)) != 1
  }, logical(1))
  xn <- apply(as.data.frame(lgn[varies]), 1, paste, collapse = ":")

  res <- t(vapply(seq_len(maxdim), function(i) {
    iBinomCI(
      x = lgp$x[i],
      n = lgp$n[i], conf.level = lgp$conf.level[i], sides = lgp$sides[i],
      method = lgp$method[i], rand = lgp$rand[i], tol = tol_rep[i]
    )
  }, numeric(3)))
  # A named `x` would otherwise leak into the name of the first column.
  colnames(res)[1] <- c("est")
  rownames(res) <- xn
  return(res)
}

#' Re-implemented [range()] Default S3 method for numerical objects
#'
#' This function returns `c(NA, NA)` instead of `c(-Inf, Inf)` for zero-length data
#' without any warnings.
#'
#' @param x (`numeric`)\cr a sequence of numbers for which the range is computed.
#' @param na.rm (`logical`)\cr indicating if `NA` should be omitted.
#' @param finite (`logical`)\cr indicating if non-finite elements should be removed.
#'
#' @return A 2-element vector of class `numeric`.
#'
#' @keywords internal
range_noinf <- function(x, na.rm = FALSE, finite = FALSE) { # nolint

  checkmate::assert_numeric(x)

  # `finite` takes precedence over `na.rm`: dropping non-finite values also
  # drops the missing ones.
  values <- if (finite) {
    x[is.finite(x)]
  } else if (na.rm) {
    x[!is.na(x)]
  } else {
    x
  }

  # Nothing left: return a pair of NAs with the same storage type as the input.
  if (length(values) == 0) {
    empty_range <- c(NA, NA)
    mode(empty_range) <- typeof(values)
    return(empty_range)
  }

  c(min(values, na.rm = FALSE), max(values, na.rm = FALSE))
}

#' Utility function to create label for confidence interval
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#'
#' @return A `string`.
#'
#' @export
f_conf_level <- function(conf_level) {
  assert_proportion_value(conf_level)
  # Express the level as a percentage before appending the suffix.
  percent <- conf_level * 100
  paste0(percent, "% CI")
}

#' Utility function to create label for p-value
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param test_mean (`number`)\cr mean value to test under the null hypothesis.
#'
#' @return A `string`.
#'
#' @export
f_pval <- function(test_mean) {
  checkmate::assert_numeric(test_mean, len = 1)
  # Null hypothesis text, wrapped into the p-value label below.
  null_hypothesis <- paste0("H0: mean = ", test_mean)
  paste0("p-value (", null_hypothesis, ")")
}

#' Utility function to return a named list of covariate names.
#'
#' @param covariates (`character`)\cr a vector that can contain single variable names (such as
#'   `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
#'
#' @return A named `list` of `character` vector.
#'
#' @keywords internal
get_covariates <- function(covariates) {
  checkmate::assert_character(covariates)
  # Break interaction terms apart at "*", then clean up and de-duplicate.
  pieces <- unlist(strsplit(covariates, "\\*"))
  var_names <- unique(trimws(pieces))
  stats::setNames(as.list(var_names), var_names)
}

#' Replicate Entries of a Vector if Required
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Replicate entries of a vector if required.
#'
#' @inheritParams argument_convention
#' @param n (`count`)\cr how many entries we need.
#'
#' @return `x` if it has the required length already or is `NULL`,
#'   otherwise if it is scalar the replicated version of it with `n` entries.
#'
#' @note This function will fail if `x` is not of length `n` and/or is not a scalar.
#'
#' @export
to_n <- function(x, n) {
  # `NULL` is passed through untouched.
  if (is.null(x)) {
    return(NULL)
  }

  len <- length(x)
  # A scalar is recycled to the requested number of entries.
  if (len == 1) {
    return(rep(x, n))
  }
  # Anything else must already have exactly `n` entries.
  if (len != n) {
    stop("dimension mismatch")
  }
  x
}

#' Check Element Dimension
#'
#' Checks if the elements in `...` have the same dimension.
#'
#' @param ... (`data.frame`s or `vector`s)\cr any data frames/vectors.
#' @param omit_null (`logical`)\cr whether `NULL` elements in `...` should be omitted from the check.
#'
#' @return A `logical` value.
#'
#' @keywords internal
check_same_n <- function(..., omit_null = TRUE) {
  dots <- list(...)

  # Unnamed arguments have `NULL` or empty names. Give them positional labels ("..1", "..2", ...)
  # so that every element has a usable name in `Map()` and in the error messages.
  dot_names <- names(dots)
  if (is.null(dot_names)) {
    dot_names <- rep("", length(dots))
  }
  unnamed <- is.na(dot_names) | dot_names == ""
  dot_names[unnamed] <- paste0("..", seq_along(dots))[unnamed]
  names(dots) <- dot_names

  # Number of rows (data frames) or elements (atomic vectors) of each argument.
  # `NULL` arguments are recorded as `NA` so that they can be dropped below.
  n_list <- Map(
    function(x, name) {
      if (is.null(x)) {
        if (omit_null) {
          NA_integer_
        } else {
          stop("arg ", name, " is not supposed to be NULL")
        }
      } else if (is.data.frame(x)) {
        nrow(x)
      } else if (is.atomic(x)) {
        length(x)
      } else {
        stop("data structure for ", name, " is currently not supported")
      }
    },
    dots, dot_names
  )

  n <- stats::na.omit(unlist(n_list))

  # All remaining sizes are compared against the first one.
  if (length(unique(n)) > 1) {
    sel <- which(n != n[1])
    stop("dimension mismatch: ", paste(names(n)[sel], collapse = ", "), " do not have N=", n[1])
  }

  TRUE
}

#' Make Names Without Dots
#'
#' @param nams (`character`)\cr vector of original names.
#'
#' @return A `character` `vector` of proper names, which does not use dots in contrast to [make.names()].
#'
#' @keywords internal
make_names <- function(nams) {
  # `make.names()` produces syntactically valid names but may introduce dots;
  # the dots are then removed as literal characters, not as a regex wildcard.
  gsub(pattern = ".", replacement = "", x = make.names(nams), fixed = TRUE)
}

#' Conversion of Months to Days
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Conversion of Months to Days. This is an approximate calculation because it
#' treats every month as having the average length of 30.4375 days.
#'
#' @param x (`numeric`)\cr time in months.
#'
#' @return A `numeric` vector with the time in days.
#'
#' @examples
#' x <- c(13.25, 8.15, 1, 2.834)
#' month2day(x)
#'
#' @export
month2day <- function(x) {
  checkmate::assert_numeric(x)
  # Average month length in days used for the approximation.
  days_per_month <- 30.4375
  x * days_per_month
}

#' Conversion of Days to Months
#'
#' @param x (`numeric`)\cr time in days.
#'
#' @return A `numeric` vector with the time in months.
#'
#' @examples
#' x <- c(403, 248, 30, 86)
#' day2month(x)
#'
#' @export
day2month <- function(x) {
  checkmate::assert_numeric(x)
  # Average month length in days used for the approximation.
  days_per_month <- 30.4375
  x / days_per_month
}

#' Return an empty numeric if all elements are `NA`.
#'
#' @param x (`numeric`)\cr vector.
#'
#' @return An empty `numeric` if all elements of `x` are `NA`, otherwise `x`.
#'
#' @examples
#' x <- c(NA, NA, NA)
#' # Internal function - empty_vector_if_na
#' @keywords internal
empty_vector_if_na <- function(x) {
  # At least one non-missing value: hand the input back unchanged.
  if (!all(is.na(x))) {
    return(x)
  }
  # Only missing values (or no values at all): collapse to an empty numeric vector.
  numeric()
}

#' Combine Two Vectors Element Wise
#'
#' @param x (`vector`)\cr first vector to combine.
#' @param y (`vector`)\cr second vector to combine.
#'
#' @return A `list` where each element combines corresponding elements of `x` and `y`.
#'
#' @examples
#' combine_vectors(1:3, 4:6)
#'
#' @export
combine_vectors <- function(x, y) {
  checkmate::assert_vector(x)
  checkmate::assert_vector(y, len = length(x))

  # Stack the two vectors as rows so that each column holds one pair (x[i], y[i]).
  stacked <- rbind(x, y)
  pairs <- lapply(as.data.frame(stacked), `c`)

  # The column names generated along the way carry no meaning and are dropped.
  unname(pairs)
}

#' Extract Elements by Name
#'
#' This utility function extracts elements from a vector `x` by `names`.
#' Differences to the standard `[` function are:
#'
#' - If `x` is `NULL`, then `NULL` is always returned (same as the base function).
#' - If `x` is not `NULL`, then the intersection of its names is made with `names` and those
#'   elements are returned. That is, `names` which don't appear in `x` are not returned as `NA`s.
#'
#' @param x (named `vector`)\cr where to extract named elements from.
#' @param names (`character`)\cr vector of names to extract.
#'
#' @return `NULL` if `x` is `NULL`, otherwise the extracted elements from `x`.
#'
#' @keywords internal
extract_by_name <- function(x, names) {
  if (is.null(x)) {
    return(NULL)
  }
  checkmate::assert_named(x)
  checkmate::assert_character(names)

  # Only requested names that actually occur in `x` are extracted, so no `NA` entries are created.
  present <- intersect(names(x), names)
  if (length(present) == 0) {
    return(NULL)
  }
  x[present]
}

#' Labels for Adverse Event Baskets
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param aesi (`character`)\cr with standardized `MedDRA` query name (e.g. `SMQzzNAM`) or customized query
#'   name (e.g. `CQzzNAM`).
#' @param scope (`character`)\cr with scope of query (e.g. `SMQzzSC`).
#'
#' @return A `string` with the standard label for the `AE` basket.
#'
#' @examples
#' adae <- tern_ex_adae
#'
#' # Standardized query label includes scope.
#' aesi_label(adae$SMQ01NAM, scope = adae$SMQ01SC)
#'
#' # Customized query label.
#' aesi_label(adae$CQ01NAM)
#'
#' @export
aesi_label <- function(aesi, scope = NULL) {
  checkmate::assert_character(aesi)
  checkmate::assert_character(scope, null.ok = TRUE)

  # Read the variable label first, before `aesi` is reduced to its distinct values.
  fallback_label <- obj_label(aesi)
  aesi_values <- unique(sas_na(aesi))
  aesi_values <- aesi_values[!is.na(aesi_values)]

  # Without exactly one distinct non-missing query name, fall back to the variable label.
  if (length(aesi_values) != 1) {
    return(fallback_label)
  }
  # Customized queries carry no scope: the query name alone is the label.
  if (is.null(scope)) {
    return(aesi_values)
  }

  # Standardized queries: append the (single) distinct non-missing scope in parentheses.
  scope <- unique(sas_na(scope))
  scope <- scope[!is.na(scope)]
  checkmate::assert_string(scope)
  paste0(aesi_values, " (", scope, ")")
}

#' Indicate Study Arm Variable in Formula
#'
#' We use `study_arm` to indicate the study arm variable in `tern` formulas.
#'
#' @param x arm information
#'
#' @return `x`
#'
#' @keywords internal
study_arm <- function(x) {
  # Capture the expression supplied by the caller (e.g. the column name in a formula)
  # and store its text as the `varname` attribute of the returned value.
  arm_expr <- substitute(x)
  structure(x, varname = deparse(arm_expr))
}

#' Smooth Function with Optional Grouping
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This produces `loess` smoothed estimates of `y` with Student confidence intervals.
#'
#' @param df (`data.frame`)\cr data set containing all analysis variables.
#' @param x (`character`)\cr value with x column name.
#' @param y (`character`)\cr value with y column name.
#' @param groups (`character`)\cr vector with optional grouping variables names.
#' @param level (`numeric`)\cr level of confidence interval to use (0.95 by default).
#'
#' @return A `data.frame` with original `x`, smoothed `y`, `ylow`, and `yhigh`, and
#'   optional `groups` variables formatted as `factor` type.
#'
#' @export
get_smooths <- function(df, x, y, groups = NULL, level = 0.95) {
  checkmate::assert_data_frame(df)
  df_cols <- colnames(df)
  checkmate::assert_string(x)
  checkmate::assert_subset(x, df_cols)
  checkmate::assert_numeric(df[[x]])
  checkmate::assert_string(y)
  checkmate::assert_subset(y, df_cols)
  checkmate::assert_numeric(df[[y]])

  if (!is.null(groups)) {
    checkmate::assert_character(groups)
    checkmate::assert_subset(groups, df_cols)
  }

  # Fit a loess curve through (x_val, y_val) and return the fitted values together with a
  # two-sided Student confidence band. A two-sided interval with coverage `level` needs the
  # (1 + level) / 2 quantile of the t distribution; using the `level` quantile directly would
  # only give coverage 2 * level - 1.
  smooth_band <- function(x_val, y_val) {
    plx <- stats::predict(stats::loess(y_val ~ x_val), se = TRUE)
    half_width <- stats::qt((1 + level) / 2, plx$df) * plx$se.fit
    data.frame(
      x = x_val,
      y = plx$fit,
      ylow = plx$fit - half_width,
      yhigh = plx$fit + half_width
    )
  }

  if (is.null(groups)) {
    cc <- stats::complete.cases(df[c(x, y)])
    df_c <- df[cc, ]
    return(smooth_band(df_c[[x]], df_c[[y]]))
  }

  cc <- stats::complete.cases(df[c(x, y, groups)])
  df_c <- df[cc, c(x, y, groups)]
  df_c[groups] <- lapply(df_c[groups], as.factor)

  # Each group combination is smoothed separately. The group columns are attached inside
  # every chunk, so the labels cannot get out of step with the order in which `by()`
  # returns the chunks (the first grouping variable varies fastest there).
  df_smooth_raw <- by(df_c, df_c[groups], function(d) {
    chunk <- smooth_band(d[[x]], d[[y]])
    chunk[groups] <- d[groups]
    chunk
  })

  do.call(rbind, df_smooth_raw)
}

#' Number of Available (Non-Missing Entries) in a Vector
#'
#' Small utility function for better readability.
#'
#' @param x (`any`)\cr vector in which to count non-missing values.
#'
#' @return Number of non-missing values.
#'
#' @keywords internal
n_available <- function(x) {
  # Flag every entry that is not missing, then count the flags.
  is_present <- !is.na(x)
  sum(is_present)
}

#' Reapply Variable Labels
#'
#' This is a helper function that is used in tests.
#'
#' @param x (`vector`)\cr vector of elements that needs new labels.
#' @param varlabels (`character`)\cr vector of labels for `x`.
#' @param ... further parameters to be added to the list.
#'
#' @return `x` with variable labels reapplied.
#'
#' @export
reapply_varlabels <- function(x, varlabels, ...) {
  # Labels can be supplied through `varlabels` and as additional named arguments.
  named_labels <- c(as.list(varlabels), list(...))

  # Read the current labels, overwrite the requested entries by name, and write them back.
  current_labels <- formatters::var_labels(x)
  current_labels[names(named_labels)] <- as.character(named_labels)
  formatters::var_labels(x) <- current_labels
  x
}

# Wrapper function of survival::clogit so that when model fitting failed, a more useful message would show
clogit_with_tryCatch <- function(formula, data, ...) { # nolint
  # Any error raised while fitting is replaced by a single, more descriptive message.
  on_failure <- function(e) stop("model not built successfully with survival::clogit")

  tryCatch(
    survival::clogit(formula = formula, data = data, ...),
    error = on_failure
  )
}

#' Control Function for Descriptive Statistics
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Sets a list of parameters for summaries of descriptive statistics. Typically used internally to specify
#' details for [s_summary()].
#'
#' @inheritParams argument_convention
#' @param quantiles (`numeric`)\cr of length two to specify the quantiles to calculate.
#' @param quantile_type (`numeric`)\cr between 1 and 9 selecting quantile algorithms to be used.
#'   Default is set to 2 as this matches the default quantile algorithm in SAS `proc univariate` set by `QNTLDEF=5`.
#'   This differs from R's default. See more about `type` in [stats::quantile()].
#' @param test_mean (`numeric`)\cr to test against the mean under the null hypothesis when calculating p-value.
#'
#' @return A list of components with the same names as the arguments.
#'
#' @export
control_summarize_vars <- function(conf_level = 0.95,
                                   quantiles = c(0.25, 0.75),
                                   quantile_type = 2,
                                   test_mean = 0) {
  checkmate::assert_vector(quantiles, len = 2)
  checkmate::assert_int(quantile_type, lower = 1, upper = 9)
  checkmate::assert_numeric(test_mean)
  # Each quantile is validated individually; the results of the checks are not needed.
  invisible(lapply(quantiles, assert_proportion_value))
  assert_proportion_value(conf_level)

  # The validated settings are returned under the same names as the arguments.
  list(
    conf_level = conf_level,
    quantiles = quantiles,
    quantile_type = quantile_type,
    test_mean = test_mean
  )
}

#' Format Function for Descriptive Statistics
#'
#' Returns format patterns for descriptive statistics. The format is understood by the `rtables`.
#'
#' @param type (`string`)\cr choice of a summary data type. Only `counts` and `numeric` types are currently supported.
#'
#' @return A named `vector` of default statistic formats for the given data type.
#'
#' @keywords internal
summary_formats <- function(type = "numeric") {
  if (type == "counts") {
    # Formats for the count-based statistics. Because `count_fraction` is formatted by a
    # function rather than a format string, this branch returns a list.
    c(
      n = "xx.",
      count = "xx.",
      count_fraction = format_count_fraction,
      n_blq = "xx."
    )
  } else {
    # Formats for the numeric statistics: any `type` other than "counts" ends up here.
    # Every statistic returned by `s_summary.numeric()` has an entry, including
    # `geom_mean_ci`, which uses the same interval format as the other confidence intervals.
    c(
      n = "xx.",
      sum = "xx.x",
      mean = "xx.x",
      sd = "xx.x",
      se = "xx.x",
      mean_sd = "xx.x (xx.x)",
      mean_se = "xx.x (xx.x)",
      mean_ci = "(xx.xx, xx.xx)",
      mean_sei = "(xx.xx, xx.xx)",
      mean_sdi = "(xx.xx, xx.xx)",
      mean_pval = "xx.xx",
      median = "xx.x",
      mad = "xx.x",
      median_ci = "(xx.xx, xx.xx)",
      quantiles = "xx.x - xx.x",
      iqr = "xx.x",
      range = "xx.x - xx.x",
      cv = "xx.x",
      min = "xx.x",
      max = "xx.x",
      median_range = "xx.x (xx.x - xx.x)",
      geom_mean = "xx.x",
      geom_mean_ci = "(xx.xx, xx.xx)",
      geom_cv = "xx.x"
    )
  }
}

#' Label Function for Descriptive Statistics
#'
#' Returns labels of descriptive statistics for numeric variables.
#'
#' @return A named `vector` of default statistic labels.
#'
#' @keywords internal
summary_labels <- function() {
  # Labels are grouped by the kind of statistic and concatenated in the original order.
  mean_based <- c(
    mean = "Mean",
    sum = "Sum",
    sd = "SD",
    se = "SE",
    mean_sd = "Mean (SD)",
    mean_se = "Mean (SE)"
  )
  median_based <- c(
    median = "Median",
    mad = "Median Absolute Deviation",
    iqr = "IQR",
    range = "Min - Max",
    median_range = "Median (Min - Max)"
  )
  remaining <- c(
    cv = "CV (%)",
    min = "Minimum",
    max = "Maximum",
    geom_mean = "Geometric Mean",
    geom_cv = "CV % Geometric Mean",
    n = "n"
  )
  c(mean_based, median_based, remaining)
}

#' Summarize Variables
#'
#' @description `r lifecycle::badge("stable")`
#'
#' We use the S3 generic function [s_summary()] to implement summaries for different `x` objects. This
#' is used as a statistics function in combination with the analyze function [summarize_vars()].
#'
#' @inheritParams argument_convention
#'
#' @name summarize_variables
NULL

#' @describeIn summarize_variables S3 generic function to produce a variable summary.
#'
#' @return
#' * `s_summary()` returns different statistics depending on the class of `x`.
#'
#' @export
s_summary <- function(x,
                      na.rm = TRUE, # nolint
                      denom,
                      .N_row, # nolint
                      .N_col, # nolint
                      .var,
                      ...) {
  # `na.rm` is validated once here, before any method runs.
  checkmate::assert_flag(na.rm)
  # Dispatch on the class of `x`, the first argument.
  UseMethod("s_summary")
}

#' @describeIn summarize_variables Method for `numeric` class.
#'
#' @param control (`list`)\cr parameters for descriptive statistics details, specified by using
#'   the helper function [control_summarize_vars()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for mean and median.
#'   * `quantiles` (`numeric`)\cr vector of length two to specify the quantiles.
#'   * `quantile_type` (`numeric`)\cr between 1 and 9 selecting quantile algorithms to be used.
#'     See more about `type` in [stats::quantile()].
#'   * `test_mean` (`numeric`)\cr value to test against the mean under the null hypothesis when calculating p-value.
#'
#' @return
#'   * If `x` is of class `numeric`, returns a `list` with the following named `numeric` items:
#'     * `n`: The [length()] of `x`.
#'     * `sum`: The [sum()] of `x`.
#'     * `mean`: The [mean()] of `x`.
#'     * `sd`: The [stats::sd()] of `x`.
#'     * `se`: The standard error of `x` mean, i.e.: (`sd(x) / sqrt(length(x))`).
#'     * `mean_sd`: The [mean()] and [stats::sd()] of `x`.
#'     * `mean_se`: The [mean()] of `x` and its standard error (see above).
#'     * `mean_ci`: The CI for the mean of `x` (from [stat_mean_ci()]).
#'     * `mean_sei`: The SE interval for the mean of `x`, i.e.: ([mean()] -/+ [stats::sd()] / [sqrt()]).
#'     * `mean_sdi`: The SD interval for the mean of `x`, i.e.: ([mean()] -/+ [stats::sd()]).
#'     * `mean_pval`: The two-sided p-value of the mean of `x` (from [stat_mean_pval()]).
#'     * `median`: The [stats::median()] of `x`.
#'     * `mad`: The median absolute deviation of `x`, i.e.: the [stats::median()] of `abs(xc)`,
#'       where `xc` = `x` - [stats::median()]. No scaling constant is applied.
#'     * `median_ci`: The CI for the median of `x` (from [stat_median_ci()]).
#'     * `quantiles`: Two sample quantiles of `x` (from [stats::quantile()]).
#'     * `iqr`: The [stats::IQR()] of `x`.
#'     * `range`: The [range_noinf()] of `x`.
#'     * `min`: The [min()] of `x`.
#'     * `max`: The [max()] of `x`.
#'     * `median_range`: The [median()] and [range_noinf()] of `x`.
#'     * `cv`: The coefficient of variation of `x`, i.e.: ([stats::sd()] / [mean()] * 100).
#'     * `geom_mean`: The geometric mean of `x`, i.e.: (`exp(mean(log(x)))`).
#'     * `geom_mean_ci`: The CI for the geometric mean of `x` (from [stat_mean_ci()]).
#'     * `geom_cv`: The geometric coefficient of variation of `x`, i.e.: (`sqrt(exp(sd(log(x)) ^ 2) - 1) * 100`).
#'
#' @note
#' * If `x` is an empty vector, `NA` is returned. This is the expected feature so as to return `rcell` content in
#'   `rtables` when the intersection of a column and a row delimits an empty data selection.
#' * When the `mean` function is applied to an empty vector, `NA` will be returned instead of `NaN`, the latter
#'   being standard behavior in R.
#'
#' @method s_summary numeric
#'
#' @examples
#' # `s_summary.numeric`
#'
#' ## Basic usage: empty numeric returns NA-filled items.
#' s_summary(numeric())
#'
#' ## Management of NA values.
#' x <- c(NA_real_, 1)
#' s_summary(x, na.rm = TRUE)
#' s_summary(x, na.rm = FALSE)
#'
#' x <- c(NA_real_, 1, 2)
#' s_summary(x, stats = NULL)
#'
#' ## Benefits in `rtables` constructions:
#' library(rtables)
#' dta_test <- data.frame(
#'   Group = rep(LETTERS[1:3], each = 2),
#'   sub_group = rep(letters[1:2], each = 3),
#'   x = 1:6
#' )
#'
#' ## The summary obtained with `rtables`:
#' basic_table() %>%
#'   split_cols_by(var = "Group") %>%
#'   split_rows_by(var = "sub_group") %>%
#'   analyze(vars = "x", afun = s_summary) %>%
#'   build_table(df = dta_test)
#'
#' ## By comparison with `lapply`:
#' X <- split(dta_test, f = with(dta_test, interaction(Group, sub_group)))
#' lapply(X, function(x) s_summary(x$x))
#'
#' @export
s_summary.numeric <- function(x,
                              na.rm = TRUE, # nolint
                              denom,
                              .N_row, # nolint
                              .N_col, # nolint
                              .var,
                              control = control_summarize_vars(),
                              ...) {
  checkmate::assert_numeric(x)

  if (na.rm) {
    x <- x[!is.na(x)]
  }

  # From here on `x` only contains NAs if `na.rm = FALSE`; in that case the NAs
  # deliberately propagate into the statistics (hence `na.rm = FALSE` everywhere below).
  n_obs <- length(x)
  x_sd <- stats::sd(x, na.rm = FALSE)

  y <- list()

  y$n <- c("n" = n_obs)

  # `sum()` and `mean()` of an empty vector would give 0 and NaN; NA is reported instead.
  y$sum <- c("sum" = if (n_obs == 0) NA_real_ else sum(x, na.rm = FALSE))

  y$mean <- c("mean" = if (n_obs == 0) NA_real_ else mean(x, na.rm = FALSE))

  y$sd <- c("sd" = x_sd)

  y$se <- c("se" = x_sd / sqrt(length(stats::na.omit(x))))

  y$mean_sd <- c(y$mean, "sd" = x_sd)

  y$mean_se <- c(y$mean, y$se)

  mean_ci <- stat_mean_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE)
  y$mean_ci <- formatters::with_label(mean_ci, paste("Mean", f_conf_level(control$conf_level)))

  mean_sei <- y$mean[[1]] + c(-1, 1) * x_sd / sqrt(y$n)
  names(mean_sei) <- c("mean_sei_lwr", "mean_sei_upr")
  y$mean_sei <- formatters::with_label(mean_sei, "Mean -/+ 1xSE")

  mean_sdi <- y$mean[[1]] + c(-1, 1) * x_sd
  names(mean_sdi) <- c("mean_sdi_lwr", "mean_sdi_upr")
  y$mean_sdi <- formatters::with_label(mean_sdi, "Mean -/+ 1xSD")

  mean_pval <- stat_mean_pval(x, test_mean = control$test_mean, na.rm = FALSE, n_min = 2)
  y$mean_pval <- formatters::with_label(mean_pval, paste("Mean", f_pval(control$test_mean)))

  y$median <- c("median" = stats::median(x, na.rm = FALSE))

  # Median absolute deviation: the deviations from the median must be taken in absolute value,
  # otherwise positive and negative deviations cancel and the result is (close to) zero.
  y$mad <- c("mad" = stats::median(abs(x - y$median), na.rm = FALSE))

  median_ci <- stat_median_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE)
  y$median_ci <- formatters::with_label(median_ci, paste("Median", f_conf_level(control$conf_level)))

  # `stats::quantile()` and `stats::IQR()` raise an error on NAs when `na.rm = FALSE`,
  # so NA results are returned explicitly in that case.
  has_na <- any(is.na(x))
  q <- control$quantiles
  if (has_na) {
    qnts <- rep(NA_real_, length(q))
  } else {
    qnts <- stats::quantile(x, probs = q, type = control$quantile_type, na.rm = FALSE)
  }
  names(qnts) <- paste("quantile", q, sep = "_")
  y$quantiles <- formatters::with_label(qnts, paste0(paste(paste0(q * 100, "%"), collapse = " and "), "-ile"))

  y$iqr <- c("iqr" = if (has_na) {
    NA_real_
  } else {
    stats::IQR(x, na.rm = FALSE, type = control$quantile_type)
  })

  y$range <- stats::setNames(range_noinf(x, na.rm = FALSE), c("min", "max"))
  y$min <- y$range[1]
  y$max <- y$range[2]

  y$median_range <- formatters::with_label(c(y$median, y$range), "Median (Min - Max)")

  y$cv <- c("cv" = unname(y$sd) / unname(y$mean) * 100)

  # The logarithm is only defined for positive values: zero and negative values are set to NA,
  # which then propagates into the geometric statistics.
  x_positive_only <- x
  x_positive_only[x_positive_only <= 0] <- NA
  y$geom_mean <- c("geom_mean" = exp(mean(log(x_positive_only), na.rm = FALSE)))
  geom_mean_ci <- stat_mean_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE, geom_mean = TRUE)
  y$geom_mean_ci <- formatters::with_label(geom_mean_ci, paste("Geometric Mean", f_conf_level(control$conf_level)))

  y$geom_cv <- c("geom_cv" = sqrt(exp(stats::sd(log(x_positive_only), na.rm = FALSE) ^ 2) - 1) * 100) # styler: off

  y
}

#' @describeIn summarize_variables Method for `factor` class.
#'
#' @param denom (`string`)\cr choice of denominator for factor proportions. Options are:
#'   * `n`: number of values in this row and column intersection.
#'   * `N_row`: total number of values in this row across columns.
#'   * `N_col`: total number of values in this column across rows.
#'
#' @return
#'   * If `x` is of class `factor` or converted from `character`, returns a `list` with named `numeric` items:
#'     * `n`: The [length()] of `x`.
#'     * `count`: A list with the number of cases for each level of the factor `x`.
#'     * `count_fraction`: Similar to `count` but also includes the proportion of cases for each level of the
#'       factor `x` relative to the denominator, or `0` if the denominator is zero.
#'
#' @note
#' * If `x` is an empty `factor`, a list is still returned for `counts` with one element
#'   per factor level. If there are no levels in `x`, the function fails.
#' * If factor variables contain `NA`, these `NA` values are excluded by default. To include `NA` values
#'   set `na.rm = FALSE` and missing values will be displayed as an `NA` level. Alternatively, an explicit
#'   factor level can be defined for `NA` values during pre-processing via [df_explicit_na()] - the
#'   default `na_level` (`"<Missing>"`) will also be excluded when `na.rm` is set to `TRUE`.
#'
#' @method s_summary factor
#'
#' @examples
#' # `s_summary.factor`
#'
#' ## Basic usage:
#' s_summary(factor(c("a", "a", "b", "c", "a")))
#' # Empty factor returns NA-filled items.
#' s_summary(factor(levels = c("a", "b", "c")))
#'
#' ## Management of NA values.
#' x <- factor(c(NA, "Female"))
#' x <- explicit_na(x)
#' s_summary(x, na.rm = TRUE)
#' s_summary(x, na.rm = FALSE)
#'
#' ## Different denominators.
#' x <- factor(c("a", "a", "b", "c", "a"))
#' s_summary(x, denom = "N_row", .N_row = 10L)
#' s_summary(x, denom = "N_col", .N_col = 20L)
#'
#' @export
s_summary.factor <- function(x,
                             na.rm = TRUE, # nolint
                             denom = c("n", "N_row", "N_col"),
                             .N_row, # nolint
                             .N_col, # nolint
                             ...) {
  assert_valid_factor(x)
  denom <- match.arg(denom)

  # Missing values are either removed (together with the "<Missing>" level)
  # or relabelled as "NA" so that they are counted like any other level.
  x <- if (na.rm) {
    fct_discard(x[!is.na(x)], "<Missing>")
  } else {
    explicit_na(x, label = "NA")
  }

  level_counts <- as.list(table(x, useNA = "ifany"))

  # Only the denominator that was asked for is evaluated, so the unused `.N_*` may be missing.
  dn <- switch(denom,
    n = length(x),
    N_row = .N_row,
    N_col = .N_col
  )

  # Pair each count with its fraction of the denominator; a zero denominator gives fraction 0.
  add_fraction <- function(cnt) {
    c(cnt, ifelse(dn > 0, cnt / dn, 0))
  }

  list(
    n = length(x),
    count = level_counts,
    count_fraction = lapply(level_counts, add_fraction),
    # Number of entries whose text matches one of the patterns below.
    n_blq = sum(grepl("BLQ|LTR|<[1-9]", x))
  )
}

#' @describeIn summarize_variables Method for `character` class. This makes an automatic
#'   conversion to factor (with a warning) and then forwards to the method for factors.
#'
#' @param verbose (`logical`)\cr Defaults to `TRUE`, which prints out warnings and messages. It is mainly used
#'   to print out information about factor casting.
#'
#' @note
#' * Automatic conversion of character to factor does not guarantee that the table
#'   can be generated correctly. In particular for sparse tables this very likely can fail.
#'   It is therefore better to always pre-process the dataset such that factors are manually
#'   created from character variables before passing the dataset to [rtables::build_table()].
#'
#' @method s_summary character
#'
#' @examples
#' # `s_summary.character`
#'
#' ## Basic usage:
#' s_summary(c("a", "a", "b", "c", "a"), .var = "x", verbose = FALSE)
#' s_summary(c("a", "a", "b", "c", "a", ""), .var = "x", na.rm = FALSE, verbose = FALSE)
#'
#' @export
s_summary.character <- function(x,
                                na.rm = TRUE, # nolint
                                denom = c("n", "N_row", "N_col"),
                                .N_row, # nolint
                                .N_col, # nolint
                                .var,
                                verbose = TRUE,
                                ...) {
  # Convert to factor first; when missing values are kept they are given the level "NA".
  x_fct <- if (na.rm) {
    as_factor_keep_attributes(x, x_name = .var, verbose = verbose)
  } else {
    as_factor_keep_attributes(x, x_name = .var, verbose = verbose, na_level = "NA")
  }

  # Re-dispatch on the converted value. `.var` and `verbose` are not forwarded.
  s_summary(x = x_fct, na.rm = na.rm, denom = denom, .N_row = .N_row, .N_col = .N_col, ...)
}

#' @describeIn summarize_variables Method for `logical` class.
#'
#' @param denom (`string`)\cr choice of denominator for proportion. Options are:
#'   * `n`: number of values in this row and column intersection.
#'   * `N_row`: total number of values in this row across columns.
#'   * `N_col`: total number of values in this column across rows.
#'
#' @return
#'   * If `x` is of class `logical`, returns a `list` with named `numeric` items:
#'     * `n`: The [length()] of `x` (possibly after removing `NA`s).
#'     * `count`: Count of `TRUE` in `x`.
#'     * `count_fraction`: Count and proportion of `TRUE` in `x` relative to the denominator, or `NA` if the
#'       denominator is zero. Note that `NA`s in `x` are never counted and never lead to an `NA` count here.
#'
#' @method s_summary logical
#'
#' @examples
#' # `s_summary.logical`
#'
#' ## Basic usage:
#' s_summary(c(TRUE, FALSE, TRUE, TRUE))
#'
#' ## Management of NA values.
#' x <- c(NA, TRUE, FALSE)
#' s_summary(x, na.rm = TRUE)
#' s_summary(x, na.rm = FALSE)
#'
#' ## Different denominators.
#' x <- c(TRUE, FALSE, TRUE, TRUE)
#' s_summary(x, denom = "N_row", .N_row = 10L)
#' s_summary(x, denom = "N_col", .N_col = 20L)
#'
#' @export
s_summary.logical <- function(x,
                              na.rm = TRUE, # nolint
                              denom = c("n", "N_row", "N_col"),
                              .N_row, # nolint
                              .N_col, # nolint
                              ...) {
  denom <- match.arg(denom)
  if (na.rm) {
    x <- x[!is.na(x)]
  }

  n_values <- length(x)
  # NAs are always ignored when counting `TRUE`, even if they were kept in `x`.
  n_true <- sum(x, na.rm = TRUE)

  # Only the denominator that was asked for is evaluated, so the unused `.N_*` may be missing.
  dn <- switch(denom,
    n = n_values,
    N_row = .N_row,
    N_col = .N_col
  )

  list(
    n = n_values,
    count = n_true,
    # A zero denominator yields an NA fraction.
    count_fraction = c(n_true, ifelse(dn > 0, n_true / dn, NA)),
    n_blq = 0L
  )
}

#' @describeIn summarize_variables Formatted analysis function which is used as `afun` in `summarize_vars()`.
#'
#' @return
#' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @export
a_summary <- function(x,
                      ...,
                      .N_row, # nolint
                      .N_col, # nolint
                      .var) {
  # Dispatch on the class of `x`, the first argument.
  UseMethod("a_summary")
}

# Default formats and labels for the numeric statistics, computed once at top level.
# Both are passed to `make_afun()` when `a_summary.numeric` is built, and the names of the
# formats are the numeric statistics that `create_afun_summary()` offers for selection.
.a_summary_numeric_formats <- summary_formats()
.a_summary_numeric_labels <- summary_labels()

#' @describeIn summarize_variables Formatted analysis function method for `numeric` class.
#'
#' @examples
#' # `a_summary.numeric`
#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
#'
#' @export
a_summary.numeric <- make_afun(
  # Statistics are computed by `s_summary.numeric`; the formats and labels are the
  # defaults returned by `summary_formats()` and `summary_labels()`.
  s_summary.numeric,
  .formats = .a_summary_numeric_formats,
  .labels = .a_summary_numeric_labels
)

# Default formats for the count-based statistics; used by the factor, character and logical methods.
.a_summary_counts_formats <- summary_formats(type = "counts")

#' @describeIn summarize_variables Formatted analysis function method for `factor` class.
#'
#' @examples
#' # `a_summary.factor`
#' # We need to ungroup `count` and `count_fraction` first so that the rtables formatting
#' # functions can be applied correctly.
#' afun <- make_afun(
#'   getS3method("a_summary", "factor"),
#'   .ungroup_stats = c("count", "count_fraction")
#' )
#' afun(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
#'
#' @export
a_summary.factor <- make_afun(
  # Statistics are computed by `s_summary.factor` and formatted with the default count formats.
  # No `.ungroup_stats` is set here; callers that need one row per level set it themselves.
  s_summary.factor,
  .formats = .a_summary_counts_formats
)

#' @describeIn summarize_variables Formatted analysis function method for `character` class.
#'
#' @examples
#' # `a_summary.character`
#' afun <- make_afun(
#'   getS3method("a_summary", "character"),
#'   .ungroup_stats = c("count", "count_fraction")
#' )
#' afun(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
#'
#' @export
a_summary.character <- make_afun(
  # Statistics are computed by `s_summary.character` (which converts to factor first)
  # and formatted with the default count formats.
  s_summary.character,
  .formats = .a_summary_counts_formats
)

#' @describeIn summarize_variables Formatted analysis function method for `logical` class.
#'
#' @examples
#' # `a_summary.logical`
#' afun <- make_afun(
#'   getS3method("a_summary", "logical")
#' )
#' afun(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
#'
#' @export
a_summary.logical <- make_afun(
  # Statistics are computed by `s_summary.logical` and formatted with the default count formats.
  s_summary.logical,
  .formats = .a_summary_counts_formats
)

#' Constructor Function for [summarize_vars()] and [summarize_colvars()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Constructor function which creates a combined formatted analysis function.
#'
#' @inheritParams argument_convention
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return Combined formatted analysis function for use in [summarize_vars()].
#'
#' @note Since [a_summary()] is generic and we want customization of the formatting arguments
#'   via [rtables::make_afun()], we need to create another temporary generic function, with
#'   corresponding customized methods. Then in order for the methods to be found,
#'   we need to wrap them in a combined `afun`. Since this is required by two layout creating
#'   functions (and possibly others in the future), we provide a constructor that does this:
#'   [create_afun_summary()].
#'
#' @examples
#' # `create_afun_summary()` to create combined `afun`
#'
#' afun <- create_afun_summary(
#'   .stats = NULL,
#'   .formats = c(median = "xx."),
#'   .labels = c(median = "My median"),
#'   .indent_mods = c(median = 1L)
#' )
#' ## Fabricated dataset.
#' dta_test <- data.frame(
#'   USUBJID = rep(1:6, each = 3),
#'   PARAMCD = rep("lab", 6 * 3),
#'   AVISIT  = rep(paste0("V", 1:3), 6),
#'   ARM     = rep(LETTERS[1:3], rep(6, 3)),
#'   AVAL    = c(9:1, rep(NA, 9))
#' )
#'
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   analyze(vars = "AVAL", afun = afun)
#'
#' build_table(l, df = dta_test)
#'
#' @export
create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) {
  # The returned closure is the combined analysis function. `.stats`, `.formats`, `.labels`
  # and `.indent_mods` are only referenced inside it, and the temporary generic with its
  # methods is rebuilt on every call of the closure.
  function(x,
           ...,
           .N_row, # nolint
           .N_col, # nolint
           .var) {
    # Temporary S3 generic. Its methods are the local `afun.<class>` objects defined below,
    # so these objects must keep exactly these names.
    afun <- function(x, ...) {
      UseMethod("afun", x)
    }

    # Numeric input: statistics are selected among the names of the default numeric formats.
    numeric_stats <- afun_selected_stats(
      .stats,
      all_stats = names(.a_summary_numeric_formats)
    )
    # Only the formats, labels and indent modifiers of the selected statistics are passed on.
    afun.numeric <- make_afun( # nolint
      a_summary.numeric,
      .stats = numeric_stats,
      .formats = extract_by_name(.formats, numeric_stats),
      .labels = extract_by_name(.labels, numeric_stats),
      .indent_mods = extract_by_name(.indent_mods, numeric_stats)
    )

    # Factor, character and logical input share one selection, limited to
    # `n`, `count` and `count_fraction`.
    factor_stats <- afun_selected_stats(.stats, c("n", "count", "count_fraction"))
    # `count` and `count_fraction` are passed as `.ungroup_stats` for factor and character
    # input (they hold one value per level there), but not for logical input.
    ungroup_stats <- afun_selected_stats(.stats, c("count", "count_fraction"))
    afun.factor <- make_afun( # nolint
      a_summary.factor,
      .stats = factor_stats,
      .formats = extract_by_name(.formats, factor_stats),
      .labels = extract_by_name(.labels, factor_stats),
      .indent_mods = extract_by_name(.indent_mods, factor_stats),
      .ungroup_stats = ungroup_stats
    )

    afun.character <- make_afun( # nolint
      a_summary.character,
      .stats = factor_stats,
      .formats = extract_by_name(.formats, factor_stats),
      .labels = extract_by_name(.labels, factor_stats),
      .indent_mods = extract_by_name(.indent_mods, factor_stats),
      .ungroup_stats = ungroup_stats
    )

    afun.logical <- make_afun( # nolint
      a_summary.logical,
      .stats = factor_stats,
      .formats = extract_by_name(.formats, factor_stats),
      .labels = extract_by_name(.labels, factor_stats),
      .indent_mods = extract_by_name(.indent_mods, factor_stats)
    )

    # Dispatch on the class of `x` to one of the customized methods defined above.
    afun(
      x = x,
      ...,
      .N_row = .N_row,
      .N_col = .N_col,
      .var = .var
    )
  }
}

#' @describeIn summarize_variables Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... arguments passed to `s_summary()`.
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return
#' * `summarize_vars()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_summary()` to the table layout.
#'
#' @examples
#' ## Fabricated dataset.
#' dta_test <- data.frame(
#'   USUBJID = rep(1:6, each = 3),
#'   PARAMCD = rep("lab", 6 * 3),
#'   AVISIT  = rep(paste0("V", 1:3), 6),
#'   ARM     = rep(LETTERS[1:3], rep(6, 3)),
#'   AVAL    = c(9:1, rep(NA, 9))
#' )
#'
#' # `summarize_vars()` in `rtables` pipelines
#' ## Default output within a `rtables` pipeline.
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   summarize_vars(vars = "AVAL")
#'
#' build_table(l, df = dta_test)
#'
#' ## Select and format statistics output.
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   summarize_vars(
#'     vars = "AVAL",
#'     .stats = c("n", "mean_sd", "quantiles"),
#'     .formats = c("mean_sd" = "xx.x, xx.x"),
#'     .labels = c(n = "n", mean_sd = "Mean, SD", quantiles = c("Q1 - Q3"))
#'   )
#'
#' results <- build_table(l, df = dta_test)
#' as_html(results)
#'
#' ## Use arguments interpreted by `s_summary`.
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   summarize_vars(vars = "AVAL", na.rm = FALSE)
#'
#' results <- build_table(l, df = dta_test)
#'
#' ## Handle `NA` levels first when summarizing factors.
#' dta_test$AVISIT <- NA_character_
#' dta_test <- df_explicit_na(dta_test)
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   summarize_vars(vars = "AVISIT", na.rm = FALSE)
#'
#' results <- build_table(l, df = dta_test)
#' \donttest{
#' Viewer(results)
#' }
#'
#' @export
summarize_vars <- function(lyt,
                           vars,
                           var_labels = vars,
                           nested = TRUE,
                           ...,
                           na_level = NA_character_,
                           show_labels = "default",
                           table_names = vars,
                           section_div = NA_character_,
                           .stats = c("n", "mean_sd", "median", "range", "count_fraction"),
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = NULL) {
  # Analysis function carrying the requested statistics together with their formats, labels and indents.
  summary_afun <- create_afun_summary(.stats, .formats, .labels, .indent_mods)

  # Everything else is handed to `analyze()`: the dots travel as `extra_args`, `na_level` is passed
  # under the name `na_str`, and `inclNAs` is always switched on.
  analyze(
    lyt = lyt,
    vars = vars,
    var_labels = var_labels,
    afun = summary_afun,
    nested = nested,
    extra_args = list(...),
    na_str = na_level,
    inclNAs = TRUE,
    show_labels = show_labels,
    table_names = table_names,
    section_div = section_div
  )
}

#' Summary numeric variables in columns
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Layout-creating function which can be used for creating column-wise summary tables.
#' This function sets the analysis methods as column labels and is a wrapper for
#' [rtables::analyze_colvars()]. It was designed principally for PK tables.
#'
#' @inheritParams argument_convention
#' @inheritParams rtables::analyze_colvars
#' @param row_labels (`character`)\cr as this function works in columns space, usual `.labels`
#'   character vector applies on the column space. You can change the row labels by defining this
#'   parameter to a named character vector with names corresponding to the split values. It defaults
#'   to `NULL` and if it contains only one `string`, it will duplicate that as a row label.
#' @param do_summarize_row_groups (`flag`)\cr defaults to `FALSE` and applies the analysis to the current
#'   label rows. This is a wrapper of [rtables::summarize_row_groups()] and it can accept `labelstr`
#'   to define row labels. This behavior is not supported as we never need to overload row labels.
#' @param split_col_vars (`flag`)\cr defaults to `TRUE` and puts the analysis results onto the columns.
#'   This option allows you to add multiple instances of this function, also in a nested fashion,
#'   without adding more splits. This split must happen only one time on a single layout.
#'
#' @return
#' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
#' Adding this function to an `rtable` layout will summarize the given variables, arrange the output
#' in columns, and add it to the table layout.
#'
#' @note This is an experimental implementation of [rtables::summarize_row_groups()] and
#'   [rtables::analyze_colvars()] that may be subjected to changes as `rtables` extends its
#'   support to more complex analysis pipelines on the column space. For the same reasons,
#'   we encourage you to read the examples carefully and file issues for cases that differ from
#'   them.
#'
#'   Here `labelstr` behaves differently than usual. If it is not defined (default as `NULL`),
#'   row labels are assigned automatically to the split values in case of `rtables::analyze_colvars`
#'   (`do_summarize_row_groups = FALSE`, the default), and to the group label for
#'   `do_summarize_row_groups = TRUE`.
#'
#' @seealso [summarize_vars()], [rtables::analyze_colvars()].
#'
#' @examples
#' library(dplyr)
#'
#' # Data preparation
#' adpp <- tern_ex_adpp %>% h_pkparam_sort()
#'
#' lyt <- basic_table() %>%
#'   split_rows_by(var = "STRATA1", label_pos = "topleft") %>%
#'   split_rows_by(
#'     var = "SEX",
#'     label_pos = "topleft",
#'     child_label = "hidden"
#'   ) %>% # Removes duplicated labels
#'   analyze_vars_in_cols(vars = "AGE")
#' result <- build_table(lyt = lyt, df = adpp)
#' result
#'
#' # By selecting just some statistics and ad-hoc labels
#' lyt <- basic_table() %>%
#'   split_rows_by(var = "ARM", label_pos = "topleft") %>%
#'   split_rows_by(
#'     var = "SEX",
#'     label_pos = "topleft",
#'     child_labels = "hidden",
#'     split_fun = drop_split_levels
#'   ) %>%
#'   analyze_vars_in_cols(
#'     vars = "AGE",
#'     .stats = c("n", "cv", "geom_mean"),
#'     .labels = c(
#'       n = "aN",
#'       cv = "aCV",
#'       geom_mean = "aGeomMean"
#'     )
#'   )
#' result <- build_table(lyt = lyt, df = adpp)
#' result
#'
#' # Changing row labels
#' lyt <- basic_table() %>%
#'   analyze_vars_in_cols(
#'     vars = "AGE",
#'     row_labels = "some custom label"
#'   )
#' result <- build_table(lyt, df = adpp)
#' result
#'
#' # Pharmacokinetic parameters
#' lyt <- basic_table() %>%
#'   split_rows_by(
#'     var = "TLG_DISPLAY",
#'     split_label = "PK Parameter",
#'     label_pos = "topleft",
#'     child_label = "hidden"
#'   ) %>%
#'   analyze_vars_in_cols(
#'     vars = "AVAL"
#'   )
#' result <- build_table(lyt, df = adpp)
#' result
#'
#' # Multiple calls (summarize label and analyze underneath)
#' lyt <- basic_table() %>%
#'   split_rows_by(
#'     var = "TLG_DISPLAY",
#'     split_label = "PK Parameter",
#'     label_pos = "topleft"
#'   ) %>%
#'   analyze_vars_in_cols(
#'     vars = "AVAL",
#'     do_summarize_row_groups = TRUE # does a summarize level
#'   ) %>%
#'   split_rows_by("SEX",
#'     child_label = "hidden",
#'     label_pos = "topleft"
#'   ) %>%
#'   analyze_vars_in_cols(
#'     vars = "AVAL",
#'     split_col_vars = FALSE # avoids re-splitting the columns
#'   )
#' result <- build_table(lyt, df = adpp)
#' result
#'
#' @export
analyze_vars_in_cols <- function(lyt,
                                 vars,
                                 ...,
                                 .stats = c(
                                   "n",
                                   "mean",
                                   "sd",
                                   "se",
                                   "cv",
                                   "geom_cv"
                                 ),
                                 .labels = c(
                                   n = "n",
                                   mean = "Mean",
                                   sd = "SD",
                                   se = "SE",
                                   cv = "CV (%)",
                                   geom_cv = "CV % Geometric Mean"
                                 ),
                                 row_labels = NULL,
                                 do_summarize_row_groups = FALSE,
                                 split_col_vars = TRUE,
                                 .indent_mods = NULL,
                                 nested = TRUE,
                                 na_level = NULL,
                                 .formats = NULL) {
  # Adds one column per statistic in `.stats` (unless `split_col_vars = FALSE`) and fills the cells
  # either through `summarize_row_groups()` (content rows) or `analyze_colvars()` (analysis rows).
  # Extra arguments in `...` are forwarded to `s_summary()`.
  checkmate::assert_string(na_level, null.ok = TRUE)
  checkmate::assert_character(row_labels, null.ok = TRUE)
  checkmate::assert_int(.indent_mods, null.ok = TRUE)
  checkmate::assert_flag(nested)
  checkmate::assert_flag(split_col_vars)
  checkmate::assert_flag(do_summarize_row_groups)

  # `.indent_mods` is a validated scalar (or NULL), so a plain `if` is enough; `unname()` keeps the
  # attribute-free value that the previous `ifelse()` call produced.
  indent_mod <- if (is.null(.indent_mods)) 0L else unname(.indent_mods)

  # Automatic assignment of formats
  if (is.null(.formats)) {
    # General values
    sf_numeric <- summary_formats("numeric")
    sf_counts <- summary_formats("counts")[-1]
    formats_v <- c(sf_numeric, sf_counts)
  } else {
    formats_v <- .formats
  }

  # Check for vars in the case that one or more are used:
  # a single variable is recycled over all statistics, otherwise lengths must agree.
  if (length(vars) == 1) {
    vars <- rep(vars, length(.stats))
  } else if (length(vars) != length(.stats)) {
    stop(
      "Analyzed variables (vars) does not have the same ",
      "number of elements of specified statistics (.stats)."
    )
  }

  if (split_col_vars) {
    # Checking there is not a previous identical column split
    clyt <- tail(clayout(lyt), 1)[[1]]

    # The split this call would add, built on an empty layout so it can be compared to existing ones.
    dummy_lyt <- split_cols_by_multivar(
      lyt = basic_table(),
      vars = vars,
      varlabels = .labels[.stats]
    )

    # `vapply()` guarantees a logical vector (also for an empty column layout).
    same_split <- vapply(clyt, identical, logical(1), y = get_last_col_split(dummy_lyt))
    if (any(same_split)) {
      stop(
        "Column split called again with the same values. ",
        "This can create many unwanted columns. Please consider adding ",
        "split_col_vars = FALSE to the last call of ",
        deparse(sys.calls()[[sys.nframe() - 1]]), "."
      )
    }

    # Main col split
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = vars,
      varlabels = .labels[.stats]
    )
  }

  if (do_summarize_row_groups) {
    if (length(unique(vars)) > 1) {
      stop("When using do_summarize_row_groups only one label level var should be inserted.")
    }

    # Function list for do_summarize_row_groups. Slightly different handling of labels:
    # a single `row_labels` string is not used here, the label stays `labelstr`.
    cfun_list <- Map(
      function(stat) {
        function(u, .spl_context, labelstr, ...) {
          # Statistic
          res <- s_summary(u, ...)[[stat]]

          # Label check and replacement
          if (length(row_labels) > 1) {
            if (!(labelstr %in% names(row_labels))) {
              stop(
                "Replacing the labels in do_summarize_row_groups needs a named vector ",
                "that contains the split values. In the current split variable ",
                .spl_context$split[nrow(.spl_context)],
                " the labelstr value (split value by default) ", labelstr, " is not in",
                " row_labels names: ", paste(names(row_labels), collapse = ", ")
              )
            }
            lbl <- unlist(row_labels[labelstr])
          } else {
            lbl <- labelstr
          }

          # Cell creation
          rcell(res,
            label = lbl,
            format = formats_v[names(formats_v) == stat][[1]],
            format_na_str = na_level,
            indent_mod = indent_mod
          )
        }
      },
      stat = .stats
    )

    # Main call to rtables
    summarize_row_groups(
      lyt = lyt,
      var = unique(vars),
      cfun = cfun_list,
      extra_args = list(...)
    )
  } else {
    # Function list for analyze_colvars
    afun_list <- Map(
      function(stat) {
        function(u, .spl_context, ...) {
          # Main statistics
          res <- s_summary(u, ...)[[stat]]

          # Label from context: value of the innermost split
          label_from_context <- .spl_context$value[nrow(.spl_context)]

          # Label switcher: no `row_labels` -> split value; several -> lookup by split value;
          # exactly one -> that string is used for every row.
          if (is.null(row_labels)) {
            lbl <- label_from_context
          } else {
            if (length(row_labels) > 1) {
              if (!(label_from_context %in% names(row_labels))) {
                stop(
                  "Replacing the labels in analyze_vars_in_cols needs a named vector ",
                  "that contains the split values. In the current split variable ",
                  .spl_context$split[nrow(.spl_context)],
                  " the split value ", label_from_context, " is not in",
                  " row_labels names: ", paste(names(row_labels), collapse = ", ")
                )
              }
              lbl <- unlist(row_labels[label_from_context])
            } else {
              lbl <- row_labels
            }
          }

          # Cell creation
          rcell(res,
            label = lbl,
            format = formats_v[names(formats_v) == stat][[1]],
            format_na_str = na_level,
            indent_mod = indent_mod
          )
        }
      },
      stat = .stats
    )

    # Main call to rtables
    analyze_colvars(lyt,
      afun = afun_list,
      nested = nested,
      extra_args = list(...)
    )
  }
}

# Help function
get_last_col_split <- function(lyt) {
  # Take the last element of the column layout, then the last split stored inside it.
  last_col_element <- tail(clayout(lyt), 1)[[1]]
  tail(last_col_element, 1)[[1]]
}

#' Cox Regression Helper: Interactions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Test and estimate the effect of a treatment in interaction with a covariate.
#' The effect is estimated as the HR of the tested treatment for a given level
#' of the covariate, in comparison to the treatment control.
#'
#' @inheritParams argument_convention
#' @param x (`numeric` or `factor`)\cr the values of the covariate to be tested.
#' @param effect (`string`)\cr the name of the effect to be tested and estimated.
#' @param covar (`string`)\cr the name of the covariate in the model.
#' @param mod (`coxph`)\cr the Cox regression model.
#' @param label (`string`)\cr the label to be returned as `term_label`.
#' @param control (`list`)\cr a list of controls as returned by [control_coxreg()].
#' @param ... see methods.
#'
#' @examples
#' library(survival)
#'
#' set.seed(1, kind = "Mersenne-Twister")
#'
#' # Testing dataset [survival::bladder].
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   data.frame(
#'     time = stop,
#'     status = event,
#'     armcd = as.factor(rx),
#'     covar1 = as.factor(enum),
#'     covar2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4,
#'       labels = c("F", "F", "M", "M")
#'     )
#'   )
#' )
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#'
#' plot(
#'   survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder),
#'   lty = 2:4,
#'   xlab = "Months",
#'   col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
#' )
#'
#' @name cox_regression_inter
NULL

#' @describeIn cox_regression_inter S3 generic helper function to determine interaction effect.
#'
#' @return
#' * `h_coxreg_inter_effect()` returns a `data.frame` of covariate interaction effects consisting of the following
#'   variables: `effect`, `term`, `term_label`, `level`, `n`, `hr`, `lcl`, `ucl`, `pval`, and `pval_inter`.
#'
#' @export
h_coxreg_inter_effect <- function(x, effect, covar, mod, label, control, ...) {
  # S3 generic: the method is selected from the class of the covariate values `x`
  # (the first argument, which `UseMethod()` dispatches on by default).
  UseMethod("h_coxreg_inter_effect")
}

#' @describeIn cox_regression_inter Method for `numeric` class. Estimates the interaction with a `numeric` covariate.
#'
#' @method h_coxreg_inter_effect numeric
#'
#' @param at (`list`)\cr a list with items named after the covariate, every
#'   item is a vector of levels at which the interaction should be estimated.
#'
#' @export
h_coxreg_inter_effect.numeric <- function(x,
                                          effect,
                                          covar,
                                          mod,
                                          label,
                                          control,
                                          at,
                                          ...) {
  # Hazard ratio of `effect` at given values of a numeric covariate, from a model holding the
  # `effect` main term and its interaction with `covar`.
  #
  # x:       covariate values; only used for the median fallback.
  # at:      optional list; `at[[covar]]` gives the covariate values at which to estimate.
  #          When `at` is not supplied, or has no entry for `covar`, the median of `x` is used.
  # control: list providing `conf_level` for the confidence limits.
  # Returns a data.frame with one row per evaluated covariate value.
  betas <- stats::coef(mod)
  attrs <- attr(stats::terms(mod), "term.labels")
  # Positions of the terms mentioning `effect`, after discarding strata terms. Exactly two are
  # expected (asserted below).
  # NOTE(review): positions come from term labels but are used to index the coefficient vector;
  # this presumably relies on one coefficient per term -- confirm.
  term_indices <- grep(
    pattern = effect,
    x = attrs[!grepl("strata\\(", attrs)]
  )
  checkmate::assert_vector(term_indices, len = 2)
  betas <- betas[term_indices]
  vcov_mod <- stats::vcov(mod)
  betas_var <- diag(vcov_mod)[term_indices]
  betas_cov <- vcov_mod[term_indices[1], term_indices[2]]

  # `missing()` is checked first so that an omitted `at` (also when merely passed through by a
  # caller that did not receive it) falls back to the median instead of raising an error.
  at_values <- if (missing(at)) NULL else at[[covar]]
  xval <- if (is.null(at_values)) {
    stats::median(x)
  } else {
    at_values
  }

  # TRUE for the coefficient whose name does not mention `covar`, i.e. the main effect.
  effect_index <- !grepl(covar, names(betas))
  # log(HR) at covariate value v: b_effect + v * b_interaction, with the matching variance
  # Var(b_effect) + v^2 * Var(b_interaction) + 2 * v * Cov(b_effect, b_interaction).
  coef_hat <- betas[effect_index] + xval * betas[!effect_index]
  coef_se <- sqrt(
    betas_var[effect_index] +
      xval^2 * betas_var[!effect_index] +
      2 * xval * betas_cov
  )
  q_norm <- stats::qnorm((1 + control$conf_level) / 2)
  data.frame(
    effect = "Covariate:",
    term = rep(covar, length(xval)),
    term_label = paste0("  ", xval),
    level = as.character(xval),
    n = NA,
    hr = exp(coef_hat),
    lcl = exp(coef_hat - q_norm * coef_se),
    ucl = exp(coef_hat + q_norm * coef_se),
    pval = NA,
    pval_inter = NA,
    stringsAsFactors = FALSE
  )
}

#' @describeIn cox_regression_inter Method for `factor` class. Estimate the interaction with a `factor` covariate.
#'
#' @method h_coxreg_inter_effect factor
#'
#' @param data (`data.frame`)\cr the data frame on which the model was fit.
#'
#' @export
h_coxreg_inter_effect.factor <- function(x,
                                         effect,
                                         covar,
                                         mod,
                                         label,
                                         control,
                                         data,
                                         ...) {
  # Hazard ratio of `effect` within each level of a factor covariate.
  #
  # x:       factor covariate; its levels define the output rows.
  # data:    data frame on which the model was fit; supplies the levels of `effect`.
  # control: list providing `conf_level`. Previously the level was hard-coded to 0.95, so the
  #          user's choice was silently ignored for factor covariates; 0.95 is kept as the
  #          fallback when `control` is absent or carries no `conf_level`.
  # Returns a data.frame with one row per level of `x`.
  conf_level <- if (missing(control) || is.null(control$conf_level)) {
    0.95
  } else {
    control$conf_level
  }

  lvl_given <- levels(x)
  # Only the first element of the returned list is used.
  y <- h_coxreg_inter_estimations(
    variable = effect, given = covar,
    lvl_var = levels(data[[effect]]),
    lvl_given = lvl_given,
    mod = mod,
    conf_level = conf_level
  )[[1]]

  data.frame(
    effect = "Covariate:",
    term = rep(covar, nrow(y)),
    term_label = paste0("  ", lvl_given),
    level = lvl_given,
    n = NA,
    hr = y[, "hr"],
    lcl = y[, "lcl"],
    ucl = y[, "ucl"],
    pval = NA,
    pval_inter = NA,
    stringsAsFactors = FALSE
  )
}

#' @describeIn cox_regression_inter Method for `character` class. Estimate the interaction with a `character` covariate.
#'   This makes an automatic conversion to `factor` and then forwards to the method for factors.
#'
#' @method h_coxreg_inter_effect character
#'
#' @note
#' * Automatic conversion of character to factor does not guarantee results can be generated correctly. It is
#'   therefore better to always pre-process the dataset such that factors are manually created from character
#'   variables before passing the dataset to [rtables::build_table()].
#'
#' @export
h_coxreg_inter_effect.character <- function(x, effect, covar, mod, label, control, data, ...) {
  # Character covariates are converted with `as.factor()` and sent back through the generic,
  # with every other argument forwarded unchanged.
  h_coxreg_inter_effect(
    x = as.factor(x),
    effect = effect,
    covar = covar,
    mod = mod,
    label = label,
    control = control,
    data = data,
    ...
  )
}

#' @describeIn cox_regression_inter A higher level function to get
#'   the results of the interaction test and the estimated values.
#'
#' @return
#' * `h_coxreg_extract_interaction()` returns the result of an interaction test and the estimated values. If
#'   no interaction, [h_coxreg_univar_extract()] is applied instead.
#'
#' @examples
#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder)
#' h_coxreg_extract_interaction(
#'   mod = mod, effect = "armcd", covar = "covar1", data = dta_bladder,
#'   control = control_coxreg()
#' )
#'
#' @export
h_coxreg_extract_interaction <- function(effect,
                                         covar,
                                         mod,
                                         data,
                                         at,
                                         control) {
  # Without a second-order term in the model there is no interaction to report: fall back to the
  # univariate extraction and add an empty interaction p-value column.
  term_orders <- attr(stats::terms(mod), "order")
  if (!any(term_orders == 2)) {
    univar <- h_coxreg_univar_extract(
      effect = effect, covar = covar, mod = mod, data = data, control = control
    )
    univar$pval_inter <- NA
    return(univar)
  }

  # Translate the configured p-value method into the test statistic name used for the ANOVA.
  test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]
  anova_tbl <- broom::tidy(muffled_car_anova(mod, test_statistic))

  # p-value of the main treatment effect, and of every term whose name contains ":".
  pval_effect <- anova_tbl[anova_tbl$term == effect, ][["p.value"]]
  pval_interaction <- anova_tbl[grep(":", anova_tbl$term), ][["p.value"]]

  # Header row carrying the tests; hazard ratio columns stay empty on this row.
  covar_label <- unname(labels_or_names(data[covar]))
  header_row <- data.frame(
    effect = "Covariate:",
    term = covar,
    term_label = covar_label,
    level = "",
    n = mod$n, hr = NA, lcl = NA, ucl = NA, pval = pval_effect,
    pval_inter = pval_interaction,
    stringsAsFactors = FALSE
  )

  # Rows with the estimates; the method is chosen from the class of the covariate column.
  level_rows <- h_coxreg_inter_effect(
    data[[covar]],
    covar = covar,
    effect = effect,
    mod = mod,
    label = covar_label,
    at = at,
    control = control,
    data = data
  )
  rbind(header_row, level_rows)
}

#' @describeIn cox_regression_inter Hazard ratio estimation in interactions.
#'
#' @param variable,given (`string`)\cr the name of variables in interaction. We seek the estimation
#'   of the levels of `variable` given the levels of `given`.
#' @param lvl_var,lvl_given (`character`)\cr corresponding levels as given by [levels()].
#' @param mod (`coxph`)\cr a fitted Cox regression model (see [survival::coxph()]).
#'
#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A)
#'   and Sex (F, M; reference Female) and the model being abbreviated: y ~ Arm + Sex + Arm:Sex.
#'   The cox regression estimates the coefficients along with a variance-covariance matrix for:
#'
#'   - b1 (arm b), b2 (arm c)
#'   - b3 (sex m)
#'   - b4 (arm b: sex m), b5 (arm c: sex m)
#'
#'   The estimation of the Hazard Ratio for arm C/sex M is given in reference
#'   to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5).
#'   The interaction coefficient is deduced by b2 + b5 while the standard error
#'   is obtained as $sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5))$.
#'
#' @return
#' * `h_coxreg_inter_estimations()` returns a list of matrices (one per level of variable) with rows corresponding
#'   to the combinations of `variable` and `given`, with columns:
#'   * `coef_hat`: Estimation of the coefficient.
#'   * `coef_se`: Standard error of the estimation.
#'   * `hr`: Hazard ratio.
#'   * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio.
#'
#' @examples
#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder)
#' result <- h_coxreg_inter_estimations(
#'   variable = "armcd", given = "covar1",
#'   lvl_var = levels(dta_bladder$armcd),
#'   lvl_given = levels(dta_bladder$covar1),
#'   mod = mod, conf_level = .95
#' )
#' result
#'
#' @export
h_coxreg_inter_estimations <- function(variable,
                                       given,
                                       lvl_var,
                                       lvl_given,
                                       mod,
                                       conf_level = 0.95) {
  # Estimates, for every non-reference level of `variable` and every level of `given`, the
  # coefficient sum, its standard error, the hazard ratio and its confidence limits.
  # Returns a list of matrices split by level of `variable`, with a "details" attribute.

  # Names as "<variable><level>" / "<given><level>"; the first level of `variable`
  # (the reference) is dropped.
  var_lvl <- paste0(variable, lvl_var[-1]) # [-1]: reference level
  giv_lvl <- paste0(given, lvl_given)
  # All combinations, ordered by `variable` first and `given` second.
  design_mat <- expand.grid(variable = var_lvl, given = giv_lvl)
  design_mat <- design_mat[order(design_mat$variable, design_mat$given), ]
  # Inside `within()`, `variable` and `given` are the data frame columns (not the function
  # arguments). Both possible spellings of the interaction term are stored.
  design_mat <- within(
    data = design_mat,
    expr = {
      inter <- paste0(variable, ":", given)
      rev_inter <- paste0(given, ":", variable)
    }
  )
  split_by_variable <- design_mat$variable
  interaction_names <- paste(design_mat$variable, design_mat$given, sep = "/")

  # First row of the model matrix serves as a named template; every non-zero entry is reset to 0.
  mmat <- stats::model.matrix(mod)[1, ]
  mmat[!mmat == 0] <- 0

  # For each combination, set to 1 the template entries whose name equals any value of the row
  # except the `given` one (i.e. the `variable` term and either interaction spelling).
  # The result has one column per combination.
  design_mat <- apply(
    X = design_mat, MARGIN = 1, FUN = function(x) {
      mmat[names(mmat) %in% x[-which(names(x) == "given")]] <- 1
      mmat
    }
  )
  colnames(design_mat) <- interaction_names

  coef <- stats::coef(mod)
  vcov <- stats::vcov(mod)
  betas <- as.matrix(coef)
  # Sum of the flagged coefficients for each combination.
  coef_hat <- t(design_mat) %*% betas
  dimnames(coef_hat)[2] <- "coef"
  # Standard error: square root of the sum of all entries of the variance-covariance sub-block
  # of the flagged coefficients (variances plus twice each covariance).
  coef_se <- apply(
    design_mat, 2,
    function(x) {
      vcov_el <- as.logical(x)
      y <- vcov[vcov_el, vcov_el]
      y <- sum(y)
      y <- sqrt(y)
      return(y)
    }
  )
  # Normal quantile for a two-sided interval at `conf_level`.
  q_norm <- stats::qnorm((1 + conf_level) / 2)
  y <- cbind(coef_hat, `se(coef)` = coef_se)
  # Append hazard ratio and confidence limits row by row; `apply()` returns the rows as columns,
  # hence the transpose afterwards.
  y <- apply(y, 1, function(x) {
    x["hr"] <- exp(x["coef"])
    x["lcl"] <- exp(x["coef"] - q_norm * x["se(coef)"])
    x["ucl"] <- exp(x["coef"] + q_norm * x["se(coef)"])
    x
  })
  y <- t(y)
  # One matrix per level of `variable`.
  y <- by(y, split_by_variable, identity)
  y <- lapply(y, as.matrix)
  attr(y, "details") <- paste0(
    "Estimations of ", variable,
    " hazard ratio given the level of ", given, " compared to ",
    variable, " level ", lvl_var[1], "."
  )
  y
}

#' Counting Patients and Events in Columns
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Counting the number of unique patients and the total number of all and specific events
#' when a column table layout is required.
#'
#' @inheritParams argument_convention
#'
#' @name count_patients_events_in_cols
NULL

#' @describeIn count_patients_events_in_cols Statistics function which counts numbers of patients and multiple
#'   events defined by filters. Used as analysis function `afun` in `summarize_patients_events_in_cols()`.
#'
#' @param filters_list (named `list` of `character`)\cr each element in this list describes one
#'   type of event described by filters, in the same format as [s_count_patients_with_event()].
#'   If it has a label, then this will be used for the column title.
#' @param empty_stats (`character`)\cr optional names of the statistics that should be returned empty such
#'   that corresponding table cells will stay blank.
#' @param custom_label (`string` or `NULL`)\cr if provided and `labelstr` is empty then this will
#'   be used as label.
#'
#' @return
#' * `s_count_patients_and_multiple_events()` returns a list with the statistics:
#'   - `unique`: number of unique patients in `df`.
#'   - `all`: number of rows in `df`.
#'   - one element with the same name as in `filters_list`: number of rows in `df`,
#'     i.e. events, fulfilling the filter condition.
#'
#' @examples
#' # `s_count_patients_and_multiple_events()`
#' df <- data.frame(
#'   USUBJID = rep(c("id1", "id2", "id3", "id4"), c(2, 3, 1, 1)),
#'   ARM = c("A", "A", "B", "B", "B", "B", "A"),
#'   AESER = rep("Y", 7),
#'   AESDTH = c("Y", "Y", "N", "Y", "Y", "N", "N"),
#'   AEREL = c("Y", "Y", "N", "Y", "Y", "N", "Y"),
#'   AEDECOD = c("A", "A", "A", "B", "B", "C", "D"),
#'   AEBODSYS = rep(c("SOC1", "SOC2", "SOC3"), c(3, 3, 1))
#' )
#'
#' @keywords internal
s_count_patients_and_multiple_events <- function(df, # nolint
                                                 id,
                                                 filters_list,
                                                 empty_stats = character(),
                                                 labelstr = "",
                                                 custom_label = NULL) {
  checkmate::assert_list(filters_list, names = "named")
  checkmate::assert_data_frame(df)
  checkmate::assert_string(id)
  checkmate::assert_disjunct(c("unique", "all"), names(filters_list))
  checkmate::assert_character(empty_stats)
  checkmate::assert_string(labelstr)
  checkmate::assert_string(custom_label, null.ok = TRUE)

  # Helper index column so that each row of `df` is counted exactly once by the event filters.
  df$.row_index <- as.character(seq_len(nrow(df)))

  # Label precedence: a non-empty `labelstr`, then `custom_label`, then the fixed text "counts".
  row_label <- "counts"
  if (!is.null(custom_label)) {
    row_label <- custom_label
  }
  if (labelstr != "") {
    row_label <- labelstr
  }

  # Patients (unique ids) and events (all rows).
  stats <- list()
  stats$unique <- formatters::with_label(
    s_num_patients_content(df = df, .N_col = 1, .var = id, required = NULL)$unique[1L],
    row_label
  )
  stats$all <- formatters::with_label(nrow(df), row_label)

  # One count per filter: the number of rows fulfilling it, stored under the filter's name.
  event_counts <- lapply(filters_list, function(filters) {
    n_rows <- s_count_patients_with_event(
      df = df, .var = ".row_index", filters = filters, .N_col = 1, .N_row = 1
    )$count
    formatters::with_label(n_rows, row_label)
  })
  stats <- c(stats, event_counts)

  # Statistics listed in `empty_stats` are replaced by an empty character vector carrying the
  # same label. With no `empty_stats` the loop body never runs.
  for (stat in intersect(names(stats), empty_stats)) {
    stats[[stat]] <- formatters::with_label(character(), obj_label(stats[[stat]]))
  }
  stats
}

#' @describeIn count_patients_events_in_cols Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @param col_split (`flag`)\cr whether the columns should be split.
#'   Set to `FALSE` when the required column split has been done already earlier in the layout pipe.
#'
#' @return
#' * `summarize_patients_events_in_cols()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows
#'   containing the statistics from `s_count_patients_and_multiple_events()` to the table layout.
#' @examples
#' # `summarize_patients_events_in_cols()`
#' basic_table() %>%
#'   summarize_patients_events_in_cols(
#'     filters_list = list(
#'       related = formatters::with_label(c(AEREL = "Y"), "Events (Related)"),
#'       fatal = c(AESDTH = "Y"),
#'       fatal_related = c(AEREL = "Y", AESDTH = "Y")
#'     ),
#'     custom_label = "%s Total number of patients and events"
#'   ) %>%
#'   build_table(df)
#'
#' @export
summarize_patients_events_in_cols <- function(lyt, # nolint
                                              id = "USUBJID",
                                              filters_list = list(),
                                              ...,
                                              .stats = c(
                                                "unique",
                                                "all",
                                                names(filters_list)
                                              ),
                                              .labels = c(
                                                unique = "Patients (All)",
                                                all = "Events (All)",
                                                labels_or_names(filters_list)
                                              ),
                                              col_split = TRUE) {
  # Builds the content function for a single statistic, always formatted as "xx.".
  stat_cfun <- function(stat) {
    make_afun(
      s_count_patients_and_multiple_events,
      id = id,
      filters_list = filters_list,
      .stats = stat,
      .formats = "xx."
    )
  }
  cfun_list <- Map(stat_cfun, stat = .stats)

  # Optionally add one column per statistic, each based on `id` and labelled from `.labels`.
  if (col_split) {
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = rep(id, length(.stats)),
      varlabels = .labels[.stats]
    )
  }

  # Content rows; the dots are forwarded to the statistics function as `extra_args`.
  summarize_row_groups(
    lyt = lyt,
    cfun = cfun_list,
    extra_args = list(...)
  )
}

#' Tabulate Survival Duration by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tabulate statistics such as median survival time and hazard ratio for population subgroups.
#'
#' @inheritParams argument_convention
#' @inheritParams survival_coxph_pairwise
#' @param data (`data.frame`)\cr the dataset containing the variables to summarize.
#' @param groups_lists (named `list` of `list`)\cr optionally contains for each `subgroups` variable a list, which
#'   specifies the new group levels via the names and the levels that belong to it in the character vectors that are
#'   elements of the list.
#' @param label_all (`string`)\cr label for the total population analysis.
#' @param time_unit (`string`)\cr label with unit of median survival time. Default `NULL` skips displaying unit.
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. Tables typically used as part of forest plot.
#'
#' @seealso [extract_survival_subgroups()]
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X"),
#'     SEX %in% c("M", "F")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
#'     SEX = droplevels(SEX),
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c(
#'   "ARM" = adtte_labels[["ARM"]],
#'   "SEX" = adtte_labels[["SEX"]],
#'   "AVALU" = adtte_labels[["AVALU"]],
#'   "is_event" = "Event Flag"
#' )
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' df <- extract_survival_subgroups(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM", subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#' df
#'
#' @name survival_duration_subgroups
NULL

#' Prepares Survival Data for Population Subgroups in Data Frames
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares estimates of median survival times and treatment hazard ratios for population subgroups in
#' data frames. Simple wrapper for [h_survtime_subgroups_df()] and [h_coxph_subgroups_df()]. Result is a `list`
#' of two `data.frame`s: `survtime` and `hr`. `variables` corresponds to the names of variables found in `data`,
#' passed as a named `list` and requires elements `tte`, `is_event`, `arm` and optionally `subgroups` and `strat`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams survival_duration_subgroups
#' @inheritParams survival_coxph_pairwise
#'
#' @return A named `list` of two elements:
#'   * `survtime`: A `data.frame` containing columns `arm`, `n`, `n_events`, `median`, `subgroup`, `var`,
#'     `var_label`, and `row_type`.
#'   * `hr`: A `data.frame` containing columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`, `conf_level`,
#'     `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @seealso [survival_duration_subgroups]
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adtte <- tern_ex_adtte
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X"),
#'     SEX %in% c("M", "F")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
#'     SEX = droplevels(SEX),
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c(
#'   "ARM" = adtte_labels[["ARM"]],
#'   "SEX" = adtte_labels[["SEX"]],
#'   "AVALU" = adtte_labels[["AVALU"]],
#'   "is_event" = "Event Flag"
#' )
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' df <- extract_survival_subgroups(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM", subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#' df
#'
#' df_grouped <- extract_survival_subgroups(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM", subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#' df_grouped
#'
#' @export
extract_survival_subgroups <- function(variables,
                                       data,
                                       groups_lists = list(),
                                       control = control_coxph(),
                                       label_all = "All Patients") {
  # Both helpers are called with the same `variables`, `data`, `groups_lists`
  # and `label_all`; `control` is only passed to the Cox regression helper.
  list(
    survtime = h_survtime_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      label_all = label_all
    ),
    hr = h_coxph_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      control = control,
      label_all = label_all
    )
  )
}

#' @describeIn survival_duration_subgroups  Formatted analysis function which is used as
#'   `afun` in `tabulate_survival_subgroups()`.
#'
#' @return
#' * `a_survival_subgroups()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_survival_subgroups <- function(.formats = list(
                                   n = "xx",
                                   n_events = "xx",
                                   n_tot_events = "xx",
                                   median = "xx.x",
                                   n_tot = "xx",
                                   hr = list(format_extreme_values(2L)),
                                   ci = list(format_extreme_values_ci(2L)),
                                   pval = "x.xxxx | (<0.0001)"
                                 )) {
  checkmate::assert_list(.formats)
  checkmate::assert_subset(
    names(.formats),
    c("n", "n_events", "median", "n_tot", "n_tot_events", "hr", "ci", "pval")
  )

  # Builds the analysis function for one statistic. Every generated function
  # creates one row per record of `df`, labelled by its `subgroup`.
  build_afun <- function(stat, fmt) {
    # "ci" is assembled from the `lcl` and `ucl` columns; every other
    # statistic is read from the column carrying its own name.
    cell_values <- if (stat == "ci") {
      function(df) combine_vectors(df$lcl, df$ucl)
    } else {
      function(df) as.list(df[[stat]])
    }

    function(df, labelstr = "", ...) {
      in_rows(
        .list = cell_values(df),
        .labels = as.character(df$subgroup),
        .formats = fmt
      )
    }
  }

  # Result is a list of functions named by statistic.
  Map(build_afun, stat = names(.formats), fmt = .formats)
}

#' @describeIn survival_duration_subgroups Table-creating function which creates a table
#'   summarizing survival by subgroup. This function is a wrapper for [rtables::analyze_colvars()]
#'   and [rtables::summarize_row_groups()].
#'
#' @param df (`list`)\cr of data frames containing all analysis variables. List should be
#'   created using [extract_survival_subgroups()].
#' @param vars (`character`)\cr the names of statistics to be reported among:
#'   * `n_tot_events`: Total number of events per group.
#'   * `n_events`: Number of events per group.
#'   * `n_tot`: Total number of observations per group.
#'   * `n`: Number of observations per group.
#'   * `median`: Median survival time.
#'   * `hr`: Hazard ratio.
#'   * `ci`: Confidence interval of hazard ratio.
#'   * `pval`: p-value of the effect.
#'   Note, one of the statistics `n_tot` and `n_tot_events`, as well as both `hr` and `ci`
#'   are required.
#'
#' @return An `rtables` table summarizing survival by subgroup.
#'
#' @examples
#' ## Table with default columns.
#' basic_table() %>%
#'   tabulate_survival_subgroups(df, time_unit = adtte_f$AVALU[1])
#'
#' ## Table with a manually chosen set of columns: adding "pval".
#' basic_table() %>%
#'   tabulate_survival_subgroups(
#'     df = df,
#'     vars = c("n_tot_events", "n_events", "median", "hr", "ci", "pval"),
#'     time_unit = adtte_f$AVALU[1]
#'   )
#'
#' @export
tabulate_survival_subgroups <- function(lyt,
                                        df,
                                        vars = c("n_tot_events", "n_events", "median", "hr", "ci"),
                                        time_unit = NULL) {
  # The confidence level and p-value label used in the column labels are
  # taken from the first row of `df$hr`.
  conf_level <- df$hr$conf_level[1]
  method <- df$hr$pval_label[1]

  afun_lst <- a_survival_subgroups()
  colvars <- d_survival_subgroups_colvars(
    vars,
    conf_level = conf_level,
    method = method,
    time_unit = time_unit
  )

  # The requested statistics live in two different data frames: survival time
  # statistics in `df$survtime`, hazard ratio statistics in `df$hr`.
  is_survtime_stat <- names(colvars$labels) %in% c("n", "n_events", "median")
  is_hr_stat <- names(colvars$labels) %in% c("n_tot", "n_tot_events", "hr", "ci", "pval")

  colvars_survtime <- list(
    vars = colvars$vars[is_survtime_stat],
    labels = colvars$labels[is_survtime_stat]
  )
  colvars_hr <- list(
    vars = colvars$vars[is_hr_stat],
    labels = colvars$labels[is_hr_stat]
  )

  # Columns from table_survtime are optional.
  if (length(colvars_survtime$vars) > 0) {
    lyt_survtime <- split_cols_by(lyt = lyt, var = "arm")
    lyt_survtime <- split_rows_by(
      lyt = lyt_survtime,
      var = "row_type",
      split_fun = keep_split_levels("content"),
      nested = FALSE
    )
    lyt_survtime <- summarize_row_groups(
      lyt = lyt_survtime,
      var = "var_label",
      cfun = afun_lst[names(colvars_survtime$labels)]
    )
    lyt_survtime <- split_cols_by_multivar(
      lyt = lyt_survtime,
      vars = colvars_survtime$vars,
      varlabels = colvars_survtime$labels
    )

    if ("analysis" %in% df$survtime$row_type) {
      lyt_survtime <- split_rows_by(
        lyt = lyt_survtime,
        var = "row_type",
        split_fun = keep_split_levels("analysis"),
        nested = FALSE,
        child_labels = "hidden"
      )
      lyt_survtime <- split_rows_by(lyt = lyt_survtime, var = "var_label", nested = TRUE)
      lyt_survtime <- analyze_colvars(
        lyt = lyt_survtime,
        afun = afun_lst[names(colvars_survtime$labels)],
        inclNAs = TRUE
      )
    }

    table_survtime <- build_table(lyt_survtime, df = df$survtime)
  } else {
    table_survtime <- NULL
  }

  # Columns "n_tot_events" or "n_tot", and "hr", "ci" in table_hr are required.
  lyt_hr <- split_cols_by(lyt = lyt, var = "arm")
  lyt_hr <- split_rows_by(
    lyt = lyt_hr,
    var = "row_type",
    split_fun = keep_split_levels("content"),
    nested = FALSE
  )
  lyt_hr <- summarize_row_groups(
    lyt = lyt_hr,
    var = "var_label",
    cfun = afun_lst[names(colvars_hr$labels)]
  )
  lyt_hr <- split_cols_by_multivar(
    lyt = lyt_hr,
    vars = colvars_hr$vars,
    varlabels = colvars_hr$labels
  ) %>%
    append_topleft("Baseline Risk Factors")

  # This table is built from `df$hr`, so the presence of analysis rows has to
  # be looked up in `df$hr` as well (not in `df$survtime`): keeping the
  # "analysis" level requires it to exist in the data the layout is applied to.
  if ("analysis" %in% df$hr$row_type) {
    lyt_hr <- split_rows_by(
      lyt = lyt_hr,
      var = "row_type",
      split_fun = keep_split_levels("analysis"),
      nested = FALSE,
      child_labels = "hidden"
    )
    lyt_hr <- split_rows_by(lyt = lyt_hr, var = "var_label", nested = TRUE)
    lyt_hr <- analyze_colvars(
      lyt = lyt_hr,
      afun = afun_lst[names(colvars_hr$labels)],
      inclNAs = TRUE
    )
  }
  table_hr <- build_table(lyt_hr, df = df$hr)

  # There can be one or two vars starting with "n_tot".
  n_tot_ids <- grep("^n_tot", colvars_hr$vars)
  if (is.null(table_survtime)) {
    result <- table_hr
    hr_id <- match("hr", colvars_hr$vars)
    # The confidence interval column is registered under its placeholder variable `lcl`.
    ci_id <- match("lcl", colvars_hr$vars)
  } else {
    # Reorder the table: total counts first, then survival time columns, then the remaining hr columns.
    result <- cbind_rtables(table_hr[, n_tot_ids], table_survtime, table_hr[, -n_tot_ids])
    # And then calculate column indices accordingly.
    hr_id <- length(n_tot_ids) + ncol(table_survtime) + match("hr", colvars_hr$vars[-n_tot_ids])
    ci_id <- length(n_tot_ids) + ncol(table_survtime) + match("lcl", colvars_hr$vars[-n_tot_ids])
    n_tot_ids <- seq_along(n_tot_ids)
  }

  # Attach the column positions and header as attributes of the resulting table.
  structure(
    result,
    forest_header = paste0(rev(levels(df$survtime$arm)), "\nBetter"),
    col_x = hr_id,
    col_ci = ci_id,
    # Take the first one for scaling the symbol sizes in graph.
    col_symbol_size = n_tot_ids[1]
  )
}

#' Labels for Column Variables in Survival Duration by Subgroup Table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Internal function to check variables included in [tabulate_survival_subgroups()] and create column labels.
#'
#' @inheritParams tabulate_survival_subgroups
#' @inheritParams argument_convention
#' @param method (`character`)\cr p-value method for testing hazard ratio = 1.
#'
#' @return A `list` of variables and their labels to tabulate.
#'
#' @note At least one of `n_tot` and `n_tot_events` must be provided in `vars`.
#'
#' @export
d_survival_subgroups_colvars <- function(vars,
                                         conf_level,
                                         method,
                                         time_unit = NULL) {
  checkmate::assert_character(vars)
  checkmate::assert_string(time_unit, null.ok = TRUE)
  checkmate::assert_subset(c("hr", "ci"), vars)
  checkmate::assert_true(any(c("n_tot", "n_tot_events") %in% vars))
  checkmate::assert_subset(
    vars,
    c("n", "n_events", "median", "n_tot", "n_tot_events", "hr", "ci", "pval")
  )

  # The time unit is appended to the median label only when one is given.
  median_label <- if (is.null(time_unit)) {
    "Median"
  } else {
    paste0("Median (", time_unit, ")")
  }

  # Labels for every statistic that can be requested.
  all_labels <- c(
    n = "n",
    n_events = "Events",
    median = median_label,
    n_tot = "Total n",
    n_tot_events = "Total Events",
    hr = "Hazard Ratio",
    ci = paste0(100 * conf_level, "% Wald CI"),
    pval = method
  )

  # "ci" is not a column of the analysis data. It is represented by the
  # placeholder column `lcl`; the cell content itself is built from both `lcl`
  # and `ucl`, see `a_survival_subgroups` for details.
  list(
    vars = replace(vars, vars == "ci", "lcl"),
    labels = all_labels[vars]
  )
}

#' Patient Counts for Laboratory Events (Worsen From Baseline) by Highest Grade Post-Baseline
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Patient count and fraction for laboratory events (worsen from baseline) shift table.
#'
#' @inheritParams argument_convention
#'
#' @seealso Relevant helper functions [h_adlb_worsen()] and [h_worsen_counter()]
#'
#' @name abnormal_by_worst_grade_worsen
NULL

#' Helper Function to Prepare `ADLB` with Worst Labs
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to prepare a `df` for generating the patient count shift table.
#'
#' @param adlb (`data.frame`)\cr `ADLB` dataframe
#' @param worst_flag_low (named `vector`)\cr Worst low post-baseline lab grade flag variable
#' @param worst_flag_high (named `vector`)\cr Worst high post-baseline lab grade flag variable
#' @param direction_var (`string`)\cr Direction variable specifying the direction of the shift table of interest.
#'   Only lab records flagged by `L`, `H` or `B` are included in the shift table.
#'   * `L`: low direction only
#'   * `H`: high direction only
#'   * `B`: both low and high directions
#'
#' @return `h_adlb_worsen()` returns the `adlb` `data.frame` containing only the
#'   worst labs specified according to `worst_flag_low` or `worst_flag_high` for the
#'   direction specified according to `direction_var`. For instance, for a lab that is
#'   needed for the low direction only, only records flagged by `worst_flag_low` are
#'   selected. For a lab that is needed for both low and high directions, the worst
#'   low records are selected for the low direction, and the worst high records are selected
#'   for the high direction.
#'
#' @seealso [abnormal_by_worst_grade_worsen]
#'
#' @examples
#' library(dplyr)
#'
#' # The direction variable, GRADDR, is based on metadata
#' adlb <- tern_ex_adlb %>%
#'   mutate(
#'     GRADDR = case_when(
#'       PARAMCD == "ALT" ~ "B",
#'       PARAMCD == "CRP" ~ "L",
#'       PARAMCD == "IGA" ~ "H"
#'     )
#'   ) %>%
#'   filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
#'
#' df <- h_adlb_worsen(
#'   adlb,
#'   worst_flag_low = c("WGRLOFL" = "Y"),
#'   worst_flag_high = c("WGRHIFL" = "Y"),
#'   direction_var = "GRADDR"
#' )
#'
#' @export
h_adlb_worsen <- function(adlb,
                          worst_flag_low = NULL,
                          worst_flag_high = NULL,
                          direction_var) {
  checkmate::assert_string(direction_var)
  checkmate::assert_subset(as.character(unique(adlb[[direction_var]])), c("B", "L", "H"))
  assert_df_with_variables(adlb, list("Col" = direction_var))

  # Without any worst flag there is nothing to select. Fail with a clear message
  # instead of the "object 'out' not found" error this case used to end in.
  if (is.null(worst_flag_low) && is.null(worst_flag_high)) {
    stop("at least one of `worst_flag_low` and `worst_flag_high` must be specified", call. = FALSE)
  }

  # The flag variables must be available for every direction present in the data.
  directions <- unique(adlb[[direction_var]])

  if (any(directions == "H")) {
    assert_df_with_variables(adlb, list("High" = names(worst_flag_high)))
  }

  if (any(directions == "L")) {
    assert_df_with_variables(adlb, list("Low" = names(worst_flag_low)))
  }

  if (any(directions == "B")) {
    assert_df_with_variables(
      adlb,
      list(
        "Low" = names(worst_flag_low),
        "High" = names(worst_flag_high)
      )
    )
  }

  # extract records with worst post-baseline lab, either low or high or both
  worst_flag <- c(worst_flag_low, worst_flag_high)
  flagged_rows <- Map(
    function(col, value) which(adlb[[col]] == value),
    names(worst_flag),
    worst_flag
  )
  adlb_f <- adlb[Reduce(union, flagged_rows), ]

  # Records of `adlb_f` with direction code `dir` that are flagged by `flag`,
  # with the direction variable recoded to `label`.
  select_worst <- function(dir, flag, label) {
    subset_df <- adlb_f[which(adlb_f[[direction_var]] == dir), ]
    subset_df[[direction_var]] <- rep(label, nrow(subset_df))
    subset_df[which(subset_df[[names(flag)]] == flag), ]
  }

  # Labs requiring both directions ("B") contribute to the high and to the low
  # part, i.e. their records are duplicated and stacked on top of each other.
  # Order of the stacked pieces: H, B (as high), L, B (as low).
  pieces <- list()
  if (!is.null(worst_flag_high)) {
    pieces <- c(
      pieces,
      list(
        select_worst("H", worst_flag_high, "High"),
        select_worst("B", worst_flag_high, "High")
      )
    )
  }
  if (!is.null(worst_flag_low)) {
    pieces <- c(
      pieces,
      list(
        select_worst("L", worst_flag_low, "Low"),
        select_worst("B", worst_flag_low, "Low")
      )
    )
  }
  out <- do.call(rbind, pieces)

  # carry over the variable labels of the input data
  formatters::var_labels(out) <- formatters::var_labels(adlb_f, fill = FALSE)

  out
}

#' Helper Function to Analyze Patients for [s_count_abnormal_lab_worsen_by_baseline()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to count the number of patients and the fraction of patients according to
#' highest post-baseline lab grade variable `.var`, baseline lab grade variable `baseline_var`,
#' and the direction of interest specified in `direction_var`.
#'
#' @inheritParams argument_convention
#' @inheritParams h_adlb_worsen
#' @param baseline_var (`string`)\cr baseline lab grade variable
#'
#' @return `h_worsen_counter()` returns the counts and fraction of patients
#'   whose worst post-baseline lab grades are worse than their baseline grades, for
#'   post-baseline worst grades "1", "2", "3", "4" and "Any".
#'
#' @seealso [abnormal_by_worst_grade_worsen]
#'
#' @examples
#' library(dplyr)
#'
#' # The direction variable, GRADDR, is based on metadata
#' adlb <- tern_ex_adlb %>%
#'   mutate(
#'     GRADDR = case_when(
#'       PARAMCD == "ALT" ~ "B",
#'       PARAMCD == "CRP" ~ "L",
#'       PARAMCD == "IGA" ~ "H"
#'     )
#'   ) %>%
#'   filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
#'
#' df <- h_adlb_worsen(
#'   adlb,
#'   worst_flag_low = c("WGRLOFL" = "Y"),
#'   worst_flag_high = c("WGRHIFL" = "Y"),
#'   direction_var = "GRADDR"
#' )
#'
#' # `h_worsen_counter`
#' h_worsen_counter(
#'   df %>% filter(PARAMCD == "CRP" & GRADDR == "Low"),
#'   id = "USUBJID",
#'   .var = "ATOXGR",
#'   baseline_var = "BTOXGR",
#'   direction_var = "GRADDR"
#' )
#'
#' @export
h_worsen_counter <- function(df, id, .var, baseline_var, direction_var) {
  checkmate::assert_string(id)
  checkmate::assert_string(.var)
  checkmate::assert_string(baseline_var)
  checkmate::assert_scalar(unique(df[[direction_var]]))
  checkmate::assert_subset(unique(df[[direction_var]]), c("High", "Low"))
  assert_df_with_variables(df, list(val = c(id, .var, baseline_var, direction_var)))

  # Obtain directionality before any record is dropped: if all post-baseline
  # values are missing, the filtered data is empty and no longer carries it.
  direction <- unique(df[[direction_var]])

  # Remove post-baseline missing. `which()` makes sure that `NA` values are
  # dropped as well instead of being turned into all-`NA` rows.
  df <- df[which(df[[.var]] != "<Missing>"), ]

  if (direction == "Low") {
    grade <- -1:-4
    worst_grade <- -4
  } else if (direction == "High") {
    grade <- 1:4
    worst_grade <- 4
  }

  # One entry per grade in the direction of interest. For empty data every
  # count is zero.
  by_grade <- lapply(grade, function(i) {
    # keep baseline values that are less than i (in the direction of interest) or <Missing>
    df_grade <- df[df[[baseline_var]] %in% c((i + sign(i) * -1):(-1 * worst_grade), "<Missing>"), ]
    # num: number of patients with post-baseline worst lab equal to i
    num <- length(unique(df_grade[df_grade[[.var]] %in% i, id, drop = TRUE]))
    # denom: number of patients with baseline values less than i or <Missing> and post-baseline in the same direction
    denom <- length(unique(df_grade[[id]]))
    c(num = num, denom = denom)
  })
  names(by_grade) <- as.character(seq_along(by_grade))

  # baseline grade less than 4 or missing
  df_temp <- df[!df[[baseline_var]] %in% worst_grade, ]

  # denom: number of patients with baseline values less than 4 or <Missing> and post-baseline in the same direction
  denom <- length(unique(df_temp[, id, drop = TRUE]))

  # condition 1: missing baseline and in the direction of abnormality
  con1 <- which(df_temp[[baseline_var]] == "<Missing>" & df_temp[[.var]] %in% grade)

  # condition 2: non-missing baseline, post-baseline in the direction of abnormality and worse than baseline.
  # The comparison is done on the candidate rows only, and the result is mapped back to row positions of
  # `df_temp` so that it can be combined with `con1`, which also refers to `df_temp`.
  nm_rows <- which(df_temp[[baseline_var]] != "<Missing>" & df_temp[[.var]] %in% grade)
  post_nm <- as.numeric(as.character(df_temp[[.var]][nm_rows]))
  base_nm <- as.numeric(as.character(df_temp[[baseline_var]][nm_rows]))
  is_worse <- if (direction == "Low") {
    post_nm < base_nm
  } else {
    post_nm > base_nm
  }
  con2 <- nm_rows[which(is_worse)]

  # number of patients satisfying either condition 1 or 2
  num <- length(unique(df_temp[union(con1, con2), id, drop = TRUE]))

  list(fraction = c(by_grade, list("Any" = c(num = num, denom = denom))))
}

#' @describeIn abnormal_by_worst_grade_worsen Statistics function for patients whose worst post-baseline
#'   lab grades are worse than their baseline grades.
#'
#' @param variables (named `list` of `string`)\cr list of additional analysis variables including:
#'   * `id` (`string`)\cr subject variable name.
#'   * `baseline_var` (`string`)\cr name of the data column containing baseline toxicity variable.
#'   * `direction_var` (`string`)\cr name of the direction variable, see the `direction_var` argument of
#'     [h_adlb_worsen()] for more details.
#'
#' @return
#' * `s_count_abnormal_lab_worsen_by_baseline()` returns the counts and fraction of patients whose worst
#'   post-baseline lab grades are worse than their baseline grades, for post-baseline worst grades
#'   "1", "2", "3", "4" and "Any".
#'
#' @examples
#' library(dplyr)
#'
#' # The direction variable, GRADDR, is based on metadata
#' adlb <- tern_ex_adlb %>%
#'   mutate(
#'     GRADDR = case_when(
#'       PARAMCD == "ALT" ~ "B",
#'       PARAMCD == "CRP" ~ "L",
#'       PARAMCD == "IGA" ~ "H"
#'     )
#'   ) %>%
#'   filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
#'
#' df <- h_adlb_worsen(
#'   adlb,
#'   worst_flag_low = c("WGRLOFL" = "Y"),
#'   worst_flag_high = c("WGRHIFL" = "Y"),
#'   direction_var = "GRADDR"
#' )
#'
#' @keywords internal
s_count_abnormal_lab_worsen_by_baseline <- function(df, # nolint
                                                    .var = "ATOXGR",
                                                    variables = list(
                                                      id = "USUBJID",
                                                      baseline_var = "BTOXGR",
                                                      direction_var = "GRADDR"
                                                    )) {
  # Check the variable specification first: `variables` must consist of
  # exactly the elements `id`, `baseline_var` and `direction_var`.
  checkmate::assert_string(.var)
  checkmate::assert_set_equal(names(variables), c("id", "baseline_var", "direction_var"))
  checkmate::assert_string(variables$id)
  checkmate::assert_string(variables$baseline_var)
  checkmate::assert_string(variables$direction_var)

  # Then check the data against the analysis variable and the additional variables.
  assert_df_with_variables(df, c(aval = .var, variables[1:3]))
  assert_list_of_variables(variables)

  # The counting itself is delegated to the helper.
  h_worsen_counter(
    df = df,
    id = variables$id,
    .var = .var,
    baseline_var = variables$baseline_var,
    direction_var = variables$direction_var
  )
}


#' @describeIn abnormal_by_worst_grade_worsen Formatted analysis function which is used as `afun`
#'   in `count_abnormal_lab_worsen_by_baseline()`.
#'
#' @return
#' * `a_count_abnormal_lab_worsen_by_baseline()` returns the corresponding list with
#'   formatted [rtables::CellValue()].
#'
#' @keywords internal
a_count_abnormal_lab_worsen_by_baseline <- make_afun( # nolint
  s_count_abnormal_lab_worsen_by_baseline,
  # `fraction` is the only statistic returned by the statistics function; it
  # is a list with one element per grade plus "Any".
  .ungroup_stats = "fraction",
  # Each element is formatted via `format_fraction()`.
  .formats = list(fraction = format_fraction)
)

#' @describeIn abnormal_by_worst_grade_worsen Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_lab_worsen_by_baseline()` returns a layout object suitable for passing to further layouting
#'   functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted
#'   rows containing the statistics from `s_count_abnormal_lab_worsen_by_baseline()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   add_colcounts() %>%
#'   split_rows_by("PARAMCD") %>%
#'   split_rows_by("GRADDR") %>%
#'   count_abnormal_lab_worsen_by_baseline(
#'     var = "ATOXGR",
#'     variables = list(
#'       id = "USUBJID",
#'       baseline_var = "BTOXGR",
#'       direction_var = "GRADDR"
#'     )
#'   ) %>%
#'   append_topleft("Direction of Abnormality") %>%
#'   build_table(df = df, alt_counts_df = tern_ex_adsl)
#'
#' @export
count_abnormal_lab_worsen_by_baseline <- function(lyt, # nolint
                                                  var,
                                                  ...,
                                                  table_names = NULL,
                                                  .stats = NULL,
                                                  .formats = NULL,
                                                  .labels = NULL,
                                                  .indent_mods = NULL) {
  checkmate::assert_string(var)

  # Customize the formatted analysis function with the user-supplied settings.
  afun <- make_afun(
    a_count_abnormal_lab_worsen_by_baseline,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # `table_names` used to be accepted but silently ignored. It is now passed
  # on to `analyze()`; when not specified, `var` is used, which corresponds to
  # the default of `analyze()` (`table_names = vars`).
  lyt <- analyze(
    lyt = lyt,
    vars = var,
    afun = afun,
    table_names = if (is.null(table_names)) var else table_names,
    extra_args = list(...),
    show_labels = "hidden"
  )

  lyt
}

#' Formatting Functions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' See below for the list of formatting functions created in `tern` to work with `rtables`.
#'
#' Other available formats can be listed via [`formatters::list_valid_format_labels()`]. Additional
#' custom formats can be created via the [`formatters::sprintf_format()`] function.
#'
#' @family formatting functions
#' @name formatting_functions
NULL

#' Formatting Fraction and Percentage
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a fraction together with ratio in percent.
#'
#' @param x (`integer`)\cr with elements `num` and `denom`.
#' @param ... required for `rtables` interface.
#'
#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`.
#'
#' @examples
#' format_fraction(x = c(num = 2L, denom = 3L))
#' format_fraction(x = c(num = 0L, denom = 3L))
#'
#' @family formatting functions
#' @export
format_fraction <- function(x, ...) {
  # A label attribute, if present, is not part of the values to format.
  attr(x, "label") <- NULL

  checkmate::assert_vector(x)
  checkmate::assert_count(x["num"])
  checkmate::assert_count(x["denom"])

  num <- x["num"]
  denom <- x["denom"]
  fraction_txt <- paste0(num, "/", denom)

  # No percentage is shown for a zero numerator.
  if (num == 0) {
    return(fraction_txt)
  }

  paste0(fraction_txt, " (", round(num / denom * 100, 1), "%)")
}

#' Formatting Fraction and Percentage with Fixed Single Decimal Place
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a fraction together with ratio in percent with fixed single decimal place.
#' Includes trailing zero in case of whole number percentages to always keep one decimal place.
#'
#' @param x (`integer`)\cr with elements `num` and `denom`.
#' @param ... required for `rtables` interface.
#'
#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`.
#'
#' @examples
#' format_fraction_fixed_dp(x = c(num = 1L, denom = 2L))
#' format_fraction_fixed_dp(x = c(num = 1L, denom = 4L))
#' format_fraction_fixed_dp(x = c(num = 0L, denom = 3L))
#'
#' @family formatting functions
#' @export
format_fraction_fixed_dp <- function(x, ...) {
  # A label attribute, if present, is not part of the values to format.
  attr(x, "label") <- NULL
  checkmate::assert_vector(x)
  checkmate::assert_count(x["num"])
  checkmate::assert_count(x["denom"])

  num <- x["num"]
  denom <- x["denom"]
  fraction_txt <- paste0(num, "/", denom)

  # No percentage is shown for a zero numerator.
  if (num == 0) {
    return(fraction_txt)
  }

  # "%.1f" keeps the trailing zero of whole number percentages.
  pct_txt <- sprintf("%.1f", round(num / denom * 100, 1))
  paste0(fraction_txt, " (", pct_txt, "%)")
}

#' Formatting Count and Fraction
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a count together with fraction with special consideration when count is `0`.
#'
#' @param x (`integer`)\cr vector of length 2, count and fraction.
#' @param ... required for `rtables` interface.
#'
#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`.
#'
#' @examples
#' format_count_fraction(x = c(2, 0.6667))
#' format_count_fraction(x = c(0, 0))
#'
#' @family formatting functions
#' @export
format_count_fraction <- function(x, ...) {
  attr(x, "label") <- NULL

  # Any missing component turns the whole cell into the literal string "NA".
  if (anyNA(x)) {
    return("NA")
  }

  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(x[2], include_boundaries = TRUE)

  count <- x[1]

  # A zero count is shown on its own, without a percentage.
  if (count == 0) {
    return("0")
  }

  percent <- round(x[2] * 100, 1)
  paste0(count, " (", percent, "%)")
}

#' Formatting Count and Percentage with Fixed Single Decimal Place
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Formats a count together with fraction with special consideration when count is `0`.
#'
#' @param x (`integer`)\cr vector of length 2, count and fraction.
#' @param ... required for `rtables` interface.
#'
#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`.
#'
#' @examples
#' format_count_fraction_fixed_dp(x = c(2, 0.6667))
#' format_count_fraction_fixed_dp(x = c(2, 0.5))
#' format_count_fraction_fixed_dp(x = c(0, 0))
#'
#' @family formatting functions
#' @export
format_count_fraction_fixed_dp <- function(x, ...) {
  attr(x, "label") <- NULL

  # Any missing component turns the whole cell into the literal string "NA".
  if (anyNA(x)) {
    return("NA")
  }

  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(x[2], include_boundaries = TRUE)

  count <- x[1]
  fraction <- x[2]

  # A zero count is shown on its own, without a percentage.
  if (count == 0) {
    return("0")
  }

  # Exactly 100% is printed without a decimal place.
  if (fraction == 1) {
    return(sprintf("%d (100%%)", count))
  }

  sprintf("%d (%.1f%%)", count, fraction * 100)
}

#' Formatting: XX as Formatting Function
#'
#' Translate a string where x and dots are interpreted as number place
#' holders, and others as formatting elements.
#'
#' @param str (`string`)\cr template.
#'
#' @return An `rtables` formatting function.
#'
#' @examples
#' test <- list(c(1.658, 0.5761), c(1e1, 785.6))
#'
#' z <- format_xx("xx (xx.x)")
#' sapply(test, z)
#'
#' z <- format_xx("xx.x - xx.x")
#' sapply(test, z)
#'
#' z <- format_xx("xx.x, incl. xx.x% NE")
#' sapply(test, z)
#'
#' @family formatting functions
#' @export
format_xx <- function(str) {
  # Locate every numeric placeholder: either "xx.xx"-style or a plain run of "x".
  positions <- gregexpr(pattern = "x+\\.x+|x+", text = str, perl = TRUE)
  placeholders <- regmatches(x = str, m = positions)[[1]]

  # Build one rounding function per placeholder. The number of decimals is the
  # number of "x" after the dot, or 0 when the placeholder has no dot.
  roundings <- lapply(placeholders, function(placeholder) {
    parts <- strsplit(placeholder, split = "\\.")[[1]]
    n_decimals <- if (length(parts) > 1) nchar(parts[2]) else 0
    function(value) round(value, digits = n_decimals)
  })

  # Formatting function: round each value with its own rule, then substitute the
  # rounded values for the placeholders in a local copy of the template.
  function(x, output) {
    values <- Map(function(value, rounder) rounder(value), x, roundings)
    regmatches(x = str, m = positions)[[1]] <- values
    str
  }
}

#' Formatting Fraction with Lower Threshold
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a fraction when the second element of the input `x` is the fraction. It applies
#' a lower threshold, below which it is just stated that the fraction is smaller than that.
#'
#' @param threshold (`proportion`)\cr lower threshold.
#'
#' @return An `rtables` formatting function that takes numeric input `x` where the second
#'   element is the fraction that is formatted. If the fraction is above or equal to the threshold,
#'   then it is displayed in percentage. If it is positive but below the threshold, it returns,
#'   e.g. "<1" if the threshold is `0.01`. If it is zero, then just "0" is returned.
#'
#' @examples
#' format_fun <- format_fraction_threshold(0.05)
#' format_fun(x = c(20, 0.1))
#' format_fun(x = c(2, 0.01))
#' format_fun(x = c(0, 0))
#'
#' @family formatting functions
#' @export
format_fraction_threshold <- function(threshold) {
  assert_proportion_value(threshold)
  # Label shown for positive fractions that do not exceed the threshold,
  # e.g. "<5" for a threshold of 0.05.
  string_below_threshold <- paste0("<", round(threshold * 100))
  function(x, ...) {
    assert_proportion_value(x[2], include_boundaries = TRUE)
    # Compare against the user-supplied `threshold` (not a fixed 1% cut-off), so
    # that the cut-off always agrees with the "<..." label built above.
    # NOTE(review): the comparison is strict, so a fraction exactly equal to
    # `threshold` gets the "<..." label - confirm whether `>=` is intended.
    ifelse(
      x[2] > threshold,
      round(x[2] * 100),
      ifelse(
        x[2] == 0,
        "0",
        string_below_threshold
      )
    )
  }
}

#' Formatting Extreme Values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' `rtables` formatting functions that handle extreme values.
#'
#' @param digits (`integer`)\cr number of decimal places to display.
#'
#' @details For each input, apply a format to the specified number of `digits`. If the value is
#'    below a threshold, it returns "<0.01" e.g. if the number of `digits` is 2. If the value is
#'    above a threshold, it returns ">999.99" e.g. if the number of `digits` is 2.
#'    If it is zero, then returns "0.00".
#'
#' @family formatting functions
#' @name extreme_format
NULL

#' @describeIn extreme_format Internal helper function to calculate the threshold and create formatted strings
#'  used in Formatting Functions. Returns a list with elements `threshold` and `format_string`.
#'
#' @return
#' * `h_get_format_threshold()` returns a `list` of 2 elements: `threshold`, with `low` and `high` thresholds,
#'   and `format_string`, with thresholds formatted as strings.
#'
#' @examples
#' h_get_format_threshold(2L)
#'
#' @export
h_get_format_threshold <- function(digits = 2L) {
  checkmate::assert_integerish(digits)

  low_threshold <- 1 / (10 ^ digits) # styler: off
  high_threshold <- 1000 - (1 / (10 ^ digits)) # styler: off

  # Render the thresholds in fixed notation with `digits` decimal places.
  # Pasting the raw numbers would switch to scientific notation for small
  # values (e.g. "<1e-04" instead of "<0.0001" when `digits` is 4).
  # Negative `digits` are formatted with zero decimal places.
  fmt <- paste0("%.", pmax(as.integer(round(digits)), 0L), "f")
  string_below_threshold <- paste0("<", sprintf(fmt, low_threshold))
  string_above_threshold <- paste0(">", sprintf(fmt, high_threshold))

  list(
    "threshold" = c(low = low_threshold, high = high_threshold),
    "format_string" = c(low = string_below_threshold, high = string_above_threshold)
  )
}

#' @describeIn extreme_format Internal helper function to apply a threshold format to a value.
#'   Creates a formatted string to be used in Formatting Functions.
#'
#' @param x (`number`)\cr value to format.
#'
#' @return
#' * `h_format_threshold()` returns the given value, or if the value is not within the digit threshold the relation
#'   of the given value to the digit threshold, as a formatted string.
#'
#' @examples
#' h_format_threshold(0.001)
#' h_format_threshold(1000)
#'
#' @export
h_format_threshold <- function(x, digits = 2L) {
  # Missing values are passed through untouched.
  if (is.na(x)) {
    return(x)
  }

  checkmate::assert_numeric(x, lower = 0)

  l_fmt <- h_get_format_threshold(digits)
  bounds <- l_fmt$threshold
  labels <- l_fmt$format_string

  # Zero itself is not "below the threshold": it is formatted as a number.
  is_below <- x < bounds["low"] && 0 < x

  formatted <- if (is_below) {
    labels["low"]
  } else if (x > bounds["high"]) {
    labels["high"]
  } else {
    sprintf(fmt = paste0("%.", digits, "f"), x)
  }

  unname(formatted)
}

#' Formatting a Single Extreme Value
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Create Formatting Function for a single extreme value.
#'
#' @inheritParams extreme_format
#'
#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme value.
#'
#' @examples
#' format_fun <- format_extreme_values(2L)
#' format_fun(x = 0.127)
#' format_fun(x = Inf)
#' format_fun(x = 0)
#' format_fun(x = 0.009)
#'
#' @family formatting functions
#' @export
format_extreme_values <- function(digits = 2L) {
  # Formatter for a single value; `digits` is captured from the enclosing call.
  formatter <- function(x, ...) {
    checkmate::assert_scalar(x, na.ok = TRUE)

    h_format_threshold(x = x, digits = digits)
  }

  formatter
}

#' Formatting Extreme Values Part of a Confidence Interval
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formatting Function for extreme values part of a confidence interval. Values
#' are formatted as e.g. "(xx.xx, xx.xx)" if the number of `digits` is 2.
#'
#' @inheritParams extreme_format
#'
#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme
#'   values confidence interval.
#'
#' @examples
#' format_fun <- format_extreme_values_ci(2L)
#' format_fun(x = c(0.127, Inf))
#' format_fun(x = c(0, 0.009))
#'
#' @family formatting functions
#' @export
format_extreme_values_ci <- function(digits = 2L) {
  # Formatter for a pair of values (lower and upper limit), each formatted
  # independently with the same `digits`.
  formatter <- function(x, ...) {
    checkmate::assert_vector(x, len = 2)

    lower <- h_format_threshold(x = x[1], digits = digits)
    upper <- h_format_threshold(x = x[2], digits = digits)

    paste0("(", lower, ", ", upper, ")")
  }

  formatter
}

#' Missing Data
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Substitute missing data with a string or factor level.
#'
#' @param x (`factor` or `character` vector)\cr values for which any missing values should be substituted.
#' @param label (`character`)\cr string that missing data should be replaced with.
#'
#' @return `x` with any `NA` values substituted by `label`.
#'
#' @examples
#' explicit_na(c(NA, "a", "b"))
#' is.na(explicit_na(c(NA, "a", "b")))
#'
#' explicit_na(factor(c(NA, "a", "b")))
#' is.na(explicit_na(factor(c(NA, "a", "b"))))
#'
#' explicit_na(sas_na(c("a", "")))
#'
#' @export
explicit_na <- function(x, label = "<Missing>") {
  checkmate::assert_string(label)

  # Factors: turn NA into an explicit level, then drop that level again if it
  # ended up unused.
  if (is.factor(x)) {
    with_na_level <- forcats::fct_na_value_to_level(x, label)
    return(forcats::fct_drop(with_na_level, only = label))
  }

  # Character vectors: plain replacement of the missing entries.
  if (is.character(x)) {
    return(replace(x, is.na(x), label))
  }

  stop("only factors and character vectors allowed")
}

#' Convert Strings to `NA`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' SAS imports missing data as empty strings or strings with whitespaces only. This helper function can be used to
#' convert these values to `NA`s.
#'
#' @inheritParams explicit_na
#' @param empty (`logical`)\cr if `TRUE` empty strings get replaced by `NA`.
#' @param whitespaces (`logical`)\cr if `TRUE` then strings made from whitespaces only get replaced with `NA`.
#'
#' @return `x` with `""` and/or whitespace-only values substituted by `NA`, depending on the values of
#'   `empty` and `whitespaces`.
#'
#' @examples
#' sas_na(c("1", "", " ", "   ", "b"))
#' sas_na(factor(c("", " ", "b")))
#'
#' is.na(sas_na(c("1", "", " ", "   ", "b")))
#'
#' @export
sas_na <- function(x, empty = TRUE, whitespaces = TRUE) {
  checkmate::assert_flag(empty)
  checkmate::assert_flag(whitespaces)

  # Matches strings that consist of whitespace only (at least one character).
  ws_pattern <- "^\\s+$"

  if (is.factor(x)) {
    # For factors the levels are edited; setting a level to NA makes the
    # corresponding values missing.
    if (empty) {
      is_empty <- levels(x) == ""
      if (any(is_empty)) {
        levels(x)[is_empty] <- NA
      }
    }

    if (whitespaces) {
      is_ws <- grepl(ws_pattern, levels(x))
      if (any(is_ws)) {
        levels(x)[is_ws] <- NA
      }
    }

    return(x)
  }

  if (is.character(x)) {
    if (empty) {
      x[x == ""] <- NA_character_
    }

    if (whitespaces) {
      x[grepl(ws_pattern, x)] <- NA_character_
    }

    return(x)
  }

  stop("only factors and character vectors allowed")
}

#' Helper Functions for Tabulating Binary Response by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions that tabulate in a data frame statistics such as response rate
#' and odds ratio for population subgroups.
#'
#' @inheritParams argument_convention
#' @inheritParams response_subgroups
#' @param arm (`factor`)\cr the treatment group variable.
#'
#' @details Main functionality is to prepare data for use in a layout-creating function.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
#'   droplevels() %>%
#'   mutate(
#'     # Reorder levels of factor to make the placebo group the reference arm.
#'     ARM = fct_relevel(ARM, "B: Placebo"),
#'     rsp = AVALC == "CR"
#'   )
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' @name h_response_subgroups
NULL

#' @describeIn h_response_subgroups helper to prepare a data frame of binary responses by arm.
#'
#' @return
#' * `h_proportion_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, and `prop`.
#'
#' @examples
#' h_proportion_df(
#'   c(TRUE, FALSE, FALSE),
#'   arm = factor(c("A", "A", "B"), levels = c("A", "B"))
#' )
#'
#' @export
h_proportion_df <- function(rsp, arm) {
  checkmate::assert_logical(rsp)
  assert_valid_factor(arm, len = length(rsp))

  # Only observations with a non-missing response are used.
  keep <- !is.na(rsp)
  rsp <- rsp[keep]
  arm <- arm[keep]

  # One-row summary for a single arm; arms without observations get n = 0 and
  # missing response count / proportion.
  summarize_arm <- function(arm_rsp, arm_name) {
    if (length(arm_rsp) == 0) {
      return(
        data.frame(
          arm = arm_name,
          n = 0L,
          n_rsp = NA,
          prop = NA,
          stringsAsFactors = FALSE
        )
      )
    }

    n_prop <- s_proportion(df = arm_rsp)$n_prop
    data.frame(
      arm = arm_name,
      n = length(arm_rsp),
      n_rsp = unname(n_prop[1]),
      prop = unname(n_prop[2]),
      stringsAsFactors = FALSE
    )
  }

  rsp_by_arm <- split(rsp, arm)
  per_arm <- Map(summarize_arm, rsp_by_arm, names(rsp_by_arm))

  # Stack the per-arm rows and restore `arm` as a factor with the input levels.
  df <- do.call(rbind, args = c(per_arm, make.row.names = FALSE))
  df$arm <- factor(df$arm, levels = levels(arm))
  df
}

#' @describeIn h_response_subgroups summarizes proportion of binary responses by arm and across subgroups
#'    in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and
#'    requires elements `rsp`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies
#'    groupings for `subgroups` variables.
#'
#' @return
#' * `h_proportion_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, `prop`, `subgroup`,
#'   `var`, `var_label`, and `row_type`.
#'
#' @examples
#' h_proportion_subgroups_df(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f
#' )
#'
#' # Define groupings for BMRKR2 levels.
#' h_proportion_subgroups_df(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' @export
h_proportion_subgroups_df <- function(variables,
                                      data,
                                      groups_lists = list(),
                                      label_all = "All Patients") {
  checkmate::assert_character(variables$rsp)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  rsp_var <- variables$rsp
  arm_var <- variables$arm

  # Overall rows, labelled with `label_all`.
  result_all <- h_proportion_df(data[[rsp_var]], data[[arm_var]])
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables the overall rows are the complete result.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  # One block of rows per subgroup level, each with its label columns attached.
  l_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  l_result <- lapply(l_data, function(grp) {
    result <- h_proportion_df(grp$df[[rsp_var]], grp$df[[arm_var]])
    # Repeat the single label row once per result row.
    result_labels <- grp$df_labels[rep(1, times = nrow(result)), ]
    cbind(result, result_labels)
  })

  result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(result_all, result_subgroups)
}

#' @describeIn h_response_subgroups helper to prepare a data frame with estimates of
#'   the odds ratio between a treatment and a control arm.
#'
#' @inheritParams response_subgroups
#' @param strata_data (`factor`, `data.frame` or `NULL`)\cr required if stratified analysis is performed.
#'
#' @return
#' * `h_odds_ratio_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, `conf_level`, and
#'   optionally `pval` and `pval_label`.
#'
#' @examples
#' # Unstratified analysis.
#' h_odds_ratio_df(
#'   c(TRUE, FALSE, FALSE, TRUE),
#'   arm = factor(c("A", "A", "B", "B"), levels = c("A", "B"))
#' )
#'
#' # Include p-value.
#' h_odds_ratio_df(adrs_f$rsp, adrs_f$ARM, method = "chisq")
#'
#' # Stratified analysis.
#' h_odds_ratio_df(
#'   rsp = adrs_f$rsp,
#'   arm = adrs_f$ARM,
#'   strata_data = adrs_f[, c("STRATA1", "STRATA2")],
#'   method = "cmh"
#' )
#'
#' @export
h_odds_ratio_df <- function(rsp, arm, strata_data = NULL, conf_level = 0.95, method = NULL) {
  assert_valid_factor(arm, n.levels = 2, len = length(rsp))

  df_rsp <- data.frame(rsp = rsp, arm = arm)

  # Collapse the strata columns (if any) into a single "strata" factor.
  strata_name <- NULL
  if (!is.null(strata_data)) {
    strata_var <- interaction(strata_data, drop = TRUE)
    assert_valid_factor(strata_var, len = nrow(df_rsp))

    strata_name <- "strata"
    df_rsp[[strata_name]] <- strata_var
  }

  # First factor level is used as the reference group, second as the comparison.
  l_df <- split(df_rsp, arm)
  ref_df <- l_df[[1]]
  trt_df <- l_df[[2]]
  has_ref <- nrow(ref_df) > 0
  has_trt <- nrow(trt_df) > 0
  with_test <- !is.null(method)

  # Result row used whenever the odds ratio cannot be estimated.
  na_result <- function(n_tot) {
    out <- data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = n_tot,
      or = NA,
      lcl = NA,
      ucl = NA,
      conf_level = conf_level,
      stringsAsFactors = FALSE
    )
    if (with_test) {
      out$pval <- NA
      out$pval_label <- NA
    }
    out
  }

  if (has_ref && has_trt) {
    # Odds ratio and CI.
    result_odds_ratio <- s_odds_ratio(
      df = trt_df,
      .var = "rsp",
      .ref_group = ref_df,
      .in_ref_col = FALSE,
      .df_row = df_rsp,
      variables = list(arm = "arm", strata = strata_name),
      conf_level = conf_level
    )
    or_ci <- result_odds_ratio$or_ci

    df <- data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = unname(result_odds_ratio$n_tot["n_tot"]),
      or = unname(or_ci["est"]),
      lcl = unname(or_ci["lcl"]),
      ucl = unname(or_ci["ucl"]),
      conf_level = conf_level,
      stringsAsFactors = FALSE
    )

    if (with_test) {
      # Test for difference.
      result_test <- s_test_proportion_diff(
        df = trt_df,
        .var = "rsp",
        .ref_group = ref_df,
        .in_ref_col = FALSE,
        variables = list(strata = strata_name),
        method = method
      )

      df$pval <- as.numeric(result_test$pval)
      df$pval_label <- obj_label(result_test$pval)
    }

    df
  } else if (has_ref || has_trt) {
    # Exactly one arm has data: cannot go through the model, so n_tot is
    # obtained from the data.
    na_result(sum(stats::complete.cases(df_rsp)))
  } else {
    # Neither arm has data.
    na_result(0L)
  }
}

#' @describeIn h_response_subgroups summarizes estimates of the odds ratio between a treatment and a control
#'   arm across subgroups in a data frame. `variables` corresponds to the names of variables found in
#'   `data`, passed as a named list and requires elements `rsp`, `arm` and optionally `subgroups`
#'   and `strat`. `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @return
#' * `h_odds_ratio_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`,
#'   `conf_level`, `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @examples
#' # Unstratified analysis.
#' h_odds_ratio_subgroups_df(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f
#' )
#'
#' # Stratified analysis.
#' h_odds_ratio_subgroups_df(
#'   variables = list(
#'     rsp = "rsp",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2"),
#'     strat = c("STRATA1", "STRATA2")
#'   ),
#'   data = adrs_f
#' )
#'
#' # Define groupings of BMRKR2 levels.
#' h_odds_ratio_subgroups_df(
#'   variables = list(
#'     rsp = "rsp",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' @export
h_odds_ratio_subgroups_df <- function(variables,
                                      data,
                                      groups_lists = list(),
                                      conf_level = 0.95,
                                      method = NULL,
                                      label_all = "All Patients") {
  checkmate::assert_character(variables$rsp)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_character(variables$strat, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  strat_vars <- variables$strat

  # Odds ratio summary for one data set; the strata columns are passed along
  # only when a stratified analysis was requested.
  odds_ratio_for <- function(dataset) {
    strata_data <- if (is.null(strat_vars)) {
      NULL
    } else {
      dataset[, strat_vars, drop = FALSE]
    }

    h_odds_ratio_df(
      rsp = dataset[[variables$rsp]],
      arm = dataset[[variables$arm]],
      strata_data = strata_data,
      conf_level = conf_level,
      method = method
    )
  }

  # Overall row, labelled with `label_all`.
  result_all <- odds_ratio_for(data)
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables the overall row is the complete result.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  l_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)

  l_result <- lapply(l_data, function(grp) {
    result <- odds_ratio_for(grp$df)
    # Repeat the single label row once per result row.
    result_labels <- grp$df_labels[rep(1, times = nrow(result)), ]
    cbind(result, result_labels)
  })

  result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(result_all, result_subgroups)
}

#' Difference Test for Two Proportions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Various tests were implemented to test the difference between two proportions.
#'
#' @inheritParams argument_convention
#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns.
#'
#' @seealso [h_prop_diff_test]
#'
#' @name prop_diff_test
NULL

#' @describeIn prop_diff_test Statistics function which tests the difference between two proportions.
#'
#' @param method (`string`)\cr one of `chisq`, `cmh`, `fisher`, or `schouten`; specifies the test used
#'   to calculate the p-value.
#'
#' @return
#' * `s_test_proportion_diff()` returns a named `list` with a single item `pval` with an attribute `label`
#'   describing the method used. The p-value tests the null hypothesis that proportions in two groups are the same.
#'
#'
#' @keywords internal
s_test_proportion_diff <- function(df,
                                   .var,
                                   .ref_group,
                                   .in_ref_col,
                                   variables = list(strata = NULL),
                                   method = c("chisq", "schouten", "fisher", "cmh")) {
  method <- match.arg(method)

  # In the reference column no test is run and the p-value stays an empty string.
  pval <- ""

  if (!.in_ref_col) {
    assert_df_with_variables(df, list(rsp = .var))
    assert_df_with_variables(.ref_group, list(rsp = .var))

    # Stack reference and comparison data: responses first, then a matching
    # group indicator.
    rsp <- factor(
      c(.ref_group[[.var]], df[[.var]]),
      levels = c("TRUE", "FALSE")
    )
    grp <- factor(
      rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))),
      levels = c("ref", "Not-ref")
    )

    # Strata are mandatory for the CMH test and validated whenever supplied.
    if (!is.null(variables$strata) || method == "cmh") {
      strata <- variables$strata
      checkmate::assert_false(is.null(strata))
      strata_vars <- stats::setNames(as.list(strata), strata)
      assert_df_with_variables(df, strata_vars)
      assert_df_with_variables(.ref_group, strata_vars)
      strata <- c(interaction(.ref_group[strata]), interaction(df[strata]))
    }

    # Only the CMH test uses the three-way (group x response x strata) table.
    pval <- if (method == "cmh") {
      prop_cmh(table(grp, rsp, strata))
    } else {
      tbl <- table(grp, rsp)
      switch(method,
        chisq = prop_chisq(tbl),
        fisher = prop_fisher(tbl),
        schouten = prop_schouten(tbl)
      )
    }
  }

  list(pval = formatters::with_label(pval, d_test_proportion_diff(method)))
}

#' Description of the Difference Test Between Two Proportions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function that describes the analysis in `s_test_proportion_diff`.
#'
#' @inheritParams s_test_proportion_diff
#'
#' @return `string` describing the test from which the p-value is derived.
#'
#' @export
d_test_proportion_diff <- function(method) {
  checkmate::assert_string(method)

  # Human-readable name of each supported test, keyed by method code.
  descriptions <- c(
    schouten = "Chi-Squared Test with Schouten Correction",
    chisq = "Chi-Squared Test",
    cmh = "Cochran-Mantel-Haenszel Test",
    fisher = "Fisher's Exact Test"
  )

  if (!method %in% names(descriptions)) {
    stop(paste(method, "does not have a description"))
  }

  paste0("p-value (", descriptions[[method]], ")")
}

#' @describeIn prop_diff_test Formatted analysis function which is used as `afun` in `test_proportion_diff()`.
#'
#' @return
#' * `a_test_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#'
#' @keywords internal
a_test_proportion_diff <- make_afun(
  s_test_proportion_diff,
  # Default format string for the p-value.
  .formats = c(pval = "x.xxxx | (<0.0001)"),
  # Default indent modifier for the p-value.
  .indent_mods = c(pval = 1L)
)

#' @describeIn prop_diff_test Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... other arguments are passed to [s_test_proportion_diff()].
#'
#' @return
#' * `test_proportion_diff()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_test_proportion_diff()` to the table layout.
#'
#' @examples
#' dta <- data.frame(
#'   rsp = sample(c(TRUE, FALSE), 100, TRUE),
#'   grp = factor(rep(c("A", "B"), each = 50)),
#'   strat = factor(rep(c("V", "W", "X", "Y", "Z"), each = 20))
#' )
#'
#' # With `rtables` pipelines.
#' l <- basic_table() %>%
#'   split_cols_by(var = "grp", ref_group = "B") %>%
#'   test_proportion_diff(
#'     vars = "rsp",
#'     method = "cmh", variables = list(strata = "strat")
#'   )
#'
#' build_table(l, df = dta)
#'
#' @export
test_proportion_diff <- function(lyt,
                                 vars,
                                 ...,
                                 var_labels = vars,
                                 show_labels = "hidden",
                                 table_names = vars,
                                 .stats = NULL,
                                 .formats = NULL,
                                 .labels = NULL,
                                 .indent_mods = NULL) {
  # Derive the analysis function from `a_test_proportion_diff`, passing along
  # the user-supplied statistics, formats, labels and indent modifiers.
  afun <- make_afun(
    a_test_proportion_diff,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
  # Arguments given through `...` are handed to `analyze()` as `extra_args`.
  analyze(
    lyt,
    vars,
    afun = afun,
    var_labels = var_labels,
    extra_args = list(...),
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Helper Functions to Test Proportion Differences
#'
#' Helper functions to implement various tests on the difference between two proportions.
#'
#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns.
#'
#' @return A p-value.
#'
#' @seealso [prop_diff_test()] for implementation of these helper functions.
#'
#' @name h_prop_diff_test
NULL

#' @describeIn h_prop_diff_test performs Chi-Squared test. Internally calls [stats::prop.test()].
#'
#'
#' @keywords internal
prop_chisq <- function(tbl) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)

  # Put the "TRUE" column first.
  ordered_tbl <- tbl[, c("TRUE", "FALSE")]

  # A column without any observations gives a p-value of 1.
  has_empty_column <- any(colSums(ordered_tbl) == 0)

  if (has_empty_column) {
    1
  } else {
    stats::prop.test(ordered_tbl, correct = FALSE)$p.value
  }
}

#' @describeIn h_prop_diff_test performs stratified Cochran-Mantel-Haenszel test. Internally calls
#'   [stats::mantelhaen.test()]. Note that strata with less than two observations are automatically discarded.
#'
#' @param ary (`array`, 3 dimensions)\cr array with two groups in rows, the binary response
#'   (`TRUE`/`FALSE`) in columns, and the strata in the third dimension.
#'
#'
#' @keywords internal
prop_cmh <- function(ary) {
  checkmate::assert_array(ary)
  checkmate::assert_integer(c(ncol(ary), nrow(ary)), lower = 2, upper = 2)
  checkmate::assert_integer(length(dim(ary)), lower = 3, upper = 3)

  # Number of observations in each stratum (third array dimension).
  n_per_stratum <- apply(ary, MARGIN = 3, sum)
  has_sparse_strata <- any(n_per_stratum < 5)

  if (has_sparse_strata) {
    warning("<5 data points in some strata. CMH test may be incorrect.")
    # Keep only the strata with more than one observation.
    keep <- n_per_stratum > 1
    ary <- ary[, , keep]
  }

  cmh_test <- stats::mantelhaen.test(ary, correct = FALSE)
  cmh_test$p.value
}

#' @describeIn h_prop_diff_test performs the Chi-Squared test with Schouten correction.
#'
#' @seealso For information on the Schouten correction (Schouten, 1980),
#'   visit \url{https://onlinelibrary.wiley.com/doi/abs/10.1002/bimj.4710220305}.
#'
#'
#' @keywords internal
prop_schouten <- function(tbl) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)
  tbl <- tbl[, c("TRUE", "FALSE")]
  # A column without any observations gives a p-value of 1.
  if (any(colSums(tbl) == 0)) {
    return(1)
  }

  # Row and column totals are converted to double before they are multiplied.
  # Counts coming from `table()` are integers, and the product of four integer
  # totals in the denominator below overflows to NA (with a warning) once it
  # exceeds the integer range, e.g. for 1000 subjects per arm.
  n1 <- as.numeric(sum(tbl[1, ]))
  n2 <- as.numeric(sum(tbl[2, ]))
  n <- n1 + n2
  n_col1 <- as.numeric(sum(tbl[, 1]))
  n_col2 <- as.numeric(sum(tbl[, 2]))

  # Main diagonal and anti-diagonal cells of the 2 x 2 table.
  ad <- diag(tbl)
  bc <- diag(apply(tbl, 2, rev))

  t_schouten <- (n - 1) *
    (abs(prod(ad) - prod(bc)) - 0.5 * min(n1, n2))^2 /
    (n1 * n2 * n_col1 * n_col2)

  # Upper tail probability of a chi-squared distribution with 1 df.
  1 - stats::pchisq(t_schouten, df = 1)
}

#' @describeIn h_prop_diff_test performs the Fisher's exact test. Internally calls [stats::fisher.test()].
#'
#'
#' @keywords internal
prop_fisher <- function(tbl) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)

  # Put the "TRUE" column first before running the exact test.
  ordered_tbl <- tbl[, c("TRUE", "FALSE")]
  fisher_test <- stats::fisher.test(ordered_tbl)
  fisher_test$p.value
}

#' Individual Patient Plots
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Line plot(s) displaying trend in patients' parameter values over time is rendered.
#' Patients' individual baseline values can be added to the plot(s) as reference.
#'
#' @inheritParams argument_convention
#' @param xvar (`string`)\cr time point variable to be plotted on x-axis.
#' @param yvar (`string`)\cr continuous analysis variable to be plotted on y-axis.
#' @param xlab (`string`)\cr plot label for x-axis.
#' @param ylab (`string`)\cr plot label for y-axis.
#' @param id_var (`string`)\cr variable used as patient identifier.
#' @param title (`string`)\cr title for plot.
#' @param subtitle (`string`)\cr subtitle for plot.
#' @param add_baseline_hline (`flag`)\cr adds horizontal line at baseline y-value on
#'   plot when TRUE.
#' @param yvar_baseline (`string`)\cr variable with baseline values only.
#'   Ignored when `add_baseline_hline` is FALSE.
#' @param ggtheme (`theme`)\cr optional graphical theme function as provided
#'   by `ggplot2` to control outlook of plot. Use `ggplot2::theme()` to tweak the display.
#' @param plotting_choices (`character`)\cr specifies options for displaying
#'   plots. Must be one of "all_in_one", "split_by_max_obs", "separate_by_obs".
#' @param max_obs_per_plot (`count`)\cr Number of observations to be plotted on one
#'   plot. Ignored when `plotting_choices` is not "separate_by_obs".
#' @param caption (`character` scalar)\cr optional caption below the plot.
#' @param col (`character`)\cr lines colors.
#'
#' @seealso Relevant helper function [h_g_ipp()].
#'
#' @name individual_patient_plot
NULL

#' Helper Function To Create Simple Line Plot over Time
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function that generates a simple line plot displaying parameter trends over time.
#'
#' @inheritParams argument_convention
#' @inheritParams g_ipp
#'
#' @return A `ggplot` line plot.
#'
#' @seealso [g_ipp()] which uses this function.
#'
#' @examples
#' library(dplyr)
#' library(nestcolor)
#'
#' # Select a small sample of data to plot.
#' adlb <- tern_ex_adlb %>%
#'   filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>%
#'   slice(1:36)
#'
#' p <- h_g_ipp(
#'   df = adlb,
#'   xvar = "AVISIT",
#'   yvar = "AVAL",
#'   xlab = "Visit",
#'   id_var = "USUBJID",
#'   ylab = "SGOT/ALT (U/L)",
#'   add_baseline_hline = TRUE
#' )
#' p
#'
#' @export
h_g_ipp <- function(df,
                    xvar,
                    yvar,
                    xlab,
                    ylab,
                    id_var,
                    title = "Individual Patient Plots",
                    subtitle = "",
                    caption = NULL,
                    add_baseline_hline = FALSE,
                    yvar_baseline = "BASE",
                    ggtheme = nestcolor::theme_nest(),
                    col = NULL) {
  # Check the container first so that the column-name checks below operate on a real data frame.
  checkmate::assert_data_frame(df)
  checkmate::assert_string(xvar)
  checkmate::assert_string(yvar)
  checkmate::assert_string(yvar_baseline)
  checkmate::assert_string(id_var)
  checkmate::assert_string(xlab)
  checkmate::assert_string(ylab)
  checkmate::assert_string(title)
  checkmate::assert_string(subtitle)
  checkmate::assert_flag(add_baseline_hline)
  checkmate::assert_character(col, null.ok = TRUE)

  # `yvar_baseline` is only read when the baseline line is drawn, so its column is only
  # required in that case.
  required_cols <- c(xvar, yvar, id_var, if (add_baseline_hline) yvar_baseline)
  checkmate::assert_subset(required_cols, colnames(df))

  # One line (with points) per patient, coloured by patient identifier.
  p <- ggplot2::ggplot(
    data = df,
    mapping = ggplot2::aes(
      x = .data[[xvar]],
      y = .data[[yvar]],
      group = .data[[id_var]],
      colour = .data[[id_var]]
    )
  ) +
    ggplot2::geom_line(linewidth = 0.4) +
    ggplot2::geom_point(size = 2) +
    ggplot2::labs(
      x = xlab,
      y = ylab,
      title = title,
      subtitle = subtitle,
      caption = caption
    ) +
    ggtheme

  if (add_baseline_hline) {
    # One row per distinct (patient, baseline value) combination.
    baseline_df <- unique(df[, c(id_var, yvar_baseline)])

    # Shift each label by 2.5% of the observed y-range so it does not sit on its line.
    label_offset <- 0.025 * (max(df[[yvar]], na.rm = TRUE) - min(df[[yvar]], na.rm = TRUE))

    p <- p +
      ggplot2::geom_hline(
        data = baseline_df,
        mapping = ggplot2::aes(
          yintercept = .data[[yvar_baseline]],
          colour = .data[[id_var]]
        ),
        linetype = "dotdash",
        linewidth = 0.4
      ) +
      ggplot2::geom_text(
        data = baseline_df,
        mapping = ggplot2::aes(
          x = 1,
          y = .data[[yvar_baseline]],
          label = .data[[id_var]],
          colour = .data[[id_var]]
        ),
        nudge_y = label_offset,
        vjust = "right",
        size = 2
      )
  }

  # Custom colours apply to the patient lines as well, so they must not depend on
  # whether baseline lines were requested.
  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_color_manual(values = col)
  }
  p
}

#' @describeIn individual_patient_plot Plotting function for individual patient plots which, depending on user
#'   preference, renders a single graphic or compiles a list of graphics that show trends in individual's parameter
#'   values over time.
#'
#' @return A `ggplot` object or a list of `ggplot` objects.
#'
#' @examples
#' library(dplyr)
#' library(nestcolor)
#'
#' # Select a small sample of data to plot.
#' adlb <- tern_ex_adlb %>%
#'   filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>%
#'   slice(1:36)
#'
#' plot_list <- g_ipp(
#'   df = adlb,
#'   xvar = "AVISIT",
#'   yvar = "AVAL",
#'   xlab = "Visit",
#'   ylab = "SGOT/ALT (U/L)",
#'   title = "Individual Patient Plots",
#'   add_baseline_hline = TRUE,
#'   plotting_choices = "split_by_max_obs",
#'   max_obs_per_plot = 5
#' )
#' plot_list
#'
#' @export
g_ipp <- function(df,
                  xvar,
                  yvar,
                  xlab,
                  ylab,
                  id_var = "USUBJID",
                  title = "Individual Patient Plots",
                  subtitle = "",
                  caption = NULL,
                  add_baseline_hline = FALSE,
                  yvar_baseline = "BASE",
                  ggtheme = nestcolor::theme_nest(),
                  plotting_choices = c("all_in_one", "split_by_max_obs", "separate_by_obs"),
                  max_obs_per_plot = 4,
                  col = NULL) {
  checkmate::assert_count(max_obs_per_plot)
  checkmate::assert_subset(plotting_choices, c("all_in_one", "split_by_max_obs", "separate_by_obs"))
  checkmate::assert_character(col, null.ok = TRUE)

  plotting_choices <- match.arg(plotting_choices)

  # Single place where the plotting helper is called; only the data subset varies between modes.
  plot_subset <- function(data) {
    h_g_ipp(
      df = data,
      xvar = xvar,
      yvar = yvar,
      xlab = xlab,
      ylab = ylab,
      id_var = id_var,
      title = title,
      subtitle = subtitle,
      caption = caption,
      add_baseline_hline = add_baseline_hline,
      yvar_baseline = yvar_baseline,
      ggtheme = ggtheme,
      col = col
    )
  }

  # All patients on one plot: returns a single `ggplot` object.
  if (plotting_choices == "all_in_one") {
    return(plot_subset(df))
  }

  # Consecutive groups of at most `max_obs_per_plot` patients: returns an unnamed list of plots.
  if (plotting_choices == "split_by_max_obs") {
    # A group size of zero cannot partition the patients; fail with a clear message.
    checkmate::assert_count(max_obs_per_plot, positive = TRUE)

    id_vec <- unique(df[[id_var]])
    # Patient k (in order of first appearance) goes to group ceiling(k / max_obs_per_plot).
    group_index <- ceiling(seq_along(id_vec) / max_obs_per_plot)
    id_list <- split(id_vec, group_index)

    plot_list <- lapply(
      id_list,
      function(ids) plot_subset(df[df[[id_var]] %in% ids, ])
    )
    return(unname(plot_list))
  }

  # "separate_by_obs": one plot per patient, returned as a list named by patient identifier.
  lapply(split(df, df[[id_var]]), plot_subset)
}

#' Count the Number of Patients with a Particular Event
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The primary analysis variable `.var` denotes the unique patient identifier.
#'
#' @inheritParams argument_convention
#'
#' @seealso [count_patients_with_flags]
#'
#' @name count_patients_with_event
NULL

#' @describeIn count_patients_with_event Statistics function which counts the number of patients for which
#'   the defined event has occurred.
#'
#' @inheritParams summarize_variables
#' @param .var (`character`)\cr name of the column that contains the unique identifier.
#' @param filters (`character`)\cr a character vector specifying the column names and flag variables
#'   to be used for counting the number of unique identifiers satisfying such conditions.
#'   Multiple column names and flags are accepted in this format
#'   `c("column_name1" = "flag1", "column_name2" = "flag2")`.
#'   Note that only equality is being accepted as condition.
#'
#' @return
#' * `s_count_patients_with_event()` returns the count and fraction of unique identifiers with the defined event.
#'
#' @examples
#' library(dplyr)
#'
#' # `s_count_patients_with_event()`
#'
#' s_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y")
#' )
#' s_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL")
#' )
#' s_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL"),
#'   denom = "N_col",
#'   .N_col = 456
#' )
#'
#' @export
s_count_patients_with_event <- function(df,
                                        .var,
                                        filters,
                                        .N_col, # nolint
                                        .N_row, # nolint
                                        denom = c("n", "N_row", "N_col")) {
  # The names of `filters` are the columns to test; its values are the required flags.
  col_names <- names(filters)
  checkmate::assert_subset(col_names, colnames(df))

  # For every (column, flag) pair, collect the row positions where the column equals the flag.
  rows_per_condition <- mapply(
    FUN = function(column, flag) which(df[[column]] == flag),
    col_names,
    filters,
    SIMPLIFY = FALSE
  )

  # Keep only the rows that satisfy all conditions at once.
  rows_all_conditions <- Reduce(intersect, rows_per_condition)
  matching_records <- df[rows_all_conditions, ]
  ids_with_event <- as.character(unique(matching_records[[.var]]))

  # Count the identifiers with the event among all unique identifiers found in `df`.
  all_ids <- as.character(unique(df[[.var]]))
  s_count_values(
    all_ids,
    ids_with_event,
    denom = denom,
    .N_col = .N_col,
    .N_row = .N_row
  )
}

#' @describeIn count_patients_with_event Formatted analysis function which is used as `afun`
#'   in `count_patients_with_event()`.
#'
#' @return
#' * `a_count_patients_with_event()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # `a_count_patients_with_event()`
#'
#' a_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y"),
#'   .N_col = 100,
#'   .N_row = 100
#' )
#'
#' @export
# Wrap the statistics function into an analysis function with a default format
# for the `count_fraction` statistic.
a_count_patients_with_event <- make_afun(
  fun = s_count_patients_with_event,
  .formats = list(count_fraction = format_count_fraction_fixed_dp)
)

#' @describeIn count_patients_with_event Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_patients_with_event()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_patients_with_event()` to the table layout.
#'
#' @examples
#' # `count_patients_with_event()`
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_values(
#'     "STUDYID",
#'     values = "AB12345",
#'     .stats = "count",
#'     .labels = c(count = "Total AEs")
#'   ) %>%
#'   count_patients_with_event(
#'     "SUBJID",
#'     filters = c("TRTEMFL" = "Y"),
#'     .labels = c(count_fraction = "Total number of patients with at least one adverse event"),
#'     table_names = "tbl_all"
#'   ) %>%
#'   count_patients_with_event(
#'     "SUBJID",
#'     filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL"),
#'     .labels = c(count_fraction = "Total number of patients with fatal AEs"),
#'     table_names = "tbl_fatal"
#'   ) %>%
#'   count_patients_with_event(
#'     "SUBJID",
#'     filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL", "AEREL" = "Y"),
#'     .labels = c(count_fraction = "Total number of patients with related fatal AEs"),
#'     .indent_mods = c(count_fraction = 2L),
#'     table_names = "tbl_rel_fatal"
#'   )
#' build_table(lyt, tern_ex_adae, alt_counts_df = tern_ex_adsl)
#'
#' @export
count_patients_with_event <- function(lyt,
                                      vars,
                                      ...,
                                      table_names = vars,
                                      .stats = "count_fraction",
                                      .formats = NULL,
                                      .labels = NULL,
                                      .indent_mods = NULL) {
  # Specialise the formatted analysis function with the user's statistic, format,
  # label and indentation choices.
  customised_afun <- make_afun(
    a_count_patients_with_event,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Variable labels are shown only when more than one variable is analysed.
  label_visibility <- if (length(vars) > 1) "visible" else "hidden"

  # Everything passed through `...` is forwarded to the analysis function as extra arguments.
  analyze(
    lyt,
    vars,
    afun = customised_afun,
    extra_args = list(...),
    show_labels = label_visibility,
    table_names = table_names
  )
}

#' Cox Proportional Hazards Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Fits a Cox regression model and estimates hazard ratio to describe the effect size in a survival analysis.
#'
#' @inheritParams argument_convention
#'
#' @details Cox models are the most commonly used methods to estimate the magnitude of
#'   the effect in survival analysis. It assumes proportional hazards: the ratio
#'   of the hazards between groups (e.g., two arms) is constant over time.
#'   This ratio is referred to as the "hazard ratio" (HR) and is one of the
#'   most commonly reported metrics to describe the effect size in survival
#'   analysis (NEST Team, 2020).
#'
#' @seealso [fit_coxreg] for relevant fitting functions, [h_cox_regression] for relevant
#'   helper functions, and [tidy_coxreg] for custom tidy methods.
#'
#' @examples
#' library(survival)
#'
#' # Testing dataset [survival::bladder].
#' set.seed(1, kind = "Mersenne-Twister")
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   tibble::tibble(
#'     TIME = stop,
#'     STATUS = event,
#'     ARM = as.factor(rx),
#'     COVAR1 = as.factor(enum) %>% formatters::with_label("A Covariate Label"),
#'     COVAR2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4, labels = c("F", "F", "M", "M")
#'     ) %>% formatters::with_label("Sex (F/M)")
#'   )
#' )
#' dta_bladder$AGE <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#' dta_bladder$STUDYID <- factor("X")
#'
#' plot(
#'   survfit(Surv(TIME, STATUS) ~ ARM + COVAR1, data = dta_bladder),
#'   lty = 2:4,
#'   xlab = "Months",
#'   col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
#' )
#'
#' @name cox_regression
NULL

#' @describeIn cox_regression Statistics function that transforms results tabulated
#'   from [fit_coxreg_univar()] or [fit_coxreg_multivar()] into a list.
#'
#' @param model_df (`data.frame`)\cr contains the resulting model fit from a [fit_coxreg]
#'   function with tidying applied via [broom::tidy()].
#' @param .stats (`character`)\cr the name of statistics to be reported among:
#'   * `n`: number of observations (univariate only)
#'   * `hr`: hazard ratio
#'   * `ci`: confidence interval
#'   * `pval`: p-value of the treatment effect
#'   * `pval_inter`: p-value of the interaction effect between the treatment and the covariate (univariate only)
#' @param .which_vars (`character`)\cr which rows should statistics be returned for from the given model.
#'   Defaults to "all". Other options include "var_main" for main effects, `"inter"` for interaction effects,
#'   and `"multi_lvl"` for multivariate model covariate level rows. When `.which_vars` is "all" specific
#'   variables can be selected by specifying `.var_nms`.
#' @param .var_nms (`character`)\cr the `term` value of rows in `model_df` for which `.stats` should be returned.
#'   Typically this is the name of a variable. If using variable labels, `.var_nms` should be a vector of both the
#'   desired variable name and the variable label in that order to see all `.stats` related to that variable.
#'   When `.which_vars` is `"var_main"`, `.var_nms` should be only the variable name.
#'
#' @return
#' * `s_coxreg()` returns the selected statistic from the Cox regression model for the selected variable(s).
#'
#' @examples
#' # s_coxreg
#'
#' # Univariate
#' u1_variables <- list(
#'   time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
#' )
#' univar_model <- fit_coxreg_univar(variables = u1_variables, data = dta_bladder)
#' df1 <- broom::tidy(univar_model)
#' s_coxreg(model_df = df1, .stats = "hr")
#'
#' # Univariate with interactions
#' univar_model_inter <- fit_coxreg_univar(
#'   variables = u1_variables, control = control_coxreg(interaction = TRUE), data = dta_bladder
#' )
#' df1_inter <- broom::tidy(univar_model_inter)
#' s_coxreg(model_df = df1_inter, .stats = "hr", .which_vars = "inter", .var_nms = "COVAR1")
#'
#' # Univariate without treatment arm - only "COVAR2" covariate effects
#' u2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
#' univar_covs_model <- fit_coxreg_univar(variables = u2_variables, data = dta_bladder)
#' df1_covs <- broom::tidy(univar_covs_model)
#' s_coxreg(model_df = df1_covs, .stats = "hr", .var_nms = c("COVAR2", "Sex (F/M)"))
#'
#' # Multivariate.
#' m1_variables <- list(
#'   time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
#' )
#' multivar_model <- fit_coxreg_multivar(variables = m1_variables, data = dta_bladder)
#' df2 <- broom::tidy(multivar_model)
#' s_coxreg(model_df = df2, .stats = "pval", .which_vars = "var_main", .var_nms = "COVAR1")
#' s_coxreg(
#'   model_df = df2, .stats = "pval", .which_vars = "multi_lvl",
#'   .var_nms = c("COVAR1", "A Covariate Label")
#' )
#'
#' # Multivariate without treatment arm - only "COVAR1" main effect
#' m2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
#' multivar_covs_model <- fit_coxreg_multivar(variables = m2_variables, data = dta_bladder)
#' df2_covs <- broom::tidy(multivar_covs_model)
#' s_coxreg(model_df = df2_covs, .stats = "hr")
#'
#' @export
s_coxreg <- function(model_df, .stats, .which_vars = "all", .var_nms = NULL) {
  # `model_df` must provide a `term` column and a column named after the requested statistic.
  assert_df_with_variables(model_df, list(term = "term", stat = .stats))
  checkmate::assert_multi_class(model_df$term, classes = c("factor", "character"))
  model_df$term <- as.character(model_df$term)
  .var_nms <- .var_nms[!is.na(.var_nms)]

  # Keep only the rows of the requested terms (all rows when no names are given).
  if (length(.var_nms) > 0) model_df <- model_df[model_df$term %in% .var_nms, ]
  # For "multi_lvl" all remaining rows are relabelled with the last name so they form one group.
  if (.which_vars == "multi_lvl") model_df$term <- tail(.var_nms, 1)

  # We need a list with names corresponding to the stats to display of equal length to the list of stats.
  y <- split(model_df, f = model_df$term, drop = FALSE)
  y <- stats::setNames(y, nm = rep(.stats, length(y)))

  if (.which_vars == "var_main") {
    y <- lapply(y, function(x) x[1, ]) # only main effect
  } else if (.which_vars %in% c("inter", "multi_lvl")) {
    # Exclude the main effect (first row) of every group that has further rows. The decision is
    # made per group: looking at the first group only would empty any single-row group whenever
    # the first group happens to have several rows.
    y <- lapply(y, function(x) if (nrow(x) > 1) x[-1, ] else x)
  }

  # Per group: the statistic values as a list named by the rows' term labels.
  lapply(
    X = y,
    FUN = function(x) {
      z <- as.list(x[[.stats]])
      stats::setNames(z, nm = x$term_label)
    }
  )
}

#' @describeIn cox_regression Analysis function which is used as `afun` in [rtables::analyze()]
#'   and `cfun` in [rtables::summarize_row_groups()] within `summarize_coxreg()`.
#'
#' @param eff (`flag`)\cr whether treatment effect should be calculated. Defaults to `FALSE`.
#' @param var_main (`flag`)\cr whether main effects should be calculated. Defaults to `FALSE`.
#' @param na_level (`string`)\cr custom string to replace all `NA` values with. Defaults to `""`.
#' @param cache_env (`environment`)\cr an environment object used to cache the regression model in order to
#'   avoid repeatedly fitting the same model for every row in the table. Defaults to `NULL` (no caching).
#' @param varlabels (`list`)\cr a named list corresponds to the names of variables found in data, passed
#'   as a named list and corresponding to time, event, arm, strata, and covariates terms. If arm is missing
#'   from variables, then only Cox model(s) including the covariates will be fitted and the corresponding
#'   effect estimates will be tabulated later.
#'
#' @return
#' * `a_coxreg()` returns formatted [rtables::CellValue()].
#'
#' @examples
#' a_coxreg(
#'   df = dta_bladder,
#'   labelstr = "Label 1",
#'   variables = u1_variables,
#'   .spl_context = list(value = "COVAR1"),
#'   .stats = "n",
#'   .formats = "xx"
#' )
#'
#' a_coxreg(
#'   df = dta_bladder,
#'   labelstr = "",
#'   variables = u1_variables,
#'   .spl_context = list(value = "COVAR2"),
#'   .stats = "pval",
#'   .formats = "xx.xxxx"
#' )
#'
#' @export
a_coxreg <- function(df,
                     labelstr,
                     eff = FALSE,
                     var_main = FALSE,
                     multivar = FALSE,
                     variables,
                     at = list(),
                     control = control_coxreg(),
                     .spl_context,
                     .stats,
                     .formats,
                     .indent_mods = NULL,
                     na_level = "",
                     cache_env = NULL) {
  # Step 1: identify the current covariate and resolve the row label.
  cov_no_arm <- !multivar && !"arm" %in% names(variables) && control$interaction # special case: univar no arm
  cov <- tail(.spl_context$value, 1) # current variable/covariate
  var_lbl <- formatters::var_labels(df)[cov] # check for df labels
  if (length(labelstr) > 1) {
    # Several labels were supplied: pick the one named after the current covariate.
    labelstr <- if (cov %in% names(labelstr)) labelstr[[cov]] else var_lbl # use df labels if none
  } else if (!is.na(var_lbl) && labelstr == cov && cov %in% variables$covariates) {
    # The label is just the covariate name: prefer the label stored in `df` when there is one.
    labelstr <- var_lbl
  }

  # Step 2: decide which model is fitted. Treatment effect, multivariate and "no arm" rows are
  # fitted without interaction; otherwise the model is restricted to the current covariate and
  # the interaction is switched on for main effect rows.
  if (eff || multivar || cov_no_arm) {
    control$interaction <- FALSE
  } else {
    variables$covariates <- cov
    if (var_main) control$interaction <- TRUE
  }

  # Step 3: fit and tidy the model, or reuse the tidied result stored in `cache_env` under the
  # name of the current covariate.
  if (is.null(cache_env[[cov]])) {
    if (!multivar) {
      model <- fit_coxreg_univar(variables = variables, data = df, at = at, control = control) %>% broom::tidy()
    } else {
      model <- fit_coxreg_multivar(variables = variables, data = df, control = control) %>% broom::tidy()
    }
    cache_env[[cov]] <- model
  } else {
    model <- cache_env[[cov]]
  }
  # Univariate rows that are not main effect rows get a missing interaction p-value.
  if (!multivar && !var_main) model[, "pval_inter"] <- NA_real_

  # From here on `multivar`/`var_main` only steer row selection and labelling: the "no arm"
  # cases reuse the multivariate selection logic below.
  if (cov_no_arm || (!cov_no_arm && !"arm" %in% names(variables) && is.numeric(df[[cov]]))) {
    multivar <- TRUE
    if (!cov_no_arm) var_main <- TRUE
  }

  # Step 4: choose the `.which_vars` / `.var_nms` arguments passed to `s_coxreg()`.
  vars_coxreg <- list(which_vars = "all", var_nms = NULL)
  if (eff) {
    if (multivar && !var_main) { # multivar treatment level
      var_lbl_arm <- formatters::var_labels(df)[[variables$arm]]
      vars_coxreg[c("var_nms", "which_vars")] <- list(c(variables$arm, var_lbl_arm), "multi_lvl")
    } else { # treatment effect
      vars_coxreg["var_nms"] <- variables$arm
      if (var_main) vars_coxreg["which_vars"] <- "var_main"
    }
  } else {
    if (!multivar || (multivar && var_main && !is.numeric(df[[cov]]))) { # covariate effect/level
      vars_coxreg[c("var_nms", "which_vars")] <- list(cov, "var_main")
    } else if (multivar) { # multivar covariate level
      vars_coxreg[c("var_nms", "which_vars")] <- list(c(cov, var_lbl), "multi_lvl")
      if (var_main) model[cov, .stats] <- NA_real_
    }
    if (!multivar && !var_main && control$interaction) vars_coxreg["which_vars"] <- "inter" # interaction effect
  }
  # Only the first group returned by `s_coxreg()` is used for this cell.
  var_vals <- s_coxreg(model, .stats, .which_vars = vars_coxreg$which_vars, .var_nms = vars_coxreg$var_nms)[[1]]

  # Step 5: choose the row names/labels shown in the table.
  var_names <- if (all(grepl("\\(reference = ", names(var_vals))) && labelstr != tail(.spl_context$value, 1)) {
    paste(c(labelstr, tail(strsplit(names(var_vals), " ")[[1]], 3)), collapse = " ") # "reference" main effect labels
  } else if ((!multivar && !eff && !(!var_main && control$interaction) && nchar(labelstr) > 0) ||
    (multivar && var_main && is.numeric(df[[cov]]))) {
    labelstr # other main effect labels
  } else if (multivar && !eff && !var_main && is.numeric(df[[cov]])) {
    "All" # multivar numeric covariate
  } else {
    names(var_vals)
  }
  # Step 6: build the rows; the same format and NA string are repeated for every row.
  in_rows(
    .list = var_vals, .names = var_names, .labels = var_names, .indent_mods = .indent_mods,
    .formats = stats::setNames(rep(.formats, length(var_names)), var_names),
    .format_na_strs = stats::setNames(rep(na_level, length(var_names)), var_names)
  )
}

#' @describeIn cox_regression Layout-creating function which creates a Cox regression summary table
#'   layout. This function is a wrapper for several `rtables` layouting functions, including
#'   [rtables::analyze_colvars()] and [rtables::summarize_row_groups()].
#'
#' @inheritParams fit_coxreg_univar
#' @param multivar (`flag`)\cr Defaults to `FALSE`. If `TRUE` multivariate Cox regression will run, otherwise
#'   univariate Cox regression will run.
#' @param common_var (`character`)\cr the name of a factor variable in the dataset which takes the same value
#'   for all rows. This should be created during pre-processing if no such variable currently exists.
#' @param .section_div (`character`)\cr string which should be repeated as a section divider between sections.
#'   Defaults to `NA` for no section divider. If a vector of two strings are given, the first will be used between
#'   treatment and covariate sections and the second between different covariates.
#'
#' @return
#' * `summarize_coxreg()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add a Cox regression table
#'   containing the chosen statistics to the table layout.
#'
#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()] which also take the `variables`, `data`,
#'   `at` (univariate only), and `control` arguments but return unformatted univariate and multivariate
#'   Cox regression models, respectively.
#'
#' @examples
#' # summarize_coxreg
#'
#' result_univar <- basic_table() %>%
#'   summarize_coxreg(variables = u1_variables) %>%
#'   build_table(dta_bladder)
#' result_univar
#'
#' result_multivar <- basic_table() %>%
#'   summarize_coxreg(
#'     variables = m1_variables,
#'     multivar = TRUE,
#'   ) %>%
#'   build_table(dta_bladder)
#' result_multivar
#'
#' result_univar_covs <- basic_table() %>%
#'   summarize_coxreg(
#'     variables = u2_variables,
#'   ) %>%
#'   build_table(dta_bladder)
#' result_univar_covs
#'
#' result_multivar_covs <- basic_table() %>%
#'   summarize_coxreg(
#'     variables = m2_variables,
#'     multivar = TRUE,
#'     varlabels = c("Covariate 1", "Covariate 2") # custom labels
#'   ) %>%
#'   build_table(dta_bladder)
#' result_multivar_covs
#'
#' @export
summarize_coxreg <- function(lyt,
                             variables,
                             control = control_coxreg(),
                             at = list(),
                             multivar = FALSE,
                             common_var = "STUDYID",
                             .stats = c("n", "hr", "ci", "pval", "pval_inter"),
                             .formats = c(
                               n = "xx", hr = "xx.xx", ci = "(xx.xx, xx.xx)",
                               pval = "x.xxxx | (<0.0001)", pval_inter = "x.xxxx | (<0.0001)"
                             ),
                             varlabels = NULL,
                             .indent_mods = NULL,
                             na_level = "",
                             .section_div = NA_character_) {
  # Interactions are not tabulated for multivariate models: warn, but carry on.
  if (multivar && control$interaction) {
    warning(paste(
      "Interactions are not available for multivariate cox regression using summarize_coxreg.",
      "The model will be calculated without interaction effects."
    ))
  }
  # An interaction needs a treatment arm to interact with.
  if (control$interaction && !"arm" %in% names(variables)) {
    stop("To include interactions please specify 'arm' in variables.")
  }

  # Keep only the requested statistics that are valid for the chosen kind of model.
  .stats <- if (!"arm" %in% names(variables) || multivar) { # only valid statistics
    intersect(c("hr", "ci", "pval"), .stats)
  } else if (control$interaction) {
    intersect(c("n", "hr", "ci", "pval", "pval_inter"), .stats)
  } else {
    intersect(c("n", "hr", "ci", "pval"), .stats)
  }
  # Column headers and formats, restricted to the retained statistics.
  stat_labels <- c(
    n = "n", hr = "Hazard Ratio", ci = paste0(control$conf_level * 100, "% CI"),
    pval = "p-value", pval_inter = "Interaction p-value"
  )
  stat_labels <- stat_labels[names(stat_labels) %in% .stats]
  .formats <- .formats[names(.formats) %in% .stats]
  env <- new.env() # create caching environment

  # One column per statistic, each split on `common_var`; the same caching environment is
  # handed to every column through `extra_args`.
  lyt <- lyt %>%
    split_cols_by_multivar(
      vars = rep(common_var, length(.stats)),
      varlabels = stat_labels,
      extra_args = list(
        .stats = .stats, .formats = .formats, .indent_mods = .indent_mods, na_level = rep(na_level, length(.stats)),
        cache_env = replicate(length(.stats), list(env))
      )
    )

  if ("arm" %in% names(variables)) { # treatment effect
    # The first element of `.section_div` is used for the treatment section.
    lyt <- lyt %>%
      split_rows_by(
        common_var,
        split_label = "Treatment:",
        label_pos = "visible",
        section_div = head(.section_div, 1)
      ) %>%
      summarize_row_groups(
        cfun = a_coxreg,
        extra_args = list(
          variables = variables, control = control, multivar = multivar, eff = TRUE, var_main = multivar
        )
      )
    if (multivar) { # treatment level effects
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          extra_args = list(eff = TRUE, control = control, variables = variables, multivar = multivar, labelstr = "")
        )
    }
  }

  if ("covariates" %in% names(variables)) { # covariate main effects
    # The last element of `.section_div` is used between covariates.
    lyt <- lyt %>%
      split_rows_by_multivar(
        vars = variables$covariates,
        varlabels = varlabels,
        split_label = "Covariate:",
        nested = FALSE,
        child_labels = if (multivar || control$interaction || !"arm" %in% names(variables)) "default" else "hidden",
        section_div = tail(.section_div, 1)
      )
    if (multivar || control$interaction || !"arm" %in% names(variables)) {
      # Main effects are shown as content (summary) rows of each covariate group.
      lyt <- lyt %>%
        summarize_row_groups(
          cfun = a_coxreg,
          extra_args = list(
            variables = variables, at = at, control = control, multivar = multivar,
            var_main = if (multivar) multivar else control$interaction
          )
        )
    } else {
      # Univariate model with arm and no interaction: covariate effects are analysis rows, and
      # custom labels are matched to covariates by name.
      if (!is.null(varlabels)) names(varlabels) <- variables$covariates
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          extra_args = list(
            variables = variables, at = at, control = control, multivar = multivar,
            var_main = if (multivar) multivar else control$interaction,
            labelstr = if (is.null(varlabels)) "" else varlabels
          )
        )
    }

    # This reassignment happens after the `extra_args` lists above were built, so it only
    # affects the condition and the call below.
    if (!"arm" %in% names(variables)) control$interaction <- TRUE # special case: univar no arm
    if (multivar || control$interaction) { # covariate level effects
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          extra_args = list(variables = variables, at = at, control = control, multivar = multivar, labelstr = "")
        )
    }
  }

  lyt
}

#' Kaplan-Meier Plot
#'
#' @description `r lifecycle::badge("stable")`
#'
#' From a survival model, a graphic is rendered along with tabulated annotation
#' including the number of patient at risk at given time and the median survival
#' per group.
#'
#' @inheritParams grid::gTree
#' @inheritParams argument_convention
#' @param df (`data.frame`)\cr data set containing all analysis variables.
#' @param variables (named `list`)\cr variable names. Details are:
#'   * `tte` (`numeric`)\cr variable indicating time-to-event duration values.
#'   * `is_event` (`logical`)\cr event variable. `TRUE` if event, `FALSE` if time to event is censored.
#'   * `arm` (`factor`)\cr the treatment group variable.
#'   * `strat` (`character` or `NULL`)\cr variable names indicating stratification factors.
#' @param control_surv (`list`)\cr parameters for comparison details, specified by using
#'   the helper function [control_surv_timepoint()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for survival rate.
#'   * `conf_type` (`string`)\cr `"plain"` (default), `"log"`, `"log-log"` for confidence interval type,
#'     see more in [survival::survfit()]. Note that the option "none" is no longer supported.
#' @param xticks (`numeric`, `number`, or `NULL`)\cr numeric vector of ticks or single number with spacing
#'   between ticks on the x axis. If `NULL` (default), [labeling::extended()] is used to determine
#'   an optimal tick position on the x axis.
#' @param yval (`string`)\cr value of y-axis. Options are `Survival` (default) and `Failure` probability.
#' @param censor_show (`flag`)\cr whether to show censored.
#' @param xlab (`string`)\cr label of x-axis.
#' @param ylab (`string`)\cr label of y-axis.
#' @param title (`string`)\cr title for plot.
#' @param footnotes (`string`)\cr footnotes for plot.
#' @param col (`character`)\cr lines colors. Length of a vector should be equal
#'   to number of strata from [survival::survfit()].
#' @param lty (`numeric`)\cr line type. Length of a vector should be equal
#'   to number of strata from [survival::survfit()].
#' @param lwd (`numeric`)\cr line width. Length of a vector should be equal
#'   to number of strata from [survival::survfit()].
#' @param pch (`numeric`, `string`)\cr value or character of points symbol to indicate censored cases.
#' @param size (`numeric`)\cr size of the points marking censored cases.
#' @param max_time (`numeric`)\cr maximum value to show on X axis. Only data values less than or up to
#'   this threshold value will be plotted (defaults to `NULL`).
#' @param font_size (`number`)\cr font size to be used.
#' @param ci_ribbon (`flag`)\cr draw the confidence interval around the Kaplan-Meier curve.
#' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to control outlook of the Kaplan-Meier curve.
#' @param annot_at_risk (`flag`)\cr compute and add the annotation table reporting the number of patients at risk
#'   matching the main grid of the Kaplan-Meier curve.
#' @param annot_surv_med (`flag`)\cr compute and add the annotation table on the Kaplan-Meier curve estimating the
#'   median survival time per group.
#' @param annot_coxph (`flag`)\cr add the annotation table from a [survival::coxph()] model.
#' @param annot_stats (`string`)\cr statistics annotations to add to the plot. Options are
#'   `median` (median survival follow-up time) and `min` (minimum survival follow-up time).
#' @param annot_stats_vlines (`flag`)\cr add vertical lines corresponding to each of the statistics
#'   specified by `annot_stats`. If `annot_stats` is `NULL` no lines will be added.
#' @param control_coxph_pw (`list`)\cr parameters for comparison details, specified by using
#'   the helper function [control_coxph()]. Some possible parameter options are:
#'   * `pval_method` (`string`)\cr p-value method for testing hazard ratio = 1.
#'     Default method is `"log-rank"`, can also be set to `"wald"` or `"likelihood"`.
#'   * `ties` (`string`)\cr method for tie handling. Default is `"efron"`,
#'     can also be set to `"breslow"` or `"exact"`. See more in [survival::coxph()]
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for HR.
#' @param position_coxph (`numeric`)\cr x and y positions for plotting [survival::coxph()] model.
#' @param position_surv_med (`numeric`)\cr x and y positions for plotting annotation table estimating median survival
#'   time per group.
#' @param width_annots (named `list` of `unit`s)\cr a named list of widths for annotation tables with names `surv_med`
#'   (median survival time table) and `coxph` ([survival::coxph()] model table), where each value is the width
#'   (in units) to implement when printing the annotation table.
#'
#' @return A `grob` of class `gTree`.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(ggplot2)
#' library(survival)
#' library(grid)
#' library(nestcolor)
#'
#' df <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#' variables <- list(tte = "AVAL", is_event = "is_event", arm = "ARMCD")
#'
#' # 1. Example - basic option
#'
#' res <- g_km(df = df, variables = variables)
#' res <- g_km(df = df, variables = variables, yval = "Failure")
#' res <- g_km(
#'   df = df,
#'   variables = variables,
#'   control_surv = control_surv_timepoint(conf_level = 0.9),
#'   col = c("grey25", "grey50", "grey75")
#' )
#' res <- g_km(df = df, variables = variables, ggtheme = theme_minimal())
#' res <- g_km(df = df, variables = variables, ggtheme = theme_minimal(), lty = 1:3)
#' res <- g_km(df = df, variables = variables, max_time = 2000)
#' res <- g_km(
#'   df = df,
#'   variables = variables,
#'   annot_stats = c("min", "median"),
#'   annot_stats_vlines = TRUE
#' )
#'
#' # 2. Example - Arrange several KM curve on a single graph device
#'
#' # 2.1 Use case: A general graph on the top, a zoom on the bottom.
#' grid.newpage()
#' lyt <- grid.layout(nrow = 2, ncol = 1) %>%
#'   viewport(layout = .) %>%
#'   pushViewport()
#'
#' res <- g_km(
#'   df = df, variables = variables, newpage = FALSE, annot_surv_med = FALSE,
#'   vp = viewport(layout.pos.row = 1, layout.pos.col = 1)
#' )
#' res <- g_km(
#'   df = df, variables = variables, max_time = 1000, newpage = FALSE, annot_surv_med = FALSE,
#'   ggtheme = theme_dark(),
#'   vp = viewport(layout.pos.row = 2, layout.pos.col = 1)
#' )
#'
#' # 2.2 Use case: No annotations on top, annotated graph on bottom
#' grid.newpage()
#' lyt <- grid.layout(nrow = 2, ncol = 1) %>%
#'   viewport(layout = .) %>%
#'   pushViewport()
#'
#' res <- g_km(
#'   df = df, variables = variables, newpage = FALSE,
#'   annot_surv_med = FALSE, annot_at_risk = FALSE,
#'   vp = viewport(layout.pos.row = 1, layout.pos.col = 1)
#' )
#' res <- g_km(
#'   df = df, variables = variables, max_time = 2000, newpage = FALSE, annot_surv_med = FALSE,
#'   annot_at_risk = TRUE,
#'   ggtheme = theme_dark(),
#'   vp = viewport(layout.pos.row = 2, layout.pos.col = 1)
#' )
#'
#' # Add annotation from a pairwise coxph analysis
#' g_km(
#'   df = df, variables = variables,
#'   annot_coxph = TRUE
#' )
#'
#' # Change widths/sizes of surv_med and coxph annotation tables.
#' g_km(
#'   df = df, variables = c(variables, list(strat = "SEX")),
#'   annot_coxph = TRUE,
#'   width_annots = list(surv_med = grid::unit(2, "in"), coxph = grid::unit(3, "in"))
#' )
#'
#' g_km(
#'   df = df, variables = c(variables, list(strat = "SEX")),
#'   font_size = 15,
#'   annot_coxph = TRUE,
#'   control_coxph_pw = control_coxph(pval_method = "wald", ties = "exact", conf_level = 0.99),
#'   position_coxph = c(0.5, 0.5)
#' )
#'
#' # Change position of the treatment group annotation table.
#' g_km(
#'   df = df, variables = c(variables, list(strat = "SEX")),
#'   font_size = 15,
#'   annot_coxph = TRUE,
#'   control_coxph_pw = control_coxph(pval_method = "wald", ties = "exact", conf_level = 0.99),
#'   position_surv_med = c(1, 0.7)
#' )
#' }
#'
#' @export
g_km <- function(df,
                 variables,
                 control_surv = control_surv_timepoint(),
                 col = NULL,
                 lty = NULL,
                 lwd = .5,
                 censor_show = TRUE,
                 pch = 3,
                 size = 2,
                 max_time = NULL,
                 xticks = NULL,
                 xlab = "Days",
                 yval = c("Survival", "Failure"),
                 ylab = paste(yval, "Probability"),
                 title = NULL,
                 footnotes = NULL,
                 draw = TRUE,
                 newpage = TRUE,
                 gp = NULL,
                 vp = NULL,
                 name = NULL,
                 font_size = 12,
                 ci_ribbon = FALSE,
                 ggtheme = nestcolor::theme_nest(),
                 annot_at_risk = TRUE,
                 annot_surv_med = TRUE,
                 annot_coxph = FALSE,
                 annot_stats = NULL,
                 annot_stats_vlines = FALSE,
                 control_coxph_pw = control_coxph(),
                 position_coxph = c(-0.03, -0.02),
                 position_surv_med = c(0.95, 0.9),
                 width_annots = list(surv_med = grid::unit(0.3, "npc"), coxph = grid::unit(0.4, "npc"))) {
  # ---- Argument checks ----
  checkmate::assert_list(variables)
  checkmate::assert_subset(c("tte", "arm", "is_event"), names(variables))
  checkmate::assert_string(title, null.ok = TRUE)
  checkmate::assert_string(footnotes, null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)
  checkmate::assert_subset(annot_stats, c("median", "min"))
  checkmate::assert_logical(annot_stats_vlines)
  # `vapply()` always yields a logical vector, also when `width_annots` is an empty list.
  checkmate::assert_true(all(vapply(width_annots, grid::is.unit, logical(1))))

  tte <- variables$tte
  is_event <- variables$is_event
  arm <- variables$arm

  # Run the data-frame/variables check first; the column contents are only inspected afterwards.
  assert_df_with_variables(df, list(tte = tte, is_event = is_event, arm = arm))
  assert_valid_factor(df[[arm]])
  checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(df[[tte]], min.len = 1, any.missing = FALSE)

  # `armval` is only kept when a single arm value is present; it then serves as the strata label.
  armval <- as.character(unique(df[[arm]]))
  if (length(armval) > 1) {
    armval <- NULL
  }
  yval <- match.arg(yval)

  # ---- Kaplan-Meier fit and base plot ----
  formula <- stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", arm))
  fit_km <- survival::survfit(
    formula = formula,
    data = df,
    conf.int = control_surv$conf_level,
    conf.type = control_surv$conf_type
  )
  data_plot <- h_data_plot(
    fit_km = fit_km,
    armval = armval,
    max_time = max_time
  )

  xticks <- h_xticks(data = data_plot, xticks = xticks, max_time = max_time)
  gg <- h_ggkm(
    data = data_plot,
    censor_show = censor_show,
    pch = pch,
    size = size,
    xticks = xticks,
    xlab = xlab,
    yval = yval,
    ylab = ylab,
    title = title,
    footnotes = footnotes,
    max_time = max_time,
    lwd = lwd,
    lty = lty,
    col = col,
    ggtheme = ggtheme,
    ci_ribbon = ci_ribbon
  )

  # ---- Optional follow-up statistics (text labels and vertical reference lines) ----
  if (!is.null(annot_stats)) {
    if ("median" %in% annot_stats) {
      # Fit without grouping to get the overall median.
      fit_km_all <- survival::survfit(
        formula = stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", 1)),
        data = df,
        conf.int = control_surv$conf_level,
        conf.type = control_surv$conf_type
      )
      median_fu <- stats::median(fit_km_all)
      gg <- gg +
        ggplot2::geom_text(
          size = 8 / ggplot2::.pt, col = 1,
          x = median_fu + 0.065 * max(data_plot$time),
          y = if (yval == "Survival") 0.62 else 0.38,
          label = paste("Median F/U:\n", round(median_fu, 1), tolower(df$AVALU[1]))
        )
      if (annot_stats_vlines) {
        gg <- gg +
          ggplot2::geom_segment(
            ggplot2::aes(x = median_fu, xend = median_fu, y = -Inf, yend = Inf),
            linetype = 2, col = "darkgray"
          )
      }
    }
    if ("min" %in% annot_stats) {
      min_fu <- min(df[[tte]])
      gg <- gg +
        ggplot2::geom_text(
          size = 8 / ggplot2::.pt, col = 1,
          x = min_fu + max(data_plot$time) * (if (yval == "Survival") 0.05 else 0.07),
          y = if (yval == "Survival") 1.0 else 0.05,
          label = paste("Min. F/U:\n", round(min_fu, 1), tolower(df$AVALU[1]))
        )
      if (annot_stats_vlines) {
        gg <- gg +
          ggplot2::geom_segment(
            ggplot2::aes(x = min_fu, xend = min_fu, y = Inf, yend = -Inf),
            linetype = 2, col = "darkgray"
          )
      }
    }
    # Keep the text annotations out of the fill legend keys.
    gg <- gg + ggplot2::guides(fill = ggplot2::guide_legend(override.aes = list(shape = NA, label = "")))
  }

  g_el <- h_decompose_gg(gg)

  if (annot_at_risk) {
    # This is the content of the table that will be below the graph.
    annot_tbl <- summary(fit_km, time = xticks)
    annot_tbl <- if (is.null(fit_km$strata)) {
      data.frame(
        n.risk = annot_tbl$n.risk,
        time = annot_tbl$time,
        strata = as.factor(armval)
      )
    } else {
      # Strata levels look like "<variable>=<level>": drop everything up to the first "=" so that
      # levels which themselves contain "=" or arbitrary words are kept intact.
      levels(annot_tbl$strata) <- sub("^[^=]*=", "", levels(annot_tbl$strata))
      data.frame(
        n.risk = annot_tbl$n.risk,
        time = annot_tbl$time,
        strata = annot_tbl$strata
      )
    }

    grobs_patient <- h_grob_tbl_at_risk(
      data = data_plot,
      annot_tbl = annot_tbl,
      xlim = max(max_time, data_plot$time, xticks)
    )
  }

  # ---- Assemble the final grob ----
  if (annot_at_risk || annot_surv_med || annot_coxph) {
    lyt <- h_km_layout(
      data = data_plot, g_el = g_el, title = title, footnotes = footnotes, annot_at_risk = annot_at_risk
    )
    # 1 when a title/footnote row is present in the layout, 0 otherwise; used to offset row positions.
    ttl_row <- as.numeric(!is.null(title))
    foot_row <- as.numeric(!is.null(footnotes))
    km_grob <- grid::gTree(
      vp = grid::viewport(layout = lyt, height = .95, width = .95),
      children = grid::gList(
        # Title.
        if (ttl_row == 1) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 2),
            children = grid::gList(grid::textGrob(label = title, x = grid::unit(0, "npc"), hjust = 0))
          )
        },

        # The Kaplan - Meier curve (top-right corner).
        grid::gTree(
          vp = grid::viewport(layout.pos.row = 1 + ttl_row, layout.pos.col = 2),
          children = grid::gList(g_el$panel)
        ),

        # Survfit summary table (top-right corner).
        if (annot_surv_med) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 1 + ttl_row, layout.pos.col = 2),
            children = h_grob_median_surv(
              fit_km = fit_km,
              armval = armval,
              x = position_surv_med[1],
              y = position_surv_med[2],
              width = if (!is.null(width_annots[["surv_med"]])) width_annots[["surv_med"]] else grid::unit(0.3, "npc"),
              ttheme = gridExtra::ttheme_default(base_size = font_size)
            )
          )
        },

        # Cox proportional hazards summary table (drawn over the curve panel).
        if (annot_coxph) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 1 + ttl_row, layout.pos.col = 2),
            children = h_grob_coxph(
              df = df,
              variables = variables,
              control_coxph_pw = control_coxph_pw,
              x = position_coxph[1],
              y = position_coxph[2],
              width = if (!is.null(width_annots[["coxph"]])) width_annots[["coxph"]] else grid::unit(0.4, "npc"),
              ttheme = gridExtra::ttheme_default(
                base_size = font_size,
                padding = grid::unit(c(1, .5), "lines"),
                core = list(bg_params = list(fill = c("grey95", "grey90"), alpha = .5))
              )
            )
          )
        },

        # Add the y-axis annotation (top-left corner).
        grid::gTree(
          vp = grid::viewport(layout.pos.row = 1 + ttl_row, layout.pos.col = 1),
          children = h_grob_y_annot(ylab = g_el$ylab, yaxis = g_el$yaxis)
        ),

        # Add the x-axis annotation (second row below the Kaplan Meier Curve).
        grid::gTree(
          vp = grid::viewport(layout.pos.row = 2 + ttl_row, layout.pos.col = 2),
          children = grid::gList(rbind(g_el$xaxis, g_el$xlab))
        ),

        # Add the legend.
        grid::gTree(
          vp = grid::viewport(layout.pos.row = 3 + ttl_row, layout.pos.col = 2),
          children = grid::gList(g_el$guide)
        ),

        # Add the table with patient-at-risk numbers.
        if (annot_at_risk) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 4 + ttl_row, layout.pos.col = 2),
            children = grobs_patient$at_risk
          )
        },
        if (annot_at_risk) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 4 + ttl_row, layout.pos.col = 1),
            children = grobs_patient$label
          )
        },
        if (annot_at_risk) {
          # Add the x-axis for the table.
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 5 + ttl_row, layout.pos.col = 2),
            children = grid::gList(rbind(g_el$xaxis, g_el$xlab))
          )
        },

        # Footnotes: placed in the last layout row, whose index depends on the at-risk table being present.
        if (foot_row == 1) {
          grid::gTree(
            vp = grid::viewport(
              layout.pos.row = if (annot_at_risk) 6 + ttl_row else 4 + ttl_row,
              layout.pos.col = 2
            ),
            children = grid::gList(grid::textGrob(label = footnotes, x = grid::unit(0, "npc"), hjust = 0))
          )
        }
      )
    )

    result <- grid::gTree(
      vp = vp,
      gp = gp,
      name = name,
      children = grid::gList(km_grob)
    )
  } else {
    # No annotation requested: the plain `ggplot` grob is wrapped as is.
    result <- grid::gTree(
      vp = vp,
      gp = gp,
      name = name,
      children = grid::gList(ggplot2::ggplotGrob(gg))
    )
  }

  if (newpage && draw) grid::grid.newpage()
  if (draw) grid::grid.draw(result)
  invisible(result)
}

#' Helper function: tidy survival fit
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convert the survival fit data into a data frame designed for plotting
#' within `g_km`.
#'
#' This starts from the [broom::tidy()] result, and then:
#'   * Post-processes the `strata` column into a factor.
#'   * Extends each stratum by an additional first row with time 0 and probability 1 so that
#'     downstream plot lines start at those coordinates.
#'   * Adds a `censor` column.
#'   * Keeps only the rows with `time` less than or equal to `max_time` (if provided).
#'
#' @inheritParams g_km
#' @param fit_km (`survfit`)\cr result of [survival::survfit()].
#' @param armval (`string`)\cr used as strata name when treatment arm variable only has one level. Default is `"All"`.
#'
#' @return A `tibble` with columns `time`, `n.risk`, `n.event`, `n.censor`, `estimate`, `std.error`, `conf.high`,
#'   `conf.low`, `strata`, and `censor`.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#'
#' # Test with multiple arms
#' tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_data_plot()
#'
#' # Test with single arm
#' tern_ex_adtte %>%
#'   filter(PARAMCD == "OS", ARMCD == "ARM B") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_data_plot(armval = "ARM B")
#' }
#'
#' @export
h_data_plot <- function(fit_km,
                        armval = "All",
                        max_time = NULL) {
  y <- broom::tidy(fit_km)

  if (!is.null(fit_km$strata)) {
    # Strata labels look like "<variable>=<level>". Remove everything up to and including the first
    # "=" so that levels containing "=" or arbitrary words (e.g. "equals") are preserved verbatim.
    strip_variable <- function(x) sub("^[^=]*=", "", x)
    y$strata <- factor(
      strip_variable(y$strata),
      levels = strip_variable(names(fit_km$strata))
    )
  } else {
    # Without strata in the fit, every row is labelled with `armval`.
    y$strata <- armval
  }

  # Prepend to each stratum a row at time 0 with probability 1 so that the curves start there.
  y_by_strata <- split(y, y$strata)
  y_by_strata_extended <- lapply(
    y_by_strata,
    FUN = function(tbl) {
      first_row <- tbl[1L, ]
      first_row$time <- 0
      # Everybody counted in the first observed row is at risk at time 0.
      first_row$n.risk <- sum(first_row[, c("n.risk", "n.event", "n.censor")])
      first_row$n.event <- first_row$n.censor <- 0
      first_row$estimate <- first_row$conf.high <- first_row$conf.low <- 1
      first_row$std.error <- 0
      rbind(
        first_row,
        tbl
      )
    }
  )
  y <- do.call(rbind, y_by_strata_extended)

  # `censor` holds the estimate at times with at least one censored observation, `NA` elsewhere.
  y$censor <- ifelse(y$n.censor > 0, y$estimate, NA)
  if (!is.null(max_time)) {
    y <- y[y$time <= max(max_time), ]
  }
  y
}

#' Helper function: x tick positions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Calculate the positions of ticks on the x-axis. However, if `xticks` already
#' exists it is kept as is. It is based on the same function `ggplot2` relies on,
#' and is required in the graphic and the patient-at-risk annotation table.
#'
#' @inheritParams g_km
#' @inheritParams h_ggkm
#'
#' @return A vector of positions to use for x-axis ticks on a `ggplot` object.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#'
#' data <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_data_plot()
#'
#' h_xticks(data)
#' h_xticks(data, xticks = seq(0, 3000, 500))
#' h_xticks(data, xticks = 500)
#' h_xticks(data, xticks = 500, max_time = 6000)
#' h_xticks(data, xticks = c(0, 500), max_time = 300)
#' h_xticks(data, xticks = 500, max_time = 300)
#' }
#'
#' @export
h_xticks <- function(data, xticks = NULL, max_time = NULL) {
  # `max()` ignores `NULL`, so `max(x, max_time)` reduces to `max(x)` when no `max_time` is given.

  # No ticks requested: let `labeling::extended()` choose positions over the time range.
  if (is.null(xticks)) {
    time_range <- range(data$time)
    return(labeling::extended(time_range[1], max(time_range[2], max_time), m = 5))
  }

  # A single number is the spacing between ticks, starting at 0.
  if (checkmate::test_number(xticks)) {
    return(seq(0, max(data$time, max_time), xticks))
  }

  if (!is.numeric(xticks)) {
    stop(
      paste(
        "xticks should be either `NULL`",
        "or a single number (interval between x ticks)",
        "or a numeric vector (position of ticks on the x axis)"
      )
    )
  }

  # A numeric vector is taken as the tick positions themselves.
  xticks
}

#' Helper function: KM plot
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Draw the Kaplan-Meier plot using `ggplot2`.
#'
#' @inheritParams g_km
#' @param data (`data.frame`)\cr survival data as pre-processed by `h_data_plot`.
#'
#' @return A `ggplot` object.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks,
#'   xlab = "Days",
#'   yval = "Survival",
#'   ylab = "Survival Probability",
#'   title = "Survival"
#' )
#' gg
#' }
#'
#' @export
h_ggkm <- function(data,
                   xticks = NULL,
                   yval = "Survival",
                   censor_show,
                   xlab,
                   ylab,
                   title,
                   footnotes = NULL,
                   max_time = NULL,
                   lwd = 1,
                   lty = NULL,
                   pch = 3,
                   size = 2,
                   col = NULL,
                   ci_ribbon = FALSE,
                   ggtheme = nestcolor::theme_nest()) {
  checkmate::assert_numeric(lty, null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)

  # change estimates of survival to estimates of failure (1 - survival)
  # The confidence bounds swap roles under this transformation.
  if (yval == "Failure") {
    data$estimate <- 1 - data$estimate
    data[c("conf.high", "conf.low")] <- list(1 - data$conf.low, 1 - data$conf.high)
    data$censor <- 1 - data$censor
  }

  # Base plot: one curve per stratum, with a horizontal reference line at 0.
  gg <- ggplot2::ggplot(
    data = data,
    mapping = ggplot2::aes(
      x = .data[["time"]],
      y = .data[["estimate"]],
      ymin = .data[["conf.low"]],
      ymax = .data[["conf.high"]],
      color = .data[["strata"]],
      fill = .data[["strata"]]
    )
  ) +
    ggplot2::geom_hline(yintercept = 0)

  if (ci_ribbon) {
    gg <- gg + ggplot2::geom_ribbon(alpha = .3, lty = 0)
  }

  # Step curves: default line type, one common line type (single number),
  # or one line type per stratum (numeric vector).
  gg <- if (is.null(lty)) {
    gg +
      ggplot2::geom_step(linewidth = lwd)
  } else if (checkmate::test_number(lty)) {
    gg +
      ggplot2::geom_step(linewidth = lwd, lty = lty)
  } else if (is.numeric(lty)) {
    gg +
      ggplot2::geom_step(mapping = ggplot2::aes(linetype = .data[["strata"]]), linewidth = lwd) +
      ggplot2::scale_linetype_manual(values = lty)
  }

  gg <- gg +
    ggplot2::coord_cartesian(ylim = c(0, 1)) +
    ggplot2::labs(x = xlab, y = ylab, title = title, caption = footnotes)

  if (!is.null(col)) {
    gg <- gg +
      ggplot2::scale_color_manual(values = col) +
      ggplot2::scale_fill_manual(values = col)
  }

  # Mark the times with censored observations and add a "Censored" legend entry.
  if (censor_show) {
    dt <- data[data$n.censor != 0, ]
    dt$censor_lbl <- factor("Censored")

    gg <- gg + ggplot2::geom_point(
      data = dt,
      ggplot2::aes(
        x = .data[["time"]],
        y = .data[["censor"]],
        shape = .data[["censor_lbl"]]
      ),
      size = size,
      show.legend = TRUE,
      inherit.aes = TRUE
    ) +
      ggplot2::scale_shape_manual(name = NULL, values = pch) +
      ggplot2::guides(
        shape = ggplot2::guide_legend(override.aes = list(linetype = NA)),
        fill = ggplot2::guide_legend(override.aes = list(shape = NA))
      )
  }

  # x-axis breaks and limits, depending on which of `xticks` and `max_time` are supplied.
  if (!is.null(max_time) && !is.null(xticks)) {
    gg <- gg + ggplot2::scale_x_continuous(breaks = xticks, limits = c(min(0, xticks), max(c(xticks, max_time))))
  } else if (!is.null(xticks)) {
    if (max(data$time) <= max(xticks)) {
      gg <- gg + ggplot2::scale_x_continuous(breaks = xticks, limits = c(min(0, min(xticks)), max(xticks)))
    } else {
      gg <- gg + ggplot2::scale_x_continuous(breaks = xticks)
    }
  } else if (!is.null(max_time)) {
    gg <- gg + ggplot2::scale_x_continuous(limits = c(0, max_time))
  }

  if (!is.null(ggtheme)) {
    gg <- gg + ggtheme
  }

  # These theme settings are added after `ggtheme`, so they take precedence over it.
  # `grid::unit` is namespace-qualified so that the function does not rely on `grid` being attached.
  gg + ggplot2::theme(
    legend.position = "bottom",
    legend.title = ggplot2::element_blank(),
    legend.key.height = grid::unit(0.02, "npc"),
    panel.grid.major.x = ggplot2::element_line(linewidth = 2)
  )
}

#' `ggplot` Decomposition
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The elements composing the `ggplot` are extracted and organized in a `list`.
#'
#' @param gg (`ggplot`)\cr a graphic to decompose.
#'
#' @return A named `list` with elements:
#'   * `panel`: The panel.
#'   * `yaxis`: The y-axis.
#'   * `xaxis`: The x-axis.
#'   * `xlab`: The x-axis label.
#'   * `ylab`: The y-axis label.
#'   * `guide`: The legend.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   yval = "Survival",
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "tt",
#'   footnotes = "ff"
#' )
#'
#' g_el <- h_decompose_gg(gg)
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "red", fill = "gray85", lwd = 5))
#' grid::grid.draw(g_el$panel)
#'
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "royalblue", fill = "gray85", lwd = 5))
#' grid::grid.draw(with(g_el, cbind(ylab, yaxis)))
#' }
#'
#' @export
h_decompose_gg <- function(gg) {
  plot_gtable <- ggplot2::ggplotGrob(gg)

  # Output element name -> pattern handed to `gtable::gtable_filter()` to pick the matching grobs.
  elements <- as.list(
    c(
      panel = "panel",
      yaxis = "axis-l",
      xaxis = "axis-b",
      xlab = "xlab-b",
      ylab = "ylab-l",
      guide = "guide"
    )
  )

  # Replace each pattern by the sub-gtable it selects; the list names are kept.
  for (element in names(elements)) {
    elements[[element]] <- gtable::gtable_filter(plot_gtable, elements[[element]])
  }
  elements
}

#' Helper: KM Layout
#'
#' @description `r lifecycle::badge("stable")`
#'
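#' Prepares a two-column layout for the Kaplan-Meier curve. It has five rows by default, one more for each of
#' `title` and `footnotes` when supplied, and two fewer when `annot_at_risk = FALSE`.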
#'
#' @inheritParams g_km
#' @inheritParams h_ggkm
#' @param g_el (`list` of `gtable`)\cr list as obtained by `h_decompose_gg()`.
#' @param annot_at_risk (`flag`)\cr compute and add the annotation table reporting the number of
#'   patients at risk matching the main grid of the Kaplan-Meier curve.
#'
#' @return A grid layout.
#'
#' @details The layout corresponds to a grid of two columns and (by default) five rows of unequal dimensions. Most
#'   of the dimensions are fixed, only the curve is flexible and takes up the remaining free space.
#'   * The left column gets the annotation of the `ggplot` (y-axis) and the names of the strata for the patients
#'     at risk tabulation. The main constraint is the width of the column, which must be large enough to
#'     write the strata names.
#'   * The right column receives the `ggplot`, the legend, the x-axis and the patients at risk table.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "tt", footnotes = "ff", yval = "Survival"
#' )
#' g_el <- h_decompose_gg(gg)
#' lyt <- h_km_layout(data = data_plot, g_el = g_el, title = "t", footnotes = "f")
#' grid.show.layout(lyt)
#' }
#'
#' @export
h_km_layout <- function(data, g_el, title, footnotes, annot_at_risk = TRUE) {
  strata_fct <- as.factor(data$strata)
  strata_names <- levels(strata_fct)
  n_strata <- nlevels(strata_fct)

  # Left column width (in points): the larger of the y-axis annotation and the widest stratum label.
  yaxis_width_pt <- as.numeric(grid::convertX(g_el$yaxis$width + g_el$ylab$width, "pt"))
  label_width_pt <- as.numeric(
    grid::convertX(grid::stringWidth(strata_names) + grid::unit(7, "pt"), "pt")
  )
  col_annot_width <- max(c(yaxis_width_pt, label_width_pt))

  has_title <- !is.null(title)
  has_footnotes <- !is.null(footnotes)

  # Height used for the x-axis row and for the axis row below the at-risk table.
  axis_height <- grid::convertX(with(g_el, xaxis$height + ylab$width), "pt")

  # Row heights and their units, top to bottom:
  # [title], curve, x-axis, legend, at-risk table, at-risk axis, [footnotes].
  ht_x <- c(
    if (has_title) 2,
    1,
    axis_height + grid::unit(5, "pt"),
    grid::convertX(g_el$guide$heights, "pt") + grid::unit(2, "pt"),
    n_strata + 0.5,
    axis_height,
    if (has_footnotes) 1
  )
  ht_units <- c(
    if (has_title) "lines",
    "null",
    "pt",
    "pt",
    "lines",
    "pt",
    if (has_footnotes) "lines"
  )

  # Rows to keep: all of them with the at-risk table, otherwise everything but its two rows.
  keep_rows <- if (annot_at_risk) {
    rep(TRUE, 5 + has_title + has_footnotes)
  } else {
    c(
      if (has_title) TRUE,
      rep(TRUE, 3),
      rep(FALSE, 2),
      if (has_footnotes) TRUE
    )
  }

  grid::grid.layout(
    nrow = sum(keep_rows), ncol = 2,
    widths = grid::unit(c(col_annot_width, 1), c("pt", "null")),
    heights = grid::unit(
      x = ht_x[keep_rows],
      units = ht_units[keep_rows]
    )
  )
}

#' Helper: Patient-at-Risk Grobs
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Two graphical objects are obtained, one corresponding to row labeling and
#' the second to the number of patients at risk.
#'
#' @inheritParams g_km
#' @inheritParams h_ggkm
#' @param annot_tbl (`data.frame`)\cr annotation as prepared by [survival::summary.survfit()] which
#'   includes the number of patients at risk at given time points.
#' @param xlim (`numeric`)\cr the maximum value on the x-axis (used to
#'   ensure the at risk table aligns with the KM graph).
#'
#' @return A named `list` of two `gTree` objects: `at_risk` and `label`.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#'
#' data_plot <- h_data_plot(fit_km = fit_km)
#'
#' xticks <- h_xticks(data = data_plot)
#'
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "tt", footnotes = "ff", yval = "Survival"
#' )
#'
#' # The annotation table reports the patient at risk for a given strata and
#' # time (`xticks`).
#' annot_tbl <- summary(fit_km, time = xticks)
#' if (is.null(fit_km$strata)) {
#'   annot_tbl <- with(annot_tbl, data.frame(n.risk = n.risk, time = time, strata = "All"))
#' } else {
#'   strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals")
#'   levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2]
#'   annot_tbl <- data.frame(
#'     n.risk = annot_tbl$n.risk,
#'     time = annot_tbl$time,
#'     strata = annot_tbl$strata
#'   )
#' }
#'
#' # The annotation table is transformed into a grob.
#' tbl <- h_grob_tbl_at_risk(data = data_plot, annot_tbl = annot_tbl, xlim = max(xticks))
#'
#' # For the representation, the layout is estimated for which the decomposition
#' # of the graphic element is necessary.
#' g_el <- h_decompose_gg(gg)
#' lyt <- h_km_layout(data = data_plot, g_el = g_el, title = "t", footnotes = "f")
#'
#' grid::grid.newpage()
#' pushViewport(viewport(layout = lyt, height = .95, width = .95))
#' grid.rect(gp = grid::gpar(lty = 1, col = "purple", fill = "gray85", lwd = 1))
#' pushViewport(viewport(layout.pos.row = 4, layout.pos.col = 2))
#' grid.rect(gp = grid::gpar(lty = 1, col = "orange", fill = "gray85", lwd = 1))
#' grid::grid.draw(tbl$at_risk)
#' popViewport()
#' pushViewport(viewport(layout.pos.row = 4, layout.pos.col = 1))
#' grid.rect(gp = grid::gpar(lty = 1, col = "green3", fill = "gray85", lwd = 1))
#' grid::grid.draw(tbl$label)
#' }
#'
#' @export
h_grob_tbl_at_risk <- function(data, annot_tbl, xlim) {
  strata_fct <- as.factor(data$strata)
  strata_names <- levels(strata_fct)
  n_strata <- nlevels(strata_fct)

  # Build a full time-by-strata grid from 0 to `xlim`, spaced like the first two reported time
  # points, then attach the reported numbers at risk. Combinations without a value get 0.
  tick_step <- annot_tbl$time[2] - annot_tbl$time[1]
  full_grid <- expand.grid(
    time = seq(0, xlim, tick_step),
    strata = unique(annot_tbl$strata)
  )
  annot_tbl <- dplyr::left_join(full_grid, annot_tbl, by = c("time", "strata"))
  annot_tbl[is.na(annot_tbl)] <- 0

  # Numeric code of each row's stratum, used to compute the vertical position of its text.
  strata_code <- as.numeric(annot_tbl$strata)
  vp_table <- grid::plotViewport(margins = grid::unit(c(0, 0, 0, 0), "lines"))

  # Alternating gray stripes, one line high per stratum. A fresh grob is built on every call.
  stripes <- function() {
    grid::rectGrob(
      x = 0, y = grid::unit(c(1:n_strata) - 1, "lines"),
      gp = grid::gpar(fill = c("gray95", "gray90"), alpha = 1, col = "white"),
      height = grid::unit(1, "lines"), just = "bottom", hjust = 0
    )
  }

  # Left part: the strata names.
  gb_table_left_annot <- grid::gList(
    stripes(),
    grid::textGrob(
      label = unique(annot_tbl$strata),
      x = 0.5,
      y = grid::unit(
        (max(unique(strata_code)) - unique(strata_code)) + 0.75,
        "native"
      ),
      gp = grid::gpar(fontface = "italic", fontsize = 10)
    )
  )

  # Right part: the numbers at risk, placed at their time on the x scale.
  gb_patient_at_risk <- grid::gList(
    stripes(),
    grid::textGrob(
      label = annot_tbl$n.risk,
      x = grid::unit(annot_tbl$time, "native"),
      y = grid::unit(
        (max(strata_code) - strata_code) + .5,
        "line"
      ) # maybe native
    )
  )

  list(
    at_risk = grid::gList(
      grid::gTree(
        vp = vp_table,
        children = grid::gList(
          grid::gTree(
            # x scale from 0 to `xlim`, padded by 5% of `xlim` on both sides.
            vp = grid::dataViewport(
              xscale = c(0, xlim) + c(-0.05, 0.05) * xlim,
              yscale = c(0, n_strata + 1),
              extension = c(0.05, 0)
            ),
            children = grid::gList(gb_patient_at_risk)
          )
        )
      )
    ),
    label = grid::gList(
      grid::gTree(
        # As wide as the longest stratum name.
        vp = grid::viewport(width = max(grid::stringWidth(strata_names))),
        children = grid::gList(
          grid::gTree(
            vp = grid::dataViewport(
              xscale = 0:1,
              yscale = c(0, n_strata + 1),
              extension = c(0.0, 0)
            ),
            children = grid::gList(gb_table_left_annot)
          )
        )
      )
    )
  )
}

#' Helper Function: Survival Estimations
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Transform a survival fit to a table with groups in rows characterized by N, median and confidence interval.
#'
#' @inheritParams h_data_plot
#'
#' @return A summary table with statistics `N`, `Median`, and `XX% CI` (`XX` taken from `fit_km`).
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#'
#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "OS")
#' fit <- survfit(
#'   form = Surv(AVAL, 1 - CNSR) ~ ARMCD,
#'   data = adtte
#' )
#' h_tbl_median_surv(fit_km = fit)
#' }
#'
#' @export
h_tbl_median_surv <- function(fit_km, armval = "All") {
  # Summarise the fit once: both the statistics table and the confidence level
  # are read from the same summary object.
  fit_summary <- summary(fit_km)

  y <- if (is.null(fit_km$strata)) {
    # Unstratified fit: the summary table is transposed into a single row
    # labelled with `armval`.
    as.data.frame(t(fit_summary$table), row.names = armval)
  } else {
    tbl <- fit_summary$table
    # Row names have the form "<variable>=<level>". Drop everything up to and
    # including the first "=" so that only the level remains. Anchoring on the
    # first "=" keeps levels (or variable names) containing the text "equals"
    # or further "=" characters intact.
    rownames(tbl) <- sub("^[^=]*=", "", rownames(tbl))
    as.data.frame(tbl)
  }

  # Confidence level of the fit; the CI columns of the summary table are named
  # "<level>LCL" and "<level>UCL".
  conf.int <- fit_summary$conf.int # nolint
  y$records <- round(y$records)
  y$median <- signif(y$median, 4)
  y$`CI` <- paste0(
    "(", signif(y[[paste0(conf.int, "LCL")]], 4), ", ", signif(y[[paste0(conf.int, "UCL")]], 4), ")"
  )
  stats::setNames(
    y[c("records", "median", "CI")],
    c("N", "Median", f_conf_level(conf.int))
  )
}

#' Helper Function: Survival Estimation Grob
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The survival fit is transformed in a grob containing a table with groups in
#' rows characterized by N, median and 95% confidence interval.
#'
#' @inheritParams g_km
#' @inheritParams h_data_plot
#' @param ttheme (`list`)\cr see [gridExtra::ttheme_default()].
#' @param x (`numeric`)\cr a value between 0 and 1 specifying x-location.
#' @param y (`numeric`)\cr a value between 0 and 1 specifying y-location.
#' @param width (`unit`)\cr width (as a unit) to use when printing the grob.
#'
#' @return A `grob` of a table containing statistics `N`, `Median`, and `XX% CI` (`XX` taken from `fit_km`).
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "pink", fill = "gray85", lwd = 1))
#' tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_grob_median_surv() %>%
#'   grid::grid.draw()
#' }
#'
#' @export
h_grob_median_surv <- function(fit_km,
                               armval = "All",
                               x = 0.9,
                               y = 0.9,
                               width = grid::unit(0.3, "npc"),
                               ttheme = gridExtra::ttheme_default()) {
  # Table of N, median and CI per group, turned into a positioned table grob.
  data <- h_tbl_median_surv(fit_km, armval = armval)

  width <- grid::convertUnit(width, "in")
  height <- width * (nrow(data) + 1) / 12

  # Widest string of every column (row names first, then each data column
  # including its header), padded on the left.
  w <- paste(" ", c(
    rownames(data)[which.max(nchar(rownames(data)))],
    vapply(
      names(data),
      function(col) c(col, data[[col]])[which.max(nchar(c(col, data[[col]])))],
      character(1)
    )
  ))
  w_unit <- grid::convertWidth(grid::stringWidth(w), "in", valueOnly = TRUE)
  # Column widths share `width` proportionally to the widest strings.
  col_widths <- (w_unit / sum(w_unit)) * width
  row_height <- grid::unit(as.numeric(height) / 4, grid::unitType(height))

  # The font-size search below changes the base graphics point size (`ps`).
  # Remember the current value and restore it on exit so that the caller's
  # graphics state is left untouched.
  old_ps <- graphics::par("ps")
  on.exit(graphics::par(ps = old_ps), add = TRUE)

  # Largest point size (1 to 64) at which the 4th string still fits its column.
  w_txt <- vapply(seq_len(64), function(ps) {
    graphics::par(ps = ps)
    graphics::strwidth(w[4], units = "in")
  }, numeric(1))
  f_size_w <- which.max(w_txt[w_txt < as.numeric(col_widths)[4]])

  # Largest point size (1 to 64) at which the text height still fits a row.
  h_txt <- vapply(seq_len(64), function(ps) {
    graphics::par(ps = ps)
    graphics::strheight(grid::stringHeight("X"), units = "in")
  }, numeric(1))
  f_size_h <- which.max(h_txt[h_txt < as.numeric(row_height)])

  # Only override the font size when the theme still carries the value 12;
  # any other size in `ttheme` is left as supplied.
  if (ttheme$core$fg_params$fontsize == 12) {
    ttheme$core$fg_params$fontsize <- min(f_size_w, f_size_h)
    ttheme$colhead$fg_params$fontsize <- min(f_size_w, f_size_h)
    ttheme$rowhead$fg_params$fontsize <- min(f_size_w, f_size_h)
  }

  gt <- gridExtra::tableGrob(
    d = data,
    theme = ttheme
  )
  gt$widths <- col_widths
  gt$heights <- rep(row_height, nrow(gt))

  # Anchor the top-right corner of the table at (`x`, `y`) plus a small offset.
  vp <- grid::viewport(
    x = grid::unit(x, "npc") + grid::unit(1, "lines"),
    y = grid::unit(y, "npc") + grid::unit(1.5, "lines"),
    height = height,
    width = width,
    just = c("right", "top")
  )

  grid::gList(
    grid::gTree(
      vp = vp,
      children = grid::gList(gt)
    )
  )
}

#' Helper: Grid Object with y-axis Annotation
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Build the y-axis annotation from a decomposed `ggplot`.
#'
#' @param ylab (`gtable`)\cr the y-lab as a graphical object derived from a `ggplot`.
#' @param yaxis (`gtable`)\cr the y-axis as a graphical object derived from a `ggplot`.
#'
#' @return a `gTree` object containing the y-axis annotation from a `ggplot`.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "title", footnotes = "footnotes", yval = "Survival"
#' )
#'
#' g_el <- h_decompose_gg(gg)
#'
#' grid::grid.newpage()
#' pvp <- grid::plotViewport(margins = c(5, 4, 2, 20))
#' pushViewport(pvp)
#' grid::grid.draw(h_grob_y_annot(ylab = g_el$ylab, yaxis = g_el$yaxis))
#' grid.rect(gp = grid::gpar(lty = 1, col = "gray35", fill = NA))
#' }
#'
#' @export
h_grob_y_annot <- function(ylab, yaxis) {
  # Total width of the annotation: axis plus axis label, expressed in points.
  annot_width <- grid::convertX(yaxis$width + ylab$width, "pt")

  # Right-aligned viewport just wide enough for label and axis.
  annot_vp <- grid::viewport(
    width = annot_width,
    x = grid::unit(1, "npc"),
    just = "right"
  )

  # Label and axis are bound side by side and wrapped in a single gTree.
  annot_tree <- grid::gTree(
    vp = annot_vp,
    children = grid::gList(cbind(ylab, yaxis))
  )

  grid::gList(annot_tree)
}

#' Helper Function: Pairwise `CoxPH` table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Create a `data.frame` of pairwise stratified or unstratified `CoxPH` analysis results.
#'
#' @inheritParams g_km
#'
#' @return A `data.frame` containing statistics `HR`, `XX% CI` (`XX` taken from `control_coxph_pw`),
#'   and `p-value (log-rank)`.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#'
#' adtte <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#'
#' h_tbl_coxph_pairwise(
#'   df = adtte,
#'   variables = list(tte = "AVAL", is_event = "is_event", arm = "ARM"),
#'   control_coxph_pw = control_coxph(conf_level = 0.9)
#' )
#' }
#'
#' @export
h_tbl_coxph_pairwise <- function(df,
                                 variables,
                                 control_coxph_pw = control_coxph()) {
  assert_df_with_variables(df, variables)

  # The first level of the arm variable is the reference; every other level
  # is compared against it.
  arm_var <- variables$arm
  df[[arm_var]] <- factor(df[[arm_var]])
  arm_levels <- levels(df[[arm_var]])
  ref_level <- arm_levels[1]

  # Two-decimal formatting used for the hazard ratio and its CI bounds.
  fmt_2dp <- function(value) format(round(value, 2), nsmall = 2)

  # One-row data frame with the formatted results of a single comparison.
  compare_with_ref <- function(comp) {
    res <- s_coxph_pairwise(
      df = df[df[[arm_var]] == comp, , drop = FALSE],
      .ref_group = df[df[[arm_var]] == ref_level, , drop = FALSE],
      .in_ref_col = FALSE,
      .var = variables$tte,
      is_event = variables$is_event,
      strat = variables$strat,
      control = control_coxph_pw
    )

    # NOTE(review): the trailing `4` is matched positionally to `format()`'s
    # second argument (`trim`), not to `nsmall` or `digits` -- confirm the
    # intended p-value formatting.
    pvalue_txt <- if (res$pvalue < 0.0001) "<0.0001" else format(round(res$pvalue, 4), 4)

    out <- data.frame(
      hr = fmt_2dp(res$hr),
      hr_ci = paste0("(", fmt_2dp(res$hr_ci[1]), ", ", fmt_2dp(res$hr_ci[2]), ")"),
      pvalue = pvalue_txt,
      stringsAsFactors = FALSE
    )
    # Column headers for CI and p-value come from the labels of the statistics.
    colnames(out) <- c("HR", vapply(res[c("hr_ci", "pvalue")], obj_label, FUN.VALUE = "character"))
    row.names(out) <- comp
    out
  }

  # Stack the per-comparison rows into a single table.
  do.call(rbind, Map(compare_with_ref, arm_levels[-1]))
}

#' Helper Function: `CoxPH` Grob
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Grob of the `data.frame` output from [h_tbl_coxph_pairwise()].
#'
#' @inheritParams h_grob_median_surv
#' @param ... arguments will be passed to [h_tbl_coxph_pairwise()].
#' @param x (`numeric`)\cr a value between 0 and 1 specifying x-location.
#' @param y (`numeric`)\cr a value between 0 and 1 specifying y-location.
#' @param width (`unit`)\cr width (as a unit) to use when printing the grob.
#'
#' @return A `grob` of a table containing statistics `HR`, `XX% CI` (`XX` taken from `control_coxph_pw`),
#'   and `p-value (log-rank)`.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "pink", fill = "gray85", lwd = 1))
#' data <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#' tbl_grob <- h_grob_coxph(
#'   df = data,
#'   variables = list(tte = "AVAL", is_event = "is_event", arm = "ARMCD"),
#'   control_coxph_pw = control_coxph(conf_level = 0.9), x = 0.5, y = 0.5
#' )
#' grid::grid.draw(tbl_grob)
#' }
#'
#' @export
h_grob_coxph <- function(...,
                         x = 0,
                         y = 0,
                         width = grid::unit(0.4, "npc"),
                         ttheme = gridExtra::ttheme_default(
                           padding = grid::unit(c(1, .5), "lines"),
                           core = list(bg_params = list(fill = c("grey95", "grey90"), alpha = .5))
                         )) {
  # Pairwise Cox results table, turned into a positioned table grob.
  data <- h_tbl_coxph_pairwise(...)

  width <- grid::convertUnit(width, "in")
  height <- width * (nrow(data) + 1) / 12

  # Widest string of every column (row names first, then each data column
  # including its header), padded on the left.
  w <- paste("    ", c(
    rownames(data)[which.max(nchar(rownames(data)))],
    vapply(
      names(data),
      function(col) c(col, data[[col]])[which.max(nchar(c(col, data[[col]])))],
      character(1)
    )
  ))
  w_unit <- grid::convertWidth(grid::stringWidth(w), "in", valueOnly = TRUE)
  # Column widths share `width` proportionally to the widest strings.
  col_widths <- (w_unit / sum(w_unit)) * width
  row_height <- grid::unit(as.numeric(height) / 4, grid::unitType(height))

  # The font-size search below changes the base graphics point size (`ps`).
  # Remember the current value and restore it on exit so that the caller's
  # graphics state is left untouched.
  old_ps <- graphics::par("ps")
  on.exit(graphics::par(ps = old_ps), add = TRUE)

  # Largest point size (1 to 64) at which the 4th string still fits its column.
  w_txt <- vapply(seq_len(64), function(ps) {
    graphics::par(ps = ps)
    graphics::strwidth(w[4], units = "in")
  }, numeric(1))
  f_size_w <- which.max(w_txt[w_txt < as.numeric(col_widths)[4]])

  # Largest point size (1 to 64) at which the text height still fits a row.
  h_txt <- vapply(seq_len(64), function(ps) {
    graphics::par(ps = ps)
    graphics::strheight(grid::stringHeight("X"), units = "in")
  }, numeric(1))
  f_size_h <- which.max(h_txt[h_txt < as.numeric(row_height)])

  # Only override the font size when the theme still carries the value 12;
  # any other size in `ttheme` is left as supplied.
  if (ttheme$core$fg_params$fontsize == 12) {
    ttheme$core$fg_params$fontsize <- min(f_size_w, f_size_h)
    ttheme$colhead$fg_params$fontsize <- min(f_size_w, f_size_h)
    ttheme$rowhead$fg_params$fontsize <- min(f_size_w, f_size_h)
  }

  tryCatch(
    expr = {
      gt <- gridExtra::tableGrob(
        d = data,
        theme = ttheme
      ) # ERROR 'data' must be of a vector type, was 'NULL'
      gt$widths <- col_widths
      gt$heights <- rep(row_height, nrow(gt))
      # Anchor the bottom-left corner of the table at (`x`, `y`) plus a small offset.
      vp <- grid::viewport(
        x = grid::unit(x, "npc") + grid::unit(1, "lines"),
        y = grid::unit(y, "npc") + grid::unit(1.5, "lines"),
        height = height,
        width = width,
        just = c("left", "bottom")
      )
      grid::gList(
        grid::gTree(
          vp = vp,
          children = grid::gList(gt)
        )
      )
    },
    # Any error while building the table is reported as a message and an empty
    # grob is returned instead, so that the surrounding plot can still be drawn.
    error = function(e) {
      message(paste(
        "Warning: Cox table will not be displayed as there is",
        "not any level to be compared in the arm variable."
      ))
      grid::gList(
        grid::gTree(
          vp = NULL,
          children = NULL
        )
      )
    }
  )
}

#' Confidence Interval for Mean
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convenient function for calculating the mean confidence interval. It calculates the arithmetic as well as the
#' geometric mean. It can be used as a `ggplot` helper function for plotting.
#'
#' @inheritParams argument_convention
#' @param n_min (`number`)\cr a minimum number of non-missing `x` to estimate the confidence interval for mean.
#' @param gg_helper (`logical`)\cr `TRUE` when output should be aligned for the use with `ggplot`.
#' @param geom_mean (`logical`)\cr `TRUE` when the geometric mean should be calculated.
#'
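#' @return A named `vector` of values `mean_ci_lwr` and `mean_ci_upr`. If `gg_helper = TRUE` (the default),
#'   a one-row `data.frame` with columns `y`, `ymin` and `ymax` is returned instead.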
#'
#' @examples
#' stat_mean_ci(sample(10), gg_helper = FALSE)
#'
#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
#'   ggplot2::geom_point()
#'
#' p + ggplot2::stat_summary(
#'   fun.data = stat_mean_ci,
#'   geom = "errorbar"
#' )
#'
#' p + ggplot2::stat_summary(
#'   fun.data = stat_mean_ci,
#'   fun.args = list(conf_level = 0.5),
#'   geom = "errorbar"
#' )
#'
#' p + ggplot2::stat_summary(
#'   fun.data = stat_mean_ci,
#'   fun.args = list(conf_level = 0.5, geom_mean = TRUE),
#'   geom = "errorbar"
#' )
#'
#' @export
stat_mean_ci <- function(x,
                         conf_level = 0.95,
                         na.rm = TRUE, # nolint
                         n_min = 2,
                         gg_helper = TRUE,
                         geom_mean = FALSE) {
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  if (geom_mean) {
    # The geometric mean is only defined for strictly positive values. On
    # valid input everything below works on the log scale and is transformed
    # back at the end.
    observed <- x[!is.na(x)]
    if (any(observed <= 0)) {
      m <- NA_real_
    } else {
      x <- log(x)
      m <- mean(x)
    }
  } else {
    m <- mean(x)
  }

  # Start from a missing interval; fill it in only with enough data and a
  # defined mean.
  ci <- c(mean_ci_lwr = NA_real_, mean_ci_upr = NA_real_)
  if (n >= n_min && !is.na(m)) {
    # Half-width of the t-based confidence interval.
    hci <- stats::qt((1 + conf_level) / 2, df = n - 1) * stats::sd(x) / sqrt(n)
    ci <- c(mean_ci_lwr = m - hci, mean_ci_upr = m + hci)
    if (geom_mean) {
      ci <- exp(ci)
    }
  }

  if (gg_helper) {
    # `ggplot2::stat_summary()` expects columns `y`, `ymin` and `ymax`.
    m <- if (is.na(m)) NA_real_ else m
    centre <- if (geom_mean) exp(m) else m
    ci <- data.frame(y = centre, ymin = ci[[1]], ymax = ci[[2]])
  }

  ci
}

#' Confidence Interval for Median
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convenient function for calculating the median confidence interval. It can be used as a `ggplot` helper
#' function for plotting.
#'
#' @inheritParams argument_convention
#' @param gg_helper (`logical`)\cr `TRUE` when output should be aligned for the use with `ggplot`.
#'
#' @details The function was adapted from `DescTools/versions/0.99.35/source`
#'
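#' @return A named `vector` of values `median_ci_lwr` and `median_ci_upr`. If `gg_helper = TRUE` (the default),
#'   a one-row `data.frame` with columns `y`, `ymin` and `ymax` is returned instead. In both cases the empirical
#'   confidence level is attached as attribute `conf_level`.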
#'
#' @examples
#' stat_median_ci(sample(10), gg_helper = FALSE)
#'
#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
#'   ggplot2::geom_point()
#' p + ggplot2::stat_summary(
#'   fun.data = stat_median_ci,
#'   geom = "errorbar"
#' )
#'
#' @export
stat_median_ci <- function(x,
                           conf_level = 0.95,
                           na.rm = TRUE, # nolint
                           gg_helper = TRUE) {
  x <- unname(x)
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  n <- length(x)
  med <- stats::median(x)

  # Rank of the lower order statistic, from the Binomial(n, 0.5) distribution.
  k <- stats::qbinom(p = (1 - conf_level) / 2, size = n, prob = 0.5, lower.tail = TRUE)

  # Default to a missing interval: with k == 0 (small samples, e.g. n <= 5)
  # the interval could fall outside the observed range.
  ci <- c(median_ci_lwr = NA_real_, median_ci_upr = NA_real_)
  empir_conf_level <- NA_real_

  if (k != 0 && !is.na(med)) {
    ordered <- sort(x)
    ci <- c(median_ci_lwr = ordered[k], median_ci_upr = ordered[n - k + 1])
    # Coverage actually achieved by the chosen pair of order statistics.
    empir_conf_level <- 1 - 2 * stats::pbinom(k - 1, size = n, prob = 0.5)
  }

  if (gg_helper) {
    # `ggplot2::stat_summary()` expects columns `y`, `ymin` and `ymax`.
    ci <- data.frame(y = med, ymin = ci[[1]], ymax = ci[[2]])
  }

  attr(ci, "conf_level") <- empir_conf_level
  ci
}

#' p-Value of the Mean
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convenient function for calculating the two-sided p-value of the mean.
#'
#' @inheritParams argument_convention
#' @param n_min (`numeric`)\cr a minimum number of non-missing `x` to estimate the p-value of the mean.
#' @param test_mean (`numeric`)\cr mean value to test under the null hypothesis.
#'
#' @return A p-value.
#'
#' @examples
#' stat_mean_pval(sample(10))
#'
#' stat_mean_pval(rnorm(10), test_mean = 0.5)
#'
#' @export
stat_mean_pval <- function(x,
                           na.rm = TRUE, # nolint
                           n_min = 2,
                           test_mean = 0) {
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  x_mean <- mean(x)
  x_sd <- stats::sd(x)

  # Too few observations: no p-value can be estimated.
  if (n < n_min) {
    return(c(p_value = NA_real_))
  }

  # Two-sided one-sample t-test of the mean against `test_mean`.
  t_stat <- (x_mean - test_mean) / (x_sd / sqrt(n))
  c(p_value = 2 * stats::pt(-abs(t_stat), df = n - 1))
}

#' Estimation of Proportions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Estimate the proportion of responders within a studied population.
#'
#' @inheritParams argument_convention
#'
#' @seealso [h_proportions]
#'
#' @name estimate_proportions
NULL

#' @describeIn estimate_proportions Statistics function estimating a
#'   proportion along with its confidence interval.
#'
#' @inheritParams prop_strat_wilson
#' @param df (`logical` or `data.frame`)\cr if only a logical vector is used,
#'   it indicates whether each subject is a responder or not. `TRUE` represents
#'   a successful outcome. If a `data.frame` is provided, also the `strata` variable
#'   names must be provided in `variables` as a list element with the strata strings.
#'   In the case of `data.frame`, the logical vector of responses must be indicated as a
#'   variable name in `.var`.
#' @param method (`string`)\cr the method used to construct the confidence interval
#'   for proportion of successful outcomes; one of `waldcc`, `wald`, `clopper-pearson`,
#'   `wilson`, `wilsonc`, `strat_wilson`, `strat_wilsonc`, `agresti-coull` or `jeffreys`.
#' @param long (`flag`)\cr a long description is required.
#'
#' @return
#' * `s_proportion()` returns statistics `n_prop` (`n` and proportion) and `prop_ci` (proportion CI) for a
#'   given variable.
#'
#' @examples
#' # Case with only logical vector.
#' rsp_v <- c(1, 0, 1, 0, 1, 1, 0, 0)
#' s_proportion(rsp_v)
#'
#' # Example for Stratified Wilson CI
#' nex <- 100 # Number of example rows
#' dta <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
#'   "grp" = sample(c("A", "B"), nex, TRUE),
#'   "f1" = sample(c("a1", "a2"), nex, TRUE),
#'   "f2" = sample(c("x", "y", "z"), nex, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' s_proportion(
#'   df = dta,
#'   .var = "rsp",
#'   variables = list(strata = c("f1", "f2")),
#'   conf_level = 0.90,
#'   method = "strat_wilson"
#' )
#'
#' @export
s_proportion <- function(df,
                         .var,
                         conf_level = 0.95,
                         method = c(
                           "waldcc", "wald", "clopper-pearson",
                           "wilson", "wilsonc", "strat_wilson", "strat_wilsonc",
                           "agresti-coull", "jeffreys"
                         ),
                         weights = NULL,
                         max_iterations = 50,
                         variables = list(strata = NULL),
                         long = FALSE) {
  method <- match.arg(method)
  checkmate::assert_flag(long)
  assert_proportion_value(conf_level)

  strata_colnames <- variables$strata
  if (is.null(strata_colnames)) {
    # Stratified methods cannot run without strata variables.
    if (method %in% c("strat_wilson", "strat_wilsonc")) {
      stop("To use stratified methods you need to specify the strata variables.")
    }
  } else {
    # Strata requested: a data frame holding all strata columns is required.
    if (missing(df)) stop("When doing stratified analysis a data.frame with specific columns is needed.")
    checkmate::assert_character(strata_colnames, null.ok = FALSE)
    assert_df_with_variables(df, stats::setNames(as.list(strata_colnames), strata_colnames))

    # One factor level per combination of the strata variables; further checks
    # are left to `prop_strat_wilson()`.
    strata <- as.factor(interaction(df[strata_colnames]))
  }

  # The response is either `df` itself (atomic vector) or its column `.var`.
  rsp <- as.logical(if (checkmate::test_atomic_vector(df)) df else df[[.var]])
  n <- sum(rsp)
  p_hat <- mean(rsp)

  # Stratified Wilson interval, with or without continuity correction.
  strat_wilson_ci <- function(correct) {
    prop_strat_wilson(rsp,
      strata,
      weights,
      conf_level,
      max_iterations,
      correct = correct
    )$conf_int
  }

  prop_ci <- switch(method,
    "waldcc" = prop_wald(rsp, conf_level, correct = TRUE),
    "wald" = prop_wald(rsp, conf_level),
    "clopper-pearson" = prop_clopper_pearson(rsp, conf_level),
    "wilson" = prop_wilson(rsp, conf_level),
    "wilsonc" = prop_wilson(rsp, conf_level, correct = TRUE),
    "strat_wilson" = strat_wilson_ci(correct = FALSE),
    "strat_wilsonc" = strat_wilson_ci(correct = TRUE),
    "agresti-coull" = prop_agresti_coull(rsp, conf_level),
    "jeffreys" = prop_jeffreys(rsp, conf_level)
  )

  # The interval is reported in percent; its label describes level and method.
  ci_label <- d_proportion(conf_level, method, long = long)
  list(
    "n_prop" = formatters::with_label(c(n, p_hat), "Responders"),
    "prop_ci" = formatters::with_label(x = 100 * prop_ci, label = ci_label)
  )
}

#' @describeIn estimate_proportions Formatted analysis function which is used as `afun`
#'   in `estimate_proportion()`.
#'
#' @return
#' * `a_proportion()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @export
a_proportion <- make_afun(
  # Wrap the statistics function and attach a default format to each of the
  # two statistics it returns (`n_prop` and `prop_ci`).
  s_proportion,
  .formats = c(n_prop = "xx (xx.x%)", prop_ci = "(xx.x, xx.x)")
)

#' @describeIn estimate_proportions Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... other arguments are ultimately conveyed to [s_proportion()].
#'
#' @return
#' * `estimate_proportion()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_proportion()` to the table layout.
#'
#' @examples
#' dta_test <- data.frame(
#'   USUBJID = paste0("S", 1:12),
#'   ARM     = rep(LETTERS[1:3], each = 4),
#'   AVAL    = c(A = c(1, 1, 1, 1), B = c(0, 0, 1, 1), C = c(0, 0, 0, 0))
#' )
#'
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   estimate_proportion(vars = "AVAL") %>%
#'   build_table(df = dta_test)
#'
#' @export
estimate_proportion <- function(lyt,
                                vars,
                                ...,
                                show_labels = "hidden",
                                table_names = vars,
                                .stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Additional arguments are handed to `analyze()` as `extra_args`.
  forwarded_args <- list(...)

  # Customise the formatted analysis function with the user's choice of
  # statistics, formats, labels and indentation.
  proportion_afun <- make_afun(
    a_proportion,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  analyze(
    lyt,
    vars,
    afun = proportion_afun,
    extra_args = forwarded_args,
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Helper Functions for Calculating Proportion Confidence Intervals
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions to calculate different proportion confidence intervals for use in [estimate_proportion()].
#'
#' @inheritParams argument_convention
#' @inheritParams estimate_proportions
#'
#' @return Confidence interval of a proportion.
#'
#' @seealso [estimate_proportions], descriptive function [d_proportion()],
#'  and helper functions [strata_normal_quantile()] and [update_weights_strat_wilson()].
#'
#' @name h_proportions
NULL

#' @describeIn h_proportions Calculates the Wilson interval by calling [stats::prop.test()].
#'  Also referred to as Wilson score interval.
#'
#' @examples
#' rsp <- c(
#'   TRUE, TRUE, TRUE, TRUE, TRUE,
#'   FALSE, FALSE, FALSE, FALSE, FALSE
#' )
#' prop_wilson(rsp, conf_level = 0.9)
#'
#' @export
prop_wilson <- function(rsp, conf_level, correct = FALSE) {
  # Number of responders and number of subjects.
  n_rsp <- sum(rsp)
  n_total <- length(rsp)

  # The confidence interval reported by the one-sample proportion test is the
  # Wilson score interval, optionally with continuity correction.
  score_test <- stats::prop.test(
    n_rsp,
    n_total,
    correct = correct,
    conf.level = conf_level
  )

  # Drop the attributes attached to the interval.
  as.numeric(score_test$conf.int)
}

#' @describeIn h_proportions Calculates the stratified Wilson confidence
#'   interval for unequal proportions as described in \insertCite{Yan2010-jt;textual}{tern}
#'
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
#' @param weights (`numeric` or `NULL`)\cr weights for each level of the strata. If `NULL`, they are
#'   estimated using the iterative algorithm proposed in \insertCite{Yan2010-jt;textual}{tern} that
#'   minimizes the weighted squared length of the confidence interval.
#' @param max_iterations (`count`)\cr maximum number of iterations for the iterative procedure used
#'   to find estimates of optimal weights.
#' @param correct (`flag`)\cr include the continuity correction. For further information, see for example
#'   [stats::prop.test()].
#'
#' @references
#' \insertRef{Yan2010-jt}{tern}
#'
#' @examples
#' # Stratified Wilson confidence interval with unequal probabilities
#'
#' set.seed(1)
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
#' strata_data <- data.frame(
#'   "f1" = sample(c("a", "b"), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' )
#' strata <- interaction(strata_data)
#' n_strata <- ncol(table(rsp, strata)) # Number of strata
#'
#' prop_strat_wilson(
#'   rsp = rsp, strata = strata,
#'   conf_level = 0.90
#' )
#'
#' # Not automatic setting of weights
#' prop_strat_wilson(
#'   rsp = rsp, strata = strata,
#'   weights = rep(1 / n_strata, n_strata),
#'   conf_level = 0.90
#' )
#'
#' @export
prop_strat_wilson <- function(rsp,
                              strata,
                              weights = NULL,
                              conf_level = 0.95,
                              max_iterations = NULL,
                              correct = FALSE) {
  checkmate::assert_logical(rsp, any.missing = FALSE)
  checkmate::assert_factor(strata, len = length(rsp))
  assert_proportion_value(conf_level)

  tbl <- table(rsp, strata)
  n_strata <- length(unique(strata))

  # Without user-supplied weights, start from equal weights and estimate them
  # iteratively further below.
  do_iter <- is.null(weights)
  if (do_iter) {
    weights <- rep(1 / n_strata, n_strata)

    # Iteration parameters
    if (is.null(max_iterations)) max_iterations <- 10
    checkmate::assert_int(max_iterations, na.ok = FALSE, null.ok = FALSE, lower = 1)
  }
  checkmate::assert_numeric(weights, lower = 0, upper = 1, any.missing = FALSE, len = n_strata)
  sum_weights <- checkmate::assert_int(sum(weights))
  if (as.integer(sum_weights + 0.5) != 1L) stop("Sum of weights must be 1L.")

  # Responders and subjects per stratum; strata without subjects are dropped.
  responders_all <- tbl["TRUE", ]
  subjects_all <- colSums(tbl)
  non_empty <- subjects_all > 0
  ns <- subjects_all[non_empty]
  xs <- responders_all[non_empty]

  # Per-stratum proportion estimates and their variances.
  ests <- xs / ns
  vars <- ests * (1 - ests) / ns

  strata_qnorm <- strata_normal_quantile(vars, weights, conf_level)

  # Weights are only re-estimated when they were not set externally.
  weights_new <- weights
  if (do_iter) {
    weights_new <- update_weights_strat_wilson(
      vars, strata_qnorm, weights, ns, max_iterations, conf_level
    )$weights
  }

  # Confidence level applied within each stratum, derived from the stratified quantile.
  strata_conf_level <- 2 * stats::pnorm(strata_qnorm) - 1

  # Classic Wilson confidence interval within a single stratum.
  wilson_ci <- function(x, n) {
    suppressWarnings(stats::prop.test(x, n, correct = correct, conf.level = strata_conf_level)$conf.int)
  }
  ci_by_strata <- Map(wilson_ci, x = xs, n = ns)

  # Weighted sums of the per-stratum limits give the overall interval.
  lower <- sum(weights_new * vapply(ci_by_strata, "[", numeric(1), 1L))
  upper <- sum(weights_new * vapply(ci_by_strata, "[", numeric(1), 2L))

  # The estimated weights are only part of the result when they were computed here.
  result <- list(conf_int = c(lower = lower, upper = upper))
  if (do_iter) {
    result <- c(result, list(weights = weights_new))
  }
  result
}

#' @describeIn h_proportions Calculates the Clopper-Pearson interval by calling [stats::binom.test()].
#'   Also referred to as the `exact` method.
#'
#' @examples
#' prop_clopper_pearson(rsp, conf_level = .95)
#'
#' @export
prop_clopper_pearson <- function(rsp,
                                 conf_level) {
  # Number of responders and number of subjects.
  n_rsp <- sum(rsp)
  n_total <- length(rsp)

  # The exact binomial test provides the Clopper-Pearson interval.
  exact_test <- stats::binom.test(x = n_rsp, n = n_total, conf.level = conf_level)

  # Drop the attributes attached to the interval.
  as.numeric(exact_test$conf.int)
}

#' @describeIn h_proportions Calculates the Wald interval by following the usual textbook definition
#'   for a single proportion confidence interval using the normal approximation.
#'
#' @param correct (`flag`)\cr apply continuity correction.
#'
#' @examples
#' prop_wald(rsp, conf_level = 0.95)
#' prop_wald(rsp, conf_level = 0.95, correct = TRUE)
#'
#' @export
prop_wald <- function(rsp, conf_level, correct = FALSE) {
  n_total <- length(rsp)
  prop <- mean(rsp)
  crit <- stats::qnorm((1 + conf_level) / 2)

  # Continuity correction widens the interval by 1 / (2n) on each side.
  continuity <- if (correct) 1 / (2 * n_total) else 0
  half_width <- crit * sqrt(prop * (1 - prop)) / sqrt(n_total) + continuity

  # The limits are truncated to the unit interval.
  c(
    max(0, prop - half_width),
    min(1, prop + half_width)
  )
}

#' @describeIn h_proportions Calculates the `Agresti-Coull` interval (created by `Alan Agresti` and `Brent Coull`) by
#'   (for 95% CI) adding two successes and two failures to the data and then using the Wald formula to construct a CI.
#'
#' @examples
#' prop_agresti_coull(rsp, conf_level = 0.95)
#'
#' @export
prop_agresti_coull <- function(rsp, conf_level) {
  n_total <- length(rsp)
  n_rsp <- sum(rsp)
  crit <- stats::qnorm((1 + conf_level) / 2)

  # Augment the data with z^2 / 2 successes and z^2 / 2 failures.
  n_rsp_adj <- n_rsp + crit^2 / 2
  n_total_adj <- n_total + crit^2

  # Wald interval on the augmented data, truncated to the unit interval.
  prop_adj <- n_rsp_adj / n_total_adj
  half_width <- crit * sqrt(prop_adj * (1 - prop_adj)) / sqrt(n_total_adj)

  c(
    max(0, prop_adj - half_width),
    min(1, prop_adj + half_width)
  )
}

#' @describeIn h_proportions Calculates the Jeffreys interval, an equal-tailed interval based on the
#'   non-informative Jeffreys prior for a binomial proportion.
#'
#' @examples
#' prop_jeffreys(rsp, conf_level = 0.95)
#'
#' @export
prop_jeffreys <- function(rsp,
                          conf_level) {
  n_total <- length(rsp)
  n_rsp <- sum(rsp)

  # Both limits are quantiles of the Beta(x + 0.5, n - x + 0.5) distribution.
  tail_prob <- (1 - conf_level) / 2
  shape_rsp <- n_rsp + 0.5
  shape_non_rsp <- n_total - n_rsp + 0.5

  # With no responders the lower limit is fixed at 0; with only responders
  # the upper limit is fixed at 1.
  lower <- ifelse(
    n_rsp == 0,
    0,
    stats::qbeta(tail_prob, shape_rsp, shape_non_rsp)
  )
  upper <- ifelse(
    n_rsp == n_total,
    1,
    stats::qbeta(1 - tail_prob, shape_rsp, shape_non_rsp)
  )

  c(lower, upper)
}

#' Description of the Proportion Summary
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a helper function that describes the analysis in [s_proportion()].
#'
#' @inheritParams s_proportion
#' @param long (`flag`)\cr whether a long or a short (default) description is required.
#'
#' @return String describing the analysis.
#'
#' @export
d_proportion <- function(conf_level,
                         method,
                         long = FALSE) {
  # Display names of the supported confidence interval methods.
  method_labels <- c(
    "clopper-pearson" = "Clopper-Pearson",
    "waldcc" = "Wald, with correction",
    "wald" = "Wald, without correction",
    "wilson" = "Wilson, without correction",
    "strat_wilson" = "Stratified Wilson, without correction",
    "wilsonc" = "Wilson, with correction",
    "strat_wilsonc" = "Stratified Wilson, with correction",
    "agresti-coull" = "Agresti-Coull",
    "jeffreys" = "Jeffreys"
  )
  if (!method %in% names(method_labels)) {
    stop(paste(method, "does not have a description"))
  }

  # Confidence level in percent, optionally followed by the long description.
  label <- paste0(conf_level * 100, "% CI")
  if (long) {
    label <- paste(label, "for Response Rates")
  }

  paste0(label, " (", method_labels[[method]], ")")
}

#' Helper Function for the Estimation of Stratified Quantiles
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This function wraps the estimation of stratified percentiles when we assume
#' the approximation for large numbers. This is necessary only in the case
#' proportions for each strata are unequal.
#'
#' @inheritParams argument_convention
#' @inheritParams prop_strat_wilson
#'
#' @return Stratified quantile.
#'
#' @seealso [prop_strat_wilson()]
#'
#' @examples
#' strata_data <- table(data.frame(
#'   "f1" = sample(c(TRUE, FALSE), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' ))
#' ns <- colSums(strata_data)
#' ests <- strata_data["TRUE", ] / ns
#' vars <- ests * (1 - ests) / ns
#' weights <- rep(1 / length(ns), length(ns))
#' strata_normal_quantile(vars, weights, 0.95)
#'
#' @export
strata_normal_quantile <- function(vars, weights, conf_level) {
  # Per-stratum contribution: squared weight times the stratum variance.
  weighted_vars <- weights^2 * vars
  crit <- stats::qnorm((1 + conf_level) / 2)

  # Ratio of the pooled standard deviation to the sum of the per-stratum
  # standard deviations, used to rescale the normal quantile.
  scaling <- sqrt(sum(weighted_vars)) / sum(sqrt(weighted_vars))
  scaling * crit
}

#' Helper Function for the Estimation of Weights for `prop_strat_wilson`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This function wraps the iteration procedure that allows you to estimate
#' the weights for each proportional strata. This assumes to minimize the
#' weighted squared length of the confidence interval.
#'
#' @inheritParams prop_strat_wilson
#' @param vars (`numeric`)\cr normalized proportions for each strata.
#' @param strata_qnorm (`numeric`)\cr initial estimation with identical weights of the quantiles.
#' @param initial_weights (`numeric`)\cr initial weights used to calculate `strata_qnorm`. This can
#'   be optimized in the future if we need to estimate better initial weights.
#' @param n_per_strata (`numeric`)\cr number of elements in each strata.
#' @param max_iterations (`count`)\cr maximum number of iterations to be tried. Convergence is always checked.
#' @param tol (`number`)\cr tolerance threshold for convergence.
#'
#' @return A `list` of 3 elements: `n_it`, `weights`, and `diff_v`.
#'
#' @seealso For references and details see [prop_strat_wilson()].
#'
#' @examples
#' vs <- c(0.011, 0.013, 0.012, 0.014, 0.017, 0.018)
#' sq <- 0.674
#' ws <- rep(1 / length(vs), length(vs))
#' ns <- c(22, 18, 17, 17, 14, 12)
#'
#' update_weights_strat_wilson(vs, sq, ws, ns, 100, 0.95, 0.001)
#'
#' @export
update_weights_strat_wilson <- function(vars,
                                        strata_qnorm,
                                        initial_weights,
                                        n_per_strata,
                                        max_iterations = 50,
                                        conf_level = 0.95,
                                        tol = 0.001) {
  it <- 0
  diff_v <- NULL
  # Tracks whether the tolerance was actually reached, so that converging on
  # the very last permitted iteration is not reported as a failure.
  converged <- FALSE
  # Fall back to the initial weights if no iteration is performed at all
  # (e.g. `max_iterations = 0`), instead of failing on an undefined object.
  weights_new <- initial_weights

  while (it < max_iterations) {
    it <- it + 1
    # Unnormalized weight update: numerator and denominator use the current
    # stratified quantile and the per-stratum sample sizes.
    weights_new_t <- (1 + strata_qnorm^2 / n_per_strata)^2
    weights_new_b <- (vars + strata_qnorm^2 / (4 * n_per_strata^2))
    weights_new <- weights_new_t / weights_new_b
    # Normalize so that the weights sum to one.
    weights_new <- weights_new / sum(weights_new)
    # Re-estimate the stratified quantile with the updated weights.
    strata_qnorm <- strata_normal_quantile(vars, weights_new, conf_level)
    # Total absolute change of the weights in this iteration.
    diff_v <- c(diff_v, sum(abs(weights_new - initial_weights)))
    if (diff_v[length(diff_v)] < tol) {
      converged <- TRUE
      break
    }
    initial_weights <- weights_new
  }

  if (!converged) {
    warning("The heuristic to find weights did not converge with max_iterations = ", max_iterations)
  }

  list(
    "n_it" = it,
    "weights" = weights_new,
    "diff_v" = diff_v
  )
}

#' Add Titles, Footnotes, Page Number, and a Bounding Box to a Grid Grob
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This function is useful to label grid grobs (also `ggplot2`, and `lattice` plots)
#' with title, footnote, and page numbers.
#'
#' @inheritParams grid::grob
#' @param grob a grid grob object, optionally `NULL` if only a `grob` with the decoration should be shown.
#' @param titles vector of character strings. Vector elements are separated by a newline and strings are wrapped
#'   according to the page width.
#' @param footnotes vector of character string. Same rules as for `titles`.
#' @param page string with page numeration, if `NULL` then no page number is displayed.
#' @param width_titles unit object
#' @param width_footnotes unit object
#' @param border boolean, whether a border should be drawn around the plot or not.
#' @param margins unit object of length 4
#' @param padding  unit object of length 4
#' @param outer_margins  unit object of length 4
#' @param gp_titles a `gpar` object
#' @param gp_footnotes a `gpar` object
#'
#' @return A grid grob (`gTree`).
#'
#' @details The titles and footnotes will be ragged, i.e. each title will be wrapped individually.
#'
#' @examples
#' library(grid)
#'
#' titles <- c(
#'   "Edgar Anderson's Iris Data",
#'   paste(
#'     "This famous (Fisher's or Anderson's) iris data set gives the measurements",
#'     "in centimeters of the variables sepal length and width and petal length",
#'     "and width, respectively, for 50 flowers from each of 3 species of iris."
#'   )
#' )
#'
#' footnotes <- c(
#'   "The species are Iris setosa, versicolor, and virginica.",
#'   paste(
#'     "iris is a data frame with 150 cases (rows) and 5 variables (columns) named",
#'     "Sepal.Length, Sepal.Width, Petal.Length, Petal.Width, and Species."
#'   )
#' )
#'
#' ## empty plot
#' grid.newpage()
#'
#' grid.draw(
#'   decorate_grob(
#'     NULL,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 4 of 10"
#'   )
#' )
#'
#' # grid
#' p <- gTree(
#'   children = gList(
#'     rectGrob(),
#'     xaxisGrob(),
#'     yaxisGrob(),
#'     textGrob("Sepal.Length", y = unit(-4, "lines")),
#'     textGrob("Petal.Length", x = unit(-3.5, "lines"), rot = 90),
#'     pointsGrob(iris$Sepal.Length, iris$Petal.Length, gp = gpar(col = iris$Species), pch = 16)
#'   ),
#'   vp = vpStack(plotViewport(), dataViewport(xData = iris$Sepal.Length, yData = iris$Petal.Length))
#' )
#' grid.newpage()
#' grid.draw(p)
#'
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     grob = p,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 6 of 129"
#'   )
#' )
#'
#' ## with ggplot2
#' library(ggplot2)
#'
#' p_gg <- ggplot2::ggplot(iris, aes(Sepal.Length, Sepal.Width, col = Species)) +
#'   ggplot2::geom_point()
#' p_gg
#' p <- ggplotGrob(p_gg)
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     grob = p,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 6 of 129"
#'   )
#' )
#'
#' ## with lattice
#' library(lattice)
#'
#' xyplot(Sepal.Length ~ Petal.Length, data = iris, col = iris$Species)
#' p <- grid.grab()
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     grob = p,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 6 of 129"
#'   )
#' )
#'
#' # with gridExtra - no borders
#' library(gridExtra)
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     tableGrob(
#'       head(mtcars)
#'     ),
#'     titles = "title",
#'     footnotes = "footnote",
#'     border = FALSE
#'   )
#' )
#'
#' @export
decorate_grob <- function(grob,
                          titles,
                          footnotes,
                          page = "",
                          width_titles = grid::unit(1, "npc") - grid::stringWidth(page),
                          width_footnotes = grid::unit(1, "npc") - grid::stringWidth(page),
                          border = TRUE,
                          margins = grid::unit(c(1, 0, 1, 0), "lines"),
                          padding = grid::unit(rep(1, 4), "lines"),
                          outer_margins = grid::unit(c(2, 1.5, 3, 1.5), "cm"),
                          gp_titles = grid::gpar(),
                          gp_footnotes = grid::gpar(fontsize = 8),
                          name = NULL,
                          gp = grid::gpar(),
                          vp = NULL) {
  # Wrapped title text, anchored top-left in row 1 of the 3-row layout defined below.
  st_titles <- split_text_grob(
    titles,
    x = 0, y = 1,
    just = c("left", "top"),
    width = width_titles,
    vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 1),
    gp = gp_titles
  )

  # Wrapped footnote text, anchored top-left in row 3 of the same layout.
  st_footnotes <- split_text_grob(
    footnotes,
    x = 0, y = 1,
    just = c("left", "top"),
    width = width_footnotes,
    vp = grid::viewport(layout.pos.row = 3, layout.pos.col = 1),
    gp = gp_footnotes
  )

  # The inputs are passed to the gTree as named arguments; `validDetails.decoratedGrob()`
  # reads several of them back as `x$titles`, `x$footnotes`, `x$grob`, `x$page`, etc.
  grid::gTree(
    grob = grob,
    titles = titles,
    footnotes = footnotes,
    page = page,
    width_titles = width_titles,
    width_footnotes = width_footnotes,
    border = border,
    margins = margins,
    padding = padding,
    outer_margins = outer_margins,
    gp_titles = gp_titles,
    gp_footnotes = gp_footnotes,
    children = grid::gList(
      # Single child tree holding titles, the (optionally bordered) plot, footnotes and page label.
      grid::gTree(
        children = grid::gList(
          st_titles,
          # Middle section (layout row 2): optional border plus the padded user grob.
          grid::gTree(
            children = grid::gList(
              # Evaluates to NULL when `border` is FALSE, so no rectangle is added.
              if (border) grid::rectGrob(),
              grid::gTree(
                children = grid::gList(
                  grob
                ),
                vp = grid::plotViewport(margins = padding)
              )
            ),
            vp = grid::vpStack(
              grid::viewport(layout.pos.row = 2, layout.pos.col = 1),
              grid::plotViewport(margins = margins)
            )
          ),
          st_footnotes,
          # Page label: bottom-right of row 3, drawn with the footnote graphical parameters.
          grid::textGrob(
            page,
            x = 1, y = 0,
            just = c("right", "bottom"),
            vp = grid::viewport(layout.pos.row = 3, layout.pos.col = 1),
            gp = gp_footnotes
          )
        ),
        childrenvp = NULL,
        name = "titles_grob_footnotes",
        vp = grid::vpStack(
          # Outer margins first, then a 3-row layout: title height, remaining space, footnote height.
          grid::plotViewport(margins = outer_margins),
          grid::viewport(
            layout = grid::grid.layout(
              nrow = 3, ncol = 1,
              heights = grid::unit.c(
                grid::grobHeight(st_titles),
                grid::unit(1, "null"),
                grid::grobHeight(st_footnotes)
              )
            )
          )
        )
      )
    ),
    name = name,
    gp = gp,
    vp = vp,
    # Custom class so that the `*.decoratedGrob` methods are dispatched.
    cl = "decoratedGrob"
  )
}

#' @importFrom grid validDetails
#' @noRd
validDetails.decoratedGrob <- function(x) {
  # Validity check for "decoratedGrob" objects: asserts on the stored fields
  # and returns `x` unchanged when every check passes.
  checkmate::assert_character(x$titles)
  checkmate::assert_character(x$footnotes)

  # The wrapped grob is optional; when present it must be a grid grob.
  if (!is.null(x$grob)) {
    checkmate::assert_true(grid::is.grob(x$grob))
  }
  # The page label is only type-checked when it has exactly one element.
  if (length(x$page) == 1) {
    checkmate::assert_character(x$page)
  }
  # Spacing settings that are not unit objects must be vectors of length 4;
  # unit objects are accepted without a length check.
  if (!grid::is.unit(x$outer_margins)) {
    checkmate::assert_vector(x$outer_margins, len = 4)
  }
  if (!grid::is.unit(x$margins)) {
    checkmate::assert_vector(x$margins, len = 4)
  }
  if (!grid::is.unit(x$padding)) {
    checkmate::assert_vector(x$padding, len = 4)
  }

  x
}

#' @importFrom grid widthDetails
#' @noRd
widthDetails.decoratedGrob <- function(x) {
  # The reported width is always one "null" unit, independent of the content of `x`.
  flexible_width <- grid::unit(1, "null")
  flexible_width
}

#' @importFrom grid heightDetails
#' @noRd
heightDetails.decoratedGrob <- function(x) {
  # The reported height is always one "null" unit, independent of the content of `x`.
  flexible_height <- grid::unit(1, "null")
  flexible_height
}

# Adapted from Paul Murrell, R Graphics, 2nd Edition
# https://www.stat.auckland.ac.nz/~paul/RG2e/interactgrid-splittext.R
split_string <- function(text, width) {
  # Wraps each element of `text` so that its lines fit into `width` (a grid
  # unit) and returns a single string in which the wrapped elements are
  # joined by newlines. Words are only split at single spaces.
  strings <- strsplit(text, " ")

  # Character result of the right length, so that empty input yields ""
  # rather than the literal string "NA".
  out_string <- character(length(strings))

  # Loop invariants: width of a single space and the numeric width available.
  gapwidth <- grid::stringWidth(" ")
  availwidth <- as.numeric(width)

  for (string_i in seq_along(strings)) {
    words <- strings[[string_i]]
    # `strsplit()` returns `character(0)` for an empty string.
    if (length(words) == 0) words <- ""

    # Start the line with the first word, then add the remaining words one by one.
    line <- words[1]
    linewidth <- grid::stringWidth(line)
    for (word in words[-1]) {
      width_i <- grid::stringWidth(word)
      candidate <- linewidth + gapwidth + width_i
      # Compare in the unit of `width`: keep the word on the current line if it
      # still fits, otherwise start a new line with it.
      if (grid::convertWidth(candidate, grid::unitType(width), valueOnly = TRUE) < availwidth) {
        sep <- " "
        linewidth <- candidate
      } else {
        sep <- "\n"
        linewidth <- width_i
      }
      line <- paste(line, word, sep = sep)
    }
    out_string[string_i] <- line
  }
  paste(out_string, collapse = "\n")
}

#' Split Text According To Available Text Width
#'
#' Dynamically wrap text.
#'
#' @inheritParams grid::grid.text
#' @param text character string
#' @param width a unit object specifying max width of text
#'
#' @return A text grob.
#'
#' @details This code is taken from `R Graphics by Paul Murrell, 2nd edition`
#'
#' @keywords internal
split_text_grob <- function(text,
                            x = grid::unit(0.5, "npc"),
                            y = grid::unit(0.5, "npc"),
                            width = grid::unit(1, "npc"),
                            just = "centre",
                            hjust = NULL,
                            vjust = NULL,
                            default.units = "npc", # nolint
                            name = NULL,
                            gp = grid::gpar(),
                            vp = NULL) {
  # Coerce plain numbers to units and resolve compound ("sum"/"min"/"max")
  # units into `default.units`.
  as_plain_unit <- function(value) {
    if (!grid::is.unit(value)) value <- grid::unit(value, default.units)
    if (grid::unitType(value) %in% c("sum", "min", "max")) value <- grid::convertUnit(value, default.units)
    value
  }
  x <- as_plain_unit(x)
  y <- as_plain_unit(y)
  width <- as_plain_unit(width)

  ## if it is a fixed unit then we do not need to recalculate when viewport resized
  fixed_units <- c("cm", "inches", "mm", "points", "picas", "bigpts", "dida", "cicero", "scaledpts")
  width_unit_attr <- attr(width, "unit")
  has_fixed_width <- !inherits(width, "unit.arithmetic") &&
    !is.null(width_unit_attr) &&
    width_unit_attr %in% fixed_units
  if (has_fixed_width) {
    attr(text, "fixed_text") <- paste(vapply(text, split_string, character(1), width = width), collapse = "\n")
  }

  # Build (without drawing) a text grob whose label is the wrapped text.
  wrapped_label <- split_string(text, width)
  grid::grid.text(
    label = wrapped_label,
    x = x, y = y,
    just = just,
    hjust = hjust,
    vjust = vjust,
    rot = 0,
    check.overlap = FALSE,
    name = name,
    gp = gp,
    vp = vp,
    draw = FALSE
  )
}

#' @importFrom grid validDetails
#' @noRd
validDetails.dynamicSplitText <- function(x) {
  # Validity check for "dynamicSplitText" objects; returns `x` unchanged when
  # every assertion passes.
  # The text to be wrapped must be a character vector.
  checkmate::assert_character(x$text)
  # The available width must be a grid unit of length one.
  checkmate::assert_true(grid::is.unit(x$width))
  checkmate::assert_vector(x$width, len = 1)
  x
}

#' @importFrom grid heightDetails
#' @noRd
heightDetails.dynamicSplitText <- function(x) {
  # Use the pre-wrapped text when it was stored on `x$text`; otherwise wrap
  # every element for the current width and join the results with newlines.
  cached <- attr(x$text, "fixed_text")
  wrapped <- if (is.null(cached)) {
    paste(vapply(x$text, split_string, character(1), width = x$width), collapse = "\n")
  } else {
    cached
  }
  grid::stringHeight(wrapped)
}

#' @importFrom grid widthDetails
#' @noRd
widthDetails.dynamicSplitText <- function(x) {
  # The grob reports the width stored on it.
  grob_width <- x$width
  grob_width
}

#' @importFrom grid drawDetails
#' @noRd
drawDetails.dynamicSplitText <- function(x, recording) {
  # Use the pre-wrapped text when it was stored on `x$text`; otherwise wrap
  # every element for the current width and join the results with newlines.
  cached <- attr(x$text, "fixed_text")
  wrapped <- if (is.null(cached)) {
    paste(vapply(x$text, split_string, character(1), width = x$width), collapse = "\n")
  } else {
    cached
  }

  # Turn the object into a plain "text" grob: drop the wrapping-specific
  # fields, store the wrapped label and replace the leading class.
  x$width <- NULL
  x$label <- wrapped
  x$text <- NULL
  class(x) <- c("text", class(x)[-1])

  grid::grid.draw(x)
}

#' Update Page Number
#'
#' Automatically updates page number.
#'
#' @param npages number of pages in total
#' @param ... passed on to [decorate_grob()]
#'
#' @return Closure that increments the page number.
#'
#' @keywords internal
decorate_grob_factory <- function(npages, ...) {
  # Page counter kept in the closure environment; advanced on every call.
  current_page <- 0

  decorate_next <- function(grob) {
    current_page <<- current_page + 1
    # Refuse to decorate more grobs than the announced number of pages.
    if (current_page > npages) {
      stop(paste("current page is", current_page, "but max.", npages, "specified."))
    }
    page_label <- paste("Page", current_page, "of", npages)
    decorate_grob(grob = grob, page = page_label, ...)
  }

  decorate_next
}

#' Decorate Set of `grobs` and Add Page Numbering
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Note that this uses the [decorate_grob_factory()] function.
#'
#' @param grobs a list of grid grobs
#' @param ... arguments passed on to [decorate_grob()].
#'
#' @return A decorated grob.
#'
#' @examples
#' library(ggplot2)
#' library(grid)
#' g <- with(data = iris, {
#'   list(
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Length, Sepal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Length, Petal.Length, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Length, Petal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Width, Petal.Length, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Width, Petal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Petal.Length, Petal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     )
#'   )
#' })
#' lg <- decorate_grob_set(grobs = g, titles = "Hello\nOne\nTwo\nThree", footnotes = "")
#'
#' draw_grob(lg[[1]])
#' draw_grob(lg[[2]])
#' draw_grob(lg[[6]])
#'
#' @export
decorate_grob_set <- function(grobs, ...) {
  # One shared decorator, so that the page counter runs across all grobs and
  # the total number of pages equals the number of grobs.
  decorate_next <- decorate_grob_factory(npages = length(grobs), ...)
  lapply(grobs, decorate_next)
}

#' Helper Functions for Multivariate Logistic Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions used in calculations for logistic regression.
#'
#' @inheritParams argument_convention
#' @param fit_glm (`glm`)\cr logistic regression model fitted by [stats::glm()] with "binomial" family.
#'   Limited functionality is also available for conditional logistic regression models fitted by
#'   [survival::clogit()], currently this is used only by [extract_rsp_biomarkers()].
#' @param x (`string` or `character`)\cr a variable or interaction term in `fit_glm` (depending on the
#'   helper function).
#'
#' @examples
#' library(dplyr)
#' library(broom)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' @name h_logistic_regression
NULL

#' @describeIn h_logistic_regression Helper function to extract interaction variable names from a fitted
#'   model assuming only one interaction term.
#'
#' @return Vector of names of interaction variables.
#'
#' @export
h_get_interaction_vars <- function(fit_glm) {
  checkmate::assert_class(fit_glm, "glm")

  # Labels and orders of all model terms; second-order terms are the two-way interactions.
  model_terms <- stats::terms(fit_glm)
  term_labels <- attr(model_terms, "term.labels")
  is_two_way <- attr(model_terms, "order") == 2

  # Exactly one interaction term is expected (checked as a single string).
  interaction_term <- term_labels[is_two_way]
  checkmate::assert_string(interaction_term)

  # "A:B" -> c("A", "B")
  strsplit(interaction_term, split = ":")[[1]]
}

#' @describeIn h_logistic_regression Helper function to get the right coefficient name from the
#'   interaction variable names and the given levels. The main value here is that the order
#'   of first and second variable is checked in the `interaction_vars` input.
#'
#' @param interaction_vars (`character` of length 2)\cr interaction variable names.
#' @param first_var_with_level (`character` of length 2)\cr the first variable name with
#'   the interaction level.
#' @param second_var_with_level (`character` of length 2)\cr the second variable name with
#'   the interaction level.
#'
#' @return Name of coefficient.
#'
#' @export
h_interaction_coef_name <- function(interaction_vars,
                                    first_var_with_level,
                                    second_var_with_level) {
  checkmate::assert_character(interaction_vars, len = 2, any.missing = FALSE)
  checkmate::assert_character(first_var_with_level, len = 2, any.missing = FALSE)
  checkmate::assert_character(second_var_with_level, len = 2, any.missing = FALSE)
  checkmate::assert_subset(c(first_var_with_level[1], second_var_with_level[1]), interaction_vars)

  # Variable name and level glued together, e.g. c("ARM", "B") -> "ARMB".
  first_name <- paste(first_var_with_level, collapse = "")
  second_name <- paste(second_var_with_level, collapse = "")

  # The part whose variable comes first in `interaction_vars` leads the coefficient name.
  leading_var <- interaction_vars[1]
  if (first_var_with_level[1] == leading_var) {
    paste(c(first_name, second_name), collapse = ":")
  } else if (second_var_with_level[1] == leading_var) {
    paste(c(second_name, first_name), collapse = ":")
  }
}

#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates
#'   for the case when both the odds ratio and the interaction variable are categorical.
#'
#' @param odds_ratio_var (`string`)\cr the odds ratio variable.
#' @param interaction_var (`string`)\cr the interaction variable.
#'
#' @return Odds ratio.
#'
#' @export
h_or_cat_interaction <- function(odds_ratio_var,
                                 interaction_var,
                                 fit_glm,
                                 conf_level = 0.95) {
  interaction_vars <- h_get_interaction_vars(fit_glm)
  checkmate::assert_string(odds_ratio_var)
  checkmate::assert_string(interaction_var)
  checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars)
  checkmate::assert_vector(interaction_vars, len = 2)

  coefs <- stats::coef(fit_glm)
  covmat <- stats::vcov(fit_glm)
  or_levels <- fit_glm$xlevels[[odds_ratio_var]]
  int_levels <- fit_glm$xlevels[[interaction_var]]
  int_reference <- int_levels[1]

  # Odds ratio and Wald confidence interval for the sum of the named
  # coefficients (main effect alone, or main effect plus interaction).
  or_with_ci <- function(coef_names) {
    if (length(coef_names) > 1) {
      ones <- t(c(1, 1))
      est <- as.numeric(ones %*% coefs[coef_names])
      se <- sqrt(as.numeric(ones %*% covmat[coef_names, coef_names] %*% t(ones)))
    } else {
      est <- coefs[coef_names]
      se <- sqrt(as.numeric(covmat[coef_names, coef_names]))
    }
    list(
      or = exp(est),
      ci = exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se)
    )
  }

  # Outer list: non-reference levels of the odds ratio variable.
  # Inner list: all levels of the interaction variable.
  result <- list()
  for (or_level in or_levels[-1]) {
    main_coef <- paste0(odds_ratio_var, or_level)
    per_level <- list()
    for (int_level in int_levels) {
      # At the reference level of the interaction variable only the main effect applies.
      coef_names <- if (int_level != int_reference) {
        c(
          main_coef,
          h_interaction_coef_name(
            interaction_vars,
            c(odds_ratio_var, or_level),
            c(interaction_var, int_level)
          )
        )
      } else {
        main_coef
      }
      per_level[[int_level]] <- or_with_ci(coef_names)
    }
    result[[or_level]] <- per_level
  }
  result
}

#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates
#'   for the case when either the odds ratio or the interaction variable is continuous.
#'
#' @param at (`NULL` or `numeric`)\cr optional values for the interaction variable. Otherwise
#'   the median is used.
#'
#' @return Odds ratio.
#'
#' @note We don't provide a function for the case when both variables are continuous because
#'   this does not arise in this table, as the treatment arm variable will always be involved
#'   and categorical.
#'
#' @export
h_or_cont_interaction <- function(odds_ratio_var,
                                  interaction_var,
                                  fit_glm,
                                  at = NULL,
                                  conf_level = 0.95) {
  interaction_vars <- h_get_interaction_vars(fit_glm)
  checkmate::assert_string(odds_ratio_var)
  checkmate::assert_string(interaction_var)
  checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars)
  checkmate::assert_vector(interaction_vars, len = 2)
  checkmate::assert_numeric(at, min.len = 1, null.ok = TRUE, any.missing = FALSE)

  factor_levels <- fit_glm$xlevels
  coefs <- stats::coef(fit_glm)
  covmat <- stats::vcov(fit_glm)
  xs_class <- attr(fit_glm$terms, "dataClasses")
  model_data <- fit_glm$model

  # Explicit `at` values only make sense for a numeric interaction variable.
  if (!is.null(at)) {
    checkmate::assert_set_equal(xs_class[interaction_var], "numeric")
  }

  # Odds ratio and Wald confidence interval for a linear combination of the
  # named coefficients; `multipliers` is only used when two coefficients are combined.
  or_with_ci <- function(coef_names, multipliers) {
    if (length(coef_names) > 1) {
      xvec <- t(multipliers)
      est <- as.numeric(xvec %*% coefs[coef_names])
      se <- sqrt(as.numeric(xvec %*% covmat[coef_names, coef_names] %*% t(xvec)))
    } else {
      est <- coefs[coef_names]
      se <- sqrt(as.numeric(covmat[coef_names, coef_names]))
    }
    list(
      or = exp(est),
      ci = exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se)
    )
  }

  y <- list()
  if (xs_class[interaction_var] == "numeric") {
    # Categorical odds ratio variable, continuous interaction variable:
    # evaluate at the given values, or at the rounded-up median by default.
    if (is.null(at)) {
      at <- ceiling(stats::median(model_data[[interaction_var]]))
    }

    for (var_level in factor_levels[[odds_ratio_var]][-1]) {
      main_coef <- paste0(odds_ratio_var, var_level)
      per_value <- list()
      for (increment in at) {
        coef_names <- main_coef
        # The interaction coefficient only contributes for non-zero values.
        if (increment != 0) {
          coef_names <- c(
            main_coef,
            h_interaction_coef_name(
              interaction_vars,
              c(odds_ratio_var, var_level),
              c(interaction_var, "")
            )
          )
        }
        per_value[[as.character(increment)]] <- or_with_ci(coef_names, c(1, increment))
      }
      y[[var_level]] <- per_value
    }
  } else {
    # Continuous odds ratio variable, categorical interaction variable:
    # one odds ratio per level of the interaction variable.
    checkmate::assert_set_equal(xs_class[odds_ratio_var], "numeric")
    checkmate::assert_set_equal(xs_class[interaction_var], "factor")

    int_levels <- factor_levels[[interaction_var]]
    for (var_level in int_levels) {
      coef_names <- odds_ratio_var
      # At the reference level only the main effect applies.
      if (var_level != int_levels[1]) {
        coef_names <- c(
          odds_ratio_var,
          h_interaction_coef_name(
            interaction_vars,
            c(odds_ratio_var, ""),
            c(interaction_var, var_level)
          )
        )
      }
      y[[var_level]] <- or_with_ci(coef_names, c(1, 1))
    }
  }
  y
}

#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates
#'   in case of an interaction. This is a wrapper for [h_or_cont_interaction()] and
#'   [h_or_cat_interaction()].
#'
#' @return Odds ratio.
#'
#' @export
h_or_interaction <- function(odds_ratio_var,
                             interaction_var,
                             fit_glm,
                             at = NULL,
                             conf_level = 0.95) {
  # Data classes of the two variables involved, as recorded in the model terms.
  xs_class <- attr(fit_glm$terms, "dataClasses")
  var_classes <- xs_class[c(odds_ratio_var, interaction_var)]

  # At least one numeric variable: continuous interaction helper.
  if (any(var_classes == "numeric")) {
    return(
      h_or_cont_interaction(
        odds_ratio_var,
        interaction_var,
        fit_glm,
        at = at,
        conf_level = conf_level
      )
    )
  }

  # Both variables are factors: categorical interaction helper.
  if (all(var_classes == "factor")) {
    return(
      h_or_cat_interaction(
        odds_ratio_var,
        interaction_var,
        fit_glm,
        conf_level = conf_level
      )
    )
  }

  stop("wrong interaction variable class, the interaction variable is not a numeric nor a factor")
}

#' @describeIn h_logistic_regression Helper function to construct term labels from simple terms and the table
#'   of numbers of patients.
#'
#' @param terms (`character`)\cr simple terms.
#' @param table (`table`)\cr table containing numbers for terms.
#'
#' @return Term labels containing numbers of patients.
#'
#' @export
h_simple_term_labels <- function(terms,
                                 table) {
  checkmate::assert_true(is.table(table))
  checkmate::assert_multi_class(terms, classes = c("factor", "character"))

  # Look up the count of each term by name and append it to the term.
  term_names <- as.character(terms)
  counts <- table[term_names]
  paste0(term_names, ", n = ", counts)
}

#' @describeIn h_logistic_regression Helper function to construct term labels from interaction terms and the table
#'   of numbers of patients.
#'
#' @param terms1 (`character`)\cr terms for first dimension (rows).
#' @param terms2 (`character`)\cr terms for second dimension (columns).
#' @param any (`flag`)\cr whether any of `terms1` and `terms2` can be fulfilled to count the
#'   number of patients. In that case they can only be scalar (strings).
#'
#' @return Term labels containing numbers of patients.
#'
#' @export
h_interaction_term_labels <- function(terms1,
                                      terms2,
                                      table,
                                      any = FALSE) {
  checkmate::assert_true(is.table(table))
  checkmate::assert_flag(any)
  checkmate::assert_multi_class(terms1, classes = c("factor", "character"))
  checkmate::assert_multi_class(terms2, classes = c("factor", "character"))
  terms1 <- as.character(terms1)
  terms2 <- as.character(terms2)

  if (!any) {
    # One label per (terms1, terms2) pair, using the count of the matching cell.
    cell_counts <- table[cbind(terms1, terms2)]
    paste0(terms1, " * ", terms2, ", n = ", cell_counts)
  } else {
    checkmate::assert_scalar(terms1)
    checkmate::assert_scalar(terms2)
    # Row total plus column total counts the cell [terms1, terms2] twice, so it
    # is subtracted once.
    n_either <- sum(c(table[terms1, ], table[, terms2])) - table[terms1, terms2]
    paste0(terms1, " or ", terms2, ", n = ", n_either)
  }
}

#' @describeIn h_logistic_regression Helper function to tabulate the main effect
#'   results of a (conditional) logistic regression model.
#'
#' @return Tabulated main effect results from a logistic regression model.
#'
#' @examples
#' h_glm_simple_term_extract("AGE", mod1)
#' h_glm_simple_term_extract("ARMCD", mod1)
#'
#' @export
h_glm_simple_term_extract <- function(x, fit_glm) {
  # Builds a data frame with one row per coefficient of the main effect `x`
  # (plus a leading variable-summary row for factors), with the statistics
  # stored in list columns.
  checkmate::assert_multi_class(fit_glm, c("glm", "clogit"))
  checkmate::assert_string(x)

  xs_class <- attr(fit_glm$terms, "dataClasses")
  xs_level <- fit_glm$xlevels
  xs_coef <- summary(fit_glm)$coefficients
  # Columns of the coefficient table to extract; the names differ between
  # `glm` fits and other (i.e. `clogit`) fits.
  stats <- if (inherits(fit_glm, "glm")) {
    c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)")
  } else {
    c("estimate" = "coef", "std_error" = "se(coef)", "pvalue" = "Pr(>|z|)")
  }
  # Make sure x is not an interaction term.
  checkmate::assert_subset(x, names(xs_class))
  # Coefficient row names: the variable itself if numeric, otherwise the
  # variable name pasted with each non-reference level.
  x_sel <- if (xs_class[x] == "numeric") x else paste0(x, xs_level[[x]][-1])
  x_stats <- as.data.frame(xs_coef[x_sel, stats, drop = FALSE], stringsAsFactors = FALSE)
  colnames(x_stats) <- names(stats)
  # Convert the statistics to list columns, so that they can later be combined
  # with the empty (`numeric(0)`) entries of the variable-summary row.
  x_stats$estimate <- as.list(x_stats$estimate)
  x_stats$std_error <- as.list(x_stats$std_error)
  x_stats$pvalue <- as.list(x_stats$pvalue)
  x_stats$df <- as.list(1)
  if (xs_class[x] == "numeric") {
    # Numeric covariate: a single term row, labelled by the variable label when
    # the data is available.
    x_stats$term <- x
    x_stats$term_label <- if (inherits(fit_glm, "glm")) {
      formatters::var_labels(fit_glm$data[x], fill = TRUE)
    } else {
      # We just fill in here with the `term` itself as we don't have the data available.
      x
    }
    x_stats$is_variable_summary <- FALSE
    x_stats$is_term_summary <- TRUE
  } else {
    checkmate::assert_class(fit_glm, "glm")
    # The reason is that we don't have the original data set in the `clogit` object
    # and therefore cannot determine the `x_numbers` here.
    x_numbers <- table(fit_glm$data[[x]])
    # One term row per non-reference level, labelled with the level counts.
    x_stats$term <- xs_level[[x]][-1]
    x_stats$term_label <- h_simple_term_labels(x_stats$term, x_numbers)
    x_stats$is_variable_summary <- FALSE
    x_stats$is_term_summary <- TRUE
    # Wald test (type 3) for the variable as a whole, reported on the summary row.
    main_effects <- car::Anova(fit_glm, type = 3, test.statistic = "Wald")
    # Variable-summary row: carries the reference level and the overall test.
    x_main <- data.frame(
      pvalue = main_effects[x, "Pr(>Chisq)", drop = TRUE],
      term = xs_level[[x]][1],
      term_label = paste("Reference", h_simple_term_labels(xs_level[[x]][1], x_numbers)),
      df = main_effects[x, "Df", drop = TRUE],
      stringsAsFactors = FALSE
    )
    x_main$pvalue <- as.list(x_main$pvalue)
    x_main$df <- as.list(x_main$df)
    x_main$estimate <- list(numeric(0))
    x_main$std_error <- list(numeric(0))
    # With a single non-reference level, the p-value and df of the summary row
    # are blanked out (set to `numeric(0)`).
    if (length(xs_level[[x]][-1]) == 1) {
      x_main$pvalue <- list(numeric(0))
      x_main$df <- list(numeric(0))
    }
    x_main$is_variable_summary <- TRUE
    x_main$is_term_summary <- FALSE
    # The summary row goes first, followed by the per-level term rows.
    x_stats <- rbind(x_main, x_stats)
  }
  x_stats$variable <- x
  x_stats$variable_label <- if (inherits(fit_glm, "glm")) {
    formatters::var_labels(fit_glm$data[x], fill = TRUE)
  } else {
    x
  }
  # Main effects carry no interaction or reference information; these columns
  # are filled with empty strings.
  x_stats$interaction <- ""
  x_stats$interaction_label <- ""
  x_stats$reference <- ""
  x_stats$reference_label <- ""
  rownames(x_stats) <- NULL
  # Return the columns in a fixed order.
  x_stats[c(
    "variable",
    "variable_label",
    "term",
    "term_label",
    "interaction",
    "interaction_label",
    "reference",
    "reference_label",
    "estimate",
    "std_error",
    "df",
    "pvalue",
    "is_variable_summary",
    "is_term_summary"
  )]
}

#' @describeIn h_logistic_regression Helper function to tabulate the interaction term
#'   results of a logistic regression model.
#'
#' @return Tabulated interaction term results from a logistic regression model.
#'
#' @examples
#' h_glm_interaction_extract("ARMCD:AGE", mod2)
#'
#' @export
h_glm_interaction_extract <- function(x, fit_glm) {
  vars <- h_get_interaction_vars(fit_glm)
  xs_class <- attr(fit_glm$terms, "dataClasses")

  checkmate::assert_string(x)

  # Only take two-way interaction
  checkmate::assert_vector(vars, len = 2)

  # Only consider simple case: first variable in interaction is arm, a categorical variable
  checkmate::assert_disjunct(xs_class[vars[1]], "numeric")

  var1 <- vars[1]
  var2 <- vars[2]
  var_levels <- fit_glm$xlevels
  coef_table <- summary(fit_glm)$coefficients
  anova_tab <- car::Anova(fit_glm, type = 3, test.statistic = "Wald")
  stat_cols <- c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)")

  # Build the statistics data frame for the given rows of the coefficient table.
  coef_frame <- function(rows) {
    out <- as.data.frame(coef_table[rows, stat_cols, drop = FALSE], stringsAsFactors = FALSE)
    colnames(out) <- names(stat_cols)
    out
  }

  v1_ref <- var_levels[[var1]][1]
  v1_comp <- var_levels[[var1]][-1]

  if (xs_class[var2] == "numeric") {
    # Categorical * numeric: one coefficient per non-reference level of the first variable.
    x_stats <- coef_frame(paste0(var1, v1_comp, ":", var2))
    x_stats$term <- v1_comp
    level_counts <- table(fit_glm$data[[var1]])
    x_stats$term_label <- h_simple_term_labels(v1_comp, level_counts)
    term_main <- v1_ref
    ref_label <- h_simple_term_labels(v1_ref, level_counts)
  } else {
    # Categorical * categorical: one coefficient per combination of non-reference levels.
    v2_comp <- var_levels[[var2]][-1]
    level_grid <- expand.grid(v1 = v1_comp, v2 = v2_comp)
    coef_rows <- paste(
      paste0(var1, level_grid$v1),
      paste0(var2, level_grid$v2),
      sep = ":"
    )
    x_stats <- coef_frame(coef_rows)
    x_stats$term <- paste(level_grid$v1, "*", level_grid$v2)
    level_counts <- table(fit_glm$data[[var1]], fit_glm$data[[var2]])
    x_stats$term_label <- h_interaction_term_labels(level_grid$v1, level_grid$v2, level_counts)
    v2_ref <- var_levels[[var2]][1]
    term_main <- paste(var1, var2, sep = " * ")
    ref_label <- h_interaction_term_labels(v1_ref, v2_ref, level_counts, any = TRUE)
  }

  x_stats$df <- as.list(1)
  x_stats$pvalue <- as.list(x_stats$pvalue)
  x_stats$is_variable_summary <- FALSE
  x_stats$is_term_summary <- TRUE

  # Summary row for the interaction as a whole, carrying the Wald test of the interaction term.
  x_main <- data.frame(
    pvalue = anova_tab[x, "Pr(>Chisq)", drop = TRUE],
    term = term_main,
    term_label = paste("Reference", ref_label),
    df = anova_tab[x, "Df", drop = TRUE],
    stringsAsFactors = FALSE
  )
  x_main$pvalue <- as.list(x_main$pvalue)
  x_main$df <- as.list(x_main$df)
  x_main$estimate <- list(numeric(0))
  x_main$std_error <- list(numeric(0))
  x_main$is_variable_summary <- TRUE
  x_main$is_term_summary <- FALSE

  x_stats <- rbind(x_main, x_stats)
  x_stats$variable <- x
  label1 <- formatters::var_labels(fit_glm$data[var1], fill = TRUE)
  label2 <- formatters::var_labels(fit_glm$data[var2], fill = TRUE)
  x_stats$variable_label <- paste("Interaction of", label1, "*", label2)
  x_stats$interaction <- ""
  x_stats$interaction_label <- ""
  x_stats$reference <- ""
  x_stats$reference_label <- ""
  rownames(x_stats) <- NULL

  out_cols <- c(
    "variable", "variable_label",
    "term", "term_label",
    "interaction", "interaction_label",
    "reference", "reference_label",
    "estimate", "std_error", "df", "pvalue",
    "is_variable_summary", "is_term_summary"
  )
  x_stats[out_cols]
}

#' @describeIn h_logistic_regression Helper function to tabulate the interaction
#'   results of a logistic regression model. This basically is a wrapper for
#'   [h_or_interaction()] and [h_glm_simple_term_extract()] which puts the results
#'   in the right data frame format.
#'
#' @return A `data.frame` of tabulated interaction term results from a logistic regression model.
#'
#' @examples
#' h_glm_inter_term_extract("AGE", "ARMCD", mod2)
#'
#' @export
h_glm_inter_term_extract <- function(odds_ratio_var,
                                     interaction_var,
                                     fit_glm,
                                     ...) {
  # First obtain the main effects. Odds ratios are only reported on the reference
  # rows added below, so they are left missing here.
  main_stats <- h_glm_simple_term_extract(odds_ratio_var, fit_glm)
  main_stats$is_reference_summary <- FALSE
  main_stats$odds_ratio <- NA
  main_stats$lcl <- NA
  main_stats$ucl <- NA

  # Then we get the odds ratio estimates and put into df form. `...` is forwarded
  # to `h_or_interaction()`.
  or_numbers <- h_or_interaction(odds_ratio_var, interaction_var, fit_glm, ...)
  is_num_or_var <- attr(fit_glm$terms, "dataClasses")[odds_ratio_var] == "numeric"

  var_label <- unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE))
  inter_label <- unname(formatters::var_labels(fit_glm$data[interaction_var], fill = TRUE))

  # Pull element `name` (at position `pos`) out of every entry of `l` as a flat,
  # unnamed vector. For a numeric odds ratio variable `l` is a list of entries
  # keyed by reference; otherwise it is nested one level deeper (term, then reference).
  extract_from_list <- function(l, name, pos = 1) {
    pick <- function(entry) entry[[name]][pos]
    values <- if (is_num_or_var) {
      lapply(l, pick)
    } else {
      lapply(l, function(by_reference) lapply(by_reference, pick))
    }
    unname(unlist(values))
  }

  if (is_num_or_var) {
    # Numeric OR variable case: a single term, one row per reference.
    term <- odds_ratio_var
    term_label <- var_label
    reference <- names(or_numbers)
  } else {
    # Categorical OR variable case: one row per combination of term level and reference.
    n_ref <- length(names(or_numbers[[1]]))
    term <- rep(names(or_numbers), each = n_ref)
    term_label <- h_simple_term_labels(term, table(fit_glm$data[[odds_ratio_var]]))
    reference <- unlist(lapply(or_numbers, names))
  }

  or_stats <- data.frame(
    variable = odds_ratio_var,
    variable_label = var_label,
    term = term,
    term_label = term_label,
    interaction = interaction_var,
    interaction_label = inter_label,
    reference = reference,
    reference_label = reference,
    estimate = NA,
    std_error = NA,
    odds_ratio = extract_from_list(or_numbers, "or"),
    lcl = extract_from_list(or_numbers, "ci", pos = "lcl"),
    ucl = extract_from_list(or_numbers, "ci", pos = "ucl"),
    df = NA,
    pvalue = NA,
    is_variable_summary = FALSE,
    is_term_summary = FALSE,
    is_reference_summary = TRUE,
    # Keep text columns as character, consistent with the other extraction helpers.
    stringsAsFactors = FALSE
  )

  df <- rbind(
    main_stats[, names(or_stats)],
    or_stats
  )
  # Variable summary first, then by term with each term's own row before its reference rows.
  df[order(-df$is_variable_summary, df$term, -df$is_term_summary, df$reference), ]
}

#' @describeIn h_logistic_regression Helper function to tabulate the results including
#'   odds ratios and confidence intervals of simple terms.
#'
#' @return Tabulated statistics for the given variable(s) from the logistic regression model.
#'
#' @examples
#' h_logistic_simple_terms("AGE", mod1)
#'
#' @export
h_logistic_simple_terms <- function(x, fit_glm, conf_level = 0.95) {
  checkmate::assert_multi_class(fit_glm, c("glm", "clogit"))
  if (inherits(fit_glm, "glm")) {
    checkmate::assert_set_equal(fit_glm$family$family, "binomial")
  }

  model_terms <- attr(stats::terms(fit_glm), "term.labels")
  var_classes <- attr(fit_glm$terms, "dataClasses")
  # Term labels that are not plain variables are the interaction terms.
  inter_terms <- model_terms[!model_terms %in% names(var_classes)]

  checkmate::assert_subset(x, model_terms)
  if (length(inter_terms) != 0) {
    # Make sure any item in x is not part of interaction term
    checkmate::assert_disjunct(x, unlist(strsplit(inter_terms, ":")))
  }

  # One table per requested term, stacked in the order given by `x`.
  term_tables <- lapply(x, h_glm_simple_term_extract, fit_glm)
  x_stats <- do.call(rbind, term_tables)

  # Wald confidence limits, computed on the log-odds scale and transformed back.
  z_crit <- stats::qnorm((1 + conf_level) / 2)
  lower_limit <- function(odds, se) exp(log(odds) - z_crit * se)
  upper_limit <- function(odds, se) exp(log(odds) + z_crit * se)

  x_stats$odds_ratio <- lapply(x_stats$estimate, exp)
  x_stats$lcl <- Map(lower_limit, x_stats$odds_ratio, x_stats$std_error)
  x_stats$ucl <- Map(upper_limit, x_stats$odds_ratio, x_stats$std_error)
  x_stats$ci <- Map(function(lcl, ucl) c(lcl, ucl), lcl = x_stats$lcl, ucl = x_stats$ucl)
  x_stats
}

#' @describeIn h_logistic_regression Helper function to tabulate the results including
#'   odds ratios and confidence intervals of interaction terms.
#'
#' @return Tabulated statistics for the given variable(s) from the logistic regression model.
#'
#' @examples
#' h_logistic_inter_terms(c("RACE", "AGE", "ARMCD", "AGE:ARMCD"), mod2)
#'
#' @export
h_logistic_inter_terms <- function(x,
                                   fit_glm,
                                   conf_level = 0.95,
                                   at = NULL) {
  # Find out the interaction variables and interaction term.
  inter_vars <- h_get_interaction_vars(fit_glm)
  checkmate::assert_vector(inter_vars, len = 2)

  # The interaction term is the element of `x` whose `:`-separated components are
  # exactly the two interaction variables. Exact comparison is used on purpose:
  # substring or regex matching would also pick up unrelated terms whose names
  # merely contain both variable names.
  is_inter_term <- vapply(
    strsplit(as.character(x), ":", fixed = TRUE),
    function(parts) length(parts) == 2 && setequal(parts, inter_vars),
    logical(1)
  )
  inter_term <- x[is_inter_term]

  # For the non-interaction vars we need the standard stuff.
  normal_terms <- setdiff(x, union(inter_vars, inter_term))

  normal_stats <- NULL
  if (length(normal_terms) > 0) {
    normal_stats <- do.call(rbind, lapply(normal_terms, h_glm_simple_term_extract, fit_glm))
    # Wald confidence limits, computed on the log-odds scale and transformed back.
    q_norm <- stats::qnorm((1 + conf_level) / 2)
    normal_stats$odds_ratio <- lapply(normal_stats$estimate, exp)
    normal_stats$lcl <- Map(
      function(or, se) exp(log(or) - q_norm * se),
      normal_stats$odds_ratio,
      normal_stats$std_error
    )
    normal_stats$ucl <- Map(
      function(or, se) exp(log(or) + q_norm * se),
      normal_stats$odds_ratio,
      normal_stats$std_error
    )
    normal_stats$is_reference_summary <- FALSE
  }

  # Now the interaction term itself. Odds ratios are not reported on these rows.
  inter_term_stats <- h_glm_interaction_extract(inter_term, fit_glm)
  inter_term_stats$odds_ratio <- NA
  inter_term_stats$lcl <- NA
  inter_term_stats$ucl <- NA
  inter_term_stats$is_reference_summary <- FALSE

  is_intervar1_numeric <- attr(fit_glm$terms, "dataClasses")[inter_vars[1]] == "numeric"

  # Interaction stuff: odds ratios of each interaction variable given the other.
  # `at` is only passed to the call in which the numeric variable is the
  # conditioning (second) variable.
  inter_stats_one <- h_glm_inter_term_extract(
    inter_vars[1],
    inter_vars[2],
    fit_glm,
    conf_level = conf_level,
    at = if (is_intervar1_numeric) NULL else at
  )
  inter_stats_two <- h_glm_inter_term_extract(
    inter_vars[2],
    inter_vars[1],
    fit_glm,
    conf_level = conf_level,
    at = if (is_intervar1_numeric) at else NULL
  )

  # Now just combine everything in one data frame.
  col_names <- c(
    "variable",
    "variable_label",
    "term",
    "term_label",
    "interaction",
    "interaction_label",
    "reference",
    "reference_label",
    "estimate",
    "std_error",
    "df",
    "pvalue",
    "odds_ratio",
    "lcl",
    "ucl",
    "is_variable_summary",
    "is_term_summary",
    "is_reference_summary"
  )
  df <- rbind(
    inter_stats_one[, col_names],
    inter_stats_two[, col_names],
    inter_term_stats[, col_names]
  )
  if (!is.null(normal_stats)) {
    df <- rbind(
      normal_stats[, col_names],
      df
    )
  }
  df$ci <- combine_vectors(df$lcl, df$ucl)
  df
}

#' Create a Forest Plot based on a Table
#'
#' Create a forest plot from any [rtables::rtable()] object that has a
#' column with a single value and a column with 2 values.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @param tbl (`rtable`)
#' @param col_x (`integer`)\cr column index with estimator. By default tries to get this from
#'   `tbl` attribute `col_x`, otherwise needs to be manually specified.
#' @param col_ci (`integer`)\cr column index with confidence intervals. By default tries
#'   to get this from `tbl` attribute `col_ci`, otherwise needs to be manually specified.
#' @param vline (`number`)\cr x coordinate for vertical line, if `NULL` then the line is omitted.
#' @param forest_header (`character`, length 2)\cr text displayed to the left and right of `vline`, respectively.
#'   If `vline = NULL` then `forest_header` needs to be `NULL` too.
#'   By default tries to get this from `tbl` attribute `forest_header`.
#' @param xlim (`numeric`)\cr limits for x axis.
#' @param logx (`flag`)\cr show the x-values on logarithm scale.
#' @param x_at (`numeric`)\cr x-tick locations, if `NULL` they get automatically chosen.
#' @param width_row_names (`unit`)\cr width for row names.
#'   If `NULL` the widths get automatically calculated. See [grid::unit()].
#' @param width_columns (`unit`)\cr widths for the table columns.
#'   If `NULL` the widths get automatically calculated. See [grid::unit()].
#' @param width_forest (`unit`)\cr width for the forest column.
#'   If `NULL` the widths get automatically calculated. See [grid::unit()].
#' @param col_symbol_size (`integer`)\cr column index from `tbl` containing data to be used
#'   to determine relative size for estimator plot symbol. Typically, the symbol size is proportional
#'   to the sample size used to calculate the estimator. If `NULL`, the same symbol size is used for all subgroups.
#'   By default tries to get this from `tbl` attribute `col_symbol_size`, otherwise needs to be manually specified.
#' @param col (`character`)\cr color(s).
#'
#' @return `gTree` object containing the forest plot and table.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(forcats)
#' library(nestcolor)
#'
#' adrs <- tern_ex_adrs
#' n_records <- 20
#' adrs_labels <- formatters::var_labels(adrs, fill = TRUE)
#' adrs <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
#'   slice(seq_len(n_records)) %>%
#'   droplevels() %>%
#'   mutate(
#'     # Reorder levels of factor to make the placebo group the reference arm.
#'     ARM = fct_relevel(ARM, "B: Placebo"),
#'     rsp = AVALC == "CR"
#'   )
#' formatters::var_labels(adrs) <- c(adrs_labels, "Response")
#' df <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "STRATA2")),
#'   data = adrs
#' )
#' # Full commonly used response table.
#'
#' tbl <- basic_table() %>%
#'   tabulate_rsp_subgroups(df)
#' p <- g_forest(tbl)
#'
#' draw_grob(p)
#'
#' # Odds ratio only table.
#'
#' tbl_or <- basic_table() %>%
#'   tabulate_rsp_subgroups(df, vars = c("n_tot", "or", "ci"))
#' tbl_or
#' p <- g_forest(
#'   tbl_or,
#'   forest_header = c("Comparison\nBetter", "Treatment\nBetter")
#' )
#'
#' draw_grob(p)
#'
#' # Survival forest plot example.
#' adtte <- tern_ex_adtte
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte, fill = TRUE)
#' adtte_f <- adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X"),
#'     SEX %in% c("M", "F")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
#'     SEX = droplevels(SEX),
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- list(
#'   "ARM" = adtte_labels["ARM"],
#'   "SEX" = adtte_labels["SEX"],
#'   "AVALU" = adtte_labels["AVALU"],
#'   "is_event" = "Event Flag"
#' )
#' formatters::var_labels(adtte_f)[names(labels)] <- as.character(labels)
#' df <- extract_survival_subgroups(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM", subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#' table_hr <- basic_table() %>%
#'   tabulate_survival_subgroups(df, time_unit = adtte_f$AVALU[1])
#' g_forest(table_hr)
#' # Works with any `rtable`.
#' tbl <- rtable(
#'   header = c("E", "CI", "N"),
#'   rrow("", 1, c(.8, 1.2), 200),
#'   rrow("", 1.2, c(1.1, 1.4), 50)
#' )
#' g_forest(
#'   tbl = tbl,
#'   col_x = 1,
#'   col_ci = 2,
#'   xlim = c(0.5, 2),
#'   x_at = c(0.5, 1, 2),
#'   col_symbol_size = 3
#' )
#' tbl <- rtable(
#'   header = rheader(
#'     rrow("", rcell("A", colspan = 2)),
#'     rrow("", "c1", "c2")
#'   ),
#'   rrow("row 1", 1, c(.8, 1.2)),
#'   rrow("row 2", 1.2, c(1.1, 1.4))
#' )
#' g_forest(
#'   tbl = tbl,
#'   col_x = 1,
#'   col_ci = 2,
#'   xlim = c(0.5, 2),
#'   x_at = c(0.5, 1, 2),
#'   vline = 1,
#'   forest_header = c("Hello", "World")
#' )
#' }
#'
#' @export
g_forest <- function(tbl,
                     col_x = attr(tbl, "col_x"),
                     col_ci = attr(tbl, "col_ci"),
                     vline = 1,
                     forest_header = attr(tbl, "forest_header"),
                     xlim = c(0.1, 10),
                     logx = TRUE,
                     x_at = c(0.1, 1, 10),
                     width_row_names = NULL,
                     width_columns = NULL,
                     width_forest = grid::unit(1, "null"),
                     col_symbol_size = attr(tbl, "col_symbol_size"),
                     col = getOption("ggplot2.discrete.colour")[1],
                     draw = TRUE,
                     newpage = TRUE) {
  checkmate::assert_class(tbl, "VTableTree")

  nr <- nrow(tbl)
  nc <- ncol(tbl)
  # Fall back to blue when no colour is supplied.
  if (is.null(col)) {
    col <- "blue"
  }

  checkmate::assert_number(col_x, lower = 0, upper = nc, null.ok = FALSE)
  checkmate::assert_number(col_ci, lower = 0, upper = nc, null.ok = FALSE)
  checkmate::assert_number(col_symbol_size, lower = 0, upper = nc, null.ok = TRUE)
  checkmate::assert_true(col_x > 0)
  checkmate::assert_true(col_ci > 0)
  checkmate::assert_character(col)
  if (!is.null(col_symbol_size)) {
    checkmate::assert_true(col_symbol_size > 0)
  }

  # Numeric content of table cell (i, j), or NULL when the cell holds no numeric value.
  # If a label row is selected NULL is returned with a warning (suppressed).
  read_numeric_cell <- function(i, j) {
    value <- suppressWarnings(as.vector(tbl[i, j, drop = TRUE]))
    if (is.null(value) || length(value) <= 0 || !is.numeric(value)) {
      NULL
    } else {
      value
    }
  }
  # Replace a missing cell value by NA.
  or_na <- function(value) if (is.null(value)) NA_real_ else value
  row_ids <- seq_len(nr)

  # Point estimates: exactly one number per row.
  x_e <- vapply(row_ids, function(i) or_na(read_numeric_cell(i, col_x)), numeric(1))

  # Confidence intervals: a pair of numbers per row.
  x_ci <- lapply(row_ids, function(i) {
    bounds <- read_numeric_cell(i, col_ci)
    if (is.null(bounds)) {
      return(c(NA_real_, NA_real_))
    }
    if (length(bounds) != 2) {
      stop("ci column needs two elements")
    }
    bounds
  })
  lower <- vapply(x_ci, function(bounds) bounds[1], numeric(1))
  upper <- vapply(x_ci, function(bounds) bounds[2], numeric(1))

  symbol_size <- NULL
  if (!is.null(col_symbol_size)) {
    raw_size <- vapply(row_ids, function(i) or_na(read_numeric_cell(i, col_symbol_size)), numeric(1))
    # Scale symbol size: square root of the value, relative to the largest one, times 2.
    # With the 1/3.5 lines base radius used in forest_dot_line the biggest point
    # then has a radius of 2 * (1/3.5) lines.
    root_size <- sqrt(raw_size)
    symbol_size <- 2 * root_size / max(root_size, na.rm = TRUE)
  }

  grob_forest <- forest_grob(
    tbl = tbl,
    x = x_e,
    lower = lower,
    upper = upper,
    vline = vline,
    forest_header = forest_header,
    xlim = xlim,
    logx = logx,
    x_at = x_at,
    width_row_names = width_row_names,
    width_columns = width_columns,
    width_forest = width_forest,
    symbol_size = symbol_size,
    col = col,
    vp = grid::plotViewport(margins = rep(1, 4))
  )

  if (draw) {
    if (newpage) grid::grid.newpage()
    grid::grid.draw(grob_forest)
  }

  # Returned invisibly so that the grob is not printed on top of being drawn.
  invisible(grob_forest)
}

#' Forest Plot Grob
#'
#' @inheritParams g_forest
#' @param tbl ([rtables::rtable()])
#' @param x (`numeric`)\cr coordinate of point.
#' @param lower,upper (`numeric`)\cr lower/upper bound of the confidence interval.
#' @param symbol_size (`numeric`)\cr vector with relative size for plot symbol.
#' If `NULL`, the same symbol size is used.
#'
#' @details
#' The heights get automatically determined.
#'
#' @noRd
#'
#' @examples
#' tbl <- rtable(
#'   header = rheader(
#'     rrow("", "E", rcell("CI", colspan = 2), "N"),
#'     rrow("", "A", "B", "C", "D")
#'   ),
#'   rrow("row 1", 1, 0.8, 1.1, 16),
#'   rrow("row 2", 1.4, 0.8, 1.6, 25),
#'   rrow("row 3", 1.2, 0.8, 1.6, 36)
#' )
#'
#' x <- c(1, 1.4, 1.2)
#' lower <- c(0.8, 0.8, 0.8)
#' upper <- c(1.1, 1.6, 1.6)
#' # numeric vector with multiplication factor to scale each circle radius
#' # default radius is 1/3.5 lines
#' symbol_scale <- c(1, 1.25, 1.5)
#'
#' # Internal function - forest_grob
#' \donttest{
#' p <- forest_grob(tbl, x, lower, upper,
#'   vline = 1, forest_header = c("A", "B"),
#'   x_at = c(.1, 1, 10), xlim = c(0.1, 10), logx = TRUE, symbol_size = symbol_scale,
#'   vp = grid::plotViewport(margins = c(1, 1, 1, 1))
#' )
#'
#' draw_grob(p)
#' }
forest_grob <- function(tbl,
                        x,
                        lower,
                        upper,
                        vline,
                        forest_header,
                        xlim = NULL,
                        logx = FALSE,
                        x_at = NULL,
                        width_row_names = NULL,
                        width_columns = NULL,
                        width_forest = grid::unit(1, "null"),
                        symbol_size = NULL,
                        col = "blue",
                        name = NULL,
                        gp = NULL,
                        vp = NULL) {
  n_rows <- nrow(tbl)
  # The forest header is placed relative to the vertical line, so it is only allowed together with one.
  if (!is.null(vline)) {
    checkmate::assert_number(vline)
    checkmate::assert_character(forest_header, len = 2, null.ok = TRUE)
  } else {
    checkmate::assert_true(is.null(forest_header))
  }

  checkmate::assert_numeric(x, len = n_rows)
  checkmate::assert_numeric(lower, len = n_rows)
  checkmate::assert_numeric(upper, len = n_rows)
  checkmate::assert_numeric(symbol_size, len = n_rows, null.ok = TRUE)
  checkmate::assert_character(col)

  # Same symbol size for all rows unless specified.
  if (is.null(symbol_size)) {
    symbol_size <- rep(1, n_rows)
  }

  # Default x limits: data range widened by 5% on each side.
  if (is.null(xlim)) {
    data_range <- range(c(x, lower, upper), na.rm = TRUE)
    xlim <- data_range + c(-0.05, 0.05) * diff(data_range)
  }

  x_labels <- TRUE
  if (logx) {
    # Everything is positioned on the log scale while the axis is labelled on the original scale.
    if (is.null(x_at)) {
      x_at <- pretty(log(stats::na.omit(c(x, lower, upper))))
      x_labels <- exp(x_at)
    } else {
      x_labels <- x_at
      x_at <- log(x_at)
    }
    xlim <- log(xlim)
    x <- log(x)
    lower <- log(lower)
    upper <- log(upper)
    if (!is.null(vline)) {
      vline <- log(vline)
    }
  }

  data_forest_vp <- grid::dataViewport(xlim, c(0, 1))
  # Viewport selecting the layout column that holds the forest plot.
  forest_column_vp <- function() grid::viewport(layout.pos.col = ncol(tbl) + 2)

  # Get table content as matrix form.
  mf <- matrix_form(tbl)
  n_header <- attr(mf, "nrow_header")

  # Use `rtables` indent_string eventually.
  indent_levels <- c(rep(0, n_header), mf$row_info$indent)
  mf$strings[, 1] <- paste0(strrep("    ", indent_levels), mf$strings[, 1])

  if (any(mf$display[, 1] == FALSE)) stop("row names need to be always displayed")

  # Pre-process the matrix form rows into argument lists for cell_in_rows.
  # `row_offset` converts the matrix row number into the row index within its table part.
  cell_row_args <- function(mat_rows, row_offset, underline_colspan) {
    lapply(mat_rows, function(i) {
      shown <- mf$display[i, -1]
      list(
        row_name = mf$strings[i, 1],
        cells = mf$strings[i, -1][shown],
        cell_spans = mf$spans[i, -1][shown],
        row_index = i + row_offset,
        underline_colspan = underline_colspan
      )
    })
  }
  args_header <- cell_row_args(seq_len(n_header), 0, underline_colspan = TRUE)
  args_body <- cell_row_args(seq_len(nrow(tbl)) + n_header, -n_header, underline_colspan = FALSE)

  # Table part: header cells, body cells and the separating line.
  table_header <- grid::gTree(
    children = do.call(grid::gList, lapply(args_header, do.call, what = cell_in_rows)),
    vp = grid::vpPath("vp_table_layout", "vp_header")
  )
  table_body <- grid::gTree(
    children = do.call(grid::gList, lapply(args_body, do.call, what = cell_in_rows)),
    vp = grid::vpPath("vp_table_layout", "vp_body")
  )
  spacer_line <- grid::linesGrob(
    grid::unit(c(0, 1), "npc"),
    y = grid::unit(c(.5, .5), "npc"),
    vp = grid::vpPath("vp_table_layout", "vp_spacer")
  )

  # Forest part: header texts to the left and right of the vertical line.
  forest_header_grob <- if (is.null(vline)) {
    NULL
  } else {
    # this may overflow, to fix, look here
    # https://stackoverflow.com/questions/33623169/add-multi-line-footnote-to-tablegrob-while-using-gridextra-in-r #nolintr
    header_left <- grid::textGrob(
      forest_header[1],
      x = grid::unit(vline, "native") - grid::unit(1, "lines"),
      just = c("right", "center")
    )
    header_right <- grid::textGrob(
      forest_header[2],
      x = grid::unit(vline, "native") + grid::unit(1, "lines"),
      just = c("left", "center")
    )
    grid::gTree(
      children = grid::gList(
        grid::gTree(
          children = grid::gList(header_left, header_right),
          vp = grid::vpStack(forest_column_vp(), data_forest_vp)
        )
      ),
      vp = grid::vpPath("vp_table_layout", "vp_header")
    )
  }

  # Forest part: grey background, optional vertical line and x axis.
  vline_grob <- if (is.null(vline)) {
    NULL
  } else {
    grid::linesGrob(
      x = grid::unit(rep(vline, 2), "native"),
      y = grid::unit(c(0, 1), "npc"),
      gp = grid::gpar(lwd = 2),
      vp = data_forest_vp
    )
  }
  forest_background <- grid::gTree(
    children = grid::gList(
      grid::gTree(
        children = grid::gList(
          grid::rectGrob(gp = grid::gpar(col = "gray90", fill = "gray90")),
          vline_grob,
          grid::xaxisGrob(at = x_at, label = x_labels, vp = data_forest_vp)
        ),
        vp = forest_column_vp()
      )
    ),
    vp = grid::vpPath("vp_table_layout", "vp_body")
  )

  # Forest part: one dot-and-line per table row.
  dot_lines <- Map(
    function(x_i, lower_i, upper_i, row_i, size_i, col_i) {
      forest_dot_line(
        x_i,
        lower_i,
        upper_i,
        row_i,
        xlim,
        symbol_size = size_i,
        col = col_i,
        datavp = data_forest_vp
      )
    },
    x,
    lower,
    upper,
    seq_along(x),
    symbol_size,
    col,
    USE.NAMES = FALSE
  )
  forest_points <- grid::gTree(
    children = do.call(grid::gList, dot_lines),
    vp = grid::vpPath("vp_table_layout", "vp_body")
  )

  grid::gTree(
    name = name,
    children = grid::gList(
      table_header,
      table_body,
      spacer_line,
      forest_header_grob,
      forest_background,
      forest_points
    ),
    childrenvp = forest_viewport(tbl, width_row_names, width_columns, width_forest),
    vp = vp,
    gp = gp
  )
}


# Build the grobs for one table row: the row name plus one text grob per displayed cell.
#
# row_name: (`string`) row label; no grob is created for an empty string.
# cells: (`character`) contents of the displayed cells, in column order.
# cell_spans: (`numeric`) number of table columns spanned by each element of `cells`.
# row_index: (`number`) index of the row, used to build viewport and grob names.
# underline_colspan: (`flag`) whether non-blank cells spanning several columns get an underline.
#
# Returns a `gList` with the row name grob (if any) followed by the cell grobs.
cell_in_rows <- function(row_name,
                         cells,
                         cell_spans,
                         row_index,
                         underline_colspan = FALSE) {
  checkmate::assert_string(row_name)
  checkmate::assert_character(cells, min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(cell_spans, len = length(cells), any.missing = FALSE)
  checkmate::assert_number(row_index)
  checkmate::assert_flag(underline_colspan)

  # The assertions above guarantee a non-missing row name and at least one
  # non-missing cell, so no further NULL/NA handling is needed below.
  vp_name_rn <- paste0("rowname-", row_index)
  g_rowname <- if (nzchar(row_name)) {
    grid::textGrob(
      name = vp_name_rn,
      label = row_name,
      x = grid::unit(0, "npc"),
      just = c("left", "center"),
      vp = grid::vpPath(vp_name_rn)
    )
  } else {
    NULL
  }

  # Table column in which each cell starts: one past all columns covered by the cells before it.
  first_cols <- cumsum(c(1, cell_spans))[seq_along(cells)]

  gl_cols <- lapply(seq_along(cells), function(k) {
    cell_ascii <- cells[[k]]
    cs <- cell_spans[[k]]
    j <- first_cols[[k]]

    # Empty cells are not drawn.
    if (identical(cell_ascii, "")) {
      return(NULL)
    }

    cell_name <- paste0("g-cell-", row_index, "-", j)

    if (cs == 1) {
      return(
        grid::textGrob(
          label = cell_ascii,
          name = cell_name,
          vp = grid::vpPath(paste0("cell-", row_index, "-", j))
        )
      )
    }

    # Cell spanning several columns: +1 because of rowname
    vp_joined_cols <- grid::viewport(layout.pos.row = row_index, layout.pos.col = seq(j + 1, j + cs))
    lab <- grid::textGrob(
      label = cell_ascii,
      name = cell_name,
      vp = vp_joined_cols
    )

    if (!underline_colspan || grepl("^[[:space:]]*$", cell_ascii)) {
      lab
    } else {
      grid::gList(
        lab,
        grid::linesGrob(
          x = grid::unit.c(grid::unit(.2, "lines"), grid::unit(1, "npc") - grid::unit(.2, "lines")),
          y = grid::unit(c(0, 0), "npc"),
          vp = vp_joined_cols
        )
      )
    }
  })

  grid::gList(
    g_rowname,
    do.call(grid::gList, gl_cols)
  )
}

#' Graphic Object: Forest Dot Line
#'
#' Calculate the `grob` corresponding to the dot line within the forest plot.
#'
#' @noRd
forest_dot_line <- function(x,
                            lower,
                            upper,
                            row_index,
                            xlim,
                            symbol_size = 1,
                            col = "blue",
                            datavp) {
  bounds <- c(lower, upper)

  # Nothing to draw when neither the estimate nor the interval is available.
  if (all(is.na(c(x, bounds)))) {
    return(NULL)
  }

  # Both the line and the point sit at mid-height of the row.
  y <- grid::unit(c(0.5, 0.5), "npc")

  # Interval line: only drawn when both bounds are known and the interval overlaps
  # the x limits. Ends lying outside the limits are cut there and marked with an arrow.
  g_line <- NULL
  if (!anyNA(bounds) && bounds[2] > xlim[1] && bounds[1] < xlim[2]) {
    cut_low <- bounds[1] < xlim[1]
    cut_high <- bounds[2] > xlim[2]
    arrow_ends <- if (cut_low && cut_high) {
      "both"
    } else if (cut_low) {
      "first"
    } else if (cut_high) {
      "last"
    } else {
      "none"
    }
    line_x <- switch(arrow_ends,
      none = c(bounds[1], bounds[2]),
      both = xlim,
      first = c(xlim[1], bounds[2]),
      last = c(bounds[1], xlim[2])
    )
    g_line <- if (arrow_ends == "none") {
      grid::linesGrob(x = grid::unit(line_x, "native"), y = y)
    } else {
      grid::linesGrob(
        x = grid::unit(line_x, "native"),
        y = y,
        arrow = grid::arrow(angle = 30, length = grid::unit(0.5, "lines"), ends = arrow_ends)
      )
    }
  }

  # Point estimate: only drawn when it lies within the x limits.
  g_circle <- NULL
  if (!is.na(x) && x >= xlim[1] && x <= xlim[2]) {
    g_circle <- grid::circleGrob(
      x = grid::unit(x, "native"),
      y = y,
      r = grid::unit(1 / 3.5 * symbol_size, "lines"),
      name = "point"
    )
  }

  # Inner tree: drawn in the data viewport with the requested colour.
  # Outer tree: placed in the forest viewport of the given row.
  dot_line <- grid::gTree(
    children = grid::gList(g_line, g_circle),
    vp = datavp,
    gp = grid::gpar(col = col, fill = col)
  )
  grid::gTree(
    children = grid::gList(dot_line),
    vp = grid::vpPath(paste0("forest-", row_index))
  )
}

#' Create a Viewport Tree for the Forest Plot
#' @param tbl (`rtable`)
#' @param width_row_names (`grid::unit`)\cr Width of row names
#' @param width_columns (`grid::unit`)\cr Width of column spans
#' @param width_forest (`grid::unit`)\cr Width of the forest plot
#' @param gap_column (`grid::unit`)\cr Gap width between the columns
#' @param gap_header (`grid::unit`)\cr Gap width between the header
#' @param mat_form matrix print form of the table
#' @return A viewport tree.
#'
#' @examples
#' library(grid)
#'
#' tbl <- rtable(
#'   header = rheader(
#'     rrow("", "E", rcell("CI", colspan = 2)),
#'     rrow("", "A", "B", "C")
#'   ),
#'   rrow("row 1", 1, 0.8, 1.1),
#'   rrow("row 2", 1.4, 0.8, 1.6),
#'   rrow("row 3", 1.2, 0.8, 1.2)
#' )
#'
#' \donttest{
#' v <- forest_viewport(tbl)
#'
#' grid::grid.newpage()
#' showViewport(v)
#' }
#'
#' @export
forest_viewport <- function(tbl,
                            width_row_names = NULL,
                            width_columns = NULL,
                            width_forest = grid::unit(1, "null"),
                            gap_column = grid::unit(1, "lines"),
                            gap_header = grid::unit(1, "lines"),
                            mat_form = NULL) {
  checkmate::assert_class(tbl, "VTableTree")
  checkmate::assert_true(grid::is.unit(width_forest))
  if (!is.null(width_row_names)) {
    checkmate::assert_true(grid::is.unit(width_row_names))
  }
  if (!is.null(width_columns)) {
    checkmate::assert_true(grid::is.unit(width_columns))
  }

  if (is.null(mat_form)) mat_form <- matrix_form(tbl)

  # Cells flagged as not displayed must not contribute to widths or line counts.
  mat_form$strings[!mat_form$display] <- ""

  nr <- nrow(tbl)
  nc <- ncol(tbl)
  nr_h <- attr(mat_form, "nrow_header")

  # Derive any width that was not supplied from the proposed column widths of the table.
  if (is.null(width_row_names) || is.null(width_columns)) {
    tbl_widths <- formatters::propose_column_widths(mat_form)
    strs_with_width <- strrep("x", tbl_widths) # that works for mono spaced fonts
    if (is.null(width_row_names)) width_row_names <- grid::stringWidth(strs_with_width[1])
    if (is.null(width_columns)) width_columns <- grid::stringWidth(strs_with_width[-1])
  }

  # Widths for row name, cols, forest.
  widths <- grid::unit.c(
    width_row_names + gap_column,
    width_columns + gap_column,
    width_forest
  )

  # Number of text lines needed per table row: one more than the largest number of line
  # breaks found in any cell of that row. Matches are counted per cell so that cells with
  # two or more line breaks are handled (reading the "match.length" attribute directly
  # yields one value per match and cannot be collected as a single number per cell).
  n_lines_per_row <- apply(
    X = mat_form$strings,
    MARGIN = 1,
    FUN = function(row) {
      n_breaks <- lengths(regmatches(row, gregexpr("\n", row, fixed = TRUE)))
      max(c(n_breaks + 1, 1))
    }
  )

  i_header <- seq_len(nr_h)

  # Each text line is given 1.2 "lines" of height.
  height_body_rows <- grid::unit(n_lines_per_row[-i_header] * 1.2, "lines")
  height_header_rows <- grid::unit(n_lines_per_row[i_header] * 1.2, "lines")

  height_body <- grid::unit(sum(n_lines_per_row[-i_header]) * 1.2, "lines")
  height_header <- grid::unit(sum(n_lines_per_row[i_header]) * 1.2, "lines")

  nc_g <- nc + 2 # number of columns incl. row names and forest

  # Top-level layout: header on row 1, spacer on row 2, body on row 3.
  vp_tbl <- grid::vpTree(
    parent = grid::viewport(
      name = "vp_table_layout",
      layout = grid::grid.layout(
        nrow = 3, ncol = 1,
        heights = grid::unit.c(height_header, gap_header, height_body)
      )
    ),
    children = grid::vpList(
      vp_forest_table_part(nr_h, nc_g, 1, 1, widths, height_header_rows, "vp_header"),
      vp_forest_table_part(nr, nc_g, 3, 1, widths, height_body_rows, "vp_body"),
      grid::viewport(name = "vp_spacer", layout.pos.row = 2, layout.pos.col = 1)
    )
  )
  vp_tbl
}

#' Viewport Forest Plot: Table Part
#'
#' Prepares a viewport for the table included in the forest plot.
#'
#' @noRd
vp_forest_table_part <- function(nrow,
                                 ncol,
                                 l_row,
                                 l_col,
                                 widths,
                                 heights,
                                 name) {
  # Parent viewport: sits at (`l_row`, `l_col`) of the enclosing layout and carries its own
  # `nrow` x `ncol` grid layout.
  vp_parent <- grid::viewport(
    name = name,
    layout.pos.row = l_row,
    layout.pos.col = l_col,
    layout = grid::grid.layout(nrow = nrow, ncol = ncol, widths = widths, heights = heights)
  )

  row_ids <- seq_len(nrow)

  # First layout column: one "rowname-<i>" viewport per row.
  vps_rowname <- lapply(row_ids, function(i) {
    grid::viewport(layout.pos.row = i, layout.pos.col = 1, name = paste0("rowname-", i))
  })

  # Middle layout columns: one "cell-<i>-<j>" viewport per row/column pair, shifted by one
  # layout column because of the row names. Rows vary fastest.
  cell_index <- expand.grid(row_ids, seq_len(ncol - 2))
  vps_cell <- Map(
    function(i, j) {
      grid::viewport(layout.pos.row = i, layout.pos.col = j + 1, name = paste0("cell-", i, "-", j))
    },
    cell_index[[1]],
    cell_index[[2]]
  )

  # Last layout column: one "forest-<i>" viewport per row.
  vps_forest <- lapply(row_ids, function(i) {
    grid::viewport(layout.pos.row = i, layout.pos.col = ncol, name = paste0("forest-", i))
  })

  grid::vpTree(
    vp_parent,
    children = grid::vpList(
      do.call(grid::vpList, vps_rowname),
      do.call(grid::vpList, vps_cell),
      do.call(grid::vpList, vps_forest)
    )
  )
}

#' Forest Rendering
#'
#' Renders the forest grob.
#'
#' @noRd
grid.forest <- function(...) { # nolint
  # Build the forest grob from the supplied arguments, then draw it.
  forest <- forest_grob(...)
  grid::grid.draw(forest)
}

#' Helper Function to create a map dataframe that can be used in `trim_levels_to_map` split function.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper Function to create a map dataframe from the input dataset, which can be used as an argument in the
#' `trim_levels_to_map` split function. Based on different method, the map is constructed differently.
#'
#' @inheritParams argument_convention
#' @param abnormal (named `list`)\cr identifying the abnormal range level(s) in `df`. Based on the levels of
#'   abnormality of the input dataset, it can be something like `list(Low = "LOW LOW", High = "HIGH HIGH")` or
#'   `abnormal = list(Low = "LOW", High = "HIGH")`.
#' @param method (`string`)\cr indicates how the returned map will be constructed. Can be `"default"` or `"range"`.
#'
#' @return A map `data.frame`.
#'
#' @note If method is `"default"`, the returned map will only have the abnormal directions that are observed in the
#'   `df`, and records with all normal values will be excluded to avoid error in creating layout. If method is
#'   `"range"`, the returned map will be based on the rule that at least one observation with low range > 0
#'   for low direction and at least one observation with high range is not missing for high direction.
#'
#' @examples
#' adlb <- df_explicit_na(tern_ex_adlb)
#'
#' h_map_for_count_abnormal(
#'   df = adlb,
#'   variables = list(anl = "ANRIND", split_rows = c("LBCAT", "PARAM")),
#'   abnormal = list(low = c("LOW"), high = c("HIGH")),
#'   method = "default",
#'   na_level = "<Missing>"
#' )
#'
#' df <- data.frame(
#'   USUBJID = c(rep("1", 4), rep("2", 4), rep("3", 4)),
#'   AVISIT = c(
#'     rep("WEEK 1", 2),
#'     rep("WEEK 2", 2),
#'     rep("WEEK 1", 2),
#'     rep("WEEK 2", 2),
#'     rep("WEEK 1", 2),
#'     rep("WEEK 2", 2)
#'   ),
#'   PARAM = rep(c("ALT", "CPR"), 6),
#'   ANRIND = c(
#'     "NORMAL", "NORMAL", "LOW",
#'     "HIGH", "LOW", "LOW", "HIGH", "HIGH", rep("NORMAL", 4)
#'   ),
#'   ANRLO = rep(5, 12),
#'   ANRHI = rep(20, 12)
#' )
#' df$ANRIND <- factor(df$ANRIND, levels = c("LOW", "HIGH", "NORMAL"))
#' h_map_for_count_abnormal(
#'   df = df,
#'   variables = list(
#'     anl = "ANRIND",
#'     split_rows = c("PARAM"),
#'     range_low = "ANRLO",
#'     range_high = "ANRHI"
#'   ),
#'   abnormal = list(low = c("LOW"), high = c("HIGH")),
#'   method = "range",
#'   na_level = "<Missing>"
#' )
#'
#' @export
h_map_for_count_abnormal <- function(df,
                                     variables = list(
                                       anl = "ANRIND",
                                       split_rows = c("PARAM"),
                                       range_low = "ANRLO",
                                       range_high = "ANRHI"
                                     ),
                                     abnormal = list(low = c("LOW", "LOW LOW"), high = c("HIGH", "HIGH HIGH")),
                                     method = c("default", "range"),
                                     na_level = "<Missing>") {
  method <- match.arg(method)
  checkmate::assert_subset(c("anl", "split_rows"), names(variables))
  checkmate::assert_false(anyNA(df[variables$split_rows]))
  assert_df_with_variables(df,
    variables = list(anl = variables$anl, split_rows = variables$split_rows),
    na_level = na_level
  )
  assert_df_with_factors(df, list(val = variables$anl))
  assert_valid_factor(df[[variables$anl]], any.missing = FALSE)
  assert_list_of_variables(variables)
  checkmate::assert_list(abnormal, types = "character", len = 2)

  # Drop unused levels from df as they are not supposed to be in the final map
  df <- droplevels(df)

  # Whatever level is left after removing the abnormal ones is taken as the normal level.
  normal_value <- setdiff(levels(df[[variables$anl]]), unlist(abnormal))

  # Based on the understanding of clinical data, there should only be one level of normal which is "NORMAL"
  checkmate::assert_vector(normal_value, len = 1)

  # Default method will only have what is observed in the df, and records with all normal values will be excluded to
  # avoid error in layout building.
  if (method == "default") {
    df_abnormal <- subset(df, df[[variables$anl]] %in% unlist(abnormal))
    map <- unique(df_abnormal[c(variables$split_rows, variables$anl)])
    # Every split combination with an abnormal record also gets a row for the normal level.
    map_normal <- unique(subset(map, select = variables$split_rows))
    map_normal[[variables$anl]] <- normal_value
    map <- rbind(map, map_normal)
  } else if (method == "range") {
    # range method follows the rule that at least one observation with ANRLO > 0 for low
    # direction and at least one observation with ANRHI is not missing for high direction.
    checkmate::assert_subset(c("range_low", "range_high"), names(variables))
    checkmate::assert_subset(c("LOW", "HIGH"), toupper(names(abnormal)))

    assert_df_with_variables(df,
      variables = list(
        range_low = variables$range_low,
        range_high = variables$range_high
      )
    )

    # Define low direction of map
    df_low <- subset(df, df[[variables$range_low]] > 0)
    map_low <- unique(df_low[variables$split_rows])
    low_levels <- unname(unlist(abnormal[toupper(names(abnormal)) == "LOW"]))
    low_levels_df <- as.data.frame(low_levels)
    colnames(low_levels_df) <- variables$anl
    # Cross every split combination with every low level.
    low_levels_df <- do.call("rbind", replicate(nrow(map_low), low_levels_df, simplify = FALSE))
    rownames(map_low) <- NULL # Just to avoid strange row index in case upstream functions changed
    map_low <- map_low[rep(seq_len(nrow(map_low)), each = length(low_levels)), , drop = FALSE]
    map_low <- cbind(map_low, low_levels_df)

    # Define high direction of map: keep only records whose upper range limit is available,
    # i.e. neither `NA` nor the explicit missing string `na_level`. Both conditions must hold,
    # hence `&` (with `|`, records equal to `na_level` would be kept).
    df_high <- subset(
      df,
      !is.na(df[[variables$range_high]]) & df[[variables$range_high]] != na_level
    )
    map_high <- unique(df_high[variables$split_rows])
    high_levels <- unname(unlist(abnormal[toupper(names(abnormal)) == "HIGH"]))
    high_levels_df <- as.data.frame(high_levels)
    colnames(high_levels_df) <- variables$anl
    # Cross every split combination with every high level.
    high_levels_df <- do.call("rbind", replicate(nrow(map_high), high_levels_df, simplify = FALSE))
    rownames(map_high) <- NULL
    map_high <- map_high[rep(seq_len(nrow(map_high)), each = length(high_levels)), , drop = FALSE]
    map_high <- cbind(map_high, high_levels_df)

    # Define normal of map
    map_normal <- unique(rbind(map_low, map_high)[variables$split_rows])
    map_normal[variables$anl] <- normal_value

    map <- rbind(map_low, map_high, map_normal)
  }

  # map should be all characters
  map <- data.frame(lapply(map, as.character), stringsAsFactors = FALSE)

  # sort the map final output by split_rows variables (the leading columns of the map);
  # ordering from the last key to the first with a stable sort yields a multi-key sort
  for (i in rev(seq_along(variables$split_rows))) {
    map <- map[order(map[[i]]), , drop = FALSE]
  }
  map
}

#' Number of Patients
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Count the number of unique and non-unique patients in a column (variable).
#'
#' @inheritParams argument_convention
#' @param x (`character` or `factor`)\cr vector of patient IDs.
#' @param count_by (`character` or `factor`)\cr optional vector to be combined with `x` when counting
#'   `nonunique` records.
#' @param unique_count_suffix (`logical`)\cr should `"(n)"` suffix be added to `unique_count` labels.
#'   Defaults to `TRUE`.
#'
#' @name summarize_num_patients
NULL

#' @describeIn summarize_num_patients Statistics function which counts the number of
#'   unique patients, the corresponding percentage taken with respect to the
#'   total number of patients, and the number of non-unique patients.
#'
#' @return
#' * `s_num_patients()` returns a named `list` of 3 statistics:
#'   * `unique`: Vector of counts and percentages.
#'   * `nonunique`: Vector of counts.
#'   * `unique_count`: Counts.
#'
#' @examples
#' # Use the statistics function to count number of unique and nonunique patients.
#' s_num_patients(x = as.character(c(1, 1, 1, 2, 4, NA)), labelstr = "", .N_col = 6L)
#' s_num_patients(
#'   x = as.character(c(1, 1, 1, 2, 4, NA)),
#'   labelstr = "",
#'   .N_col = 6L,
#'   count_by = as.character(c(1, 1, 2, 1, 1, 1))
#' )
#'
#' @export
s_num_patients <- function(x, labelstr, .N_col, count_by = NULL, unique_count_suffix = TRUE) { # nolint
  # Validate inputs.
  checkmate::assert_string(labelstr)
  checkmate::assert_count(.N_col)
  checkmate::assert_multi_class(x, classes = c("factor", "character"))
  checkmate::assert_flag(unique_count_suffix)

  # Unique IDs vs. all records.
  n_unique <- n_available(unique(x))
  n_records <- n_available(x)

  # With `count_by`, records are counted as unique (ID, count_by) combinations instead.
  if (!is.null(count_by)) {
    checkmate::assert_vector(count_by, len = length(x))
    checkmate::assert_multi_class(count_by, classes = c("factor", "character"))
    n_records <- n_available(unique(interaction(x, count_by)))
  }

  # Fraction of the column total; defined as 0 when both numerator and denominator are 0.
  fraction <- ifelse(n_unique == 0 && .N_col == 0, 0, n_unique / .N_col)
  count_label <- ifelse(unique_count_suffix, paste(labelstr, "(n)"), labelstr)

  list(
    unique = formatters::with_label(c(n_unique, fraction), labelstr),
    nonunique = formatters::with_label(n_records, labelstr),
    unique_count = formatters::with_label(n_unique, count_label)
  )
}

#' @describeIn summarize_num_patients Statistics function which counts the number of unique patients
#'   in a column (variable), the corresponding percentage taken with respect to the total number of
#'   patients, and the number of non-unique patients in the column.
#'
#' @param required (`character` or `NULL`)\cr optional name of a variable that is required to be non-missing.
#'
#' @return
#' * `s_num_patients_content()` returns the same values as `s_num_patients()`.
#'
#' @examples
#' # Count number of unique and non-unique patients.
#' df <- data.frame(
#'   USUBJID = as.character(c(1, 2, 1, 4, NA)),
#'   EVENT = as.character(c(10, 15, 10, 17, 8))
#' )
#' s_num_patients_content(df, .N_col = 5, .var = "USUBJID")
#'
#' df_by_event <- data.frame(
#'   USUBJID = as.character(c(1, 2, 1, 4, NA)),
#'   EVENT = as.character(c(10, 15, 10, 17, 8))
#' )
#' s_num_patients_content(df_by_event, .N_col = 5, .var = "USUBJID")
#' s_num_patients_content(df_by_event, .N_col = 5, .var = "USUBJID", count_by = "EVENT")
#'
#' @export
s_num_patients_content <- function(df,
                                   labelstr = "",
                                   .N_col, # nolint
                                   .var,
                                   required = NULL,
                                   count_by = NULL,
                                   unique_count_suffix = TRUE) {
  checkmate::assert_string(.var)
  checkmate::assert_data_frame(df)

  # The ID column, and the `count_by` column when given, must exist in `df`.
  needed_vars <- list(id = .var)
  if (!is.null(count_by)) {
    needed_vars$count_by <- count_by
  }
  assert_df_with_variables(df, needed_vars)

  # Only keep records where the `required` variable is not missing.
  if (!is.null(required)) {
    checkmate::assert_string(required)
    assert_df_with_variables(df, list(required = required))
    df <- df[!is.na(df[[required]]), , drop = FALSE]
  }

  ids <- df[[.var]]
  by_values <- if (is.null(count_by)) {
    NULL
  } else {
    df[[count_by]]
  }

  s_num_patients(
    x = ids,
    labelstr = labelstr,
    .N_col = .N_col,
    count_by = by_values,
    unique_count_suffix = unique_count_suffix
  )
}

# Formatted version of `s_num_patients_content()` with default statistics and formats; used by
# `summarize_num_patients()` and `analyze_num_patients()`.
c_num_patients <- make_afun(
  s_num_patients_content,
  .stats = c("unique", "nonunique", "unique_count"),
  .formats = list(
    unique = format_count_fraction_fixed_dp,
    nonunique = "xx",
    unique_count = "xx"
  )
)

#' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @return
#' * `summarize_num_patients()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_num_patients_content()` to the table layout.
#'
#' @export
summarize_num_patients <- function(lyt,
                                   var,
                                   .stats = NULL,
                                   .formats = NULL,
                                   .labels = c(
                                     unique = "Number of patients with at least one event",
                                     nonunique = "Number of events"
                                   ),
                                   indent_mod = lifecycle::deprecated(),
                                   .indent_mods = 0L,
                                   ...) {
  # Deprecated `indent_mod` still works but warns and is mapped onto `.indent_mods`.
  if (lifecycle::is_present(indent_mod)) {
    lifecycle::deprecate_warn("0.8.2", "summarize_num_patients(indent_mod)", "summarize_num_patients(.indent_mods)")
    .indent_mods <- indent_mod
  }

  # Default to all available statistics.
  if (is.null(.stats)) {
    .stats <- c("unique", "nonunique", "unique_count")
  }
  # When there are more labels than statistics, keep only the labels of requested statistics.
  if (length(.stats) < length(.labels)) {
    label_requested <- names(.labels) %in% .stats
    .labels <- .labels[label_requested]
  }

  cfun <- make_afun(
    c_num_patients,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels
  )

  # Remaining arguments are handed to the statistics function via `extra_args`.
  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = cfun,
    extra_args = list(...),
    indent_mod = .indent_mods
  )
}

#' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `analyze_num_patients()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_num_patients_content()` to the table layout.
#'
#' @details In general, functions that start with `analyze*` are expected to
#'   work like [rtables::analyze()], while functions that start with `summarize*`
#'   are based upon [rtables::summarize_row_groups()]. The latter provides a
#'   value for each dividing split in the row and column space but, because it
#'   is bound to the fundamental splits, it is repeated by design on every page
#'   when pagination is involved.
#'
#' @note As opposed to [summarize_num_patients()], this function does not repeat the produced rows.
#'
#' @examples
#' df_tmp <- data.frame(
#'   USUBJID = as.character(c(1, 2, 1, 4, NA, 6, 6, 8, 9)),
#'   ARM = c("A", "A", "A", "A", "A", "B", "B", "B", "B"),
#'   AGE = c(10, 15, 10, 17, 8, 11, 11, 19, 17)
#' )
#' tbl <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   analyze_num_patients("USUBJID", .stats = c("unique")) %>%
#'   build_table(df_tmp)
#' tbl
#'
#' @export
analyze_num_patients <- function(lyt,
                                 vars,
                                 .stats = NULL,
                                 .formats = NULL,
                                 .labels = c(
                                   unique = "Number of patients with at least one event",
                                   nonunique = "Number of events"
                                 ),
                                 show_labels = c("default", "visible", "hidden"),
                                 indent_mod = lifecycle::deprecated(),
                                 .indent_mods = 0L,
                                 ...) {
  # Deprecated `indent_mod` still works but warns and is mapped onto `.indent_mods`.
  if (lifecycle::is_present(indent_mod)) {
    lifecycle::deprecate_warn("0.8.2", "analyze_num_patients(indent_mod)", "analyze_num_patients(.indent_mods)")
    .indent_mods <- indent_mod
  }

  # Default to all available statistics.
  if (is.null(.stats)) {
    .stats <- c("unique", "nonunique", "unique_count")
  }
  # When there are more labels than statistics, keep only the labels of requested statistics.
  if (length(.stats) < length(.labels)) {
    label_requested <- names(.labels) %in% .stats
    .labels <- .labels[label_requested]
  }

  afun <- make_afun(
    c_num_patients,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels
  )

  # Remaining arguments are handed to the statistics function via `extra_args`.
  analyze(
    afun = afun,
    lyt = lyt,
    vars = vars,
    extra_args = list(...),
    show_labels = show_labels,
    indent_mod = .indent_mods
  )
}

#' Additional Assertions for `checkmate`
#'
#' Additional assertion functions which can be used together with the `checkmate` package.
#'
#' @inheritParams checkmate::assert_factor
#' @param x (`any`)\cr object to test.
#' @param df (`data.frame`)\cr data set to test.
#' @param variables (named `list` of `character`)\cr list of variables to test.
#' @param include_boundaries (`logical`)\cr whether to include boundaries when testing
#'   for proportions.
#' @param na_level (`character`)\cr the string you have been using to represent NA or
#'   missing data. For `NA` values please consider using directly [is.na()] or
#'   similar approaches.
#' @param min.levels (`integer`)\cr minimum number of factor levels. Default is `1`.
#' @param ... a collection of objects to test.
#'
#' @return Nothing if assertion passes, otherwise prints the error message.
#'
#' @name assertions
NULL

check_list_of_variables <- function(x) {
  # `NULL` elements are ignored by the check.
  non_null <- Filter(Negate(is.null), x)

  # Must be a named list with at least one element, all elements of type character.
  list_check <- checkmate::check_list(
    non_null,
    names = "named",
    min.len = 1,
    any.missing = FALSE,
    types = "character"
  )
  if (!isTRUE(list_check)) {
    return(list_check)
  }

  # no empty strings allowed
  checkmate::check_character(unlist(non_null), min.chars = 1)
}
#' @describeIn assertions Checks whether `x` is a valid list of variable names, i.e. a named
#'   list with at least one element, whose elements are all of type `character` and contain
#'   no empty strings.
#'   `NULL` elements of the list `x` are dropped with `Filter(Negate(is.null), x)` before
#'   the check is applied.
#'
#' @keywords internal
assert_list_of_variables <- checkmate::makeAssertionFunction(check_list_of_variables)

check_df_with_variables <- function(df, variables, na_level = NULL) {
  # Returns `TRUE` when `df` contains every column named in `variables` (and, if `na_level`
  # is given, none of those columns contains that string); otherwise returns a message
  # describing the problem.
  checkmate::assert_data_frame(df)
  assert_list_of_variables(variables)

  var_names <- unlist(variables)

  # all requested variables must be column names of the data frame
  missing_vars <- setdiff(var_names, colnames(df))
  if (length(missing_vars) > 0) {
    return(paste(
      deparse(substitute(df)),
      "does not contain all specified variables as column names. Missing from dataframe:",
      paste(missing_vars, collapse = ", ")
    ))
  }

  # checking if na_level is present and in which column
  if (!is.null(na_level)) {
    checkmate::assert_string(na_level)
    # `na.rm = TRUE`: a column holding real `NA`s but no `na_level` must count as "not found"
    # instead of yielding `NA`, which would make the `if ()` below fail.
    res <- vapply(
      as.list(df)[var_names],
      function(x) any(x == na_level, na.rm = TRUE),
      logical(1)
    )
    if (any(res)) {
      return(paste0(
        deparse(substitute(df)), " contains explicit na_level (", na_level,
        ") in the following columns: ", paste0(var_names[res],
          collapse = ", "
        )
      ))
    }
  }
  return(TRUE)
}
#' @describeIn assertions Check whether `df` is a data frame with the analysis `variables`.
#'   Please notice how this produces an error when not all variables are present in the
#'   data.frame while the opposite is not required. If `na_level` is supplied, the assertion
#'   also fails when any of the `variables` columns contains that string.
#'
#' @keywords internal
assert_df_with_variables <- checkmate::makeAssertionFunction(check_df_with_variables)

check_valid_factor <- function(x,
                               min.levels = 1, # nolint
                               max.levels = NULL, # nolint
                               null.ok = TRUE, # nolint
                               any.missing = TRUE, # nolint
                               n.levels = NULL, # nolint
                               len = NULL) {
  # Returns `TRUE` when `x` is a factor satisfying the given constraints and none of its
  # levels is an empty string; otherwise returns the message of the failing check.

  # checks on levels insertion
  checkmate::assert_int(min.levels, lower = 1)

  # main factor check; `len` is forwarded so that a requested length is actually verified
  res <- checkmate::check_factor(x,
    min.levels = min.levels,
    null.ok = null.ok,
    max.levels = max.levels,
    any.missing = any.missing,
    n.levels = n.levels,
    len = len
  )

  # no empty strings allowed
  if (isTRUE(res)) {
    res <- checkmate::check_character(levels(x), min.chars = 1)
  }

  return(res)
}
#' @describeIn assertions Check whether `x` is a valid factor (i.e. has levels and no empty
#'   string levels). Note that `NULL` and `NA` elements are allowed by default
#'   (`null.ok = TRUE`, `any.missing = TRUE`), and that `min.levels` must be an integer of
#'   at least `1`.
#'
#' @keywords internal
assert_valid_factor <- checkmate::makeAssertionFunction(check_valid_factor)


check_df_with_factors <- function(df,
                                  variables,
                                  min.levels = 1, # nolint
                                  max.levels = NULL, # nolint
                                  any.missing = TRUE, # nolint
                                  na_level = NULL) {
  # First make sure the requested columns exist (and do not contain `na_level`, if given).
  vars_check <- check_df_with_variables(df, variables, na_level)
  if (!isTRUE(vars_check)) {
    return(vars_check)
  }

  # Then check each requested column for being a valid factor.
  var_names <- unlist(variables)
  factor_checks <- lapply(
    X = as.list(df)[var_names],
    FUN = check_valid_factor,
    min.levels = min.levels,
    max.levels = max.levels,
    any.missing = any.missing
  )
  failed <- !vapply(factor_checks, isTRUE, logical(1))

  if (!any(failed)) {
    return(TRUE)
  }

  # One bullet per offending column together with the reason reported for it.
  paste0(
    deparse(substitute(df)), " does not contain only factor variables among:",
    "\n* Column `", paste0(var_names[failed],
      "` of the data.frame -> ", factor_checks[failed],
      collapse = "\n* "
    )
  )
}
#' @describeIn assertions Check whether `df` is a data frame where the analysis `variables`
#'   are all factors. The columns must first exist in `df`; each of them is then checked for
#'   being a valid factor. Note that the creation of `NA` by direct call of `factor()` will
#'   trim `NA` levels out of the vector list itself.
#'
#' @keywords internal
assert_df_with_factors <- checkmate::makeAssertionFunction(check_df_with_factors)

#' @describeIn assertions Check whether `x` is a proportion: number between 0 and 1.
#'
#' @keywords internal
assert_proportion_value <- function(x, include_boundaries = FALSE) {
  # `x` must be a single number within [0, 1].
  checkmate::assert_number(x, lower = 0, upper = 1)
  checkmate::assert_flag(include_boundaries)

  # Boundaries allowed: nothing more to check.
  if (include_boundaries) {
    return(invisible(NULL))
  }

  # Otherwise 0 and 1 themselves are rejected.
  checkmate::assert_true(x > 0)
  checkmate::assert_true(x < 1)
}

#' Control Function for `CoxPH` Model
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for `CoxPH` model, typically used internally to specify
#' details of `CoxPH` model for [s_coxph_pairwise()]. `conf_level` refers to Hazard Ratio estimation.
#'
#' @inheritParams argument_convention
#' @param pval_method (`string`)\cr p-value method for testing hazard ratio = 1.
#'   Default method is `"log-rank"`, can also be set to `"wald"` or `"likelihood"`.
#' @param ties (`string`)\cr specifying the method for tie handling. Default is `"efron"`,
#'   can also be set to `"breslow"` or `"exact"`. See more in [survival::coxph()].
#'
#' @return A list of components with the same names as the arguments
#'
#' @export
control_coxph <- function(pval_method = c("log-rank", "wald", "likelihood"),
                          ties = c("efron", "breslow", "exact"),
                          conf_level = 0.95) {
  # Resolve the enumerated choices (first option is the default), then validate the level.
  settings <- list(
    pval_method = match.arg(pval_method),
    ties = match.arg(ties),
    conf_level = conf_level
  )
  assert_proportion_value(conf_level)

  settings
}

#' Control Function for `survfit` Model for Survival Time
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for `survfit` model, typically used internally to specify
#' details of `survfit` model for [s_surv_time()]. `conf_level` refers to survival time estimation.
#'
#' @inheritParams argument_convention
#' @param conf_type (`string`)\cr confidence interval type. Options are "plain" (default), "log", "log-log",
#'   see more in [survival::survfit()]. Note option "none" is no longer supported.
#' @param quantiles (`numeric`)\cr of length two to specify the quantiles of survival time.
#'
#' @return A list of components with the same names as the arguments
#'
#' @export
control_surv_time <- function(conf_level = 0.95,
                              conf_type = c("plain", "log", "log-log"),
                              quantiles = c(0.25, 0.75)) {
  conf_type <- match.arg(conf_type)

  # Two distinct, increasing quantiles, each of them a proportion value.
  checkmate::assert_numeric(quantiles, lower = 0, upper = 1, len = 2, unique = TRUE, sorted = TRUE)
  for (quantile_value in quantiles) {
    assert_proportion_value(quantile_value)
  }
  assert_proportion_value(conf_level)

  list(
    conf_level = conf_level,
    conf_type = conf_type,
    quantiles = quantiles
  )
}

#' Control Function for `survfit` Model for Patient's Survival Rate at time point
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for `survfit` model, typically used internally to specify
#' details of `survfit` model for [s_surv_timepoint()]. `conf_level` refers to patient risk estimation at a time point.
#'
#' @inheritParams argument_convention
#' @inheritParams control_surv_time
#'
#' @return A list of components with the same names as the arguments
#'
#' @export
control_surv_timepoint <- function(conf_level = 0.95,
                                   conf_type = c("plain", "log", "log-log")) {
  # Resolve the enumerated choice (first option is the default), then validate the level.
  chosen_type <- match.arg(conf_type)
  assert_proportion_value(conf_level)

  list(conf_level = conf_level, conf_type = chosen_type)
}

#' Helper Functions for Tabulating Survival Duration by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions that tabulate in a data frame statistics such as median survival
#' time and hazard ratio for population subgroups.
#'
#' @inheritParams argument_convention
#' @inheritParams survival_coxph_pairwise
#' @inheritParams survival_duration_subgroups
#' @param arm (`factor`)\cr the treatment group variable.
#'
#' @details Main functionality is to prepare data for use in a layout-creating function.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X"),
#'     SEX %in% c("M", "F")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
#'     SEX = droplevels(SEX),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("ARM" = adtte_labels[["ARM"]], "SEX" = adtte_labels[["SEX"]], "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' @name h_survival_duration_subgroups
NULL

#' @describeIn h_survival_duration_subgroups helper to prepare a data frame of median survival times by arm.
#'
#' @return
#' * `h_survtime_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, and `median`.
#'
#' @examples
#' # Extract median survival time for one group.
#' h_survtime_df(
#'   tte = adtte_f$AVAL,
#'   is_event = adtte_f$is_event,
#'   arm = adtte_f$ARM
#' )
#'
#' @export
h_survtime_df <- function(tte, is_event, arm) {
  checkmate::assert_numeric(tte)
  checkmate::assert_logical(is_event, len = length(tte))
  assert_valid_factor(arm, len = length(tte))

  # Keep only observations where both the time and the event flag are available.
  df_tte <- data.frame(tte = tte, is_event = is_event, stringsAsFactors = FALSE)
  is_complete <- stats::complete.cases(df_tte)
  df_tte <- df_tte[is_complete, ]
  arm <- arm[is_complete]

  # One-row summary for a single arm; arms without any rows get `NA` statistics.
  summarize_arm <- function(df_arm, arm_level) {
    if (nrow(df_arm) > 0) {
      surv_stats <- s_surv_time(df_arm, .var = "tte", is_event = "is_event")
      median_est <- unname(as.numeric(surv_stats$median))
      n_events <- sum(df_arm$is_event)
    } else {
      median_est <- NA
      n_events <- NA
    }

    data.frame(
      arm = arm_level,
      n = nrow(df_arm),
      n_events = n_events,
      median = median_est,
      stringsAsFactors = FALSE
    )
  }

  by_arm <- split(df_tte, arm)
  per_arm <- Map(summarize_arm, by_arm, names(by_arm))

  # Stack the per-arm rows and restore the original level order of `arm`.
  df <- do.call(rbind, args = c(per_arm, make.row.names = FALSE))
  df$arm <- factor(df$arm, levels = levels(arm))
  df
}

#' @describeIn h_survival_duration_subgroups summarizes median survival times by arm and across subgroups
#'    in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and
#'    requires elements `tte`, `is_event`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies
#'    groupings for `subgroups` variables.
#'
#' @return
#' * `h_survtime_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, `median`, `subgroup`,
#'   `var`, `var_label`, and `row_type`.
#'
#' @examples
#' # Extract median survival time for multiple groups.
#' h_survtime_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#'
#' # Define groupings for BMRKR2 levels.
#' h_survtime_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' @export
h_survtime_subgroups_df <- function(variables,
                                    data,
                                    groups_lists = list(),
                                    label_all = "All Patients") {
  checkmate::assert_character(variables$tte)
  checkmate::assert_character(variables$is_event)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)

  assert_df_with_variables(data, variables)

  checkmate::assert_string(label_all)

  # Overall ("All Patients") rows, flagged as content rows.
  result_all <- h_survtime_df(
    data[[variables$tte]],
    data[[variables$is_event]],
    data[[variables$arm]]
  )
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables the overall rows are the complete result.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  # One block of rows per subgroup, with the subgroup labels repeated on each arm row.
  subsets <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  per_subgroup <- lapply(subsets, function(grp) {
    result <- h_survtime_df(
      grp$df[[variables$tte]],
      grp$df[[variables$is_event]],
      grp$df[[variables$arm]]
    )
    result_labels <- grp$df_labels[rep(1, times = nrow(result)), ]
    cbind(result, result_labels)
  })
  result_subgroups <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(result_all, result_subgroups)
}

#' @describeIn h_survival_duration_subgroups helper to prepare a data frame with estimates of
#'   treatment hazard ratio.
#'
#' @param strata_data (`factor`, `data.frame` or `NULL`)\cr required if stratified analysis is performed.
#'
#' @return
#' * `h_coxph_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`,
#'   `conf_level`, `pval` and `pval_label`.
#'
#' @examples
#' # Extract hazard ratio for one group.
#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM)
#'
#' # Extract hazard ratio for one group with stratification factor.
#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM, strata_data = adtte_f$STRATA1)
#'
#' @export
h_coxph_df <- function(tte, is_event, arm, strata_data = NULL, control = control_coxph()) {
  checkmate::assert_numeric(tte)
  checkmate::assert_logical(is_event, len = length(tte))
  assert_valid_factor(arm, n.levels = 2, len = length(tte))

  df_tte <- data.frame(tte = tte, is_event = is_event)
  strata_vars <- NULL

  # Attach stratification columns: either all columns of a data frame, or a single
  # factor stored under the name "strata_data".
  if (!is.null(strata_data)) {
    if (is.data.frame(strata_data)) {
      strata_vars <- names(strata_data)
      checkmate::assert_data_frame(strata_data, nrows = nrow(df_tte))
      assert_df_with_factors(strata_data, as.list(stats::setNames(strata_vars, strata_vars)))
    } else {
      assert_valid_factor(strata_data, len = nrow(df_tte))
      strata_vars <- "strata_data"
    }
    df_tte[strata_vars] <- strata_data
  }

  # Result row used whenever no hazard ratio can be estimated.
  na_result <- function(n_tot, n_tot_events) {
    data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = n_tot,
      n_tot_events = n_tot_events,
      hr = NA,
      lcl = NA,
      ucl = NA,
      conf_level = control[["conf_level"]],
      pval = NA,
      pval_label = NA,
      stringsAsFactors = FALSE
    )
  }

  # First level of `arm` is the reference group, second level is the comparison group.
  l_df <- split(df_tte, arm)
  n_ref <- nrow(l_df[[1]])
  n_comp <- nrow(l_df[[2]])

  if (n_ref > 0 && n_comp > 0) {
    # Hazard ratio and CI.
    result <- s_coxph_pairwise(
      df = l_df[[2]],
      .ref_group = l_df[[1]],
      .in_ref_col = FALSE,
      .var = "tte",
      is_event = "is_event",
      strat = strata_vars,
      control = control
    )

    df <- data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = unname(as.numeric(result$n_tot)),
      n_tot_events = unname(as.numeric(result$n_tot_events)),
      hr = unname(as.numeric(result$hr)),
      lcl = unname(result$hr_ci[1]),
      ucl = unname(result$hr_ci[2]),
      conf_level = control[["conf_level"]],
      pval = as.numeric(result$pvalue),
      pval_label = obj_label(result$pvalue),
      stringsAsFactors = FALSE
    )
  } else if (n_ref > 0 || n_comp > 0) {
    # Exactly one arm has data: report counts over the complete rows only.
    df_tte_complete <- df_tte[stats::complete.cases(df_tte), ]
    df <- na_result(
      n_tot = nrow(df_tte_complete),
      n_tot_events = sum(df_tte_complete$is_event)
    )
  } else {
    # Neither arm has data.
    df <- na_result(n_tot = 0L, n_tot_events = 0L)
  }

  df
}

#' @describeIn h_survival_duration_subgroups summarizes estimates of the treatment hazard ratio
#'   across subgroups in a data frame. `variables` corresponds to the names of variables found in
#'   `data`, passed as a named list and requires elements `tte`, `is_event`, `arm` and
#'   optionally `subgroups` and `strat`. `groups_lists` optionally specifies
#'   groupings for `subgroups` variables.
#'
#' @return
#' * `h_coxph_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`,
#'   `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @examples
#' # Extract hazard ratio for multiple groups.
#' h_coxph_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#'
#' # Define groupings of BMRKR2 levels.
#' h_coxph_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' # Extract hazard ratio for multiple groups with stratification factors.
#' h_coxph_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2"),
#'     strat = c("STRATA1", "STRATA2")
#'   ),
#'   data = adtte_f
#' )
#'
#' @export
h_coxph_subgroups_df <- function(variables,
                                 data,
                                 groups_lists = list(),
                                 control = control_coxph(),
                                 label_all = "All Patients") {
  checkmate::assert_character(variables$tte)
  checkmate::assert_character(variables$is_event)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_character(variables$strat, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  # Hazard ratio estimation for one data set; stratification columns are passed
  # along only when `variables$strat` is given.
  estimate_hr <- function(df) {
    h_coxph_df(
      tte = df[[variables$tte]],
      is_event = df[[variables$is_event]],
      arm = df[[variables$arm]],
      strata_data = if (is.null(variables$strat)) NULL else df[variables$strat],
      control = control
    )
  }

  # Overall ("All Patients") row, flagged as a content row.
  result_all <- estimate_hr(data)
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables the overall row is the complete result.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  # One row per subgroup, each combined with its subgroup labels.
  subsets <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  per_subgroup <- lapply(subsets, function(grp) {
    result <- estimate_hr(grp$df)
    result_labels <- grp$df_labels[rep(1, times = nrow(result)), ]
    cbind(result, result_labels)
  })

  result_subgroups <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(result_all, result_subgroups)
}

#' Split Dataframe by Subgroups
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Split a dataframe into a non-nested list of subsets.
#'
#' @inheritParams survival_duration_subgroups
#' @param data (`data.frame`)\cr dataset to split.
#' @param subgroups (`character`)\cr names of factor variables from `data` used to create subsets.
#'   Unused levels not present in `data` are dropped. Note that the order in this vector
#'   determines the order in the downstream table.
#'
#' @return A list with subset data (`df`) and metadata about the subset (`df_labels`).
#'
#' @details Main functionality is to prepare data for use in forest plot layouts.
#'
#' @examples
#' df <- data.frame(
#'   x = c(1:5),
#'   y = factor(c("A", "B", "A", "B", "A"), levels = c("A", "B", "C")),
#'   z = factor(c("C", "C", "D", "D", "D"), levels = c("D", "C"))
#' )
#' formatters::var_labels(df) <- paste("label for", names(df))
#'
#' h_split_by_subgroups(
#'   data = df,
#'   subgroups = c("y", "z")
#' )
#'
#' h_split_by_subgroups(
#'   data = df,
#'   subgroups = c("y", "z"),
#'   groups_lists = list(
#'     y = list("AB" = c("A", "B"), "C" = "C")
#'   )
#' )
#'
#' @export
h_split_by_subgroups <- function(data,
                                 subgroups,
                                 groups_lists = list()) {
  checkmate::assert_character(subgroups, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(groups_lists, names = "named")
  checkmate::assert_subset(names(groups_lists), subgroups)
  assert_df_with_factors(data, as.list(stats::setNames(subgroups, subgroups)))

  # Labels are captured up front so they can be re-applied to every subset below.
  data_labels <- unname(formatters::var_labels(data))
  df_subgroups <- data[, subgroups, drop = FALSE]
  subgroup_labels <- formatters::var_labels(df_subgroups, fill = TRUE)

  # For each subgroup variable, list the subgroups that actually occur in the data.
  l_labels <- Map(function(grp_i, name_i) {
    existing_levels <- levels(droplevels(grp_i))
    grp_levels <- if (name_i %in% names(groups_lists)) {
      # For this variable groupings are defined. We check which groups are contained in the data.
      group_list_i <- groups_lists[[name_i]]
      group_has_levels <- vapply(group_list_i, function(lvls) any(lvls %in% existing_levels), TRUE)
      names(which(group_has_levels))
    } else {
      existing_levels
    }
    data.frame(
      subgroup = grp_levels,
      var = name_i,
      var_label = unname(subgroup_labels[name_i]),
      stringsAsFactors = FALSE # Rationale is that subgroups may not be unique.
    )
  }, df_subgroups, names(df_subgroups))

  # Create a dataframe with one row per subgroup.
  df_labels <- do.call(rbind, args = c(l_labels, make.row.names = FALSE))
  row_label <- paste0(df_labels$var, ".", df_labels$subgroup)
  row_split_var <- factor(row_label, levels = row_label)

  # Create a list of data subsets.
  lapply(split(df_labels, row_split_var), function(row_i) {
    # `%in%` never returns `NA`, so rows with a missing subgroup value are simply
    # excluded. Comparing with `==` would yield `NA` for them, and an `NA` row index
    # makes `[` return an all-`NA` row.
    which_row <- if (row_i$var %in% names(groups_lists)) {
      data[[row_i$var]] %in% groups_lists[[row_i$var]][[row_i$subgroup]]
    } else {
      data[[row_i$var]] %in% row_i$subgroup
    }
    # `drop = FALSE` keeps a data frame even when `data` has a single column.
    df <- data[which_row, , drop = FALSE]
    rownames(df) <- NULL
    formatters::var_labels(df) <- data_labels

    list(
      df = df,
      df_labels = data.frame(row_i, row.names = NULL)
    )
  })
}

#' Occurrence Table Pruning
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Family of constructor and condition functions to flexibly prune occurrence tables.
#' The condition functions always return whether the row result is higher than the threshold.
#' Since they are of class [CombinationFunction()] they can be logically combined with other condition
#' functions.
#'
#' @note Since most table specifications are worded positively, we name our constructor and condition
#'   functions positively, too. However, note that the result of [keep_rows()] says what
#'   should be pruned, to conform with the [rtables::prune_table()] interface.
#'
#' @examples
#' \donttest{
#' tab <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("RACE") %>%
#'   split_rows_by("STRATA1") %>%
#'   summarize_row_groups() %>%
#'   summarize_vars("COUNTRY", .stats = "count_fraction") %>%
#'   build_table(DM)
#' }
#'
#' @name prune_occurrences
NULL

#' @describeIn prune_occurrences Constructor for creating pruning functions based on
#'   a row condition function. This removes all analysis rows (`TableRow`) that should be
#'   pruned, i.e., don't fulfill the row condition. It removes the sub-tree if there are no
#'   children left.
#'
#' @param row_condition (`CombinationFunction`)\cr condition function which works on individual
#'   analysis rows and flags whether these should be kept in the pruned table.
#'
#' @return
#' * `keep_rows()` returns a pruning function that can be used with [rtables::prune_table()]
#'   to prune an `rtables` table.
#'
#' @examples
#' \donttest{
#' # `keep_rows`
#' is_non_empty <- !CombinationFunction(all_zero_or_na)
#' prune_table(tab, keep_rows(is_non_empty))
#' }
#'
#' @export
keep_rows <- function(row_condition) {
  checkmate::assert_function(row_condition)

  # The returned pruning function answers "should this element be removed?".
  function(table_tree) {
    if (!inherits(table_tree, "TableRow")) {
      # Not a `TableRow`: remove it only when it has no children.
      return(length(tree_children(table_tree)) == 0L)
    }
    # A `TableRow` is removed when it does not fulfill the row condition.
    !row_condition(table_tree)
  }
}

#' @describeIn prune_occurrences Constructor for creating pruning functions based on
#'   a condition for the (first) content row in leaf tables. This removes all leaf tables where
#'   the first content row does not fulfill the condition. It does not check individual rows.
#'   It then proceeds recursively by removing the sub tree if there are no children left.
#'
#' @param content_row_condition (`CombinationFunction`)\cr condition function which works on individual
#'   first content rows of leaf tables and flags whether these leaf tables should be kept in the pruned table.
#'
#' @return
#' * `keep_content_rows()` returns a pruning function that checks the condition on the first content
#'   row of leaf tables in the table.
#'
#' @examples
#' # `keep_content_rows`
#' \donttest{
#' more_than_twenty <- has_count_in_cols(atleast = 20L, col_names = names(tab))
#' prune_table(tab, keep_content_rows(more_than_twenty))
#' }
#'
#' @export
keep_content_rows <- function(content_row_condition) {
  checkmate::assert_function(content_row_condition)

  # The returned pruning function answers "should this element be removed?".
  function(table_tree) {
    if (is_leaf_table(table_tree)) {
      # Leaf tables are judged by their first content row only.
      !content_row_condition(h_content_first_row(table_tree))
    } else if (inherits(table_tree, "DataRow")) {
      # Individual data rows are never removed by this pruner.
      FALSE
    } else {
      # Anything else is removed only when it has no children.
      length(tree_children(table_tree)) == 0L
    }
  }
}

#' @describeIn prune_occurrences Constructor for creating condition functions on total counts in the specified columns.
#'
#' @param atleast (`count` or `proportion`)\cr threshold which should be met in order to keep the row.
#' @param ... arguments for row or column access, see [`rtables_access`]: either `col_names` (`character`) including
#'   the names of the columns which should be used, or alternatively `col_indices` (`integer`) giving the indices
#'   directly instead.
#'
#' @return
#' * `has_count_in_cols()` returns a condition function that sums the counts in the specified column.
#'
#' @examples
#' \donttest{
#' more_than_one <- has_count_in_cols(atleast = 1L, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_one))
#' }
#'
#' @export
has_count_in_cols <- function(atleast, ...) {
  checkmate::assert_count(atleast)

  # Condition: the counts summed over the selected columns reach `atleast`.
  # `...` is forwarded to `h_row_counts()` to select the columns.
  meets_total_count <- function(table_row) {
    sum(h_row_counts(table_row, ...)) >= atleast
  }
  CombinationFunction(meets_total_count)
}

#' @describeIn prune_occurrences Constructor for creating condition functions on any of the counts in
#'   the specified columns satisfying a threshold.
#'
#' @param atleast (`count` or `proportion`)\cr threshold which should be met in order to keep the row.
#'
#' @return
#' * `has_count_in_any_col()` returns a condition function that compares the counts in the
#'   specified columns with the threshold.
#'
#' @examples
#' \donttest{
#' # `has_count_in_any_col`
#' any_more_than_one <- has_count_in_any_col(atleast = 1L, col_names = names(tab))
#' prune_table(tab, keep_rows(any_more_than_one))
#' }
#'
#' @export
has_count_in_any_col <- function(atleast, ...) {
  checkmate::assert_count(atleast)

  # Condition: at least one of the selected columns has a count of `atleast` or more.
  # `...` is forwarded to `h_row_counts()` to select the columns.
  any_count_meets <- function(table_row) {
    any(h_row_counts(table_row, ...) >= atleast)
  }
  CombinationFunction(any_count_meets)
}

#' @describeIn prune_occurrences Constructor for creating condition functions on total fraction in
#'   the specified columns.
#'
#' @return
#' * `has_fraction_in_cols()` returns a condition function that sums the counts in the
#'   specified column, and computes the fraction by dividing by the total column counts.
#'
#' @examples
#' \donttest{
#' # `has_fraction_in_cols`
#' more_than_five_percent <- has_fraction_in_cols(atleast = 0.05, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_five_percent))
#' }
#'
#' @export
has_fraction_in_cols <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)

  # Condition: the pooled fraction (summed row counts divided by summed column
  # counts over the selected columns) reaches `atleast`.
  meets_total_fraction <- function(table_row) {
    numerator <- sum(h_row_counts(table_row, ...))
    denominator <- sum(h_col_counts(table_row, ...))
    numerator / denominator >= atleast
  }
  CombinationFunction(meets_total_fraction)
}

#' @describeIn prune_occurrences Constructor for creating condition functions on any fraction in
#'   the specified columns.
#'
#' @return
#' * `has_fraction_in_any_col()` returns a condition function that looks at the fractions
#'  in the specified columns and checks whether any of them fulfill the threshold.
#'
#' @examples
#' \donttest{
#' # `has_fraction_in_any_col`
#' any_atleast_five_percent <- has_fraction_in_any_col(atleast = 0.05, col_names = names(tab))
#' prune_table(tab, keep_rows(any_atleast_five_percent))
#' }
#'
#' @export
has_fraction_in_any_col <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)

  # Condition: at least one of the selected columns has a fraction of `atleast` or more.
  # `...` is forwarded to `h_row_fractions()` to select the columns.
  any_fraction_meets <- function(table_row) {
    any(h_row_fractions(table_row, ...) >= atleast)
  }
  CombinationFunction(any_fraction_meets)
}

#' @describeIn prune_occurrences Constructor for creating condition function that checks the difference
#'   between the fractions reported in each specified column.
#'
#' @return
#' * `has_fractions_difference()` returns a condition function that extracts the fractions of each
#'   specified column, and computes the difference of the minimum and maximum.
#'
#' @examples
#' \donttest{
#' # `has_fractions_difference`
#' more_than_five_percent_diff <- has_fractions_difference(atleast = 0.05, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_five_percent_diff))
#' }
#'
#' @export
has_fractions_difference <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)

  # Condition: the spread (maximum minus minimum) of the fractions in the selected
  # columns reaches `atleast`.
  spread_meets <- function(table_row) {
    fraction_range <- range(h_row_fractions(table_row, ...))
    diff(fraction_range) >= atleast
  }
  CombinationFunction(spread_meets)
}

#' @describeIn prune_occurrences Constructor for creating condition function that checks the difference
#'   between the counts reported in each specified column.
#'
#' @return
#' * `has_counts_difference()` returns a condition function that extracts the counts of each
#'   specified column, and computes the difference of the minimum and maximum.
#'
#' @examples
#' \donttest{
#' more_than_one_diff <- has_counts_difference(atleast = 1L, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_one_diff))
#' }
#'
#' @export
has_counts_difference <- function(atleast, ...) {
  checkmate::assert_count(atleast)

  # Condition: the spread (maximum minus minimum) of the counts in the selected
  # columns reaches `atleast`.
  spread_meets <- function(table_row) {
    count_range <- range(h_row_counts(table_row, ...))
    diff(count_range) >= atleast
  }
  CombinationFunction(spread_meets)
}

#' Helper Functions for Cox Proportional Hazards Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions used in [fit_coxreg_univar()] and [fit_coxreg_multivar()].
#'
#' @inheritParams argument_convention
#' @inheritParams h_coxreg_univar_extract
#' @inheritParams cox_regression_inter
#' @inheritParams control_coxreg
#'
#' @seealso [cox_regression]
#'
#' @name h_cox_regression
NULL

#' @describeIn h_cox_regression Helper for Cox regression formula. Creates a list of formulas. It is used
#'   internally by [fit_coxreg_univar()] for the comparison of univariate Cox regression models.
#'
#' @return
#' * `h_coxreg_univar_formulas()` returns a `character` vector coercible into formulas (e.g [stats::as.formula()]).
#'
#' @examples
#' # `h_coxreg_univar_formulas`
#'
#' ## Simple formulas.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", covariates = c("X", "y")
#'   )
#' )
#'
#' ## Addition of an optional strata.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
#'     strata = "SITE"
#'   )
#' )
#'
#' ## Inclusion of the interaction term.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
#'     strata = "SITE"
#'   ),
#'   interaction = TRUE
#' )
#'
#' ## Only covariates fitted in separate models.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", covariates = c("X", "y")
#'   )
#' )
#'
#' @export
h_coxreg_univar_formulas <- function(variables,
                                     interaction = FALSE) {
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  checkmate::assert_flag(interaction)

  # An interaction needs both a treatment arm and at least one covariate.
  if (!has_arm || is.null(variables$covariates)) {
    checkmate::assert_false(interaction)
  }

  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # Pieces shared by all formulas: the survival response and the optional strata term.
  surv_lhs <- paste0("survival::Surv(", variables$time, ", ", variables$event, ") ~ ")
  strata_term <- if (!is.null(variables$strata)) {
    paste0(" + strata(", paste0(variables$strata, collapse = ", "), ")")
  } else {
    ""
  }

  # One formula per covariate: arm (or intercept only) combined with the covariate,
  # either additively or as an interaction.
  forms <- NULL
  if (!is.null(variables$covariates)) {
    forms <- paste0(
      surv_lhs,
      ifelse(has_arm, variables$arm, "1"),
      if (interaction) " * " else " + ",
      variables$covariates,
      strata_term
    )
  }
  nams <- variables$covariates

  # With a treatment arm, the arm-only reference model goes first under the name "ref".
  if (has_arm) {
    forms <- c(paste0(surv_lhs, variables$arm, strata_term), forms)
    nams <- c("ref", nams)
  }
  stats::setNames(forms, nams)
}

#' @describeIn h_cox_regression Helper for multivariate Cox regression formula. Creates a formula
#'   string. It is used internally by [fit_coxreg_multivar()] for the comparison of multivariate Cox
#'   regression models. Interactions will not be included in the multivariate Cox regression model.
#'
#' @return
#' * `h_coxreg_multivar_formula()` returns a `string` coercible into a formula (e.g [stats::as.formula()]).
#'
#' @examples
#' # `h_coxreg_multivar_formula`
#'
#' h_coxreg_multivar_formula(
#'   variables = list(
#'     time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE")
#'   )
#' )
#'
#' # Addition of an optional strata.
#' h_coxreg_multivar_formula(
#'   variables = list(
#'     time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE"),
#'     strata = "SITE"
#'   )
#' )
#'
#' # Example without treatment arm.
#' h_coxreg_multivar_formula(
#'   variables = list(
#'     time = "AVAL", event = "event", covariates = c("RACE", "AGE"),
#'     strata = "SITE"
#'   )
#' )
#'
#' @export
h_coxreg_multivar_formula <- function(variables) {
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # Survival response followed by the treatment arm (or an intercept-only term).
  base_formula <- paste0(
    "survival::Surv(", variables$time, ", ", variables$event, ") ~ ",
    ifelse(has_arm, variables$arm, "1")
  )

  # Optional additive covariate terms.
  covariate_part <- if (length(variables$covariates) > 0) {
    paste0(" + ", paste(variables$covariates, collapse = " + "))
  } else {
    ""
  }

  # Optional stratification term.
  strata_part <- if (!is.null(variables$strata)) {
    paste0(" + strata(", paste0(variables$strata, collapse = ", "), ")")
  } else {
    ""
  }

  paste0(base_formula, covariate_part, strata_part)
}

#' @describeIn h_cox_regression Utility function to help tabulate the result of
#'   a univariate Cox regression model.
#'
#' @param effect (`string`)\cr the treatment variable.
#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()].
#'
#' @return
#' * `h_coxreg_univar_extract()` returns a `data.frame` with variables `effect`, `term`, `term_label`, `level`,
#'   `n`, `hr`, `lcl`, `ucl`, and `pval`.
#'
#' @examples
#' library(survival)
#'
#' dta_simple <- data.frame(
#'   time = c(5, 5, 10, 10, 5, 5, 10, 10),
#'   status = c(0, 0, 1, 0, 0, 1, 1, 1),
#'   armcd = factor(LETTERS[c(1, 1, 1, 1, 2, 2, 2, 2)], levels = c("A", "B")),
#'   var1 = c(45, 55, 65, 75, 55, 65, 85, 75),
#'   var2 = c("F", "M", "F", "M", "F", "M", "F", "U")
#' )
#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
#' result <- h_coxreg_univar_extract(
#'   effect = "armcd", covar = "armcd", mod = mod, data = dta_simple
#' )
#' result
#'
#' @export
h_coxreg_univar_extract <- function(effect,
                                    covar,
                                    data,
                                    mod,
                                    control = control_coxreg()) {
  checkmate::assert_string(covar)
  checkmate::assert_string(effect)
  checkmate::assert_class(mod, "coxph")

  # Map the requested p-value method onto the test statistic name passed to the ANOVA helper.
  test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]

  anova_tab <- muffled_car_anova(mod, test_statistic)
  cox_summary <- summary(mod, conf.int = control$conf_level)
  cox_terms <- broom::tidy(cox_summary)

  # P-value of the effect: the ANOVA column whose name contains "Pr".
  effect_aov <- anova_tab[effect, , drop = TRUE]
  pval <- effect_aov[[grep(pattern = "Pr", x = names(effect_aov)), drop = TRUE]]

  # Coefficient rows whose `level` matches the effect name.
  sum_main <- cox_terms[grepl(effect, cox_terms$level), ]

  # The treatment row is labelled "<2nd level> vs control (<1st level>)";
  # covariate rows use the variable label (or name).
  is_treatment <- effect == covar
  term_label <- if (is_treatment) {
    covar_levels <- levels(data[[covar]])
    paste0(covar_levels[2], " vs control (", covar_levels[1], ")")
  } else {
    unname(labels_or_names(data[covar]))
  }

  data.frame(
    effect = if (is_treatment) "Treatment:" else "Covariate:",
    term = covar,
    term_label = term_label,
    level = levels(data[[effect]])[2],
    n = mod[["n"]],
    hr = unname(sum_main["exp(coef)"]),
    lcl = unname(sum_main[grep("lower", names(sum_main))]),
    ucl = unname(sum_main[grep("upper", names(sum_main))]),
    pval = pval,
    stringsAsFactors = FALSE
  )
}

#' @describeIn h_cox_regression Tabulation of multivariate Cox regressions. Utility function to help
#'   tabulate the result of a multivariate Cox regression model for a treatment/covariate variable.
#'
#' @return
#' * `h_coxreg_multivar_extract()` returns a `data.frame` with variables `pval`, `hr`, `lcl`, `ucl`, `level`,
#'   `n`, `term`, and `term_label`.
#'
#' @examples
#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
#' result <- h_coxreg_multivar_extract(
#'   var = "var1", mod = mod, data = dta_simple
#' )
#' result
#'
#' @export
h_coxreg_multivar_extract <- function(var,
                                      data,
                                      mod,
                                      control = control_coxreg()) {
  # Map the requested p-value method onto the test statistic name passed to the ANOVA helper.
  test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]
  mod_aov <- muffled_car_anova(mod, test_statistic)

  msum <- summary(mod, conf.int = control$conf_level)
  sum_anova <- broom::tidy(mod_aov)
  sum_cox <- broom::tidy(msum)

  # ANOVA row for the variable: term name and its p-value.
  ret_anova <- sum_anova[sum_anova$term == var, c("term", "p.value")]
  names(ret_anova)[2] <- "pval"

  # Coefficient rows for the variable: prefix match for factors (one row per
  # non-reference level), exact match otherwise. The "exp(-coef)" column is dropped.
  is_var_row <- if (is.factor(data[[var]])) {
    startsWith(prefix = var, x = sum_cox$level)
  } else {
    var == sum_cox$level
  }
  ret_cox <- sum_cox[is_var_row, !(names(sum_cox) %in% "exp(-coef)")]
  names(ret_cox)[1:4] <- c("pval", "hr", "lcl", "ucl")
  varlab <- unname(labels_or_names(data[var]))
  ret_cox$term <- varlab

  if (is.numeric(data[[var]])) {
    ret <- ret_cox
    ret$term_label <- ret$term
  } else {
    # With at most two levels the header row's p-value is blanked out.
    if (length(levels(data[[var]])) <= 2) {
      ret_anova$pval <- NA
    }
    # Header row naming the reference level, followed by one row per remaining level.
    ret_anova$term_label <- paste0(varlab, " (reference = ", levels(data[[var]])[1], ")")
    ret_cox$level <- gsub(var, "", ret_cox$level)
    ret_cox$term_label <- ret_cox$level
    ret <- dplyr::bind_rows(ret_anova, ret_cox)
  }

  as.data.frame(ret)
}

#' Helper Functions for Subgroup Treatment Effect Pattern (STEP) Calculations
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions that are used internally for the STEP calculations.
#'
#' @inheritParams argument_convention
#'
#' @name h_step
#' @include control_step.R
NULL

#' @describeIn h_step creates the windows for STEP, based on the control settings
#'   provided.
#'
#' @param x (`numeric`)\cr biomarker value(s) to use (without `NA`).
#' @param control (named `list`)\cr output from `control_step()`.
#'
#' @return
#' * `h_step_window()` returns a list containing the window-selection matrix `sel`
#'   and the interval information matrix `interval`.
#'
#' @export
h_step_window <- function(x,
                          control = control_step()) {
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  n_points <- control$num_points
  half_width <- control$bandwidth
  bound_labels <- c("Center", "Lower", "Upper")

  # `sel` has one column per window flagging the observations that fall into it;
  # `out` has one row per window holding its center and bounds.
  sel <- matrix(FALSE, length(x), n_points)
  out <- matrix(0, n_points, 3)
  colnames(out) <- paste("Interval", bound_labels)

  if (control$use_percentile) {
    # Windows are defined on the percentile scale (columns 1-3) and then mapped
    # to the scale of `x` via quantiles (columns 4-6).
    out <- cbind(out, out)
    colnames(out)[1:3] <- paste("Percentile", bound_labels)
    grid <- seq(0, 1, length.out = n_points + 2)[-1]
    # Centers are the grid points without the last one (which is 1).
    out[, 1] <- grid[-(n_points + 1)]
    out[, 4] <- stats::quantile(x, out[, 1])
    for (i in seq_len(n_points)) {
      out[i, 2:3] <- c(
        max(grid[i] - half_width, 0),
        min(grid[i] + half_width, 1)
      )
      out[i, 5:6] <- stats::quantile(x, out[i, 2:3])
      sel[, i] <- x >= out[i, 5] & x <= out[i, 6]
    }
  } else {
    # Windows are defined directly on the scale of `x`, clipped to its range.
    x_range <- range(x)
    grid <- seq(x_range[1], x_range[2], length.out = n_points + 2)[-1]
    # Centers are the grid points without the last one (which is `max(x)`).
    out[, 1] <- grid[-(n_points + 1)]
    for (i in seq_len(n_points)) {
      out[i, 2:3] <- c(
        max(grid[i] - half_width, x_range[1]),
        min(grid[i] + half_width, x_range[2])
      )
      sel[, i] <- x >= out[i, 2] & x <= out[i, 3]
    }
  }

  list(sel = sel, interval = out)
}

#' @describeIn h_step calculates the treatment effect estimate
#'   on the linear predictor scale and corresponding standard error from a STEP `model` fitted
#'   on `data` given `variables` specification, for a single biomarker value `x`.
#'   This works for both `coxph` and `glm` models, i.e. for calculating log hazard ratio or log odds
#'   ratio estimates.
#'
#' @param model the regression model object.
#'
#' @return
#' * `h_step_trt_effect()` returns a vector with elements `est` and `se`.
#'
#' @export
h_step_trt_effect <- function(data,
                              model,
                              variables,
                              x) {
  checkmate::assert_multi_class(model, c("coxph", "glm"))
  checkmate::assert_number(x)
  assert_df_with_variables(data, variables)
  checkmate::assert_factor(data[[variables$arm]], n.levels = 2)

  # Two copies of the first row that differ only in the arm level, both with the
  # biomarker set to `x`; every other column is shared and cancels in the difference.
  pred_data <- data[c(1, 1), ]
  pred_data[, variables$biomarker] <- x
  pred_data[, variables$arm] <- levels(data[[variables$arm]])

  rhs_terms <- stats::delete.response(stats::terms(model))
  pred_frame <- stats::model.frame(rhs_terms, data = pred_data, xlev = model$xlevels)
  design <- stats::model.matrix(rhs_terms, data = pred_frame, contrasts.arg = model$contrasts)

  beta <- stats::coef(model)
  # Note: Restrict the design matrix to the coefficient names, otherwise intercept
  # and strata columns are included for coxph() models.
  design <- design[, names(beta)]

  # Contrast between the second and the first arm level.
  contrast <- diff(design)
  est <- contrast %*% beta
  se <- sqrt(contrast %*% stats::vcov(model) %*% t(contrast))

  c(
    est = est,
    se = se
  )
}

#' @describeIn h_step builds the model formula used in survival STEP calculations.
#'
#' @return
#' * `h_step_survival_formula()` returns a model formula.
#'
#' @export
h_step_survival_formula <- function(variables,
                                    control = control_step()) {
  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_list_of_variables(variables[c("arm", "biomarker", "event", "time")])

  # Assemble the right-hand side first: arm, optionally interacting with a raw
  # polynomial of the biomarker, then covariates, then the strata term.
  rhs <- variables$arm
  if (control$degree > 0) {
    rhs <- paste0(rhs, " * stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)")
  }
  if (!is.null(variables$covariates)) {
    rhs <- paste(rhs, "+", paste(variables$covariates, collapse = "+"))
  }
  if (!is.null(variables$strata)) {
    rhs <- paste0(rhs, " + strata(", paste0(variables$strata, collapse = ", "), ")")
  }

  form <- paste0("Surv(", variables$time, ", ", variables$event, ") ~ ", rhs)
  stats::as.formula(form)
}

#' @describeIn h_step estimates the model with `formula` built based on
#'   `variables` in `data` for a given `subset` and `control` parameters for the
#'   Cox regression.
#'
#' @param formula (`formula`)\cr the regression model formula.
#' @param subset (`logical`)\cr subset vector.
#'
#' @return
#' * `h_step_survival_est()` returns a matrix of number of observations `n`,
#'   `events`, log hazard ratio estimates `loghr`, standard error `se`,
#'   and Wald confidence interval bounds `ci_lower` and `ci_upper`. One row is
#'   included for each biomarker value in `x`.
#'
#' @export
h_step_survival_est <- function(formula,
                                data,
                                variables,
                                x,
                                subset = rep(TRUE, nrow(data)),
                                control = control_coxph()) {
  checkmate::assert_formula(formula)
  assert_df_with_variables(data, variables)
  checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  # Note: `subset` in `coxph` needs to be an expression referring to `data` variables,
  # so the logical vector is stored as a column first.
  data$.subset <- subset

  # Fit the model; any warnings are muffled and replaced by one summary warning below.
  had_fit_warning <- FALSE
  fit <- withCallingHandlers(
    survival::coxph(
      formula = formula,
      data = data,
      subset = .subset,
      ties = control$ties
    ),
    warning = function(w) {
      had_fit_warning <<- TRUE
      invokeRestart("muffleWarning")
    }
  )
  if (had_fit_warning) {
    warning(paste(
      "Fit warnings occurred, please consider using a simpler model, or",
      "larger `bandwidth`, less `num_points` in `control_step()` settings"
    ))
  }

  # One row per value in `x`, with columns `est` and `se`.
  effects <- t(vapply(
    X = x,
    FUN = h_step_trt_effect,
    FUN.VALUE = numeric(2),
    data = data,
    model = fit,
    variables = variables
  ))
  est <- effects[, "est"]
  se <- effects[, "se"]
  # Half-width multiplier of the two-sided Wald interval.
  q_norm <- stats::qnorm((1 + control$conf_level) / 2)

  cbind(
    n = fit$n,
    events = fit$nevent,
    loghr = est,
    se = se,
    ci_lower = est - q_norm * se,
    ci_upper = est + q_norm * se
  )
}

#' @describeIn h_step builds the model formula used in response STEP calculations.
#'
#' @return
#' * `h_step_rsp_formula()` returns a model formula.
#'
#' @export
h_step_rsp_formula <- function(variables,
                               control = c(control_step(), control_logistic())) {
  checkmate::assert_character(variables$covariates, null.ok = TRUE)
  assert_list_of_variables(variables[c("arm", "biomarker", "response")])

  # Left-hand side: the response definition with the literal placeholder
  # "response" replaced (first occurrence only) by the actual variable name.
  lhs <- sub(
    pattern = "response",
    replacement = variables$response,
    x = control$response_definition,
    fixed = TRUE
  )

  # Right-hand side: arm, optionally interacting with a raw polynomial of the
  # biomarker, then covariates, then the strata term.
  rhs <- variables$arm
  if (control$degree > 0) {
    rhs <- paste0(rhs, " * stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)")
  }
  if (!is.null(variables$covariates)) {
    rhs <- paste(rhs, "+", paste(variables$covariates, collapse = "+"))
  }
  if (!is.null(variables$strata)) {
    strata_vars <- variables$strata
    # Several strata variables are combined into a single interaction factor.
    strata_arg <- if (length(strata_vars) > 1) {
      paste0("I(interaction(", paste0(strata_vars, collapse = ", "), "))")
    } else {
      strata_vars
    }
    rhs <- paste0(rhs, "+ strata(", strata_arg, ")")
  }

  form <- paste0(lhs, " ~ ", rhs)
  stats::as.formula(form)
}

#' @describeIn h_step estimates the model with `formula` built based on
#'   `variables` in `data` for a given `subset` and `control` parameters for the
#'   logistic regression.
#'
#' @param formula (`formula`)\cr the regression model formula.
#' @param subset (`logical`)\cr subset vector.
#'
#' @return
#' * `h_step_rsp_est()` returns a matrix of number of observations `n`, log odds
#'   ratio estimates `logor`, standard error `se`, and Wald confidence interval bounds
#'   `ci_lower` and `ci_upper`. One row is included for each biomarker value in `x`.
#'
#' @export
h_step_rsp_est <- function(formula,
                           data,
                           variables,
                           x,
                           subset = rep(TRUE, nrow(data)),
                           control = control_logistic()) {
  checkmate::assert_formula(formula)
  assert_df_with_variables(data, variables)
  checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  # Note: `subset` in `glm` needs to be an expression referring to `data` variables,
  # so the logical vector is stored as a column first.
  data$.subset <- subset

  # Fit the model; any warnings are muffled and replaced by one summary warning below.
  had_fit_warning <- FALSE
  fit <- withCallingHandlers(
    if (is.null(variables$strata)) {
      stats::glm(
        formula = formula,
        data = data,
        subset = .subset,
        family = stats::binomial("logit")
      )
    } else {
      # clogit needs coxph and strata imported
      survival::clogit(
        formula = formula,
        data = data,
        subset = .subset
      )
    },
    warning = function(w) {
      had_fit_warning <<- TRUE
      invokeRestart("muffleWarning")
    }
  )
  if (had_fit_warning) {
    warning(paste(
      "Fit warnings occurred, please consider using a simpler model, or",
      "larger `bandwidth`, less `num_points` in `control_step()` settings"
    ))
  }

  # One row per value in `x`, with columns `est` and `se`.
  effects <- t(vapply(
    X = x,
    FUN = h_step_trt_effect,
    FUN.VALUE = numeric(2),
    data = data,
    model = fit,
    variables = variables
  ))
  est <- effects[, "est"]
  se <- effects[, "se"]
  # Half-width multiplier of the two-sided Wald interval.
  q_norm <- stats::qnorm((1 + control$conf_level) / 2)

  cbind(
    n = length(fit$y),
    logor = est,
    se = se,
    ci_lower = est - q_norm * se,
    ci_upper = est + q_norm * se
  )
}

#' Helper Functions for Tabulating Biomarker Effects on Binary Response by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions which are documented here separately to not confuse the user
#' when reading about the user-facing functions.
#'
#' @inheritParams response_biomarkers_subgroups
#' @inheritParams extract_rsp_biomarkers
#' @inheritParams argument_convention
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   mutate(rsp = AVALC == "CR")
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' @name h_response_biomarkers_subgroups
NULL

#' @describeIn h_response_biomarkers_subgroups helps with converting the "response" function variable list
#'   to the "logistic regression" variable list. The reason is that currently there is an
#'   inconsistency between the variable names accepted by `extract_rsp_subgroups()` and `fit_logistic()`.
#'
#' @param biomarker (`string`)\cr the name of the biomarker variable.
#'
#' @return
#' * `h_rsp_to_logistic_variables()` returns a named `list` of elements `response`, `arm`, `covariates`, and `strata`.
#'
#' @examples
#' # This is how the variable list is converted internally.
#' h_rsp_to_logistic_variables(
#'   variables = list(
#'     rsp = "RSP",
#'     covariates = c("A", "B"),
#'     strat = "D"
#'   ),
#'   biomarker = "AGE"
#' )
#'
#' @export
h_rsp_to_logistic_variables <- function(variables, biomarker) {
  checkmate::assert_list(variables)
  checkmate::assert_string(variables$rsp)
  checkmate::assert_string(biomarker)

  # Rename the elements: `rsp` becomes `response`, `strat` becomes `strata`,
  # and the biomarker takes the place of the `arm` variable.
  response_var <- variables$rsp
  covariate_vars <- variables$covariates
  strata_vars <- variables$strat

  list(
    response = response_var,
    arm = biomarker,
    covariates = covariate_vars,
    strata = strata_vars
  )
}

#' @describeIn h_response_biomarkers_subgroups prepares estimates for number of responses, patients and
#'   overall response rate, as well as odds ratio estimates, confidence intervals and p-values, for multiple
#'   biomarkers in a given single data set.
#'   `variables` corresponds to names of variables found in `data`, passed as a named list and requires elements
#'   `rsp` and `biomarkers` (vector of continuous biomarker variables) and optionally `covariates`
#'   and `strat`.
#'
#' @return
#' * `h_logistic_mult_cont_df()` returns a `data.frame` containing estimates and statistics for the selected biomarkers.
#'
#' @examples
#' # For a single population, estimate separately the effects
#' # of two biomarkers.
#' df <- h_logistic_mult_cont_df(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX"
#'   ),
#'   data = adrs_f
#' )
#' df
#'
#' # If the data set is empty, still the corresponding rows with missings are returned.
#' h_logistic_mult_cont_df(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     strat = "STRATA1"
#'   ),
#'   data = adrs_f[NULL, ]
#' )
#'
#' @export
h_logistic_mult_cont_df <- function(variables,
                                    data,
                                    control = control_logistic()) {
  # Fits one logistic model per biomarker in `variables$biomarkers` and returns
  # one row of estimates per biomarker.
  assert_df_with_variables(data, variables)

  checkmate::assert_character(variables$biomarkers, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  conf_level <- control[["conf_level"]]
  pval_label <- "p-value (Wald)"

  # If there is any data, run model, otherwise return empty results.
  if (nrow(data) > 0) {
    l_result <- lapply(variables$biomarkers, function(bm) {
      model_fit <- fit_logistic(
        variables = h_rsp_to_logistic_variables(variables, bm),
        data = data,
        response_definition = control$response_definition
      )
      result <- h_logistic_simple_terms(
        x = bm,
        fit_glm = model_fit,
        conf_level = conf_level
      )
      # The response values are read back from the fitted object: from the model
      # frame for `glm` fits, otherwise from the `status` column of `y`.
      # NOTE(review): the `glm` lookup relies on the model frame having a column
      # named exactly `variables$rsp` - confirm for non-default `response_definition`.
      resp_vector <- if (inherits(model_fit, "glm")) {
        model_fit$model[[variables$rsp]]
      } else {
        as.logical(as.matrix(model_fit$y)[, "status"])
      }
      data.frame(
        # Dummy column needed downstream to create a nested header.
        biomarker = bm,
        biomarker_label = formatters::var_labels(data[bm], fill = TRUE),
        n_tot = length(resp_vector),
        n_rsp = sum(resp_vector),
        prop = mean(resp_vector),
        # Only the first row of the term table is used.
        or = as.numeric(result[1L, "odds_ratio"]),
        lcl = as.numeric(result[1L, "lcl"]),
        ucl = as.numeric(result[1L, "ucl"]),
        conf_level = conf_level,
        pval = as.numeric(result[1L, "pvalue"]),
        pval_label = pval_label,
        stringsAsFactors = FALSE
      )
    })
    do.call(rbind, args = c(l_result, make.row.names = FALSE))
  } else {
    # No data: one row per biomarker with zero counts and missing estimates.
    data.frame(
      biomarker = variables$biomarkers,
      biomarker_label = formatters::var_labels(data[variables$biomarkers], fill = TRUE),
      n_tot = 0L,
      n_rsp = 0L,
      prop = NA,
      or = NA,
      lcl = NA,
      ucl = NA,
      conf_level = conf_level,
      pval = NA,
      pval_label = pval_label,
      row.names = seq_along(variables$biomarkers),
      stringsAsFactors = FALSE
    )
  }
}

#' @describeIn h_response_biomarkers_subgroups prepares a single sub-table given a `df` containing
#'   the results for a single biomarker.
#'
#' @param df (`data.frame`)\cr results for a single biomarker, as part of what is
#'   returned by [extract_rsp_biomarkers()] (it needs a couple of columns which are
#'   added by that high-level function relative to what is returned by [h_logistic_mult_cont_df()],
#'   see the example).
#'
#' @return
#' * `h_tab_rsp_one_biomarker()` returns an `rtables` table object with the given statistics arranged in columns.
#'
#' @examples
#' # Starting from above `df`, zoom in on one biomarker and add required columns.
#' df1 <- df[1, ]
#' df1$subgroup <- "All patients"
#' df1$row_type <- "content"
#' df1$var <- "ALL"
#' df1$var_label <- "All patients"
#'
#' h_tab_rsp_one_biomarker(
#'   df1,
#'   vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval")
#' )
#'
#' @export
h_tab_rsp_one_biomarker <- function(df,
                                    vars,
                                    .indent_mods = 0L) {
  # Analysis functions for the requested statistics only.
  selected_afuns <- a_response_subgroups()[vars]
  # Column settings; confidence level and p-value label are taken from the
  # first row of `df`.
  column_spec <- d_rsp_subgroups_colvars(
    vars,
    conf_level = df$conf_level[1],
    method = df$pval_label[1]
  )

  h_tab_one_biomarker(
    df = df,
    afuns = selected_afuns,
    colvars = column_spec,
    .indent_mods = .indent_mods
  )
}

#' Helper Function for Deriving Analysis Datasets for `LBT13` and `LBT14`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function that merges `ADSL` and `ADLB` datasets so that missing lab test records are inserted in the
#' output dataset.
#'
#' @param adsl (`data.frame`)\cr `ADSL` dataframe.
#' @param adlb (`data.frame`)\cr `ADLB` dataframe.
#' @param worst_flag (named `vector`)\cr Worst post-baseline lab flag variable.
#' @param by_visit (`logical`)\cr defaults to `FALSE` to generate worst grade per patient.
#'   If worst grade per patient per visit is specified for `worst_flag`, then
#'   `by_visit` should be `TRUE` to generate worst grade per patient per visit.
#' @param no_fillin_visits (named `character`)\cr Visits that are not considered for post-baseline worst toxicity
#'   grade. Defaults to `c("SCREENING", "BASELINE")`.
#'
#' @return `df` containing variables shared between `adlb` and `adsl` along with variables `PARAM`, `PARAMCD`,
#'   `ATOXGR`, and `BTOXGR` relevant for analysis. Optionally, `AVISIT` and `AVISITN` are included when
#'   `by_visit = TRUE` and `no_fillin_visits = c("SCREENING", "BASELINE")`.
#'
#' @details In the result data missing records will be created for the following situations:
#'   * Patients who are present in `adsl` but have no lab data in `adlb` (both baseline and post-baseline).
#'   * Patients who do not have any post-baseline lab values.
#'   * Patients without any post-baseline values flagged as the worst.
#'
#' @examples
#' # `h_adsl_adlb_merge_using_worst_flag`
#' adlb_out <- h_adsl_adlb_merge_using_worst_flag(
#'   tern_ex_adsl,
#'   tern_ex_adlb,
#'   worst_flag = c("WGRHIFL" = "Y")
#' )
#'
#' # `h_adsl_adlb_merge_using_worst_flag` by visit example
#' adlb_out_by_visit <- h_adsl_adlb_merge_using_worst_flag(
#'   tern_ex_adsl,
#'   tern_ex_adlb,
#'   worst_flag = c("WGRLOVFL" = "Y"),
#'   by_visit = TRUE
#' )
#'
#' @export
h_adsl_adlb_merge_using_worst_flag <- function(adsl, # nolint
                                               adlb,
                                               worst_flag = c("WGRHIFL" = "Y"),
                                               by_visit = FALSE,
                                               no_fillin_visits = c("SCREENING", "BASELINE")) {
  # Merges the flagged `adlb` records onto a complete patient x parameter
  # (x visit, if `by_visit`) grid built from `adsl`, so that combinations
  # without a flagged record appear as rows with missing values.

  # Row positions of `adlb` satisfying every `worst_flag` condition
  # (names are the flag columns, values are the required flag values).
  flagged_rows <- Map(
    function(x, y) which(adlb[[x]] == y),
    names(worst_flag),
    worst_flag
  )
  position_satisfy_filters <- Reduce(intersect, flagged_rows)

  adsl_adlb_common_columns <- intersect(colnames(adsl), colnames(adlb))
  columns_from_adlb <- c("USUBJID", "PARAM", "PARAMCD", "AVISIT", "AVISITN", "ATOXGR", "BTOXGR")

  adlb_f <- adlb[position_satisfy_filters, ] %>%
    dplyr::filter(!.data[["AVISIT"]] %in% no_fillin_visits)
  adlb_f <- adlb_f[, columns_from_adlb]

  # Build the grid of combinations that must be present in the output.
  if (by_visit) {
    by_variables_from_adlb <- c("USUBJID", "AVISIT", "AVISITN", "PARAMCD", "PARAM")

    avisits_grid <- adlb %>%
      dplyr::filter(!.data[["AVISIT"]] %in% no_fillin_visits) %>%
      dplyr::pull(.data[["AVISIT"]]) %>%
      unique()

    adsl_lb <- expand.grid(
      USUBJID = unique(adsl$USUBJID),
      AVISIT = avisits_grid,
      PARAMCD = unique(adlb$PARAMCD)
    )
    adsl_lb <- adsl_lb %>%
      dplyr::left_join(unique(adlb[c("AVISIT", "AVISITN")]), by = "AVISIT")
  } else {
    by_variables_from_adlb <- c("USUBJID", "PARAMCD", "PARAM")

    adsl_lb <- expand.grid(
      USUBJID = unique(adsl$USUBJID),
      PARAMCD = unique(adlb$PARAMCD)
    )
  }
  adsl_lb <- adsl_lb %>%
    dplyr::left_join(unique(adlb[c("PARAM", "PARAMCD")]), by = "PARAMCD")

  # `drop = FALSE` keeps a data frame even if `USUBJID` is the only shared
  # column; otherwise the subset collapses to a vector and the merge on
  # `USUBJID` fails.
  adsl1 <- adsl[, adsl_adlb_common_columns, drop = FALSE]
  adsl_lb <- merge(adsl1, adsl_lb, by = "USUBJID")

  # Full outer merge, so that grid rows without a flagged record are kept.
  adlb_out <- merge(
    adlb_f,
    adsl_lb,
    by = by_variables_from_adlb,
    all = TRUE,
    sort = FALSE
  )

  if (by_visit) {
    # Overwrite `BTOXGR` with the value looked up per patient and parameter, so
    # that filled-in visit rows carry it as well.
    adlb_btoxgr <- adlb %>%
      dplyr::select(c("USUBJID", "PARAMCD", "BTOXGR")) %>%
      unique() %>%
      dplyr::rename("BTOXGR_MAP" = "BTOXGR")

    adlb_out <- adlb_out %>%
      dplyr::left_join(adlb_btoxgr, by = c("USUBJID", "PARAMCD")) %>%
      dplyr::mutate(BTOXGR = .data$BTOXGR_MAP) %>%
      dplyr::select(-"BTOXGR_MAP")
  }

  # Labels in the column order produced by the merge: `by` variables first, then
  # the remaining `adlb` columns, then the remaining shared `adsl` columns.
  adlb_var_labels <- c(
    formatters::var_labels(adlb[by_variables_from_adlb]),
    formatters::var_labels(adlb[columns_from_adlb[!columns_from_adlb %in% by_variables_from_adlb]]),
    formatters::var_labels(adsl[adsl_adlb_common_columns[adsl_adlb_common_columns != "USUBJID"]])
  )

  adlb_out$ATOXGR <- as.factor(adlb_out$ATOXGR)
  adlb_out$BTOXGR <- as.factor(adlb_out$BTOXGR)

  adlb_out <- df_explicit_na(adlb_out)
  formatters::var_labels(adlb_out) <- adlb_var_labels

  adlb_out
}

#' Patient Counts with the Most Extreme Post-baseline Toxicity Grade per Direction of Abnormality
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Primary analysis variable `.var` indicates the toxicity grade (`factor`), and additional
#' analysis variables are `id` (`character` or `factor`), `param` (`factor`) and `grade_dir` (`factor`).
#' The pre-processing steps are crucial when using this function.
#' For a certain direction (e.g. high or low) this function counts
#' patients in the denominator as number of patients with at least one valid measurement during treatment,
#' and patients in the numerator as follows:
#'   * `1` to `4`: Numerator is number of patients with worst grades 1-4 respectively;
#'   * `Any`: Numerator is number of patients with at least one abnormality, which means grade is different from 0.
#'
#' @inheritParams argument_convention
#'
#' @details The pre-processing steps are crucial when using this function. From the standard lab grade variable
#'   `ATOXGR`, derive the following two variables:
#'   * A grade direction variable (e.g. `GRADE_DIR`) is required in order to obtain
#'     the correct denominators when building the layout as it is used to define row splitting.
#'   * A toxicity grade variable (e.g. `GRADE_ANL`) where all negative values from
#'     `ATOXGR` are replaced by their absolute values.
#'
#' @note Prior to tabulation, `df` must be filtered to include only post-baseline records with worst grade flags.
#'
#' @name abnormal_by_worst_grade
NULL

#' @describeIn abnormal_by_worst_grade Statistics function which counts patients by worst grade.
#'
#' @return
#' * `s_count_abnormal_by_worst_grade()` returns the single statistic `count_fraction` with grades 1 to 4 and
#'   "Any" results.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#' adlb <- tern_ex_adlb
#'
#' # Data is modified in order to have some parameters with grades only in one direction
#' # and simulate the real data.
#' adlb$ATOXGR[adlb$PARAMCD == "ALT" & adlb$ATOXGR %in% c("1", "2", "3", "4")] <- "-1"
#' adlb$ANRIND[adlb$PARAMCD == "ALT" & adlb$ANRIND == "HIGH"] <- "LOW"
#' adlb$WGRHIFL[adlb$PARAMCD == "ALT"] <- ""
#'
#' adlb$ATOXGR[adlb$PARAMCD == "IGA" & adlb$ATOXGR %in% c("-1", "-2", "-3", "-4")] <- "1"
#' adlb$ANRIND[adlb$PARAMCD == "IGA" & adlb$ANRIND == "LOW"] <- "HIGH"
#' adlb$WGRLOFL[adlb$PARAMCD == "IGA"] <- ""
#'
#' # Here starts the real pre-processing.
#' adlb_f <- adlb %>%
#'   filter(!AVISIT %in% c("SCREENING", "BASELINE")) %>%
#'   mutate(
#'     GRADE_DIR = factor(
#'       case_when(
#'         ATOXGR %in% c("-1", "-2", "-3", "-4") ~ "LOW",
#'         ATOXGR == "0" ~ "ZERO",
#'         ATOXGR %in% c("1", "2", "3", "4") ~ "HIGH"
#'       ),
#'       levels = c("LOW", "ZERO", "HIGH")
#'     ),
#'     GRADE_ANL = fct_relevel(
#'       fct_recode(ATOXGR, `1` = "-1", `2` = "-2", `3` = "-3", `4` = "-4"),
#'       c("0", "1", "2", "3", "4")
#'     )
#'   ) %>%
#'   filter(WGRLOFL == "Y" | WGRHIFL == "Y") %>%
#'   droplevels()
#'
#' adlb_f_alt <- adlb_f %>%
#'   filter(PARAMCD == "ALT") %>%
#'   droplevels()
#' full_parent_df <- list(adlb_f_alt, "not_needed")
#' cur_col_subset <- list(rep(TRUE, nrow(adlb_f_alt)), "not_needed")
#'
#' # This mimics a split structure on PARAM and GRADE_DIR for a total column
#' spl_context <- data.frame(
#'   split = c("PARAM", "GRADE_DIR"),
#'   full_parent_df = I(full_parent_df),
#'   cur_col_subset = I(cur_col_subset)
#' )
#'
#' @keywords internal
s_count_abnormal_by_worst_grade <- function(df, # nolint
                                            .var = "GRADE_ANL",
                                            .spl_context,
                                            variables = list(
                                              id = "USUBJID",
                                              param = "PARAM",
                                              grade_dir = "GRADE_DIR"
                                            )) {
  # Counts, per grade level of `.var` (excluding "0") and for "Any", the unique
  # subjects in `df`, relative to the unique subjects of the parent `param` split.
  checkmate::assert_string(.var)
  assert_valid_factor(df[[.var]])
  assert_valid_factor(df[[variables$param]])
  assert_valid_factor(df[[variables$grade_dir]])
  assert_df_with_variables(df, c(a = .var, variables))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))

  # To verify that the `split_rows_by` are performed with correct variables.
  checkmate::assert_subset(c(variables[["param"]], variables[["grade_dir"]]), .spl_context$split)
  first_row <- .spl_context[.spl_context$split == variables[["param"]], ]
  x_lvls <- c(setdiff(levels(df[[.var]]), "0"), "Any")
  # Named list with one (initially empty) element per reported level.
  result <- split(numeric(0), factor(x_lvls))

  id_var <- variables[["id"]]
  subj <- first_row$full_parent_df[[1]][[id_var]]
  subj_cur_col <- subj[first_row$cur_col_subset[[1]]]
  # Some subjects may have a record for high and low directions but
  # should be counted only once.
  denom <- length(unique(subj_cur_col))

  for (lvl in x_lvls) {
    df_lvl <- if (lvl != "Any") {
      df[df[[.var]] == lvl, ]
    } else {
      df[df[[.var]] != "0", ]
    }
    # Use the configured id variable (not a hard-coded "USUBJID") so that the
    # numerator counts the same subject identifier as the denominator.
    num <- length(unique(df_lvl[[id_var]]))
    fraction <- if (denom == 0) 0 else num / denom
    result[[lvl]] <- formatters::with_label(c(count = num, fraction = fraction), lvl)
  }

  list(count_fraction = result)
}

#' @describeIn abnormal_by_worst_grade Formatted analysis function which is used as `afun`
#'   in `count_abnormal_by_worst_grade()`.
#'
#' @return
#' * `a_count_abnormal_by_worst_grade()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#'
#' @keywords internal
a_count_abnormal_by_worst_grade <- make_afun( # nolint
  # Statistics function being wrapped.
  s_count_abnormal_by_worst_grade,
  # Format attached to the `count_fraction` statistic.
  .formats = c(count_fraction = format_count_fraction)
)

#' @describeIn abnormal_by_worst_grade Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_by_worst_grade()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal_by_worst_grade()` to the table layout.
#'
#' @examples
#' # Map excludes records without abnormal grade since they should not be displayed
#' # in the table.
#' map <- unique(adlb_f[adlb_f$GRADE_DIR != "ZERO", c("PARAM", "GRADE_DIR", "GRADE_ANL")]) %>%
#'   lapply(as.character) %>%
#'   as.data.frame() %>%
#'   arrange(PARAM, desc(GRADE_DIR), GRADE_ANL)
#'
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   split_rows_by("PARAM") %>%
#'   split_rows_by("GRADE_DIR", split_fun = trim_levels_to_map(map)) %>%
#'   count_abnormal_by_worst_grade(
#'     var = "GRADE_ANL",
#'     variables = list(id = "USUBJID", param = "PARAM", grade_dir = "GRADE_DIR")
#'   ) %>%
#'   build_table(df = adlb_f)
#'
#' @export
count_abnormal_by_worst_grade <- function(lyt,
                                          var,
                                          ...,
                                          .stats = NULL,
                                          .formats = NULL,
                                          .labels = NULL,
                                          .indent_mods = NULL) {
  # Customize the formatted analysis function with the user-supplied settings;
  # the `count_fraction` statistic is ungrouped.
  customized_afun <- make_afun(
    a_count_abnormal_by_worst_grade,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "count_fraction"
  )

  # Additional arguments in `...` are forwarded via `extra_args`.
  analyze(
    lyt = lyt,
    vars = var,
    afun = customized_afun,
    extra_args = list(...),
    show_labels = "hidden"
  )
}

#' Patient Counts with Abnormal Range Values by Baseline Status
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Primary analysis variable `.var` indicates the abnormal range result (`character` or `factor`), and additional
#' analysis variables are `id` (`character` or `factor`) and `baseline` (`character` or `factor`). For each
#' direction specified in `abnormal` (e.g. high or low) we condition on baseline range result and count
#' patients in the numerator and denominator as follows:
#'   * `Not <Abnormal>`
#'     * `denom`: the number of patients without abnormality at baseline (excluding those with missing baseline)
#'     * `num`:  the number of patients in `denom` who also have at least one abnormality post-baseline
#'   * `<Abnormal>`
#'     * `denom`: the number of patients with abnormality at baseline
#'     * `num`: the number of patients in `denom` who also have at least one abnormality post-baseline
#'   * `Total`
#'     * `denom`: the number of patients with at least one valid measurement post-baseline
#'     * `num`: the number of patients in `denom` who also have at least one abnormality post-baseline
#'
#' @inheritParams argument_convention
#' @param abnormal (`character`)\cr identifying the abnormal range level(s) in `.var`.
#'
#' @note
#' * `df` should be filtered to include only post-baseline records.
#' * If the baseline variable or analysis variable contains `NA`, it is expected that `NA` has been
#'   conveyed to `na_level` appropriately beforehand with [df_explicit_na()] or [explicit_na()].
#'
#' @seealso Relevant description function [d_count_abnormal_by_baseline()].
#'
#' @name abnormal_by_baseline
NULL

#' Description Function for [s_count_abnormal_by_baseline()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Description function that produces the labels for [s_count_abnormal_by_baseline()].
#'
#' @inheritParams abnormal_by_baseline
#'
#' @return Abnormal category labels for [s_count_abnormal_by_baseline()].
#'
#' @examples
#' d_count_abnormal_by_baseline("LOW")
#'
#' @export
d_count_abnormal_by_baseline <- function(abnormal) {
  # Capitalized form of the abnormal level: upper-case first character
  # followed by the lower-cased remainder (e.g. "LOW" -> "Low").
  first_letter <- toupper(substr(abnormal, 1, 1))
  remainder <- tolower(substring(abnormal, 2))

  # One label per row produced by the statistics function.
  list(
    not_abnormal = paste("Not", tolower(abnormal)),
    abnormal = paste0(first_letter, remainder),
    total = "Total"
  )
}

#' @describeIn abnormal_by_baseline Statistics function for a single `abnormal` level.
#'
#' @param na_level (`string`)\cr the explicit `na_level` argument you used in the pre-processing steps (maybe with
#'   [df_explicit_na()]). The default is `"<Missing>"`.
#'
#' @return
#' * `s_count_abnormal_by_baseline()` returns statistic `fraction` which is a named list with 3 labeled elements:
#'   `not_abnormal`, `abnormal`, and `total`. Each element contains a vector with `num` and `denom` patient counts.
#'
#'
#' @keywords internal
s_count_abnormal_by_baseline <- function(df,
                                         .var,
                                         abnormal,
                                         na_level = "<Missing>",
                                         variables = list(id = "USUBJID", baseline = "BNRIND")) {
  # Validate the scalar arguments, then the presence and type of the id,
  # baseline and analysis columns.
  checkmate::assert_string(.var)
  checkmate::assert_string(abnormal)
  checkmate::assert_string(na_level)
  assert_df_with_variables(df, c(range = .var, variables))
  checkmate::assert_subset(names(variables), c("id", "baseline"))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[variables$baseline]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))

  # Character columns are turned into factors; afterwards neither the analysis
  # nor the baseline column may contain missing values.
  df[[.var]] <- as_factor_keep_attributes(df[[.var]], na_level = na_level)
  df[[variables$baseline]] <- as_factor_keep_attributes(df[[variables$baseline]], na_level = na_level)

  assert_valid_factor(df[[.var]], any.missing = FALSE)
  assert_valid_factor(df[[variables$baseline]], any.missing = FALSE)

  # Records whose analysis value equals `na_level` are dropped.
  df <- df[df[[.var]] != na_level, ]

  # Number of distinct patients among the given ids.
  n_patients <- function(ids) length(unique(ids))

  patient <- df[[variables$id]]
  baseline <- df[[variables$baseline]]
  post_abnormal <- df[[.var]] == abnormal

  # Total row:
  #  - denominator: patients with at least one valid post-baseline record.
  #  - numerator: patients with at least one abnormal post-baseline record.
  total_denom <- n_patients(patient)
  total_num <- n_patients(patient[post_abnormal])

  # Records whose baseline equals `na_level` contribute to the total row only.
  known_baseline <- baseline != na_level
  patient <- patient[known_baseline]
  post_abnormal <- post_abnormal[known_baseline]
  base_abnormal <- baseline[known_baseline] == abnormal

  # Abnormal row: patients with abnormality at baseline (denominator), and
  # those of them with at least one abnormal post-baseline record (numerator).
  abn_denom <- n_patients(patient[base_abnormal])
  abn_num <- n_patients(patient[base_abnormal & post_abnormal])

  # Not abnormal row: patients without abnormality at baseline (denominator),
  # and those of them with at least one abnormal post-baseline record (numerator).
  not_abn_denom <- n_patients(patient[!base_abnormal])
  not_abn_num <- n_patients(patient[!base_abnormal & post_abnormal])

  row_labels <- d_count_abnormal_by_baseline(abnormal)
  fraction <- list(
    not_abnormal = formatters::with_label(
      c(num = not_abn_num, denom = not_abn_denom),
      row_labels$not_abnormal
    ),
    abnormal = formatters::with_label(
      c(num = abn_num, denom = abn_denom),
      row_labels$abnormal
    ),
    total = formatters::with_label(
      c(num = total_num, denom = total_denom),
      row_labels$total
    )
  )
  list(fraction = fraction)
}

#' @describeIn abnormal_by_baseline Formatted analysis function which is used as `afun`
#'   in `count_abnormal_by_baseline()`.
#'
#' @return
#' * `a_count_abnormal_by_baseline()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#'
#' @keywords internal
a_count_abnormal_by_baseline <- make_afun(
  # Statistics function being wrapped.
  s_count_abnormal_by_baseline,
  # Default format applied to the `fraction` statistic.
  .formats = c(fraction = format_fraction)
)

#' @describeIn abnormal_by_baseline Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_by_baseline()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal_by_baseline()` to the table layout.
#'
#' @examples
#' df <- data.frame(
#'   USUBJID = as.character(c(1:6)),
#'   ANRIND = factor(c(rep("LOW", 4), "NORMAL", "HIGH")),
#'   BNRIND = factor(c("LOW", "NORMAL", "HIGH", NA, "LOW", "NORMAL"))
#' )
#' df <- df_explicit_na(df)
#'
#' # Layout creating function.
#' basic_table() %>%
#'   count_abnormal_by_baseline(var = "ANRIND", abnormal = c(High = "HIGH")) %>%
#'   build_table(df)
#'
#' # Passing of statistics function and formatting arguments.
#' df2 <- data.frame(
#'   ID = as.character(c(1, 2, 3, 4)),
#'   RANGE = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
#'   BLRANGE = factor(c("LOW", "HIGH", "HIGH", "NORMAL"))
#' )
#'
#' basic_table() %>%
#'   count_abnormal_by_baseline(
#'     var = "RANGE",
#'     abnormal = c(Low = "LOW"),
#'     variables = list(id = "ID", baseline = "BLRANGE"),
#'     .formats = c(fraction = "xx / xx"),
#'     .indent_mods = c(fraction = 2L)
#'   ) %>%
#'   build_table(df2)
#'
#' @export
count_abnormal_by_baseline <- function(lyt,
                                       var,
                                       abnormal,
                                       ...,
                                       table_names = abnormal,
                                       .stats = NULL,
                                       .formats = NULL,
                                       .labels = NULL,
                                       .indent_mods = NULL) {
  # `abnormal` must be a named character vector with one entry per table name.
  checkmate::assert_character(abnormal, len = length(table_names), names = "named")
  checkmate::assert_string(var)

  # Customize the formatted analysis function with the user's settings; the
  # `fraction` statistic is ungrouped.
  afun <- make_afun(
    a_count_abnormal_by_baseline,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "fraction"
  )

  # Add one `analyze()` layer per abnormal level; the name of each level is
  # used as the visible label of its section.
  for (idx in seq_along(abnormal)) {
    level <- abnormal[idx]
    stat_args <- c(list(abnormal = level), list(...))
    lyt <- analyze(
      lyt = lyt,
      vars = var,
      afun = afun,
      extra_args = stat_args,
      var_labels = names(level),
      table_names = table_names[idx],
      show_labels = "visible"
    )
  }

  lyt
}

#' Incidence Rate
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Estimate the event rate adjusted for person-years at risk, otherwise known
#' as incidence rate. Primary analysis variable is the person-years at risk.
#'
#' @inheritParams argument_convention
#' @param control (`list`)\cr parameters for estimation details, specified by using
#'   the helper function [control_incidence_rate()]. Possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level for the estimated incidence rate.
#'   * `conf_type` (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
#'     for confidence interval type.
#'   * `input_time_unit` (`string`)\cr `day`, `week`, `month`, or `year` (default)
#'     indicating time unit for data input.
#'   * `num_pt_year` (`numeric`)\cr time unit for desired output (in person-years).
#' @param person_years (`numeric`)\cr total person-years at risk.
#' @param alpha (`numeric`)\cr two-sided alpha-level for confidence interval.
#' @param n_events (`integer`)\cr number of events observed.
#'
#' @seealso [control_incidence_rate()] and helper functions [h_incidence_rate].
#'
#' @name incidence_rate
NULL

#' @describeIn incidence_rate Statistics function which estimates the incidence rate and the
#'   associated confidence interval.
#'
#' @return
#' * `s_incidence_rate()` returns the following statistics:
#'   - `person_years`: Total person-years at risk.
#'   - `n_events`: Total number of events observed.
#'   - `rate`: Estimated incidence rate.
#'   - `rate_ci`: Confidence interval for the incidence rate.
#'
#' @examples
#' library(dplyr)
#'
#' df <- data.frame(
#'   USUBJID = as.character(seq(6)),
#'   CNSR = c(0, 1, 1, 0, 0, 0),
#'   AVAL = c(10.1, 20.4, 15.3, 20.8, 18.7, 23.4),
#'   ARM = factor(c("A", "A", "A", "B", "B", "B"))
#' ) %>%
#'   mutate(is_event = CNSR == 0) %>%
#'   mutate(n_events = as.integer(is_event))
#'
#' @keywords internal
s_incidence_rate <- function(df,
                             .var,
                             n_events,
                             is_event,
                             control = control_incidence_rate()) {
  # Record which of the two event arguments were supplied before either of
  # them is (re)assigned below.
  has_is_event <- !missing(is_event)
  lacks_n_events <- missing(n_events)

  if (has_is_event) {
    warning("argument is_event will be deprecated. Please use n_events.")
  }

  if (has_is_event && lacks_n_events) {
    # Legacy input: a logical event indicator column stands in for `n_events`.
    assert_df_with_variables(df, list(tte = .var, is_event = is_event))
    checkmate::assert_string(.var)
    checkmate::assert_logical(df[[is_event]], any.missing = FALSE)
    checkmate::assert_numeric(df[[.var]], any.missing = FALSE)
    n_events <- is_event
  } else if (!has_is_event) {
    # Current input: an integer column holding the number of events.
    assert_df_with_variables(df, list(tte = .var, n_events = n_events))
    checkmate::assert_string(.var)
    checkmate::assert_numeric(df[[.var]], any.missing = FALSE)
    checkmate::assert_integer(df[[n_events]], any.missing = FALSE)
  }

  time_unit <- control$input_time_unit
  pt_years_scale <- control$num_pt_year
  level <- control$conf_level

  # Factor converting the input time unit into years; exactly one of the four
  # comparisons is TRUE for a supported unit.
  years_per_unit <- 1 * (time_unit == "year") +
    1 / 12 * (time_unit == "month") +
    1 / 52.14 * (time_unit == "week") +
    1 / 365.24 * (time_unit == "day")

  person_years <- sum(df[[.var]], na.rm = TRUE) * years_per_unit
  # From here on `n_events` holds the total count instead of the column name.
  n_events <- sum(df[[n_events]], na.rm = TRUE)

  estimate <- h_incidence_rate(person_years, n_events, control)

  list(
    person_years = formatters::with_label(person_years, "Total patient-years at risk"),
    n_events = formatters::with_label(n_events, "Number of adverse events observed"),
    rate = formatters::with_label(
      estimate$rate,
      paste("AE rate per", pt_years_scale, "patient-years")
    ),
    rate_ci = formatters::with_label(estimate$rate_ci, f_conf_level(level))
  )
}

#' @describeIn incidence_rate Formatted analysis function which is used as `afun`
#'   in `estimate_incidence_rate()`.
#'
#' @return
#' * `a_incidence_rate()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#'
#' @keywords internal
a_incidence_rate <- make_afun(
  s_incidence_rate,
  # Default format of each statistic returned by `s_incidence_rate()`.
  .formats = c(
    person_years = "xx.x",
    n_events = "xx",
    rate = "xx.xx",
    rate_ci = "(xx.xx, xx.xx)"
  )
)

#' @describeIn incidence_rate Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `estimate_incidence_rate()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_incidence_rate()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   estimate_incidence_rate(
#'     vars = "AVAL",
#'     n_events = "n_events",
#'     control = control_incidence_rate(
#'       input_time_unit = "month",
#'       num_pt_year = 100
#'     )
#'   ) %>%
#'   build_table(df)
#'
#' @export
estimate_incidence_rate <- function(lyt,
                                    vars,
                                    ...,
                                    show_labels = "hidden",
                                    table_names = vars,
                                    .stats = NULL,
                                    .formats = NULL,
                                    .labels = NULL,
                                    .indent_mods = NULL) {
  # Customize the formatted analysis function with the user's settings.
  customized_afun <- make_afun(
    a_incidence_rate,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Arguments given through `...` are forwarded to the analysis function.
  stat_args <- list(...)

  analyze(
    lyt = lyt,
    vars = vars,
    afun = customized_afun,
    extra_args = stat_args,
    table_names = table_names,
    show_labels = show_labels
  )
}

#' Helper Functions for Incidence Rate
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param control (`list`)\cr parameters for estimation details, specified by using
#'   the helper function [control_incidence_rate()]. Possible parameter options are:
#'   * `conf_level`: (`proportion`)\cr confidence level for the estimated incidence rate.
#'   * `conf_type`: (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
#'     for confidence interval type.
#'   * `input_time_unit`: (`string`)\cr `day`, `week`, `month`, or `year` (default)
#'     indicating time unit for data input.
#'   * `num_pt_year`: (`numeric`)\cr time unit for desired output (in person-years).
#' @param person_years (`numeric`)\cr total person-years at risk.
#' @param alpha (`numeric`)\cr two-sided alpha-level for confidence interval.
#' @param n_events (`integer`)\cr number of events observed.
#'
#' @return Estimated incidence rate `rate` and associated confidence interval `rate_ci`.
#'
#' @seealso [incidence_rate]
#'
#' @name h_incidence_rate
NULL

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated confidence interval based on the normal approximation for the
#'   incidence rate. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_normal(200, 2)
#'
#' @export
h_incidence_rate_normal <- function(person_years,
                                    n_events,
                                    alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  # Point estimate and its standard error.
  rate <- n_events / person_years
  std_err <- sqrt(rate / person_years)

  # Symmetric interval: estimate -/+ normal quantile times standard error.
  margin <- stats::qnorm(1 - alpha / 2) * std_err
  limits <- rate + c(-1, 1) * margin

  list(rate = rate, rate_ci = limits)
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated confidence interval based on the normal approximation for the
#'   logarithm of the incidence rate. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_normal_log(200, 2)
#'
#' @export
h_incidence_rate_normal_log <- function(person_years,
                                        n_events,
                                        alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  # Rate and its standard error on the original scale.
  rate <- n_events / person_years
  rate_std_err <- sqrt(rate / person_years)

  # Standard error on the log scale, computed as the ratio of the
  # original-scale standard error to the rate.
  log_std_err <- rate_std_err / rate
  log_margin <- stats::qnorm(1 - alpha / 2) * log_std_err

  # Interval is built on the log scale and transformed back.
  limits <- exp(log(rate) + c(-1, 1) * log_margin)

  list(rate = rate, rate_ci = limits)
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated exact confidence interval. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_exact(200, 2)
#'
#' @export
h_incidence_rate_exact <- function(person_years,
                                   n_events,
                                   alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  # Chi-square quantiles divided by twice the person-years give the limits:
  # lower uses 2 * n_events degrees of freedom, upper uses 2 * n_events + 2.
  lower_quantile <- stats::qchisq(p = (alpha) / 2, df = 2 * n_events)
  upper_quantile <- stats::qchisq(p = 1 - (alpha) / 2, df = 2 * n_events + 2)
  limits <- c(
    lower_quantile / (2 * person_years),
    upper_quantile / (2 * person_years)
  )

  list(rate = n_events / person_years, rate_ci = limits)
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated `Byar`'s confidence interval. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_byar(200, 2)
#'
#' @export
h_incidence_rate_byar <- function(person_years,
                                  n_events,
                                  alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  rate <- n_events / person_years

  # All terms of the interval use the event count shifted by 0.5.
  shifted_events <- n_events + 0.5
  centre <- 1 - 1 / (9 * shifted_events)
  spread <- stats::qnorm(1 - alpha / 2) * sqrt(1 / shifted_events) / 3

  # Limits: shifted count times the cubed (centre -/+ spread), per person-year.
  lower <- shifted_events * ((centre - spread)^3) / person_years
  upper <- shifted_events * ((centre + spread)^3) / person_years

  list(rate = rate, rate_ci = c(lower, upper))
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated confidence interval.
#'
#'
#' @keywords internal
h_incidence_rate <- function(person_years,
                             n_events,
                             control = control_incidence_rate()) {
  # Two-sided alpha level corresponding to the requested confidence level.
  alpha <- 1 - control$conf_level

  # Dispatch to the helper implementing the requested interval type. Without
  # the final default, `switch()` would return `NULL` for an unrecognized type
  # and the function would silently return zero-length results.
  est <- switch(control$conf_type,
    normal = h_incidence_rate_normal(person_years, n_events, alpha),
    normal_log = h_incidence_rate_normal_log(person_years, n_events, alpha),
    exact = h_incidence_rate_exact(person_years, n_events, alpha),
    byar = h_incidence_rate_byar(person_years, n_events, alpha),
    stop(paste(
      control$conf_type,
      "is not a supported confidence interval type;",
      "use one of 'normal', 'normal_log', 'exact' or 'byar'."
    ))
  )

  # The helpers work per one person-year; rescale to the requested number of
  # person-years.
  num_pt_year <- control$num_pt_year
  list(
    rate = est$rate * num_pt_year,
    rate_ci = est$rate_ci * num_pt_year
  )
}

#' Encode Categorical Missing Values in a Data Frame
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a helper function to encode missing entries across groups of categorical
#' variables in a data frame.
#'
#' @details Missing entries are those with `NA` or empty strings and will
#'   be replaced with a specified value. If factor variables include missing
#'   values, the missing value will be inserted as the last level.
#'   Similarly, in case character or logical variables should be converted to factors
#'   with the `char_as_factor` or `logical_as_factor` options, the missing values will
#'   be set as the last level.
#'
#' @param data (`data.frame`)\cr data set.
#' @param omit_columns (`character`)\cr names of variables from `data` that should
#'   not be modified by this function.
#' @param char_as_factor (`flag`)\cr whether to convert character variables
#'   in `data` to factors.
#' @param logical_as_factor (`flag`)\cr whether to convert logical variables
#'   in `data` to factors.
#' @param na_level (`string`)\cr used to replace all `NA` or empty
#'   values inside non-`omit_columns` columns.
#'
#' @return A `data.frame` with the chosen modifications applied.
#'
#' @seealso [sas_na()] and [explicit_na()] for other missing data helper functions.
#'
#' @examples
#' my_data <- data.frame(
#'   u = c(TRUE, FALSE, NA, TRUE),
#'   v = factor(c("A", NA, NA, NA), levels = c("Z", "A")),
#'   w = c("A", "B", NA, "C"),
#'   x = c("D", "E", "F", NA),
#'   y = c("G", "H", "I", ""),
#'   z = c(1, 2, 3, 4),
#'   stringsAsFactors = FALSE
#' )
#'
#' # Example 1
#' # Encode missing values in all character or factor columns.
#' df_explicit_na(my_data)
#' # Also convert logical columns to factor columns.
#' df_explicit_na(my_data, logical_as_factor = TRUE)
#' # Encode missing values in a subset of columns.
#' df_explicit_na(my_data, omit_columns = c("x", "y"))
#'
#' # Example 2
#' # Here we purposefully convert all `M` values to `NA` in the `SEX` variable.
#' # After running `df_explicit_na` the `NA` values are encoded as `<Missing>` but they are not
#' # included when generating `rtables`.
#' adsl <- tern_ex_adsl
#' adsl$SEX[adsl$SEX == "M"] <- NA
#' adsl <- df_explicit_na(adsl)
#'
#' # If you want the `NA` values to be displayed in the table use the `na_level` argument.
#' adsl <- tern_ex_adsl
#' adsl$SEX[adsl$SEX == "M"] <- NA
#' adsl <- df_explicit_na(adsl, na_level = "Missing Values")
#'
#' # Example 3
#' # Numeric variables that have missing values are not altered. This means that any `NA` value in
#' # a numeric variable will not be included in the summary statistics, nor will they be included
#' # in the denominator value for calculating the percent values.
#' adsl <- tern_ex_adsl
#' adsl$AGE[adsl$AGE < 30] <- NA
#' adsl <- df_explicit_na(adsl)
#'
#' @export
df_explicit_na <- function(data,
                           omit_columns = NULL,
                           char_as_factor = TRUE,
                           logical_as_factor = FALSE,
                           na_level = "<Missing>") {
  checkmate::assert_character(omit_columns, null.ok = TRUE, min.len = 1, any.missing = FALSE)
  checkmate::assert_data_frame(data)
  checkmate::assert_flag(char_as_factor)
  checkmate::assert_flag(logical_as_factor)
  checkmate::assert_string(na_level)

  # Columns to process: all of them, or all except `omit_columns`.
  # Without `omit_columns` the names are taken as they are (duplicates
  # included); `setdiff()` returns each remaining name once.
  target_vars <- if (is.null(omit_columns)) {
    names(data)
  } else {
    setdiff(names(data), omit_columns)
  }

  # Nothing to encode: hand the data back untouched.
  if (length(target_vars) == 0) {
    return(data)
  }

  # Makes sure target_vars exist in data and names are not duplicated.
  assert_df_with_variables(data, split(target_vars, target_vars))

  for (col in target_vars) {
    values <- data[[col]]
    col_label <- obj_label(values)

    # Should this column end up as a factor because of its original type?
    from_character <- is.character(values) && char_as_factor
    from_logical <- is.logical(values) && logical_as_factor

    # Logical columns are converted to character first so that their missing
    # values can be replaced in the same way as for character columns.
    if (from_logical) {
      values <- as.character(values)
    }

    # Only factor and character columns are modified.
    if (!(is.factor(values) || is.character(values))) {
      next
    }

    # Replace empty strings and NA values by `na_level`.
    values <- explicit_na(sas_na(values), label = na_level)

    if (from_character || from_logical) {
      # Sorted observed values as levels, with `na_level` (if present) last.
      observed <- unique(values)
      new_levels <- c(
        setdiff(sort(observed), na_level),
        if (na_level %in% observed) na_level
      )
      values <- factor(values, levels = new_levels)
    }

    # Write the column back, restoring the label captured above.
    data[, col] <- formatters::with_label(values, label = col_label)
  }

  data
}

#' Proportion Difference
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#'
#' @seealso [d_proportion_diff()]
#'
#' @name prop_diff
NULL

#' @describeIn prop_diff Statistics function estimating the difference
#'   in terms of responder proportion.
#'
#' @inheritParams prop_diff_strat_nc
#' @param method (`string`)\cr the method used for the confidence interval estimation.
#'
#' @return
#' * `s_proportion_diff()` returns a named list of elements `diff` and `diff_ci`.
#'
#' @note When performing an unstratified analysis, methods `"cmh"`, `"strat_newcombe"`, and `"strat_newcombecc"` are
#'   not permitted.
#'
#' @examples
#' # Summary
#'
#' ## Simulated data: random responses, groups, and two stratification factors.
#' nex <- 100 # Number of example rows
#' dta <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
#'   "grp" = sample(c("A", "B"), nex, TRUE),
#'   "f1" = sample(c("a1", "a2"), nex, TRUE),
#'   "f2" = sample(c("x", "y", "z"), nex, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' s_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   conf_level = 0.90,
#'   method = "ha"
#' )
#'
#' # CMH example with strata
#' s_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   variables = list(strata = c("f1", "f2")),
#'   conf_level = 0.90,
#'   method = "cmh"
#' )
#'
#' @export
s_proportion_diff <- function(df,
                              .var,
                              .ref_group,
                              .in_ref_col,
                              variables = list(strata = NULL),
                              conf_level = 0.95,
                              method = c(
                                "waldcc", "wald", "cmh",
                                "ha", "newcombe", "newcombecc",
                                "strat_newcombe", "strat_newcombecc"
                              ),
                              weights_method = "cmh") {
  method <- match.arg(method)

  # Stratified methods cannot be used without stratification variables.
  if (is.null(variables$strata) && checkmate::test_subset(method, c("cmh", "strat_newcombe", "strat_newcombecc"))) {
    stop(paste(
      "When performing an unstratified analysis, methods 'cmh', 'strat_newcombe', and 'strat_newcombecc' are not",
      "permitted. Please choose a different method."
    ))
  }

  # Empty placeholders, returned (with labels) for the reference column.
  y <- list(diff = "", diff_ci = "")

  if (!.in_ref_col) {
    # Stack reference rows first, then the rows of the current column, and
    # build the matching two-level group factor.
    rsp <- c(.ref_group[[.var]], df[[.var]])
    grp <- factor(
      rep(
        c("ref", "Not-ref"),
        c(nrow(.ref_group), nrow(df))
      ),
      levels = c("ref", "Not-ref")
    )

    if (!is.null(variables$strata)) {
      strata_colnames <- variables$strata
      checkmate::assert_character(strata_colnames, null.ok = FALSE)
      strata_vars <- stats::setNames(as.list(strata_colnames), strata_colnames)

      assert_df_with_variables(df, strata_vars)
      assert_df_with_variables(.ref_group, strata_vars)

      # Merging interaction strata for reference group rows data and remaining
      strata <- c(
        interaction(.ref_group[strata_colnames]),
        interaction(df[strata_colnames])
      )
      strata <- as.factor(strata)
    }

    # Weights used by the stratified Newcombe methods. A value supplied through
    # `variables$weights_method` takes precedence; otherwise the
    # `weights_method` argument (default "cmh") is used as passed by the
    # caller instead of being overwritten.
    if (!is.null(variables$weights_method)) {
      weights_method <- variables$weights_method
    }

    y <- switch(method,
      "wald" = prop_diff_wald(rsp, grp, conf_level, correct = FALSE),
      "waldcc" = prop_diff_wald(rsp, grp, conf_level, correct = TRUE),
      "ha" = prop_diff_ha(rsp, grp, conf_level),
      "newcombe" = prop_diff_nc(rsp, grp, conf_level, correct = FALSE),
      "newcombecc" = prop_diff_nc(rsp, grp, conf_level, correct = TRUE),
      "strat_newcombe" = prop_diff_strat_nc(rsp,
        grp,
        strata,
        weights_method,
        conf_level,
        correct = FALSE
      ),
      "strat_newcombecc" = prop_diff_strat_nc(rsp,
        grp,
        strata,
        weights_method,
        conf_level,
        correct = TRUE
      ),
      "cmh" = prop_diff_cmh(rsp, grp, strata, conf_level)[c("diff", "diff_ci")]
    )

    # Report the difference and its interval in percent.
    y$diff <- y$diff * 100
    y$diff_ci <- y$diff_ci * 100
  }

  attr(y$diff, "label") <- "Difference in Response rate (%)"
  attr(y$diff_ci, "label") <- d_proportion_diff(
    conf_level, method,
    long = FALSE
  )

  y
}

#' @describeIn prop_diff Formatted analysis function which is used as `afun` in `estimate_proportion_diff()`.
#'
#' @return
#' * `a_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   conf_level = 0.90,
#'   method = "ha"
#' )
#'
#' @export
a_proportion_diff <- make_afun(
  s_proportion_diff,
  # The confidence interval row is indented one level below the difference.
  .indent_mods = c("diff" = 0L, "diff_ci" = 1L),
  # Default format of each statistic.
  .formats = c("diff" = "xx.x", "diff_ci" = "(xx.x, xx.x)")
)

#' @describeIn prop_diff Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... arguments passed to `s_proportion_diff()`.
#'
#' @return
#' * `estimate_proportion_diff()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_proportion_diff()` to the table layout.
#'
#' @examples
#' l <- basic_table() %>%
#'   split_cols_by(var = "grp", ref_group = "B") %>%
#'   estimate_proportion_diff(
#'     vars = "rsp",
#'     conf_level = 0.90,
#'     method = "ha"
#'   )
#'
#' build_table(l, df = dta)
#'
#' @export
estimate_proportion_diff <- function(lyt,
                                     vars,
                                     ...,
                                     var_labels = vars,
                                     show_labels = "hidden",
                                     table_names = vars,
                                     .stats = NULL,
                                     .formats = NULL,
                                     .labels = NULL,
                                     .indent_mods = NULL) {
  # Customize the formatted analysis function with the user's settings.
  customized_afun <- make_afun(
    a_proportion_diff,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Arguments given through `...` are forwarded to the analysis function.
  stat_args <- list(...)

  analyze(
    lyt = lyt,
    vars = vars,
    afun = customized_afun,
    extra_args = stat_args,
    var_labels = var_labels,
    table_names = table_names,
    show_labels = show_labels
  )
}

#' Check: Proportion Difference Arguments
#'
#' Verifies that the arguments are valid for use in the estimation of the
#' difference in responder proportions.
#'
#' @inheritParams prop_diff
#' @inheritParams prop_diff_wald
#'
#' @keywords internal
check_diff_prop_ci <- function(rsp,
                               grp,
                               strata = NULL,
                               conf_level,
                               correct = NULL) {
  # Response: logical vector without missing values.
  checkmate::assert_logical(rsp, any.missing = FALSE)
  # Group: complete two-level factor with one entry per response.
  checkmate::assert_factor(grp, len = length(rsp), any.missing = FALSE, n.levels = 2)
  # Confidence level: a single number between 0 and 1.
  checkmate::assert_number(conf_level, lower = 0, upper = 1)
  # Continuity correction: a flag, or NULL when not applicable.
  checkmate::assert_flag(correct, null.ok = TRUE)

  # Strata are optional; when given, a factor with one entry per response.
  has_strata <- !is.null(strata)
  if (has_strata) {
    checkmate::assert_factor(strata, len = length(rsp))
  }

  # Called for its checks only.
  invisible(NULL)
}

#' Description of Method Used for Proportion Comparison
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function that describes the analysis in
#' `s_proportion_diff`.
#'
#' @inheritParams s_proportion_diff
#' @param long (`logical`)\cr Whether a long or a short (default) description is required.
#'
#' @return A `string` describing the analysis.
#'
#' @seealso [prop_diff]
#'
#' @export
d_proportion_diff <- function(conf_level,
                              method,
                              long = FALSE) {
  # Description of the estimation method, shown in parentheses at the end of
  # the label. Methods without a description are rejected.
  method_text <- switch(method,
    "cmh" = "CMH, without correction",
    "waldcc" = "Wald, with correction",
    "wald" = "Wald, without correction",
    "ha" = "Anderson-Hauck",
    "newcombe" = "Newcombe, without correction",
    "newcombecc" = "Newcombe, with correction",
    "strat_newcombe" = "Stratified Newcombe, without correction",
    "strat_newcombecc" = "Stratified Newcombe, with correction",
    stop(paste(method, "does not have a description"))
  )

  # Confidence level expressed in percent, e.g. "95% CI".
  ci_text <- paste0(conf_level * 100, "% CI")

  # The long form also states what the interval refers to; only the CMH
  # method is described as an adjusted difference.
  if (long) {
    target_text <- if (method == "cmh") {
      "for adjusted difference"
    } else {
      "for difference"
    }
    ci_text <- paste(ci_text, target_text)
  }

  paste0(ci_text, " (", method_text, ")")
}

#' Helper Functions to Calculate Proportion Difference
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @inheritParams prop_diff
#' @param grp (`factor`)\cr vector assigning observations to one out of two groups
#'   (e.g. reference and treatment group).
#'
#' @return A named `list` of elements `diff` (proportion difference) and `diff_ci`
#'   (proportion difference confidence interval).
#'
#' @seealso [prop_diff] for the implementation of these helper functions.
#'
#' @name h_prop_diff
NULL

#' @describeIn h_prop_diff The Wald interval follows the usual textbook
#'   definition for a single proportion confidence interval using the normal
#'   approximation. It is possible to include a continuity correction for Wald's
#'   interval.
#'
#' @param correct (`logical`)\cr whether to include the continuity correction. For further
#'   information, see [stats::prop.test()].
#'
#' @examples
#' # Wald confidence interval
#' set.seed(2)
#' rsp <- sample(c(TRUE, FALSE), replace = TRUE, size = 20)
#' grp <- factor(c(rep("A", 10), rep("B", 10)))
#' prop_diff_wald(rsp = rsp, grp = grp, conf_level = 0.95, correct = FALSE)
#'
#' @export
prop_diff_wald <- function(rsp,
                           grp,
                           conf_level = 0.95,
                           correct = FALSE) {
  # Method name handed to `desctools_binom()`: with or without continuity
  # correction.
  mthd <- if (isTRUE(correct)) "waldcc" else "wald"

  grp <- as_factor_keep_attributes(grp)
  check_diff_prop_ci(
    rsp = rsp,
    grp = grp,
    conf_level = conf_level,
    correct = correct
  )

  # check if binary response is coded as logical
  checkmate::assert_logical(rsp, any.missing = FALSE)
  checkmate::assert_factor(grp, len = length(rsp), any.missing = FALSE, n.levels = 2)

  # 2 x 2 table of group (rows) by response (columns TRUE, FALSE). Linear
  # indices: 1 and 2 are the TRUE counts of the first and second group level,
  # 3 and 4 the corresponding FALSE counts.
  counts <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
  responders_ref <- counts[1]
  responders_other <- counts[2]
  size_ref <- sum(counts[1], counts[3])
  size_other <- sum(counts[2], counts[4])

  # x1 and n1 are non-reference groups.
  estimate <- desctools_binom(
    x1 = responders_other, n1 = size_other,
    x2 = responders_ref, n2 = size_ref,
    conf.level = conf_level,
    method = mthd
  )

  list(
    "diff" = unname(estimate[, "est"]),
    "diff_ci" = unname(estimate[, c("lwr.ci", "upr.ci")])
  )
}

#' @describeIn h_prop_diff Anderson-Hauck confidence interval.
#'
#' @examples
#' # Anderson-Hauck confidence interval
#' ## "Mid" case: 3/4 respond in group A, 1/2 respond in group B.
#' rsp <- c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE)
#' grp <- factor(c("A", "B", "A", "B", "A", "A"), levels = c("B", "A"))
#' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.90)
#'
#' ## Edge case: Same proportion of response in A and B.
#' rsp <- c(TRUE, FALSE, TRUE, FALSE)
#' grp <- factor(c("A", "A", "B", "B"), levels = c("A", "B"))
#' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.6)
#'
#' @export
prop_diff_ha <- function(rsp,
                         grp,
                         conf_level) {
  grp <- as_factor_keep_attributes(grp)
  check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level)

  # Counts by group (rows) and response (columns: TRUE first, then FALSE).
  counts <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
  # Column-major positions: 1 and 2 hold the TRUE counts, 3 and 4 the FALSE
  # counts, for the first and second level of `grp` respectively.
  rsp_first <- counts[1]
  rsp_second <- counts[2]
  n_first <- sum(counts[1], counts[3])
  n_second <- sum(counts[2], counts[4])

  # x1 and n1 are the non-reference group (second level of `grp`).
  ha_ci <- desctools_binom(
    x1 = rsp_second, n1 = n_second,
    x2 = rsp_first, n2 = n_first,
    conf.level = conf_level,
    method = "ha"
  )

  list(
    diff = unname(ha_ci[, "est"]),
    diff_ci = unname(ha_ci[, c("lwr.ci", "upr.ci")])
  )
}

#' @describeIn h_prop_diff `Newcombe` confidence interval. It is based on
#'   the Wilson score confidence interval for a single binomial proportion.
#'
#' @examples
#' # `Newcombe` confidence interval
#'
#' set.seed(1)
#' rsp <- c(
#'   sample(c(TRUE, FALSE), size = 40, prob = c(3 / 4, 1 / 4), replace = TRUE),
#'   sample(c(TRUE, FALSE), size = 40, prob = c(1 / 2, 1 / 2), replace = TRUE)
#' )
#' grp <- factor(rep(c("A", "B"), each = 40), levels = c("B", "A"))
#' table(rsp, grp)
#' prop_diff_nc(rsp = rsp, grp = grp, conf_level = 0.9)
#'
#' @export
prop_diff_nc <- function(rsp,
                         grp,
                         conf_level,
                         correct = FALSE) {
  # Only a literal `TRUE` selects the continuity-corrected score method.
  mthd <- if (isTRUE(correct)) "scorecc" else "score"

  grp <- as_factor_keep_attributes(grp)
  check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level)

  # Counts by group (rows) and response (columns: TRUE first, then FALSE).
  # Both the estimate and the interval are taken from `desctools_binom()`, so
  # no separate per-group proportions need to be computed here.
  tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
  ci <- desctools_binom(
    # x1 and n1 are non-reference groups.
    x1 = tbl[2], n1 = sum(tbl[2], tbl[4]),
    x2 = tbl[1], n2 = sum(tbl[1], tbl[3]),
    conf.level = conf_level,
    method = mthd
  )

  # Return the point estimate and the (lower, upper) bounds without names.
  list(
    "diff" = unname(ci[, "est"]),
    "diff_ci" = unname(ci[, c("lwr.ci", "upr.ci")])
  )
}

#' @describeIn h_prop_diff Calculates the weighted difference. This is defined as the difference in
#'   response rates between the experimental treatment group and the control treatment group, adjusted
#'   for stratification factors by applying `Cochran-Mantel-Haenszel` (`CMH`) weights. For the `CMH` chi-squared
#'   test, use [stats::mantelhaen.test()].
#'
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
#'
#' @examples
#' # Cochran-Mantel-Haenszel confidence interval
#'
#' set.seed(2)
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
#' grp <- sample(c("Placebo", "Treatment"), 100, TRUE)
#' grp <- factor(grp, levels = c("Placebo", "Treatment"))
#' strata_data <- data.frame(
#'   "f1" = sample(c("a", "b"), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' prop_diff_cmh(
#'   rsp = rsp, grp = grp, strata = interaction(strata_data),
#'   conf_level = 0.90
#' )
#'
#' @export
prop_diff_cmh <- function(rsp,
                          grp,
                          strata,
                          conf_level = 0.95) {
  grp <- as_factor_keep_attributes(grp)
  strata <- as_factor_keep_attributes(strata)
  check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level, strata = strata)

  stratum_sizes <- tapply(rsp, strata, length)
  if (any(stratum_sizes < 5)) {
    warning("Less than 5 observations in some strata.")
  }

  # Three-way table of counts:
  #   dim 1 = response (FALSE, TRUE), dim 2 = group (control, treatment),
  #   dim 3 = stratum.
  # The response is converted to a factor with both levels so that the table
  # keeps its shape even when only TRUE (or only FALSE) records are present.
  counts <- table(
    factor(rsp, levels = c("FALSE", "TRUE")),
    grp,
    strata
  )

  # Per-stratum group sizes and response proportions.
  size_ctl <- colSums(counts[1:2, 1, ])
  size_trt <- colSums(counts[1:2, 2, ])
  prop_ctl <- counts[2, 1, ] / size_ctl
  prop_trt <- counts[2, 2, ] / size_trt

  # Only strata in which both groups are represented contribute.
  keep <- (size_ctl > 0) & (size_trt > 0)
  n1 <- size_ctl[keep]
  n2 <- size_trt[keep]
  p1 <- prop_ctl[keep]
  p2 <- prop_trt[keep]

  # CMH weights, normalized to sum to one.
  raw_wt <- (n1 * n2 / (n1 + n2))
  w <- raw_wt / sum(raw_wt)

  # Weighted proportion per group with its normal-approximation interval.
  z <- stats::qnorm((1 + conf_level) / 2)
  est1 <- sum(w * p1)
  est2 <- sum(w * p2)
  half_width1 <- z * sqrt(sum(w^2 * p1 * (1 - p1) / n1))
  half_width2 <- z * sqrt(sum(w^2 * p2 * (1 - p2) / n2))
  group_names <- levels(grp)
  prop_est <- stats::setNames(c(est1, est2), group_names)
  prop_est_ci <- stats::setNames(
    list(est1 + c(-1, 1) * half_width1, est2 + c(-1, 1) * half_width2),
    group_names
  )

  # Weighted difference (second group minus first group) and its interval.
  diff_est <- est2 - est1
  se_diff <- sqrt(sum(((p1 * (1 - p1) / n1) + (p2 * (1 - p2) / n2)) * w^2))
  diff_half_width <- z * se_diff

  list(
    prop = prop_est,
    prop_ci = prop_est_ci,
    diff = diff_est,
    diff_ci = diff_est + c(-1, 1) * diff_half_width,
    weights = w,
    n1 = n1,
    n2 = n2
  )
}

#' @describeIn h_prop_diff Calculates the stratified `Newcombe` confidence interval and difference in response
#'   rates between the experimental treatment group and the control treatment group, adjusted for stratification
#'   factors. This implementation follows closely the one proposed by \insertCite{Yan2010-jt;textual}{tern}.
#'   Weights can be estimated from the heuristic proposed in [prop_strat_wilson()] or from `CMH`-derived weights
#'   (see [prop_diff_cmh()]).
#'
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
#' @param weights_method (`string`)\cr weights method. Can be either `"cmh"` or `"wilson_h"`
#'   and directs the way weights are estimated.
#'
#' @references
#' \insertRef{Yan2010-jt}{tern}
#'
#' @examples
#' # Stratified `Newcombe` confidence interval
#'
#' set.seed(2)
#' data_set <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), 100, TRUE),
#'   "f1" = sample(c("a", "b"), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   "grp" = sample(c("Placebo", "Treatment"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' prop_diff_strat_nc(
#'   rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]),
#'   weights_method = "cmh",
#'   conf_level = 0.90
#' )
#'
#' prop_diff_strat_nc(
#'   rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]),
#'   weights_method = "wilson_h",
#'   conf_level = 0.90
#' )
#'
#' @export
prop_diff_strat_nc <- function(rsp,
                               grp,
                               strata,
                               weights_method = c("cmh", "wilson_h"),
                               conf_level = 0.95,
                               correct = FALSE) {
  # Resolve the weighting scheme (defaults to "cmh") and validate the inputs.
  weights_method <- match.arg(weights_method)
  grp <- as_factor_keep_attributes(grp)
  strata <- as_factor_keep_attributes(strata)
  check_diff_prop_ci(
    rsp = rsp, grp = grp, conf_level = conf_level, strata = strata
  )
  checkmate::assert_number(conf_level, lower = 0, upper = 1)
  checkmate::assert_flag(correct)
  if (any(tapply(rsp, strata, length) < 5)) {
    warning("Less than 5 observations in some strata.")
  }

  # Split responses and strata by group; element 1 belongs to the first level
  # of `grp` (reference), element 2 to the second level (treatment).
  rsp_by_grp <- split(rsp, f = grp)
  strata_by_grp <- split(strata, f = grp)

  # Finding the weights
  # With "cmh", the normalized CMH weights of `prop_diff_cmh()` are reused; they
  # depend only on the group sizes per stratum, so `conf_level` is not passed.
  # Note that `prop_diff_cmh()` performs the same "< 5 observations" check, so
  # that warning can be raised a second time from there.
  weights <- if (identical(weights_method, "cmh")) {
    prop_diff_cmh(rsp = rsp, grp = grp, strata = strata)$weights
  } else if (identical(weights_method, "wilson_h")) {
    prop_strat_wilson(rsp, strata, conf_level = conf_level, correct = correct)$weights
  }
  # Strata levels without a weight get a zero weight (appended by name).
  weights[levels(strata)[!levels(strata) %in% names(weights)]] <- 0

  # Calculating lower (`l`) and upper (`u`) confidence bounds per group.
  # The same weights are supplied to both groups.
  strat_wilson_by_grp <- Map(
    prop_strat_wilson,
    rsp = rsp_by_grp,
    strata = strata_by_grp,
    weights = list(weights, weights),
    conf_level = conf_level,
    correct = correct
  )

  ci_ref <- strat_wilson_by_grp[[1]]
  ci_trt <- strat_wilson_by_grp[[2]]
  l_ref <- as.numeric(ci_ref$conf_int[1])
  u_ref <- as.numeric(ci_ref$conf_int[2])
  l_trt <- as.numeric(ci_trt$conf_int[1])
  u_trt <- as.numeric(ci_trt$conf_int[2])

  # Estimating the diff and n_ref, n_trt (it allows different weights to be used)
  # Table dimensions: response (FALSE, TRUE) x group x stratum.
  t_tbl <- table(
    factor(rsp, levels = c("FALSE", "TRUE")),
    grp,
    strata
  )
  n_ref <- colSums(t_tbl[1:2, 1, ])
  n_trt <- colSums(t_tbl[1:2, 2, ])
  # Keep only strata in which both groups have at least one observation.
  use_stratum <- (n_ref > 0) & (n_trt > 0)
  n_ref <- n_ref[use_stratum]
  n_trt <- n_trt[use_stratum]
  p_ref <- t_tbl[2, 1, use_stratum] / n_ref
  p_trt <- t_tbl[2, 2, use_stratum] / n_trt
  # NOTE(review): `weights` was padded above to cover every level of `strata`,
  # whereas `p_ref`, `p_trt`, `n_ref` and `n_trt` only cover the strata kept by
  # `use_stratum`. When some strata are dropped the operands below differ in
  # length and are combined by recycling - confirm that the element order of
  # `weights` always matches the kept strata (especially for "wilson_h").
  est1 <- sum(weights * p_ref)
  est2 <- sum(weights * p_trt)
  diff_est <- est2 - est1

  lambda1 <- sum(weights^2 / n_ref)
  lambda2 <- sum(weights^2 / n_trt)
  z <- stats::qnorm((1 + conf_level) / 2)

  # Combine the per-group bounds: the lower limit pairs the treatment lower
  # bound with the reference upper bound, the upper limit pairs the reference
  # lower bound with the treatment upper bound.
  lower <- diff_est - z * sqrt(lambda2 * l_trt * (1 - l_trt) + lambda1 * u_ref * (1 - u_ref))
  upper <- diff_est + z * sqrt(lambda1 * l_ref * (1 - l_ref) + lambda2 * u_trt * (1 - u_trt))

  list(
    "diff" = diff_est,
    "diff_ci" = c("lower" = lower, "upper" = upper)
  )
}

#' Odds Ratio Estimation
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Compares bivariate responses between two groups in terms of odds ratios
#' along with a confidence interval.
#'
#' @inheritParams argument_convention
#'
#' @details This function uses either logistic regression for unstratified
#'   analyses, or conditional logistic regression for stratified analyses.
#'   The Wald confidence interval with the specified confidence level is
#'   calculated.
#'
#' @note For stratified analyses, there is currently no implementation for conditional
#'   likelihood confidence intervals, therefore the likelihood confidence interval is not
#'   yet available as an option. Besides, when `rsp` contains only responders or non-responders,
#'   then the result values will be `NA`, because no odds ratio estimation is possible.
#'
#' @seealso Relevant helper function [h_odds_ratio()].
#'
#' @name odds_ratio
NULL

#' @describeIn odds_ratio Statistics function which estimates the odds ratio
#'   between a treatment and a control. A `variables` list with `arm` and `strata`
#'   variable names must be passed if a stratified analysis is required.
#'
#' @inheritParams split_cols_by_groups
#'
#' @return
#' * `s_odds_ratio()` returns a named list with the statistics `or_ci`
#'   (containing `est`, `lcl`, and `ucl`) and `n_tot`.
#'
#' @examples
#' set.seed(12)
#' dta <- data.frame(
#'   rsp = sample(c(TRUE, FALSE), 100, TRUE),
#'   grp = factor(rep(c("A", "B"), each = 50), levels = c("B", "A")),
#'   strata = factor(sample(c("C", "D"), 100, TRUE))
#' )
#'
#' # Unstratified analysis.
#' s_odds_ratio(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta
#' )
#'
#' # Stratified analysis.
#' s_odds_ratio(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta,
#'   variables = list(arm = "grp", strata = "strata")
#' )
#'
#' @export
s_odds_ratio <- function(df,
                         .var,
                         .ref_group,
                         .in_ref_col,
                         .df_row,
                         variables = list(arm = NULL, strata = NULL),
                         conf_level = 0.95,
                         groups_list = NULL) {
  # Default result: empty strings. These are returned unchanged (apart from the
  # labels added at the end) when the current column is the reference column.
  y <- list(or_ci = "", n_tot = "")

  if (!.in_ref_col) {
    assert_proportion_value(conf_level)
    assert_df_with_variables(df, list(rsp = .var))
    assert_df_with_variables(.ref_group, list(rsp = .var))

    if (is.null(variables$strata)) {
      # Unstratified analysis: stack the reference and current-column responses
      # and label them with a two-level factor whose first level is "ref".
      data <- data.frame(
        rsp = c(.ref_group[[.var]], df[[.var]]),
        grp = factor(
          rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))),
          levels = c("ref", "Not-ref")
        )
      )
      y <- or_glm(data, conf_level = conf_level)
    } else {
      # Stratified analysis: the model is fitted on the whole row data
      # (`.df_row`), which must contain the response, arm and strata variables.
      assert_df_with_variables(.df_row, c(list(rsp = .var), variables))

      # The group variable prepared for clogit must be synchronised with combination groups definition.
      if (is.null(groups_list)) {
        # No combined groups: the arm values found in `.ref_group` and `df`
        # name the reference and treatment groups directly.
        ref_grp <- as.character(unique(.ref_group[[variables$arm]]))
        trt_grp <- as.character(unique(df[[variables$arm]]))
        grp <- stats::relevel(factor(.df_row[[variables$arm]]), ref = ref_grp)
      } else {
        # If more than one level in reference col.
        # Pick the entries of `groups_list` that contain every arm level
        # present in the reference data.
        reference <- as.character(unique(.ref_group[[variables$arm]]))
        grp_ref_flag <- vapply(
          X = groups_list,
          FUN.VALUE = TRUE,
          FUN = function(x) all(reference %in% x)
        )
        ref_grp <- names(groups_list)[grp_ref_flag]

        # If more than one level in treatment col.
        # Same lookup for the arm levels present in the current column.
        treatment <- as.character(unique(df[[variables$arm]]))
        grp_trt_flag <- vapply(
          X = groups_list,
          FUN.VALUE = TRUE,
          FUN = function(x) all(treatment %in% x)
        )
        trt_grp <- names(groups_list)[grp_trt_flag]

        # Collapse the original arm levels into the combined group names.
        grp <- combine_levels(.df_row[[variables$arm]], levels = reference, new_level = ref_grp)
        grp <- combine_levels(grp, levels = treatment, new_level = trt_grp)
      }

      # The reference level in `grp` must be the same as in the `rtables` column split.
      data <- data.frame(
        rsp = .df_row[[.var]],
        grp = grp,
        strata = interaction(.df_row[variables$strata])
      )
      # `or_clogit()` returns one odds ratio per non-reference group; keep only
      # the one for the current column. `trt_grp` must therefore be a single
      # string that names one of the returned estimates.
      y_all <- or_clogit(data, conf_level = conf_level)
      checkmate::assert_string(trt_grp)
      checkmate::assert_subset(trt_grp, names(y_all$or_ci))
      y$or_ci <- y_all$or_ci[[trt_grp]]
      y$n_tot <- y_all$n_tot
    }
  }

  # Attach the row labels to both statistics (also for the reference column).
  y$or_ci <- formatters::with_label(
    x = y$or_ci,
    label = paste0("Odds Ratio (", 100 * conf_level, "% CI)")
  )

  y$n_tot <- formatters::with_label(
    x = y$n_tot,
    label = "Total n"
  )

  y
}

#' @describeIn odds_ratio Formatted analysis function which is used as `afun` in `estimate_odds_ratio()`.
#'
#' @return
#' * `a_odds_ratio()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_odds_ratio(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta
#' )
#'
#' @export
a_odds_ratio <- make_afun(
  s_odds_ratio,
  # Default format string for the `or_ci` statistic (estimate and interval bounds).
  .formats = c(or_ci = "xx.xx (xx.xx - xx.xx)"),
  # Default indent modifier of 1 for the `or_ci` row.
  .indent_mods = c(or_ci = 1L)
)

#' @describeIn odds_ratio Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... arguments passed to `s_odds_ratio()`.
#'
#' @return
#' * `estimate_odds_ratio()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_odds_ratio()` to the table layout.
#'
#' @examples
#' dta <- data.frame(
#'   rsp = sample(c(TRUE, FALSE), 100, TRUE),
#'   grp = factor(rep(c("A", "B"), each = 50))
#' )
#'
#' l <- basic_table() %>%
#'   split_cols_by(var = "grp", ref_group = "B") %>%
#'   estimate_odds_ratio(vars = "rsp")
#'
#' build_table(l, df = dta)
#'
#' @export
estimate_odds_ratio <- function(lyt,
                                vars,
                                ...,
                                show_labels = "hidden",
                                table_names = vars,
                                .stats = "or_ci",
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Customize the default formatted analysis function with the requested
  # statistics, formats, labels and indentation.
  custom_afun <- make_afun(
    a_odds_ratio,
    .stats = .stats, .formats = .formats,
    .labels = .labels, .indent_mods = .indent_mods
  )

  # Everything supplied through `...` is forwarded to the statistics function.
  stat_args <- list(...)

  analyze(
    lyt, vars,
    afun = custom_afun,
    extra_args = stat_args,
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Helper Functions for Odds Ratio Estimation
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions to calculate odds ratios in [estimate_odds_ratio()].
#'
#' @inheritParams argument_convention
#' @param data (`data.frame`)\cr data frame containing at least the variables `rsp` and `grp`, and optionally
#'   `strata` for [or_clogit()].
#'
#' @return A named `list` of elements `or_ci` and `n_tot`.
#'
#' @seealso [odds_ratio]
#'
#' @name h_odds_ratio
NULL

#' @describeIn h_odds_ratio Estimates the odds ratio based on [stats::glm()]. Note that there must be
#'   exactly 2 groups in `data` as specified by the `grp` variable.
#'
#' @examples
#' # Data with 2 groups.
#' data <- data.frame(
#'   rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1)),
#'   grp = letters[c(1, 1, 1, 2, 2, 2, 1, 2)],
#'   strata = letters[c(1, 2, 1, 2, 2, 2, 1, 2)],
#'   stringsAsFactors = TRUE
#' )
#'
#' # Odds ratio based on glm.
#' or_glm(data, conf_level = 0.95)
#'
#' @export
or_glm <- function(data, conf_level) {
  checkmate::assert_logical(data$rsp)
  assert_proportion_value(conf_level)
  assert_df_with_variables(data, list(rsp = "rsp", grp = "grp"))
  checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))

  # The grouping variable must end up as a factor with exactly two levels.
  data$grp <- as_factor_keep_attributes(data$grp)
  assert_df_with_factors(data, list(val = "grp"), min.levels = 2, max.levels = 2)

  # Logistic regression of the response on the group.
  model_formula <- stats::as.formula("rsp ~ grp")
  logit_fit <- stats::glm(
    formula = model_formula,
    data = data,
    family = stats::binomial(link = "logit")
  )

  # Discard the intercept (first coefficient); exponentiating the remaining
  # coefficient and its Wald interval gives the odds ratio and its bounds.
  log_or <- stats::coef(logit_fit)[-1]
  log_or_ci <- stats::confint.default(logit_fit, level = conf_level)[-1, , drop = FALSE]

  list(
    or_ci = stats::setNames(exp(c(log_or, log_or_ci)), c("est", "lcl", "ucl")),
    n_tot = stats::setNames(nrow(logit_fit$model), "n_tot")
  )
}

#' @describeIn h_odds_ratio estimates the odds ratio based on [survival::clogit()]. This is done for
#'   the whole data set including all groups, since the results are not the same as when doing
#'   pairwise comparisons between the groups.
#'
#' @examples
#' # Data with 3 groups.
#' data <- data.frame(
#'   rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0)),
#'   grp = letters[c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3)],
#'   strata = LETTERS[c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)],
#'   stringsAsFactors = TRUE
#' )
#'
#' # Odds ratio based on stratified estimation by conditional logistic regression.
#' or_clogit(data, conf_level = 0.95)
#'
#' @export
or_clogit <- function(data, conf_level) {
  checkmate::assert_logical(data$rsp)
  assert_proportion_value(conf_level)
  assert_df_with_variables(data, list(rsp = "rsp", grp = "grp", strata = "strata"))
  checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))
  checkmate::assert_multi_class(data$strata, classes = c("factor", "character"))

  data$grp <- as_factor_keep_attributes(data$grp)
  data$strata <- as_factor_keep_attributes(data$strata)

  # Deviation from convention: `survival::strata` must be simply `strata`.
  model_formula <- stats::as.formula("rsp ~ grp + strata(strata)")
  clogit_fit <- clogit_with_tryCatch(formula = model_formula, data = data)

  # Log odds ratios and their confidence limits, one row per coefficient,
  # i.e. per comparison of a group against the reference group.
  log_or <- stats::coef(clogit_fit)
  log_or_ci <- stats::confint(clogit_fit, level = conf_level)

  # Build one named (est, lcl, ucl) vector per group; the list entry is named
  # after the coefficient with its leading "grp" prefix removed.
  or_by_grp <- list()
  for (term_name in names(log_or)) {
    log_values <- c(log_or[term_name], log_or_ci[term_name, , drop = TRUE])
    level_name <- gsub("^grp", "", x = term_name)
    or_by_grp[[level_name]] <- stats::setNames(
      object = exp(log_values),
      nm = c("est", "lcl", "ucl")
    )
  }

  list(or_ci = or_by_grp, n_tot = c(n_tot = clogit_fit$n))
}

#' Counting Missed Doses
#'
#' @description `r lifecycle::badge("stable")`
#'
#' These are specific functions to count patients with missed doses. The difference to [count_cumulative()] is
#' mainly the special labels.
#'
#' @inheritParams argument_convention
#'
#' @seealso Relevant description function [d_count_missed_doses()].
#'
#' @name count_missed_doses
NULL

#' @describeIn count_missed_doses Statistics function to count non-missing values.
#'
#' @return
#' * `s_count_nonmissing()` returns the statistic `n` which is the count of non-missing values in `x`.
#'
#' @examples
#' set.seed(1)
#' x <- c(sample(1:10, 10), NA)
#'
#' @keywords internal
s_count_nonmissing <- function(x) {
  # Wrap the result of `n_available()` in a one-element list named `n`.
  n_nonmissing <- n_available(x)
  list(n = n_nonmissing)
}

#' Description Function that Calculates Labels for [s_count_missed_doses()].
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams s_count_missed_doses
#'
#' @return [d_count_missed_doses()] returns a `character` vector with one label per threshold.
#'
#' @seealso [s_count_missed_doses()]
#'
#' @export
d_count_missed_doses <- function(thresholds) {
  # "dose" is pluralized only for thresholds strictly greater than one.
  needs_plural <- thresholds > 1
  plural_suffix <- ifelse(needs_plural, "s", "")
  paste0("At least ", thresholds, " missed dose", plural_suffix)
}

#' @describeIn count_missed_doses Statistics function to count patients with missed doses.
#'
#' @param thresholds (vector of `count`)\cr number of missed doses the patients at least had.
#'
#' @return
#' * `s_count_missed_doses()` returns the statistics `n` and `count_fraction` with one element for each threshold.
#'
#' @keywords internal
s_count_missed_doses <- function(x,
                                 thresholds,
                                 .N_col) { # nolint
  # Cumulative counts for "at least `threshold`" (upper tail, threshold included).
  cumulative <- s_count_cumulative(
    x = x, thresholds = thresholds,
    lower_tail = FALSE, include_eq = TRUE,
    .N_col = .N_col
  )

  # Replace the label of each count/fraction by the missed-doses wording.
  threshold_labels <- d_count_missed_doses(thresholds)
  fractions <- cumulative$count_fraction
  for (k in seq_along(fractions)) {
    fractions[[k]] <- formatters::with_label(fractions[[k]], label = threshold_labels[k])
  }
  cumulative$count_fraction <- fractions

  # The non-missing count `n` comes first, followed by the cumulative statistics.
  c(s_count_nonmissing(x), cumulative)
}

#' @describeIn count_missed_doses Formatted analysis function which is used as `afun`
#'   in `count_missed_doses()`.
#'
#' @return
#' * `a_count_missed_doses()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_count_missed_doses <- make_afun(
  s_count_missed_doses,
  # Default formats: `n` uses the "xx" format string, `count_fraction` is
  # formatted by the `format_count_fraction` function.
  .formats = c(n = "xx", count_fraction = format_count_fraction)
)

#' @describeIn count_missed_doses Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @inheritParams s_count_cumulative
#'
#' @return
#' * `count_missed_doses()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_missed_doses()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' anl <- tern_ex_adsl %>%
#'   distinct(STUDYID, USUBJID, ARM) %>%
#'   mutate(
#'     PARAMCD = "TNDOSMIS",
#'     PARAM = "Total number of missed doses during study",
#'     AVAL = sample(0:20, size = nrow(tern_ex_adsl), replace = TRUE),
#'     AVALC = ""
#'   )
#'
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_missed_doses("AVAL", thresholds = c(1, 5, 10, 15), var_labels = "Missed Doses") %>%
#'   build_table(anl, alt_counts_df = tern_ex_adsl)
#'
#' @export
count_missed_doses <- function(lyt,
                               vars,
                               var_labels = vars,
                               show_labels = "visible",
                               ...,
                               table_names = vars,
                               .stats = NULL,
                               .formats = NULL,
                               .labels = NULL,
                               .indent_mods = NULL) {
  # Customize the default formatted analysis function; `count_fraction` is
  # passed as the statistic to ungroup.
  custom_afun <- make_afun(
    a_count_missed_doses,
    .stats = .stats, .formats = .formats,
    .labels = .labels, .indent_mods = .indent_mods,
    .ungroup_stats = "count_fraction"
  )

  # Everything supplied through `...` is forwarded to the statistics function.
  stat_args <- list(...)

  analyze(
    lyt = lyt, vars = vars,
    afun = custom_afun,
    var_labels = var_labels,
    table_names = table_names,
    show_labels = show_labels,
    extra_args = stat_args
  )
}

#' Compare Variables Between Groups
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Comparison with a reference group for different `x` objects.
#'
#' @inheritParams argument_convention
#'
#' @note
#' * For factor variables, `denom` for factor proportions can only be `n` since the purpose is to compare proportions
#'   between columns, therefore a row-based proportion would not make sense. Proportion based on `N_col` would
#'   be difficult since we use counts for the chi-squared test statistic, therefore missing values should be accounted
#'   for as explicit factor levels.
#' * If factor variables contain `NA`, these `NA` values are excluded by default. To include `NA` values
#'   set `na.rm = FALSE` and missing values will be displayed as an `NA` level. Alternatively, an explicit
#'   factor level can be defined for `NA` values during pre-processing via [df_explicit_na()] - the
#'   default `na_level` (`"<Missing>"`) will also be excluded when `na.rm` is set to `TRUE`.
#' * For character variables, automatic conversion to factor does not guarantee that the table
#'   will be generated correctly. In particular for sparse tables this very likely can fail.
#'   Therefore it is always better to manually convert character variables to factors during pre-processing.
#' * For `compare_vars()`, the column split must define a reference group via `ref_group` so that the comparison
#'   is well defined.
#'
#' @seealso Relevant constructor function [create_afun_compare()], and [s_summary()] which is used internally
#'   to compute a summary within `s_compare()`.
#'
#' @name compare_variables
#' @include summarize_variables.R
NULL

#' @describeIn compare_variables S3 generic function to produce a comparison summary.
#'
#' @return
#' * `s_compare()` returns output of [s_summary()] and comparisons versus the reference group in the form of p-values.
#'
#' @export
s_compare <- function(x,
                      .ref_group,
                      .in_ref_col,
                      ...) {
  # S3 generic: the method is selected from the class of the first argument, `x`.
  UseMethod("s_compare")
}

#' @describeIn compare_variables Method for `numeric` class. This uses the standard t-test
#'   to calculate the p-value.
#'
#' @method s_compare numeric
#'
#' @examples
#' # `s_compare.numeric`
#'
#' ## Usual case where both this and the reference group vector have more than 1 value.
#' s_compare(rnorm(10, 5, 1), .ref_group = rnorm(5, -5, 1), .in_ref_col = FALSE)
#'
#' ## If one group has not more than 1 value, then p-value is not calculated.
#' s_compare(rnorm(10, 5, 1), .ref_group = 1, .in_ref_col = FALSE)
#'
#' ## Empty numeric does not fail, it returns NA-filled items and no p-value.
#' s_compare(numeric(), .ref_group = numeric(), .in_ref_col = FALSE)
#'
#' @export
s_compare.numeric <- function(x,
                              .ref_group,
                              .in_ref_col,
                              ...) {
  checkmate::assert_numeric(x)
  checkmate::assert_numeric(.ref_group)
  checkmate::assert_flag(.in_ref_col)

  # Start from the numeric summary of `x`; `...` is forwarded to it.
  result <- s_summary.numeric(x = x, ...)

  # A t-test is run only outside the reference column and when both vectors
  # have more than one available value; otherwise the p-value stays empty.
  can_test <- !.in_ref_col && n_available(x) > 1 && n_available(.ref_group) > 1
  result$pval <- if (can_test) stats::t.test(x, .ref_group)$p.value else character()

  result
}

#' @describeIn compare_variables Method for `factor` class. This uses the chi-squared test
#'   to calculate the p-value.
#'
#' @param denom (`string`)\cr choice of denominator for factor proportions,
#'   can only be `n` (number of values in this row and column intersection).
#'
#' @method s_compare factor
#'
#' @examples
#' # `s_compare.factor`
#'
#' ## Basic usage:
#' x <- factor(c("a", "a", "b", "c", "a"))
#' y <- factor(c("a", "b", "c"))
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE)
#'
#' ## Management of NA values.
#' x <- explicit_na(factor(c("a", "a", "b", "c", "a", NA, NA)))
#' y <- explicit_na(factor(c("a", "b", "c", NA)))
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE)
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE)
#'
#' @export
s_compare.factor <- function(x,
                             .ref_group,
                             .in_ref_col,
                             denom = "n",
                             na.rm = TRUE, # nolint
                             ...) {
  checkmate::assert_flag(.in_ref_col)
  assert_valid_factor(x)
  assert_valid_factor(.ref_group)
  denom <- match.arg(denom)

  # Start from the factor summary of `x`.
  result <- s_summary.factor(x = x, denom = denom, na.rm = na.rm, ...)

  if (na.rm) {
    # Remove `NA` values and discard the explicit "<Missing>" level.
    x <- fct_discard(x[!is.na(x)], "<Missing>")
    .ref_group <- fct_discard(.ref_group[!is.na(.ref_group)], "<Missing>")
  } else {
    # Keep missing values as an explicit "NA" level.
    x <- explicit_na(x, label = "NA")
    .ref_group <- explicit_na(.ref_group, label = "NA")
  }

  # Both factors must share the same levels, and there must be at least two.
  checkmate::assert_factor(x, levels = levels(.ref_group), min.levels = 2)

  # Chi-squared test on the two rows of level counts; it is run only outside
  # the reference column and when neither vector is empty.
  can_test <- !.in_ref_col && length(x) > 0 && length(.ref_group) > 0
  result$pval <- if (can_test) {
    level_counts <- rbind(table(x), table(.ref_group))
    suppressWarnings(stats::chisq.test(level_counts))$p.value
  } else {
    character()
  }

  result
}

#' @describeIn compare_variables Method for `character` class. This makes an automatic
#'   conversion to `factor` (with a warning) and then forwards to the method for factors.
#'
#' @param verbose (`logical`)\cr Whether warnings and messages should be printed. Mainly used
#'   to print out information about factor casting. Defaults to `TRUE`.
#'
#' @method s_compare character
#'
#' @examples
#' # `s_compare.character`
#'
#' ## Basic usage:
#' x <- c("a", "a", "b", "c", "a")
#' y <- c("a", "b", "c")
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE)
#'
#' ## Note that missing values handling can make a large difference:
#' x <- c("a", "a", "b", "c", "a", NA)
#' y <- c("a", "b", "c", rep(NA, 20))
#' s_compare(x,
#'   .ref_group = y, .in_ref_col = FALSE,
#'   .var = "x", verbose = FALSE
#' )
#' s_compare(x,
#'   .ref_group = y, .in_ref_col = FALSE, .var = "x",
#'   na.rm = FALSE, verbose = FALSE
#' )
#'
#' @export
s_compare.character <- function(x,
                                .ref_group,
                                .in_ref_col,
                                denom = "n",
                                na.rm = TRUE, # nolint
                                .var,
                                verbose = TRUE,
                                ...) {
  # Convert both vectors to factors; `.var` is passed on as the variable name
  # and `verbose` as the verbosity switch of the conversion helper.
  x_fct <- as_factor_keep_attributes(x, x_name = .var, verbose = verbose)
  ref_fct <- as_factor_keep_attributes(.ref_group, x_name = .var, verbose = verbose)

  # Re-dispatch on the converted values.
  s_compare(
    x = x_fct,
    .ref_group = ref_fct,
    .in_ref_col = .in_ref_col,
    denom = denom,
    na.rm = na.rm,
    ...
  )
}

#' @describeIn compare_variables Method for `logical` class. A chi-squared test
#'   is used. If missing values are not removed, then they are counted as `FALSE`.
#'
#' @method s_compare logical
#'
#' @examples
#' # `s_compare.logical`
#'
#' ## Basic usage:
#' x <- c(TRUE, FALSE, TRUE, TRUE)
#' y <- c(FALSE, FALSE, TRUE)
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE)
#'
#' ## Management of NA values.
#' x <- c(NA, TRUE, FALSE)
#' y <- c(NA, NA, NA, NA, FALSE)
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE)
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE)
#'
#' @export
s_compare.logical <- function(x,
                              .ref_group,
                              .in_ref_col,
                              na.rm = TRUE, # nolint
                              denom = "n",
                              ...) {
  denom <- match.arg(denom)

  # Summary statistics are computed from the untouched input; `na.rm` is
  # forwarded to `s_summary.logical()`.
  stats_list <- s_summary.logical(x = x, na.rm = na.rm, denom = denom, ...)

  # For the test, missing values are either dropped or counted as `FALSE`.
  handle_na <- function(values) {
    if (na.rm) {
      stats::na.omit(values)
    } else {
      replace(values, is.na(values), FALSE)
    }
  }
  x <- handle_na(x)
  .ref_group <- handle_na(.ref_group)

  # The p-value is only computed outside the reference column and when both
  # groups contain at least one observation.
  run_test <- !.in_ref_col && length(x) > 0 && length(.ref_group) > 0
  if (run_test) {
    # Fix the level set so that both rows of the table always have a TRUE and a FALSE cell.
    lvls <- c(TRUE, FALSE)
    x <- factor(x, levels = lvls)
    .ref_group <- factor(.ref_group, levels = lvls)
    counts <- rbind(table(x), table(.ref_group))
    stats_list$pval <- suppressWarnings(prop_chisq(counts))
  } else {
    stats_list$pval <- character()
  }

  stats_list
}

#' @describeIn compare_variables Formatted analysis function which is used as `afun`
#'   in `compare_vars()`.
#'
#' @return
#' * `a_compare()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @export
a_compare <- function(x, .ref_group, .in_ref_col, ..., .var) {
  # S3 generic: the method is chosen from the class of the first argument, `x`.
  UseMethod("a_compare")
}

#' @describeIn compare_variables Formatted analysis function method for `numeric` class.
#'
#' @examples
#' # `a_compare.numeric`
#' a_compare(
#'   rnorm(10, 5, 1),
#'   .ref_group = rnorm(20, -5, 1),
#'   .in_ref_col = FALSE,
#'   .var = "bla"
#' )
#'
#' @export
# Numeric method: the numeric summary formats/labels extended by a t-test p-value entry.
a_compare.numeric <- make_afun(
  s_compare.numeric,
  .null_ref_cells = FALSE,
  .labels = c(.a_summary_numeric_labels, pval = "p-value (t-test)"),
  .formats = c(.a_summary_numeric_formats, pval = "x.xxxx | (<0.0001)")
)

# Formats shared by the factor, character and logical `a_compare()` methods:
# the count summary formats plus a p-value format.
.a_compare_counts_formats <- c(.a_summary_counts_formats, pval = "x.xxxx | (<0.0001)")

# Labels shared by the factor, character and logical `a_compare()` methods;
# only the p-value row gets a custom label here.
.a_compare_counts_labels <- c(pval = "p-value (chi-squared test)")

#' @describeIn compare_variables Formatted analysis function method for `factor` class.
#'
#' @examples
#' # `a_compare.factor`
#' # We need to ungroup `count` and `count_fraction` first so that the `rtables` formatting
#' # functions can be applied correctly.
#' afun <- make_afun(
#'   getS3method("a_compare", "factor"),
#'   .ungroup_stats = c("count", "count_fraction")
#' )
#' x <- factor(c("a", "a", "b", "c", "a"))
#' y <- factor(c("a", "a", "b", "c"))
#' afun(x, .ref_group = y, .in_ref_col = FALSE)
#'
#' @export
# Factor method: uses the shared count formats and labels.
a_compare.factor <- make_afun(
  s_compare.factor,
  .null_ref_cells = FALSE,
  .labels = .a_compare_counts_labels,
  .formats = .a_compare_counts_formats
)

#' @describeIn compare_variables Formatted analysis function method for `character` class.
#'
#' @examples
#' # `a_compare.character`
#' afun <- make_afun(
#'   getS3method("a_compare", "character"),
#'   .ungroup_stats = c("count", "count_fraction")
#' )
#' x <- c("A", "B", "A", "C")
#' y <- c("B", "A", "C")
#' afun(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE)
#'
#' @export
# Character method: uses the shared count formats and labels.
a_compare.character <- make_afun(
  s_compare.character,
  .null_ref_cells = FALSE,
  .labels = .a_compare_counts_labels,
  .formats = .a_compare_counts_formats
)

#' @describeIn compare_variables Formatted analysis function method for `logical` class.
#'
#' @examples
#' # `a_compare.logical`
#' afun <- make_afun(
#'   getS3method("a_compare", "logical")
#' )
#' x <- c(TRUE, FALSE, FALSE, TRUE, TRUE)
#' y <- c(TRUE, FALSE)
#' afun(x, .ref_group = y, .in_ref_col = FALSE)
#'
#' @export
# Logical method: uses the shared count formats and labels.
a_compare.logical <- make_afun(
  s_compare.logical,
  .null_ref_cells = FALSE,
  .labels = .a_compare_counts_labels,
  .formats = .a_compare_counts_formats
)

#' Constructor Function for [compare_vars()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Constructor function which creates a combined formatted analysis function.
#'
#' @inheritParams argument_convention
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return Combined formatted analysis function for use in [compare_vars()].
#'
#' @note Since [a_compare()] is generic and we want customization of the formatting arguments
#'   via [rtables::make_afun()], we need to create another temporary generic function, with
#'   corresponding customized methods. Then in order for the methods to be found,
#'   we need to wrap them in a combined `afun`. Since this is required by two layout creating
#'   functions (and possibly others in the future), we provide a constructor that does this:
#'   [create_afun_compare()].
#'
#' @seealso [compare_vars()]
#'
#' @examples
#' # `create_afun_compare()` to create combined `afun`
#'
#' afun <- create_afun_compare(
#'   .stats = c("n", "count_fraction", "mean_sd", "pval"),
#'   .indent_mods = c(pval = 1L)
#' )
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", ref_group = "ARM A") %>%
#'   analyze(
#'     "AGE",
#'     afun = afun,
#'     show_labels = "visible"
#'   )
#' build_table(lyt, df = tern_ex_adsl)
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", ref_group = "ARM A") %>%
#'   analyze(
#'     "SEX",
#'     afun = afun,
#'     show_labels = "visible"
#'   )
#' build_table(lyt, df = tern_ex_adsl)
#'
#' @export
create_afun_compare <- function(.stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # The returned closure captures `.stats`, `.formats`, `.labels` and `.indent_mods`
  # and is what `compare_vars()` hands to `analyze()` as `afun`.
  function(x,
           .ref_group,
           .in_ref_col,
           ...,
           .var) {
    # Temporary S3 generic. Its methods are the local `afun.<class>` functions
    # defined below, so they must stay in this frame for dispatch to find them.
    afun <- function(x, ...) {
      UseMethod("afun", x)
    }

    # Statistics considered for numeric variables: the numeric summary statistics plus `pval`.
    # NOTE(review): `afun_selected_stats()` presumably restricts `.stats` to `all_stats` - confirm.
    numeric_stats <- afun_selected_stats(
      .stats,
      all_stats = c(names(.a_summary_numeric_formats), "pval")
    )
    # Each method re-wraps the corresponding `a_compare()` method with the
    # user-supplied formatting arguments, subset by name to the relevant statistics.
    afun.numeric <- make_afun( # nolint
      a_compare.numeric,
      .stats = numeric_stats,
      .formats = extract_by_name(.formats, numeric_stats),
      .labels = extract_by_name(.labels, numeric_stats),
      .indent_mods = extract_by_name(.indent_mods, numeric_stats),
      .null_ref_cells = FALSE
    )

    # Statistics considered for factor, character and logical variables.
    factor_stats <- afun_selected_stats(
      .stats,
      all_stats = names(.a_compare_counts_formats)
    )
    # `count` and `count_fraction` are passed as `.ungroup_stats` for the factor
    # and character methods (but not for the logical method below).
    ungroup_stats <- afun_selected_stats(.stats, c("count", "count_fraction"))
    afun.factor <- make_afun( # nolint
      a_compare.factor,
      .stats = factor_stats,
      .formats = extract_by_name(.formats, factor_stats),
      .labels = extract_by_name(.labels, factor_stats),
      .indent_mods = extract_by_name(.indent_mods, factor_stats),
      .ungroup_stats = ungroup_stats,
      .null_ref_cells = FALSE
    )

    afun.character <- make_afun( # nolint
      a_compare.character,
      .stats = factor_stats,
      .formats = extract_by_name(.formats, factor_stats),
      .labels = extract_by_name(.labels, factor_stats),
      .indent_mods = extract_by_name(.indent_mods, factor_stats),
      .ungroup_stats = ungroup_stats,
      .null_ref_cells = FALSE
    )

    afun.logical <- make_afun( # nolint
      a_compare.logical,
      .stats = factor_stats,
      .formats = extract_by_name(.formats, factor_stats),
      .labels = extract_by_name(.labels, factor_stats),
      .indent_mods = extract_by_name(.indent_mods, factor_stats),
      .null_ref_cells = FALSE
    )

    # Dispatch on the class of `x` to one of the methods defined above.
    afun(
      x = x,
      .ref_group = .ref_group,
      .in_ref_col = .in_ref_col,
      ...,
      .var = .var
    )
  }
}

#' @describeIn compare_variables Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... arguments passed to `s_compare()`.
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return
#' * `compare_vars()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_compare()` to the table layout.
#'
#' @examples
#' # `compare_vars()` in `rtables` pipelines
#'
#' ## Default output within a `rtables` pipeline.
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", ref_group = "ARM B") %>%
#'   compare_vars(c("AGE", "SEX"))
#' build_table(lyt, tern_ex_adsl)
#'
#' ## Select and format statistics output.
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", ref_group = "ARM C") %>%
#'   compare_vars(
#'     vars = "AGE",
#'     .stats = c("mean_sd", "pval"),
#'     .formats = c(mean_sd = "xx.x, xx.x"),
#'     .labels = c(mean_sd = "Mean, SD")
#'   )
#' build_table(lyt, df = tern_ex_adsl)
#'
#' @export
compare_vars <- function(lyt,
                         vars,
                         var_labels = vars,
                         nested = TRUE,
                         ...,
                         na_level = NA_character_,
                         show_labels = "default",
                         table_names = vars,
                         .stats = c("n", "mean_sd", "count_fraction", "pval"),
                         .formats = NULL,
                         .labels = NULL,
                         .indent_mods = NULL) {
  # Build the combined analysis function that dispatches on the class of each variable.
  combined_afun <- create_afun_compare(
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Arguments given via `...` are handed to the analysis function through `extra_args`.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = combined_afun,
    extra_args = list(...),
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    nested = nested,
    na_str = na_level,
    inclNAs = TRUE
  )
}

#' Stack Multiple Grobs
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Stack grobs as a new grob with 1 column and multiple rows layout.
#'
#' @param ... grobs.
#' @param grobs list of grobs.
#' @param padding unit of length 1, space between each grob.
#' @param vp a [viewport()] object (or `NULL`).
#' @param name a character identifier for the grob.
#' @param gp A [gpar()] object.
#'
#' @return A `grob`.
#'
#' @examples
#' library(grid)
#'
#' g1 <- circleGrob(gp = gpar(col = "blue"))
#' g2 <- circleGrob(gp = gpar(col = "red"))
#' g3 <- textGrob("TEST TEXT")
#' grid.newpage()
#' grid.draw(stack_grobs(g1, g2, g3))
#'
#' showViewport()
#'
#' grid.newpage()
#' pushViewport(viewport(layout = grid.layout(1, 2)))
#' vp1 <- viewport(layout.pos.row = 1, layout.pos.col = 2)
#' grid.draw(stack_grobs(g1, g2, g3, vp = vp1, name = "test"))
#'
#' showViewport()
#' grid.ls(grobs = TRUE, viewports = TRUE, print = FALSE)
#'
#' @export
stack_grobs <- function(...,
                        grobs = list(...),
                        padding = grid::unit(2, "line"),
                        vp = NULL,
                        gp = NULL,
                        name = NULL) {
  checkmate::assert_true(
    all(vapply(grobs, grid::is.grob, logical(1)))
  )

  # Nothing to stack: return an empty container grob. Without this guard the
  # layout below would be requested with 2 * 0 - 1 = -1 rows.
  if (length(grobs) == 0) {
    return(grid::gTree(vp = vp, gp = gp, name = name))
  }

  # A single grob is returned unchanged; `vp`, `gp` and `name` are not applied to it.
  if (length(grobs) == 1) {
    return(grobs[[1]])
  }

  # Layout rows alternate grob / padding / grob / ..., hence 2 * n - 1 rows:
  # odd rows share the free space equally, even rows have the fixed padding height.
  n_layout <- 2 * length(grobs) - 1
  hts <- lapply(
    seq_len(n_layout),
    function(i) {
      if (i %% 2 != 0) {
        grid::unit(1, "null")
      } else {
        padding
      }
    }
  )
  hts <- do.call(grid::unit.c, hts)

  main_vp <- grid::viewport(
    layout = grid::grid.layout(nrow = n_layout, ncol = 1, heights = hts)
  )

  # Place the k-th grob in the k-th odd layout row.
  nested_grobs <- Map(function(g, i) {
    grid::gTree(
      children = grid::gList(g),
      vp = grid::viewport(layout.pos.row = i, layout.pos.col = 1)
    )
  }, grobs, seq_along(grobs) * 2 - 1)

  grobs_mainvp <- grid::gTree(
    children = do.call(grid::gList, nested_grobs),
    vp = main_vp
  )

  # The outer tree carries the user-supplied viewport, graphical parameters and name.
  grid::gTree(
    children = grid::gList(grobs_mainvp),
    vp = vp,
    gp = gp,
    name = name
  )
}

#' Arrange Multiple Grobs
#'
#' Arrange grobs as a new grob with \verb{n*m (rows*cols)} layout.
#'
#' @inheritParams stack_grobs
#' @param ncol number of columns in layout.
#' @param nrow number of rows in layout.
#' @param padding_ht unit of length 1, vertical space between each grob.
#' @param padding_wt unit of length 1, horizontal space between each grob.
#'
#' @return A `grob`.
#' @examples
#' library(grid)
#'
#' \donttest{
#' num <- lapply(1:9, textGrob)
#' grid::grid.newpage()
#' grid.draw(arrange_grobs(grobs = num, ncol = 2))
#'
#' showViewport()
#'
#' g1 <- circleGrob(gp = gpar(col = "blue"))
#' g2 <- circleGrob(gp = gpar(col = "red"))
#' g3 <- textGrob("TEST TEXT")
#' grid::grid.newpage()
#' grid.draw(arrange_grobs(g1, g2, g3, nrow = 2))
#'
#' showViewport()
#'
#' grid::grid.newpage()
#' grid.draw(arrange_grobs(g1, g2, g3, ncol = 3))
#'
#' grid::grid.newpage()
#' grid::pushViewport(grid::viewport(layout = grid::grid.layout(1, 2)))
#' vp1 <- grid::viewport(layout.pos.row = 1, layout.pos.col = 2)
#' grid.draw(arrange_grobs(g1, g2, g3, ncol = 2, vp = vp1))
#'
#' showViewport()
#' }
#' @export
arrange_grobs <- function(...,
                          grobs = list(...),
                          ncol = NULL, nrow = NULL,
                          padding_ht = grid::unit(2, "line"),
                          padding_wt = grid::unit(2, "line"),
                          vp = NULL,
                          gp = NULL,
                          name = NULL) {
  checkmate::assert_true(
    all(vapply(grobs, grid::is.grob, logical(1)))
  )

  # A single grob is returned unchanged.
  if (length(grobs) == 1) {
    return(grobs[[1]])
  }

  # Fill in whichever grid dimension was not supplied; default to a single column.
  if (is.null(ncol) && is.null(nrow)) {
    ncol <- 1
  }
  if (is.null(nrow)) {
    nrow <- ceiling(length(grobs) / ncol)
  } else if (is.null(ncol)) {
    ncol <- ceiling(length(grobs) / nrow)
  }

  if (ncol * nrow < length(grobs)) {
    stop("specififed ncol and nrow are not enough for arranging the grobs ")
  }

  # One column is just a vertical stack.
  if (ncol == 1) {
    return(stack_grobs(grobs = grobs, padding = padding_ht, vp = vp, gp = gp, name = name))
  }

  # Track sizes alternate between a flexible "null" cell (odd positions) and
  # a fixed padding (even positions).
  track_sizes <- function(n_tracks, pad) {
    sizes <- lapply(
      seq(1, n_tracks),
      function(pos) {
        if (pos %% 2 != 0) {
          grid::unit(1, "null")
        } else {
          pad
        }
      }
    )
    do.call(grid::unit.c, sizes)
  }

  n_col <- 2 * ncol - 1
  n_row <- 2 * nrow - 1
  hts <- track_sizes(n_row, padding_ht)
  wts <- track_sizes(n_col, padding_wt)

  main_vp <- grid::viewport(
    layout = grid::grid.layout(nrow = n_row, ncol = n_col, widths = wts, heights = hts)
  )

  # Layout positions of the grob cells in row-major order (the column index
  # varies fastest); grobs fill these cells in order.
  cells <- expand.grid(col = seq(ncol) * 2 - 1, row = seq(nrow) * 2 - 1)
  n_placed <- min(length(grobs), NROW(cells))
  nested_grobs <- lapply(
    seq_len(n_placed),
    function(k) {
      grid::gTree(
        children = grid::gList(grobs[[k]]),
        vp = grid::viewport(layout.pos.row = cells$row[k], layout.pos.col = cells$col[k])
      )
    }
  )

  grobs_mainvp <- grid::gTree(
    children = do.call(grid::gList, nested_grobs),
    vp = main_vp
  )

  # The outer tree carries the user-supplied viewport, graphical parameters and name.
  grid::gTree(
    children = grid::gList(grobs_mainvp),
    vp = vp,
    gp = gp,
    name = name
  )
}

#' Draw `grob`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Draw grob on device page.
#'
#' @param grob grid object
#' @param newpage draw on a new page
#' @param vp a [viewport()] object (or `NULL`).
#'
#' @return A `grob`.
#'
#' @examples
#' library(dplyr)
#' library(grid)
#'
#' \donttest{
#' rect <- rectGrob(width = grid::unit(0.5, "npc"), height = grid::unit(0.5, "npc"))
#' rect %>% draw_grob(vp = grid::viewport(angle = 45))
#'
#' num <- lapply(1:10, textGrob)
#' num %>%
#'   arrange_grobs(grobs = .) %>%
#'   draw_grob()
#' showViewport()
#' }
#'
#' @export
draw_grob <- function(grob, newpage = TRUE, vp = NULL) {
  if (newpage) {
    grid::grid.newpage()
  }
  # The viewport is pushed and not popped here, so it is still the current
  # viewport after drawing (e.g. for a subsequent `showViewport()`).
  if (!is.null(vp)) {
    grid::pushViewport(vp)
  }
  grid::grid.draw(grob)

  # Return the grob itself, as documented, instead of the value of `grid.draw()`;
  # invisibly, so that nothing is auto-printed at the console.
  invisible(grob)
}

# Tag an object with the "ternGrob" class (placed first), without duplicating
# the class if it is already present.
tern_grob <- function(x) {
  tagged_class <- union("ternGrob", class(x))
  class(x) <- tagged_class
  x
}

# Print method for "ternGrob" objects: draws the grob on a fresh page.
print.ternGrob <- function(x, ...) {
  grid::grid.newpage()
  grid::grid.draw(x)
  # Print methods conventionally return their argument invisibly.
  invisible(x)
}

#' Controls for Cox Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Sets a list of parameters for Cox regression fit. Used internally.
#'
#' @inheritParams argument_convention
#' @param pval_method (`string`)\cr the method used for estimation of p.values; `wald` (default) or `likelihood`.
#' @param interaction (`flag`)\cr if `TRUE`, the model includes the interaction between the studied
#'   treatment and candidate covariate. Note that for univariate models without treatment arm, and
#'   multivariate models, no interaction can be used so that this needs to be `FALSE`.
#' @param ties (`string`)\cr among `exact` (equivalent to `DISCRETE` in SAS), `efron` and `breslow`,
#'   see [survival::coxph()]. Note: there is no equivalent of SAS `EXACT` method in R.
#'
#' @return A `list` of items with names corresponding to the arguments.
#'
#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()].
#'
#' @examples
#' control_coxreg()
#'
#' @export
control_coxreg <- function(pval_method = c("wald", "likelihood"),
                           ties = c("exact", "efron", "breslow"),
                           conf_level = 0.95,
                           interaction = FALSE) {
  # Resolve the enumerated choices first; the first element of each default is used
  # when the argument is not supplied.
  choices <- list(
    pval_method = match.arg(pval_method),
    ties = match.arg(ties)
  )

  # Then validate the remaining settings.
  checkmate::assert_flag(interaction)
  assert_proportion_value(conf_level)

  c(choices, list(conf_level = conf_level, interaction = interaction))
}

#' Custom Tidy Methods for Cox Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @param x (`list`)\cr Result of the Cox regression model fitted by [fit_coxreg_univar()] (for univariate models)
#'   or [fit_coxreg_multivar()] (for multivariate models).
#'
#' @return [tidy()] returns:
#' * For `summary.coxph` objects,  a `data.frame` with columns: `Pr(>|z|)`, `exp(coef)`, `exp(-coef)`, `lower .95`,
#'   `upper .95`, `level`, and `n`.
#' * For `coxreg.univar` objects, a `data.frame` with columns: `effect`, `term`, `term_label`, `level`, `n`, `hr`,
#'   `lcl`, `ucl`, `pval`, and `ci`.
#' * For `coxreg.multivar` objects, a `data.frame` with columns: `term`, `pval`, `term_label`, `hr`, `lcl`, `ucl`,
#'   `level`, and `ci`.
#'
#' @seealso [cox_regression]
#'
#' @name tidy_coxreg
NULL

#' @describeIn tidy_coxreg Custom tidy method for [survival::coxph()] summary results.
#'
#' Tidy the [survival::coxph()] results into a `data.frame` to extract model results.
#'
#' @method tidy summary.coxph
#'
#' @examples
#' library(survival)
#' library(broom)
#'
#' set.seed(1, kind = "Mersenne-Twister")
#'
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   data.frame(
#'     time = stop,
#'     status = event,
#'     armcd = as.factor(rx),
#'     covar1 = as.factor(enum),
#'     covar2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4, labels = c("F", "F", "M", "M")
#'     )
#'   )
#' )
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#'
#' formula <- "survival::Surv(time, status) ~ armcd + covar1"
#' msum <- summary(coxph(stats::as.formula(formula), data = dta_bladder))
#' tidy(msum)
#'
#' @export
tidy.summary.coxph <- function(x, # nolint
                               ...) {
  checkmate::assert_class(x, "summary.coxph")

  # Keep the row names before converting: they are re-attached as the `level` column.
  coef_mat <- x$coefficients
  term_levels <- rownames(coef_mat)

  coef_tbl <- tibble::as_tibble(coef_mat)
  ci_tbl <- tibble::as_tibble(x$conf.int)

  # Only the p-value column(s) of the coefficient table are kept, next to the
  # full confidence interval table.
  is_pval_col <- grepl("Pr", names(coef_tbl))
  tidied <- cbind(coef_tbl[, is_pval_col], ci_tbl)
  tidied$level <- term_levels
  tidied$n <- x[["n"]]
  tidied
}

#' @describeIn tidy_coxreg Custom tidy method for a univariate Cox regression.
#'
#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_univar()].
#'
#' @method tidy coxreg.univar
#'
#' @examples
#' ## Cox regression: arm + 1 covariate.
#' mod1 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = "covar1"
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91)
#' )
#'
#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates.
#' mod2 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91, interaction = TRUE)
#' )
#'
#' tidy(mod1)
#' tidy(mod2)
#'
#' @export
tidy.coxreg.univar <- function(x, # nolint
                               ...) {
  checkmate::assert_class(x, "coxreg.univar")
  mod <- x$mod
  vars <- c(x$vars$arm, x$vars$covariates)
  has_arm <- "arm" %in% names(x$vars)

  # Pick the extraction helper once, depending on the kind of fit:
  # covariate-only models, arm-by-covariate interaction models, or arm + covariate models.
  extract_one <- if (!has_arm) {
    function(mod, covar) {
      h_coxreg_multivar_extract(
        var = covar,
        data = x$data,
        mod = mod,
        control = x$control
      )
    }
  } else if (x$control$interaction) {
    function(mod, covar) {
      h_coxreg_extract_interaction(
        effect = x$vars$arm, covar = covar, mod = mod, data = x$data,
        at = x$at, control = x$control
      )
    }
  } else {
    function(mod, covar) {
      h_coxreg_univar_extract(
        effect = x$vars$arm, covar = covar, data = x$data, mod = mod,
        control = x$control
      )
    }
  }

  # One extraction per (model, variable) pair, stacked into a single data frame.
  per_model <- Map(f = extract_one, mod = mod, covar = vars)
  result <- do.call(rbind, per_model)

  # Store the confidence limits pairwise as a list column, then convert the
  # displayed columns element-wise with `empty_vector_if_na()`.
  result$ci <- Map(function(lower, upper) c(lower, upper), result$lcl, result$ucl)
  result$n <- lapply(result$n, empty_vector_if_na)
  result$ci <- lapply(result$ci, empty_vector_if_na)
  result$hr <- lapply(result$hr, empty_vector_if_na)

  if (x$control$interaction) {
    result$pval_inter <- lapply(result$pval_inter, empty_vector_if_na)
    # Remove interaction p-values due to change in specifications.
    not_treatment <- result$effect != "Treatment:"
    result$pval[not_treatment] <- NA
  }
  result$pval <- lapply(result$pval, empty_vector_if_na)

  attr(result, "conf_level") <- x$control$conf_level
  result
}

#' @describeIn tidy_coxreg Custom tidy method for a multivariate Cox regression.
#'
#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_multivar()].
#'
#' @method tidy coxreg.multivar
#'
#' @examples
#' multivar_model <- fit_coxreg_multivar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#' broom::tidy(multivar_model)
#'
#' @export
tidy.coxreg.multivar <- function(x, # nolint
                                 ...) {
  checkmate::assert_class(x, "coxreg.multivar")
  vars <- c(x$vars$arm, x$vars$covariates)

  # Extract the estimates for each variable from the single fitted model
  # and stack them into one data frame.
  per_var <- Map(
    function(one_var) {
      h_coxreg_multivar_extract(
        var = one_var, data = x$data,
        mod = x$mod, control = x$control
      )
    },
    vars
  )
  result <- do.call(rbind, per_var)

  # Store the confidence limits pairwise as a list column, then convert the
  # displayed columns element-wise with `empty_vector_if_na()`.
  result$ci <- Map(function(lower, upper) c(lower, upper), result$lcl, result$ucl)
  result$ci <- lapply(result$ci, empty_vector_if_na)
  result$hr <- lapply(result$hr, empty_vector_if_na)
  result$pval <- lapply(result$pval, empty_vector_if_na)

  # The `n` column is not part of the multivariate output.
  keep_col <- names(result) != "n"
  result <- result[, keep_col]
  attr(result, "conf_level") <- x$control$conf_level

  result
}

#' Fits for Cox Proportional Hazards Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Fitting functions for univariate and multivariate Cox regression models.
#'
#' @param variables (`list`)\cr a named list of the names of variables found in `data`, with elements
#'   corresponding to the `time`, `event`, `arm`, `strata`, and `covariates` terms. If `arm` is missing from
#'   `variables`, then only Cox model(s) including the `covariates` will be fitted and the corresponding effect
#'   estimates will be tabulated later.
#' @param data (`data.frame`)\cr the dataset containing the variables to fit the models.
#' @param at (`list` of `numeric`)\cr when the candidate covariate is a `numeric`, use `at` to specify
#'   the value of the covariate at which the effect should be estimated.
#' @param control (`list`)\cr a list of parameters as returned by the helper function [control_coxreg()].
#'
#' @seealso [h_cox_regression] for relevant helper functions, [cox_regression].
#'
#' @examples
#' library(survival)
#'
#' set.seed(1, kind = "Mersenne-Twister")
#'
#' # Testing dataset [survival::bladder].
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   data.frame(
#'     time = stop,
#'     status = event,
#'     armcd = as.factor(rx),
#'     covar1 = as.factor(enum),
#'     covar2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4, labels = c("F", "F", "M", "M")
#'     )
#'   )
#' )
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#'
#' plot(
#'   survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder),
#'   lty = 2:4,
#'   xlab = "Months",
#'   col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
#' )
#'
#' @name fit_coxreg
NULL

#' @describeIn fit_coxreg Fit a series of univariate Cox regression models given the inputs.
#'
#' @return
#' * `fit_coxreg_univar()` returns a `coxreg.univar` class object which is a named `list`
#'   with 5 elements:
#'   * `mod`: Cox regression models fitted by [survival::coxph()].
#'   * `data`: The original data frame input.
#'   * `control`: The original control input.
#'   * `vars`: The variables used in the model.
#'   * `at`: Value of the covariate at which the effect should be estimated.
#'
#' @note When using `fit_coxreg_univar` there should be two study arms.
#'
#' @examples
#' # fit_coxreg_univar
#'
#' ## Cox regression: arm + 1 covariate.
#' mod1 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = "covar1"
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91)
#' )
#'
#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates.
#' mod2 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91, interaction = TRUE)
#' )
#'
#' ## Cox regression: arm + 1 covariate, stratified analysis.
#' mod3 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", strata = "covar2",
#'     covariates = c("covar1")
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91)
#' )
#'
#' ## Cox regression: no arm, only covariates.
#' mod4 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#'
#' @export
fit_coxreg_univar <- function(variables,
                              data,
                              at = list(),
                              control = control_coxreg()) {
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_df_with_variables(data, variables)
  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # With a strata variable, the "likelihood" p-value method is rejected.
  if (!is.null(variables$strata)) {
    checkmate::assert_disjunct(control$pval_method, "likelihood")
  }
  # The arm variable must be a factor with exactly two levels.
  if (has_arm) {
    assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  }

  # Drop unused factor levels from the model variables.
  vars <- unlist(variables[c(arm_name, "covariates", "strata")], use.names = FALSE)
  for (i in vars) {
    if (is.factor(data[[i]])) {
      # Recode with `droplevels()` rather than overwriting the "levels" attribute:
      # replacing only the labels leaves the integer codes untouched, so an unused
      # level that is not the last one would make the remaining codes point at the
      # wrong label (or at none). All other attributes of the column are restored
      # afterwards, since `droplevels()` does not carry them over.
      col_attrs <- attributes(data[[i]])
      recoded <- droplevels(data[[i]])
      col_attrs[["levels"]] <- levels(recoded)
      attributes(recoded) <- col_attrs
      data[[i]] <- recoded
    }
  }

  # One model per formula. The `coxph()` call is kept exactly as is because the
  # fitted object records the call expression.
  forms <- h_coxreg_univar_formulas(variables, interaction = control$interaction)
  mod <- lapply(
    forms, function(x) {
      survival::coxph(formula = stats::as.formula(x), data = data, ties = control$ties)
    }
  )
  structure(
    list(
      mod = mod,
      data = data,
      control = control,
      vars = variables,
      at = at
    ),
    class = "coxreg.univar"
  )
}

#' @describeIn fit_coxreg Fit a multivariate Cox regression model.
#'
#' @return
#' * `fit_coxreg_multivar()` returns a `coxreg.multivar` class object which is a named list
#'   with 4 elements:
#'   * `mod`: Cox regression model fitted by [survival::coxph()].
#'   * `data`: The original data frame input.
#'   * `control`: The original control input.
#'   * `vars`: The variables used in the model.
#'
#' @examples
#' # fit_coxreg_multivar
#'
#' ## Cox regression: multivariate Cox regression.
#' multivar_model <- fit_coxreg_multivar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#'
#' # Example without treatment arm.
#' multivar_covs_model <- fit_coxreg_multivar(
#'   variables = list(
#'     time = "time", event = "status",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#'
#' @export
fit_coxreg_multivar <- function(variables,
                                data,
                                control = control_coxreg()) {
  checkmate::assert_list(variables, names = "named")
  # The arm is optional; when absent it is left out of the required-variable check below.
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  if (!is.null(variables$covariates)) {
    checkmate::assert_character(variables$covariates)
  }

  # Interaction terms are not supported for the multivariate fit.
  checkmate::assert_false(control$interaction)
  assert_df_with_variables(data, variables)
  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # With a strata variable, the "likelihood" p-value method is rejected.
  if (!is.null(variables$strata)) {
    checkmate::assert_disjunct(control$pval_method, "likelihood")
  }

  # A single model containing all terms is fitted.
  # NOTE(review): unlike `fit_coxreg_univar()`, unused factor levels are not dropped
  # from `data` here - confirm this difference is intended.
  form <- h_coxreg_multivar_formula(variables)
  mod <- survival::coxph(
    formula = stats::as.formula(form),
    data = data,
    ties = control$ties
  )
  structure(
    list(
      mod = mod,
      data = data,
      control = control,
      vars = variables
    ),
    class = "coxreg.multivar"
  )
}

#' Muffled `car::Anova`
#'
#' When applied to survival models, [car::Anova()] signals that the `strata` term is dropped from the model formula
#' when present. This function deliberately muffles this message.
#'
#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()].
#' @param test_statistic (`string`)\cr the method used for estimation of p.values; `wald` (default) or `likelihood`.
#'
#' @return Returns the output of [car::Anova()], with messages muffled.
#'
#' @keywords internal
muffled_car_anova <- function(mod, test_statistic) {
  # Any error from `car::Anova()` is re-raised with a hint about likely convergence problems.
  rethrow_with_hint <- function(e) {
    stop(paste(
      "the model seems to have convergence problems, please try to change",
      "the configuration of covariates or strata variables, e.g.",
      "- original error:", e
    ))
  }

  # Calling handlers: messages are silenced and evaluation continues, errors
  # are routed through the hint above.
  withCallingHandlers(
    expr = {
      car::Anova(
        mod,
        test.statistic = test_statistic,
        type = "III"
      )
    },
    message = function(m) invokeRestart("muffleMessage"),
    error = rethrow_with_hint
  )
}

#' Horizontal Waterfall Plot
#'
#' This basic waterfall plot visualizes a quantity `height` ordered by value with some markup.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param height (`numeric`)\cr vector containing values to be plotted as the waterfall bars.
#' @param id (`character`)\cr vector containing IDs to use as the x-axis label for the waterfall bars.
#' @param col (`character`)\cr colors.
#' @param col_var (`factor`, `character` or `NULL`)\cr categorical variable for bar coloring. `NULL` by default.
#' @param xlab (`character`)\cr x label. If `NULL` (default), the expression passed to `id` is used.
#' @param ylab (`character`)\cr y label. If `NULL` (default), the expression passed to `height` is used.
#' @param title (`character`)\cr text to be displayed as plot title.
#' @param col_legend_title (`character`)\cr text to be displayed as legend title.
#'
#' @return A `ggplot` waterfall plot.
#'
#' @examples
#' library(dplyr)
#' library(nestcolor)
#'
#' g_waterfall(height = c(3, 5, -1), id = letters[1:3])
#'
#' g_waterfall(
#'   height = c(3, 5, -1),
#'   id = letters[1:3],
#'   col_var = letters[1:3]
#' )
#'
#' adsl_f <- tern_ex_adsl %>%
#'   select(USUBJID, STUDYID, ARM, ARMCD, SEX)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "OVRINV") %>%
#'   mutate(pchg = rnorm(n(), 10, 50))
#'
#' adrs_f <- head(adrs_f, 30)
#' adrs_f <- adrs_f[!duplicated(adrs_f$USUBJID), ]
#' head(adrs_f)
#'
#' g_waterfall(
#'   height = adrs_f$pchg,
#'   id = adrs_f$USUBJID,
#'   col_var = adrs_f$AVALC
#' )
#'
#' g_waterfall(
#'   height = adrs_f$pchg,
#'   id = paste("asdfdsfdsfsd", adrs_f$USUBJID),
#'   col_var = adrs_f$SEX
#' )
#'
#' g_waterfall(
#'   height = adrs_f$pchg,
#'   id = paste("asdfdsfdsfsd", adrs_f$USUBJID),
#'   xlab = "ID",
#'   ylab = "Percentage Change",
#'   title = "Waterfall plot"
#' )
#'
#' @export
g_waterfall <- function(height,
                        id,
                        col_var = NULL,
                        col = getOption("ggplot2.discrete.colour"),
                        xlab = NULL,
                        ylab = NULL,
                        col_legend_title = NULL,
                        title = NULL) {
  # Check the inputs with `check_same_n()`; `col_var` only takes part when it is given.
  has_col_var <- !is.null(col_var)
  if (has_col_var) {
    check_same_n(height = height, id = id, col_var = col_var)
  } else {
    check_same_n(height = height, id = id)
  }

  checkmate::assert_multi_class(col_var, c("character", "factor"), null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)

  # Labels that were not supplied fall back to the expressions the caller passed in.
  if (is.null(xlab)) {
    xlab <- deparse(substitute(id))
  }
  if (is.null(ylab)) {
    ylab <- deparse(substitute(height))
  }
  if (is.null(col_legend_title) && !missing(col_var)) {
    col_legend_title <- deparse(substitute(col_var))
  }

  # One row per bar, sorted from the largest to the smallest height.
  fill_values <- if (has_col_var) to_n(col_var, length(height)) else "x"
  bars <- data.frame(
    height = height,
    id = as.character(id),
    col_var = fill_values,
    stringsAsFactors = FALSE
  )
  bars <- bars[order(bars$height, decreasing = TRUE), ]

  # Value labels sit above non-negative bars and below negative ones.
  bar_labels <- format(bars$height, digits = 2)
  label_vjust <- ifelse(bars$height >= 0, -0.5, 1.5)

  p <- ggplot2::ggplot(bars, ggplot2::aes(x = factor(id, levels = id), y = height)) +
    ggplot2::geom_col() +
    ggplot2::geom_text(label = bar_labels, vjust = label_vjust) +
    ggplot2::xlab(xlab) +
    ggplot2::ylab(ylab) +
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, hjust = 0, vjust = .5))

  # Colour the bars by `col_var` and show a boxed legend below the plot.
  if (has_col_var) {
    legend_theme <- ggplot2::theme(
      legend.position = "bottom",
      legend.background = ggplot2::element_blank(),
      legend.title = ggplot2::element_text(face = "bold"),
      legend.box.background = ggplot2::element_rect(colour = "black")
    )
    p <- p +
      ggplot2::aes(fill = col_var) +
      ggplot2::labs(fill = col_legend_title) +
      legend_theme
  }

  # A user-supplied palette replaces the default fill scale.
  if (!is.null(col)) {
    p <- p + ggplot2::scale_fill_manual(values = col)
  }

  if (!is.null(title)) {
    p <- p +
      ggplot2::labs(title = title) +
      ggplot2::theme(plot.title = ggplot2::element_text(face = "bold"))
  }

  p
}

#' Count Patients with Marked Laboratory Abnormalities
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Primary analysis variable `.var` indicates whether single, replicated or last marked laboratory
#' abnormality was observed (`factor`). Additional analysis variables are `id` (`character` or `factor`)
#' and `direction` (`factor`) indicating the direction of the abnormality. Denominator is number of
#' patients with at least one valid measurement during the analysis.
#'   * For `Single, not last` and `Last or replicated`: Numerator is number of patients
#'     with `Single, not last` and `Last or replicated` levels, respectively.
#'   * For `Any`: Numerator is the number of patients with either single or
#'     replicated marked abnormalities.
#'
#' @inheritParams argument_convention
#' @param category (`list`)\cr with different marked category names for single
#'   and last or replicated.
#'
#' @note `Single, not last` and `Last or replicated` levels are mutually exclusive. If a patient has
#'   abnormalities that meet both the `Single, not last` and `Last or replicated` criteria, then the
#'   patient will be counted only under the `Last or replicated` category.
#'
#' @name abnormal_by_marked
NULL

#' @describeIn abnormal_by_marked Statistics function for patients with marked lab abnormalities.
#'
#' @return
#' * `s_count_abnormal_by_marked()` returns statistic `count_fraction` with `Single, not last`,
#'   `Last or replicated`, and `Any` results.
#'
#' @examples
#' library(dplyr)
#'
#' df <- data.frame(
#'   USUBJID = as.character(c(rep(1, 5), rep(2, 5), rep(1, 5), rep(2, 5))),
#'   ARMCD = factor(c(rep("ARM A", 5), rep("ARM B", 5), rep("ARM A", 5), rep("ARM B", 5))),
#'   ANRIND = factor(c(
#'     "NORMAL", "HIGH", "HIGH", "HIGH HIGH", "HIGH",
#'     "HIGH", "HIGH", "HIGH HIGH", "NORMAL", "HIGH HIGH", "NORMAL", "LOW", "LOW", "LOW LOW", "LOW",
#'     "LOW", "LOW", "LOW LOW", "NORMAL", "LOW LOW"
#'   )),
#'   ONTRTFL = rep(c("", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"), 2),
#'   PARAMCD = factor(c(rep("CRP", 10), rep("ALT", 10))),
#'   AVALCAT1 = factor(rep(c("", "", "", "SINGLE", "REPLICATED", "", "", "LAST", "", "SINGLE"), 2)),
#'   stringsAsFactors = FALSE
#' )
#'
#' df <- df %>%
#'   mutate(abn_dir = factor(
#'     case_when(
#'       ANRIND == "LOW LOW" ~ "Low",
#'       ANRIND == "HIGH HIGH" ~ "High",
#'       TRUE ~ ""
#'     ),
#'     levels = c("Low", "High")
#'   ))
#'
#' # Select only post-baseline records.
#' df <- df %>% filter(ONTRTFL == "Y")
#' df_crp <- df %>%
#'   filter(PARAMCD == "CRP") %>%
#'   droplevels()
#' full_parent_df <- list(df_crp, "not_needed")
#' cur_col_subset <- list(rep(TRUE, nrow(df_crp)), "not_needed")
#' spl_context <- data.frame(
#'   split = c("PARAMCD", "GRADE_DIR"),
#'   full_parent_df = I(full_parent_df),
#'   cur_col_subset = I(cur_col_subset)
#' )
#'
#' @keywords internal
s_count_abnormal_by_marked <- function(df,
                                       .var = "AVALCAT1",
                                       .spl_context,
                                       category = list(single = "SINGLE", last_replicated = c("LAST", "REPLICATED")),
                                       variables = list(id = "USUBJID", param = "PARAM", direction = "abn_dir")) {
  checkmate::assert_string(.var)
  checkmate::assert_list(variables)
  checkmate::assert_list(category)
  checkmate::assert_subset(names(category), c("single", "last_replicated"))
  checkmate::assert_subset(names(variables), c("id", "param", "direction"))
  checkmate::assert_vector(unique(df[[variables$direction]]), max.len = 1)

  assert_df_with_variables(df, c(aval = .var, variables))
  checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))

  # The denominator is taken from the parent data of the parameter split, restricted to
  # the current column. Subjects are counted once, even if they have records in both
  # the high and the low direction.
  param_split <- .spl_context[.spl_context$split == variables[["param"]], ]
  parent_ids <- param_split$full_parent_df[[1]][[variables[["id"]]]]
  denom <- length(unique(parent_ids[param_split$cur_col_subset[[1]]]))

  # Without any subject in the denominator every statistic is zero.
  if (denom == 0) {
    nothing <- c(0, 0)
    return(list(count_fraction = list(
      "Single, not last" = nothing,
      "Last or replicated" = nothing,
      "Any Abnormality" = nothing
    )))
  }

  # Unique subject IDs whose analysis value falls into the given category levels.
  ids_with <- function(lvls) {
    unique(df[df[[.var]] %in% lvls, variables$id, drop = TRUE])
  }
  # Pair a count with its fraction of the denominator.
  count_with_fraction <- function(n) {
    c(n, n / denom)
  }

  ids_last_replicated <- ids_with(category[["last_replicated"]])
  # Subjects with both kinds of abnormality are counted under last/replicated only.
  ids_single <- setdiff(ids_with(category[["single"]]), ids_last_replicated)

  n_single <- length(ids_single)
  n_last_replicated <- length(ids_last_replicated)

  list(count_fraction = list(
    "Single, not last" = count_with_fraction(n_single),
    "Last or replicated" = count_with_fraction(n_last_replicated),
    "Any Abnormality" = count_with_fraction(n_single + n_last_replicated)
  ))
}

#' @describeIn abnormal_by_marked Formatted analysis function which is used as `afun`
#'   in `count_abnormal_by_marked()`.
#'
#' @return
#' * `a_count_abnormal_by_marked()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#'
#' @keywords internal
a_count_abnormal_by_marked <- make_afun(
  # Statistics function being wrapped; its `count_fraction` results are formatted
  # with `format_count_fraction`.
  s_count_abnormal_by_marked,
  .formats = c(count_fraction = format_count_fraction)
)

#' @describeIn abnormal_by_marked Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_by_marked()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal_by_marked()` to the table layout.
#'
#' @examples
#' map <- unique(
#'   df[df$abn_dir %in% c("Low", "High") & df$AVALCAT1 != "", c("PARAMCD", "abn_dir")]
#' ) %>%
#'   lapply(as.character) %>%
#'   as.data.frame() %>%
#'   arrange(PARAMCD, abn_dir)
#'
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   split_rows_by("PARAMCD") %>%
#'   summarize_num_patients(
#'     var = "USUBJID",
#'     .stats = "unique_count"
#'   ) %>%
#'   split_rows_by(
#'     "abn_dir",
#'     split_fun = trim_levels_to_map(map)
#'   ) %>%
#'   count_abnormal_by_marked(
#'     var = "AVALCAT1",
#'     variables = list(
#'       id = "USUBJID",
#'       param = "PARAMCD",
#'       direction = "abn_dir"
#'     )
#'   ) %>%
#'   build_table(df = df)
#'
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   split_rows_by("PARAMCD") %>%
#'   summarize_num_patients(
#'     var = "USUBJID",
#'     .stats = "unique_count"
#'   ) %>%
#'   split_rows_by(
#'     "abn_dir",
#'     split_fun = trim_levels_in_group("abn_dir")
#'   ) %>%
#'   count_abnormal_by_marked(
#'     var = "AVALCAT1",
#'     variables = list(
#'       id = "USUBJID",
#'       param = "PARAMCD",
#'       direction = "abn_dir"
#'     )
#'   ) %>%
#'   build_table(df = df)
#'
#' @export
count_abnormal_by_marked <- function(lyt,
                                     var,
                                     ...,
                                     .stats = NULL,
                                     .formats = NULL,
                                     .labels = NULL,
                                     .indent_mods = NULL) {
  checkmate::assert_string(var)

  # Customize the formatted analysis function with the caller's presentation overrides.
  # The `count_fraction` statistic is passed as `.ungroup_stats`.
  afun <- make_afun(
    a_count_abnormal_by_marked,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "count_fraction"
  )

  # Anything passed through `...` is handed to the analysis function as extra arguments.
  analyze(
    lyt = lyt,
    vars = var,
    afun = afun,
    show_labels = "hidden",
    extra_args = list(...)
  )
}

#' Summary for analysis of covariance (`ANCOVA`).
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize results of `ANCOVA`. This can be used to analyze multiple endpoints and/or
#' multiple timepoints within the same response variable `.var`.
#'
#' @inheritParams argument_convention
#'
#' @name summarize_ancova
NULL

#' Helper Function to Return Results of a Linear Model
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called in `.var` and `variables`.
#' @param variables (named `list` of `strings`)\cr list of additional analysis variables, with expected elements:
#'   * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple groups will be
#'     summarized. Specifically, the first level of `arm` variable is taken as the reference group.
#'   * `covariates` (`character`)\cr a vector that can contain single variable names (such as `"X1"`), and/or
#'     interaction terms indicated by `"X1 * X2"`.
#' @param interaction_item (`character`)\cr name of the variable that should have interactions
#'   with arm. If the interaction is not needed, the default option is `NULL`.
#'
#' @return The summary of a linear model.
#'
#' @examples
#' h_ancova(
#'   .var = "Sepal.Length",
#'   .df_row = iris,
#'   variables = list(arm = "Species", covariates = c("Petal.Length * Petal.Width", "Sepal.Width"))
#' )
#'
#' @export
h_ancova <- function(.var,
                     .df_row,
                     variables,
                     interaction_item = NULL) {
  checkmate::assert_string(.var)
  checkmate::assert_list(variables)
  checkmate::assert_subset(names(variables), c("arm", "covariates"))
  assert_df_with_variables(.df_row, list(rsp = .var))

  arm <- variables$arm
  covariates <- variables$covariates

  # When covariates are given, the variables they refer to must be present in the data.
  if (length(covariates) > 0) {
    var_list <- get_covariates(covariates)
    assert_df_with_variables(.df_row, var_list)
  }

  # Right-hand side of the model: covariate terms (if any) followed by the arm.
  covariates_part <- paste(covariates, collapse = " + ")
  rhs <- if (covariates_part != "") paste0(covariates_part, " + ", arm) else arm
  formula <- stats::as.formula(paste0(.var, " ~ ", rhs))

  # Marginal means are requested over the arm, plus the interaction variable when given.
  specs <- if (is.null(interaction_item)) arm else c(arm, interaction_item)

  lm_fit <- stats::lm(
    formula = formula,
    data = .df_row
  )

  # The data is passed again so that the factor levels of the arm variable can be inferred.
  emmeans::emmeans(
    lm_fit,
    specs = specs,
    data = .df_row
  )
}

#' @describeIn summarize_ancova Statistics function that produces a named list of results
#'   of the investigated linear model.
#'
#' @inheritParams h_ancova
#' @param interaction_y (`character`)\cr a selected item inside of the `interaction_item` column which will be used
#'   to select the specific `ANCOVA` results. If the interaction is not needed, the default option is `FALSE`.
#'
#' @return
#' * `s_ancova()` returns a named list of 5 statistics:
#'   * `n`: Count of complete sample size for the group.
#'   * `lsmean`: Estimated marginal means in the group.
#'   * `lsmean_diff`: Difference in estimated marginal means in comparison to the reference group.
#'     If working with the reference group, this will be empty.
#'   * `lsmean_diff_ci`: Confidence level for difference in estimated marginal means in comparison
#'     to the reference group.
#'   * `pval`: p-value (not adjusted for multiple comparisons).
#'
#' @examples
#' library(dplyr)
#'
#' df <- iris %>% filter(Species == "virginica")
#' .df_row <- iris
#' .var <- "Petal.Length"
#' variables <- list(arm = "Species", covariates = "Sepal.Length * Sepal.Width")
#' .ref_group <- iris %>% filter(Species == "setosa")
#' conf_level <- 0.95
#'
#' @keywords internal
s_ancova <- function(df,
                     .var,
                     .df_row,
                     variables,
                     .ref_group,
                     .in_ref_col,
                     conf_level,
                     interaction_y = FALSE,
                     interaction_item = NULL) {
  # Fit the linear model on `.df_row` and obtain its estimated marginal means.
  emmeans_fit <- h_ancova(.var = .var, variables = variables, .df_row = .df_row, interaction_item = interaction_item)

  # Summarize the marginal means at the requested confidence level.
  sum_fit <- summary(
    emmeans_fit,
    level = conf_level
  )

  arm <- variables$arm

  # `df` is expected to contain exactly one arm level.
  sum_level <- as.character(unique(df[[arm]]))

  # Ensure that there is only one element in sum_level.
  checkmate::assert_scalar(sum_level)

  # Keep the marginal-mean rows that belong to this arm level.
  sum_fit_level <- sum_fit[sum_fit[[arm]] == sum_level, ]

  # Get the index of the ref arm
  if (interaction_y != FALSE) {
    # Response values are restricted to rows where the interaction variable equals `interaction_y`.
    y <- unlist(df[(df[[interaction_item]] == interaction_y), .var])
    # convert characters selected in interaction_y into the numeric order
    interaction_y <- which(sum_fit_level[[interaction_item]] == interaction_y)
    sum_fit_level <- sum_fit_level[interaction_y, ]
    # if interaction is called, reset the index
    # NOTE(review): `ref_key` is presumably the position of the reference arm among the
    # marginal means, shifted by the block of the selected interaction level - confirm.
    ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])])
    ref_key <- tail(ref_key, n = 1)
    ref_key <- (interaction_y - 1) * length(unique(.df_row[[arm]])) + ref_key
  } else {
    y <- df[[.var]]
    # Get the index of the ref arm when interaction is not called
    ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])])
    ref_key <- tail(ref_key, n = 1)
  }

  if (.in_ref_col) {
    # In the reference column the comparison statistics are returned empty.
    list(
      n = length(y[!is.na(y)]),
      lsmean = formatters::with_label(sum_fit_level$emmean, "Adjusted Mean"),
      lsmean_diff = formatters::with_label(character(), "Difference in Adjusted Means"),
      lsmean_diff_ci = formatters::with_label(character(), f_conf_level(conf_level)),
      pval = formatters::with_label(character(), "p-value")
    )
  } else {
    # Estimate the differences between the marginal means.
    emmeans_contrasts <- emmeans::contrast(
      emmeans_fit,
      # Compare all arms versus the control arm.
      method = "trt.vs.ctrl",
      # Take the arm factor from .ref_group as the control arm.
      ref = ref_key,
      level = conf_level
    )
    sum_contrasts <- summary(
      emmeans_contrasts,
      # Derive confidence intervals, t-tests and p-values.
      infer = TRUE,
      # Do not adjust the p-values for multiplicity.
      adjust = "none"
    )

    # Select the contrasts whose label matches the current arm level. `grepl()` treats
    # `sum_level` as a regular expression.
    # NOTE(review): arm levels containing regex metacharacters, or levels that are
    # substrings of other levels, may select unintended rows - confirm.
    sum_contrasts_level <- sum_contrasts[grepl(sum_level, sum_contrasts$contrast), ]
    if (interaction_y != FALSE) {
      sum_contrasts_level <- sum_contrasts_level[interaction_y, ]
    }

    list(
      n = length(y[!is.na(y)]),
      lsmean = formatters::with_label(sum_fit_level$emmean, "Adjusted Mean"),
      lsmean_diff = formatters::with_label(sum_contrasts_level$estimate, "Difference in Adjusted Means"),
      lsmean_diff_ci = formatters::with_label(
        c(sum_contrasts_level$lower.CL, sum_contrasts_level$upper.CL),
        f_conf_level(conf_level)
      ),
      pval = formatters::with_label(sum_contrasts_level$p.value, "p-value")
    )
  }
}

#' @describeIn summarize_ancova Formatted analysis function which is used as `afun` in `summarize_ancova()`.
#'
#' @return
#' * `a_ancova()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#'
#' @keywords internal
a_ancova <- make_afun(
  s_ancova,
  # Display format of each statistic.
  .formats = c(
    n = "xx",
    lsmean = "xx.xx",
    lsmean_diff = "xx.xx",
    lsmean_diff_ci = "(xx.xx, xx.xx)",
    pval = "x.xxxx | (<0.0001)"
  ),
  # The confidence interval and p-value rows get one extra level of indentation.
  .indent_mods = c(n = 0L, lsmean = 0L, lsmean_diff = 0L, lsmean_diff_ci = 1L, pval = 1L),
  .null_ref_cells = FALSE
)

#' @describeIn summarize_ancova Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `summarize_ancova()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_ancova()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("Species", ref_group = "setosa") %>%
#'   add_colcounts() %>%
#'   summarize_ancova(
#'     vars = "Petal.Length",
#'     variables = list(arm = "Species", covariates = NULL),
#'     table_names = "unadj",
#'     conf_level = 0.95, var_labels = "Unadjusted comparison",
#'     .labels = c(lsmean = "Mean", lsmean_diff = "Difference in Means")
#'   ) %>%
#'   summarize_ancova(
#'     vars = "Petal.Length",
#'     variables = list(arm = "Species", covariates = c("Sepal.Length", "Sepal.Width")),
#'     table_names = "adj",
#'     conf_level = 0.95, var_labels = "Adjusted comparison (covariates: Sepal.Length and Sepal.Width)"
#'   ) %>%
#'   build_table(iris)
#'
#' @export
summarize_ancova <- function(lyt,
                             vars,
                             var_labels,
                             ...,
                             show_labels = "visible",
                             table_names = vars,
                             .stats = NULL,
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL,
                             interaction_y = FALSE,
                             interaction_item = NULL) {
  # Pass the presentation overrides and the interaction settings to `make_afun()`.
  afun <- make_afun(
    a_ancova,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    interaction_y = interaction_y,
    interaction_item = interaction_item
  )

  # Remaining arguments in `...` are forwarded to the analysis function.
  analyze(
    lyt = lyt,
    vars = vars,
    var_labels = var_labels,
    afun = afun,
    show_labels = show_labels,
    table_names = table_names,
    extra_args = list(...)
  )
}

#' Tabulate Biomarker Effects on Survival by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tabulate the estimated effects of multiple continuous biomarker variables
#' across population subgroups.
#'
#' @inheritParams argument_convention
#' @inheritParams fit_coxreg_multivar
#' @inheritParams survival_duration_subgroups
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. The tables are then typically used as input for forest plots.
#'
#' @examples
#' library(dplyr)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' df <- extract_survival_biomarkers(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     strata = "STRATA1",
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adtte_f
#' )
#' df
#'
#' @name survival_biomarkers_subgroups
NULL

#' Prepares Survival Data Estimates for Multiple Biomarkers in a Single Data Frame
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares estimates for number of events, patients and median survival times, as well as hazard ratio estimates,
#' confidence intervals and p-values, for multiple biomarkers across population subgroups in a single data frame.
#' `variables` corresponds to the names of variables found in `data`, passed as a named `list` and requires elements
#' `tte`, `is_event`, `biomarkers` (vector of continuous biomarker variables), and optionally `subgroups` and `strat`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams fit_coxreg_multivar
#' @inheritParams survival_duration_subgroups
#'
#' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_tot_events`,
#'   `median`, `hr`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`,
#'   `var_label`, and `row_type`.
#'
#' @seealso [h_coxreg_mult_cont_df()] which is used internally, [tabulate_survival_biomarkers()].
#'
#' @examples
#' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`,
#' # in multiple regression models containing one covariate `SEX`,
#' # as well as one stratification variable `STRATA1`. The subgroups
#' # are defined by the levels of `BMRKR2`.
#'
#' library(dplyr)
#'
#' adtte <- tern_ex_adtte
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' df <- extract_survival_biomarkers(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     strata = "STRATA1",
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adtte_f
#' )
#' df
#'
#' # Here we group the levels of `BMRKR2` manually.
#' df_grouped <- extract_survival_biomarkers(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     strata = "STRATA1",
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#' df_grouped
#'
#' @export
extract_survival_biomarkers <- function(variables,
                                        data,
                                        groups_lists = list(),
                                        control = control_coxreg(),
                                        label_all = "All Patients") {
  checkmate::assert_list(variables)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_string(label_all)

  # Estimates over all patients form the leading "content" rows.
  result_all <- h_coxreg_mult_cont_df(
    variables = variables,
    data = data,
    control = control
  )
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables there is nothing more to add.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  # Estimate within one subgroup and attach that subgroup's labels to every result row.
  fit_subgroup <- function(grp) {
    estimates <- h_coxreg_mult_cont_df(
      variables = variables,
      data = grp$df,
      control = control
    )
    labels <- grp$df_labels[rep(1, times = nrow(estimates)), ]
    cbind(estimates, labels)
  }

  l_data <- h_split_by_subgroups(
    data,
    variables$subgroups,
    groups_lists = groups_lists
  )
  l_result <- lapply(l_data, fit_subgroup)

  result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(result_all, result_subgroups)
}

#' @describeIn survival_biomarkers_subgroups Table-creating function which creates a table
#'   summarizing biomarker effects on survival by subgroup.
#'
#' @param df (`data.frame`)\cr containing all analysis variables, as returned by
#'   [extract_survival_biomarkers()].
#' @param vars (`character`)\cr the names of statistics to be reported among:
#'   * `n_tot_events`: Total number of events per group.
#'   * `n_tot`: Total number of observations per group.
#'   * `median`: Median survival time.
#'   * `hr`: Hazard ratio.
#'   * `ci`: Confidence interval of hazard ratio.
#'   * `pval`: p-value of the effect.
#'   Note, one of the statistics `n_tot` and `n_tot_events`, as well as both `hr` and `ci` are required.
#'
#' @return An `rtables` table summarizing biomarker effects on survival by subgroup.
#'
#' @note In contrast to [tabulate_survival_subgroups()] this tabulation function does
#'   not start from an input layout `lyt`. This is because internally the table is
#'   created by combining multiple subtables.
#'
#' @seealso [h_tab_surv_one_biomarker()] which is used internally, [extract_survival_biomarkers()].
#'
#' @examples
#' ## Table with default columns.
#' tabulate_survival_biomarkers(df)
#'
#' ## Table with a manually chosen set of columns: leave out "pval", reorder.
#' tab <- tabulate_survival_biomarkers(
#'   df = df,
#'   vars = c("n_tot_events", "ci", "n_tot", "median", "hr"),
#'   time_unit = as.character(adtte_f$AVALU[1])
#' )
#'
#' ## Finally produce the forest plot.
#' \donttest{
#' g_forest(tab, xlim = c(0.8, 1.2))
#' }
#'
#' @export
tabulate_survival_biomarkers <- function(df,
                                         vars = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"),
                                         time_unit = NULL,
                                         .indent_mods = 0L) {
  checkmate::assert_data_frame(df)
  checkmate::assert_character(df$biomarker)
  checkmate::assert_character(df$biomarker_label)
  checkmate::assert_subset(vars, c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"))

  # Build the subtable of one biomarker and label its first row path with the biomarker label.
  tabulate_one <- function(df_sub) {
    tab_sub <- h_tab_surv_one_biomarker(
      df = df_sub,
      vars = vars,
      time_unit = time_unit,
      .indent_mods = .indent_mods
    )
    label_at_path(tab_sub, path = row_paths(tab_sub)[[1]][1]) <- df_sub$biomarker_label[1]
    tab_sub
  }

  # One subtable per biomarker, stacked into a single table.
  tabs <- lapply(split(df, f = df$biomarker), FUN = tabulate_one)
  result <- do.call(rbind, tabs)

  # The attributes record where the hazard ratio, its confidence interval and the first
  # `n_tot*` statistic sit within `vars`.
  structure(
    result,
    forest_header = paste0(c("Higher", "Lower"), "\nBetter"),
    col_x = match("hr", vars),
    col_ci = match("ci", vars),
    col_symbol_size = grep("^n_tot", vars)[1]
  )
}

#' Convert List of Groups to Data Frame
#'
#' This converts a list of group levels into a data frame format which is expected by [rtables::add_combo_levels()].
#'
#' @param groups_list (named `list` of `character`)\cr specifies the new group levels via the names and the
#'   levels that belong to it in the character vectors that are elements of the list.
#'
#' @return [tibble::tibble()] in the required format.
#'
#' @examples
#' grade_groups <- list(
#'   "Any Grade (%)" = c("1", "2", "3", "4", "5"),
#'   "Grade 3-4 (%)" = c("3", "4"),
#'   "Grade 5 (%)" = "5"
#' )
#' groups_list_to_df(grade_groups)
#'
#' @export
groups_list_to_df <- function(groups_list) {
  checkmate::assert_list(groups_list, names = "named")
  # Every group must be given as a character vector of levels.
  lapply(groups_list, checkmate::assert_character)

  group_labels <- names(groups_list)
  n_groups <- length(groups_list)

  # One row per group: a name derived via `make_names()`, the original label,
  # the combined levels, and an empty list of extra arguments.
  tibble::tibble(
    valname = make_names(group_labels),
    label = group_labels,
    levelcombo = unname(groups_list),
    exargs = replicate(n_groups, list())
  )
}

#' Reference and Treatment Group Combination
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Facilitate the re-combination of groups divided as reference and treatment groups; it helps in arranging groups of
#' columns in the `rtables` framework and teal modules.
#'
#' @param fct (`factor`)\cr the variable with levels which needs to be grouped.
#' @param ref (`string`)\cr the reference level(s).
#' @param collapse (`string`)\cr a character string to separate `fct` and `ref`.
#'
#' @return A `list` with first item `ref` (reference) and second item `trt` (treatment).
#'
#' @examples
#' groups <- combine_groups(
#'   fct = DM$ARM,
#'   ref = c("B: Placebo")
#' )
#'
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   summarize_vars("AGE") %>%
#'   build_table(DM)
#'
#' @export
combine_groups <- function(fct,
                           ref = NULL,
                           collapse = "/") {
  checkmate::assert_string(collapse)
  checkmate::assert_character(ref, min.chars = 1, any.missing = FALSE, null.ok = TRUE)
  checkmate::assert_multi_class(fct, classes = c("factor", "character"))

  fct <- as_factor_keep_attributes(fct)
  group_levels <- levels(fct)

  # The first level is the reference unless reference levels are given explicitly,
  # in which case they must be existing levels.
  if (!is.null(ref)) {
    checkmate::assert_subset(ref, group_levels)
  } else {
    ref <- group_levels[1]
  }

  # Split the levels, in their original order, into reference and treatment.
  is_ref <- group_levels %in% ref
  groups <- list(
    ref = group_levels[is_ref],
    trt = group_levels[!is_ref]
  )

  # Each group is named after its levels, joined by `collapse`.
  group_names <- lapply(groups, paste, collapse = collapse)
  stats::setNames(groups, nm = group_names)
}

#' Split Columns by Groups of Levels
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @inheritParams groups_list_to_df
#' @param ... additional arguments to [rtables::split_cols_by()] in order. For instance, to
#'   control formats (`format`), add a joint column for all groups (`incl_all`).
#'
#' @return A layout object suitable for passing to further layouting functions. Adding
#'   this function to an `rtable` layout will add a column split including the given
#'   groups to the table layout.
#'
#' @seealso [rtables::split_cols_by()]
#'
#' @examples
#' # 1 - Basic use
#'
#' # Without group combination `split_cols_by_groups` is
#' # equivalent to [rtables::split_cols_by()].
#' basic_table() %>%
#'   split_cols_by_groups("ARM") %>%
#'   add_colcounts() %>%
#'   analyze("AGE") %>%
#'   build_table(DM)
#'
#' # Add a reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", ref_group = "B: Placebo") %>%
#'   add_colcounts() %>%
#'   analyze(
#'     "AGE",
#'     afun = function(x, .ref_group, .in_ref_col) {
#'       if (.in_ref_col) {
#'         in_rows("Diff Mean" = rcell(NULL))
#'       } else {
#'         in_rows("Diff Mean" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
#'       }
#'     }
#'   ) %>%
#'   build_table(DM)
#'
#' # 2 - Adding group specification
#'
#' # Manual preparation of the groups.
#' groups <- list(
#'   "Arms A+B" = c("A: Drug X", "B: Placebo"),
#'   "Arms A+C" = c("A: Drug X", "C: Combination")
#' )
#'
#' # Use of split_cols_by_groups without reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   analyze("AGE") %>%
#'   build_table(DM)
#'
#' # Including differentiated output in the reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups_list = groups, ref_group = "Arms A+B") %>%
#'   analyze(
#'     "AGE",
#'     afun = function(x, .ref_group, .in_ref_col) {
#'       if (.in_ref_col) {
#'         in_rows("Diff. of Averages" = rcell(NULL))
#'       } else {
#'         in_rows("Diff. of Averages" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
#'       }
#'     }
#'   ) %>%
#'   build_table(DM)
#'
#' # 3 - Binary list dividing factor levels into reference and treatment
#'
#' # `combine_groups` defines reference and treatment.
#' groups <- combine_groups(
#'   fct = DM$ARM,
#'   ref = c("A: Drug X", "B: Placebo")
#' )
#' groups
#'
#' # Use group definition without reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups_list = groups) %>%
#'   add_colcounts() %>%
#'   analyze("AGE") %>%
#'   build_table(DM)
#'
#' # Use group definition with reference column (first item of groups).
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups, ref_group = names(groups)[1]) %>%
#'   add_colcounts() %>%
#'   analyze(
#'     "AGE",
#'     afun = function(x, .ref_group, .in_ref_col) {
#'       if (.in_ref_col) {
#'         in_rows("Diff Mean" = rcell(NULL))
#'       } else {
#'         in_rows("Diff Mean" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
#'       }
#'     }
#'   ) %>%
#'   build_table(DM)
#'
#' @export
split_cols_by_groups <- function(lyt,
                                 var,
                                 groups_list = NULL,
                                 ref_group = NULL,
                                 ...) {
  # No grouping requested: plain column split on `var`.
  if (is.null(groups_list)) {
    return(
      split_cols_by(
        lyt = lyt,
        var = var,
        ref_group = ref_group,
        ...
      )
    )
  }

  groups_df <- groups_list_to_df(groups_list)

  # `ref_group` is given as a group label; translate it to the matching value name.
  if (!is.null(ref_group)) {
    ref_group <- groups_df$valname[groups_df$label == ref_group]
  }

  # Split function that adds the combined levels and keeps only those.
  combo_split_fun <- add_combo_levels(groups_df, keep_levels = groups_df$valname)

  split_cols_by(
    lyt = lyt,
    var = var,
    split_fun = combo_split_fun,
    ref_group = ref_group,
    ...
  )
}

#' Combine Counts
#'
#' Simplifies the estimation of column counts, especially when group combination is required.
#'
#' @inheritParams combine_groups
#' @inheritParams groups_list_to_df
#'
#' @return A `vector` of column counts.
#'
#' @seealso [combine_groups()]
#'
#' @examples
#' ref <- c("A: Drug X", "B: Placebo")
#' groups <- combine_groups(fct = DM$ARM, ref = ref)
#'
#' col_counts <- combine_counts(
#'   fct = DM$ARM,
#'   groups_list = groups
#' )
#'
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   summarize_vars("AGE") %>%
#'   build_table(DM, col_counts = col_counts)
#'
#' ref <- "A: Drug X"
#' groups <- combine_groups(fct = DM$ARM, ref = ref)
#' col_counts <- combine_counts(
#'   fct = DM$ARM,
#'   groups_list = groups
#' )
#'
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   summarize_vars("AGE") %>%
#'   build_table(DM, col_counts = col_counts)
#'
#' @export
combine_counts <- function(fct, groups_list = NULL) {
  checkmate::assert_multi_class(fct, classes = c("factor", "character"))

  fct <- as_factor_keep_attributes(fct)

  # Tabulate the levels once; the per-group sums below only index into this
  # table, so there is no need to rebuild it for every group.
  level_counts <- table(fct)

  if (is.null(groups_list)) {
    # One count per level, returned as a plain named numeric vector.
    stats::setNames(as.numeric(level_counts), nm = dimnames(level_counts)[[1]])
  } else {
    # One count per group: the sum over the levels that belong to the group.
    vapply(
      X = groups_list,
      FUN = function(group_levels) sum(level_counts[group_levels]),
      FUN.VALUE = 1
    )
  }
}

#' Survival Time Point Analysis
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize patients' survival rate and difference of survival rates between groups at a time point.
#'
#' @inheritParams argument_convention
#' @inheritParams s_surv_time
#' @param time_point (`number`)\cr survival time point of interest.
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
#'   [control_surv_timepoint()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for survival rate.
#'   * `conf_type` (`string`)\cr confidence interval type. Options are "plain" (default), "log", "log-log",
#'     see more in [survival::survfit()]. Note option "none" is no longer supported.
#'   * `time_point` (`number`)\cr survival time point of interest.
#'
#' @name survival_timepoint
NULL

#' @describeIn survival_timepoint Statistics function which analyzes survival rate.
#'
#' @return
#' * `s_surv_timepoint()` returns the statistics:
#'   * `pt_at_risk`: Patients remaining at risk.
#'   * `event_free_rate`: Event-free rate (%).
#'   * `rate_se`: Standard error of event free rate.
#'   * `rate_ci`: Confidence interval for event free rate.
#'
#' @examples
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVAL = day2month(AVAL),
#'     is_event = CNSR == 0
#'   )
#' df <- adtte_f %>%
#'   filter(ARMCD == "ARM A")
#'
#' @keywords internal
s_surv_timepoint <- function(df,
                             .var,
                             time_point,
                             is_event,
                             control = control_surv_timepoint()) {
  # Input checks: `.var` must be a numeric time column and `is_event` a logical
  # column of `df`, both non-empty and without missing values.
  checkmate::assert_string(.var)
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE)
  checkmate::assert_number(time_point)
  checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE)

  ci_type <- control$conf_type
  ci_level <- control$conf_level

  # Intercept-only survival fit, summarized at the requested time point
  # (`extend = TRUE` is passed so the summary is requested for `time_point` as given).
  km_formula <- stats::as.formula(paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1"))
  km_fit <- survival::survfit(
    formula = km_formula,
    data = df,
    conf.int = ci_level,
    conf.type = ci_type
  )
  km_summary <- summary(km_fit, times = time_point, extend = TRUE)
  km_at_tp <- as.data.frame(km_summary[c("time", "n.risk", "surv", "lower", "upper", "std.err")])

  # Start from missing values; they are only replaced by the estimates when at
  # least one patient is still at risk at the time point.
  pt_at_risk <- event_free_rate <- rate_se <- NA_real_
  rate_ci <- c(NA_real_, NA_real_)
  if (km_at_tp[["n.risk"]] != 0) {
    pt_at_risk <- km_at_tp$n.risk
    event_free_rate <- km_at_tp$surv
    rate_se <- km_at_tp$std.err
    rate_ci <- c(km_at_tp$lower, km_at_tp$upper)
  }

  # Rates, standard error and confidence limits are reported in percent.
  list(
    pt_at_risk = formatters::with_label(pt_at_risk, "Patients remaining at risk"),
    event_free_rate = formatters::with_label(event_free_rate * 100, "Event Free Rate (%)"),
    rate_se = formatters::with_label(rate_se * 100, "Standard Error of Event Free Rate"),
    rate_ci = formatters::with_label(rate_ci * 100, f_conf_level(ci_level))
  )
}

#' @describeIn survival_timepoint Formatted analysis function which is used as `afun` in `surv_timepoint()`
#'   when `method = "surv"`.
#'
#' @return
#' * `a_surv_timepoint()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_surv_timepoint <- make_afun(
  # Statistics function being wrapped.
  s_surv_timepoint,
  # Default indent modifiers per statistic: `rate_se` and `rate_ci` get 1L,
  # `pt_at_risk` and `event_free_rate` get 0L.
  .indent_mods = c(
    pt_at_risk = 0L,
    event_free_rate = 0L,
    rate_se = 1L,
    rate_ci = 1L
  ),
  # Default cell formats per statistic.
  .formats = c(
    pt_at_risk = "xx",
    event_free_rate = "xx.xx",
    rate_se = "xx.xx",
    rate_ci = "(xx.xx, xx.xx)"
  )
)

#' @describeIn survival_timepoint Statistics function which analyzes difference between two survival rates.
#'
#' @return
#' * `s_surv_timepoint_diff()` returns the statistics:
#'   * `rate_diff`: Event-free rate difference between two groups.
#'   * `rate_diff_ci`: Confidence interval for the difference.
#'   * `ztest_pval`: p-value to test the difference is 0.
#'
#' @examples
#' df_ref_group <- adtte_f %>%
#'   filter(ARMCD == "ARM B")
#'
#' @keywords internal
s_surv_timepoint_diff <- function(df,
                                  .var,
                                  .ref_group,
                                  .in_ref_col,
                                  time_point,
                                  control = control_surv_timepoint(),
                                  ...) {
  # In the reference column there is nothing to compare against: return empty
  # cells that still carry the usual labels.
  if (.in_ref_col) {
    return(
      list(
        rate_diff = formatters::with_label("", "Difference in Event Free Rate"),
        rate_diff_ci = formatters::with_label("", f_conf_level(control$conf_level)),
        ztest_pval = formatters::with_label("", "p-value (Z-test)")
      )
    )
  }

  # Estimate the event-free rate separately for the reference group and for the
  # current group (reference first). Additional arguments in `...` are
  # forwarded to `s_surv_timepoint()`.
  res_ref <- s_surv_timepoint(df = .ref_group, .var = .var, time_point = time_point, control = control, ...)
  res_x <- s_surv_timepoint(df = df, .var = .var, time_point = time_point, control = control, ...)

  # Difference of the two rates; the standard error of the difference combines
  # the two per-group standard errors in quadrature.
  rate_diff <- res_x$event_free_rate - res_ref$event_free_rate
  se_diff <- sqrt(res_x$rate_se^2 + res_ref$rate_se^2)

  # Normal-approximation confidence interval at `control$conf_level`.
  qs <- c(-1, 1) * stats::qnorm(1 - (1 - control$conf_level) / 2)
  rate_diff_ci <- rate_diff + qs * se_diff

  # Two-sided Z-test. The upper tail is requested directly instead of computing
  # `1 - pnorm(z)`, which cancels to exactly 0 for large `z`.
  ztest_pval <- if (is.na(rate_diff)) {
    NA_real_
  } else {
    2 * stats::pnorm(abs(rate_diff) / se_diff, lower.tail = FALSE)
  }

  list(
    rate_diff = formatters::with_label(rate_diff, "Difference in Event Free Rate"),
    rate_diff_ci = formatters::with_label(rate_diff_ci, f_conf_level(control$conf_level)),
    ztest_pval = formatters::with_label(ztest_pval, "p-value (Z-test)")
  )
}

#' @describeIn survival_timepoint Formatted analysis function which is used as `afun` in `surv_timepoint()`
#'   when `method = "surv_diff"`.
#'
#' @return
#' * `a_surv_timepoint_diff()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_surv_timepoint_diff <- make_afun(
  # Statistics function being wrapped.
  s_surv_timepoint_diff,
  # Default cell formats per statistic.
  .formats = c(
    rate_diff = "xx.xx",
    rate_diff_ci = "(xx.xx, xx.xx)",
    ztest_pval = "x.xxxx | (<0.0001)"
  )
)

#' @describeIn survival_timepoint Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param method (`string`)\cr either `surv` (survival estimations),
#'   `surv_diff` (difference in survival with the control) or `both`.
#' @param table_names_suffix (`string`)\cr optional suffix for the `table_names` used for the `rtables` to
#'   avoid warnings from duplicate table names.
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return
#' * `surv_timepoint()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_surv_timepoint()` and/or `s_surv_timepoint_diff()` to the table layout depending on
#'   the value of `method`.
#'
#' @examples
#' # Survival at given time points.
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   surv_timepoint(
#'     vars = "AVAL",
#'     var_labels = "Months",
#'     is_event = "is_event",
#'     time_point = 7
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' # Difference in survival at given time points.
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   surv_timepoint(
#'     vars = "AVAL",
#'     var_labels = "Months",
#'     is_event = "is_event",
#'     time_point = 9,
#'     method = "surv_diff",
#'     .indent_mods = c("rate_diff" = 0L, "rate_diff_ci" = 2L, "ztest_pval" = 2L)
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' # Survival and difference in survival at given time points.
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   surv_timepoint(
#'     vars = "AVAL",
#'     var_labels = "Months",
#'     is_event = "is_event",
#'     time_point = 9,
#'     method = "both"
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' @export
surv_timepoint <- function(lyt,
                           vars,
                           ...,
                           table_names_suffix = "",
                           var_labels = "Time",
                           show_labels = "visible",
                           method = c("surv", "surv_diff", "both"),
                           .stats = c(
                             "pt_at_risk", "event_free_rate", "rate_ci",
                             "rate_diff", "rate_diff_ci", "ztest_pval"
                           ),
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = if (method == "both") {
                             c(rate_diff = 1L, rate_diff_ci = 2L, ztest_pval = 2L)
                           } else {
                             c(rate_diff_ci = 1L, ztest_pval = 1L)
                           }) {
  # `method` has to be resolved before `.indent_mods` is touched for the first
  # time: its default is evaluated lazily and compares `method` to one string.
  method <- match.arg(method)
  checkmate::assert_string(table_names_suffix)

  # Statistics belonging to each of the two underlying analysis functions; the
  # user-supplied statistics, formats, labels and indent modifiers are split
  # along these two groups.
  f <- list(
    surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci"),
    surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval")
  )
  .stats <- h_split_param(.stats, .stats, f = f)
  .formats <- h_split_param(.formats, names(.formats), f = f)
  .labels <- h_split_param(.labels, names(.labels), f = f)
  .indent_mods <- h_split_param(.indent_mods, names(.indent_mods), f = f)

  afun_surv <- make_afun(
    a_surv_timepoint,
    .stats = .stats$surv,
    .formats = .formats$surv,
    .labels = .labels$surv,
    .indent_mods = .indent_mods$surv
  )

  afun_surv_diff <- make_afun(
    a_surv_timepoint_diff,
    .stats = .stats$surv_diff,
    .formats = .formats$surv_diff,
    .labels = .labels$surv_diff,
    .indent_mods = .indent_mods$surv_diff
  )

  # Capture the dots once instead of rebuilding `list(...)` for every lookup.
  # `$` is kept for the lookups so argument names resolve exactly as before.
  dots <- list(...)
  time_point <- dots$time_point

  # One analysis (or two, for `method = "both"`) is added per time point.
  for (i in seq_along(time_point)) {
    tpt <- time_point[i]
    tpt_label <- paste(tpt, var_labels)
    tpt_extra_args <- list(
      is_event = dots$is_event,
      control = dots$control,
      time_point = tpt
    )

    if (method %in% c("surv", "both")) {
      lyt <- analyze(
        lyt,
        vars,
        var_labels = tpt_label,
        table_names = paste0("surv_", tpt, table_names_suffix),
        show_labels = show_labels,
        afun = afun_surv,
        extra_args = tpt_extra_args
      )
    }

    if (method %in% c("surv_diff", "both")) {
      # With `method = "both"` the label is hidden for the difference rows, as
      # the survival rows added just above were given the same label.
      diff_show_labels <- if (method == "both") "hidden" else show_labels
      lyt <- analyze(
        lyt,
        vars,
        var_labels = tpt_label,
        table_names = paste0("surv_diff_", tpt, table_names_suffix),
        show_labels = diff_show_labels,
        afun = afun_surv_diff,
        extra_args = tpt_extra_args
      )
    }
  }
  lyt
}

#' Tabulate Binary Response by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tabulate statistics such as response rate and odds ratio for population subgroups.
#'
#' @inheritParams argument_convention
#' @param data (`data.frame`)\cr the dataset containing the variables to summarize.
#' @param groups_lists (named `list` of `list`)\cr optionally contains for each `subgroups` variable a
#'   list, which specifies the new group levels via the names and the
#'   levels that belong to it in the character vectors that are elements of the list.
#' @param label_all (`string`)\cr label for the total population analysis.
#' @param method (`string`)\cr specifies the test used to calculate the p-value for the difference between
#'   two proportions. For options, see [s_test_proportion_diff()]. Default is `NULL` so no test is performed.
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. The resulting tables are typically used as part of a forest plot.
#'
#' @seealso [extract_rsp_subgroups()]
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
#'   droplevels() %>%
#'   mutate(
#'     # Reorder levels of factor to make the placebo group the reference arm.
#'     ARM = fct_relevel(ARM, "B: Placebo"),
#'     rsp = AVALC == "CR"
#'   )
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' # Unstratified analysis.
#' df <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f
#' )
#' df
#'
#' @name response_subgroups
NULL

#' Prepares Response Data for Population Subgroups in Data Frames
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares response rates and odds ratios for population subgroups in data frames. Simple wrapper
#' for [h_odds_ratio_subgroups_df()] and [h_proportion_subgroups_df()]. Result is a list of two
#' `data.frames`: `prop` and `or`. `variables` corresponds to the names of variables found in `data`,
#' passed as a named `list` and requires elements `rsp`, `arm` and optionally `subgroups` and `strat`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams response_subgroups
#' @param label_all (`string`)\cr label for the total population analysis.
#'
#' @return A named list of two elements:
#'   * `prop`: A `data.frame` containing columns `arm`, `n`, `n_rsp`, `prop`, `subgroup`, `var`,
#'     `var_label`, and `row_type`.
#'   * `or`: A `data.frame` containing columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, `conf_level`,
#'     `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @seealso [response_subgroups]
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
#'   droplevels() %>%
#'   mutate(
#'     # Reorder levels of factor to make the placebo group the reference arm.
#'     ARM = fct_relevel(ARM, "B: Placebo"),
#'     rsp = AVALC == "CR"
#'   )
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' # Unstratified analysis.
#' df <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f
#' )
#' df
#'
#' # Stratified analysis.
#' df_strat <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2"), strat = "STRATA1"),
#'   data = adrs_f
#' )
#' df_strat
#'
#' # Grouping of the BMRKR2 levels.
#' df_grouped <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#' df_grouped
#'
#' @export
extract_rsp_subgroups <- function(variables,
                                  data,
                                  groups_lists = list(),
                                  conf_level = 0.95,
                                  method = NULL,
                                  label_all = "All Patients") {
  # Both helpers receive the same variables, data, groupings and overall label;
  # the confidence level and the test method only apply to the odds ratios.
  list(
    prop = h_proportion_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      label_all = label_all
    ),
    or = h_odds_ratio_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      conf_level = conf_level,
      method = method,
      label_all = label_all
    )
  )
}

#' @describeIn response_subgroups Formatted analysis function which is used as `afun` in `tabulate_rsp_subgroups()`.
#'
#' @return
#' * `a_response_subgroups()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_response_subgroups <- function(.formats = list(
                                   n = "xx",
                                   n_rsp = "xx",
                                   prop = "xx.x%",
                                   n_tot = "xx",
                                   or = list(format_extreme_values(2L)),
                                   ci = list(format_extreme_values_ci(2L)),
                                   pval = "x.xxxx | (<0.0001)"
                                 )) {
  checkmate::assert_list(.formats)
  checkmate::assert_subset(
    names(.formats),
    c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval")
  )

  # Creates the analysis function for a single statistic. Rows are labelled by
  # subgroup. The confidence interval is assembled from the `lcl` and `ucl`
  # columns, every other statistic is read from the column of the same name.
  build_afun <- function(stat, fmt) {
    if (stat == "ci") {
      return(
        function(df, labelstr = "", ...) {
          in_rows(.list = combine_vectors(df$lcl, df$ucl), .labels = as.character(df$subgroup), .formats = fmt)
        }
      )
    }
    function(df, labelstr = "", ...) {
      in_rows(.list = as.list(df[[stat]]), .labels = as.character(df$subgroup), .formats = fmt)
    }
  }

  # One analysis function per requested statistic, named after the statistic.
  Map(
    build_afun,
    stat = names(.formats),
    fmt = .formats
  )
}

#' @describeIn response_subgroups Table-creating function which creates a table
#'   summarizing binary response by subgroup. This function is a wrapper for [rtables::analyze_colvars()]
#'   and [rtables::summarize_row_groups()].
#'
#' @param df (`list`)\cr of data frames containing all analysis variables. List should be
#'   created using [extract_rsp_subgroups()].
#' @param vars (`character`)\cr the names of statistics to be reported among:
#'   * `n`: Total number of observations per group.
#'   * `n_rsp`: Number of responders per group.
#'   * `prop`: Proportion of responders.
#'   * `n_tot`: Total number of observations.
#'   * `or`: Odds ratio.
#'   * `ci` : Confidence interval of odds ratio.
#'   * `pval`: p-value of the effect.
#'   Note, the statistics `n_tot`, `or` and `ci` are required.
#'
#' @return An `rtables` table summarizing binary response by subgroup.
#'
#' @examples
#' ## Table with default columns.
#' basic_table() %>%
#'   tabulate_rsp_subgroups(df)
#'
#' ## Table with selected columns.
#' basic_table() %>%
#'   tabulate_rsp_subgroups(
#'     df = df,
#'     vars = c("n_tot", "n", "n_rsp", "prop", "or", "ci")
#'   )
#'
#' @export
tabulate_rsp_subgroups <- function(lyt,
                                   df,
                                   vars = c("n_tot", "n", "prop", "or", "ci")) {
  # Builds two tables from the same starting layout `lyt` - one from `df$prop`
  # (per-arm columns) and one from `df$or` (odds ratio columns) - and binds them
  # column-wise into a single table.

  # The confidence level and, when present, the p-value label are read from the
  # first row of the odds ratio data.
  conf_level <- df$or$conf_level[1]
  method <- if ("pval_label" %in% names(df$or)) {
    df$or$pval_label[1]
  } else {
    NULL
  }

  # Analysis functions (default formats) and validated column variables/labels.
  afun_lst <- a_response_subgroups()
  colvars <- d_rsp_subgroups_colvars(vars, conf_level = conf_level, method = method)

  # Split the requested columns into those taken from `df$prop` and those taken
  # from `df$or`; the selection is done on the names of the labels.
  colvars_prop <- list(
    vars = colvars$vars[names(colvars$labels) %in% c("n", "prop", "n_rsp")],
    labels = colvars$labels[names(colvars$labels) %in% c("n", "prop", "n_rsp")]
  )
  colvars_or <- list(
    vars = colvars$vars[names(colvars$labels) %in% c("n_tot", "or", "ci", "pval")],
    labels = colvars$labels[names(colvars$labels) %in% c("n_tot", "or", "ci", "pval")]
  )

  # Columns from table_prop are optional.
  if (length(colvars_prop$vars) > 0) {
    # Columns are split by arm; the "content" rows are summarized first.
    lyt_prop <- split_cols_by(lyt = lyt, var = "arm")
    lyt_prop <- split_rows_by(
      lyt = lyt_prop,
      var = "row_type",
      split_fun = keep_split_levels("content"),
      nested = FALSE
    )
    lyt_prop <- summarize_row_groups(
      lyt = lyt_prop,
      var = "var_label",
      cfun = afun_lst[names(colvars_prop$labels)]
    )
    lyt_prop <- split_cols_by_multivar(
      lyt = lyt_prop,
      vars = colvars_prop$vars,
      varlabels = colvars_prop$labels
    )

    # "analysis" rows are only added when the data contains any; they are
    # grouped by `var_label`.
    if ("analysis" %in% df$prop$row_type) {
      lyt_prop <- split_rows_by(
        lyt = lyt_prop,
        var = "row_type",
        split_fun = keep_split_levels("analysis"),
        nested = FALSE,
        child_labels = "hidden"
      )
      lyt_prop <- split_rows_by(lyt = lyt_prop, var = "var_label", nested = TRUE)
      lyt_prop <- analyze_colvars(
        lyt = lyt_prop,
        afun = afun_lst[names(colvars_prop$labels)],
        inclNAs = TRUE
      )
    }

    table_prop <- build_table(lyt_prop, df = df$prop)
  } else {
    table_prop <- NULL
  }

  # Columns "n_tot", "or", "ci" in table_or are required.
  # NOTE(review): here the multivar column split is added before
  # `summarize_row_groups()`, whereas the proportion layout above adds it after;
  # the order is left untouched - confirm whether the difference is intended.
  lyt_or <- split_cols_by(lyt = lyt, var = "arm")
  lyt_or <- split_rows_by(
    lyt = lyt_or,
    var = "row_type",
    split_fun = keep_split_levels("content"),
    nested = FALSE
  )
  lyt_or <- split_cols_by_multivar(
    lyt = lyt_or,
    vars = colvars_or$vars,
    varlabels = colvars_or$labels
  )
  lyt_or <- summarize_row_groups(
    lyt = lyt_or,
    var = "var_label",
    cfun = afun_lst[names(colvars_or$labels)]
  ) %>%
    append_topleft("Baseline Risk Factors")

  if ("analysis" %in% df$or$row_type) {
    lyt_or <- split_rows_by(
      lyt = lyt_or,
      var = "row_type",
      split_fun = keep_split_levels("analysis"),
      nested = FALSE,
      child_labels = "hidden"
    )
    lyt_or <- split_rows_by(lyt = lyt_or, var = "var_label", nested = TRUE)
    lyt_or <- analyze_colvars(
      lyt = lyt_or,
      afun = afun_lst[names(colvars_or$labels)],
      inclNAs = TRUE
    )
  }
  table_or <- build_table(lyt_or, df = df$or)

  # Work out the column positions of `n_tot`, `or` and the CI (stored under the
  # `lcl` variable) in the final table.
  n_tot_id <- match("n_tot", colvars_or$vars)
  if (is.null(table_prop)) {
    result <- table_or
    or_id <- match("or", colvars_or$vars)
    ci_id <- match("lcl", colvars_or$vars)
  } else {
    # Column order: `n_tot`, then all proportion columns, then the remaining odds
    # ratio columns - hence the offset of 1 + ncol(table_prop).
    result <- cbind_rtables(table_or[, n_tot_id], table_prop, table_or[, -n_tot_id])
    or_id <- 1L + ncol(table_prop) + match("or", colvars_or$vars[-n_tot_id])
    ci_id <- 1L + ncol(table_prop) + match("lcl", colvars_or$vars[-n_tot_id])
    n_tot_id <- 1L
  }
  # The column positions and a header built from the arm levels are attached as
  # attributes (presumably consumed by the forest plot code - confirm).
  structure(
    result,
    forest_header = paste0(levels(df$prop$arm), "\nBetter"),
    col_x = or_id,
    col_ci = ci_id,
    col_symbol_size = n_tot_id
  )
}

#' Labels for Column Variables in Binary Response by Subgroup Table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Internal function to check variables included in [tabulate_rsp_subgroups()] and create column labels.
#'
#' @inheritParams argument_convention
#' @inheritParams tabulate_rsp_subgroups
#'
#' @return A `list` of variables to tabulate and their labels.
#'
#' @export
d_rsp_subgroups_colvars <- function(vars,
                                    conf_level = NULL,
                                    method = NULL) {
  # `n_tot`, `or` and `ci` are mandatory, and only known statistics are allowed.
  checkmate::assert_character(vars)
  checkmate::assert_subset(c("n_tot", "or", "ci"), vars)
  checkmate::assert_subset(
    vars,
    c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval")
  )

  # Fixed labels; the labels for `ci` and `pval` depend on the arguments and
  # are appended below.
  label_lookup <- c(
    n = "n",
    n_rsp = "Responders",
    prop = "Response (%)",
    n_tot = "Total n",
    or = "Odds Ratio"
  )
  data_vars <- vars

  if ("ci" %in% data_vars) {
    checkmate::assert_false(is.null(conf_level))

    ci_label <- paste0(100 * conf_level, "% CI")
    label_lookup <- c(label_lookup, ci = ci_label)

    # The `lcl` variable is just a placeholder available in the analysis data,
    # it is not actually used on its own in the tabulation.
    # Variables used in the tabulation are lcl and ucl, see `a_response_subgroups` for details.
    data_vars[data_vars == "ci"] <- "lcl"
  }

  # The p-value column is labelled with `method`; nothing is appended when
  # `method` is `NULL`.
  if ("pval" %in% data_vars) {
    label_lookup <- c(label_lookup, pval = method)
  }

  list(
    vars = data_vars,
    labels = label_lookup[vars]
  )
}

#' Combine Factor Levels
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Combine specified old factor levels into a single new level.
#'
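#' @param x (`factor`)\cr the factor whose levels should be combined.
#' @param levels (`character`)\cr names of the existing levels to be combined.
#' @param new_level (`string`)\cr name of the new level. Defaults to the old level names joined by `"/"`.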
#'
#' @return A `factor` with the new levels.
#'
#' @examples
#' x <- factor(letters[1:5], levels = letters[5:1])
#' combine_levels(x, levels = c("a", "b"))
#'
#' combine_levels(x, c("e", "b"))
#'
#' @export
combine_levels <- function(x, levels, new_level = paste(levels, collapse = "/")) {
  checkmate::assert_factor(x)
  checkmate::assert_subset(levels, levels(x))

  # Rename every level that is to be combined to `new_level`; assigning the
  # resulting (duplicated) names back merges them into a single level.
  old_levels <- levels(x)
  levels(x) <- replace(old_levels, old_levels %in% levels, new_level)

  x
}

#' Conversion of a Vector to a Factor
#'
#' Converts `x` to a factor and keeps its attributes. Warns appropriately such that the user
#' can decide whether they prefer converting to factor manually (e.g. for full control of
#' factor levels).
#'
#' @param x (`atomic`)\cr object to convert.
#' @param x_name (`string`)\cr name of `x`.
#' @param na_level (`string`)\cr the explicit missing level which should be used when converting a character vector.
#' @param verbose (`flag`)\cr whether to warn about the automatic conversion to factor. Defaults to `TRUE`.
#'
#' @return A `factor` with same attributes (except class) as `x`. Does not modify `x` if already a `factor`.
#'
#' @keywords internal
as_factor_keep_attributes <- function(x,
                                      x_name = deparse(substitute(x)),
                                      na_level = "<Missing>",
                                      verbose = TRUE) {
  checkmate::assert_atomic(x)
  checkmate::assert_string(x_name)
  checkmate::assert_string(na_level)
  checkmate::assert_flag(verbose)

  # Factors are handed back untouched (and without any warning).
  if (is.factor(x)) {
    return(x)
  }

  if (verbose) {
    warning(paste(
      "automatically converting", class(x)[1], "variable", x_name,
      "to factor, better manually convert to factor to avoid failures"
    ))
  }
  # The empty-input warning is raised regardless of `verbose`.
  if (identical(length(x), 0L)) {
    warning(paste(
      x_name, "has length 0, this can lead to tabulation failures, better convert to factor"
    ))
  }

  # Character input that contains the explicit missing level after the missing
  # value handling gets that level moved to the end; everything else goes
  # through a plain `as.factor()` on the original values.
  converted <- NULL
  if (is.character(x)) {
    x_explicit <- explicit_na(sas_na(x), label = na_level)
    if (any(na_level %in% x_explicit)) {
      converted <- forcats::fct_relevel(x_explicit, na_level, after = Inf)
    }
  }
  if (is.null(converted)) {
    converted <- as.factor(x)
  }

  # Re-attach the attributes of the original vector to the factor.
  do.call(structure, c(list(.Data = converted), attributes(x)))
}

#' Labels for Bins in Percent
#'
#' This creates labels for quantile based bins in percent. This assumes the right-closed
#' intervals as produced by [cut_quantile_bins()].
#'
#' @param probs (`proportion` vector)\cr the probabilities identifying the quantiles.
#'   This is a sorted vector of unique `proportion` values, i.e. between 0 and 1. The
#'   boundaries 0 and 1 do not need to be included; they are added automatically when missing.
#' @param digits (`integer`)\cr number of decimal places to round the percent numbers.
#'
#' @return A `character` vector with labels in the format `[0%,20%]`, `(20%,50%]`, etc.
#'
#' @keywords internal
bins_percent_labels <- function(probs,
                                digits = 0) {
  # Complete the probabilities with the outer boundaries where they are absent.
  if (!(0 %in% probs)) {
    probs <- c(0, probs)
  }
  if (!(1 %in% probs)) {
    probs <- c(probs, 1)
  }
  checkmate::assert_numeric(probs, lower = 0, upper = 1, unique = TRUE, sorted = TRUE)

  pct <- paste0(round(probs * 100, digits = digits), "%")
  n_bounds <- length(pct)

  # Only the first interval is closed on the left, all others are left-open.
  opening <- c("[", rep("(", n_bounds - 2L))
  paste0(opening, pct[-n_bounds], ",", pct[-1L], "]")
}

#' Cutting Numeric Vector into Empirical Quantile Bins
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This cuts a numeric vector into sample quantile bins.
#'
#' @inheritParams bins_percent_labels
#' @param x (`numeric`)\cr the continuous variable values which should be cut into
#'   quantile bins. This may contain `NA` values, which are then
#'   not used for the quantile calculations, but included in the return vector.
#' @param labels (`character`)\cr the unique labels for the quantile bins. When there are `n`
#'   probabilities in `probs`, then this must be `n + 1` long.
#' @param type (`integer`)\cr type of quantiles to use, see [stats::quantile()] for details.
#' @param ordered (`flag`)\cr should the result be an ordered factor.
#'
#' @return A `factor` variable with appropriately-labeled bins as levels.
#'
#' @note Intervals are closed on the right side. That is, the first bin is the interval
#'   `[-Inf, q1]` where `q1` is the first quantile, the second bin is then `(q1, q2]`, etc.,
#'   and the last bin is `(qn, +Inf]` where `qn` is the last quantile.
#'
#' @examples
#' # Default is to cut into quartile bins.
#' cut_quantile_bins(cars$speed)
#'
#' # Use custom quantiles.
#' cut_quantile_bins(cars$speed, probs = c(0.1, 0.2, 0.6, 0.88))
#'
#' # Use custom labels.
#' cut_quantile_bins(cars$speed, labels = paste0("Q", 1:4))
#'
#' # NAs are preserved in result factor.
#' ozone_binned <- cut_quantile_bins(airquality$Ozone)
#' which(is.na(ozone_binned))
#' # So you might want to make these explicit.
#' explicit_na(ozone_binned)
#'
#' @export
cut_quantile_bins <- function(x,
                              probs = c(0.25, 0.5, 0.75),
                              labels = NULL,
                              type = 7,
                              ordered = TRUE) {
  checkmate::assert_flag(ordered)
  checkmate::assert_numeric(x)

  # Complete the probabilities with the outer boundaries where they are absent.
  if (!(0 %in% probs)) {
    probs <- c(0, probs)
  }
  if (!(1 %in% probs)) {
    probs <- c(probs, 1)
  }
  checkmate::assert_numeric(probs, lower = 0, upper = 1, unique = TRUE, sorted = TRUE)

  # Fall back to percent labels when none are supplied.
  if (is.null(labels)) {
    labels <- bins_percent_labels(probs)
  }
  checkmate::assert_character(labels, len = length(probs) - 1, any.missing = FALSE, unique = TRUE)

  # Without any observed value there is nothing to compute quantiles from.
  if (all(is.na(x))) {
    return(factor(x, ordered = ordered, levels = labels))
  }

  quantiles <- stats::quantile(x, probs = probs, type = type, na.rm = TRUE)
  # The quantiles are used as break points and therefore must not be tied.
  checkmate::assert_numeric(quantiles, unique = TRUE)

  cut(
    x,
    breaks = quantiles,
    labels = labels,
    ordered_result = ordered,
    include.lowest = TRUE,
    right = TRUE
  )
}

#' Discard Certain Levels from a Factor
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This discards the observations as well as the levels specified from a factor.
#'
#' @param x (`factor`)\cr the original factor.
#' @param discard (`character`)\cr which levels to discard.
#'
#' @return A modified `factor` with observations as well as levels from `discard` dropped.
#'
#' @examples
#' fct_discard(factor(c("a", "b", "c")), "c")
#'
#' @export
fct_discard <- function(x, discard) {
  checkmate::assert_factor(x)
  checkmate::assert_character(discard, any.missing = FALSE)

  # Keep the observations outside `discard`, then rebuild the factor on the remaining levels.
  keep <- !(x %in% discard)
  factor(x[keep], levels = setdiff(levels(x), discard))
}

#' Insertion of Explicit Missings in a Factor
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This inserts explicit missings in a factor based on a condition. Additionally,
#' existing `NA` values will be explicitly converted to given `na_level`.
#'
#' @param x (`factor`)\cr the original factor.
#' @param condition (`logical`)\cr where to insert missings.
#' @param na_level (`string`)\cr which level to use for missings.
#'
#' @return A modified `factor` with inserted and existing `NA` converted to `na_level`.
#'
#' @seealso [forcats::fct_na_value_to_level()] which is used internally.
#'
#' @examples
#' fct_explicit_na_if(factor(c("a", "b", NA)), c(TRUE, FALSE, FALSE))
#'
#' @export
fct_explicit_na_if <- function(x, condition, na_level = "<Missing>") {
  checkmate::assert_factor(x, len = length(condition))
  checkmate::assert_logical(condition)

  # Blank out the flagged observations so they are relabelled together with existing NAs.
  with_na <- replace(x, condition, NA)
  relabelled <- forcats::fct_na_value_to_level(with_na, level = na_level)

  # Only `na_level` itself is considered for dropping when unused.
  forcats::fct_drop(relabelled, only = na_level)
}

#' Collapsing of Factor Levels and Keeping Only Those New Group Levels
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This collapses levels and only keeps those new group levels, in the order provided.
#' The returned factor has levels in the order given, with the possible missing level last (this will
#' only be included if there are missing values).
#'
#' @param .f (`factor` or `character`)\cr original vector.
#' @param ... (named `character` vectors)\cr levels in each vector provided will be collapsed into
#'   the new level given by the respective name.
#' @param .na_level (`string`)\cr which level to use for other levels, which should be missing in the
#'   new factor. Note that this level must not be contained in the new levels specified in `...`.
#'
#' @return A modified `factor` with collapsed levels. Values and levels which are not included
#'   in the given `character` vector input will be set to the missing level `.na_level`.
#'
#' @note Any existing `NA`s in the input vector will not be replaced by the missing level. If needed,
#'   [explicit_na()] can be called separately on the result.
#'
#' @seealso [forcats::fct_collapse()], [forcats::fct_relevel()] which are used internally.
#'
#' @examples
#' fct_collapse_only(factor(c("a", "b", "c", "d")), TRT = "b", CTRL = c("c", "d"))
#'
#' @export
fct_collapse_only <- function(.f, ..., .na_level = "<Missing>") {
  # The names of the `...` arguments are the new group levels, in the requested order.
  new_lvls <- names(list(...))
  na_level_clashes <- checkmate::test_subset(.na_level, new_lvls)
  if (na_level_clashes) {
    stop(paste0(".na_level currently set to '", .na_level, "' must not be contained in the new levels"))
  }

  # Everything not mentioned in `...` ends up in `.na_level`.
  collapsed <- forcats::fct_collapse(.f, ..., other_level = .na_level)

  # Bring the new levels to the front in the order they were given.
  relevel_args <- c(list(.f = collapsed), as.list(new_lvls))
  do.call(forcats::fct_relevel, args = relevel_args)
}

#' Control Function for Subgroup Treatment Effect Pattern (STEP) Calculations
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for STEP calculations.
#'
#' @param biomarker (`numeric` or `NULL`)\cr optional provision of the numeric biomarker variable, which
#'   could be used to infer `bandwidth`, see below.
#' @param use_percentile (`flag`)\cr if `TRUE`, the running windows are created according to
#'   quantiles rather than actual values, i.e. the bandwidth refers to the percentage of data
#'   covered in each window. Suggest `TRUE` if the biomarker variable is not uniformly
#'   distributed.
#' @param bandwidth (`number` or `NULL`)\cr indicating the bandwidth of each window.
#'   Depending on the argument `use_percentile`, it can be either the length of actual-value
#'   windows on the real biomarker scale, or percentage windows.
#'   If `use_percentile = TRUE`, it should be a number between 0 and 1.
#'   If `NULL`, treat the bandwidth to be infinity, which means only one global model will be fitted.
#'   By default, `0.25` is used for percentage windows and one quarter of the range of the `biomarker`
#'   variable for actual-value windows.
#' @param degree (`count`)\cr the degree of polynomial function of the biomarker as an interaction term
#'   with the treatment arm fitted at each window. If 0 (default), then the biomarker variable
#'   is not included in the model fitted in each biomarker window.
#' @param num_points (`count`)\cr the number of points at which the hazard ratios are estimated. The
#'   smallest number is 2.
#'
#' @return A list of components with the same names as the arguments, except `biomarker` which is
#'   just used to calculate the `bandwidth` in case that actual biomarker windows are requested.
#'
#' @examples
#' # Provide biomarker values and request actual values to be used,
#' # so that bandwidth is chosen from range.
#' control_step(biomarker = 1:10, use_percentile = FALSE)
#'
#' # Use a global model with quadratic biomarker interaction term.
#' control_step(bandwidth = NULL, degree = 2)
#'
#' # Reduce number of points to be used.
#' control_step(num_points = 10)
#'
#' @export
control_step <- function(biomarker = NULL,
                         use_percentile = TRUE,
                         bandwidth,
                         degree = 0L,
                         num_points = 39L) {
  checkmate::assert_numeric(biomarker, null.ok = TRUE)
  checkmate::assert_flag(use_percentile)
  checkmate::assert_int(num_points, lower = 2)
  checkmate::assert_count(degree)

  if (missing(bandwidth)) {
    # Infer the bandwidth: a quarter of the data for percentile windows, a quarter of the
    # observed biomarker range for actual-value windows, and `NULL` when no biomarker
    # values are available to derive a range from.
    bandwidth <- if (use_percentile) {
      0.25
    } else if (!is.null(biomarker)) {
      diff(range(biomarker, na.rm = TRUE)) / 4
    } else {
      NULL
    }
  } else if (!is.null(bandwidth)) {
    # Validate a user-supplied bandwidth; an explicit `NULL` is passed through unchanged.
    if (use_percentile) {
      assert_proportion_value(bandwidth)
    } else {
      # `assert_number()` is used because a generic scalar check would also let through
      # non-numeric values such as "a", for which `"a" > 0` evaluates to `TRUE`.
      checkmate::assert_number(bandwidth)
      checkmate::assert_true(bandwidth > 0)
    }
  }

  list(
    use_percentile = use_percentile,
    bandwidth = bandwidth,
    degree = as.integer(degree),
    num_points = as.integer(num_points)
  )
}

#' `rtables` Access Helper Functions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' These are a couple of functions that help with accessing the data in `rtables` objects.
#' Currently these work for occurrence tables, which are defined as having a count as the first
#' element and a fraction as the second element in each cell.
#'
#' @seealso [prune_occurrences] for usage of these functions.
#'
#' @name rtables_access
NULL

#' @describeIn rtables_access Helper function to extract the first values from each content
#'   cell and from specified columns in a `TableRow`. Defaults to all columns.
#'
#' @param table_row (`TableRow`)\cr an analysis row in an occurrence table.
#' @param col_names (`character`)\cr the names of the columns to extract from.
#' @param col_indices (`integer`)\cr the indices of the columns to extract from. If `col_names` are provided,
#'   then these are inferred from the names of `table_row`. Note that this currently only works well with a single
#'   column split.
#'
#' @return
#' * `h_row_first_values()` returns a `vector` of numeric values.
#'
#' @examples
#' tbl <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("RACE") %>%
#'   analyze("AGE", function(x) {
#'     list(
#'       "mean (sd)" = rcell(c(mean(x), sd(x)), format = "xx.x (xx.x)"),
#'       "n" = length(x),
#'       "frac" = rcell(c(0.1, 0.1), format = "xx (xx)")
#'     )
#'   }) %>%
#'   build_table(tern_ex_adsl) %>%
#'   prune_table()
#' tree_row_elem <- collect_leaves(tbl[2, ])[[1]]
#' result <- max(h_row_first_values(tree_row_elem))
#' result
#'
#' @export
h_row_first_values <- function(table_row,
                               col_names = NULL,
                               col_indices = NULL) {
  col_indices <- check_names_indices(table_row, col_names, col_indices)
  checkmate::assert_integerish(col_indices)
  checkmate::assert_subset(col_indices, seq_len(ncol(table_row)))

  # Cell values of the requested columns only.
  selected_cells <- row_values(table_row)[col_indices]

  # First entry of a cell; cells without content are reported as missing.
  first_or_na <- function(cell) {
    if (is.null(cell)) {
      return(NA_real_)
    }
    cell[1L]
  }

  vapply(selected_cells, first_or_na, FUN.VALUE = numeric(1))
}

#' @describeIn rtables_access Helper function that extracts row values and checks if they are
#'   convertible to integers (`integerish` values).
#'
#' @return
#' * `h_row_counts()` returns a `vector` of numeric values.
#'
#' @examples
#' # Row counts (integer values)
#' # h_row_counts(tree_row_elem) # Fails because there are no integers
#' # Using values with integers
#' tree_row_elem <- collect_leaves(tbl[3, ])[[1]]
#' result <- h_row_counts(tree_row_elem)
#' # result
#'
#' @export
h_row_counts <- function(table_row,
                         col_names = NULL,
                         col_indices = NULL) {
  # Counts are the first values of each cell; they must be whole numbers.
  counts <- h_row_first_values(
    table_row = table_row,
    col_names = col_names,
    col_indices = col_indices
  )
  checkmate::assert_integerish(counts)
  counts
}

#' @describeIn rtables_access helper function to extract fractions from specified columns in a `TableRow`.
#'   More specifically it extracts the second values from each content cell and checks it is a fraction.
#'
#' @return
#' * `h_row_fractions()` returns a `vector` of proportions.
#'
#' @examples
#' # Row fractions
#' tree_row_elem <- collect_leaves(tbl[4, ])[[1]]
#' h_row_fractions(tree_row_elem)
#'
#' @export
h_row_fractions <- function(table_row,
                            col_names = NULL,
                            col_indices = NULL) {
  col_indices <- check_names_indices(table_row, col_names, col_indices)
  row_vals <- row_values(table_row)[col_indices]

  # The fraction is the second value of each cell. `vapply()` guarantees a numeric vector
  # (also for an empty column selection, where `sapply()` would return an empty list) and
  # fails early for cells whose second value is absent or not numeric.
  fractions <- vapply(row_vals, function(rv) rv[2L], FUN.VALUE = numeric(1))

  # Fractions must be proportions.
  checkmate::assert_numeric(fractions, lower = 0, upper = 1)
  fractions
}

#' @describeIn rtables_access Helper function to extract column counts from specified columns in a table.
#'
#' @param table (`VTableNodeInfo`)\cr an occurrence table or row.
#'
#' @return
#' * `h_col_counts()` returns a `vector` of column counts.
#'
#' @export
h_col_counts <- function(table,
                         col_names = NULL,
                         col_indices = NULL) {
  col_indices <- check_names_indices(table, col_names, col_indices)

  # Column counts restricted to the requested columns, labelled by `col_names`
  # (names are removed when `col_names` is `NULL`).
  selected_counts <- col_counts(table)[col_indices]
  names(selected_counts) <- col_names
  selected_counts
}

#' @describeIn rtables_access Helper function to get first row of content table of current table.
#'
#' @return
#' * `h_content_first_row()` returns a row from an `rtables` table.
#'
#' @export
h_content_first_row <- function(table) {
  # First child of the content table belonging to `table`.
  content_rows <- tree_children(content_table(table))
  content_rows[[1]]
}

#' @describeIn rtables_access Helper function which says whether current table is a leaf in the tree.
#'
#' @return
#' * `is_leaf_table()` returns a `logical` value indicating whether current table is a leaf.
#'
#' @keywords internal
is_leaf_table <- function(table) {
  # A leaf table is one whose children are all of class `ElementaryTable`.
  classes_of_children <- sapply(tree_children(table), class)
  identical(unique(classes_of_children), "ElementaryTable")
}

#' @describeIn rtables_access Internal helper function that tests standard inputs for column indices.
#'
#' @return
#' * `check_names_indices` returns column indices.
#'
#' @keywords internal
check_names_indices <- function(table_row,
                                col_names = NULL,
                                col_indices = NULL) {
  # Names and indices are mutually exclusive; names are translated into indices.
  if (!is.null(col_names)) {
    if (!is.null(col_indices)) {
      stop(
        "Inserted both col_names and col_indices when selecting row values. ",
        "Please choose one."
      )
    }
    col_indices <- h_col_indices(table_row, col_names)
  }

  if (!is.null(col_indices)) {
    return(col_indices)
  }

  # Default: all columns. Objects without a column dimension fall back to their length.
  n_cols <- ncol(table_row)
  seq_len(if (is.null(n_cols)) length(table_row) else n_cols)
}

#' Line plot with the optional table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Line plot with the optional table.
#'
#' @param df (`data.frame`)\cr data set containing all analysis variables.
#' @param alt_counts_df (`data.frame` or `NULL`)\cr data set that will be used (only) to count objects in strata.
#' @param variables (named `character` vector) of variable names in `df` data set. Details are:
#'   * `x` (`character`)\cr name of x-axis variable.
#'   * `y` (`character`)\cr name of y-axis variable.
#'   * `strata` (`character`)\cr name of grouping variable, i.e. treatment arm. Can be `NA` to indicate lack of groups.
#'   * `paramcd` (`character`)\cr name of the variable for parameter's code. Used for y-axis label and plot's subtitle.
#'     Can be `NA` if `paramcd` is not to be added to the y-axis label or subtitle.
#'   * `y_unit` (`character`)\cr name of variable with units of `y`. Used for y-axis label and plot's subtitle.
#'     Can be `NA` if y unit is not to be added to the y-axis label or subtitle.
#' @param mid (`character` or `NULL`)\cr names of the statistics that will be plotted as midpoints.
#'   All the statistics indicated in `mid` variable must be present in the object returned by `sfun`,
#'   and be of a `double` or `numeric` type vector of length one.
#' @param interval (`character` or `NULL`)\cr names of the statistics that will be plotted as intervals.
#'   All the statistics indicated in `interval` variable must be present in the object returned by `sfun`,
#'   and be of a `double` or `numeric` type vector of length two.
#' @param whiskers (`character`)\cr names of the interval whiskers that will be plotted. Must match the `names`
#'   attribute of the `interval` element in the list returned by `sfun`. It is possible to specify one whisker only,
#'   lower or upper.
#' @param table (`character` or `NULL`)\cr names of the statistics that will be displayed in the table below the plot.
#'   All the statistics indicated in `table` variable must be present in the object returned by `sfun`.
#' @param sfun (`closure`)\cr the function to compute the values of required statistics. It must return a named `list`
#'   with atomic vectors. The names of the `list` elements refer to the names of the statistics and are used by `mid`,
#'   `interval`, `table`. It must be able to accept as input a vector with data for which statistics are computed.
#' @param ... optional arguments to `sfun`.
#' @param mid_type (`character`)\cr controls the type of the `mid` plot, it can be point (`p`), line (`l`),
#'   or point and line (`pl`).
#' @param mid_point_size (`integer` or `double`)\cr controls the font size of the point for `mid` plot.
#' @param position (`character` or `call`)\cr geom element position adjustment, either as a string, or the result of
#'   a call to a position adjustment function.
#' @param legend_title (`character` string)\cr legend title.
#' @param legend_position (`character`)\cr the position of the plot legend (`none`, `left`, `right`, `bottom`, `top`,
#'   or two-element numeric vector).
#' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to control styling of the plot.
#' @param y_lab (`character`)\cr y-axis label. If equal to `NULL`, then no label will be added.
#' @param y_lab_add_paramcd (`logical`)\cr should `paramcd`, i.e. `unique(df[[variables["paramcd"]]])` be added to the
#'   y-axis label `y_lab`?
#' @param y_lab_add_unit (`logical`)\cr should y unit, i.e. `unique(df[[variables["y_unit"]]])` be added to the y-axis
#'   label `y_lab`?
#' @param title (`character`)\cr plot title.
#' @param subtitle (`character`)\cr plot subtitle.
#' @param subtitle_add_paramcd (`logical`)\cr should `paramcd`, i.e. `unique(df[[variables["paramcd"]]])` be added to
#'   the plot's subtitle `subtitle`?
#' @param subtitle_add_unit (`logical`)\cr should y unit, i.e. `unique(df[[variables["y_unit"]]])` be added to the
#'   plot's subtitle `subtitle`?
#' @param caption (`character`)\cr optional caption below the plot.
#' @param table_format (named `character` or `NULL`)\cr format patterns for descriptive statistics used in the
#'   (optional) table appended to the plot. It is passed directly to the `h_format_row` function through the `format`
#'   parameter. Names of `table_format` must match the names of statistics returned by `sfun` function.
#' @param table_labels (named `character` or `NULL`)\cr labels for descriptive statistics used in the (optional) table
#'   appended to the plot. Names of `table_labels` must match the names of statistics returned by `sfun` function.
#' @param table_font_size (`integer` or `double`)\cr controls the font size of values in the table.
#' @param newpage (`logical`)\cr should plot be drawn on new page?
#' @param col (`character`)\cr colors.
#'
#' @return A `ggplot` line plot (and statistics table if applicable).
#'
#' @examples
#' library(nestcolor)
#'
#' adsl <- tern_ex_adsl
#' adlb <- tern_ex_adlb %>% dplyr::filter(ANL01FL == "Y", PARAMCD == "ALT", AVISIT != "SCREENING")
#' adlb$AVISIT <- droplevels(adlb$AVISIT)
#' adlb <- dplyr::mutate(adlb, AVISIT = forcats::fct_reorder(AVISIT, AVISITN, min))
#'
#' # Mean with CI
#' g_lineplot(adlb, adsl, subtitle = "Laboratory Test:")
#'
#' # Mean with CI, no stratification
#' g_lineplot(adlb, variables = control_lineplot_vars(strata = NA))
#'
#' # Mean, upper whisker of CI, no strata counts N
#' g_lineplot(
#'   adlb,
#'   whiskers = "mean_ci_upr",
#'   title = "Plot of Mean and Upper 95% Confidence Limit by Visit"
#' )
#'
#' # Median with CI
#' g_lineplot(
#'   adlb,
#'   adsl,
#'   mid = "median",
#'   interval = "median_ci",
#'   whiskers = c("median_ci_lwr", "median_ci_upr"),
#'   title = "Plot of Median and 95% Confidence Limits by Visit"
#' )
#'
#' # Mean, +/- SD
#' g_lineplot(adlb, adsl,
#'   interval = "mean_sdi",
#'   whiskers = c("mean_sdi_lwr", "mean_sdi_upr"),
#'   title = "Plot of Mean +/- SD by Visit"
#' )
#'
#' # Mean with CI plot with stats table
#' g_lineplot(adlb, adsl, table = c("n", "mean", "mean_ci"))
#'
#' # Mean with CI, table and customized confidence level
#' g_lineplot(
#'   adlb,
#'   adsl,
#'   table = c("n", "mean", "mean_ci"),
#'   control = control_summarize_vars(conf_level = 0.80),
#'   title = "Plot of Mean and 80% Confidence Limits by Visit"
#' )
#'
#' # Mean with CI, table, filtered data
#' adlb_f <- dplyr::filter(adlb, ARMCD != "ARM A" | AVISIT == "BASELINE")
#' g_lineplot(adlb_f, table = c("n", "mean"))
#'
#' @export
g_lineplot <- function(df,
                       alt_counts_df = NULL,
                       variables = control_lineplot_vars(),
                       mid = "mean",
                       interval = "mean_ci",
                       whiskers = c("mean_ci_lwr", "mean_ci_upr"),
                       table = NULL,
                       sfun = tern::s_summary,
                       ...,
                       mid_type = "pl",
                       mid_point_size = 2,
                       position = ggplot2::position_dodge(width = 0.4),
                       legend_title = NULL,
                       legend_position = "bottom",
                       ggtheme = nestcolor::theme_nest(),
                       y_lab = NULL,
                       y_lab_add_paramcd = TRUE,
                       y_lab_add_unit = TRUE,
                       title = "Plot of Mean and 95% Confidence Limits by Visit",
                       subtitle = "",
                       subtitle_add_paramcd = TRUE,
                       subtitle_add_unit = TRUE,
                       caption = NULL,
                       table_format = summary_formats(),
                       table_labels = summary_labels(),
                       table_font_size = 3,
                       newpage = TRUE,
                       col = NULL) {
  # Overview of the steps below:
  #   1. validate the inputs and unpack `variables`,
  #   2. compute the `mid` / `interval` statistics per (strata, x) group with `sfun`,
  #   3. derive legend title, y-axis label and subtitle,
  #   4. build the ggplot object,
  #   5. optionally stack a text table of statistics below the plot.
  checkmate::assert_character(variables, any.missing = TRUE)
  checkmate::assert_character(mid, null.ok = TRUE)
  checkmate::assert_character(interval, null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)

  checkmate::assert_string(title, null.ok = TRUE)
  checkmate::assert_string(subtitle, null.ok = TRUE)

  if (is.character(interval)) {
    checkmate::assert_vector(whiskers, min.len = 0, max.len = 2)
  }

  # A single whisker is drawn from `mid` to the whisker, hence `mid` is required then.
  if (length(whiskers) == 1) {
    checkmate::assert_character(mid)
  }

  if (is.character(mid)) {
    checkmate::assert_scalar(mid_type)
    checkmate::assert_subset(mid_type, c("pl", "p", "l"))
  }

  x <- variables[["x"]]
  y <- variables[["y"]]
  paramcd <- variables["paramcd"] # NA if paramcd == NA or it is not in variables
  y_unit <- variables["y_unit"] # NA if y_unit == NA or it is not in variables
  if (is.na(variables["strata"])) {
    strata <- NULL # NULL if strata == NA or it is not in variables
  } else {
    strata <- variables[["strata"]]
  }
  checkmate::assert_flag(y_lab_add_paramcd, null.ok = TRUE)
  checkmate::assert_flag(subtitle_add_paramcd, null.ok = TRUE)
  # `NULL` passes the assertions above but cannot be used in `&&` / `if`;
  # it is treated as "do not add".
  y_lab_add_paramcd <- isTRUE(y_lab_add_paramcd)
  subtitle_add_paramcd <- isTRUE(subtitle_add_paramcd)
  if ((!is.null(y_lab) && y_lab_add_paramcd) || (!is.null(subtitle) && subtitle_add_paramcd)) {
    # The parameter code is pasted into a label, so it must exist and be unique in `df`.
    checkmate::assert_false(is.na(paramcd))
    checkmate::assert_scalar(unique(df[[paramcd]]))
  }

  checkmate::assert_flag(y_lab_add_unit, null.ok = TRUE)
  checkmate::assert_flag(subtitle_add_unit, null.ok = TRUE)
  # Same `NULL` handling as for the `paramcd` flags.
  y_lab_add_unit <- isTRUE(y_lab_add_unit)
  subtitle_add_unit <- isTRUE(subtitle_add_unit)
  if ((!is.null(y_lab) && y_lab_add_unit) || (!is.null(subtitle) && subtitle_add_unit)) {
    # The unit is pasted into a label, so it must exist and be unique in `df`.
    checkmate::assert_false(is.na(y_unit))
    checkmate::assert_scalar(unique(df[[y_unit]]))
  }

  if (!is.null(strata) && !is.null(alt_counts_df)) {
    checkmate::assert_set_equal(unique(alt_counts_df[[strata]]), unique(df[[strata]]))
  }

  ####################################### |
  # ---- Compute required statistics ----
  ####################################### |
  if (!is.null(strata)) {
    df_grp <- tidyr::expand(df, .data[[strata]], .data[[x]]) # expand based on levels of factors
  } else {
    df_grp <- tidyr::expand(df, NULL, .data[[x]])
  }
  # Join the observations back onto the expanded grid and group by (strata, x).
  df_grp <- df_grp %>%
    dplyr::full_join(y = df[, c(strata, x, y)], by = c(strata, x), multiple = "all") %>%
    dplyr::group_by_at(c(strata, x))

  # One row per group with the requested `mid` and `interval` statistics as columns.
  df_stats <- df_grp %>%
    dplyr::summarise(
      data.frame(t(do.call(c, unname(sfun(.data[[y]], ...)[c(mid, interval)])))),
      .groups = "drop"
    )

  # Groups without a computable midpoint are not plotted.
  df_stats <- df_stats[!is.na(df_stats[[mid]]), ]

  # add number of objects N in strata
  if (!is.null(strata) && !is.null(alt_counts_df)) {
    strata_N <- paste0(strata, "_N") # nolint

    df_N <- as.data.frame(table(alt_counts_df[[strata]], exclude = c(NA, NaN, Inf))) # nolint
    colnames(df_N) <- c(strata, "N") # nolint
    df_N[[strata_N]] <- paste0(df_N[[strata]], " (N = ", df_N$N, ")") # nolint

    # strata_N should not be in colnames(df_stats)
    checkmate::assert_disjunct(strata_N, colnames(df_stats))

    df_stats <- merge(x = df_stats, y = df_N[, c(strata, strata_N)], by = strata)
  } else if (!is.null(strata)) {
    strata_N <- strata # nolint
  } else {
    strata_N <- NULL # nolint
  }

  ############################################### |
  # ---- Prepare certain plot's properties. ----
  ############################################### |
  # legend title
  # `%in%` is used instead of `!=` so that a two-element numeric `legend_position`
  # still yields a single logical value for `&&`.
  if (is.null(legend_title) && !is.null(strata) && !("none" %in% legend_position)) {
    legend_title <- attr(df[[strata]], "label")
  }

  # y label
  if (!is.null(y_lab)) {
    if (y_lab_add_paramcd) {
      y_lab <- paste(y_lab, unique(df[[paramcd]]))
    }

    if (y_lab_add_unit) {
      y_lab <- paste0(y_lab, " (", unique(df[[y_unit]]), ")")
    }

    y_lab <- trimws(y_lab)
  }

  # subtitle
  if (!is.null(subtitle)) {
    if (subtitle_add_paramcd) {
      subtitle <- paste(subtitle, unique(df[[paramcd]]))
    }

    if (subtitle_add_unit) {
      subtitle <- paste0(subtitle, " (", unique(df[[y_unit]]), ")")
    }

    subtitle <- trimws(subtitle)
  }

  ############################### |
  # ---- Build plot object. ----
  ############################### |
  # All grouping aesthetics map to the (optionally N-annotated) strata column.
  p <- ggplot2::ggplot(
    data = df_stats,
    mapping = ggplot2::aes(
      x = .data[[x]], y = .data[[mid]],
      color = if (is.null(strata_N)) NULL else .data[[strata_N]],
      shape = if (is.null(strata_N)) NULL else .data[[strata_N]],
      lty = if (is.null(strata_N)) NULL else .data[[strata_N]],
      group = if (is.null(strata_N)) NULL else .data[[strata_N]]
    )
  )

  if (!is.null(mid)) {
    # points
    if (grepl("p", mid_type, fixed = TRUE)) {
      p <- p + ggplot2::geom_point(position = position, size = mid_point_size, na.rm = TRUE)
    }

    # lines
    # further conditions in if are to ensure that not all of the groups consist of only one observation
    if (grepl("l", mid_type, fixed = TRUE) &&
      !is.null(strata) &&
      !all(dplyr::summarise(df_grp, count_n = dplyr::n())[["count_n"]] == 1L)) {
      p <- p + ggplot2::geom_line(position = position, na.rm = TRUE)
    }
  }

  # interval
  if (!is.null(interval)) {
    # With a single whisker both `ymin` and `ymax` refer to the same column.
    p <- p +
      ggplot2::geom_errorbar(
        ggplot2::aes(ymin = .data[[whiskers[1]]], ymax = .data[[whiskers[max(1, length(whiskers))]]]),
        width = 0.45,
        position = position
      )

    if (length(whiskers) == 1) { # lwr or upr only; mid is then required
      # workaround as geom_errorbar does not provide single-direction whiskers
      p <- p +
        ggplot2::geom_linerange(
          data = df_stats[!is.na(df_stats[[whiskers]]), ], # as na.rm =TRUE does not suppress warnings
          ggplot2::aes(ymin = .data[[mid]], ymax = .data[[whiskers]]),
          position = position,
          na.rm = TRUE,
          show.legend = FALSE
        )
    }
  }

  p <- p +
    ggplot2::scale_y_continuous(labels = scales::comma, expand = ggplot2::expansion(c(0.25, .25))) +
    ggplot2::labs(
      title = title,
      subtitle = subtitle,
      caption = caption,
      color = legend_title,
      lty = legend_title,
      shape = legend_title,
      x = attr(df[[x]], "label"),
      y = y_lab
    )

  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_color_manual(values = col)
  }

  if (!is.null(ggtheme)) {
    # A supplied theme is applied as is; `legend_position` is only used in the fallback below.
    p <- p + ggtheme
  } else {
    p <- p +
      ggplot2::theme_bw() +
      ggplot2::theme(
        legend.key.width = grid::unit(1, "cm"),
        legend.position = legend_position,
        # Scalar `if` instead of `ifelse()` so that a two-element numeric
        # `legend_position` still gives a single direction.
        legend.direction = if (isTRUE(legend_position %in% c("top", "bottom"))) {
          "horizontal"
        } else {
          "vertical"
        }
      )
  }

  ############################################################# |
  # ---- Optionally, add table to the bottom of the plot. ----
  ############################################################# |
  if (!is.null(table)) {
    # One formatted row of statistics per (strata, x) group.
    df_stats_table <- df_grp %>%
      dplyr::summarise(
        h_format_row(
          x = sfun(.data[[y]], ...)[table],
          format = table_format,
          labels = table_labels
        ),
        .groups = "drop"
      )

    # Reversed so that the first statistic ends up at the top of the y-axis.
    stats_lev <- rev(setdiff(colnames(df_stats_table), c(strata, x)))

    df_stats_table <- df_stats_table %>%
      tidyr::pivot_longer(
        cols = -dplyr::all_of(c(strata, x)),
        names_to = "stat",
        values_to = "value",
        names_ptypes = list(stat = factor(levels = stats_lev))
      )

    # The table is a text-only ggplot sharing the x variable with the main plot.
    tbl <- ggplot2::ggplot(
      df_stats_table,
      ggplot2::aes(x = .data[[x]], y = .data[["stat"]], label = .data[["value"]])
    ) +
      ggplot2::geom_text(size = table_font_size) +
      ggplot2::theme_bw() +
      ggplot2::theme(
        panel.border = ggplot2::element_blank(),
        panel.grid.major = ggplot2::element_blank(),
        panel.grid.minor = ggplot2::element_blank(),
        axis.ticks = ggplot2::element_blank(),
        axis.title = ggplot2::element_blank(),
        axis.text.x = ggplot2::element_blank(),
        axis.text.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 0, b = 0, l = 5)),
        strip.text = ggplot2::element_text(hjust = 0),
        strip.text.x = ggplot2::element_text(margin = ggplot2::margin(1.5, 0, 1.5, 0, "pt")),
        strip.background = ggplot2::element_rect(fill = "grey95", color = NA),
        legend.position = "none"
      )

    if (!is.null(strata)) {
      tbl <- tbl + ggplot2::facet_wrap(facets = strata, ncol = 1)
    }

    # align plot and table
    cowplot::plot_grid(p, tbl, ncol = 1)
  } else {
    p
  }
}

#' Helper function to get the right formatting in the optional table in `g_lineplot`.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param x (named `list`)\cr list of numerical values to be formatted and optionally labeled.
#'   Elements of `x` must be `numeric` vectors.
#' @param format (named `character` or `NULL`)\cr format patterns for `x`. Names of the `format` must
#'   match the names of `x`. This parameter is passed directly to the `rtables::format_rcell`
#'   function through the `format` parameter.
#' @param labels (named `character` or `NULL`)\cr optional labels for `x`. Names of the `labels` must
#'   match the names of `x`. When a label is not specified for an element of `x`,
#'   then this function tries to use `label` or `names` (in this order) attribute of that element
#'   (depending on which one exists and it is not `NULL` or `NA` or `NaN`). If none of these attributes
#'   are attached to a given element of `x`, then the label is automatically generated.
#'
#' @return A single row `data.frame` object.
#'
#' @examples
#' mean_ci <- c(48, 51)
#' x <- list(mean = 50, mean_ci = mean_ci)
#' format <- c(mean = "xx.x", mean_ci = "(xx.xx, xx.xx)")
#' labels <- c(mean = "My Mean")
#' h_format_row(x, format, labels)
#'
#' attr(mean_ci, "label") <- "Mean 95% CI"
#' x <- list(mean = 50, mean_ci = mean_ci)
#' h_format_row(x, format, labels)
#'
#' @export
h_format_row <- function(x, format, labels = NULL) {
  # A candidate label is only used when it is present and not missing.
  is_usable <- function(lbl) !is.null(lbl) && !is.na(lbl)

  # Formats one element of `x` into a one-row, one-column data.frame whose
  # column name is taken from (in order of preference) the explicit label,
  # the element's "label" attribute, or the element's own names.
  format_cell <- function(value, fmt, label = NULL) {
    cell <- format_rcell(x = value, format = fmt)
    if (is.na(cell)) {
      cell <- "NA"
    }

    attr_label <- attr(value, "label")
    if (is_usable(label)) {
      names(cell) <- label
    } else if (is_usable(attr_label)) {
      names(cell) <- attr_label
    } else if (length(value) == length(cell)) {
      names(cell) <- names(value)
    }

    as.data.frame(t(cell))
  }

  # One cell per named element of `x`; format and label are looked up by name.
  cells <- lapply(
    names(x),
    function(nm) format_cell(x[[nm]], fmt = format[nm], label = labels[nm])
  )

  # Bind the cells side by side into a single row.
  do.call(cbind, cells)
}

#' Control Function for `g_lineplot` Function
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Default values for `variables` parameter in `g_lineplot` function.
#' A variable's default value can be overwritten for any variable.
#'
#' @param x (`character`)\cr x variable name.
#' @param y (`character`)\cr y variable name.
#' @param strata (`character` or `NA`)\cr strata variable name.
#' @param paramcd (`character` or `NA`)\cr `paramcd` variable name.
#' @param y_unit (`character` or `NA`)\cr `y_unit` variable name.
#'
#' @return A named character vector of variable names.
#'
#' @examples
#' control_lineplot_vars()
#' control_lineplot_vars(strata = NA)
#'
#' @export
control_lineplot_vars <- function(x = "AVISIT",
                                  y = "AVAL",
                                  strata = "ARM",
                                  paramcd = "PARAMCD",
                                  y_unit = "AVALU") {
  # `x` and `y` must be single, non-missing strings.
  checkmate::assert_string(x)
  checkmate::assert_string(y)

  # The remaining variables may be switched off by passing `NA`.
  checkmate::assert_string(strata, na.ok = TRUE)
  checkmate::assert_string(paramcd, na.ok = TRUE)
  checkmate::assert_string(y_unit, na.ok = TRUE)

  # Named character vector keyed by the role of each variable.
  c(x = x, y = y, strata = strata, paramcd = paramcd, y_unit = y_unit)
}

#' Summary for Poisson Negative Binomial.
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Summarize results of a Poisson Negative Binomial Regression.
#' This can be used to analyze count and/or frequency data using a linear model.
#'
#' @inheritParams argument_convention
#'
#' @name summarize_glm_count
NULL

#' Helper Functions for Poisson Models.
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Helper functions that can be used to return the results of various Poisson models.
#'
#' @inheritParams argument_convention
#'
#' @seealso [summarize_glm_count]
#'
#' @name h_glm_count
NULL

#' @describeIn h_glm_count Helper function to return results of a poisson model.
#'
#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called
#'   in `.var` and `variables`.
#' @param variables (named `list` of `strings`)\cr list of additional analysis variables, with
#'   expected elements:
#'   * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple
#'     groups will be summarized. Specifically, the first level of `arm` variable is taken as the
#'     reference group.
#'   * `covariates` (`character`)\cr a vector that can contain single variable names (such as
#'     `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
#'   * `offset` (`string`)\cr name of the column in `.df_row` that holds the numeric offset values.
#' @param weights (`character`)\cr a character vector specifying weights used
#'   in averaging predictions. Number of weights must equal the number of levels included in the covariates.
#'   This value is passed on as the `weights` argument of [emmeans::emmeans()].
#'
#' @return
#' * `h_glm_poisson()` returns the results of a Poisson model.
#'
#' @keywords internal
h_glm_poisson <- function(.var,
                          .df_row,
                          variables,
                          weights) {
  arm_var <- variables$arm
  covariate_terms <- paste(variables$covariates, collapse = " + ")

  # `variables$offset` is used as a column name of `.df_row`. The values are
  # deliberately kept in a local called `offset`: `glm()` records the
  # `offset` argument by name in the fitted model's call.
  offset <- .df_row[[variables$offset]]

  # Model string: "<.var> ~  + <covariates> + <arm>".
  formula <- stats::as.formula(
    paste0(.var, " ~  + ", covariate_terms, " + ", arm_var)
  )

  # Poisson regression with a log link.
  fit <- stats::glm(
    formula = formula,
    offset = offset,
    data = .df_row,
    family = stats::poisson(link = "log")
  )

  # Marginal means per arm on the response scale, requested with `offset = 0`.
  marginal_means <- emmeans::emmeans(
    fit,
    specs = arm_var,
    data = .df_row,
    type = "response",
    offset = 0,
    weights = weights
  )

  list(glm_fit = fit, emmeans_fit = marginal_means)
}

#' @describeIn h_glm_count Helper function to return results of a quasipoisson model.
#'
#' @inheritParams summarize_glm_count
#'
#' @return
#' * `h_glm_quasipoisson()` returns the results of a Quasi-Poisson model.
#'
#'
#' @keywords internal
h_glm_quasipoisson <- function(.var,
                               .df_row,
                               variables,
                               weights) {
  arm_var <- variables$arm
  covariate_terms <- paste(variables$covariates, collapse = " + ")

  # `variables$offset` is used as a column name of `.df_row`. The values are
  # deliberately kept in a local called `offset`: `glm()` records the
  # `offset` argument by name in the fitted model's call.
  offset <- .df_row[[variables$offset]]

  # Model string: "<.var> ~  + <covariates> + <arm>".
  formula <- stats::as.formula(
    paste0(.var, " ~  + ", covariate_terms, " + ", arm_var)
  )

  # Quasi-Poisson regression with a log link.
  fit <- stats::glm(
    formula = formula,
    offset = offset,
    data = .df_row,
    family = stats::quasipoisson(link = "log")
  )

  # Marginal means per arm on the response scale, requested with `offset = 0`.
  marginal_means <- emmeans::emmeans(
    fit,
    specs = arm_var,
    data = .df_row,
    type = "response",
    offset = 0,
    weights = weights
  )

  list(glm_fit = fit, emmeans_fit = marginal_means)
}

#' @describeIn h_glm_count Helper function to return the results of the
#'   selected model (poisson, quasipoisson, negative binomial).
#'
#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called
#'   in `.var` and `variables`.
#' @param variables (named `list` of `strings`)\cr list of additional analysis variables, with
#'   expected elements:
#'   * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple
#'     groups will be summarized. Specifically, the first level of `arm` variable is taken as the
#'     reference group.
#'   * `covariates` (`character`)\cr a vector that can contain single variable names (such as
#'     `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
#'   * `offset` (`string`)\cr name of the column in `.df_row` that holds the numeric offset values.
#' @param weights (`character`)\cr character vector specifying weights used in averaging predictions.
#' @param distribution (`string`)\cr a character value specifying the distribution
#'   used in the regression (`"poisson"` or `"quasipoisson"`; `"negbin"` is not currently available).
#'
#' @return
#' * `h_glm_count()` returns the results of the selected model.
#'
#'
#' @keywords internal
h_glm_count <- function(.var,
                        .df_row,
                        variables,
                        distribution,
                        weights) {
  # `distribution` selects the model, so it has to be one non-missing string.
  if (!is.character(distribution) || length(distribution) != 1L || is.na(distribution)) {
    stop("`distribution` must be a single, non-missing character string.")
  }

  # Dispatch to the matching helper. Anything that is not a supported model
  # fails here with an explicit message instead of silently returning `NULL`.
  switch(distribution,
    poisson = h_glm_poisson(.var, .df_row, variables, weights),
    quasipoisson = h_glm_quasipoisson(.var, .df_row, variables, weights),
    negbin = stop("negative binomial distribution is not currently available."),
    stop(
      "`distribution` must be one of \"poisson\" or \"quasipoisson\", not \"",
      distribution, "\"."
    )
  )
}

#' @describeIn h_glm_count Helper function to return the estimated means.
#'
#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called in `.var` and `variables`.
#' @param conf_level (`numeric`)\cr value used to derive the confidence interval for the rate.
#' @param obj (`glm.fit`)\cr fitted model object used to derive the mean rate estimates in each treatment arm.
#' @param arm (`string`)\cr group variable, for which the covariate adjusted means of multiple groups will be
#'   summarized. Specifically, the first level of `arm` variable is taken as the reference group.
#'
#' @return
#' * `h_ppmeans()` returns the estimated means.
#'
#'
#' @keywords internal
h_ppmeans <- function(obj, .df_row, arm, conf_level) {
  # Probability used for the two-sided normal quantile.
  p <- 1 - (1 - conf_level) / 2

  arm_levels <- levels(.df_row[[arm]])

  # For every arm level: set the whole data set to that level, average the
  # predicted responses and derive an interval from the model's coefficient
  # covariance matrix.
  per_arm <- lapply(arm_levels, function(lev) {
    all_in_arm <- .df_row
    all_in_arm[[arm]] <- factor(lev, levels = arm_levels)

    frame <- stats::model.frame(obj$formula, data = all_in_arm)
    design <- stats::model.matrix(obj$formula, data = frame)

    predicted <- stats::predict(obj, newdata = frame, type = "response")
    estimate <- mean(predicted)

    # Column means of the prediction-weighted design matrix.
    weights_vec <- colMeans(predicted * design)
    se <- sqrt(as.numeric(t(weights_vec) %*% stats::vcov(obj) %*% weights_vec))

    # The interval is symmetric on the log scale around the estimate.
    log_half_width <- stats::qnorm(p) * se / estimate
    c(estimate, estimate * exp(-log_half_width), estimate * exp(log_half_width))
  })
  names(per_arm) <- arm_levels

  # One row per arm level; row names carry the level.
  estimates <- do.call(rbind, per_arm)
  estimate_name <- if (inherits(obj, "negbin")) "response" else "rate"
  colnames(estimates) <- c(estimate_name, "asymp.LCL", "asymp.UCL")

  out <- as.data.frame(estimates)
  out[[arm]] <- rownames(out)
  out
}

#' @describeIn summarize_glm_count Statistics function that produces a named list of results
#'   of the investigated Poisson model.
#'
#' @inheritParams h_glm_count
#'
#' @return
#' * `s_glm_count()` returns a named `list` of 6 statistics:
#'   * `n`: Count of complete sample size for the group.
#'   * `rate`: Estimated event rate per follow-up time.
#'   * `rate_ci`: Confidence interval for the estimated rate per follow-up time.
#'   * `rate_ratio`: Ratio of event rates in each treatment arm to the reference arm.
#'   * `rate_ratio_ci`: Confidence interval for the rate ratio.
#'   * `pval`: p-value.
#'
#'
#' @keywords internal
s_glm_count <- function(df,
                        .var,
                        .df_row,
                        variables,
                        .ref_group,
                        .in_ref_col,
                        distribution,
                        conf_level,
                        rate_mean_method,
                        weights,
                        scale = 1) {
  arm <- variables$arm

  y <- df[[.var]]
  smry_level <- as.character(unique(df[[arm]]))

  # `df` must hold exactly one arm level
  checkmate::assert_scalar(smry_level)

  # Fit the model on the full row data; `df` only identifies the current arm.
  results <- h_glm_count(
    .var = .var,
    .df_row = .df_row,
    variables = variables,
    distribution = distribution,
    weights = weights
  )

  # Per-arm rate estimates, either from emmeans or from the predicted-means helper.
  if (rate_mean_method == "emmeans") {
    emmeans_smry <- summary(results$emmeans_fit, level = conf_level)
  } else if (rate_mean_method == "ppmeans") {
    emmeans_smry <- h_ppmeans(results$glm_fit, .df_row, arm, conf_level)
  } else {
    stop("`rate_mean_method` must be either \"emmeans\" or \"ppmeans\".")
  }

  emmeans_smry_level <- emmeans_smry[emmeans_smry[[arm]] == smry_level, ]

  # The estimate column is called `response` for negative binomial fits and
  # `rate` otherwise.
  rate_estimate <- if (distribution == "negbin") {
    emmeans_smry_level$response
  } else {
    emmeans_smry_level$rate
  }
  ci_label <- f_conf_level(conf_level)

  # Statistics shared by reference and comparison columns. `scale` is applied
  # to the point estimate and to both interval bounds alike, so that they are
  # always reported in the same unit.
  arm_stats <- list(
    n = sum(!is.na(y)),
    rate = formatters::with_label(rate_estimate * scale, "Adjusted Rate"),
    rate_ci = formatters::with_label(
      c(emmeans_smry_level$asymp.LCL * scale, emmeans_smry_level$asymp.UCL * scale),
      ci_label
    )
  )

  if (.in_ref_col) {
    # No comparison is made for the reference arm itself: empty placeholders.
    comparison_stats <- list(
      rate_ratio = formatters::with_label(character(), "Adjusted Rate Ratio"),
      rate_ratio_ci = formatters::with_label(character(), ci_label),
      pval = formatters::with_label(character(), "p-value")
    )
  } else {
    # Locate the reference arm by exact comparison; arm levels are plain
    # labels and must not be interpreted as regular expressions.
    ref_level <- as.character(unique(.ref_group[[arm]]))
    emmeans_levels <- as.character(as.data.frame(results$emmeans_fit)[[arm]])

    emmeans_contrasts <- emmeans::contrast(
      results$emmeans_fit,
      method = "trt.vs.ctrl",
      ref = which(emmeans_levels %in% ref_level)
    )

    # Use the requested confidence level so that the interval matches its label.
    contrasts_smry <- summary(
      emmeans_contrasts,
      infer = TRUE,
      level = conf_level,
      adjust = "none"
    )

    # Literal (non-regex) match of the current arm within the contrast labels.
    smry_contrasts_level <- contrasts_smry[
      grepl(smry_level, contrasts_smry$contrast, fixed = TRUE),
    ]

    comparison_stats <- list(
      rate_ratio = formatters::with_label(smry_contrasts_level$ratio, "Adjusted Rate Ratio"),
      rate_ratio_ci = formatters::with_label(
        c(smry_contrasts_level$asymp.LCL, smry_contrasts_level$asymp.UCL),
        ci_label
      ),
      pval = formatters::with_label(smry_contrasts_level$p.value, "p-value")
    )
  }

  c(arm_stats, comparison_stats)
}

#' @describeIn summarize_glm_count Formatted analysis function which is used as `afun` in `summarize_glm_count()`.
#'
#' @return
#' * `a_glm_count()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#'
#' @keywords internal
a_glm_count <- make_afun(
  s_glm_count,
  # Indent modifiers per statistic: 1L for the intervals and the p-value, 0L otherwise.
  .indent_mods = c(
    n = 0L,
    rate = 0L,
    rate_ci = 1L,
    rate_ratio = 0L,
    rate_ratio_ci = 1L,
    pval = 1L
  ),
  # Default display format per statistic.
  .formats = c(
    n = "xx",
    rate = "xx.xxxx",
    rate_ci = "(xx.xxxx, xx.xxxx)",
    rate_ratio = "xx.xxxx",
    rate_ratio_ci = "(xx.xxxx, xx.xxxx)",
    pval = "x.xxxx | (<0.0001)"
  ),
  .null_ref_cells = FALSE
)

#' @describeIn summarize_glm_count Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `summarize_glm_count()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_glm_count()` to the table layout.
#'
#' @examples
#' library(dplyr)
#' anl <- tern_ex_adtte %>% filter(PARAMCD == "TNE")
#' anl$AVAL_f <- as.factor(anl$AVAL)
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM", ref_group = "B: Placebo") %>%
#'   add_colcounts() %>%
#'   summarize_vars(
#'     "AVAL_f",
#'     var_labels = "Number of exacerbations per patient",
#'     .stats = c("count_fraction"),
#'     .formats = c("count_fraction" = "xx (xx.xx%)"),
#'     .label = c("Number of exacerbations per patient")
#'   ) %>%
#'   summarize_glm_count(
#'     vars = "AVAL",
#'     variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = NULL),
#'     conf_level = 0.95,
#'     distribution = "poisson",
#'     rate_mean_method = "emmeans",
#'     var_labels = "Unadjusted exacerbation rate (per year)",
#'     table_names = "unadj",
#'     .stats = c("rate"),
#'     .labels = c(rate = "Rate")
#'   ) %>%
#'   summarize_glm_count(
#'     vars = "AVAL",
#'     variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = c("REGION1")),
#'     conf_level = 0.95,
#'     distribution = "quasipoisson",
#'     rate_mean_method = "ppmeans",
#'     var_labels = "Adjusted (QP) exacerbation rate (per year)",
#'     table_names = "adj",
#'     .stats = c("rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
#'     .labels = c(
#'       rate = "Rate", rate_ci = "Rate CI", rate_ratio = "Rate Ratio",
#'       rate_ratio_ci = "Rate Ratio CI", pval = "p value"
#'     )
#'   )
#' build_table(lyt = lyt, df = anl)
#'
#' @export
summarize_glm_count <- function(lyt,
                                vars,
                                var_labels,
                                ...,
                                show_labels = "visible",
                                table_names = vars,
                                .stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Specialize the default analysis function with the caller's overrides.
  glm_count_afun <- make_afun(
    a_glm_count,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Register the analysis on the layout; everything passed through `...`
  # is handed over as `extra_args`.
  analyze(
    lyt,
    vars,
    afun = glm_count_afun,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    extra_args = list(...)
  )
}

#' Count the Number of Patients with Particular Flags
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The primary analysis variable `.var` denotes the unique patient identifier.
#'
#' @inheritParams argument_convention
#'
#' @seealso [count_patients_with_event]
#'
#' @name count_patients_with_flags
NULL

#' @describeIn count_patients_with_flags Statistics function which counts the number of patients for which
#'   a particular flag variable is `TRUE`.
#'
#' @inheritParams summarize_variables
#' @param .var (`character`)\cr name of the column that contains the unique identifier.
#' @param flag_variables (`character`)\cr a character vector specifying the names of `logical`
#'   variables from analysis dataset used for counting the number of unique identifiers.
#'
#' @return
#' * `s_count_patients_with_flags()` returns the count and the fraction of unique identifiers with each particular
#'   flag as a list of statistics `n`, `count`, `count_fraction`, and `n_blq`, with one element per flag.
#'
#' @examples
#' library(dplyr)
#'
#' # `s_count_patients_with_flags()`
#'
#' # Add labelled flag variables to analysis dataset.
#' adae <- tern_ex_adae %>%
#'   mutate(
#'     fl1 = TRUE,
#'     fl2 = TRTEMFL == "Y",
#'     fl3 = TRTEMFL == "Y" & AEOUT == "FATAL",
#'     fl4 = TRTEMFL == "Y" & AEOUT == "FATAL" & AEREL == "Y"
#'   )
#' labels <- c(
#'   "fl1" = "Total AEs",
#'   "fl2" = "Total number of patients with at least one adverse event",
#'   "fl3" = "Total number of patients with fatal AEs",
#'   "fl4" = "Total number of patients with related fatal AEs"
#' )
#' formatters::var_labels(adae)[names(labels)] <- labels
#'
#' s_count_patients_with_flags(
#'   adae,
#'   "SUBJID",
#'   flag_variables = c("fl1", "fl2", "fl3", "fl4"),
#'   denom = "N_col",
#'   .N_col = 1000
#' )
#'
#' @export
s_count_patients_with_flags <- function(df,
                                        .var,
                                        flag_variables,
                                        .N_col, # nolint
                                        .N_row, # nolint
                                        denom = c("n", "N_row", "N_col")) {
  # `flag_variables` may be given as c(column = "label"). When it is unnamed,
  # the column names double as labels.
  if (is.null(names(flag_variables))) flag_variables <- stats::setNames(flag_variables, flag_variables)
  flag_names <- unname(flag_variables)
  flag_variables <- names(flag_variables)

  checkmate::assert_subset(flag_variables, colnames(df))

  # One column of statistics per flag. `sapply()` is kept on purpose: the code
  # below relies on its simplification of the per-flag lists into a list-matrix.
  temp <- sapply(flag_variables, function(x) {
    # rows for which the flag is TRUE
    position_satisfy_flags <- which(df[[x]])
    id_satisfy_flags <- as.character(unique(df[position_satisfy_flags, ][[.var]]))
    s_count_values(
      as.character(unique(df[[.var]])),
      id_satisfy_flags,
      denom = denom,
      .N_col = .N_col,
      .N_row = .N_row
    )
  })
  colnames(temp) <- flag_names

  # Transpose so that each statistic becomes one list with an entry per flag.
  temp <- data.frame(t(temp))
  result <- as.list(temp)

  # With a single flag the entries are not named after the flag above, so the
  # name is restored here for every statistic (not only for the first three).
  if (length(flag_variables) == 1) {
    for (i in seq_along(result)) names(result[[i]]) <- flag_names[1]
  }
  result
}

#' @describeIn count_patients_with_flags Formatted analysis function which is used as `afun`
#'   in `count_patients_with_flags()`.
#'
#' @return
#' * `a_count_patients_with_flags()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' #  We need to ungroup `count_fraction` first so that the `rtables` formatting
#' # function `format_count_fraction()` can be applied correctly.
#'
#' # `a_count_patients_with_flags()`
#'
#' afun <- make_afun(a_count_patients_with_flags,
#'   .stats = "count_fraction",
#'   .ungroup_stats = "count_fraction"
#' )
#' afun(
#'   adae,
#'   .N_col = 10L,
#'   .N_row = 10L,
#'   .var = "USUBJID",
#'   flag_variables = c("fl1", "fl2", "fl3", "fl4")
#' )
#'
#' @export
a_count_patients_with_flags <- make_afun(
  s_count_patients_with_flags,
  # Default formatting function for the `count_fraction` statistic.
  .formats = list(count_fraction = format_count_fraction_fixed_dp)
)

#' @describeIn count_patients_with_flags Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_patients_with_flags()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_patients_with_flags()` to the table layout.
#'
#' @examples
#' # `count_patients_with_flags()`
#'
#' lyt2 <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_patients_with_flags(
#'     "SUBJID",
#'     flag_variables = formatters::var_labels(adae[, c("fl1", "fl2", "fl3", "fl4")]),
#'     denom = "N_col"
#'   )
#' build_table(lyt2, adae, alt_counts_df = tern_ex_adsl)
#'
#' @export
count_patients_with_flags <- function(lyt,
                                      var,
                                      var_labels = var,
                                      show_labels = "hidden",
                                      ...,
                                      table_names = paste0("tbl_flags_", var),
                                      .stats = "count_fraction",
                                      .formats = NULL,
                                      .indent_mods = NULL) {
  # Specialize the default analysis function. All requested statistics are
  # also passed as `.ungroup_stats`.
  flags_afun <- make_afun(
    a_count_patients_with_flags,
    .stats = .stats,
    .formats = .formats,
    .indent_mods = .indent_mods,
    .ungroup_stats = .stats
  )

  # Register the analysis on the layout and return the extended layout;
  # everything passed through `...` is handed over as `extra_args`.
  analyze(
    lyt = lyt,
    vars = var,
    afun = flags_afun,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    extra_args = list(...)
  )
}

#' Occurrence Counts by Grade
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions for analyzing frequencies and fractions of occurrences by grade for patients
#' with occurrence data. Multiple occurrences within one individual are counted once at the
#' greatest intensity/highest grade level.
#'
#' @inheritParams argument_convention
#' @param grade_groups (named `list` of `character`)\cr containing groupings of grades.
#' @param remove_single (`logical`)\cr `TRUE` to not include the elements of one-element grade groups
#'   in the output list; in this case only the grade groups names will be included in the output.
#'
#' @seealso Relevant helper function [h_append_grade_groups()].
#'
#' @name count_occurrences_by_grade
NULL

#' Helper function for [s_count_occurrences_by_grade()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function for [s_count_occurrences_by_grade()] to insert grade groupings into list with
#' individual grade frequencies. The order of the final result follows the order of `grade_groups`.
#' The elements under any-grade group (if any), i.e. the grade group equal to `refs` will be moved to
#' the end. Grade groups names must be unique.
#'
#' @inheritParams count_occurrences_by_grade
#' @param refs (named `list` of `numeric`)\cr where each name corresponds to a reference grade level
#'   and each entry represents a count.
#'
#' @return Formatted list of grade groupings.
#'
#' @examples
#' h_append_grade_groups(
#'   list(
#'     "Any Grade" = as.character(1:5),
#'     "Grade 1-2" = c("1", "2"),
#'     "Grade 3-4" = c("3", "4")
#'   ),
#'   list("1" = 10, "2" = 20, "3" = 30, "4" = 40, "5" = 50)
#' )
#'
#' h_append_grade_groups(
#'   list(
#'     "Any Grade" = as.character(5:1),
#'     "Grade A" = "5",
#'     "Grade B" = c("4", "3")
#'   ),
#'   list("1" = 10, "2" = 20, "3" = 30, "4" = 40, "5" = 50)
#' )
#'
#' h_append_grade_groups(
#'   list(
#'     "Any Grade" = as.character(1:5),
#'     "Grade 1-2" = c("1", "2"),
#'     "Grade 3-4" = c("3", "4")
#'   ),
#'   list("1" = 10, "2" = 5, "3" = 0)
#' )
#'
#' @export
h_append_grade_groups <- function(grade_groups, refs, remove_single = TRUE) {
  checkmate::assert_list(grade_groups)
  checkmate::assert_list(refs)
  refs_orig <- refs
  elements <- unique(unlist(grade_groups))

  ### compute sums in groups
  # grades missing from `refs` are extracted as NULL and thus ignored by sum()
  grp_sum <- lapply(grade_groups, function(i) do.call(sum, refs[i]))

  # pad grades that appear in a group but not in `refs` with a zero count
  if (!checkmate::test_subset(elements, names(refs))) {
    padding_el <- setdiff(elements, names(refs))
    refs[padding_el] <- 0
  }
  result <- c(grp_sum, refs)

  ### order result while keeping grade_groups's ordering
  ordr <- grade_groups

  # elements of any-grade group (if any) will be moved to the end
  # (vapply() always yields a logical vector, also for an empty `grade_groups`)
  is_any <- vapply(
    grade_groups,
    function(grp) setequal(grp, names(refs)),
    logical(1)
  )
  ordr[is_any] <- list(character(0)) # hide elements under any-grade group

  # groups-elements combined sequence
  ordr <- c(lapply(names(ordr), function(g) c(g, ordr[[g]])), recursive = TRUE, use.names = FALSE)
  ordr <- ordr[!duplicated(ordr)]

  # append remaining elements (if any)
  ordr <- union(ordr, unlist(grade_groups[is_any])) # from any-grade group
  ordr <- union(ordr, names(refs)) # from refs

  # remove elements of single-element groups, if any
  if (remove_single) {
    is_single <- lengths(grade_groups) == 1L
    ordr <- setdiff(ordr, unlist(grade_groups[is_single]))
  }

  # apply the order
  result <- result[ordr]

  # remove groups without any elements in the original refs
  # note: it's OK if groups have 0 value
  keep_grp <- vapply(grade_groups, function(x, rf) {
    any(x %in% rf)
  }, rf = names(refs_orig), logical(1))

  keep_el <- names(result) %in% names(refs_orig) | names(result) %in% names(keep_grp)[keep_grp]
  result <- result[keep_el]

  result
}

#' @describeIn count_occurrences_by_grade Statistics function which counts the
#'  number of patients by highest grade.
#'
#' @return
#' * `s_count_occurrences_by_grade()` returns a list of counts and fractions with one element per grade level or
#'   grade level grouping.
#'
#' @examples
#' library(dplyr)
#' df <- data.frame(
#'   USUBJID = as.character(c(1:6, 1)),
#'   ARM = factor(c("A", "A", "A", "B", "B", "B", "A"), levels = c("A", "B")),
#'   AETOXGR = factor(c(1, 2, 3, 4, 1, 2, 3), levels = c(1:5)),
#'   AESEV = factor(
#'     x = c("MILD", "MODERATE", "SEVERE", "MILD", "MILD", "MODERATE", "SEVERE"),
#'     levels = c("MILD", "MODERATE", "SEVERE")
#'   ),
#'   stringsAsFactors = FALSE
#' )
#' df_adsl <- df %>%
#'   select(USUBJID, ARM) %>%
#'   unique()
#'
#' s_count_occurrences_by_grade(
#'   df,
#'   .N_col = 10L,
#'   .var = "AETOXGR",
#'   id = "USUBJID",
#'   grade_groups = list("ANY" = levels(df$AETOXGR))
#' )
#'
#' @export
s_count_occurrences_by_grade <- function(df,
                                         .var,
                                         .N_col, # nolint
                                         id = "USUBJID",
                                         grade_groups = list(),
                                         remove_single = TRUE,
                                         labelstr = "") {
  assert_valid_factor(df[[.var]])
  assert_df_with_variables(df, list(grade = .var, id = id))

  if (nrow(df) < 1) {
    # no records: every grade level gets a zero count
    grade_levels <- levels(df[[.var]])
    l_count <- as.list(rep(0, length(grade_levels)))
    names(l_count) <- grade_levels
  } else {
    if (is.factor(df[[id]])) {
      assert_valid_factor(df[[id]], any.missing = FALSE)
    } else {
      checkmate::assert_character(df[[id]], min.chars = 1, any.missing = FALSE)
    }
    checkmate::assert_count(.N_col)

    id <- df[[id]]
    grade <- df[[.var]]

    # An unordered grade factor is turned into an ordered one so that the
    # maximum per subject is defined.
    if (!is.ordered(grade)) {
      grade_lbl <- obj_label(grade)
      lvls <- levels(grade)
      is_num <- grepl("^\\d+$", lvls)
      if (sum(is_num) %in% c(0, length(lvls))) {
        # all or none of the levels are integers: keep the existing order
        lvl_ord <- lvls
      } else {
        # mixed levels: non-numeric levels sort below the smallest numeric
        # one, numeric levels sort by value
        lvls[!is_num] <- min(as.numeric(lvls[is_num])) - 1
        lvl_ord <- levels(grade)[order(as.numeric(lvls))]
      }
      grade <- formatters::with_label(factor(grade, levels = lvl_ord, ordered = TRUE), grade_lbl)
    }

    # highest grade per subject, then the number of subjects per grade
    df_max <- stats::aggregate(grade ~ id, FUN = max, drop = FALSE)
    l_count <- as.list(table(df_max$grade))
  }

  if (length(grade_groups) > 0) {
    l_count <- h_append_grade_groups(grade_groups, l_count, remove_single)
  }

  # Pair every count with its fraction of `.N_col`. A zero count always maps
  # to a zero fraction, which avoids `0 / 0 = NaN` when `.N_col` is 0.
  l_count_fraction <- lapply(
    l_count,
    function(i, denom) {
      if (i == 0) {
        c(0, 0)
      } else {
        c(i, i / denom)
      }
    },
    denom = .N_col
  )

  list(
    count_fraction = l_count_fraction
  )
}

#' @describeIn count_occurrences_by_grade Formatted analysis function which is used as `afun`
#'   in `count_occurrences_by_grade()`.
#'
#' @return
#' * `a_count_occurrences_by_grade()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' #  We need to ungroup `count_fraction` first so that the `rtables` formatting
#' # function `format_count_fraction()` can be applied correctly.
#' afun <- make_afun(a_count_occurrences_by_grade, .ungroup_stats = "count_fraction")
#' afun(
#'   df,
#'   .N_col = 10L,
#'   .var = "AETOXGR",
#'   id = "USUBJID",
#'   grade_groups = list("ANY" = levels(df$AETOXGR))
#' )
#'
#' @export
a_count_occurrences_by_grade <- make_afun(
  s_count_occurrences_by_grade,
  # Default formatting function for the `count_fraction` statistic.
  .formats = list(count_fraction = format_count_fraction_fixed_dp)
)

#' @describeIn count_occurrences_by_grade Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param var_labels (`character`)\cr labels to show in the result table.
#'
#' @return
#' * `count_occurrences_by_grade()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_occurrences_by_grade()` to the table layout.
#'
#' @examples
#' # Layout creating function with custom format.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_occurrences_by_grade(
#'     var = "AESEV",
#'     .formats = c("count_fraction" = "xx.xx (xx.xx%)")
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' # Define additional grade groupings.
#' grade_groups <- list(
#'   "-Any-" = c("1", "2", "3", "4", "5"),
#'   "Grade 1-2" = c("1", "2"),
#'   "Grade 3-5" = c("3", "4", "5")
#' )
#'
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_occurrences_by_grade(
#'     var = "AETOXGR",
#'     grade_groups = grade_groups
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' @export
count_occurrences_by_grade <- function(lyt,
                                       var,
                                       var_labels = var,
                                       show_labels = "default",
                                       ...,
                                       table_names = var,
                                       .stats = NULL,
                                       .formats = NULL,
                                       .indent_mods = NULL,
                                       .labels = NULL) {
  # Specialize the default analysis function; `count_fraction` is always
  # passed as `.ungroup_stats`.
  # NOTE: `.labels` is part of the signature but is not passed on to
  # `make_afun()` here.
  grade_afun <- make_afun(
    a_count_occurrences_by_grade,
    .stats = .stats,
    .formats = .formats,
    .indent_mods = .indent_mods,
    .ungroup_stats = "count_fraction"
  )

  # Register the analysis on the layout; everything passed through `...`
  # is handed over as `extra_args`.
  analyze(
    lyt = lyt,
    vars = var,
    afun = grade_afun,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    extra_args = list(...)
  )
}

#' @describeIn count_occurrences_by_grade Layout-creating function which can take content function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @return
#' * `summarize_occurrences_by_grade()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows
#'   containing the statistics from `s_count_occurrences_by_grade()` to the table layout.
#'
#' @examples
#' # Layout creating function with custom format.
#' basic_table() %>%
#'   add_colcounts() %>%
#'   split_rows_by("ARM", child_labels = "visible", nested = TRUE) %>%
#'   summarize_occurrences_by_grade(
#'     var = "AESEV",
#'     .formats = c("count_fraction" = "xx.xx (xx.xx%)")
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' basic_table() %>%
#'   add_colcounts() %>%
#'   split_rows_by("ARM", child_labels = "visible", nested = TRUE) %>%
#'   summarize_occurrences_by_grade(
#'     var = "AETOXGR",
#'     grade_groups = grade_groups
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' @export
summarize_occurrences_by_grade <- function(lyt,
                                           var,
                                           ...,
                                           .stats = NULL,
                                           .formats = NULL,
                                           .indent_mods = NULL,
                                           .labels = NULL) {
  # Build the content function from the formatted analysis function, applying
  # the user-supplied statistics, formats, labels and indentation settings.
  content_fun <- make_afun(
    a_count_occurrences_by_grade,
    .ungroup_stats = "count_fraction",
    .indent_mods = .indent_mods,
    .labels = .labels,
    .formats = .formats,
    .stats = .stats
  )

  # Arguments given through `...` reach the content function via `extra_args`.
  summarize_row_groups(lyt = lyt, var = var, cfun = content_fun, extra_args = list(...))
}

#' Helper Functions for Tabulating Biomarker Effects on Survival by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions which are documented here separately to not confuse the user
#' when reading about the user-facing functions.
#'
#' @inheritParams survival_biomarkers_subgroups
#' @inheritParams argument_convention
#' @inheritParams fit_coxreg_multivar
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte, fill = FALSE)
#'
#' adtte_f <- adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' @name h_survival_biomarkers_subgroups
NULL

#' @describeIn h_survival_biomarkers_subgroups helps with converting the "survival" function variable list
#'   to the "Cox regression" variable list. The reason is that currently there is an inconsistency between the variable
#'   names accepted by `extract_survival_subgroups()` and `fit_coxreg_multivar()`.
#'
#' @param biomarker (`string`)\cr the name of the biomarker variable.
#'
#' @return
#' * `h_surv_to_coxreg_variables()` returns a named `list` of elements `time`, `event`, `arm`,
#'   `covariates`, and `strata`.
#'
#' @examples
#' # This is how the variable list is converted internally.
#' h_surv_to_coxreg_variables(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "EVNT",
#'     covariates = c("A", "B"),
#'     strata = "D"
#'   ),
#'   biomarker = "AGE"
#' )
#'
#' @export
h_surv_to_coxreg_variables <- function(variables, biomarker) {
  # Input validation: `tte`, `is_event` and `biomarker` must be single strings.
  checkmate::assert_list(variables)
  checkmate::assert_string(variables$tte)
  checkmate::assert_string(variables$is_event)
  checkmate::assert_string(biomarker)

  # Rename the "survival" elements to their "Cox regression" counterparts; the
  # biomarker takes the place of the arm variable.
  coxreg_variables <- list(
    time = variables$tte,
    event = variables$is_event,
    arm = biomarker,
    covariates = variables$covariates,
    strata = variables$strata
  )
  coxreg_variables
}

#' @describeIn h_survival_biomarkers_subgroups prepares estimates for number of events, patients and median survival
#'   times, as well as hazard ratio estimates, confidence intervals and p-values, for multiple biomarkers
#'   in a given single data set.
#'   `variables` corresponds to names of variables found in `data`, passed as a named list and requires elements
#'   `tte`, `is_event`, `biomarkers` (vector of continuous biomarker variables) and optionally `subgroups` and `strat`.
#'
#' @return
#' * `h_coxreg_mult_cont_df()` returns a `data.frame` containing estimates and statistics for the selected biomarkers.
#'
#' @examples
#' # For a single population, estimate separately the effects
#' # of two biomarkers.
#' df <- h_coxreg_mult_cont_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     strata = c("STRATA1", "STRATA2")
#'   ),
#'   data = adtte_f
#' )
#' df
#'
#' # If the data set is empty, still the corresponding rows with missings are returned.
#' h_coxreg_mult_cont_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "REGION1",
#'     strata = c("STRATA1", "STRATA2")
#'   ),
#'   data = adtte_f[NULL, ]
#' )
#'
#' @export
h_coxreg_mult_cont_df <- function(variables,
                                  data,
                                  control = control_coxreg()) {
  assert_df_with_variables(data, variables)
  checkmate::assert_list(control, names = "named")
  checkmate::assert_character(variables$biomarkers, min.len = 1, any.missing = FALSE)
  conf_level <- control[["conf_level"]]
  pval_label <- paste0(
    # The regex capitalizes the first letter of the string / sentence.
    "p-value (", gsub("(^[a-z])", "\\U\\1", trimws(control[["pval_method"]]), perl = TRUE), ")"
  )
  # If there is any data, run model, otherwise return empty results.
  if (nrow(data) > 0) {
    # Fit one model per biomarker; the biomarker is passed in the `arm` slot of
    # the Cox regression variable list.
    l_result <- lapply(variables$biomarkers, function(bm) {
      coxreg_list <- fit_coxreg_multivar(
        variables = h_surv_to_coxreg_variables(variables, bm),
        data = data,
        control = control
      )
      result <- do.call(
        h_coxreg_multivar_extract,
        c(list(var = bm), coxreg_list[c("mod", "data", "control")])
      )
      # Median survival time is computed from the response stored in the fitted
      # model (`time` and `status` columns).
      data_fit <- as.data.frame(as.matrix(coxreg_list$mod$y))
      data_fit$status <- as.logical(data_fit$status)
      median <- s_surv_time(
        df = data_fit,
        .var = "time",
        is_event = "status"
      )$median
      data.frame(
        # Dummy column needed downstream to create a nested header.
        biomarker = bm,
        biomarker_label = formatters::var_labels(data[bm], fill = TRUE),
        n_tot = coxreg_list$mod$n,
        n_tot_events = coxreg_list$mod$nevent,
        median = as.numeric(median),
        # Only the first row of the extracted estimates is reported.
        result[1L, c("hr", "lcl", "ucl")],
        conf_level = conf_level,
        pval = result[1L, "pval"],
        pval_label = pval_label,
        stringsAsFactors = FALSE
      )
    })
    # Stack the one-row results, dropping the row names.
    do.call(rbind, args = c(l_result, make.row.names = FALSE))
  } else {
    # No data: return one row per biomarker with zero counts and missing
    # estimates, using the same columns as above.
    data.frame(
      biomarker = variables$biomarkers,
      biomarker_label = formatters::var_labels(data[variables$biomarkers], fill = TRUE),
      n_tot = 0L,
      n_tot_events = 0L,
      median = NA,
      hr = NA,
      lcl = NA,
      ucl = NA,
      conf_level = conf_level,
      pval = NA,
      pval_label = pval_label,
      row.names = seq_along(variables$biomarkers),
      stringsAsFactors = FALSE
    )
  }
}

#' @describeIn h_survival_biomarkers_subgroups prepares a single sub-table given a `df_sub` containing
#'   the results for a single biomarker.
#'
#' @param df (`data.frame`)\cr results for a single biomarker, as part of what is
#'   returned by [extract_survival_biomarkers()] (it needs a couple of columns which are
#'   added by that high-level function relative to what is returned by [h_coxreg_mult_cont_df()],
#'   see the example).
#'
#' @return
#' * `h_tab_surv_one_biomarker()` returns an `rtables` table object with the given statistics arranged in columns.
#'
#' @examples
#' # Starting from above `df`, zoom in on one biomarker and add required columns.
#' df1 <- df[1, ]
#' df1$subgroup <- "All patients"
#' df1$row_type <- "content"
#' df1$var <- "ALL"
#' df1$var_label <- "All patients"
#' h_tab_surv_one_biomarker(
#'   df1,
#'   vars = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"),
#'   time_unit = "days"
#' )
#'
#' @export
h_tab_surv_one_biomarker <- function(df,
                                     vars,
                                     time_unit,
                                     .indent_mods = 0L) {
  # Pick the analysis functions for the requested statistics.
  selected_afuns <- a_survival_subgroups()[vars]

  # Column settings use the confidence level and p-value label stored in the
  # first row of `df`.
  column_settings <- d_survival_subgroups_colvars(
    vars,
    conf_level = df$conf_level[1],
    method = df$pval_label[1],
    time_unit = time_unit
  )

  h_tab_one_biomarker(
    df = df,
    afuns = selected_afuns,
    colvars = column_settings,
    .indent_mods = .indent_mods
  )
}

#' Occurrence Counts
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions for analyzing frequencies and fractions of occurrences for patients with occurrence
#' data. Primary analysis variables are the dictionary terms. All occurrences are counted for total
#' counts. Multiple occurrences within patient at the lowest term level displayed in the table are
#' counted only once.
#'
#' @inheritParams argument_convention
#'
#' @note By default, occurrences which don't appear in a given row split are dropped from the table and
#'   the occurrences in the table are sorted alphabetically per row split. Therefore, the corresponding layout
#'   needs to use `split_fun = drop_split_levels` in the `split_rows_by` calls. Use `drop = FALSE` if you would
#'   like to show all occurrences.
#'
#' @name count_occurrences
NULL

#' @describeIn count_occurrences Statistics function which counts number of patients that report an
#' occurrence.
#'
#' @param denom (`string`)\cr choice of denominator for patient proportions. Can be:
#'   - `N_col`: total number of patients in this column across rows
#'   - `n`: number of patients with any occurrences
#'
#' @return
#' * `s_count_occurrences()` returns a list with:
#'   * `count`: list of counts with one element per occurrence.
#'   * `count_fraction`: list of counts and fractions with one element per occurrence.
#'   * `fraction`: list of numerators and denominators with one element per occurrence.
#'
#' @examples
#' df <- data.frame(
#'   USUBJID = as.character(c(1, 1, 2, 4, 4, 4)),
#'   MHDECOD = c("MH1", "MH2", "MH1", "MH1", "MH1", "MH3")
#' )
#'
#' N_per_col <- 4L
#'
#' # Count unique occurrences per subject.
#' s_count_occurrences(
#'   df,
#'   .N_col = N_per_col,
#'   .df_row = df,
#'   .var = "MHDECOD",
#'   id = "USUBJID"
#' )
#'
#' @export
s_count_occurrences <- function(df,
                                denom = c("N_col", "n"),
                                .N_col, # nolint
                                .df_row,
                                drop = TRUE,
                                .var = "MHDECOD",
                                id = "USUBJID") {
  checkmate::assert_flag(drop)
  assert_df_with_variables(df, list(range = .var, id = id))
  checkmate::assert_count(.N_col)
  checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[id]], classes = c("factor", "character"))
  denom <- match.arg(denom)

  occurrences <- df[[.var]]
  if (drop) {
    # Restrict the levels to the occurrences present in `.df_row`. The original
    # level order is deliberately not preserved: looking it up in large original
    # levels would take more time, and it would fail for character input.
    row_levels <- sort(unique(.df_row[[.var]]))
    if (length(row_levels) == 0) {
      stop(
        "no empty `.df_row` input allowed when `drop = TRUE`,",
        " please use `split_fun = drop_split_levels` in the `rtables` `split_rows_by` calls"
      )
    }
    occurrences <- factor(occurrences, levels = row_levels)
  }

  ids <- factor(df[[id]])
  # Denominator: either the number of distinct ids in `df` or the column total.
  denominator <- if (denom == "n") nlevels(ids) else .N_col

  # An id counts at most once per occurrence, however often it is repeated.
  n_ids_per_occurrence <- as.list(rowSums(table(occurrences, ids) > 0))

  count_with_fraction <- function(k) {
    if (k == 0 && denominator == 0) {
      # Avoid 0 / 0 when there is nothing to count and nothing to divide by.
      c(0, 0)
    } else {
      c(k, k / denominator)
    }
  }
  num_and_denom <- function(k) c("num" = k, "denom" = denominator)

  list(
    count = n_ids_per_occurrence,
    count_fraction = lapply(n_ids_per_occurrence, count_with_fraction),
    fraction = lapply(n_ids_per_occurrence, num_and_denom)
  )
}

#' @describeIn count_occurrences Formatted analysis function which is used as `afun`
#'   in `count_occurrences()`.
#'
#' @return
#' * `a_count_occurrences()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' #  We need to ungroup `count_fraction` first so that the `rtables` formatting
#' # function `format_count_fraction()` can be applied correctly.
#' afun <- make_afun(a_count_occurrences, .ungroup_stats = c("count", "count_fraction", "fraction"))
#' afun(
#'   df,
#'   .N_col = N_per_col,
#'   .df_row = df,
#'   .var = "MHDECOD",
#'   id = "USUBJID"
#' )
#'
#' @export
a_count_occurrences <- make_afun(
  s_count_occurrences,
  # One default format per statistic returned by `s_count_occurrences()`.
  .formats = c(
    count = "xx",
    count_fraction = format_count_fraction_fixed_dp,
    fraction = format_fraction_fixed_dp
  )
)

#' @describeIn count_occurrences Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_occurrences()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_occurrences()` to the table layout.
#'
#' @examples
#' library(dplyr)
#' df <- data.frame(
#'   USUBJID = as.character(c(
#'     1, 1, 2, 4, 4, 4,
#'     6, 6, 6, 7, 7, 8
#'   )),
#'   MHDECOD = c(
#'     "MH1", "MH2", "MH1", "MH1", "MH1", "MH3",
#'     "MH2", "MH2", "MH3", "MH1", "MH2", "MH4"
#'   ),
#'   ARM = rep(c("A", "B"), each = 6)
#' )
#' df_adsl <- df %>%
#'   select(USUBJID, ARM) %>%
#'   unique()
#'
#' # Create table layout
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_occurrences(vars = "MHDECOD", .stats = c("count_fraction"))
#'
#' # Apply table layout to data and produce `rtable` object
#' lyt %>%
#'   build_table(df, alt_counts_df = df_adsl) %>%
#'   prune_table()
#'
#' @export
count_occurrences <- function(lyt,
                              vars,
                              var_labels = vars,
                              show_labels = "hidden",
                              ...,
                              table_names = vars,
                              .stats = "count_fraction",
                              .formats = NULL,
                              .labels = NULL,
                              .indent_mods = NULL) {
  # Customize the formatted analysis function; every selected statistic is also
  # ungrouped (`.ungroup_stats = .stats`).
  occurrence_afun <- make_afun(
    a_count_occurrences,
    .ungroup_stats = .stats,
    .indent_mods = .indent_mods,
    .labels = .labels,
    .formats = .formats,
    .stats = .stats
  )

  # Arguments given through `...` reach the analysis function via `extra_args`.
  analyze(
    lyt = lyt,
    vars = vars,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    afun = occurrence_afun,
    extra_args = list(...)
  )
}

#' Create a STEP Graph
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Based on the STEP results, creates a `ggplot` graph showing the estimated HR or OR
#' along the continuous biomarker value subgroups.
#'
#' @param df (`tibble`)\cr result of [tidy.step()].
#' @param use_percentile (`flag`)\cr whether to use percentiles for the x axis or actual
#'   biomarker values.
#' @param est (named `list`)\cr `col` and `lty` settings for estimate line.
#' @param ci_ribbon (named `list` or `NULL`)\cr `fill` and `alpha` settings for the confidence interval
#'   ribbon area, or `NULL` to not plot a CI ribbon.
#' @param col (`character`)\cr colors.
#'
#' @return A `ggplot` STEP graph.
#'
#' @seealso Custom tidy method [tidy.step()].
#'
#' @examples
#' library(nestcolor)
#' library(survival)
#' lung$sex <- factor(lung$sex)
#'
#' # Survival example.
#' vars <- list(
#'   time = "time",
#'   event = "status",
#'   arm = "sex",
#'   biomarker = "age"
#' )
#'
#' step_matrix <- fit_survival_step(
#'   variables = vars,
#'   data = lung,
#'   control = c(control_coxph(), control_step(num_points = 10, degree = 2))
#' )
#' step_data <- broom::tidy(step_matrix)
#'
#' # Default plot.
#' g_step(step_data)
#'
#' # Add the reference 1 horizontal line.
#' library(ggplot2)
#' g_step(step_data) +
#'   ggplot2::geom_hline(ggplot2::aes(yintercept = 1), linetype = 2)
#'
#' # Use actual values instead of percentiles, different color for estimate and no CI,
#' # use log scale for y axis.
#' g_step(
#'   step_data,
#'   use_percentile = FALSE,
#'   est = list(col = "blue", lty = 1),
#'   ci_ribbon = NULL
#' ) + scale_y_log10()
#'
#' # Adding another curve based on additional column.
#' step_data$extra <- exp(step_data$`Percentile Center`)
#' g_step(step_data) +
#'   ggplot2::geom_line(ggplot2::aes(y = extra), linetype = 2, color = "green")
#'
#' # Response example.
#' vars <- list(
#'   response = "status",
#'   arm = "sex",
#'   biomarker = "age"
#' )
#'
#' step_matrix <- fit_rsp_step(
#'   variables = vars,
#'   data = lung,
#'   control = c(
#'     control_logistic(response_definition = "I(response == 2)"),
#'     control_step()
#'   )
#' )
#' step_data <- broom::tidy(step_matrix)
#' g_step(step_data)
#'
#' @export
g_step <- function(df,
                   use_percentile = "Percentile Center" %in% names(df),
                   est = list(col = "blue", lty = 1),
                   ci_ribbon = list(fill = getOption("ggplot2.discrete.colour")[1], alpha = 0.5),
                   col = getOption("ggplot2.discrete.colour")) {
  checkmate::assert_tibble(df)
  checkmate::assert_flag(use_percentile)
  checkmate::assert_character(col, null.ok = TRUE)
  checkmate::assert_list(est, names = "named")
  checkmate::assert_list(ci_ribbon, names = "named", null.ok = TRUE)

  # Choose the x axis column; the estimate column name is stored in the
  # `estimate` attribute of `df`.
  x_var <- if (use_percentile) "Percentile Center" else "Interval Center"
  df$x <- df[[x_var]]
  attrs <- attributes(df)
  df$y <- df[[attrs$estimate]]

  # Set legend names. To be modified also at call level
  legend_names <- c("Estimate", "CI 95%")

  # Colour of the estimate line: honour `est$col`, falling back to the previous
  # hard-coded "blue" when it is not supplied.
  est_col <- if (is.null(est$col)) "blue" else est$col

  p <- ggplot2::ggplot(df, ggplot2::aes(x = .data[["x"]], y = .data[["y"]]))

  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_color_manual(values = col)
  }

  if (!is.null(ci_ribbon)) {
    if (is.null(ci_ribbon$fill)) {
      ci_ribbon$fill <- "lightblue"
    }
    p <- p + ggplot2::geom_ribbon(
      ggplot2::aes(
        ymin = .data[["ci_lower"]], ymax = .data[["ci_upper"]],
        fill = legend_names[2]
      ),
      alpha = ci_ribbon$alpha
    ) +
      ggplot2::scale_fill_manual(
        name = "", values = c("CI 95%" = ci_ribbon$fill)
      )
  }
  # A colour scale may already have been added above; messages raised while
  # adding the estimate colour scale are suppressed (presumably the ggplot2
  # notice about replacing an existing scale).
  suppressMessages(p <- p +
    ggplot2::geom_line(
      ggplot2::aes(y = .data[["y"]], color = legend_names[1]),
      linetype = est$lty
    ) +
    ggplot2::scale_colour_manual(
      name = "", values = c("Estimate" = est_col)
    ))

  # Axis labels come from the metadata attributes attached to `df`.
  p <- p + ggplot2::labs(x = attrs$biomarker, y = attrs$estimate)
  if (use_percentile) {
    p <- p + ggplot2::scale_x_continuous(labels = scales::percent)
  }
  p
}

#' Custom Tidy Method for STEP Results
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tidy the STEP results into a `tibble` format ready for plotting.
#'
#' @param x (`step` matrix)\cr results from [fit_survival_step()].
#' @param ... not used here.
#'
#' @return A `tibble` with one row per STEP subgroup. The estimates and CIs are on the HR or OR scale,
#'   respectively. Additional attributes carry metadata also used for plotting.
#'
#' @seealso [g_step()] which consumes the result from this function.
#'
#' @method tidy step
#'
#' @examples
#' library(survival)
#' lung$sex <- factor(lung$sex)
#' vars <- list(
#'   time = "time",
#'   event = "status",
#'   arm = "sex",
#'   biomarker = "age"
#' )
#' step_matrix <- fit_survival_step(
#'   variables = vars,
#'   data = lung,
#'   control = c(control_coxph(), control_step(num_points = 10, degree = 2))
#' )
#' broom::tidy(step_matrix)
#'
#' @export
tidy.step <- function(x, ...) { # nolint
  checkmate::assert_class(x, "step")
  step_df <- as.data.frame(x)
  col_names <- names(step_df)

  # A `loghr` column marks survival results; otherwise `logor` is expected.
  if ("loghr" %in% col_names) {
    log_est_var <- "loghr"
    est_label <- "Hazard Ratio"
  } else {
    log_est_var <- "logor"
    est_label <- "Odds Ratio"
  }
  value_vars <- c(est_label, "ci_lower", "ci_upper")

  # Rename the estimate column, then exponentiate the estimate and CI columns.
  names(step_df)[match(log_est_var, col_names)] <- est_label
  step_df[, value_vars] <- exp(step_df[, value_vars])

  has_missing <- any(is.na(step_df[, value_vars]))
  has_very_large <- any(abs(step_df[, value_vars]) > 1e10, na.rm = TRUE)
  if (has_missing) {
    warning(paste(
      "Missing values in the point estimate or CI columns,",
      "this will lead to holes in the `g_step()` plot"
    ))
  }
  if (has_very_large) {
    warning(paste(
      "Very large absolute values in the point estimate or CI columns,",
      "consider adding `scale_y_log10()` to the `g_step()` result for plotting"
    ))
  }
  if (has_missing || has_very_large) {
    warning("Consider using larger `bandwidth`, less `num_points` in `control_step()` settings for fitting")
  }

  # Attach the metadata as attributes of the returned tibble.
  structure(
    tibble::as_tibble(step_df),
    estimate = est_label,
    biomarker = attr(x, "variables")$biomarker,
    ci = f_conf_level(attr(x, "control")$conf_level)
  )
}

#' Convert Table into Matrix of Strings
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to use mostly within tests. The `with_spaces` parameter allows
#' testing not only the content but also the indentation and table structure.
#' `print_txt_to_copy` instead facilitates test development by printing well
#' formatted text that only needs to be copied and pasted into the expected output.
#'
#' @param x `rtables` table.
#' @param with_spaces Should the tested table keep the indentation and other relevant spaces?
#' @param print_txt_to_copy Utility to have a way to copy the input table directly
#'   into the expected variable instead of copying it too manually.
#'
#' @return A `matrix` of `string`s.
#'
#' @export
to_string_matrix <- function(x, with_spaces = FALSE, print_txt_to_copy = FALSE) {
  checkmate::assert_flag(with_spaces)
  checkmate::assert_flag(print_txt_to_copy)

  # With spaces: the printed table split into one string per line.
  # Without: the string matrix of the matrix form.
  out <- if (with_spaces) {
    strsplit(toString(matrix_form(x, TRUE)), "\\n")[[1]]
  } else {
    matrix_form(x)$string
  }

  # Optionally print a `c(...)` expression that can be pasted as the expected value.
  if (print_txt_to_copy) {
    lines_to_print <- if (with_spaces) {
      out
    } else {
      apply(out, 1, paste0, collapse = '", "')
    }
    cat(paste0('c(\n  "', paste0(lines_to_print, collapse = '",\n  "'), '"\n)'))
  }

  out
}

#' Blank for Missing Input
#'
#' Helper function to use in tabulating model results.
#'
#' @param x (`vector`)\cr input for a cell.
#'
#' @return An empty `character` vector if all entries in `x` are missing (`NA`), otherwise
#'   the unlisted version of `x`.
#'
#' @keywords internal
unlist_and_blank_na <- function(x) {
  flat <- unlist(x)
  # Anything other than "all missing" is passed through unchanged.
  if (!all(is.na(flat))) {
    return(flat)
  }
  # All entries missing (or nothing to unlist): blank out the cell.
  character()
}

#' Constructor for Content Functions given Data Frame with Flag Input
#'
#' This can be useful for tabulating model results.
#'
#' @param analysis_var (`string`)\cr variable name for the column containing values to be returned by the
#'   content function.
#' @param flag_var (`string`)\cr variable name for the logical column identifying which row should be returned.
#' @param format (`string`)\cr `rtables` format to use.
#'
#' @return A content function which gives `df[[analysis_var]]` at the row(s) identified by
#'   `df[[flag_var]]` in the given format.
#'
#' @keywords internal
cfun_by_flag <- function(analysis_var,
                         flag_var,
                         format = "xx",
                         .indent_mods = NULL) {
  checkmate::assert_string(analysis_var)
  checkmate::assert_string(flag_var)

  # The returned content function looks up the flagged row(s) of `df` and wraps
  # the corresponding `analysis_var` value(s) in a labelled cell.
  function(df, labelstr) {
    flagged_rows <- which(df[[flag_var]])
    cell_value <- unlist_and_blank_na(df[[analysis_var]][flagged_rows])
    cell <- rcell(cell_value, format = format, indent_mod = .indent_mods)
    formatters::with_label(cell, labelstr)
  }
}

#' Content Row Function to Add Row Total to Labels
#'
#' This takes the label of the latest row split level and adds the row total in parentheses.
#'
#' @inheritParams argument_convention
#'
#' @return A `list` containing "row_count" with the row count value and the correct label.
#'
#' @note It is important here to not use `df` but rather `.N_row` in the implementation, because
#'   the former is already split by columns and will refer to the first column of the data only.
#'
#' @keywords internal
c_label_n <- function(df,
                      labelstr,
                      .N_row) { # nolint
  # Label is the split label followed by the row total in parentheses.
  row_label <- paste0(labelstr, " (N=", .N_row, ")")
  # The row total is stored twice as the cell value.
  row_total <- c(.N_row, .N_row)
  list(row_count = formatters::with_label(row_total, row_label))
}

#' Layout Creating Function to Add Row Total Counts
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This works analogously to [rtables::add_colcounts()] but on the rows. This function
#'  is a wrapper for [rtables::summarize_row_groups()].
#'
#' @inheritParams argument_convention
#'
#' @return A modified layout where the latest row split labels now have the row-wise
#'   total counts (i.e. without column-based subsetting) attached in parentheses.
#'
#' @note Row count values are contained in these row count rows but are not displayed
#'   so that they are not considered zero rows by default when pruning.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   split_rows_by("RACE", split_fun = drop_split_levels) %>%
#'   add_rowcounts() %>%
#'   analyze("AGE", afun = list_wrap_x(summary), format = "xx.xx") %>%
#'   build_table(DM)
#'
#' @export
add_rowcounts <- function(lyt) {
  # Format that renders every cell as an empty string, so the stored row count
  # value is not displayed.
  blank_format <- function(x, ...) ""

  row_count_cfun <- make_afun(
    c_label_n,
    .stats = "row_count",
    .formats = c(row_count = blank_format)
  )

  summarize_row_groups(lyt, cfun = row_count_cfun)
}

#' Obtain Column Indices
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to extract column indices from a `VTableTree` for a given
#' vector of column names.
#'
#' @param table_tree (`VTableTree`)\cr table to extract the indices from.
#' @param col_names (`character`)\cr vector of column names.
#'
#' @return A vector of column indices.
#'
#' @export
h_col_indices <- function(table_tree, col_names) {
  checkmate::assert_class(table_tree, "VTableNodeInfo")

  # Column names are taken from the names of the `cextra_args` attribute of the
  # table's column info.
  available_cols <- names(attr(col_info(table_tree), "cextra_args"))
  checkmate::assert_subset(col_names, available_cols, empty.ok = FALSE)

  match(col_names, available_cols)
}

#' Labels or Names of List Elements
#'
#' Internal helper function for working with nested statistic function results which typically
#' don't have labels but names that we can use.
#'
#' @param x a list
#'
#' @return A `character` vector with the labels or names for the list elements.
#'
#' @keywords internal
labels_or_names <- function(x) {
  checkmate::assert_multi_class(x, c("data.frame", "list"))

  element_labels <- sapply(x, obj_label)
  element_names <- rlang::names2(x)

  # Fall back to the element name wherever no label is available.
  label_missing <- sapply(element_labels, is.null)
  unlist(ifelse(label_missing, element_names, element_labels))
}

#' Convert to `rtable`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a new generic function to convert objects to `rtable` tables.
#'
#' @param x the object which should be converted to an `rtable`.
#' @param ... additional arguments for methods.
#'
#' @return An `rtables` table object. Note that the concrete class will depend on the method used.
#'
#' @export
as.rtable <- function(x, ...) { # nolint
  # S3 generic: dispatch to the `as.rtable` method for the class of `x`.
  UseMethod("as.rtable", x)
}

#' @describeIn as.rtable method for converting `data.frame` that contain numeric columns to `rtable`.
#'
#' @param format the format which should be used for the columns.
#'
#' @method as.rtable data.frame
#'
#' @examples
#' x <- data.frame(
#'   a = 1:10,
#'   b = rnorm(10)
#' )
#' as.rtable(x)
#'
#' @export
as.rtable.data.frame <- function(x, format = "xx.xx", ...) {
  # All columns must be numeric.
  checkmate::assert_numeric(unlist(x))

  # Table-level settings: one header entry per column, shared cell format.
  table_settings <- list(
    header = labels_or_names(x),
    format = format
  )

  # Builds one `rrow()` from the values of a single row of `x`.
  build_row <- function(row, row_name) {
    do.call(rrow, c(as.list(unname(row)), row.name = row_name))
  }

  # Transposing `x` turns each of its rows into a column, so `Map()` iterates
  # over the rows of `x` together with their row names.
  table_rows <- Map(
    build_row,
    row = as.data.frame(t(x)),
    row_name = rownames(x)
  )

  do.call(rtable, c(table_settings, table_rows))
}

#' Split parameters
#'
#' @description `r lifecycle::badge("stable")`
#'
#' It divides the data in the vector `param` into the groups defined by `f` based on the specified `value`. It is
#' relevant in `rtables` layers so as to distribute parameters `.stats` or `.formats` into lists with items
#' corresponding to specific analysis functions.
#'
#' @param param (`vector`)\cr the parameter to be split.
#' @param value (`vector`)\cr the value used to split.
#' @param f (`list` of `vectors`)\cr the reference to make the split
#'
#' @return A named `list` with the same element names as `f`, each containing the elements specified in `.stats`.
#'
#' @examples
#' f <- list(
#'   surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci"),
#'   surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval")
#' )
#'
#' .stats <- c("pt_at_risk", "rate_diff")
#' h_split_param(.stats, .stats, f = f)
#'
#' # $surv
#' # [1] "pt_at_risk"
#' #
#' # $surv_diff
#' # [1] "rate_diff"
#'
#' .formats <- c("pt_at_risk" = "xx", "event_free_rate" = "xxx")
#' h_split_param(.formats, names(.formats), f = f)
#'
#' # $surv
#' # pt_at_risk event_free_rate
#' # "xx"           "xxx"
#' #
#' # $surv_diff
#' # NULL
#'
#' @export
h_split_param <- function(param,
                          value,
                          f) {
  # For each group in `f`, keep the entries of `param` whose `value` belongs to
  # that group; groups without any match are returned as `NULL`.
  lapply(f, function(group) {
    selected <- param[value %in% group]
    if (length(selected) > 0) selected else NULL
  })
}

#' Get Selected Statistics Names
#'
#' Helper function to be used for creating `afun`.
#'
#' @param .stats (`vector` or `NULL`)\cr input to the layout creating function. Note that `NULL` means
#'   in this context that all default statistics should be used.
#' @param all_stats (`character`)\cr all statistics which can be selected here potentially.
#'
#' @return A `character` vector with the selected statistics.
#'
#' @keywords internal
afun_selected_stats <- function(.stats, all_stats) {
  checkmate::assert_character(.stats, null.ok = TRUE)
  checkmate::assert_character(all_stats)

  # `NULL` selects every available statistic.
  if (is.null(.stats)) {
    return(all_stats)
  }
  # Otherwise keep only the requested statistics that are actually available.
  intersect(.stats, all_stats)
}

#' Add Variable Labels to Top Left Corner in Table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper layout creating function to just append the variable labels of a given variables vector
#' from a given dataset in the top left corner. If a variable label is not found then the
#' variable name itself is used instead. Multiple variable labels are concatenated with slashes.
#'
#' @inheritParams argument_convention
#' @param vars (`character`)\cr variable names of which the labels are to be looked up in `df`.
#' @param indent (`integer`)\cr non-negative number of nested indent space, default to 0L which means no indent.
#'   1L means two spaces indent, 2L means four spaces indent and so on.
#'
#' @return A modified layout with the new variable label(s) added to the top-left material.
#'
#' @note This is not an optimal implementation of course, since we are using here the data set
#'   itself during the layout creation. When we have a more mature `rtables` implementation then
#'   this will also be improved or not necessary anymore.
#'
#' @examples
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   split_rows_by("SEX") %>%
#'   append_varlabels(DM, "SEX") %>%
#'   analyze("AGE", afun = mean) %>%
#'   append_varlabels(DM, "AGE", indent = 1)
#' build_table(lyt, DM)
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("SEX") %>%
#'   analyze("AGE", afun = mean) %>%
#'   append_varlabels(DM, c("SEX", "AGE"))
#' build_table(lyt, DM)
#'
#' @export
append_varlabels <- function(lyt, df, vars, indent = 0L) {
  # A single logical is still accepted for `indent`, but it is converted to
  # an integer (FALSE -> 0L, TRUE -> 1L) and a warning is raised.
  if (checkmate::test_flag(indent)) {
    warning("indent argument is now accepting integers. Boolean indent will be converted to integers.")
    indent <- as.integer(indent)
  }

  checkmate::assert_data_frame(df)
  checkmate::assert_character(vars)
  checkmate::assert_count(indent)

  # Look up one label per requested variable and join them with slashes.
  var_labs <- formatters::var_labels(df[vars], fill = TRUE)
  joined_labs <- paste(var_labs, collapse = " / ")

  # Each indent level contributes two leading spaces.
  prefix <- strrep(" ", indent * 2)

  append_topleft(lyt, paste0(prefix, joined_labs))
}

#' Tabulate Biomarker Effects on Binary Response by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tabulate the estimated effects of multiple continuous biomarker variables
#' on a binary response endpoint across population subgroups.
#'
#' @inheritParams argument_convention
#' @param df (`data.frame`)\cr containing all analysis variables, as returned by
#'   [extract_rsp_biomarkers()].
#' @param vars (`character`)\cr the names of statistics to be reported among:
#'   * `n_tot`: Total number of patients per group.
#'   * `n_rsp`: Total number of responses per group.
#'   * `prop`: Total response proportion per group.
#'   * `or`: Odds ratio.
#'   * `ci`: Confidence interval of odds ratio.
#'   * `pval`: p-value of the effect.
#'   Note, the statistics `n_tot`, `or` and `ci` are required.
#'
#' @return An `rtables` table summarizing biomarker effects on binary response by subgroup.
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. The tables are then typically used as input for forest plots.
#'
#' @note In contrast to [tabulate_rsp_subgroups()] this tabulation function does
#'   not start from an input layout `lyt`. This is because internally the table is
#'   created by combining multiple subtables.
#'
#' @seealso [h_tab_rsp_one_biomarker()] which is used internally, [extract_rsp_biomarkers()].
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   mutate(rsp = AVALC == "CR")
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' df <- extract_rsp_biomarkers(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adrs_f
#' )
#'
#' \donttest{
#' ## Table with default columns.
#' tabulate_rsp_biomarkers(df)
#'
#' ## Table with a manually chosen set of columns: leave out "pval", reorder.
#' tab <- tabulate_rsp_biomarkers(
#'   df = df,
#'   vars = c("n_rsp", "ci", "n_tot", "prop", "or")
#' )
#'
#' ## Finally produce the forest plot.
#' g_forest(tab, xlim = c(0.7, 1.4))
#' }
#'
#' @export
#' @name response_biomarkers_subgroups
tabulate_rsp_biomarkers <- function(df,
                                    vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval"),
                                    .indent_mods = 0L) {
  checkmate::assert_data_frame(df)
  checkmate::assert_character(df$biomarker)
  checkmate::assert_character(df$biomarker_label)
  checkmate::assert_subset(vars, c("n_tot", "n_rsp", "prop", "or", "ci", "pval"))

  # Tabulate a single biomarker and use its label as the label of the first
  # row path of the resulting sub-table.
  tab_one_biomarker <- function(df_sub) {
    tab_sub <- h_tab_rsp_one_biomarker(
      df = df_sub,
      vars = vars,
      .indent_mods = .indent_mods
    )
    first_row_path <- row_paths(tab_sub)[[1]][1]
    label_at_path(tab_sub, path = first_row_path) <- df_sub$biomarker_label[1]
    tab_sub
  }

  # One sub-table per biomarker, stacked on top of each other.
  tabs <- lapply(split(df, f = df$biomarker), FUN = tab_one_biomarker)
  result <- do.call(rbind, tabs)

  # The attributes record the positions within `vars` of the odds ratio, the
  # confidence interval and the total count columns, together with a header
  # for forest plots.
  structure(
    result,
    forest_header = paste0(c("Lower", "Higher"), "\nBetter"),
    col_x = match("or", vars),
    col_ci = match("ci", vars),
    col_symbol_size = grep("n_tot", vars)
  )
}

#' Prepares Response Data Estimates for Multiple Biomarkers in a Single Data Frame
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares estimates for number of responses, patients and overall response rate,
#' as well as odds ratio estimates, confidence intervals and p-values,
#' for multiple biomarkers across population subgroups in a single data frame.
#' `variables` corresponds to the names of variables found in `data`, passed as a
#' named list and requires elements `rsp` and `biomarkers` (vector of continuous
#' biomarker variables) and optionally `covariates`, `subgroups` and `strat`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams response_subgroups
#' @param control (named `list`)\cr controls for the response definition and the
#'   confidence level produced by [control_logistic()].
#'
#' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_rsp`,
#'   `prop`, `or`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`,
#'   `var_label`, and `row_type`.
#'
#' @note You can also specify a continuous variable in `rsp` and then use the
#'   `response_definition` control to convert that internally to a logical
#'   variable reflecting binary response.
#'
#' @seealso [h_logistic_mult_cont_df()] which is used internally.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   mutate(rsp = AVALC == "CR")
#'
#' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`,
#' # in logistic regression models with one covariate `SEX`. The subgroups
#' # are defined by the levels of `BMRKR2`.
#' df <- extract_rsp_biomarkers(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adrs_f
#' )
#' df
#'
#' # Here we group the levels of `BMRKR2` manually, and we add a stratification
#' # variable `STRATA1`. We also here use a continuous variable `EOSDY`
#' # which is then binarized internally (response is defined as this variable
#' # being larger than 500).
#' df_grouped <- extract_rsp_biomarkers(
#'   variables = list(
#'     rsp = "EOSDY",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     subgroups = "BMRKR2",
#'     strat = "STRATA1"
#'   ),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   ),
#'   control = control_logistic(
#'     response_definition = "I(response > 500)"
#'   )
#' )
#' df_grouped
#'
#' @export
extract_rsp_biomarkers <- function(variables,
                                   data,
                                   groups_lists = list(),
                                   control = control_logistic(),
                                   label_all = "All Patients") {
  assert_list_of_variables(variables)
  checkmate::assert_string(variables$rsp)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_string(label_all)

  # Estimates based on the complete data set form the "content" rows.
  result_all <- h_logistic_mult_cont_df(
    variables = variables,
    data = data,
    control = control
  )
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables the overall result is all there is to return.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  # Estimates for one subgroup: fit on the subgroup data and attach the first
  # row of its labels, repeated once per estimate row.
  estimate_one_subgroup <- function(grp) {
    result <- h_logistic_mult_cont_df(
      variables = variables,
      data = grp$df,
      control = control
    )
    result_labels <- grp$df_labels[rep(1, times = nrow(result)), ]
    cbind(result, result_labels)
  }

  l_data <- h_split_by_subgroups(
    data,
    variables$subgroups,
    groups_lists = groups_lists
  )
  l_result <- lapply(l_data, estimate_one_subgroup)

  # Stack all subgroup results (the "analysis" rows) below the overall result.
  result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"
  rbind(result_all, result_subgroups)
}

#' Combination Functions Class
#'
#' @description `r lifecycle::badge("stable")`
#'
#' `CombinationFunction` is an S4 class which extends standard functions. These are special functions that
#' can be combined and negated with the logical operators.
#'
#' @param e1 (`CombinationFunction`)\cr left hand side of logical operator.
#' @param e2 (`CombinationFunction`)\cr right hand side of logical operator.
#' @param x (`CombinationFunction`)\cr the function which should be negated.
#'
#' @return A `CombinationFunction` object. The operators `&`, `|` and `!` return a new `CombinationFunction`
#'   which evaluates the operand function(s) and combines or negates their logical results.
#'
#' @exportClass CombinationFunction
#' @export CombinationFunction
#'
#' @examples
#' higher <- function(a) {
#'   force(a)
#'   CombinationFunction(
#'     function(x) {
#'       x > a
#'     }
#'   )
#' }
#'
#' lower <- function(b) {
#'   force(b)
#'   CombinationFunction(
#'     function(x) {
#'       x < b
#'     }
#'   )
#' }
#'
#' c1 <- higher(5)
#' c2 <- lower(10)
#' c3 <- higher(5) & lower(10)
#' c3(7)
#'
#' @aliases CombinationFunction-class
#' @name combination_function
CombinationFunction <- methods::setClass( # nolint
  # The class extends "function": objects created by this generator wrap a
  # function and remain callable like one.
  Class = "CombinationFunction",
  contains = "function"
)

#' @describeIn combination_function Logical "AND" combination of `CombinationFunction` functions.
#'   The resulting object is of the same class, and evaluates the two argument functions. The result
#'   is then the "AND" of the two individual results.
#'
#' @export
methods::setMethod(
  f = "&",
  signature = c(e1 = "CombinationFunction", e2 = "CombinationFunction"),
  definition = function(e1, e2) {
    # The combined function forwards its arguments to both operands. `&&`
    # short-circuits, so `e2` is not called when `e1` already returns FALSE.
    both_hold <- function(...) {
      e1(...) && e2(...)
    }
    CombinationFunction(both_hold)
  }
)

#' @describeIn combination_function Logical "OR" combination of `CombinationFunction` functions.
#'   The resulting object is of the same class, and evaluates the two argument functions. The result
#'   is then the "OR" of the two individual results.
#'
#' @export
methods::setMethod(
  f = "|",
  signature = c(e1 = "CombinationFunction", e2 = "CombinationFunction"),
  definition = function(e1, e2) {
    # The combined function forwards its arguments to both operands. `||`
    # short-circuits, so `e2` is not called when `e1` already returns TRUE.
    either_holds <- function(...) {
      e1(...) || e2(...)
    }
    CombinationFunction(either_holds)
  }
)

#' @describeIn combination_function Logical negation of `CombinationFunction` functions.
#'   The resulting object is of the same class, and evaluates the original function. The result
#'   is then the opposite of this result.
#'
#' @export
methods::setMethod(
  f = "!",
  signature = c(x = "CombinationFunction"),
  definition = function(x) {
    # The negated function forwards its arguments to `x` and flips the result.
    negated <- function(...) {
      !x(...)
    }
    CombinationFunction(negated)
  }
)

#' Estimation of Proportions per Level of Factor
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Estimate the proportion, along with its confidence interval, of observations
#' falling into each level of a factor.
#'
#' @inheritParams argument_convention
#'
#' @seealso Relevant description function [d_onco_rsp_label()].
#'
#' @name estimate_multinomial_rsp
NULL

#' Description of Standard Oncology Response
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Describe the oncology response in a standard way.
#'
#' @param x (`character`)\cr the standard oncology code to be described.
#'
#' @return Response labels.
#'
#' @seealso [estimate_multinomial_rsp()]
#'
#' @examples
#' d_onco_rsp_label(
#'   c("CR", "PR", "SD", "NON CR/PD", "PD", "NE", "Missing", "<Missing>", "NE/Missing")
#' )
#'
#' # Adding some values not considered in d_onco_rsp_label
#'
#' d_onco_rsp_label(
#'   c("CR", "PR", "hello", "hi")
#' )
#'
#' @export
d_onco_rsp_label <- function(x) {
  codes <- as.character(x)

  # Lookup table: response code -> descriptive label. Its order also defines
  # the order of the described levels in the returned factor.
  desc <- c(
    "CR" = "Complete Response (CR)",
    "PR" = "Partial Response (PR)",
    "MR" = "Minimal/Minor Response (MR)",
    "MRD" = "Minimal Residual Disease (MRD)",
    "SD" = "Stable Disease (SD)",
    "PD" = "Progressive Disease (PD)",
    "NON CR/PD" = "Non-CR or Non-PD (NON CR/PD)",
    "NE" = "Not Evaluable (NE)",
    "NE/Missing" = "Missing or unevaluable",
    "Missing" = "Missing",
    "NA" = "Not Applicable (NA)",
    "ND" = "Not Done (ND)"
  )

  # Position of every code in the lookup table; NA marks codes without a
  # description, which are passed through unchanged.
  pos <- match(codes, names(desc))
  is_described <- !is.na(pos)

  values_label <- codes
  values_label[is_described] <- desc[pos[is_described]]
  # The result is named by the input codes.
  names(values_label) <- codes

  # Described labels come first (in lookup-table order), followed by the
  # values that have no description (in order of appearance).
  described_levels <- intersect(desc, values_label)
  other_levels <- setdiff(values_label, desc)

  factor(values_label, levels = c(described_levels, other_levels))
}

#' @describeIn estimate_multinomial_rsp Statistics function which feeds the length of `x` as number
#'   of successes, and `.N_col` as total number of successes and failures into [s_proportion()].
#'
#' @return
#' * `s_length_proportion()` returns statistics from [s_proportion()].
#'
#' @examples
#' s_length_proportion(rep("CR", 10), .N_col = 100)
#' s_length_proportion(factor(character(0)), .N_col = 100)
#'
#' @export
s_length_proportion <- function(x,
                                .N_col, # nolint
                                ...) {
  # `x` must be a factor or character vector with at most `.N_col` elements
  # and at most one distinct value.
  checkmate::assert_multi_class(x, classes = c("factor", "character"))
  checkmate::assert_vector(x, min.len = 0, max.len = .N_col)
  checkmate::assert_vector(unique(x), min.len = 0, max.len = 1)

  # Every element of `x` counts as one success; the remainder of the column
  # total counts as failures.
  n_success <- length(x)
  n_failure <- .N_col - n_success
  outcome <- rep(c(TRUE, FALSE), times = c(n_success, n_failure))

  s_proportion(df = outcome, ...)
}

#' @describeIn estimate_multinomial_rsp Formatted analysis function which is used as `afun`
#'   in `estimate_multinomial_response()`.
#'
#' @return
#' * `a_length_proportion()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_length_proportion(rep("CR", 10), .N_col = 100)
#' a_length_proportion(factor(character(0)), .N_col = 100)
#'
#' @export
a_length_proportion <- make_afun(
  fun = s_length_proportion,
  # Default cell formats for the two formatted statistics.
  .formats = c(
    "n_prop" = "xx (xx.x%)",
    "prop_ci" = "(xx.xx, xx.xx)"
  )
)

#' @describeIn estimate_multinomial_rsp Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()] and
#'   [rtables::summarize_row_groups()].
#'
#' @return
#' * `estimate_multinomial_response()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_length_proportion()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' # Use of the layout creating function.
#' dta_test <- data.frame(
#'   USUBJID = paste0("S", 1:12),
#'   ARM     = factor(rep(LETTERS[1:3], each = 4)),
#'   AVAL    = c(A = c(1, 1, 1, 1), B = c(0, 0, 1, 1), C = c(0, 0, 0, 0))
#' ) %>% mutate(
#'   AVALC = factor(AVAL,
#'     levels = c(0, 1),
#'     labels = c("Complete Response (CR)", "Partial Response (PR)")
#'   )
#' )
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   estimate_multinomial_response(var = "AVALC")
#'
#' tbl <- build_table(lyt, dta_test)
#'
#' html <- as_html(tbl)
#' html
#' \donttest{
#' Viewer(html)
#' }
#'
#' @export
estimate_multinomial_response <- function(lyt,
                                          var,
                                          ...,
                                          show_labels = "hidden",
                                          table_names = var,
                                          .stats = "prop_ci",
                                          .formats = NULL,
                                          .labels = NULL,
                                          .indent_mods = NULL) {
  # Customize the formatted analysis function with the user's choices.
  afun_custom <- make_afun(
    a_length_proportion,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Split the rows by the levels of `var` and summarize each row group with
  # the `summarize_row_groups()` defaults.
  lyt_by_level <- summarize_row_groups(split_rows_by(lyt, var = var))

  # Within each level, analyze `var` itself; `...` is forwarded to the
  # analysis function.
  analyze(
    lyt_by_level,
    vars = var,
    afun = afun_custom,
    show_labels = show_labels,
    table_names = table_names,
    extra_args = list(...)
  )
}

#' Survival Time Analysis
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize median survival time and CIs, percentiles of survival times, survival
#' time range of censored/event patients.
#'
#' @inheritParams argument_convention
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
#'   [control_surv_time()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for survival time.
#'   * `conf_type` (`string`)\cr confidence interval type. Options are "plain" (default), "log", or "log-log",
#'     see more in [survival::survfit()]. Note option "none" is not supported.
#'   * `quantiles` (`numeric`)\cr vector of length two to specify the quantiles of survival time.
#'
#' @name survival_time
NULL

#' @describeIn survival_time Statistics function which analyzes survival times.
#'
#' @return
#' * `s_surv_time()` returns the statistics:
#'   * `median`: Median survival time.
#'   * `median_ci`: Confidence interval for median time.
#'   * `quantiles`: Survival time for two specified quantiles.
#'   * `range_censor`: Survival time range for censored observations.
#'   * `range_event`: Survival time range for observations with events.
#'   * `range`: Survival time range for all observations.
#'
#' @examples
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVAL = day2month(AVAL),
#'     is_event = CNSR == 0
#'   )
#' df <- adtte_f %>% filter(ARMCD == "ARM A")
#'
#' @keywords internal
s_surv_time <- function(df,
                        .var,
                        is_event,
                        control = control_surv_time()) {
  checkmate::assert_string(.var)
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE)
  checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE)

  conf_type <- control$conf_type
  conf_level <- control$conf_level
  quantiles <- control$quantiles

  # Intercept-only survival fit of `.var` with event indicator `is_event`.
  formula <- stats::as.formula(paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1"))
  srv_fit <- survival::survfit(
    formula = formula,
    data = df,
    conf.int = conf_level,
    conf.type = conf_type
  )

  # Median with its confidence limits: the limits are stored in the summary
  # table under names made of the confidence level followed by "LCL" / "UCL".
  srv_tab <- summary(srv_fit, extend = TRUE)$table
  ci_names <- paste0(srv_fit$conf.int, c("LCL", "UCL"))
  median_est <- unname(srv_tab["median"])
  median_ci <- unname(srv_tab[ci_names])

  # Survival times at the two requested quantiles.
  srv_qt_tab <- stats::quantile(srv_fit, probs = quantiles)$quantile
  quantile_label <- paste0(quantiles[1] * 100, "% and ", quantiles[2] * 100, "%-ile")

  # Ranges of the observed times: censored only, events only, and overall.
  obs_time <- df[[.var]]
  had_event <- df[[is_event]]
  range_censor <- range_noinf(obs_time[!had_event], na.rm = TRUE)
  range_event <- range_noinf(obs_time[had_event], na.rm = TRUE)
  range_all <- range_noinf(obs_time, na.rm = TRUE)

  list(
    median = formatters::with_label(median_est, "Median"),
    median_ci = formatters::with_label(median_ci, f_conf_level(conf_level)),
    quantiles = formatters::with_label(unname(srv_qt_tab), quantile_label),
    range_censor = formatters::with_label(range_censor, "Range (censored)"),
    range_event = formatters::with_label(range_event, "Range (event)"),
    range = formatters::with_label(range_all, "Range")
  )
}

#' @describeIn survival_time Formatted analysis function which is used as `afun` in `surv_time()`.
#'
#' @return
#' * `a_surv_time()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_surv_time <- make_afun(
  fun = s_surv_time,
  # Default cell formats, one per statistic returned by `s_surv_time()`.
  .formats = c(
    median = "xx.x",
    median_ci = "(xx.x, xx.x)",
    quantiles = "xx.x, xx.x",
    range_censor = "xx.x to xx.x",
    range_event = "xx.x to xx.x",
    range = "xx.x to xx.x"
  )
)

#' @describeIn survival_time Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return
#' * `surv_time()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_surv_time()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD") %>%
#'   add_colcounts() %>%
#'   surv_time(
#'     vars = "AVAL",
#'     var_labels = "Survival Time (Months)",
#'     is_event = "is_event",
#'     control = control_surv_time(conf_level = 0.9, conf_type = "log-log")
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' @export
surv_time <- function(lyt,
                      vars,
                      ...,
                      var_labels = "Time to Event",
                      table_names = vars,
                      .stats = c("median", "median_ci", "quantiles", "range_censor", "range_event"),
                      .formats = NULL,
                      .labels = NULL,
                      .indent_mods = c(
                        "median" = 0L, "median_ci" = 1L, "quantiles" = 0L,
                        "range_censor" = 0L, "range_event" = 0L, "range" = 0L
                      )) {
  # NOTE(review): presumably this keeps only the indent modifiers whose names
  # appear in `.stats` - confirm against `extract_by_name()`.
  indent_mods_selected <- extract_by_name(.indent_mods, .stats)

  # Customize the formatted analysis function with the user's choices.
  afun_custom <- make_afun(
    a_surv_time,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = indent_mods_selected
  )

  # The variable label is always shown; `...` is forwarded to the analysis
  # function.
  analyze(
    lyt,
    vars,
    afun = afun_custom,
    var_labels = var_labels,
    show_labels = "visible",
    table_names = table_names,
    extra_args = list(...)
  )
}

#' Subgroup Treatment Effect Pattern (STEP) Fit for Binary (Response) Outcome
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This fits the Subgroup Treatment Effect Pattern logistic regression models for a binary
#' (response) outcome. The treatment arm variable must have exactly 2 levels,
#' where the first one is taken as reference and the estimated odds ratios are
#' for the comparison of the second level vs. the first one.
#'
#' The (conditional) logistic regression model which is fit is:
#'
#' `response ~ arm * poly(biomarker, degree) + covariates + strata(strata)`
#'
#' where `degree` is specified by `control_step()`.
#'
#' @inheritParams argument_convention
#' @param variables (named `list` of `character`)\cr list of analysis variables:
#'   needs `response`, `arm`, `biomarker`, and optional `covariates` and `strata`.
#' @param control (named `list`)\cr combined control list from [control_step()]
#'   and [control_logistic()].
#'
#' @return A matrix of class `step`. The first part of the columns describe the
#'   subgroup intervals used for the biomarker variable, including where the
#'   center of the intervals are and their bounds. The second part of the
#'   columns contain the estimates for the treatment arm comparison.
#'
#' @note For the default degree 0 the `biomarker` variable is not included in the model.
#'
#' @seealso [control_step()] and [control_logistic()] for the available
#'   customization options.
#'
#' @examples
#' # Testing dataset with just two treatment arms.
#' library(survival)
#' library(dplyr)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(
#'     PARAMCD == "BESRSPI",
#'     ARM %in% c("B: Placebo", "A: Drug X")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to have Placebo as reference arm for Odds Ratio calculations.
#'     ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")),
#'     RSP = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     SEX = factor(SEX)
#'   )
#'
#' variables <- list(
#'   arm = "ARM",
#'   biomarker = "BMRKR1",
#'   covariates = "AGE",
#'   response = "RSP"
#' )
#'
#' # Fit default STEP models: Here a constant treatment effect is estimated in each subgroup.
#' # We use a large enough bandwidth to avoid too small subgroups and linear separation in those.
#' step_matrix <- fit_rsp_step(
#'   variables = variables,
#'   data = adrs_f,
#'   control = c(control_logistic(), control_step(bandwidth = 0.5))
#' )
#' dim(step_matrix)
#' head(step_matrix)
#'
#' # Specify different polynomial degree for the biomarker interaction to use more flexible local
#' # models. Or specify different logistic regression options, including confidence level.
#' step_matrix2 <- fit_rsp_step(
#'   variables = variables,
#'   data = adrs_f,
#'   control = c(control_logistic(conf_level = 0.9), control_step(bandwidth = 0.6, degree = 1))
#' )
#'
#' # Use a global constant model. This is helpful as a reference for the subgroup models.
#' step_matrix3 <- fit_rsp_step(
#'   variables = variables,
#'   data = adrs_f,
#'   control = c(control_logistic(), control_step(bandwidth = NULL, num_points = 2L))
#' )
#'
#' # It is also possible to use strata, i.e. use conditional logistic regression models.
#' variables2 <- list(
#'   arm = "ARM",
#'   biomarker = "BMRKR1",
#'   covariates = "AGE",
#'   response = "RSP",
#'   strata = c("STRATA1", "STRATA2")
#' )
#'
#' step_matrix4 <- fit_rsp_step(
#'   variables = variables2,
#'   data = adrs_f,
#'   control = c(control_logistic(), control_step(bandwidth = 0.6))
#' )
#'
#' @export
fit_rsp_step <- function(variables,
                         data,
                         control = c(control_step(), control_logistic())) {
  assert_df_with_variables(data, variables)
  checkmate::assert_list(control, names = "named")

  # Only rows with an observed biomarker value enter the analysis.
  has_biomarker <- !is.na(data[[variables$biomarker]])
  data <- data[has_biomarker, ]

  window_sel <- h_step_window(x = data[[variables$biomarker]], control = control)
  interval_center <- window_sel$interval[, "Interval Center"]
  form <- h_step_rsp_formula(variables = variables, control = control)

  if (is.null(control$bandwidth)) {
    # No bandwidth: a single fit on all data, evaluated at the interval centers.
    estimates <- h_step_rsp_est(
      formula = form,
      data = data,
      variables = variables,
      x = interval_center,
      control = control
    )
  } else {
    # One fit per interval center, each restricted to the rows flagged in the
    # corresponding column of `window_sel$sel`.
    per_window <- mapply(
      FUN = h_step_rsp_est,
      x = interval_center,
      subset = as.list(as.data.frame(window_sel$sel)),
      MoreArgs = list(
        formula = form,
        data = data,
        variables = variables,
        control = control
      )
    )
    # `mapply()` returns the windows in columns: name the statistics and
    # transpose so that each window becomes a row.
    rownames(per_window) <- c("n", "logor", "se", "ci_lower", "ci_upper")
    estimates <- t(per_window)
  }

  # Interval description columns first, then the estimates.
  result <- cbind(window_sel$interval, estimates)
  structure(
    result,
    class = c("step", "matrix"),
    variables = variables,
    control = control
  )
}

#' Counting Patients Summing Exposure Across All Patients in Columns
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Counting the number of patients and summing analysis value (i.e. exposure values) across all patients
#' when a column table layout is required.
#'
#' @inheritParams argument_convention
#'
#' @name summarize_patients_exposure_in_cols
NULL

#' @describeIn summarize_patients_exposure_in_cols Statistics function which counts numbers
#'   of patients and the sum of exposure across all patients.
#'
#' @param ex_var (`character`)\cr name of the variable within `df` containing exposure values.
#' @param custom_label (`string` or `NULL`)\cr if provided and `labelstr` is empty then this will be used as label.
#'
#' @return
#' * `s_count_patients_sum_exposure()` returns a named `list` with the statistics:
#'   * `n_patients`: Number of unique patients in `df`.
#'   * `sum_exposure`: Sum of `ex_var` across all patients in `df`.
#'
#' @examples
#' set.seed(1)
#' df <- data.frame(
#'   USUBJID = c(paste("id", seq(1, 12), sep = "")),
#'   ARMCD = c(rep("ARM A", 6), rep("ARM B", 6)),
#'   SEX = c(rep("Female", 6), rep("Male", 6)),
#'   AVAL = as.numeric(sample(seq(1, 20), 12)),
#'   stringsAsFactors = TRUE
#' )
#' adsl <- data.frame(
#'   USUBJID = c(paste("id", seq(1, 12), sep = "")),
#'   ARMCD = c(rep("ARM A", 2), rep("ARM B", 2)),
#'   SEX = c(rep("Female", 2), rep("Male", 2)),
#'   stringsAsFactors = TRUE
#' )
#'
#' @keywords internal
s_count_patients_sum_exposure <- function(df,
                                          ex_var = "AVAL",
                                          id = "USUBJID",
                                          labelstr = "",
                                          .stats = c("n_patients", "sum_exposure"),
                                          .N_col, # nolint
                                          custom_label = NULL) {
  assert_df_with_variables(df, list(ex_var = ex_var, id = id))
  checkmate::assert_string(id)
  checkmate::assert_string(labelstr)
  checkmate::assert_string(custom_label, null.ok = TRUE)
  checkmate::assert_numeric(df[[ex_var]])
  checkmate::assert_true(all(.stats %in% c("n_patients", "sum_exposure")))

  # Label precedence: a non-empty `labelstr`, then `custom_label`, then the
  # generic default label.
  row_label <- "Total patients numbers/person time"
  if (nzchar(labelstr)) {
    row_label <- labelstr
  } else if (!is.null(custom_label)) {
    row_label <- custom_label
  }

  # Only the requested statistics are computed; both share the same label.
  stats_out <- list()

  if ("n_patients" %in% .stats) {
    patient_counts <- s_num_patients_content(
      df = df,
      .N_col = .N_col, # nolint
      .var = id,
      labelstr = ""
    )
    stats_out$n_patients <- formatters::with_label(patient_counts$unique, row_label)
  }

  if ("sum_exposure" %in% .stats) {
    total_exposure <- sum(df[[ex_var]])
    stats_out$sum_exposure <- formatters::with_label(total_exposure, row_label)
  }

  stats_out
}

#' @describeIn summarize_patients_exposure_in_cols Analysis function which is used as `afun` in
#'   [rtables::analyze_colvars()] within `analyze_patients_exposure_in_cols()` and as `cfun` in
#'   [rtables::summarize_row_groups()] within `summarize_patients_exposure_in_cols()`.
#'
#' @return
#' * `a_count_patients_sum_exposure()` returns formatted [rtables::CellValue()].
#'
#' @examples
#' a_count_patients_sum_exposure(
#'   df = df,
#'   var = "SEX",
#'   .N_col = nrow(df),
#'   .stats = "n_patients"
#' )
#'
#' @export
a_count_patients_sum_exposure <- function(df,
                                          var = NULL,
                                          ex_var = "AVAL",
                                          id = "USUBJID",
                                          labelstr = "",
                                          add_total_level = FALSE,
                                          .N_col, # nolint
                                          .stats,
                                          .formats = list(n_patients = "xx (xx.x%)", sum_exposure = "xx"),
                                          custom_label = NULL) {
  # Builds the rows for the single statistic named by `.stats`:
  # * without `var`: one "Total" row computed on all of `df`;
  # * with `var`: one row per level of `df[[var]]` (labelled by the level),
  #   plus an optional "Total" row when `add_total_level` is TRUE.
  # The rows are formatted with the entry of `.formats` matching `.stats`.
  checkmate::assert_flag(add_total_level)

  y <- list()
  if (is.null(var)) {
    y[[.stats]] <- list(Total = s_count_patients_sum_exposure(
      df = df,
      ex_var = ex_var,
      id = id,
      labelstr = labelstr,
      .N_col = .N_col,
      .stats = .stats,
      custom_label = custom_label
    )[[.stats]])
  } else {
    assert_df_with_variables(df, list(var = var))
    # Coerce to factor so that the levels define the rows.
    df[[var]] <- as.factor(df[[var]])

    for (lvl in levels(df[[var]])) {
      # Standard subsetting is used instead of `subset(df, get(var) == lvl)`:
      # with non-standard evaluation a column of `df` that happens to be named
      # `var` or `lvl` would shadow the local variables and select the wrong
      # rows. As with `subset()`, rows with a missing value in `var` are dropped.
      in_lvl <- df[[var]] == lvl
      y[[.stats]][[lvl]] <- s_count_patients_sum_exposure(
        df = df[in_lvl & !is.na(in_lvl), , drop = FALSE],
        ex_var = ex_var,
        id = id,
        labelstr = labelstr,
        .N_col = .N_col,
        .stats = .stats,
        custom_label = lvl
      )[[.stats]]
    }
    if (add_total_level) {
      y[[.stats]][["Total"]] <- s_count_patients_sum_exposure(
        df = df,
        ex_var = ex_var,
        id = id,
        labelstr = labelstr,
        .N_col = .N_col,
        .stats = .stats,
        custom_label = custom_label
      )[[.stats]]
    }
  }

  in_rows(.list = y[[.stats]], .formats = .formats[[.stats]])
}

#' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics
#'   function arguments and additional format arguments. This function is a wrapper for
#'   [rtables::split_cols_by_multivar()] and [rtables::summarize_row_groups()].
#'
#' @return
#' * `summarize_patients_exposure_in_cols()` returns a layout object suitable for passing to further
#'   layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will
#'   add formatted content rows, with the statistics from `s_count_patients_sum_exposure()` arranged in
#'   columns, to the table layout.
#'
#' @examples
#' lyt <- basic_table() %>%
#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE)
#' result <- build_table(lyt, df = df, alt_counts_df = adsl)
#' result
#'
#' lyt2 <- basic_table() %>%
#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE, .stats = "sum_exposure")
#' result2 <- build_table(lyt2, df = df, alt_counts_df = adsl)
#' result2
#'
#' @export
summarize_patients_exposure_in_cols <- function(lyt, # nolint
                                                var,
                                                ...,
                                                .stats = c("n_patients", "sum_exposure"),
                                                .labels = c(n_patients = "Patients", sum_exposure = "Person time"),
                                                .indent_mods = NULL,
                                                col_split = TRUE) {
  # When requested, add one column per statistic. Each column repeats `var` and carries the
  # statistic names as extra arguments; `.stats` and `.labels` are only used in this branch.
  if (col_split) {
    stat_vars <- rep(var, length(.stats))
    stat_labels <- .labels[.stats]
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = stat_vars,
      varlabels = stat_labels,
      extra_args = list(.stats = .stats)
    )
  }

  # Content rows are produced by `a_count_patients_sum_exposure()`, which receives `...`.
  # NOTE(review): `.indent_mods` is accepted but not forwarded anywhere in this function.
  cfun_args <- list(...)
  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = a_count_patients_sum_exposure,
    extra_args = cfun_args
  )
}

#' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics
#'   function arguments and additional format arguments. This function is a wrapper for
#'   [rtables::split_cols_by_multivar()] and [rtables::analyze_colvars()].
#'
#' @param col_split (`flag`)\cr whether the columns should be split. Set to `FALSE` when the required
#'   column split has been done already earlier in the layout pipe.
#'
#' @return
#' * `analyze_patients_exposure_in_cols()` returns a layout object suitable for passing to further
#'   layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will
#'   add formatted data rows, with the statistics from `s_count_patients_sum_exposure()` arranged in
#'   columns, to the table layout.
#'
#' @note As opposed to [summarize_patients_exposure_in_cols()] which generates content rows,
#'   `analyze_patients_exposure_in_cols()` generates data rows which will _not_ be repeated on multiple
#'   pages when pagination is used.
#'
#' @examples
#' lyt3 <- basic_table() %>%
#'   split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE) %>%
#'   analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE)
#' result3 <- build_table(lyt3, df = df, alt_counts_df = adsl)
#' result3
#'
#' lyt4 <- basic_table() %>%
#'   split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
#'   summarize_patients_exposure_in_cols(
#'     var = "AVAL", col_split = TRUE,
#'     .stats = "n_patients", custom_label = "some custom label"
#'   ) %>%
#'   analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE, ex_var = "AVAL")
#' result4 <- build_table(lyt4, df = df, alt_counts_df = adsl)
#' result4
#'
#' lyt5 <- basic_table() %>%
#'   analyze_patients_exposure_in_cols(var = "SEX", col_split = TRUE, ex_var = "AVAL")
#' result5 <- build_table(lyt5, df = df, alt_counts_df = adsl)
#' result5
#'
#' # Adding total levels and custom label
#' lyt <- basic_table(
#'   show_colcounts = TRUE
#' ) %>%
#'   analyze_patients_exposure_in_cols(
#'     var = "ARMCD",
#'     col_split = TRUE,
#'     add_total_level = TRUE,
#'     custom_label = "TOTAL"
#'   ) %>%
#'   append_topleft(c("", "Sex"))
#'
#' tbl <- build_table(lyt, df = df, alt_counts_df = adsl)
#' tbl
#'
#' @export
analyze_patients_exposure_in_cols <- function(lyt, # nolint
                                              var = NULL,
                                              ex_var = "AVAL",
                                              col_split = TRUE,
                                              add_total_level = FALSE,
                                              .stats = c("n_patients", "sum_exposure"),
                                              .labels = c(n_patients = "Patients", sum_exposure = "Person time"),
                                              .indent_mods = 0L,
                                              ...) {
  # When requested, add one column per statistic, each based on the exposure variable.
  if (col_split) {
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = rep(ex_var, length(.stats)),
      varlabels = .labels[.stats],
      extra_args = list(.stats = .stats)
    )
  }

  # Collect `...` as a list before combining. Passing `...` straight into `c()` splices
  # vector and list arguments element-wise (e.g. `.formats = list(a = , b = )` would become
  # `.formats.a` and `.formats.b`), so the analysis function would never receive them intact.
  # `NULL` entries are dropped, which matches what `c()` did with them before.
  dots <- list(...)
  dots <- dots[!vapply(dots, is.null, logical(1))]

  afun_args <- c(
    list(
      var = var,
      ex_var = ex_var,
      add_total_level = add_total_level
    ),
    dots
  )

  analyze_colvars(
    lyt,
    afun = a_count_patients_sum_exposure,
    indent_mod = .indent_mods,
    extra_args = afun_args
  )
}

#' Sort Data by `PK PARAM` Variable
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param pk_data (`data.frame`)\cr `Pharmacokinetics` dataframe
#' @param key_var (`character`)\cr key variable used to merge pk_data and metadata created by `d_pkparam()`
#'
#' @return A PK `data.frame` sorted by a `PARAM` variable.
#'
#' @examples
#' library(dplyr)
#'
#' adpp <- tern_ex_adpp %>% mutate(PKPARAM = factor(paste0(PARAM, " (", AVALU, ")")))
#' pk_ordered_data <- h_pkparam_sort(adpp)
#'
#' @export
h_pkparam_sort <- function(pk_data, key_var = "PARAMCD") {
  assert_df_with_variables(pk_data, list(key_var = key_var))
  # The merge below is always done on `PARAMCD`, so copy the key variable into that column.
  pk_data$PARAMCD <- pk_data[[key_var]]

  ordered_pk_data <- d_pkparam()

  # Columns present in both inputs (other than the merge key) come back from `merge()` twice:
  # the `pk_data` version under its own name and the metadata version with a ".y" suffix.
  shared_cols <- setdiff(intersect(colnames(pk_data), colnames(ordered_pk_data)), "PARAMCD")

  # Add the numeric values from ordered_pk_data to pk_data
  joined_data <- merge(pk_data, ordered_pk_data, by = "PARAMCD", suffixes = c("", ".y"))

  # Drop exactly the suffixed metadata duplicates. Selecting by name (rather than
  # `-grep(".*.y$", ...)`) keeps user columns that merely end in "y" and does not wipe out
  # every column when there is nothing to drop. `drop = FALSE` keeps a data frame even if
  # a single column remains.
  duplicated_cols <- paste0(shared_cols, ".y")
  joined_data <- joined_data[, !colnames(joined_data) %in% duplicated_cols, drop = FALSE]

  joined_data$TLG_ORDER <- as.numeric(joined_data$TLG_ORDER)

  # Then order PARAM based on this column
  row_order <- order(joined_data$TLG_ORDER)
  joined_data$PARAM <- factor(joined_data$PARAM,
    levels = unique(joined_data$PARAM[row_order]),
    ordered = TRUE
  )

  joined_data$TLG_DISPLAY <- factor(joined_data$TLG_DISPLAY,
    levels = unique(joined_data$TLG_DISPLAY[row_order]),
    ordered = TRUE
  )

  joined_data
}

#' Counting Specific Values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' We can count the occurrence of specific values in a variable of interest.
#'
#' @inheritParams argument_convention
#'
#' @note
#' * For `factor` variables, `s_count_values` converts `x` to `character` and forwards to the `character`
#'   method. Any `values` that do not occur in `x` contribute zero to the count.
#' * For `count_values()`, variable labels are shown when there is more than one element in `vars`,
#'   otherwise they are hidden.
#'
#' @name count_values_funs
NULL

#' @describeIn count_values_funs S3 generic function to count values.
#'
#' @inheritParams s_summary.logical
#' @param values (`character`)\cr specific values that should be counted.
#'
#' @return
#' * `s_count_values()` returns output of [s_summary()] for specified values of a non-numeric variable.
#'
#' @export
s_count_values <- function(x,
                           values,
                           na.rm = TRUE, # nolint
                           .N_col, # nolint
                           .N_row, # nolint
                           denom = c("n", "N_row", "N_col")) {
  # S3 generic: the method is chosen by the class of the first argument, `x`.
  UseMethod("s_count_values")
}

#' @describeIn count_values_funs Method for `character` class.
#'
#' @method s_count_values character
#'
#' @examples
#' # `s_count_values.character`
#' s_count_values(x = c("a", "b", "a"), values = "a")
#' s_count_values(x = c("a", "b", "a", NA, NA), values = "b", na.rm = FALSE)
#'
#' @export
s_count_values.character <- function(x,
                                     values = "Y",
                                     na.rm = TRUE, # nolint
                                     ...) {
  checkmate::assert_character(values)

  # Optionally discard missing entries, then flag which remaining entries are in `values`.
  kept <- if (na.rm) x[!is.na(x)] else x

  # The logical flags are summarized by `s_summary()`, which also receives `...`.
  s_summary(kept %in% values, ...)
}

#' @describeIn count_values_funs Method for `factor` class. This makes an automatic
#'   conversion to `character` and then forwards to the method for characters.
#'
#' @method s_count_values factor
#'
#' @examples
#' # `s_count_values.factor`
#' s_count_values(x = factor(c("a", "b", "a")), values = "a")
#'
#' @export
s_count_values.factor <- function(x,
                                  values = "Y",
                                  ...) {
  # Convert both the data and the values to character, then re-dispatch on the character data.
  x_chr <- as.character(x)
  values_chr <- as.character(values)
  s_count_values(x_chr, values = values_chr, ...)
}

#' @describeIn count_values_funs Method for `logical` class.
#'
#' @method s_count_values logical
#'
#' @examples
#' # `s_count_values.logical`
#' s_count_values(x = c(TRUE, FALSE, TRUE))
#'
#' @export
s_count_values.logical <- function(x, values = TRUE, ...) {
  checkmate::assert_logical(values)

  # Logical data and values are compared through their character form ("TRUE"/"FALSE").
  x_chr <- as.character(x)
  values_chr <- as.character(values)
  s_count_values(x_chr, values = values_chr, ...)
}

#' @describeIn count_values_funs Formatted analysis function which is used as `afun`
#'   in `count_values()`.
#'
#' @return
#' * `a_count_values()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # `a_count_values`
#' a_count_values(x = factor(c("a", "b", "a")), values = "a", .N_col = 10, .N_row = 10)
#'
#' @export
# Formatted counterpart of `s_count_values()`, built with `make_afun()`.
a_count_values <- make_afun(
  s_count_values,
  # Default display formats for the `count_fraction` and `count` statistics.
  .formats = c(count_fraction = "xx (xx.xx%)", count = "xx")
)

#' @describeIn count_values_funs Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_values()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_values()` to the table layout.
#'
#' @examples
#' # `count_values`
#' basic_table() %>%
#'   count_values("Species", values = "setosa") %>%
#'   build_table(iris)
#'
#' @export
count_values <- function(lyt,
                         vars,
                         values,
                         ...,
                         table_names = vars,
                         .stats = "count_fraction",
                         .formats = NULL,
                         .labels = c(count_fraction = paste(values, collapse = ", ")),
                         .indent_mods = NULL) {
  # Specialize the formatted analysis function with the requested statistics and display settings.
  afun <- make_afun(
    a_count_values,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Variable labels are only shown when more than one variable is analyzed.
  label_visibility <- if (length(vars) > 1) {
    "visible"
  } else {
    "hidden"
  }

  # `values` and everything in `...` are handed to the analysis function.
  afun_args <- list(values = values, ...)

  analyze(
    lyt,
    vars,
    afun = afun,
    extra_args = afun_args,
    show_labels = label_visibility,
    table_names = table_names
  )
}

#' Control function for incidence rate
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for the incidence rate, used
#' internally to specify details in `s_incidence_rate()`.
#'
#' @inheritParams argument_convention
#' @param conf_type (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
#'   for confidence interval type.
#' @param input_time_unit (`string`)\cr `day`, `week`, `month`, or `year` (default)
#'   indicating time unit for data input.
#' @param num_pt_year (`numeric`)\cr number of patient-years to use when calculating adverse event rates.
#' @param time_unit_input `r lifecycle::badge("deprecated")` Please use the `input_time_unit` argument instead.
#' @param time_unit_output `r lifecycle::badge("deprecated")` Please use the `num_pt_year` argument instead.
#'
#' @return A list of components with the same names as the arguments.
#'
#' @seealso [incidence_rate]
#'
#' @examples
#' control_incidence_rate(0.9, "exact", "month", 100)
#'
#' @export
control_incidence_rate <- function(conf_level = 0.95,
                                   conf_type = c("normal", "normal_log", "exact", "byar"),
                                   input_time_unit = c("year", "day", "week", "month"),
                                   num_pt_year = 100,
                                   time_unit_input = lifecycle::deprecated(),
                                   time_unit_output = lifecycle::deprecated()) {
  # Deprecated arguments: warn, then let the supplied value override its replacement.
  if (lifecycle::is_present(time_unit_input)) {
    lifecycle::deprecate_warn(
      "0.8.3", "control_incidence_rate(time_unit_input)", "control_incidence_rate(input_time_unit)"
    )
    input_time_unit <- time_unit_input
  }
  if (lifecycle::is_present(time_unit_output)) {
    lifecycle::deprecate_warn(
      "0.8.3", "control_incidence_rate(time_unit_output)", "control_incidence_rate(num_pt_year)"
    )
    num_pt_year <- time_unit_output
  }

  # Resolve the enumerated choices and validate the numeric settings.
  conf_type <- match.arg(conf_type)
  input_time_unit <- match.arg(input_time_unit)
  checkmate::assert_number(num_pt_year)
  assert_proportion_value(conf_level)

  # Return the validated settings under the names of the corresponding arguments.
  stats::setNames(
    list(conf_level, conf_type, input_time_unit, num_pt_year),
    c("conf_level", "conf_type", "input_time_unit", "num_pt_year")
  )
}

#' Occurrence Table Sorting
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions to score occurrence table subtables and rows which can be used in the
#' sorting of occurrence tables.
#'
#' @name score_occurrences
NULL

#' @describeIn score_occurrences Scoring function which sums the counts across all
#'   columns. It will fail if anything other than counts is used.
#'
#' @inheritParams rtables_access
#'
#' @return
#' * `score_occurrences()` returns the sum of counts across all columns of a table row.
#'
#' @seealso [h_row_first_values()]
#'
#' @examples
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   analyze_num_patients(
#'     vars = "USUBJID",
#'     .stats = c("unique"),
#'     .labels = c("Total number of patients with at least one event")
#'   ) %>%
#'   split_rows_by("AEBODSYS", child_labels = "visible", nested = FALSE) %>%
#'   summarize_num_patients(
#'     var = "USUBJID",
#'     .stats = c("unique", "nonunique"),
#'     .labels = c(
#'       "Total number of patients with at least one event",
#'       "Total number of events"
#'     )
#'   ) %>%
#'   count_occurrences(vars = "AEDECOD")
#'
#' tbl <- build_table(lyt, tern_ex_adae, alt_counts_df = tern_ex_adsl) %>%
#'   prune_table()
#'
#' tbl_sorted <- tbl %>%
#'   sort_at_path(path = c("AEBODSYS", "*", "AEDECOD"), scorefun = score_occurrences)
#'
#' tbl_sorted
#'
#' @export
score_occurrences <- function(table_row) {
  # Score of a row: the total of the counts returned by `h_row_counts()` for that row.
  sum(h_row_counts(table_row))
}

#' @describeIn score_occurrences Scoring functions can be produced by this constructor to only include
#'   specific columns in the scoring. See [h_row_counts()] for further information.
#'
#' @inheritParams has_count_in_cols
#'
#' @return
#' * `score_occurrences_cols()` returns a function that sums counts across all specified columns
#'   of a table row.
#'
#' @seealso [h_row_counts()]
#'
#' @examples
#' score_cols_a_and_b <- score_occurrences_cols(col_names = c("A: Drug X", "B: Placebo"))
#'
#' # Note that this just sorts the AEDECOD within each AEBODSYS. The AEBODSYS are not sorted.
#' # That would require a second pass of `sort_at_path`.
#' tbl_sorted <- tbl %>%
#'   sort_at_path(path = c("AEBODSYS", "*", "AEDECOD"), scorefun = score_cols_a_and_b)
#'
#' tbl_sorted
#'
#' @export
score_occurrences_cols <- function(...) {
  # Constructor: the returned scoring function forwards the captured `...`
  # to `h_row_counts()` and sums the resulting counts.
  function(table_row) {
    sum(h_row_counts(table_row, ...))
  }
}

#' @describeIn score_occurrences Scoring functions produced by this constructor can be used on
#'   subtables: They sum up all specified column counts in the subtable. This is useful when
#'   there is no available content row summing up these counts.
#'
#' @return
#' * `score_occurrences_subtable()` returns a function that sums counts in each subtable
#'   across all specified columns.
#'
#' @examples
#' score_subtable_all <- score_occurrences_subtable(col_names = names(tbl))
#'
#' # Note that this code just sorts the AEBODSYS, not the AEDECOD within AEBODSYS. That
#' # would require a second pass of `sort_at_path`.
#' tbl_sorted <- tbl %>%
#'   sort_at_path(path = c("AEBODSYS"), scorefun = score_subtable_all, decreasing = FALSE)
#'
#' tbl_sorted
#'
#' @export
score_occurrences_subtable <- function(...) {
  # Row-level scorer restricted to the columns selected through `...`.
  score_row <- score_occurrences_cols(...)

  # Subtable scorer: score every leaf row of the subtable and add the scores up.
  function(table_tree) {
    leaf_scores <- vapply(collect_leaves(table_tree), score_row, numeric(1))
    sum(leaf_scores)
  }
}

#' @describeIn score_occurrences Produce score function for sorting table by summing the first content row in
#'   specified columns. Note that this is extending [rtables::cont_n_onecol()] and [rtables::cont_n_allcols()].
#'
#' @return
#' * `score_occurrences_cont_cols()` returns a function that sums counts in the first content row in
#'   specified columns.
#'
#' @export
score_occurrences_cont_cols <- function(...) {
  # Row-level scorer restricted to the columns selected through `...`.
  score_row <- score_occurrences_cols(...)

  function(table_tree) {
    if (!inherits(table_tree, "ContentRow")) {
      # Score the first content row of the (sub)table.
      score_row(h_content_first_row(table_tree))
    } else {
      # A content row itself is not scored.
      NA
    }
  }
}

#' Pairwise `CoxPH` model
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize p-value, HR and CIs from stratified or unstratified `CoxPH` model.
#'
#' @inheritParams argument_convention
#' @inheritParams s_surv_time
#' @param strat (`character` or `NULL`)\cr variable names indicating stratification factors.
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
#'   [control_coxph()]. Some possible parameter options are:
#'   * `pval_method` (`string`)\cr p-value method for testing hazard ratio = 1. Default method is `"log-rank"` which
#'     comes from [survival::survdiff()], can also be set to `"wald"` or `"likelihood"` (from [survival::coxph()]).
#'   * `ties` (`string`)\cr specifying the method for tie handling. Default is `"efron"`,
#'     can also be set to `"breslow"` or `"exact"`. See more in [survival::coxph()]
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for HR.
#'
#' @name survival_coxph_pairwise
NULL

#' @describeIn survival_coxph_pairwise Statistics function which analyzes HR, CIs of HR and p-value of a `coxph` model.
#'
#' @return
#' * `s_coxph_pairwise()` returns the statistics:
#'   * `pvalue`: p-value to test HR = 1.
#'   * `hr`: Hazard ratio.
#'   * `hr_ci`: Confidence interval for hazard ratio.
#'   * `n_tot`: Total number of observations.
#'   * `n_tot_events`: Total number of events.
#'
#' @examples
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#' df <- adtte_f %>%
#'   filter(ARMCD == "ARM A")
#' df_ref_group <- adtte_f %>%
#'   filter(ARMCD == "ARM B")
#'
#' @keywords internal
s_coxph_pairwise <- function(df,
                             .ref_group,
                             .in_ref_col,
                             .var,
                             is_event,
                             strat = NULL,
                             control = control_coxph()) {
  # Validate the column names before indexing with them, so that a missing column is
  # reported as such instead of as a type error on `NULL`.
  checkmate::assert_string(.var)
  checkmate::assert_string(is_event)
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  checkmate::assert_numeric(df[[.var]])
  checkmate::assert_logical(df[[is_event]])
  pval_method <- control$pval_method
  ties <- control$ties
  conf_level <- control$conf_level

  # In the reference column there is nothing to compare against: return empty cells
  # that still carry the row labels.
  if (.in_ref_col) {
    return(
      list(
        pvalue = formatters::with_label("", paste0("p-value (", pval_method, ")")),
        hr = formatters::with_label("", "Hazard Ratio"),
        hr_ci = formatters::with_label("", f_conf_level(conf_level)),
        n_tot = formatters::with_label("", "Total n"),
        n_tot_events = formatters::with_label("", "Total events")
      )
    )
  }

  # Stack reference and comparison data; "ref" is the first factor level, so the model
  # coefficient describes the comparison group relative to the reference group.
  data <- rbind(.ref_group, df)
  group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "x"))

  df_cox <- data.frame(
    tte = data[[.var]],
    is_event = data[[is_event]],
    arm = group
  )
  if (is.null(strat)) {
    formula_cox <- survival::Surv(tte, is_event) ~ arm
  } else {
    # All stratification variables go into a single `strata()` term.
    formula_cox <- stats::as.formula(
      paste0(
        "survival::Surv(tte, is_event) ~ arm + strata(",
        paste(strat, collapse = ","),
        ")"
      )
    )
    df_cox <- cbind(df_cox, data[strat])
  }
  cox_fit <- survival::coxph(
    formula = formula_cox,
    data = df_cox,
    ties = ties
  )
  sum_cox <- summary(cox_fit, conf.int = conf_level, extend = TRUE)
  survdiff_fit <- survival::survdiff(
    formula_cox,
    data = df_cox
  )
  # Upper tail computed directly: `1 - pchisq()` rounds very small p-values to exactly 0.
  log_rank_pvalue <- stats::pchisq(
    survdiff_fit$chisq,
    df = length(survdiff_fit$n) - 1,
    lower.tail = FALSE
  )

  pval <- switch(pval_method,
    "wald" = sum_cox$waldtest["pvalue"],
    "log-rank" = log_rank_pvalue, # pvalue from original log-rank test survival::survdiff()
    "likelihood" = sum_cox$logtest["pvalue"]
  )
  list(
    pvalue = formatters::with_label(unname(pval), paste0("p-value (", pval_method, ")")),
    hr = formatters::with_label(sum_cox$conf.int[1, 1], "Hazard Ratio"),
    hr_ci = formatters::with_label(unname(sum_cox$conf.int[1, 3:4]), f_conf_level(conf_level)),
    n_tot = formatters::with_label(sum_cox$n, "Total n"),
    n_tot_events = formatters::with_label(sum_cox$nevent, "Total events")
  )
}

#' @describeIn survival_coxph_pairwise Formatted analysis function which is used as `afun` in `coxph_pairwise()`.
#'
#' @return
#' * `a_coxph_pairwise()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#'
#' @keywords internal
# Formatted counterpart of `s_coxph_pairwise()`, built with `make_afun()`.
a_coxph_pairwise <- make_afun(
  s_coxph_pairwise,
  # Only the confidence interval row gets an extra indent level.
  .indent_mods = c(pvalue = 0L, hr = 0L, hr_ci = 1L, n_tot = 0L, n_tot_events = 0L),
  # Default display formats per statistic.
  # NOTE(review): `n_tot` and `n_tot_events` are labelled as totals but use a two-decimal
  # format; confirm whether "xx" was intended.
  .formats = c(
    pvalue = "x.xxxx | (<0.0001)",
    hr = "xx.xx",
    hr_ci = "(xx.xx, xx.xx)",
    n_tot = "xx.xx",
    n_tot_events = "xx.xx"
  )
)

#' @describeIn survival_coxph_pairwise Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `coxph_pairwise()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_coxph_pairwise()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   coxph_pairwise(
#'     vars = "AVAL",
#'     is_event = "is_event",
#'     var_labels = "Unstratified Analysis"
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   coxph_pairwise(
#'     vars = "AVAL",
#'     is_event = "is_event",
#'     var_labels = "Stratified Analysis",
#'     strat = "SEX",
#'     control = control_coxph(pval_method = "wald")
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' @export
coxph_pairwise <- function(lyt,
                           vars,
                           ...,
                           var_labels = "CoxPH",
                           show_labels = "visible",
                           table_names = vars,
                           .stats = c("pvalue", "hr", "hr_ci"),
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = NULL) {
  # Specialize the formatted analysis function with the requested statistics and display settings.
  formatted_afun <- make_afun(
    a_coxph_pairwise,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Everything in `...` is handed to the analysis function.
  afun_args <- list(...)

  analyze(
    lyt,
    vars,
    afun = formatted_afun,
    extra_args = afun_args,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Summarize the Change from Baseline or Absolute Baseline Values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The primary analysis variable `.var` indicates the numerical change from baseline results,
#' and additional required secondary analysis variables are `value` and `baseline_flag`.
#' Depending on the baseline flag, either the absolute baseline values (at baseline)
#' or the change from baseline values (post-baseline) are then summarized.
#'
#' @inheritParams argument_convention
#'
#' @name summarize_change
NULL

#' @describeIn summarize_change Statistics function that summarizes baseline or post-baseline visits.
#'
#' @return
#' * `s_change_from_baseline()` returns the same values returned by [s_summary.numeric()].
#'
#' @note The data in `df` must either all be from baseline or all be from post-baseline visits. Otherwise
#'   an error will be thrown.
#'
#' @examples
#' df <- data.frame(
#'   chg = c(1, 2, 3),
#'   is_bl = c(TRUE, TRUE, TRUE),
#'   val = c(4, 5, 6)
#' )
#'
#' @keywords internal
s_change_from_baseline <- function(df,
                                   .var,
                                   variables,
                                   na.rm = TRUE, # nolint
                                   ...) {
  checkmate::assert_numeric(df[[variables$value]])
  checkmate::assert_numeric(df[[.var]])
  checkmate::assert_logical(df[[variables$baseline_flag]])
  # All rows must share the same baseline flag (at most one distinct value).
  checkmate::assert_vector(unique(df[[variables$baseline_flag]]), max.len = 1)
  assert_df_with_variables(df, c(variables, list(chg = .var)))

  # Baseline rows contribute the absolute value, post-baseline rows the change from baseline.
  combined <- ifelse(
    df[[variables$baseline_flag]],
    df[[variables$value]],
    df[[.var]]
  )
  # `ifelse()` returns a logical vector when nothing numeric was selected: for empty input,
  # but also when the flag (or the selected values) are all `NA`. Coerce every such case to
  # numeric so that the numeric summary is used rather than the logical one.
  if (is.logical(combined)) {
    combined <- as.numeric(combined)
  }
  s_summary(combined, na.rm = na.rm, ...)
}

#' @describeIn summarize_change Formatted analysis function which is used as `afun` in `summarize_change()`.
#'
#' @return
#' * `a_change_from_baseline()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#'
#' @keywords internal
# Formatted counterpart of `s_change_from_baseline()`, built with `make_afun()`.
a_change_from_baseline <- make_afun(
  s_change_from_baseline,
  # Default display formats per statistic.
  .formats = c(
    n = "xx",
    mean_sd = "xx.xx (xx.xx)",
    mean_se = "xx.xx (xx.xx)",
    median = "xx.xx",
    range = "xx.xx - xx.xx",
    mean_ci = "(xx.xx, xx.xx)",
    median_ci = "(xx.xx, xx.xx)",
    mean_pval = "xx.xx"
  ),
  # Row labels are set here only for these four statistics.
  .labels = c(
    mean_sd = "Mean (SD)",
    mean_se = "Mean (SE)",
    median = "Median",
    range = "Min - Max"
  )
)

#' @describeIn summarize_change Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `summarize_change()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_change_from_baseline()` to the table layout.
#'
#' @note To be used after a split on visits in the layout, such that each data subset only contains
#'   either baseline or post-baseline data.
#'
#' @examples
#' # `summarize_change()`
#'
#' ## Fabricated dataset.
#' library(dplyr)
#'
#' dta_test <- data.frame(
#'   USUBJID = rep(1:6, each = 3),
#'   AVISIT = rep(paste0("V", 1:3), 6),
#'   ARM = rep(LETTERS[1:3], rep(6, 3)),
#'   AVAL = c(9:1, rep(NA, 9))
#' ) %>%
#'   mutate(ABLFLL = AVISIT == "V1") %>%
#'   group_by(USUBJID) %>%
#'   mutate(
#'     BLVAL = AVAL[ABLFLL],
#'     CHG = AVAL - BLVAL
#'   ) %>%
#'   ungroup()
#'
#' results <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   summarize_change("CHG", variables = list(value = "AVAL", baseline_flag = "ABLFLL")) %>%
#'   build_table(dta_test)
#' \donttest{
#' Viewer(results)
#' }
#'
#' @export
summarize_change <- function(lyt,
                             vars,
                             ...,
                             table_names = vars,
                             .stats = c("n", "mean_sd", "median", "range"),
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL) {
  # Specialize the formatted analysis function with the requested statistics and display settings.
  formatted_afun <- make_afun(
    a_change_from_baseline,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Everything in `...` is handed to the analysis function.
  afun_args <- list(...)

  analyze(
    lyt,
    vars,
    afun = formatted_afun,
    table_names = table_names,
    extra_args = afun_args
  )
}

#' Patient Counts with Abnormal Range Values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Primary analysis variable `.var` indicates the abnormal range result (`character` or `factor`)
#' and additional analysis variables are `id` (`character` or `factor`) and `baseline` (`character` or
#' `factor`). For each direction specified in `abnormal` (e.g. high or low) count patients in the
#' numerator and denominator as follows:
#'   * `num` : The number of patients with this abnormality recorded while on treatment.
#'   * `denom`: The number of patients with at least one post-baseline assessment.
#'
#' @inheritParams argument_convention
#' @param abnormal (named `list`)\cr list identifying the abnormal range level(s) in `var`. Defaults to
#'   `list(Low = "LOW", High = "HIGH")` but you can also group different levels into the named list,
#'   for example, `abnormal = list(Low = c("LOW", "LOW LOW"), High = c("HIGH", "HIGH HIGH"))`.
#'
#' @note
#' * `count_abnormal()` only works with a single variable containing multiple abnormal levels.
#' * `df` should be filtered to include only post-baseline records.
#' * the denominator includes patients that might have other abnormal levels at baseline,
#'   and patients with missing baseline. Patients with these abnormalities at
#'   baseline can be optionally excluded from numerator and denominator.
#'
#' @name abnormal
#' @include formatting_functions.R
NULL

#' @describeIn abnormal Statistics function which counts patients with abnormal range values
#'   for a single `abnormal` level.
#'
#' @param exclude_base_abn (`flag`)\cr whether to exclude subjects with baseline abnormality
#'   from numerator and denominator.
#'
#' @return
#' * `s_count_abnormal()` returns the statistic `fraction` which is a vector with `num` and `denom` counts of patients.
#' @examples
#' library(dplyr)
#'
#' df <- data.frame(
#'   USUBJID = as.character(c(1, 1, 2, 2)),
#'   ANRIND = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
#'   BNRIND = factor(c("NORMAL", "NORMAL", "HIGH", "HIGH")),
#'   ONTRTFL = c("", "Y", "", "Y"),
#'   stringsAsFactors = FALSE
#' )
#'
#' # Select only post-baseline records.
#' df <- df %>%
#'   filter(ONTRTFL == "Y")
#' @keywords internal
s_count_abnormal <- function(df,
                             .var,
                             abnormal = list(Low = "LOW", High = "HIGH"),
                             variables = list(id = "USUBJID", baseline = "BNRIND"),
                             exclude_base_abn = FALSE) {
  checkmate::assert_list(abnormal, types = "character", names = "named", len = 2, any.missing = FALSE)
  checkmate::assert_true(any(unlist(abnormal) %in% levels(df[[.var]])))
  checkmate::assert_factor(df[[.var]])
  checkmate::assert_flag(exclude_base_abn)
  assert_df_with_variables(df, c(range = .var, variables))
  checkmate::assert_multi_class(df[[variables$baseline]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))

  # Number of distinct patient identifiers among the selected rows.
  n_patients <- function(rows) {
    length(unique(df[rows, variables$id, drop = TRUE]))
  }

  # Numerator and denominator for one abnormality direction.
  count_direction <- function(direction, lvls) {
    # Denominator: all patients in `df`, minus those whose baseline is already in `lvls`
    # when `exclude_base_abn` is set.
    in_denom <- if (exclude_base_abn) {
      !(df[[variables$baseline]] %in% lvls)
    } else {
      TRUE
    }
    # Numerator: denominator patients with at least one record at one of `lvls`.
    in_num <- (df[[.var]] %in% lvls) & in_denom

    formatters::with_label(
      c(num = n_patients(in_num), denom = n_patients(in_denom)),
      direction
    )
  }

  # Keep, per direction, only the levels that exist in `df[[.var]]`, and drop directions
  # for which none of the requested levels exist.
  observed <- lapply(abnormal, intersect, levels(df[[.var]]))
  observed <- observed[lengths(observed) > 0]

  fractions <- lapply(
    stats::setNames(nm = names(observed)),
    function(direction) count_direction(direction, observed[[direction]])
  )

  list(fraction = fractions)
}

#' @describeIn abnormal Formatted analysis function which is used as `afun` in `count_abnormal()`.
#'
#' @return
#' * `a_count_abnormal()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_count_abnormal <- make_afun(
  # Statistics function whose result is wrapped into a formatted analysis function.
  s_count_abnormal,
  # The `fraction` statistic is rendered with `format_fraction`.
  .formats = c(fraction = format_fraction)
)

#' @describeIn abnormal Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal()` to the table layout.
#'
#' @examples
#' # Layout creating function.
#' basic_table() %>%
#'   count_abnormal(var = "ANRIND", abnormal = list(high = "HIGH", low = "LOW")) %>%
#'   build_table(df)
#'
#' # Passing of statistics function and formatting arguments.
#' df2 <- data.frame(
#'   ID = as.character(c(1, 1, 2, 2)),
#'   RANGE = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
#'   BL_RANGE = factor(c("NORMAL", "NORMAL", "HIGH", "HIGH")),
#'   ONTRTFL = c("", "Y", "", "Y"),
#'   stringsAsFactors = FALSE
#' )
#'
#' # Select only post-baseline records.
#' df2 <- df2 %>%
#'   filter(ONTRTFL == "Y")
#'
#' basic_table() %>%
#'   count_abnormal(
#'     var = "RANGE",
#'     abnormal = list(low = "LOW", high = "HIGH"),
#'     variables = list(id = "ID", baseline = "BL_RANGE")
#'   ) %>%
#'   build_table(df2)
#'
#' @export
count_abnormal <- function(lyt,
                           var,
                           ...,
                           table_names = var,
                           .stats = NULL,
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = NULL) {
  # Customize the formatted analysis function with the user-supplied statistics selection,
  # formats, labels and indentation; the `fraction` statistic is ungrouped.
  customized_afun <- make_afun(
    a_count_abnormal,
    .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods,
    .ungroup_stats = "fraction"
  )

  # Exactly one analysis variable is supported.
  checkmate::assert_string(var)

  # Everything passed via `...` is forwarded to the analysis function as extra arguments.
  analyze(
    lyt,
    vars = var,
    afun = customized_afun,
    table_names = table_names,
    extra_args = list(...),
    show_labels = "hidden"
  )
}

#' Control Function for Logistic Regression Model Fitting
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for logistic regression models.
#' `conf_level` refers to the confidence level used for the Odds Ratio CIs.
#'
#' @inheritParams argument_convention
#' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`.
#'   This will be used when fitting the logistic regression model on the left hand side of the formula.
#'   Note that the evaluated expression should result in either a logical vector or a factor with 2
#'   levels. By default this is just `"response"` such that the original response variable is used
#'   and not modified further.
#'
#' @return A list of components with the same names as the arguments.
#'
#' @examples
#' # Standard options.
#' control_logistic()
#'
#' # Modify confidence level.
#' control_logistic(conf_level = 0.9)
#'
#' # Use a different response definition.
#' control_logistic(response_definition = "I(response %in% c('CR', 'PR'))")
#'
#' @export
control_logistic <- function(response_definition = "response",
                             conf_level = 0.95) {
  # Check the type first: running `grepl()` on a non-string (vector, `NA`, `NULL`, ...) would
  # otherwise fail inside `assert_true()` with a message that hides the real problem.
  checkmate::assert_string(response_definition)
  # The definition must mention `response`; a literal (non-regex) match is sufficient.
  checkmate::assert_true(grepl("response", response_definition, fixed = TRUE))
  assert_proportion_value(conf_level)

  # Return the validated settings under the same names as the arguments.
  list(
    response_definition = response_definition,
    conf_level = conf_level
  )
}

#' Helper Function for Tabulation of a Single Biomarker Result
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Please see [h_tab_surv_one_biomarker()] and [h_tab_rsp_one_biomarker()], which use this function for examples.
#' This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @inheritParams argument_convention
#' @param df (`data.frame`)\cr results for a single biomarker.
#' @param afuns (named `list` of `function`)\cr analysis functions.
#' @param colvars (`list` with `vars` and `labels`)\cr variables to tabulate and their labels.
#'
#' @return An `rtables` table object with statistics in columns.
#'
#' @export
h_tab_one_biomarker <- function(df,
                                afuns,
                                colvars,
                                .indent_mods = 0L) {
  # First row split is by row type, keeping only the "content" level here.
  lyt <- split_rows_by(
    basic_table(),
    var = "row_type",
    split_fun = keep_split_levels("content"),
    nested = FALSE
  )

  # Content rows (all patients), labelled via `var_label`.
  lyt <- summarize_row_groups(lyt, var = "var_label", cfun = afuns, indent_mod = .indent_mods)

  # One column per variable in `colvars`.
  lyt <- split_cols_by_multivar(lyt, vars = colvars$vars, varlabels = colvars$labels)

  # If there are any subgroup rows, the layout is extended accordingly.
  has_subgroup_rows <- "analysis" %in% df$row_type
  if (has_subgroup_rows) {
    # Second, non-nested row split which keeps only the "analysis" level.
    lyt <- split_rows_by(
      lyt,
      var = "row_type",
      split_fun = keep_split_levels("analysis"),
      nested = FALSE,
      child_labels = "hidden"
    )

    # Nested split by the subgroup variable, with visible labels taken from `var_label`.
    lyt <- split_rows_by(
      lyt,
      var = "var",
      labels_var = "var_label",
      nested = TRUE,
      child_labels = "visible",
      indent_mod = .indent_mods * 2
    )

    # Content rows for each subgroup, using the same functions as for the overall rows.
    lyt <- summarize_row_groups(lyt, cfun = afuns, var = "subgroup")
  }

  build_table(lyt, df = df)
}

#' Subgroup Treatment Effect Pattern (STEP) Fit for Survival Outcome
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This fits the Subgroup Treatment Effect Pattern models for a survival outcome. The treatment arm
#' variable must have exactly 2 levels, where the first one is taken as reference and the estimated
#' hazard ratios are for the comparison of the second level vs. the first one.
#'
#' The model which is fit is:
#'
#' `Surv(time, event) ~ arm * poly(biomarker, degree) + covariates + strata(strata)`
#'
#' where `degree` is specified by `control_step()`.
#'
#' @inheritParams argument_convention
#' @param variables (named `list` of `character`)\cr list of analysis variables: needs `time`, `event`,
#'   `arm`, `biomarker`, and optional `covariates` and `strata`.
#' @param control (named `list`)\cr combined control list from [control_step()] and [control_coxph()].
#'
#' @return A matrix of class `step`. The first columns describe the subgroup intervals used for the
#'   biomarker variable, including the interval centers and bounds. The remaining columns contain the
#'   estimates for the treatment arm comparison.
#'
#' @note For the default degree 0 the `biomarker` variable is not included in the model.
#'
#' @seealso [control_step()] and [control_coxph()] for the available customization options.
#'
#' @examples
#' # Testing dataset with just two treatment arms.
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("ARM" = "Treatment Arm", "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' variables <- list(
#'   arm = "ARM",
#'   biomarker = "BMRKR1",
#'   covariates = c("AGE", "BMRKR2"),
#'   event = "is_event",
#'   time = "AVAL"
#' )
#'
#' # Fit default STEP models: Here a constant treatment effect is estimated in each subgroup.
#' step_matrix <- fit_survival_step(
#'   variables = variables,
#'   data = adtte_f
#' )
#' dim(step_matrix)
#' head(step_matrix)
#'
#' # Specify different polynomial degree for the biomarker interaction to use more flexible local
#' # models. Or specify different Cox regression options.
#' step_matrix2 <- fit_survival_step(
#'   variables = variables,
#'   data = adtte_f,
#'   control = c(control_coxph(conf_level = 0.9), control_step(degree = 2))
#' )
#'
#' # Use a global model with cubic interaction and only 5 points.
#' step_matrix3 <- fit_survival_step(
#'   variables = variables,
#'   data = adtte_f,
#'   control = c(control_coxph(), control_step(bandwidth = NULL, degree = 3, num_points = 5L))
#' )
#'
#' @export
fit_survival_step <- function(variables,
                              data,
                              control = c(control_step(), control_coxph())) {
  checkmate::assert_list(control)
  assert_df_with_variables(data, variables)

  # Only records with a non-missing biomarker value are used.
  biomarker_observed <- !is.na(data[[variables$biomarker]])
  data <- data[biomarker_observed, ]

  window_sel <- h_step_window(x = data[[variables$biomarker]], control = control)
  interval_center <- window_sel$interval[, "Interval Center"]
  form <- h_step_survival_formula(variables = variables, control = control)

  if (is.null(control$bandwidth)) {
    # No bandwidth: a single call on the whole data handles all interval centers at once.
    estimates <- h_step_survival_est(
      formula = form,
      data = data,
      variables = variables,
      x = interval_center,
      control = control
    )
  } else {
    # Otherwise: one call per interval center, each paired with the matching column of
    # `window_sel$sel` as `subset` (presumably the records selected for that window).
    per_window <- mapply(
      FUN = h_step_survival_est,
      x = interval_center,
      subset = as.list(as.data.frame(window_sel$sel)),
      MoreArgs = list(
        formula = form,
        data = data,
        variables = variables,
        control = control
      )
    )
    # `mapply()` returns one column per interval center; name the statistics and
    # transpose so that rows correspond to intervals.
    rownames(per_window) <- c("n", "events", "loghr", "se", "ci_lower", "ci_upper")
    estimates <- t(per_window)
  }

  # Interval description columns come first, followed by the estimates; the inputs are
  # kept as attributes of the resulting `step` matrix.
  structure(
    cbind(window_sel$interval, estimates),
    class = c("step", "matrix"),
    variables = variables,
    control = control
  )
}

#' Generate PK reference dataset
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @return `data.frame` of PK parameters
#'
#' @examples
#' pk_reference_dataset <- d_pkparam()
#'
#' @export
d_pkparam <- function() {
  pk_dataset <- as.data.frame(matrix(
    c(
      "TMAX", "Time of CMAX", "Tmax", "Plasma/Blood/Serum", "1",
      "CMAX", "Max Conc", "Cmax", "Plasma/Blood/Serum", "2",
      "CMAXD", "Max Conc Norm by Dose", "Cmax/D", "Plasma/Blood/Serum", "3",
      "AUCIFO", "AUC Infinity Obs", "AUCinf obs", "Plasma/Blood/Serum", "4",
      "AUCIFP", "AUC Infinity Pred", "AUCinf pred", "Plasma/Blood/Serum", "5",
      "AUCIFOD", "AUC Infinity Obs Norm by Dose", "AUCinf/D obs", "Plasma/Blood/Serum", "6",
      "AUCIFD", "AUC Infinity Pred Norm by Dose", "AUCinf/D pred", "Plasma/Blood/Serum", "7",
      "AUCPEO", "AUC %Extrapolation Obs", "AUCinf extrap obs", "Plasma/Blood/Serum", "8",
      "AUCPEP", "AUC %Extrapolation Pred", "AUCinf extrap pred", "Plasma/Blood/Serum", "9",
      "AUCINT", "AUC from T1 to T2", "AUCupper-lower ", "Plasma/Blood/Serum", "10",
      "AUCTAU", "AUC Over Dosing Interval", "AUCtau", "Plasma/Blood/Serum", "11",
      "AUCLST", "AUC to Last Nonzero Conc", "AUClast", "Plasma/Blood/Serum", "12",
      "AUCALL", "AUC All", "AUCall", "Plasma/Blood/Serum", "13",
      "AUMCIFO", "AUMC Infinity Obs", "AUMCinf obs", "Plasma/Blood/Serum", "14",
      "AUMCIFP", "AUMC Infinity Pred", "AUMCinf pred", "Plasma/Blood/Serum", "15",
      "AUMCPEO", "AUMC % Extrapolation Obs", "AUMC extrap obs", "Plasma/Blood/Serum", "16",
      "AUMCPEP", "AUMC % Extrapolation Pred", "AUMC extrap pred", "Plasma/Blood/Serum", "17",
      "AUMCTAU", "AUMC Over Dosing Interval", "AUMCtau", "Plasma/Blood/Serum", "18",
      "AUMCLST", "AUMC to Last Nonzero Conc", "AUMClast", "Plasma/Blood/Serum", "19",
      "AURCIFO", "AURC Infinity Obs", "AURCinf obs", "Plasma/Blood/Serum", "20",
      "AURCIFP", "AURC Infinity Pred", "AURCinf pred", "Plasma/Blood/Serum", "21",
      "AURCPEO", "AURC % Extrapolation Obs", "AURC extrap obs", "Plasma/Blood/Serum", "22",
      "AURCPEP", "AURC % Extrapolation Pred", "AURC extrap pred", "Plasma/Blood/Serum", "23",
      "AURCLST", "AURC Dosing to Last Conc", "AURClast", "Plasma/Blood/Serum", "24",
      "AURCALL", "AURC All", "AURCall", "Plasma/Blood/Serum", "25",
      "TLST", "Time of Last Nonzero Conc", "Tlast", "Plasma/Blood/Serum", "26",
      "CO", "Initial Conc", "CO", "Plasma/Blood/Serum", "27",
      "C0", "Initial Conc", "C0", "Plasma/Blood/Serum", "28",
      "CAVG", "Average Conc", "Cavg", "Plasma/Blood/Serum", "29",
      "CLST", "Last Nonzero Conc", "Clast", "Plasma/Blood/Serum", "30",
      "CMIN", "Min Conc", "Cmin", "Plasma/Blood/Serum", "31",
      "LAMZHL", "Half-Life Lambda z", "t1/2", "Plasma/Blood/Serum", "32",
      "CLFO", "Total CL Obs by F", "CL/F obs", "Plasma/Blood/Serum", "33",
      "CLFP", "Total CL Pred by F", "CL/F pred", "Plasma/Blood/Serum", "34",
      "CLO", "Total CL Obs", "CL obs", "Plasma/Blood/Serum", "35",
      "CLP", "Total CL Pred", "CL pred", "Plasma/Blood/Serum", "36",
      "CLSS", "Total CL Steady State Pred", "CLss", "Plasma/Blood/Serum", "37",
      "CLSSF", "Total CL Steady State Pred by F", "CLss/F", "Plasma/Blood/Serum", "38",
      "VZFO", "Vz Obs by F", "Vz/F obs", "Plasma/Blood/Serum", "39",
      "VZFP", "Vz Pred by F", "Vz/F pred", "Plasma/Blood/Serum", "40",
      "VZO", "Vz Obs", "Vz obs", "Plasma/Blood/Serum", "41",
      "VZP", "Vz Pred", "Vz pred", "Plasma/Blood/Serum", "42",
      "VSSO", "Vol Dist Steady State Obs", "Vss obs", "Plasma/Blood/Serum", "43",
      "VSSP", "Vol Dist Steady State Pred", "Vss pred", "Plasma/Blood/Serum", "44",
      "LAMZ", "Lambda z", "Lambda z", "Plasma/Blood/Serum", "45",
      "LAMZLL", "Lambda z Lower Limit", "Lambda z lower", "Plasma/Blood/Serum", "46",
      "LAMZUL", "Lambda z Upper Limit", "Lambda z upper", "Plasma/Blood/Serum", "47",
      "LAMZNPT", "Number of Points for Lambda z", "No points Lambda z", "Plasma/Blood/Serum", "48",
      "MRTIFO", "MRT Infinity Obs", "MRTinf obs", "Plasma/Blood/Serum", "49",
      "MRTIFP", "MRT Infinity Pred", "MRTinf pred", "Plasma/Blood/Serum", "50",
      "MRTLST", "MRT to Last Nonzero Conc", "MRTlast", "Plasma/Blood/Serum", "51",
      "R2", "R Squared", "Rsq", "Plasma/Blood/Serum", "52",
      "R2ADJ", "R Squared Adjusted", "Rsq adjusted", "Plasma/Blood/Serum", "53",
      "TLAG", "Time Until First Nonzero Conc", "TIag", "Plasma/Blood/Serum", "54",
      "TMIN", "Time of CMIN Observation", "Tmin", "Plasma/Blood/Serum", "55",
      "ACCI", "Accumulation Index", "Accumulation Index", "Plasma/Blood/Serum/Urine", "56",
      "FLUCP", "Fluctuation%", "Fluctuation", "Plasma/Blood/Serum", "57",
      "CORRXY", "Correlation Between TimeX and Log ConcY", "Corr xy", "Plasma/Blood/Serum", "58",
      "RCAMINT", "Amt Rec from T1 to T2", "Ae", "Urine", "59",
      "RCPCINT", "Pct Rec from T1 to T2", "Fe", "Urine", "60",
      "VOLPK", "Sum of Urine Vol", "Urine volume", "Urine", "61",
      "RENALCL", "Renal CL", "CLR", "Plasma/Blood/Serum/Urine", "62",
      "ERTMAX", "Time of Max Excretion Rate", "Tmax Rate", "Urine", "63",
      "RMAX", "Time of Maximum Response", "Rmax", "Matrix of PD", "64",
      "RMIN", "Time of Minimum Response", "Rmin", "Matrix of PD", "65",
      "ERMAX", "Max Excretion Rate", "Max excretion rate", "Urine", "66",
      "MIDPTLST", "Midpoint of Collection Interval", "Midpoint last", "Urine", "67",
      "ERLST", "Last Meas Excretion Rate", "Rate last", "Urine", "68",
      "TON", "Time to Onset", "Tonset", "Matrix of PD", "69",
      "TOFF", "Time to Offset", "Toffset", "Matrix of PD", "70",
      "TBBLP", "Time Below Baseline %", "Time %Below Baseline", "Matrix of PD", "71",
      "TBTP", "Time Below Threshold %", "Time %Below Threshold", "Matrix of PD", "72",
      "TABL", "Time Above Baseline", "Time Above Baseline", "Matrix of PD", "73",
      "TAT", "Time Above Threshold", "Time Above Threshold", "Matrix of PD", "74",
      "TBT", "Time Below Threshold", "Time Below Threshold", "Matrix of PD", "75",
      "TBLT", "Time Between Baseline and Threshold", "Time Between Baseline Threshold", "Matrix of PD", "76",
      "BLRSP", "Baseline Response", "Baseline", "Matrix of PD", "77",
      "TSHDRSP", "Response Threshold", "Threshold", "Matrix of PD", "78",
      "AUCABL", "AUC Above Baseline", "AUC above baseline", "Matrix of PD", "79",
      "AUCAT", "AUC Above Threshold", "AUC above threshold", "Matrix of PD", "80",
      "AUCBBL", "AUC Below Baseline", "AUC below baseline", "Matrix of PD", "81",
      "AUCBT", "AUC Below Threshold", "AUC below threshold", "Matrix of PD", "82",
      "AUCBLDIF", "Diff AUC Above Base and AUC Below Base", "AUC diff baseline", "Matrix of PD", "83",
      "AUCTDIF", "Diff AUC Above Thr and AUC Below Thr", "AUCnet threshold", "Matrix of PD", "84",
      "TDIFF", "Diff Time to Offset and Time to Onset", "Diff toffset-tonset", "Matrix of PD", "85",
      "AUCPBEO", "AUC %Back Extrapolation Obs", "AUC%Back extrap obs", "Plasma/Blood/Serum", "86",
      "AUCPBEP", "AUC %Back Extrapolation Pred", "AUC%Back extrap pred", "Plasma/Blood/Serum", "87",
      "TSLP1L", "Lower Time Limit Slope 1st", "Slope1 lower", "Matrix of PD", "88",
      "TSLP1U", "Upper Time Limit Slope 1st Segment", "Slope1 upper", "Matrix of PD", "89",
      "TSLP2L", "Lower Time Limit Slope 2nd Segment", "Slope2 lower", "Matrix of PD", "90",
      "TSLP2U", "Upper Time Limit Slope 2nd Segment", "Slope2 upper", "Matrix of PD", "91",
      "SLP1", "Slope, 1st Segment", "Slope1", "Matrix of PD", "92",
      "SLP2", "Slope, 2nd Segment", "Slope2", "Matrix of PD", "93",
      "SLP1PT", "Number of Points for Slope 1st Segment", "No points slope1", "Matrix of PD", "94",
      "SLP2PT", "Number of Points for Slope 2nd Segment", "No points slope2", "Matrix of PD", "95",
      "R2ADJS1", "R-Squared Adjusted Slope, 1st Segment", "Rsq adjusted slope1", "Matrix of PD", "96",
      "R2ADJS2", "R-Squared Adjusted Slope, 2nd Segment", "Rsq adjusted slope2", "Matrix of PD", "97",
      "R2SLP1", "R Squared, Slope, 1st Segment", "Rsq slope1", "Matrix of PD", "98",
      "R2SLP2", "R Squared, Slope, 2nd Segment", "Rsq slope2", "Matrix of PD", "99",
      "CORRXYS1", "Corr Btw TimeX and Log ConcY, Slope 1st", "Corr xy slope1", "Plasma/Blood/Serum", "100",
      "CORRXYS2", "Corr Btw TimeX and Log ConcY, Slope 1st Slope 2nd", "Corr xy slope2", "Plasma/Blood/Serum", "101",
      "AILAMZ", "Accumulation Index using Lambda z", "AILAMZ", "Plasma/Blood/Serum", "102",
      "ARAUC", "Accumulation Ratio AUCTAU", "ARAUC", "Plasma/Blood/Serum", "103",
      "ARAUCD", "Accum Ratio AUCTAU norm by dose", "ARAUCD", "Plasma/Blood/Serum", "104",
      "ARAUCIFO", "Accum Ratio AUC Infinity Obs", "ARAUCIFO", "Plasma/Blood/Serum", "105",
      "ARAUCIFP", "Accum Ratio AUC Infinity Pred", "ARAUCIFP", "Plasma/Blood/Serum", "106",
      "ARAUCIND", "Accum Ratio AUC T1 to T2 norm by dose", "ARAUCIND_T1_T2_UNIT", "Plasma/Blood/Serum", "107",
      "ARAUCINT", "Accumulation Ratio AUC from T1 to T2", "ARAUCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "108",
      "ARAUCIOD", "Accum Ratio AUCIFO Norm by Dose", "ARAUCIOD", "Plasma/Blood/Serum", "109",
      "ARAUCIPD", "Accum Ratio AUCIFP Norm by Dose", "ARAUCIPD", "Plasma/Blood/Serum", "110",
      "ARAUCLST", "Accum Ratio AUC to Last Nonzero Conc", "ARAUCLST", "Plasma/Blood/Serum", "111",
      "ARCMAX", "Accumulation Ratio Cmax", "ARCMAX", "Plasma/Blood/Serum", "112",
      "ARCMAXD", "Accum Ratio Cmax norm by dose", "ARCMAXD", "Plasma/Blood/Serum", "113",
      "ARCMIN", "Accumulation Ratio Cmin", "ARCMIN", "Plasma/Blood/Serum", "114",
      "ARCMIND", "Accum Ratio Cmin norm by dose", "ARCMIND", "Plasma/Blood/Serum", "115",
      "ARCTROUD", "Accum Ratio Ctrough norm by dose", "ARCTROUD", "Plasma/Blood/Serum", "116",
      "ARCTROUG", "Accumulation Ratio Ctrough", "ARCTROUG", "Plasma/Blood/Serum", "117",
      "AUCALLB", "AUC All Norm by BMI", "AUCall_B", "Plasma/Blood/Serum", "118",
      "AUCALLD", "AUC All Norm by Dose", "AUCall_D", "Plasma/Blood/Serum", "119",
      "AUCALLS", "AUC All Norm by SA", "AUCall_S", "Plasma/Blood/Serum", "120",
      "AUCALLW", "AUC All Norm by WT", "AUCall_W", "Plasma/Blood/Serum", "121",
      "AUCIFOB", "AUC Infinity Obs Norm by BMI", "AUCINF_obs_B", "Plasma/Blood/Serum", "122",
      "AUCIFOLN", "AUC Infinity Obs LN Transformed", "AUCIFOLN", "Plasma/Blood/Serum", "123",
      "AUCIFOS", "AUC Infinity Obs Norm by SA", "AUCINF_obs_S", "Plasma/Blood/Serum", "124",
      "AUCIFOUB", "AUC Infinity Obs, Unbound Drug", "AUCIFOUB", "Plasma/Blood/Serum", "125",
      "AUCIFOW", "AUC Infinity Obs Norm by WT", "AUCINF_obs_W", "Plasma/Blood/Serum", "126",
      "AUCIFPB", "AUC Infinity Pred Norm by BMI", "AUCINF_pred_B", "Plasma/Blood/Serum", "127",
      "AUCIFPD", "AUC Infinity Pred Norm by Dose", "AUCINF_pred_D", "Plasma/Blood/Serum", "128",
      "AUCIFPS", "AUC Infinity Pred Norm by SA", "AUCINF_pred_S", "Plasma/Blood/Serum", "129",
      "AUCIFPUB", "AUC Infinity Pred, Unbound Drug", "AUCIFPUB", "Plasma/Blood/Serum", "130",
      "AUCIFPW", "AUC Infinity Pred Norm by WT", "AUCINF_pred_W", "Plasma/Blood/Serum", "131",
      "AUCINTB", "AUC from T1 to T2 Norm by BMI", "AUC_B_T1_T2_UNIT", "Plasma/Blood/Serum", "132",
      "AUCINTD", "AUC from T1 to T2 Norm by Dose", "AUC_D_T1_T2_UNIT", "Plasma/Blood/Serum", "133",
      "AUCINTS", "AUC from T1 to T2 Norm by SA", "AUC_S_T1_T2_UNIT", "Plasma/Blood/Serum", "134",
      "AUCINTW", "AUC from T1 to T2 Norm by WT", "AUC_W_T1_T2_UNIT", "Plasma/Blood/Serum", "135",
      "AUCLSTB", "AUC to Last Nonzero Conc Norm by BMI", "AUClast_B", "Plasma/Blood/Serum", "136",
      "AUCLSTD", "AUC to Last Nonzero Conc Norm by Dose", "AUClast_D", "Plasma/Blood/Serum", "137",
      "AUCLSTLN", "AUC to Last Nonzero Conc LN Transformed", "AUCLSTLN", "Plasma/Blood/Serum", "138",
      "AUCLSTS", "AUC to Last Nonzero Conc Norm by SA", "AUClast_S", "Plasma/Blood/Serum", "139",
      "AUCLSTUB", "AUC to Last Nonzero Conc, Unbound Drug", "AUCLSTUB", "Plasma/Blood/Serum", "140",
      "AUCLSTW", "AUC to Last Nonzero Conc Norm by WT", "AUClast_W", "Plasma/Blood/Serum", "141",
      "AUCTAUB", "AUC Over Dosing Interval Norm by BMI", "AUC_TAU_B", "Plasma/Blood/Serum", "142",
      "AUCTAUD", "AUC Over Dosing Interval Norm by Dose", "AUC_TAU_D", "Plasma/Blood/Serum", "143",
      "AUCTAUS", "AUC Over Dosing Interval Norm by SA", "AUC_TAU_S", "Plasma/Blood/Serum", "144",
      "AUCTAUW", "AUC Over Dosing Interval Norm by WT", "AUC_TAU_W", "Plasma/Blood/Serum", "145",
      "AUMCIFOB", "AUMC Infinity Obs Norm by BMI", "AUMCINF_obs_B", "Plasma/Blood/Serum", "146",
      "AUMCIFOD", "AUMC Infinity Obs Norm by Dose", "AUMCINF_obs_D", "Plasma/Blood/Serum", "147",
      "AUMCIFOS", "AUMC Infinity Obs Norm by SA", "AUMCINF_obs_S", "Plasma/Blood/Serum", "148",
      "AUMCIFOW", "AUMC Infinity Obs Norm by WT", "AUMCINF_obs_W", "Plasma/Blood/Serum", "149",
      "AUMCIFPB", "AUMC Infinity Pred Norm by BMI", "AUMCINF_pred_B", "Plasma/Blood/Serum", "150",
      "AUMCIFPD", "AUMC Infinity Pred Norm by Dose", "AUMCINF_pred_D", "Plasma/Blood/Serum", "151",
      "AUMCIFPS", "AUMC Infinity Pred Norm by SA", "AUMCINF_pred_S", "Plasma/Blood/Serum", "152",
      "AUMCIFPW", "AUMC Infinity Pred Norm by WT", "AUMCINF_pred_W", "Plasma/Blood/Serum", "153",
      "AUMCLSTB", "AUMC to Last Nonzero Conc Norm by BMI", "AUMClast_B", "Plasma/Blood/Serum", "154",
      "AUMCLSTD", "AUMC to Last Nonzero Conc Norm by Dose", "AUMClast_D", "Plasma/Blood/Serum", "155",
      "AUMCLSTS", "AUMC to Last Nonzero Conc Norm by SA", "AUMClast_S", "Plasma/Blood/Serum", "156",
      "AUMCLSTW", "AUMC to Last Nonzero Conc Norm by WT", "AUMClast_W", "Plasma/Blood/Serum", "157",
      "AUMCTAUB", "AUMC Over Dosing Interval Norm by BMI", "AUMCTAUB", "Plasma/Blood/Serum", "158",
      "AUMCTAUD", "AUMC Over Dosing Interval Norm by Dose", "AUMCTAUD", "Plasma/Blood/Serum", "159",
      "AUMCTAUS", "AUMC Over Dosing Interval Norm by SA", "AUMCTAUS", "Plasma/Blood/Serum", "160",
      "AUMCTAUW", "AUMC Over Dosing Interval Norm by WT", "AUMCTAUW", "Plasma/Blood/Serum", "161",
      "AURCALLB", "AURC All Norm by BMI", "AURCALLB", "Plasma/Blood/Serum", "162",
      "AURCALLD", "AURC All Norm by Dose", "AURCALLD", "Plasma/Blood/Serum", "163",
      "AURCALLS", "AURC All Norm by SA", "AURCALLS", "Plasma/Blood/Serum", "164",
      "AURCALLW", "AURC All Norm by WT", "AURCALLW", "Plasma/Blood/Serum", "165",
      "AURCIFOB", "AURC Infinity Obs Norm by BMI", "AURCIFOB", "Plasma/Blood/Serum", "166",
      "AURCIFOD", "AURC Infinity Obs Norm by Dose", "AURCIFOD", "Plasma/Blood/Serum", "167",
      "AURCIFOS", "AURC Infinity Obs Norm by SA", "AURCIFOS", "Plasma/Blood/Serum", "168",
      "AURCIFOW", "AURC Infinity Obs Norm by WT", "AURCIFOW", "Plasma/Blood/Serum", "169",
      "AURCIFPB", "AURC Infinity Pred Norm by BMI", "AURCIFPB", "Plasma/Blood/Serum", "170",
      "AURCIFPD", "AURC Infinity Pred Norm by Dose", "AURCIFPD", "Plasma/Blood/Serum", "171",
      "AURCIFPS", "AURC Infinity Pred Norm by SA", "AURCIFPS", "Plasma/Blood/Serum", "172",
      "AURCIFPW", "AURC Infinity Pred Norm by WT", "AURCIFPW", "Plasma/Blood/Serum", "173",
      "AURCINT", "AURC from T1 to T2", "AURCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "174",
      "AURCINTB", "AURC from T1 to T2 Norm by BMI", "AURCINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "175",
      "AURCINTD", "AURC from T1 to T2 Norm by Dose", "AURCINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "176",
      "AURCINTS", "AURC from T1 to T2 Norm by SA", "AURCINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "177",
      "AURCINTW", "AURC from T1 to T2 Norm by WT", "AURCINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "178",
      "AURCLSTB", "AURC to Last Nonzero Rate Norm by BMI", "AURCLSTB", "Plasma/Blood/Serum", "179",
      "AURCLSTD", "AURC to Last Nonzero Rate Norm by Dose", "AURCLSTD", "Plasma/Blood/Serum", "180",
      "AURCLSTS", "AURC to Last Nonzero Rate Norm by SA", "AURCLSTS", "Plasma/Blood/Serum", "181",
      "AURCLSTW", "AURC to Last Nonzero Rate Norm by WT", "AURCLSTW", "Plasma/Blood/Serum", "182",
      "C0B", "Initial Conc Norm by BMI", "C0B", "Plasma/Blood/Serum", "183",
      "C0D", "Initial Conc Norm by Dose", "C0D", "Plasma/Blood/Serum", "184",
      "C0S", "Initial Conc Norm by SA", "C0S", "Plasma/Blood/Serum", "185",
      "C0W", "Initial Conc Norm by WT", "C0W", "Plasma/Blood/Serum", "186",
      "CAVGB", "Average Conc Norm by BMI", "CAVGB", "Plasma/Blood/Serum", "187",
      "CAVGD", "Average Conc Norm by Dose", "CAVGD", "Plasma/Blood/Serum", "188",
      "CAVGINT", "Average Conc from T1 to T2", "CAVGINT_T1_T2_UNIT", "Plasma/Blood/Serum", "189",
      "CAVGINTB", "Average Conc from T1 to T2 Norm by BMI", "CAVGINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "190",
      "CAVGINTD", "Average Conc from T1 to T2 Norm by Dose", "CAVGINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "191",
      "CAVGINTS", "Average Conc from T1 to T2 Norm by SA", "CAVGINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "192",
      "CAVGINTW", "Average Conc from T1 to T2 Norm by WT", "CAVGINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "193",
      "CAVGS", "Average Conc Norm by SA", "CAVGS", "Plasma/Blood/Serum", "194",
      "CAVGW", "Average Conc Norm by WT", "CAVGW", "Plasma/Blood/Serum", "195",
      "CHTMAX", "Concentration at Half Tmax", "CHTMAX", "Plasma/Blood/Serum", "196",
      "CLFOB", "Total CL Obs by F Norm by BMI", "CLFOB", "Plasma/Blood/Serum", "197",
      "CLFOD", "Total CL Obs by F Norm by Dose", "CLFOD", "Plasma/Blood/Serum", "198",
      "CLFOS", "Total CL Obs by F Norm by SA", "CLFOS", "Plasma/Blood/Serum", "199",
      "CLFOW", "Total CL Obs by F Norm by WT", "CLFOW", "Plasma/Blood/Serum", "200",
      "CLFPB", "Total CL Pred by F Norm by BMI", "CLFPB", "Plasma/Blood/Serum", "201",
      "CLFPD", "Total CL Pred by F Norm by Dose", "CLFPD", "Plasma/Blood/Serum", "202",
      "CLFPS", "Total CL Pred by F Norm by SA", "CLFPS", "Plasma/Blood/Serum", "203",
      "CLFPW", "Total CL Pred by F Norm by WT", "CLFPW", "Plasma/Blood/Serum", "204",
      "CLFTAU", "Total CL by F for Dose Int", "CLFTAU", "Plasma/Blood/Serum", "205",
      "CLFTAUB", "Total CL by F for Dose Int Norm by BMI", "CLFTAUB", "Plasma/Blood/Serum", "206",
      "CLFTAUD", "Total CL by F for Dose Int Norm by Dose", "CLFTAUD", "Plasma/Blood/Serum", "207",
      "CLFTAUS", "Total CL by F for Dose Int Norm by SA", "CLFTAUS", "Plasma/Blood/Serum", "208",
      "CLFTAUW", "Total CL by F for Dose Int Norm by WT", "CLFTAUW", "Plasma/Blood/Serum", "209",
      "CLFUB", "Apparent CL for Unbound Drug", "CLFUB", "Plasma/Blood/Serum", "210",
      "CLOB", "Total CL Obs Norm by BMI", "CLOB", "Plasma/Blood/Serum", "211",
      "CLOD", "Total CL Obs Norm by Dose", "CLOD", "Plasma/Blood/Serum", "212",
      "CLOS", "Total CL Obs Norm by SA", "CLOS", "Plasma/Blood/Serum", "213",
      "CLOUB", "Total CL Obs for Unbound Drug", "CLOUB", "Plasma/Blood/Serum", "214",
      "CLOW", "Total CL Obs Norm by WT", "CLOW", "Plasma/Blood/Serum", "215",
      "CLPB", "Total CL Pred Norm by BMI", "CLPB", "Plasma/Blood/Serum", "216",
      "CLPD", "Total CL Pred Norm by Dose", "CLPD", "Plasma/Blood/Serum", "217",
      "CLPS", "Total CL Pred Norm by SA", "CLPS", "Plasma/Blood/Serum", "218",
      "CLPUB", "Total CL Pred for Unbound Drug", "CLPUB", "Plasma/Blood/Serum", "219",
      "CLPW", "Total CL Pred Norm by WT", "CLPW", "Plasma/Blood/Serum", "220",
      "CLRPCLEV", "Renal CL as Pct CL EV", "CLRPCLEV", "Urine", "221",
      "CLRPCLIV", "Renal CL as Pct CL IV", "CLRPCLIV", "Urine", "222",
      "CLSTB", "Last Nonzero Conc Norm by BMI", "CLSTB", "Plasma/Blood/Serum", "223",
      "CLSTD", "Last Nonzero Conc Norm by Dose", "CLSTD", "Plasma/Blood/Serum", "224",
      "CLSTS", "Last Nonzero Conc Norm by SA", "CLSTS", "Plasma/Blood/Serum", "225",
      "CLSTW", "Last Nonzero Conc Norm by WT", "CLSTW", "Plasma/Blood/Serum", "226",
      "CLTAU", "Total CL for Dose Int", "CLTAU", "Plasma/Blood/Serum", "227",
      "CLTAUB", "Total CL for Dose Int Norm by BMI", "CLTAUB", "Plasma/Blood/Serum", "228",
      "CLTAUD", "Total CL for Dose Int Norm by Dose", "CLTAUD", "Plasma/Blood/Serum", "229",
      "CLTAUS", "Total CL for Dose Int Norm by SA", "CLTAUS", "Plasma/Blood/Serum", "230",
      "CLTAUW", "Total CL for Dose Int Norm by WT", "CLTAUW", "Plasma/Blood/Serum", "231",
      "CMAXB", "Max Conc Norm by BMI", "CMAX_B", "Plasma/Blood/Serum", "232",
      "CMAXLN", "Max Conc LN Transformed", "CMAXLN", "Plasma/Blood/Serum", "233",
      "CMAXS", "Max Conc Norm by SA", "CMAXS", "Plasma/Blood/Serum", "234",
      "CMAXUB", "Max Conc, Unbound Drug", "CMAXUB", "Plasma/Blood/Serum", "235",
      "CMAXW", "Max Conc Norm by WT", "CMAXW", "Plasma/Blood/Serum", "236",
      "CMINB", "Min Conc Norm by BMI", "CMINB", "Plasma/Blood/Serum", "237",
      "CMIND", "Min Conc Norm by Dose", "CMIND", "Plasma/Blood/Serum", "238",
      "CMINS", "Min Conc Norm by SA", "CMINS", "Plasma/Blood/Serum", "239",
      "CMINW", "Min Conc Norm by WT", "CMINW", "Plasma/Blood/Serum", "240",
      "CONC", "Concentration", "CONC", "Plasma/Blood/Serum", "241",
      "CONCB", "Conc by BMI", "CONCB", "Plasma/Blood/Serum", "242",
      "CONCD", "Conc by Dose", "CONCD", "Plasma/Blood/Serum", "243",
      "CONCS", "Conc by SA", "CONCS", "Plasma/Blood/Serum", "244",
      "CONCW", "Conc by WT", "CONCW", "Plasma/Blood/Serum", "245",
      "CTROUGH", "Conc Trough", "CTROUGH", "Plasma/Blood/Serum", "246",
      "CTROUGHB", "Conc Trough by BMI", "CTROUGHB", "Plasma/Blood/Serum", "247",
      "CTROUGHD", "Conc Trough by Dose", "CTROUGHD", "Plasma/Blood/Serum", "248",
      "CTROUGHS", "Conc Trough by SA", "CTROUGHS", "Plasma/Blood/Serum", "249",
      "CTROUGHW", "Conc Trough by WT", "CTROUGHW", "Plasma/Blood/Serum", "250",
      "EFFHL", "Effective Half-Life", "EFFHL", "Plasma/Blood/Serum", "251",
      "ERINT", "Excret Rate from T1 to T2", "ERINT_T1_T2_UNIT", "Plasma/Blood/Serum", "252",
      "ERINTB", "Excret Rate from T1 to T2 Norm by BMI", "ERINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "253",
      "ERINTD", "Excret Rate from T1 to T2 Norm by Dose", "ERINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "254",
      "ERINTS", "Excret Rate from T1 to T2 Norm by SA", "ERINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "255",
      "ERINTW", "Excret Rate from T1 to T2 Norm by WT", "ERINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "256",
      "ERLSTB", "Last Meas Excretion Rate Norm by BMI", "ERLSTB", "Plasma/Blood/Serum", "257",
      "ERLSTD", "Last Meas Excretion Rate Norm by Dose", "ERLSTD", "Plasma/Blood/Serum", "258",
      "ERLSTS", "Last Meas Excretion Rate Norm by SA", "ERLSTS", "Plasma/Blood/Serum", "259",
      "ERLSTW", "Last Meas Excretion Rate Norm by WT", "ERLSTW", "Plasma/Blood/Serum", "260",
      "ERMAXB", "Max Excretion Rate Norm by BMI", "ERMAXB", "Plasma/Blood/Serum", "261",
      "ERMAXD", "Max Excretion Rate Norm by Dose", "ERMAXD", "Plasma/Blood/Serum", "262",
      "ERMAXS", "Max Excretion Rate Norm by SA", "ERMAXS", "Plasma/Blood/Serum", "263",
      "ERMAXW", "Max Excretion Rate Norm by WT", "ERMAXW", "Plasma/Blood/Serum", "264",
      "ERTLST", "Midpoint of Interval of Last Nonzero ER", "ERTLST", "Plasma/Blood/Serum", "265",
      "FABS", "Absolute Bioavailability", "FABS", "Plasma/Blood/Serum", "266",
      "FB", "Fraction Bound", "FB", "Plasma/Blood/Serum", "267",
      "FREL", "Relative Bioavailability", "FREL", "Plasma/Blood/Serum", "268",
      "FREXINT", "Fract Excr from T1 to T2", "FREXINT_T1_T2_UNIT", "Plasma/Blood/Serum", "269",
      "FU", "Fraction Unbound", "FU", "Plasma/Blood/Serum", "270",
      "HDCL", "Hemodialysis Clearance", "HDCL", "Plasma/Blood/Serum", "271",
      "HDER", "Hemodialysis Extraction Ratio", "HDER", "Plasma/Blood/Serum", "272",
      "HTMAX", "Half Tmax", "HTMAX", "Plasma/Blood/Serum", "273",
      "LAMZLTAU", "Lambda z Lower Limit TAU", "LAMZLTAU", "Plasma/Blood/Serum", "274",
      "LAMZNTAU", "Number of Points for Lambda z TAU", "LAMZNTAU", "Plasma/Blood/Serum", "275",
      "LAMZSPN", "Lambda z Span", "LAMZSPN", "Plasma/Blood/Serum", "276",
      "LAMZTAU", "Lambda z TAU", "LAMZTAU", "Plasma/Blood/Serum", "277",
      "LAMZUTAU", "Lambda z Upper Limit TAU", "LAMZUTAU", "Plasma/Blood/Serum", "278",
      "MAT", "Mean Absorption Time", "MAT", "Plasma/Blood/Serum", "279",
      "MRAUCIFO", "Metabolite Ratio for AUC Infinity Obs", "MRAUCIFO", "Plasma/Blood/Serum", "280",
      "MRAUCIFP", "Metabolite Ratio for AUC Infinity Pred", "MRAUCIFP", "Plasma/Blood/Serum", "281",
      "MRAUCINT", "Metabolite Ratio AUC from T1 to T2", "MRAUCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "282",
      "MRAUCLST", "Metabolite Ratio AUC Last Nonzero Conc", "MRAUCLST", "Plasma/Blood/Serum", "283",
      "MRAUCTAU", "Metabolite Ratio for AUC Dosing Interval", "MRAUCTAU", "Plasma/Blood/Serum", "284",
      "MRCMAX", "Metabolite Ratio for Max Conc", "MRCMAX", "Plasma/Blood/Serum", "285",
      "MRTEVIFO", "MRT Extravasc Infinity Obs", "MRTEVIFO", "Plasma/Blood/Serum", "286",
      "MRTEVIFP", "MRT Extravasc Infinity Pred", "MRTEVIFP", "Plasma/Blood/Serum", "287",
      "MRTEVLST", "MRT Extravasc to Last Nonzero Conc", "MRTEVLST", "Plasma/Blood/Serum", "288",
      "MRTIVIFO", "MRT Intravasc Infinity Obs", "MRTIVIFO", "Plasma/Blood/Serum", "289",
      "MRTIVIFP", "MRT Intravasc Infinity Pred", "MRTIVIFP", "Plasma/Blood/Serum", "290",
      "MRTIVLST", "MRT Intravasc to Last Nonzero Conc", "MRTIVLST", "Plasma/Blood/Serum", "291",
      "NRENALCL", "Nonrenal CL", "NRENALCL", "Urine", "292",
      "NRENLCLB", "Nonrenal CL Norm by BMI", "NRENLCLB", "Urine", "293",
      "NRENLCLD", "Nonrenal CL Norm by Dose", "NRENLCLD", "Urine", "294",
      "NRENLCLS", "Nonrenal CL Norm by SA", "NRENLCLS", "Urine", "295",
      "NRENLCLW", "Nonrenal CL Norm by WT", "NRENLCLW", "Urine", "296",
      "PTROUGHR", "Peak Trough Ratio", "PTROUGHR", "Plasma/Blood/Serum", "297",
      "RAAUC", "Ratio AUC", "RAAUC", "Plasma/Blood/Serum", "298",
      "RAAUCIFO", "Ratio AUC Infinity Obs", "RAAUCIFO", "Plasma/Blood/Serum", "299",
      "RAAUCIFP", "Ratio AUC Infinity Pred", "RAAUCIFP", "Plasma/Blood/Serum", "300",
      "RACMAX", "Ratio CMAX", "RACMAX", "Plasma/Blood/Serum", "301",
      "RAMAXMIN", "Ratio of CMAX to CMIN", "RAMAXMIN", "Plasma/Blood/Serum", "302",
      "RCAMIFO", "Amt Rec Infinity Obs", "RCAMIFO", "Plasma/Blood/Serum", "303",
      "RCAMIFOB", "Amt Rec Infinity Obs Norm by BMI", "RCAMIFOB", "Plasma/Blood/Serum", "304",
      "RCAMIFOS", "Amt Rec Infinity Obs Norm by SA", "RCAMIFOS", "Plasma/Blood/Serum", "305",
      "RCAMIFOW", "Amt Rec Infinity Obs Norm by WT", "RCAMIFOW", "Plasma/Blood/Serum", "306",
      "RCAMIFP", "Amt Rec Infinity Pred", "RCAMIFP", "Plasma/Blood/Serum", "307",
      "RCAMIFPB", "Amt Rec Infinity Pred Norm by BMI", "RCAMIFPB", "Plasma/Blood/Serum", "308",
      "RCAMIFPS", "Amt Rec Infinity Pred Norm by SA", "RCAMIFPS", "Plasma/Blood/Serum", "309",
      "RCAMIFPW", "Amt Rec Infinity Pred Norm by WT", "RCAMIFPW", "Plasma/Blood/Serum", "310",
      "RCAMINTB", "Amt Rec from T1 to T2 Norm by BMI", "RCAMINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "311",
      "RCAMINTS", "Amt Rec from T1 to T2 Norm by SA", "RCAMINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "312",
      "RCAMINTW", "Amt Rec from T1 to T2 Norm by WT", "RCAMINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "313",
      "RCAMTAU", "Amt Rec Over Dosing Interval", "RCAMTAU", "Plasma/Blood/Serum", "314",
      "RCAMTAUB", "Amt Rec Over Dosing Interval Norm by BMI", "RCAMTAUB", "Plasma/Blood/Serum", "315",
      "RCAMTAUS", "Amt Rec Over Dosing Interval Norm by SA", "RCAMTAUS", "Plasma/Blood/Serum", "316",
      "RCAMTAUW", "Amt Rec Over Dosing Interval Norm by WT", "RCAMTAUW", "Plasma/Blood/Serum", "317",
      "RCPCIFO", "Pct Rec Infinity Obs", "RCPCIFO", "Plasma/Blood/Serum", "318",
      "RCPCIFOB", "Pct Rec Infinity Obs Norm by BMI", "RCPCIFOB", "Plasma/Blood/Serum", "319",
      "RCPCIFOS", "Pct Rec Infinity Obs Norm by SA", "RCPCIFOS", "Plasma/Blood/Serum", "320",
      "RCPCIFOW", "Pct Rec Infinity Obs Norm by WT", "RCPCIFOW", "Plasma/Blood/Serum", "321",
      "RCPCIFP", "Pct Rec Infinity Pred", "RCPCIFP", "Plasma/Blood/Serum", "322",
      "RCPCIFPB", "Pct Rec Infinity Pred Norm by BMI", "RCPCIFPB", "Plasma/Blood/Serum", "323",
      "RCPCIFPS", "Pct Rec Infinity Pred Norm by SA", "RCPCIFPS", "Plasma/Blood/Serum", "324",
      "RCPCIFPW", "Pct Rec Infinity Pred Norm by WT", "RCPCIFPW", "Plasma/Blood/Serum", "325",
      "RCPCINTB", "Pct Rec from T1 to T2 Norm by BMI", "RCPCINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "326",
      "RCPCINTS", "Pct Rec from T1 to T2 Norm by SA", "RCPCINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "327",
      "RCPCINTW", "Pct Rec from T1 to T2 Norm by WT", "RCPCINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "328",
      "RCPCLST", "Pct Rec to Last Nonzero Conc", "RCPCLST", "Plasma/Blood/Serum", "329",
      "RCPCTAU", "Pct Rec Over Dosing Interval", "RCPCTAU", "Plasma/Blood/Serum", "330",
      "RCPCTAUB", "Pct Rec Over Dosing Interval Norm by BMI", "RCPCTAUB", "Plasma/Blood/Serum", "331",
      "RCPCTAUS", "Pct Rec Over Dosing Interval Norm by SA", "RCPCTAUS", "Plasma/Blood/Serum", "332",
      "RCPCTAUW", "Pct Rec Over Dosing Interval Norm by WT", "RCPCTAUW", "Plasma/Blood/Serum", "333",
      "RENALCLB", "Renal CL Norm by BMI", "RENALCLB", "Urine", "334",
      "RENALCLD", "Renal CL Norm by Dose", "RENALCLD", "Urine", "335",
      "RENALCLS", "Renal CL Norm by SA", "RENALCLS", "Urine", "336",
      "RENALCLW", "Renal CL Norm by WT", "RENALCLW", "Urine", "337",
      "RENCLTAU", "Renal CL for Dose Int", "RENCLTAU", "Urine", "338",
      "RNCLINT", "Renal CL from T1 to T2", "RNCLINT_T1_T2_UNIT", "Urine", "339",
      "RNCLINTB", "Renal CL from T1 to T2 Norm by BMI", "RNCLINTB_T1_T2_UNIT", "Urine", "340",
      "RNCLINTD", "Renal CL from T1 to T2 Norm by Dose", "RNCLINTD_T1_T2_UNIT", "Urine", "341",
      "RNCLINTS", "Renal CL from T1 to T2 Norm by SA", "RNCLINTS_T1_T2_UNIT", "Urine", "342",
      "RNCLINTW", "Renal CL from T1 to T2 Norm by WT", "RNCLINTW_T1_T2_UNIT", "Urine", "343",
      "RNCLTAUB", "Renal CL for Dose Int Norm by BMI", "RNCLTAUB", "Urine", "344",
      "RNCLTAUD", "Renal CL for Dose Int Norm by Dose", "RNCLTAUD", "Urine", "345",
      "RNCLTAUS", "Renal CL for Dose Int Norm by SA", "RNCLTAUS", "Urine", "346",
      "RNCLTAUW", "Renal CL for Dose Int Norm by WT", "RNCLTAUW", "Urine", "347",
      "RNCLUB", "Renal CL for Unbound Drug", "RNCLUB", "Urine", "348",
      "SRAUC", "Stationarity Ratio AUC", "SRAUC", "Plasma/Blood/Serum", "349",
      "SWING", "Swing", "SWING", "Plasma/Blood/Serum", "350",
      "TAUHL", "Half-Life TAU", "TAUHL", "Plasma/Blood/Serum", "351",
      "TBBL", "Time Below Baseline", "Time_Below_B", "Plasma/Blood/Serum", "352",
      "TROUGHPR", "Trough Peak Ratio", "TROUGHPR", "Plasma/Blood/Serum", "353",
      "V0", "Vol Dist Initial", "V0", "Plasma/Blood/Serum", "354",
      "V0B", "Vol Dist Initial Norm by BMI", "V0B", "Plasma/Blood/Serum", "355",
      "V0D", "Vol Dist Initial Norm by Dose", "V0D", "Plasma/Blood/Serum", "356",
      "V0S", "Vol Dist Initial Norm by SA", "V0S", "Plasma/Blood/Serum", "357",
      "V0W", "Vol Dist Initial Norm by WT", "V0W", "Plasma/Blood/Serum", "358",
      "VSSOB", "Vol Dist Steady State Obs Norm by BMI", "VSSOB", "Plasma/Blood/Serum", "359",
      "VSSOBD", "Vol Dist Steady State Obs by B", "VSSOBD", "Plasma/Blood/Serum", "360",
      "VSSOD", "Vol Dist Steady State Obs Norm by Dose", "VSSOD", "Plasma/Blood/Serum", "361",
      "VSSOF", "Vol Dist Steady State Obs by F", "VSSOF", "Plasma/Blood/Serum", "362",
      "VSSOS", "Vol Dist Steady State Obs Norm by SA", "VSSOS", "Plasma/Blood/Serum", "363",
      "VSSOUB", "Vol Dist Steady State Obs by UB", "VSSOUB", "Plasma/Blood/Serum", "364",
      "VSSOW", "Vol Dist Steady State Obs Norm by WT", "VSSOW", "Plasma/Blood/Serum", "365",
      "VSSPB", "Vol Dist Steady State Pred Norm by BMI", "VSSPB", "Plasma/Blood/Serum", "366",
      "VSSPBD", "Vol Dist Steady State Pred by B", "VSSPBD", "Plasma/Blood/Serum", "367",
      "VSSPD", "Vol Dist Steady State Pred Norm by Dose", "VSSPD", "Plasma/Blood/Serum", "368",
      "VSSPF", "Vol Dist Steady State Pred by F", "VSSPF", "Plasma/Blood/Serum", "369",
      "VSSPS", "Vol Dist Steady State Pred Norm by SA", "VSSPS", "Plasma/Blood/Serum", "370",
      "VSSPUB", "Vol Dist Steady State Pred by UB", "VSSPUB", "Plasma/Blood/Serum", "371",
      "VSSPW", "Vol Dist Steady State Pred Norm by WT", "VSSPW", "Plasma/Blood/Serum", "372",
      "VZ", "Vol Z", "Vz", "Plasma/Blood/Serum", "373",
      "VZF", "Vol Z by F", "Vz_F", "Plasma/Blood/Serum", "374",
      "VZFOB", "Vz Obs by F Norm by BMI", "VZFOB", "Plasma/Blood/Serum", "375",
      "VZFOD", "Vz Obs by F Norm by Dose", "VZFOD", "Plasma/Blood/Serum", "376",
      "VZFOS", "Vz Obs by F Norm by SA", "VZFOS", "Plasma/Blood/Serum", "377",
      "VZFOUB", "Vz Obs by F for UB", "VZFOUB", "Plasma/Blood/Serum", "378",
      "VZFOW", "Vz Obs by F Norm by WT", "VZFOW", "Plasma/Blood/Serum", "379",
      "VZFPB", "Vz Pred by F Norm by BMI", "VZFPB", "Plasma/Blood/Serum", "380",
      "VZFPD", "Vz Pred by F Norm by Dose", "VZFPD", "Plasma/Blood/Serum", "381",
      "VZFPS", "Vz Pred by F Norm by SA", "VZFPS", "Plasma/Blood/Serum", "382",
      "VZFPUB", "Vz Pred by F for UB", "VZFPUB", "Plasma/Blood/Serum", "383",
      "VZFPW", "Vz Pred by F Norm by WT", "VZFPW", "Plasma/Blood/Serum", "384",
      "VZFTAU", "Vz for Dose Int by F", "VZFTAU", "Plasma/Blood/Serum", "385",
      "VZFTAUB", "Vz for Dose Int by F Norm by BMI", "VZFTAUB", "Plasma/Blood/Serum", "386",
      "VZFTAUD", "Vz for Dose Int by F Norm by Dose", "VZFTAUD", "Plasma/Blood/Serum", "387",
      "VZFTAUS", "Vz for Dose Int by F Norm by SA", "VZFTAUS", "Plasma/Blood/Serum", "388",
      "VZFTAUW", "Vz for Dose Int by F Norm by WT", "VZFTAUW", "Plasma/Blood/Serum", "389",
      "VZOB", "Vz Obs Norm by BMI", "VZOB", "Plasma/Blood/Serum", "390",
      "VZOD", "Vz Obs Norm by Dose", "VZOD", "Plasma/Blood/Serum", "391",
      "VZOS", "Vz Obs Norm by SA", "VZOS", "Plasma/Blood/Serum", "392",
      "VZOUB", "Vz Obs for UB", "VZOUB", "Plasma/Blood/Serum", "393",
      "VZOW", "Vz Obs Norm by WT", "VZOW", "Plasma/Blood/Serum", "394",
      "VZPB", "Vz Pred Norm by BMI", "VZPB", "Plasma/Blood/Serum", "395",
      "VZPD", "Vz Pred Norm by Dose", "VZPD", "Plasma/Blood/Serum", "396",
      "VZPS", "Vz Pred Norm by SA", "VZPS", "Plasma/Blood/Serum", "397",
      "VZPUB", "Vz Pred for UB", "VZPUB", "Plasma/Blood/Serum", "398"
    ),
    ncol = 5,
    byrow = TRUE
  ))
  colnames(pk_dataset) <- c("PARAMCD", "PARAM", "TLG_DISPLAY", "MATRIX", "TLG_ORDER")
  pk_dataset
}

#' Summarize Variables in Columns
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This analyze function uses the S3 generic function [s_summary()] to summarize different variables
#' that are arranged in columns. Additional standard formatting arguments are available. It is a
#' minimal wrapper for [rtables::analyze_colvars()]. The latter function is meant to add different
#' analysis methods for each column variables as different rows. To have the analysis methods as
#' column labels, please refer to [analyze_vars_in_cols()].
#'
#' @inheritParams argument_convention
#' @param ... arguments passed to `s_summary()`.
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return
#' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
#' Adding this function to an `rtable` layout will summarize the given variables, arrange the output
#' in columns, and add it to the table layout.
#'
#' @seealso [rtables::split_cols_by_multivar()] and [`analyze_colvars_functions`].
#'
#' @examples
#' dta_test <- data.frame(
#'   USUBJID = rep(1:6, each = 3),
#'   PARAMCD = rep("lab", 6 * 3),
#'   AVISIT = rep(paste0("V", 1:3), 6),
#'   ARM = rep(LETTERS[1:3], rep(6, 3)),
#'   AVAL = c(9:1, rep(NA, 9)),
#'   CHG = c(1:9, rep(NA, 9))
#' )
#'
#' ## Default output within a `rtables` pipeline.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
#'   summarize_colvars() %>%
#'   build_table(dta_test)
#'
#' ## Selection of statistics, formats and labels also work.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
#'   summarize_colvars(
#'     .stats = c("n", "mean_sd"),
#'     .formats = c("mean_sd" = "xx.x, xx.x"),
#'     .labels = c(n = "n", mean_sd = "Mean, SD")
#'   ) %>%
#'   build_table(dta_test)
#'
#' ## Use arguments interpreted by `s_summary`.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
#'   summarize_colvars(na.rm = FALSE) %>%
#'   build_table(dta_test)
#'
#' @export
summarize_colvars <- function(lyt,
                              ...,
                              .stats = c("n", "mean_sd", "median", "range", "count_fraction"),
                              .formats = NULL,
                              .labels = NULL,
                              .indent_mods = NULL) {
  # Build the analysis function once from the requested statistics together
  # with their formats, row labels and indentation modifiers.
  summary_afun <- create_afun_summary(.stats, .formats, .labels, .indent_mods)

  # Everything captured by `...` is handed to `analyze_colvars()` as
  # `extra_args`.
  analyze_colvars(lyt, afun = summary_afun, extra_args = list(...))
}

1		#' Multivariate Logistic Regression Table
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Layout-creating function which summarizes a logistic variable regression for binary outcome with
6		#' categorical/continuous covariates in model statement. For each covariate category (if categorical)
7		#' or specified values (if continuous), present degrees of freedom, regression parameter estimate and
8		#' standard error (SE) relative to reference group or category. Report odds ratios for each covariate
9		#' category or specified values and corresponding Wald confidence intervals as default but allow user
10		#' to specify other confidence levels. Report p-value for Wald chi-square test of the null hypothesis
11		#' that covariate has no effect on response in model containing all specified covariates.
12		#' Allow option to include one two-way interaction and present similar output for
13		#' each interaction degree of freedom.
14		#'
15		#' @inheritParams argument_convention
16		#' @param drop_and_remove_str (`character`)\cr string to be dropped and removed.
17		#'
18		#' @return A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
19		#' Adding this function to an `rtable` layout will add a logistic regression variable summary to the table layout.
20		#'
21		#' @note For the formula, the variable names need to be standard `data.frame` column names without
22		#' special characters.
23		#'
24		#' @examples
25		#' library(dplyr)
26		#' library(broom)
27		#'
28		#' adrs_f <- tern_ex_adrs %>%
29		#' filter(PARAMCD == "BESRSPI") %>%
30		#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
31		#' mutate(
32		#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
33		#' RACE = factor(RACE),
34		#' SEX = factor(SEX)
35		#' )
36		#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
37		#' mod1 <- fit_logistic(
38		#' data = adrs_f,
39		#' variables = list(
40		#' response = "Response",
41		#' arm = "ARMCD",
42		#' covariates = c("AGE", "RACE")
43		#' )
44		#' )
45		#' mod2 <- fit_logistic(
46		#' data = adrs_f,
47		#' variables = list(
48		#' response = "Response",
49		#' arm = "ARMCD",
50		#' covariates = c("AGE", "RACE"),
51		#' interaction = "AGE"
52		#' )
53		#' )
54		#'
55		#' df <- tidy(mod1, conf_level = 0.99)
56		#' df2 <- tidy(mod2, conf_level = 0.99)
57		#'
58		#' # flagging empty strings with "_"
59		#' df <- df_explicit_na(df, na_level = "_")
60		#' df2 <- df_explicit_na(df2, na_level = "_")
61		#'
62		#' result1 <- basic_table() %>%
63		#' summarize_logistic(
64		#' conf_level = 0.95,
65		#' drop_and_remove_str = "_"
66		#' ) %>%
67		#' build_table(df = df)
68		#' result1
69		#'
70		#' result2 <- basic_table() %>%
71		#' summarize_logistic(
72		#' conf_level = 0.95,
73		#' drop_and_remove_str = "_"
74		#' ) %>%
75		#' build_table(df = df2)
76		#' result2
77		#'
78		#' @export
summarize_logistic <- function(lyt,
                               conf_level,
                               drop_and_remove_str = "",
                               .indent_mods = NULL) {
  checkmate::assert_string(drop_and_remove_str)

  # Content-row constructors, one per flag column of the tidied model data.
  add_variable_rows <- logistic_summary_by_flag("is_variable_summary")
  add_term_rows <- logistic_summary_by_flag("is_term_summary", .indent_mods = .indent_mods)
  add_odds_ratio_rows <- logistic_summary_by_flag("is_reference_summary", .indent_mods = .indent_mods)
  level_filter <- drop_and_remove_levels(drop_and_remove_str)

  # Every row split uses `<var>` for the split and `<var>_label` for the row
  # labels, always with the same split function.
  split_by <- function(layout, var) {
    split_rows_by(
      layout,
      var = var,
      labels_var = paste0(var, "_label"),
      split_fun = level_filter
    )
  }

  lyt <- logistic_regression_cols(lyt, conf_level = conf_level)

  # Nesting order: variable > term > interaction > reference.
  lyt <- add_variable_rows(split_by(lyt, "variable"))
  lyt <- add_term_rows(split_by(lyt, "term"))
  lyt <- split_by(lyt, "interaction")
  lyt <- add_odds_ratio_rows(split_by(lyt, "reference"))
  lyt
}
101
102		#' Fit for Logistic Regression
103		#'
104		#' @description `r lifecycle::badge("stable")`
105		#'
106		#' Fit a (conditional) logistic regression model.
107		#'
108		#' @inheritParams argument_convention
109		#' @param data (`data.frame`)\cr the data frame on which the model is to be fit.
110		#' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`.
111		#' This will be used when fitting the (conditional) logistic regression model on the left hand
112		#' side of the formula.
113		#'
114		#' @return A fitted logistic regression model.
115		#'
116		#' @section Model Specification:
117		#'
118		#' The `variables` list needs to include the following elements:
119		#' * `arm`: Treatment arm variable name.
120		#' * `response`: The response variable name. Usually this is a 0/1 variable.
121		#' * `covariates`: This is either `NULL` (no covariates) or a character vector of covariate variable names.
122		#' * `interaction`: This is either `NULL` (no interaction) or a string of a single covariate variable name already
123		#' included in `covariates`. Then the interaction with the treatment arm is included in the model.
124		#'
125		#' @examples
126		#' library(dplyr)
127		#'
128		#' adrs_f <- tern_ex_adrs %>%
129		#' filter(PARAMCD == "BESRSPI") %>%
130		#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
131		#' mutate(
132		#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
133		#' RACE = factor(RACE),
134		#' SEX = factor(SEX)
135		#' )
136		#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
137		#' mod1 <- fit_logistic(
138		#' data = adrs_f,
139		#' variables = list(
140		#' response = "Response",
141		#' arm = "ARMCD",
142		#' covariates = c("AGE", "RACE")
143		#' )
144		#' )
145		#' mod2 <- fit_logistic(
146		#' data = adrs_f,
147		#' variables = list(
148		#' response = "Response",
149		#' arm = "ARMCD",
150		#' covariates = c("AGE", "RACE"),
151		#' interaction = "AGE"
152		#' )
153		#' )
154		#'
155		#' @export
fit_logistic <- function(data,
                         variables = list(
                           response = "Response",
                           arm = "ARMCD",
                           covariates = NULL,
                           interaction = NULL,
                           strata = NULL
                         ),
                         response_definition = "response") {
  assert_df_with_variables(data, variables)
  checkmate::assert_subset(names(variables), c("response", "arm", "covariates", "interaction", "strata"))
  checkmate::assert_string(response_definition)
  checkmate::assert_true(grepl("response", response_definition))

  # Left-hand side: the first literal "response" in the definition is replaced
  # by the actual response column name.
  lhs <- sub("response", variables$response, response_definition, fixed = TRUE)

  # Optional right-hand-side pieces; each is an empty string when not requested.
  covariate_part <- if (is.null(variables$covariates)) {
    ""
  } else {
    paste0(" + ", paste(variables$covariates, collapse = " + "))
  }

  interaction_part <- if (is.null(variables$interaction)) {
    ""
  } else {
    # The interaction must be a single variable that is also a covariate.
    checkmate::assert_string(variables$interaction)
    checkmate::assert_subset(variables$interaction, variables$covariates)
    paste0(" + ", variables$arm, ":", variables$interaction)
  }

  has_strata <- !is.null(variables$strata)
  strata_part <- if (has_strata) {
    # Several stratification variables are combined into one factor.
    strata_arg <- if (length(variables$strata) > 1) {
      paste0("I(interaction(", paste0(variables$strata, collapse = ", "), "))")
    } else {
      variables$strata
    }
    paste0("+ strata(", strata_arg, ")")
  } else {
    ""
  }

  form <- paste0(lhs, " ~ ", variables$arm, covariate_part, interaction_part, strata_part)

  # The local names `formula` and `data` are kept on purpose: they appear
  # verbatim in the call expression passed to the fitting functions below.
  formula <- stats::as.formula(form)

  if (has_strata) {
    # Stratified model: fitted through the conditional logistic wrapper.
    clogit_with_tryCatch(
      formula = formula,
      data = data,
      x = TRUE
    )
  } else {
    stats::glm(
      formula = formula,
      data = data,
      family = stats::binomial("logit")
    )
  }
}
208
209		#' Custom Tidy Method for Binomial GLM Results
210		#'
211		#' @description `r lifecycle::badge("stable")`
212		#'
213		#' Helper method (for [broom::tidy()]) to prepare a data frame from a `glm` object
214		#' with `binomial` family.
215		#'
216		#' @inheritParams argument_convention
217		#' @param at (`NULL` or `numeric`)\cr optional values for the interaction variable. Otherwise the median is used.
218		#' @param x logistic regression model fitted by [stats::glm()] with "binomial" family.
219		#'
220		#' @return A `data.frame` containing the tidied model.
221		#'
222		#' @method tidy glm
223		#'
224		#' @seealso [h_logistic_regression] for relevant helper functions.
225		#'
226		#' @examples
227		#' library(dplyr)
228		#' library(broom)
229		#'
230		#' adrs_f <- tern_ex_adrs %>%
231		#' filter(PARAMCD == "BESRSPI") %>%
232		#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
233		#' mutate(
234		#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
235		#' RACE = factor(RACE),
236		#' SEX = factor(SEX)
237		#' )
238		#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
239		#' mod1 <- fit_logistic(
240		#' data = adrs_f,
241		#' variables = list(
242		#' response = "Response",
243		#' arm = "ARMCD",
244		#' covariates = c("AGE", "RACE")
245		#' )
246		#' )
247		#' mod2 <- fit_logistic(
248		#' data = adrs_f,
249		#' variables = list(
250		#' response = "Response",
251		#' arm = "ARMCD",
252		#' covariates = c("AGE", "RACE"),
253		#' interaction = "AGE"
254		#' )
255		#' )
256		#'
257		#' df <- tidy(mod1, conf_level = 0.99)
258		#' df2 <- tidy(mod2, conf_level = 0.99)
259		#'
260		#' @export
tidy.glm <- function(x, # nolint
                     conf_level = 0.95,
                     at = NULL,
                     ...) {
  checkmate::assert_class(x, "glm")
  checkmate::assert_set_equal(x$family$family, "binomial")

  term_labels <- attr(stats::terms(x), "term.labels")
  model_vars <- names(attr(x$terms, "dataClasses"))

  # A term label that is not itself the name of a model variable is treated
  # as an interaction term.
  has_interaction <- !all(term_labels %in% model_vars)

  tidied <- if (has_interaction) {
    h_logistic_inter_terms(
      x = term_labels,
      fit_glm = x,
      conf_level = conf_level,
      at = at
    )
  } else {
    h_logistic_simple_terms(
      x = term_labels,
      fit_glm = x,
      conf_level = conf_level
    )
  }

  # Convert the grouping columns to factors whose levels follow the order of
  # first appearance.
  for (col in c("variable", "term", "interaction", "reference")) {
    values <- tidied[[col]]
    tidied[[col]] <- factor(values, levels = unique(values))
  }
  tidied
}
290
291		#' Logistic Regression Multivariate Column Layout Function
292		#'
293		#' @description `r lifecycle::badge("stable")`
294		#'
295		#' Layout-creating function which creates a multivariate column layout summarizing logistic
296		#' regression results. This function is a wrapper for [rtables::split_cols_by_multivar()].
297		#'
298		#' @inheritParams argument_convention
299		#'
300		#' @return A layout object suitable for passing to further layouting functions. Adding this
301		#' function to an `rtable` layout will split the table into columns corresponding to
302		#' statistics `df`, `estimate`, `std_error`, `odds_ratio`, `ci`, and `pvalue`.
303		#'
304		#' @export
logistic_regression_cols <- function(lyt,
                                     conf_level = 0.95) {
  # One column per reported statistic, in display order.
  stat_vars <- c("df", "estimate", "std_error", "odds_ratio", "ci", "pvalue")

  # Only the confidence-interval header depends on `conf_level`.
  ci_label <- paste("Wald", f_conf_level(conf_level))
  stat_labels <- c(
    df = "Degrees of Freedom",
    estimate = "Parameter Estimate",
    std_error = "Standard Error",
    odds_ratio = "Odds Ratio",
    ci = ci_label,
    pvalue = "p-value"
  )

  split_cols_by_multivar(lyt = lyt, vars = stat_vars, varlabels = stat_labels)
}
322
323		#' Logistic Regression Summary Table Constructor Function
324		#'
325		#' @description `r lifecycle::badge("stable")`
326		#'
327		#' Constructor for content functions to be used in [`summarize_logistic()`] to summarize
328		#' logistic regression results. This function is a wrapper for [rtables::summarize_row_groups()].
329		#'
330		#' @inheritParams argument_convention
331		#' @param flag_var (`string`)\cr variable name identifying which row should be used in this
332		#' content function.
333		#'
334		#' @return A content function.
335		#'
336		#' @export
logistic_summary_by_flag <- function(flag_var, .indent_mods = NULL) {
  checkmate::assert_string(flag_var)

  # The returned function takes a layout and adds content rows to it, using
  # one content function per statistic column. Each content function is built
  # by `cfun_by_flag()` from the statistic name, `flag_var` and a format.
  function(lyt) {
    cfun_list <- list(
      df = cfun_by_flag("df", flag_var, format = "xx.", .indent_mods = .indent_mods),
      estimate = cfun_by_flag("estimate", flag_var, format = "xx.xxx", .indent_mods = .indent_mods),
      std_error = cfun_by_flag("std_error", flag_var, format = "xx.xxx", .indent_mods = .indent_mods),
      odds_ratio = cfun_by_flag("odds_ratio", flag_var, format = ">999.99", .indent_mods = .indent_mods),
      ci = cfun_by_flag("ci", flag_var, format = format_extreme_values_ci(2L), .indent_mods = .indent_mods),
      # The format label uses a plain `|`; a backslash before it is not a valid
      # R escape sequence and would not match the intended format label.
      pvalue = cfun_by_flag("pvalue", flag_var, format = "x.xxxx | (<0.0001)", .indent_mods = .indent_mods)
    )
    summarize_row_groups(
      lyt = lyt,
      cfun = cfun_list
    )
  }
}

1		#' Helper Function to create a new `SMQ` variable in `ADAE` by stacking `SMQ` and/or `CQ` records.
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper Function to create a new `SMQ` variable in `ADAE` that consists of all adverse events belonging to
6		#' selected Standardized/Customized queries. The new dataset will only contain records of the adverse events
7		#' belonging to any of the selected baskets.
8		#'
9		#' @inheritParams argument_convention
10		#' @param baskets (`character`)\cr variable names of the selected Standardized/Customized queries.
11		#' @param smq_varlabel (`string`)\cr a label for the new variable created.
12		#' @param keys (`character`)\cr names of the key variables to be returned along with the new variable created.
13		#' @param aag_summary (`data.frame`)\cr containing the `SMQ` baskets and the levels of interest for the final `SMQ`
14		#' variable. This is useful when there are some levels of interest that are not observed in the `df` dataset.
15		#' The two columns of this dataset should be named `basket` and `basket_name`.
16		#'
17		#' @return `data.frame` with variables in `keys` taken from `df` and new variable `SMQ` containing
18		#' records belonging to the baskets selected via the `baskets` argument.
19		#'
20		#' @examples
21		#' adae <- tern_ex_adae[1:20, ] %>% df_explicit_na()
22		#' h_stack_by_baskets(df = adae)
23		#'
24		#' aag <- data.frame(
25		#' NAMVAR = c("CQ01NAM", "CQ02NAM", "SMQ01NAM", "SMQ02NAM"),
26		#' REFNAME = c(
27		#' "D.2.1.5.3/A.1.1.1.1 AESI", "X.9.9.9.9/Y.8.8.8.8 AESI",
28		#' "C.1.1.1.3/B.2.2.3.1 AESI", "C.1.1.1.3/B.3.3.3.3 AESI"
29		#' ),
30		#' SCOPE = c("", "", "BROAD", "BROAD"),
31		#' stringsAsFactors = FALSE
32		#' )
33		#'
34		#' basket_name <- character(nrow(aag))
35		#' cq_pos <- grep("^(CQ).+NAM$", aag$NAMVAR)
36		#' smq_pos <- grep("^(SMQ).+NAM$", aag$NAMVAR)
37		#' basket_name[cq_pos] <- aag$REFNAME[cq_pos]
38		#' basket_name[smq_pos] <- paste0(
39		#' aag$REFNAME[smq_pos], "(", aag$SCOPE[smq_pos], ")"
40		#' )
41		#'
42		#' aag_summary <- data.frame(
43		#' basket = aag$NAMVAR,
44		#' basket_name = basket_name,
45		#' stringsAsFactors = TRUE
46		#' )
47		#'
48		#' result <- h_stack_by_baskets(df = adae, aag_summary = aag_summary)
49		#' all(levels(aag_summary$basket_name) %in% levels(result$SMQ))
50		#'
51		#' h_stack_by_baskets(
52		#' df = adae,
53		#' aag_summary = NULL,
54		#' keys = c("STUDYID", "USUBJID", "AEDECOD", "ARM"),
55		#' baskets = "SMQ01NAM"
56		#' )
57		#'
58		#' @export
h_stack_by_baskets <- function(df,
                               baskets = grep("^(SMQ|CQ).+NAM$", names(df), value = TRUE),
                               smq_varlabel = "Standardized MedDRA Query",
                               keys = c("STUDYID", "USUBJID", "ASTDTM", "AEDECOD", "AESEQ"),
                               aag_summary = NULL,
                               na_level = "<Missing>") {
  # Make missing values explicit in case the caller has not done so already.
  df <- df_explicit_na(df, na_level = na_level)

  # Validate `baskets` before anything is derived from it, so that a wrong type
  # is reported by the assertion rather than by `startsWith()`.
  checkmate::assert_character(baskets)
  checkmate::assert_string(smq_varlabel)
  checkmate::assert_data_frame(df)
  checkmate::assert_true(all(startsWith(baskets, "SMQ") | startsWith(baskets, "CQ")))
  checkmate::assert_true(all(endsWith(baskets, "NAM")))
  checkmate::assert_subset(baskets, names(df))
  checkmate::assert_subset(keys, names(df))

  # Each SMQxxNAM basket has a companion scope column SMQxxSC.
  smq_nam <- baskets[startsWith(baskets, "SMQ")]
  smq_sc <- gsub(pattern = "NAM", replacement = "SC", x = smq_nam, fixed = TRUE)
  smq <- stats::setNames(smq_sc, smq_nam)

  checkmate::assert_subset(smq_sc, names(df))
  checkmate::assert_string(na_level)

  if (!is.null(aag_summary)) {
    assert_df_with_variables(
      df = aag_summary,
      variables = list(val = c("basket", "basket_name"))
    )
    # Warn when `aag_summary$basket` and `baskets` have nothing in common;
    # the target baskets are expected to match.
    if (length(intersect(baskets, unique(aag_summary$basket))) == 0) {
      warning("There are 0 baskets in common between aag_summary$basket and `baskets` argument.")
    }
  }

  # `drop = FALSE` keeps a data frame even when a single key is requested on a
  # plain `data.frame` (a tibble never drops, so its behavior is unchanged).
  var_labels <- c(formatters::var_labels(df[, keys, drop = FALSE]), "SMQ" = smq_varlabel)

  # Convert `na_level` records of the basket columns back to NA for the
  # concatenation loop and the wide-to-long step below.
  df[, c(baskets, smq_sc)][df[, c(baskets, smq_sc)] == na_level] <- NA

  if (all(is.na(df[, baskets, drop = FALSE]))) {
    # No level observed for the target baskets: an empty data frame that keeps
    # all factor levels of the key columns is enough.
    df_long <- df[-seq_len(nrow(df)), keys, drop = FALSE]
  } else {
    # Start from the keys and the CQ baskets, then add one column per SMQ basket
    # holding "<name>(<scope>)".
    df_cnct <- df[, c(keys, baskets[startsWith(baskets, "CQ")]), drop = FALSE]

    for (nam in names(smq)) {
      sc <- smq[nam] # SMQxxSC corresponding to SMQxxNAM
      nam_notna <- !is.na(df[[nam]])
      new_colname <- paste(nam, sc, sep = "_")
      df_cnct[nam_notna, new_colname] <- paste0(df[[nam]], "(", df[[sc]], ")")[nam_notna]
    }

    df_cnct$unique_id <- seq_len(nrow(df_cnct))
    var_cols <- names(df_cnct)[!(names(df_cnct) %in% c(keys, "unique_id"))]

    # `reshape()` creates row names, which tibbles do not support (it would
    # warn), hence the conversion to a plain data frame.
    df_long <- stats::reshape(
      data = as.data.frame(df_cnct),
      varying = var_cols,
      v.names = "SMQ",
      idvar = names(df_cnct)[names(df_cnct) %in% c(keys, "unique_id")],
      direction = "long",
      new.row.names = seq_len(prod(length(var_cols), nrow(df_cnct)))
    )

    # Keep only records that belong to a basket and drop the reshape helpers.
    df_long <- df_long[!is.na(df_long[, "SMQ"]), !(names(df_long) %in% c("time", "unique_id"))]
    df_long$SMQ <- as.factor(df_long$SMQ)
  }

  smq_levels <- setdiff(levels(df_long[["SMQ"]]), na_level)

  if (!is.null(aag_summary)) {
    # Warn when none of the observed levels appears in `aag_summary`.
    if (length(intersect(smq_levels, unique(aag_summary$basket_name))) == 0) {
      warning("There are 0 basket levels in common between aag_summary$basket_name and df.")
    }
    # Add the levels of interest that were not observed in `df`.
    df_long[["SMQ"]] <- factor(
      df_long[["SMQ"]],
      levels = sort(
        c(
          smq_levels,
          setdiff(unique(aag_summary$basket_name), smq_levels)
        )
      )
    )
  } else {
    # Baskets without any observed record are added as levels by variable name.
    all_na_basket_flag <- vapply(df[, baskets, drop = FALSE], function(x) {
      all(is.na(x))
    }, FUN.VALUE = logical(1))
    all_na_basket <- baskets[all_na_basket_flag]

    df_long[["SMQ"]] <- factor(
      df_long[["SMQ"]],
      levels = sort(c(smq_levels, all_na_basket))
    )
  }
  formatters::var_labels(df_long) <- var_labels
  tibble::tibble(df_long)
}

1		#' Univariate Formula Special Term
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' The special term `univariate` indicates that the model should be fitted individually for
6		#' every variable included in univariate.
7		#'
8		#' @param x A vector of variable names separated by commas.
9		#'
10		#' @return When used within a model formula, produces univariate models for each variable provided.
11		#'
12		#' @details
13		#' If provided alongside with pairwise specification, the model
14		#' `y ~ ARM + univariate(SEX, AGE, RACE)` lead to the study and comparison of the models
15		#' + `y ~ ARM`
16		#' + `y ~ ARM + SEX`
17		#' + `y ~ ARM + AGE`
18		#' + `y ~ ARM + RACE`
19		#'
20		#' @export
univariate <- function(x) {
  # Capture the expression supplied by the caller as text and attach it to the
  # value as the `varname` attribute.
  supplied_expr <- deparse(substitute(x))
  structure(x, varname = supplied_expr)
}
24
# Return the right-hand term of a formula, converted with `as.character()`.
rht <- function(x) {
  checkmate::assert_formula(x)
  # The right-hand side is the last element of the formula object.
  as.character(x[[length(x)]])
}
31
32		#' Hazard Ratio Estimation in Interactions
33		#'
34		#' This function estimates the hazard ratios between arms when an interaction variable is given with
35		#' specific values.
36		#'
37		#' @param variable,given Names of two variable in interaction. We seek the estimation of the levels of `variable`
38		#' given the levels of `given`.
39		#' @param lvl_var,lvl_given corresponding levels as given by `levels`.
40		#' @param mmat A name numeric filled with 0 used as template to obtain the design matrix.
41		#' @param coef Numeric of estimated coefficients.
42		#' @param vcov Variance-covariance matrix of underlying model.
43		#' @param conf_level Single numeric for the confidence level of estimate intervals.
44		#'
45		#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A)
46		#' and Sex (F, M; reference Female). The model is abbreviated: y ~ Arm + Sex + Arm x Sex.
47		#' The cox regression estimates the coefficients along with a variance-covariance matrix for:
48		#'
49		#' - b1 (arm b), b2 (arm c)
50		#' - b3 (sex m)
51		#' - b4 (arm b: sex m), b5 (arm c: sex m)
52		#'
53		#' Given that I want an estimation of the Hazard Ratio for arm C/sex M, the estimation
54		#' will be given in reference to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5),
55		#' therefore the interaction coefficient is given by b2 + b5 while the standard error is obtained
56		#' as $sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5))$; the confidence limits use 1.96 times this value for a confidence level of 0.95.
57		#'
58		#' @return A list of matrix (one per level of variable) with rows corresponding to the combinations of
59		#' `variable` and `given`, with columns:
60		#' * `coef_hat`: Estimation of the coefficient.
61		#' * `coef_se`: Standard error of the estimation.
62		#' * `hr`: Hazard ratio.
63		#' * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio.
64		#'
65		#' @seealso [s_cox_multivariate()].
66		#'
67		#' @examples
68		#' library(dplyr)
69		#' library(survival)
70		#'
71		#' ADSL <- tern_ex_adsl %>%
72		#' filter(SEX %in% c("F", "M"))
73		#'
74		#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "PFS")
75		#' adtte$ARMCD <- droplevels(adtte$ARMCD)
76		#' adtte$SEX <- droplevels(adtte$SEX)
77		#'
78		#' mod <- coxph(
79		#' formula = Surv(time = AVAL, event = 1 - CNSR) ~ (SEX + ARMCD)^2,
80		#' data = adtte
81		#' )
82		#'
83		#' mmat <- stats::model.matrix(mod)[1, ]
84		#' mmat[!mmat == 0] <- 0
85		#'
86		#' @keywords internal
estimate_coef <- function(variable, given,
                          lvl_var, lvl_given,
                          coef,
                          mmat,
                          vcov,
                          conf_level = 0.95) {
  # Coefficient-style labels; the first level of `variable` is the reference
  # level and is therefore left out.
  variable_terms <- paste0(variable, lvl_var[-1])
  given_terms <- paste0(given, lvl_given)

  # One row per (variable level, given level) combination, ordered by variable.
  grid <- expand.grid(variable = variable_terms, given = given_terms)
  grid <- grid[order(grid$variable, grid$given), ]
  # The interaction coefficient may be named in either order.
  grid$inter <- paste0(grid$variable, ":", grid$given)
  grid$rev_inter <- paste0(grid$given, ":", grid$variable)

  group_by_variable <- grid$variable
  combo_labels <- paste(grid$variable, grid$given, sep = "/")

  # For every combination, switch on the template entries named after the
  # variable term or one of the two interaction spellings.
  contrast <- apply(
    X = grid, MARGIN = 1, FUN = function(combo) {
      active_terms <- combo[names(combo) != "given"]
      mmat[names(mmat) %in% active_terms] <- 1
      mmat
    }
  )
  colnames(contrast) <- combo_labels

  # Linear combination of the coefficients for each contrast.
  estimates <- t(contrast) %*% as.matrix(coef)
  colnames(estimates) <- "coef"

  # Standard error: square root of the sum of the variance-covariance entries
  # of the coefficients involved in the contrast.
  std_err <- apply(contrast, 2, function(weights) {
    involved <- as.logical(weights)
    sqrt(sum(vcov[involved, involved]))
  })

  z <- stats::qnorm((1 + conf_level) / 2)
  stats_mat <- cbind(estimates, `se(coef)` = std_err)

  # Add the hazard ratio and its confidence limits to every row.
  stats_mat <- apply(stats_mat, 1, function(row) {
    row["hr"] <- exp(row["coef"])
    row["lcl"] <- exp(row["coef"] - z * row["se(coef)"])
    row["ucl"] <- exp(row["coef"] + z * row["se(coef)"])
    row
  })

  # One matrix per level of `variable`.
  result <- lapply(by(t(stats_mat), group_by_variable, identity), as.matrix)

  attr(result, "details") <- paste0(
    "Estimations of ", variable,
    " hazard ratio given the level of ", given, " compared to ",
    variable, " level ", lvl_var[1], "."
  )
  result
}
152
153		#' `tryCatch` around `car::Anova`
154		#'
155		#' Captures warnings when executing [car::Anova].
156		#'
157		#' @inheritParams car::Anova
158		#'
159		#' @return A list with item `aov` for the result of the model and `warn_text` for the captured warnings.
160		#'
161		#' @examples
162		#' # `car::Anova` on cox regression model including strata and expected
163		#' # a likelihood ratio test triggers a warning as only `Wald` method is
164		#' # accepted.
165		#'
166		#' library(survival)
167		#'
168		#' mod <- coxph(
169		#' formula = Surv(time = futime, event = fustat) ~ factor(rx) + strata(ecog.ps),
170		#' data = ovarian
171		#' )
172		#'
173		#' @keywords internal
try_car_anova <- function(mod,
                          test.statistic) { # nolint
  # Messages of the warnings raised by `car::Anova()`; stays `NULL` when the
  # call is silent, which is what callers test with `is.null()`.
  warn_text <- NULL

  aov <- withCallingHandlers(
    expr = car::Anova(
      mod,
      test.statistic = test.statistic,
      type = "III"
    ),
    warning = function(w) {
      # Keep only the message of the condition: pasting the condition object
      # itself would also paste its deparsed call as a second element.
      warn_text <<- c(
        warn_text,
        trimws(paste0("Warning in `try_car_anova`: ", conditionMessage(w)))
      )

      # A warning is sometimes expected, so resume the computation while
      # muffling it.
      invokeRestart("muffleWarning")
    }
  )

  list(aov = aov, warn_text = warn_text)
}
204
205		#' Fit the Cox Regression Model and `Anova`
206		#'
207		#' The functions allows to derive from the [survival::coxph()] results the effect p.values using [car::Anova()].
208		#' This last package introduces more flexibility to get the effect p.values.
209		#'
210		#' @inheritParams t_coxreg
211		#'
212		#' @return A list with items `mod` (results of [survival::coxph()]), `msum` (result of `summary`) and
213		#' `aov` (result of [car::Anova()]).
214		#'
215		#' @noRd
fit_n_aov <- function(formula,
                      data,
                      conf_level = 0.95,
                      pval_method = c("wald", "likelihood"),
                      ...) {
  # `data` and `conf_level` previously defaulted to themselves (`data = data`),
  # which fails with "promise already under evaluation" as soon as the argument
  # is omitted. `data` is now required and `conf_level` defaults to 0.95.
  pval_method <- match.arg(pval_method)

  # Evaluate the formula in this frame.
  environment(formula) <- environment()
  suppressWarnings({
    # We expect some warnings due to coxph which fails strict programming.
    mod <- survival::coxph(formula, data = data, ...)
    msum <- summary(mod, conf.int = conf_level)
  })

  # Effect p-values; warnings raised by `car::Anova()` are captured as text.
  aov <- try_car_anova(
    mod,
    test.statistic = switch(pval_method,
      "wald" = "Wald",
      "likelihood" = "LR"
    )
  )

  warn_attr <- aov$warn_text
  if (!is.null(warn_attr)) message(warn_attr)

  y <- list(mod = mod, msum = msum, aov = aov$aov)
  # Captured warnings (or `NULL`) travel with the result.
  attr(y, "message") <- warn_attr

  y
}
247
248		# argument_checks
check_formula <- function(formula) {
  # Anything inheriting from "formula" is accepted; nothing is returned visibly.
  if (inherits(formula, "formula")) {
    return(invisible())
  }

  stop("Check `formula`. A formula should resemble `Surv(time = AVAL, event = 1 - CNSR) ~ study_arm(ARMCD)`.")
}
256
check_covariate_formulas <- function(covariates) {
  # `NULL` is rejected, as is any input with an element that is not a formula.
  is_formula <- vapply(X = covariates, FUN = inherits, what = "formula", FUN.VALUE = TRUE)
  valid <- all(is_formula) && !is.null(covariates)

  if (!valid) {
    stop("Check `covariates`, it should be a list of right-hand-term formulas, e.g. list(Age = ~AGE).")
  }

  invisible()
}
264
name_covariate_names <- function(covariates) {
  # Fill in missing names with the right-hand term of the matching formula.
  current <- names(covariates)

  if (is.null(current)) {
    # No names at all: name every element.
    names(covariates) <- vapply(covariates, FUN = rht, FUN.VALUE = "name")
  } else {
    # Some names may be empty strings: only those are replaced.
    blank <- current == ""
    if (any(blank)) {
      names(covariates)[blank] <- vapply(covariates[blank], FUN = rht, FUN.VALUE = "name")
    }
  }

  covariates
}
272
check_increments <- function(increments, covariates) {
  # Nothing to check when no increments were supplied.
  if (is.null(increments)) {
    return(invisible())
  }

  covariate_terms <- vapply(covariates, FUN = rht, FUN.VALUE = "name")

  # Warn once per increment whose name is not one of the covariate terms.
  warn_if_unmatched <- function(x) {
    if (!(x %in% covariate_terms)) {
      warning(
        paste(
          "Check `increments`, the `increment` for ", x,
          "doesn't match any names in investigated covariate(s)."
        )
      )
    }
  }
  lapply(X = names(increments), FUN = warn_if_unmatched)

  invisible()
}
292
293		#' Multivariate Cox Model - Summarized Results
294		#'
295		#' Analyses based on multivariate Cox model are usually not performed for the Controlled Substance Reporting or
296		#' regulatory documents but serve exploratory purposes only (e.g., for publication). In practice, the model usually
297		#' includes only the main effects (without interaction terms). It produces the hazard ratio estimates for each of the
298		#' covariates included in the model.
299		#' The analysis follows the same principles (e.g., stratified vs. unstratified analysis and tie handling) as the
300		#' usual Cox model analysis. Since there is usually no pre-specified hypothesis testing for such analysis,
301		#' the p.values need to be interpreted with caution. (Statistical Analysis of Clinical Trials Data with R,
302		#' `NEST's bookdown`)
303		#'
304		#' @param formula (`formula`)\cr A formula corresponding to the investigated [survival::Surv()] survival model
305		#' including covariates.
306		#' @param data (`data.frame`)\cr A data frame which includes the variable in formula and covariates.
307		#' @param conf_level (`proportion`)\cr The confidence level for the hazard ratio interval estimations. Default is 0.95.
308		#' @param pval_method (`character`)\cr The method used for the estimation of p-values, should be one of
309		#' `"wald"` (default) or `"likelihood"`.
310		#' @param ... Optional parameters passed to [survival::coxph()]. Can include `ties`, a character string specifying the
311		#' method for tie handling, one of `exact` (default), `efron`, `breslow`.
312		#'
313		#' @return A `list` with elements `mod`, `msum`, `aov`, and `coef_inter`.
314		#'
315		#' @details The output is limited to single effect terms. Work is ongoing for estimation of interaction terms
316		#' but is out of scope as defined by the Global Data Standards Repository
317		#' (`GDS_Standard_TLG_Specs_Tables_2.doc`).
318		#'
319		#' @seealso [estimate_coef()].
320		#'
321		#' @examples
322		#' library(dplyr)
323		#'
324		#' adtte <- tern_ex_adtte
325		#' adtte_f <- subset(adtte, PARAMCD == "OS") # _f: filtered
326		#' adtte_f <- filter(
327		#' adtte_f,
328		#' PARAMCD == "OS" &
329		#' SEX %in% c("F", "M") &
330		#' RACE %in% c("ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE")
331		#' )
332		#' adtte_f$SEX <- droplevels(adtte_f$SEX)
333		#' adtte_f$RACE <- droplevels(adtte_f$RACE)
334		#'
335		#' @keywords internal
s_cox_multivariate <- function(formula, data,
                               conf_level = 0.95,
                               pval_method = c("wald", "likelihood"),
                               ...) {
  pval_method <- match.arg(pval_method)

  # Covariates are the rows of the term "factors" table, minus the response
  # (row 1) and any `strata()` special.
  tf <- stats::terms(formula, specials = c("strata"))
  covariates <- rownames(attr(tf, "factors"))[-c(1, unlist(attr(tf, "specials")))]

  # Character covariates are converted to factors so that their levels are
  # available below. A plain loop updates the local `data` directly instead of
  # relying on `<<-` from inside `lapply()`.
  for (covariate in covariates) {
    if (is.character(data[[covariate]])) {
      data[[covariate]] <- as.factor(data[[covariate]])
    }
  }

  fit <- fit_n_aov(
    formula = formula,
    data = data,
    conf_level = conf_level,
    pval_method = pval_method,
    ...
  )
  mod <- fit$mod
  aov <- fit$aov
  msum <- fit$msum

  all_term_labs <- attr(mod$terms, "term.labels")
  term_order <- attr(mod$terms, "order")
  # Full attribute name: "factor" only worked through partial matching.
  term_factors <- attr(mod$terms, "factors")

  coef_inter <- NULL
  if (any(term_order > 1)) {
    # Interaction terms only.
    for_inter <- all_term_labs[term_order > 1]
    names(for_inter) <- for_inter

    # Zero-filled named template with one entry per model coefficient.
    mmat <- stats::model.matrix(mod)[1, ]
    mmat[mmat != 0] <- 0
    mcoef <- stats::coef(mod)
    mvcov <- stats::vcov(mod)

    estimate_coef_local <- function(variable, given) {
      estimate_coef(
        variable, given,
        coef = mcoef, mmat = mmat, vcov = mvcov, conf_level = conf_level,
        lvl_var = levels(data[[variable]]), lvl_given = levels(data[[given]])
      )
    }

    # For each interaction, estimate every involved variable given the other.
    coef_inter <- lapply(
      for_inter, function(x) {
        y <- term_factors[, x]
        y <- names(y[y > 0])
        Map(estimate_coef_local, variable = y, given = rev(y))
      }
    )
  }

  list(mod = mod, msum = msum, aov = aov, coef_inter = coef_inter)
}

1		#' Cumulative Counts with Thresholds
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Summarize cumulative counts of a (`numeric`) vector that is less than, less or equal to,
6		#' greater than, or greater or equal to user-specific thresholds.
7		#'
8		#' @inheritParams h_count_cumulative
9		#' @inheritParams argument_convention
10		#'
11		#' @seealso Relevant helper function [h_count_cumulative()], and descriptive function [d_count_cumulative()].
12		#'
13		#' @name count_cumulative
14		NULL
15
16		#' Helper Function for [s_count_cumulative()]
17		#'
18		#' @description `r lifecycle::badge("stable")`
19		#'
20		#' Helper function to calculate count and fraction of `x` values in the lower or upper tail given a threshold.
21		#'
22		#' @inheritParams argument_convention
23		#' @param threshold (`number`)\cr a cutoff value as threshold to count values of `x`.
24		#' @param lower_tail (`logical`)\cr whether to count lower tail, default is `TRUE`.
25		#' @param include_eq (`logical`)\cr whether to include value equal to the `threshold` in
26		#' count, default is `TRUE`.
27		#' @param .N_col (`count`)\cr denominator for fraction calculation.
28		#'
29		#' @return A named vector with items:
30		#' * `count`: the count of values less than, less or equal to, greater than, or greater or equal to a threshold
31		#' of user specification.
32		#' * `fraction`: the fraction of the count.
33		#'
34		#' @seealso [count_cumulative]
35		#'
36		#' @examples
37		#' set.seed(1, kind = "Mersenne-Twister")
38		#' x <- c(sample(1:10, 10), NA)
39		#' .N_col <- length(x)
40		#' h_count_cumulative(x, 5, .N_col = .N_col)
41		#' h_count_cumulative(x, 5, lower_tail = FALSE, include_eq = FALSE, na.rm = FALSE, .N_col = .N_col)
42		#' h_count_cumulative(x, 0, lower_tail = FALSE, .N_col = .N_col)
43		#' h_count_cumulative(x, 100, lower_tail = FALSE, .N_col = .N_col)
44		#'
45		#' @export
h_count_cumulative <- function(x,
                               threshold,
                               lower_tail = TRUE,
                               include_eq = TRUE,
                               na.rm = TRUE, # nolint
                               .N_col) { # nolint
  checkmate::assert_numeric(x)
  checkmate::assert_numeric(threshold)
  checkmate::assert_numeric(.N_col)
  checkmate::assert_flag(lower_tail)
  checkmate::assert_flag(include_eq)
  checkmate::assert_flag(na.rm)

  # Comparison selected by the tail and by whether equality is included.
  in_tail <- if (lower_tail) {
    if (include_eq) x <= threshold else x < threshold
  } else {
    if (include_eq) x >= threshold else x > threshold
  }

  # With `na.rm = TRUE` missing values are excluded; otherwise every element is
  # kept and a missing comparison still selects an element of `x`.
  eligible <- if (na.rm) !is.na(x) else rep(TRUE, length(x))
  count <- length(x[eligible & in_tail])

  c(count = count, fraction = count / .N_col)
}
73
74		#' Description of Cumulative Count
75		#'
76		#' @description `r lifecycle::badge("stable")`
77		#'
78		#' This is a helper function that describes the analysis in [s_count_cumulative()].
79		#'
80		#' @inheritParams h_count_cumulative
81		#'
82		#' @return Labels for [s_count_cumulative()].
83		#'
84		#' @export
d_count_cumulative <- function(threshold, lower_tail, include_eq) {
  checkmate::assert_numeric(threshold)
  # Comparison symbol: direction first, then an optional equality sign.
  comparator <- paste0(
    if (lower_tail) "<" else ">",
    if (include_eq) "=" else ""
  )
  paste0(comparator, " ", threshold)
}
91
92		#' @describeIn count_cumulative Statistics function that produces a named list given a numeric vector of thresholds.
93		#'
94		#' @param thresholds (`numeric`)\cr vector of cutoff value for the counts.
95		#'
96		#' @return
97		#' * `s_count_cumulative()` returns a named list of `count_fraction`s: a list with each `thresholds` value as a
98		#' component, each component containing a vector for the count and fraction.
99		#'
100		#' @keywords internal
s_count_cumulative <- function(x,
                               thresholds,
                               lower_tail = TRUE,
                               include_eq = TRUE,
                               .N_col, # nolint
                               ...) {
  checkmate::assert_numeric(thresholds, min.len = 1, any.missing = FALSE)

  # Count and fraction for a single cutoff, labelled with its description.
  count_for_cutoff <- function(cutoff) {
    formatters::with_label(
      h_count_cumulative(x, cutoff, lower_tail, include_eq, .N_col = .N_col, ...),
      d_count_cumulative(cutoff, lower_tail, include_eq)
    )
  }

  per_threshold <- lapply(thresholds, count_for_cutoff)
  names(per_threshold) <- thresholds

  list(count_fraction = per_threshold)
}
118
119		#' @describeIn count_cumulative Formatted analysis function which is used as `afun`
120		#' in `count_cumulative()`.
121		#'
122		#' @return
123		#' * `a_count_cumulative()` returns the corresponding list with formatted [rtables::CellValue()].
124		#'
125		#' @keywords internal
# Formatted analysis function built from `s_count_cumulative()`; the
# `count_fraction` statistic is rendered with `format_count_fraction`.
a_count_cumulative <- make_afun(
  s_count_cumulative,
  .formats = list(count_fraction = format_count_fraction)
)
130
131		#' @describeIn count_cumulative Layout-creating function which can take statistics function arguments
132		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
133		#'
134		#' @return
135		#' * `count_cumulative()` returns a layout object suitable for passing to further layouting functions,
136		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
137		#' the statistics from `s_count_cumulative()` to the table layout.
138		#'
139		#' @examples
140		#' basic_table() %>%
141		#' split_cols_by("ARM") %>%
142		#' add_colcounts() %>%
143		#' count_cumulative(
144		#' vars = "AGE",
145		#' thresholds = c(40, 60)
146		#' ) %>%
147		#' build_table(tern_ex_adsl)
148		#'
149		#' @export
count_cumulative <- function(lyt,
                             vars,
                             var_labels = vars,
                             show_labels = "visible",
                             ...,
                             table_names = vars,
                             .stats = NULL,
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL) {
  # Customize the formatted analysis function with the user-supplied settings;
  # the `count_fraction` statistic is ungrouped.
  cumulative_afun <- make_afun(
    a_count_cumulative,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "count_fraction"
  )

  # Arguments passed through `...` are forwarded as `extra_args`.
  forwarded_args <- list(...)

  analyze(
    lyt,
    vars,
    afun = cumulative_afun,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    extra_args = forwarded_args
  )
}

1		#' Re-implemented [range()] Default S3 method for numerical objects
2		#'
3		#' This function returns `c(NA, NA)` instead of `c(-Inf, Inf)` for zero-length data
4		#' without any warnings.
5		#'
6		#' @param x (`numeric`)\cr a sequence of numbers for which the range is computed.
7		#' @param na.rm (`logical`)\cr indicating if `NA` should be omitted.
8		#' @param finite (`logical`)\cr indicating if non-finite elements should be removed.
9		#'
10		#' @return A 2-element vector of class `numeric`.
11		#'
12		#' @keywords internal
range_noinf <- function(x, na.rm = FALSE, finite = FALSE) { # nolint
  checkmate::assert_numeric(x)

  # `finite` takes precedence over `na.rm`; `is.finite()` drops NAs as well.
  if (finite) {
    x <- x[is.finite(x)]
  } else if (na.rm) {
    x <- x[!is.na(x)]
  }

  if (length(x) > 0) {
    return(c(min(x, na.rm = FALSE), max(x, na.rm = FALSE)))
  }

  # Nothing left: return a pair of NAs with the same storage type as `x`.
  empty_range <- c(NA, NA)
  mode(empty_range) <- typeof(x)
  empty_range
}
32
33		#' Utility function to create label for confidence interval
34		#'
35		#' @description `r lifecycle::badge("stable")`
36		#'
37		#' @inheritParams argument_convention
38		#'
39		#' @return A `string`.
40		#'
41		#' @export
f_conf_level <- function(conf_level) {
  assert_proportion_value(conf_level)
  # Express the level as a percentage, e.g. 0.95 becomes "95% CI".
  percent <- conf_level * 100
  paste0(percent, "% CI")
}
46
47		#' Utility function to create label for p-value
48		#'
49		#' @description `r lifecycle::badge("stable")`
50		#'
51		#' @param test_mean (`number`)\cr mean value to test under the null hypothesis.
52		#'
53		#' @return A `string`.
54		#'
55		#' @export
f_pval <- function(test_mean) {
  checkmate::assert_numeric(test_mean, len = 1)
  # The label states the null hypothesis being tested.
  null_hypothesis <- paste0("H0: mean = ", test_mean)
  paste0("p-value (", null_hypothesis, ")")
}
60
61		#' Utility function to return a named list of covariate names.
62		#'
63		#' @param covariates (`character`)\cr a vector that can contain single variable names (such as
64		#' `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
65		#'
66		#' @return A named `list` of `character` vector.
67		#'
68		#' @keywords internal
get_covariates <- function(covariates) {
  checkmate::assert_character(covariates)
  # Split interaction terms on the literal "*" and keep each distinct,
  # whitespace-trimmed variable name once.
  pieces <- strsplit(covariates, "*", fixed = TRUE)
  cov_vars <- unique(trimws(unlist(pieces)))
  # Each variable becomes a list element named after itself.
  stats::setNames(as.list(cov_vars), cov_vars)
}
74
75		#' Replicate Entries of a Vector if Required
76		#'
77		#' @description `r lifecycle::badge("stable")`
78		#'
79		#' Replicate entries of a vector if required.
80		#'
81		#' @inheritParams argument_convention
82		#' @param n (`count`)\cr how many entries we need.
83		#'
84		#' @return `x` if it has the required length already or is `NULL`,
85		#' otherwise if it is scalar the replicated version of it with `n` entries.
86		#'
87		#' @note This function will fail if `x` is not of length `n` and/or is not a scalar.
88		#'
89		#' @export
to_n <- function(x, n) {
  # `NULL` is passed through untouched.
  if (is.null(x)) {
    return(NULL)
  }

  len <- length(x)
  # A scalar is replicated to the requested length.
  if (len == 1) {
    return(rep(x, n))
  }
  # Any other length must already match `n`.
  if (len != n) {
    stop("dimension mismatch")
  }
  x
}
101
102		#' Check Element Dimension
103		#'
104		#' Checks if the elements in `...` have the same dimension.
105		#'
106		#' @param ... (`data.frame`s or `vector`s)\cr any data frames/vectors.
107		#' @param omit_null (`logical`)\cr whether `NULL` elements in `...` should be omitted from the check.
108		#'
109		#' @return A `logical` value.
110		#'
111		#' @keywords internal
check_same_n <- function(..., omit_null = TRUE) {
  dots <- list(...)

  # Give every argument a label. Unnamed arguments get a positional label
  # ("..1", "..2", ...) so that they are checked too and can be reported.
  arg_names <- names(dots)
  if (is.null(arg_names)) {
    arg_names <- rep("", length(dots))
  }
  unnamed <- !nzchar(arg_names)
  if (any(unnamed)) {
    arg_names[unnamed] <- paste0("..", which(unnamed))
  }

  # Size of each argument: rows for data frames, length for atomic vectors,
  # NA for `NULL` when `omit_null` is `TRUE`.
  n <- vapply(
    seq_along(dots),
    function(i) {
      x <- dots[[i]]
      if (is.null(x)) {
        if (!omit_null) {
          stop("arg ", arg_names[i], " is not supposed to be NULL")
        }
        NA_real_
      } else if (is.data.frame(x)) {
        as.numeric(nrow(x))
      } else if (is.atomic(x)) {
        as.numeric(length(x))
      } else {
        stop("data structure for ", arg_names[i], " is currently not supported")
      }
    },
    FUN.VALUE = numeric(1)
  )
  names(n) <- arg_names

  # Omitted `NULL` arguments do not take part in the comparison.
  n <- n[!is.na(n)]

  if (length(unique(n)) > 1) {
    sel <- which(n != n[1])
    stop("dimension mismatch: ", paste(names(n)[sel], collapse = ", "), " do not have N=", n[1])
  }

  TRUE
}
143
144		#' Make Names Without Dots
145		#'
146		#' @param nams (`character`)\cr vector of original names.
147		#'
148		#' @return A `character` `vector` of proper names, which does not use dots in contrast to [make.names()].
149		#'
150		#' @keywords internal
make_names <- function(nams) {
  # Build syntactic names first, then strip every literal dot from them.
  syntactic <- make.names(nams)
  gsub("[.]", "", syntactic)
}
155
156		#' Conversion of Months to Days
157		#'
158		#' @description `r lifecycle::badge("stable")`
159		#'
160		#' Conversion of Months to Days. This is an approximative calculation because it
161		#' considers each month as having an average of 30.4375 days.
162		#'
163		#' @param x (`numeric`)\cr time in months.
164		#'
165		#' @return A `numeric` vector with the time in days.
166		#'
167		#' @examples
168		#' x <- c(13.25, 8.15, 1, 2.834)
169		#' month2day(x)
170		#'
171		#' @export
month2day <- function(x) {
  checkmate::assert_numeric(x)
  # Average number of days per month used for the approximation.
  days_per_month <- 30.4375
  x * days_per_month
}
176
177		#' Conversion of Days to Months
178		#'
179		#' @param x (`numeric`)\cr time in days.
180		#'
181		#' @return A `numeric` vector with the time in months.
182		#'
183		#' @examples
184		#' x <- c(403, 248, 30, 86)
185		#' day2month(x)
186		#'
187		#' @export
day2month <- function(x) {
  checkmate::assert_numeric(x)
  # Average number of days per month used for the approximation.
  days_per_month <- 30.4375
  x / days_per_month
}
192
193		#' Return an empty numeric if all elements are `NA`.
194		#'
195		#' @param x (`numeric`)\cr vector.
196		#'
197		#' @return An empty `numeric` if all elements of `x` are `NA`, otherwise `x`.
198		#'
199		#' @examples
200		#' x <- c(NA, NA, NA)
201		#' # Internal function - empty_vector_if_na
202		#' @keywords internal
empty_vector_if_na <- function(x) {
  # Any non-missing element means the input is returned unchanged.
  if (!all(is.na(x))) {
    return(x)
  }
  numeric()
}
210
211		#' Combine Two Vectors Element Wise
212		#'
213		#' @param x (`vector`)\cr first vector to combine.
214		#' @param y (`vector`)\cr second vector to combine.
215		#'
216		#' @return A `list` where each element combines corresponding elements of `x` and `y`.
217		#'
218		#' @examples
219		#' combine_vectors(1:3, 4:6)
220		#'
221		#' @export
combine_vectors <- function(x, y) {
  checkmate::assert_vector(x)
  checkmate::assert_vector(y, len = length(x))

  # Stack the two vectors as rows; each column then holds one pair.
  stacked <- rbind(x, y)
  pairs <- lapply(as.data.frame(stacked), `c`)
  # The column names are of no interest to callers.
  unname(pairs)
}
230
#' Extract Elements by Name
#'
#' Subsets a named vector `x` by `names`, keeping only the names that actually occur in `x`.
#' Differences to the standard `[` function are:
#'
#' - If `x` is `NULL`, then `NULL` is returned (same as in base function).
#' - If `x` is not `NULL`, then only the elements whose names appear in both `x` and `names`
#'   are returned. That is, `names` which don't appear in `x` are not returned as `NA`s, and
#'   `NULL` is returned when there is no common name at all.
#'
#' @param x (named `vector`)\cr where to extract named elements from.
#' @param names (`character`)\cr vector of names to extract.
#'
#' @return `NULL` if `x` is `NULL`, otherwise the extracted elements from `x`.
#'
#' @keywords internal
extract_by_name <- function(x, names) {
  if (is.null(x)) {
    return(NULL)
  }
  checkmate::assert_named(x)
  checkmate::assert_character(names)

  # Names are taken in the order in which they occur in `x`.
  keep <- intersect(names(x), names)
  if (length(keep) == 0) {
    return(NULL)
  }
  x[keep]
}
259
#' Labels for Adverse Event Baskets
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Builds the label of an `AE` basket from the query name and, optionally, the query scope.
#'
#' @param aesi (`character`)\cr with standardized `MedDRA` query name (e.g. `SMQzzNAM`) or customized query
#'   name (e.g. `CQzzNAM`).
#' @param scope (`character`)\cr with scope of query (e.g. `SMQzzSC`).
#'
#' @return A `string` with the standard label for the `AE` basket.
#'
#' @examples
#' adae <- tern_ex_adae
#'
#' # Standardized query label includes scope.
#' aesi_label(adae$SMQ01NAM, scope = adae$SMQ01SC)
#'
#' # Customized query label.
#' aesi_label(adae$CQ01NAM)
#'
#' @export
aesi_label <- function(aesi, scope = NULL) {
  checkmate::assert_character(aesi)
  checkmate::assert_character(scope, null.ok = TRUE)

  # Label of the input object, used when no single query name can be determined.
  fallback_label <- obj_label(aesi)

  # Distinct non-missing query names after `sas_na()` conversion.
  query_names <- unique(sas_na(aesi))
  query_names <- query_names[!is.na(query_names)]

  if (length(query_names) != 1) {
    return(fallback_label)
  }
  if (is.null(scope)) {
    return(query_names)
  }

  # The scope must reduce to exactly one distinct non-missing value.
  scope <- unique(sas_na(scope))
  scope <- scope[!is.na(scope)]
  checkmate::assert_string(scope)
  paste0(query_names, " (", scope, ")")
}
300
#' Indicate Study Arm Variable in Formula
#'
#' We use `study_arm` to indicate the study arm variable in `tern` formulas.
#'
#' @param x arm information
#'
#' @return `x`, with the deparsed expression that was supplied for `x` stored in the
#'   attribute `varname`.
#'
#' @keywords internal
study_arm <- function(x) {
  # Capture the expression the caller passed for `x`, not its value.
  arm_expr <- substitute(x)
  structure(x, varname = deparse(arm_expr))
}
313
#' Smooth Function with Optional Grouping
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This produces `loess` smoothed estimates of `y` with Student confidence intervals.
#' Rows with missing values in `x`, `y` or any of the `groups` variables are dropped before smoothing.
#' When `groups` is given, one `loess` fit is computed per combination of group values.
#'
#' @param df (`data.frame`)\cr data set containing all analysis variables.
#' @param x (`character`)\cr value with x column name.
#' @param y (`character`)\cr value with y column name.
#' @param groups (`character`)\cr vector with optional grouping variables names.
#' @param level (`numeric`)\cr level of the two-sided confidence interval to use (0.95 by default).
#'
#' @return A `data.frame` with original `x`, smoothed `y`, `ylow`, and `yhigh`, and
#'   optional `groups` variables formatted as `factor` type.
#'
#' @export
get_smooths <- function(df, x, y, groups = NULL, level = 0.95) {
  checkmate::assert_data_frame(df)
  df_cols <- colnames(df)
  checkmate::assert_string(x)
  checkmate::assert_subset(x, df_cols)
  checkmate::assert_numeric(df[[x]])
  checkmate::assert_string(y)
  checkmate::assert_subset(y, df_cols)
  checkmate::assert_numeric(df[[y]])

  if (!is.null(groups)) {
    checkmate::assert_character(groups)
    checkmate::assert_subset(groups, df_cols)
  }

  # Fit a loess curve to (x_val, y_val) and return the fit with its confidence band.
  smooth_band <- function(x_val, y_val) {
    plx <- stats::predict(stats::loess(y_val ~ x_val), se = TRUE)
    # Two-sided interval: the quantile is taken at (1 + level) / 2, not at `level`.
    half_width <- stats::qt((1 + level) / 2, plx$df) * plx$se.fit
    data.frame(
      x = x_val,
      y = plx$fit,
      ylow = plx$fit - half_width,
      yhigh = plx$fit + half_width
    )
  }

  if (is.null(groups)) {
    cc <- stats::complete.cases(df[c(x, y)])
    df_c <- df[cc, ]
    return(smooth_band(df_c[[x]], df_c[[y]]))
  }

  cc <- stats::complete.cases(df[c(x, y, groups)])
  df_c <- df[cc, c(x, y, groups)]

  # `by()` walks through the group combinations with the first grouping variable varying
  # fastest. Sorting with the last grouping variable as primary key puts the rows in that
  # same order, so the group columns attached below line up with the smoothed rows.
  row_order <- do.call("order", unname(as.list(df_c[, rev(groups), drop = FALSE])))
  df_c_ordered <- df_c[row_order, , drop = FALSE]
  df_c_g <- data.frame(Map(as.factor, df_c_ordered[groups]))

  df_smooth_raw <- by(df_c_ordered, df_c_g, function(d) {
    smooth_band(d[[x]], d[[y]])
  })

  df_smooth <- do.call(rbind, df_smooth_raw)
  df_smooth[groups] <- df_c_g

  df_smooth
}
385
#' Number of Available (Non-Missing Entries) in a Vector
#'
#' Small utility function for better readability.
#'
#' @param x (`any`)\cr vector in which to count non-missing values.
#'
#' @return Number of non-missing values.
#'
#' @keywords internal
n_available <- function(x) {
  is_available <- !is.na(x)
  sum(is_available)
}
398
#' Reapply Variable Labels
#'
#' This is a helper function that is used in tests.
#'
#' @param x (`vector`)\cr vector of elements that needs new labels.
#' @param varlabels (`character`)\cr vector of labels for `x`.
#' @param ... further parameters to be added to the list.
#'
#' @return `x` with variable labels reapplied.
#'
#' @export
reapply_varlabels <- function(x, varlabels, ...) {
  # Labels from `varlabels` come first, followed by those passed through `...`.
  named_labels <- c(as.list(varlabels), list(...))

  # Overwrite only the labels addressed by name, then write all labels back.
  current_labels <- formatters::var_labels(x)
  current_labels[names(named_labels)] <- as.character(named_labels)
  formatters::var_labels(x) <- current_labels
  x
}
415
# Wrapper around survival::clogit() that reports a model-fitting failure with a fixed,
# recognizable message. The message of the underlying error is appended so that the
# actual cause of the failure is not lost.
clogit_with_tryCatch <- function(formula, data, ...) { # nolint
  tryCatch(
    survival::clogit(formula = formula, data = data, ...),
    error = function(e) {
      stop(
        "model not built successfully with survival::clogit: ",
        conditionMessage(e),
        call. = FALSE
      )
    }
  )
}

1		#' Summary numeric variables in columns
2		#'
3		#' @description `r lifecycle::badge("experimental")`
4		#'
5		#' Layout-creating function which can be used for creating column-wise summary tables.
6		#' This function sets the analysis methods as column labels and is a wrapper for
7		#' [rtables::analyze_colvars()]. It was designed principally for PK tables.
8		#'
9		#' @inheritParams argument_convention
10		#' @inheritParams rtables::analyze_colvars
11		#' @param row_labels (`character`)\cr as this function works in columns space, usual `.labels`
12		#' character vector applies on the column space. You can change the row labels by defining this
13		#' parameter to a named character vector with names corresponding to the split values. It defaults
14		#' to `NULL` and if it contains only one `string`, it will duplicate that as a row label.
15		#' @param do_summarize_row_groups (`flag`)\cr defaults to `FALSE` and applies the analysis to the current
16		#' label rows. This is a wrapper of [rtables::summarize_row_groups()] and it can accept `labelstr`
17		#' to define row labels. This behavior is not supported as we never need to overload row labels.
18		#' @param split_col_vars (`flag`)\cr defaults to `TRUE` and puts the analysis results onto the columns.
19		#' This option allows you to add multiple instances of this functions, also in a nested fashion,
20		#' without adding more splits. This split must happen only one time on a single layout.
21		#'
22		#' @return
23		#' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
24		#' Adding this function to an `rtable` layout will summarize the given variables, arrange the output
25		#' in columns, and add it to the table layout.
26		#'
27		#' @note This is an experimental implementation of [rtables::summarize_row_groups()] and
#' [rtables::analyze_colvars()] that may be subject to change as `rtables` extends its
29		#' support to more complex analysis pipelines on the column space. For the same reasons,
30		#' we encourage to read the examples carefully and file issues for cases that differ from
31		#' them.
32		#'
33		#' Here `labelstr` behaves differently than usual. If it is not defined (default as `NULL`),
34		#' row labels are assigned automatically to the split values in case of `rtables::analyze_colvars`
35		#' (`do_summarize_row_groups = FALSE`, the default), and to the group label for
36		#' `do_summarize_row_groups = TRUE`.
37		#'
38		#' @seealso [summarize_vars()], [rtables::analyze_colvars()].
39		#'
40		#' @examples
41		#' library(dplyr)
42		#'
43		#' # Data preparation
44		#' adpp <- tern_ex_adpp %>% h_pkparam_sort()
45		#'
46		#' lyt <- basic_table() %>%
47		#' split_rows_by(var = "STRATA1", label_pos = "topleft") %>%
48		#' split_rows_by(
49		#' var = "SEX",
50		#' label_pos = "topleft",
51		#' child_label = "hidden"
52		#' ) %>% # Removes duplicated labels
53		#' analyze_vars_in_cols(vars = "AGE")
54		#' result <- build_table(lyt = lyt, df = adpp)
55		#' result
56		#'
57		#' # By selecting just some statistics and ad-hoc labels
58		#' lyt <- basic_table() %>%
59		#' split_rows_by(var = "ARM", label_pos = "topleft") %>%
60		#' split_rows_by(
61		#' var = "SEX",
62		#' label_pos = "topleft",
63		#' child_labels = "hidden",
64		#' split_fun = drop_split_levels
65		#' ) %>%
66		#' analyze_vars_in_cols(
67		#' vars = "AGE",
68		#' .stats = c("n", "cv", "geom_mean"),
69		#' .labels = c(
70		#' n = "aN",
71		#' cv = "aCV",
72		#' geom_mean = "aGeomMean"
73		#' )
74		#' )
75		#' result <- build_table(lyt = lyt, df = adpp)
76		#' result
77		#'
78		#' # Changing row labels
79		#' lyt <- basic_table() %>%
80		#' analyze_vars_in_cols(
81		#' vars = "AGE",
82		#' row_labels = "some custom label"
83		#' )
84		#' result <- build_table(lyt, df = adpp)
85		#' result
86		#'
87		#' # Pharmacokinetic parameters
88		#' lyt <- basic_table() %>%
89		#' split_rows_by(
90		#' var = "TLG_DISPLAY",
91		#' split_label = "PK Parameter",
92		#' label_pos = "topleft",
93		#' child_label = "hidden"
94		#' ) %>%
95		#' analyze_vars_in_cols(
96		#' vars = "AVAL"
97		#' )
98		#' result <- build_table(lyt, df = adpp)
99		#' result
100		#'
101		#' # Multiple calls (summarize label and analyze underneath)
102		#' lyt <- basic_table() %>%
103		#' split_rows_by(
104		#' var = "TLG_DISPLAY",
105		#' split_label = "PK Parameter",
106		#' label_pos = "topleft"
107		#' ) %>%
108		#' analyze_vars_in_cols(
109		#' vars = "AVAL",
110		#' do_summarize_row_groups = TRUE # does a summarize level
111		#' ) %>%
112		#' split_rows_by("SEX",
113		#' child_label = "hidden",
114		#' label_pos = "topleft"
115		#' ) %>%
116		#' analyze_vars_in_cols(
117		#' vars = "AVAL",
118		#' split_col_vars = FALSE # avoids re-splitting the columns
119		#' )
120		#' result <- build_table(lyt, df = adpp)
121		#' result
122		#'
123		#' @export
analyze_vars_in_cols <- function(lyt,
                                 vars,
                                 ...,
                                 .stats = c(
                                   "n",
                                   "mean",
                                   "sd",
                                   "se",
                                   "cv",
                                   "geom_cv"
                                 ),
                                 .labels = c(
                                   n = "n",
                                   mean = "Mean",
                                   sd = "SD",
                                   se = "SE",
                                   cv = "CV (%)",
                                   geom_cv = "CV % Geometric Mean"
                                 ),
                                 row_labels = NULL,
                                 do_summarize_row_groups = FALSE,
                                 split_col_vars = TRUE,
                                 .indent_mods = NULL,
                                 nested = TRUE,
                                 na_level = NULL,
                                 .formats = NULL) {
  checkmate::assert_string(na_level, null.ok = TRUE)
  checkmate::assert_character(row_labels, null.ok = TRUE)
  checkmate::assert_int(.indent_mods, null.ok = TRUE)
  checkmate::assert_flag(nested)
  checkmate::assert_flag(split_col_vars)
  checkmate::assert_flag(do_summarize_row_groups)

  # Automatic assignment of formats: the numeric summary formats plus all but the
  # first of the counts summary formats, unless the user supplies `.formats`.
  if (is.null(.formats)) {
    sf_numeric <- summary_formats("numeric")
    sf_counts <- summary_formats("counts")[-1]
    formats_v <- c(sf_numeric, sf_counts)
  } else {
    formats_v <- .formats
  }

  # Indent modifier applied to every cell; no extra indentation by default.
  indent_mod <- if (is.null(.indent_mods)) 0L else .indent_mods

  # Returns the replacement label stored in `row_labels` under the name `value`.
  # Fails with an explanatory message when `row_labels` has no such name.
  lookup_row_label <- function(value, value_desc, .spl_context) {
    if (!(value %in% names(row_labels))) {
      stop(
        "Replacing the row labels needs a named vector that contains the split values. ",
        "In the current split variable ",
        .spl_context$split[nrow(.spl_context)],
        " the ", value_desc, " ", value, " is not in",
        " row_labels names: ", paste(names(row_labels), collapse = ", ")
      )
    }
    unlist(row_labels[value])
  }

  # A single variable is analyzed with every statistic; otherwise exactly one
  # variable per statistic is required.
  if (length(vars) == 1) {
    vars <- rep(vars, length(.stats))
  } else if (length(vars) != length(.stats)) {
    stop(
      "Analyzed variables (vars) does not have the same ",
      "number of elements of specified statistics (.stats)."
    )
  }

  if (split_col_vars) {
    # Checking there is not a previous identical column split
    clyt <- tail(clayout(lyt), 1)[[1]]

    dummy_lyt <- split_cols_by_multivar(
      lyt = basic_table(),
      vars = vars,
      varlabels = .labels[.stats]
    )

    if (any(vapply(clyt, identical, logical(1), y = get_last_col_split(dummy_lyt)))) {
      # Name the calling expression in the message. When there is no calling frame
      # (direct call from top level) the call of this function itself is shown.
      caller <- if (sys.nframe() > 1) sys.calls()[[sys.nframe() - 1]] else sys.call()
      stop(
        "Column split called again with the same values. ",
        "This can create many unwanted columns. Please consider adding ",
        "split_col_vars = FALSE to the last call of ",
        paste(deparse(caller), collapse = " "), "."
      )
    }

    # Main col split
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = vars,
      varlabels = .labels[.stats]
    )
  }

  if (do_summarize_row_groups) {
    if (length(unique(vars)) > 1) {
      stop("When using do_summarize_row_groups only one label level var should be inserted.")
    }

    # Function list for do_summarize_row_groups. Slightly different handling of labels:
    # the label defaults to `labelstr` and is only replaced when `row_labels` has more
    # than one element.
    cfun_list <- Map(
      function(stat) {
        function(u, .spl_context, labelstr, ...) {
          # Statistic
          res <- s_summary(u, ...)[[stat]]

          # Label check and replacement
          lbl <- if (length(row_labels) > 1) {
            lookup_row_label(labelstr, "labelstr value (split value by default)", .spl_context)
          } else {
            labelstr
          }

          # Cell creation
          rcell(res,
            label = lbl,
            format = formats_v[names(formats_v) == stat][[1]],
            format_na_str = na_level,
            indent_mod = indent_mod
          )
        }
      },
      stat = .stats
    )

    # Main call to rtables
    summarize_row_groups(
      lyt = lyt,
      var = unique(vars),
      cfun = cfun_list,
      extra_args = list(...)
    )
  } else {
    # Function list for analyze_colvars
    afun_list <- Map(
      function(stat) {
        function(u, .spl_context, ...) {
          # Main statistics
          res <- s_summary(u, ...)[[stat]]

          # Label from context: value of the innermost split
          label_from_context <- .spl_context$value[nrow(.spl_context)]

          # Label switcher: no `row_labels` keeps the split value, several `row_labels`
          # are looked up by split value, a single one is used as is.
          lbl <- if (is.null(row_labels)) {
            label_from_context
          } else if (length(row_labels) > 1) {
            lookup_row_label(label_from_context, "split value", .spl_context)
          } else {
            row_labels
          }

          # Cell creation
          rcell(res,
            label = lbl,
            format = formats_v[names(formats_v) == stat][[1]],
            format_na_str = na_level,
            indent_mod = indent_mod
          )
        }
      },
      stat = .stats
    )

    # Main call to rtables
    analyze_colvars(lyt,
      afun = afun_list,
      nested = nested,
      extra_args = list(...)
    )
  }
}
302
# Helper: returns the last element of the last entry of the column layout of `lyt`.
get_last_col_split <- function(lyt) {
  last_col_entry <- tail(clayout(lyt), 1)[[1]]
  tail(last_col_entry, 1)[[1]]
}

1		#' Cox Regression Helper: Interactions
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Test and estimate the effect of a treatment in interaction with a covariate.
6		#' The effect is estimated as the HR of the tested treatment for a given level
7		#' of the covariate, in comparison to the treatment control.
8		#'
9		#' @inheritParams argument_convention
10		#' @param x (`numeric` or `factor`)\cr the values of the covariate to be tested.
11		#' @param effect (`string`)\cr the name of the effect to be tested and estimated.
12		#' @param covar (`string`)\cr the name of the covariate in the model.
13		#' @param mod (`coxph`)\cr the Cox regression model.
14		#' @param label (`string`)\cr the label to be returned as `term_label`.
15		#' @param control (`list`)\cr a list of controls as returned by [control_coxreg()].
16		#' @param ... see methods.
17		#'
18		#' @examples
19		#' library(survival)
20		#'
21		#' set.seed(1, kind = "Mersenne-Twister")
22		#'
23		#' # Testing dataset [survival::bladder].
24		#' dta_bladder <- with(
25		#' data = bladder[bladder$enum < 5, ],
26		#' data.frame(
27		#' time = stop,
28		#' status = event,
29		#' armcd = as.factor(rx),
30		#' covar1 = as.factor(enum),
31		#' covar2 = factor(
32		#' sample(as.factor(enum)),
33		#' levels = 1:4,
34		#' labels = c("F", "F", "M", "M")
35		#' )
36		#' )
37		#' )
38		#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
39		#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
40		#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
41		#'
42		#' plot(
43		#' survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder),
44		#' lty = 2:4,
45		#' xlab = "Months",
46		#' col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
47		#' )
48		#'
49		#' @name cox_regression_inter
50		NULL
51
52		#' @describeIn cox_regression_inter S3 generic helper function to determine interaction effect.
53		#'
54		#' @return
55		#' * `h_coxreg_inter_effect()` returns a `data.frame` of covariate interaction effects consisting of the following
56		#' variables: `effect`, `term`, `term_label`, `level`, `n`, `hr`, `lcl`, `ucl`, `pval`, and `pval_inter`.
57		#'
58		#' @export
h_coxreg_inter_effect <- function(x,
                                  effect,
                                  covar,
                                  mod,
                                  label,
                                  control,
                                  ...) {
  # S3 generic: the method is selected by the class of the covariate values `x`.
  UseMethod("h_coxreg_inter_effect")
}
68
69		#' @describeIn cox_regression_inter Method for `numeric` class. Estimates the interaction with a `numeric` covariate.
70		#'
71		#' @method h_coxreg_inter_effect numeric
72		#'
73		#' @param at (`list`)\cr a list with items named after the covariate, every
74		#' item is a vector of levels at which the interaction should be estimated.
75		#'
76		#' @export
h_coxreg_inter_effect.numeric <- function(x,
                                          effect,
                                          covar,
                                          mod,
                                          label,
                                          control,
                                          at,
                                          ...) {
  all_betas <- stats::coef(mod)

  # Positions of the terms involving `effect` among the non-strata model terms;
  # exactly two are expected (used below as main effect and interaction with `covar`).
  term_labels <- attr(stats::terms(mod), "term.labels")
  non_strata_terms <- term_labels[!grepl("strata\\(", term_labels)]
  term_indices <- grep(pattern = effect, x = non_strata_terms)
  checkmate::assert_vector(term_indices, len = 2)

  mod_vcov <- stats::vcov(mod)
  betas <- all_betas[term_indices]
  betas_var <- diag(mod_vcov)[term_indices]
  betas_cov <- mod_vcov[term_indices[1], term_indices[2]]

  # Covariate values at which the effect is estimated: user-specified through `at`,
  # otherwise the median of the covariate.
  at_values <- at[[covar]]
  xval <- if (is.null(at_values)) stats::median(x) else at_values

  # The coefficient whose name contains `covar` is the interaction coefficient.
  is_inter <- grepl(covar, names(betas))
  main_beta <- betas[!is_inter]
  inter_beta <- betas[is_inter]
  main_var <- betas_var[!is_inter]
  inter_var <- betas_var[is_inter]

  # Log hazard ratio at `xval` and its standard error.
  coef_hat <- main_beta + xval * inter_beta
  coef_se <- sqrt(
    main_var +
      xval^2 * inter_var +
      2 * xval * betas_cov
  )
  q_norm <- stats::qnorm((1 + control$conf_level) / 2)
  margin <- q_norm * coef_se

  data.frame(
    effect = "Covariate:",
    term = rep(covar, length(xval)),
    term_label = paste0("  ", xval),
    level = as.character(xval),
    n = NA,
    hr = exp(coef_hat),
    lcl = exp(coef_hat - margin),
    ucl = exp(coef_hat + margin),
    pval = NA,
    pval_inter = NA,
    stringsAsFactors = FALSE
  )
}
122
123		#' @describeIn cox_regression_inter Method for `factor` class. Estimate the interaction with a `factor` covariate.
124		#'
125		#' @method h_coxreg_inter_effect factor
126		#'
127		#' @param data (`data.frame`)\cr the data frame on which the model was fit.
128		#'
129		#' @export
h_coxreg_inter_effect.factor <- function(x,
                                         effect,
                                         covar,
                                         mod,
                                         label,
                                         control,
                                         data,
                                         ...) {
  lvl_given <- levels(x)

  # Use the confidence level requested through `control`, as the numeric method does.
  # 0.95 is kept as fallback when `control` is not supplied or has no `conf_level`.
  conf_level <- if (missing(control) || is.null(control$conf_level)) {
    0.95
  } else {
    control$conf_level
  }

  # Only the first element of the estimation list is used.
  y <- h_coxreg_inter_estimations(
    variable = effect, given = covar,
    lvl_var = levels(data[[effect]]),
    lvl_given = lvl_given,
    mod = mod,
    conf_level = conf_level
  )[[1]]

  data.frame(
    effect = "Covariate:",
    term = rep(covar, nrow(y)),
    term_label = paste0("  ", lvl_given),
    level = lvl_given,
    n = NA,
    hr = y[, "hr"],
    lcl = y[, "lcl"],
    ucl = y[, "ucl"],
    pval = NA,
    pval_inter = NA,
    stringsAsFactors = FALSE
  )
}
161
162		#' @describeIn cox_regression_inter Method for `character` class. Estimate the interaction with a `character` covariate.
163		#' This makes an automatic conversion to `factor` and then forwards to the method for factors.
164		#'
165		#' @method h_coxreg_inter_effect character
166		#'
167		#' @note
168		#' * Automatic conversion of character to factor does not guarantee results can be generated correctly. It is
169		#' therefore better to always pre-process the dataset such that factors are manually created from character
170		#' variables before passing the dataset to [rtables::build_table()].
171		#'
172		#' @export
h_coxreg_inter_effect.character <- function(x,
                                            effect,
                                            covar,
                                            mod,
                                            label,
                                            control,
                                            data,
                                            ...) {
  # Convert the character covariate to a factor and dispatch again on the generic;
  # all other arguments are forwarded unchanged.
  h_coxreg_inter_effect(
    x = as.factor(x),
    effect = effect,
    covar = covar,
    mod = mod,
    label = label,
    control = control,
    data = data,
    ...
  )
}
194
195		#' @describeIn cox_regression_inter A higher level function to get
196		#' the results of the interaction test and the estimated values.
197		#'
198		#' @return
199		#' * `h_coxreg_extract_interaction()` returns the result of an interaction test and the estimated values. If
200		#' no interaction, [h_coxreg_univar_extract()] is applied instead.
201		#'
202		#' @examples
203		#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder)
204		#' h_coxreg_extract_interaction(
205		#' mod = mod, effect = "armcd", covar = "covar1", data = dta_bladder,
206		#' control = control_coxreg()
207		#' )
208		#'
209		#' @export
h_coxreg_extract_interaction <- function(effect,
                                         covar,
                                         mod,
                                         data,
                                         at,
                                         control) {
  # Without a second-order term in the model there is no interaction to test:
  # fall back to the univariate extraction and add an empty `pval_inter` column.
  has_interaction <- any(attr(stats::terms(mod), "order") == 2)
  if (!has_interaction) {
    y <- h_coxreg_univar_extract(
      effect = effect, covar = covar, mod = mod, data = data, control = control
    )
    y$pval_inter <- NA
    return(y)
  }

  # Translate the p-value method of `control` into the test statistic name.
  statistic_by_method <- c(wald = "Wald", likelihood = "LR")
  test_statistic <- statistic_by_method[control$pval_method]

  mod_aov <- muffled_car_anova(mod, test_statistic)
  sum_anova <- broom::tidy(mod_aov)

  # Test the main treatment effect.
  pval <- sum_anova[sum_anova$term == effect, ][["p.value"]]

  # Test the interaction effect (terms containing ":").
  pval_inter <- sum_anova[grep(":", sum_anova$term), ][["p.value"]]

  covar_label <- unname(labels_or_names(data[covar]))
  covar_test <- data.frame(
    effect = "Covariate:",
    term = covar,
    term_label = covar_label,
    level = "",
    n = mod$n, hr = NA, lcl = NA, ucl = NA, pval = pval,
    pval_inter = pval_inter,
    stringsAsFactors = FALSE
  )

  # Estimate the interaction.
  inter_estimates <- h_coxreg_inter_effect(
    data[[covar]],
    covar = covar,
    effect = effect,
    mod = mod,
    label = covar_label,
    at = at,
    control = control,
    data = data
  )
  rbind(covar_test, inter_estimates)
}
255
256		#' @describeIn cox_regression_inter Hazard ratio estimation in interactions.
257		#'
258		#' @param variable,given (`string`)\cr the name of variables in interaction. We seek the estimation
259		#' of the levels of `variable` given the levels of `given`.
#' @param lvl_var,lvl_given (`character`)\cr corresponding levels as given by [levels()].
261		#' @param mod (`coxph`)\cr a fitted Cox regression model (see [survival::coxph()]).
262		#'
263		#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A)
264		#' and Sex (F, M; reference Female) and the model being abbreviated: y ~ Arm + Sex + Arm:Sex.
265		#' The cox regression estimates the coefficients along with a variance-covariance matrix for:
266		#'
267		#' - b1 (arm b), b2 (arm c)
268		#' - b3 (sex m)
269		#' - b4 (arm b: sex m), b5 (arm c: sex m)
270		#'
271		#' The estimation of the Hazard Ratio for arm C/sex M is given in reference
272		#' to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5).
273		#' The interaction coefficient is deduced by b2 + b5 while the standard error
274		#' is obtained as $sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5))$.
275		#'
276		#' @return
277		#' * `h_coxreg_inter_estimations()` returns a list of matrices (one per level of variable) with rows corresponding
278		#' to the combinations of `variable` and `given`, with columns:
279		#' * `coef_hat`: Estimation of the coefficient.
280		#' * `coef_se`: Standard error of the estimation.
281		#' * `hr`: Hazard ratio.
282		#' * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio.
283		#'
284		#' @examples
285		#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder)
286		#' result <- h_coxreg_inter_estimations(
287		#' variable = "armcd", given = "covar1",
288		#' lvl_var = levels(dta_bladder$armcd),
289		#' lvl_given = levels(dta_bladder$covar1),
290		#' mod = mod, conf_level = .95
291		#' )
292		#' result
293		#'
294		#' @export
h_coxreg_inter_estimations <- function(variable,
                                       given,
                                       lvl_var,
                                       lvl_given,
                                       mod,
                                       conf_level = 0.95) {
  # Coefficient-style names: the first level of `variable` is the reference and is skipped.
  var_lvl <- paste0(variable, lvl_var[-1])
  giv_lvl <- paste0(given, lvl_given)

  # One row per combination of a `variable` level and a `given` level, together with
  # the two possible spellings of the corresponding interaction term.
  combos <- expand.grid(variable = var_lvl, given = giv_lvl)
  combos <- combos[order(combos$variable, combos$given), ]
  combos$rev_inter <- paste0(combos$given, ":", combos$variable)
  combos$inter <- paste0(combos$variable, ":", combos$given)

  split_by_variable <- combos$variable
  interaction_names <- paste(combos$variable, combos$given, sep = "/")

  # All-zero vector carrying the column names of the model matrix.
  contrast_template <- stats::model.matrix(mod)[1, ]
  contrast_template[contrast_template != 0] <- 0

  # Per combination, flag the coefficients entering the estimate: the `variable`
  # level itself and its interaction with the `given` level, but not the `given`
  # main effect.
  contrasts <- apply(
    X = combos, MARGIN = 1, FUN = function(combo) {
      active_terms <- combo[names(combo) != "given"]
      contrast_template[names(contrast_template) %in% active_terms] <- 1
      contrast_template
    }
  )
  colnames(contrasts) <- interaction_names

  betas <- as.matrix(stats::coef(mod))
  mod_vcov <- stats::vcov(mod)

  # Estimate: sum of the flagged coefficients.
  coef_hat <- t(contrasts) %*% betas
  colnames(coef_hat) <- "coef"

  # Standard error: square root of the sum of the variance-covariance entries
  # of the flagged coefficients.
  coef_se <- apply(
    contrasts, 2,
    function(contrast) {
      active <- as.logical(contrast)
      sqrt(sum(mod_vcov[active, active]))
    }
  )

  q_norm <- stats::qnorm((1 + conf_level) / 2)
  estimates <- cbind(coef_hat, `se(coef)` = coef_se)
  estimates <- cbind(
    estimates,
    hr = exp(estimates[, "coef"]),
    lcl = exp(estimates[, "coef"] - q_norm * estimates[, "se(coef)"]),
    ucl = exp(estimates[, "coef"] + q_norm * estimates[, "se(coef)"])
  )

  # One matrix per level of `variable`.
  y <- by(estimates, split_by_variable, identity)
  y <- lapply(y, as.matrix)
  attr(y, "details") <- paste0(
    "Estimations of ", variable,
    " hazard ratio given the level of ", given, " compared to ",
    variable, " level ", lvl_var[1], "."
  )
  y
}

1		#' Counting Patients and Events in Columns
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Counting the number of unique patients and the total number of all and specific events
6		#' when a column table layout is required.
7		#'
8		#' @inheritParams argument_convention
9		#'
10		#' @name count_patients_events_in_cols
11		NULL
12
13		#' @describeIn count_patients_events_in_cols Statistics function which counts numbers of patients and multiple
14		#' events defined by filters. Used as analysis function `afun` in `summarize_patients_events_in_cols()`.
15		#'
#' @param filters_list (named `list` of `character`)\cr each element in this list describes one
#'   type of event described by filters, in the same format as [s_count_patients_with_event()].
18		#' If it has a label, then this will be used for the column title.
19		#' @param empty_stats (`character`)\cr optional names of the statistics that should be returned empty such
20		#' that corresponding table cells will stay blank.
21		#' @param custom_label (`string` or `NULL`)\cr if provided and `labelstr` is empty then this will
22		#' be used as label.
23		#'
24		#' @return
25		#' * `s_count_patients_and_multiple_events()` returns a list with the statistics:
26		#' - `unique`: number of unique patients in `df`.
27		#' - `all`: number of rows in `df`.
28		#' - one element with the same name as in `filters_list`: number of rows in `df`,
29		#' i.e. events, fulfilling the filter condition.
30		#'
31		#' @examples
32		#' # `s_count_patients_and_multiple_events()`
33		#' df <- data.frame(
34		#' USUBJID = rep(c("id1", "id2", "id3", "id4"), c(2, 3, 1, 1)),
35		#' ARM = c("A", "A", "B", "B", "B", "B", "A"),
36		#' AESER = rep("Y", 7),
37		#' AESDTH = c("Y", "Y", "N", "Y", "Y", "N", "N"),
38		#' AEREL = c("Y", "Y", "N", "Y", "Y", "N", "Y"),
39		#' AEDECOD = c("A", "A", "A", "B", "B", "C", "D"),
40		#' AEBODSYS = rep(c("SOC1", "SOC2", "SOC3"), c(3, 3, 1))
41		#' )
42		#'
43		#' @keywords internal
s_count_patients_and_multiple_events <- function(df, # nolint
                                                 id,
                                                 filters_list,
                                                 empty_stats = character(),
                                                 labelstr = "",
                                                 custom_label = NULL) {
  checkmate::assert_list(filters_list, names = "named")
  checkmate::assert_data_frame(df)
  checkmate::assert_string(id)
  checkmate::assert_disjunct(c("unique", "all"), names(filters_list))
  checkmate::assert_character(empty_stats)
  checkmate::assert_string(labelstr)
  checkmate::assert_string(custom_label, null.ok = TRUE)

  # Below we want to count each row in `df` once, therefore introducing this helper index column.
  df$.row_index <- as.character(seq_len(nrow(df)))

  # Label priority: non-empty `labelstr`, then `custom_label`, then the default "counts".
  row_label <- "counts"
  if (!is.null(custom_label)) {
    row_label <- custom_label
  }
  if (labelstr != "") {
    row_label <- labelstr
  }

  # Patient and overall event counts.
  base_stats <- list(
    unique = formatters::with_label(
      s_num_patients_content(df = df, .N_col = 1, .var = id, required = NULL)$unique[1L],
      row_label
    ),
    all = formatters::with_label(
      nrow(df),
      row_label
    )
  )

  # One count per filter; rows are identified by the helper index so each row counts once.
  event_stats <- lapply(
    filters_list,
    function(filters) {
      formatters::with_label(
        s_count_patients_with_event(
          df = df, .var = ".row_index", filters = filters, .N_col = 1, .N_row = 1
        )$count,
        row_label
      )
    }
  )

  stats <- c(base_stats, event_stats)

  # Statistics requested as empty are replaced by an empty character vector that keeps the label.
  for (stat in intersect(names(stats), empty_stats)) {
    stats[[stat]] <- formatters::with_label(character(), obj_label(stats[[stat]]))
  }
  stats
}
97
98		#' @describeIn count_patients_events_in_cols Layout-creating function which can take statistics function
99		#' arguments and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
100		#'
101		#' @param col_split (`flag`)\cr whether the columns should be split.
102		#' Set to `FALSE` when the required column split has been done already earlier in the layout pipe.
103		#'
104		#' @return
105		#' * `summarize_patients_events_in_cols()` returns a layout object suitable for passing to further layouting functions,
106		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows
107		#' containing the statistics from `s_count_patients_and_multiple_events()` to the table layout.
108		#' @examples
109		#' # `summarize_patients_events_in_cols()`
110		#' basic_table() %>%
111		#' summarize_patients_events_in_cols(
112		#' filters_list = list(
113		#' related = formatters::with_label(c(AEREL = "Y"), "Events (Related)"),
114		#' fatal = c(AESDTH = "Y"),
115		#' fatal_related = c(AEREL = "Y", AESDTH = "Y")
116		#' ),
117		#' custom_label = "%s Total number of patients and events"
118		#' ) %>%
119		#' build_table(df)
120		#'
121		#' @export
summarize_patients_events_in_cols <- function(lyt, # nolint
                                              id = "USUBJID",
                                              filters_list = list(),
                                              ...,
                                              .stats = c(
                                                "unique",
                                                "all",
                                                names(filters_list)
                                              ),
                                              .labels = c(
                                                unique = "Patients (All)",
                                                all = "Events (All)",
                                                labels_or_names(filters_list)
                                              ),
                                              col_split = TRUE) {
  # Builds the content function that reports a single statistic.
  build_afun <- function(stat) {
    make_afun(
      s_count_patients_and_multiple_events,
      id = id,
      filters_list = filters_list,
      .stats = stat,
      .formats = "xx."
    )
  }

  # One content function per requested statistic, named after the statistic.
  afun_list <- Map(build_afun, .stats)

  # Add one column per statistic unless the caller has split the columns already.
  if (col_split) {
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = rep(id, length(.stats)),
      varlabels = .labels[.stats]
    )
  }

  summarize_row_groups(lyt = lyt, cfun = afun_list, extra_args = list(...))
}

1		#' Tabulate Survival Duration by Subgroup
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Tabulate statistics such as median survival time and hazard ratio for population subgroups.
6		#'
7		#' @inheritParams argument_convention
8		#' @inheritParams survival_coxph_pairwise
9		#' @param data (`data.frame`)\cr the dataset containing the variables to summarize.
10		#' @param groups_lists (named `list` of `list`)\cr optionally contains for each `subgroups` variable a list, which
11		#' specifies the new group levels via the names and the levels that belong to it in the character vectors that are
12		#' elements of the list.
13		#' @param label_all (`string`)\cr label for the total population analysis.
14		#' @param time_unit (`string`)\cr label with unit of median survival time. Default `NULL` skips displaying unit.
15		#'
16		#' @details These functions create a layout starting from a data frame which contains
17		#' the required statistics. Tables typically used as part of forest plot.
18		#'
19		#' @seealso [extract_survival_subgroups()]
20		#'
21		#' @examples
22		#' library(dplyr)
23		#' library(forcats)
24		#'
25		#' adtte <- tern_ex_adtte
26		#'
27		#' # Save variable labels before data processing steps.
28		#' adtte_labels <- formatters::var_labels(adtte)
29		#'
30		#' adtte_f <- adtte %>%
31		#' filter(
32		#' PARAMCD == "OS",
33		#' ARM %in% c("B: Placebo", "A: Drug X"),
34		#' SEX %in% c("M", "F")
35		#' ) %>%
36		#' mutate(
37		#' # Reorder levels of ARM to display reference arm before treatment arm.
38		#' ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
39		#' SEX = droplevels(SEX),
40		#' AVALU = as.character(AVALU),
41		#' is_event = CNSR == 0
42		#' )
43		#' labels <- c(
44		#' "ARM" = adtte_labels[["ARM"]],
45		#' "SEX" = adtte_labels[["SEX"]],
46		#' "AVALU" = adtte_labels[["AVALU"]],
47		#' "is_event" = "Event Flag"
48		#' )
49		#' formatters::var_labels(adtte_f)[names(labels)] <- labels
50		#'
51		#' df <- extract_survival_subgroups(
52		#' variables = list(
53		#' tte = "AVAL",
54		#' is_event = "is_event",
55		#' arm = "ARM", subgroups = c("SEX", "BMRKR2")
56		#' ),
57		#' data = adtte_f
58		#' )
59		#' df
60		#'
61		#' @name survival_duration_subgroups
62		NULL
63
64		#' Prepares Survival Data for Population Subgroups in Data Frames
65		#'
66		#' @description `r lifecycle::badge("stable")`
67		#'
68		#' Prepares estimates of median survival times and treatment hazard ratios for population subgroups in
69		#' data frames. Simple wrapper for [h_survtime_subgroups_df()] and [h_coxph_subgroups_df()]. Result is a `list`
70		#' of two `data.frame`s: `survtime` and `hr`. `variables` corresponds to the names of variables found in `data`,
71		#' passed as a named `list` and requires elements `tte`, `is_event`, `arm` and optionally `subgroups` and `strat`.
72		#' `groups_lists` optionally specifies groupings for `subgroups` variables.
73		#'
74		#' @inheritParams argument_convention
75		#' @inheritParams survival_duration_subgroups
76		#' @inheritParams survival_coxph_pairwise
77		#'
78		#' @return A named `list` of two elements:
79		#' * `survtime`: A `data.frame` containing columns `arm`, `n`, `n_events`, `median`, `subgroup`, `var`,
80		#' `var_label`, and `row_type`.
81		#' * `hr`: A `data.frame` containing columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`, `conf_level`,
82		#' `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`.
83		#'
84		#' @seealso [survival_duration_subgroups]
85		#'
86		#' @examples
87		#' library(dplyr)
88		#' library(forcats)
89		#'
90		#' adtte <- tern_ex_adtte
91		#' adtte_labels <- formatters::var_labels(adtte)
92		#'
93		#' adtte_f <- adtte %>%
94		#' filter(
95		#' PARAMCD == "OS",
96		#' ARM %in% c("B: Placebo", "A: Drug X"),
97		#' SEX %in% c("M", "F")
98		#' ) %>%
99		#' mutate(
100		#' # Reorder levels of ARM to display reference arm before treatment arm.
101		#' ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
102		#' SEX = droplevels(SEX),
103		#' AVALU = as.character(AVALU),
104		#' is_event = CNSR == 0
105		#' )
106		#' labels <- c(
107		#' "ARM" = adtte_labels[["ARM"]],
108		#' "SEX" = adtte_labels[["SEX"]],
109		#' "AVALU" = adtte_labels[["AVALU"]],
110		#' "is_event" = "Event Flag"
111		#' )
112		#' formatters::var_labels(adtte_f)[names(labels)] <- labels
113		#'
114		#' df <- extract_survival_subgroups(
115		#' variables = list(
116		#' tte = "AVAL",
117		#' is_event = "is_event",
118		#' arm = "ARM", subgroups = c("SEX", "BMRKR2")
119		#' ),
120		#' data = adtte_f
121		#' )
122		#' df
123		#'
124		#' df_grouped <- extract_survival_subgroups(
125		#' variables = list(
126		#' tte = "AVAL",
127		#' is_event = "is_event",
128		#' arm = "ARM", subgroups = c("SEX", "BMRKR2")
129		#' ),
130		#' data = adtte_f,
131		#' groups_lists = list(
132		#' BMRKR2 = list(
133		#' "low" = "LOW",
134		#' "low/medium" = c("LOW", "MEDIUM"),
135		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
136		#' )
137		#' )
138		#' )
139		#' df_grouped
140		#'
141		#' @export
extract_survival_subgroups <- function(variables,
                                       data,
                                       groups_lists = list(),
                                       control = control_coxph(),
                                       label_all = "All Patients") {
  # Survival-time summary for the overall population and each subgroup.
  survtime <- h_survtime_subgroups_df(variables, data, groups_lists = groups_lists, label_all = label_all)

  # Cox model results for the same population and subgroups.
  hr <- h_coxph_subgroups_df(
    variables,
    data,
    groups_lists = groups_lists,
    control = control,
    label_all = label_all
  )

  list(survtime = survtime, hr = hr)
}
163
164		#' @describeIn survival_duration_subgroups Formatted analysis function which is used as
165		#' `afun` in `tabulate_survival_subgroups()`.
166		#'
167		#' @return
168		#' * `a_survival_subgroups()` returns the corresponding list with formatted [rtables::CellValue()].
169		#'
170		#' @keywords internal
a_survival_subgroups <- function(.formats = list(
                                   n = "xx",
                                   n_events = "xx",
                                   n_tot_events = "xx",
                                   median = "xx.x",
                                   n_tot = "xx",
                                   hr = list(format_extreme_values(2L)),
                                   ci = list(format_extreme_values_ci(2L)),
                                   # A plain `|` is required here: `\|` is not a valid escape in an R string.
                                   pval = "x.xxxx | (<0.0001)"
                                 )) {
  checkmate::assert_list(.formats)
  checkmate::assert_subset(
    names(.formats),
    c("n", "n_events", "median", "n_tot", "n_tot_events", "hr", "ci", "pval")
  )

  # One analysis function per statistic, named after the statistic.
  afun_lst <- Map(
    function(stat, fmt) {
      # Evaluate the format now so the returned closure does not hold an unevaluated promise.
      force(fmt)
      if (stat == "ci") {
        # The interval is assembled from the two bound columns `lcl` and `ucl`.
        function(df, labelstr = "", ...) {
          in_rows(
            .list = combine_vectors(df$lcl, df$ucl),
            .labels = as.character(df$subgroup),
            .formats = fmt
          )
        }
      } else {
        # Every other statistic is read from the column of the same name.
        function(df, labelstr = "", ...) {
          in_rows(
            .list = as.list(df[[stat]]),
            .labels = as.character(df$subgroup),
            .formats = fmt
          )
        }
      }
    },
    stat = names(.formats),
    fmt = .formats
  )

  afun_lst
}
213
214		#' @describeIn survival_duration_subgroups Table-creating function which creates a table
215		#' summarizing survival by subgroup. This function is a wrapper for [rtables::analyze_colvars()]
216		#' and [rtables::summarize_row_groups()].
217		#'
218		#' @param df (`list`)\cr of data frames containing all analysis variables. List should be
219		#' created using [extract_survival_subgroups()].
220		#' @param vars (`character`)\cr the name of statistics to be reported among:
221		#' * `n_tot_events`: Total number of events per group.
222		#' * `n_events`: Number of events per group.
223		#' * `n_tot`: Total number of observations per group.
224		#' * `n`: Number of observations per group.
225		#' * `median`: Median survival time.
226		#' * `hr`: Hazard ratio.
227		#' * `ci`: Confidence interval of hazard ratio.
228		#' * `pval`: p-value of the effect.
229		#' Note, one of the statistics `n_tot` and `n_tot_events`, as well as both `hr` and `ci`
230		#' are required.
231		#'
232		#' @return An `rtables` table summarizing survival by subgroup.
233		#'
234		#' @examples
235		#' ## Table with default columns.
236		#' basic_table() %>%
237		#' tabulate_survival_subgroups(df, time_unit = adtte_f$AVALU[1])
238		#'
239		#' ## Table with a manually chosen set of columns: adding "pval".
240		#' basic_table() %>%
241		#' tabulate_survival_subgroups(
242		#' df = df,
243		#' vars = c("n_tot_events", "n_events", "median", "hr", "ci", "pval"),
244		#' time_unit = adtte_f$AVALU[1]
245		#' )
246		#'
247		#' @export
tabulate_survival_subgroups <- function(lyt,
                                        df,
                                        vars = c("n_tot_events", "n_events", "median", "hr", "ci"),
                                        time_unit = NULL) {
  conf_level <- df$hr$conf_level[1]
  method <- df$hr$pval_label[1]

  afun_lst <- a_survival_subgroups()
  colvars <- d_survival_subgroups_colvars(
    vars,
    conf_level = conf_level,
    method = method,
    time_unit = time_unit
  )

  # Split the requested columns by the data frame that holds them.
  is_survtime <- names(colvars$labels) %in% c("n", "n_events", "median")
  is_hr <- names(colvars$labels) %in% c("n_tot", "n_tot_events", "hr", "ci", "pval")
  colvars_survtime <- list(
    vars = colvars$vars[is_survtime],
    labels = colvars$labels[is_survtime]
  )
  colvars_hr <- list(
    vars = colvars$vars[is_hr],
    labels = colvars$labels[is_hr]
  )

  # Columns from table_survtime are optional.
  if (length(colvars_survtime$vars) > 0) {
    lyt_survtime <- split_cols_by(lyt = lyt, var = "arm")
    lyt_survtime <- split_rows_by(
      lyt = lyt_survtime,
      var = "row_type",
      split_fun = keep_split_levels("content"),
      nested = FALSE
    )
    lyt_survtime <- summarize_row_groups(
      lyt = lyt_survtime,
      var = "var_label",
      cfun = afun_lst[names(colvars_survtime$labels)]
    )
    lyt_survtime <- split_cols_by_multivar(
      lyt = lyt_survtime,
      vars = colvars_survtime$vars,
      varlabels = colvars_survtime$labels
    )

    if ("analysis" %in% df$survtime$row_type) {
      lyt_survtime <- split_rows_by(
        lyt = lyt_survtime,
        var = "row_type",
        split_fun = keep_split_levels("analysis"),
        nested = FALSE,
        child_labels = "hidden"
      )
      lyt_survtime <- split_rows_by(lyt = lyt_survtime, var = "var_label", nested = TRUE)
      lyt_survtime <- analyze_colvars(
        lyt = lyt_survtime,
        afun = afun_lst[names(colvars_survtime$labels)],
        inclNAs = TRUE
      )
    }

    table_survtime <- build_table(lyt_survtime, df = df$survtime)
  } else {
    table_survtime <- NULL
  }

  # Columns "n_tot_events" or "n_tot", and "hr", "ci" in table_hr are required.
  lyt_hr <- split_cols_by(lyt = lyt, var = "arm")
  lyt_hr <- split_rows_by(
    lyt = lyt_hr,
    var = "row_type",
    split_fun = keep_split_levels("content"),
    nested = FALSE
  )
  lyt_hr <- summarize_row_groups(
    lyt = lyt_hr,
    var = "var_label",
    cfun = afun_lst[names(colvars_hr$labels)]
  )
  lyt_hr <- split_cols_by_multivar(
    lyt = lyt_hr,
    vars = colvars_hr$vars,
    varlabels = colvars_hr$labels
  ) %>%
    append_topleft("Baseline Risk Factors")

  # This table is built from `df$hr`, so the presence of analysis rows is
  # checked on `df$hr` and not on `df$survtime`.
  if ("analysis" %in% df$hr$row_type) {
    lyt_hr <- split_rows_by(
      lyt = lyt_hr,
      var = "row_type",
      split_fun = keep_split_levels("analysis"),
      nested = FALSE,
      child_labels = "hidden"
    )
    lyt_hr <- split_rows_by(lyt = lyt_hr, var = "var_label", nested = TRUE)
    lyt_hr <- analyze_colvars(
      lyt = lyt_hr,
      afun = afun_lst[names(colvars_hr$labels)],
      inclNAs = TRUE
    )
  }
  table_hr <- build_table(lyt_hr, df = df$hr)

  # There can be one or two vars starting with "n_tot".
  n_tot_ids <- grep("^n_tot", colvars_hr$vars)
  if (is.null(table_survtime)) {
    result <- table_hr
    hr_id <- match("hr", colvars_hr$vars)
    # The "ci" column is registered under the name "lcl" in `colvars$vars`.
    ci_id <- match("lcl", colvars_hr$vars)
  } else {
    # Reorder the table: total counts first, then survival times, then the remaining hr columns.
    result <- cbind_rtables(table_hr[, n_tot_ids], table_survtime, table_hr[, -n_tot_ids])
    # And then calculate column indices accordingly.
    n_leading_cols <- length(n_tot_ids) + ncol(table_survtime)
    hr_id <- n_leading_cols + match("hr", colvars_hr$vars[-n_tot_ids])
    ci_id <- n_leading_cols + match("lcl", colvars_hr$vars[-n_tot_ids])
    n_tot_ids <- seq_along(n_tot_ids)
  }

  # The column positions are attached as attributes of the returned table.
  structure(
    result,
    forest_header = paste0(rev(levels(df$survtime$arm)), "\nBetter"),
    col_x = hr_id,
    col_ci = ci_id,
    # Take the first one for scaling the symbol sizes in graph.
    col_symbol_size = n_tot_ids[1]
  )
}
374
375		#' Labels for Column Variables in Survival Duration by Subgroup Table
376		#'
377		#' @description `r lifecycle::badge("stable")`
378		#'
379		#' Internal function to check variables included in [tabulate_survival_subgroups()] and create column labels.
380		#'
381		#' @inheritParams tabulate_survival_subgroups
382		#' @inheritParams argument_convention
383		#' @param method (`character`)\cr p-value method for testing hazard ratio = 1.
384		#'
385		#' @return A `list` of variables and their labels to tabulate.
386		#'
387		#' @note At least one of `n_tot` and `n_tot_events` must be provided in `vars`.
388		#'
389		#' @export
d_survival_subgroups_colvars <- function(vars,
                                         conf_level,
                                         method,
                                         time_unit = NULL) {
  checkmate::assert_character(vars)
  checkmate::assert_string(time_unit, null.ok = TRUE)
  checkmate::assert_subset(c("hr", "ci"), vars)
  checkmate::assert_true(any(c("n_tot", "n_tot_events") %in% vars))
  checkmate::assert_subset(
    vars,
    c("n", "n_events", "median", "n_tot", "n_tot_events", "hr", "ci", "pval")
  )

  # The unit, when given, is appended to the median column header.
  median_label <- if (is.null(time_unit)) {
    "Median"
  } else {
    paste0("Median (", time_unit, ")")
  }

  all_labels <- c(
    n = "n",
    n_events = "Events",
    median = median_label,
    n_tot = "Total n",
    n_tot_events = "Total Events",
    hr = "Hazard Ratio",
    ci = paste0(100 * conf_level, "% Wald CI"),
    pval = method
  )

  # "ci" is not a column of the analysis data, so `lcl` stands in for it as a
  # placeholder. The tabulation itself uses both `lcl` and `ucl`, see
  # `a_survival_subgroups` for details.
  list(
    vars = replace(vars, vars == "ci", "lcl"),
    labels = all_labels[vars]
  )
}

1		#' Patient Counts for Laboratory Events (Worsen From Baseline) by Highest Grade Post-Baseline
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Patient count and fraction for laboratory events (worsen from baseline) shift table.
6		#'
7		#' @inheritParams argument_convention
8		#'
9		#' @seealso Relevant helper functions [h_adlb_worsen()] and [h_worsen_counter()]
10		#'
11		#' @name abnormal_by_worst_grade_worsen
12		NULL
13
14		#' Helper Function to Prepare `ADLB` with Worst Labs
15		#'
16		#' @description `r lifecycle::badge("stable")`
17		#'
18		#' Helper function to prepare a `df` for generating the patient count shift table.
19		#'
20		#' @param adlb (`data.frame`)\cr `ADLB` dataframe
21		#' @param worst_flag_low (named `vector`)\cr Worst low post-baseline lab grade flag variable
22		#' @param worst_flag_high (named `vector`)\cr Worst high post-baseline lab grade flag variable
23		#' @param direction_var (`string`)\cr Direction variable specifying the direction of the shift table of interest.
24		#' Only lab records flagged by `L`, `H` or `B` are included in the shift table.
25		#' * `L`: low direction only
26		#' * `H`: high direction only
27		#' * `B`: both low and high directions
28		#'
29		#' @return `h_adlb_worsen()` returns the `adlb` `data.frame` containing only the
30		#' worst labs specified according to `worst_flag_low` or `worst_flag_high` for the
31		#' direction specified according to `direction_var`. For instance, for a lab that is
32		#' needed for the low direction only, only records flagged by `worst_flag_low` are
33		#' selected. For a lab that is needed for both low and high directions, the worst
34		#' low records are selected for the low direction, and the worst high records are selected
35		#' for the high direction.
36		#'
37		#' @seealso [abnormal_by_worst_grade_worsen]
38		#'
39		#' @examples
40		#' library(dplyr)
41		#'
42		#' # The direction variable, GRADDR, is based on metadata
43		#' adlb <- tern_ex_adlb %>%
44		#' mutate(
45		#' GRADDR = case_when(
46		#' PARAMCD == "ALT" ~ "B",
47		#' PARAMCD == "CRP" ~ "L",
48		#' PARAMCD == "IGA" ~ "H"
49		#' )
50		#' ) %>%
51		#' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
52		#'
53		#' df <- h_adlb_worsen(
54		#' adlb,
55		#' worst_flag_low = c("WGRLOFL" = "Y"),
56		#' worst_flag_high = c("WGRHIFL" = "Y"),
57		#' direction_var = "GRADDR"
58		#' )
59		#'
60		#' @export
h_adlb_worsen <- function(adlb,
                          worst_flag_low = NULL,
                          worst_flag_high = NULL,
                          direction_var) {
  checkmate::assert_string(direction_var)
  assert_df_with_variables(adlb, list("Col" = direction_var))
  checkmate::assert_subset(as.character(unique(adlb[[direction_var]])), c("B", "L", "H"))

  # Without any flag there is nothing to select; fail with a clear message
  # instead of an "object 'out' not found" error further down.
  if (is.null(worst_flag_low) && is.null(worst_flag_high)) {
    stop("at least one of `worst_flag_low` and `worst_flag_high` must be specified", call. = FALSE)
  }

  # The flag columns needed depend on which directions occur in the data.
  directions <- unique(adlb[[direction_var]])

  if ("H" %in% directions) {
    assert_df_with_variables(adlb, list("High" = names(worst_flag_high)))
  }

  if ("L" %in% directions) {
    assert_df_with_variables(adlb, list("Low" = names(worst_flag_low)))
  }

  if ("B" %in% directions) {
    assert_df_with_variables(
      adlb,
      list(
        "Low" = names(worst_flag_low),
        "High" = names(worst_flag_high)
      )
    )
  }

  # extract patients with worst post-baseline lab, either low or high or both
  worst_flag <- c(worst_flag_low, worst_flag_high)
  flagged_rows <- Map(
    function(flag_col, flag_value) which(adlb[[flag_col]] == flag_value),
    names(worst_flag),
    worst_flag
  )
  adlb_f <- adlb[Reduce(union, flagged_rows), ]

  # Records with direction `code`, relabelled as `label` ("High" or "Low"),
  # restricted to those flagged by `flag`.
  select_worst <- function(code, flag, label) {
    subset_dir <- adlb_f[which(adlb_f[[direction_var]] == code), ]
    subset_dir[[direction_var]] <- rep(label, nrow(subset_dir))
    subset_dir[which(subset_dir[[names(flag)]] == flag), ]
  }

  # Labs requiring both directions ("B") appear once as "High" and once as
  # "Low". Stacking order: H, B as High, L, B as Low.
  pieces <- list()
  if (!is.null(worst_flag_high)) {
    pieces <- c(
      pieces,
      list(
        select_worst("H", worst_flag_high, "High"),
        select_worst("B", worst_flag_high, "High")
      )
    )
  }
  if (!is.null(worst_flag_low)) {
    pieces <- c(
      pieces,
      list(
        select_worst("L", worst_flag_low, "Low"),
        select_worst("B", worst_flag_low, "Low")
      )
    )
  }
  out <- do.call(rbind, pieces)

  # Carry the variable labels of the input over to the result.
  formatters::var_labels(out) <- formatters::var_labels(adlb_f, fill = FALSE)

  out
}
149
150		#' Helper Function to Analyze Patients for [s_count_abnormal_lab_worsen_by_baseline()]
151		#'
152		#' @description `r lifecycle::badge("stable")`
153		#'
154		#' Helper function to count the number of patients and the fraction of patients according to
155		#' highest post-baseline lab grade variable `.var`, baseline lab grade variable `baseline_var`,
156		#' and the direction of interest specified in `direction_var`.
157		#'
158		#' @inheritParams argument_convention
159		#' @inheritParams h_adlb_worsen
160		#' @param baseline_var (`string`)\cr baseline lab grade variable
161		#'
162		#' @return `h_worsen_counter()` returns the counts and fraction of patients
163		#' whose worst post-baseline lab grades are worse than their baseline grades, for
164		#' post-baseline worst grades "1", "2", "3", "4" and "Any".
165		#'
166		#' @seealso [abnormal_by_worst_grade_worsen]
167		#'
168		#' @examples
169		#' library(dplyr)
170		#'
171		#' # The direction variable, GRADDR, is based on metadata
172		#' adlb <- tern_ex_adlb %>%
173		#' mutate(
174		#' GRADDR = case_when(
175		#' PARAMCD == "ALT" ~ "B",
176		#' PARAMCD == "CRP" ~ "L",
177		#' PARAMCD == "IGA" ~ "H"
178		#' )
179		#' ) %>%
180		#' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
181		#'
182		#' df <- h_adlb_worsen(
183		#' adlb,
184		#' worst_flag_low = c("WGRLOFL" = "Y"),
185		#' worst_flag_high = c("WGRHIFL" = "Y"),
186		#' direction_var = "GRADDR"
187		#' )
188		#'
189		#' # `h_worsen_counter`
190		#' h_worsen_counter(
191		#' df %>% filter(PARAMCD == "CRP" & GRADDR == "Low"),
192		#' id = "USUBJID",
193		#' .var = "ATOXGR",
194		#' baseline_var = "BTOXGR",
195		#' direction_var = "GRADDR"
196		#' )
197		#'
198		#' @export
h_worsen_counter <- function(df, id, .var, baseline_var, direction_var) {
  checkmate::assert_string(id)
  checkmate::assert_string(.var)
  checkmate::assert_string(baseline_var)
  checkmate::assert_scalar(unique(df[[direction_var]]))
  checkmate::assert_subset(unique(df[[direction_var]]), c("High", "Low"))
  assert_df_with_variables(df, list(val = c(id, .var, baseline_var, direction_var)))

  # Obtain directionality before any rows are removed: once all post-baseline
  # values are missing the filtered data has no rows, and the direction could
  # no longer be read from it.
  direction <- unique(df[[direction_var]])

  # Remove post-baseline missing. `which()` also drops rows whose grade is `NA`
  # instead of turning them into all-`NA` rows.
  df <- df[which(df[[.var]] != "<Missing>"), ]

  if (direction == "Low") {
    grade <- -1:-4
    worst_grade <- -4
  } else if (direction == "High") {
    grade <- 1:4
    worst_grade <- 4
  }

  if (nrow(df) > 0) {
    by_grade <- lapply(grade, function(i) {
      # filter baseline values that are less severe than i or <Missing>
      df_temp <- df[df[[baseline_var]] %in% c((i + sign(i) * -1):(-1 * worst_grade), "<Missing>"), ]
      # num: number of patients with post-baseline worst lab equal to i
      num <- length(unique(df_temp[df_temp[[.var]] %in% i, id, drop = TRUE]))
      # denom: number of patients with baseline values less than i or <missing> and post-baseline in the same direction
      denom <- length(unique(df_temp[[id]]))
      c(num = num, denom = denom)
    })
  } else {
    # No records left after removing missing values: report a single zero entry.
    by_grade <- lapply(1, function(i) {
      c(num = 0, denom = 0)
    })
  }

  names(by_grade) <- as.character(seq_along(by_grade))

  # baseline grade less than the worst grade, or missing
  df_temp <- df[!df[[baseline_var]] %in% worst_grade, ]

  # denom: number of patients with baseline values less than 4 or <missing> and post-baseline in the same direction
  denom <- length(unique(df_temp[, id, drop = TRUE]))

  # condition 1: missing baseline and in the direction of abnormality
  con1 <- which(df_temp[[baseline_var]] == "<Missing>" & df_temp[[.var]] %in% grade)
  df_temp_nm <- df_temp[which(df_temp[[baseline_var]] != "<Missing>" & df_temp[[.var]] %in% grade), ]

  # condition 2: if post-baseline values are present then post-baseline values must be worse than baseline
  post_grade <- as.numeric(as.character(df_temp_nm[[.var]]))
  base_grade <- as.numeric(as.character(df_temp_nm[[baseline_var]]))
  if (direction == "Low") {
    con2 <- which(post_grade < base_grade)
  } else {
    con2 <- which(post_grade > base_grade)
  }

  # number of patients satisfying either condition 1 or 2
  num <- length(unique(df_temp[union(con1, con2), id, drop = TRUE]))

  list(fraction = c(by_grade, list("Any" = c(num = num, denom = denom))))
}
262
263		#' @describeIn abnormal_by_worst_grade_worsen Statistics function for patients whose worst post-baseline
264		#' lab grades are worse than their baseline grades.
265		#'
266		#' @param variables (named `list` of `string`)\cr list of additional analysis variables including:
267		#' * `id` (`string`)\cr subject variable name.
268		#' * `baseline_var` (`string`)\cr name of the data column containing baseline toxicity variable.
269		#' * `direction_var` (`string`)\cr see `direction_var` for more details.
270		#'
271		#' @return
272		#' * `s_count_abnormal_lab_worsen_by_baseline()` returns the counts and fraction of patients whose worst
273		#' post-baseline lab grades are worse than their baseline grades, for post-baseline worst grades
274		#' "1", "2", "3", "4" and "Any".
275		#'
276		#' @examples
277		#' library(dplyr)
278		#'
279		#' # The direction variable, GRADDR, is based on metadata
280		#' adlb <- tern_ex_adlb %>%
281		#' mutate(
282		#' GRADDR = case_when(
283		#' PARAMCD == "ALT" ~ "B",
284		#' PARAMCD == "CRP" ~ "L",
285		#' PARAMCD == "IGA" ~ "H"
286		#' )
287		#' ) %>%
288		#' filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
289		#'
290		#' df <- h_adlb_worsen(
291		#' adlb,
292		#' worst_flag_low = c("WGRLOFL" = "Y"),
293		#' worst_flag_high = c("WGRHIFL" = "Y"),
294		#' direction_var = "GRADDR"
295		#' )
296		#'
297		#' @keywords internal
s_count_abnormal_lab_worsen_by_baseline <- function(df, # nolint
                                                    .var = "ATOXGR",
                                                    variables = list(
                                                      id = "USUBJID",
                                                      baseline_var = "BTOXGR",
                                                      direction_var = "GRADDR"
                                                    )) {
  # Validate the analysis variable and the three required helper variables.
  checkmate::assert_string(.var)
  checkmate::assert_set_equal(names(variables), c("id", "baseline_var", "direction_var"))
  checkmate::assert_string(variables[["id"]])
  checkmate::assert_string(variables[["baseline_var"]])
  checkmate::assert_string(variables[["direction_var"]])
  assert_df_with_variables(df, c(aval = .var, variables[seq_len(3L)]))
  assert_list_of_variables(variables)

  # The counting itself is delegated to the helper.
  h_worsen_counter(
    df,
    variables[["id"]],
    .var,
    variables[["baseline_var"]],
    variables[["direction_var"]]
  )
}
315
316
317		#' @describeIn abnormal_by_worst_grade_worsen Formatted analysis function which is used as `afun`
318		#' in `count_abnormal_lab_worsen_by_baseline()`.
319		#'
320		#' @return
321		#' * `a_count_abnormal_lab_worsen_by_baseline()` returns the corresponding list with
322		#' formatted [rtables::CellValue()].
323		#'
324		#' @keywords internal
# Wraps the statistics function; the `fraction` statistic is formatted with
# `format_fraction` and listed under `.ungroup_stats`.
a_count_abnormal_lab_worsen_by_baseline <- make_afun( # nolint
  s_count_abnormal_lab_worsen_by_baseline,
  .formats = list(fraction = format_fraction),
  .ungroup_stats = "fraction"
)
330
331		#' @describeIn abnormal_by_worst_grade_worsen Layout-creating function which can take statistics function
332		#' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
333		#'
334		#' @return
335		#' * `count_abnormal_lab_worsen_by_baseline()` returns a layout object suitable for passing to further layouting
336		#' functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted
337		#' rows containing the statistics from `s_count_abnormal_lab_worsen_by_baseline()` to the table layout.
338		#'
339		#' @examples
340		#' basic_table() %>%
341		#' split_cols_by("ARMCD") %>%
342		#' add_colcounts() %>%
343		#' split_rows_by("PARAMCD") %>%
344		#' split_rows_by("GRADDR") %>%
345		#' count_abnormal_lab_worsen_by_baseline(
346		#' var = "ATOXGR",
347		#' variables = list(
348		#' id = "USUBJID",
349		#' baseline_var = "BTOXGR",
350		#' direction_var = "GRADDR"
351		#' )
352		#' ) %>%
353		#' append_topleft("Direction of Abnormality") %>%
354		#' build_table(df = df, alt_counts_df = tern_ex_adsl)
355		#'
356		#' @export
count_abnormal_lab_worsen_by_baseline <- function(lyt, # nolint
                                                  var,
                                                  ...,
                                                  table_names = NULL,
                                                  .stats = NULL,
                                                  .formats = NULL,
                                                  .labels = NULL,
                                                  .indent_mods = NULL) {
  checkmate::assert_string(var)

  # Apply the caller's statistic, format, label and indentation overrides.
  afun <- make_afun(
    a_count_abnormal_lab_worsen_by_baseline,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # `table_names` used to be accepted but never passed on. Forward it when
  # given, otherwise fall back to `var`, which is also the `analyze()` default.
  analysis_table_names <- if (is.null(table_names)) var else table_names

  lyt <- analyze(
    lyt = lyt,
    vars = var,
    afun = afun,
    table_names = analysis_table_names,
    extra_args = list(...),
    show_labels = "hidden"
  )

  lyt
}

1		#' Formatting Functions
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' See below for the list of formatting functions created in `tern` to work with `rtables`.
6		#'
7		#' Other available formats can be listed via [`formatters::list_valid_format_labels()`]. Additional
8		#' custom formats can be created via the [`formatters::sprintf_format()`] function.
9		#'
10		#' @family formatting functions
11		#' @name formatting_functions
12		NULL
13
14		#' Formatting Fraction and Percentage
15		#'
16		#' @description `r lifecycle::badge("stable")`
17		#'
18		#' Formats a fraction together with ratio in percent.
19		#'
20		#' @param x (`integer`)\cr with elements `num` and `denom`.
21		#' @param ... required for `rtables` interface.
22		#'
23		#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`.
24		#'
25		#' @examples
26		#' format_fraction(x = c(num = 2L, denom = 3L))
27		#' format_fraction(x = c(num = 0L, denom = 3L))
28		#'
29		#' @family formatting functions
30		#' @export
format_fraction <- function(x, ...) {
  # Remove any "label" attribute before validating and formatting.
  attr(x, "label") <- NULL

  checkmate::assert_vector(x)
  checkmate::assert_count(x["num"])
  checkmate::assert_count(x["denom"])

  num <- x["num"]
  denom <- x["denom"]
  counts <- paste0(num, "/", denom)

  # A zero numerator is shown without the percentage part.
  if (num == 0) {
    return(counts)
  }

  paste0(counts, " (", round(num / denom * 100, 1), "%)")
}
49
50		#' Formatting Fraction and Percentage with Fixed Single Decimal Place
51		#'
52		#' @description `r lifecycle::badge("stable")`
53		#'
54		#' Formats a fraction together with ratio in percent with fixed single decimal place.
55		#' Includes trailing zero in case of whole number percentages to always keep one decimal place.
56		#'
57		#' @param x (`integer`)\cr with elements `num` and `denom`.
58		#' @param ... required for `rtables` interface.
59		#'
60		#' @return A string in the format `num / denom (ratio %)`. If `num` is 0, the format is `num / denom`.
61		#'
62		#' @examples
63		#' format_fraction_fixed_dp(x = c(num = 1L, denom = 2L))
64		#' format_fraction_fixed_dp(x = c(num = 1L, denom = 4L))
65		#' format_fraction_fixed_dp(x = c(num = 0L, denom = 3L))
66		#'
67		#' @family formatting functions
68		#' @export
format_fraction_fixed_dp <- function(x, ...) {
  # Remove any "label" attribute before validating and formatting.
  attr(x, "label") <- NULL
  checkmate::assert_vector(x)
  checkmate::assert_count(x["num"])
  checkmate::assert_count(x["denom"])

  num <- x["num"]
  denom <- x["denom"]
  counts <- paste0(num, "/", denom)

  # A zero numerator is shown without the percentage part.
  if (num == 0) {
    return(counts)
  }

  # "%.1f" keeps the trailing zero for whole-number percentages (e.g. "50.0").
  pct <- sprintf("%.1f", round(num / denom * 100, 1))
  paste0(counts, " (", pct, "%)")
}
85
86		#' Formatting Count and Fraction
87		#'
88		#' @description `r lifecycle::badge("stable")`
89		#'
90		#' Formats a count together with fraction with special consideration when count is `0`.
91		#'
92		#' @param x (`integer`)\cr vector of length 2, count and fraction.
93		#' @param ... required for `rtables` interface.
94		#'
95		#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`.
96		#'
97		#' @examples
98		#' format_count_fraction(x = c(2, 0.6667))
99		#' format_count_fraction(x = c(0, 0))
100		#'
101		#' @family formatting functions
102		#' @export
format_count_fraction <- function(x, ...) {
  # Remove any "label" attribute before validating and formatting.
  attr(x, "label") <- NULL

  # Any missing element short-circuits to the literal string "NA".
  if (anyNA(x)) {
    return("NA")
  }

  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(x[2], include_boundaries = TRUE)

  count <- x[1]
  # A zero count is shown on its own, without a percentage.
  if (count == 0) {
    return("0")
  }

  paste0(count, " (", round(x[2] * 100, 1), "%)")
}
122
123		#' Formatting Count and Percentage with Fixed Single Decimal Place
124		#'
125		#' @description `r lifecycle::badge("experimental")`
126		#'
127		#' Formats a count together with fraction with special consideration when count is `0`.
128		#'
129		#' @param x (`integer`)\cr vector of length 2, count and fraction.
130		#' @param ... required for `rtables` interface.
131		#'
132		#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`.
133		#'
134		#' @examples
135		#' format_count_fraction_fixed_dp(x = c(2, 0.6667))
136		#' format_count_fraction_fixed_dp(x = c(2, 0.5))
137		#' format_count_fraction_fixed_dp(x = c(0, 0))
138		#'
139		#' @family formatting functions
140		#' @export
format_count_fraction_fixed_dp <- function(x, ...) {
  # Remove any "label" attribute before validating and formatting.
  attr(x, "label") <- NULL

  # Any missing element short-circuits to the literal string "NA".
  if (anyNA(x)) {
    return("NA")
  }

  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(x[2], include_boundaries = TRUE)

  count <- x[1]
  fraction <- x[2]

  # Zero count: no percentage at all.
  if (count == 0) {
    return("0")
  }
  # Exactly 100%: shown without a decimal place.
  if (fraction == 1) {
    return(sprintf("%d (100%%)", count))
  }
  # Everything else: one fixed decimal place.
  sprintf("%d (%.1f%%)", count, fraction * 100)
}
162
163		#' Formatting: XX as Formatting Function
164		#'
165		#' Translate a string where x and dots are interpreted as number place
166		#' holders, and others as formatting elements.
167		#'
168		#' @param str (`string`)\cr template.
169		#'
170		#' @return An `rtables` formatting function.
171		#'
172		#' @examples
173		#' test <- list(c(1.658, 0.5761), c(1e1, 785.6))
174		#'
175		#' z <- format_xx("xx (xx.x)")
176		#' sapply(test, z)
177		#'
178		#' z <- format_xx("xx.x - xx.x")
179		#' sapply(test, z)
180		#'
181		#' z <- format_xx("xx.x, incl. xx.x% NE")
182		#' sapply(test, z)
183		#'
184		#' @family formatting functions
185		#' @export
format_xx <- function(str) {
  # Locate every placeholder in the template: either "x...x.x...x" or a plain run of "x".
  placeholder_pos <- gregexpr(pattern = "x+\\.x+|x+", text = str, perl = TRUE)
  placeholders <- regmatches(x = str, m = placeholder_pos)[[1]]

  # One rounding function per placeholder; the number of digits equals the
  # number of "x" after the dot (0 when the placeholder has no dot).
  rounders <- lapply(
    placeholders,
    function(placeholder) {
      parts <- strsplit(placeholder, split = "\\.")[[1]]
      n_digits <- if (length(parts) > 1) nchar(parts[2]) else 0
      function(value) round(value, digits = n_digits)
    }
  )

  # The returned formatter rounds each value with its matching rounder and
  # substitutes the results for the placeholders in a copy of the template.
  function(x, output) {
    rounded <- Map(function(value, rounder) rounder(value), x, rounders)
    regmatches(x = str, m = placeholder_pos)[[1]] <- rounded
    str
  }
}
211
212		#' Formatting Fraction with Lower Threshold
213		#'
214		#' @description `r lifecycle::badge("stable")`
215		#'
216		#' Formats a fraction when the second element of the input `x` is the fraction. It applies
217		#' a lower threshold, below which it is just stated that the fraction is smaller than that.
218		#'
219		#' @param threshold (`proportion`)\cr lower threshold.
220		#'
221		#' @return An `rtables` formatting function that takes numeric input `x` where the second
222		#' element is the fraction that is formatted. If the fraction is above or equal to the threshold,
223		#' then it is displayed in percentage. If it is positive but below the threshold, it returns,
224		#' e.g. "<1" if the threshold is `0.01`. If it is zero, then just "0" is returned.
225		#'
226		#' @examples
227		#' format_fun <- format_fraction_threshold(0.05)
228		#' format_fun(x = c(20, 0.1))
229		#' format_fun(x = c(2, 0.01))
230		#' format_fun(x = c(0, 0))
231		#'
232		#' @family formatting functions
233		#' @export
format_fraction_threshold <- function(threshold) {
  # NOTE(review): called without `include_boundaries`, so `threshold` is
  # presumably required to lie strictly between 0 and 1 - confirm against
  # `assert_proportion_value()`.
  assert_proportion_value(threshold)
  # Label used for positive fractions below the threshold, e.g. "<1" for 0.01.
  string_below_threshold <- paste0("<", round(threshold * 100))
  function(x, ...) {
    assert_proportion_value(x[2], include_boundaries = TRUE)
    # Compare against the user-supplied `threshold`; the comparison used to be
    # hard-coded to `> 0.01`, which ignored the argument and treated a fraction
    # equal to the threshold as "below" it, contrary to the documented contract.
    ifelse(
      x[2] >= threshold,
      round(x[2] * 100),
      ifelse(
        x[2] == 0,
        "0",
        string_below_threshold
      )
    )
  }
}
250
251		#' Formatting Extreme Values
252		#'
253		#' @description `r lifecycle::badge("stable")`
254		#'
255		#' `rtables` formatting functions that handle extreme values.
256		#'
257		#' @param digits (`integer`)\cr number of decimal places to display.
258		#'
259		#' @details For each input, apply a format to the specified number of `digits`. If the value is
260		#' below a threshold, it returns "<0.01" e.g. if the number of `digits` is 2. If the value is
261		#' above a threshold, it returns ">999.99" e.g. if the number of `digits` is 2.
262		#' If it is zero, then returns "0.00".
263		#'
264		#' @family formatting functions
265		#' @name extreme_format
266		NULL
267
268		#' @describeIn extreme_format Internal helper function to calculate the threshold and create formatted strings
269		#' used in Formatting Functions. Returns a list with elements `threshold` and `format_string`.
270		#'
271		#' @return
272		#' * `h_get_format_threshold()` returns a `list` of 2 elements: `threshold`, with `low` and `high` thresholds,
273		#' and `format_string`, with thresholds formatted as strings.
274		#'
275		#' @examples
276		#' h_get_format_threshold(2L)
277		#'
278		#' @export
h_get_format_threshold <- function(digits = 2L) {
  checkmate::assert_integerish(digits)

  low_threshold <- 1 / (10 ^ digits) # styler: off
  high_threshold <- 1000 - (1 / (10 ^ digits)) # styler: off

  # Render thresholds in fixed notation with `digits` decimals. Plain `paste0()`
  # switches to scientific notation for small numbers (e.g. "<1e-04" for
  # `digits = 4`), which is not the intended "<0.0001" style. Negative `digits`
  # are clamped to zero decimals for the format string only.
  fixed_fmt <- paste0("%.", pmax(digits, 0), "f")
  string_below_threshold <- paste0("<", sprintf(fixed_fmt, low_threshold))
  string_above_threshold <- paste0(">", sprintf(fixed_fmt, high_threshold))

  list(
    "threshold" = c(low = low_threshold, high = high_threshold),
    "format_string" = c(low = string_below_threshold, high = string_above_threshold)
  )
}
293
294		#' @describeIn extreme_format Internal helper function to apply a threshold format to a value.
295		#' Creates a formatted string to be used in Formatting Functions.
296		#'
297		#' @param x (`number`)\cr value to format.
298		#'
299		#' @return
300		#' * `h_format_threshold()` returns the given value, or if the value is not within the digit threshold the relation
301		#' of the given value to the digit threshold, as a formatted string.
302		#'
303		#' @examples
304		#' h_format_threshold(0.001)
305		#' h_format_threshold(1000)
306		#'
307		#' @export
h_format_threshold <- function(x, digits = 2L) {
  # Missing values are passed through untouched.
  if (is.na(x)) {
    return(x)
  }

  checkmate::assert_numeric(x, lower = 0)

  fmt <- h_get_format_threshold(digits)
  limits <- fmt$threshold
  labels <- fmt$format_string

  formatted <- if (0 < x && x < limits["low"]) {
    # Positive but below the low threshold, e.g. "<0.01".
    labels["low"]
  } else if (x > limits["high"]) {
    # Above the high threshold, e.g. ">999.99".
    labels["high"]
  } else {
    # Within range (or exactly zero): fixed number of decimals.
    sprintf(fmt = paste0("%.", digits, "f"), x)
  }

  unname(formatted)
}
327
328		#' Formatting a Single Extreme Value
329		#'
330		#' @description `r lifecycle::badge("stable")`
331		#'
332		#' Create Formatting Function for a single extreme value.
333		#'
334		#' @inheritParams extreme_format
335		#'
336		#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme value.
337		#'
338		#' @examples
339		#' format_fun <- format_extreme_values(2L)
340		#' format_fun(x = 0.127)
341		#' format_fun(x = Inf)
342		#' format_fun(x = 0)
343		#' format_fun(x = 0.009)
344		#'
345		#' @family formatting functions
346		#' @export
format_extreme_values <- function(digits = 2L) {
  # Formatter for a single (possibly missing) value, delegating to `h_format_threshold()`.
  formatter <- function(x, ...) {
    checkmate::assert_scalar(x, na.ok = TRUE)
    h_format_threshold(x = x, digits = digits)
  }
  formatter
}
354
355		#' Formatting Extreme Values Part of a Confidence Interval
356		#'
357		#' @description `r lifecycle::badge("stable")`
358		#'
359		#' Formatting Function for extreme values part of a confidence interval. Values
360		#' are formatted as e.g. "(xx.xx, xx.xx)" if the number of `digits` is 2.
361		#'
362		#' @inheritParams extreme_format
363		#'
364		#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme
365		#' values confidence interval.
366		#'
367		#' @examples
368		#' format_fun <- format_extreme_values_ci(2L)
369		#' format_fun(x = c(0.127, Inf))
370		#' format_fun(x = c(0, 0.009))
371		#'
372		#' @family formatting functions
373		#' @export
format_extreme_values_ci <- function(digits = 2L) {
  # Formatter for a length-2 vector; each bound is formatted independently
  # with `h_format_threshold()` and the pair is wrapped as "(lower, upper)".
  formatter <- function(x, ...) {
    checkmate::assert_vector(x, len = 2)
    lower <- h_format_threshold(x = x[1], digits = digits)
    upper <- h_format_threshold(x = x[2], digits = digits)
    paste0("(", lower, ", ", upper, ")")
  }
  formatter
}

1		#' Missing Data
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Substitute missing data with a string or factor level.
6		#'
7		#' @param x (`factor` or `character` vector)\cr values for which any missing values should be substituted.
8		#' @param label (`character`)\cr string that missing data should be replaced with.
9		#'
10		#' @return `x` with any `NA` values substituted by `label`.
11		#'
12		#' @examples
13		#' explicit_na(c(NA, "a", "b"))
14		#' is.na(explicit_na(c(NA, "a", "b")))
15		#'
16		#' explicit_na(factor(c(NA, "a", "b")))
17		#' is.na(explicit_na(factor(c(NA, "a", "b"))))
18		#'
19		#' explicit_na(sas_na(c("a", "")))
20		#'
21		#' @export
explicit_na <- function(x, label = "<Missing>") {
  checkmate::assert_string(label)

  # Only factors and character vectors are supported.
  if (!is.factor(x) && !is.character(x)) {
    stop("only factors and character vectors allowed")
  }

  if (is.character(x)) {
    # Character input: overwrite missing entries with the label.
    x[is.na(x)] <- label
    return(x)
  }

  # Factor input: turn NA into the level `label`, then apply
  # `fct_drop(only = label)` to the result.
  with_na_level <- forcats::fct_na_value_to_level(x, label)
  forcats::fct_drop(with_na_level, only = label)
}
35
36		#' Convert Strings to `NA`
37		#'
38		#' @description `r lifecycle::badge("stable")`
39		#'
40		#' SAS imports missing data as empty strings or strings with whitespaces only. This helper function can be used to
41		#' convert these values to `NA`s.
42		#'
43		#' @inheritParams explicit_na
44		#' @param empty (`logical`)\cr if `TRUE` empty strings get replaced by `NA`.
45		#' @param whitespaces (`logical`)\cr if `TRUE` then strings made from whitespaces only get replaced with `NA`.
46		#'
47		#' @return `x` with `""` and/or whitespace-only values substituted by `NA`, depending on the values of
48		#' `empty` and `whitespaces`.
49		#'
50		#' @examples
51		#' sas_na(c("1", "", " ", " ", "b"))
52		#' sas_na(factor(c("", " ", "b")))
53		#'
54		#' is.na(sas_na(c("1", "", " ", " ", "b")))
55		#'
56		#' @export
sas_na <- function(x, empty = TRUE, whitespaces = TRUE) {
  checkmate::assert_flag(empty)
  checkmate::assert_flag(whitespaces)

  # Only factors and character vectors are supported.
  if (!is.factor(x) && !is.character(x)) {
    stop("only factors and character vectors allowed")
  }

  # Matches strings consisting of whitespace only.
  ws_only <- "^\\s+$"

  if (is.factor(x)) {
    # For factors the replacement is done on the levels.
    is_empty_level <- levels(x) == ""
    if (empty && any(is_empty_level)) {
      levels(x)[is_empty_level] <- NA
    }

    is_ws_level <- grepl(ws_only, levels(x))
    if (whitespaces && any(is_ws_level)) {
      levels(x)[is_ws_level] <- NA
    }
  } else {
    # For character vectors the replacement is done on the values.
    if (empty) {
      x[x == ""] <- NA_character_
    }
    if (whitespaces) {
      x[grepl(ws_only, x)] <- NA_character_
    }
  }

  x
}

1		#' Helper Functions for Tabulating Binary Response by Subgroup
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper functions that tabulate in a data frame statistics such as response rate
6		#' and odds ratio for population subgroups.
7		#'
8		#' @inheritParams argument_convention
9		#' @inheritParams response_subgroups
10		#' @param arm (`factor`)\cr the treatment group variable.
11		#'
12		#' @details Main functionality is to prepare data for use in a layout-creating function.
13		#'
14		#' @examples
15		#' library(dplyr)
16		#' library(forcats)
17		#'
18		#' adrs <- tern_ex_adrs
19		#' adrs_labels <- formatters::var_labels(adrs)
20		#'
21		#' adrs_f <- adrs %>%
22		#' filter(PARAMCD == "BESRSPI") %>%
23		#' filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
24		#' droplevels() %>%
25		#' mutate(
26		#' # Reorder levels of factor to make the placebo group the reference arm.
27		#' ARM = fct_relevel(ARM, "B: Placebo"),
28		#' rsp = AVALC == "CR"
29		#' )
30		#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
31		#'
32		#' @name h_response_subgroups
33		NULL
34
35		#' @describeIn h_response_subgroups helper to prepare a data frame of binary responses by arm.
36		#'
37		#' @return
38		#' * `h_proportion_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, and `prop`.
39		#'
40		#' @examples
41		#' h_proportion_df(
42		#' c(TRUE, FALSE, FALSE),
43		#' arm = factor(c("A", "A", "B"), levels = c("A", "B"))
44		#' )
45		#'
46		#' @export
h_proportion_df <- function(rsp, arm) {
  checkmate::assert_logical(rsp)
  assert_valid_factor(arm, len = length(rsp))

  # Keep only observations with a non-missing response.
  keep <- !is.na(rsp)
  rsp <- rsp[keep]
  arm <- arm[keep]

  # One-row summary for a single arm; arms without observations get n = 0 and
  # NA for the responder count and proportion.
  summarise_arm <- function(arm_rsp, arm_level) {
    if (length(arm_rsp) == 0) {
      return(data.frame(
        arm = arm_level,
        n = 0L,
        n_rsp = NA,
        prop = NA,
        stringsAsFactors = FALSE
      ))
    }
    prop_stats <- s_proportion(df = arm_rsp)
    data.frame(
      arm = arm_level,
      n = length(arm_rsp),
      n_rsp = unname(prop_stats$n_prop[1]),
      prop = unname(prop_stats$n_prop[2]),
      stringsAsFactors = FALSE
    )
  }

  rsp_by_arm <- split(rsp, arm)
  rows <- Map(summarise_arm, rsp_by_arm, names(rsp_by_arm))

  # Stack the per-arm rows and restore the factor levels of `arm`.
  out <- do.call(rbind, args = c(rows, make.row.names = FALSE))
  out$arm <- factor(out$arm, levels = levels(arm))
  out
}
80
81		#' @describeIn h_response_subgroups summarizes proportion of binary responses by arm and across subgroups
82		#' in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and
83		#' requires elements `rsp`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies
84		#' groupings for `subgroups` variables.
85		#'
86		#' @return
87		#' * `h_proportion_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, `prop`, `subgroup`,
88		#' `var`, `var_label`, and `row_type`.
89		#'
90		#' @examples
91		#' h_proportion_subgroups_df(
92		#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
93		#' data = adrs_f
94		#' )
95		#'
96		#' # Define groupings for BMRKR2 levels.
97		#' h_proportion_subgroups_df(
98		#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
99		#' data = adrs_f,
100		#' groups_lists = list(
101		#' BMRKR2 = list(
102		#' "low" = "LOW",
103		#' "low/medium" = c("LOW", "MEDIUM"),
104		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
105		#' )
106		#' )
107		#' )
108		#'
109		#' @export
h_proportion_subgroups_df <- function(variables,
                                      data,
                                      groups_lists = list(),
                                      label_all = "All Patients") {
  checkmate::assert_character(variables$rsp)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  # Overall ("All Patients") rows, flagged as content rows.
  overall <- h_proportion_df(data[[variables$rsp]], data[[variables$arm]])
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  # Without subgroup variables only the overall rows are returned.
  if (is.null(variables$subgroups)) {
    return(overall)
  }

  splits <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)

  # Proportions per subgroup level, with the subgroup labels repeated on every row.
  per_split <- lapply(splits, function(grp) {
    props <- h_proportion_df(grp$df[[variables$rsp]], grp$df[[variables$arm]])
    labels <- grp$df_labels[rep(1, times = nrow(props)), ]
    cbind(props, labels)
  })
  by_subgroup <- do.call(rbind, args = c(per_split, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(overall, by_subgroup)
}
148
149		#' @describeIn h_response_subgroups helper to prepare a data frame with estimates of
150		#' the odds ratio between a treatment and a control arm.
151		#'
152		#' @inheritParams response_subgroups
153		#' @param strata_data (`factor`, `data.frame` or `NULL`)\cr required if stratified analysis is performed.
154		#'
155		#' @return
156		#' * `h_odds_ratio_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, `conf_level`, and
157		#' optionally `pval` and `pval_label`.
158		#'
159		#' @examples
160		#' # Unstratified analysis.
161		#' h_odds_ratio_df(
162		#' c(TRUE, FALSE, FALSE, TRUE),
163		#' arm = factor(c("A", "A", "B", "B"), levels = c("A", "B"))
164		#' )
165		#'
166		#' # Include p-value.
167		#' h_odds_ratio_df(adrs_f$rsp, adrs_f$ARM, method = "chisq")
168		#'
169		#' # Stratified analysis.
170		#' h_odds_ratio_df(
171		#' rsp = adrs_f$rsp,
172		#' arm = adrs_f$ARM,
173		#' strata_data = adrs_f[, c("STRATA1", "STRATA2")],
174		#' method = "cmh"
175		#' )
176		#'
177		#' @export
h_odds_ratio_df <- function(rsp, arm, strata_data = NULL, conf_level = 0.95, method = NULL) {
  assert_valid_factor(arm, n.levels = 2, len = length(rsp))

  df_rsp <- data.frame(rsp = rsp, arm = arm)

  # When strata are supplied, collapse them into a single factor stored in a
  # "strata" column; otherwise no strata name is passed on.
  strata_name <- NULL
  if (!is.null(strata_data)) {
    strata_var <- interaction(strata_data, drop = TRUE)
    strata_name <- "strata"

    assert_valid_factor(strata_var, len = nrow(df_rsp))

    df_rsp[[strata_name]] <- strata_var
  }

  by_arm <- split(df_rsp, arm)
  n_first <- nrow(by_arm[[1]])
  n_second <- nrow(by_arm[[2]])

  # Placeholder row used whenever at least one arm has no rows.
  na_result <- function(n_tot) {
    out <- data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = n_tot,
      or = NA,
      lcl = NA,
      ucl = NA,
      conf_level = conf_level,
      stringsAsFactors = FALSE
    )
    if (!is.null(method)) {
      out$pval <- NA
      out$pval_label <- NA
    }
    out
  }

  # Both arms empty.
  if (n_first == 0 && n_second == 0) {
    return(na_result(0L))
  }

  # Exactly one arm empty: cannot go through the model, so n_tot is obtained
  # from the data.
  if (n_first == 0 || n_second == 0) {
    return(na_result(sum(stats::complete.cases(df_rsp))))
  }

  # Odds ratio and CI; the first arm level is passed as the reference group.
  or_stats <- s_odds_ratio(
    df = by_arm[[2]],
    .var = "rsp",
    .ref_group = by_arm[[1]],
    .in_ref_col = FALSE,
    .df_row = df_rsp,
    variables = list(arm = "arm", strata = strata_name),
    conf_level = conf_level
  )

  out <- data.frame(
    # Dummy column needed downstream to create a nested header.
    arm = " ",
    n_tot = unname(or_stats$n_tot["n_tot"]),
    or = unname(or_stats$or_ci["est"]),
    lcl = unname(or_stats$or_ci["lcl"]),
    ucl = unname(or_stats$or_ci["ucl"]),
    conf_level = conf_level,
    stringsAsFactors = FALSE
  )

  if (!is.null(method)) {
    # Test for difference.
    test_stats <- s_test_proportion_diff(
      df = by_arm[[2]],
      .var = "rsp",
      .ref_group = by_arm[[1]],
      .in_ref_col = FALSE,
      variables = list(strata = strata_name),
      method = method
    )

    out$pval <- as.numeric(test_stats$pval)
    out$pval_label <- obj_label(test_stats$pval)
  }

  out
}
276
277		#' @describeIn h_response_subgroups summarizes estimates of the odds ratio between a treatment and a control
278		#' arm across subgroups in a data frame. `variables` corresponds to the names of variables found in
279		#' `data`, passed as a named list and requires elements `rsp`, `arm` and optionally `subgroups`
280		#' and `strat`. `groups_lists` optionally specifies groupings for `subgroups` variables.
281		#'
282		#' @return
283		#' * `h_odds_ratio_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`,
284		#' `conf_level`, `subgroup`, `var`, `var_label`, and `row_type`.
285		#'
286		#' @examples
287		#' # Unstratified analysis.
288		#' h_odds_ratio_subgroups_df(
289		#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
290		#' data = adrs_f
291		#' )
292		#'
293		#' # Stratified analysis.
294		#' h_odds_ratio_subgroups_df(
295		#' variables = list(
296		#' rsp = "rsp",
297		#' arm = "ARM",
298		#' subgroups = c("SEX", "BMRKR2"),
299		#' strat = c("STRATA1", "STRATA2")
300		#' ),
301		#' data = adrs_f
302		#' )
303		#'
304		#' # Define groupings of BMRKR2 levels.
305		#' h_odds_ratio_subgroups_df(
306		#' variables = list(
307		#' rsp = "rsp",
308		#' arm = "ARM",
309		#' subgroups = c("SEX", "BMRKR2")
310		#' ),
311		#' data = adrs_f,
312		#' groups_lists = list(
313		#' BMRKR2 = list(
314		#' "low" = "LOW",
315		#' "low/medium" = c("LOW", "MEDIUM"),
316		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
317		#' )
318		#' )
319		#' )
320		#'
321		#' @export
h_odds_ratio_subgroups_df <- function(variables,
                                      data,
                                      groups_lists = list(),
                                      conf_level = 0.95,
                                      method = NULL,
                                      label_all = "All Patients") {
  checkmate::assert_character(variables$rsp)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_character(variables$strat, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  # Stratification columns of a data frame, or NULL when no strata were requested.
  strata_of <- function(df) {
    if (is.null(variables$strat)) {
      NULL
    } else {
      df[, variables$strat, drop = FALSE]
    }
  }

  # Overall ("All Patients") row, flagged as a content row.
  overall <- h_odds_ratio_df(
    rsp = data[[variables$rsp]],
    arm = data[[variables$arm]],
    strata_data = strata_of(data),
    conf_level = conf_level,
    method = method
  )
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  # Without subgroup variables only the overall row is returned.
  if (is.null(variables$subgroups)) {
    return(overall)
  }

  splits <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)

  # Odds ratio per subgroup level, with the subgroup labels attached.
  per_split <- lapply(splits, function(grp) {
    estimates <- h_odds_ratio_df(
      rsp = grp$df[[variables$rsp]],
      arm = grp$df[[variables$arm]],
      strata_data = strata_of(grp$df),
      conf_level = conf_level,
      method = method
    )
    labels <- grp$df_labels[rep(1, times = nrow(estimates)), ]
    cbind(estimates, labels)
  })

  by_subgroup <- do.call(rbind, args = c(per_split, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(overall, by_subgroup)
}

1		#' Difference Test for Two Proportions
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Various tests were implemented to test the difference between two proportions.
6		#'
7		#' @inheritParams argument_convention
8		#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns.
9		#'
10		#' @seealso [h_prop_diff_test]
11		#'
12		#' @name prop_diff_test
13		NULL
14
15		#' @describeIn prop_diff_test Statistics function which tests the difference between two proportions.
16		#'
17		#' @param method (`string`)\cr one of `chisq`, `cmh`, `fisher`, or `schouten`; specifies the test used
18		#' to calculate the p-value.
19		#'
20		#' @return
21		#' * `s_test_proportion_diff()` returns a named `list` with a single item `pval` with an attribute `label`
22		#' describing the method used. The p-value tests the null hypothesis that proportions in two groups are the same.
23		#'
24		#'
25		#' @keywords internal
s_test_proportion_diff <- function(df,
                                   .var,
                                   .ref_group,
                                   .in_ref_col,
                                   variables = list(strata = NULL),
                                   method = c("chisq", "schouten", "fisher", "cmh")) {
  method <- match.arg(method)

  # In the reference column no test is run and the p-value stays an empty string.
  pval <- ""

  if (!.in_ref_col) {
    assert_df_with_variables(df, list(rsp = .var))
    assert_df_with_variables(.ref_group, list(rsp = .var))

    # Stack reference and comparison observations. The locals `grp`, `rsp` and
    # `strata` keep these exact names because `table()` uses them as dimnames.
    rsp <- factor(
      c(.ref_group[[.var]], df[[.var]]),
      levels = c("TRUE", "FALSE")
    )
    grp <- factor(
      rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))),
      levels = c("ref", "Not-ref")
    )

    if (!is.null(variables$strata) || method == "cmh") {
      strata <- variables$strata
      # The CMH method requires strata variables to be given.
      checkmate::assert_false(is.null(strata))
      strata_vars <- stats::setNames(as.list(strata), strata)
      assert_df_with_variables(df, strata_vars)
      assert_df_with_variables(.ref_group, strata_vars)
      strata <- c(interaction(.ref_group[strata]), interaction(df[strata]))
    }

    # Only the CMH test uses the additional strata dimension.
    tbl <- if (method == "cmh") {
      table(grp, rsp, strata)
    } else {
      table(grp, rsp)
    }

    pval <- switch(method,
      chisq = prop_chisq(tbl),
      cmh = prop_cmh(tbl),
      fisher = prop_fisher(tbl),
      schouten = prop_schouten(tbl)
    )
  }

  # Attach the description of the test as label of the p-value.
  list(pval = formatters::with_label(pval, d_test_proportion_diff(method)))
}
72
73		#' Description of the Difference Test Between Two Proportions
74		#'
75		#' @description `r lifecycle::badge("stable")`
76		#'
77		#' This is an auxiliary function that describes the analysis in `s_test_proportion_diff`.
78		#'
79		#' @inheritParams s_test_proportion_diff
80		#'
81		#' @return `string` describing the test from which the p-value is derived.
82		#'
83		#' @export
d_test_proportion_diff <- function(method) {
  checkmate::assert_string(method)

  # Human-readable name of each supported test, keyed by method.
  descriptions <- c(
    schouten = "Chi-Squared Test with Schouten Correction",
    chisq = "Chi-Squared Test",
    cmh = "Cochran-Mantel-Haenszel Test",
    fisher = "Fisher's Exact Test"
  )

  if (!method %in% names(descriptions)) {
    stop(paste(method, "does not have a description"))
  }

  paste0("p-value (", descriptions[[method]], ")")
}
95
96		#' @describeIn prop_diff_test Formatted analysis function which is used as `afun` in `test_proportion_diff()`.
97		#'
98		#' @return
99		#' * `a_test_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
100		#'
101		#'
102		#' @keywords internal
a_test_proportion_diff <- make_afun(
  # Statistics function being wrapped.
  s_test_proportion_diff,
  # The p-value row gets an indent modifier of 1 and a 4-decimal format
  # with a "<0.0001" floor.
  .indent_mods = c(pval = 1L),
  .formats = c(pval = "x.xxxx | (<0.0001)")
)
108
109		#' @describeIn prop_diff_test Layout-creating function which can take statistics function arguments
110		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
111		#'
112		#' @param ... other arguments are passed to [s_test_proportion_diff()].
113		#'
114		#' @return
115		#' * `test_proportion_diff()` returns a layout object suitable for passing to further layouting functions,
116		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
117		#' the statistics from `s_test_proportion_diff()` to the table layout.
118		#'
119		#' @examples
120		#' dta <- data.frame(
121		#' rsp = sample(c(TRUE, FALSE), 100, TRUE),
122		#' grp = factor(rep(c("A", "B"), each = 50)),
123		#' strat = factor(rep(c("V", "W", "X", "Y", "Z"), each = 20))
124		#' )
125		#'
126		#' # With `rtables` pipelines.
127		#' l <- basic_table() %>%
128		#' split_cols_by(var = "grp", ref_group = "B") %>%
129		#' test_proportion_diff(
130		#' vars = "rsp",
131		#' method = "cmh", variables = list(strata = "strat")
132		#' )
133		#'
134		#' build_table(l, df = dta)
135		#'
136		#' @export
test_proportion_diff <- function(lyt,
                                 vars,
                                 ...,
                                 var_labels = vars,
                                 show_labels = "hidden",
                                 table_names = vars,
                                 .stats = NULL,
                                 .formats = NULL,
                                 .labels = NULL,
                                 .indent_mods = NULL) {
  # Re-wrap the formatted analysis function so that the caller's choice of
  # statistics, formats, labels and indentation overrides its defaults.
  customized_afun <- make_afun(
    a_test_proportion_diff,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Everything captured by `...` is handed over untouched as `extra_args`.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = customized_afun,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    extra_args = list(...)
  )
}
164
165		#' Helper Functions to Test Proportion Differences
166		#'
167		#' Helper functions to implement various tests on the difference between two proportions.
168		#'
169		#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns.
170		#'
171		#' @return A p-value.
172		#'
173		#' @seealso [prop_diff_test()] for implementation of these helper functions.
174		#'
175		#' @name h_prop_diff_test
176		NULL
177
178		#' @describeIn h_prop_diff_test performs Chi-Squared test. Internally calls [stats::prop.test()].
179		#'
180		#'
181		#' @keywords internal
prop_chisq <- function(tbl) {
  # Only 2 x 2 tables are supported.
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)

  # Fix the column order so that responders ("TRUE") always come first.
  ordered_tbl <- tbl[, c("TRUE", "FALSE")]

  if (any(colSums(ordered_tbl) == 0)) {
    # A response category without any observation: report a p-value of 1.
    1
  } else {
    test_result <- stats::prop.test(ordered_tbl, correct = FALSE)
    test_result$p.value
  }
}
190
191		#' @describeIn h_prop_diff_test performs stratified Cochran-Mantel-Haenszel test. Internally calls
192		#' [stats::mantelhaen.test()]. Note that strata with less than two observations are automatically discarded.
193		#'
194		#' @param ary (`array`, 3 dimensions)\cr array with two groups in rows, the binary response
195		#' (`TRUE`/`FALSE`) in columns, and the strata in the third dimension.
196		#'
197		#'
198		#' @keywords internal
prop_cmh <- function(ary) {
  # Input must be a 2 x 2 x K array.
  checkmate::assert_array(ary)
  checkmate::assert_integer(c(ncol(ary), nrow(ary)), lower = 2, upper = 2)
  checkmate::assert_integer(length(dim(ary)), lower = 3, upper = 3)

  # Number of observations in each stratum (third dimension).
  obs_per_stratum <- apply(ary, MARGIN = 3, sum)

  if (any(obs_per_stratum < 5)) {
    warning("<5 data points in some strata. CMH test may be incorrect.")
    # Once any stratum is sparse, strata holding at most one observation are dropped.
    keep_stratum <- obs_per_stratum > 1
    ary <- ary[, , keep_stratum]
  }

  cmh_result <- stats::mantelhaen.test(ary, correct = FALSE)
  cmh_result$p.value
}
211
212		#' @describeIn h_prop_diff_test performs the Chi-Squared test with Schouten correction.
213		#'
214		#' @seealso For information on the Schouten correction (Schouten, 1980),
215		#' visit \url{https://onlinelibrary.wiley.com/doi/abs/10.1002/bimj.4710220305}.
216		#'
217		#'
218		#' @keywords internal
prop_schouten <- function(tbl) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)
  tbl <- tbl[, c("TRUE", "FALSE")]
  if (any(colSums(tbl) == 0)) {
    # A response category without any observation: report a p-value of 1.
    return(1)
  }

  # `rowSums()` / `colSums()` / `prod()` always return doubles. Contingency
  # tables usually hold integer counts, and multiplying the four marginal
  # totals in integer arithmetic overflows to `NA` already for a few hundred
  # observations per group, so every product below is taken in double precision.
  row_totals <- rowSums(tbl) # group sizes n1, n2
  col_totals <- colSums(tbl) # number of "TRUE" and of "FALSE" responses
  n <- sum(row_totals)

  # |a * d - b * c| for the table [[a, b], [c, d]].
  cross_diff <- abs(prod(diag(tbl)) - prod(tbl[1, 2], tbl[2, 1]))

  t_schouten <- (n - 1) *
    (cross_diff - 0.5 * min(row_totals))^2 /
    prod(row_totals, col_totals)

  # Ask for the upper tail directly: `1 - pchisq(...)` cancels to exactly 0
  # once the statistic is large.
  stats::pchisq(t_schouten, df = 1, lower.tail = FALSE)
}
241
242		#' @describeIn h_prop_diff_test performs the Fisher's exact test. Internally calls [stats::fisher.test()].
243		#'
244		#'
245		#' @keywords internal
prop_fisher <- function(tbl) {
  # Only 2 x 2 tables are supported.
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)

  # Responders ("TRUE") first, non-responders ("FALSE") second.
  fisher_result <- stats::fisher.test(tbl[, c("TRUE", "FALSE")])
  fisher_result[["p.value"]]
}

1		#' Individual Patient Plots
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Line plot(s) displaying trend in patients' parameter values over time is rendered.
6		#' Patients' individual baseline values can be added to the plot(s) as reference.
7		#'
8		#' @inheritParams argument_convention
9		#' @param xvar (`string`)\cr time point variable to be plotted on x-axis.
10		#' @param yvar (`string`)\cr continuous analysis variable to be plotted on y-axis.
11		#' @param xlab (`string`)\cr plot label for x-axis.
12		#' @param ylab (`string`)\cr plot label for y-axis.
13		#' @param id_var (`string`)\cr variable used as patient identifier.
14		#' @param title (`string`)\cr title for plot.
15		#' @param subtitle (`string`)\cr subtitle for plot.
16		#' @param add_baseline_hline (`flag`)\cr adds horizontal line at baseline y-value on
17		#' plot when TRUE.
18		#' @param yvar_baseline (`string`)\cr variable with baseline values only.
19		#' Ignored when `add_baseline_hline` is FALSE.
20		#' @param ggtheme (`theme`)\cr optional graphical theme function as provided
21		#' by `ggplot2` to control outlook of plot. Use `ggplot2::theme()` to tweak the display.
22		#' @param plotting_choices (`character`)\cr specifies options for displaying
23		#' plots. Must be one of "all_in_one", "split_by_max_obs", "separate_by_obs".
24		#' @param max_obs_per_plot (`count`)\cr Number of observations to be plotted on one
25		#' plot. Ignored when `plotting_choices` is not "split_by_max_obs".
26		#' @param caption (`character` scalar)\cr optional caption below the plot.
27		#' @param col (`character`)\cr lines colors.
28		#'
29		#' @seealso Relevant helper function [h_g_ipp()].
30		#'
31		#' @name individual_patient_plot
32		NULL
33
34		#' Helper Function To Create Simple Line Plot over Time
35		#'
36		#' @description `r lifecycle::badge("stable")`
37		#'
38		#' Function that generates a simple line plot displaying parameter trends over time.
39		#'
40		#' @inheritParams argument_convention
41		#' @inheritParams g_ipp
42		#'
43		#' @return A `ggplot` line plot.
44		#'
45		#' @seealso [g_ipp()] which uses this function.
46		#'
47		#' @examples
48		#' library(dplyr)
49		#' library(nestcolor)
50		#'
51		#' # Select a small sample of data to plot.
52		#' adlb <- tern_ex_adlb %>%
53		#' filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>%
54		#' slice(1:36)
55		#'
56		#' p <- h_g_ipp(
57		#' df = adlb,
58		#' xvar = "AVISIT",
59		#' yvar = "AVAL",
60		#' xlab = "Visit",
61		#' id_var = "USUBJID",
62		#' ylab = "SGOT/ALT (U/L)",
63		#' add_baseline_hline = TRUE
64		#' )
65		#' p
66		#'
67		#' @export
h_g_ipp <- function(df,
                    xvar,
                    yvar,
                    xlab,
                    ylab,
                    id_var,
                    title = "Individual Patient Plots",
                    subtitle = "",
                    caption = NULL,
                    add_baseline_hline = FALSE,
                    yvar_baseline = "BASE",
                    ggtheme = nestcolor::theme_nest(),
                    col = NULL) {
  # Validate `df` first: the column checks below read its column names.
  checkmate::assert_data_frame(df)
  checkmate::assert_string(xvar)
  checkmate::assert_string(yvar)
  checkmate::assert_string(yvar_baseline)
  checkmate::assert_string(id_var)
  checkmate::assert_string(xlab)
  checkmate::assert_string(ylab)
  checkmate::assert_string(title)
  checkmate::assert_string(subtitle)
  checkmate::assert_flag(add_baseline_hline)
  checkmate::assert_character(col, null.ok = TRUE)

  # The baseline column is only needed when the baseline line is drawn, so it
  # is only required to exist in that case.
  required_cols <- c(xvar, yvar, id_var)
  if (add_baseline_hline) {
    required_cols <- c(required_cols, yvar_baseline)
  }
  checkmate::assert_subset(required_cols, colnames(df))

  # One line (with points) per patient, coloured by patient.
  p <- ggplot2::ggplot(
    data = df,
    mapping = ggplot2::aes(
      x = .data[[xvar]],
      y = .data[[yvar]],
      group = .data[[id_var]],
      colour = .data[[id_var]]
    )
  ) +
    ggplot2::geom_line(linewidth = 0.4) +
    ggplot2::geom_point(size = 2) +
    ggplot2::labs(
      x = xlab,
      y = ylab,
      title = title,
      subtitle = subtitle,
      caption = caption
    ) +
    ggtheme

  if (add_baseline_hline) {
    # One row per distinct (patient, baseline value) pair.
    baseline_df <- unique(df[, c(id_var, yvar_baseline)])

    # Lift each label slightly above its line: 2.5% of the observed y-range.
    label_offset <- 0.025 * diff(range(df[[yvar]], na.rm = TRUE))

    p <- p +
      ggplot2::geom_hline(
        data = baseline_df,
        mapping = ggplot2::aes(
          yintercept = .data[[yvar_baseline]],
          colour = .data[[id_var]]
        ),
        linetype = "dotdash",
        linewidth = 0.4
      ) +
      ggplot2::geom_text(
        data = baseline_df,
        mapping = ggplot2::aes(
          x = 1,
          y = .data[[yvar_baseline]],
          label = .data[[id_var]],
          colour = .data[[id_var]]
        ),
        nudge_y = label_offset,
        vjust = "right",
        size = 2
      )
  }

  # User-supplied colours apply to the patient lines whether or not the
  # baseline reference is shown.
  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_color_manual(values = col)
  }

  p
}
148
149		#' @describeIn individual_patient_plot Plotting function for individual patient plots which, depending on user
150		#' preference, renders a single graphic or compiles a list of graphics that show trends in individual's parameter
151		#' values over time.
152		#'
153		#' @return A `ggplot` object or a list of `ggplot` objects.
154		#'
155		#' @examples
156		#' library(dplyr)
157		#' library(nestcolor)
158		#'
159		#' # Select a small sample of data to plot.
160		#' adlb <- tern_ex_adlb %>%
161		#' filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>%
162		#' slice(1:36)
163		#'
164		#' plot_list <- g_ipp(
165		#' df = adlb,
166		#' xvar = "AVISIT",
167		#' yvar = "AVAL",
168		#' xlab = "Visit",
169		#' ylab = "SGOT/ALT (U/L)",
170		#' title = "Individual Patient Plots",
171		#' add_baseline_hline = TRUE,
172		#' plotting_choices = "split_by_max_obs",
173		#' max_obs_per_plot = 5
174		#' )
175		#' plot_list
176		#'
177		#' @export
g_ipp <- function(df,
                  xvar,
                  yvar,
                  xlab,
                  ylab,
                  id_var = "USUBJID",
                  title = "Individual Patient Plots",
                  subtitle = "",
                  caption = NULL,
                  add_baseline_hline = FALSE,
                  yvar_baseline = "BASE",
                  ggtheme = nestcolor::theme_nest(),
                  plotting_choices = c("all_in_one", "split_by_max_obs", "separate_by_obs"),
                  max_obs_per_plot = 4,
                  col = NULL) {
  checkmate::assert_count(max_obs_per_plot)
  checkmate::assert_subset(plotting_choices, c("all_in_one", "split_by_max_obs", "separate_by_obs"))
  checkmate::assert_character(col, null.ok = TRUE)

  plotting_choices <- match.arg(plotting_choices)

  # Every mode draws with identical settings; only the data subset differs.
  draw_plot <- function(data) {
    h_g_ipp(
      df = data,
      xvar = xvar,
      yvar = yvar,
      xlab = xlab,
      ylab = ylab,
      id_var = id_var,
      title = title,
      subtitle = subtitle,
      caption = caption,
      add_baseline_hline = add_baseline_hline,
      yvar_baseline = yvar_baseline,
      ggtheme = ggtheme,
      col = col
    )
  }

  # Single plot holding all patients.
  if (plotting_choices == "all_in_one") {
    return(draw_plot(df))
  }

  subsets <- if (plotting_choices == "split_by_max_obs") {
    # Consecutive chunks of at most `max_obs_per_plot` patient identifiers.
    ids <- unique(df[[id_var]])
    n_chunks <- ceiling(length(ids) / max_obs_per_plot)
    chunk_index <- rep(seq_len(n_chunks), each = max_obs_per_plot, length.out = length(ids))
    id_chunks <- split(ids, chunk_index)
    # Iterating over positions keeps the resulting list unnamed.
    lapply(seq_along(id_chunks), function(i) df[df[[id_var]] %in% id_chunks[[i]], ])
  } else {
    # "separate_by_obs": one data subset, named by identifier, per patient.
    split(df, df[[id_var]])
  }

  lapply(subsets, draw_plot)
}

1		#' Count the Number of Patients with a Particular Event
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' The primary analysis variable `.var` denotes the unique patient identifier.
6		#'
7		#' @inheritParams argument_convention
8		#'
9		#' @seealso [count_patients_with_flags]
10		#'
11		#' @name count_patients_with_event
12		NULL
13
14		#' @describeIn count_patients_with_event Statistics function which counts the number of patients for which
15		#' the defined event has occurred.
16		#'
17		#' @inheritParams summarize_variables
18		#' @param .var (`character`)\cr name of the column that contains the unique identifier.
19		#' @param filters (`character`)\cr a character vector specifying the column names and flag variables
20		#' to be used for counting the number of unique identifiers satisfying such conditions.
21		#' Multiple column names and flags are accepted in this format
22		#' `c("column_name1" = "flag1", "column_name2" = "flag2")`.
23		#' Note that only equality is being accepted as condition.
24		#'
25		#' @return
26		#' * `s_count_patients_with_event()` returns the count and fraction of unique identifiers with the defined event.
27		#'
28		#' @examples
29		#' library(dplyr)
30		#'
31		#' # `s_count_patients_with_event()`
32		#'
33		#' s_count_patients_with_event(
34		#' tern_ex_adae,
35		#' .var = "SUBJID",
36		#' filters = c("TRTEMFL" = "Y")
37		#' )
38		#' s_count_patients_with_event(
39		#' tern_ex_adae,
40		#' .var = "SUBJID",
41		#' filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL")
42		#' )
43		#' s_count_patients_with_event(
44		#' tern_ex_adae,
45		#' .var = "SUBJID",
46		#' filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL"),
47		#' denom = "N_col",
48		#' .N_col = 456
49		#' )
50		#'
51		#' @export
s_count_patients_with_event <- function(df,
                                        .var,
                                        filters,
                                        .N_col, # nolint
                                        .N_row, # nolint
                                        denom = c("n", "N_row", "N_col")) {
  # The names of `filters` are column names, its values the flags to match.
  col_names <- names(filters)
  checkmate::assert_subset(col_names, colnames(df))

  # For every filter, the positions of the rows whose column equals the flag
  # (`which()` drops comparisons that evaluate to `NA`).
  rows_per_filter <- Map(
    function(column, flag) which(df[[column]] == flag),
    col_names,
    filters
  )

  # Rows that satisfy all filters at once.
  rows_all_filters <- Reduce(intersect, rows_per_filter)

  ids_with_event <- as.character(unique(df[rows_all_filters, ][[.var]]))
  ids_overall <- as.character(unique(df[[.var]]))

  s_count_values(
    ids_overall,
    ids_with_event,
    denom = denom,
    .N_col = .N_col,
    .N_row = .N_row
  )
}
79
80		#' @describeIn count_patients_with_event Formatted analysis function which is used as `afun`
81		#' in `count_patients_with_event()`.
82		#'
83		#' @return
84		#' * `a_count_patients_with_event()` returns the corresponding list with formatted [rtables::CellValue()].
85		#'
86		#' @examples
87		#' # `a_count_patients_with_event()`
88		#'
89		#' a_count_patients_with_event(
90		#' tern_ex_adae,
91		#' .var = "SUBJID",
92		#' filters = c("TRTEMFL" = "Y"),
93		#' .N_col = 100,
94		#' .N_row = 100
95		#' )
96		#'
97		#' @export
a_count_patients_with_event <- make_afun(
  s_count_patients_with_event,
  # `count_fraction` is rendered through the package's fixed-decimal formatter.
  .formats = list(count_fraction = format_count_fraction_fixed_dp)
)
102
103		#' @describeIn count_patients_with_event Layout-creating function which can take statistics function
104		#' arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
105		#'
106		#' @return
107		#' * `count_patients_with_event()` returns a layout object suitable for passing to further layouting functions,
108		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
109		#' the statistics from `s_count_patients_with_event()` to the table layout.
110		#'
111		#' @examples
112		#' # `count_patients_with_event()`
113		#'
114		#' lyt <- basic_table() %>%
115		#' split_cols_by("ARM") %>%
116		#' add_colcounts() %>%
117		#' count_values(
118		#' "STUDYID",
119		#' values = "AB12345",
120		#' .stats = "count",
121		#' .labels = c(count = "Total AEs")
122		#' ) %>%
123		#' count_patients_with_event(
124		#' "SUBJID",
125		#' filters = c("TRTEMFL" = "Y"),
126		#' .labels = c(count_fraction = "Total number of patients with at least one adverse event"),
127		#' table_names = "tbl_all"
128		#' ) %>%
129		#' count_patients_with_event(
130		#' "SUBJID",
131		#' filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL"),
132		#' .labels = c(count_fraction = "Total number of patients with fatal AEs"),
133		#' table_names = "tbl_fatal"
134		#' ) %>%
135		#' count_patients_with_event(
136		#' "SUBJID",
137		#' filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL", "AEREL" = "Y"),
138		#' .labels = c(count_fraction = "Total number of patients with related fatal AEs"),
139		#' .indent_mods = c(count_fraction = 2L),
140		#' table_names = "tbl_rel_fatal"
141		#' )
142		#' build_table(lyt, tern_ex_adae, alt_counts_df = tern_ex_adsl)
143		#'
144		#' @export
count_patients_with_event <- function(lyt,
                                      vars,
                                      ...,
                                      table_names = vars,
                                      .stats = "count_fraction",
                                      .formats = NULL,
                                      .labels = NULL,
                                      .indent_mods = NULL) {
  # Re-wrap the formatted analysis function so that the caller's choice of
  # statistics, formats, labels and indentation overrides its defaults.
  customized_afun <- make_afun(
    a_count_patients_with_event,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Variable labels are only displayed when several variables are analyzed.
  label_visibility <- if (length(vars) > 1) "visible" else "hidden"

  # Everything captured by `...` is handed over untouched as `extra_args`.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = customized_afun,
    extra_args = list(...),
    show_labels = label_visibility,
    table_names = table_names
  )
}

1		#' Cox Proportional Hazards Regression
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Fits a Cox regression model and estimates hazard ratio to describe the effect size in a survival analysis.
6		#'
7		#' @inheritParams argument_convention
8		#'
9		#' @details Cox models are the most commonly used methods to estimate the magnitude of
10		#' the effect in survival analysis. It assumes proportional hazards: the ratio
11		#' of the hazards between groups (e.g., two arms) is constant over time.
12		#' This ratio is referred to as the "hazard ratio" (HR) and is one of the
13		#' most commonly reported metrics to describe the effect size in survival
14		#' analysis (NEST Team, 2020).
15		#'
16		#' @seealso [fit_coxreg] for relevant fitting functions, [h_cox_regression] for relevant
17		#' helper functions, and [tidy_coxreg] for custom tidy methods.
18		#'
19		#' @examples
20		#' library(survival)
21		#'
22		#' # Testing dataset [survival::bladder].
23		#' set.seed(1, kind = "Mersenne-Twister")
24		#' dta_bladder <- with(
25		#' data = bladder[bladder$enum < 5, ],
26		#' tibble::tibble(
27		#' TIME = stop,
28		#' STATUS = event,
29		#' ARM = as.factor(rx),
30		#' COVAR1 = as.factor(enum) %>% formatters::with_label("A Covariate Label"),
31		#' COVAR2 = factor(
32		#' sample(as.factor(enum)),
33		#' levels = 1:4, labels = c("F", "F", "M", "M")
34		#' ) %>% formatters::with_label("Sex (F/M)")
35		#' )
36		#' )
37		#' dta_bladder$AGE <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
38		#' dta_bladder$STUDYID <- factor("X")
39		#'
40		#' plot(
41		#' survfit(Surv(TIME, STATUS) ~ ARM + COVAR1, data = dta_bladder),
42		#' lty = 2:4,
43		#' xlab = "Months",
44		#' col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
45		#' )
46		#'
47		#' @name cox_regression
48		NULL
49
50		#' @describeIn cox_regression Statistics function that transforms results tabulated
51		#' from [fit_coxreg_univar()] or [fit_coxreg_multivar()] into a list.
52		#'
53		#' @param model_df (`data.frame`)\cr contains the resulting model fit from a [fit_coxreg]
54		#' function with tidying applied via [broom::tidy()].
55		#' @param .stats (`character`)\cr the name of statistics to be reported among:
56		#' * `n`: number of observations (univariate only)
57		#' * `hr`: hazard ratio
58		#' * `ci`: confidence interval
59		#' * `pval`: p-value of the treatment effect
60		#' * `pval_inter`: p-value of the interaction effect between the treatment and the covariate (univariate only)
61		#' @param .which_vars (`character`)\cr which rows should statistics be returned for from the given model.
62		#' Defaults to "all". Other options include "var_main" for main effects, `"inter"` for interaction effects,
63		#' and `"multi_lvl"` for multivariate model covariate level rows. When `.which_vars` is "all" specific
64		#' variables can be selected by specifying `.var_nms`.
65		#' @param .var_nms (`character`)\cr the `term` value of rows in `model_df` for which `.stats` should be returned.
66		#' Typically this is the name of a variable. If using variable labels, `.var_nms` should be a vector of both the
67		#' desired variable name and the variable label in that order to see all `.stats` related to that variable. When
68		#' `.which_vars` is `"var_main"`, `.var_nms` should be only the variable name.
69		#'
70		#' @return
71		#' * `s_coxreg()` returns the selected statistic from the Cox regression model for the selected variable(s).
72		#'
73		#' @examples
74		#' # s_coxreg
75		#'
76		#' # Univariate
77		#' u1_variables <- list(
78		#' time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
79		#' )
80		#' univar_model <- fit_coxreg_univar(variables = u1_variables, data = dta_bladder)
81		#' df1 <- broom::tidy(univar_model)
82		#' s_coxreg(model_df = df1, .stats = "hr")
83		#'
84		#' # Univariate with interactions
85		#' univar_model_inter <- fit_coxreg_univar(
86		#' variables = u1_variables, control = control_coxreg(interaction = TRUE), data = dta_bladder
87		#' )
88		#' df1_inter <- broom::tidy(univar_model_inter)
89		#' s_coxreg(model_df = df1_inter, .stats = "hr", .which_vars = "inter", .var_nms = "COVAR1")
90		#'
91		#' # Univariate without treatment arm - only "COVAR2" covariate effects
92		#' u2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
93		#' univar_covs_model <- fit_coxreg_univar(variables = u2_variables, data = dta_bladder)
94		#' df1_covs <- broom::tidy(univar_covs_model)
95		#' s_coxreg(model_df = df1_covs, .stats = "hr", .var_nms = c("COVAR2", "Sex (F/M)"))
96		#'
97		#' # Multivariate.
98		#' m1_variables <- list(
99		#' time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
100		#' )
101		#' multivar_model <- fit_coxreg_multivar(variables = m1_variables, data = dta_bladder)
102		#' df2 <- broom::tidy(multivar_model)
103		#' s_coxreg(model_df = df2, .stats = "pval", .which_vars = "var_main", .var_nms = "COVAR1")
104		#' s_coxreg(
105		#' model_df = df2, .stats = "pval", .which_vars = "multi_lvl",
106		#' .var_nms = c("COVAR1", "A Covariate Label")
107		#' )
108		#'
109		#' # Multivariate without treatment arm - only "COVAR1" main effect
110		#' m2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
111		#' multivar_covs_model <- fit_coxreg_multivar(variables = m2_variables, data = dta_bladder)
112		#' df2_covs <- broom::tidy(multivar_covs_model)
113		#' s_coxreg(model_df = df2_covs, .stats = "hr")
114		#'
115		#' @export
s_coxreg <- function(model_df, .stats, .which_vars = "all", .var_nms = NULL) {
  assert_df_with_variables(model_df, list(term = "term", stat = .stats))
  checkmate::assert_multi_class(model_df$term, classes = c("factor", "character"))
  model_df$term <- as.character(model_df$term)
  .var_nms <- .var_nms[!is.na(.var_nms)]

  # Keep only the rows of the requested terms (all rows when none is requested).
  if (length(.var_nms) > 0) model_df <- model_df[model_df$term %in% .var_nms, ]
  # For covariate-level rows, relabel every remaining row with the last entry
  # of `.var_nms` so that they all end up in a single group below.
  if (.which_vars == "multi_lvl") model_df$term <- tail(.var_nms, 1)

  # We need a list with names corresponding to the stats to display of equal length to the list of stats.
  y <- split(model_df, f = model_df$term, drop = FALSE)
  y <- stats::setNames(y, nm = rep(.stats, length(y)))

  if (.which_vars == "var_main") {
    y <- lapply(y, function(x) x[1, ]) # only main effect
  } else if (.which_vars %in% c("inter", "multi_lvl")) {
    # Exclude the main effect (first row) of each group. The decision is made
    # per group: a group consisting of a single row is returned unchanged.
    y <- lapply(y, function(x) if (nrow(x) > 1) x[-1, ] else x)
  }

  # For every group: the values of the statistic as a list named by `term_label`.
  lapply(
    X = y,
    FUN = function(x) {
      z <- as.list(x[[.stats]])
      stats::setNames(z, nm = x$term_label)
    }
  )
}
143
144		#' @describeIn cox_regression Analysis function which is used as `afun` in [rtables::analyze()]
145		#' and `cfun` in [rtables::summarize_row_groups()] within `summarize_coxreg()`.
146		#'
147		#' @param eff (`flag`)\cr whether treatment effect should be calculated. Defaults to `FALSE`.
148		#' @param var_main (`flag`)\cr whether main effects should be calculated. Defaults to `FALSE`.
149		#' @param na_level (`string`)\cr custom string to replace all `NA` values with. Defaults to `""`.
150		#' @param cache_env (`environment`)\cr an environment object used to cache the regression model in order to
151		#' avoid repeatedly fitting the same model for every row in the table. Defaults to `NULL` (no caching).
152		#' @param varlabels (`list`)\cr a named list corresponds to the names of variables found in data, passed
153		#' as a named list and corresponding to time, event, arm, strata, and covariates terms. If arm is missing
154		#' from variables, then only Cox model(s) including the covariates will be fitted and the corresponding
155		#' effect estimates will be tabulated later.
156		#'
157		#' @return
158		#' * `a_coxreg()` returns formatted [rtables::CellValue()].
159		#'
160		#' @examples
161		#' a_coxreg(
162		#' df = dta_bladder,
163		#' labelstr = "Label 1",
164		#' variables = u1_variables,
165		#' .spl_context = list(value = "COVAR1"),
166		#' .stats = "n",
167		#' .formats = "xx"
168		#' )
169		#'
170		#' a_coxreg(
171		#' df = dta_bladder,
172		#' labelstr = "",
173		#' variables = u1_variables,
174		#' .spl_context = list(value = "COVAR2"),
175		#' .stats = "pval",
176		#' .formats = "xx.xxxx"
177		#' )
178		#'
179		#' @export
a_coxreg <- function(df,
                     labelstr,
                     eff = FALSE,
                     var_main = FALSE,
                     multivar = FALSE,
                     variables,
                     at = list(),
                     control = control_coxreg(),
                     .spl_context,
                     .stats,
                     .formats,
                     .indent_mods = NULL,
                     na_level = "",
                     cache_env = NULL) {
  # Special case: univariate model requested without a treatment arm.
  cov_no_arm <- !multivar && !"arm" %in% names(variables) && control$interaction
  # Variable/covariate of the current split: last value of the split context.
  cov <- tail(.spl_context$value, 1)
  # Label attached to that variable in `df`, if any.
  var_lbl <- formatters::var_labels(df)[cov]

  # Resolve the label used for the row.
  if (length(labelstr) > 1) {
    # Several labels supplied: take the one named after the covariate, otherwise the `df` label.
    labelstr <- if (cov %in% names(labelstr)) labelstr[[cov]] else var_lbl
  } else if (!is.na(var_lbl) && labelstr == cov && cov %in% variables$covariates) {
    labelstr <- var_lbl
  }

  # Settle the model specification for this row.
  if (eff || multivar || cov_no_arm) {
    control$interaction <- FALSE
  } else {
    variables$covariates <- cov
    if (var_main) control$interaction <- TRUE
  }

  # Fit and tidy the model unless a tidied model is already cached under `cov`.
  cached_fit <- cache_env[[cov]]
  if (is.null(cached_fit)) {
    fitted_model <- if (!multivar) {
      fit_coxreg_univar(variables = variables, data = df, at = at, control = control)
    } else {
      fit_coxreg_multivar(variables = variables, data = df, control = control)
    }
    tidy_fit <- broom::tidy(fitted_model)
    cache_env[[cov]] <- tidy_fit
  } else {
    tidy_fit <- cached_fit
  }
  if (!multivar && !var_main) tidy_fit[, "pval_inter"] <- NA_real_

  # From here on, arm-less models are handled through the multivariate branches.
  if (cov_no_arm || (!cov_no_arm && !"arm" %in% names(variables) && is.numeric(df[[cov]]))) {
    multivar <- TRUE
    if (!cov_no_arm) var_main <- TRUE
  }

  # Choose which rows of the tidied model `s_coxreg()` should return.
  row_selection <- list(which_vars = "all", var_nms = NULL)
  if (eff) {
    if (multivar && !var_main) {
      # Multivariate treatment level.
      var_lbl_arm <- formatters::var_labels(df)[[variables$arm]]
      row_selection[c("var_nms", "which_vars")] <- list(c(variables$arm, var_lbl_arm), "multi_lvl")
    } else {
      # Treatment effect.
      row_selection["var_nms"] <- variables$arm
      if (var_main) row_selection["which_vars"] <- "var_main"
    }
  } else {
    if (!multivar || (multivar && var_main && !is.numeric(df[[cov]]))) {
      # Covariate effect/level.
      row_selection[c("var_nms", "which_vars")] <- list(cov, "var_main")
    } else if (multivar) {
      # Multivariate covariate level.
      row_selection[c("var_nms", "which_vars")] <- list(c(cov, var_lbl), "multi_lvl")
      if (var_main) tidy_fit[cov, .stats] <- NA_real_
    }
    # Interaction effect.
    if (!multivar && !var_main && control$interaction) row_selection["which_vars"] <- "inter"
  }

  cell_values <- s_coxreg(
    tidy_fit,
    .stats,
    .which_vars = row_selection$which_vars,
    .var_nms = row_selection$var_nms
  )[[1]]

  # Work out the row names/labels.
  row_names <- if (all(grepl("\\(reference = ", names(cell_values))) && labelstr != cov) {
    # "reference" main effect labels: row label followed by the last three words of the first name.
    paste(c(labelstr, tail(strsplit(names(cell_values), " ")[[1]], 3)), collapse = " ")
  } else if ((!multivar && !eff && !(!var_main && control$interaction) && nchar(labelstr) > 0) ||
    (multivar && var_main && is.numeric(df[[cov]]))) {
    # Other main effect labels.
    labelstr
  } else if (multivar && !eff && !var_main && is.numeric(df[[cov]])) {
    # Multivariate numeric covariate.
    "All"
  } else {
    names(cell_values)
  }

  # The same format and NA string are applied to every row.
  n_rows <- length(row_names)
  in_rows(
    .list = cell_values,
    .names = row_names,
    .labels = row_names,
    .indent_mods = .indent_mods,
    .formats = stats::setNames(rep(.formats, n_rows), row_names),
    .format_na_strs = stats::setNames(rep(na_level, n_rows), row_names)
  )
}
261
262		#' @describeIn cox_regression Layout-creating function which creates a Cox regression summary table
263		#' layout. This function is a wrapper for several `rtables` layouting functions. This function
264		#' is a wrapper for [rtables::analyze_colvars()] and [rtables::summarize_row_groups()].
265		#'
266		#' @inheritParams fit_coxreg_univar
267		#' @param multivar (`flag`)\cr Defaults to `FALSE`. If `TRUE` multivariate Cox regression will run, otherwise
268		#' univariate Cox regression will run.
269		#' @param common_var (`character`)\cr the name of a factor variable in the dataset which takes the same value
270		#' for all rows. This should be created during pre-processing if no such variable currently exists.
271		#' @param .section_div (`character`)\cr string which should be repeated as a section divider between sections.
272		#' Defaults to `NA` for no section divider. If a vector of two strings are given, the first will be used between
273		#' treatment and covariate sections and the second between different covariates.
274		#'
275		#' @return
276		#' * `summarize_coxreg()` returns a layout object suitable for passing to further layouting functions,
277		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add a Cox regression table
278		#' containing the chosen statistics to the table layout.
279		#'
280		#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()] which also take the `variables`, `data`,
281		#' `at` (univariate only), and `control` arguments but return unformatted univariate and multivariate
282		#' Cox regression models, respectively.
283		#'
284		#' @examples
285		#' # summarize_coxreg
286		#'
287		#' result_univar <- basic_table() %>%
288		#' summarize_coxreg(variables = u1_variables) %>%
289		#' build_table(dta_bladder)
290		#' result_univar
291		#'
292		#' result_multivar <- basic_table() %>%
293		#' summarize_coxreg(
294		#' variables = m1_variables,
295		#' multivar = TRUE,
296		#' ) %>%
297		#' build_table(dta_bladder)
298		#' result_multivar
299		#'
300		#' result_univar_covs <- basic_table() %>%
301		#' summarize_coxreg(
302		#' variables = u2_variables,
303		#' ) %>%
304		#' build_table(dta_bladder)
305		#' result_univar_covs
306		#'
307		#' result_multivar_covs <- basic_table() %>%
308		#' summarize_coxreg(
309		#' variables = m2_variables,
310		#' multivar = TRUE,
311		#' varlabels = c("Covariate 1", "Covariate 2") # custom labels
312		#' ) %>%
313		#' build_table(dta_bladder)
314		#' result_multivar_covs
315		#'
316		#' @export
summarize_coxreg <- function(lyt,
                             variables,
                             control = control_coxreg(),
                             at = list(),
                             multivar = FALSE,
                             common_var = "STUDYID",
                             .stats = c("n", "hr", "ci", "pval", "pval_inter"),
                             .formats = c(
                               n = "xx", hr = "xx.xx", ci = "(xx.xx, xx.xx)",
                               pval = "x.xxxx | (<0.0001)", pval_inter = "x.xxxx | (<0.0001)"
                             ),
                             varlabels = NULL,
                             .indent_mods = NULL,
                             na_level = "",
                             .section_div = NA_character_) {
  if (multivar && control$interaction) {
    warning(paste(
      "Interactions are not available for multivariate cox regression using summarize_coxreg.",
      "The model will be calculated without interaction effects."
    ))
  }

  has_arm <- "arm" %in% names(variables)
  if (control$interaction && !has_arm) {
    stop("To include interactions please specify 'arm' in variables.")
  }

  # Restrict the requested statistics to those valid for the chosen model setup.
  valid_stats <- if (!has_arm || multivar) {
    c("hr", "ci", "pval")
  } else if (control$interaction) {
    c("n", "hr", "ci", "pval", "pval_inter")
  } else {
    c("n", "hr", "ci", "pval")
  }
  .stats <- intersect(valid_stats, .stats)

  all_labels <- c(
    n = "n", hr = "Hazard Ratio", ci = paste0(control$conf_level * 100, "% CI"),
    pval = "p-value", pval_inter = "Interaction p-value"
  )
  stat_labels <- all_labels[names(all_labels) %in% .stats]
  .formats <- .formats[names(.formats) %in% .stats]
  n_stats <- length(.stats)

  # One environment, handed to every statistic column as `cache_env`.
  env <- new.env()

  # One column per statistic, all split on the same `common_var`.
  lyt <- split_cols_by_multivar(
    lyt,
    vars = rep(common_var, n_stats),
    varlabels = stat_labels,
    extra_args = list(
      .stats = .stats,
      .formats = .formats,
      .indent_mods = .indent_mods,
      na_level = rep(na_level, n_stats),
      cache_env = replicate(n_stats, list(env))
    )
  )

  if (has_arm) { # treatment effect
    lyt <- split_rows_by(
      lyt,
      common_var,
      split_label = "Treatment:",
      label_pos = "visible",
      section_div = head(.section_div, 1)
    )
    lyt <- summarize_row_groups(
      lyt,
      cfun = a_coxreg,
      extra_args = list(
        variables = variables, control = control, multivar = multivar, eff = TRUE, var_main = multivar
      )
    )
    if (multivar) { # treatment level effects
      lyt <- analyze_colvars(
        lyt,
        afun = a_coxreg,
        extra_args = list(eff = TRUE, control = control, variables = variables, multivar = multivar, labelstr = "")
      )
    }
  }

  if ("covariates" %in% names(variables)) { # covariate main effects
    # Covariate header rows are shown for multivariate models, interaction
    # models, and models without an arm variable.
    show_var_rows <- multivar || control$interaction || !has_arm
    var_main <- if (multivar) multivar else control$interaction

    lyt <- split_rows_by_multivar(
      lyt,
      vars = variables$covariates,
      varlabels = varlabels,
      split_label = "Covariate:",
      nested = FALSE,
      child_labels = if (show_var_rows) "default" else "hidden",
      section_div = tail(.section_div, 1)
    )

    if (show_var_rows) {
      lyt <- summarize_row_groups(
        lyt,
        cfun = a_coxreg,
        extra_args = list(
          variables = variables, at = at, control = control, multivar = multivar,
          var_main = var_main
        )
      )
    } else {
      if (!is.null(varlabels)) {
        names(varlabels) <- variables$covariates
      }
      lyt <- analyze_colvars(
        lyt,
        afun = a_coxreg,
        extra_args = list(
          variables = variables, at = at, control = control, multivar = multivar,
          var_main = var_main,
          labelstr = if (is.null(varlabels)) "" else varlabels
        )
      )
    }

    # special case: univar no arm
    if (!has_arm) {
      control$interaction <- TRUE
    }
    if (multivar || control$interaction) { # covariate level effects
      lyt <- analyze_colvars(
        lyt,
        afun = a_coxreg,
        extra_args = list(variables = variables, at = at, control = control, multivar = multivar, labelstr = "")
      )
    }
  }

  lyt
}

1		#' Confidence Interval for Mean
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Convenient function for calculating the mean confidence interval. It calculates the arithmetic as well as the
6		#' geometric mean. It can be used as a `ggplot` helper function for plotting.
7		#'
8		#' @inheritParams argument_convention
9		#' @param n_min (`number`)\cr a minimum number of non-missing `x` to estimate the confidence interval for mean.
10		#' @param gg_helper (`logical`)\cr `TRUE` when output should be aligned for the use with `ggplot`.
11		#' @param geom_mean (`logical`)\cr `TRUE` when the geometric mean should be calculated.
12		#'
13		#' @return A named `vector` of values `mean_ci_lwr` and `mean_ci_upr`.
14		#'
15		#' @examples
16		#' stat_mean_ci(sample(10), gg_helper = FALSE)
17		#'
18		#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
19		#' ggplot2::geom_point()
20		#'
21		#' p + ggplot2::stat_summary(
22		#' fun.data = stat_mean_ci,
23		#' geom = "errorbar"
24		#' )
25		#'
26		#' p + ggplot2::stat_summary(
27		#' fun.data = stat_mean_ci,
28		#' fun.args = list(conf_level = 0.5),
29		#' geom = "errorbar"
30		#' )
31		#'
32		#' p + ggplot2::stat_summary(
33		#' fun.data = stat_mean_ci,
34		#' fun.args = list(conf_level = 0.5, geom_mean = TRUE),
35		#' geom = "errorbar"
36		#' )
37		#'
38		#' @export
stat_mean_ci <- function(x,
                         conf_level = 0.95,
                         na.rm = TRUE, # nolint
                         n_min = 2,
                         gg_helper = TRUE,
                         geom_mean = FALSE) {
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  if (!geom_mean) {
    m <- mean(x)
  } else if (any(x <= 0, na.rm = TRUE)) {
    # The geometric mean is only computed for strictly positive data; any
    # non-positive value makes both the mean and its interval NA.
    m <- NA_real_
  } else {
    # Work on the log scale; the interval is back-transformed below.
    x <- log(x)
    m <- mean(x)
  }

  if (n < n_min || is.na(m)) {
    ci <- c(mean_ci_lwr = NA_real_, mean_ci_upr = NA_real_)
  } else {
    # Half-width of the t-based interval with n - 1 degrees of freedom.
    hci <- stats::qt((1 + conf_level) / 2, df = n - 1) * stats::sd(x) / sqrt(n)
    ci <- c(mean_ci_lwr = m - hci, mean_ci_upr = m + hci)
    if (geom_mean) {
      ci <- exp(ci)
    }
  }

  if (gg_helper) {
    # Return a one-row data frame with columns `y`, `ymin`, `ymax`.
    # `m` is a scalar here, so plain `if`/`else` replaces `ifelse()`.
    m <- if (is.na(m)) NA_real_ else m
    ci <- data.frame(y = if (geom_mean) exp(m) else m, ymin = ci[[1]], ymax = ci[[2]])
  }

  ci
}
79
80		#' Confidence Interval for Median
81		#'
82		#' @description `r lifecycle::badge("stable")`
83		#'
84		#' Convenient function for calculating the median confidence interval. It can be used as a `ggplot` helper
85		#' function for plotting.
86		#'
87		#' @inheritParams argument_convention
88		#' @param gg_helper (`logical`)\cr `TRUE` when output should be aligned for the use with `ggplot`.
89		#'
90		#' @details The function was adapted from `DescTools/versions/0.99.35/source`
91		#'
92		#' @return A named `vector` of values `median_ci_lwr` and `median_ci_upr`.
93		#'
94		#' @examples
95		#' stat_median_ci(sample(10), gg_helper = FALSE)
96		#'
97		#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
98		#' ggplot2::geom_point()
99		#' p + ggplot2::stat_summary(
100		#' fun.data = stat_median_ci,
101		#' geom = "errorbar"
102		#' )
103		#'
104		#' @export
stat_median_ci <- function(x,
                           conf_level = 0.95,
                           na.rm = TRUE, # nolint
                           gg_helper = TRUE) {
  vals <- unname(x)
  if (na.rm) {
    vals <- vals[!is.na(vals)]
  }
  n_obs <- length(vals)
  center <- stats::median(vals)

  # Rank of the lower limit, taken from the Binomial(n_obs, 0.5) quantile.
  rank_lwr <- stats::qbinom(p = (1 - conf_level) / 2, size = n_obs, prob = 0.5, lower.tail = TRUE)

  # rank_lwr == 0 - for small samples (e.g. n <= 5) ci can be outside the observed range
  if (rank_lwr == 0 || is.na(center)) {
    limits <- c(median_ci_lwr = NA_real_, median_ci_upr = NA_real_)
    achieved_level <- NA_real_
  } else {
    ordered <- sort(vals)
    limits <- c(
      median_ci_lwr = ordered[rank_lwr],
      median_ci_upr = ordered[n_obs - rank_lwr + 1]
    )
    # Coverage actually achieved by the chosen pair of order statistics.
    achieved_level <- 1 - 2 * stats::pbinom(rank_lwr - 1, size = n_obs, prob = 0.5)
  }

  out <- if (gg_helper) {
    data.frame(y = center, ymin = limits[[1]], ymax = limits[[2]])
  } else {
    limits
  }
  attr(out, "conf_level") <- achieved_level

  out
}
136
137		#' p-Value of the Mean
138		#'
139		#' @description `r lifecycle::badge("stable")`
140		#'
141		#' Convenient function for calculating the two-sided p-value of the mean.
142		#'
143		#' @inheritParams argument_convention
144		#' @param n_min (`numeric`)\cr a minimum number of non-missing `x` to estimate the p-value of the mean.
145		#' @param test_mean (`numeric`)\cr mean value to test under the null hypothesis.
146		#'
147		#' @return A p-value.
148		#'
149		#' @examples
150		#' stat_mean_pval(sample(10))
151		#'
152		#' stat_mean_pval(rnorm(10), test_mean = 0.5)
153		#'
154		#' @export
stat_mean_pval <- function(x,
                           na.rm = TRUE, # nolint
                           n_min = 2,
                           test_mean = 0) {
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  x_mean <- mean(x)
  x_sd <- stats::sd(x)

  if (n < n_min) {
    # Too few observations to run the test.
    pv <- c(p_value = NA_real_)
  } else {
    # One-sample t statistic against `test_mean`, reusing the standard
    # deviation computed above rather than recomputing it.
    x_se <- x_sd / sqrt(n)
    ttest <- (x_mean - test_mean) / x_se
    # Two-sided p-value from the t distribution with n - 1 degrees of freedom.
    pv <- c(p_value = 2 * stats::pt(-abs(ttest), df = n - 1))
  }

  pv
}

1		#' Estimation of Proportions
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Estimate the proportion of responders within a studied population.
6		#'
7		#' @inheritParams argument_convention
8		#'
9		#' @seealso [h_proportions]
10		#'
11		#' @name estimate_proportions
12		NULL
13
14		#' @describeIn estimate_proportions Statistics function estimating a
15		#' proportion along with its confidence interval.
16		#'
17		#' @inheritParams prop_strat_wilson
18		#' @param df (`logical` or `data.frame`)\cr if only a logical vector is used,
19		#' it indicates whether each subject is a responder or not. `TRUE` represents
20		#' a successful outcome. If a `data.frame` is provided, also the `strata` variable
21		#' names must be provided in `variables` as a list element with the strata strings.
22		#' In the case of `data.frame`, the logical vector of responses must be indicated as a
23		#' variable name in `.var`.
24		#' @param method (`string`)\cr the method used to construct the confidence interval
25		#' for proportion of successful outcomes; one of `waldcc`, `wald`, `clopper-pearson`,
26		#' `wilson`, `wilsonc`, `strat_wilson`, `strat_wilsonc`, `agresti-coull` or `jeffreys`.
27		#' @param long (`flag`)\cr a long description is required.
28		#'
29		#' @return
30		#' * `s_proportion()` returns statistics `n_prop` (`n` and proportion) and `prop_ci` (proportion CI) for a
31		#' given variable.
32		#'
33		#' @examples
34		#' # Case with only logical vector.
35		#' rsp_v <- c(1, 0, 1, 0, 1, 1, 0, 0)
36		#' s_proportion(rsp_v)
37		#'
38		#' # Example for Stratified Wilson CI
39		#' nex <- 100 # Number of example rows
40		#' dta <- data.frame(
41		#' "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
42		#' "grp" = sample(c("A", "B"), nex, TRUE),
43		#' "f1" = sample(c("a1", "a2"), nex, TRUE),
44		#' "f2" = sample(c("x", "y", "z"), nex, TRUE),
45		#' stringsAsFactors = TRUE
46		#' )
47		#'
48		#' s_proportion(
49		#' df = dta,
50		#' .var = "rsp",
51		#' variables = list(strata = c("f1", "f2")),
52		#' conf_level = 0.90,
53		#' method = "strat_wilson"
54		#' )
55		#'
56		#' @export
s_proportion <- function(df,
                         .var,
                         conf_level = 0.95,
                         method = c(
                           "waldcc", "wald", "clopper-pearson",
                           "wilson", "wilsonc", "strat_wilson", "strat_wilsonc",
                           "agresti-coull", "jeffreys"
                         ),
                         weights = NULL,
                         max_iterations = 50,
                         variables = list(strata = NULL),
                         long = FALSE) {
  method <- match.arg(method)
  checkmate::assert_flag(long)
  assert_proportion_value(conf_level)

  strata_names <- variables$strata
  if (is.null(strata_names)) {
    # Stratified methods cannot run without strata variables.
    if (method %in% c("strat_wilson", "strat_wilsonc")) {
      stop("To use stratified methods you need to specify the strata variables.")
    }
  } else {
    # Checks for strata; further checks are pushed down to prop_strat_wilson.
    if (missing(df)) {
      stop("When doing stratified analysis a data.frame with specific columns is needed.")
    }
    checkmate::assert_character(strata_names, null.ok = FALSE)
    assert_df_with_variables(df, stats::setNames(as.list(strata_names), strata_names))

    # A single factor with one level per combination of the strata columns.
    strata <- as.factor(interaction(df[strata_names]))
  }

  # `df` is either the response vector itself or a data frame holding it in `.var`.
  rsp <- as.logical(if (checkmate::test_atomic_vector(df)) df else df[[.var]])
  n_resp <- sum(rsp)
  prop_resp <- mean(rsp)

  ci <- switch(method,
    "wald" = prop_wald(rsp, conf_level),
    "waldcc" = prop_wald(rsp, conf_level, correct = TRUE),
    "wilson" = prop_wilson(rsp, conf_level),
    "wilsonc" = prop_wilson(rsp, conf_level, correct = TRUE),
    "strat_wilson" = prop_strat_wilson(
      rsp, strata, weights, conf_level, max_iterations,
      correct = FALSE
    )$conf_int,
    "strat_wilsonc" = prop_strat_wilson(
      rsp, strata, weights, conf_level, max_iterations,
      correct = TRUE
    )$conf_int,
    "clopper-pearson" = prop_clopper_pearson(rsp, conf_level),
    "agresti-coull" = prop_agresti_coull(rsp, conf_level),
    "jeffreys" = prop_jeffreys(rsp, conf_level)
  )

  ci_label <- d_proportion(conf_level, method, long = long)
  list(
    "n_prop" = formatters::with_label(c(n_resp, prop_resp), "Responders"),
    "prop_ci" = formatters::with_label(x = 100 * ci, label = ci_label)
  )
}
127
128		#' @describeIn estimate_proportions Formatted analysis function which is used as `afun`
129		#' in `estimate_proportion()`.
130		#'
131		#' @return
132		#' * `a_proportion()` returns the corresponding list with formatted [rtables::CellValue()].
133		#'
134		#' @export
a_proportion <- make_afun(
  s_proportion,
  # Default cell formats for the two statistics returned by s_proportion().
  .formats = c(
    n_prop = "xx (xx.x%)",
    prop_ci = "(xx.x, xx.x)"
  )
)
139
140		#' @describeIn estimate_proportions Layout-creating function which can take statistics function arguments
141		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
142		#'
143		#' @param ... other arguments are ultimately conveyed to [s_proportion()].
144		#'
145		#' @return
146		#' * `estimate_proportion()` returns a layout object suitable for passing to further layouting functions,
147		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
148		#' the statistics from `s_proportion()` to the table layout.
149		#'
150		#' @examples
151		#' dta_test <- data.frame(
152		#' USUBJID = paste0("S", 1:12),
153		#' ARM = rep(LETTERS[1:3], each = 4),
154		#' AVAL = c(A = c(1, 1, 1, 1), B = c(0, 0, 1, 1), C = c(0, 0, 0, 0))
155		#' )
156		#'
157		#' basic_table() %>%
158		#' split_cols_by("ARM") %>%
159		#' estimate_proportion(vars = "AVAL") %>%
160		#' build_table(df = dta_test)
161		#'
162		#' @export
estimate_proportion <- function(lyt,
                                vars,
                                ...,
                                show_labels = "hidden",
                                table_names = vars,
                                .stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Customise the formatted analysis function with the user's overrides.
  afun <- make_afun(
    a_proportion,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Everything passed through `...` is handed on as extra arguments.
  extra_args <- list(...)

  analyze(
    lyt,
    vars,
    afun = afun,
    extra_args = extra_args,
    show_labels = show_labels,
    table_names = table_names
  )
}
188
189		#' Helper Functions for Calculating Proportion Confidence Intervals
190		#'
191		#' @description `r lifecycle::badge("stable")`
192		#'
193		#' Functions to calculate different proportion confidence intervals for use in [estimate_proportion()].
194		#'
195		#' @inheritParams argument_convention
196		#' @inheritParams estimate_proportions
197		#'
198		#' @return Confidence interval of a proportion.
199		#'
200		#' @seealso [estimate_proportions], descriptive function [d_proportion()],
201		#' and helper functions [strata_normal_quantile()] and [update_weights_strat_wilson()].
202		#'
203		#' @name h_proportions
204		NULL
205
206		#' @describeIn h_proportions Calculates the Wilson interval by calling [stats::prop.test()].
207		#' Also referred to as Wilson score interval.
208		#'
209		#' @examples
210		#' rsp <- c(
211		#' TRUE, TRUE, TRUE, TRUE, TRUE,
212		#' FALSE, FALSE, FALSE, FALSE, FALSE
213		#' )
214		#' prop_wilson(rsp, conf_level = 0.9)
215		#'
216		#' @export
prop_wilson <- function(rsp, conf_level, correct = FALSE) {
  # stats::prop.test() is called with the responder count and sample size;
  # only its confidence interval is kept.
  test_res <- stats::prop.test(
    x = sum(rsp),
    n = length(rsp),
    conf.level = conf_level,
    correct = correct
  )

  # as.numeric() drops the attributes carried by the interval.
  as.numeric(test_res$conf.int)
}
227
228		#' @describeIn h_proportions Calculates the stratified Wilson confidence
229		#' interval for unequal proportions as described in \insertCite{Yan2010-jt;textual}{tern}
230		#'
231		#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
232		#' @param weights (`numeric` or `NULL`)\cr weights for each level of the strata. If `NULL`, they are
233		#' estimated using the iterative algorithm proposed in \insertCite{Yan2010-jt;textual}{tern} that
234		#' minimizes the weighted squared length of the confidence interval.
235		#' @param max_iterations (`count`)\cr maximum number of iterations for the iterative procedure used
236		#' to find estimates of optimal weights.
237		#' @param correct (`flag`)\cr include the continuity correction. For further information, see for example
238		#' [stats::prop.test()].
239		#'
240		#' @references
241		#' \insertRef{Yan2010-jt}{tern}
242		#'
243		#' @examples
244		#' # Stratified Wilson confidence interval with unequal probabilities
245		#'
246		#' set.seed(1)
247		#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
248		#' strata_data <- data.frame(
249		#' "f1" = sample(c("a", "b"), 100, TRUE),
250		#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
251		#' stringsAsFactors = TRUE
252		#' )
253		#' strata <- interaction(strata_data)
254		#' n_strata <- ncol(table(rsp, strata)) # Number of strata
255		#'
256		#' prop_strat_wilson(
257		#' rsp = rsp, strata = strata,
258		#' conf_level = 0.90
259		#' )
260		#'
261		#' # Not automatic setting of weights
262		#' prop_strat_wilson(
263		#' rsp = rsp, strata = strata,
264		#' weights = rep(1 / n_strata, n_strata),
265		#' conf_level = 0.90
266		#' )
267		#'
268		#' @export
prop_strat_wilson <- function(rsp,
                              strata,
                              weights = NULL,
                              conf_level = 0.95,
                              max_iterations = NULL,
                              correct = FALSE) {
  checkmate::assert_logical(rsp, any.missing = FALSE)
  checkmate::assert_factor(strata, len = length(rsp))
  assert_proportion_value(conf_level)

  tbl <- table(rsp, strata)
  n_strata <- length(unique(strata))

  # Checking the weights and maximum number of iterations.
  do_iter <- FALSE
  if (is.null(weights)) {
    weights <- rep(1 / n_strata, n_strata) # Initialization for iterative procedure
    do_iter <- TRUE

    # Iteration parameters
    if (is.null(max_iterations)) max_iterations <- 10
    checkmate::assert_int(max_iterations, na.ok = FALSE, null.ok = FALSE, lower = 1)
  }
  checkmate::assert_numeric(weights, lower = 0, upper = 1, any.missing = FALSE, len = n_strata)
  sum_weights <- checkmate::assert_int(sum(weights))
  if (as.integer(sum_weights + 0.5) != 1L) stop("Sum of weights must be 1L.")

  # Responder counts and sizes per stratum; strata without observations are dropped.
  xs <- tbl["TRUE", ]
  ns <- colSums(tbl)
  use_stratum <- (ns > 0)
  ns <- ns[use_stratum]
  xs <- xs[use_stratum]
  ests <- xs / ns
  vars <- ests * (1 - ests) / ns

  strata_qnorm <- strata_normal_quantile(vars, weights, conf_level)

  # Iterative setting of weights if they were not set externally
  weights_new <- if (do_iter) {
    update_weights_strat_wilson(vars, strata_qnorm, weights, ns, max_iterations, conf_level)$weights
  } else {
    weights
  }

  # Confidence level applied within each stratum, implied by the stratified quantile.
  strata_conf_level <- 2 * stats::pnorm(strata_qnorm) - 1

  ci_by_strata <- Map(
    function(x, n) {
      # Classic Wilson's confidence interval
      suppressWarnings(stats::prop.test(x, n, correct = correct, conf.level = strata_conf_level)$conf.int)
    },
    x = xs,
    n = ns
  )
  # vapply() guarantees one numeric limit per stratum, unlike sapply(), whose
  # return type depends on its input.
  lower_by_strata <- vapply(ci_by_strata, "[", numeric(1), 1L)
  upper_by_strata <- vapply(ci_by_strata, "[", numeric(1), 2L)

  lower <- sum(weights_new * lower_by_strata)
  upper <- sum(weights_new * upper_by_strata)

  # Return values: the weights are reported only when they were estimated here.
  result <- list(
    conf_int = c(
      lower = lower,
      upper = upper
    )
  )
  if (do_iter) {
    result$weights <- weights_new
  }
  result
}
348
349		#' @describeIn h_proportions Calculates the Clopper-Pearson interval by calling [stats::binom.test()].
350		#' Also referred to as the `exact` method.
351		#'
352		#' @examples
353		#' prop_clopper_pearson(rsp, conf_level = .95)
354		#'
355		#' @export
prop_clopper_pearson <- function(rsp,
                                 conf_level) {
  # The interval comes from stats::binom.test() on the responder count.
  interval <- stats::binom.test(
    sum(rsp),
    length(rsp),
    conf.level = conf_level
  )$conf.int

  as.numeric(interval)
}
365
366		#' @describeIn h_proportions Calculates the Wald interval by following the usual textbook definition
367		#' for a single proportion confidence interval using the normal approximation.
368		#'
369		#' @param correct (`flag`)\cr apply continuity correction.
370		#'
371		#' @examples
372		#' prop_wald(rsp, conf_level = 0.95)
373		#' prop_wald(rsp, conf_level = 0.95, correct = TRUE)
374		#'
375		#' @export
prop_wald <- function(rsp, conf_level, correct = FALSE) {
  n_obs <- length(rsp)
  est <- mean(rsp)
  crit <- stats::qnorm((1 + conf_level) / 2)

  # The continuity correction widens the interval by 1 / (2n) on each side.
  cc <- if (correct) 1 / (2 * n_obs) else 0
  half_width <- crit * sqrt(est * (1 - est)) / sqrt(n_obs) + cc

  # Limits are truncated to the [0, 1] range.
  c(
    max(0, est - half_width),
    min(1, est + half_width)
  )
}
389
390		#' @describeIn h_proportions Calculates the `Agresti-Coull` interval (created by `Alan Agresti` and `Brent Coull`) by
391		#' (for 95% CI) adding two successes and two failures to the data and then using the Wald formula to construct a CI.
392		#'
393		#' @examples
394		#' prop_agresti_coull(rsp, conf_level = 0.95)
395		#'
396		#' @export
prop_agresti_coull <- function(rsp, conf_level) {
  crit <- stats::qnorm((1 + conf_level) / 2)

  # Add here both z^2 / 2 successes and failures.
  successes_adj <- sum(rsp) + crit^2 / 2
  n_adj <- length(rsp) + crit^2

  # Then proceed as with the Wald interval.
  est_adj <- successes_adj / n_adj
  half_width <- crit * sqrt(est_adj * (1 - est_adj)) / sqrt(n_adj)

  c(
    max(0, est_adj - half_width),
    min(1, est_adj + half_width)
  )
}
415
416		#' @describeIn h_proportions Calculates the Jeffreys interval, an equal-tailed interval based on the
417		#' non-informative Jeffreys prior for a binomial proportion.
418		#'
419		#' @examples
420		#' prop_jeffreys(rsp, conf_level = 0.95)
421		#'
422		#' @export
prop_jeffreys <- function(rsp,
                          conf_level) {
  n_total <- length(rsp)
  n_success <- sum(rsp)
  alpha <- 1 - conf_level

  # Both limits are quantiles of a Beta(successes + 0.5, failures + 0.5)
  # distribution.
  # The lower limit is fixed at 0 when there are no responders.
  lower <- ifelse(
    n_success == 0,
    0,
    stats::qbeta(alpha / 2, n_success + 0.5, n_total - n_success + 0.5)
  )

  # The upper limit is fixed at 1 when every subject responds.
  upper <- ifelse(
    n_success == n_total,
    1,
    stats::qbeta(1 - alpha / 2, n_success + 0.5, n_total - n_success + 0.5)
  )

  c(lower, upper)
}
443
444		#' Description of the Proportion Summary
445		#'
446		#' @description `r lifecycle::badge("stable")`
447		#'
448		#' This is a helper function that describes the analysis in [s_proportion()].
449		#'
450		#' @inheritParams s_proportion
451		#' @param long (`flag`)\cr whether a long or a short (default) description is required.
452		#'
453		#' @return String describing the analysis.
454		#'
455		#' @export
d_proportion <- function(conf_level,
                         method,
                         long = FALSE) {
  ci_text <- paste0(conf_level * 100, "% CI")
  if (long) {
    ci_text <- paste(ci_text, "for Response Rates")
  }

  # Human-readable method name; unknown methods are an error.
  method_text <- switch(method,
    "wald" = "Wald, without correction",
    "waldcc" = "Wald, with correction",
    "wilson" = "Wilson, without correction",
    "wilsonc" = "Wilson, with correction",
    "strat_wilson" = "Stratified Wilson, without correction",
    "strat_wilsonc" = "Stratified Wilson, with correction",
    "clopper-pearson" = "Clopper-Pearson",
    "agresti-coull" = "Agresti-Coull",
    "jeffreys" = "Jeffreys",
    stop(paste(method, "does not have a description"))
  )

  paste0(ci_text, " (", method_text, ")")
}
478
479		#' Helper Function for the Estimation of Stratified Quantiles
480		#'
481		#' @description `r lifecycle::badge("stable")`
482		#'
483		#' This function wraps the estimation of stratified percentiles when we assume
484		#' the approximation for large numbers. This is necessary only in the case
485		#' proportions for each strata are unequal.
486		#'
487		#' @inheritParams argument_convention
488		#' @inheritParams prop_strat_wilson
489		#'
490		#' @return Stratified quantile.
491		#'
492		#' @seealso [prop_strat_wilson()]
493		#'
494		#' @examples
495		#' strata_data <- table(data.frame(
496		#' "f1" = sample(c(TRUE, FALSE), 100, TRUE),
497		#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
498		#' stringsAsFactors = TRUE
499		#' ))
500		#' ns <- colSums(strata_data)
501		#' ests <- strata_data["TRUE", ] / ns
502		#' vars <- ests * (1 - ests) / ns
503		#' weights <- rep(1 / length(ns), length(ns))
504		#' strata_normal_quantile(vars, weights, 0.95)
505		#'
506		#' @export
strata_normal_quantile <- function(vars, weights, conf_level) {
  weighted_vars <- weights^2 * vars
  normal_quantile <- stats::qnorm((1 + conf_level) / 2)

  # Stratified quantile: the standard normal quantile scaled by
  # sqrt(sum(w^2 * v)) / sum(sqrt(w^2 * v)).
  sqrt(sum(weighted_vars)) / sum(sqrt(weighted_vars)) * normal_quantile
}
512
513		#' Helper Function for the Estimation of Weights for `prop_strat_wilson`
514		#'
515		#' @description `r lifecycle::badge("stable")`
516		#'
517		#' This function wraps the iteration procedure that allows you to estimate
518		#' the weights for each proportional strata. This assumes to minimize the
519		#' weighted squared length of the confidence interval.
520		#'
521		#' @inheritParams prop_strat_wilson
522		#' @param vars (`numeric`)\cr normalized proportions for each strata.
523		#' @param strata_qnorm (`numeric`)\cr initial estimation with identical weights of the quantiles.
524		#' @param initial_weights (`numeric`)\cr initial weights used to calculate `strata_qnorm`. This can
525		#' be optimized in the future if we need to estimate better initial weights.
526		#' @param n_per_strata (`numeric`)\cr number of elements in each strata.
527		#' @param max_iterations (`count`)\cr maximum number of iterations to be tried. Convergence is always checked.
528		#' @param tol (`number`)\cr tolerance threshold for convergence.
529		#'
530		#' @return A `list` of 3 elements: `n_it`, `weights`, and `diff_v`.
531		#'
532		#' @seealso For references and details see [prop_strat_wilson()].
533		#'
534		#' @examples
535		#' vs <- c(0.011, 0.013, 0.012, 0.014, 0.017, 0.018)
536		#' sq <- 0.674
537		#' ws <- rep(1 / length(vs), length(vs))
538		#' ns <- c(22, 18, 17, 17, 14, 12)
539		#'
540		#' update_weights_strat_wilson(vs, sq, ws, ns, 100, 0.95, 0.001)
541		#'
542		#' @export
update_weights_strat_wilson <- function(vars,
                                        strata_qnorm,
                                        initial_weights,
                                        n_per_strata,
                                        max_iterations = 50,
                                        conf_level = 0.95,
                                        tol = 0.001) {
  it <- 0
  diff_v <- NULL
  converged <- FALSE
  # Fallback so the returned weights are defined even if the loop never runs.
  weights_new <- initial_weights

  while (it < max_iterations) {
    it <- it + 1
    weights_new_t <- (1 + strata_qnorm^2 / n_per_strata)^2
    weights_new_b <- (vars + strata_qnorm^2 / (4 * n_per_strata^2))
    weights_new <- weights_new_t / weights_new_b
    # Normalise so the weights sum to 1.
    weights_new <- weights_new / sum(weights_new)
    strata_qnorm <- strata_normal_quantile(vars, weights_new, conf_level)
    # Total absolute change in the weights relative to the previous iteration.
    diff_v <- c(diff_v, sum(abs(weights_new - initial_weights)))
    if (diff_v[length(diff_v)] < tol) {
      converged <- TRUE
      break
    }
    initial_weights <- weights_new
  }

  # Warn only when the tolerance was never reached. Checking the iteration
  # counter alone would also warn when convergence happens on the final
  # allowed iteration.
  if (!converged) {
    warning("The heuristic to find weights did not converge with max_iterations = ", max_iterations)
  }

  list(
    "n_it" = it,
    "weights" = weights_new,
    "diff_v" = diff_v
  )
}

1		#' Add Titles, Footnotes, Page Number, and a Bounding Box to a Grid Grob
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' This function is useful to label grid grobs (also `ggplot2`, and `lattice` plots)
6		#' with title, footnote, and page numbers.
7		#'
8		#' @inheritParams grid::grob
9		#' @param grob a grid grob object, optionally `NULL` if only a `grob` with the decoration should be shown.
10		#' @param titles vector of character strings. Vector elements are separated by a newline and strings are wrapped
11		#' according to the page width.
12		#' @param footnotes vector of character string. Same rules as for `titles`.
13		#' @param page string with page numeration, if `NULL` then no page number is displayed.
14		#' @param width_titles unit object
15		#' @param width_footnotes unit object
16		#' @param border boolean, whether a border should be drawn around the plot or not.
17		#' @param margins unit object of length 4
18		#' @param padding unit object of length 4
19		#' @param outer_margins unit object of length 4
20		#' @param gp_titles a `gpar` object
21		#' @param gp_footnotes a `gpar` object
22		#'
23		#' @return A grid grob (`gTree`).
24		#'
25		#' @details The titles and footnotes will be ragged, i.e. each title will be wrapped individually.
26		#'
27		#' @examples
28		#' library(grid)
29		#'
30		#' titles <- c(
31		#' "Edgar Anderson's Iris Data",
32		#' paste(
33		#' "This famous (Fisher's or Anderson's) iris data set gives the measurements",
34		#' "in centimeters of the variables sepal length and width and petal length",
35		#' "and width, respectively, for 50 flowers from each of 3 species of iris."
36		#' )
37		#' )
38		#'
39		#' footnotes <- c(
40		#' "The species are Iris setosa, versicolor, and virginica.",
41		#' paste(
42		#' "iris is a data frame with 150 cases (rows) and 5 variables (columns) named",
43		#' "Sepal.Length, Sepal.Width, Petal.Length, Petal.Width, and Species."
44		#' )
45		#' )
46		#'
47		#' ## empty plot
48		#' grid.newpage()
49		#'
50		#' grid.draw(
51		#' decorate_grob(
52		#' NULL,
53		#' titles = titles,
54		#' footnotes = footnotes,
55		#' page = "Page 4 of 10"
56		#' )
57		#' )
58		#'
59		#' # grid
60		#' p <- gTree(
61		#' children = gList(
62		#' rectGrob(),
63		#' xaxisGrob(),
64		#' yaxisGrob(),
65		#' textGrob("Sepal.Length", y = unit(-4, "lines")),
66		#' textGrob("Petal.Length", x = unit(-3.5, "lines"), rot = 90),
67		#' pointsGrob(iris$Sepal.Length, iris$Petal.Length, gp = gpar(col = iris$Species), pch = 16)
68		#' ),
69		#' vp = vpStack(plotViewport(), dataViewport(xData = iris$Sepal.Length, yData = iris$Petal.Length))
70		#' )
71		#' grid.newpage()
72		#' grid.draw(p)
73		#'
74		#' grid.newpage()
75		#' grid.draw(
76		#' decorate_grob(
77		#' grob = p,
78		#' titles = titles,
79		#' footnotes = footnotes,
80		#' page = "Page 6 of 129"
81		#' )
82		#' )
83		#'
84		#' ## with ggplot2
85		#' library(ggplot2)
86		#'
87		#' p_gg <- ggplot2::ggplot(iris, aes(Sepal.Length, Sepal.Width, col = Species)) +
88		#' ggplot2::geom_point()
89		#' p_gg
90		#' p <- ggplotGrob(p_gg)
91		#' grid.newpage()
92		#' grid.draw(
93		#' decorate_grob(
94		#' grob = p,
95		#' titles = titles,
96		#' footnotes = footnotes,
97		#' page = "Page 6 of 129"
98		#' )
99		#' )
100		#'
101		#' ## with lattice
102		#' library(lattice)
103		#'
104		#' xyplot(Sepal.Length ~ Petal.Length, data = iris, col = iris$Species)
105		#' p <- grid.grab()
106		#' grid.newpage()
107		#' grid.draw(
108		#' decorate_grob(
109		#' grob = p,
110		#' titles = titles,
111		#' footnotes = footnotes,
112		#' page = "Page 6 of 129"
113		#' )
114		#' )
115		#'
116		#' # with gridExtra - no borders
117		#' library(gridExtra)
118		#' grid.newpage()
119		#' grid.draw(
120		#' decorate_grob(
121		#' tableGrob(
122		#' head(mtcars)
123		#' ),
124		#' titles = "title",
125		#' footnotes = "footnote",
126		#' border = FALSE
127		#' )
128		#' )
129		#'
130		#' @export
decorate_grob <- function(grob,
                          titles,
                          footnotes,
                          page = "",
                          width_titles = grid::unit(1, "npc") - grid::stringWidth(page),
                          width_footnotes = grid::unit(1, "npc") - grid::stringWidth(page),
                          border = TRUE,
                          margins = grid::unit(c(1, 0, 1, 0), "lines"),
                          padding = grid::unit(rep(1, 4), "lines"),
                          outer_margins = grid::unit(c(2, 1.5, 3, 1.5), "cm"),
                          gp_titles = grid::gpar(),
                          gp_footnotes = grid::gpar(fontsize = 8),
                          name = NULL,
                          gp = grid::gpar(),
                          vp = NULL) {
  # The documentation allows `page = NULL` to suppress the page number. The
  # default `width_titles`/`width_footnotes` measure `page`, and they are
  # evaluated lazily (first use is below), so normalizing `NULL` to an empty
  # label here makes those defaults and the page text grob work for `NULL` too.
  if (is.null(page)) page <- ""

  # Titles are wrapped to `width_titles` and placed in the first layout row.
  st_titles <- split_text_grob(
    titles,
    x = 0, y = 1,
    just = c("left", "top"),
    width = width_titles,
    vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 1),
    gp = gp_titles
  )

  # Footnotes are wrapped to `width_footnotes` and placed in the third layout row.
  st_footnotes <- split_text_grob(
    footnotes,
    x = 0, y = 1,
    just = c("left", "top"),
    width = width_footnotes,
    vp = grid::viewport(layout.pos.row = 3, layout.pos.col = 1),
    gp = gp_footnotes
  )

  # The outer gTree stores all arguments as slots (checked by
  # `validDetails.decoratedGrob`) and has a single child holding the
  # three-row layout: titles / (optional border + padded grob) / footnotes + page.
  grid::gTree(
    grob = grob,
    titles = titles,
    footnotes = footnotes,
    page = page,
    width_titles = width_titles,
    width_footnotes = width_footnotes,
    border = border,
    margins = margins,
    padding = padding,
    outer_margins = outer_margins,
    gp_titles = gp_titles,
    gp_footnotes = gp_footnotes,
    children = grid::gList(
      grid::gTree(
        children = grid::gList(
          st_titles,
          grid::gTree(
            children = grid::gList(
              # `if` without `else` yields NULL when `border` is FALSE.
              if (border) grid::rectGrob(),
              grid::gTree(
                children = grid::gList(
                  grob
                ),
                vp = grid::plotViewport(margins = padding)
              )
            ),
            vp = grid::vpStack(
              grid::viewport(layout.pos.row = 2, layout.pos.col = 1),
              grid::plotViewport(margins = margins)
            )
          ),
          st_footnotes,
          # Page label shares the footnote row, anchored bottom-right.
          grid::textGrob(
            page,
            x = 1, y = 0,
            just = c("right", "bottom"),
            vp = grid::viewport(layout.pos.row = 3, layout.pos.col = 1),
            gp = gp_footnotes
          )
        ),
        childrenvp = NULL,
        name = "titles_grob_footnotes",
        vp = grid::vpStack(
          grid::plotViewport(margins = outer_margins),
          grid::viewport(
            layout = grid::grid.layout(
              nrow = 3, ncol = 1,
              # Title and footnote rows take the height of their wrapped text;
              # the middle row takes the remaining space.
              heights = grid::unit.c(
                grid::grobHeight(st_titles),
                grid::unit(1, "null"),
                grid::grobHeight(st_footnotes)
              )
            )
          )
        )
      )
    ),
    name = name,
    gp = gp,
    vp = vp,
    cl = "decoratedGrob"
  )
}
228
229		#' @importFrom grid validDetails
230		#' @noRd
validDetails.decoratedGrob <- function(x) {
  # Validates the slots stored on a `decoratedGrob` and returns the grob unchanged.

  # A spacing slot is accepted as-is when it is a grid unit; otherwise it has
  # to be a vector of length 4. `label` keeps the reported variable name equal
  # to the slot expression.
  assert_len4_unless_unit <- function(value, label) {
    if (!grid::is.unit(value)) {
      checkmate::assert_vector(value, len = 4, .var.name = label)
    }
  }

  checkmate::assert_character(x$titles)
  checkmate::assert_character(x$footnotes)

  # The wrapped grob is optional.
  if (!is.null(x$grob)) {
    checkmate::assert_true(grid::is.grob(x$grob))
  }
  # The page label is only type-checked when it has exactly one element.
  if (length(x$page) == 1) {
    checkmate::assert_character(x$page)
  }

  assert_len4_unless_unit(x$outer_margins, "x$outer_margins")
  assert_len4_unless_unit(x$margins, "x$margins")
  assert_len4_unless_unit(x$padding, "x$padding")

  x
}
253
254		#' @importFrom grid widthDetails
255		#' @noRd
widthDetails.decoratedGrob <- function(x) {
  # The reported width does not depend on `x`: always one "null" unit.
  grid::unit(1, units = "null")
}
259
260		#' @importFrom grid heightDetails
261		#' @noRd
heightDetails.decoratedGrob <- function(x) {
  # The reported height does not depend on `x`: always one "null" unit.
  grid::unit(1, units = "null")
}
265
266		# Adapted from Paul Murrell, R Graphics, 2nd Edition
267		# https://www.stat.auckland.ac.nz/~paul/RG2e/interactgrid-splittext.R
split_string <- function(text, width) {
  # Greedily wraps each element of `text` so that a line stays narrower than
  # `width` (a grid unit), then joins all elements with newlines.
  #
  # text:  character vector; each element is wrapped independently.
  # width: grid unit giving the available line width.
  # Returns a single string; an empty `text` gives "" (previously the literal
  # "NA", because the accumulator was initialised with `NA`).
  word_lists <- strsplit(text, " ")

  # Loop-invariant quantities.
  gap_width <- grid::stringWidth(" ")
  avail_width <- as.numeric(width)
  width_type <- grid::unitType(width)

  wrapped <- character(length(word_lists))
  for (i in seq_along(word_lists)) {
    words <- word_lists[[i]]
    # `strsplit("")` yields character(0); treat it as one empty word.
    if (length(words) == 0) words <- ""

    line <- words[[1]]
    line_width <- grid::stringWidth(line)

    for (word in words[-1]) {
      word_width <- grid::stringWidth(word)
      candidate_width <- line_width + gap_width + word_width
      fits <- grid::convertWidth(candidate_width, width_type, valueOnly = TRUE) < avail_width
      if (fits) {
        # Word fits on the current line.
        line <- paste(line, word, sep = " ")
        line_width <- candidate_width
      } else {
        # Start a new line with this word.
        line <- paste(line, word, sep = "\n")
        line_width <- word_width
      }
    }
    wrapped[i] <- line
  }

  paste(wrapped, collapse = "\n")
}
296
#' Split Text According To Available Text Width
#'
#' Wraps text to a maximum width and returns it as a text grob.
#'
#' @inheritParams grid::grid.text
#' @param text character vector; each element is wrapped separately and the
#'   wrapped elements are joined by newlines.
#' @param width a unit object specifying max width of text
#'
#' @return A text grob (created with `grid::grid.text(..., draw = FALSE)`).
#'
#' @details This code is taken from `R Graphics by Paul Murrell, 2nd edition`.
#'   Non-unit `x`, `y` and `width` are converted with `default.units`; units of
#'   type `"sum"`, `"min"` or `"max"` are converted to `default.units` first.
#'
#' @keywords internal
split_text_grob <- function(text,
                            x = grid::unit(0.5, "npc"),
                            y = grid::unit(0.5, "npc"),
                            width = grid::unit(1, "npc"),
                            just = "centre",
                            hjust = NULL,
                            vjust = NULL,
                            default.units = "npc", # nolint
                            name = NULL,
                            gp = grid::gpar(),
                            vp = NULL) {
  if (!grid::is.unit(x)) x <- grid::unit(x, default.units)
  if (!grid::is.unit(y)) y <- grid::unit(y, default.units)
  if (!grid::is.unit(width)) width <- grid::unit(width, default.units)

  # Compound units are flattened to `default.units`.
  compound_types <- c("sum", "min", "max")
  if (grid::unitType(x) %in% compound_types) x <- grid::convertUnit(x, default.units)
  if (grid::unitType(y) %in% compound_types) y <- grid::convertUnit(y, default.units)
  if (grid::unitType(width) %in% compound_types) width <- grid::convertUnit(width, default.units)

  # The former "fixed_text" pre-computation was removed: it only set an
  # attribute on the local `text`, which was never read before returning.

  grid::grid.text(
    label = split_string(text, width),
    x = x, y = y,
    just = just,
    hjust = hjust,
    vjust = vjust,
    rot = 0,
    check.overlap = FALSE,
    name = name,
    gp = gp,
    vp = vp,
    draw = FALSE
  )
}
349
350		#' @importFrom grid validDetails
351		#' @noRd
validDetails.dynamicSplitText <- function(x) {
  # Validates the `text` and `width` slots and returns the grob unchanged.
  text_slot <- x$text
  width_slot <- x$width

  # `.var.name` keeps the reported names equal to the slot expressions.
  checkmate::assert_character(text_slot, .var.name = "x$text")
  checkmate::assert_true(grid::is.unit(width_slot), .var.name = "grid::is.unit(x$width)")
  checkmate::assert_vector(width_slot, len = 1, .var.name = "x$width")

  x
}
358
359		#' @importFrom grid heightDetails
360		#' @noRd
heightDetails.dynamicSplitText <- function(x) {
  # Height of the wrapped text: use the pre-wrapped "fixed_text" attribute of
  # `x$text` when present, otherwise wrap every element to `x$width` now.
  prewrapped <- attr(x$text, "fixed_text")
  label <- if (is.null(prewrapped)) {
    paste(vapply(x$text, split_string, character(1), width = x$width), collapse = "\n")
  } else {
    prewrapped
  }
  grid::stringHeight(label)
}
369
370		#' @importFrom grid widthDetails
371		#' @noRd
widthDetails.dynamicSplitText <- function(x) {
  # The width of the grob is the wrapping width stored on it.
  stored_width <- x$width
  stored_width
}
375
376		#' @importFrom grid drawDetails
377		#' @noRd
drawDetails.dynamicSplitText <- function(x, recording) {
  # Draws the grob as a plain text grob whose label is the wrapped text.
  prewrapped <- attr(x$text, "fixed_text")
  label <- if (is.null(prewrapped)) {
    # No pre-wrapped text stored: wrap every element to the current width.
    paste(vapply(x$text, split_string, character(1), width = x$width), collapse = "\n")
  } else {
    prewrapped
  }

  # Drop the wrapping-specific slots, set the label, and replace the first
  # class entry with "text" before handing over to grid's drawing code.
  x$width <- NULL
  x$label <- label
  x$text <- NULL
  class(x) <- c("text", class(x)[-1])

  grid::grid.draw(x)
}
392
393		#' Update Page Number
394		#'
395		#' Automatically updates page number.
396		#'
397		#' @param npages number of pages in total
398		#' @param ... passed on to [decorate_grob()]
399		#'
400		#' @return Closure that increments the page number.
401		#'
402		#' @keywords internal
decorate_grob_factory <- function(npages, ...) {
  # Page counter kept in the closure environment; it advances on every call of
  # the returned function, including calls that fail.
  pages_done <- 0
  function(grob) {
    this_page <- pages_done + 1
    pages_done <<- this_page
    if (this_page > npages) {
      stop(paste("current page is", this_page, "but max.", npages, "specified."))
    }
    page_label <- paste("Page", this_page, "of", npages)
    decorate_grob(grob = grob, page = page_label, ...)
  }
}
413
414		#' Decorate Set of `grobs` and Add Page Numbering
415		#'
416		#' @description `r lifecycle::badge("stable")`
417		#'
418		#' Note that this uses the [decorate_grob_factory()] function.
419		#'
420		#' @param grobs a list of grid grobs
421		#' @param ... arguments passed on to [decorate_grob()].
422		#'
423		#' @return A decorated grob.
424		#'
425		#' @examples
426		#' library(ggplot2)
427		#' library(grid)
428		#' g <- with(data = iris, {
429		#' list(
430		#' ggplot2::ggplotGrob(
431		#' ggplot2::ggplot(mapping = aes(Sepal.Length, Sepal.Width, col = Species)) +
432		#' ggplot2::geom_point()
433		#' ),
434		#' ggplot2::ggplotGrob(
435		#' ggplot2::ggplot(mapping = aes(Sepal.Length, Petal.Length, col = Species)) +
436		#' ggplot2::geom_point()
437		#' ),
438		#' ggplot2::ggplotGrob(
439		#' ggplot2::ggplot(mapping = aes(Sepal.Length, Petal.Width, col = Species)) +
440		#' ggplot2::geom_point()
441		#' ),
442		#' ggplot2::ggplotGrob(
443		#' ggplot2::ggplot(mapping = aes(Sepal.Width, Petal.Length, col = Species)) +
444		#' ggplot2::geom_point()
445		#' ),
446		#' ggplot2::ggplotGrob(
447		#' ggplot2::ggplot(mapping = aes(Sepal.Width, Petal.Width, col = Species)) +
448		#' ggplot2::geom_point()
449		#' ),
450		#' ggplot2::ggplotGrob(
451		#' ggplot2::ggplot(mapping = aes(Petal.Length, Petal.Width, col = Species)) +
452		#' ggplot2::geom_point()
453		#' )
454		#' )
455		#' })
456		#' lg <- decorate_grob_set(grobs = g, titles = "Hello\nOne\nTwo\nThree", footnotes = "")
457		#'
458		#' draw_grob(lg[[1]])
459		#' draw_grob(lg[[2]])
460		#' draw_grob(lg[[6]])
461		#'
462		#' @export
decorate_grob_set <- function(grobs, ...) {
  # One decorator shared by all grobs so the page number advances per element;
  # the total page count is the number of grobs.
  add_page_decoration <- decorate_grob_factory(npages = length(grobs), ...)
  lapply(grobs, add_page_decoration)
}

1		#' Helper Function to create a map dataframe that can be used in `trim_levels_to_map` split function.
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper Function to create a map dataframe from the input dataset, which can be used as an argument in the
6		#' `trim_levels_to_map` split function. Based on different method, the map is constructed differently.
7		#'
8		#' @inheritParams argument_convention
9		#' @param abnormal (named `list`)\cr identifying the abnormal range level(s) in `df`. Based on the levels of
10		#' abnormality of the input dataset, it can be something like `list(Low = "LOW LOW", High = "HIGH HIGH")` or
11		#' `list(Low = "LOW", High = "HIGH")`.
12		#' @param method (`string`)\cr indicates how the returned map will be constructed. Can be `"default"` or `"range"`.
13		#'
14		#' @return A map `data.frame`.
15		#'
16		#' @note If method is `"default"`, the returned map will only have the abnormal directions that are observed in the
17		#' `df`, and records with all normal values will be excluded to avoid error in creating layout. If method is
18		#' `"range"`, the returned map will be based on the rule that at least one observation with low range > 0
19		#' for low direction and at least one observation with high range is not missing for high direction.
20		#'
21		#' @examples
22		#' adlb <- df_explicit_na(tern_ex_adlb)
23		#'
24		#' h_map_for_count_abnormal(
25		#' df = adlb,
26		#' variables = list(anl = "ANRIND", split_rows = c("LBCAT", "PARAM")),
27		#' abnormal = list(low = c("LOW"), high = c("HIGH")),
28		#' method = "default",
29		#' na_level = "<Missing>"
30		#' )
31		#'
32		#' df <- data.frame(
33		#' USUBJID = c(rep("1", 4), rep("2", 4), rep("3", 4)),
34		#' AVISIT = c(
35		#' rep("WEEK 1", 2),
36		#' rep("WEEK 2", 2),
37		#' rep("WEEK 1", 2),
38		#' rep("WEEK 2", 2),
39		#' rep("WEEK 1", 2),
40		#' rep("WEEK 2", 2)
41		#' ),
42		#' PARAM = rep(c("ALT", "CPR"), 6),
43		#' ANRIND = c(
44		#' "NORMAL", "NORMAL", "LOW",
45		#' "HIGH", "LOW", "LOW", "HIGH", "HIGH", rep("NORMAL", 4)
46		#' ),
47		#' ANRLO = rep(5, 12),
48		#' ANRHI = rep(20, 12)
49		#' )
50		#' df$ANRIND <- factor(df$ANRIND, levels = c("LOW", "HIGH", "NORMAL"))
51		#' h_map_for_count_abnormal(
52		#' df = df,
53		#' variables = list(
54		#' anl = "ANRIND",
55		#' split_rows = c("PARAM"),
56		#' range_low = "ANRLO",
57		#' range_high = "ANRHI"
58		#' ),
59		#' abnormal = list(low = c("LOW"), high = c("HIGH")),
60		#' method = "range",
61		#' na_level = "<Missing>"
62		#' )
63		#'
64		#' @export
h_map_for_count_abnormal <- function(df,
                                     variables = list(
                                       anl = "ANRIND",
                                       split_rows = c("PARAM"),
                                       range_low = "ANRLO",
                                       range_high = "ANRHI"
                                     ),
                                     abnormal = list(low = c("LOW", "LOW LOW"), high = c("HIGH", "HIGH HIGH")),
                                     method = c("default", "range"),
                                     na_level = "<Missing>") {
  method <- match.arg(method)
  checkmate::assert_subset(c("anl", "split_rows"), names(variables))
  checkmate::assert_false(anyNA(df[variables$split_rows]))
  assert_df_with_variables(df,
    variables = list(anl = variables$anl, split_rows = variables$split_rows),
    na_level = na_level
  )
  assert_df_with_factors(df, list(val = variables$anl))
  assert_valid_factor(df[[variables$anl]], any.missing = FALSE)
  assert_list_of_variables(variables)
  checkmate::assert_list(abnormal, types = "character", len = 2)

  # Drop unused levels from df as they are not supposed to be in the final map
  df <- droplevels(df)

  normal_value <- setdiff(levels(df[[variables$anl]]), unlist(abnormal))

  # Based on the understanding of clinical data, there should only be one level of normal which is "NORMAL"
  checkmate::assert_vector(normal_value, len = 1)

  # Default method will only have what is observed in the df, and records with all normal values will be excluded to
  # avoid error in layout building.
  if (method == "default") {
    df_abnormal <- subset(df, df[[variables$anl]] %in% unlist(abnormal))
    map <- unique(df_abnormal[c(variables$split_rows, variables$anl)])
    map_normal <- unique(subset(map, select = variables$split_rows))
    map_normal[[variables$anl]] <- normal_value
    map <- rbind(map, map_normal)
  } else if (method == "range") {
    # range method follows the rule that at least one observation with ANRLO > 0 for low
    # direction and at least one observation with ANRHI is not missing for high direction.
    checkmate::assert_subset(c("range_low", "range_high"), names(variables))
    checkmate::assert_subset(c("LOW", "HIGH"), toupper(names(abnormal)))

    assert_df_with_variables(df,
      variables = list(
        range_low = variables$range_low,
        range_high = variables$range_high
      )
    )

    # Builds the map for one direction ("LOW" or "HIGH"): every unique
    # combination of the split variables in `df_dir` is paired with every
    # abnormal level of that direction. The `anl` column is always created,
    # so a direction without qualifying rows gives a zero-row map with the
    # same columns as the other maps and can still be row-bound with them.
    map_for_direction <- function(df_dir, direction) {
      dir_levels <- unname(unlist(abnormal[toupper(names(abnormal)) == direction]))
      map_dir <- unique(df_dir[variables$split_rows])
      n_groups <- nrow(map_dir)
      rownames(map_dir) <- NULL # Just to avoid strange row index in case upstream functions changed
      map_dir <- map_dir[rep(seq_len(n_groups), each = length(dir_levels)), , drop = FALSE]
      map_dir[[variables$anl]] <- rep(dir_levels, times = n_groups)
      map_dir
    }

    # Define low direction of map: rows with a positive lower range limit.
    low_values <- df[[variables$range_low]]
    has_low <- !is.na(low_values) & low_values > 0
    map_low <- map_for_direction(df[has_low, , drop = FALSE], "LOW")

    # Define high direction of map: rows whose upper range limit is neither NA
    # nor the explicit `na_level`. Both conditions must hold; with `|` a value
    # equal to `na_level` was kept because it is not `NA`.
    high_values <- df[[variables$range_high]]
    has_high <- !is.na(high_values) & high_values != na_level
    map_high <- map_for_direction(df[has_high, , drop = FALSE], "HIGH")

    # Define normal of map
    map_normal <- unique(rbind(map_low, map_high)[variables$split_rows])
    map_normal[variables$anl] <- normal_value

    map <- rbind(map_low, map_high, map_normal)
  }

  # map should be all characters
  map <- data.frame(lapply(map, as.character), stringsAsFactors = FALSE)

  # sort the map final output by split_rows variables (these are the leading
  # columns of the map); sorting from the last key to the first gives a
  # multi-key ordering because `order()` keeps ties in their current order.
  for (i in rev(seq_along(variables$split_rows))) {
    map <- map[order(map[[i]]), ]
  }
  map
}

1		#' Number of Patients
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Count the number of unique and non-unique patients in a column (variable).
6		#'
7		#' @inheritParams argument_convention
8		#' @param x (`character` or `factor`)\cr vector of patient IDs.
9		#' @param count_by (`character` or `factor`)\cr optional vector to be combined with `x` when counting
10		#' `nonunique` records.
11		#' @param unique_count_suffix (`logical`)\cr should `"(n)"` suffix be added to `unique_count` labels.
12		#' Defaults to `TRUE`.
13		#'
14		#' @name summarize_num_patients
15		NULL
16
17		#' @describeIn summarize_num_patients Statistics function which counts the number of
18		#' unique patients, the corresponding percentage taken with respect to the
19		#' total number of patients, and the number of non-unique patients.
20		#'
21		#' @return
22		#' * `s_num_patients()` returns a named `list` of 3 statistics:
23		#' * `unique`: Vector of counts and percentages.
24		#' * `nonunique`: Vector of counts.
25		#' * `unique_count`: Counts.
26		#'
27		#' @examples
28		#' # Use the statistics function to count number of unique and nonunique patients.
29		#' s_num_patients(x = as.character(c(1, 1, 1, 2, 4, NA)), labelstr = "", .N_col = 6L)
30		#' s_num_patients(
31		#' x = as.character(c(1, 1, 1, 2, 4, NA)),
32		#' labelstr = "",
33		#' .N_col = 6L,
34		#' count_by = as.character(c(1, 1, 2, 1, 1, 1))
35		#' )
36		#'
37		#' @export
s_num_patients <- function(x, labelstr, .N_col, count_by = NULL, unique_count_suffix = TRUE) { # nolint
  # Counts unique and non-unique patient IDs in `x` and returns them as a
  # named list of labelled statistics (`unique`, `nonunique`, `unique_count`).
  checkmate::assert_string(labelstr)
  checkmate::assert_count(.N_col)
  checkmate::assert_multi_class(x, classes = c("factor", "character"))
  checkmate::assert_flag(unique_count_suffix)

  n_unique <- n_available(unique(x))
  n_records <- n_available(x)

  # With `count_by`, non-unique records are the distinct (x, count_by) pairs.
  if (!is.null(count_by)) {
    checkmate::assert_vector(count_by, len = length(x))
    checkmate::assert_multi_class(count_by, classes = c("factor", "character"))
    n_records <- n_available(unique(interaction(x, count_by)))
  }

  # 0 / 0 is reported as 0.
  fraction <- if (n_unique == 0 && .N_col == 0) {
    0
  } else {
    n_unique / .N_col
  }
  count_label <- if (unique_count_suffix) {
    paste(labelstr, "(n)")
  } else {
    labelstr
  }

  list(
    unique = formatters::with_label(c(n_unique, fraction), labelstr),
    nonunique = formatters::with_label(n_records, labelstr),
    unique_count = formatters::with_label(n_unique, count_label)
  )
}
62
63		#' @describeIn summarize_num_patients Statistics function which counts the number of unique patients
64		#' in a column (variable), the corresponding percentage taken with respect to the total number of
65		#' patients, and the number of non-unique patients in the column.
66		#'
67		#' @param required (`character` or `NULL`)\cr optional name of a variable that is required to be non-missing.
68		#'
69		#' @return
70		#' * `s_num_patients_content()` returns the same values as `s_num_patients()`.
71		#'
72		#' @examples
73		#' # Count number of unique and non-unique patients.
74		#' df <- data.frame(
75		#' USUBJID = as.character(c(1, 2, 1, 4, NA)),
76		#' EVENT = as.character(c(10, 15, 10, 17, 8))
77		#' )
78		#' s_num_patients_content(df, .N_col = 5, .var = "USUBJID")
79		#'
80		#' df_by_event <- data.frame(
81		#' USUBJID = as.character(c(1, 2, 1, 4, NA)),
82		#' EVENT = as.character(c(10, 15, 10, 17, 8))
83		#' )
84		#' s_num_patients_content(df_by_event, .N_col = 5, .var = "USUBJID")
85		#' s_num_patients_content(df_by_event, .N_col = 5, .var = "USUBJID", count_by = "EVENT")
86		#'
87		#' @export
s_num_patients_content <- function(df,
                                   labelstr = "",
                                   .N_col, # nolint
                                   .var,
                                   required = NULL,
                                   count_by = NULL,
                                   unique_count_suffix = TRUE) {
  # Data-frame front end of `s_num_patients()`: extracts the ID column `.var`
  # (and optionally the `count_by` column) from `df` and forwards them.
  checkmate::assert_string(.var)
  checkmate::assert_data_frame(df)

  # `count_by` only has to be a column of `df` when it is supplied.
  needed_vars <- c(
    list(id = .var),
    if (!is.null(count_by)) list(count_by = count_by)
  )
  assert_df_with_variables(df, needed_vars)

  # Rows where the `required` variable is missing are dropped first.
  if (!is.null(required)) {
    checkmate::assert_string(required)
    assert_df_with_variables(df, list(required = required))
    keep_rows <- !is.na(df[[required]])
    df <- df[keep_rows, , drop = FALSE]
  }

  count_by_values <- if (is.null(count_by)) {
    NULL
  } else {
    df[[count_by]]
  }

  s_num_patients(
    x = df[[.var]],
    labelstr = labelstr,
    .N_col = .N_col,
    count_by = count_by_values,
    unique_count_suffix = unique_count_suffix
  )
}
122
# Formatted version of `s_num_patients_content()` exposing all three
# statistics with their default formats.
c_num_patients <- make_afun(
  fun = s_num_patients_content,
  .formats = c(unique = format_count_fraction_fixed_dp, nonunique = "xx", unique_count = "xx"),
  .stats = c("unique", "nonunique", "unique_count")
)
128
129		#' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments
130		#' and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
131		#'
132		#' @return
133		#' * `summarize_num_patients()` returns a layout object suitable for passing to further layouting functions,
134		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
135		#' the statistics from `s_num_patients_content()` to the table layout.
136		#'
137		#' @export
summarize_num_patients <- function(lyt,
                                   var,
                                   .stats = NULL,
                                   .formats = NULL,
                                   .labels = c(
                                     unique = "Number of patients with at least one event",
                                     nonunique = "Number of events"
                                   ),
                                   indent_mod = lifecycle::deprecated(),
                                   .indent_mods = 0L,
                                   ...) {
  # The deprecated `indent_mod` still works but overrides `.indent_mods`.
  if (lifecycle::is_present(indent_mod)) {
    lifecycle::deprecate_warn("0.8.2", "summarize_num_patients(indent_mod)", "summarize_num_patients(.indent_mods)")
    .indent_mods <- indent_mod
  }

  # Default to all statistics.
  if (is.null(.stats)) {
    .stats <- c("unique", "nonunique", "unique_count")
  }
  # With more labels than statistics, keep only labels of requested statistics.
  if (length(.labels) > length(.stats)) {
    is_requested <- names(.labels) %in% .stats
    .labels <- .labels[is_requested]
  }

  content_fun <- make_afun(
    c_num_patients,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels
  )
  extra <- list(...)

  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = content_fun,
    extra_args = extra,
    indent_mod = .indent_mods
  )
}
172
173		#' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments
174		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
175		#'
176		#' @return
177		#' * `analyze_num_patients()` returns a layout object suitable for passing to further layouting functions,
178		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
179		#' the statistics from `s_num_patients_content()` to the table layout.
180		#'
181		#' @details In general, functions that start with `analyze*` are expected to
182		#' work like [rtables::analyze()], while functions that start with `summarize*`
183		#' are based upon [rtables::summarize_row_groups()]. The latter provides a
184		#' value for each dividing split in the row and column space but, because it
185		#' is bound to the fundamental splits, it is repeated by design on every page
186		#' when pagination is involved.
187		#'
188		#' @note As opposed to [summarize_num_patients()], this function does not repeat the produced rows.
189		#'
190		#' @examples
191		#' df_tmp <- data.frame(
192		#' USUBJID = as.character(c(1, 2, 1, 4, NA, 6, 6, 8, 9)),
193		#' ARM = c("A", "A", "A", "A", "A", "B", "B", "B", "B"),
194		#' AGE = c(10, 15, 10, 17, 8, 11, 11, 19, 17)
195		#' )
196		#' tbl <- basic_table() %>%
197		#' split_cols_by("ARM") %>%
198		#' add_colcounts() %>%
199		#' analyze_num_patients("USUBJID", .stats = c("unique")) %>%
200		#' build_table(df_tmp)
201		#' tbl
202		#'
203		#' @export
analyze_num_patients <- function(lyt,
                                 vars,
                                 .stats = NULL,
                                 .formats = NULL,
                                 .labels = c(
                                   unique = "Number of patients with at least one event",
                                   nonunique = "Number of events"
                                 ),
                                 show_labels = c("default", "visible", "hidden"),
                                 indent_mod = lifecycle::deprecated(),
                                 .indent_mods = 0L,
                                 ...) {
  # The deprecated `indent_mod` still works but overrides `.indent_mods`.
  if (lifecycle::is_present(indent_mod)) {
    lifecycle::deprecate_warn("0.8.2", "analyze_num_patients(indent_mod)", "analyze_num_patients(.indent_mods)")
    .indent_mods <- indent_mod
  }

  # Default to all statistics.
  if (is.null(.stats)) {
    .stats <- c("unique", "nonunique", "unique_count")
  }
  # With more labels than statistics, keep only labels of requested statistics.
  if (length(.labels) > length(.stats)) {
    is_requested <- names(.labels) %in% .stats
    .labels <- .labels[is_requested]
  }

  analysis_fun <- make_afun(
    c_num_patients,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels
  )
  extra <- list(...)

  analyze(
    afun = analysis_fun,
    lyt = lyt,
    vars = vars,
    extra_args = extra,
    show_labels = show_labels,
    indent_mod = .indent_mods
  )
}

1		#' Additional Assertions for `checkmate`
2		#'
3		#' Additional assertion functions which can be used together with the `checkmate` package.
4		#'
5		#' @inheritParams checkmate::assert_factor
6		#' @param x (`any`)\cr object to test.
7		#' @param df (`data.frame`)\cr data set to test.
8		#' @param variables (named `list` of `character`)\cr list of variables to test.
9		#' @param include_boundaries (`logical`)\cr whether to include boundaries when testing
10		#' for proportions.
11		#' @param na_level (`character`)\cr the string you have been using to represent NA or
12		#' missing data. For `NA` values please consider using directly [is.na()] or
13		#' similar approaches.
14		#' @param min.levels (`integer`)\cr minimum number of factor levels. Default is `1`.
15		#' @param ... a collection of objects to test.
16		#'
17		#' @return Nothing if assertion passes, otherwise prints the error message.
18		#'
19		#' @name assertions
20		NULL
21
check_list_of_variables <- function(x) {
  # Returns TRUE when `x` (after dropping NULL elements) is a non-empty named
  # list of character vectors without empty strings; otherwise returns the
  # message produced by the failing checkmate check.
  non_null <- Filter(function(element) !is.null(element), x)

  list_check <- checkmate::check_list(
    non_null,
    types = "character",
    names = "named",
    min.len = 1,
    any.missing = FALSE
  )
  if (!isTRUE(list_check)) {
    return(list_check)
  }

  # no empty strings allowed
  checkmate::check_character(unlist(non_null), min.chars = 1)
}
#' @describeIn assertions Checks whether `x` is a valid list of variable names.
#' `NULL` elements of the list `x` are dropped with `Filter(Negate(is.null), x)`.
#'
#' @keywords internal
assert_list_of_variables <- checkmate::makeAssertionFunction(check.fun = check_list_of_variables)
43
check_df_with_variables <- function(df, variables, na_level = NULL) {
  # Returns TRUE when `df` is a data frame containing every column named in
  # `variables` and, if `na_level` is given, none of those columns contains
  # that string. Otherwise returns a message describing the problem.
  checkmate::assert_data_frame(df)
  assert_list_of_variables(variables)

  var_names <- unlist(variables)

  # all requested variables must be column names
  missing_vars <- setdiff(var_names, colnames(df))
  if (length(missing_vars) > 0) {
    return(paste(
      deparse(substitute(df)),
      "does not contain all specified variables as column names. Missing from dataframe:",
      paste(missing_vars, collapse = ", ")
    ))
  }

  # checking if na_level is present and in which column
  if (!is.null(na_level)) {
    checkmate::assert_string(na_level)
    # `na.rm = TRUE`: a column holding real `NA`s but no `na_level` must count
    # as "not present". Without it `any()` returns `NA` and the `if` below
    # fails with "missing value where TRUE/FALSE needed".
    has_na_level <- vapply(
      as.list(df)[var_names],
      function(column) any(column == na_level, na.rm = TRUE),
      logical(1)
    )
    if (any(has_na_level)) {
      return(paste0(
        deparse(substitute(df)), " contains explicit na_level (", na_level,
        ") in the following columns: ", paste0(var_names[has_na_level],
          collapse = ", "
        )
      ))
    }
  }

  TRUE
}
#' @describeIn assertions Check whether `df` is a data frame with the analysis `variables`.
#' Please notice how this produces an error when not all variables are present in the
#' data.frame while the opposite is not required.
#'
#' @keywords internal
assert_df_with_variables <- checkmate::makeAssertionFunction(check_df_with_variables)
81
check_valid_factor <- function(x,
                               min.levels = 1, # nolint
                               max.levels = NULL, # nolint
                               null.ok = TRUE, # nolint
                               any.missing = TRUE, # nolint
                               n.levels = NULL, # nolint
                               len = NULL) {
  # Returns TRUE when `x` passes `checkmate::check_factor()` with the given
  # constraints and none of its levels is an empty string; otherwise returns
  # the message of the failing check.

  # checks on levels insertion
  checkmate::assert_int(min.levels, lower = 1)

  # main factor check; `len` is forwarded too (it used to be accepted by this
  # function but silently ignored).
  res <- checkmate::check_factor(x,
    min.levels = min.levels,
    null.ok = null.ok,
    max.levels = max.levels,
    any.missing = any.missing,
    n.levels = n.levels,
    len = len
  )

  # no empty strings allowed
  if (isTRUE(res)) {
    res <- checkmate::check_character(levels(x), min.chars = 1)
  }

  res
}
#' @describeIn assertions Check whether `x` is a valid factor (i.e. has levels and no empty
#' string levels). Note that `NULL` and `NA` elements are allowed.
#'
#' @keywords internal
assert_valid_factor <- checkmate::makeAssertionFunction(check_valid_factor)
113
114
check_df_with_factors <- function(df,
                                  variables,
                                  min.levels = 1, # nolint
                                  max.levels = NULL, # nolint
                                  any.missing = TRUE, # nolint
                                  na_level = NULL) {
  # First make sure `df` passes the basic data-frame/variables check; if it
  # does not, that check's result is returned unchanged.
  df_check <- check_df_with_variables(df, variables, na_level)
  if (!isTRUE(df_check)) {
    return(df_check)
  }

  # Run the factor check on every selected column (columns taken as a list).
  var_names <- unlist(variables)
  col_checks <- lapply(
    as.list(df)[var_names],
    check_valid_factor,
    min.levels = min.levels,
    max.levels = max.levels,
    any.missing = any.missing
  )

  # Flag the columns whose check did not come back as `TRUE`.
  is_invalid <- !vapply(col_checks, isTRUE, logical(1))
  if (!any(is_invalid)) {
    return(TRUE)
  }

  # One bullet per offending column, each followed by its failure message.
  paste0(
    deparse(substitute(df)), " does not contain only factor variables among:",
    "\n* Column `", paste0(var_names[is_invalid],
      "` of the data.frame -> ", col_checks[is_invalid],
      collapse = "\n* "
    )
  )
}
147		#' @describeIn assertions Check whether `df` is a data frame where the analysis `variables`
148		#' are all factors. Note that the creation of `NA` by direct call of `factor()` will
149		#' trim `NA` levels out of the vector list itself.
150		#'
151		#' @keywords internal
assert_df_with_factors <- checkmate::makeAssertionFunction(check.fun = check_df_with_factors)
153
154		#' @describeIn assertions Check whether `x` is a proportion: number between 0 and 1.
155		#'
156		#' @keywords internal
assert_proportion_value <- function(x, include_boundaries = FALSE) {
  # `x` must be a single number inside the closed interval [0, 1].
  checkmate::assert_number(x, lower = 0, upper = 1)
  checkmate::assert_flag(include_boundaries)

  # Unless the boundaries are explicitly allowed, 0 and 1 are rejected too.
  if (!include_boundaries) {
    checkmate::assert_true(x > 0)
    checkmate::assert_true(x < 1)
  }
}

1		#' Control Function for `CoxPH` Model
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' This is an auxiliary function for controlling arguments for `CoxPH` model, typically used internally to specify
6		#' details of `CoxPH` model for [s_coxph_pairwise()]. `conf_level` refers to Hazard Ratio estimation.
7		#'
8		#' @inheritParams argument_convention
9		#' @param pval_method (`string`)\cr p-value method for testing hazard ratio = 1.
10		#' Default method is `"log-rank"`, can also be set to `"wald"` or `"likelihood"`.
11		#' @param ties (`string`)\cr specifying the method for tie handling. Default is `"efron"`,
12		#' can also be set to `"breslow"` or `"exact"`. See more in [survival::coxph()].
13		#'
14		#' @return A list of components with the same names as the arguments
15		#'
16		#' @export
control_coxph <- function(pval_method = c("log-rank", "wald", "likelihood"),
                          ties = c("efron", "breslow", "exact"),
                          conf_level = 0.95) {
  # Resolve the enumerated choices (the first listed option is the default),
  # then validate the confidence level as a proportion.
  chosen_pval_method <- match.arg(pval_method)
  chosen_ties <- match.arg(ties)
  assert_proportion_value(conf_level)

  list(
    pval_method = chosen_pval_method,
    ties = chosen_ties,
    conf_level = conf_level
  )
}
26
27		#' Control Function for `survfit` Model for Survival Time
28		#'
29		#' @description `r lifecycle::badge("stable")`
30		#'
31		#' This is an auxiliary function for controlling arguments for `survfit` model, typically used internally to specify
32		#' details of `survfit` model for [s_surv_time()]. `conf_level` refers to survival time estimation.
33		#'
34		#' @inheritParams argument_convention
35		#' @param conf_type (`string`)\cr confidence interval type. Options are "plain" (default), "log", "log-log",
36		#' see more in [survival::survfit()]. Note option "none" is no longer supported.
37		#' @param quantiles (`numeric`)\cr of length two to specify the quantiles of survival time.
38		#'
39		#' @return A list of components with the same names as the arguments
40		#'
41		#' @export
control_surv_time <- function(conf_level = 0.95,
                              conf_type = c("plain", "log", "log-log"),
                              quantiles = c(0.25, 0.75)) {
  chosen_conf_type <- match.arg(conf_type)

  # Two distinct, increasing values within [0, 1] ...
  checkmate::assert_numeric(quantiles, lower = 0, upper = 1, len = 2, unique = TRUE, sorted = TRUE)
  # ... each of which must also pass the proportion check.
  for (quantile_i in quantiles) {
    assert_proportion_value(quantile_i)
  }
  assert_proportion_value(conf_level)

  list(conf_level = conf_level, conf_type = chosen_conf_type, quantiles = quantiles)
}
51
52		#' Control Function for `survfit` Model for Patient's Survival Rate at time point
53		#'
54		#' @description `r lifecycle::badge("stable")`
55		#'
56		#' This is an auxiliary function for controlling arguments for `survfit` model, typically used internally to specify
57		#' details of `survfit` model for [s_surv_timepoint()]. `conf_level` refers to patient risk estimation at a time point.
58		#'
59		#' @inheritParams argument_convention
60		#' @inheritParams control_surv_time
61		#'
62		#' @return A list of components with the same names as the arguments
63		#'
64		#' @export
control_surv_timepoint <- function(conf_level = 0.95,
                                   conf_type = c("plain", "log", "log-log")) {
  # Resolve the confidence interval type, then validate the confidence level.
  chosen_conf_type <- match.arg(conf_type)
  assert_proportion_value(conf_level)

  list(conf_level = conf_level, conf_type = chosen_conf_type)
}

1		#' Helper Functions for Tabulating Survival Duration by Subgroup
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper functions that tabulate in a data frame statistics such as median survival
6		#' time and hazard ratio for population subgroups.
7		#'
8		#' @inheritParams argument_convention
9		#' @inheritParams survival_coxph_pairwise
10		#' @inheritParams survival_duration_subgroups
11		#' @param arm (`factor`)\cr the treatment group variable.
12		#'
13		#' @details Main functionality is to prepare data for use in a layout-creating function.
14		#'
15		#' @examples
16		#' library(dplyr)
17		#' library(forcats)
18		#'
19		#' adtte <- tern_ex_adtte
20		#'
21		#' # Save variable labels before data processing steps.
22		#' adtte_labels <- formatters::var_labels(adtte)
23		#'
24		#' adtte_f <- adtte %>%
25		#' filter(
26		#' PARAMCD == "OS",
27		#' ARM %in% c("B: Placebo", "A: Drug X"),
28		#' SEX %in% c("M", "F")
29		#' ) %>%
30		#' mutate(
31		#' # Reorder levels of ARM to display reference arm before treatment arm.
32		#' ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
33		#' SEX = droplevels(SEX),
34		#' is_event = CNSR == 0
35		#' )
36		#' labels <- c("ARM" = adtte_labels[["ARM"]], "SEX" = adtte_labels[["SEX"]], "is_event" = "Event Flag")
37		#' formatters::var_labels(adtte_f)[names(labels)] <- labels
38		#'
39		#' @name h_survival_duration_subgroups
40		NULL
41
42		#' @describeIn h_survival_duration_subgroups helper to prepare a data frame of median survival times by arm.
43		#'
44		#' @return
45		#' * `h_survtime_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, and `median`.
46		#'
47		#' @examples
48		#' # Extract median survival time for one group.
49		#' h_survtime_df(
50		#' tte = adtte_f$AVAL,
51		#' is_event = adtte_f$is_event,
52		#' arm = adtte_f$ARM
53		#' )
54		#'
55		#' @export
h_survtime_df <- function(tte, is_event, arm) {
  checkmate::assert_numeric(tte)
  checkmate::assert_logical(is_event, len = length(tte))
  assert_valid_factor(arm, len = length(tte))

  surv_data <- data.frame(tte = tte, is_event = is_event, stringsAsFactors = FALSE)

  # Drop records with a missing time or event flag, keeping `arm` aligned.
  is_complete <- stats::complete.cases(surv_data)
  surv_data <- surv_data[is_complete, ]
  arm <- arm[is_complete]

  # One-row summary for a single arm; an arm without records gets `NA`
  # statistics and `n = 0`.
  summarize_arm <- function(arm_data, arm_name) {
    if (nrow(arm_data) > 0) {
      surv_stats <- s_surv_time(arm_data, .var = "tte", is_event = "is_event")
      median_est <- unname(as.numeric(surv_stats$median))
      n_events <- sum(arm_data$is_event)
    } else {
      median_est <- NA
      n_events <- NA
    }

    data.frame(
      arm = arm_name,
      n = nrow(arm_data),
      n_events = n_events,
      median = median_est,
      stringsAsFactors = FALSE
    )
  }

  data_by_arm <- split(surv_data, arm)
  rows_by_arm <- Map(summarize_arm, data_by_arm, names(data_by_arm))

  result <- do.call(rbind, args = c(rows_by_arm, make.row.names = FALSE))
  # Restore `arm` as a factor with the level order of the input.
  result$arm <- factor(result$arm, levels = levels(arm))
  result
}
96
97		#' @describeIn h_survival_duration_subgroups summarizes median survival times by arm and across subgroups
98		#' in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and
99		#' requires elements `tte`, `is_event`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies
100		#' groupings for `subgroups` variables.
101		#'
102		#' @return
103		#' * `h_survtime_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, `median`, `subgroup`,
104		#' `var`, `var_label`, and `row_type`.
105		#'
106		#' @examples
107		#' # Extract median survival time for multiple groups.
108		#' h_survtime_subgroups_df(
109		#' variables = list(
110		#' tte = "AVAL",
111		#' is_event = "is_event",
112		#' arm = "ARM",
113		#' subgroups = c("SEX", "BMRKR2")
114		#' ),
115		#' data = adtte_f
116		#' )
117		#'
118		#' # Define groupings for BMRKR2 levels.
119		#' h_survtime_subgroups_df(
120		#' variables = list(
121		#' tte = "AVAL",
122		#' is_event = "is_event",
123		#' arm = "ARM",
124		#' subgroups = c("SEX", "BMRKR2")
125		#' ),
126		#' data = adtte_f,
127		#' groups_lists = list(
128		#' BMRKR2 = list(
129		#' "low" = "LOW",
130		#' "low/medium" = c("LOW", "MEDIUM"),
131		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
132		#' )
133		#' )
134		#' )
135		#'
136		#' @export
h_survtime_subgroups_df <- function(variables,
                                    data,
                                    groups_lists = list(),
                                    label_all = "All Patients") {
  checkmate::assert_character(variables$tte)
  checkmate::assert_character(variables$is_event)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)

  assert_df_with_variables(data, variables)

  checkmate::assert_string(label_all)

  # Median survival time per arm for a given data subset.
  survtime_for <- function(df) {
    h_survtime_df(df[[variables$tte]], df[[variables$is_event]], df[[variables$arm]])
  }

  # Overall ("All Patients") rows.
  result_all <- survtime_for(data)
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables the overall rows are the complete result.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  # One set of per-arm rows for each subgroup level, with the subgroup labels
  # repeated once per row.
  subsets <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  rows_by_subset <- lapply(subsets, function(grp) {
    result <- survtime_for(grp$df)
    result_labels <- grp$df_labels[rep(1, times = nrow(result)), ]
    cbind(result, result_labels)
  })
  result_subgroups <- do.call(rbind, args = c(rows_by_subset, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(result_all, result_subgroups)
}
175
176		#' @describeIn h_survival_duration_subgroups helper to prepare a data frame with estimates of
177		#' treatment hazard ratio.
178		#'
179		#' @param strata_data (`factor`, `data.frame` or `NULL`)\cr required if stratified analysis is performed.
180		#'
181		#' @return
182		#' * `h_coxph_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`,
183		#' `conf_level`, `pval` and `pval_label`.
184		#'
185		#' @examples
186		#' # Extract hazard ratio for one group.
187		#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM)
188		#'
189		#' # Extract hazard ratio for one group with stratification factor.
190		#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM, strata_data = adtte_f$STRATA1)
191		#'
192		#' @export
h_coxph_df <- function(tte, is_event, arm, strata_data = NULL, control = control_coxph()) {
  checkmate::assert_numeric(tte)
  checkmate::assert_logical(is_event, len = length(tte))
  assert_valid_factor(arm, n.levels = 2, len = length(tte))

  surv_data <- data.frame(tte = tte, is_event = is_event)
  strata_vars <- NULL

  # Attach the stratification variable(s), given either as a data frame of
  # factors or as a single factor.
  if (!is.null(strata_data)) {
    if (is.data.frame(strata_data)) {
      strata_vars <- names(strata_data)
      checkmate::assert_data_frame(strata_data, nrows = nrow(surv_data))
      assert_df_with_factors(strata_data, as.list(stats::setNames(strata_vars, strata_vars)))
    } else {
      assert_valid_factor(strata_data, len = nrow(surv_data))
      strata_vars <- "strata_data"
    }
    surv_data[strata_vars] <- strata_data
  }

  # Single place that builds the one-row result; estimates default to `NA`
  # for the cases where no model can be fitted.
  make_row <- function(n_tot, n_tot_events, hr = NA, lcl = NA, ucl = NA, pval = NA, pval_label = NA) {
    data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = n_tot,
      n_tot_events = n_tot_events,
      hr = hr,
      lcl = lcl,
      ucl = ucl,
      conf_level = control[["conf_level"]],
      pval = pval,
      pval_label = pval_label,
      stringsAsFactors = FALSE
    )
  }

  data_by_arm <- split(surv_data, arm)
  n_ref <- nrow(data_by_arm[[1]])
  n_trt <- nrow(data_by_arm[[2]])

  if (n_ref > 0 && n_trt > 0) {
    # Both arms have records: hazard ratio and CI of the second arm level
    # against the first (reference) arm level.
    fit <- s_coxph_pairwise(
      df = data_by_arm[[2]],
      .ref_group = data_by_arm[[1]],
      .in_ref_col = FALSE,
      .var = "tte",
      is_event = "is_event",
      strat = strata_vars,
      control = control
    )

    make_row(
      n_tot = unname(as.numeric(fit$n_tot)),
      n_tot_events = unname(as.numeric(fit$n_tot_events)),
      hr = unname(as.numeric(fit$hr)),
      lcl = unname(fit$hr_ci[1]),
      ucl = unname(fit$hr_ci[2]),
      pval = as.numeric(fit$pvalue),
      pval_label = obj_label(fit$pvalue)
    )
  } else if (n_ref == 0 && n_trt == 0) {
    # Neither arm has any records.
    make_row(n_tot = 0L, n_tot_events = 0L)
  } else {
    # Exactly one arm has records: report counts from the complete cases only.
    complete_data <- surv_data[stats::complete.cases(surv_data), ]
    make_row(
      n_tot = nrow(complete_data),
      n_tot_events = sum(complete_data$is_event)
    )
  }
}
276
277		#' @describeIn h_survival_duration_subgroups summarizes estimates of the treatment hazard ratio
278		#' across subgroups in a data frame. `variables` corresponds to the names of variables found in
279		#' `data`, passed as a named list and requires elements `tte`, `is_event`, `arm` and
280		#' optionally `subgroups` and `strat`. `groups_lists` optionally specifies
281		#' groupings for `subgroups` variables.
282		#'
283		#' @return
284		#' * `h_coxph_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`,
285		#' `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`.
286		#'
287		#' @examples
288		#' # Extract hazard ratio for multiple groups.
289		#' h_coxph_subgroups_df(
290		#' variables = list(
291		#' tte = "AVAL",
292		#' is_event = "is_event",
293		#' arm = "ARM",
294		#' subgroups = c("SEX", "BMRKR2")
295		#' ),
296		#' data = adtte_f
297		#' )
298		#'
299		#' # Define groupings of BMRKR2 levels.
300		#' h_coxph_subgroups_df(
301		#' variables = list(
302		#' tte = "AVAL",
303		#' is_event = "is_event",
304		#' arm = "ARM",
305		#' subgroups = c("SEX", "BMRKR2")
306		#' ),
307		#' data = adtte_f,
308		#' groups_lists = list(
309		#' BMRKR2 = list(
310		#' "low" = "LOW",
311		#' "low/medium" = c("LOW", "MEDIUM"),
312		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
313		#' )
314		#' )
315		#' )
316		#'
317		#' # Extract hazard ratio for multiple groups with stratification factors.
318		#' h_coxph_subgroups_df(
319		#' variables = list(
320		#' tte = "AVAL",
321		#' is_event = "is_event",
322		#' arm = "ARM",
323		#' subgroups = c("SEX", "BMRKR2"),
324		#' strat = c("STRATA1", "STRATA2")
325		#' ),
326		#' data = adtte_f
327		#' )
328		#'
329		#' @export
h_coxph_subgroups_df <- function(variables,
                                 data,
                                 groups_lists = list(),
                                 control = control_coxph(),
                                 label_all = "All Patients") {
  checkmate::assert_character(variables$tte)
  checkmate::assert_character(variables$is_event)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_character(variables$strat, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  # Hazard ratio estimates for a given data subset; strata columns are passed
  # on only when stratification variables were specified.
  coxph_for <- function(df) {
    h_coxph_df(
      tte = df[[variables$tte]],
      is_event = df[[variables$is_event]],
      arm = df[[variables$arm]],
      strata_data = if (is.null(variables$strat)) NULL else df[variables$strat],
      control = control
    )
  }

  # Overall ("All Patients") row.
  result_all <- coxph_for(data)
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables the overall row is the complete result.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  # One result per subgroup level, with the subgroup labels bound alongside.
  subsets <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  rows_by_subset <- lapply(subsets, function(grp) {
    result <- coxph_for(grp$df)
    result_labels <- grp$df_labels[rep(1, times = nrow(result)), ]
    cbind(result, result_labels)
  })

  result_subgroups <- do.call(rbind, args = c(rows_by_subset, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(result_all, result_subgroups)
}
384
385		#' Split Dataframe by Subgroups
386		#'
387		#' @description `r lifecycle::badge("stable")`
388		#'
389		#' Split a dataframe into a non-nested list of subsets.
390		#'
391		#' @inheritParams survival_duration_subgroups
392		#' @param data (`data.frame`)\cr dataset to split.
393		#' @param subgroups (`character`)\cr names of factor variables from `data` used to create subsets.
394		#' Unused levels not present in `data` are dropped. Note that the order in this vector
395		#' determines the order in the downstream table.
396		#'
397		#' @return A list with subset data (`df`) and metadata about the subset (`df_labels`).
398		#'
399		#' @details Main functionality is to prepare data for use in forest plot layouts.
400		#'
401		#' @examples
402		#' df <- data.frame(
403		#' x = c(1:5),
404		#' y = factor(c("A", "B", "A", "B", "A"), levels = c("A", "B", "C")),
405		#' z = factor(c("C", "C", "D", "D", "D"), levels = c("D", "C"))
406		#' )
407		#' formatters::var_labels(df) <- paste("label for", names(df))
408		#'
409		#' h_split_by_subgroups(
410		#' data = df,
411		#' subgroups = c("y", "z")
412		#' )
413		#'
414		#' h_split_by_subgroups(
415		#' data = df,
416		#' subgroups = c("y", "z"),
417		#' groups_lists = list(
418		#' y = list("AB" = c("A", "B"), "C" = "C")
419		#' )
420		#' )
421		#'
422		#' @export
h_split_by_subgroups <- function(data,
                                 subgroups,
                                 groups_lists = list()) {
  checkmate::assert_character(subgroups, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(groups_lists, names = "named")
  checkmate::assert_subset(names(groups_lists), subgroups)
  assert_df_with_factors(data, as.list(stats::setNames(subgroups, subgroups)))

  data_labels <- unname(formatters::var_labels(data))
  df_subgroups <- data[, subgroups, drop = FALSE]
  subgroup_labels <- formatters::var_labels(df_subgroups, fill = TRUE)

  # For every subgroup variable, list the subgroup names that occur in `data`.
  l_labels <- Map(function(grp_i, name_i) {
    existing_levels <- levels(droplevels(grp_i))
    grp_levels <- if (name_i %in% names(groups_lists)) {
      # For this variable groupings are defined. We check which groups are contained in the data.
      group_list_i <- groups_lists[[name_i]]
      group_has_levels <- vapply(group_list_i, function(lvls) any(lvls %in% existing_levels), TRUE)
      names(which(group_has_levels))
    } else {
      existing_levels
    }
    data.frame(
      subgroup = grp_levels,
      var = name_i,
      var_label = unname(subgroup_labels[name_i]),
      stringsAsFactors = FALSE # Rationale is that subgroups may not be unique.
    )
  }, df_subgroups, names(df_subgroups))

  # Create a dataframe with one row per subgroup.
  df_labels <- do.call(rbind, args = c(l_labels, make.row.names = FALSE))
  row_label <- paste0(df_labels$var, ".", df_labels$subgroup)
  # Levels in order of appearance so that `split()` keeps the requested order.
  row_split_var <- factor(row_label, levels = row_label)

  # Create a list of data subsets.
  lapply(split(df_labels, row_split_var), function(row_i) {
    which_row <- if (row_i$var %in% names(groups_lists)) {
      data[[row_i$var]] %in% groups_lists[[row_i$var]][[row_i$subgroup]]
    } else {
      # `%in%` rather than `==`: a missing value in the subgroup variable would
      # give `NA` under `==`, and indexing rows with `NA` adds an all-`NA` row
      # to the subset. `%in%` returns `FALSE` for missing values instead.
      data[[row_i$var]] %in% row_i$subgroup
    }
    df <- data[which_row, ]
    rownames(df) <- NULL
    # Row subsetting may drop the variable labels, so re-apply them.
    formatters::var_labels(df) <- data_labels

    list(
      df = df,
      df_labels = data.frame(row_i, row.names = NULL)
    )
  })
}

1		#' Occurrence Table Pruning
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Family of constructor and condition functions to flexibly prune occurrence tables.
6		#' The condition functions always return whether the row result is higher than the threshold.
7		#' Since they are of class [CombinationFunction()] they can be logically combined with other condition
8		#' functions.
9		#'
10		#' @note Since most table specifications are worded positively, we name our constructor and condition
11		#' functions positively, too. However, note that the result of [keep_rows()] says what
12		#' should be pruned, to conform with the [rtables::prune_table()] interface.
13		#'
14		#' @examples
15		#' \donttest{
16		#' tab <- basic_table() %>%
17		#' split_cols_by("ARM") %>%
18		#' split_rows_by("RACE") %>%
19		#' split_rows_by("STRATA1") %>%
20		#' summarize_row_groups() %>%
21		#' summarize_vars("COUNTRY", .stats = "count_fraction") %>%
22		#' build_table(DM)
23		#' }
24		#'
25		#' @name prune_occurrences
26		NULL
27
28		#' @describeIn prune_occurrences Constructor for creating pruning functions based on
29		#' a row condition function. This removes all analysis rows (`TableRow`) that should be
30		#' pruned, i.e., don't fulfill the row condition. It removes the sub-tree if there are no
31		#' children left.
32		#'
33		#' @param row_condition (`CombinationFunction`)\cr condition function which works on individual
34		#' analysis rows and flags whether these should be kept in the pruned table.
35		#'
36		#' @return
37		#' * `keep_rows()` returns a pruning function that can be used with [rtables::prune_table()]
38		#' to prune an `rtables` table.
39		#'
40		#' @examples
41		#' \donttest{
42		#' # `keep_rows`
43		#' is_non_empty <- !CombinationFunction(all_zero_or_na)
44		#' prune_table(tab, keep_rows(is_non_empty))
45		#' }
46		#'
47		#' @export
keep_rows <- function(row_condition) {
  checkmate::assert_function(row_condition)
  function(table_tree) {
    # Anything that is not a table row is pruned only when it has no children.
    if (!inherits(table_tree, "TableRow")) {
      n_children <- length(tree_children(table_tree))
      return(identical(n_children, 0L))
    }
    # A row is pruned exactly when it does not fulfill the condition.
    !row_condition(table_tree)
  }
}
58
59		#' @describeIn prune_occurrences Constructor for creating pruning functions based on
60		#' a condition for the (first) content row in leaf tables. This removes all leaf tables where
61		#' the first content row does not fulfill the condition. It does not check individual rows.
62		#' It then proceeds recursively by removing the sub tree if there are no children left.
63		#'
64		#' @param content_row_condition (`CombinationFunction`)\cr condition function which works on individual
65		#' first content rows of leaf tables and flags whether these leaf tables should be kept in the pruned table.
66		#'
67		#' @return
68		#' * `keep_content_rows()` returns a pruning function that checks the condition on the first content
69		#' row of leaf tables in the table.
70		#'
71		#' @examples
72		#' # `keep_content_rows`
73		#' \donttest{
74		#' more_than_twenty <- has_count_in_cols(atleast = 20L, col_names = names(tab))
75		#' prune_table(tab, keep_content_rows(more_than_twenty))
76		#' }
77		#'
78		#' @export
keep_content_rows <- function(content_row_condition) {
  checkmate::assert_function(content_row_condition)
  function(table_tree) {
    if (is_leaf_table(table_tree)) {
      # Leaf tables are pruned when their first content row fails the condition.
      first_content_row <- h_content_first_row(table_tree)
      !content_row_condition(first_content_row)
    } else if (inherits(table_tree, "DataRow")) {
      # Individual data rows are never pruned here.
      FALSE
    } else {
      # Any other node is pruned only when it has no children.
      identical(length(tree_children(table_tree)), 0L)
    }
  }
}
93
94		#' @describeIn prune_occurrences Constructor for creating condition functions on total counts in the specified columns.
95		#'
96		#' @param atleast (`count` or `proportion`)\cr threshold which should be met in order to keep the row.
97		#' @param ... arguments for row or column access, see [`rtables_access`]: either `col_names` (`character`) including
98		#' the names of the columns which should be used, or alternatively `col_indices` (`integer`) giving the indices
99		#' directly instead.
100		#'
101		#' @return
102		#' * `has_count_in_cols()` returns a condition function that sums the counts in the specified column.
103		#'
104		#' @examples
105		#' \donttest{
106		#' more_than_one <- has_count_in_cols(atleast = 1L, col_names = names(tab))
107		#' prune_table(tab, keep_rows(more_than_one))
108		#' }
109		#'
110		#' @export
has_count_in_cols <- function(atleast, ...) {
  checkmate::assert_count(atleast)
  CombinationFunction(function(table_row) {
    # Condition on the sum of the counts over the selected columns.
    sum(h_row_counts(table_row, ...)) >= atleast
  })
}
119
120		#' @describeIn prune_occurrences Constructor for creating condition functions on any of the counts in
121		#' the specified columns satisfying a threshold.
122		#'
123		#' @param atleast (`count` or `proportion`)\cr threshold which should be met in order to keep the row.
124		#'
125		#' @return
126		#' * `has_count_in_any_col()` returns a condition function that compares the counts in the
127		#' specified columns with the threshold.
128		#'
129		#' @examples
130		#' \donttest{
131		#' # `has_count_in_any_col`
132		#' any_more_than_one <- has_count_in_any_col(atleast = 1L, col_names = names(tab))
133		#' prune_table(tab, keep_rows(any_more_than_one))
134		#' }
135		#'
136		#' @export
has_count_in_any_col <- function(atleast, ...) {
  checkmate::assert_count(atleast)
  CombinationFunction(function(table_row) {
    # Condition holds as soon as one selected column reaches the threshold.
    counts <- h_row_counts(table_row, ...)
    reaches_threshold <- counts >= atleast
    any(reaches_threshold)
  })
}
144
145		#' @describeIn prune_occurrences Constructor for creating condition functions on total fraction in
146		#' the specified columns.
147		#'
148		#' @return
149		#' * `has_fraction_in_cols()` returns a condition function that sums the counts in the
150		#' specified column, and computes the fraction by dividing by the total column counts.
151		#'
152		#' @examples
153		#' \donttest{
154		#' # `has_fraction_in_cols`
155		#' more_than_five_percent <- has_fraction_in_cols(atleast = 0.05, col_names = names(tab))
156		#' prune_table(tab, keep_rows(more_than_five_percent))
157		#' }
158		#'
159		#' @export
has_fraction_in_cols <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)
  CombinationFunction(function(table_row) {
    # Pooled fraction: summed row counts over summed column counts.
    pooled_count <- sum(h_row_counts(table_row, ...))
    pooled_n <- sum(h_col_counts(table_row, ...))
    (pooled_count / pooled_n) >= atleast
  })
}
171
172		#' @describeIn prune_occurrences Constructor for creating condition functions on any fraction in
173		#' the specified columns.
174		#'
175		#' @return
176		#' * `has_fraction_in_any_col()` returns a condition function that looks at the fractions
177		#' in the specified columns and checks whether any of them fulfill the threshold.
178		#'
179		#' @examples
180		#' \donttest{
181		#' # `has_fraction_in_any_col`
182		#' any_atleast_five_percent <- has_fraction_in_any_col(atleast = 0.05, col_names = names(tab))
183		#' prune_table(tab, keep_rows(any_atleast_five_percent))
184		#' }
185		#'
186		#' @export
has_fraction_in_any_col <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)
  CombinationFunction(function(table_row) {
    # Condition holds as soon as one selected column's fraction reaches the threshold.
    fractions <- h_row_fractions(table_row, ...)
    reaches_threshold <- fractions >= atleast
    any(reaches_threshold)
  })
}
194
195		#' @describeIn prune_occurrences Constructor for creating condition function that checks the difference
196		#' between the fractions reported in each specified column.
197		#'
198		#' @return
199		#' * `has_fractions_difference()` returns a condition function that extracts the fractions of each
200		#' specified column, and computes the difference of the minimum and maximum.
201		#'
202		#' @examples
203		#' \donttest{
204		#' # `has_fractions_difference`
205		#' more_than_five_percent_diff <- has_fractions_difference(atleast = 0.05, col_names = names(tab))
206		#' prune_table(tab, keep_rows(more_than_five_percent_diff))
207		#' }
208		#'
209		#' @export
has_fractions_difference <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)
  CombinationFunction(function(table_row) {
    # Spread between the largest and smallest fraction in the selected columns.
    fraction_range <- range(h_row_fractions(table_row, ...))
    spread <- fraction_range[2] - fraction_range[1]
    spread >= atleast
  })
}
218
219		#' @describeIn prune_occurrences Constructor for creating condition function that checks the difference
220		#' between the counts reported in each specified column.
221		#'
222		#' @return
223		#' * `has_counts_difference()` returns a condition function that extracts the counts of each
224		#' specified column, and computes the difference of the minimum and maximum.
225		#'
226		#' @examples
227		#' \donttest{
228		#' more_than_one_diff <- has_counts_difference(atleast = 1L, col_names = names(tab))
229		#' prune_table(tab, keep_rows(more_than_one_diff))
230		#' }
231		#'
232		#' @export
has_counts_difference <- function(atleast, ...) {
  checkmate::assert_count(atleast)
  CombinationFunction(function(table_row) {
    # Spread between the largest and smallest count in the selected columns.
    count_range <- range(h_row_counts(table_row, ...))
    spread <- count_range[2] - count_range[1]
    spread >= atleast
  })
}

1		#' Helper Functions for Cox Proportional Hazards Regression
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper functions used in [fit_coxreg_univar()] and [fit_coxreg_multivar()].
6		#'
7		#' @inheritParams argument_convention
8		#' @inheritParams h_coxreg_univar_extract
9		#' @inheritParams cox_regression_inter
10		#' @inheritParams control_coxreg
11		#'
12		#' @seealso [cox_regression]
13		#'
14		#' @name h_cox_regression
15		NULL
16
17		#' @describeIn h_cox_regression Helper for Cox regression formula. Creates a list of formulas. It is used
18		#' internally by [fit_coxreg_univar()] for the comparison of univariate Cox regression models.
19		#'
20		#' @return
21		#' * `h_coxreg_univar_formulas()` returns a `character` vector coercible into formulas (e.g [stats::as.formula()]).
22		#'
23		#' @examples
24		#' # `h_coxreg_univar_formulas`
25		#'
26		#' ## Simple formulas.
27		#' h_coxreg_univar_formulas(
28		#' variables = list(
29		#' time = "time", event = "status", arm = "armcd", covariates = c("X", "y")
30		#' )
31		#' )
32		#'
33		#' ## Addition of an optional strata.
34		#' h_coxreg_univar_formulas(
35		#' variables = list(
36		#' time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
37		#' strata = "SITE"
38		#' )
39		#' )
40		#'
41		#' ## Inclusion of the interaction term.
42		#' h_coxreg_univar_formulas(
43		#' variables = list(
44		#' time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
45		#' strata = "SITE"
46		#' ),
47		#' interaction = TRUE
48		#' )
49		#'
50		#' ## Only covariates fitted in separate models.
51		#' h_coxreg_univar_formulas(
52		#' variables = list(
53		#' time = "time", event = "status", covariates = c("X", "y")
54		#' )
55		#' )
56		#'
57		#' @export
h_coxreg_univar_formulas <- function(variables,
                                     interaction = FALSE) {
  # Builds one formula string per covariate (plus a covariate-free reference
  # model named "ref" when a treatment arm is given) for univariate Cox models.
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  checkmate::assert_flag(interaction)

  # An interaction term needs both a treatment arm and at least one covariate.
  if (!has_arm || is.null(variables$covariates)) {
    checkmate::assert_false(interaction)
  }

  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # Pieces shared by the covariate models and the reference model.
  response_part <- paste0(
    "survival::Surv(", variables$time, ", ", variables$event, ") ~ "
  )
  strata_part <- if (!is.null(variables$strata)) {
    paste0(" + strata(", paste0(variables$strata, collapse = ", "), ")")
  } else {
    ""
  }

  forms <- if (!is.null(variables$covariates)) {
    # Vectorized over `variables$covariates`: one formula per covariate.
    paste0(
      response_part,
      if (has_arm) variables$arm else "1",
      if (interaction) " * " else " + ",
      variables$covariates,
      strata_part
    )
  } else {
    NULL
  }
  nams <- variables$covariates

  if (has_arm) {
    # Reference model: treatment arm (and strata) only, listed first.
    ref <- paste0(response_part, variables$arm, strata_part)
    forms <- c(ref, forms)
    nams <- c("ref", nams)
  }

  stats::setNames(forms, nams)
}
107
108		#' @describeIn h_cox_regression Helper for multivariate Cox regression formula. Creates a formula
109		#' string. It is used internally by [fit_coxreg_multivar()] for the comparison of multivariate Cox
110		#' regression models. Interactions will not be included in multivariate Cox regression model.
111		#'
112		#' @return
113		#' * `h_coxreg_multivar_formula()` returns a `string` coercible into a formula (e.g. [stats::as.formula()]).
114		#'
115		#' @examples
116		#' # `h_coxreg_multivar_formula`
117		#'
118		#' h_coxreg_multivar_formula(
119		#' variables = list(
120		#' time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE")
121		#' )
122		#' )
123		#'
124		#' # Addition of an optional strata.
125		#' h_coxreg_multivar_formula(
126		#' variables = list(
127		#' time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE"),
128		#' strata = "SITE"
129		#' )
130		#' )
131		#'
132		#' # Example without treatment arm.
133		#' h_coxreg_multivar_formula(
134		#' variables = list(
135		#' time = "AVAL", event = "event", covariates = c("RACE", "AGE"),
136		#' strata = "SITE"
137		#' )
138		#' )
139		#'
140		#' @export
h_coxreg_multivar_formula <- function(variables) {
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # Left-hand side plus the first right-hand-side term: the treatment arm if
  # given, otherwise an intercept-only placeholder.
  formula_string <- paste0(
    "survival::Surv(", variables$time, ", ", variables$event, ") ~ ",
    ifelse(has_arm, variables$arm, "1")
  )

  # Covariates enter additively; no interaction terms are generated here.
  if (length(variables$covariates) > 0) {
    covariate_terms <- paste(variables$covariates, collapse = " + ")
    formula_string <- paste(formula_string, covariate_terms, sep = " + ")
  }

  # All stratification variables go into a single `strata()` term.
  if (!is.null(variables$strata)) {
    strata_vars <- paste0(variables$strata, collapse = ", ")
    formula_string <- paste0(formula_string, " + strata(", strata_vars, ")")
  }

  formula_string
}
162
163		#' @describeIn h_cox_regression Utility function to help tabulate the result of
164		#' a univariate Cox regression model.
165		#'
166		#' @param effect (`string`)\cr the treatment variable.
167		#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()].
168		#'
169		#' @return
170		#' * `h_coxreg_univar_extract()` returns a `data.frame` with variables `effect`, `term`, `term_label`, `level`,
171		#' `n`, `hr`, `lcl`, `ucl`, and `pval`.
172		#'
173		#' @examples
174		#' library(survival)
175		#'
176		#' dta_simple <- data.frame(
177		#' time = c(5, 5, 10, 10, 5, 5, 10, 10),
178		#' status = c(0, 0, 1, 0, 0, 1, 1, 1),
179		#' armcd = factor(LETTERS[c(1, 1, 1, 1, 2, 2, 2, 2)], levels = c("A", "B")),
180		#' var1 = c(45, 55, 65, 75, 55, 65, 85, 75),
181		#' var2 = c("F", "M", "F", "M", "F", "M", "F", "U")
182		#' )
183		#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
184		#' result <- h_coxreg_univar_extract(
185		#' effect = "armcd", covar = "armcd", mod = mod, data = dta_simple
186		#' )
187		#' result
188		#'
189		#' @export
h_coxreg_univar_extract <- function(effect,
                                    covar,
                                    data,
                                    mod,
                                    control = control_coxreg()) {
  checkmate::assert_string(covar)
  checkmate::assert_string(effect)
  checkmate::assert_class(mod, "coxph")

  # Map the control's p-value method onto the test statistic label passed to
  # `muffled_car_anova()`.
  test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]

  anova_tab <- muffled_car_anova(mod, test_statistic)
  mod_summary <- summary(mod, conf.int = control$conf_level)
  coef_tab <- broom::tidy(mod_summary)

  # P-value of the effect: the column of the ANOVA row whose name contains "Pr".
  effect_row <- anova_tab[effect, , drop = TRUE]
  pval <- effect_row[[grep(pattern = "Pr", x = names(effect_row)), drop = TRUE]]

  # Coefficient rows whose `level` matches the effect name.
  effect_coefs <- coef_tab[grepl(effect, coef_tab$level), ]

  is_treatment <- effect == covar
  term_label <- if (is_treatment) {
    covar_levels <- levels(data[[covar]])
    paste0(covar_levels[2], " vs control (", covar_levels[1], ")")
  } else {
    unname(labels_or_names(data[covar]))
  }

  data.frame(
    effect = if (is_treatment) "Treatment:" else "Covariate:",
    term = covar,
    term_label = term_label,
    level = levels(data[[effect]])[2],
    n = mod[["n"]],
    hr = unname(effect_coefs["exp(coef)"]),
    lcl = unname(effect_coefs[grep("lower", names(effect_coefs))]),
    ucl = unname(effect_coefs[grep("upper", names(effect_coefs))]),
    pval = pval,
    stringsAsFactors = FALSE
  )
}
232
233		#' @describeIn h_cox_regression Tabulation of multivariate Cox regressions. Utility function to help
234		#' tabulate the result of a multivariate Cox regression model for a treatment/covariate variable.
235		#'
236		#' @return
237		#' * `h_coxreg_multivar_extract()` returns a `data.frame` with variables `pval`, `hr`, `lcl`, `ucl`, `level`,
238		#' `n`, `term`, and `term_label`.
239		#'
240		#' @examples
241		#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
242		#' result <- h_coxreg_multivar_extract(
243		#' var = "var1", mod = mod, data = dta_simple
244		#' )
245		#' result
246		#'
247		#' @export
h_coxreg_multivar_extract <- function(var,
                                      data,
                                      mod,
                                      control = control_coxreg()) {
  test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]
  mod_aov <- muffled_car_anova(mod, test_statistic)

  msum <- summary(mod, conf.int = control$conf_level)
  sum_anova <- broom::tidy(mod_aov)
  sum_cox <- broom::tidy(msum)

  # ANOVA row for the variable: only the term name and its p-value are kept.
  ret_anova <- sum_anova[sum_anova$term == var, c("term", "p.value")]
  names(ret_anova)[2] <- "pval"

  # Coefficient rows for the variable: factor levels are matched by prefix,
  # anything else by exact name. The "exp(-coef)" column is dropped.
  kept_columns <- !(names(sum_cox) %in% "exp(-coef)")
  coef_rows <- if (is.factor(data[[var]])) {
    startsWith(prefix = var, x = sum_cox$level)
  } else {
    var == sum_cox$level
  }
  ret_cox <- sum_cox[coef_rows, kept_columns]
  names(ret_cox)[1:4] <- c("pval", "hr", "lcl", "ucl")
  varlab <- unname(labels_or_names(data[var]))
  ret_cox$term <- varlab

  if (is.numeric(data[[var]])) {
    # Numeric variable: a single coefficient row labelled with the variable.
    ret <- ret_cox
    ret$term_label <- ret$term
  } else {
    var_levels <- levels(data[[var]])
    # With at most two levels the header row carries no p-value of its own.
    if (length(var_levels) <= 2) {
      ret_anova$pval <- NA
    }
    ret_anova$term_label <- paste0(varlab, " (reference = ", var_levels[1], ")")
    # Strip the variable name from the coefficient names to leave the level.
    ret_cox$level <- gsub(var, "", ret_cox$level)
    ret_cox$term_label <- ret_cox$level
    ret <- dplyr::bind_rows(ret_anova, ret_cox)
  }

  as.data.frame(ret)
}

1		#' Helper Functions for Subgroup Treatment Effect Pattern (STEP) Calculations
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper functions that are used internally for the STEP calculations.
6		#'
7		#' @inheritParams argument_convention
8		#'
9		#' @name h_step
10		#' @include control_step.R
11		NULL
12
13		#' @describeIn h_step creates the windows for STEP, based on the control settings
14		#' provided.
15		#'
16		#' @param x (`numeric`)\cr biomarker value(s) to use (without `NA`).
17		#' @param control (named `list`)\cr output from `control_step()`.
18		#'
19		#' @return
20		#' * `h_step_window()` returns a list containing the window-selection matrix `sel`
21		#' and the interval information matrix `interval`.
22		#'
23		#' @export
h_step_window <- function(x,
                          control = control_step()) {
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  n_points <- control$num_points
  bound_labels <- c("Center", "Lower", "Upper")

  # `sel`: one column of observation flags per window.
  # `out`: one row of interval information per window.
  sel <- matrix(FALSE, length(x), n_points)
  out <- matrix(0, n_points, 3)
  colnames(out) <- paste("Interval", bound_labels)

  if (control$use_percentile) {
    # Windows are defined on the percentile scale; columns 1-3 hold the
    # percentiles and columns 4-6 the corresponding biomarker values.
    out <- cbind(out, out)
    colnames(out)[1:3] <- paste("Percentile", bound_labels)
    grid <- seq(0, 1, length.out = n_points + 2)[-1]
    for (k in seq_len(n_points)) {
      pct_bounds <- c(
        max(grid[k] - control$bandwidth, 0),
        min(grid[k] + control$bandwidth, 1)
      )
      out[k, 2:3] <- pct_bounds
      out[k, 5:6] <- stats::quantile(x, out[k, 2:3])
      sel[, k] <- x >= out[k, 5] & x <= out[k, 6]
    }
    # The window center is the grid percentile (last grid point dropped).
    out[, 1] <- grid[-(n_points + 1)]
    out[, 4] <- stats::quantile(x, out[, 1])
  } else {
    # Windows are defined directly on the biomarker scale, clipped to the
    # observed range of `x`.
    x_range <- c(min(x), max(x))
    grid <- seq(x_range[1], x_range[2], length.out = n_points + 2)[-1]
    for (k in seq_len(n_points)) {
      out[k, 2:3] <- c(
        max(grid[k] - control$bandwidth, x_range[1]),
        min(grid[k] + control$bandwidth, x_range[2])
      )
      sel[, k] <- x >= out[k, 2] & x <= out[k, 3]
    }
    # The window center is the grid point itself (last grid point dropped).
    out[, 1] <- grid[-(n_points + 1)]
  }

  list(sel = sel, interval = out)
}
64
65		#' @describeIn h_step calculates the estimated treatment effect estimate
66		#' on the linear predictor scale and corresponding standard error from a STEP `model` fitted
67		#' on `data` given `variables` specification, for a single biomarker value `x`.
68		#' This works for both `coxph` and `glm` models, i.e. for calculating log hazard ratio or log odds
69		#' ratio estimates.
70		#'
71		#' @param model the regression model object.
72		#'
73		#' @return
74		#' * `h_step_trt_effect()` returns a vector with elements `est` and `se`.
75		#'
76		#' @export
h_step_trt_effect <- function(data,
                              model,
                              variables,
                              x) {
  # Computes the treatment effect estimate on the linear predictor scale and
  # its standard error at biomarker value `x`, as the contrast between the
  # two arm levels under the fitted `model`.
  checkmate::assert_multi_class(model, c("coxph", "glm"))
  checkmate::assert_number(x)
  assert_df_with_variables(data, variables)
  checkmate::assert_factor(data[[variables$arm]], n.levels = 2)

  # Two copies of the first row: identical covariates, biomarker set to `x`,
  # and one row per arm level.
  newdata <- data[c(1, 1), ]
  newdata[, variables$biomarker] <- x
  newdata[, variables$arm] <- levels(data[[variables$arm]])
  model_terms <- stats::delete.response(stats::terms(model))
  model_frame <- stats::model.frame(model_terms, data = newdata, xlev = model$xlevels)
  mat <- stats::model.matrix(model_terms, data = model_frame, contrasts.arg = model$contrasts)
  coefs <- stats::coef(model)
  # Note: It is important to use the coef subset from matrix, otherwise intercept and
  # strata are included for coxph() models.
  # `drop = FALSE` keeps the matrix shape when the model has a single coefficient.
  mat <- mat[, names(coefs), drop = FALSE]
  # Row difference: design contrast between the second and first arm level.
  mat_diff <- diff(mat)
  est <- mat_diff %*% coefs
  # Variance of the contrast: d V d' (matrix products, not modulo).
  var <- mat_diff %*% stats::vcov(model) %*% t(mat_diff)
  se <- sqrt(var)
  c(
    est = est,
    se = se
  )
}
105
106		#' @describeIn h_step builds the model formula used in survival STEP calculations.
107		#'
108		#' @return
109		#' * `h_step_survival_formula()` returns a model formula.
110		#'
111		#' @export
h_step_survival_formula <- function(variables,
                                    control = control_step()) {
  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_list_of_variables(variables[c("arm", "biomarker", "event", "time")])

  # Optional right-hand-side pieces; each stays empty when not requested.
  poly_term <- ""
  covariate_term <- ""
  strata_term <- ""

  # Arm-by-biomarker interaction through a raw polynomial of the given degree.
  if (control$degree > 0) {
    poly_term <- paste0(
      " * stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)"
    )
  }
  if (!is.null(variables$covariates)) {
    covariate_term <- paste0(" + ", paste(variables$covariates, collapse = "+"))
  }
  if (!is.null(variables$strata)) {
    strata_term <- paste0(" + strata(", paste0(variables$strata, collapse = ", "), ")")
  }

  form <- paste0(
    "Surv(", variables$time, ", ", variables$event, ") ~ ", variables$arm,
    poly_term, covariate_term, strata_term
  )
  stats::as.formula(form)
}
129
130		#' @describeIn h_step estimates the model with `formula` built based on
131		#' `variables` in `data` for a given `subset` and `control` parameters for the
132		#' Cox regression.
133		#'
134		#' @param formula (`formula`)\cr the regression model formula.
135		#' @param subset (`logical`)\cr subset vector.
136		#'
137		#' @return
138		#' * `h_step_survival_est()` returns a matrix of number of observations `n`,
139		#' `events`, log hazard ratio estimates `loghr`, standard error `se`,
140		#' and Wald confidence interval bounds `ci_lower` and `ci_upper`. One row is
141		#' included for each biomarker value in `x`.
142		#'
143		#' @export
h_step_survival_est <- function(formula,
                                data,
                                variables,
                                x,
                                subset = rep(TRUE, nrow(data)),
                                control = control_coxph()) {
  checkmate::assert_formula(formula)
  assert_df_with_variables(data, variables)
  checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  # Note: `subset` in `coxph` needs to be an expression referring to `data` variables.
  data$.subset <- subset

  # Fit the model, muffling individual fit warnings and only remembering that
  # at least one occurred so that a single summary warning can be raised.
  had_fit_warning <- FALSE
  fit <- withCallingHandlers(
    survival::coxph(
      formula = formula,
      data = data,
      subset = .subset,
      ties = control$ties
    ),
    warning = function(w) {
      had_fit_warning <<- TRUE
      invokeRestart("muffleWarning")
    }
  )
  if (had_fit_warning) {
    warning(paste(
      "Fit warnings occurred, please consider using a simpler model, or",
      "larger `bandwidth`, less `num_points` in `control_step()` settings"
    ))
  }

  # Produce a matrix with one row per `x` and columns `est` and `se`.
  estimates <- t(vapply(
    X = x,
    FUN = h_step_trt_effect,
    FUN.VALUE = numeric(2),
    data = data,
    model = fit,
    variables = variables
  ))

  # Wald confidence limits around the log hazard ratio.
  q_norm <- stats::qnorm((1 + control$conf_level) / 2)
  loghr <- estimates[, "est"]
  se <- estimates[, "se"]
  cbind(
    n = fit$n,
    events = fit$nevent,
    loghr = loghr,
    se = se,
    ci_lower = loghr - q_norm * se,
    ci_upper = loghr + q_norm * se
  )
}
202
203		#' @describeIn h_step builds the model formula used in response STEP calculations.
204		#'
205		#' @return
206		#' * `h_step_rsp_formula()` returns a model formula.
207		#'
208		#' @export
h_step_rsp_formula <- function(variables,
                               control = c(control_step(), control_logistic())) {
  checkmate::assert_character(variables$covariates, null.ok = TRUE)
  assert_list_of_variables(variables[c("arm", "biomarker", "response")])

  # Left-hand side: the literal "response" placeholder in the response
  # definition is replaced (first occurrence) by the response variable name.
  lhs <- sub(
    pattern = "response",
    replacement = variables$response,
    x = control$response_definition,
    fixed = TRUE
  )

  # Optional right-hand-side pieces; each stays empty when not requested.
  poly_term <- ""
  covariate_term <- ""
  strata_term <- ""

  # Arm-by-biomarker interaction through a raw polynomial of the given degree.
  if (control$degree > 0) {
    poly_term <- paste0(
      " * stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)"
    )
  }
  if (!is.null(variables$covariates)) {
    covariate_term <- paste0(" + ", paste(variables$covariates, collapse = "+"))
  }
  if (!is.null(variables$strata)) {
    # Several strata variables are combined into one interaction factor.
    strata_arg <- if (length(variables$strata) > 1) {
      paste0("I(interaction(", paste0(variables$strata, collapse = ", "), "))")
    } else {
      variables$strata
    }
    strata_term <- paste0("+ strata(", strata_arg, ")")
  }

  form <- paste0(lhs, " ~ ", variables$arm, poly_term, covariate_term, strata_term)
  stats::as.formula(form)
}
236
237		#' @describeIn h_step estimates the model with `formula` built based on
238		#' `variables` in `data` for a given `subset` and `control` parameters for the
239		#' logistic regression.
240		#'
241		#' @param formula (`formula`)\cr the regression model formula.
242		#' @param subset (`logical`)\cr subset vector.
243		#'
244		#' @return
245		#' * `h_step_rsp_est()` returns a matrix of number of observations `n`, log odds
246		#' ratio estimates `logor`, standard error `se`, and Wald confidence interval bounds
247		#' `ci_lower` and `ci_upper`. One row is included for each biomarker value in `x`.
248		#'
249		#' @export
h_step_rsp_est <- function(formula,
                           data,
                           variables,
                           x,
                           subset = rep(TRUE, nrow(data)),
                           control = control_logistic()) {
  checkmate::assert_formula(formula)
  assert_df_with_variables(data, variables)
  checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  # Note: `subset` in `glm` needs to be an expression referring to `data` variables.
  data$.subset <- subset

  # Fit the model, muffling individual fit warnings and only remembering that
  # at least one occurred so that a single summary warning can be raised.
  had_fit_warning <- FALSE
  fit <- withCallingHandlers(
    if (is.null(variables$strata)) {
      stats::glm(
        formula = formula,
        data = data,
        subset = .subset,
        family = stats::binomial("logit")
      )
    } else {
      # clogit needs coxph and strata imported
      survival::clogit(
        formula = formula,
        data = data,
        subset = .subset
      )
    },
    warning = function(w) {
      had_fit_warning <<- TRUE
      invokeRestart("muffleWarning")
    }
  )
  if (had_fit_warning) {
    warning(paste(
      "Fit warnings occurred, please consider using a simpler model, or",
      "larger `bandwidth`, less `num_points` in `control_step()` settings"
    ))
  }

  # Produce a matrix with one row per `x` and columns `est` and `se`.
  estimates <- t(vapply(
    X = x,
    FUN = h_step_trt_effect,
    FUN.VALUE = numeric(2),
    data = data,
    model = fit,
    variables = variables
  ))

  # Wald confidence limits around the log odds ratio.
  q_norm <- stats::qnorm((1 + control$conf_level) / 2)
  logor <- estimates[, "est"]
  se <- estimates[, "se"]
  cbind(
    n = length(fit$y),
    logor = logor,
    se = se,
    ci_lower = logor - q_norm * se,
    ci_upper = logor + q_norm * se
  )
}

1		#' Helper Functions for Tabulating Biomarker Effects on Binary Response by Subgroup
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper functions which are documented here separately to not confuse the user
6		#' when reading about the user-facing functions.
7		#'
8		#' @inheritParams response_biomarkers_subgroups
9		#' @inheritParams extract_rsp_biomarkers
10		#' @inheritParams argument_convention
11		#'
12		#' @examples
13		#' library(dplyr)
14		#' library(forcats)
15		#'
16		#' adrs <- tern_ex_adrs
17		#' adrs_labels <- formatters::var_labels(adrs)
18		#'
19		#' adrs_f <- adrs %>%
20		#' filter(PARAMCD == "BESRSPI") %>%
21		#' mutate(rsp = AVALC == "CR")
22		#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
23		#'
24		#' @name h_response_biomarkers_subgroups
25		NULL
26
27		#' @describeIn h_response_biomarkers_subgroups helps with converting the "response" function variable list
28		#' to the "logistic regression" variable list. The reason is that currently there is an
29		#' inconsistency between the variable names accepted by `extract_rsp_subgroups()` and `fit_logistic()`.
30		#'
31		#' @param biomarker (`string`)\cr the name of the biomarker variable.
32		#'
33		#' @return
34		#' * `h_rsp_to_logistic_variables()` returns a named `list` of elements `response`, `arm`, `covariates`, and `strata`.
35		#'
36		#' @examples
37		#' # This is how the variable list is converted internally.
38		#' h_rsp_to_logistic_variables(
39		#' variables = list(
40		#' rsp = "RSP",
41		#' covariates = c("A", "B"),
42		#' strat = "D"
43		#' ),
44		#' biomarker = "AGE"
45		#' )
46		#'
47		#' @export
h_rsp_to_logistic_variables <- function(variables, biomarker) {
  checkmate::assert_list(variables)
  checkmate::assert_string(variables$rsp)
  checkmate::assert_string(biomarker)

  # Rename the "response"-style entries to the names used by the logistic
  # regression variable list; the biomarker takes the place of the arm.
  response_var <- variables$rsp
  covariate_vars <- variables$covariates
  strata_vars <- variables$strat

  list(
    response = response_var,
    arm = biomarker,
    covariates = covariate_vars,
    strata = strata_vars
  )
}
59
60		#' @describeIn h_response_biomarkers_subgroups prepares estimates for number of responses, patients and
61		#' overall response rate, as well as odds ratio estimates, confidence intervals and p-values, for multiple
62		#' biomarkers in a given single data set.
63		#' `variables` corresponds to names of variables found in `data`, passed as a named list and requires elements
64		#' `rsp` and `biomarkers` (vector of continuous biomarker variables) and optionally `covariates`
65		#' and `strat`.
66		#'
67		#' @return
68		#' * `h_logistic_mult_cont_df()` returns a `data.frame` containing estimates and statistics for the selected biomarkers.
69		#'
70		#' @examples
71		#' # For a single population, estimate separately the effects
72		#' # of two biomarkers.
73		#' df <- h_logistic_mult_cont_df(
74		#' variables = list(
75		#' rsp = "rsp",
76		#' biomarkers = c("BMRKR1", "AGE"),
77		#' covariates = "SEX"
78		#' ),
79		#' data = adrs_f
80		#' )
81		#' df
82		#'
83		#' # If the data set is empty, still the corresponding rows with missings are returned.
84		#' h_logistic_mult_cont_df(
85		#' variables = list(
86		#' rsp = "rsp",
87		#' biomarkers = c("BMRKR1", "AGE"),
88		#' covariates = "SEX",
89		#' strat = "STRATA1"
90		#' ),
91		#' data = adrs_f[NULL, ]
92		#' )
93		#'
94		#' @export
h_logistic_mult_cont_df <- function(variables,
                                    data,
                                    control = control_logistic()) {
  # Fits one logistic model per biomarker in `variables$biomarkers` and returns
  # one row per biomarker with response counts, odds ratio, confidence limits
  # and p-value. For an empty `data` the same columns are returned with
  # zero counts and missing estimates.
  assert_df_with_variables(data, variables)

  checkmate::assert_character(variables$biomarkers, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  conf_level <- control[["conf_level"]]
  pval_label <- "p-value (Wald)"

  # If there is any data, run model, otherwise return empty results.
  if (nrow(data) > 0) {
    l_result <- lapply(variables$biomarkers, function(bm) {
      model_fit <- fit_logistic(
        variables = h_rsp_to_logistic_variables(variables, bm),
        data = data,
        response_definition = control$response_definition
      )
      result <- h_logistic_simple_terms(
        x = bm,
        fit_glm = model_fit,
        conf_level = control$conf_level
      )
      # The response vector is stored differently depending on the fit class:
      # in the model frame for `glm` fits, otherwise in the `status` column
      # of the fit's `y` component.
      resp_vector <- if (inherits(model_fit, "glm")) {
        model_fit$model[[variables$rsp]]
      } else {
        as.logical(as.matrix(model_fit$y)[, "status"])
      }
      data.frame(
        # Dummy column needed downstream to create a nested header.
        biomarker = bm,
        biomarker_label = formatters::var_labels(data[bm], fill = TRUE),
        n_tot = length(resp_vector),
        n_rsp = sum(resp_vector),
        prop = mean(resp_vector),
        or = as.numeric(result[1L, "odds_ratio"]),
        lcl = as.numeric(result[1L, "lcl"]),
        ucl = as.numeric(result[1L, "ucl"]),
        conf_level = conf_level,
        pval = as.numeric(result[1L, "pvalue"]),
        pval_label = pval_label,
        stringsAsFactors = FALSE
      )
    })
    do.call(rbind, args = c(l_result, make.row.names = FALSE))
  } else {
    data.frame(
      biomarker = variables$biomarkers,
      biomarker_label = formatters::var_labels(data[variables$biomarkers], fill = TRUE),
      n_tot = 0L,
      n_rsp = 0L,
      prop = NA,
      or = NA,
      lcl = NA,
      ucl = NA,
      conf_level = conf_level,
      pval = NA,
      pval_label = pval_label,
      row.names = seq_along(variables$biomarkers),
      stringsAsFactors = FALSE
    )
  }
}
160
161		#' @describeIn h_response_biomarkers_subgroups prepares a single sub-table given a `df_sub` containing
162		#' the results for a single biomarker.
163		#'
164		#' @param df (`data.frame`)\cr results for a single biomarker, as part of what is
165		#' returned by [extract_rsp_biomarkers()] (it needs a couple of columns which are
166		#' added by that high-level function relative to what is returned by [h_logistic_mult_cont_df()],
167		#' see the example).
168		#'
169		#' @return
170		#' * `h_tab_rsp_one_biomarker()` returns an `rtables` table object with the given statistics arranged in columns.
171		#'
172		#' @examples
173		#' # Starting from above `df`, zoom in on one biomarker and add required columns.
174		#' df1 <- df[1, ]
175		#' df1$subgroup <- "All patients"
176		#' df1$row_type <- "content"
177		#' df1$var <- "ALL"
178		#' df1$var_label <- "All patients"
179		#'
180		#' h_tab_rsp_one_biomarker(
181		#' df1,
182		#' vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval")
183		#' )
184		#'
185		#' @export
h_tab_rsp_one_biomarker <- function(df,
                                    vars,
                                    .indent_mods = 0L) {
  # Analysis functions for the requested statistics.
  analysis_funs <- a_response_subgroups()[vars]

  # Column definitions; the confidence level and p-value label are read from
  # the first row of `df`.
  column_info <- d_rsp_subgroups_colvars(
    vars,
    conf_level = df$conf_level[1],
    method = df$pval_label[1]
  )

  h_tab_one_biomarker(
    df = df,
    afuns = analysis_funs,
    colvars = column_info,
    .indent_mods = .indent_mods
  )
}

1		#' Helper Function for Deriving Analysis Datasets for `LBT13` and `LBT14`
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper function that merges `ADSL` and `ADLB` datasets so that missing lab test records are inserted in the
6		#' output dataset.
7		#'
8		#' @param adsl (`data.frame`)\cr `ADSL` dataframe.
9		#' @param adlb (`data.frame`)\cr `ADLB` dataframe.
10		#' @param worst_flag (named `vector`)\cr Worst post-baseline lab flag variable.
11		#' @param by_visit (`logical`)\cr defaults to `FALSE` to generate worst grade per patient.
12		#' If worst grade per patient per visit is specified for `worst_flag`, then
13		#' `by_visit` should be `TRUE` to generate worst grade patient per visit.
14		#' @param no_fillin_visits (named `character`)\cr Visits that are not considered for post-baseline worst toxicity
15		#' grade. Defaults to `c("SCREENING", "BASELINE")`.
16		#'
17		#' @return `df` containing variables shared between `adlb` and `adsl` along with variables `PARAM`, `PARAMCD`,
18		#' `ATOXGR`, and `BTOXGR` relevant for analysis. Optionally, `AVISIT` and `AVISITN` are included when
19		#' `by_visit = TRUE` and `no_fillin_visits = c("SCREENING", "BASELINE")`.
20		#'
21		#' @details In the result data missing records will be created for the following situations:
22		#' * Patients who are present in `adsl` but have no lab data in `adlb` (both baseline and post-baseline).
23		#' * Patients who do not have any post-baseline lab values.
24		#' * Patients without any post-baseline values flagged as the worst.
25		#'
26		#' @examples
27		#' # `h_adsl_adlb_merge_using_worst_flag`
28		#' adlb_out <- h_adsl_adlb_merge_using_worst_flag(
29		#' tern_ex_adsl,
30		#' tern_ex_adlb,
31		#' worst_flag = c("WGRHIFL" = "Y")
32		#' )
33		#'
34		#' # `h_adsl_adlb_merge_using_worst_flag` by visit example
35		#' adlb_out_by_visit <- h_adsl_adlb_merge_using_worst_flag(
36		#' tern_ex_adsl,
37		#' tern_ex_adlb,
38		#' worst_flag = c("WGRLOVFL" = "Y"),
39		#' by_visit = TRUE
40		#' )
41		#'
42		#' @export
h_adsl_adlb_merge_using_worst_flag <- function(adsl, # nolint
                                               adlb,
                                               worst_flag = c("WGRHIFL" = "Y"),
                                               by_visit = FALSE,
                                               no_fillin_visits = c("SCREENING", "BASELINE")) {
  # Row positions in `adlb` where every worst-flag column equals its value.
  flag_hits <- Map(
    function(flag_col, flag_value) which(adlb[[flag_col]] == flag_value),
    names(worst_flag),
    worst_flag
  )
  flagged_rows <- Reduce(intersect, flag_hits)

  adsl_adlb_common_columns <- intersect(colnames(adsl), colnames(adlb))
  columns_from_adlb <- c("USUBJID", "PARAM", "PARAMCD", "AVISIT", "AVISITN", "ATOXGR", "BTOXGR")

  # Flagged records outside the excluded visits, restricted to the lab columns.
  adlb_f <- dplyr::filter(adlb[flagged_rows, ], !.data[["AVISIT"]] %in% no_fillin_visits)
  adlb_f <- adlb_f[, columns_from_adlb]

  # Visits eligible for fill-in (all visits in `adlb` except the excluded ones).
  avisits_grid <- unique(
    dplyr::pull(
      dplyr::filter(adlb, !.data[["AVISIT"]] %in% no_fillin_visits),
      .data[["AVISIT"]]
    )
  )

  # The full grid has one row per patient and parameter, and additionally per
  # visit when `by_visit = TRUE`. The merge keys follow the same distinction.
  if (by_visit) {
    grid_dims <- list(
      USUBJID = unique(adsl$USUBJID),
      AVISIT = avisits_grid,
      PARAMCD = unique(adlb$PARAMCD)
    )
    by_variables_from_adlb <- c("USUBJID", "AVISIT", "AVISITN", "PARAMCD", "PARAM")
  } else {
    grid_dims <- list(
      USUBJID = unique(adsl$USUBJID),
      PARAMCD = unique(adlb$PARAMCD)
    )
    by_variables_from_adlb <- c("USUBJID", "PARAMCD", "PARAM")
  }
  adsl_lb <- do.call(expand.grid, grid_dims)

  # Attach the visit number (by-visit only) and the parameter label to the grid.
  if (by_visit) {
    adsl_lb <- dplyr::left_join(adsl_lb, unique(adlb[c("AVISIT", "AVISITN")]), by = "AVISIT")
  }
  adsl_lb <- dplyr::left_join(adsl_lb, unique(adlb[c("PARAM", "PARAMCD")]), by = "PARAMCD")

  # Add the subject-level columns shared between `adsl` and `adlb`.
  adsl1 <- adsl[, adsl_adlb_common_columns]
  adsl_lb <- merge(adsl1, adsl_lb, by = "USUBJID")

  # Full outer merge: grid rows without a flagged record become missing records.
  adlb_out <- merge(
    adlb_f,
    adsl_lb,
    by = by_variables_from_adlb,
    all = TRUE,
    sort = FALSE
  )

  if (by_visit) {
    # Overwrite the baseline grade with the per-patient/parameter value taken
    # from `adlb`, so that filled-in visit rows also carry it.
    adlb_btoxgr <- dplyr::rename(
      unique(dplyr::select(adlb, c("USUBJID", "PARAMCD", "BTOXGR"))),
      "BTOXGR_MAP" = "BTOXGR"
    )
    adlb_out <- dplyr::left_join(adlb_out, adlb_btoxgr, by = c("USUBJID", "PARAMCD"))
    adlb_out <- dplyr::mutate(adlb_out, BTOXGR = .data$BTOXGR_MAP)
    adlb_out <- dplyr::select(adlb_out, -"BTOXGR_MAP")
  }

  # Labels are collected in the column order of the merged result: merge keys,
  # remaining lab columns, then the shared subject-level columns.
  remaining_adlb_columns <- columns_from_adlb[!columns_from_adlb %in% by_variables_from_adlb]
  shared_adsl_columns <- adsl_adlb_common_columns[adsl_adlb_common_columns != "USUBJID"]
  adlb_var_labels <- c(
    formatters::var_labels(adlb[by_variables_from_adlb]),
    formatters::var_labels(adlb[remaining_adlb_columns]),
    formatters::var_labels(adsl[shared_adsl_columns])
  )

  adlb_out$ATOXGR <- as.factor(adlb_out$ATOXGR)
  adlb_out$BTOXGR <- as.factor(adlb_out$BTOXGR)

  adlb_out <- df_explicit_na(adlb_out)
  formatters::var_labels(adlb_out) <- adlb_var_labels

  adlb_out
}